├── .github
    └── workflows
    │   ├── c-cpp.yml
    │   └── codeql.yml
├── Kanzi_VS2008.zip
├── Kanzi_VS2022.zip
├── LICENSE
├── README.md
├── SECURITY.md
├── bin
    └── .gitignore
├── kanzi.1.gz
├── lib
    └── .gitignore
└── src
    ├── BitStreamException.hpp
    ├── CMakeLists.txt
    ├── Context.hpp
    ├── EntropyDecoder.hpp
    ├── EntropyEncoder.hpp
    ├── Error.hpp
    ├── Event.cpp
    ├── Event.hpp
    ├── Global.cpp
    ├── Global.hpp
    ├── InputBitStream.hpp
    ├── InputStream.hpp
    ├── Listener.hpp
    ├── Magic.hpp
    ├── Makefile
    ├── Makefile.tcmalloc
    ├── Memory.hpp
    ├── OutputBitStream.hpp
    ├── OutputStream.hpp
    ├── Predictor.hpp
    ├── Seekable.hpp
    ├── SliceArray.hpp
    ├── Transform.hpp
    ├── api
        ├── Compressor.cpp
        ├── Compressor.hpp
        ├── Decompressor.cpp
        └── Decompressor.hpp
    ├── app
        ├── BlockCompressor.cpp
        ├── BlockCompressor.hpp
        ├── BlockDecompressor.cpp
        ├── BlockDecompressor.hpp
        ├── InfoPrinter.cpp
        ├── InfoPrinter.hpp
        └── Kanzi.cpp
    ├── bitstream
        ├── DebugInputBitStream.cpp
        ├── DebugInputBitStream.hpp
        ├── DebugOutputBitStream.cpp
        ├── DebugOutputBitStream.hpp
        ├── DefaultInputBitStream.cpp
        ├── DefaultInputBitStream.hpp
        ├── DefaultOutputBitStream.cpp
        └── DefaultOutputBitStream.hpp
    ├── concurrent.hpp
    ├── configure
    ├── entropy
        ├── ANSRangeDecoder.cpp
        ├── ANSRangeDecoder.hpp
        ├── ANSRangeEncoder.cpp
        ├── ANSRangeEncoder.hpp
        ├── AdaptiveProbMap.hpp
        ├── BinaryEntropyDecoder.cpp
        ├── BinaryEntropyDecoder.hpp
        ├── BinaryEntropyEncoder.cpp
        ├── BinaryEntropyEncoder.hpp
        ├── CMPredictor.cpp
        ├── CMPredictor.hpp
        ├── EntropyDecoderFactory.hpp
        ├── EntropyEncoderFactory.hpp
        ├── EntropyUtils.cpp
        ├── EntropyUtils.hpp
        ├── ExpGolombDecoder.cpp
        ├── ExpGolombDecoder.hpp
        ├── ExpGolombEncoder.cpp
        ├── ExpGolombEncoder.hpp
        ├── FPAQDecoder.cpp
        ├── FPAQDecoder.hpp
        ├── FPAQEncoder.cpp
        ├── FPAQEncoder.hpp
        ├── HuffmanCommon.cpp
        ├── HuffmanCommon.hpp
        ├── HuffmanDecoder.cpp
        ├── HuffmanDecoder.hpp
        ├── HuffmanEncoder.cpp
        ├── HuffmanEncoder.hpp
        ├── NullEntropyDecoder.hpp
        ├── NullEntropyEncoder.hpp
        ├── RangeDecoder.cpp
        ├── RangeDecoder.hpp
        ├── RangeEncoder.cpp
        ├── RangeEncoder.hpp
        ├── TPAQPredictor.cpp
        └── TPAQPredictor.hpp
    ├── io
        ├── CompressedInputStream.cpp
        ├── CompressedInputStream.hpp
        ├── CompressedOutputStream.cpp
        ├── CompressedOutputStream.hpp
        ├── IOException.hpp
        ├── IOUtil.hpp
        └── NullOutputStream.hpp
    ├── msvc_dirent.hpp
    ├── test
        ├── TestBWT.cpp
        ├── TestCompressedStream.cpp
        ├── TestDefaultBitStream.cpp
        ├── TestEntropyCodec.cpp
        └── TestTransforms.cpp
    ├── transform
        ├── AliasCodec.cpp
        ├── AliasCodec.hpp
        ├── BWT.cpp
        ├── BWT.hpp
        ├── BWTBlockCodec.cpp
        ├── BWTBlockCodec.hpp
        ├── BWTS.cpp
        ├── BWTS.hpp
        ├── DivSufSort.cpp
        ├── DivSufSort.hpp
        ├── EXECodec.cpp
        ├── EXECodec.hpp
        ├── FSDCodec.cpp
        ├── FSDCodec.hpp
        ├── LZCodec.cpp
        ├── LZCodec.hpp
        ├── NullTransform.hpp
        ├── RLT.cpp
        ├── RLT.hpp
        ├── ROLZCodec.cpp
        ├── ROLZCodec.hpp
        ├── SBRT.cpp
        ├── SBRT.hpp
        ├── SRT.cpp
        ├── SRT.hpp
        ├── TextCodec.cpp
        ├── TextCodec.hpp
        ├── TransformFactory.hpp
        ├── TransformSequence.hpp
        ├── UTFCodec.cpp
        ├── UTFCodec.hpp
        ├── ZRLT.cpp
        └── ZRLT.hpp
    ├── types.hpp
    ├── util.hpp
    └── util
        ├── Clock.hpp
        ├── Printer.hpp
        ├── XXHash.hpp
        └── strings.hpp


/.github/workflows/c-cpp.yml:
--------------------------------------------------------------------------------
 1 | name: C/C++ CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 |     strategy:
12 |       matrix:
13 |         os: [ ubuntu-latest, macos-latest ]
14 |         compiler: [ clang, gcc ]
15 |     runs-on: ${{ matrix.os }}
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v4
19 |     - name: make  # use the matrix compiler; assumes src/Makefile compiles via $(CXX) - confirm
20 |       run: cd src && make clean && make CXX=${{ matrix.compiler == 'gcc' && 'g++' || 'clang++' }} all
21 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | #
 7 | # ******** NOTE ********
 8 | # We have attempted to detect the languages in your repository. Please check
 9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: [ "master" ]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: [ "master" ]
20 |   schedule:
21 |     - cron: '18 0 * * 5'
22 | 
23 | jobs:
24 |   analyze:
25 |     name: Analyze
26 |     runs-on: ubuntu-latest
27 |     permissions:
28 |       actions: read
29 |       contents: read
30 |       security-events: write
31 | 
32 |     strategy:
33 |       fail-fast: false
34 |       matrix:
35 |         language: [ 'cpp' ]
36 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37 |         # Use only 'java' to analyze code written in Java, Kotlin or both
38 |         # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
39 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
40 | 
41 |     steps:
42 |     - name: Checkout repository  # same major version as the checkout step in c-cpp.yml
43 |       uses: actions/checkout@v4
44 | 
45 |     # Initializes the CodeQL tools for scanning.
46 |     - name: Initialize CodeQL
47 |       uses: github/codeql-action/init@v3
48 |       with:
49 |         languages: ${{ matrix.language }}
50 |         # If you wish to specify custom queries, you can do so here or in a config file.
51 |         # By default, queries listed here will override any specified in a config file.
52 |         # Prefix the list here with "+" to use these queries and those in the config file.
53 | 
54 |         # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
55 |         # queries: security-extended,security-and-quality
56 | 
57 | 
58 |     # Autobuild attempts to build any compiled languages  (C/C++, C#, Go, or Java).
59 |     # If this step fails, then you should remove it and run the build manually (see below)
60 |     - name: Autobuild
61 |       uses: github/codeql-action/autobuild@v3
62 | 
63 |     # ℹ️ Command-line programs to run using the OS shell.
64 |     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
65 | 
66 |     #   If the Autobuild fails above, remove it and uncomment the following three lines.
67 |     #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
68 | 
69 |     # - run: |
70 |     #   echo "Run, Build Application using script"
71 |     #   ./location_of_script_within_repo/buildscript.sh
72 | 
73 |     - name: Perform CodeQL Analysis
74 |       uses: github/codeql-action/analyze@v3
75 |       with:
76 |         category: "/language:${{matrix.language}}"
77 | 


--------------------------------------------------------------------------------
/Kanzi_VS2008.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flanglet/kanzi-cpp/6a7e95d3936b79140d642875602ab61da52ad632/Kanzi_VS2008.zip


--------------------------------------------------------------------------------
/Kanzi_VS2022.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flanglet/kanzi-cpp/6a7e95d3936b79140d642875602ab61da52ad632/Kanzi_VS2022.zip


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | Security updates are applied only to the latest release.
 4 | 
 5 | ## Vulnerability Definition
 6 | 
 7 | A security vulnerability is a bug that, given a certain input, triggers a crash or an infinite loop. Compression and decompression failures do not belong in this category.
 8 | 
 9 | ## Reporting a Vulnerability
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.** If you have discovered a security vulnerability in this project, report it privately.
12 | 
13 | Please disclose it at [security advisory](https://github.com/flanglet/kanzi-cpp/security/advisories/new).
14 | 
15 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
16 | 
17 | * Operating system
18 | * Hardware: CPU, memory
19 | * Kanzi version
20 | * Command line invoked
21 | * Error reported/crash data/log output
22 | 
23 | If possible provide a minimal reproducer.
24 | 


--------------------------------------------------------------------------------
/bin/.gitignore:
--------------------------------------------------------------------------------
 1 | **/Debug/**
 2 | **/Release/**
 3 | **/*.obj
 4 | **/*.o
 5 | **/*.htm
 6 | **/*.exe
 7 | **/*.idb
 8 | **/*.pdb
 9 | **/*.ncb
10 | **/*.sln
11 | **/*.suo
12 | **/*vcproj*


--------------------------------------------------------------------------------
/kanzi.1.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flanglet/kanzi-cpp/6a7e95d3936b79140d642875602ab61da52ad632/kanzi.1.gz


--------------------------------------------------------------------------------
/lib/.gitignore:
--------------------------------------------------------------------------------
 1 | **/Debug/**
 2 | **/Release/**
 3 | **/*.obj
 4 | **/*.o
 5 | **/*.htm
 6 | **/*.exe
 7 | **/*.idb
 8 | **/*.pdb
 9 | **/*.ncb
10 | **/*.sln
11 | **/*.suo
12 | **/*vcproj*


--------------------------------------------------------------------------------
/src/BitStreamException.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _BitStreamException_
18 | #define _BitStreamException_
19 | 
20 | #include <string>
21 | #include <stdexcept>
22 | #include "types.hpp"
23 | 
24 | 
25 | namespace kanzi
26 | {
27 | 
28 |    class BitStreamException : public std::runtime_error
29 |    {
30 |    private:
31 |        int _code;
32 | 
33 |    public:
34 |        enum BitStreamStatus {
35 |            UNDEFINED = 0,
36 |            INPUT_OUTPUT = 1,
37 |            END_OF_STREAM = 2,
38 |            INVALID_STREAM = 3,
39 |            STREAM_CLOSED = 4
40 |        };
41 | 
42 |        BitStreamException(const std::string& msg) : std::runtime_error(msg)
43 |        {
44 |            _code = UNDEFINED;
45 |        }
46 | 
47 |        BitStreamException(const std::string& msg, int code) : std::runtime_error(msg), _code(code)
48 |        {
49 |        }
50 | 
51 |        int error() const { return _code; }
52 | 
53 |        virtual ~BitStreamException() NOEXCEPT {}
54 |    };
55 | 
56 | }
57 | #endif
58 | 
59 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.10)
  2 | project(kanzi)
  3 | 
  4 | # Set C++ standard
  5 | set(CMAKE_CXX_STANDARD 17)
  6 | #set(CMAKE_CXX_STANDARD_REQUIRED True)
  7 | #set(CMAKE_CXX_COMPILER "clang++")
  8 | 
  9 | if(CONCURRENCY_DISABLED)
 10 |     add_definitions(-DCONCURRENCY_DISABLED)
 11 | endif()
 12 | 
 13 | if(MSVC)  # cl.exe does not understand GCC-style options (-std=, -fPIC, -march=...); keep them out of MSVC builds
 14 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DNDEBUG")
 15 | else()
 16 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O3 -fomit-frame-pointer -fPIC -DNDEBUG -pedantic -march=native -fno-rtti")
 17 | endif()
 18 | 
 19 | # Source files
 20 | set(LIB_COMMON_SOURCES
 21 |     Global.cpp
 22 |     Event.cpp
 23 |     entropy/EntropyUtils.cpp
 24 |     entropy/HuffmanCommon.cpp
 25 |     entropy/CMPredictor.cpp
 26 |     entropy/TPAQPredictor.cpp
 27 |     transform/AliasCodec.cpp
 28 |     transform/BWT.cpp
 29 |     transform/BWTS.cpp
 30 |     transform/DivSufSort.cpp
 31 |     transform/SBRT.cpp
 32 |     transform/BWTBlockCodec.cpp
 33 |     transform/LZCodec.cpp
 34 |     transform/FSDCodec.cpp
 35 |     transform/ROLZCodec.cpp
 36 |     transform/RLT.cpp
 37 |     transform/SRT.cpp
 38 |     transform/TextCodec.cpp
 39 |     transform/UTFCodec.cpp
 40 |     transform/EXECodec.cpp
 41 |     transform/ZRLT.cpp
 42 | )
 43 | 
 44 | set(LIB_COMP_SOURCES
 45 |     api/Compressor.cpp
 46 |     bitstream/DebugOutputBitStream.cpp
 47 |     bitstream/DefaultOutputBitStream.cpp
 48 |     io/CompressedOutputStream.cpp
 49 |     entropy/ANSRangeEncoder.cpp
 50 |     entropy/BinaryEntropyEncoder.cpp
 51 |     entropy/ExpGolombEncoder.cpp
 52 |     entropy/FPAQEncoder.cpp
 53 |     entropy/HuffmanEncoder.cpp
 54 |     entropy/RangeEncoder.cpp
 55 | )
 56 | 
 57 | set(LIB_DECOMP_SOURCES
 58 |     api/Decompressor.cpp
 59 |     bitstream/DebugInputBitStream.cpp
 60 |     bitstream/DefaultInputBitStream.cpp
 61 |     io/CompressedInputStream.cpp
 62 |     entropy/ANSRangeDecoder.cpp
 63 |     entropy/BinaryEntropyDecoder.cpp
 64 |     entropy/ExpGolombDecoder.cpp
 65 |     entropy/FPAQDecoder.cpp
 66 |     entropy/HuffmanDecoder.cpp
 67 |     entropy/RangeDecoder.cpp
 68 | )
 69 | 
 70 | set(TEST_SOURCES
 71 |     test/TestEntropyCodec.cpp
 72 |     test/TestBWT.cpp
 73 |     test/TestCompressedStream.cpp
 74 |     test/TestDefaultBitStream.cpp
 75 |     test/TestTransforms.cpp
 76 | )
 77 | 
 78 | set(APP_SOURCES
 79 |     app/Kanzi.cpp
 80 |     app/InfoPrinter.cpp
 81 |     app/BlockCompressor.cpp
 82 |     app/BlockDecompressor.cpp
 83 | )
 84 | 
 85 | # Libraries
 86 | add_library(libkanzi STATIC ${LIB_COMMON_SOURCES} ${LIB_COMP_SOURCES} ${LIB_DECOMP_SOURCES})
 87 | add_library(libkanzi_shared SHARED ${LIB_COMMON_SOURCES} ${LIB_COMP_SOURCES} ${LIB_DECOMP_SOURCES})
 88 | set_target_properties(libkanzi PROPERTIES OUTPUT_NAME "kanzi")
 89 | set_target_properties(libkanzi_shared PROPERTIES OUTPUT_NAME "kanzi")
 90 | 
 91 | #add_library(libkanzi_comp STATIC ${LIB_COMP_SOURCES})
 92 | #add_library(libkanzi_decomp STATIC ${LIB_DECOMP_SOURCES})
 93 | 
 94 | #add_library(libkanzi_comp_shared SHARED ${LIB_COMP_SOURCES})
 95 | #add_library(libkanzi_decomp_shared SHARED ${LIB_DECOMP_SOURCES})
 96 | 
 97 | # Test executables
 98 | add_executable(testBWT test/TestBWT.cpp)
 99 | target_link_libraries(testBWT libkanzi)
100 | 
101 | add_executable(testTransforms test/TestTransforms.cpp)
102 | target_link_libraries(testTransforms libkanzi)
103 | 
104 | add_executable(testEntropyCodec test/TestEntropyCodec.cpp)
105 | target_link_libraries(testEntropyCodec libkanzi)
106 | 
107 | add_executable(testDefaultBitStream test/TestDefaultBitStream.cpp)
108 | target_link_libraries(testDefaultBitStream libkanzi)
109 | 
110 | add_executable(testCompressedStream test/TestCompressedStream.cpp)
111 | target_link_libraries(testCompressedStream libkanzi)
112 | 
113 | # Main executable
114 | add_executable(kanzi ${APP_SOURCES})
115 | target_link_libraries(kanzi libkanzi)
116 | 
117 | # Custom target to build all tests
118 | add_custom_target(test
119 |     DEPENDS testBWT testTransforms testEntropyCodec testDefaultBitStream testCompressedStream
120 | )
121 | # Custom target to build static libraries
122 | add_custom_target(static_lib
123 |     DEPENDS libkanzi #libkanzi_comp libkanzi_decomp
124 | )
125 | 
126 | # Custom target to build shared libraries
127 | add_custom_target(shared_lib
128 |     DEPENDS libkanzi_shared #libkanzi_comp_shared libkanzi_decomp_shared
129 | )
130 | 
131 | # Custom target to build all libraries (static and shared)
132 | add_custom_target(lib
133 |     DEPENDS static_lib shared_lib
134 | )
135 | 


--------------------------------------------------------------------------------
/src/Context.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _Context_
 18 | #define _Context_
 19 | 
 20 | #include <map>
 21 | #include <sstream>
 22 | #include <string>
 23 | #include "concurrent.hpp"
 24 | #include "util/strings.hpp"
 25 | 
 26 | namespace kanzi
 27 | {
 28 | 
 29 |    // Poor man's equivalent to std::variant used to support C++98 and up.
 30 |    // A union cannot be used due to the std::string field.
 31 |    // The extra memory used does not matter for the application context since
 32 |    // the map is small.
 33 |    typedef struct ContextVal {
 34 |        int64 lVal;       // numeric payload, read when isString is false
 35 |        std::string sVal; // text payload, read when isString is true
 36 |        bool isString;    // tells which of the two payloads is in use
 37 | 
 38 |        ContextVal() : lVal(0), isString(false) {}
 39 |        ContextVal(bool b, int64 val, const std::string& str) : lVal(val), sVal(str), isString(b) {}
 40 |    } ctxVal;
 41 | 
 42 |    // Small key/value store: each string key maps to either an int64 or a string.
 43 |    class Context
 44 |    {
 45 |    public:
 46 | #ifdef CONCURRENCY_ENABLED
 47 |        Context(ThreadPool* pool = nullptr) : _pool(pool) {}
 48 |        Context(const Context& other) : _map(other._map), _pool(other._pool) {}
 49 |        Context(const Context& other, ThreadPool* pool) : _map(other._map), _pool(pool) {}
 50 |        Context& operator=(const Context& other) = default;
 51 | #else
 52 |        Context() {}
 53 |        Context(const Context& other) : _map(other._map) {}
 54 |        Context& operator=(const Context& other) { _map = other._map; return *this; }
 55 | #endif
 56 | 
 57 |        virtual ~Context() {}
 58 |        bool has(const std::string& key) const;
 59 |        int getInt(const std::string& key, int defValue = 0) const;
 60 |        int64 getLong(const std::string& key, int64 defValue = 0) const;
 61 |        std::string getString(const std::string& key, const std::string& defValue = "") const;
 62 |        void putInt(const std::string& key, int value);
 63 |        void putLong(const std::string& key, int64 value);
 64 |        void putString(const std::string& key, const std::string& value);
 65 | 
 66 | #ifdef CONCURRENCY_ENABLED
 67 |        ThreadPool* getPool() const { return _pool; }
 68 | #endif
 69 | 
 70 |    private:
 71 |        std::map<std::string, ContextVal> _map;
 72 | 
 73 | #ifdef CONCURRENCY_ENABLED
 74 |        ThreadPool* _pool;
 75 | #endif
 76 |    };
 77 | 
 78 | 
 79 |    inline bool Context::has(const std::string& key) const
 80 |    {
 81 |       return _map.count(key) != 0; // true when a value of either kind is stored under key
 82 |    }
 83 | 
 84 | 
 85 |    inline int Context::getInt(const std::string& key, int defValue) const
 86 |    {
 87 |       return static_cast<int>(getLong(key, defValue)); // converts the int64 result of getLong to int
 88 |    }
 89 | 
 90 | 
 91 |    inline int64 Context::getLong(const std::string& key, int64 defValue) const
 92 |    {
 93 |       // defValue is returned when the key is absent or holds a string.
 94 |       const std::map<std::string, ContextVal>::const_iterator entry = _map.find(key);
 95 |       if ((entry == _map.end()) || entry->second.isString)
 96 |          return defValue;
 97 | 
 98 |       return entry->second.lVal;
 99 |    }
100 | 
101 | 
102 |    inline std::string Context::getString(const std::string& key, const std::string& defValue) const
103 |    {
104 |       // defValue is returned when the key is absent or holds a number.
105 |       const std::map<std::string, ContextVal>::const_iterator entry = _map.find(key);
106 |       if ((entry == _map.end()) || !entry->second.isString)
107 |          return defValue;
108 | 
109 |       return entry->second.sVal;
110 |    }
111 | 
112 | 
113 |    inline void Context::putInt(const std::string& key, int value)
114 |    {
115 |       putLong(key, value); // ints are stored in the same int64 field that putLong writes
116 |    }
117 | 
118 | 
119 |    inline void Context::putLong(const std::string& key, int64 value)
120 |    {
121 |       _map[key] = ContextVal(false, value, ""); // numeric entry: isString is false, sVal is empty
122 |    }
123 | 
124 | 
125 |    inline void Context::putString(const std::string& key, const std::string& value)
126 |    {
127 |       _map[key] = ContextVal(true, 0, value); // string entry: isString is true, lVal is zero
128 |    }
129 | 
130 | }
131 | #endif
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/src/EntropyDecoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _EntropyDecoder_
18 | #define _EntropyDecoder_
19 | 
20 | #include "InputBitStream.hpp"
21 | 
22 | namespace kanzi
23 | {
24 |    // EntropyDecoder entropy decodes data from a bitstream
25 |    class EntropyDecoder
26 |    {
27 |    public:
28 |        // Decode data from the bitstream into the array provided ('blkptr' and 'len' are presumably
29 |        // the start index and byte count - confirm). Return the number of bytes read from the bitstream
30 |        virtual int decode(byte block[], uint blkptr, uint len) = 0;
31 | 
32 |        // Return the underlying bitstream
33 |        virtual InputBitStream& getBitStream() const = 0;
34 | 
35 |        // Must be called before getting rid of the entropy decoder.
36 |        // Trying to decode after a call to dispose gives undefined behavior
37 |        virtual void dispose() = 0;
38 | 
39 |        virtual ~EntropyDecoder(){} // virtual so implementations can be destroyed through this interface
40 |    };
41 | 
42 | }
43 | #endif
44 | 
45 | 


--------------------------------------------------------------------------------
/src/EntropyEncoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _EntropyEncoder_
18 | #define _EntropyEncoder_
19 | 
20 | #include "OutputBitStream.hpp"
21 | 
22 | namespace kanzi
23 | {
24 |    // EntropyEncoder entropy encodes data to a bitstream
25 |    class EntropyEncoder
26 |    {
27 |    public:
28 |        // Encode the array provided into the bitstream ('blkptr' and 'len' are presumably
29 |        // the start index and byte count - confirm). Return the number of bytes written to the bitstream
30 |        virtual int encode(const byte block[], uint blkptr, uint len) = 0;
31 | 
32 |        // Return the underlying bitstream
33 |        virtual OutputBitStream& getBitStream() const = 0;
34 | 
35 |        // Must be called before getting rid of the entropy encoder.
36 |        // Trying to encode after a call to dispose gives undefined behavior
37 |        virtual void dispose() = 0;
38 | 
39 |        virtual ~EntropyEncoder(){} // virtual so implementations can be destroyed through this interface
40 |    };
41 | 
42 | }
43 | #endif
44 | 
45 | 


--------------------------------------------------------------------------------
/src/Error.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Error_
18 | #define _Error_
19 | 
20 | namespace kanzi
21 | {
22 | 
23 |    struct Error
24 |    {
25 |        // Error codes, referenced as Error::ERR_xxx (struct members are public by default).
26 |        enum ErrorCode {
27 |            ERR_MISSING_PARAM = 1,
28 |            ERR_BLOCK_SIZE = 2,
29 |            ERR_INVALID_CODEC = 3,
30 |            ERR_CREATE_COMPRESSOR = 4,
31 |            ERR_CREATE_DECOMPRESSOR = 5,
32 |            ERR_OUTPUT_IS_DIR = 6,
33 |            ERR_OVERWRITE_FILE = 7,
34 |            ERR_CREATE_FILE = 8,
35 |            ERR_CREATE_BITSTREAM = 9,
36 |            ERR_OPEN_FILE = 10,
37 |            ERR_READ_FILE = 11,
38 |            ERR_WRITE_FILE = 12,
39 |            ERR_PROCESS_BLOCK = 13,
40 |            ERR_CREATE_CODEC = 14,
41 |            ERR_INVALID_FILE = 15,
42 |            ERR_STREAM_VERSION = 16,
43 |            ERR_CREATE_STREAM = 17,
44 |            ERR_INVALID_PARAM = 18,
45 |            ERR_CRC_CHECK = 19,
46 |            ERR_RESERVED_NAME = 20,
47 |            ERR_UNKNOWN = 127
48 |        };
49 |    };
50 | 
51 | }
52 | #endif
53 | 
54 | 


--------------------------------------------------------------------------------
/src/Event.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <iomanip>
 17 | #include <ios>
 18 | #include <sstream>
 19 | #include "Event.hpp"
 20 | 
 21 | using namespace kanzi;
 22 | 
 23 | Event::Event(Event::Type type, int id, const std::string& msg, clock_t evtTime)
 24 |     : _type(type)
 25 |     , _time(evtTime)
 26 |     , _msg(msg)
 27 |     , _id(id)
 28 |     , _size(0)           // message event: the data fields below get neutral values
 29 |     , _offset(-1)
 30 |     , _hash(0)
 31 |     , _hashType(NO_HASH)
 32 |     , _skipFlags(0)
 33 | {
 34 | }
 35 | 
 36 | // Data event: stores size, time, hash (with its type), offset and skip flags.
 37 | // _msg is not listed in the initializers, so it is default-constructed.
 38 | Event::Event(Event::Type type, int id, int64 size, clock_t evtTime,
 39 |              uint64 hash, HashType hashType, int64 offset, uint8 skipFlags)
 40 |     : _type(type)
 41 |     , _time(evtTime)
 42 |     , _id(id)
 43 |     , _size(size), _offset(offset)
 44 |     , _hash(hash), _hashType(hashType)
 45 |     , _skipFlags(skipFlags)
 46 | {
 47 | }
 48 | 
 49 | // Renders the event as text. A non-empty message is returned verbatim; otherwise a
 50 | // JSON-like description is built: "id" only when non-negative, "time" for every type but
 51 | // BLOCK_INFO, "hash" as zero-padded uppercase hex, "offset" and 8 skip-flag bits for BLOCK_INFO.
 52 | std::string Event::toString() const
 53 | {
 54 |     if (_msg.size() > 0)
 55 |         return _msg;
 56 | 
 57 |     std::stringstream ss;
 58 |     ss << "{ \"type\":\"" << getTypeAsString() << "\"";
 59 | 
 60 |     if (_id >= 0)
 61 |         ss << ", \"id\":" << getId();
 62 | 
 63 |     ss << ", \"size\":" << getSize();
 64 | 
 65 |     if (getType() != BLOCK_INFO)
 66 |         ss << ", \"time\":" << getTime();
 67 | 
 68 |     if (_hashType != NO_HASH) {
 69 |         const int digits = (_hashType == SIZE_32) ? 8 : 16;
 70 |         ss << ", \"hash\":\"" << std::uppercase << std::setfill('0') << std::setw(digits)
 71 |            << std::hex << getHash() << "\"";
 72 |         // std::hex is sticky: switch back to decimal so "offset" below is not printed in hex
 73 |         ss << std::dec;
 74 |     }
 75 | 
 76 |     if (getType() == BLOCK_INFO) {
 77 |         ss << ", \"offset\":" << getOffset() << ", \"skipFlags\": ";
 78 | 
 79 |         for (int mask = 128; mask >= 1; mask >>= 1) // most significant bit first
 80 |             ss << ((_skipFlags & mask) == 0 ? "0" : "1");
 81 |     }
 82 | 
 83 |     ss << " }";
 84 |     return ss.str();
 85 | }
 86 | 
 87 | // Returns the name of this event's type as text: each enumerator maps to
 88 | // its own identifier, and any other value yields "Unknown Type".
 89 | // The result is a fresh std::string on every call.
 90 | std::string Event::getTypeAsString() const
 91 | {
 92 |     // Cases follow the declaration order of Event::Type.
 93 |     switch (_type) {
 94 |     case COMPRESSION_START:
 95 |         return "COMPRESSION_START";
 96 |     case COMPRESSION_END:
 97 |         return "COMPRESSION_END";
 98 | 
 99 |     case BEFORE_TRANSFORM:
100 |         return "BEFORE_TRANSFORM";
101 |     case AFTER_TRANSFORM:
102 |         return "AFTER_TRANSFORM";
103 | 
104 |     case BEFORE_ENTROPY:
105 |         return "BEFORE_ENTROPY";
106 |     case AFTER_ENTROPY:
107 |         return "AFTER_ENTROPY";
108 | 
109 |     case DECOMPRESSION_START:
110 |         return "DECOMPRESSION_START";
111 |     case DECOMPRESSION_END:
112 |         return "DECOMPRESSION_END";
113 |     case AFTER_HEADER_DECODING:
114 |         return "AFTER_HEADER_DECODING";
115 | 
116 |     case BLOCK_INFO:
117 |         return "BLOCK_INFO";
118 | 
119 |     // Any value that is not one of the enumerators above.
120 |     default:
121 |         return "Unknown Type";
122 |     }
123 | }
124 | 
125 | 


--------------------------------------------------------------------------------
/src/Event.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Event_
18 | #define _Event_
19 | 
20 | #include <string>
21 | #include <time.h>
22 | #include "types.hpp"
23 | 
24 | namespace kanzi
25 | {
26 | 
27 |    class Event {
28 |       public:
29 |           enum Type { // each value is named by getTypeAsString()
30 |               COMPRESSION_START,
31 |               COMPRESSION_END,
32 |               BEFORE_TRANSFORM,
33 |               AFTER_TRANSFORM,
34 |               BEFORE_ENTROPY,
35 |               AFTER_ENTROPY,
36 |               DECOMPRESSION_START,
37 |               DECOMPRESSION_END,
38 |               AFTER_HEADER_DECODING,
39 |               BLOCK_INFO
40 |           };
41 |           // Kind of hash attached to the event; with NO_HASH, getHash() returns 0
42 |           enum HashType {
43 |               NO_HASH,
44 |               SIZE_32,
45 |               SIZE_64
46 |           };
47 |           // Builds an event from a type, an id and a message
48 |           Event(Type type, int id, const std::string& msg, clock_t evtTime = 0);
49 |           // Builds an event from a type, an id and numeric details (size, hash, offset, skip flags)
50 |           Event(Type type, int id, int64 size, clock_t evtTime, uint64 hash = 0,
51 |                 HashType hashType = NO_HASH, int64 offset = -1, uint8 skipFlags = 0);
52 | 
53 |           virtual ~Event() {}
54 | 
55 |           int getId() const { return _id; }
56 | 
57 |           int64 getSize() const { return _size; }
58 | 
59 |           Event::Type getType() const { return _type; }
60 | 
61 |           std::string getTypeAsString() const;
62 | 
63 |           clock_t getTime() const { return _time; }
64 |           // Returns the stored hash, or 0 when the hash type is NO_HASH
65 |           uint64 getHash() const { return _hashType != NO_HASH ? _hash : 0; }
66 | 
67 |           int64 getOffset() const { return _offset; }
68 | 
69 |           HashType getHashType() const { return _hashType; }
70 | 
71 |           std::string toString() const;
72 | 
73 |       private:
74 |           Event::Type _type;
75 |           clock_t _time;
76 |           std::string _msg;
77 |           int _id;
78 |           int64 _size;
79 |           int64 _offset; // defaults to -1 in the numeric constructor
80 |           uint64 _hash; // only exposed by getHash() when _hashType != NO_HASH
81 |           HashType _hashType;
82 |           uint8 _skipFlags;
83 |       };
84 | }
85 | #endif
86 | 
87 | 


--------------------------------------------------------------------------------
/src/Global.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _Global_
 18 | #define _Global_
 19 | 
 20 | #include <set>
 21 | #include <string>
 22 | 
 23 | #include "types.hpp"
 24 | 
 25 | namespace kanzi {
 26 | 
 27 |    class Global {
 28 |    public:
 29 |        enum DataType { UNDEFINED, TEXT, MULTIMEDIA, EXE, NUMERIC, BASE64, DNA, BIN, UTF8, SMALL_ALPHABET };
 30 |        // stretch indexes its lookup table directly with d (no range check)
 31 |        static int stretch(int d); // ln(x / (1 - x))
 32 | 
 33 |        static int squash(int d); // 1 / (1 + e-x)  (inverse of stretch)
 34 | 
 35 |        static int log2(uint32 x); // fast, integer rounded
 36 | 
 37 |        static int log2(uint64 x); // fast, integer rounded
 38 | 
 39 |        static int _log2(uint32 x); // same as log2 minus check on input value
 40 | 
 41 |        static int _log2(uint64 x); // same as log2 minus check on input value
 42 | 
 43 |        static int trailingZeros(uint32 x); // x cannot be 0
 44 | 
 45 |        static int trailingZeros(uint64 x); // x cannot be 0
 46 | 
 47 |        static int log2_1024(uint32 x); // slow, accurate to 1/1024th
 48 | 
 49 |        static void computeJobsPerTask(int jobsPerTask[], int jobs, int tasks);
 50 | 
 51 |        static int computeFirstOrderEntropy1024(int blockLen, const uint histo[]);
 52 | 
 53 |        static void computeHistogram(const byte block[], int end, uint freqs[], bool isOrder0=true, bool withTotal=false);
 54 | 
 55 |        static DataType detectSimpleType(int count, const uint histo[]);
 56 | 
 57 |        static bool isReservedName(std::string fileName);
 58 |        // The constructor is private: the public API consists of static functions only
 59 |    private:
 60 |        Global();
 61 |        ~Global() {}
 62 | 
 63 |        static const Global _singleton;
 64 |        static const int LOG2_4096[257]; // 4096*Math.log2(x)
 65 |        static const int LOG2[256]; // int(Math.log2(x-1))
 66 |        static int STRETCH[4096];
 67 |        static int SQUASH[4096];
 68 |        static char BASE64_SYMBOLS[];
 69 |        static char DNA_SYMBOLS[];
 70 |        static char NUMERIC_SYMBOLS[];
 71 | 
 72 |        std::set<std::string> _reservedNames;
 73 |    };
 74 | 
 75 | 
 76 |    // Logistic function p = 1/(1 + exp(-d)): d is scaled by 8 bits, p is scaled by 12 bits
 77 |    inline int Global::squash(int d)
 78 |    {
 79 |        if (d <= -2048)
 80 |            return 0;
 81 | 
 82 |        return (d >= 2048) ? 4095 : SQUASH[d + 2047];
 83 |    }
 84 | 
 85 |    inline int Global::stretch(int d)
 86 |    {
 87 |        return STRETCH[d]; // direct table lookup: d is not range checked (the table has 4096 entries)
 88 |    }
 89 | 
 90 |    // Index of the highest set bit of x, without input validation: x cannot be 0
 91 |    inline int Global::_log2(uint32 x)
 92 |    {
 93 |        #if defined(_MSC_VER)
 94 |            unsigned long msb;
 95 |            _BitScanReverse(&msb, x);
 96 |            return int(msb);
 97 |        #elif defined(__GNUG__) || defined(__clang__)
 98 |            // clz is in [0..31] for a non zero input, so 31 - clz equals 31 ^ clz
 99 |            return 31 - __builtin_clz(x);
100 |        #else
101 |            // Portable fallback: narrow x down to 8 bits, then use the lookup table
102 |            int shift = 0;
103 | 
104 |            if ((x >> 16) != 0) {
105 |               x >>= 16;
106 |               shift = 16;
107 |            }
108 | 
109 |            if ((x >> 8) != 0) {
110 |               x >>= 8;
111 |               shift += 8;
112 |            }
113 | 
114 |            return shift + Global::LOG2[x - 1];
115 |        #endif
116 |    }
117 | 
118 | 
119 |    // Index of the highest set bit of x, without input validation: x cannot be 0
120 |    inline int Global::_log2(uint64 x)
121 |    {
122 |        #if defined(_MSC_VER) && defined(_M_AMD64)
123 |            unsigned long msb;
124 |            _BitScanReverse64(&msb, x);
125 |            return int(msb);
126 |        #elif defined(__GNUG__) || defined(__clang__)
127 |            // clzll is in [0..63] for a non zero input, so 63 - clzll equals 63 ^ clzll
128 |            return 63 - __builtin_clzll(x);
129 |        #else
130 |            // Portable fallback: narrow x down to 8 bits, then use the lookup table
131 |            int shift = 0;
132 | 
133 |            if ((x >> 32) != 0) {
134 |               x >>= 32;
135 |               shift = 32;
136 |            }
137 | 
138 |            if ((x >> 16) != 0) {
139 |               x >>= 16;
140 |               shift += 16;
141 |            }
142 | 
143 |            if ((x >> 8) != 0) {
144 |               x >>= 8;
145 |               shift += 8;
146 |            }
147 | 
148 |            return shift + Global::LOG2[x - 1];
149 |        #endif
150 |    }
151 | 
152 | 
153 |    // Returns the number of trailing zero bits of x. x cannot be 0
154 |    inline int Global::trailingZeros(uint32 x)
155 |    {
156 |        #if defined(_MSC_VER)
157 |            unsigned long res;
158 |            _BitScanForward(&res, x);
159 |            return int(res);
160 |        #elif defined(__GNUG__) || defined(__clang__)
161 |            return __builtin_ctz(x);
162 |        #else
163 |            // (x & (~x + 1)) isolates the lowest set bit, i.e. 2^k: its log2 is k.
164 |            // Subtracting 1 first would yield k-1 and call _log2(0) for odd x.
165 |            return _log2(x & (~x + 1));
166 |        #endif
167 |    }
168 | 
169 | 
170 |    // Returns the number of trailing zero bits of x. x cannot be 0
171 |    inline int Global::trailingZeros(uint64 x)
172 |    {
173 |        #if defined(_MSC_VER) && defined(_M_AMD64)
174 |            unsigned long res;
175 |            _BitScanForward64(&res, x);
176 |            return int(res);
177 |        #elif defined(__GNUG__) || defined(__clang__)
178 |            return __builtin_ctzll(x);
179 |        #else
180 |            // (x & (~x + 1)) isolates the lowest set bit, i.e. 2^k: its log2 is k.
181 |            // Subtracting 1 first would yield k-1 and call _log2(0) for odd x.
182 |            return _log2(x & (~x + 1));
183 |        #endif
184 |    }
185 | }
186 | #endif
187 | 
188 | 


--------------------------------------------------------------------------------
/src/InputBitStream.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _InputBitStream_
18 | #define _InputBitStream_
19 | 
20 | #include "types.hpp"
21 | 
22 | namespace kanzi
23 | {
24 | 
25 |    class InputBitStream // interface: every operation is pure virtual
26 |    {
27 |    public:
28 |        // Returns 1 or 0
29 |        virtual int readBit() = 0;
30 | 
31 |        // Length is the number of bits in [1..64]. Return the bits read as a uint64
32 |        // Throws if the stream is closed.
33 |        virtual uint64 readBits(uint length) = 0;
34 | 
35 |        // Read bits and put them in the byte array. Length is the number of bits
36 |        // Return the number of bits read.
37 |        // Throws if the stream is closed.
38 |        virtual uint readBits(byte bits[], uint length) = 0;
39 | 
40 |        virtual void close() = 0; // once closed, hasMoreToRead() returns false
41 | 
42 |        // Number of bits read
43 |        virtual uint64 read() const = 0;
44 | 
45 |        // Return false when the bitstream is closed or the End-Of-Stream has been reached
46 |        virtual bool hasMoreToRead() = 0;
47 | 
48 |        InputBitStream(){}
49 | 
50 |        virtual ~InputBitStream(){} // virtual: implementations can be deleted through this type
51 |    };
52 | 
53 | }
54 | #endif
55 | 
56 | 


--------------------------------------------------------------------------------
/src/InputStream.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _InputStream_
18 | #define _InputStream_
19 | 
20 | #include <istream>
21 | 
22 | namespace kanzi
23 | {
24 |    // Maps to istream
25 |    typedef std::istream InputStream;
26 | }
27 | #endif
28 | 
29 | 


--------------------------------------------------------------------------------
/src/Listener.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Listener_
18 | #define _Listener_
19 | 
20 | namespace kanzi
21 | {
22 | 
23 |    template <class T>
24 |    class Listener // interface parameterized by the event type T
25 |    {
26 |    public:
27 |        Listener(){}
28 |        // Called with the event to handle; must be provided by implementations
29 |        virtual void processEvent(const T& evt) = 0;
30 | 
31 |        virtual ~Listener(){} // virtual: implementations can be deleted through this type
32 |    };
33 | 
34 | }
35 | #endif
36 | 
37 | 


--------------------------------------------------------------------------------
/src/OutputBitStream.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _OutputBitStream_
18 | #define _OutputBitStream_
19 | 
20 | #include "types.hpp"
21 | 
22 | namespace kanzi
23 | {
24 | 
25 |    class OutputBitStream // interface: every operation is pure virtual
26 |    {
27 |    public:
28 |        // Write the least significant bit of the input integer
29 |        // Throws if the stream is closed.
30 |        virtual void writeBit(int bit) = 0;
31 | 
32 |        // Length is the number of bits in [1..64]. Return the number of bits written.
33 |        // Throws if the stream is closed.
34 |        virtual uint writeBits(uint64 bits, uint length) = 0;
35 | 
36 |        // Write bits out of the byte array. Length is the number of bits.
37 |        // Return the number of bits written.
38 |        // Throws if the stream is closed.
39 |        virtual uint writeBits(const byte bits[], uint length) = 0;
40 | 
41 |        virtual void close() = 0; // the write operations throw once the stream is closed
42 | 
43 |        // Number of bits written
44 |        virtual uint64 written() const = 0;
45 | 
46 |        OutputBitStream(){}
47 | 
48 |        virtual ~OutputBitStream(){} // virtual: implementations can be deleted through this type
49 |    };
50 | 
51 | }
52 | #endif
53 | 
54 | 


--------------------------------------------------------------------------------
/src/OutputStream.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _OutputStream_
18 | #define _OutputStream_
19 | 
20 | #include <ostream>
21 | 
22 | namespace kanzi
23 | {
24 |    // Maps to ostream
25 |    typedef std::ostream OutputStream;
26 | }
27 | #endif
28 | 
29 | 


--------------------------------------------------------------------------------
/src/Predictor.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Predictor_
18 | #define _Predictor_
19 | 
20 | namespace kanzi
21 | {
22 | 
23 |    // Predictor predicts the probability of the next bit being 1.
24 |    class Predictor
25 |    {
26 |    public:
27 |        Predictor(){}
28 | 
29 |        // Updates the internal probability model based on the observed bit
30 |        virtual void update(int bit) = 0;
31 | 
32 |        // Returns the value representing the probability of the next bit being 1
33 |        // in the [0..4095] range.
34 |        // e.g. 410 represents roughly a probability of 10% for 1 (410/4096)
35 |        virtual int get() = 0;
36 | 
37 |        virtual ~Predictor(){} // virtual: implementations can be deleted through this type
38 |    };
39 | 
40 | }
41 | #endif
42 | 
43 | 


--------------------------------------------------------------------------------
/src/Seekable.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Seekable_
18 | #define _Seekable_
19 | 
20 | #include "types.hpp"
21 | 
22 | 
23 | namespace kanzi
24 | {
25 | 
26 |    class Seekable // interface: positions are expressed in bits
27 |    {
28 |    public:
29 |        Seekable(){}
30 | 
31 |        // Return the position in bits
32 |        virtual int64 tell() = 0;
33 | 
34 |        // The position argument is in bits
35 |        // Return true on success, false on failure
36 |        virtual bool seek(int64 position) = 0;
37 | 
38 |        virtual ~Seekable(){} // virtual: implementations can be deleted through this type
39 |    };
40 | 
41 | }
42 | #endif
43 | 
44 | 


--------------------------------------------------------------------------------
/src/SliceArray.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _SliceArray_
18 | #define _SliceArray_
19 | 
20 | namespace kanzi
21 | {
22 | 
23 |    template <class T>
24 |    class SliceArray
25 |    {
26 |    public:
27 |       T* _array; // underlying buffer: never deallocated by this class
28 |       int _length; // capacity of the buffer
29 |       int _index; // index into the buffer (isValid requires 0 <= _index <= _length)
30 | 
31 |       SliceArray(T* arr, int len, int index = 0) : _array(arr), _length(len), _index(index) {}
32 | 
33 | #if __cplusplus < 201103L
34 |       SliceArray(const SliceArray& other) : _array(other._array), _length(other._length), _index(other._index) {}
35 | 
36 |       SliceArray& operator=(const SliceArray& other);
37 | 
38 |       ~SliceArray(){} // the buffer memory is left untouched
39 | #else
40 |       SliceArray(SliceArray&& other) noexcept = default;
41 | 
42 |       SliceArray& operator=(SliceArray&& other) noexcept = default;
43 | 
44 |       ~SliceArray() = default;
45 | #endif
46 | 
47 |       // Sanity check: non null buffer, non negative and consistent index and length
48 |       static bool isValid(const SliceArray& sa);
49 |    };
50 | 
51 |    template <class T>
52 |    inline bool SliceArray<T>::isValid(const SliceArray& sa) {
53 |        return !((sa._array == nullptr) || (sa._index < 0) || (sa._length < 0) || (sa._index > sa._length));
54 |    }
55 | 
56 | #if __cplusplus < 201103L
57 |    template <class T>
58 |    inline SliceArray<T>& SliceArray<T>::operator=(const SliceArray& other) {
59 |       _index = other._index; // member-wise copy: the buffer pointer is copied, not the buffer
60 |       _length = other._length;
61 |       _array = other._array;
62 |       return *this;
63 |    }
64 | #endif
65 | 
66 | }
67 | #endif
68 | 
69 | 


--------------------------------------------------------------------------------
/src/Transform.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Transform_
18 | #define _Transform_
19 | 
20 | #include "SliceArray.hpp"
21 | 
22 | namespace kanzi
23 | {
24 | 
25 |    // Transform is a class used to transform an input byte array and write
26 |    // the result to an output byte array. The result may have a different size.
27 |    // The transform must be stateless to ensure that the compression results
28 |    // are the same regardless of the number of jobs (i.e. no information is retained
29 |    // between two invocations of forward or inverse).
30 |    template <class T>
31 |    class Transform
32 |    {
33 |    public:
34 |        Transform(){}
35 | 
36 |        virtual bool forward(SliceArray<T>& src, SliceArray<T>& dst, int length) = 0;
37 | 
38 |        virtual bool inverse(SliceArray<T>& src, SliceArray<T>& dst, int length) = 0;
39 | 
40 |        virtual int getMaxEncodedLength(int srcLen) const = 0;
41 | 
42 |        virtual ~Transform(){} // virtual: implementations can be deleted through this type
43 |    };
44 | 
45 | }
46 | #endif
47 | 
48 | 


--------------------------------------------------------------------------------
/src/api/Compressor.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _Compressor_
 18 | #define _Compressor_
 19 | 
 20 | #ifdef _WIN32
 21 |    #define CDECL __cdecl
 22 | #else
 23 |    #define CDECL
 24 | #endif
 25 | 
 26 | #include <stdio.h>
 27 | 
 28 | 
 29 | #ifdef __cplusplus
 30 |    extern "C" {
 31 | #endif
 32 | 
 33 |    typedef unsigned char BYTE;
 34 | 
 35 |    /**
 36 |     *  Compression parameters
 37 |     */
 38 |    struct cData {
 39 |        char transform[64];          /* name of transforms [None|PACK|BWT|BWTS|LZ|LZX|LZP|ROLZ|ROLZX]
 40 |                                                           [RLT|ZRLT|MTFT|RANK|SRT|TEXT|MM|EXE|UTF|DNA] */
 41 |        char entropy[16];            /* name of entropy codec [None|Huffman|ANS0|ANS1|Range|FPAQ|TPAQ|TPAQX|CM] */
 42 |        unsigned int blockSize;      /* size of block in bytes */
 43 |        unsigned int jobs;           /* max number of concurrent tasks */
 44 |        int checksum;                /* 0, 32 or 64 to indicate size of block checksum */
 45 |        int headerless;              /* bool to indicate if the bitstream has a header (usually set to 0) */
 46 |    };
 47 | 
 48 |    /**
 49 |     *  Compression context: encapsulates compressor state (opaque: could change in future versions)
 50 |     */
 51 |    struct cContext {
 52 |        void* pCos;
 53 |        unsigned int blockSize;
 54 |        void* fos;
 55 |    };
 56 | 
 57 | 
 58 |     /**
 59 |     *  Initialize the compressor internal states.
 60 |     *
 61 |     *  @param cParam [IN] - the compression parameters
 62 |     *  @param dst [IN] - the destination stream of compressed data
 63 |     *  @param ctx [IN|OUT] - pointer to the compression context created by the call
 64 |     *
 65 |     *  @return 0 in case of success
 66 |     */
 67 |    int CDECL initCompressor(struct cData* cParam, FILE* dst, struct cContext** ctx);
 68 | 
 69 |     /**
 70 |     *  Compress a block of data. The compressor must have been initialized.
 71 |     *
 72 |     *  @param ctx [IN] - the compression context created during initialization
 73 |     *  @param src [IN] - the source block of data to compress
 74 |     *  @param inSize [IN|OUT] - the size of the source block to compress.
 75 |                                 Updated to reflect the number of bytes written to the destination.
 76 |     *  @param outSize [OUT] - the size of the compressed data
 77 |     *
 78 |     *  @return 0 in case of success
 79 |     */
 80 |    int CDECL compress(struct cContext* ctx, const BYTE* src, int* inSize, int* outSize);
 81 | 
 82 |    /**
 83 |     *  Dispose the compressor and cleanup memory resources.
 84 |     *
 85 |     *  @param ctx [IN] - the compression context created during initialization
 86 |     *  @param outSize [IN|OUT] - the number of bytes written to the destination
 87 |     *                            (the compressor may flush internal data)
 88 |     *
 89 |     *  @return 0 in case of success
 90 |     */
 91 |    int CDECL disposeCompressor(struct cContext* ctx, int* outSize);
 92 | 
 93 | #ifdef __cplusplus
 94 |    }
 95 | #endif
 96 | 
 97 | 
 98 | #endif
 99 | 
100 | 


--------------------------------------------------------------------------------
/src/api/Decompressor.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _Decompressor_
 18 | #define _Decompressor_
 19 | 
 20 | #ifdef _WIN32
 21 |    #define CDECL __cdecl
 22 | #else
 23 |    #define CDECL
 24 | #endif
 25 | 
 26 | #include <stdio.h>
 27 | 
 28 | #ifdef __cplusplus
 29 |    extern "C" {
 30 | #endif
 31 | 
 32 |    typedef unsigned char BYTE;
 33 | 
 34 |    /**
 35 |     *  Decompression parameters
 36 |     */
 37 |    struct dData {
 38 |        /* Required fields */
 39 |        unsigned int bufferSize;      /* read buffer size (at least block size) */
 40 |        unsigned int jobs;            /* max number of concurrent tasks */
 41 |        int headerless;               /* bool to indicate if the bitstream has a header (usually set to 0) */
 42 | 
 43 |        /* Optional fields: only required if headerless is true */
 44 |        char transform[64];           /* name of transforms [None|PACK|BWT|BWTS|LZ|LZX|LZP|ROLZ|ROLZX]
 45 |                                                        [RLT|ZRLT|MTFT|RANK|SRT|TEXT|MM|EXE|UTF|DNA] */
 46 |        char entropy[16];             /* name of entropy codec [None|Huffman|ANS0|ANS1|Range|FPAQ|TPAQ|TPAQX|CM] */
 47 |        unsigned int blockSize;       /* size of block in bytes */
 48 |        unsigned long originalSize;   /* size of original file in bytes (NOTE: unsigned long is 32-bit on Windows) */
 49 |        int checksum;                 /* 0, 32 or 64 to indicate size of block checksum */
 50 |        int bsVersion;                /* version of the bitstream */
 51 |    };
 52 | 
 53 |    /**
 54 |     *  Decompression context: encapsulates decompressor state (opaque: could change in future versions)
 55 |     */
 56 |    struct dContext {
 57 |        void* pCis;
 58 |        unsigned int bufferSize;
 59 |        void* fis;
 60 |    };
 61 | 
 62 |    /**
 63 |     *  Initialize the decompressor internal states.
 64 |     *
 65 |     *  @param dParam [IN] - the decompression parameters
 66 |     *  @param src [IN] - the source stream of compressed data
 67 |     *  @param ctx [IN|OUT] - a pointer to the decompression context created by the call
 68 |     *
 69 |     *  @return 0 in case of success
 70 |     */
 71 |    int CDECL initDecompressor(struct dData* dParam, FILE* src, struct dContext** ctx);
 72 | 
 73 |    /**
 74 |     *  Decompress a block of data. The decompressor must have been initialized.
 75 |     *
 76 |     *  @param ctx [IN] - the decompression context created during initialization
 77 |     *  @param dst [IN] - the destination block of decompressed data
 78 |     *  @param inSize [OUT] - the number of bytes read from source.
 79 |     *  @param outSize [IN|OUT] - the size of the block to decompress.
 80 |     *                            Updated to reflect the number of decompressed bytes
 81 |     *
 82 |     *  @return 0 in case of success
 83 |     */
 84 |    int CDECL decompress(struct dContext* ctx, BYTE* dst, int* inSize, int* outSize);
 85 | 
 86 |    /**
 87 |     *  Dispose the decompressor and cleanup memory resources.
 88 |     *
 89 |     *  @param ctx [IN] - the decompression context created during initialization
 90 |     *
 91 |     *  @return 0 in case of success
 92 |     */
 93 |    int CDECL disposeDecompressor(struct dContext* ctx);
 94 | 
 95 | #ifdef __cplusplus
 96 |    }
 97 | #endif
 98 | 
 99 | 
100 | #endif
101 | 
102 | 


--------------------------------------------------------------------------------
/src/app/BlockCompressor.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _BlockCompressor_
 18 | #define _BlockCompressor_
 19 | 
 20 | #include <map>
 21 | #include <vector>
 22 | #include "../InputStream.hpp"
 23 | #include "../io/CompressedOutputStream.hpp"
 24 | 
 25 | namespace kanzi {
 26 | 
 27 |    class FileCompressResult {
 28 |    public:
 29 |        int _code;
 30 |        uint64 _read;
 31 |        uint64 _written;
 32 |        std::string _errMsg;
 33 | 
 34 |        FileCompressResult()
 35 |           : _code(0)
 36 |           , _read(0)
 37 |           , _written(0)
 38 |           , _errMsg()
 39 |        {
 40 |        }
 41 | 
 42 |        FileCompressResult(int code, uint64 read, uint64 written, const std::string& errMsg)
 43 |            : _code(code)
 44 |            , _read(read)
 45 |            , _written(written)
 46 |            , _errMsg(errMsg)
 47 |        {
 48 |        }
 49 | 
 50 | #if __cplusplus < 201103L
 51 |        FileCompressResult(const FileCompressResult& fcr)
 52 |            : _code(fcr._code)
 53 |            , _read(fcr._read)
 54 |            , _written(fcr._written)
 55 |            , _errMsg(fcr._errMsg)
 56 |        {
 57 |        }
 58 | 
 59 |        FileCompressResult& operator=(const FileCompressResult& fcr)
 60 |        {
 61 |            _errMsg = fcr._errMsg;
 62 |            _code = fcr._code;
 63 |            _read = fcr._read;
 64 |            _written = fcr._written;
 65 |            return *this;
 66 |        }
 67 | 
 68 |        ~FileCompressResult() {}
 69 | #else
 70 |        FileCompressResult(const FileCompressResult& fdr) = delete;
 71 | 
 72 |        FileCompressResult& operator=(const FileCompressResult& fdr) = delete;
 73 | 
 74 |        FileCompressResult(FileCompressResult&& fdr) = default;
 75 | 
 76 |        FileCompressResult& operator=(FileCompressResult&& fdr) = default;
 77 | 
 78 |        ~FileCompressResult() = default;
 79 | #endif
 80 |    };
 81 | 
 82 | #ifdef CONCURRENCY_ENABLED
   // Task<R> specialization that keeps a pointer to a bounded concurrent queue
   // of T items. Only compiled when CONCURRENCY_ENABLED is defined.
   // run() is declared here and defined elsewhere.
   template <class T, class R>
   class FileCompressWorker FINAL : public Task<R> {
   public:
       // Stores the queue pointer as-is.
       FileCompressWorker(BoundedConcurrentQueue<T>* queue) : _queue(queue) { }

       // Empty destructor: the queue pointer is not deleted here.
       ~FileCompressWorker() {}

       R run();

   private:
       BoundedConcurrentQueue<T>* _queue;
   };
 95 | #endif
 96 | 
   // Task<T> that holds a copy of a Context, an input stream pointer, a
   // compressed output stream pointer and a list of event listeners.
   // All member functions are declared here and defined elsewhere.
   template <class T>
   class FileCompressTask FINAL : public Task<T> {
   public:
       // NOTE(review): presumably the I/O buffer size in bytes used by run() - confirm in the .cpp
       static const int DEFAULT_BUFFER_SIZE = 65536;

       FileCompressTask(const Context& ctx, std::vector<Listener<Event>*>& listeners);

       ~FileCompressTask();

       // Executes the task and returns its result of type T.
       T run();

       void dispose();

   private:
       Context _ctx;                             // stored by value
       InputStream* _is;                         // raw pointer; ownership not visible in this header
       CompressedOutputStream* _cos;             // raw pointer; ownership not visible in this header
       std::vector<Listener<Event>*> _listeners; // vector of listener pointers stored by value
   };


   // Convenience alias for the task type producing a FileCompressResult.
   typedef FileCompressTask<FileCompressResult> FCTask;
119 | 
   // Front-end class for compression, configured from a Context.
   // FileCompressTask<FileCompressResult> is a friend and can therefore reach
   // the private static helpers declared below.
   class BlockCompressor {
       friend class FileCompressTask<FileCompressResult>;

   public:
       BlockCompressor(const Context& ctx);

       ~BlockCompressor();

       // Runs the compression. Returns an int status and updates 'written'
       // (presumably the number of bytes written - TODO confirm in the .cpp).
       int compress(uint64& written);

       bool addListener(Listener<Event>& bl);

       bool removeListener(Listener<Event>& bl);

       // Intentionally empty: nothing is released here.
       void dispose() const {};

   private:
       // Block size constants; the values are defined outside this header.
       static const int DEFAULT_BLOCK_SIZE;
       static const int MIN_BLOCK_SIZE;
       static const int MAX_BLOCK_SIZE;

       int _verbosity;
       int _checksum;
       bool _overwrite;
       bool _skipBlocks;
       std::string _inputName;
       std::string _outputName;
       std::string _codec;
       std::string _transform;
       int _blockSize;
       bool _autoBlockSize; // derive block size from input size and jobs
       int _jobs;
       std::vector<Listener<Event>*> _listeners; // listener pointers
       bool _reorderFiles;
       bool _noDotFiles;
       bool _noLinks;
       Context _ctx; // stored by value

       // Passes 'evt' to the listeners in the given vector (defined elsewhere).
       static void notifyListeners(std::vector<Listener<Event>*>& listeners, const Event& evt);

       // Fills the two-element output array for the given level
       // (presumably [0] = transform, [1] = codec - TODO confirm in the .cpp).
       static void getTransformAndCodec(int level, std::string tranformAndCodec[2]);
   };
162 | }
163 | #endif
164 | 
165 | 


--------------------------------------------------------------------------------
/src/app/BlockDecompressor.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _BlockDecompressor_
 18 | #define _BlockDecompressor_
 19 | 
 20 | #include <map>
 21 | #include <vector>
 22 | #include "../OutputStream.hpp"
 23 | #include "../io/CompressedInputStream.hpp"
 24 | 
 25 | namespace kanzi {
 26 |    class FileDecompressResult {
 27 |    public:
 28 |        int _code;
 29 |        uint64 _read;
 30 |        std::string _errMsg;
 31 | 
 32 |        FileDecompressResult()
 33 |           : _code(0)
 34 |           , _read(0)
 35 |           , _errMsg()
 36 |        {
 37 |        }
 38 | 
 39 |        FileDecompressResult(int code, uint64 read, const std::string& errMsg)
 40 |            : _code(code)
 41 |            , _read(read)
 42 |            , _errMsg(errMsg)
 43 |        {
 44 |        }
 45 | 
 46 | #if __cplusplus < 201103L
 47 |        FileDecompressResult(const FileDecompressResult& fdr)
 48 |            : _code(fdr._code)
 49 |            , _read(fdr._read)
 50 |            , _errMsg(fdr._errMsg)
 51 |        {
 52 |        }
 53 | 
 54 |        FileDecompressResult& operator=(const FileDecompressResult& fdr)
 55 |        {
 56 |            _errMsg = fdr._errMsg;
 57 |            _code = fdr._code;
 58 |            _read = fdr._read;
 59 |            return *this;
 60 |        }
 61 | 
 62 |        ~FileDecompressResult() {}
 63 | #else
 64 |        FileDecompressResult(const FileDecompressResult& fcr) = delete;
 65 | 
 66 |        FileDecompressResult& operator=(const FileDecompressResult& fcr) = delete;
 67 | 
 68 |        FileDecompressResult(FileDecompressResult&& fcr) = default;
 69 | 
 70 |        FileDecompressResult& operator=(FileDecompressResult&& fcr) = default;
 71 | 
 72 |        ~FileDecompressResult() = default;
 73 | #endif
 74 |    };
 75 | 
 76 | #ifdef CONCURRENCY_ENABLED
   // Task<R> specialization that keeps a pointer to a bounded concurrent queue
   // of T items. Only compiled when CONCURRENCY_ENABLED is defined.
   // run() is declared here and defined elsewhere.
   template <class T, class R>
   class FileDecompressWorker FINAL : public Task<R> {
   public:
       // Stores the queue pointer as-is.
       FileDecompressWorker(BoundedConcurrentQueue<T>* queue) : _queue(queue) { }

       // Empty destructor: the queue pointer is not deleted here.
       ~FileDecompressWorker() {}

       R run();

   private:
       BoundedConcurrentQueue<T>* _queue;
   };
 89 | #endif
 90 | 
   // Task<T> that holds a copy of a Context, an output stream pointer, a
   // compressed input stream pointer and a list of event listeners.
   // All member functions are declared here and defined elsewhere.
   template <class T>
   class FileDecompressTask FINAL : public Task<T> {
   public:
       FileDecompressTask(const Context& ctx, std::vector<Listener<Event>*>& listeners);

       ~FileDecompressTask();

       // Executes the task and returns its result of type T.
       T run();

       void dispose();

   private:
       Context _ctx;                             // stored by value
       OutputStream* _os;                        // raw pointer; ownership not visible in this header
       CompressedInputStream* _cis;              // raw pointer; ownership not visible in this header
       std::vector<Listener<Event>*> _listeners; // vector of listener pointers stored by value
   };

   // Convenience alias for the task type producing a FileDecompressResult.
   typedef FileDecompressTask<FileDecompressResult> FDTask;
110 | 
   // Front-end class for decompression, configured from a Context.
   // FileDecompressTask<FileDecompressResult> is a friend and can therefore
   // reach the private static helper declared below.
   class BlockDecompressor {
       friend class FileDecompressTask<FileDecompressResult>;

   public:
       BlockDecompressor(const Context& ctx);

       ~BlockDecompressor();

       // Runs the decompression. Returns an int status and updates 'read'
       // (presumably the number of bytes read - TODO confirm in the .cpp).
       int decompress(uint64& read);

       bool addListener(Listener<Event>& bl);

       bool removeListener(Listener<Event>& bl);

       // Intentionally empty: nothing is released here.
       void dispose() const {};

   private:
       int _verbosity;
       bool _overwrite;
       std::string _inputName;
       std::string _outputName;
       int _blockSize;
       int _jobs;
       std::vector<Listener<Event>*> _listeners; // listener pointers
       bool _noDotFiles;
       bool _noLinks;
       Context _ctx; // stored by value

       // Passes 'evt' to the listeners in the given vector (defined elsewhere).
       static void notifyListeners(std::vector<Listener<Event>*>& listeners, const Event& evt);
   };
141 | }
142 | #endif
143 | 
144 | 


--------------------------------------------------------------------------------
/src/app/InfoPrinter.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 | http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <iomanip>
 17 | #include <ios>
 18 | #include <sstream>
 19 | #include "InfoPrinter.hpp"
 20 | 
 21 | using namespace kanzi;
 22 | using namespace std;
 23 | 
 24 | InfoPrinter::InfoPrinter(int infoLevel, InfoPrinter::Type type, OutputStream& os)
 25 |     : _os(os)
 26 |     , _type(type)
 27 |     , _level(infoLevel)
 28 | {
 29 |     if (type == InfoPrinter::ENCODING) {
 30 |         _thresholds[0] = Event::COMPRESSION_START;
 31 |         _thresholds[1] = Event::BEFORE_TRANSFORM;
 32 |         _thresholds[2] = Event::AFTER_TRANSFORM;
 33 |         _thresholds[3] = Event::BEFORE_ENTROPY;
 34 |         _thresholds[4] = Event::AFTER_ENTROPY;
 35 |         _thresholds[5] = Event::COMPRESSION_END;
 36 |     }
 37 |     else {
 38 |         _thresholds[0] = Event::DECOMPRESSION_START;
 39 |         _thresholds[1] = Event::BEFORE_ENTROPY;
 40 |         _thresholds[2] = Event::AFTER_ENTROPY;
 41 |         _thresholds[3] = Event::BEFORE_TRANSFORM;
 42 |         _thresholds[4] = Event::AFTER_TRANSFORM;
 43 |         _thresholds[5] = Event::DECOMPRESSION_END;
 44 |     }
 45 | 	
 46 |     for (int i = 0; i < 1024; i++)
 47 |         _map[i] = nullptr;
 48 | }
 49 | 
 50 | void InfoPrinter::processEvent(const Event& evt)
 51 | {
 52 |     int currentBlockId = evt.getId();
 53 | 
 54 |     if (evt.getType() == _thresholds[1]) {
 55 |         // Register initial block size
 56 |         BlockInfo* bi = new BlockInfo();
 57 |         _clock12.start();
 58 | 
 59 |         bi->_stage0Size = evt.getSize();
 60 |         _map[hash(currentBlockId)] = bi;
 61 | 
 62 |         if (_level >= 5) {
 63 |             _os << evt.toString() << endl;
 64 |         }
 65 |     }
 66 |     else if (evt.getType() == _thresholds[2]) {
 67 |         BlockInfo* bi = _map[hash(currentBlockId)];
 68 | 
 69 |         if (bi == nullptr)
 70 |             return;
 71 | 
 72 |         _clock12.stop();
 73 |         _clock23.start();
 74 | 
 75 |         if (_level >= 5) {
 76 |             stringstream ss;
 77 |             ss << evt.toString() << " [" << int64(_clock12.elapsed()) << " ms]";
 78 |             _os << ss.str() << endl;
 79 |         }
 80 |     }
 81 |     else if (evt.getType() == _thresholds[3]) {
 82 |         BlockInfo* bi = _map[hash(currentBlockId)];
 83 | 
 84 |         if (bi == nullptr)
 85 |             return;
 86 | 
 87 |         _clock23.stop();
 88 |         _clock34.start();
 89 |         bi->_stage1Size = evt.getSize();
 90 | 
 91 |         if (_level >= 5) {
 92 |             _os << evt.toString() << endl;
 93 |         }
 94 |     }
 95 |     else if (evt.getType() == _thresholds[4]) {
 96 |         BlockInfo* bi = _map[hash(currentBlockId)];
 97 | 
 98 |         if (bi == nullptr)
 99 |             return;
100 | 
101 |         if (_level < 3) {
102 |             delete bi;
103 |             _map[hash(currentBlockId)] = nullptr;
104 |             return;
105 |         }
106 | 
107 |         int64 stage2Size = evt.getSize();
108 |         _clock34.stop();
109 |         stringstream ss;
110 | 
111 |         if (_level >= 5) {
112 |             ss << evt.toString() << endl;
113 |         }
114 | 
115 |         // Display block info
116 |         if (_level >= 4) {
117 |             ss << "Block " << currentBlockId << ": " << bi->_stage0Size << " => ";
118 |             ss << bi->_stage1Size << " [" << int64(_clock12.elapsed()) << " ms] => " << stage2Size;
119 |             ss << " [" << int64(_clock34.elapsed()) << " ms]";
120 | 
121 |             // Add compression ratio for encoding
122 |             if ((_type == InfoPrinter::ENCODING) && (bi->_stage0Size != 0)) {
123 |                 ss << " (" << uint(double(stage2Size) * double(100) / double(bi->_stage0Size));
124 |                 ss << "%)";
125 |             }
126 | 
127 |             // Optionally add hash
128 |             if (evt.getHash() != 0) {
129 |                 ss << std::uppercase << std::hex << " [" << evt.getHash() << "]";
130 |             }
131 | 
132 |             _os << ss.str() << endl;
133 |         }
134 | 
135 |         delete bi;
136 |         _map[hash(currentBlockId)] = nullptr;
137 |     }
138 |     else if ((evt.getType() == Event::AFTER_HEADER_DECODING) && (_level >= 3)) {
139 |         _os << evt.toString() << endl;
140 |     }
141 |     else if (_level >= 5) {
142 |         _os << evt.toString() << endl;
143 |     }
144 | }
145 | 


--------------------------------------------------------------------------------
/src/app/InfoPrinter.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 | http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _InfoPrinter_
18 | #define _InfoPrinter_
19 | 
20 | #include "../Event.hpp"
21 | #include "../Listener.hpp"
22 | #include "../OutputStream.hpp"
23 | #include "../util/Clock.hpp"
24 | 
25 | 
26 | namespace kanzi
27 | {
28 | 
   // Plain record holding two sizes collected for one block.
   class BlockInfo {
   public:
       int64 _stage0Size; // size reported by the event that registers the block
       int64 _stage1Size; // size reported by the later "stage 1" event
   };
34 | 
35 |    // An implementation of Listener to display block information (verbose option
36 |    // of the BlockCompressor/BlockDecompressor)
37 |    class InfoPrinter : public Listener<Event> {
38 |    public:
39 |        enum Type {
40 |            ENCODING,
41 |            DECODING
42 |        };
43 | 
44 |        InfoPrinter(int infoLevel, InfoPrinter::Type type, OutputStream& os);
45 | 
46 |        ~InfoPrinter() {
47 |           for (int i = 0; i < 1024; i++) {
48 |              if (_map[i] != nullptr)
49 |                 delete _map[i];
50 |           }
51 |        }
52 | 
53 |        void processEvent(const Event& evt);
54 | 
55 |    private:
56 |        OutputStream& _os;
57 |        BlockInfo* _map[1024];
58 |        Event::Type _thresholds[6];
59 |        InfoPrinter::Type _type;
60 |        int _level;
61 |        Clock _clock12;
62 |        Clock _clock23;
63 |        Clock _clock34;
64 | 	   
65 |        static uint hash(uint id) { return (id * 0x1E35A7BD) & 0x03FF; }
66 |    };
67 | }
68 | #endif
69 | 
70 | 


--------------------------------------------------------------------------------
/src/bitstream/DebugInputBitStream.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <iomanip>
 17 | #include <iostream>
 18 | #include "../bitstream/DebugInputBitStream.hpp"
 19 | 
 20 | using namespace kanzi;
 21 | using namespace std;
 22 | 
 23 | DebugInputBitStream::DebugInputBitStream(InputBitStream& ibs) : _delegate(ibs), _out(cout), _width(80)
 24 | {
 25 |     _idx = 0;
 26 |     _mark = false;
 27 |     _show = false;
 28 |     _hexa = false;
 29 |     _current = byte(0);
 30 | }
 31 | 
 32 | DebugInputBitStream::DebugInputBitStream(InputBitStream& ibs, ostream& os) : _delegate(ibs), _out(os), _width(80)
 33 | {
 34 |     _idx = 0;
 35 |     _mark = false;
 36 |     _show = false;
 37 |     _hexa = false;
 38 |     _current = byte(0);
 39 | }
 40 | 
 41 | DebugInputBitStream::DebugInputBitStream(InputBitStream& ibs, ostream& os, int width) : _delegate(ibs), _out(os)
 42 | {
 43 |     if ((width != -1) && (width < 8))
 44 |         width = 8;
 45 | 
 46 |     if (width != -1)
 47 |         width &= 0xFFFFFFF8;
 48 | 
 49 |     _width = width;
 50 |     _idx = 0;
 51 |     _mark = false;
 52 |     _show = false;
 53 |     _hexa = false;
 54 |     _current = byte(0);
 55 | }
 56 | 
// Calls _close() when the debug wrapper is destroyed.
DebugInputBitStream::~DebugInputBitStream()
{
    _close();
}
 61 | 
 62 | // Returns 1 or 0
 63 | int DebugInputBitStream::readBit()
 64 | {
 65 |     int res = _delegate.readBit();
 66 |     _current <<= 1;
 67 |     _current |= byte(res);
 68 |     _out << ((res & 1) == 1 ? "1" : "0");
 69 |     _idx++;
 70 | 
 71 |     if (_mark == true)
 72 |         _out << "r";
 73 | 
 74 |     if ((_width != -1) && ((_idx - 1) % _width == _width - 1)) {
 75 |         if (showByte())
 76 |             printByte(_current);
 77 | 
 78 |         _out << endl;
 79 |         _idx = 0;
 80 |     }
 81 |     else if ((_idx & 7) == 0) {
 82 |         if (showByte())
 83 |             printByte(_current);
 84 |         else
 85 |             _out << " ";
 86 |     }
 87 | 
 88 |     return res;
 89 | }
 90 | 
 91 | uint64 DebugInputBitStream::readBits(uint count)
 92 | {
 93 |     uint64 res = _delegate.readBits(count);
 94 | 
 95 |     for (uint i = 1; i <= count; i++) {
 96 |         int bit = (res >> (count - i)) & 1;
 97 |         _idx++;
 98 |         _current <<= 1;
 99 |         _current |= byte(bit);
100 |         _out << ((bit == 1) ? "1" : "0");
101 | 
102 |         if ((_mark == true) && (i == count))
103 |             _out << "r";
104 | 
105 |         if ((_width != -1) && (_idx % _width == 0)) {
106 |             if (showByte())
107 |                 printByte(_current);
108 | 
109 |             _out << endl;
110 |             _idx = 0;
111 |         }
112 |         else if ((_idx & 7) == 0) {
113 |             if (showByte())
114 |                 printByte(_current);
115 |             else
116 |                 _out << " ";
117 |         }
118 |     }
119 | 
120 |     return res;
121 | }
122 | 
123 | uint DebugInputBitStream::readBits(byte bits[], uint count)
124 | {
125 |     count = _delegate.readBits(bits, count);
126 | 
127 |     for (uint i = 0; i < (count >> 3); i++) {
128 |         for (int j = 7; j >= 0; j--) {
129 |             int bit = int(bits[i] >> j) & 1;
130 |             _idx++;
131 |             _current <<= 1;
132 |             _current |= byte(bit);
133 |             _out << ((bit == 1) ? "1" : "0");
134 | 
135 |             if ((_mark == true) && (j == int(count)))
136 |                 _out << "r";
137 | 
138 |             if ((_width != -1) && (_idx % _width == 0)) {
139 |                 if (showByte())
140 |                     printByte(_current);
141 | 
142 |                 _out << endl;
143 |                 _idx = 0;
144 |             }
145 |             else if ((_idx & 7) == 0) {
146 |                 if (showByte())
147 |                     printByte(_current);
148 |                 else
149 |                     _out << " ";
150 |             }
151 |         }
152 |     }
153 | 
154 |     return count;
155 | }
156 | 
157 | void DebugInputBitStream::printByte(byte b)
158 | {
159 |     int val = int(b);
160 | 
161 |     if (_hexa == true) {
162 |         _out << hex << " [0x";
163 |         _out << ((val < 16) ? "0" : "");
164 |         _out << val << "] ";
165 |         _out << dec;
166 |         return;
167 |     }
168 | 
169 |     _out << " [";
170 | 
171 |     if (val < 10)
172 |         _out << "00";
173 |     else if (val < 100)
174 |         _out << "0";
175 | 
176 |     _out << val << "] ";
177 | }
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/src/bitstream/DebugInputBitStream.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _DebugInputBitStream_
18 | #define _DebugInputBitStream_
19 | 
20 | #include "../InputBitStream.hpp"
21 | #include "../OutputStream.hpp"
22 | 
23 | 
24 | namespace kanzi {
25 | 
   // InputBitStream decorator: forwards every call to a wrapped bitstream
   // and keeps an output stream plus display settings (line width, mark,
   // hexadecimal mode, byte display) used by the read functions defined in
   // the .cpp file.
   class DebugInputBitStream FINAL : public InputBitStream
   {
   private:
       InputBitStream& _delegate; // wrapped bitstream (held by reference)
       OutputStream& _out;        // debug output (held by reference)
       int _width;                // display line width setting
       int _idx;                  // display position counter
       bool _mark;                // see setMark()
       bool _hexa;                // see setHexa()
       bool _show;                // see showByte(bool)
       byte _current;             // byte accumulator used by the read functions

       void printByte(byte val);

       // Closes the wrapped bitstream.
       void _close() { _delegate.close(); }

   public:
       DebugInputBitStream(InputBitStream& ibs);

       DebugInputBitStream(InputBitStream& ibs, OutputStream& os);

       DebugInputBitStream(InputBitStream& ibs, OutputStream& os, int width);

       ~DebugInputBitStream();

       // Returns 1 or 0
       int readBit();

       uint64 readBits(uint length);

       uint readBits(byte bits[], uint length);

       // Number of bits read
       uint64 read() const { return _delegate.read(); }

       // Return false when the bitstream is closed or the End-Of-Stream has been reached
       bool hasMoreToRead() { return _delegate.hasMoreToRead(); }

       void close() { _close(); }

       // Enables or disables the display of byte values.
       void showByte(bool show) { _show = show; }

       // Selects hexadecimal (true) or decimal (false) byte display.
       void setHexa(bool hexa) { _hexa = hexa; }

       bool hexa() const { return _hexa; }

       bool showByte() const { return _show; }

       // Enables or disables the mark flag.
       void setMark(bool mark) { _mark = mark; }

       bool mark() const { return _mark; }
   };
78 | }
79 | #endif
80 | 
81 | 


--------------------------------------------------------------------------------
/src/bitstream/DebugOutputBitStream.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <iostream>
 17 | #include "DebugOutputBitStream.hpp"
 18 | 
 19 | using namespace kanzi;
 20 | using namespace std;
 21 | 
 22 | DebugOutputBitStream::DebugOutputBitStream(OutputBitStream& obs) : _delegate(obs), _out(cout), _width(80)
 23 | {
 24 |     _mark = false;
 25 |     _hexa = false;
 26 |     _show = false;
 27 |     _current = byte(0);
 28 |     _idx = 0;
 29 | }
 30 | 
 31 | DebugOutputBitStream::DebugOutputBitStream(OutputBitStream& obs, OutputStream& os) : _delegate(obs), _out(os), _width(80)
 32 | {
 33 |     _mark = false;
 34 |     _hexa = false;
 35 |     _show = false;
 36 |     _current = byte(0);
 37 |     _idx = 0;
 38 | }
 39 | 
 40 | DebugOutputBitStream::DebugOutputBitStream(OutputBitStream& obs, OutputStream& os, int width) : _delegate(obs), _out(os)
 41 | {
 42 |     if ((width != -1) && (width < 8))
 43 |         width = 8;
 44 | 
 45 |     if (width != -1)
 46 |         width &= 0xFFFFFFF8;
 47 | 
 48 |     _width = width;
 49 |     _mark = false;
 50 |     _hexa = false;
 51 |     _show = false;
 52 |     _current = byte(0);
 53 |     _idx = 0;
 54 | }
 55 | 
// Calls _close() when the debug wrapper is destroyed.
DebugOutputBitStream::~DebugOutputBitStream()
{
    _close();
}
 60 | 
 61 | void DebugOutputBitStream::writeBit(int bit)
 62 | {
 63 |     bit &= 1;
 64 |     _out << ((bit == 1) ? "1" : "0");
 65 |     _current <<= 1;
 66 |     _current |= byte(bit);
 67 |     _idx++;
 68 | 
 69 |     if (_mark == true)
 70 |         _out << "w";
 71 | 
 72 |     if ((_width != -1) && ((_idx - 1) % _width == _width - 1)) {
 73 |         if (showByte())
 74 |             printByte(_current);
 75 | 
 76 |         _out << endl;
 77 |         _idx = 0;
 78 |     }
 79 |     else if ((_idx & 7) == 0) {
 80 |         if (showByte())
 81 |             printByte(_current);
 82 |         else
 83 |             _out << " ";
 84 |     }
 85 | 
 86 |     _delegate.writeBit(bit);
 87 | }
 88 | 
 89 | uint DebugOutputBitStream::writeBits(uint64 bits, uint count)
 90 | {
 91 |     uint res = _delegate.writeBits(bits, count);
 92 | 
 93 |     for (uint i = 1; i <= res; i++) {
 94 |         uint64 bit = (bits >> (res - i)) & 1;
 95 |         _current <<= 1;
 96 |         _current |= byte(bit);
 97 |         _idx++;
 98 |         _out << ((bit == 1) ? "1" : "0");
 99 | 
100 |         if ((_mark == true) && (i == res))
101 |             _out << "w";
102 | 
103 |         if ((_width != -1) && (_idx % _width == 0)) {
104 |              if (showByte())
105 |                 printByte(_current);
106 | 
107 |             _out << endl;
108 |             _idx = 0;
109 |         }
110 |         else if ((_idx & 7) == 0) {
111 |             if (showByte())
112 |                 printByte(_current);
113 |             else
114 |                 _out << " ";
115 |         }
116 |     }
117 | 
118 |     return res;
119 | }
120 | 
121 | uint DebugOutputBitStream::writeBits(const byte bits[], uint count)
122 | {
123 |     int res = _delegate.writeBits(bits, count);
124 |     const int end = int(count >> 3);
125 | 
126 |     for (int i = 0; i < end; i++) {
127 |         for (int j = 7; j >=0 ; j--) {
128 |            uint64 bit = uint64(bits[i] >> j) & 1;
129 |            _current <<= 1;
130 |            _current |= byte(bit);
131 |            _idx++;
132 |            _out << ((bit == 1) ? "1" : "0");
133 | 
134 |            if ((_mark == true) && (i == res))
135 |                _out << "w";
136 | 
137 |            if ((_width != -1) && (_idx % _width == 0)) {
138 |                 if (showByte())
139 |                     printByte(_current);
140 | 
141 |                 _out << endl;
142 |                 _idx = 0;
143 |            }
144 |            else if ((_idx & 7) == 0) {
145 |                if (showByte())
146 |                    printByte(_current);
147 |                else
148 |                    _out << " ";
149 |            }
150 |        }
151 |     }
152 | 
153 |     return res;
154 | }
155 | 
156 | void DebugOutputBitStream::printByte(byte b)
157 | {
158 |     int val = int(b);
159 | 
160 |     if (_hexa == true) {
161 |         _out << hex << " [0x";
162 |         _out <<	((val < 16) ? "0" : "");
163 |         _out << val << "] ";
164 |         _out << dec;
165 |         return;
166 |     }
167 | 
168 |     _out << " [";
169 | 
170 |     if (val < 10)
171 |         _out << "00";
172 |     else if (val < 100)
173 |         _out << "0";
174 | 
175 |     _out << val << "] ";
176 | }
177 | 
178 | 
179 | 


--------------------------------------------------------------------------------
/src/bitstream/DebugOutputBitStream.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _DebugOutputBitStream_
18 | #define _DebugOutputBitStream_
19 | 
20 | #include "../OutputBitStream.hpp"
21 | #include "../OutputStream.hpp"
22 | 
23 | namespace kanzi
24 | {
25 | 
   // OutputBitStream decorator: forwards every call to a wrapped bitstream
   // and keeps an output stream plus display settings (line width, mark,
   // hexadecimal mode, byte display) used by the write functions defined in
   // the .cpp file.
   class DebugOutputBitStream FINAL : public OutputBitStream
   {
   private:
       OutputBitStream& _delegate; // wrapped bitstream (held by reference)
       OutputStream& _out;         // debug output (held by reference)
       int _width;                 // display line width setting
       int _idx;                   // display position counter
       bool _mark;                 // see setMark()
       bool _show;                 // see showByte(bool)
       bool _hexa;                 // see setHexa()
       byte _current;              // byte accumulator used by the write functions

       void printByte(byte val);

       // Closes the wrapped bitstream.
       void _close() { _delegate.close(); }

   public:
       DebugOutputBitStream(OutputBitStream& obs);

       DebugOutputBitStream(OutputBitStream& obs, OutputStream& os);

       DebugOutputBitStream(OutputBitStream& obs, OutputStream& os, int width);

       ~DebugOutputBitStream();

       void writeBit(int bit);

       uint writeBits(uint64 bits, uint length);

       uint writeBits(const byte bits[], uint length);

       // Return number of bits written so far
       uint64 written() const { return _delegate.written(); }

       void close() { _close(); }

       // Enables or disables the display of byte values.
       void showByte(bool show) { _show = show; }

       // Selects hexadecimal (true) or decimal (false) byte display.
       void setHexa(bool hexa) { _hexa = hexa; }

       bool hexa() const { return _hexa; }

       bool showByte() const { return _show; }

       // Enables or disables the mark flag.
       void setMark(bool mark) { _mark = mark; }

       bool mark() const { return _mark; }
   };
74 | 
75 | }
76 | #endif
77 | 
78 | 


--------------------------------------------------------------------------------
/src/bitstream/DefaultOutputBitStream.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _DefaultOutputBitStream_
 18 | #define _DefaultOutputBitStream_
 19 | 
 20 | #include "../BitStreamException.hpp"
 21 | #include "../OutputStream.hpp"
 22 | #include "../OutputBitStream.hpp"
 23 | #include "../Memory.hpp"
 24 | #include "../Seekable.hpp"
 25 | #include "../util/strings.hpp"
 26 | 
 27 | 
 28 | namespace kanzi
 29 | {
 30 | 
// Output bitstream writing to an OutputStream. Bits are accumulated in a 64 bit
// cache (_current), the cache is pushed 8 bytes at a time into a memory buffer
// (_buffer) and the buffer is flushed to the underlying stream.
// The Seekable interface (tell / seek) is compiled out for _MSC_VER <= 1500.
#if defined(_MSC_VER) && _MSC_VER <= 1500
   class DefaultOutputBitStream FINAL : public OutputBitStream
#else
   class DefaultOutputBitStream FINAL : public OutputBitStream, public Seekable
#endif
   {
   private:
       OutputStream& _os; // underlying stream
       byte* _buffer; // memory buffer receiving the pushed 64 bit words
       bool _closed; // value returned by isClosed()
       uint _bufferSize;
       uint _position; // index of current byte in buffer
       uint _availBits; // bits not consumed in _current
       int64 _written; // bit count added to the in-memory counts by written()
       uint64 _current; // cached bits

       // Move the 64 bit cache into the buffer (defined inline below)
       void pushCurrent();

       void flush();

       void _close();

   public:
       DefaultOutputBitStream(OutputStream& os, uint bufferSize=65536);

       ~DefaultOutputBitStream();

       void writeBit(int bit);

       uint writeBits(uint64 bits, uint length);

       uint writeBits(const byte bits[], uint length);

       void close() { _close(); }

#if !defined(_MSC_VER) || _MSC_VER > 1500
       // Return the current position in bits, -1 on failure or if closed
       int64 tell();

       // Move to a bit position (must be a multiple of 8)
       bool seek(int64 pos);
#endif

       // Return number of bits written so far
       uint64 written() const
       {
           // Number of bits flushed + bytes written in memory + bits written in memory
           return uint64(_written + (int64(_position) << 3) + int64(64 - _availBits));
       }

       bool isClosed() const { return _closed; }
   };
 81 | 
 82 |    // Write least significant bit of the input integer. Trigger exception if stream is closed
 83 |    inline void DefaultOutputBitStream::writeBit(int bit)
 84 |    {
 85 |        if (_availBits <= 1) { // _availBits = 0 if stream is closed => force pushCurrent()
 86 |            _current |= (uint64(bit) & 1);
 87 |            pushCurrent();
 88 |        }
 89 |        else {
 90 |            _availBits--;
 91 |            _current |= (uint64(bit & 1) << _availBits);
 92 |        }
 93 |    }
 94 | 
 95 |    // Write 'count' (in [1..64]) bits. Trigger exception if stream is closed
 96 |    inline uint DefaultOutputBitStream::writeBits(uint64 value, uint count)
 97 |    {
 98 |        if (count > 64)
 99 |            throw BitStreamException("Invalid bit count: " + TOSTR(count) + " (must be in [1..64])");
100 |  
101 |        _current |= ((value << (64 - count)) >> (64 - _availBits));
102 | 
103 |        if (count >= _availBits) {
104 |            // Not enough spots available in 'current'
105 |            const uint remaining = count - _availBits;
106 |            pushCurrent();
107 | 
108 |            if (remaining != 0) {
109 |                _availBits -= remaining;
110 |                _current = value << _availBits;
111 |            }
112 |        }
113 |        else {
114 |            _availBits -= count;
115 |        }
116 | 
117 |        return count;
118 |    }
119 | 
120 |    // Push 64 bits of current value into buffer.
121 |    inline void DefaultOutputBitStream::pushCurrent()
122 |    {
123 |        BigEndian::writeLong64(&_buffer[_position], _current);
124 |        _availBits = 64;
125 |        _current = 0;
126 |        _position += 8;
127 | 
128 |        if (_position >= _bufferSize - 8)
129 |            flush();
130 |    }
131 | 
132 | #if !defined(_MSC_VER) || _MSC_VER > 1500
133 |    inline int64 DefaultOutputBitStream::tell()
134 |    {
135 |        if (isClosed() == true)
136 |            return -1;
137 | 
138 |        _os.clear();
139 |        const int64 res = int64(_os.tellp());
140 |        return (res < 0) ? -1 : 8 * res + (int64(_position) << 3) + int64(64 - _availBits);
141 |    }
142 | 
143 |    // Only support a new position at the byte boundary (pos & 7 == 0)
144 |    inline bool DefaultOutputBitStream::seek(int64 pos)
145 |    {
146 |        if (isClosed() == true)
147 |            return false;
148 | 
149 |        if ((pos < 0) || ((pos & 7) != 0))
150 |            return false;
151 | 
152 |        // Flush buffer
153 |        // Round down to byte alignment
154 |        const uint a = _availBits & -8;
155 | 
156 |        for (uint i = 56; i >= a; i -= 8) {
157 |           _buffer[_position++] = byte(_current >> i);
158 | 
159 |           if (_position >= _bufferSize)
160 |              flush();
161 |        }
162 | 
163 |        _availBits = 64;
164 |        flush();
165 |        _os.clear();
166 |        _os.seekp(std::streampos(pos >> 3));
167 |        return true;
168 |    }
169 | #endif
170 | 
171 | }
172 | #endif
173 | 
174 | 


--------------------------------------------------------------------------------
/src/configure:
--------------------------------------------------------------------------------
1 | # Fake config file
2 | 


--------------------------------------------------------------------------------
/src/entropy/ANSRangeDecoder.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 | http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _ANSRangeDecoder_
 18 | #define _ANSRangeDecoder_
 19 | 
 20 | #include "../EntropyDecoder.hpp"
 21 | #include "../types.hpp"
 22 | 
 23 | 
 24 | // Implementation of an Asymmetric Numeral System decoder.
 25 | // See "Asymmetric Numeral System" by Jarek Duda at http://arxiv.org/abs/0902.0271
 26 | // Some code has been ported from https://github.com/rygorous/ryg_rans
 27 | // For an alternate C implementation example, see https://github.com/Cyan4973/FiniteStateEntropy
 28 | 
 29 | namespace kanzi
 30 | {
 31 | 
   // Per symbol decoding statistics: cumulative frequency and frequency,
   // each stored on 16 bits.
   struct ANSDecSymbol
   {
      // Set both fields; freq is capped to (1 << logRange) - 1
      void reset(int cumFreq, int freq, int logRange);

      uint16 _cumFreq; // subtracted from the state in ANSRangeDecoder::decodeSymbol
      uint16 _freq; // multiplies the high part of the state in decodeSymbol
   };
 39 | 
 40 | 
   // Entropy decoder based on Asymmetric Numeral Systems (see the references
   // in the comments at the top of this file). Reads from an InputBitStream.
   class ANSRangeDecoder : public EntropyDecoder {
   public:
      // States below this value are refilled with 16 bits in decodeSymbol()
      static const uint ANS_TOP;

      ANSRangeDecoder(InputBitStream& bitstream,
                      int order = 0,
                      int chunkSize = DEFAULT_ANS0_CHUNK_SIZE);

      ~ANSRangeDecoder();

      int decode(byte block[], uint blkptr, uint len);

      // Return the bitstream this decoder reads from
      InputBitStream& getBitStream() const { return _bitstream; }

      // No-op (see _dispose())
      void dispose() { _dispose(); }


   private:
      static const int DEFAULT_ANS0_CHUNK_SIZE;
      static const int DEFAULT_LOG_RANGE;
      static const int MIN_CHUNK_SIZE;
      static const int MAX_CHUNK_SIZE;

      InputBitStream& _bitstream;
      uint* _freqs;
      uint8* _f2s;
      int _f2sSize;
      ANSDecSymbol* _symbols;
      byte* _buffer;
      uint _bufferSize;
      uint _chunkSize;
      uint _order;
      uint _logRange; // shift applied to the state in decodeSymbol()

      bool decodeChunk(byte block[], uint count);

      // Advance the ANS state for one symbol (defined inline below)
      uint decodeSymbol(byte*& p, uint& st, const ANSDecSymbol& sym, const int mask) const;

      int decodeHeader(uint frequencies[], uint alphabet[]);

      // Nothing to release here
      void _dispose() const {}
   };
 83 | 
 84 | 
 85 |    inline void ANSDecSymbol::reset(int cumFreq, int freq, int logRange)
 86 |    {
 87 |        _cumFreq = uint16(cumFreq);
 88 |        _freq = (freq >= (1 << logRange)) ? uint16((1 << logRange) - 1) : uint16(freq); // Mirror encoder
 89 |    }
 90 | 
 91 | 
   // Advance the ANS state 'st' for the decoded symbol 'sym' and renormalize it.
   // 'p' points into the encoded bytes and is advanced by 2 when the state is
   // refilled. 'mask' selects the low part of the state (st & mask).
   // Return the new state (also stored in 'st').
   inline uint ANSRangeDecoder::decodeSymbol(byte*& p, uint& st, const ANSDecSymbol& sym, const int mask) const
   {
      // Compute next ANS state
      // D(x) = (s, q_s (x/M) + mod(x,M) - b_s) where s is such b_s <= x mod M < b_{s+1}
      st = uint(sym._freq) * (st >> _logRange) + (st & mask) - uint(sym._cumFreq);

      // Normalize
      // Branchless: x is -1 (all bits set) when the state must be refilled, else 0
      const int x = (st < ANS_TOP) ? -1 : 0;
      // x = -1: shift the state left by 16 and append 2 bytes (p[0] is the high byte)
      // x = 0: the state is left unchanged. p[0] and p[1] are read in both cases.
      st = (st << (x & 16)) | (x & ((uint(p[0]) << 8) | uint(p[1])));
      // x = -1: p += 2, x = 0: p is unchanged
      p -= (x + x);
      return st;
   }
104 | 
105 | }
106 | #endif
107 | 
108 | 


--------------------------------------------------------------------------------
/src/entropy/ANSRangeEncoder.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 | http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _ANSRangeEncoder_
 18 | #define _ANSRangeEncoder_
 19 | 
 20 | #include "../EntropyEncoder.hpp"
 21 | 
 22 | 
 23 | // Implementation of an Asymmetric Numeral System encoder.
 24 | // See "Asymmetric Numeral System" by Jarek Duda at http://arxiv.org/abs/0902.0271
 25 | // Some code has been ported from https://github.com/rygorous/ryg_rans
 26 | // For an alternate C implementation example, see https://github.com/Cyan4973/FiniteStateEntropy
 27 | 
 28 | namespace kanzi
 29 | {
 30 | 
   // Per symbol encoding statistics, precomputed by reset() and consumed by
   // ANSRangeEncoder::encodeSymbol().
   struct ANSEncSymbol
   {
      // Compute all fields; freq is capped to (1 << logRange) - 1
      void reset(int cumFreq, int freq, uint logRange);

      int _xMax; // (Exclusive) upper bound of pre-normalization interval
      int _bias; // Bias
      int _cmplFreq; // Complement of frequency: (1 << scale_bits) - freq
      int _invShift; // Reciprocal shift
      uint64 _invFreq; // Fixed-point reciprocal frequency
   };
 41 | 
 42 | 
   // Entropy encoder based on Asymmetric Numeral Systems (see the references
   // in the comments at the top of this file). Writes to an OutputBitStream.
   class ANSRangeEncoder : public EntropyEncoder
   {
   public:
       // Used by ANSEncSymbol::reset() to compute the renormalization bound
       static const int ANS_TOP;

       ANSRangeEncoder(OutputBitStream& bitstream,
                      int order = 0,
                      int chunkSize = DEFAULT_ANS0_CHUNK_SIZE,
                      int logRange = DEFAULT_LOG_RANGE);

       ~ANSRangeEncoder();

       int updateFrequencies(uint frequencies[], uint lr);

       int encode(const byte block[], uint blkptr, uint len);

       // Return the bitstream this encoder writes to
       OutputBitStream& getBitStream() const { return _bitstream; }

       // No-op (see _dispose())
       void dispose() { _dispose(); }


   private:
       static const int DEFAULT_ANS0_CHUNK_SIZE;
       static const int DEFAULT_LOG_RANGE;
       static const int MIN_CHUNK_SIZE;
       static const int MAX_CHUNK_SIZE;

       ANSEncSymbol* _symbols;
       uint* _freqs;
       byte* _buffer;
       uint _bufferSize;
       OutputBitStream& _bitstream;
       uint _chunkSize;
       uint _logRange;
       uint _order;


       int rebuildStatistics(const byte block[], int end, uint lr);

       void encodeChunk(const byte block[], int end);

       // Advance the ANS state for one symbol (defined inline below)
       int encodeSymbol(byte*& p, int& st, const ANSEncSymbol& sym) const;

       bool encodeHeader(int alphabetSize, const uint alphabet[], const uint frequencies[], uint lr) const;

       // Nothing to release here
       void _dispose() const {}
   };
 90 | 
 91 | 
 92 |    inline void ANSEncSymbol::reset(int cumFreq, int freq, uint logRange)
 93 |    {
 94 |       // Make sure xMax is a positive int32. Compatibility with Java implementation
 95 |       if (freq >= 1 << logRange)
 96 |          freq = (1 << logRange) - 1;
 97 | 
 98 |       _xMax = ((ANSRangeEncoder::ANS_TOP >> logRange) << 16) * freq;
 99 |       _cmplFreq = (1 << logRange) - freq;
100 | 
101 |       if (freq < 2) {
102 |          _invFreq = uint64(0xFFFFFFFF);
103 |          _invShift = 32;
104 |          _bias = cumFreq + (1 << logRange) - 1;
105 |       }
106 |       else {
107 |          int shift = 0;
108 | 
109 |          while (freq > (1 << shift))
110 |                shift++;
111 | 
112 |          // Alverson, "Integer Division using reciprocals"
113 |          _invFreq = (((uint64(1) << (shift + 31)) + freq - 1) / freq) & uint64(0xFFFFFFFF);
114 |          _invShift = 32 + shift - 1;
115 |          _bias = cumFreq;
116 |       }
117 |    }
118 | 
   // Renormalize the ANS state 'st' if needed (emitting 2 bytes at 'p', which
   // moves backward) and return the next state for symbol 'sym'.
   // 'st' is updated with the renormalized state only; the caller receives the
   // next state as return value.
   inline int ANSRangeEncoder::encodeSymbol(byte*& p, int& st, const ANSEncSymbol& sym) const
   {
      // Branchless renormalization: x is 1 when 16 bits must be emitted, else 0
      const int x = (st >= sym._xMax) ? 1 : 0;
      // Both bytes are always written. With x = 0, p does not move and the two
      // writes hit the same location, so nothing is emitted.
      *p = byte(st);
      p -= x;
      *p = byte(st >> 8);
      p -= x;
      // Shift by 16 when x = 1, by 0 otherwise
      st >>= (-x & 16);

      // Compute next ANS state
      // C(s,x) = M floor(x/q_s) + mod(x,q_s) + b_s where b_s = q_0 + ... + q_{s-1}
      // st = ((st / freq) << lr) + (st % freq) + cumFreq;
      return st + sym._bias + int((st * sym._invFreq) >> sym._invShift) * sym._cmplFreq;
   }
133 | }
134 | #endif
135 | 
136 | 


--------------------------------------------------------------------------------
/src/entropy/AdaptiveProbMap.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _AdaptiveProbMap_
 18 | #define _AdaptiveProbMap_
 19 | 
 20 | #include "../Global.hpp"
 21 | 
 22 | // APM maps a probability and a context into a new probability
 23 | // that the next bit will be 1. After each guess, it updates
 24 | // its state to improve future guesses.
 25 | 
 26 | namespace kanzi {
   // Adaptive probability map with a linear quantization of the input
   // probability: 65 entries per context, interpolated in get().
   // RATE is the right shift applied to the error when updating an entry.
   template <int RATE>
   class LinearAdaptiveProbMap {
   public:
#if __cplusplus >= 202002L // simple-template-id in ctors and dtors rejected in C++20
       LinearAdaptiveProbMap(int n);

       ~LinearAdaptiveProbMap() { delete[] _data; }
#else
       LinearAdaptiveProbMap<RATE>(int n);

       ~LinearAdaptiveProbMap<RATE>() { delete[] _data; }
#endif

       // Update with the last bit, then return the prediction for (pr, ctx)
       int get(int bit, int pr, int ctx);

   private:
       int _index; // last p, context
       uint16* _data; // [NbCtx][65]:  p, context -> p
   };
 46 | 
 47 |    template <int RATE>
 48 |    inline LinearAdaptiveProbMap<RATE>::LinearAdaptiveProbMap(int n)
 49 |    {
 50 |        const int size = (n == 0) ? 65 : n * 65;
 51 |        _data = new uint16[size];
 52 |        _index = 0;
 53 | 
 54 |        for (int j = 0; j <= 64; j++) {
 55 |            _data[j] = uint16(j << 6) << 4;
 56 |        }
 57 | 
 58 |        for (int i = 1; i < n; i++) {
 59 |            memcpy(&_data[i * 65], &_data[0], 65 * sizeof(uint16));
 60 |        }
 61 |    }
 62 | 
   // Return improved prediction given current bit, prediction and context.
   // 'bit' is used to update the two entries selected by the previous call,
   // 'pr' and 'ctx' select the entries used for the returned prediction.
   template <int RATE>
   inline int LinearAdaptiveProbMap<RATE>::get(int bit, int pr, int ctx)
   {
       // Update probability based on error and learning rate
       // g is the target: 65528 when bit is 1, 0 when bit is 0
       const int g = -bit & 65528;
       _data[_index] += (((g - int(_data[_index])) >> RATE) + bit);
       _data[_index + 1] += (((g - int(_data[_index + 1])) >> RATE) + bit);

       // Find index: 65*ctx + quantized prediction in [0..64]
       _index = (pr >> 6) + 65 * ctx;

       // Return interpolated probability
       const uint16 w = uint16(pr & 127);
       return int(_data[_index] * (128 - w) + _data[_index + 1] * w) >> 11;
   }
 79 | 
 80 | 
 81 | 
   // Adaptive probability map indexed by the stretched input probability.
   // FAST = false: 33 entries per context, the result is interpolated between
   // two entries. FAST = true: 32 entries per context, no interpolation.
   // RATE is the right shift applied to the error when updating an entry.
   template <bool FAST, int RATE>
   class LogisticAdaptiveProbMap {
   public:
#if __cplusplus >= 202002L // simple-template-id in ctors and dtors rejected in C++20
       LogisticAdaptiveProbMap(int n);

       ~LogisticAdaptiveProbMap() { delete[] _data; }
#else
       LogisticAdaptiveProbMap<FAST, RATE>(int n);

       ~LogisticAdaptiveProbMap<FAST, RATE>() { delete[] _data; }
#endif

       // Update with the last bit, then return the prediction for (pr, ctx)
       int get(int bit, int pr, int ctx);

   private:
       int _index; // last p, context
       uint16* _data; // [NbCtx][33] ([NbCtx][32] when FAST):  p, context -> p
   };
101 | 
102 |    template <bool FAST, int RATE>
103 |    inline LogisticAdaptiveProbMap<FAST, RATE>::LogisticAdaptiveProbMap(int n)
104 |    {
105 |        const int mult = (FAST == false) ? 33 : 32;
106 |        const int size = (n == 0) ? mult : n * mult;
107 |        _data = new uint16[size];
108 |        _index = 0;
109 | 
110 |        for (int j = 0; j < mult; j++)
111 |            _data[j] = uint16(Global::squash((j - 16) * 128) << 4);
112 | 
113 |        for (int i = 1; i < n; i++)
114 |            memcpy(&_data[i * mult], &_data[0], mult * sizeof(uint16));
115 |    }
116 | 
   // Return improved prediction given current bit, prediction and context.
   // 'bit' is used to update the entries selected by the previous call,
   // 'pr' and 'ctx' select the entries used for the returned prediction.
   template <bool FAST, int RATE>
   inline int LogisticAdaptiveProbMap<FAST, RATE>::get(int bit, int pr, int ctx)
   {
       // Update probability based on error and learning rate
       // g is the target: 65528 when bit is 1, 0 when bit is 0
       const int g = -bit & 65528;
       _data[_index] += (((g - int(_data[_index])) >> RATE) + bit);

       if (FAST == false) {
           // Two entries per prediction: update the second one as well
           _data[_index + 1] += (((g - int(_data[_index + 1])) >> RATE) + bit);
           pr = Global::stretch(pr);
           _index = ((pr + 2048) >> 7) + 33 * ctx;

           // Return interpolated probability
           const uint16 w = uint16(pr & 127);
           return int(_data[_index] * (128 - w) + _data[_index + 1] * w) >> 11;
       } else {
           // Single entry, no interpolation
           _index = ((Global::stretch(pr) + 2048) >> 7) + 32 * ctx;
           return int(_data[_index]) >> 4;
       }
   }
138 | 
139 | }
140 | #endif
141 | 
142 | 


--------------------------------------------------------------------------------
/src/entropy/BinaryEntropyDecoder.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <algorithm>
 17 | #include <stdexcept>
 18 | #include "BinaryEntropyDecoder.hpp"
 19 | #include "../Memory.hpp"
 20 | #include "EntropyUtils.hpp"
 21 | 
 22 | using namespace kanzi;
 23 | using namespace std;
 24 | 
// Initial upper bound of the coding interval (low 56 bits set)
const uint64 BinaryEntropyDecoder::TOP = 0x00FFFFFFFFFFFFFF;
// Mask keeping the low 56 bits of a value
const uint64 BinaryEntropyDecoder::MASK_0_56 = 0x00FFFFFFFFFFFFFF;
// Mask keeping the low 32 bits of a value
const uint64 BinaryEntropyDecoder::MASK_0_32 = 0x00000000FFFFFFFF;
// decode() rejects blocks of this size or more
const int BinaryEntropyDecoder::MAX_BLOCK_SIZE = 1 << 30;
// decode() splits blocks of this size or more into several chunks
const int BinaryEntropyDecoder::MAX_CHUNK_SIZE = 1 << 26;
 30 | 
 31 | 
 32 | BinaryEntropyDecoder::BinaryEntropyDecoder(InputBitStream& bitstream, Predictor* predictor, bool deallocate)
 33 |     : _predictor(predictor)
 34 |     , _bitstream(bitstream)
 35 |     , _deallocate(deallocate)
 36 |     , _sba(new byte[0], 0)
 37 | {
 38 |     if (predictor == nullptr)
 39 |         throw invalid_argument("Invalid null predictor parameter");
 40 | 
 41 |     _low = 0;
 42 |     _high = TOP;
 43 |     _current = 0;
 44 | }
 45 | 
 46 | BinaryEntropyDecoder::~BinaryEntropyDecoder()
 47 | {
 48 |     _dispose();
 49 |     delete[] _sba._array;
 50 | 
 51 |     if (_deallocate)
 52 |         delete _predictor;
 53 | }
 54 | 
 55 | int BinaryEntropyDecoder::decode(byte block[], uint blkptr, uint count)
 56 | {
 57 |     if (count >= MAX_BLOCK_SIZE)
 58 |         throw invalid_argument("Invalid block size parameter (max is 1<<30)");
 59 | 
 60 |     uint startChunk = blkptr;
 61 |     const uint end = blkptr + count;
 62 |     uint length = max(count, 64u);
 63 | 
 64 |     if (length >= MAX_CHUNK_SIZE) {
 65 |         // If the block is big (>=64MB), split the decoding to avoid allocating
 66 |         // too much memory.
 67 |         length = (length / 8 < MAX_CHUNK_SIZE) ? count >> 3 : count >> 4;
 68 |     }
 69 | 
 70 |     const uint bufSize = length + (length >> 3);
 71 | 
 72 |     if (_sba._length < int(bufSize)) {
 73 |         delete[] _sba._array;
 74 |         _sba._length = int(bufSize);
 75 |         _sba._array = new byte[_sba._length];
 76 |     }
 77 | 
 78 |     // Split block into chunks, read bit array from bitstream and decode chunk
 79 |     while (startChunk < end) {
 80 |         const uint chunkSize = min(length, end - startChunk);
 81 |         const uint szBytes = uint(EntropyUtils::readVarInt(_bitstream));
 82 | 
 83 |         if (szBytes > bufSize)
 84 |            return 0;
 85 | 
 86 |         _current = _bitstream.readBits(56);
 87 | 
 88 |         if (szBytes != 0)
 89 |             _bitstream.readBits(&_sba._array[0], 8 * szBytes);
 90 | 
 91 |         _sba._index = 0;
 92 |         const uint endChunk = startChunk + chunkSize;
 93 | 
 94 |         for (uint i = startChunk; i < endChunk; i++) {
 95 |             block[i] = byte((decodeBit(_predictor->get()) << 7)
 96 |                           | (decodeBit(_predictor->get()) << 6)
 97 |                           | (decodeBit(_predictor->get()) << 5)
 98 |                           | (decodeBit(_predictor->get()) << 4)
 99 |                           | (decodeBit(_predictor->get()) << 3)
100 |                           | (decodeBit(_predictor->get()) << 2)
101 |                           | (decodeBit(_predictor->get()) << 1)
102 |                           |  decodeBit(_predictor->get()));
103 |         }
104 | 
105 |         startChunk = endChunk;
106 |     }
107 | 
108 |     return count;
109 | }
110 | 
111 | 
112 | // no inline
113 | void BinaryEntropyDecoder::read()
114 | {
115 |     _low = (_low << 32) & MASK_0_56;
116 |     _high = ((_high << 32) | MASK_0_32) & MASK_0_56;
117 |     const uint64 val = BigEndian::readInt32(&_sba._array[_sba._index]) & MASK_0_32;
118 |     _current = ((_current << 32) | val) & MASK_0_56;
119 |     _sba._index += 4;
120 | }
121 | 
122 | // no inline
123 | byte BinaryEntropyDecoder::decodeByte()
124 | {
125 |     return byte((decodeBit(_predictor->get()) << 7)
126 |         | (decodeBit(_predictor->get()) << 6)
127 |         | (decodeBit(_predictor->get()) << 5)
128 |         | (decodeBit(_predictor->get()) << 4)
129 |         | (decodeBit(_predictor->get()) << 3)
130 |         | (decodeBit(_predictor->get()) << 2)
131 |         | (decodeBit(_predictor->get()) << 1)
132 |         |  decodeBit(_predictor->get()));
133 | }
134 | 
135 | 


--------------------------------------------------------------------------------
/src/entropy/BinaryEntropyDecoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _BinaryEntropyDecoder_
18 | #define _BinaryEntropyDecoder_
19 | 
20 | #include "../EntropyDecoder.hpp"
21 | #include "../Predictor.hpp"
22 | #include "../SliceArray.hpp"
23 | 
24 | namespace kanzi
25 | {
26 | 
   // This class is a generic implementation of a bool entropy decoder.
   // Bits are decoded one at a time by splitting the interval [_low, _high]
   // according to the probability provided by a Predictor.
   class BinaryEntropyDecoder FINAL : public EntropyDecoder
   {
   private:
       static const uint64 TOP;
       static const uint64 MASK_0_56;
       static const uint64 MASK_0_32;
       static const int MAX_BLOCK_SIZE;
       static const int MAX_CHUNK_SIZE;

       Predictor* _predictor; // provides and learns the bit probabilities
       uint64 _low; // lower bound of the coding interval
       uint64 _high; // upper bound of the coding interval
       uint64 _current; // encoded value compared to the interval split
       InputBitStream& _bitstream;
       bool _deallocate; // when true, the destructor deletes the predictor
       SliceArray<byte> _sba; // buffer holding the encoded bytes of a chunk

       // Shift in the next 32 bits of encoded data
       void read();

       // Nothing to release here
       void _dispose() const {}

   public:
       BinaryEntropyDecoder(InputBitStream& bitstream, Predictor* predictor, bool deallocate=true);

       ~BinaryEntropyDecoder();

       int decode(byte block[], uint blkptr, uint count);

       // Return the bitstream this decoder reads from
       InputBitStream& getBitStream() const { return _bitstream; }

       // No-op (see _dispose())
       void dispose() { _dispose(); }

       // Decode 8 bits, most significant first
       byte decodeByte();

       // Decode one bit given 'pred', used as weight for the interval split
       int decodeBit(int pred = 2048);
   };
64 | 
65 | 
66 |    inline int BinaryEntropyDecoder::decodeBit(int pred)
67 |    {
68 |        // Calculate interval split
69 |        const uint64 split = ((((_high - _low) >> 4) * uint64(pred)) >> 8) + _low;
70 |        int bit;
71 | 
72 |        // Update predictor
73 |        if (split >= _current) {
74 |            bit = 1;
75 |            _high = split;
76 |            _predictor->update(1);
77 |        }
78 |        else {
79 |            bit = 0;
80 |            _low = split + 1;
81 |            _predictor->update(0);
82 |        }
83 | 
84 |        // Read 32 bits from bitstream
85 |        if (((_low ^ _high) >> 24) == 0)
86 |            read();
87 | 
88 |        return bit;
89 |    }
90 | 
91 | }
92 | #endif
93 | 
94 | 


--------------------------------------------------------------------------------
/src/entropy/BinaryEntropyEncoder.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <algorithm>
 17 | #include <stdexcept>
 18 | #include "BinaryEntropyEncoder.hpp"
 19 | #include "../Memory.hpp"
 20 | #include "EntropyUtils.hpp"
 21 | 
 22 | using namespace kanzi;
 23 | using namespace std;
 24 | 
// Initial upper bound of the coding interval (low 56 bits set)
const uint64 BinaryEntropyEncoder::TOP = 0x00FFFFFFFFFFFFFF;
// Low 24 bits set: combined with _low when the final 56 bits are written
const uint64 BinaryEntropyEncoder::MASK_0_24 = 0x0000000000FFFFFF;
// Low 32 bits set: shifted into _high by flush()
const uint64 BinaryEntropyEncoder::MASK_0_32 = 0x00000000FFFFFFFF;
// encode() rejects blocks of this size or more
const int BinaryEntropyEncoder::MAX_BLOCK_SIZE = 1 << 30;
// encode() splits blocks of this size or more into several chunks
const int BinaryEntropyEncoder::MAX_CHUNK_SIZE = 1 << 26;
 30 | 
 31 | 
 32 | BinaryEntropyEncoder::BinaryEntropyEncoder(OutputBitStream& bitstream, Predictor* predictor, bool deallocate)
 33 |     : _predictor(predictor)
 34 |     , _bitstream(bitstream)
 35 |     , _deallocate(deallocate)
 36 |     , _sba(new byte[0], 0)
 37 | {
 38 |     if (predictor == nullptr)
 39 |         throw invalid_argument("Invalid null predictor parameter");
 40 | 
 41 |     _low = 0;
 42 |     _high = TOP;
 43 |     _disposed = false;
 44 | }
 45 | 
 46 | BinaryEntropyEncoder::~BinaryEntropyEncoder()
 47 | {
 48 |     _dispose();
 49 |     delete[] _sba._array;
 50 | 
 51 |     if (_deallocate)
 52 |         delete _predictor;
 53 | }
 54 | 
 55 | int BinaryEntropyEncoder::encode(const byte block[], uint blkptr, uint count)
 56 | {
 57 |     if (count >= MAX_BLOCK_SIZE)
 58 |         throw invalid_argument("Invalid block size parameter (max is 1<<30)");
 59 | 
 60 |     uint startChunk = blkptr;
 61 |     const uint end = blkptr + count;
 62 |     uint length = max(count, 64u);
 63 | 
 64 |     if (length >= MAX_CHUNK_SIZE) {
 65 |         // If the block is big (>=64MB), split the encoding to avoid allocating
 66 |         // too much memory.
 67 |         length = (length / 8 < MAX_CHUNK_SIZE) ? count >> 3 : count >> 4;
 68 |     }
 69 | 
 70 |     const uint bufSize = length + (length >> 3);
 71 | 
 72 |     if (_sba._length < int(bufSize)) {
 73 |         delete[] _sba._array;
 74 |         _sba._length = int(bufSize);
 75 |         _sba._array = new byte[_sba._length];
 76 |     }
 77 | 
 78 |     // Split block into chunks, encode chunk and write bit array to bitstream
 79 |     while (startChunk < end) {
 80 |         const uint chunkSize = min(length, end - startChunk);
 81 |         const uint endChunk = startChunk + chunkSize;
 82 |         _sba._index = 0;
 83 | 
 84 |         for (uint i = startChunk; i < endChunk; i++) {
 85 |             encodeBit(int(block[i]) & 0x80, _predictor->get());
 86 |             encodeBit(int(block[i]) & 0x40, _predictor->get());
 87 |             encodeBit(int(block[i]) & 0x20, _predictor->get());
 88 |             encodeBit(int(block[i]) & 0x10, _predictor->get());
 89 |             encodeBit(int(block[i]) & 0x08, _predictor->get());
 90 |             encodeBit(int(block[i]) & 0x04, _predictor->get());
 91 |             encodeBit(int(block[i]) & 0x02, _predictor->get());
 92 |             encodeBit(int(block[i]) & 0x01, _predictor->get());
 93 |         }
 94 | 
 95 |         EntropyUtils::writeVarInt(_bitstream, uint32(_sba._index));
 96 |         _bitstream.writeBits(&_sba._array[0], 8 * _sba._index);
 97 |         startChunk = endChunk;
 98 | 
 99 |         if (startChunk < end)
100 |             _bitstream.writeBits(_low | MASK_0_24, 56);
101 |     }
102 | 
103 |     return count;
104 | }
105 | 
106 | void BinaryEntropyEncoder::_dispose()
107 | {
108 |     if (_disposed == true)
109 |         return;
110 | 
111 |     _disposed = true;
112 |     _bitstream.writeBits(_low | MASK_0_24, 56);
113 | }
114 | 
115 | // no inline
116 | void BinaryEntropyEncoder::flush()
117 | {
118 |     BigEndian::writeInt32(&_sba._array[_sba._index], int32(_high >> 24));
119 |     _sba._index += 4;
120 |     _low <<= 32;
121 |     _high = (_high << 32) | MASK_0_32;
122 | }
123 | 
124 | // no inline
125 | void BinaryEntropyEncoder::encodeByte(byte val)
126 | {
127 |     encodeBit(int(val) & 0x80, _predictor->get());
128 |     encodeBit(int(val) & 0x40, _predictor->get());
129 |     encodeBit(int(val) & 0x20, _predictor->get());
130 |     encodeBit(int(val) & 0x10, _predictor->get());
131 |     encodeBit(int(val) & 0x08, _predictor->get());
132 |     encodeBit(int(val) & 0x04, _predictor->get());
133 |     encodeBit(int(val) & 0x02, _predictor->get());
134 |     encodeBit(int(val) & 0x01, _predictor->get());
135 | }
136 | 
137 | 


--------------------------------------------------------------------------------
/src/entropy/BinaryEntropyEncoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _BinaryEntropyEncoder_
18 | #define _BinaryEntropyEncoder_
19 | 
20 | #include "../EntropyEncoder.hpp"
21 | #include "../Predictor.hpp"
22 | #include "../SliceArray.hpp"
23 | 
24 | namespace kanzi
25 | {
26 | 
   // Generic binary (one bit at a time) entropy encoder.
   // Each bit narrows the [_low, _high] interval according to a probability
   // supplied by a Predictor; encoded bytes are staged in _sba and written to
   // the output bitstream by encode().
   class BinaryEntropyEncoder FINAL : public EntropyEncoder
   {
   private:
       static const uint64 TOP;        // initial value of _high
       static const uint64 MASK_0_24;  // ORed into _low when the coder state is written
       static const uint64 MASK_0_32;  // ORed into _high after a flush
       static const int MAX_BLOCK_SIZE; // encode() rejects counts >= this value
       static const int MAX_CHUNK_SIZE; // blocks at least this large are split by encode()

       Predictor* _predictor;       // probability model, deleted in the destructor if _deallocate
       uint64 _low;                 // lower bound of the current interval
       uint64 _high;                // upper bound of the current interval
       OutputBitStream& _bitstream; // destination of the encoded data
       bool _disposed;              // true once the final state has been written
       bool _deallocate;            // true if this encoder owns _predictor
       SliceArray<byte> _sba;       // staging buffer for the bytes of the current chunk

       // Writes the final coder state once (no-op on subsequent calls)
       void _dispose();

       // Moves 32 bits of the interval to the staging buffer
       void flush();

   public:
       // Throws if 'predictor' is null
       BinaryEntropyEncoder(OutputBitStream& bitstream, Predictor* predictor, bool deallocate=true);

       ~BinaryEntropyEncoder();

       // Encodes 'count' bytes of 'block' starting at 'blkptr'; returns 'count'
       int encode(const byte block[], uint blkptr, uint count);

       OutputBitStream& getBitStream() const { return _bitstream; }

       void dispose() { _dispose(); }

       // Encodes the 8 bits of 'val', most significant bit first
       void encodeByte(byte val);

       // Encodes one bit; 'pred' is the value used to split the interval
       void encodeBit(int bit, int pred = 2048);
   };
64 | 
65 | 
66 |    inline void BinaryEntropyEncoder::encodeBit(int bit, int pred)
67 |    {
68 |        // Update fields with new interval bounds and predictor
69 |        const uint64 mid = _low + ((((_high - _low) >> 4) * uint64(pred)) >> 8);
70 |        (bit != 0) ? _high = mid : _low = mid + 1;
71 |        _predictor->update(bit != 0);
72 | 
73 |        // Write unchanged first 32 bits to bitstream
74 |        if (((_low ^ _high) >> 24) == 0)
75 |            flush();
76 |    }
77 | }
78 | #endif
79 | 
80 | 


--------------------------------------------------------------------------------
/src/entropy/CMPredictor.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | 
17 | #include "CMPredictor.hpp"
18 | 
19 | using namespace kanzi;
20 | 
// Adaptation rates: right shift amounts applied to the counter adjustments in
// update() (a smaller shift makes a counter move faster).
const int CMPredictor::FAST_RATE = 2;
const int CMPredictor::MEDIUM_RATE = 4;
const int CMPredictor::SLOW_RATE = 6;
// Scale used by update() when pulling counters upwards after a 1 bit
const int CMPredictor::PSCALE = 65536;
25 | 
26 | 
27 | CMPredictor::CMPredictor()
28 | {
29 |     _ctx = 1;
30 |     _runMask = 0;
31 |     _c1 = 0;
32 |     _c2 = 0;
33 | 
34 |     for (int i = 0; i < 256; i++) {
35 |         for (int j = 0; j <= 256; j++)
36 |             _counter1[i][j] = 32768;
37 | 
38 |         for (int j = 0; j < 16; j++) {
39 |             _counter2[2 * i][j] = j << 12;
40 |             _counter2[2 * i + 1][j] = j << 12;
41 |         }
42 | 
43 |         _counter2[2 * i][16] = 65535;
44 |         _counter2[2 * i + 1][16] = 65535;
45 |     }
46 | 
47 |     _pc1 = _counter1[_ctx];
48 |     _pc2 = &_counter2[_ctx | _runMask][8];
49 | }
50 | 


--------------------------------------------------------------------------------
/src/entropy/CMPredictor.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _CMPredictor_
18 | #define _CMPredictor_
19 | 
20 | #include "../Predictor.hpp"
21 | #include "../types.hpp"
22 | 
23 | namespace kanzi
24 | {
25 | 
   // Bit predictor backed by two tables of adaptive counters.
   // get() combines counters selected by the partial byte (_ctx) and the two
   // previous bytes (_c1, _c2); update() adjusts the selected counters with
   // the actual bit.
   class CMPredictor FINAL : public Predictor
   {
   private:
       static const int FAST_RATE;
       static const int MEDIUM_RATE;
       static const int SLOW_RATE;
       static const int PSCALE;

       int _c1;                 // last completed byte
       int _c2;                 // byte completed before _c1
       int _ctx;                // bits of the current byte, prefixed with a 1 (reset to 1 every 8 bits)
       int _runMask;            // 0x100 when _c1 == _c2, else 0
       int _counter1[256][257]; // indexed by [_ctx][_c1, _c2 or 256]
       int _counter2[512][17];  // indexed by [_ctx | _runMask][p >> 12] (and the next slot)
       int* _pc1;               // row of _counter1 selected by the last get()
       int* _pc2;               // slot of _counter2 selected by the last get()

   public:
       CMPredictor();

       ~CMPredictor(){}

       // Adjusts the counters selected by the last get() with 'bit'
       void update(int bit);

       // Returns the prediction for the next bit
       int get();
   };
52 | 
53 |    // Update the probability model
54 |    inline void CMPredictor::update(int bit)
55 |    {
56 |        if (bit == 0) {
57 |            _pc1[256] -= (_pc1[256] >> FAST_RATE);
58 |            _pc1[_c1] -= (_pc1[_c1] >> MEDIUM_RATE);
59 |            _pc2[0] -= (_pc2[0]>> SLOW_RATE);
60 |            _pc2[1] -= (_pc2[1]>> SLOW_RATE);
61 |            _ctx += _ctx;
62 |        }
63 |        else {
64 |            _pc1[256] -= ((_pc1[256] - PSCALE + 16) >> FAST_RATE);
65 |            _pc1[_c1] -= ((_pc1[_c1] - PSCALE + 16) >> MEDIUM_RATE);
66 |            _pc2[0] -= ((_pc2[0] - PSCALE + 16) >> SLOW_RATE);
67 |            _pc2[1] -= ((_pc2[1] - PSCALE + 16) >> SLOW_RATE);
68 |            _ctx += (_ctx + 1);
69 |        }
70 | 
71 |        if (_ctx > 255) {
72 |            _c2 = _c1;
73 |            _c1 = _ctx & 0xFF;
74 |            _ctx = 1;
75 |            _runMask = (_c1 == _c2) ? 0x100 : 0;
76 |        }
77 |    }
78 | 
79 |    // Return the split value representing the probability of 1 in the [0..4095] range.
80 |    inline int CMPredictor::get()
81 |    {
82 |        _pc1 = _counter1[_ctx];
83 |        const int p = (13 * (_pc1[256] + _pc1[_c1]) + 6 * _pc1[_c2]) >> 5;
84 |        _pc2 = &_counter2[_ctx | _runMask][p >> 12];
85 |        return (p + p + 3 * (_pc2[0] + _pc2[1]) + 64) >> 7; // rescale to [0..4095]
86 |    }
87 | }
88 | #endif
89 | 
90 | 


--------------------------------------------------------------------------------
/src/entropy/EntropyDecoderFactory.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 | http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _EntropyDecoderFactory_
 18 | #define _EntropyDecoderFactory_
 19 | 
#include <algorithm>
#include <cctype>
#include <sstream>
#include <stdexcept>
#include <string>
#include "../Context.hpp"
#include "ANSRangeDecoder.hpp"
#include "BinaryEntropyDecoder.hpp"
#include "HuffmanDecoder.hpp"
#include "NullEntropyDecoder.hpp"
#include "RangeDecoder.hpp"
#include "CMPredictor.hpp"
#include "FPAQDecoder.hpp"
#include "TPAQPredictor.hpp"
 30 | 
 31 | 
 32 | namespace kanzi {
 33 | 
   // Factory for entropy decoders: maps numeric codec types to decoder
   // instances and converts between codec types and their names.
   class EntropyDecoderFactory {
   public:
       // Codec type identifiers
       static const short NONE_TYPE = 0; // No compression
       static const short HUFFMAN_TYPE = 1; // Huffman
       static const short FPAQ_TYPE = 2; // Fast PAQ (order 0)
       static const short PAQ_TYPE = 3; // Obsolete
       static const short RANGE_TYPE = 4; // Range
       static const short ANS0_TYPE = 5; // Asymmetric Numerical System order 0
       static const short CM_TYPE = 6; // Context Model
       static const short TPAQ_TYPE = 7; // Tangelo PAQ
       static const short ANS1_TYPE = 8; // Asymmetric Numerical System order 1
       static const short TPAQX_TYPE = 9; // Tangelo PAQ Extra
       static const short RESERVED1 = 10; // Reserved
       static const short RESERVED2 = 11; // Reserved
       static const short RESERVED3 = 12; // Reserved
       static const short RESERVED4 = 13; // Reserved
       static const short RESERVED5 = 14; // Reserved
       static const short RESERVED6 = 15; // Reserved

       // Returns a decoder allocated with new for 'entropyType';
       // throws std::invalid_argument for any other type
       static EntropyDecoder* newDecoder(InputBitStream& ibs, Context& ctx, short entropyType);

       // Returns the upper case name of 'entropyType';
       // throws std::invalid_argument for any other type
       static const char* getName(short entropyType);

       // Returns the type matching 'name' (case insensitive);
       // throws std::invalid_argument if the name is not recognized
       static short getType(const char* name);
   };
 59 | 
 60 | 
 61 |    inline EntropyDecoder* EntropyDecoderFactory::newDecoder(InputBitStream& ibs, Context& ctx, short entropyType)
 62 |    {
 63 |        switch (entropyType) {
 64 |        // Each block is decoded separately
 65 |        // Rebuild the entropy decoder to reset block statistics
 66 |        case HUFFMAN_TYPE:
 67 |            return new HuffmanDecoder(ibs);
 68 | 
 69 |        case ANS0_TYPE:
 70 |            return new ANSRangeDecoder(ibs, 0);
 71 | 
 72 |        case ANS1_TYPE:
 73 |            return new ANSRangeDecoder(ibs, 1);
 74 | 
 75 |        case RANGE_TYPE:
 76 |            return new RangeDecoder(ibs);
 77 | 
 78 |        case FPAQ_TYPE:
 79 |            return new FPAQDecoder(ibs);
 80 | 
 81 |        case CM_TYPE:
 82 |            return new BinaryEntropyDecoder(ibs, new CMPredictor());
 83 | 
 84 |        case TPAQ_TYPE:
 85 |            return new BinaryEntropyDecoder(ibs, new TPAQPredictor<false>(&ctx));
 86 | 
 87 |        case TPAQX_TYPE:
 88 |            return new BinaryEntropyDecoder(ibs, new TPAQPredictor<true>(&ctx));
 89 | 
 90 |        case NONE_TYPE:
 91 |            return new NullEntropyDecoder(ibs);
 92 | 
 93 |        default:
 94 |            std::string msg = "Unknown entropy codec type: '";
 95 |            msg += char(entropyType);
 96 |            msg += '\'';
 97 |            throw std::invalid_argument(msg);
 98 |        }
 99 |    }
100 | 
101 | 
102 |    inline const char* EntropyDecoderFactory::getName(short entropyType)
103 |    {
104 |        switch (entropyType) {
105 |        case HUFFMAN_TYPE:
106 |            return "HUFFMAN";
107 | 
108 |        case ANS0_TYPE:
109 |            return "ANS0";
110 | 
111 |        case ANS1_TYPE:
112 |            return "ANS1";
113 | 
114 |        case RANGE_TYPE:
115 |            return "RANGE";
116 | 
117 |        case FPAQ_TYPE:
118 |            return "FPAQ";
119 | 
120 |        case CM_TYPE:
121 |            return "CM";
122 | 
123 |        case TPAQ_TYPE:
124 |            return "TPAQ";
125 | 
126 |        case TPAQX_TYPE:
127 |            return "TPAQX";
128 | 
129 |        case NONE_TYPE:
130 |            return "NONE";
131 | 
132 |        default:
133 |            std::string msg = "Unknown entropy codec type: '";
134 |            msg += char(entropyType);
135 |            msg += '\'';
136 |            throw std::invalid_argument(msg);
137 |        }
138 |    }
139 | 
140 | 
141 |    inline short EntropyDecoderFactory::getType(const char* str)
142 |    {
143 |        std::string name = str;
144 |        transform(name.begin(), name.end(), name.begin(), ::toupper);
145 | 
146 |        if (name == "HUFFMAN")
147 |            return HUFFMAN_TYPE;
148 | 
149 |        if (name == "ANS0")
150 |            return ANS0_TYPE;
151 | 
152 |        if (name == "ANS1")
153 |            return ANS1_TYPE;
154 | 
155 |        if (name == "FPAQ")
156 |            return FPAQ_TYPE;
157 | 
158 |        if (name == "RANGE")
159 |            return RANGE_TYPE;
160 | 
161 |        if (name == "CM")
162 |            return CM_TYPE;
163 | 
164 |        if (name == "TPAQ")
165 |            return TPAQ_TYPE;
166 | 
167 |        if (name == "TPAQX")
168 |            return TPAQX_TYPE;
169 | 
170 |        if (name == "NONE")
171 |            return NONE_TYPE;
172 | 
173 |        throw std::invalid_argument("Unsupported entropy codec type: '" + name + "'");
174 |    }
175 | }
176 | #endif
177 | 
178 | 


--------------------------------------------------------------------------------
/src/entropy/EntropyEncoderFactory.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 | http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _EntropyEncoderFactory_
 18 | #define _EntropyEncoderFactory_
 19 | 
#include <algorithm>
#include <cctype>
#include <sstream>
#include <stdexcept>
#include <string>
#include "../Context.hpp"
#include "ANSRangeEncoder.hpp"
#include "BinaryEntropyEncoder.hpp"
#include "HuffmanEncoder.hpp"
#include "NullEntropyEncoder.hpp"
#include "RangeEncoder.hpp"
#include "CMPredictor.hpp"
#include "FPAQEncoder.hpp"
#include "TPAQPredictor.hpp"
 30 | 
 31 | 
 32 | namespace kanzi {
 33 | 
   // Factory for entropy encoders: maps numeric codec types to encoder
   // instances and converts between codec types and their names.
   class EntropyEncoderFactory {
   public:
       // Codec type identifiers
       static const short NONE_TYPE = 0; // No compression
       static const short HUFFMAN_TYPE = 1; // Huffman
       static const short FPAQ_TYPE = 2; // Fast PAQ (order 0)
       static const short PAQ_TYPE = 3; // Obsolete
       static const short RANGE_TYPE = 4; // Range
       static const short ANS0_TYPE = 5; // Asymmetric Numerical System order 0
       static const short CM_TYPE = 6; // Context Model
       static const short TPAQ_TYPE = 7; // Tangelo PAQ
       static const short ANS1_TYPE = 8; // Asymmetric Numerical System order 1
       static const short TPAQX_TYPE = 9; // Tangelo PAQ Extra
       static const short RESERVED1 = 10; // Reserved
       static const short RESERVED2 = 11; // Reserved
       static const short RESERVED3 = 12; // Reserved
       static const short RESERVED4 = 13; // Reserved
       static const short RESERVED5 = 14; // Reserved
       static const short RESERVED6 = 15; // Reserved

       // Returns an encoder allocated with new for 'entropyType';
       // throws std::invalid_argument for any other type
       static EntropyEncoder* newEncoder(OutputBitStream& obs, Context& ctx, short entropyType);

       // Returns the upper case name of 'entropyType';
       // throws std::invalid_argument for any other type
       static const char* getName(short entropyType);

       // Returns the type matching 'name' (case insensitive);
       // throws std::invalid_argument if the name is not recognized
       static short getType(const char* name);
   };
 59 | 
 60 | 
 61 |    inline EntropyEncoder* EntropyEncoderFactory::newEncoder(OutputBitStream& obs, Context& ctx, short entropyType)
 62 |    {
 63 |        switch (entropyType) {
 64 |        case HUFFMAN_TYPE:
 65 |            return new HuffmanEncoder(obs);
 66 | 
 67 |        case ANS0_TYPE:
 68 |            return new ANSRangeEncoder(obs, 0);
 69 | 
 70 |        case ANS1_TYPE:
 71 |            return new ANSRangeEncoder(obs, 1);
 72 | 
 73 |        case RANGE_TYPE:
 74 |            return new RangeEncoder(obs);
 75 | 
 76 |        case FPAQ_TYPE:
 77 |            return new FPAQEncoder(obs);
 78 | 
 79 |        case CM_TYPE:
 80 |            return new BinaryEntropyEncoder(obs, new CMPredictor());
 81 | 
 82 |        case TPAQ_TYPE:
 83 |            return new BinaryEntropyEncoder(obs, new TPAQPredictor<false>(&ctx));
 84 | 
 85 |        case TPAQX_TYPE:
 86 |            return new BinaryEntropyEncoder(obs, new TPAQPredictor<true>(&ctx));
 87 | 
 88 |        case NONE_TYPE:
 89 |            return new NullEntropyEncoder(obs);
 90 | 
 91 |        default:
 92 |            std::string msg = "Unknown entropy codec type: '";
 93 |            msg += char(entropyType);
 94 |            msg += '\'';
 95 |            throw std::invalid_argument(msg);
 96 |        }
 97 |    }
 98 | 
 99 | 
100 |    inline const char* EntropyEncoderFactory::getName(short entropyType)
101 |    {
102 |        switch (entropyType) {
103 |        case HUFFMAN_TYPE:
104 |            return "HUFFMAN";
105 | 
106 |        case ANS0_TYPE:
107 |            return "ANS0";
108 | 
109 |        case ANS1_TYPE:
110 |            return "ANS1";
111 | 
112 |        case RANGE_TYPE:
113 |            return "RANGE";
114 | 
115 |        case FPAQ_TYPE:
116 |            return "FPAQ";
117 | 
118 |        case CM_TYPE:
119 |            return "CM";
120 | 
121 |        case TPAQ_TYPE:
122 |            return "TPAQ";
123 | 
124 |        case TPAQX_TYPE:
125 |            return "TPAQX";
126 | 
127 |        case NONE_TYPE:
128 |            return "NONE";
129 | 
130 |        default:
131 |            std::string msg = "Unknown entropy codec type: '";
132 |            msg += char(entropyType);
133 |            msg += '\'';
134 |            throw std::invalid_argument(msg);
135 |        }
136 |    }
137 | 
138 | 
139 |    inline short EntropyEncoderFactory::getType(const char* str)
140 |    {
141 |        std::string name = str;
142 |        transform(name.begin(), name.end(), name.begin(), ::toupper);
143 | 
144 |        if (name == "HUFFMAN")
145 |            return HUFFMAN_TYPE;
146 | 
147 |        if (name == "ANS0")
148 |            return ANS0_TYPE;
149 | 
150 |        if (name == "ANS1")
151 |            return ANS1_TYPE;
152 | 
153 |        if (name == "FPAQ")
154 |            return FPAQ_TYPE;
155 | 
156 |        if (name == "RANGE")
157 |            return RANGE_TYPE;
158 | 
159 |        if (name == "CM")
160 |            return CM_TYPE;
161 | 
162 |        if (name == "TPAQ")
163 |            return TPAQ_TYPE;
164 | 
165 |        if (name == "TPAQX")
166 |            return TPAQX_TYPE;
167 | 
168 |        if (name == "NONE")
169 |            return NONE_TYPE;
170 | 
171 |        throw std::invalid_argument("Unsupported entropy codec type: '" + name + "'");
172 |    }
173 | }
174 | #endif
175 | 
176 | 


--------------------------------------------------------------------------------
/src/entropy/EntropyUtils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _EntropyUtils_
18 | #define _EntropyUtils_
19 | 
20 | #include "../InputBitStream.hpp"
21 | #include "../OutputBitStream.hpp"
22 | 
23 | namespace kanzi
24 | {
25 | 
   // Collection of static helpers shared by the entropy codecs.
   // Only declarations live in this header; the notes below are limited to
   // what the signatures show.
   class EntropyUtils
   {
   private:
       static const int FULL_ALPHABET;
       static const int PARTIAL_ALPHABET;
       static const int ALPHABET_256;
       static const int ALPHABET_0;

   public:
       static const int INCOMPRESSIBLE_THRESHOLD;

       EntropyUtils() {}

       ~EntropyUtils() {}

       // Writes a description of 'alphabet' to 'obs'
       // NOTE(review): meaning of 'length', 'count' and of the return value
       // is not visible from this header - confirm against the implementation
       static int encodeAlphabet(OutputBitStream& obs, const uint alphabet[], int length, int count);

       // Reads an alphabet from 'ibs' into 'alphabet'
       // NOTE(review): presumably returns the number of symbols read - confirm
       static int decodeAlphabet(InputBitStream& ibs, uint alphabet[]);

       // NOTE(review): presumably rescales 'freqs' so that they sum to 'scale'
       // - confirm against the implementation
       static int normalizeFrequencies(uint freqs[], uint alphabet[], int length, uint totalFreq, uint scale);

       // Writes 'val' to 'obs' as a variable length integer
       // NOTE(review): return value presumably reports the size written - confirm
       static int writeVarInt(OutputBitStream& obs, uint32 val);

       // Reads a variable length integer from 'ibs'
       static uint32 readVarInt(InputBitStream& ibs);
   };
51 | 
52 | }
53 | #endif
54 | 
55 | 


--------------------------------------------------------------------------------
/src/entropy/ExpGolombDecoder.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #include "ExpGolombDecoder.hpp"
17 | 
18 | using namespace kanzi;
19 | 
20 | ExpGolombDecoder::ExpGolombDecoder(InputBitStream& bitstream, bool sgn)
21 |     : _bitstream(bitstream), _signed(sgn)
22 | {
23 | }
24 | 
25 | 
26 | int ExpGolombDecoder::decode(byte block[], uint blkptr, uint len)
27 | {
28 |     byte* buf = &block[blkptr];
29 |     const uint len8 = len & uint(-8);
30 | 
31 |     for (uint i = 0; i < len8; i += 8) {
32 |         buf[i]   = decodeByte();
33 |         buf[i+1] = decodeByte();
34 |         buf[i+2] = decodeByte();
35 |         buf[i+3] = decodeByte();
36 |         buf[i+4] = decodeByte();
37 |         buf[i+5] = decodeByte();
38 |         buf[i+6] = decodeByte();
39 |         buf[i+7] = decodeByte();
40 |     }
41 | 
42 |     for (uint i = len8; i < len; i++)
43 |         buf[i] = decodeByte();
44 | 
45 |     return len;
46 | }
47 | 


--------------------------------------------------------------------------------
/src/entropy/ExpGolombDecoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _ExpGolombDecoder_
18 | #define _ExpGolombDecoder_
19 | 
20 | #include "../EntropyDecoder.hpp"
21 | 
22 | namespace kanzi
23 | {
24 | 
   // Entropy decoder for byte values stored with an Exp-Golomb style code:
   // a prefix of bits gives the size of the value, followed by the value bits
   // (and a sign bit in signed mode).
   class ExpGolombDecoder : public EntropyDecoder
   {
   private:
       InputBitStream& _bitstream; // source of the encoded bits
       const bool _signed;         // true: values carry a trailing sign bit

       // NOTE(review): declared only; no definition appears in
       // ExpGolombDecoder.cpp - confirm whether it is still needed
       void flush();

       // Nothing to release
       void _dispose() const {}

   public:
       ExpGolombDecoder(InputBitStream& bitstream, bool sign=true);

       ~ExpGolombDecoder() { _dispose(); }

       // Decodes 'len' bytes into 'arr' starting at 'blkptr'; returns 'len'
       int decode(byte arr[], uint blkptr, uint len);

       InputBitStream& getBitStream() const { return _bitstream; }

       // Decodes one byte from the bitstream
       byte decodeByte();

       void dispose() { _dispose(); }

       bool isSigned() const { return _signed; }
   };
50 | 
51 | 
52 |    inline byte ExpGolombDecoder::decodeByte()
53 |    {
54 |        if (_bitstream.readBit() == 1)
55 |            return byte(0);
56 | 
57 |        uint log2 = 1;
58 | 
59 |        while (_bitstream.readBit() == 0)
60 |            log2++;
61 | 
62 |        // Clamp. Do not attempt to detect a corrupted bitstream
63 |        log2 &= 7;
64 | 
65 |        if (_signed == true) {
66 |            // Decode signed: read value + sign
67 |            int res = int(_bitstream.readBits(log2 + 1));
68 |            const int sgn = res & 1;
69 |            res = (res >> 1) + (1 << log2) - 1;
70 |            return byte((res - sgn) ^ -sgn); // res or -res
71 |        }
72 | 
73 |        // Decode unsigned
74 |        return byte((1 << log2) - 1 + _bitstream.readBits(log2));
75 |    }
76 | }
77 | #endif
78 | 
79 | 


--------------------------------------------------------------------------------
/src/entropy/ExpGolombEncoder.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #include "ExpGolombEncoder.hpp"
17 | 
18 | using namespace kanzi;
19 | 
// Precomputed table with one row of 256 entries for the unsigned mode and one
// for the signed mode.
// NOTE(review): entries look like (code length << 9) | code bits, indexed by
// byte value (e.g. 513 = (1 << 9) | 1); the code consuming the table is not
// in this file - confirm against ExpGolombEncoder::encodeByte.
const int ExpGolombEncoder::CACHE[2][256] = {
       // Unsigned
       {
          513, 1538, 1539, 2564, 2565, 2566, 2567, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 4624,
         4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 5664,
         5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676, 5677, 5678, 5679, 5680,
         5681, 5682, 5683, 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 6720,
         6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 6729, 6730, 6731, 6732, 6733, 6734, 6735, 6736,
         6737, 6738, 6739, 6740, 6741, 6742, 6743, 6744, 6745, 6746, 6747, 6748, 6749, 6750, 6751, 6752,
         6753, 6754, 6755, 6756, 6757, 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, 6766, 6767, 6768,
         6769, 6770, 6771, 6772, 6773, 6774, 6775, 6776, 6777, 6778, 6779, 6780, 6781, 6782, 6783, 7808,
         7809, 7808, 6783, 6782, 6781, 6780, 6779, 6778, 6777, 6776, 6775, 6774, 6773, 6772, 6771, 6770,
         6769, 6768, 6767, 6766, 6765, 6764, 6763, 6762, 6761, 6760, 6759, 6758, 6757, 6756, 6755, 6754,
         6753, 6752, 6751, 6750, 6749, 6748, 6747, 6746, 6745, 6744, 6743, 6742, 6741, 6740, 6739, 6738,
         6737, 6736, 6735, 6734, 6733, 6732, 6731, 6730, 6729, 6728, 6727, 6726, 6725, 6724, 6723, 6722,
         6721, 6720, 5695, 5694, 5693, 5692, 5691, 5690, 5689, 5688, 5687, 5686, 5685, 5684, 5683, 5682,
         5681, 5680, 5679, 5678, 5677, 5676, 5675, 5674, 5673, 5672, 5671, 5670, 5669, 5668, 5667, 5666,
         5665, 5664, 4639, 4638, 4637, 4636, 4635, 4634, 4633, 4632, 4631, 4630, 4629, 4628, 4627, 4626,
         4625, 4624, 3599, 3598, 3597, 3596, 3595, 3594, 3593, 3592, 2567, 2566, 2565, 2564, 1539, 1538
      },
      // Signed
      {
          513, 2052, 2054, 3080, 3082, 3084, 3086, 4112, 4114, 4116, 4118, 4120, 4122, 4124, 4126, 5152,
         5154, 5156, 5158, 5160, 5162, 5164, 5166, 5168, 5170, 5172, 5174, 5176, 5178, 5180, 5182, 6208,
         6210, 6212, 6214, 6216, 6218, 6220, 6222, 6224, 6226, 6228, 6230, 6232, 6234, 6236, 6238, 6240,
         6242, 6244, 6246, 6248, 6250, 6252, 6254, 6256, 6258, 6260, 6262, 6264, 6266, 6268, 6270, 7296,
         7298, 7300, 7302, 7304, 7306, 7308, 7310, 7312, 7314, 7316, 7318, 7320, 7322, 7324, 7326, 7328,
         7330, 7332, 7334, 7336, 7338, 7340, 7342, 7344, 7346, 7348, 7350, 7352, 7354, 7356, 7358, 7360,
         7362, 7364, 7366, 7368, 7370, 7372, 7374, 7376, 7378, 7380, 7382, 7384, 7386, 7388, 7390, 7392,
         7394, 7396, 7398, 7400, 7402, 7404, 7406, 7408, 7410, 7412, 7414, 7416, 7418, 7420, 7422, 8448,
         8451, 8449, 7423, 7421, 7419, 7417, 7415, 7413, 7411, 7409, 7407, 7405, 7403, 7401, 7399, 7397,
         7395, 7393, 7391, 7389, 7387, 7385, 7383, 7381, 7379, 7377, 7375, 7373, 7371, 7369, 7367, 7365,
         7363, 7361, 7359, 7357, 7355, 7353, 7351, 7349, 7347, 7345, 7343, 7341, 7339, 7337, 7335, 7333,
         7331, 7329, 7327, 7325, 7323, 7321, 7319, 7317, 7315, 7313, 7311, 7309, 7307, 7305, 7303, 7301,
         7299, 7297, 6271, 6269, 6267, 6265, 6263, 6261, 6259, 6257, 6255, 6253, 6251, 6249, 6247, 6245,
         6243, 6241, 6239, 6237, 6235, 6233, 6231, 6229, 6227, 6225, 6223, 6221, 6219, 6217, 6215, 6213,
         6211, 6209, 5183, 5181, 5179, 5177, 5175, 5173, 5171, 5169, 5167, 5165, 5163, 5161, 5159, 5157,
         5155, 5153, 4127, 4125, 4123, 4121, 4119, 4117, 4115, 4113, 3087, 3085, 3083, 3081, 2055, 2053
      }
   };
60 | 
61 | ExpGolombEncoder::ExpGolombEncoder(OutputBitStream& bitstream, bool sgn)
62 |     : _bitstream(bitstream), _signed((sgn == true) ? 1 : 0)
63 | {
64 | }
65 | 
66 | int ExpGolombEncoder::encode(const byte block[], uint blkptr, uint len)
67 | {
68 |     const byte* buf = &block[blkptr];
69 |     const uint len8 = len & uint(-8);
70 | 
71 |     for (uint i = 0; i < len8; i += 8) {
72 |         encodeByte(buf[i]);
73 |         encodeByte(buf[i+1]);
74 |         encodeByte(buf[i+2]);
75 |         encodeByte(buf[i+3]);
76 |         encodeByte(buf[i+4]);
77 |         encodeByte(buf[i+5]);
78 |         encodeByte(buf[i+6]);
79 |         encodeByte(buf[i+7]);
80 |     }
81 | 
82 |     for (uint i = len8; i < len; i++)
83 |         encodeByte(buf[i]);
84 | 
85 |     return len;
86 | }
87 | 
88 | 


--------------------------------------------------------------------------------
/src/entropy/ExpGolombEncoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _ExpGolombEncoder_
18 | #define _ExpGolombEncoder_
19 | 
20 | #include "../EntropyEncoder.hpp"
21 | 
22 | namespace kanzi
23 | {
24 | 
   // Entropy encoder that writes each byte as a precomputed code looked up
   // in CACHE. Two 256-entry tables exist; the one used is selected by the
   // 'sign' flag given at construction.
   class ExpGolombEncoder : public EntropyEncoder
   {
   private:
       // Packed per-byte codes, indexed by [_signed][byte value]
       static const int CACHE[2][256];
       OutputBitStream& _bitstream;
       // 1 when constructed with sign == true, 0 otherwise
       const int _signed;

       // Nothing to release
       void _dispose() const {}

   public:
       ExpGolombEncoder(OutputBitStream& bitstream, bool sign=true);

       ~ExpGolombEncoder() { _dispose(); }

       // Encodes len bytes of block starting at blkptr
       int encode(const byte block[], uint blkptr, uint len);

       OutputBitStream& getBitStream() const { return _bitstream; }

       // Writes the code for one byte to the bitstream
       void encodeByte(byte val);

       void dispose() { _dispose(); }

       bool isSigned() const { return _signed == 1; }
   };
49 | 
50 | 
51 |    inline void ExpGolombEncoder::encodeByte(byte val)
52 |    {
53 |        if (val == byte(0)) {
54 |            // shortcut when input is 0
55 |            _bitstream.writeBit(1);
56 |            return;
57 |        }
58 | 
59 |        const int emit = CACHE[_signed][uint8(val)];
60 |        _bitstream.writeBits(emit & 0x1FF, emit >> 9);
61 |    }
62 | }
63 | #endif
64 | 
65 | 


--------------------------------------------------------------------------------
/src/entropy/FPAQDecoder.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <algorithm>
 17 | #include <stdexcept>
 18 | #include "FPAQDecoder.hpp"
 19 | #include "EntropyUtils.hpp"
 20 | 
 21 | using namespace kanzi;
 22 | using namespace std;
 23 | 
 24 | 
// Initial value of _high set by reset() (low 56 bits set)
const uint64 FPAQDecoder::TOP = 0x00FFFFFFFFFFFFFF;
// Mask keeping the low 56 bits of a value
const uint64 FPAQDecoder::MASK_0_56 = 0x00FFFFFFFFFFFFFF;
// Mask keeping the low 32 bits of a value
const uint64 FPAQDecoder::MASK_0_32 = 0x00000000FFFFFFFF;
// Maximum number of bytes decoded per chunk in decode() (4 MiB)
const uint FPAQDecoder::DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;
// decode() rejects any count greater than or equal to this value
const uint FPAQDecoder::MAX_BLOCK_SIZE = 1 << 30;
// Probability scale; reset() initializes every probability to PSCALE / 2
const int FPAQDecoder::PSCALE = 65536;
 31 | 
 32 | 
 33 | FPAQDecoder::FPAQDecoder(InputBitStream& bitstream)
 34 |     : _bitstream(bitstream)
 35 | {
 36 |     reset();
 37 | }
 38 | 
 39 | FPAQDecoder::~FPAQDecoder()
 40 | {
 41 |     _dispose();
 42 | }
 43 | 
 44 | bool FPAQDecoder::reset()
 45 | {
 46 |     _low = 0;
 47 |     _high = TOP;
 48 |     _current = 0;
 49 |     _ctx = 1;
 50 | 
 51 |     for (int i = 0; i < 4; i++) {
 52 |         for (int j = 0; j < 256; j++)
 53 |             _probs[i][j] = PSCALE >> 1;
 54 |     }
 55 | 
 56 |     _p = _probs[0];
 57 |     return true;
 58 | }
 59 | 
// Decodes 'count' bytes into block, starting at index blkptr.
// The input is consumed chunk by chunk (at most DEFAULT_CHUNK_SIZE decoded
// bytes per chunk). For each chunk the bitstream provides, in this order:
// a varint byte size, 56 bits loaded into _current, then 'size' bytes that
// are copied into _buf and consumed by read() during bit decoding.
// Returns count on success, or 0 if a chunk size fails the sanity check.
// Throws invalid_argument if count >= MAX_BLOCK_SIZE.
int FPAQDecoder::decode(byte block[], uint blkptr, uint count)
{
    if (count >= MAX_BLOCK_SIZE)
        throw invalid_argument("Invalid block size parameter (max is 1<<30)");

    uint startChunk = blkptr;
    const uint end = blkptr + count;

    // Read bit array from bitstream and decode chunk
    while (startChunk < end) {
        const uint szBytes = uint(EntropyUtils::readVarInt(_bitstream));

        // Sanity check
        if (szBytes >= 2 * count)
            return 0;

        // Payload size plus 25% slack, never less than 8192 bytes
        const size_t bufSize = max(szBytes + (szBytes >> 2), 8192u);

        if (_buf.size() < bufSize)
            _buf.resize(bufSize);

        _current = _bitstream.readBits(56);

        // Zero the slack that follows the payload
        // NOTE(review): presumably so that read() sees zeros if it runs past
        // the payload - confirm
        if (bufSize > szBytes)
            memset(&_buf[szBytes], 0, bufSize - szBytes);

        _bitstream.readBits(&_buf[0], 8 * szBytes);
        _index = 0;
        const uint chunkSize = min(DEFAULT_CHUNK_SIZE, end - startChunk);
        const uint endChunk = startChunk + chunkSize;
        // Each chunk starts with the first probability table
        _p = _probs[0];

        for (uint i = startChunk; i < endChunk; i++) {
            // Decode 8 bits; _ctx starts at 1 and accumulates the decoded bits
            _ctx = 1;
            decodeBit(_p[_ctx]);
            decodeBit(_p[_ctx]);
            decodeBit(_p[_ctx]);
            decodeBit(_p[_ctx]);
            decodeBit(_p[_ctx]);
            decodeBit(_p[_ctx]);
            decodeBit(_p[_ctx]);
            decodeBit(_p[_ctx]);
            block[i] = byte(_ctx);
            // The top 2 bits of the decoded byte select the table for the next byte
            _p = _probs[(_ctx & 0xFF) >> 6];
        }

        startChunk = endChunk;
    }

    return count;
}
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------
/src/entropy/FPAQDecoder.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _FPAQDecoder_
 18 | #define _FPAQDecoder_
 19 | 
 20 | #include <vector>
 21 | 
 22 | #include "../EntropyDecoder.hpp"
 23 | #include "../Memory.hpp"
 24 | #include "../SliceArray.hpp"
 25 | 
 26 | namespace kanzi
 27 | {
 28 | 
   // Derived from fpaq0r by Matt Mahoney & Alexander Ratushnyak.
   // See http://mattmahoney.net/dc/#fpaq0.
   // Simple (and fast) adaptive entropy bit coder.
   // Decoder side: reads chunks from an InputBitStream and rebuilds the
   // bytes bit by bit using adaptive 16-bit probabilities.
   class FPAQDecoder : public EntropyDecoder
   {
   private:
       static const uint64 TOP;
       static const uint64 MASK_0_56;
       static const uint64 MASK_0_32;
       static const uint DEFAULT_CHUNK_SIZE;
       static const uint MAX_BLOCK_SIZE;
       static const int PSCALE;

       uint64 _low; // lower bound of the coding interval
       uint64 _high; // upper bound of the coding interval
       uint64 _current; // value being decoded, compared to the interval split
       InputBitStream& _bitstream;
       std::vector<byte> _buf; // chunk payload consumed by read()
       int _index; // next read position in _buf
       uint16 _probs[4][256]; // probability of bit=1
       uint16* _p; // pointer to current prob
       int _ctx; // previous bits

       // Nothing to release
       void _dispose() const {}

       // Decodes one bit with the given probability and updates the model
       int decodeBit(int pred = 2048);

       bool reset();

   public:
       FPAQDecoder(InputBitStream& bitstream);

       ~FPAQDecoder();

       // Decodes count bytes into block starting at blkptr
       int decode(byte block[], uint blkptr, uint count);

       InputBitStream& getBitStream() const { return _bitstream; }

       void dispose() { _dispose(); }

       // Shifts the interval by 32 bits and pulls 4 more bytes from _buf
       void read();
   };
 71 | 
 72 | 
 73 |    inline int FPAQDecoder::decodeBit(int prob)
 74 |    {
 75 |        // Calculate interval split
 76 |        // Written in a way to maximize accuracy of multiplication/division
 77 |        const uint64 split = ((((_high - _low) >> 8) * uint64(prob)) >> 8) + _low;
 78 |        int bit;
 79 | 
 80 |        // Update probabilities
 81 |        if (split >= _current) {
 82 |            _high = split;
 83 |            _p[_ctx] -= uint16((_p[_ctx] - PSCALE + 64) >> 6);
 84 |            _ctx += (_ctx + 1);
 85 |            bit = 1;
 86 |        }
 87 |        else {
 88 |            _low = split + 1;
 89 |            _p[_ctx] -= uint16(_p[_ctx] >> 6);
 90 |            _ctx += _ctx;
 91 |            bit = 0;
 92 |        }
 93 | 
 94 |        // Read 32 bits from bitstream
 95 |        if (((_low ^ _high) >> 24) == 0)
 96 |            read();
 97 | 
 98 |        return bit;
 99 |    }
100 | 
101 | 
102 |    inline void FPAQDecoder::read()
103 |    {
104 |        _low = (_low << 32) & MASK_0_56;
105 |        _high = ((_high << 32) | MASK_0_32) & MASK_0_56;
106 |        const uint64 val = BigEndian::readInt32(&_buf[_index]) & MASK_0_32;
107 |        _current = ((_current << 32) | val) & MASK_0_56;
108 |        _index += 4;
109 |    }
110 | }
111 | #endif
112 | 
113 | 


--------------------------------------------------------------------------------
/src/entropy/FPAQEncoder.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <algorithm>
 17 | #include <stdexcept>
 18 | #include "FPAQEncoder.hpp"
 19 | #include "EntropyUtils.hpp"
 20 | 
 21 | using namespace kanzi;
 22 | using namespace std;
 23 | 
// Initial value of _high set by reset() (low 56 bits set)
const uint64 FPAQEncoder::TOP = 0x00FFFFFFFFFFFFFF;
// Low 24 bits set; OR-ed into _low when the final 56 bits are written
const uint64 FPAQEncoder::MASK_0_24 = 0x0000000000FFFFFF;
// Low 32 bits set; shifted into _high by flush()
const uint64 FPAQEncoder::MASK_0_32 = 0x00000000FFFFFFFF;
// Maximum number of input bytes encoded per chunk in encode() (4 MiB)
const uint FPAQEncoder::DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;
// encode() rejects any count greater than or equal to this value
const uint FPAQEncoder::MAX_BLOCK_SIZE = 1 << 30;
// Probability scale; reset() initializes every probability to PSCALE / 2
const int FPAQEncoder::PSCALE = 65536;
 30 | 
 31 | 
 32 | FPAQEncoder::FPAQEncoder(OutputBitStream& bitstream)
 33 |     : _bitstream(bitstream)
 34 | {
 35 |     reset();
 36 | }
 37 | 
 38 | FPAQEncoder::~FPAQEncoder()
 39 | {
 40 |     _dispose();
 41 | }
 42 | 
 43 | bool FPAQEncoder::reset()
 44 | {
 45 |     _index = 0;
 46 |     _low = 0;
 47 |     _high = TOP;
 48 |     _disposed = false;
 49 | 
 50 |     for (int i = 0; i < 4; i++) {
 51 |         for (int j = 0; j < 256; j++)
 52 |             _probs[i][j] = PSCALE >> 1;
 53 |     }
 54 | 
 55 |     return true;
 56 | }
 57 | 
 58 | int FPAQEncoder::encode(const byte block[], uint blkptr, uint count)
 59 | {
 60 |     if (count >= MAX_BLOCK_SIZE)
 61 |         throw invalid_argument("Invalid block size parameter (max is 1<<30)");
 62 | 
 63 |     uint startChunk = blkptr;
 64 |     const uint end = blkptr + count;
 65 | 
 66 |     // Split block into chunks, encode chunk and write bit array to bitstream
 67 |     while (startChunk < end) {
 68 |         const uint chunkSize = min(DEFAULT_CHUNK_SIZE, end - startChunk);
 69 |         const size_t bufSize = max(chunkSize + (chunkSize >> 3), 1024u);
 70 | 
 71 |         if (_buf.size() < bufSize)
 72 |             _buf.resize(bufSize);
 73 | 
 74 |         _index = 0;
 75 |         const uint endChunk = startChunk + chunkSize;
 76 |         uint16* p = _probs[0];
 77 | 
 78 |         for (uint i = startChunk; i < endChunk; i++) {
 79 |             const int val = int(block[i]);
 80 |             const int bits = val + 256;
 81 |             encodeBit(val & 0x80, p[1]);
 82 |             encodeBit(val & 0x40, p[bits >> 7]);
 83 |             encodeBit(val & 0x20, p[bits >> 6]);
 84 |             encodeBit(val & 0x10, p[bits >> 5]);
 85 |             encodeBit(val & 0x08, p[bits >> 4]);
 86 |             encodeBit(val & 0x04, p[bits >> 3]);
 87 |             encodeBit(val & 0x02, p[bits >> 2]);
 88 |             encodeBit(val & 0x01, p[bits >> 1]);
 89 |             p = _probs[val >> 6];
 90 |         }
 91 | 
 92 |         EntropyUtils::writeVarInt(_bitstream, uint32(_index));
 93 |         _bitstream.writeBits(&_buf[0], 8 * _index);
 94 |         startChunk += chunkSize;
 95 | 
 96 |         if (startChunk < end)
 97 |             _bitstream.writeBits(_low | MASK_0_24, 56);
 98 |     }
 99 | 
100 |     return count;
101 | }
102 | 
103 | void FPAQEncoder::_dispose()
104 | {
105 |     if (_disposed == true)
106 |         return;
107 | 
108 |     _disposed = true;
109 |     _bitstream.writeBits(_low | MASK_0_24, 56);
110 | }
111 | 


--------------------------------------------------------------------------------
/src/entropy/FPAQEncoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _FPAQEncoder_
18 | #define _FPAQEncoder_
19 | 
20 | #include <vector>
21 | 
22 | #include "../EntropyEncoder.hpp"
23 | #include "../Memory.hpp"
24 | #include "../SliceArray.hpp"
25 | 
26 | namespace kanzi
27 | {
28 | 
   // Derived from fpaq0r by Matt Mahoney & Alexander Ratushnyak.
   // See http://mattmahoney.net/dc/#fpaq0.
   // Simple (and fast) adaptive entropy bit coder.
   // Encoder side: codes bytes bit by bit with adaptive 16-bit probabilities
   // into an internal buffer, then writes the buffer to an OutputBitStream.
   class FPAQEncoder : public EntropyEncoder
   {
   private:
       static const uint64 TOP;
       static const uint64 MASK_0_24;
       static const uint64 MASK_0_32;
       static const uint DEFAULT_CHUNK_SIZE;
       static const uint MAX_BLOCK_SIZE;
       static const int PSCALE;

       uint64 _low; // lower bound of the coding interval
       uint64 _high; // upper bound of the coding interval
       bool _disposed; // true once _dispose() has written the final bits
       OutputBitStream& _bitstream;
       std::vector<byte> _buf; // coded bytes of the current chunk
       int _index; // next write position in _buf
       uint16 _probs[4][256]; // probability of bit=1


       // Codes one bit (0 or non-zero) and adapts 'prob'
       void encodeBit(int bit, uint16& prob);

       bool reset();

       // Writes the final bits to the bitstream (first call only)
       void _dispose();

   public:
       FPAQEncoder(OutputBitStream& bitstream);

       ~FPAQEncoder();

       // Encodes count bytes of block starting at blkptr
       int encode(const byte block[], uint blkptr, uint count);

       OutputBitStream& getBitStream() const { return _bitstream; }

       void dispose() { _dispose(); }

       // Moves the top 32 bits of the interval into _buf
       void flush();
   };
70 | 
71 | 
72 |    inline void FPAQEncoder::encodeBit(int bit, uint16& prob)
73 |    {
74 |        // Update probabilities
75 |        if (bit == 0) {
76 |           _low = _low + ((((_high - _low) >> 8) * uint64(prob)) >> 8) + 1;
77 |           prob -= uint16(prob >> 6);
78 |        } else  {
79 |           _high = _low + ((((_high - _low) >> 8) * uint64(prob)) >> 8);
80 |           prob -= uint16((prob - PSCALE + 64) >> 6);
81 |        }
82 | 
83 |        // Write unchanged first 32 bits to bitstream
84 |        if (((_low ^ _high) >> 24) == 0)
85 |            flush();
86 |    }
87 | 
88 |    inline void FPAQEncoder::flush()
89 |    {
90 |        BigEndian::writeInt32(&_buf[_index], int32(_high >> 24));
91 |        _index += 4;
92 |        _low <<= 32;
93 |        _high = (_high << 32) | MASK_0_32;
94 |    }
95 | }
96 | #endif
97 | 
98 | 


--------------------------------------------------------------------------------
/src/entropy/HuffmanCommon.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #include "HuffmanCommon.hpp"
17 | 
18 | using namespace kanzi;
19 | 
20 | 
// Base 2 logarithm of MAX_CHUNK_SIZE
const int HuffmanCommon::LOG_MAX_CHUNK_SIZE = 14;
// 1 << 14 = 16384
const int HuffmanCommon::MAX_CHUNK_SIZE = 1 << LOG_MAX_CHUNK_SIZE;
// Longest code length accepted by generateCanonicalCodes()
const int HuffmanCommon::MAX_SYMBOL_SIZE = 12;
// Size of the scratch array used by generateCanonicalCodes() to sort symbols
const int HuffmanCommon::BUFFER_SIZE = (MAX_SYMBOL_SIZE << 8) + 256;
25 | 
26 | 
27 | // Return the number of codes generated
28 | // codes and symbols are updated
29 | int HuffmanCommon::generateCanonicalCodes(const uint16 sizes[], uint16 codes[], uint symbols[], int count)
30 | {
31 |     if (count == 0)
32 |         return 0;
33 | 
34 |     if (count > 1) {
35 |         int8 buf[BUFFER_SIZE] = { int8(0) };
36 | 
37 |         for (int i = 0; i < count; i++) {
38 |             const uint s = symbols[i];
39 | 
40 |             if ((s > 255) || (sizes[s] > MAX_SYMBOL_SIZE))
41 |                 return -1;
42 | 
43 |             buf[((sizes[s] - 1) << 8) | s] = int8(1);
44 |         }
45 | 
46 |         for (int i = 0, n = 0; n < count; i++) {
47 |             symbols[n] = i & 0xFF;
48 |             n += buf[i];
49 |         }
50 |     }
51 | 
52 |     int curLen = sizes[symbols[0]];
53 | 
54 |     for (int i = 0, code = 0; i < count; i++) {
55 |         const int s = symbols[i];
56 |         code <<= (sizes[s] - curLen);
57 |         curLen = sizes[s];
58 |         codes[s] = uint16(code);
59 |         code++;
60 |     }
61 | 
62 |     return count;
63 | }
64 | 


--------------------------------------------------------------------------------
/src/entropy/HuffmanCommon.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _HuffmanCommon_
18 | #define _HuffmanCommon_
19 | 
20 | #include "../types.hpp"
21 | 
22 | 
23 | namespace kanzi
24 | {
25 | 
   // Constants and helper shared by the Huffman encoder and decoder.
   class HuffmanCommon
   {
   public:
       static const int LOG_MAX_CHUNK_SIZE;
       static const int MAX_CHUNK_SIZE;
       // Longest code length accepted by generateCanonicalCodes()
       static const int MAX_SYMBOL_SIZE;

       // Fills 'codes' with canonical codes built from the code lengths in
       // 'sizes' for the 'count' symbols listed in 'ranks' (which is
       // reordered when count > 1). Returns count, or -1 on invalid input.
       static int generateCanonicalCodes(const uint16 sizes[], uint16 codes[], uint ranks[], int count);

   private:
       // Size of the scratch array used by generateCanonicalCodes()
       static const int BUFFER_SIZE;
   };
38 | 
39 | }
40 | #endif
41 | 
42 | 


--------------------------------------------------------------------------------
/src/entropy/HuffmanDecoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _HuffmanDecoder_
18 | #define _HuffmanDecoder_
19 | 
20 | #include "HuffmanCommon.hpp"
21 | #include "../EntropyDecoder.hpp"
22 | 
23 | 
24 | namespace kanzi
25 | {
26 | 
   // Implementation of a static Huffman decoder.
   // Reads from an InputBitStream; the member functions are defined outside
   // this header.
   class HuffmanDecoder : public EntropyDecoder
   {
   public:
       HuffmanDecoder(InputBitStream& bitstream, int chunkSize = HuffmanCommon::MAX_CHUNK_SIZE);

       // Releases the internal buffer
       ~HuffmanDecoder() { _dispose(); delete[] _buffer; }

       int decode(byte block[], uint blkptr, uint len);

       InputBitStream& getBitStream() const { return _bitstream; }

       void dispose() { _dispose(); }

   private:
       static const int DECODING_BATCH_SIZE;
       static const int TABLE_MASK;

       InputBitStream& _bitstream;
       byte* _buffer; // array released with delete[] in the destructor
       uint _bufferSize;
       uint16 _codes[256];
       uint _alphabet[256];
       uint16 _sizes[256];
       uint16 _table[1 << 12]; // decoding table: code -> size, symbol
       int _chunkSize;

       int readLengths();

       bool buildDecodingTable(int count);

       bool reset();

       // Nothing to release here; _buffer is freed by the destructor
       void _dispose() const {}
   };
62 | 
63 | 
64 | }
65 | #endif
66 | 
67 | 


--------------------------------------------------------------------------------
/src/entropy/HuffmanEncoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _HuffmanEncoder_
18 | #define _HuffmanEncoder_
19 | 
20 | #include "HuffmanCommon.hpp"
21 | #include "../EntropyEncoder.hpp"
22 | 
23 | 
24 | namespace kanzi
25 | {
26 | 
   // Implementation of a static Huffman encoder.
   // Uses in place generation of canonical codes instead of a tree.
   // Writes to an OutputBitStream; the member functions are defined outside
   // this header.
   class HuffmanEncoder : public EntropyEncoder
   {
   public:
       HuffmanEncoder(OutputBitStream& bitstream, int chunkSize = HuffmanCommon::MAX_CHUNK_SIZE);

       // Releases the internal buffer
       ~HuffmanEncoder() { _dispose(); delete[] _buffer; }

       int updateFrequencies(uint frequencies[]);

       int encode(const byte block[], uint blkptr, uint len);

       OutputBitStream& getBitStream() const { return _bitstream; }

       void dispose() { _dispose(); }


   private:
       OutputBitStream& _bitstream;
       uint16 _codes[256];
       int _chunkSize;
       byte* _buffer; // array released with delete[] in the destructor
       uint _bufferSize;

       int computeCodeLengths(uint16 sizes[], uint sranks[], int count) const;

       int limitCodeLengths(const uint alphabet[], uint freqs[], uint16 sizes[], uint sranks[], int count) const;

       // Nothing to release here; _buffer is freed by the destructor
       void _dispose() const {}

       bool reset();

       static void computeInPlaceSizesPhase1(uint data[], int n);

       static uint computeInPlaceSizesPhase2(uint data[], int n);
   };
64 | 
65 | }
66 | #endif
67 | 
68 | 


--------------------------------------------------------------------------------
/src/entropy/NullEntropyDecoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _NullEntropyDecoder_
18 | #define _NullEntropyDecoder_
19 | 
20 | #include "../EntropyDecoder.hpp"
21 | #include "../InputBitStream.hpp"
22 | 
23 | namespace kanzi {
24 | 
   // Null entropy decoder
   // Pass through that reads the data directly from the bitstream
   class NullEntropyDecoder FINAL : public EntropyDecoder {
   private:
       InputBitStream& _bitstream;


   public:
       NullEntropyDecoder(InputBitStream& bitstream);

       ~NullEntropyDecoder() {}

       // Copies up to len bytes from the bitstream into block, starting at
       // blkptr; returns the number of bytes read
       int decode(byte block[], uint blkptr, uint len);

       // Reads 8 bits from the bitstream and returns them as a byte
       byte decodeByte();

       InputBitStream& getBitStream() const { return _bitstream; }

       // Nothing to release
       void dispose() {}
   };
45 | 
46 |    inline NullEntropyDecoder::NullEntropyDecoder(InputBitStream& bitstream)
47 |        : _bitstream(bitstream)
48 |    {
49 |    }
50 | 
51 |    inline int NullEntropyDecoder::decode(byte block[], uint blkptr, uint count)
52 |    {
53 |       uint res = 0;
54 | 
55 |       while (count != 0) {
56 |           const uint ckSize = (count < 1<<23) ? count : 1<<23;
57 |           const uint r = uint(_bitstream.readBits(&block[blkptr], 8 * ckSize) >> 3);
58 | 
59 |           if (r == 0)
60 |              break;
61 | 
62 |           res += r;
63 |           blkptr += r;
64 |           count -= r;
65 |       }
66 | 
67 |       return res;
68 |    }
69 | 
70 |    inline byte NullEntropyDecoder::decodeByte()
71 |    {
72 |       return byte(_bitstream.readBits(8));
73 |    }
74 | }
75 | #endif
76 | 
77 | 


--------------------------------------------------------------------------------
/src/entropy/NullEntropyEncoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _NullEntropyEncoder_
18 | #define _NullEntropyEncoder_
19 | 
20 | #include "../EntropyEncoder.hpp"
21 | #include "../OutputBitStream.hpp"
22 | 
23 | namespace kanzi {
24 | 
   // Null entropy encoder
   // Pass through that writes the data directly to the bitstream
   class NullEntropyEncoder FINAL : public EntropyEncoder {
   private:
       OutputBitStream& _bitstream;


   public:
       NullEntropyEncoder(OutputBitStream& bitstream);

       ~NullEntropyEncoder() {}

       // Copies up to len bytes of block, starting at blkptr, to the
       // bitstream; returns the number of bytes written
       int encode(const byte block[], uint blkptr, uint len);

       // Writes val to the bitstream as 8 bits
       void encodeByte(byte val);

       OutputBitStream& getBitStream() const { return _bitstream; }

       // Nothing to release
       void dispose() {}
   };
45 | 
46 |    inline NullEntropyEncoder::NullEntropyEncoder(OutputBitStream& bitstream)
47 |        : _bitstream(bitstream)
48 |    {
49 |    }
50 | 
51 |    inline int NullEntropyEncoder::encode(const byte block[], uint blkptr, uint count)
52 |    {
53 |       uint res = 0;
54 | 
55 |       while (count != 0) {
56 |           const uint ckSize = (count < 1<<23) ? count : 1<<23;
57 |           const uint w = uint(_bitstream.writeBits(&block[blkptr], 8 * ckSize) >> 3);
58 | 
59 |           if (w == 0)
60 |              break;
61 | 
62 |           res += w;
63 |           blkptr += w;
64 |           count -= w;
65 |       }
66 | 
67 |       return res;
68 |    }
69 | 
70 |    inline void NullEntropyEncoder::encodeByte(byte val)
71 |    {
72 |       _bitstream.writeBits(uint64(val), 8);
73 |    }
74 | }
75 | #endif
76 | 
77 | 


--------------------------------------------------------------------------------
/src/entropy/RangeDecoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | 
17 | #pragma once
18 | #ifndef _RangeDecoder_
19 | #define _RangeDecoder_
20 | 
21 | #include "../EntropyDecoder.hpp"
22 | 
23 | 
24 | namespace kanzi
25 | {
26 | 
27 |    // Based on the order-0 range coder by Dmitry Subbotin, itself derived from the algorithm
28 |    // described by G.N.N. Martin in his seminal 1979 article.
29 |    // [G.N.N. Martin on the Data Recording Conference, Southampton, 1979]
30 |    // Optimized for speed.
31 | 
32 |    class RangeDecoder : public EntropyDecoder { // Range decoder; constants and non-inline methods are defined outside this header.
33 |    public:
34 |        static const int DECODING_BATCH_SIZE;
35 |        static const int DECODING_MASK;
36 |        // chunkSize may be omitted, in which case DEFAULT_CHUNK_SIZE is used.
37 |        RangeDecoder(InputBitStream& bitstream, int chunkSize = DEFAULT_CHUNK_SIZE);
38 |        // NOTE(review): frees _f2s, yet no copy constructor is declared here - confirm instances are never copied.
39 |        ~RangeDecoder() { _dispose(); delete[] _f2s; }
40 |        // NOTE(review): presumably fills block[blkptr..] with up to len decoded bytes and returns a count - confirm in the .cpp.
41 |        int decode(byte block[], uint blkptr, uint len);
42 | 
43 |        InputBitStream& getBitStream() const { return _bitstream; }
44 |        // Public wrapper around _dispose(), which has an empty body in this class.
45 |        void dispose() { _dispose(); }
46 | 
47 |    private:
48 |        static const uint64 TOP_RANGE;
49 |        static const uint64 BOTTOM_RANGE;
50 |        static const uint64 RANGE_MASK;
51 |        static const int DEFAULT_CHUNK_SIZE;
52 |        static const int DEFAULT_LOG_RANGE;
53 |        static const int MAX_CHUNK_SIZE;
54 |        // NOTE(review): names suggest the range coder state (code word, interval base, interval width) - confirm in the .cpp.
55 |        uint64 _code;
56 |        uint64 _low;
57 |        uint64 _range;
58 |        uint _alphabet[256];
59 |        uint _freqs[256];
60 |        uint64 _cumFreqs[257]; // one entry more than _freqs
61 |        short* _f2s; // released with delete[] in the destructor
62 |        int _lenF2S;
63 |        InputBitStream& _bitstream; // exposed through getBitStream()
64 |        uint _chunkSize;
65 |        uint _shift;
66 | 
67 |        int decodeHeader(uint frequencies[]);
68 | 
69 |        byte decodeByte();
70 | 
71 |        bool reset();
72 |        // Empty body; called by dispose() and by the destructor.
73 |        void _dispose() const {}
74 |    };
75 | 
76 | }
77 | #endif
78 | 
79 | 


--------------------------------------------------------------------------------
/src/entropy/RangeEncoder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _RangeEncoder_
18 | #define _RangeEncoder_
19 | 
20 | #include "../EntropyEncoder.hpp"
21 | 
22 | 
23 | namespace kanzi
24 | {
25 | 
26 |    // Based on the order-0 range coder by Dmitry Subbotin, itself derived from the algorithm
27 |    // described by G.N.N. Martin in his seminal 1979 article.
28 |    // [G.N.N. Martin on the Data Recording Conference, Southampton, 1979]
29 |    // Optimized for speed.
30 | 
31 |    class RangeEncoder : public EntropyEncoder
32 |    { // Range encoder; constants and non-inline methods are defined outside this header.
33 |    public:
34 |        RangeEncoder(OutputBitStream& bitstream, int chunkSize = DEFAULT_CHUNK_SIZE, int logRange=DEFAULT_LOG_RANGE);
35 |        // _dispose() has an empty body, so nothing is released here.
36 |        ~RangeEncoder() { _dispose(); }
37 |        // NOTE(review): presumably encodes len bytes of block starting at blkptr and returns a count - confirm in the .cpp.
38 |        int encode(const byte block[], uint blkptr, uint len);
39 | 
40 |        OutputBitStream& getBitStream() const { return _bitstream; }
41 |        // Public wrapper around _dispose().
42 |        void dispose() { _dispose(); }
43 | 
44 |    private:
45 |        static const uint64 TOP_RANGE;
46 |        static const uint64 BOTTOM_RANGE;
47 |        static const uint64 RANGE_MASK;
48 |        static const int DEFAULT_CHUNK_SIZE;
49 |        static const int DEFAULT_LOG_RANGE;
50 |        static const int MAX_CHUNK_SIZE;
51 |        // NOTE(review): names suggest the range coder interval (base and width) - confirm in the .cpp.
52 |        uint64 _low;
53 |        uint64 _range;
54 |        uint _alphabet[256];
55 |        uint _freqs[256];
56 |        uint64 _cumFreqs[257]; // one entry more than _freqs
57 |        OutputBitStream& _bitstream; // exposed through getBitStream()
58 |        uint _chunkSize;
59 |        uint _logRange;
60 |        uint _shift;
61 | 
62 |        int rebuildStatistics(const byte block[], int start, int end, int lr);
63 | 
64 |        int updateFrequencies(uint frequencies[], int size, int lr);
65 | 
66 |        void encodeByte(byte b);
67 | 
68 |        bool encodeHeader(int alphabetSize, const uint alphabet[], const uint frequencies[], int lr) const;
69 | 
70 |        bool reset();
71 |        // Empty body; called by dispose() and by the destructor.
72 |        void _dispose() const {}
73 |    };
74 | 
75 | }
76 | #endif
77 | 
78 | 


--------------------------------------------------------------------------------
/src/entropy/TPAQPredictor.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #include "TPAQPredictor.hpp"
17 | 
18 | using namespace kanzi;
19 | 
20 | const int TPAQMixer::BEGIN_LEARN_RATE = 60 << 7; // = 7680
21 | const int TPAQMixer::END_LEARN_RATE = 11 << 7; // = 1408
22 | // Both TPAQPredictor specializations (<true> and <false>) are given the same four values.
23 | template<>
24 | const int TPAQPredictor<true>::MAX_LENGTH = 88;
25 | template<>
26 | const int TPAQPredictor<true>::BUFFER_SIZE = 64 * 1024 * 1024; // = 67,108,864
27 | template<>
28 | const int TPAQPredictor<true>::HASH_SIZE = 16 * 1024 * 1024; // = 16,777,216
29 | template<>
30 | const int TPAQPredictor<true>::HASH = 0x7FEB352D;
31 | template<>
32 | const int TPAQPredictor<false>::MAX_LENGTH = 88;
33 | template<>
34 | const int TPAQPredictor<false>::BUFFER_SIZE = 64 * 1024 * 1024; // = 67,108,864
35 | template<>
36 | const int TPAQPredictor<false>::HASH_SIZE = 16 * 1024 * 1024; // = 16,777,216
37 | template<>
38 | const int TPAQPredictor<false>::HASH = 0x7FEB352D;
39 | 
40 | 
41 | TPAQMixer::TPAQMixer()
42 | { // Initial state: all of _w0.._w7 set to 32768, all of _p0.._p7 set to 0.
43 |     _pr = 2048;
44 |     _skew = 0;
45 |     _w0 = _w1 = _w2 = _w3 = _w4 = _w5 = _w6 = _w7 = 32768;
46 |     _p0 = _p1 = _p2 = _p3 = _p4 = _p5 = _p6 = _p7 = 0;
47 |     _learnRate = BEGIN_LEARN_RATE; // 60 << 7, defined at the top of this file
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/src/io/IOException.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _IOException_
18 | #define _IOException_
19 | 
20 | #include <string>
21 | #include <stdexcept>
22 | #include "../Error.hpp"
23 | #include "../types.hpp"
24 | #include "../util/strings.hpp"
25 | 
26 | 
27 | namespace kanzi
28 | {
29 | 
30 |    class IOException : public std::runtime_error
31 |    {
32 |        // I/O failure that carries a numeric error code in addition to the what() text.
33 |    public:
34 |        // No explicit code: Error::ERR_UNKNOWN is both stored and appended to the message.
35 |        IOException(const std::string& msg)
36 |            : std::runtime_error(msg + ". Error code: " + TOSTR(Error::ERR_UNKNOWN)), _code(Error::ERR_UNKNOWN)
37 |        {}
38 | 
39 |        // Explicit code: stored and appended to the message.
40 |        IOException(const std::string& msg, int error)
41 |            : std::runtime_error(msg + ". Error code: " + TOSTR(error)), _code(error)
42 |        {}
43 |        ~IOException() NOEXCEPT {}
44 |        // Returns the code recorded at construction.
45 |        int error() const { return _code; }
46 | 
47 |    private:
48 |        int _code;
49 |    };
50 | 
51 | }
52 | #endif
53 | 
54 | 


--------------------------------------------------------------------------------
/src/io/NullOutputStream.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 | http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _NullOutputStream_
18 | #define _NullOutputStream_
19 | 
20 | namespace kanzi
21 | {
22 |    template <class T, class traits = std::char_traits<T> >
23 |    class basic_nullbuf : public std::basic_streambuf<T, traits>
24 |    { // Discards all output. NOTE(review): this header includes no <streambuf>/<ostream>/<string> itself - confirm includers provide them.
25 |        typename traits::int_type overflow(typename traits::int_type c)
26 |        {
27 |            return traits::not_eof(c); // report success (a non-eof value) without storing c
28 |        }
29 |        // Empty body: there is nothing to release.
30 |        void close() {}
31 |    };
32 | 
33 |    template <class T, class traits = std::char_traits<T> >
34 |    class basic_onullstream : public std::basic_ostream<T, traits>
35 |    { // Output stream wired to a basic_nullbuf member, so whatever is written to it is dropped.
36 |    public:
37 |        basic_onullstream() :
38 |            std::basic_ios<T, traits>(&_sbuf),
39 |            std::basic_ostream<T, traits>(&_sbuf)
40 |        {
41 |            this->init(&_sbuf); // NOTE(review): _sbuf is a member, hence constructed after the bases above; presumably why init() is repeated here.
42 |        }
43 | 
44 |    private:
45 |        basic_nullbuf<T, traits> _sbuf;
46 |    };
47 | 
48 |    typedef basic_onullstream<char> NullOutputStream;
49 | }
50 | 
51 | #endif
52 | 
53 | 


--------------------------------------------------------------------------------
/src/transform/AliasCodec.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _AliasCodec_
18 | #define _AliasCodec_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | 
23 | namespace kanzi {
24 |     typedef struct ssAlias
25 |     { // Pair of a value (val) and its frequency (freq).
26 |         uint32 val;
27 |         uint32 freq;
28 | 
29 |         ssAlias(uint32 v, uint32 f) : val(v), freq(f) { }
30 |         // Orders by decreasing freq, ties broken by decreasing val (a < b means a sorts first).
31 |         // The unsigned fields are compared directly: a subtraction cast to int flips sign once the gap reaches 2^31.
32 |         friend bool operator< (ssAlias const& lhs, ssAlias const& rhs) {
33 |             return (lhs.freq != rhs.freq) ? (lhs.freq > rhs.freq) : (lhs.val > rhs.val);
34 |         }
35 |     } sdAlias;
36 | 
37 | 
38 |    // Simple codec replacing large symbols with small aliases whenever possible
39 |    class AliasCodec FINAL : public Transform<byte>
40 |    {
41 |    public:
42 |        // Stand-alone instance: no Context attached and _onlyDNA cleared.
43 |        AliasCodec() : _pCtx(nullptr), _onlyDNA(false) {}
44 | 
45 |        AliasCodec(Context& ctx);
46 | 
47 |        ~AliasCodec() {}
48 | 
49 |        bool forward(SliceArray<byte>& src, SliceArray<byte>& dst, int length);
50 | 
51 |        bool inverse(SliceArray<byte>& src, SliceArray<byte>& dst, int length);
52 | 
53 |        // Size the caller must reserve for the encoded output:
54 |        // the source length plus a fixed 1024 bytes of headroom.
55 |        int getMaxEncodedLength(int srcLen) const
56 |        {
57 |            return 1024 + srcLen;
58 |        }
59 | 
60 |    private:
61 |        static const int MIN_BLOCK_SIZE;
62 | 
63 |        Context* _pCtx;
64 |        bool _onlyDNA;
65 |    };
66 | }
67 | 
68 | #endif
69 | 
70 | 


--------------------------------------------------------------------------------
/src/transform/BWT.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _BWT_
 18 | #define _BWT_
 19 | 
 20 | #include "../concurrent.hpp"
 21 | #include "../Context.hpp"
 22 | #include "../Transform.hpp"
 23 | #include "DivSufSort.hpp"
 24 | 
 25 | 
 26 | namespace kanzi {
 27 | // The Burrows-Wheeler Transform is a reversible transform based on
 28 | // permutation of the data in the original message to reduce the entropy.
 29 | 
 30 | // The initial text can be found here:
 31 | // Burrows M and Wheeler D, [A block sorting lossless data compression algorithm]
 32 | // Technical Report 124, Digital Equipment Corporation, 1994
 33 | 
 34 | // See also Peter Fenwick, [Block sorting text compression - final report]
 35 | // Technical Report 130, 1996
 36 | 
 37 | // This implementation replaces the 'slow' sorting of permutation strings
 38 | // with the construction of a suffix array (faster but more complex).
 39 | //
 40 | // E.G.    0123456789A
 41 | // Source: mississippi\0
 42 | // Suffixes:    rank  sorted
 43 | // mississippi\0  0  -> 4             i\0
 44 | //  ississippi\0  1  -> 3          ippi\0
 45 | //   ssissippi\0  2  -> 10      issippi\0
 46 | //    sissippi\0  3  -> 8    ississippi\0
 47 | //     issippi\0  4  -> 2   mississippi\0
 48 | //      ssippi\0  5  -> 9            pi\0
 49 | //       sippi\0  6  -> 7           ppi\0
 50 | //        ippi\0  7  -> 1         sippi\0
 51 | //         ppi\0  8  -> 6      sissippi\0
 52 | //          pi\0  9  -> 5        ssippi\0
 53 | //           i\0  10 -> 0     ssissippi\0
 54 | // Suffix array SA : 10 7 4 1 0 9 8 6 3 5 2
 55 | // BWT[i] = input[SA[i]-1] => BWT(input) = ipssmpissii (+ primary index 5)
 56 | // The suffix array and permutation vector are equal when the input is 0 terminated
 57 | // The insertion of a guard is done internally and is entirely transparent.
 58 | //
 59 | // This implementation extends the canonical algorithm to use up to MAX_CHUNKS primary
 60 | // indexes (based on input block size). Each primary index corresponds to a data chunk.
 61 | // Chunks may be inverted concurrently.
 62 |    template <class T>
 63 |    class InverseBiPSIv2Task FINAL : public Task<T> { // Task<T> subclass; its constructor and run() are defined outside this header.
 64 |    private:
 65 |        uint* _data;
 66 |        uint* _buckets;
 67 |        uint16* _fastBits;
 68 |        int* _primaryIndexes;
 69 |        byte* _dst;
 70 |        int _total;
 71 |        int _start;
 72 |        int _ckSize;
 73 |        int _firstChunk;
 74 |        int _lastChunk;
 75 |        // NOTE(review): the fields above mirror the constructor parameters by name - confirm the mapping (buf -> _data, output -> _dst) in the definition.
 76 |    public:
 77 |        InverseBiPSIv2Task(uint* buf, uint* buckets, uint16* fastBits, byte* output,
 78 |            int* primaryIndexes, int total, int start, int ckSize, int firstChunk, int lastChunk);
 79 |        ~InverseBiPSIv2Task() {}
 80 |        // Defined outside this header; returns a T.
 81 |        T run();
 82 |    };
 83 | 
 84 |    class BWT FINAL : public Transform<byte> {
 85 |        // Burrows-Wheeler transform exposed through the Transform<byte> interface.
 86 |    private:
 87 |        static const int MAX_BLOCK_SIZE;
 88 |        static const int NB_FASTBITS;
 89 |        static const int BLOCK_SIZE_THRESHOLD1;
 90 |        static const int BLOCK_SIZE_THRESHOLD2;
 91 |        // NOTE(review): _buffer and _sa are freed in the destructor; no copy constructor is declared here - confirm instances are never copied.
 92 |        uint* _buffer;
 93 |        int* _sa;
 94 |        int _bufferSize;
 95 |        int _saSize;
 96 |        int _primaryIndexes[8]; // eight slots, the largest value getBWTChunks() returns
 97 |        DivSufSort _saAlgo;
 98 |        int _jobs;
 99 | #ifdef CONCURRENCY_ENABLED
100 |        ThreadPool* _pool;
101 | #endif
102 | 
103 |        bool inverseBiPSIv2(SliceArray<byte>& input, SliceArray<byte>& output, int count);
104 | 
105 |        bool inverseMergeTPSI(SliceArray<byte>& input, SliceArray<byte>& output, int count);
106 | 
107 |    public:
108 |        static const int MASK_FASTBITS;
109 | 
110 |        BWT(int jobs = 1);
111 | 
112 |        BWT(Context& ctx);
113 |        // Releases the two arrays; _pool (when compiled in) is not deleted here.
114 |        ~BWT() { delete[] _buffer; delete[] _sa; }
115 | 
116 |        bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length);
117 | 
118 |        bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length);
119 |        // No range check on n: the caller must pass an index within _primaryIndexes (0..7).
120 |        int getPrimaryIndex(int n) const { return _primaryIndexes[n]; }
121 | 
122 |        bool setPrimaryIndex(int n, int primaryIndex);
123 |        // The output is reported as exactly as long as the input.
124 |        int getMaxEncodedLength(int srcLen) const { return srcLen; }
125 |        // Returns 1 below BLOCK_SIZE_THRESHOLD1 and 8 otherwise (inline definition follows the class).
126 |        static int getBWTChunks(int size);
127 |    };
128 | 
129 |    // Number of chunks (one primary index each) used for a block of the given size.
130 |    inline int BWT::getBWTChunks(int size)
131 |    {
132 |        return (size >= BLOCK_SIZE_THRESHOLD1) ? 8 : 1;
133 |    }
134 | }
135 | #endif
136 | 
137 | 


--------------------------------------------------------------------------------
/src/transform/BWTBlockCodec.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _BWTBlockCodec_
18 | #define _BWTBlockCodec_
19 | 
20 | #include "../transform/BWT.hpp"
21 | #include "../Context.hpp"
22 | 
23 | 
24 | namespace kanzi {
25 | 
26 |    // Utility class to en/de-code a BWT data block and its associated primary index(es)
27 | 
28 |    // BWT stream format: Header (mode + primary index(es)) | Data (n bytes)
29 |    //   mode (8 bits): xxxyyyzz
30 |    //   xxx: ignored
31 |    //   yyy: log(chunks)
32 |    //   zz: primary index size - 1 (in bytes)
33 |    //   primary indexes (chunks * (8|16|24|32 bits))
34 | 
35 |    class BWTBlockCodec FINAL : public Transform<byte> {
36 |    public:
37 |        // Defined outside this header.
38 |        BWTBlockCodec(Context& ctx);
39 |        // Deletes the BWT held in _pBWT. NOTE(review): no copy constructor is declared here - confirm instances are never copied.
40 |        ~BWTBlockCodec() { delete _pBWT; }
41 | 
42 |        bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length);
43 | 
44 |        bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length);
45 | 
46 |        // Required encoding output buffer size
47 |        int getMaxEncodedLength(int srcLen) const
48 |        {
49 |            return srcLen + 32 /* max header size */;
50 |        }
51 | 
52 |    private:
53 |        BWT* _pBWT; // deleted in the destructor
54 |        int _bsVersion;
55 |    };
56 | }
57 | #endif
58 | 
59 | 


--------------------------------------------------------------------------------
/src/transform/BWTS.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _BWTS_
18 | #define _BWTS_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | #include "DivSufSort.hpp"
23 | 
24 | 
25 | namespace kanzi
26 | {
27 | 
28 |    // Bijective version of the Burrows-Wheeler Transform
29 |    // The main advantage over the regular BWT is that there is no need for a primary
30 |    // index (hence the bijectivity). BWTS is about 10% slower than BWT.
31 |    // Forward transform based on the code at https://code.google.com/p/mk-bwts/
32 |    // by Neal Burns and DivSufSort (port of libDivSufSort by Yuta Mori)
33 | 
34 |    class BWTS FINAL : public Transform<byte> {
35 |        // Bijective BWT exposed through the Transform<byte> interface; forward() and inverse() are defined outside this header.
36 |    private:
37 |        static const int MAX_BLOCK_SIZE;
38 | 
39 |        int* _buffer1;
40 |        int* _buffer2;
41 |        int _bufferSize;
42 |        DivSufSort _saAlgo;
43 | 
44 |        int moveLyndonWordHead(int sa[], int isa[], const byte data[],
45 |                               int count, int start, int size, int rank) const;
46 | 
47 |    public:
48 |        BWTS()
49 |        {
50 |            _buffer1 = new int[0]; // zero-length arrays, so both pointers are always valid for delete[]
51 |            _buffer2 = new int[0];
52 |            _bufferSize = 0;
53 |        }
54 |        // The Context argument is unnamed and unused; same initial state as the default constructor.
55 |        BWTS(Context&)
56 |        {
57 |            _buffer1 = new int[0];
58 |            _buffer2 = new int[0];
59 |            _bufferSize = 0;
60 |        }
61 |        // NOTE(review): frees both buffers; no copy constructor is declared here - confirm instances are never copied.
62 |        ~BWTS()
63 |        {
64 |           delete[] _buffer1;
65 |           delete[] _buffer2;
66 |        }
67 | 
68 |        bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length);
69 | 
70 |        bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length);
71 |        // The output is reported as exactly as long as the input.
72 |        int getMaxEncodedLength(int srcLen) const { return srcLen; }
73 |    };
74 | 
75 | }
76 | #endif
77 | 
78 | 


--------------------------------------------------------------------------------
/src/transform/EXECodec.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _EXECodec_
 18 | #define _EXECodec_
 19 | 
 20 | #include "../Context.hpp"
 21 | #include "../Transform.hpp"
 22 | 
 23 | namespace kanzi
 24 | {
 25 |    class EXECodec FINAL : public Transform<byte> {
 26 |    public:
 27 |        EXECodec() : _pCtx(nullptr) {}
 28 |        // Stores the address of ctx (not a copy).
 29 |        EXECodec(Context& ctx) : _pCtx(&ctx) {}
 30 | 
 31 |        ~EXECodec() {}
 32 | 
 33 |        bool forward(SliceArray<byte>& source, SliceArray<byte>& destination, int length);
 34 | 
 35 |        bool inverse(SliceArray<byte>& source, SliceArray<byte>& destination, int length);
 36 |        // Inline definition follows the class.
 37 |        int getMaxEncodedLength(int inputLen) const;
 38 | 
 39 |    private:
 40 |        // Constants are only declared here; their values are defined outside this header.
 41 |        static const byte X86_MASK_JUMP;
 42 |        static const byte X86_INSTRUCTION_JUMP;
 43 |        static const byte X86_INSTRUCTION_JCC;
 44 |        static const byte X86_TWO_BYTE_PREFIX;
 45 |        static const byte X86_MASK_JCC;
 46 |        static const byte X86_ESCAPE;
 47 |        static const byte NOT_EXE;
 48 |        static const byte X86;
 49 |        static const byte ARM64;
 50 |        static const byte MASK_DT;
 51 |        static const int X86_ADDR_MASK;
 52 |        static const int MASK_ADDRESS;
 53 |        static const int ARM_B_ADDR_MASK;
 54 |        static const int ARM_B_OPCODE_MASK;
 55 |        static const int ARM_B_ADDR_SGN_MASK;
 56 |        static const int ARM_OPCODE_B;
 57 |        static const int ARM_OPCODE_BL;
 58 |        static const int ARM_CB_REG_BITS;
 59 |        static const int ARM_CB_ADDR_MASK;
 60 |        static const int ARM_CB_ADDR_SGN_MASK;
 61 |        static const int ARM_CB_OPCODE_MASK;
 62 |        static const int ARM_OPCODE_CBZ;
 63 |        static const int ARM_OPCODE_CBNZ;
 64 |        static const int WIN_PE;
 65 |        static const uint16 WIN_X86_ARCH;
 66 |        static const uint16 WIN_AMD64_ARCH;
 67 |        static const uint16 WIN_ARM64_ARCH;
 68 |        static const int ELF_X86_ARCH;
 69 |        static const int ELF_AMD64_ARCH;
 70 |        static const int ELF_ARM64_ARCH;
 71 |        static const int MAC_AMD64_ARCH;
 72 |        static const int MAC_ARM64_ARCH;
 73 |        static const int MAC_MH_EXECUTE;
 74 |        static const int MAC_LC_SEGMENT;
 75 |        static const int MAC_LC_SEGMENT64;
 76 |        static const int MIN_BLOCK_SIZE;
 77 |        static const int MAX_BLOCK_SIZE;
 78 | 
 79 |        // NOTE(review): presumably the per-architecture halves of forward()/inverse() - confirm in the .cpp.
 80 |        bool forwardARM(SliceArray<byte>& source, SliceArray<byte>& destination, int length, int codeStart, int codeEnd);
 81 | 
 82 |        bool forwardX86(SliceArray<byte>& source, SliceArray<byte>& destination, int length, int codeStart, int codeEnd);
 83 | 
 84 |        bool inverseARM(SliceArray<byte>& source, SliceArray<byte>& destination, int length);
 85 | 
 86 |        bool inverseX86(SliceArray<byte>& source, SliceArray<byte>& destination, int length);
 87 |        // codeStart and codeEnd are passed by reference (output parameters).
 88 |        static byte detectType(const byte src[], int count, int& codeStart, int& codeEnd);
 89 | 
 90 |        static bool parseHeader(const byte src[], int count, uint magic, int& arch, int& codeStart, int& codeEnd);
 91 | 
 92 |        Context* _pCtx;
 93 |    };
 94 |    
 95 |    
 96 |     inline int EXECodec::getMaxEncodedLength(int srcLen) const
 97 |     {
 98 |         // Headroom for data that does not shrink: 32 bytes up to 256 bytes of input, one eighth of the input beyond.
 99 |         return srcLen + ((srcLen <= 256) ? 32 : srcLen / 8);
100 |     }
101 | 
102 | }
103 | #endif
104 | 
105 | 


--------------------------------------------------------------------------------
/src/transform/FSDCodec.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _FSDCodec_
18 | #define _FSDCodec_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | 
23 | 
24 | // Fixed Step Delta codec
25 | // Decorrelate values separated by a constant distance (step) and encode residuals
26 | namespace kanzi {
27 | 
28 |    class FSDCodec FINAL : public Transform<byte> {
29 |    public:
30 |        // Stand-alone instance with no Context attached.
31 |        FSDCodec() : _pCtx(nullptr) {}
32 | 
33 |        FSDCodec(Context& ctx) : _pCtx(&ctx) {}
34 | 
35 |        ~FSDCodec() {}
36 | 
37 |        bool forward(SliceArray<byte>& src, SliceArray<byte>& dst, int length);
38 | 
39 |        bool inverse(SliceArray<byte>& src, SliceArray<byte>& dst, int length);
40 | 
41 |        // Required encoding output buffer size: 64 extra bytes below 1024, one sixteenth of the input otherwise
42 |        int getMaxEncodedLength(int srcLen) const
43 |        {
44 |            const int slack = (srcLen < 1024) ? 64 : (srcLen >> 4); // limit expansion
45 |            return srcLen + slack;
46 |        }
47 | 
48 |    private:
49 |        static const int MIN_LENGTH;
50 |        static const byte ESCAPE_TOKEN;
51 |        static const byte DELTA_CODING;
52 |        static const byte XOR_CODING;
53 |        static const uint8 ZIGZAG1[256];
54 |        static const int8 ZIGZAG2[256];
55 |        Context* _pCtx;
56 |    };
57 | }
58 | #endif
59 | 
60 | 


--------------------------------------------------------------------------------
/src/transform/NullTransform.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _NullTransform_
18 | #define _NullTransform_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | 
23 | 
24 | namespace kanzi
25 | {
26 | 
27 |    class NullTransform FINAL : public Transform<byte> { // Pass-through transform: both directions copy the input unchanged.
28 |    public:
29 |        NullTransform() {}
30 |        NullTransform(Context&) {}
31 |        ~NullTransform() {}
32 |        // forward() and inverse() are the same operation: both delegate to doCopy().
33 |        bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length) { return doCopy(input, output, length); }
34 | 
35 |        bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length) { return doCopy(input, output, length); }
36 | 
37 |        // Required encoding output buffer size
38 |        int getMaxEncodedLength(int inputLen) const { return inputLen; }
39 | 
40 |    private:
41 |        bool doCopy(SliceArray<byte>& input, SliceArray<byte>& output, int length) const;
42 | 
43 |    };
44 |    // Copies length bytes from input to output at their current indexes, then advances both indexes.
45 |    inline bool NullTransform::doCopy(SliceArray<byte>& input, SliceArray<byte>& output, int length) const
46 |    {
47 |        if (length == 0)
48 |            return true;
49 | 
50 |        if (!SliceArray<byte>::isValid(input))
51 |            throw std::invalid_argument("Invalid input block");
52 | 
53 |        if (!SliceArray<byte>::isValid(output))
54 |            throw std::invalid_argument("Invalid output block");
55 | 
56 |        // Refuse a negative length (size_t(length) would turn it into a huge copy size) and
57 |        // any copy that would run past the end of either slice. Nothing is copied in that case.
58 |        if ((length < 0) || (input._index + length > input._length)
59 |            || (output._index + length > output._length))
60 |            return false;
61 | 
62 |        memcpy(&output._array[output._index], &input._array[input._index], size_t(length));
63 |        input._index += length;
64 |        output._index += length;
65 |        return true;
66 |    }
67 | 
68 | }
69 | #endif
70 | 
71 | 


--------------------------------------------------------------------------------
/src/transform/RLT.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _RLT_
18 | #define _RLT_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | 
23 | namespace kanzi
24 | {
25 | 
26 |    // Implementation of an escaped RLE
27 |    // Run length encoding:
28 |    // RUN_LEN_ENCODE1 = 224 => RUN_LEN_ENCODE2 = 31*224 = 6944
29 |    // 4    <= runLen < 224+4      -> 1 byte
30 |    // 228  <= runLen < 6944+228   -> 2 bytes
31 |    // 7172 <= runLen < 65535+7172 -> 3 bytes
32 | 
33 |    class RLT FINAL : public Transform<byte>
34 |    {
35 |    public:
36 |        RLT() : _pCtx(nullptr) {}
37 |        RLT(Context& ctx) : _pCtx(&ctx) {} // stores the address of ctx only
38 |        ~RLT() {}
39 | 
40 |        bool forward(SliceArray<byte>& pSrc, SliceArray<byte>& pDst, int length);
41 |        bool inverse(SliceArray<byte>& pSrc, SliceArray<byte>& pDst, int length);
42 | 
43 |        // Sources of at most 512 bytes are given 32 extra bytes
44 |        int getMaxEncodedLength(int srcLen) const { return (srcLen > 512) ? srcLen : srcLen + 32; }
45 | 
46 |    private:
47 |        Context* _pCtx; // null when built without a context
48 | 
49 |        static int emitRunLength(byte dst[], int run, byte escape, byte val);
50 | 
51 |        static const int RUN_LEN_ENCODE1;
52 |        static const int RUN_LEN_ENCODE2;
53 |        static const int RUN_THRESHOLD;
54 |        static const int MAX_RUN;
55 |        static const int MAX_RUN4;
56 |        static const int MIN_BLOCK_LENGTH;
57 |        static const byte DEFAULT_ESCAPE;
58 |    };
59 | 
60 | }
61 | #endif
62 | 
63 | 


--------------------------------------------------------------------------------
/src/transform/SBRT.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #include <stdexcept>
 17 | #include "SBRT.hpp"
 18 | 
 19 | using namespace kanzi;
 20 | 
 21 | 
 22 | const int SBRT::MODE_MTF = 1; // alpha = 0: Move To Front
 23 | const int SBRT::MODE_RANK = 2; // alpha = 1/2
 24 | const int SBRT::MODE_TIMESTAMP = 3; // alpha = 1: Time Stamp
 25 | 
 26 | 
 27 | 
 28 | // 'mode' must be MODE_MTF, MODE_RANK or MODE_TIMESTAMP (else invalid_argument)
 29 | SBRT::SBRT(int mode) : _mask1((mode != MODE_TIMESTAMP) ? -1 : 0)
 30 |     , _mask2((mode != MODE_MTF) ? -1 : 0)
 31 |     , _shift((mode != MODE_RANK) ? 0 : 1)
 32 | {
 33 |     if (!((mode == MODE_MTF) || (mode == MODE_RANK) || (mode == MODE_TIMESTAMP)))
 34 |         throw std::invalid_argument("Invalid mode parameter");
 35 | }
 36 | 
 37 | // Same as SBRT(int): the context argument is not used
 38 | SBRT::SBRT(int mode, Context&) : _mask1((mode != MODE_TIMESTAMP) ? -1 : 0)
 39 |     , _mask2((mode != MODE_MTF) ? -1 : 0)
 40 |     , _shift((mode != MODE_RANK) ? 0 : 1)
 41 | {
 42 |     if (!((mode == MODE_MTF) || (mode == MODE_RANK) || (mode == MODE_TIMESTAMP)))
 43 |         throw std::invalid_argument("Invalid mode parameter");
 44 | }
 45 | 
 46 | // Forward transform: each of the 'count' input bytes is replaced by its current
 47 | // rank. Returns false if count does not fit in a slice; throws on invalid slice.
 48 | bool SBRT::forward(SliceArray<byte>& input, SliceArray<byte>& output, int count)
 49 | {
 50 |     if (count == 0)
 51 |         return true;
 52 |     if (!SliceArray<byte>::isValid(input))
 53 |         throw std::invalid_argument("SBRT: Invalid input block");
 54 |     if (!SliceArray<byte>::isValid(output))
 55 |         throw std::invalid_argument("SBRT: Invalid output block");
 56 | 
 57 |     // Never read or write past the end of a slice (also rejects count < 0)
 58 |     if ((count < 0) || (count > input._length - input._index) || (count > output._length - output._index))
 59 |         return false;
 60 | 
 61 |     const byte* src = &input._array[input._index];
 62 |     byte* dst = &output._array[output._index];
 63 |     int p[256] = { 0 }; // last position at which each symbol was seen
 64 |     int q[256] = { 0 }; // sort key of each symbol
 65 |     uint8 s2r[256]; // symbol to rank
 66 |     uint8 r2s[256]; // rank to symbol
 67 | 
 68 |     for (int i = 0; i < 256; i++) {
 69 |         s2r[i] = uint8(i);
 70 |         r2s[i] = uint8(i);
 71 |     }
 72 | 
 73 |     for (int i = 0; i < count; i++) {
 74 |         const uint8 c = uint8(src[i]);
 75 |         int r = int(s2r[c]);
 76 |         dst[i] = byte(r);
 77 |         const int qc = ((i & _mask1) + (p[c] & _mask2)) >> _shift;
 78 |         p[c] = i;
 79 |         q[c] = qc;
 80 |         // Move up symbol to correct rank
 81 |         while ((r > 0) && (q[r2s[r - 1]] <= qc)) {
 82 |             r2s[r] = r2s[r - 1];
 83 |             s2r[r2s[r]] = uint8(r);
 84 |             r--;
 85 |         }
 86 |         r2s[r] = c;
 87 |         s2r[c] = uint8(r);
 88 |     }
 89 |     input._index += count;
 90 |     output._index += count;
 91 |     return true;
 92 | }
 93 | 
 94 | // Inverse transform: each of the 'count' input ranks is replaced by the symbol
 95 | // at that rank. Returns false if count does not fit in a slice; throws on invalid slice.
 96 | bool SBRT::inverse(SliceArray<byte>& input, SliceArray<byte>& output, int count)
 97 | {
 98 |     if (count == 0)
 99 |         return true;
100 |     if (!SliceArray<byte>::isValid(input))
101 |         throw std::invalid_argument("SBRT: Invalid input block");
102 |     if (!SliceArray<byte>::isValid(output))
103 |         throw std::invalid_argument("SBRT: Invalid output block");
104 | 
105 |     // Never read or write past the end of a slice (also rejects count < 0)
106 |     if ((count < 0) || (count > input._length - input._index) || (count > output._length - output._index))
107 |         return false;
108 | 
109 |     const byte* src = &input._array[input._index];
110 |     byte* dst = &output._array[output._index];
111 |     int p[256] = { 0 }; // last position at which each symbol was seen
112 |     int q[256] = { 0 }; // sort key of each symbol
113 |     uint8 r2s[256]; // rank to symbol
114 | 
115 |     for (int i = 0; i < 256; i++)
116 |         r2s[i] = uint8(i);
117 | 
118 |     for (int i = 0; i < count; i++) {
119 |         int r = int(src[i]);
120 |         const int c = int(r2s[r]);
121 |         dst[i] = byte(r2s[r]);
122 |         const int qc = ((i & _mask1) + (p[c] & _mask2)) >> _shift;
123 |         p[c] = i;
124 |         q[c] = qc;
125 |         // Move up symbol to correct rank
126 |         while ((r > 0) && (q[r2s[r - 1]] <= qc)) {
127 |             r2s[r] = r2s[r - 1];
128 |             r--;
129 |         }
130 |         r2s[r] = uint8(c);
131 |     }
132 |     input._index += count;
133 |     output._index += count;
134 |     return true;
135 | }
136 | 


--------------------------------------------------------------------------------
/src/transform/SBRT.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _SBRT_
18 | #define _SBRT_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | 
23 | 
24 | namespace kanzi
25 | {
26 |    // Sort by Rank Transform is a family of transforms typically used after
27 |    // a BWT to reduce the variance of the data prior to entropy coding.
28 |    // SBR(alpha) is defined by sbr(x, alpha) = (1-alpha)*(t-w1(x,t)) + alpha*(t-w2(x,t))
29 |    // where x is an item in the data list, t is the current access time and wk(x,t) is
30 |    // the k-th access time to x at time t (with 0 <= alpha <= 1).
31 |    // See [Two new families of list update algorithms] by Frank Schulz for details.
32 |    // SBR(0)= Move to Front Transform
33 |    // SBR(1)= Time Stamp Transform
34 |    // This code implements SBR(0), SBR(1/2) and SBR(1). Code derived from openBWT
35 |    class SBRT FINAL : public Transform<byte>
36 |    {
37 |    public:
38 |        static const int MODE_MTF;
39 |        static const int MODE_RANK;
40 |        static const int MODE_TIMESTAMP;
41 |        // 'mode' is one of the MODE_* constants; the Context argument is unnamed
42 |        SBRT(int mode);
43 |        SBRT(int mode, Context&);
44 |        ~SBRT() {}
45 | 
46 |        bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length);
47 | 
48 |        bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length);
49 |        // The reported maximum encoded length is the source length itself
50 |        int getMaxEncodedLength(int srcLen) const { return srcLen; }
51 | 
52 |    private:
53 |        // Constant members: they can only be set in the constructors
54 |        const int _mask1;
55 |        const int _mask2;
56 |        const int _shift;
57 |    };
58 | 
59 | }
60 | #endif
61 | 
62 | 


--------------------------------------------------------------------------------
/src/transform/SRT.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _SRT_
18 | #define _SRT_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | 
23 | namespace kanzi {
24 | 
25 |    // Sorted Rank Transform is typically used after a BWT to reduce the variance
26 |    // of the data prior to entropy coding.
27 | 
28 |    class SRT FINAL : public Transform<byte> {
29 |    public:
30 |        SRT() {}
31 |        SRT(Context&) {} // the context argument is not used
32 |        ~SRT() {}
33 | 
34 |        bool forward(SliceArray<byte>& pSrc, SliceArray<byte>& pDst, int length);
35 | 
36 |        bool inverse(SliceArray<byte>& pSrc, SliceArray<byte>& pDst, int length);
37 |        // Source length plus 1024 bytes reserved for the header
38 |        int getMaxEncodedLength(int srcLen) const { return srcLen + 1024 /* max header size */; }
39 | 
40 |    private:
41 |        static int preprocess(const uint freqs[], uint8 symbols[]);
42 |        // NOTE(review): the int result is presumably the header size in bytes - confirm in SRT.cpp
43 |        static int encodeHeader(const uint freqs[], byte dst[]);
44 | 
45 |        static int decodeHeader(const byte src[], uint freqs[]);
46 |    };
47 | }
48 | #endif
49 | 
50 | 


--------------------------------------------------------------------------------
/src/transform/UTFCodec.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011-2024 Frederic Langlet
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | you may obtain a copy of the License at
  6 | 
  7 |                 http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | */
 15 | 
 16 | #pragma once
 17 | #ifndef _UTFCodec_
 18 | #define _UTFCodec_
 19 | 
 20 | #include "../Context.hpp"
 21 | #include "../Transform.hpp"
 22 | 
 23 | 
 24 | namespace kanzi
 25 | {
 26 |     // A value and its frequency. operator< orders by decreasing frequency,
 27 |     // then by decreasing value.
 28 |     typedef struct ssUTF
 29 |     {
 30 |         uint32 val;
 31 |         uint32 freq;
 32 |         ssUTF(uint32 v, uint32 f) : val(v), freq(f) {}
 33 |         // Compare directly: int(lhs.freq - rhs.freq) has the wrong sign when the gap is >= 2^31
 34 |         friend bool operator< (ssUTF const& lhs, ssUTF const& rhs) {
 35 |             return (lhs.freq != rhs.freq) ? lhs.freq > rhs.freq : lhs.val > rhs.val;
 36 |         }
 37 |     } sdUTF;
 38 | 
 39 |     
 40 |     // UTF8 encoder/decoder
 41 |     class UTFCodec FINAL : public Transform<byte> {
 42 |     public:
 43 |         // Codec without a context
 44 |         UTFCodec() : _pCtx(nullptr) {}
 45 | 
 46 |         // Codec bound to ctx: only its address is stored
 47 |         UTFCodec(Context& ctx) : _pCtx(&ctx) {}
 48 | 
 49 |         ~UTFCodec() {}
 50 | 
 51 |         bool forward(SliceArray<byte>& source, SliceArray<byte>& destination, int length);
 52 | 
 53 |         bool inverse(SliceArray<byte>& source, SliceArray<byte>& destination, int length);
 54 | 
 55 |         // Reserves 8192 bytes on top of the source length
 56 |         int getMaxEncodedLength(int srcLen) const { return 8192 + srcLen; }
 57 | 
 58 |     private:
 59 |         Context* _pCtx;
 60 | 
 61 |         static const int MIN_BLOCK_SIZE;
 62 |         static const int LEN_SEQ[256];
 63 | 
 64 |         static bool validate(const byte block[], int count);
 65 |         static int pack(const byte in[], uint32& out);
 66 |         static int unpack(uint32 in, byte out[]);
 67 |     };
 68 | 
 69 | 
 70 |     // Packs the byte sequence starting at in[0] into a single value.
 71 |     // The high nibble of in[0] selects how many bytes are read:
 72 |     //   0..7   -> 1 byte,  out = in[0]
 73 |     //   12, 13 -> 2 bytes, out = tag 1 (bit 19) with both bytes kept whole
 74 |     //   14     -> 3 bytes, out = tag 2 (bit 20) with 16 payload bits
 75 |     //   15     -> 4 bytes, out = tag 4 (bit 21) with 21 payload bits
 76 |     // Returns the number of bytes read, or 0 (with out = 0) for any other nibble.
 77 |     inline int UTFCodec::pack(const byte in[], uint32& out)
 78 |     {
 79 |        const int nibble = int(in[0]) >> 4;
 80 | 
 81 |        // Exact values and closed ranges only: anything else falls through
 82 |        if ((nibble >= 0) && (nibble <= 7)) {
 83 |            // Single byte, stored as is
 84 |            out = uint32(in[0]);
 85 |            return 1;
 86 |        }
 87 | 
 88 |        if ((nibble == 12) || (nibble == 13)) {
 89 |            // Two bytes, stored without masking
 90 |            out = (1 << 19) | (uint32(in[0]) << 8) | uint32(in[1]);
 91 |            return 2;
 92 |        }
 93 | 
 94 |        if (nibble == 14) {
 95 |            // Three bytes: 4 + 6 + 6 bits kept
 96 |            out = (2 << 19) | ((uint32(in[0]) & 0x0F) << 12) | ((uint32(in[1]) & 0x3F) << 6) | (uint32(in[2]) & 0x3F);
 97 |            return 3;
 98 |        }
 99 | 
100 |        if (nibble == 15) {
101 |            // Four bytes: 3 + 6 + 6 + 6 bits kept
102 |            out = (4 << 19) | ((uint32(in[0]) & 0x07) << 18) | ((uint32(in[1]) & 0x3F) << 12) | ((uint32(in[2]) & 0x3F) << 6) | (uint32(in[3]) & 0x3F);
103 |            return 4;
104 |        }
105 | 
106 |        // signal invalid value
107 |        out = 0;
108 |        return 0;
109 |     }
110 | 
111 | 
112 | 
113 |     // Counterpart of pack: expands 'in' into out[] and returns the number of
114 |     // bytes written. The selector is in >> 19:
115 |     //   0    -> 1 byte
116 |     //   1    -> 2 bytes, both stored whole
117 |     //   2    -> 3 bytes, prefixes 0xE0 / 0x80 / 0x80 added back
118 |     //   4..7 -> 4 bytes, prefixes 0xF0 / 0x80 / 0x80 / 0x80 added back
119 |     // Returns 0 and leaves out[] untouched for any other selector.
120 |     inline int UTFCodec::unpack(uint32 in, byte out[])
121 |     {
122 |        const uint32 selector = in >> 19;
123 | 
124 |        if (selector == 0) {
125 |            out[0] = byte(in);
126 |            return 1;
127 |        }
128 | 
129 |        if (selector == 1) {
130 |            out[0] = byte(in >> 8);
131 |            out[1] = byte(in);
132 |            return 2;
133 |        }
134 | 
135 |        if (selector == 2) {
136 |            out[0] = byte(((in >> 12) & 0x0F) | 0xE0);
137 |            out[1] = byte(((in >> 6) & 0x3F) | 0x80);
138 |            out[2] = byte((in & 0x3F) | 0x80);
139 |            return 3;
140 |        }
141 | 
142 |        // Bits 19 and 20 may hold payload bits, hence four selector values
143 |        if ((selector >= 4) && (selector <= 7)) {
144 |            out[0] = byte(((in >> 18) & 0x07) | 0xF0);
145 |            out[1] = byte(((in >> 12) & 0x3F) | 0x80);
146 |            out[2] = byte(((in >> 6) & 0x3F) | 0x80);
147 |            out[3] = byte((in & 0x3F) | 0x80);
148 |            return 4;
149 |        }
150 | 
151 |        // signal invalid value
152 |        return 0;
153 |     }
154 | }
155 | #endif
156 | 
157 | 


--------------------------------------------------------------------------------
/src/transform/ZRLT.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _ZRLT_
18 | #define _ZRLT_
19 | 
20 | #include "../Context.hpp"
21 | #include "../Transform.hpp"
22 | 
23 | namespace kanzi
24 | {
25 |    // Zero Run Length Encoding is a simple encoding algorithm by Wheeler
26 |    // closely related to Run Length Encoding. The main difference is
27 |    // that only runs of 0 values are processed. Also, the length is
28 |    // encoded in a different way (each digit in a different byte)
29 |    // This algorithm is well adapted to process post BWT/MTFT data.
30 | 
31 |    class ZRLT FINAL : public Transform<byte>
32 |    {
33 |    public:
34 |        ZRLT() {}
35 |        ZRLT(Context&) {} // the context argument is not used
36 |        ~ZRLT() {}
37 |        // forward and inverse are only declared here
38 |        bool forward(SliceArray<byte>& pSrc, SliceArray<byte>& pDst, int length);
39 | 
40 |        bool inverse(SliceArray<byte>& pSrc, SliceArray<byte>& pDst, int length);
41 | 
42 |        // Required encoding output buffer size unknown => guess
43 |        int getMaxEncodedLength(int srcLen) const { return srcLen; } // the guess is the source length
44 |    };
45 | 
46 | }
47 | #endif
48 | 
49 | 


--------------------------------------------------------------------------------
/src/util.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _util_
18 | #define _util_
19 | 
20 | 
21 | #include <iostream>
22 | #include "types.hpp"
23 | 
24 | 
25 | 
26 | // Ahem ... Visual Studio
27 | // This ostreambuf class is required because Microsoft cannot bother to implement
28 | // streambuf::pubsetbuf().
29 | // Output stream buffer writing into the caller's array buffer[0..length)
30 | template <typename T>
31 | struct ostreambuf : public std::basic_streambuf<T, std::char_traits<T> >
32 | {
33 |     // Put area: begins at buffer, ends at buffer + length
34 |     ostreambuf(T* buffer, std::streamsize length) { this->setp(buffer, buffer + length); }
35 | };
36 | 
37 | // Input stream buffer reading from the caller's array buffer[0..length)
38 | template <typename T>
39 | struct istreambuf : public std::basic_streambuf<T, std::char_traits<T> >
40 | {
41 |     // Get area: begins (and currently points) at buffer, ends at buffer + length
42 |     istreambuf(T* buffer, std::streamsize length) { this->setg(buffer, buffer, buffer + length); }
43 | };
44 | 
45 | #endif
46 | 
47 | 


--------------------------------------------------------------------------------
/src/util/Clock.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Clock_
18 | #define _Clock_
19 | 
20 | 
21 | #if __cplusplus >= 201103L || _MSC_VER >= 1700
22 | 
23 | #include <chrono>
24 | 
25 | namespace kanzi
26 | {
27 |    // Stopwatch based on std::chrono::steady_clock
28 |    class Clock {
29 |    public:
30 |        // Start and stop both hold the construction time: elapsed() is 0
31 |        Clock() : _start(std::chrono::steady_clock::now())
32 |                , _stop(_start) {}
33 | 
34 |        // Sets the start of the measured interval to the current time
35 |        void start() { _start = std::chrono::steady_clock::now(); }
36 | 
37 |        // Sets the end of the measured interval to the current time
38 |        void stop() { _stop = std::chrono::steady_clock::now(); }
39 | 
40 |        // Interval between the recorded start and stop times, in millisec
41 |        // (the duration is converted to whole milliseconds first)
42 |        double elapsed() const
43 |        {
44 |            using namespace std::chrono;
45 |            const milliseconds span = duration_cast<milliseconds>(_stop - _start);
46 | 
47 |            return double(span.count());
48 |        }
49 | 
50 |    private:
51 |        // _start is declared first: the constructor uses it to initialize _stop
52 |        std::chrono::steady_clock::time_point _start;
53 |        std::chrono::steady_clock::time_point _stop;
54 |    };
55 | }
56 | #else
57 | 
58 | #include <ctime>
59 | 
60 | namespace kanzi
61 | {
62 | 
63 |    // Stopwatch based on clock() from <ctime>
64 |    class Clock {
65 |    public:
66 |        // Start and stop both hold the construction time: elapsed() is 0
67 |        Clock() : _start(clock())
68 |                , _stop(_start) {}
69 | 
70 |        // Sets the start of the measured interval to the current clock() value
71 |        void start() { _start = clock(); }
72 | 
73 |        // Sets the end of the measured interval to the current clock() value
74 |        void stop() { _stop = clock(); }
75 | 
76 |        // Interval between the recorded start and stop values, in millisec.
77 |        // Returns 0 when the stop value is not greater than the start value.
78 |        double elapsed() const
79 |        {
80 |            if (_stop <= _start)
81 |                return 0.0;
82 |            const double ticks = double(_stop - _start);
83 |            return ticks / CLOCKS_PER_SEC * 1000.0;
84 |        }
85 | 
86 |    private:
87 |        // _start is declared first: the constructor uses it to initialize _stop
88 |        clock_t _start;
89 |        clock_t _stop;
90 |    };
91 |    
92 | }
93 | #endif
94 | 
95 | #endif
96 | 
97 | 


--------------------------------------------------------------------------------
/src/util/Printer.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 |                 http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _Printer_
18 | #define _Printer_
19 | 
20 | 
21 | #ifdef CONCURRENCY_ENABLED
22 | #include <mutex>
23 | #endif
24 | 
25 | namespace kanzi
26 | {
27 | 
28 |    // Thread safe printer
29 |    class Printer
30 |    {
31 |       public:
32 |          Printer(std::ostream& os) : _os(&os) {}
33 | 
34 |          // Flushes the stream; a failure to flush is deliberately ignored
35 |          ~Printer() {
36 |             try  { _os->flush(); }
37 |             catch (std::exception&) { /* Ignore: best effort */ }
38 |          }
39 | 
40 |          // Writes msg when 'print' is set and msg is not null
41 |          void print(const char* msg, bool print) {
42 |             if ((print == false) || (msg == nullptr))
43 |                return;
44 |    #ifdef CONCURRENCY_ENABLED
45 |             std::lock_guard<std::mutex> lock(_mtx);
46 |    #endif
47 |             *_os << msg;
48 |          }
49 | 
50 |          // Writes msg then std::endl when 'print' is set and msg is not null
51 |          void println(const char* msg, bool print) {
52 |             if ((print == false) || (msg == nullptr))
53 |                return;
54 |    #ifdef CONCURRENCY_ENABLED
55 |             std::lock_guard<std::mutex> lock(_mtx);
56 |    #endif
57 |             *_os << msg << std::endl;
58 |          }
59 | 
60 |          // Writes msg when 'print' is set
61 |          void print(const std::string& msg, bool print) {
62 |             if (print == false)
63 |                return;
64 |    #ifdef CONCURRENCY_ENABLED
65 |             std::lock_guard<std::mutex> lock(_mtx);
66 |    #endif
67 |             *_os << msg;
68 |          }
69 | 
70 |          // Writes msg then std::endl when 'print' is set
71 |          void println(const std::string& msg, bool print) {
72 |             if (print == false)
73 |                return;
74 |    #ifdef CONCURRENCY_ENABLED
75 |             std::lock_guard<std::mutex> lock(_mtx);
76 |    #endif
77 |             *_os << msg << std::endl;
78 |          }
79 | 
80 |    private:
81 |    #ifdef CONCURRENCY_ENABLED
82 |          static std::mutex _mtx; // static: one mutex for every Printer
83 |    #endif
84 |          std::ostream* _os;
85 |    };
86 | 
87 | }
88 | #endif
89 | 
90 | 


--------------------------------------------------------------------------------
/src/util/strings.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011-2024 Frederic Langlet
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | you may obtain a copy of the License at
 6 | 
 7 | http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | 
16 | #pragma once
17 | #ifndef _strings_
18 | #define _strings_
19 | 
20 | #include <sstream>
21 | #include <string>
22 | #include <vector>
23 | 
24 | 
25 | 
26 | #if __cplusplus < 201103L
27 |    // to_string() not available before C++ 11: formats 'value' with operator<<
28 |    template <typename T>
29 |    std::string to_string(T value)
30 |    {
31 |        std::ostringstream formatter;
32 |        formatter << value;
33 |        return formatter.str();
34 |    }
35 | 
36 |    #define TOSTR(v) to_string(v)
37 | #else
38 |    #define TOSTR(v) std::to_string(v)
39 | #endif
40 | 
41 | 
42 | // Binary text of num: (length - 1) digits, high bit first, then '\0'. No-op on null buffer or length <= 0
43 | inline void to_binary(int num, char* buffer, int length)
44 | {
45 |     if ((buffer == 0) || (length <= 0))
46 |         return;
47 |     for (int i = length - 2; i >= 0; i--, num >>= 1)
48 |         buffer[i] = (num & 1) ? '1' : '0';
49 |     buffer[length - 1] = '\0';
50 | }
51 | 
52 | // Trim from end of string (right). A string made only of whitespace
53 | // (no character to keep) becomes empty.
54 | inline std::string& rtrim(std::string& s)
55 | {
56 |     static const char* whitespaces = " \t\f\v\n\r";
57 |     const std::size_t pos = s.find_last_not_of(whitespaces);
58 | 
59 |     // npos: nothing to keep, erase from 0; otherwise cut right after 'pos'
60 |     s.erase((pos == std::string::npos) ? 0 : pos + 1);
61 |     return s;
62 | }
63 | 
64 | // Trim from beginning of string (left). A string made only of whitespace
65 | // becomes empty.
66 | inline std::string& ltrim(std::string& s)
67 | {
68 |     static const char* whitespaces = " \t\f\v\n\r";
69 |     const std::size_t pos = s.find_first_not_of(whitespaces);
70 | 
71 |     // erase(0, npos) removes the whole string when no other character is found
72 |     s.erase(0, pos);
73 |     return s;
74 | }
75 | 
76 | // Strip whitespace from both ends of the string (right side first, then left)
77 | inline std::string& trim(std::string& s) {
78 |     std::string& rightTrimmed = rtrim(s);
79 |     return ltrim(rightTrimmed);
80 | }
81 | 
82 | // Splits 'str' on 'token' and appends each piece to 'v' (v is not cleared)
83 | inline void tokenize(const std::string& str, std::vector<std::string>& v, char token)
84 | {
85 |    std::istringstream input(str);
86 | 
87 |    for (std::string piece; getline(input, piece, token); )
88 |       v.push_back(piece);
89 | }
90 | 
91 | #endif
92 | 
93 | 


--------------------------------------------------------------------------------