├── .github └── workflows │ ├── ant.yml │ ├── codeql.yml │ ├── maven.yaml │ └── sonar.yaml ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── SECURITY.md └── java ├── .gitignore ├── MANIFEST.MF ├── README.md ├── build.xml ├── pom.xml └── src ├── main └── java │ └── io │ └── github │ └── flanglet │ └── kanzi │ ├── ArrayComparator.java │ ├── BitStreamException.java │ ├── ByteSorter.java │ ├── ByteTransform.java │ ├── EntropyDecoder.java │ ├── EntropyEncoder.java │ ├── Error.java │ ├── Event.java │ ├── Global.java │ ├── InputBitStream.java │ ├── IntSorter.java │ ├── IntTransform.java │ ├── Listener.java │ ├── Magic.java │ ├── Memory.java │ ├── OutputBitStream.java │ ├── Predictor.java │ ├── SliceByteArray.java │ ├── SliceIntArray.java │ ├── app │ ├── Benchmark.java │ ├── BlockCompressor.java │ ├── BlockDecompressor.java │ ├── InfoPrinter.java │ └── Kanzi.java │ ├── bitstream │ ├── DebugInputBitStream.java │ ├── DebugOutputBitStream.java │ ├── DefaultInputBitStream.java │ └── DefaultOutputBitStream.java │ ├── entropy │ ├── ANSRangeDecoder.java │ ├── ANSRangeEncoder.java │ ├── BinaryEntropyDecoder.java │ ├── BinaryEntropyEncoder.java │ ├── CMDecoder.java │ ├── CMEncoder.java │ ├── CMPredictor.java │ ├── EntropyCodecFactory.java │ ├── EntropyUtils.java │ ├── ExpGolombDecoder.java │ ├── ExpGolombEncoder.java │ ├── FPAQDecoder.java │ ├── FPAQEncoder.java │ ├── FastLogisticAdaptiveProbMap.java │ ├── HuffmanCommon.java │ ├── HuffmanDecoder.java │ ├── HuffmanEncoder.java │ ├── LinearAdaptiveProbMap.java │ ├── LogisticAdaptiveProbMap.java │ ├── NullEntropyDecoder.java │ ├── NullEntropyEncoder.java │ ├── RangeDecoder.java │ ├── RangeEncoder.java │ └── TPAQPredictor.java │ ├── io │ ├── CompressedInputStream.java │ ├── CompressedOutputStream.java │ ├── IOException.java │ ├── IOUtil.java │ └── NullOutputStream.java │ ├── module-info.java │ ├── transform │ ├── AliasCodec.java │ ├── BWT.java │ ├── BWTBlockCodec.java │ ├── BWTS.java │ ├── DivSufSort.java │ ├── EXECodec.java 
│ ├── FSDCodec.java │ ├── LZCodec.java │ ├── NullTransform.java │ ├── RLT.java │ ├── ROLZCodec.java │ ├── SA_IS.java │ ├── SBRT.java │ ├── SRT.java │ ├── Sequence.java │ ├── TextCodec.java │ ├── TransformFactory.java │ ├── UTFCodec.java │ └── ZRLT.java │ └── util │ ├── LyndonWords.java │ ├── hash │ ├── XXHash32.java │ └── XXHash64.java │ └── sort │ ├── BucketSort.java │ ├── DefaultArrayComparator.java │ ├── HeapSort.java │ ├── InsertionSort.java │ ├── MergeSort.java │ ├── QuickSort.java │ └── RadixSort.java └── test └── java └── io └── github └── flanglet └── kanzi └── test ├── TestBWT.java ├── TestCompressedStream.java ├── TestDefaultBitStream.java ├── TestEntropyCodec.java └── TestTransforms.java /.github/workflows/ant.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Ant 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-ant 3 | 4 | name: Java CI 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v5 19 | - name: Set up JDK 17 20 | uses: actions/setup-java@v5 21 | with: 22 | java-version: '17' 23 | distribution: 'adopt' 24 | - name: Build with Ant 25 | run: cd java && ant -noinput -buildfile build.xml 26 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | schedule: 9 | - cron: "35 16 * * 2" 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 
23 | language: [ java ] 24 | 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v5 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v3 31 | with: 32 | languages: ${{ matrix.language }} 33 | queries: +security-and-quality 34 | 35 | - name: Build with Ant 36 | run: cd java && ant -noinput -buildfile build.xml 37 | 38 | - name: Perform CodeQL Analysis 39 | uses: github/codeql-action/analyze@v3 40 | with: 41 | category: "/language:${{ matrix.language }}" 42 | -------------------------------------------------------------------------------- /.github/workflows/maven.yaml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven 2 | 3 | name: Java CI 4 | 5 | on: 6 | push: 7 | branches: [ master ] 8 | pull_request: 9 | branches: [ master ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v5 18 | - name: Set up JDK 17 19 | uses: actions/setup-java@v5 20 | with: 21 | java-version: '17' 22 | distribution: 'adopt' 23 | - name: Build with Maven 24 | run: cd java && mvn 25 | -------------------------------------------------------------------------------- /.github/workflows/sonar.yaml: -------------------------------------------------------------------------------- 1 | name: SonarCloud 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | types: [opened, synchronize, reopened] 9 | 10 | jobs: 11 | build: 12 | name: Build and analyze 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v5 16 | with: 17 | fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis 18 | - name: Set up JDK 17 19 | uses: actions/setup-java@v5 20 | with: 21 | java-version: 17 22 | distribution: 'zulu' # Alternative distribution options are available. 
23 | - name: Cache SonarCloud packages 24 | uses: actions/cache@v3 25 | with: 26 | path: ~/.sonar/cache 27 | key: ${{ runner.os }}-sonar 28 | restore-keys: ${{ runner.os }}-sonar 29 | - name: Cache Maven packages 30 | uses: actions/cache@v3 31 | with: 32 | path: ~/.m2 33 | key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} 34 | restore-keys: ${{ runner.os }}-m2 35 | - name: Build and analyze 36 | env: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any 38 | SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} 39 | run: cd java && mvn -Dmaven.test.skip=true -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=flanglet_kanzi 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.zip 3 | *.xml 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | install: cd java && mvn compile 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Kanzi 3 | 4 | Kanzi is a modern, modular, portable, and efficient lossless data compressor written in Java. 5 | 6 | * Modern: Kanzi implements state-of-the-art compression algorithms and is built to fully utilize multi-core CPUs via built-in multi-threading. 7 | * Modular: Entropy codecs and data transforms can be selected and combined at runtime to best suit the specific data being compressed. 8 | * Expandable: A clean, interface-driven design—with no external dependencies—makes Kanzi easy to integrate, extend, and customize. 9 | * Efficient: Carefully optimized to balance compression ratio and speed for practical, high-performance usage. 
10 | 11 | Unlike most mainstream lossless compressors, Kanzi is not limited to a single compression paradigm. By combining multiple algorithms and techniques, it supports a broader range of compression ratios and adapts better to diverse data types. 12 | 13 | Most traditional compressors underutilize modern hardware by running single-threaded—even on machines with many cores. Kanzi, in contrast, is concurrent by design, compressing multiple blocks in parallel across threads for significant performance gains. However, it is not compatible with standard compression formats. 14 | 15 | It’s important to note that Kanzi is a data compressor, not an archiver. It includes optional checksums for verifying data integrity, but does not provide features like cross-file deduplication or data recovery mechanisms. That said, it produces a seekable bitstream—meaning one or more consecutive blocks can be decompressed independently, without needing to process the entire stream. 16 | 17 | 18 | For more details, check [Wiki](https://github.com/flanglet/kanzi/wiki), [QA](https://github.com/flanglet/kanzi/wiki/Q&A) and [DeepWiki](https://deepwiki.com/flanglet/kanzi) 19 | 20 | See how to reuse the code here: https://github.com/flanglet/kanzi/wiki/Using-and-extending-the-code 21 | 22 | There is a C++ implementation available here: https://github.com/flanglet/kanzi-cpp 23 | 24 | There is a Go implementation available here: https://github.com/flanglet/kanzi-go 25 | 26 | 27 | ![Build Status](https://github.com/flanglet/kanzi/actions/workflows/ant.yml/badge.svg) 28 | [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=flanglet_kanzi&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=flanglet_kanzi) 29 | 30 | Coverity Scan Build Status 32 | 33 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) 34 | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/flanglet/kanzi) 35 | 36 | 37 | ## Why Kanzi 38 | 39 | 
40 | There are already many excellent, open-source lossless data compressors available. 41 | 42 | If gzip is beginning to show its age, modern alternatives like **zstd** and **brotli** offer compelling replacements. Both are open-source, standardized, and used daily by millions. **Zstd** is especially notable for its exceptional speed and is often the best choice in general-purpose compression. 43 | 44 | However, there are scenarios where **Kanzi** may offer superior performance: 45 | 46 | While gzip, LZMA, brotli, and zstd are all based on LZ (Lempel-Ziv) compression, they are inherently limited in the compression ratios they can achieve. **Kanzi** goes further by incorporating **BWT (Burrows-Wheeler Transform)** and **CM (Context Modeling)**, which can outperform traditional LZ-based methods in certain cases. 47 | 48 | LZ-based compressors are ideal for software distribution, where data is compressed once and decompressed many times, thanks to their fast decompression speeds—though they tend to be slower when compressing at higher ratios. But in other scenarios—such as real-time data generation, one-off data transfers, or backups—**compression speed becomes critical**. Here, Kanzi can shine. 49 | 50 | **Kanzi** also features a suite of built-in, customizable data transforms tailored for specific data types (e.g., multimedia, UTF, text, DNA, etc.), which can be selectively applied during compression for better efficiency. 51 | 52 | Furthermore, Kanzi is designed to **leverage modern multi-core CPUs** to boost performance. 53 | 54 | Finally, **extensibility** is a key strength: implementing new transforms or entropy codecs—whether for experimentation or to improve performance on niche data types—is straightforward and developer-friendly. 
55 | 56 | ## Benchmarks 57 | 58 | Test machine: 59 | 60 | 61 | 62 | Apple M3 24 GB Sonoma 14.6.1 63 | 64 | Kanzi version 2.4.0 Java implementation 65 | 66 | JDK 23.0.1+11-39 67 | 68 | On this machine, Kanzi uses 4 threads (half of CPUs by default). 69 | 70 | bzip3 runs with 4 threads. 71 | 72 | zstd and lz4 use 4 threads for compression and 1 for decompression; other compressors are single-threaded. 73 | 74 | The default block size at level 9 is 32MB, severely limiting the number of threads 75 | in use, especially with enwik8, but all tests are performed with default values. 76 | 77 | 78 | ### silesia.tar 79 | 80 | Download at http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip 81 | 82 | | Compressor | Encoding (ms) | Decoding (ms) | Size | 83 | |---------------------------------|-----------------|-----------------|------------------| 84 | |Original | | | 211,957,760 | 85 | |s2 -cpu 4 | 179 | 294 | 86,892,891 | 86 | |**Kanzi -l 1** | **839** | **263** | 80,245,856 | 87 | |lz4 1.1.10 -T4 -4 | 527 | 121 | 79,919,901 | 88 | |zstd 1.5.8 -T4 -2 | 147 | 150 | 69,410,383 | 89 | |**Kanzi -l 2** | **701** | **437** | 68,860,099 | 90 | |brotli 1.1.0 -2 | 907 | 402 | 68,039,159 | 91 | |Apple gzip 430.140.2 -9 | 10406 | 273 | 67,648,481 | 92 | |**Kanzi -l 3** | **1258** | **503** | 64,266,936 | 93 | |zstd 1.5.8 -T4 -5 | 300 | 154 | 62,851,716 | 94 | |**Kanzi -l 4** | **1718** | **912** | 61,131,554 | 95 | |zstd 1.5.8 -T4 -9 | 752 | 137 | 59,190,090 | 96 | |brotli 1.1.0 -6 | 3596 | 340 | 58,557,128 | 97 | |zstd 1.5.8 -T4 -13 | 4537 | 138 | 57,814,719 | 98 | |brotli 1.1.0 -9 | 19809 | 329 | 56,414,012 | 99 | |bzip2 1.0.8 -9 | 9673 | 3140 | 54,602,583 | 100 | |**Kanzi -l 5** | **3431** | **1759** | 54,025,588 | 101 | |zstd 1.5.8 -T4 -19 | 20482 | 151 | 52,858,610 | 102 | |**Kanzi -l 6** | **4687** | **3710** | 49,521,392 | 103 | |xz 5.8.1 -9 | 48516 | 1594 | 48,774,000 | 104 | |bzip3 1.5.1.r3-g428f422 -j 4 | 8559 | 3948 | 47,256,794 | 105 | |**Kanzi -l 7** | **5248** 
| **3689** | 47,312,772 | 106 | |**Kanzi -l 8** | **16856** | **18060** | 43,260,254 | 107 | |**Kanzi -l 9** | **24852** | **27886** | 41,858,030 | 108 | 109 | 110 | 111 | ### enwik8 112 | 113 | Download at https://mattmahoney.net/dc/enwik8.zip 114 | 115 | | Compressor | Encoding (ms) | Decoding (ms) | Size | 116 | |--------------|---------------|---------------|--------------| 117 | |Original | | | 100,000,000 | 118 | |Kanzi -l 1 | 559 | 139 | 43,644,013 | 119 | |Kanzi -l 2 | 498 | 227 | 37,570,404 | 120 | |Kanzi -l 3 | 798 | 439 | 32,466,232 | 121 | |Kanzi -l 4 | 1060 | 662 | 29,536,517 | 122 | |Kanzi -l 5 | 1422 | 790 | 26,523,940 | 123 | |Kanzi -l 6 | 1965 | 1175 | 24,076,765 | 124 | |Kanzi -l 7 | 2606 | 1787 | 22,817,360 | 125 | |Kanzi -l 8 | 7377 | 7251 | 21,181,992 | 126 | |Kanzi -l 9 | 10031 | 11412 | 20,035,144 | 127 | 128 | 129 | ## Build 130 | 131 | First option (ant): 132 | 133 | ```ant``` 134 | 135 | Second option (maven): 136 | 137 | ```mvn -Dmaven.test.skip=true``` 138 | 139 | 140 | Credits 141 | 142 | Matt Mahoney, 143 | Yann Collet, 144 | Jan Ondrus, 145 | Yuta Mori, 146 | Ilya Muravyov, 147 | Neal Burns, 148 | Fabian Giesen, 149 | Jarek Duda, 150 | Ilya Grebnov 151 | 152 | Disclaimer 153 | 154 | Use at your own risk. Always keep a copy of your original files. 155 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | Security updates are applied only to the latest release. 4 | 5 | ## Vulnerability Definition 6 | 7 | A security vulnerability is a bug that, given a certain input, triggers a crash or an infinite loop. Compression and decompression failures do not belong in this category. 8 | 9 | ## Reporting a Vulnerability 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** If you have discovered a security vulnerability in this project, report it privately. 
12 | 13 | Please disclose it at [security advisory](https://github.com/flanglet/kanzi/security/advisories/new). 14 | 15 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 16 | 17 | * Operating system 18 | * Hardware: CPU, memory 19 | * Kanzi version 20 | * Command line invoked 21 | * Error reported/crash data/log output 22 | 23 | If possible provide a minimal reproducer. 24 | -------------------------------------------------------------------------------- /java/.gitignore: -------------------------------------------------------------------------------- 1 | target/ -------------------------------------------------------------------------------- /java/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | manifest-Version: 1.0 2 | Built-By: flanglet 3 | Main-Class: io.github.flanglet.kanzi.app.Kanzi 4 | 5 | -------------------------------------------------------------------------------- /java/README.md: -------------------------------------------------------------------------------- 1 | Build Kanzi 2 | =========== 3 | 4 | Run 'ant' or 'ant build_compress' to generate a JAR file with compression classes only. 5 | 6 | Run 'ant build_lib' to generate a JAR file with all classes in tree excluding tests. 7 | 8 | Run 'ant build_all' to generate a JAR file with all classes in tree including tests. 9 | 10 | For maven, type 'mvn clean install -DskipTests' 11 | 12 | The generated jar file is under 'target'. 
13 | 14 | -------------------------------------------------------------------------------- /java/build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /java/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | 6 | org.sonatype.oss 7 | oss-parent 8 | 9 9 | 10 | 11 | io.github.flanglet 12 | kanzi 13 | 2.4.0 14 | 15 | Kanzi Compression 16 | Fast lossless data compressor in Java 17 | https://github.com/flanglet/kanzi 18 | 2011 19 | 20 | 21 | 22 | Apache License 2.0 23 | http://www.apache.org/licenses/LICENSE-2.0 24 | 25 | 26 | 27 | 28 | https://github.com/flanglet/kanzi 29 | scm:git:https://github.com/flanglet/kanzi 30 | scm:git:git@github.com:flanglet/kanzi 31 | 2.4.0 32 | 33 | 34 | 35 | 36 | ossrh 37 | Nexus Release Repository 38 | https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ 39 | 40 | 41 | ossrh 42 | Sonatype Nexus Snapshots 43 | https://s01.oss.sonatype.org/content/repositories/snapshots/ 44 | false 45 | 46 | 47 | 48 | 49 | 50 | flanglet 51 | Frederic Langlet 52 | 53 | 54 | pschichtel 55 | Phillip Schichtel 56 | phillip@schich.tel 57 | 58 | Contributor 59 | 60 | 61 | 62 | 63 | 64 | UTF-8 65 | 11 66 | release 67 | flanglet 68 | https://sonarcloud.io 69 | 70 | 71 | 72 | 73 | junit 74 | junit 75 | [4.13.1,) 76 | jar 77 | test 78 | 79 | 80 | 81 | 82 | clean install 83 | 84 | 85 | org.apache.maven.plugins 86 | maven-source-plugin 87 | 3.2.0 88 | 89 | 90 | attach-sources 91 | 92 | jar-no-fork 93 | 94 | 95 | 96 | 97 | 98 | org.apache.maven.plugins 99 | 
maven-javadoc-plugin 100 | 3.10.1 101 | 102 | false 103 | false 104 | true 105 | none 106 | 107 | 108 | 109 | attach-javadocs 110 | 111 | jar 112 | 113 | 114 | 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-release-plugin 119 | 2.5.3 120 | 121 | true 122 | false 123 | ${releaseProfile} 124 | deploy 125 | -P${releaseProfile} 126 | 127 | 128 | 129 | org.apache.maven.plugins 130 | maven-deploy-plugin 131 | 2.8.2 132 | 133 | 134 | org.apache.maven.plugins 135 | maven-jar-plugin 136 | 3.3.0 137 | 138 | 139 | MANIFEST.MF 140 | 141 | 142 | 143 | 144 | org.apache.maven.plugins 145 | maven-compiler-plugin 146 | 3.6.1 147 | 148 | ${project.build.sourceEncoding} 149 | ${jdkVersion} 150 | true 151 | true 152 | true 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | release 161 | 162 | 163 | 164 | org.sonatype.plugins 165 | nexus-staging-maven-plugin 166 | 1.6.8 167 | true 168 | 169 | ossrh 170 | https://s01.oss.sonatype.org/ 171 | true 172 | 173 | 174 | 175 | org.apache.maven.plugins 176 | maven-source-plugin 177 | 3.2.0 178 | 179 | 180 | attach-sources 181 | 182 | jar-no-fork 183 | 184 | 185 | 186 | 187 | 188 | org.apache.maven.plugins 189 | maven-javadoc-plugin 190 | 3.10.1 191 | 192 | 193 | attach-javadocs 194 | 195 | jar 196 | 197 | 198 | 199 | 200 | 201 | org.apache.maven.plugins 202 | maven-gpg-plugin 203 | 1.6 204 | 205 | 206 | sign-artifacts 207 | verify 208 | 209 | sign 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/ArrayComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * This interface defines a method for comparing sub-arrays within an array. The array itself is not a parameter of {@code compare}, so an implementation is presumably bound to it beforehand (to be confirmed against implementers). 20 | */ 21 | public interface ArrayComparator { 22 | 23 | /** 24 | * Compares two sub-arrays starting at the specified indices. 25 | * 26 | * @param lidx the starting index of the left sub-array 27 | * @param ridx the starting index of the right sub-array 28 | * @return a negative integer, zero, or a positive integer as the 29 | * left sub-array is less than, equal to, or greater than 30 | * the right sub-array 31 | */ 32 | public int compare(int lidx, int ridx); 33 | } 34 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/BitStreamException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * This class represents exceptions specific to bit stream operations. 20 | * It is an unchecked exception (it extends {@code RuntimeException}) carrying an integer error code, such as the constants below, that identifies the error condition. 21 | */ 22 | public class BitStreamException extends RuntimeException { 23 | 24 | private static final long serialVersionUID = 7279737120722476336L; 25 | 26 | /** 27 | * Error code for undefined errors. 28 | */ 29 | public static final int UNDEFINED = 0; 30 | 31 | /** 32 | * Error code for input/output errors. 33 | */ 34 | public static final int INPUT_OUTPUT = 1; 35 | 36 | /** 37 | * Error code for end-of-stream errors. 38 | */ 39 | public static final int END_OF_STREAM = 2; 40 | 41 | /** 42 | * Error code for invalid stream errors. 43 | */ 44 | public static final int INVALID_STREAM = 3; 45 | 46 | /** 47 | * Error code for stream closed errors. 48 | */ 49 | public static final int STREAM_CLOSED = 4; 50 | 51 | private final int code; 52 | 53 | /** 54 | * Constructs a {@code BitStreamException} with no detail message and the {@link #UNDEFINED} error code. 55 | */ 56 | protected BitStreamException() { 57 | this.code = UNDEFINED; 58 | } 59 | 60 | /** 61 | * Constructs a {@code BitStreamException} with the specified detail message 62 | * and error code. 63 | * 64 | * @param message the detail message 65 | * @param code the error code 66 | */ 67 | public BitStreamException(String message, int code) { 68 | super(message); 69 | this.code = code; 70 | } 71 | 72 | /** 73 | * Constructs a {@code BitStreamException} with the specified detail message, 74 | * cause, and error code. 75 | * 76 | * @param message the detail message 77 | * @param cause the cause 78 | * @param code the error code 79 | */ 80 | public BitStreamException(String message, Throwable cause, int code) { 81 | super(message, cause); 82 | this.code = code; 83 | } 84 | 85 | /** 86 | * Constructs a {@code BitStreamException} with the specified cause 87 | * and error code. 
88 | * 89 | * @param cause the cause 90 | * @param code the error code 91 | */ 92 | public BitStreamException(Throwable cause, int code) { 93 | super(cause); 94 | this.code = code; 95 | } 96 | 97 | /** 98 | * Returns the error code given at construction ({@link #UNDEFINED} when the no-argument constructor was used). 99 | * 100 | * @return the error code 101 | */ 102 | public int getErrorCode() { 103 | return this.code; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/ByteSorter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * This interface defines a method for sorting a sub-array of bytes. 20 | */ 21 | public interface ByteSorter { 22 | 23 | /** 24 | * Sorts a sub-array of bytes. 
25 | * 26 | * @param array the array containing the sub-array to be sorted 27 | * @param idx the starting index of the sub-array 28 | * @param len the length of the sub-array 29 | * @return {@code true} if the sub-array was successfully sorted, 30 | * {@code false} otherwise 31 | */ 32 | public boolean sort(byte[] array, int idx, int len); 33 | } 34 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/ByteTransform.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi; 16 | 17 | /** 18 | * This interface defines methods for transforming byte arrays in forward and 19 | * inverse directions, and for obtaining the maximum encoded length. 20 | */ 21 | public interface ByteTransform { 22 | 23 | /** 24 | * Processes the source array and writes the transformed data to the 25 | * destination array in the forward direction. 26 | * Reads {@code src.length} bytes from {@code src.array[src.index]}, processes them and 27 | * writes them to {@code dst.array[dst.index]}. The index of each slice is updated 28 | * with the number of bytes respectively read from and written to. 
29 | * 30 | * @param src the source {@code SliceByteArray} containing the data to be processed 31 | * @param dst the destination {@code SliceByteArray} where the processed data will be written 32 | * @return {@code true} if the transformation was successful, {@code false} otherwise 33 | */ 34 | public boolean forward(SliceByteArray src, SliceByteArray dst); 35 | 36 | /** 37 | * Processes the source array and writes the transformed data to the 38 | * destination array in the inverse direction. 39 | * Reads {@code src.length} bytes from {@code src.array[src.index]}, processes them and 40 | * writes them to {@code dst.array[dst.index]}. The index of each slice is updated 41 | * with the number of bytes respectively read from and written to. 42 | * 43 | * @param src the source {@code SliceByteArray} containing the data to be processed 44 | * @param dst the destination {@code SliceByteArray} where the processed data will be written 45 | * @return {@code true} if the transformation was successful, {@code false} otherwise 46 | */ 47 | public boolean inverse(SliceByteArray src, SliceByteArray dst); 48 | 49 | /** 50 | * Returns the maximum size required for the output buffer given the 51 | * length of the source data. 52 | * 53 | * @param srcLength the length of the source data 54 | * @return the maximum size required for the output buffer 55 | */ 56 | public int getMaxEncodedLength(int srcLength); 57 | } 58 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/EntropyDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi; 16 | 17 | /** 18 | * This interface defines methods for decoding data from a bitstream. 19 | */ 20 | public interface EntropyDecoder { 21 | 22 | /** 23 | * Decodes the next chunk of data from the bitstream and stores it 24 | * in the provided buffer. 25 | * 26 | * @param buffer the buffer to store the decoded data 27 | * @param blkptr the starting index in the buffer 28 | * @param len the length of data to decode 29 | * @return the number of bytes decoded 30 | */ 31 | public int decode(byte[] buffer, int blkptr, int len); 32 | 33 | /** 34 | * Releases any resources associated with this entropy decoder. 35 | * This method should be called before disposing of the entropy decoder. 36 | */ 37 | public void dispose(); 38 | 39 | /** 40 | * Returns the underlying bitstream associated with this entropy decoder. 41 | * 42 | * @return the underlying {@code InputBitStream} 43 | */ 44 | public InputBitStream getBitStream(); 45 | } 46 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/EntropyEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi; 16 | 17 | /** 18 | * This interface defines methods for encoding data into a bitstream. 19 | */ 20 | public interface EntropyEncoder { 21 | 22 | /** 23 | * Encodes {@code len} bytes of the provided array, starting at {@code blkptr}, into the bitstream and returns the 24 | * number of bytes written to the bitstream. 25 | * 26 | * @param array the array containing the data to be encoded 27 | * @param blkptr the starting index in the array 28 | * @param len the length of data to encode 29 | * @return the number of bytes written to the bitstream 30 | */ 31 | public int encode(byte[] array, int blkptr, int len); 32 | 33 | /** 34 | * Returns the underlying bitstream associated with this entropy encoder. 35 | * 36 | * @return the underlying {@code OutputBitStream} 37 | */ 38 | public OutputBitStream getBitStream(); 39 | 40 | /** 41 | * Releases any resources associated with this entropy encoder. 42 | * This method should be called before disposing of the entropy encoder. 43 | * Trying to encode after a call to dispose gives undefined behavior. 44 | */ 45 | public void dispose(); 46 | } 47 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/Error.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * This final class defines constants for various error codes used 20 | * throughout the application. 21 | */ 22 | public final class Error { 23 | 24 | /** 25 | * Missing parameter 26 | */ 27 | public static final int ERR_MISSING_PARAM = 1; 28 | 29 | /** 30 | * Invalid block size 31 | */ 32 | public static final int ERR_BLOCK_SIZE = 2; 33 | 34 | /** 35 | * Invalid entropy codec 36 | */ 37 | public static final int ERR_INVALID_CODEC = 3; 38 | 39 | /** 40 | * Failure to create a compressor 41 | */ 42 | public static final int ERR_CREATE_COMPRESSOR = 4; 43 | 44 | /** 45 | * Failure to create a decompressor 46 | */ 47 | public static final int ERR_CREATE_DECOMPRESSOR = 5; 48 | 49 | /** 50 | * The output is a folder 51 | */ 52 | public static final int ERR_OUTPUT_IS_DIR = 6; 53 | 54 | /** 55 | * Failure to overwrite a file 56 | */ 57 | public static final int ERR_OVERWRITE_FILE = 7; 58 | 59 | /** 60 | * Failure to create a file 61 | */ 62 | public static final int ERR_CREATE_FILE = 8; 63 | 64 | /** 65 | * Failure to create a bit stream 66 | */ 67 | public static final int ERR_CREATE_BITSTREAM = 9; 68 | 69 | /** 70 | * Failure to open a file 71 | */ 72 | public static final int ERR_OPEN_FILE = 10; 73 | 74 | /** 75 | * Failure to read a file 76 | */ 77 | public static final int ERR_READ_FILE = 11; 78 | 79 | /** 80 | * Failure to write a file 81 | */ 82 | public static final int ERR_WRITE_FILE = 12; 83 | 84 | /** 85 | * Failure to process a block of data 86 | */ 87 | public
static final int ERR_PROCESS_BLOCK = 13; 88 | 89 | /** 90 | * Failure to create an entropy codec 91 | */ 92 | public static final int ERR_CREATE_CODEC = 14; 93 | 94 | /** 95 | * Invalid file 96 | */ 97 | public static final int ERR_INVALID_FILE = 15; 98 | 99 | /** 100 | * Invalid or unsupported bit stream version 101 | */ 102 | public static final int ERR_STREAM_VERSION = 16; 103 | 104 | /** 105 | * Failure to create a stream 106 | */ 107 | public static final int ERR_CREATE_STREAM = 17; 108 | 109 | /** 110 | * Invalid parameter 111 | */ 112 | public static final int ERR_INVALID_PARAM = 18; 113 | 114 | /** 115 | * Checksum failure 116 | */ 117 | public static final int ERR_CRC_CHECK = 19; 118 | 119 | /** 120 | * Unknown error 121 | */ 122 | public static final int ERR_UNKNOWN = 127; 123 | 124 | /** 125 | * Private constructor to prevent instantiation. 126 | */ 127 | private Error() { 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/Event.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * This class represents events that occur during compression and 20 | * decompression processes. Each event includes attributes such as type, 21 | * size, hash, and timestamp.
22 | */ 23 | public class Event { 24 | 25 | /** 26 | * Enum representing the types of events that can occur. 27 | */ 28 | public enum Type { 29 | /** 30 | * Beginning of compression 31 | */ 32 | COMPRESSION_START, 33 | 34 | /** 35 | * Beginning of decompression 36 | */ 37 | DECOMPRESSION_START, 38 | 39 | /** 40 | * Beginning of transform 41 | */ 42 | BEFORE_TRANSFORM, 43 | 44 | /** 45 | * End of transform 46 | */ 47 | AFTER_TRANSFORM, 48 | 49 | /** 50 | * Beginning of entropy 51 | */ 52 | BEFORE_ENTROPY, 53 | 54 | /** 55 | * End of entropy 56 | */ 57 | AFTER_ENTROPY, 58 | 59 | /** 60 | * End of compression 61 | */ 62 | COMPRESSION_END, 63 | 64 | /** 65 | * End of decompression 66 | */ 67 | DECOMPRESSION_END, 68 | 69 | /** 70 | * End of header decoding 71 | */ 72 | AFTER_HEADER_DECODING, 73 | 74 | /** 75 | * Block information 76 | */ 77 | BLOCK_INFO 78 | } 79 | 80 | /** 81 | * Enum representing the types of hash used in the events. 82 | */ 83 | public enum HashType { 84 | /** 85 | * No hash 86 | */ 87 | NO_HASH, 88 | 89 | /** 90 | * 32 bit hash 91 | */ 92 | 93 | SIZE_32, 94 | /** 95 | * 64 bit hash 96 | */ 97 | SIZE_64 98 | } 99 | 100 | private final int id; 101 | private final long size; 102 | private final long hash; 103 | private final Type type; 104 | private final HashType hashType; 105 | private final long time; 106 | private final String msg; 107 | 108 | /** 109 | * Constructs an Event with the specified type, id, and size, with no hash. 110 | * 111 | * @param type the type of event 112 | * @param id the event id 113 | * @param size the size of the event 114 | */ 115 | public Event(Type type, int id, long size) { 116 | this(type, id, size, 0, HashType.NO_HASH); 117 | } 118 | 119 | /** 120 | * Constructs an Event with the specified type, id, and message.
121 | * 122 | * @param type the type of event 123 | * @param id the event id 124 | * @param msg the event message 125 | */ 126 | public Event(Type type, int id, String msg) { 127 | this(type, id, msg, 0); 128 | } 129 | 130 | /** 131 | * Constructs an Event with the specified type, id, message, and time. 132 | * 133 | * @param type the type of event 134 | * @param id the event id 135 | * @param msg the event message 136 | * @param time the event timestamp; when not positive, {@code System.nanoTime()} is used instead 137 | */ 138 | public Event(Type type, int id, String msg, long time) { 139 | this.id = id; 140 | this.size = 0L; 141 | this.hash = 0; 142 | this.hashType = HashType.NO_HASH; 143 | this.type = type; 144 | this.time = (time > 0) ? time : System.nanoTime(); 145 | this.msg = msg; 146 | } 147 | 148 | /** 149 | * Constructs an Event with the specified type, id, size, hash, and hash type. 150 | * 151 | * @param type the type of event 152 | * @param id the event id 153 | * @param size the size of the event 154 | * @param hash the hash of the event 155 | * @param hashType the type of hash used 156 | */ 157 | public Event(Type type, int id, long size, long hash, HashType hashType) { 158 | this(type, id, size, hash, hashType, 0); 159 | } 160 | 161 | /** 162 | * Constructs an Event with the specified type, id, size, hash, hash type, and time. 163 | * 164 | * @param type the type of event 165 | * @param id the event id 166 | * @param size the size of the event 167 | * @param hash the hash of the event 168 | * @param hashType the type of hash used 169 | * @param time the event timestamp; when not positive, {@code System.nanoTime()} is used instead 170 | */ 171 | public Event(Type type, int id, long size, long hash, HashType hashType, long time) { 172 | this.id = id; 173 | this.size = size; 174 | this.hash = hash; 175 | this.hashType = hashType; 176 | this.type = type; 177 | this.time = (time > 0) ? time : System.nanoTime(); 178 | this.msg = null; 179 | } 180 | 181 | /** 182 | * Returns the event id.
183 | * 184 | * @return the event id 185 | */ 186 | public int getId() { 187 | return this.id; 188 | } 189 | 190 | /** 191 | * Returns the size of the event. 192 | * 193 | * @return the event size 194 | */ 195 | public long getSize() { 196 | return this.size; 197 | } 198 | 199 | /** 200 | * Returns the timestamp of the event. 201 | * 202 | * @return the event timestamp 203 | */ 204 | public long getTime() { 205 | return this.time; 206 | } 207 | 208 | /** 209 | * Returns the hash of the event, or 0 if no hash is used. 210 | * 211 | * @return the event hash 212 | */ 213 | public long getHash() { 214 | return (this.hashType == HashType.NO_HASH) ? 0 : this.hash; 215 | } 216 | 217 | /** 218 | * Returns the type of hash used in the event. 219 | * 220 | * @return the event hash type 221 | */ 222 | public HashType getHashType() { 223 | return this.hashType; 224 | } 225 | 226 | /** 227 | * Returns the type of the event. 228 | * 229 | * @return the event type 230 | */ 231 | public Type getType() { 232 | return this.type; 233 | } 234 | 235 | /** 236 | * Returns a string representation of the event.
237 | * 238 | * @return the event message if one was set, otherwise a JSON-like string listing the type, the id (when not negative), the size, the time and the hash in hexadecimal (when the hash type is 32 or 64 bit) 239 | */ 240 | @Override 241 | public String toString() { 242 | if (this.msg != null) { 243 | return this.msg; 244 | } 245 | StringBuilder sb = new StringBuilder(200); 246 | sb.append("{ \"type\":\"").append(this.getType()).append("\""); 247 | if (this.id >= 0) { 248 | sb.append(", \"id\":").append(this.getId()); 249 | } 250 | sb.append(", \"size\":").append(this.getSize()); 251 | sb.append(", \"time\":").append(this.getTime()); 252 | if (this.hashType == HashType.SIZE_32) { 253 | sb.append(", \"hash\":\"").append(Integer.toHexString((int) this.getHash())).append("\""); 254 | } else if (this.hashType == HashType.SIZE_64) { 255 | sb.append(", \"hash\":\"").append(Long.toHexString(this.getHash())).append("\""); 256 | } 257 | sb.append(" }"); 258 | return sb.toString(); 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/InputBitStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code InputBitStream} interface defines methods for reading bits 20 | * from a bit stream. 21 | */ 22 | public interface InputBitStream { 23 | 24 | /** 25 | * Reads a single bit from the bitstream.
26 | * 27 | * @return the bit read (0 or 1) 28 | * @throws BitStreamException if an error occurs or the stream is closed 29 | */ 30 | public int readBit() throws BitStreamException; 31 | 32 | /** 33 | * Reads a specified number of bits from the bitstream and returns them as a long. 34 | * 35 | * @param length the number of bits to read (between 1 and 64) 36 | * @return the bits read as a long 37 | * @throws BitStreamException if an error occurs or the stream is closed 38 | */ 39 | public long readBits(int length) throws BitStreamException; 40 | 41 | /** 42 | * Reads bits from the bitstream and stores them in the specified byte array. 43 | * 44 | * @param bits the byte array to store the read bits 45 | * @param start the starting index in the array 46 | * @param length the number of bits to read 47 | * @return the number of bits read 48 | * @throws BitStreamException if an error occurs or the stream is closed 49 | */ 50 | public int readBits(byte[] bits, int start, int length) throws BitStreamException; 51 | 52 | /** 53 | * Closes the bitstream and releases any associated resources. 54 | * 55 | * @throws BitStreamException if an error occurs while closing the stream 56 | */ 57 | public void close() throws BitStreamException; 58 | 59 | /** 60 | * Returns the total number of bits read from the bitstream. 61 | * 62 | * @return the total number of bits read 63 | */ 64 | public long read(); 65 | 66 | /** 67 | * Checks if there are more bits to read in the bitstream. 
68 | * 69 | * @return {@code false} if the bitstream is closed or the end of the stream 70 | * has been reached, {@code true} otherwise 71 | */ 72 | public boolean hasMoreToRead(); 73 | } 74 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/IntSorter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * This interface defines a method for sorting a sub-array of integers. 20 | */ 21 | public interface IntSorter { 22 | 23 | /** 24 | * Sorts a sub-array of integers. 
25 | * 26 | * @param array the array containing the sub-array to be sorted 27 | * @param idx the starting index of the sub-array 28 | * @param len the length of the sub-array 29 | * @return {@code true} if the sub-array was successfully sorted, 30 | * {@code false} otherwise 31 | */ 32 | public boolean sort(int[] array, int idx, int len); 33 | } 34 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/IntTransform.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * This interface defines methods for transforming integer arrays in forward and 20 | * inverse directions, and for obtaining the maximum encoded length. 21 | */ 22 | public interface IntTransform { 23 | 24 | /** 25 | * Processes the source array and writes the transformed data to the 26 | * destination array in the forward direction. 
27 | * 28 | * @param src the source {@code SliceIntArray} containing the data to be processed 29 | * @param dst the destination {@code SliceIntArray} where the processed data will be written 30 | * @return {@code true} if the transformation was successful, {@code false} otherwise 31 | */ 32 | public boolean forward(SliceIntArray src, SliceIntArray dst); 33 | 34 | /** 35 | * Processes the source array and writes the transformed data to the 36 | * destination array in the inverse direction. 37 | * 38 | * @param src the source {@code SliceIntArray} containing the data to be processed 39 | * @param dst the destination {@code SliceIntArray} where the processed data will be written 40 | * @return {@code true} if the transformation was successful, {@code false} otherwise 41 | */ 42 | public boolean inverse(SliceIntArray src, SliceIntArray dst); 43 | 44 | /** 45 | * Returns the maximum size required for the output buffer given the 46 | * length of the source data. 47 | * 48 | * @param srcLength the length of the source data 49 | * @return the maximum size required for the output buffer 50 | */ 51 | public int getMaxEncodedLength(int srcLength); 52 | } 53 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/Listener.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code Listener} interface defines a contract for objects that need to 20 | * handle and process events. 21 | *

22 | * Classes that implement this interface are expected to provide an implementation 23 | * of the {@code processEvent} method, which will be invoked when an event occurs. 24 | *

25 | * 26 | */ 27 | public interface Listener { 28 | 29 | /** 30 | * Processes the given event. 31 | *

32 | * This method will be called whenever an event occurs that the listener is 33 | * interested in. Implementations of this method should define how to handle 34 | * the event. 35 | *

36 | * 37 | * @param evt The event to be processed. Cannot be {@code null}. 38 | */ 39 | public void processEvent(Event evt); 40 | } 41 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/OutputBitStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code OutputBitStream} interface defines methods for writing bits 20 | * to a bit stream. 21 | */ 22 | public interface OutputBitStream { 23 | 24 | /** 25 | * Writes the least significant bit of the input integer to the bit stream. 26 | * 27 | * @param bit the bit to write (0 or 1) 28 | * @throws BitStreamException if the stream is closed or if an error occurs 29 | */ 30 | public void writeBit(int bit) throws BitStreamException; 31 | 32 | /** 33 | * Writes a specified number of bits from the input long value to the bit stream. 
34 | * 35 | * @param bits the long value containing the bits to write 36 | * @param length the number of bits to write (must be between 1 and 64) 37 | * @return the number of bits written 38 | * @throws BitStreamException if the stream is closed or if an error occurs 39 | */ 40 | public int writeBits(long bits, int length) throws BitStreamException; 41 | 42 | /** 43 | * Writes bits from a byte array to the bit stream starting at the specified index. 44 | * 45 | * @param bits the byte array containing the bits to write 46 | * @param start the starting index in the byte array 47 | * @param nbBits the number of bits to write 48 | * @return the number of bits written 49 | * @throws BitStreamException if the stream is closed or if an error occurs 50 | */ 51 | public int writeBits(byte[] bits, int start, int nbBits) throws BitStreamException; 52 | 53 | /** 54 | * Closes the bit stream and releases any resources associated with it. 55 | * 56 | * @throws BitStreamException if an error occurs while closing the stream 57 | */ 58 | public void close() throws BitStreamException; 59 | 60 | /** 61 | * Returns the total number of bits that have been written to the stream. 62 | * 63 | * @return the number of bits written 64 | */ 65 | public long written(); 66 | } 67 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/Predictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code Predictor} interface is used by a binary entropy coder to 20 | * predict the probabilities of 0 and 1 symbols in the input signal. 21 | * 22 | *

Implementations of this interface should maintain a probability model 23 | * that can be updated based on input bits and can provide a split value 24 | * representing the predicted probability of the next bit being 1.

25 | */ 26 | public interface Predictor { 27 | 28 | /** 29 | * Updates the probability model based on the provided bit. 30 | * 31 | * @param bit the bit to update the model with (0 or 1) 32 | */ 33 | public void update(int bit); 34 | 35 | /** 36 | * Returns a split value representing the probability of the next bit being 1. 37 | * The returned value is in the range of [0..4095], where a value of 38 | * 410 roughly corresponds to a probability of 10% for the next bit being 1. 39 | * 40 | * @return the split value representing the probability of 1 41 | */ 42 | public int get(); 43 | } 44 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/SliceByteArray.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | import java.util.Objects; 19 | 20 | 21 | /** 22 | * A lightweight implementation of a byte array slice. 23 | * 24 | *

This class provides a way to manage a portion of a byte array, allowing 25 | * for the representation of a subset of the array with a specified length and 26 | * starting index. This can be useful for handling byte data efficiently without 27 | * creating multiple copies.

28 | */ 29 | public final class SliceByteArray { 30 | public byte[] array; // array.length is the slice capacity 31 | public int length; 32 | public int index; 33 | 34 | /** 35 | * Constructs an empty {@code SliceByteArray} with a zero-length array. 36 | */ 37 | public SliceByteArray() { 38 | this(new byte[0], 0, 0); 39 | } 40 | 41 | /** 42 | * Constructs a {@code SliceByteArray} with the specified array and index. The slice length is set to the length of the array. 43 | * 44 | * @param array the byte array 45 | * @param idx the starting index of the slice 46 | * @throws NullPointerException if the provided array is null 47 | * @throws NullPointerException if the provided index is negative 48 | */ 49 | public SliceByteArray(byte[] array, int idx) { 50 | if (array == null) 51 | throw new NullPointerException("The array cannot be null"); 52 | if (idx < 0) 53 | throw new NullPointerException("The index cannot be negative"); 54 | 55 | this.array = array; 56 | this.length = array.length; 57 | this.index = idx; 58 | } 59 | 60 | /** 61 | * Constructs a {@code SliceByteArray} with the specified array, length, and index.
62 | * 63 | * @param array the byte array 64 | * @param length the length of the slice 65 | * @param idx the starting index of the slice 66 | * @throws NullPointerException if the provided array is null 67 | * @throws IllegalArgumentException if the provided length is negative 68 | * @throws NullPointerException if the provided index is negative 69 | */ 70 | public SliceByteArray(byte[] array, int length, int idx) { 71 | if (array == null) 72 | throw new NullPointerException("The array cannot be null"); 73 | if (length < 0) 74 | throw new IllegalArgumentException("The length cannot be negative"); 75 | if (idx < 0) 76 | throw new NullPointerException("The index cannot be negative"); 77 | 78 | this.array = array; 79 | this.length = length; 80 | this.index = idx; 81 | } 82 | /** Two slices are equal when they reference the same array instance and have the same length and index; array contents are not compared. */ 83 | @Override 84 | public boolean equals(Object o) { 85 | try { 86 | if (o == null) 87 | return false; 88 | if (this == o) 89 | return true; 90 | 91 | SliceByteArray sa = (SliceByteArray) o; 92 | return (this.array == sa.array) && 93 | (this.length == sa.length) && 94 | (this.index == sa.index); 95 | } catch (ClassCastException e) { 96 | return false; 97 | } 98 | } 99 | /** The hash code depends only on the backing array reference, not on the length or the index. */ 100 | @Override 101 | public int hashCode() { 102 | return Objects.hashCode(this.array); 103 | } 104 | /** Returns a description made of the array reference as rendered by {@code String.valueOf} (not the array contents), the length and the index. */ 105 | @Override 106 | @SuppressWarnings("lgtm [java/print-array]") 107 | public String toString() { 108 | StringBuilder builder = new StringBuilder(100); 109 | builder.append("[ data="); 110 | builder.append(String.valueOf(this.array)); 111 | builder.append(", len="); 112 | builder.append(this.length); 113 | builder.append(", idx="); 114 | builder.append(this.index); 115 | builder.append("]"); 116 | return builder.toString(); 117 | } 118 | 119 | /** 120 | * Validates the provided {@code SliceByteArray} instance.
121 | * 122 | * @param sa the {@code SliceByteArray} to validate 123 | * @return {@code true} if {@code sa} and its array are not null, its index and length are not negative and its index does not exceed the array length, {@code false} otherwise (the length is not checked against the array bounds) 124 | */ 125 | public static boolean isValid(SliceByteArray sa) { 126 | if (sa == null) 127 | return false; 128 | if (sa.array == null) 129 | return false; 130 | if (sa.index < 0) 131 | return false; 132 | if (sa.length < 0) 133 | return false; 134 | 135 | return (sa.index <= sa.array.length); 136 | } 137 | } 138 | 139 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/SliceIntArray.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | import java.util.Objects; 19 | 20 | /** 21 | * A lightweight implementation of a slice for an integer array. 22 | * 23 | *

This class allows for managing a portion of an integer array, providing 24 | * a means to represent a subset of the array with a specified length and 25 | * starting index. This can be useful for efficiently handling integer data 26 | * without creating multiple copies.

27 | */ 28 | public final class SliceIntArray { 29 | public int[] array; // array.length is the slice capacity 30 | public int index; 31 | public int length; 32 | 33 | /** 34 | * Constructs an empty {@code SliceIntArray} with a zero-length array. 35 | */ 36 | public SliceIntArray() { 37 | this(new int[0], 0, 0); 38 | } 39 | 40 | /** 41 | * Constructs a {@code SliceIntArray} with the specified array and index. The slice length is set to the length of the array. 42 | * 43 | * @param array the integer array 44 | * @param idx the starting index of the slice 45 | * @throws NullPointerException if the provided array is null 46 | * @throws NullPointerException if the provided index is negative 47 | */ 48 | public SliceIntArray(int[] array, int idx) { 49 | if (array == null) 50 | throw new NullPointerException("The array cannot be null"); 51 | if (idx < 0) 52 | throw new NullPointerException("The index cannot be negative"); 53 | 54 | this.array = array; 55 | this.length = array.length; 56 | this.index = idx; 57 | } 58 | 59 | /** 60 | * Constructs a {@code SliceIntArray} with the specified array, length, and index.
61 | * 62 | * @param array the integer array 63 | * @param length the length of the slice 64 | * @param idx the starting index of the slice 65 | * @throws NullPointerException if the provided array is null 66 | * @throws IllegalArgumentException if the provided length is negative 67 | * @throws NullPointerException if the provided index is negative 68 | */ 69 | public SliceIntArray(int[] array, int length, int idx) { 70 | if (array == null) 71 | throw new NullPointerException("The array cannot be null"); 72 | if (length < 0) 73 | throw new IllegalArgumentException("The length cannot be negative"); 74 | if (idx < 0) 75 | throw new NullPointerException("The index cannot be negative"); 76 | 77 | this.array = array; 78 | this.length = length; 79 | this.index = idx; 80 | } 81 | /** Two slices are equal when they reference the same array instance and have the same length and index; array contents are not compared. */ 82 | @Override 83 | public boolean equals(Object o) { 84 | try { 85 | if (o == null) 86 | return false; 87 | if (this == o) 88 | return true; 89 | 90 | SliceIntArray sa = (SliceIntArray) o; 91 | return (this.array == sa.array) && 92 | (this.length == sa.length) && 93 | (this.index == sa.index); 94 | } catch (ClassCastException e) { 95 | return false; 96 | } 97 | } 98 | /** The hash code depends only on the backing array reference, not on the length or the index. */ 99 | @Override 100 | public int hashCode() { 101 | return Objects.hashCode(this.array); 102 | } 103 | /** Returns a description made of the array reference as rendered by {@code String.valueOf} (not the array contents), the length and the index. */ 104 | @Override 105 | @SuppressWarnings("lgtm [java/print-array]") 106 | public String toString() { 107 | StringBuilder builder = new StringBuilder(100); 108 | builder.append("[ data="); 109 | builder.append(String.valueOf(this.array)); 110 | builder.append(", len="); 111 | builder.append(this.length); 112 | builder.append(", idx="); 113 | builder.append(this.index); 114 | builder.append("]"); 115 | return builder.toString(); 116 | } 117 | 118 | /** 119 | * Validates the provided {@code SliceIntArray} instance.
120 | * 121 | * @param sa the {@code SliceIntArray} to validate 122 | * @return {@code true} if {@code sa} and its array are not null, its index and length are not negative and its index does not exceed the array length, {@code false} otherwise (the length is not checked against the array bounds) 123 | */ 124 | public static boolean isValid(SliceIntArray sa) { 125 | if (sa == null) 126 | return false; 127 | if (sa.array == null) 128 | return false; 129 | if (sa.index < 0) 130 | return false; 131 | if (sa.length < 0) 132 | return false; 133 | 134 | return (sa.index <= sa.array.length); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/app/InfoPrinter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.app; 17 | 18 | import io.github.flanglet.kanzi.Event; 19 | import java.io.PrintStream; 20 | import java.util.Map; 21 | import java.util.concurrent.ConcurrentHashMap; 22 | import io.github.flanglet.kanzi.Listener; 23 | 24 | /** 25 | * The {@code InfoPrinter} class implements the {@code Listener} interface 26 | * and provides functionality to process events and print information 27 | * about encoding or decoding processes. 28 | */ 29 | public class InfoPrinter implements Listener 30 | { 31 | /** 32 | * Enum representing the type of information to be printed. 33 | */ 34 | public enum Type 35 | { 36 | /** Represents encoding information.
*/ 37 | ENCODING, 38 | /** Represents decoding information. */ 39 | DECODING 40 | } 41 | 42 | private final PrintStream ps; 43 | private final Map map; 44 | private final Event.Type[] thresholds; 45 | private final Type type; 46 | private final int level; 47 | 48 | 49 | /** 50 | * Constructs an {@code InfoPrinter} with the specified information level, 51 | * type, and output stream. 52 | * 53 | * @param infoLevel the level of information to be printed 54 | * @param type the type of information (encoding or decoding) 55 | * @param ps the {@code PrintStream} to which information will be printed 56 | */ 57 | public InfoPrinter(int infoLevel, Type type, PrintStream ps) 58 | { 59 | if (ps == null) 60 | throw new NullPointerException("Invalid null print stream parameter"); 61 | 62 | this.ps = ps; 63 | this.level = infoLevel; 64 | this.type = type; 65 | this.map = new ConcurrentHashMap<>(); 66 | this.thresholds = (type == Type.ENCODING) ? 67 | new Event.Type[] 68 | { 69 | Event.Type.COMPRESSION_START, 70 | Event.Type.BEFORE_TRANSFORM, 71 | Event.Type.AFTER_TRANSFORM, 72 | Event.Type.BEFORE_ENTROPY, 73 | Event.Type.AFTER_ENTROPY, 74 | Event.Type.COMPRESSION_END 75 | } : 76 | new Event.Type[] 77 | { 78 | Event.Type.DECOMPRESSION_START, 79 | Event.Type.BEFORE_ENTROPY, 80 | Event.Type.AFTER_ENTROPY, 81 | Event.Type.BEFORE_TRANSFORM, 82 | Event.Type.AFTER_TRANSFORM, 83 | Event.Type.DECOMPRESSION_END 84 | }; 85 | } 86 | 87 | 88 | /** 89 | * Processes an event and takes action based on the event type 90 | * and the thresholds defined for the {@code InfoPrinter}. 
91 | * 92 | * @param evt the {@code Event} to be processed 93 | */ 94 | @Override 95 | public void processEvent(Event evt) 96 | { 97 | int currentBlockId = evt.getId(); 98 | 99 | if (evt.getType() == this.thresholds[1]) 100 | { 101 | // Register initial block size 102 | BlockInfo bi = new BlockInfo(); 103 | bi.time0 = evt.getTime(); 104 | bi.stage0Size = evt.getSize(); 105 | 106 | this.map.put(currentBlockId, bi); 107 | 108 | if (this.level >= 5) 109 | { 110 | this.ps.println(evt); 111 | } 112 | } 113 | else if (evt.getType() == this.thresholds[2]) 114 | { 115 | BlockInfo bi = this.map.get(currentBlockId); 116 | 117 | if (bi == null) 118 | return; 119 | 120 | bi.time1 = evt.getTime(); 121 | 122 | if (this.level >= 5) 123 | { 124 | long duration_ms = (bi.time1 - bi.time0) / 1000000L; 125 | this.ps.println(String.format("%s [%d ms]", evt, duration_ms)); 126 | } 127 | } 128 | else if (evt.getType() == this.thresholds[3]) 129 | { 130 | BlockInfo bi = this.map.get(currentBlockId); 131 | 132 | if (bi == null) 133 | return; 134 | 135 | bi.time2 = evt.getTime(); 136 | bi.stage1Size = evt.getSize(); 137 | 138 | if (this.level >= 5) 139 | { 140 | long duration_ms = (bi.time2 - bi.time1) / 1000000L; 141 | this.ps.println(String.format("%s [%d ms]", evt, duration_ms)); 142 | } 143 | } 144 | else if (evt.getType() == this.thresholds[4]) 145 | { 146 | long stage2Size = evt.getSize(); 147 | BlockInfo bi = this.map.remove(currentBlockId); 148 | 149 | if ((bi == null) || (this.level < 3)) 150 | return; 151 | 152 | bi.time3 = evt.getTime(); 153 | long duration1_ms = (bi.time1 - bi.time0) / 1000000L; 154 | long duration2_ms = (bi.time3 - bi.time2) / 1000000L; 155 | StringBuilder msg = new StringBuilder(); 156 | 157 | if (this.level >= 5) 158 | { 159 | this.ps.println(String.format("%s [%d ms]", evt, duration2_ms)); 160 | } 161 | 162 | // Display block info 163 | if (this.level >= 4) 164 | { 165 | msg.append(String.format("Block %d: %d => %d [%d ms] => %d [%d ms]", currentBlockId, 166 
| bi.stage0Size, bi.stage1Size, duration1_ms, stage2Size, duration2_ms)); 167 | 168 | // Add compression ratio for encoding 169 | if ((this.type == Type.ENCODING) && (bi.stage0Size != 0)) 170 | msg.append(String.format(" (%d%%)", (stage2Size*100L/bi.stage0Size))); 171 | 172 | // Optionally add hash 173 | if (evt.getHashType() == Event.HashType.SIZE_32) 174 | msg.append(String.format(" [%s]", Integer.toHexString((int) evt.getHash()))); 175 | else if (evt.getHashType() == Event.HashType.SIZE_64) 176 | msg.append(String.format(" [%s]", Long.toHexString(evt.getHash()))); 177 | 178 | this.ps.println(msg.toString()); 179 | } 180 | } 181 | else if ((evt.getType() == Event.Type.AFTER_HEADER_DECODING) && (this.level >= 3)) 182 | { 183 | this.ps.println(evt); 184 | } 185 | else if (this.level >= 5) 186 | { 187 | this.ps.println(evt); 188 | } 189 | } 190 | 191 | 192 | /** 193 | * Inner class representing information about a specific block. 194 | */ 195 | static class BlockInfo 196 | { 197 | long time0; 198 | long time1; 199 | long time2; 200 | long time3; 201 | long stage0Size; 202 | long stage1Size; 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/CMPredictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import java.util.Arrays; 19 | import java.util.Map; 20 | import io.github.flanglet.kanzi.Predictor; 21 | 22 | /** 23 | *

24 | * Implementation of a Context Model based predictor. 25 | * This predictor estimates the probability of the next bit being 1 based on 26 | * a combination of different contexts and adaptive learning rates. 27 | *

28 | * 29 | *

30 | * It uses multiple probability counters that are updated based on the 31 | * actual decoded bit, allowing it to adapt to the characteristics of the 32 | * input data. 33 | *

34 | */ 35 | public class CMPredictor implements Predictor { 36 | /** 37 | * The rate at which the fastest probability counter adapts. 38 | */ 39 | private static final int FAST_RATE = 2; 40 | /** 41 | * The rate at which the medium probability counter adapts. 42 | */ 43 | private static final int MEDIUM_RATE = 4; 44 | /** 45 | * The rate at which the slowest probability counter adapts. 46 | */ 47 | private static final int SLOW_RATE = 6; 48 | /** 49 | * The scaling factor for probabilities, representing the maximum possible 50 | * probability value. 51 | */ 52 | private static final int PSCALE = 65536; 53 | 54 | /** 55 | * The first context variable, derived from the previous bit. 56 | */ 57 | private int c1; 58 | /** 59 | * The second context variable, derived from the bit before the previous one. 60 | */ 61 | private int c2; 62 | /** 63 | * The current context, formed by previous bits. 64 | */ 65 | private int ctx; 66 | /** 67 | * An index used for accessing probability counters. 68 | */ 69 | private int idx; 70 | /** 71 | * A mask used to differentiate between run contexts. 72 | */ 73 | private int runMask; 74 | /** 75 | * A 2D array of probability counters, used for general context modeling. 76 | * `counter1[i][j]` stores the probability for context `i` and sub-context `j`. 77 | */ 78 | private final int[][] counter1; 79 | /** 80 | * A 2D array of probability counters, used for more specific context modeling. 81 | * `counter2[i][j]` stores the probability for context `i` and sub-context `j`. 82 | */ 83 | private final int[][] counter2; 84 | /** 85 | * A flag indicating if the bitstream version is 3 or older, which affects 86 | * probability calculation. 87 | */ 88 | private final boolean isBsVersion3; 89 | 90 | /** 91 | * Creates a new {@code CMPredictor}. 92 | *

93 | * The predictor is initialized with default probability values and can be 94 | * configured with a context map to handle different bitstream versions. 95 | *

96 | * 97 | * @param ctx A map containing context information for the predictor, 98 | * e.g., "bsVersion" to specify the bitstream version. 99 | */ 100 | public CMPredictor(Map ctx) { 101 | this.ctx = 1; 102 | this.idx = 0; 103 | this.counter1 = new int[256][257]; 104 | this.counter2 = new int[512][17]; 105 | 106 | int bsVersion = 4; 107 | 108 | if (ctx != null) 109 | bsVersion = (Integer) ctx.getOrDefault("bsVersion", 4); 110 | 111 | this.isBsVersion3 = bsVersion < 4; 112 | 113 | for (int i = 0; i < 256; i++) { 114 | Arrays.fill(this.counter1[i], PSCALE >> 1); 115 | 116 | for (int j = 0; j < 16; j++) { 117 | this.counter2[i + i][j] = j << 12; 118 | this.counter2[i + i + 1][j] = j << 12; 119 | } 120 | 121 | this.counter2[i + i][16] = (this.isBsVersion3 == true) ? 15 << 12 : 65535; 122 | this.counter2[i + i + 1][16] = (this.isBsVersion3 == true) ? 15 << 12 : 65535; 123 | } 124 | } 125 | 126 | /** 127 | * Updates the probability model based on the actual decoded bit. 128 | *

129 | * The internal counters are adjusted based on the provided bit and adaptive 130 | * learning rates. 131 | * The context is also updated for the next prediction. 132 | *

133 | * 134 | * @param bit The actual bit that was decoded (0 or 1). 135 | */ 136 | @Override 137 | public void update(int bit) { 138 | final int[] counter1_ = this.counter1[this.ctx]; 139 | final int[] counter2_ = this.counter2[this.ctx | this.runMask]; 140 | 141 | if (bit == 0) { 142 | counter1_[256] -= (counter1_[256] >> FAST_RATE); 143 | counter1_[this.c1] -= (counter1_[this.c1] >> MEDIUM_RATE); 144 | counter2_[this.idx] -= (counter2_[this.idx] >> SLOW_RATE); 145 | counter2_[this.idx + 1] -= (counter2_[this.idx + 1] >> SLOW_RATE); 146 | this.ctx += this.ctx; 147 | } else { 148 | counter1_[256] -= ((counter1_[256] - PSCALE + 16) >> FAST_RATE); 149 | counter1_[this.c1] -= ((counter1_[this.c1] - PSCALE + 16) >> MEDIUM_RATE); 150 | counter2_[this.idx] -= ((counter2_[this.idx] - PSCALE + 16) >> SLOW_RATE); 151 | counter2_[this.idx + 1] -= ((counter2_[this.idx + 1] - PSCALE + 16) >> SLOW_RATE); 152 | this.ctx += (this.ctx + 1); 153 | } 154 | 155 | if (this.ctx > 255) { 156 | this.c2 = this.c1; 157 | this.c1 = this.ctx & 0xFF; 158 | this.ctx = 1; 159 | this.runMask = (this.c1 == this.c2) ? 0x100 : 0; 160 | } 161 | } 162 | 163 | /** 164 | * Returns the predicted probability of the next bit being 1. 165 | *

166 | * The prediction is an integer value in the range [0, 4095], representing the 167 | * split point 168 | * in a range coding scheme. 169 | *

170 | * 171 | * @return The predicted probability of the next bit being 1, scaled to [0, 172 | * 4095]. 173 | */ 174 | @Override 175 | public int get() { 176 | final int[] pc1 = this.counter1[this.ctx]; 177 | final int p = (13 * (pc1[256] + pc1[this.c1]) + 6 * pc1[this.c2]) >> 5; 178 | this.idx = p >>> 12; 179 | final int[] pc2 = this.counter2[this.ctx | this.runMask]; 180 | final int x1 = pc2[this.idx]; 181 | final int x2 = pc2[this.idx + 1]; 182 | 183 | if (this.isBsVersion3 == true) { 184 | final int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12); 185 | return (p + 3 * ssep + 32) >>> 6; // rescale to [0..4095] 186 | } 187 | 188 | return (p + p + 3 * (x1 + x2) + 64) >>> 7; // rescale to [0..4095] 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/ExpGolombDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyDecoder; 19 | import io.github.flanglet.kanzi.InputBitStream; 20 | 21 | // Exponential Golomb Coder 22 | public final class ExpGolombDecoder implements EntropyDecoder { 23 | private final boolean signed; 24 | private final InputBitStream bitstream; 25 | 26 | public ExpGolombDecoder(InputBitStream bitstream, boolean signed) { 27 | if (bitstream == null) 28 | throw new NullPointerException("ExpGolomb codec: Invalid null bitstream parameter"); 29 | 30 | this.signed = signed; 31 | this.bitstream = bitstream; 32 | } 33 | 34 | public boolean isSigned() { 35 | return this.signed; 36 | } 37 | 38 | public byte decodeByte() { 39 | if (this.bitstream.readBit() == 1) 40 | return 0; 41 | 42 | int log2 = 1; 43 | 44 | while (this.bitstream.readBit() == 0) 45 | log2++; 46 | 47 | if (this.signed == true) { 48 | // Decode signed: read value + sign 49 | long res = this.bitstream.readBits(log2 + 1); 50 | final long sgn = res & 1; 51 | res = (res >>> 1) + (1 << log2) - 1; 52 | return (byte) ((res - sgn) ^ -sgn); // res or -res 53 | } 54 | 55 | // Decode unsigned 56 | return (byte) ((1 << log2) - 1 + this.bitstream.readBits(log2)); 57 | } 58 | 59 | @Override 60 | public InputBitStream getBitStream() { 61 | return this.bitstream; 62 | } 63 | 64 | @Override 65 | /** 66 | * Decodes a block of data by reading it directly from the bitstream. 67 | *

68 | * This method reads {@code count} bytes from the bitstream into the provided 69 | * {@code block} array. 70 | *

71 | * 72 | * @param block The byte array to decode into. 73 | * @param blkptr The starting position in the block. 74 | * @param count The number of bytes to decode. 75 | * @return The number of bytes decoded, or -1 if an error occurs (e.g., invalid 76 | * parameters). 77 | */ 78 | public int decode(byte[] block, int blkptr, int count) { 79 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 80 | return -1; 81 | 82 | final int end = blkptr + count; 83 | 84 | for (int i = blkptr; i < end; i++) 85 | block[i] = this.decodeByte(); 86 | 87 | return count; 88 | } 89 | 90 | @Override 91 | public void dispose() { 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/ExpGolombEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyEncoder; 19 | import io.github.flanglet.kanzi.OutputBitStream; 20 | 21 | /** 22 | *

23 | * Implementation of an Exponential Golomb encoder. 24 | *

25 | * This encoder supports both signed and unsigned encoding of byte values. 26 | * It uses a pre-computed cache for faster encoding of common values. 27 | */ 28 | public final class ExpGolombEncoder implements EntropyEncoder { 29 | private static final int[][] CACHE_VALUES = new int[][] { 30 | // Unsigned 31 | new int[] { 32 | 513, 1538, 1539, 2564, 2565, 2566, 2567, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 4624, 33 | 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 5664, 34 | 5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676, 5677, 5678, 5679, 5680, 35 | 5681, 5682, 5683, 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 6720, 36 | 6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 6729, 6730, 6731, 6732, 6733, 6734, 6735, 6736, 37 | 6737, 6738, 6739, 6740, 6741, 6742, 6743, 6744, 6745, 6746, 6747, 6748, 6749, 6750, 6751, 6752, 38 | 6753, 6754, 6755, 6756, 6757, 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, 6766, 6767, 6768, 39 | 6769, 6770, 6771, 6772, 6773, 6774, 6775, 6776, 6777, 6778, 6779, 6780, 6781, 6782, 6783, 7808, 40 | 7809, 7808, 6783, 6782, 6781, 6780, 6779, 6778, 6777, 6776, 6775, 6774, 6773, 6772, 6771, 6770, 41 | 6769, 6768, 6767, 6766, 6765, 6764, 6763, 6762, 6761, 6760, 6759, 6758, 6757, 6756, 6755, 6754, 42 | 6753, 6752, 6751, 6750, 6749, 6748, 6747, 6746, 6745, 6744, 6743, 6742, 6741, 6740, 6739, 6738, 43 | 6737, 6736, 6735, 6734, 6733, 6732, 6731, 6730, 6729, 6728, 6727, 6726, 6725, 6724, 6723, 6722, 44 | 6721, 6720, 5695, 5694, 5693, 5692, 5691, 5690, 5689, 5688, 5687, 5686, 5685, 5684, 5683, 5682, 45 | 5681, 5680, 5679, 5678, 5677, 5676, 5675, 5674, 5673, 5672, 5671, 5670, 5669, 5668, 5667, 5666, 46 | 5665, 5664, 4639, 4638, 4637, 4636, 4635, 4634, 4633, 4632, 4631, 4630, 4629, 4628, 4627, 4626, 47 | 4625, 4624, 3599, 3598, 3597, 3596, 3595, 3594, 3593, 3592, 2567, 2566, 2565, 2564, 1539, 1538 48 | }, 49 | // Signed 50 | new int[] { 51 | 513, 2052, 
2054, 3080, 3082, 3084, 3086, 4112, 4114, 4116, 4118, 4120, 4122, 4124, 4126, 5152, 52 | 5154, 5156, 5158, 5160, 5162, 5164, 5166, 5168, 5170, 5172, 5174, 5176, 5178, 5180, 5182, 6208, 53 | 6210, 6212, 6214, 6216, 6218, 6220, 6222, 6224, 6226, 6228, 6230, 6232, 6234, 6236, 6238, 6240, 54 | 6242, 6244, 6246, 6248, 6250, 6252, 6254, 6256, 6258, 6260, 6262, 6264, 6266, 6268, 6270, 7296, 55 | 7298, 7300, 7302, 7304, 7306, 7308, 7310, 7312, 7314, 7316, 7318, 7320, 7322, 7324, 7326, 7328, 56 | 7330, 7332, 7334, 7336, 7338, 7340, 7342, 7344, 7346, 7348, 7350, 7352, 7354, 7356, 7358, 7360, 57 | 7362, 7364, 7366, 7368, 7370, 7372, 7374, 7376, 7378, 7380, 7382, 7384, 7386, 7388, 7390, 7392, 58 | 7394, 7396, 7398, 7400, 7402, 7404, 7406, 7408, 7410, 7412, 7414, 7416, 7418, 7420, 7422, 8448, 59 | 8451, 8449, 7423, 7421, 7419, 7417, 7415, 7413, 7411, 7409, 7407, 7405, 7403, 7401, 7399, 7397, 60 | 7395, 7393, 7391, 7389, 7387, 7385, 7383, 7381, 7379, 7377, 7375, 7373, 7371, 7369, 7367, 7365, 61 | 7363, 7361, 7359, 7357, 7355, 7353, 7351, 7349, 7347, 7345, 7343, 7341, 7339, 7337, 7335, 7333, 62 | 7331, 7329, 7327, 7325, 7323, 7321, 7319, 7317, 7315, 7313, 7311, 7309, 7307, 7305, 7303, 7301, 63 | 7299, 7297, 6271, 6269, 6267, 6265, 6263, 6261, 6259, 6257, 6255, 6253, 6251, 6249, 6247, 6245, 64 | 6243, 6241, 6239, 6237, 6235, 6233, 6231, 6229, 6227, 6225, 6223, 6221, 6219, 6217, 6215, 6213, 65 | 6211, 6209, 5183, 5181, 5179, 5177, 5175, 5173, 5171, 5169, 5167, 5165, 5163, 5161, 5159, 5157, 66 | 5155, 5153, 4127, 4125, 4123, 4121, 4119, 4117, 4115, 4113, 3087, 3085, 3083, 3081, 2055, 2053 67 | } 68 | }; 69 | 70 | private final int[] cache; 71 | private final int signed; 72 | private final OutputBitStream bitstream; 73 | 74 | /** 75 | * Creates a new {@code ExpGolombEncoder}. 76 | * 77 | * @param bitstream The {@link OutputBitStream} to write the encoded data to. 78 | * @param signed If {@code true}, the encoder will encode signed values; 79 | * otherwise, unsigned. 
80 | * @throws NullPointerException if {@code bitstream} is {@code null}. 81 | */ 82 | public ExpGolombEncoder(OutputBitStream bitstream, boolean signed) { 83 | if (bitstream == null) 84 | throw new NullPointerException("ExpGolomb codec: Invalid null bitstream parameter"); 85 | 86 | this.signed = (signed == true) ? 1 : 0; 87 | // The cache stores pre-computed values for faster encoding. 88 | // CACHE_VALUES[0] is for unsigned encoding. 89 | // CACHE_VALUES[1] is for signed encoding. 90 | // Each value in the cache is a packed integer: 91 | // - The lower 9 bits (emit & 0x1FF) represent the value to write. 92 | // - The upper bits (emit >>> 9) represent the number of bits to write. 93 | this.cache = CACHE_VALUES[this.signed]; 94 | this.bitstream = bitstream; 95 | } 96 | 97 | public boolean isSigned() { 98 | return this.signed == 1; 99 | } 100 | 101 | /** 102 | * Encodes a block of data. 103 | * 104 | * @param block The byte array containing the data to encode. 105 | * @param blkptr The starting position in the block. 106 | * @param count The number of bytes to encode. 107 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid 108 | * parameters). 
109 | */ 110 | @Override 111 | public int encode(byte[] block, int blkptr, int count) { 112 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 113 | return -1; 114 | 115 | final int end = blkptr + count; 116 | 117 | for (int i = blkptr; i < end; i++) 118 | this.encodeByte(block[i]); 119 | 120 | return count; 121 | } 122 | 123 | public void encodeByte(byte val) { 124 | if (val == 0) { 125 | // shortcut when input is 0 126 | this.bitstream.writeBit(1); 127 | return; 128 | } 129 | 130 | final int emit = this.cache[val & 0xFF]; 131 | this.bitstream.writeBits(emit & 0x1FF, emit >>> 9); 132 | } 133 | 134 | @Override 135 | public OutputBitStream getBitStream() { 136 | return this.bitstream; 137 | } 138 | 139 | @Override 140 | public void dispose() { 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/FPAQEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import java.util.Arrays; 19 | import io.github.flanglet.kanzi.EntropyEncoder; 20 | import io.github.flanglet.kanzi.Memory; 21 | import io.github.flanglet.kanzi.OutputBitStream; 22 | import io.github.flanglet.kanzi.SliceByteArray; 23 | 24 | /** 25 | *

26 | * Implementation of an FPAQ encoder. This class is derived from fpaq0r by 27 | * Matt Mahoney and Alexander Ratushnyak, and is a simple (and fast) adaptive 28 | * entropy bit coder. 29 | *

30 | * 31 | *

32 | * It uses a range coding approach where the current range is updated based on 33 | * the predicted probability of the next bit. The prediction is based on a 34 | * context formed by previous bits. 35 | *

36 | * 37 | *

38 | * The encoding process involves updating the range and normalizing it by 39 | * writing 40 | * bits to an {@link OutputBitStream} when the range becomes too small. 41 | *

42 | * 43 | * @see fpaq0 by Matt Mahoney 44 | */ 45 | public class FPAQEncoder implements EntropyEncoder { 46 | /** 47 | * The top value for the range, used in range coding. 48 | * This value defines the maximum possible range. 49 | */ 50 | private static final long TOP = 0x00FFFFFFFFFFFFFFL; 51 | /** 52 | * A mask used to check if the most significant bits of the low and 53 | * (low + range) values are the same, indicating that bits can be 54 | * shifted out. 55 | */ 56 | private static final long MASK_24_56 = 0x00FFFFFFFF000000L; 57 | /** 58 | * A mask used to keep the lower 24 bits of a long. 59 | */ 60 | private static final long MASK_0_24 = 0x0000000000FFFFFFL; 61 | /** 62 | * A mask used to keep the lower 32 bits of a long. 63 | */ 64 | private static final long MASK_0_32 = 0x00000000FFFFFFFFL; 65 | /** 66 | * The default chunk size for processing data. 67 | */ 68 | private static final int DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024; 69 | /** 70 | * The maximum allowed block size. 71 | */ 72 | private static final int MAX_BLOCK_SIZE = 1 << 30; 73 | /** 74 | * The scaling factor for probabilities. 75 | */ 76 | private static final int PSCALE = 65536; 77 | 78 | /** 79 | * The lower bound of the current range. 80 | */ 81 | private long low; 82 | /** 83 | * The upper bound of the current range. 84 | */ 85 | private long high; 86 | /** 87 | * The output bitstream to which compressed data is written. 88 | */ 89 | private final OutputBitStream bitstream; 90 | private boolean disposed; 91 | private SliceByteArray sba; 92 | private final int[][] probs; // probability of bit=1 93 | private int[] p; // pointer to current prob 94 | 95 | /** 96 | * Creates a new {@code FPAQEncoder}. 97 | * 98 | * @param bitstream The {@link OutputBitStream} to write compressed data to. 99 | * @throws NullPointerException if {@code bitstream} is {@code null}. 
100 | */ 101 | public FPAQEncoder(OutputBitStream bitstream) { 102 | if (bitstream == null) 103 | throw new NullPointerException("FPAQ codec: Invalid null bitstream parameter"); 104 | 105 | this.low = 0L; 106 | this.high = TOP; 107 | this.bitstream = bitstream; 108 | this.sba = new SliceByteArray(new byte[0], 0); 109 | this.probs = new int[4][256]; 110 | this.p = this.probs[0]; 111 | 112 | for (int i = 0; i < 4; i++) 113 | Arrays.fill(this.probs[i], PSCALE >> 1); 114 | } 115 | 116 | /** 117 | * Encodes a block of data. 118 | *

119 | * This method reads data from the provided byte array, encodes it using the 120 | * FPAQ model, and writes the compressed data to the internal bitstream. 121 | *

122 | * 123 | * @param block The byte array containing the data to encode. 124 | * @param blkptr The starting position in the block. 125 | * @param count The number of bytes to encode. 126 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid 127 | * parameters). 128 | */ 129 | @Override 130 | public int encode(byte[] block, int blkptr, int count) { 131 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0) || (count > MAX_BLOCK_SIZE)) 132 | return -1; 133 | 134 | if (count == 0) 135 | return 0; 136 | 137 | int startChunk = blkptr; 138 | final int end = blkptr + count; 139 | 140 | // Split block into chunks, encode chunk and write bit array to bitstream 141 | while (startChunk < end) { 142 | final int chunkSize = Math.min(DEFAULT_CHUNK_SIZE, end - startChunk); 143 | 144 | if (this.sba.array.length < (chunkSize + (chunkSize >> 3))) 145 | this.sba.array = new byte[chunkSize + (chunkSize >> 3)]; 146 | 147 | this.sba.index = 0; 148 | final int endChunk = startChunk + chunkSize; 149 | this.p = this.probs[0]; 150 | 151 | for (int i = startChunk; i < endChunk; i++) { 152 | final byte val = block[i]; 153 | final int bits = (val & 0xFF) + 256; 154 | this.encodeBit(val & 0x80, 1); 155 | this.encodeBit(val & 0x40, bits >> 7); 156 | this.encodeBit(val & 0x20, bits >> 6); 157 | this.encodeBit(val & 0x10, bits >> 5); 158 | this.encodeBit(val & 0x08, bits >> 4); 159 | this.encodeBit(val & 0x04, bits >> 3); 160 | this.encodeBit(val & 0x02, bits >> 2); 161 | this.encodeBit(val & 0x01, bits >> 1); 162 | this.p = this.probs[(val & 0xFF) >>> 6]; 163 | } 164 | 165 | EntropyUtils.writeVarInt(this.bitstream, this.sba.index); 166 | this.bitstream.writeBits(this.sba.array, 0, 8 * this.sba.index); 167 | startChunk += chunkSize; 168 | 169 | if (startChunk < end) 170 | this.bitstream.writeBits(this.low | MASK_0_24, 56); 171 | } 172 | 173 | return count; 174 | } 175 | 176 | /** 177 | * Encodes a single bit based on a given prediction. 
178 | *

179 | * The range is split according to the prediction, and the bit is encoded by 180 | * updating the range. The probability model for the current context is then 181 | * updated based on the encoded bit. 182 | *

183 | */ 184 | private void encodeBit(int bit, int pIdx) { 185 | // Calculate interval split 186 | // Written in a way to maximize accuracy of multiplication/division 187 | final long split = (((this.high - this.low) >>> 8) * this.p[pIdx]) >>> 8; 188 | 189 | // Update probabilities 190 | if (bit == 0) { 191 | this.low += (split + 1); 192 | this.p[pIdx] -= (this.p[pIdx] >> 6); 193 | } else { 194 | this.high = this.low + split; 195 | this.p[pIdx] -= ((this.p[pIdx] - PSCALE + 64) >> 6); 196 | } 197 | 198 | // Write unchanged first 32 bits to bitstream 199 | while (((this.low ^ this.high) & MASK_24_56) == 0) 200 | this.flush(); 201 | } 202 | 203 | /** 204 | * Flushes the current range to the bitstream. 205 | *

206 | * This method is called when the range becomes too small and needs to be 207 | * normalized. It writes the most significant bits of the range to the 208 | * bitstream. 209 | *

210 | */ 211 | private void flush() { 212 | Memory.BigEndian.writeInt32(this.sba.array, this.sba.index, (int) (this.high >>> 24)); 213 | this.sba.index += 4; 214 | this.low <<= 32; 215 | this.high = (this.high << 32) | MASK_0_32; 216 | } 217 | 218 | /** 219 | * Returns the {@link OutputBitStream} used by this encoder. 220 | * 221 | * @return The {@link OutputBitStream}. 222 | */ 223 | @Override 224 | public OutputBitStream getBitStream() { 225 | return this.bitstream; 226 | } 227 | 228 | /** 229 | * Disposes of any resources used by the encoder. 230 | *

231 | * This method flushes any remaining bits in the range to the bitstream. 232 | *

233 | */ 234 | @Override 235 | public void dispose() { 236 | if (this.disposed == true) 237 | return; 238 | 239 | this.disposed = true; 240 | this.bitstream.writeBits(this.low | MASK_0_24, 56); 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/FastLogisticAdaptiveProbMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi.entropy; 16 | 17 | import io.github.flanglet.kanzi.Global; 18 | 19 | /** 20 | *

21 | * Implementation of an Adaptive Probability Map (APM) with fast logistic 22 | * function. 23 | * This class maps a probability and a context into a new probability that the 24 | * next bit will be 1. 25 | * After each guess, it updates its state to improve future guesses. 26 | *

27 | * 28 | *

29 | * It uses a logistic function to squash the prediction and adapts its internal 30 | * probabilities based on the actual bit observed and a learning rate. 31 | *

32 | */ 33 | /* package */ final class FastLogisticAdaptiveProbMap { 34 | /** 35 | * The index into the {@code data} array, representing the last probability and 36 | * context. 37 | */ 38 | private int index; 39 | 40 | /** 41 | * The update rate for adapting probabilities. A smaller rate means faster 42 | * adaptation. 43 | */ 44 | private final int rate; 45 | 46 | /** 47 | * The internal data array storing probabilities for different contexts. 48 | * Each entry is a packed integer representing a probability. 49 | */ 50 | private final int[] data; 51 | 52 | /** 53 | * Creates a new {@code FastLogisticAdaptiveProbMap}. 54 | * 55 | * @param n The number of contexts to support. 56 | * @param rate The update rate for adapting probabilities. 57 | */ 58 | FastLogisticAdaptiveProbMap(int n, int rate) { 59 | this.data = new int[n * 32]; 60 | this.rate = rate; 61 | 62 | for (int j = 0; j < 32; j++) { 63 | this.data[j] = Global.squash((j - 16) << 7) << 4; 64 | } 65 | 66 | for (int i = 1; i < n; i++) { 67 | System.arraycopy(this.data, 0, this.data, i * 32, 32); 68 | } 69 | } 70 | 71 | /** 72 | * Returns an improved prediction given the current bit, prediction, and 73 | * context. 74 | * 75 | * @param bit The actual bit observed (0 or 1). 76 | * @param pr The current prediction (probability of 1). 77 | * @param ctx The current context. 78 | * @return The improved prediction (probability of 1), scaled. 
79 | */ 80 | int get(int bit, int pr, int ctx) { 81 | // Update probability based on error and learning rate 82 | final int g = (-bit & 65528) + (bit << this.rate); 83 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 84 | 85 | // Find index: 32*ctx + quantized prediction in [0..32[ 86 | this.index = ((Global.STRETCH[pr] + 2048) >> 7) + (ctx << 5); 87 | return (this.data[this.index]) >> 4; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/HuffmanCommon.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | /** 19 | *

/**
 * <p>
 * Utility class for common Huffman coding operations.
 * </p>
 */
public final class HuffmanCommon {
    /**
     * The logarithm base 2 of the maximum chunk size.
     */
    public static final int LOG_MAX_CHUNK_SIZE = 14;

    /**
     * The minimum chunk size for Huffman encoding/decoding.
     */
    public static final int MIN_CHUNK_SIZE = 1024;

    /**
     * The maximum chunk size for Huffman encoding/decoding.
     */
    public static final int MAX_CHUNK_SIZE = 1 << LOG_MAX_CHUNK_SIZE;

    /**
     * The maximum symbol size (number of bits) for Huffman codes in bitstream
     * version 3.
     */
    public static final int MAX_SYMBOL_SIZE_V3 = 14;

    /**
     * The maximum symbol size (number of bits) for Huffman codes in bitstream
     * version 4.
     */
    public static final int MAX_SYMBOL_SIZE_V4 = 12;

    /**
     * The size of the internal buffer used for sorting symbols: one slot per
     * (code length, symbol value) pair.
     */
    private static final int BUFFER_SIZE = (MAX_SYMBOL_SIZE_V3 << 8) + 256;

    /**
     * Generates canonical Huffman codes based on the provided symbol sizes.
     * Symbols are sorted first by increasing size, then by increasing value.
     *
     * @param sizes         An array where {@code sizes[symbol]} stores the bit
     *                      length of the Huffman code for that symbol.
     * @param codes         An array where the generated canonical code for each
     *                      symbol will be stored.
     * @param symbols       An array containing the symbols to be processed. This
     *                      array will be sorted in place.
     * @param count         The number of symbols to process.
     * @param maxSymbolSize The maximum allowed bit length for any symbol's
     *                      Huffman code.
     * @return The number of codes generated (equal to {@code count}), or -1 if
     *         an error occurs: negative {@code count}, or (when more than one
     *         symbol is processed) a symbol outside [0..255], a code length that
     *         is not positive, exceeds {@code maxSymbolSize}, or does not fit
     *         the internal sort buffer.
     */
    public static int generateCanonicalCodes(short[] sizes, int[] codes, int[] symbols,
            int count, final int maxSymbolSize) {
        // Nothing to generate: do not touch symbols[0], the array may be empty.
        if (count <= 0) {
            return (count == 0) ? 0 : -1;
        }

        // Sort symbols by increasing size (first key) and increasing value (second key)
        if (count > 1) {
            final byte[] buf = new byte[BUFFER_SIZE];

            for (int i = 0; i < count; i++) {
                final int s = symbols[i];

                if ((s & 0xFF) != s) {
                    return -1;
                }

                final int size = sizes[s];

                // A non-positive size would produce a negative slot; a size whose
                // slot range ends past the buffer would overflow it.
                if ((size <= 0) || (size > maxSymbolSize) || ((size << 8) > BUFFER_SIZE)) {
                    return -1;
                }

                // Slot = (size - 1) * 256 + symbol: scanning slots in order yields
                // the symbols sorted by (size, value).
                buf[((size - 1) << 8) | s] = 1;
            }

            int n = 0;

            for (int i = 0; i < BUFFER_SIZE; i++) {
                if (buf[i] == 0) {
                    continue;
                }

                symbols[n++] = i & 0xFF;

                if (n == count) {
                    break;
                }
            }
        }

        int code = 0;
        int curLen = sizes[symbols[0]];

        for (int i = 0; i < count; i++) {
            final int s = symbols[i];
            // Moving to a longer length appends zero bits to the running code.
            code <<= (sizes[s] - curLen);
            curLen = sizes[s];
            codes[s] = code;
            code++;
        }

        return count;
    }
}

/**
 * <p>
 * Adaptive Probability Map (APM) with linear interpolation. It maps a
 * probability and a context to a new probability that the next bit will be 1,
 * and adapts its table after each observed bit.
 * </p>
 *
 * <p>
 * Each context owns 65 table entries; a prediction is produced by blending two
 * adjacent entries, and both of them are nudged toward the observed bit on the
 * following call.
 * </p>
 */
/* package */ final class LinearAdaptiveProbMap {
    /**
     * Position in {@code data} of the lower entry used by the last prediction.
     */
    private int index;

    /**
     * Shift applied to the error when adapting. A smaller rate means faster
     * adaptation.
     */
    private final int rate;

    /**
     * Table of probabilities, 65 consecutive entries per context.
     */
    private final int[] data;

    /**
     * Creates a new {@code LinearAdaptiveProbMap}.
     *
     * @param n    The number of contexts to support (0 still allocates one).
     * @param rate The update rate for adapting probabilities.
     */
    LinearAdaptiveProbMap(int n, int rate) {
        this.rate = rate;
        this.data = new int[(n == 0) ? 65 : n * 65];

        // Every context starts with the same ramp: entry k holds k << 10.
        for (int base = 0; base < this.data.length; base += 65) {
            for (int slot = 0; slot <= 64; slot++) {
                this.data[base + slot] = slot << 10;
            }
        }
    }

    /**
     * Returns an improved prediction given the current bit, prediction, and
     * context.
     *
     * @param bit The actual bit observed (0 or 1).
     * @param pr  The current prediction (probability of 1).
     * @param ctx The current context.
     * @return The improved prediction (probability of 1), scaled.
     */
    int get(int bit, int pr, int ctx) {
        // Move the two entries used last time toward the observed bit.
        final int target = (-bit & 65528) + (bit << this.rate);
        final int lower = this.index;
        final int upper = lower + 1;
        this.data[lower] += (target - this.data[lower]) >> this.rate;
        this.data[upper] += (target - this.data[upper]) >> this.rate;

        // Select the new pair: 65 entries per context plus the quantized prediction.
        final int selected = (pr >> 6) + (ctx * 65);
        this.index = selected;

        // Blend the pair with weights that sum to 128.
        final int weight = pr & 127;
        final int blended = this.data[selected] * (128 - weight) + this.data[selected + 1] * weight;
        return blended >> 11;
    }
}

21 | * Implementation of an Adaptive Probability Map (APM) with logistic function. 22 | * This class maps a probability and a context into a new probability that the 23 | * next bit will be 1. After each guess, it updates its state to improve future 24 | * guesses. 25 | *

26 | * 27 | *

28 | * It uses a logistic function to squash the prediction and adapts its internal 29 | * probabilities based on the actual bit observed and a learning rate. 30 | *

31 | */ 32 | /* package */ final class LogisticAdaptiveProbMap { 33 | /** 34 | * The index into the {@code data} array, representing the last probability and 35 | * context. 36 | */ 37 | private int index; 38 | 39 | /** 40 | * The update rate for adapting probabilities. A smaller rate means faster 41 | * adaptation. 42 | */ 43 | private final int rate; 44 | 45 | /** 46 | * The internal data array storing probabilities for different contexts. 47 | * Each entry is a packed integer representing a probability. 48 | */ 49 | private final int[] data; 50 | 51 | /** 52 | * Creates a new {@code LogisticAdaptiveProbMap}. 53 | * 54 | * @param n The number of contexts to support. 55 | * @param rate The update rate for adapting probabilities. 56 | */ 57 | 58 | LogisticAdaptiveProbMap(int n, int rate) { 59 | final int size = (n == 0) ? 33 : n * 33; 60 | this.data = new int[size]; 61 | this.rate = rate; 62 | 63 | for (int j = 0; j <= 32; j++) 64 | this.data[j] = Global.squash((j - 16) << 7) << 4; 65 | 66 | for (int i = 1; i < n; i++) 67 | System.arraycopy(this.data, 0, this.data, i * 33, 33); 68 | } 69 | 70 | /** 71 | * Returns an improved prediction given the current bit, prediction, and 72 | * context. 73 | * 74 | * @param bit The actual bit observed (0 or 1). 75 | * @param pr The current prediction (probability of 1). 76 | * @param ctx The current context. 77 | * @return The improved prediction (probability of 1), scaled. 
78 | */ 79 | int get(int bit, int pr, int ctx) { 80 | // Update probability based on error and learning rate 81 | final int g = (-bit & 65528) + (bit << this.rate); 82 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 83 | this.data[this.index + 1] += ((g - this.data[this.index + 1]) >> this.rate); 84 | pr = Global.STRETCH[pr]; 85 | 86 | // Find index: 33*ctx + quantized prediction in [0..32] 87 | this.index = ((pr + 2048) >> 7) + (ctx << 5) + ctx; 88 | 89 | // Return interpolated probability 90 | final int w = pr & 127; 91 | return (this.data[this.index] * (128 - w) + this.data[this.index + 1] * w) >> 11; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/NullEntropyDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyDecoder; 19 | import io.github.flanglet.kanzi.InputBitStream; 20 | 21 | 22 | /** 23 | *

Null entropy decoder. 24 | * This decoder does not perform any actual decompression; it simply reads 25 | * the data directly from the provided {@link InputBitStream}.

26 | * 27 | *

It acts as a pass-through mechanism, useful when no entropy coding 28 | * is applied to the data, or when the data is already in its final form.

29 | */ 30 | public final class NullEntropyDecoder implements EntropyDecoder 31 | { 32 | private final InputBitStream bitstream; 33 | 34 | 35 | /** 36 | * Creates a new {@code NullEntropyDecoder}. 37 | * 38 | * @param bitstream The {@link InputBitStream} to read data from. 39 | * @throws NullPointerException if {@code bitstream} is {@code null}. 40 | */ 41 | public NullEntropyDecoder(InputBitStream bitstream) 42 | { 43 | if (bitstream == null) 44 | throw new NullPointerException("Invalid null bitstream parameter"); 45 | 46 | this.bitstream = bitstream; 47 | } 48 | 49 | /** 50 | * Decodes a block of data by reading it directly from the bitstream. 51 | *

52 | * This method reads {@code count} bytes from the bitstream into the provided {@code block} array. 53 | *

54 | * @param block The byte array to decode into. 55 | * @param blkptr The starting position in the block. 56 | * @param count The number of bytes to decode. 57 | * @return The number of bytes decoded, or -1 if an error occurs (e.g., invalid parameters). 58 | */ 59 | @Override 60 | public int decode(byte[] block, int blkptr, int count) 61 | { 62 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 63 | return -1; 64 | 65 | int res = 0; 66 | 67 | while (count > 0) 68 | { 69 | final int ckSize = (count < 1<<23) ? count : 1<<23; 70 | res += (this.bitstream.readBits(block, blkptr, 8*ckSize) >> 3); 71 | blkptr += ckSize; 72 | count -= ckSize; 73 | } 74 | 75 | return res; 76 | } 77 | 78 | /** 79 | * Decodes a single byte by reading it directly from the bitstream. 80 | * @return The decoded byte. 81 | */ 82 | public byte decodeByte() 83 | { 84 | return (byte) this.bitstream.readBits(8); 85 | } 86 | 87 | /** 88 | * Returns the {@link InputBitStream} used by this decoder. 89 | * @return The {@link InputBitStream}. 90 | */ 91 | @Override 92 | public InputBitStream getBitStream() 93 | { 94 | return this.bitstream; 95 | } 96 | 97 | /** 98 | * Disposes of any resources used by the decoder. 99 | * This method currently does nothing as there are no specific resources to release. 100 | */ 101 | @Override 102 | public void dispose() 103 | { 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/NullEntropyEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyEncoder; 19 | import io.github.flanglet.kanzi.OutputBitStream; 20 | 21 | 22 | /** 23 | *

Null entropy encoder. 24 | * This encoder does not perform any actual compression; it simply writes 25 | * the data directly to the provided {@link OutputBitStream}.

26 | * 27 | *

It acts as a pass-through mechanism, useful when no entropy coding 28 | * is applied to the data, or when the data is already in its final form.

29 | */ 30 | public final class NullEntropyEncoder implements EntropyEncoder 31 | { 32 | private final OutputBitStream bitstream; 33 | 34 | 35 | /** 36 | * Creates a new {@code NullEntropyEncoder}. 37 | * @param bitstream The {@link OutputBitStream} to write data to. 38 | * @throws NullPointerException if {@code bitstream} is {@code null}. 39 | */ 40 | public NullEntropyEncoder(OutputBitStream bitstream) 41 | { 42 | if (bitstream == null) 43 | throw new NullPointerException("Invalid null bitstream parameter"); 44 | 45 | this.bitstream = bitstream; 46 | } 47 | 48 | 49 | /** 50 | * Encodes a block of data by writing it directly to the bitstream. 51 | *

52 | * This method writes {@code count} bytes from the provided {@code block} array to the bitstream. 53 | *

54 | * @param block The byte array containing the data to encode. 55 | * @param blkptr The starting position in the block. 56 | * @param count The number of bytes to encode. 57 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid parameters). 58 | */ 59 | @Override 60 | public int encode(byte[] block, int blkptr, int count) 61 | { 62 | if ((block == null) || (blkptr+count > block.length) || (blkptr < 0) || (count < 0)) 63 | return -1; 64 | 65 | int res = 0; 66 | 67 | while (count > 0) 68 | { 69 | final int ckSize = (count < 1<<23) ? count : 1<<23; 70 | res += (this.bitstream.writeBits(block, blkptr, 8*ckSize) >> 3); 71 | blkptr += ckSize; 72 | count -= ckSize; 73 | } 74 | 75 | return res; 76 | } 77 | 78 | 79 | /** 80 | * Encodes a single byte by writing it directly to the bitstream. 81 | * @param val The byte to encode. 82 | */ 83 | public void encodeByte(byte val) 84 | { 85 | this.bitstream.writeBits(val, 8); 86 | } 87 | 88 | 89 | /** 90 | * Returns the {@link OutputBitStream} used by this encoder. 91 | * @return The {@link OutputBitStream}. 92 | */ 93 | @Override 94 | public OutputBitStream getBitStream() 95 | { 96 | return this.bitstream; 97 | } 98 | 99 | 100 | /** 101 | * Disposes of any resources used by the encoder. 102 | * This method currently does nothing as there are no specific resources to release. 103 | */ 104 | @Override 105 | public void dispose() 106 | { 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/io/IOException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
/**
 * An {@link java.io.IOException} that also carries an integer error code, so
 * that callers can tell apart the kinds of I/O failure being reported.
 */
public class IOException extends java.io.IOException {
    private static final long serialVersionUID = -9153775235137373283L;

    /** Error code supplied when the exception was created. */
    private final int code;

    /**
     * Creates an exception with a detail message and an error code.
     *
     * @param msg  the detail message explaining the reason for the exception
     * @param code an integer error code giving additional context about the
     *             error
     */
    public IOException(String msg, int code) {
        super(msg);
        this.code = code;
    }

    /**
     * Gives access to the error code attached to this exception.
     *
     * @return the error code passed to the constructor
     */
    public int getErrorCode() {
        return code;
    }
}
/**
 * Utility class for performing I/O operations related to file management.
 */
public class IOUtil {

    /**
     * Creates a list of files from the specified target path. The method can
     * traverse directories recursively and can ignore symbolic links and dot
     * files based on the provided flags.
     * <p>
     * A target that is a hidden regular file is rejected with an exception. A
     * target directory whose name starts with a dot is silently skipped when
     * {@code ignoreDotFiles} is set.
     * </p>
     *
     * @param target         the target path from which to list files; a
     *                       {@code null} target is ignored
     * @param files          the list to populate with found file paths
     * @param isRecursive    flag indicating whether to search directories
     *                       recursively
     * @param ignoreLinks    flag indicating whether to ignore symbolic links
     * @param ignoreDotFiles flag indicating whether to ignore dot files (files
     *                       starting with a dot)
     * @throws IOException if an I/O error occurs or the target path is invalid
     */
    public static void createFileList(String target, List<Path> files, boolean isRecursive,
            boolean ignoreLinks, boolean ignoreDotFiles) throws IOException {
        if (target == null) {
            return;
        }

        final Path root = Paths.get(target);

        if (!Files.exists(root)) {
            throw new IOException("Cannot access input file '" + root + "'");
        }

        if (Files.isRegularFile(root)) {
            if (Files.isHidden(root)) {
                throw new IOException("Cannot access input file '" + root + "'");
            }

            if (!ignoreLinks || !Files.isSymbolicLink(root)) {
                files.add(root);
            }

            return;
        }

        // If not a regular file and not a directory (possibly a link?), fail
        if (!Files.isDirectory(root)) {
            throw new IOException("Invalid file type '" + root + "'");
        }

        if (ignoreDotFiles && hasDotName(root)) {
            return;
        }

        try (DirectoryStream<Path> stream = Files.newDirectoryStream(root)) {
            for (Path entry : stream) {
                if (!Files.exists(entry)) {
                    continue;
                }

                if (Files.isRegularFile(entry)) {
                    if (ignoreDotFiles && hasDotName(entry)) {
                        continue;
                    }

                    if (!ignoreLinks || !Files.isSymbolicLink(entry)) {
                        files.add(entry);
                    }
                } else if (isRecursive && Files.isDirectory(entry)) {
                    createFileList(entry.toString(), files, isRecursive, ignoreLinks, ignoreDotFiles);
                }
            }
        } catch (DirectoryIteratorException e) {
            // The iterator wraps I/O failures; surface the underlying IOException.
            throw e.getCause();
        }
    }

    /**
     * Tells whether the last component of {@code path} starts with a dot. Only
     * paths containing a separator after their first character are examined, so
     * a bare name such as {@code "."} is never treated as a dot name.
     */
    private static boolean hasDotName(Path path) {
        final String name = path.toString();
        final int idx = name.lastIndexOf(File.separator);
        // startsWith with an offset is safe even when nothing follows the separator.
        return (idx > 0) && name.startsWith(".", idx + 1);
    }
}
/**
 * An {@link OutputStream} that silently drops everything written to it. Use it
 * where an output stream is required but the bytes themselves are not wanted.
 */
public class NullOutputStream extends OutputStream {

    /**
     * Accepts a single byte and discards it.
     *
     * @param b the byte to be written
     */
    @Override
    public void write(int b) {
        // Intentionally empty: the byte is dropped.
    }

    /**
     * Accepts {@code len} bytes of {@code b} starting at {@code offs} and
     * discards them. The arguments are not inspected.
     *
     * @param b    the byte array containing the data to be written
     * @param offs the start offset in the data
     * @param len  the number of bytes to write
     */
    @Override
    public void write(byte[] b, int offs, int len) {
        // Intentionally empty: the bytes are dropped.
    }
}
14 | */ 15 | 16 | package io.github.flanglet.kanzi.transform; 17 | 18 | import java.util.Map; 19 | import io.github.flanglet.kanzi.ByteTransform; 20 | import io.github.flanglet.kanzi.Global; 21 | import io.github.flanglet.kanzi.SliceByteArray; 22 | 23 | 24 | /** 25 | * Utility class to encode and decode a BWT data block and its associated primary index(es). 26 | *

27 | * BWT stream format: Header (mode + primary index(es)) | Data (n bytes) 28 | *

    29 | *
  • mode (8 bits): xxxyyyzz
  • 30 | *
  • xxx: ignored
  • 31 | *
  • yyy: log(chunks)
  • 32 | *
  • zz: primary index size - 1 (in bytes)
  • 33 | *
  • primary indexes (chunks * (8|16|24|32 bits))
  • 34 | *
35 | */ 36 | public class BWTBlockCodec implements ByteTransform { 37 | private static final int BWT_MAX_HEADER_SIZE = 8 * 4; 38 | 39 | private final BWT bwt; 40 | private final int bsVersion; 41 | 42 | /** 43 | * Default constructor. 44 | */ 45 | public BWTBlockCodec() { 46 | this.bwt = new BWT(); 47 | this.bsVersion = 6; 48 | } 49 | 50 | /** 51 | * Constructor with a context map. 52 | * 53 | * @param ctx the context map 54 | */ 55 | public BWTBlockCodec(Map ctx) { 56 | this.bwt = new BWT(ctx); 57 | this.bsVersion = (ctx == null) ? 6 : (int) ctx.getOrDefault("bsVersion", 6); 58 | } 59 | 60 | /** 61 | * Performs the forward transform, encoding the input data. 62 | * 63 | * @param input the input byte array 64 | * @param output the output byte array 65 | * @return true if the transform was successful, false otherwise 66 | */ 67 | @Override 68 | public boolean forward(SliceByteArray input, SliceByteArray output) { 69 | if (input.length == 0) 70 | return true; 71 | 72 | if (input.array == output.array) 73 | return false; 74 | 75 | final int blockSize = input.length; 76 | 77 | if (output.length - output.index < getMaxEncodedLength(blockSize)) 78 | return false; 79 | 80 | int logBlockSize = Global.log2(blockSize); 81 | 82 | if ((blockSize & (blockSize - 1)) != 0) 83 | logBlockSize++; 84 | 85 | final int pIndexSize = (logBlockSize + 7) >> 3; 86 | 87 | if ((pIndexSize <= 0) || (pIndexSize >= 5)) 88 | return false; 89 | 90 | final int chunks = BWT.getBWTChunks(blockSize); 91 | final int logNbChunks = Global.log2(chunks); 92 | 93 | if (logNbChunks > 7) 94 | return false; 95 | 96 | int idx0 = output.index; 97 | output.index += (1 + chunks * pIndexSize); 98 | 99 | // Apply forward transform 100 | if (!this.bwt.forward(input, output)) 101 | return false; 102 | 103 | final byte mode = (byte) ((logNbChunks << 2) | (pIndexSize - 1)); 104 | 105 | // Emit header 106 | for (int i = 0, idx = idx0 + 1; i < chunks; i++) { 107 | final int primaryIndex = this.bwt.getPrimaryIndex(i) - 1; 
108 | int shift = (pIndexSize - 1) << 3; 109 | 110 | while (shift >= 0) { 111 | output.array[idx++] = (byte) (primaryIndex >> shift); 112 | shift -= 8; 113 | } 114 | } 115 | 116 | output.array[idx0] = mode; 117 | return true; 118 | } 119 | 120 | /** 121 | * Performs the inverse transform, decoding the input data. 122 | * 123 | * @param input the input byte array 124 | * @param output the output byte array 125 | * @return true if the transform was successful, false otherwise 126 | */ 127 | @Override 128 | public boolean inverse(SliceByteArray input, SliceByteArray output) { 129 | if (input.length == 0) 130 | return true; 131 | 132 | if (input.array == output.array) 133 | return false; 134 | 135 | final int blockSize = input.length; 136 | 137 | if (this.bsVersion > 5) { 138 | // Number of chunks and primary index size in bitstream since bsVersion 6 139 | byte mode = input.array[input.index++]; 140 | final int logNbChunks = (mode >> 2) & 0x07; 141 | final int pIndexSize = (mode & 0x03) + 1; 142 | final int chunks = 1 << logNbChunks; 143 | final int headerSize = 1 + chunks * pIndexSize; 144 | 145 | if (blockSize < headerSize) 146 | return false; 147 | 148 | if (chunks != BWT.getBWTChunks(blockSize-headerSize)) 149 | return false; 150 | 151 | // Read header 152 | for (int i = 0; i < chunks; i++) { 153 | int shift = (pIndexSize - 1) << 3; 154 | int primaryIndex = 0; 155 | 156 | // Extract BWT primary index 157 | while (shift >= 0) { 158 | primaryIndex = (primaryIndex << 8) | (input.array[input.index++] & 0xFF); 159 | shift -= 8; 160 | } 161 | 162 | if (!this.bwt.setPrimaryIndex(i, primaryIndex + 1)) 163 | return false; 164 | } 165 | 166 | input.length = blockSize - headerSize; 167 | } else { 168 | final int chunks = BWT.getBWTChunks(blockSize); 169 | 170 | for (int i = 0; i < chunks; i++) { 171 | // Read block header (mode + primary index). 
See top of file for format 172 | final int blockMode = input.array[input.index++] & 0xFF; 173 | final int pIndexSizeBytes = 1 + ((blockMode >>> 6) & 0x03); 174 | 175 | if (input.length < pIndexSizeBytes) 176 | return false; 177 | 178 | input.length -= pIndexSizeBytes; 179 | int shift = (pIndexSizeBytes - 1) << 3; 180 | int primaryIndex = (blockMode & 0x3F) << shift; 181 | 182 | // Extract BWT primary index 183 | for (int n = 1; n < pIndexSizeBytes; n++) { 184 | shift -= 8; 185 | primaryIndex |= ((input.array[input.index++] & 0xFF) << shift); 186 | } 187 | 188 | if (!this.bwt.setPrimaryIndex(i, primaryIndex)) 189 | return false; 190 | } 191 | } 192 | 193 | // Apply inverse transform 194 | return this.bwt.inverse(input, output); 195 | } 196 | 197 | /** 198 | * Returns the maximum encoded length, which includes the header size. 199 | * 200 | * @param srcLen the source length 201 | * @return the maximum encoded length 202 | */ 203 | @Override 204 | public int getMaxEncodedLength(int srcLen) { 205 | return srcLen + BWT_MAX_HEADER_SIZE; 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/transform/NullTransform.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.transform; 17 | 18 | import java.util.Map; 19 | import io.github.flanglet.kanzi.ByteTransform; 20 | import io.github.flanglet.kanzi.SliceByteArray; 21 | 22 | 23 | /** 24 | * NullTransform is a no-op transform that simply copies the input to the output 25 | * without performing any modifications. 26 | */ 27 | public class NullTransform implements ByteTransform { 28 | 29 | /** 30 | * Default constructor. 31 | */ 32 | public NullTransform() { 33 | } 34 | 35 | /** 36 | * Constructor with a context map. 37 | * 38 | * @param ctx the context map (not used in this implementation) 39 | */ 40 | public NullTransform(Map ctx) { 41 | } 42 | 43 | /** 44 | * Performs the forward transform, which in this case is a no-op copy 45 | * from the input to the output. 46 | * 47 | * @param input the input byte array 48 | * @param output the output byte array 49 | * @return true if the transform was successful, false otherwise 50 | */ 51 | @Override 52 | public boolean forward(SliceByteArray input, SliceByteArray output) { 53 | return doCopy(input, output); 54 | } 55 | 56 | /** 57 | * Performs the inverse transform, which in this case is a no-op copy 58 | * from the input to the output. 59 | * 60 | * @param input the input byte array 61 | * @param output the output byte array 62 | * @return true if the transform was successful, false otherwise 63 | */ 64 | @Override 65 | public boolean inverse(SliceByteArray input, SliceByteArray output) { 66 | return doCopy(input, output); 67 | } 68 | 69 | /** 70 | * Copies the input byte array to the output byte array. 
71 | * 72 | * @param input the input byte array 73 | * @param output the output byte array 74 | * @return true if the copy was successful, false otherwise 75 | */ 76 | private static boolean doCopy(SliceByteArray input, SliceByteArray output) { 77 | if (input.length == 0) 78 | return true; 79 | 80 | final int count = input.length; 81 | 82 | if (output.length - output.index < count) 83 | return false; 84 | 85 | if ((input.array != output.array) || (input.index != output.index)) 86 | System.arraycopy(input.array, input.index, output.array, output.index, count); 87 | 88 | input.index += count; 89 | output.index += count; 90 | return true; 91 | } 92 | 93 | /** 94 | * Returns the maximum encoded length, which is the same as the source length 95 | * for this no-op transform. 96 | * 97 | * @param srcLen the source length 98 | * @return the maximum encoded length 99 | */ 100 | @Override 101 | public int getMaxEncodedLength(int srcLen) { 102 | return srcLen; 103 | } 104 | } -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/transform/SBRT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2025 Frederic Langlet 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.transform; 17 | 18 | import java.util.Map; 19 | import io.github.flanglet.kanzi.ByteTransform; 20 | import io.github.flanglet.kanzi.SliceByteArray; 21 | 22 | /** 23 | * Sort By Rank Transform is a family of transforms typically used after 24 | * a BWT to reduce the variance of the data prior to entropy coding. 25 | * SBR(alpha) is defined by sbr(x, alpha) = (1-alpha)*(t-w1(x,t)) + alpha*(t-w2(x,t)) 26 | * where x is an item in the data list, t is the current access time and wk(x,t) is 27 | * the k-th access time to x at time t (with alpha in [0..1]). 28 | * See [Two new families of list update algorithms] by Frank Schulz for details. 29 | * SBR(0)= Move to Front Transform 30 | * SBR(1)= Time Stamp Transform 31 | * This code implements SBR(0), SBR(1/2) and SBR(1). Code derived from openBWT. 32 | */ 33 | public class SBRT implements ByteTransform { 34 | public static final int MODE_MTF = 1; // alpha = 0 35 | public static final int MODE_RANK = 2; // alpha = 1/2 36 | public static final int MODE_TIMESTAMP = 3; // alpha = 1 37 | 38 | private final int[] prev; 39 | private final int[] curr; 40 | private final int[] symbols; 41 | private final int[] ranks; 42 | private final int mode; 43 | 44 | /** 45 | * Default constructor. 46 | */ 47 | public SBRT() { 48 | this(MODE_RANK); 49 | } 50 | 51 | /** 52 | * Constructor with mode parameter. 53 | * 54 | * @param mode the mode of the transform 55 | */ 56 | public SBRT(int mode) { 57 | if ((mode != MODE_MTF) && (mode != MODE_RANK) && (mode != MODE_TIMESTAMP)) 58 | throw new IllegalArgumentException("Invalid mode parameter"); 59 | 60 | this.prev = new int[256]; 61 | this.curr = new int[256]; 62 | this.symbols = new int[256]; 63 | this.ranks = new int[256]; 64 | this.mode = mode; 65 | } 66 | 67 | /** 68 | * Constructor with a context map. 
69 | * 70 | * @param ctx the context map 71 | */ 72 | public SBRT(Map ctx) { 73 | final int m = (Integer) ctx.getOrDefault("sbrt", MODE_MTF); 74 | 75 | if ((m != MODE_MTF) && (m != MODE_RANK) && (m != MODE_TIMESTAMP)) 76 | throw new IllegalArgumentException("Invalid mode parameter"); 77 | 78 | this.prev = new int[256]; 79 | this.curr = new int[256]; 80 | this.symbols = new int[256]; 81 | this.ranks = new int[256]; 82 | this.mode = m; 83 | } 84 | 85 | @Override 86 | public boolean forward(SliceByteArray input, SliceByteArray output) { 87 | if (input.length == 0) 88 | return true; 89 | 90 | if (input.array == output.array) 91 | return false; 92 | 93 | final int count = input.length; 94 | 95 | if (output.length < count) 96 | return false; 97 | 98 | if (output.index + count > output.array.length) 99 | return false; 100 | 101 | // Aliasing 102 | final byte[] src = input.array; 103 | final byte[] dst = output.array; 104 | final int srcIdx = input.index; 105 | final int dstIdx = output.index; 106 | final int[] p = this.prev; 107 | final int[] q = this.curr; 108 | final int[] s2r = this.symbols; 109 | final int[] r2s = this.ranks; 110 | 111 | final int m1 = (this.mode == MODE_TIMESTAMP) ? 0 : -1; 112 | final int m2 = (this.mode == MODE_MTF) ? 0 : -1; 113 | final int s = (this.mode == MODE_RANK) ? 
1 : 0; 114 | 115 | for (int i = 0; i < 256; i++) { 116 | p[i] = 0; 117 | q[i] = 0; 118 | s2r[i] = i; 119 | r2s[i] = i; 120 | } 121 | 122 | for (int i = 0; i < count; i++) { 123 | final int c = src[srcIdx + i] & 0xFF; 124 | int r = s2r[c]; 125 | dst[dstIdx + i] = (byte) r; 126 | final int qc = ((i & m1) + (p[c] & m2)) >> s; 127 | p[c] = i; 128 | q[c] = qc; 129 | 130 | // Move up symbol to correct rank 131 | while ((r > 0) && (q[r2s[r - 1]] <= qc)) { 132 | r2s[r] = r2s[r - 1]; 133 | s2r[r2s[r]] = r; 134 | r--; 135 | } 136 | 137 | r2s[r] = c; 138 | s2r[c] = r; 139 | } 140 | 141 | input.index += count; 142 | output.index += count; 143 | return true; 144 | } 145 | 146 | @Override 147 | public boolean inverse(SliceByteArray input, SliceByteArray output) { 148 | if (input.length == 0) 149 | return true; 150 | 151 | if (input.array == output.array) 152 | return false; 153 | 154 | final int count = input.length; 155 | 156 | if (output.length < count) 157 | return false; 158 | 159 | if (output.index + count > output.array.length) 160 | return false; 161 | 162 | // Aliasing 163 | final byte[] src = input.array; 164 | final byte[] dst = output.array; 165 | final int srcIdx = input.index; 166 | final int dstIdx = output.index; 167 | final int[] p = this.prev; 168 | final int[] q = this.curr; 169 | final int[] r2s = this.ranks; 170 | 171 | final int m1 = (this.mode == MODE_TIMESTAMP) ? 0 : -1; 172 | final int m2 = (this.mode == MODE_MTF) ? 0 : -1; 173 | final int s = (this.mode == MODE_RANK) ? 
1 : 0; 174 | 175 | for (int i = 0; i < 256; i++) { 176 | p[i] = 0; 177 | q[i] = 0; 178 | r2s[i] = i; 179 | } 180 | 181 | for (int i = 0; i < count; i++) { 182 | int r = src[srcIdx + i] & 0xFF; 183 | final int c = r2s[r]; 184 | dst[dstIdx + i] = (byte) c; 185 | final int qc = ((i & m1) + (p[c] & m2)) >> s; 186 | p[c] = i; 187 | q[c] = qc; 188 | 189 | // Move up symbol to correct rank 190 | while ((r > 0) && (q[r2s[r - 1]] <= qc)) { 191 | r2s[r] = r2s[r - 1]; 192 | r--; 193 | } 194 | 195 | r2s[r] = c; 196 | } 197 | 198 | input.index += count; 199 | output.index += count; 200 | return true; 201 | } 202 | 203 | /** 204 | * Return the max size required for the encoding output buffer. 205 | * 206 | * @param srcLength the source length 207 | * @return the max encoded length 208 | */ 209 | @Override 210 | public int getMaxEncodedLength(int srcLength) { 211 | return srcLength; 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/transform/Sequence.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2025 Frederic Langlet 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.transform; 17 | 18 | import io.github.flanglet.kanzi.ByteTransform; 19 | import io.github.flanglet.kanzi.SliceByteArray; 20 | 21 | /** 22 | * Encapsulates a sequence of transforms in a transform. 23 | */ 24 | public class Sequence implements ByteTransform { 25 | private static final byte SKIP_MASK = -1; 26 | 27 | private final ByteTransform[] transforms; // transforms or functions 28 | private byte skipFlags; // skip transforms 29 | 30 | /** 31 | * Constructor with an array of transforms. 32 | * 33 | * @param transforms the array of transforms 34 | */ 35 | public Sequence(ByteTransform[] transforms) { 36 | if (transforms == null) 37 | throw new NullPointerException("Invalid null transforms parameter"); 38 | 39 | if ((transforms.length < 1) || (transforms.length > 8)) 40 | throw new IllegalArgumentException("Only 1 to 8 transforms allowed"); 41 | 42 | this.transforms = transforms; 43 | } 44 | 45 | /** 46 | * Performs the forward transform, encoding the input data. 47 | * 48 | * @param src the input byte array 49 | * @param dst the output byte array 50 | * @return true if the transform was successful, false otherwise 51 | */ 52 | @Override 53 | public boolean forward(SliceByteArray src, SliceByteArray dst) { 54 | int count = src.length; 55 | 56 | if ((count < 0) || (count + src.index > src.array.length)) 57 | return false; 58 | 59 | this.skipFlags = SKIP_MASK; 60 | 61 | if (src.length == 0) 62 | return true; 63 | 64 | final int blockSize = count; 65 | final int requiredSize = this.getMaxEncodedLength(count); 66 | SliceByteArray[] sa = new SliceByteArray[]{src, dst}; 67 | SliceByteArray sa1 = sa[0]; 68 | SliceByteArray sa2 = sa[1]; 69 | int saIdx = 0; 70 | 71 | // Process transforms sequentially 72 | for (int i = 0; i < this.transforms.length; i++) { 73 | // Check that the output buffer has enough room. If not, allocate a new one. 
74 | if (sa2.length < requiredSize) { 75 | sa2.length = requiredSize; 76 | 77 | if (sa2.array.length < sa2.length) 78 | sa2.array = new byte[sa2.length]; 79 | } 80 | 81 | final int savedIIdx = sa1.index; 82 | final int savedOIdx = sa2.index; 83 | final int savedLength = sa1.length; 84 | sa1.length = count; 85 | 86 | // Apply forward transform 87 | if (this.transforms[i].forward(sa1, sa2) == false) { 88 | // Transform failed. Either it does not apply to this type 89 | // of data or a recoverable error occurred => revert 90 | if (sa1.array != sa2.array) 91 | System.arraycopy(sa1.array, savedIIdx, sa2.array, savedOIdx, count); 92 | 93 | sa1.index = savedIIdx; 94 | sa2.index = savedOIdx; 95 | sa1.length = savedLength; 96 | continue; 97 | } 98 | 99 | this.skipFlags &= ~(1 << (7 - i)); 100 | count = sa2.index - savedOIdx; 101 | sa1.index = savedIIdx; 102 | sa2.index = savedOIdx; 103 | sa1.length = savedLength; 104 | saIdx ^= 1; 105 | sa1 = sa[saIdx]; 106 | sa2 = sa[saIdx ^ 1]; 107 | } 108 | 109 | if (saIdx != 1) { 110 | if (sa[1].index + count > sa[1].array.length) 111 | this.skipFlags = SKIP_MASK; 112 | else 113 | System.arraycopy(sa[0].array, sa[0].index, sa[1].array, sa[1].index, count); 114 | } 115 | 116 | src.index += blockSize; 117 | dst.index += count; 118 | return this.skipFlags != SKIP_MASK; 119 | } 120 | 121 | /** 122 | * Performs the inverse transform, decoding the input data. 
123 | * 124 | * @param src the input byte array 125 | * @param dst the output byte array 126 | * @return true if the transform was successful, false otherwise 127 | */ 128 | @Override 129 | public boolean inverse(SliceByteArray src, SliceByteArray dst) { 130 | if (src.length == 0) 131 | return true; 132 | 133 | int count = src.length; 134 | 135 | if ((count < 0) || (count + src.index > src.array.length)) 136 | return false; 137 | 138 | if (this.skipFlags == SKIP_MASK) { 139 | if (src.array != dst.array) 140 | System.arraycopy(src.array, src.index, dst.array, dst.index, count); 141 | 142 | src.index += count; 143 | dst.index += count; 144 | return true; 145 | } 146 | 147 | final int blockSize = count; 148 | boolean res = true; 149 | SliceByteArray[] sa = new SliceByteArray[]{src, dst}; 150 | int saIdx = 0; 151 | 152 | // Process transforms sequentially in reverse order 153 | for (int i = this.transforms.length - 1; i >= 0; i--) { 154 | if ((this.skipFlags & (1 << (7 - i))) != 0) 155 | continue; 156 | 157 | SliceByteArray sa1 = sa[saIdx]; 158 | saIdx ^= 1; 159 | SliceByteArray sa2 = sa[saIdx]; 160 | final int savedIIdx = sa1.index; 161 | final int savedOIdx = sa2.index; 162 | final int savedILen = sa1.length; 163 | final int savedOLen = sa2.length; 164 | 165 | // Apply inverse transform 166 | sa1.length = count; 167 | sa2.length = dst.array.length; 168 | 169 | if (sa2.array.length < sa2.length) 170 | sa2.array = new byte[sa2.length]; 171 | 172 | res = this.transforms[i].inverse(sa1, sa2); 173 | count = sa2.index - savedOIdx; 174 | sa1.index = savedIIdx; 175 | sa2.index = savedOIdx; 176 | sa1.length = savedILen; 177 | sa2.length = savedOLen; 178 | 179 | // All inverse transforms must succeed 180 | if (res == false) 181 | break; 182 | } 183 | 184 | if ((res == true) && (saIdx != 1)) { 185 | if (sa[1].index + count > sa[1].array.length) 186 | res = false; 187 | else 188 | System.arraycopy(sa[0].array, sa[0].index, sa[1].array, sa[1].index, count); 189 | } 190 | 191 | if 
(count > dst.length) 192 | return false; 193 | 194 | src.index += blockSize; 195 | dst.index += count; 196 | return res; 197 | } 198 | 199 | /** 200 | * Returns the maximum encoded length, which includes some extra buffer for incompressible data. 201 | * 202 | * @param srcLength the source length 203 | * @return the maximum encoded length 204 | */ 205 | @Override 206 | public int getMaxEncodedLength(int srcLength) { 207 | int requiredSize = srcLength; 208 | 209 | for (ByteTransform t : this.transforms) { 210 | if (t == null) 211 | continue; 212 | 213 | requiredSize = Math.max(requiredSize, t.getMaxEncodedLength(requiredSize)); 214 | } 215 | 216 | return requiredSize; 217 | } 218 | 219 | /** 220 | * Returns the number of functions in the sequence. 221 | * 222 | * @return the number of functions 223 | */ 224 | public int getNbFunctions() { 225 | return this.transforms.length; 226 | } 227 | 228 | /** 229 | * Returns the skip flags indicating which transforms to skip. 230 | * 231 | * @return the skip flags 232 | */ 233 | public byte getSkipFlags() { 234 | return this.skipFlags; 235 | } 236 | 237 | /** 238 | * Sets the skip flags indicating which transforms to skip. 239 | * 240 | * @param flags the skip flags 241 | * @return true if the flags were set successfully, false otherwise 242 | */ 243 | public boolean setSkipFlags(byte flags) { 244 | this.skipFlags = flags; 245 | return true; 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/transform/ZRLT.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.transform; 17 | 18 | import java.util.Map; 19 | import io.github.flanglet.kanzi.ByteTransform; 20 | import io.github.flanglet.kanzi.Global; 21 | import io.github.flanglet.kanzi.SliceByteArray; 22 | 23 | /** 24 | * Zero Run Length Encoding is a simple encoding algorithm by Wheeler 25 | * closely related to Run Length Encoding. The main difference is 26 | * that only runs of 0 values are processed. Also, the length is 27 | * encoded in a different way (each digit in a different byte). 28 | * This algorithm is well adapted to process post BWT/MTFT data. 29 | */ 30 | public final class ZRLT implements ByteTransform { 31 | 32 | /** 33 | * Default constructor. 34 | */ 35 | public ZRLT() { 36 | } 37 | 38 | /** 39 | * Constructor with a context map. 40 | * 41 | * @param ctx the context map 42 | */ 43 | public ZRLT(Map ctx) { 44 | } 45 | 46 | /** 47 | * Performs the forward transform, encoding the input data. 
48 | * 49 | * @param input the input byte array 50 | * @param output the output byte array 51 | * @return true if the transform was successful, false otherwise 52 | */ 53 | @Override 54 | public boolean forward(SliceByteArray input, SliceByteArray output) { 55 | if (input.length == 0) 56 | return true; 57 | 58 | if (input.array == output.array) 59 | return false; 60 | 61 | final int count = input.length; 62 | 63 | if (output.length - output.index < getMaxEncodedLength(count)) 64 | return false; 65 | 66 | final byte[] src = input.array; 67 | final byte[] dst = output.array; 68 | int srcIdx = input.index; 69 | int dstIdx = output.index; 70 | final int srcEnd = srcIdx + count; 71 | final int dstEnd = dstIdx + count; // do not expand 72 | boolean res = true; 73 | 74 | if (dstIdx < dstEnd) { 75 | while (srcIdx < srcEnd) { 76 | if (src[srcIdx] == 0) { 77 | int runLength = 1; 78 | 79 | while ((srcIdx + runLength < srcEnd) && (src[srcIdx + runLength] == src[srcIdx])) 80 | runLength++; 81 | 82 | srcIdx += runLength; 83 | 84 | // Encode length 85 | runLength++; 86 | int log2 = (runLength <= 256) ? 
Global.LOG2_VALUES[runLength - 1] : 31 - Integer.numberOfLeadingZeros(runLength); 87 | 88 | if (dstIdx >= dstEnd - log2) { 89 | res = false; 90 | break; 91 | } 92 | 93 | // Write every bit as a byte except the most significant one 94 | while (log2 > 0) { 95 | log2--; 96 | dst[dstIdx++] = (byte) ((runLength >> log2) & 1); 97 | } 98 | 99 | continue; 100 | } 101 | 102 | final int val = src[srcIdx] & 0xFF; 103 | 104 | if (val >= 0xFE) { 105 | if (dstIdx >= dstEnd - 1) { 106 | res = false; 107 | break; 108 | } 109 | 110 | dst[dstIdx] = (byte) 0xFF; 111 | dst[dstIdx + 1] = (byte) (val - 0xFE); 112 | dstIdx += 2; 113 | } else { 114 | if (dstIdx >= dstEnd) { 115 | res = false; 116 | break; 117 | } 118 | 119 | dst[dstIdx] = (byte) (val + 1); 120 | dstIdx++; 121 | } 122 | 123 | srcIdx++; 124 | } 125 | } 126 | 127 | input.index = srcIdx; 128 | output.index = dstIdx; 129 | return res && (srcIdx == srcEnd); 130 | } 131 | 132 | /** 133 | * Performs the inverse transform, decoding the input data. 134 | * 135 | * @param input the input byte array 136 | * @param output the output byte array 137 | * @return true if the transform was successful, false otherwise 138 | */ 139 | @Override 140 | public boolean inverse(SliceByteArray input, SliceByteArray output) { 141 | if (input.length == 0) 142 | return true; 143 | 144 | if (input.array == output.array) 145 | return false; 146 | 147 | final int count = input.length; 148 | int srcIdx = input.index; 149 | int dstIdx = output.index; 150 | final byte[] src = input.array; 151 | final byte[] dst = output.array; 152 | final int srcEnd = srcIdx + count; 153 | final int dstEnd = output.length; 154 | int runLength = 0; 155 | 156 | mainLoop: 157 | while (true) { 158 | int val = src[srcIdx] & 0xFF; 159 | 160 | if (val <= 1) { 161 | // Generate the run length bit by bit (but force MSB) 162 | runLength = 1; 163 | 164 | do { 165 | runLength += (runLength + val); 166 | srcIdx++; 167 | 168 | if (srcIdx >= srcEnd) 169 | break mainLoop; 170 | 171 | val = 
src[srcIdx] & 0xFF; 172 | } while (val <= 1); 173 | 174 | runLength--; 175 | 176 | if (runLength > 0) { 177 | if (dstIdx + runLength >= dstEnd) 178 | break; 179 | 180 | while (runLength > 0) { 181 | runLength--; 182 | dst[dstIdx++] = 0; 183 | } 184 | } 185 | } 186 | 187 | // Regular data processing 188 | if (val == 0xFF) { 189 | srcIdx++; 190 | 191 | if (srcIdx >= srcEnd) 192 | break; 193 | 194 | dst[dstIdx] = (byte) (0xFE + src[srcIdx]); 195 | } else { 196 | dst[dstIdx] = (byte) (val - 1); 197 | } 198 | 199 | srcIdx++; 200 | dstIdx++; 201 | 202 | if ((srcIdx >= srcEnd) || (dstIdx >= dstEnd)) 203 | break; 204 | } 205 | 206 | // If runLength is not 1, add trailing 0s 207 | if (runLength > 0) { 208 | runLength--; 209 | 210 | if (dstIdx + runLength > dstEnd) 211 | return false; 212 | 213 | while (runLength > 0) { 214 | runLength--; 215 | dst[dstIdx++] = 0; 216 | } 217 | 218 | } 219 | 220 | input.index = srcIdx; 221 | output.index = dstIdx; 222 | return srcIdx == srcEnd; 223 | } 224 | 225 | /** 226 | * Required encoding output buffer size unknown, so we guess. 227 | * 228 | * @param srcLen the source length 229 | * @return the maximum encoded length 230 | */ 231 | @Override 232 | public int getMaxEncodedLength(int srcLen) { 233 | return srcLen; 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/LyndonWords.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
/**
 * The {@code LyndonWords} class provides functionality for finding Lyndon words in a given string.
 * A Lyndon word is a string that is strictly smaller than any of its non-trivial suffixes.
 * This class splits a string into Lyndon words based on the Chen-Fox algorithm.
 * The factorization is computed on the encoded bytes of the string and bytes are
 * compared as signed values.
 *
 * <p>Note: This class is not thread-safe due to the mutable state of its breakpoints list.</p>
 */
public class LyndonWords {

    // List of breakpoints for the Lyndon words (reused across calls)
    private final List<Integer> breakpoints;

    /**
     * Constructs a {@code LyndonWords} object, initializing the list of breakpoints.
     */
    public LyndonWords() {
        this.breakpoints = new ArrayList<>();
    }

    /**
     * Finds the breakpoints of Lyndon words in a byte array.
     *
     * <p>This method uses the Chen-Fox algorithm to find the breakpoints where the string
     * can be split into Lyndon words. It is not thread-safe.</p>
     *
     * @param buf the byte array representing the string
     * @param length the number of bytes of {@code buf} to analyze
     * @return the internal list of breakpoints (end offset, exclusive, of each word)
     */
    private List<Integer> chenFoxLyndonBreakpoints(byte[] buf, int length) {
        int k = 0;
        this.breakpoints.clear();

        while (k < length) {
            int i = k;
            int j = k + 1;

            // Extend the candidate while the sequence stays non-decreasing
            // with respect to the prefix starting at k
            while (j < length && buf[i] <= buf[j]) {
                i = (buf[i] == buf[j]) ? i + 1 : k;
                j++;
            }

            // Emit one breakpoint per repetition of the word of length (j - i)
            while (k <= i) {
                k += (j - i);
                this.breakpoints.add(k);
            }
        }

        return this.breakpoints;
    }

    /**
     * Splits a string into Lyndon words using the default character encoding.
     *
     * @param s the input string to be split
     * @return an array of Lyndon words
     */
    public String[] split(String s) {
        return this.split(s, null); // relies on default encoding
    }

    /**
     * Splits a string into Lyndon words, using the specified character encoding.
     *
     * <p>The string is encoded, the byte sequence is factorized and every word is
     * decoded from its own byte range, so the word boundaries are exactly the
     * offsets returned by {@link #getPositions(String, Charset)}. With an encoding
     * that uses several bytes per character a boundary may fall inside a
     * character; the affected words then contain the decoder's replacement
     * character.</p>
     *
     * @param s the input string to be split
     * @param cs the charset to use for encoding the string, or {@code null} to use the default encoding
     * @return an array of Lyndon words
     */
    public String[] split(String s, Charset cs) {
        final Charset charset = (cs == null) ? Charset.defaultCharset() : cs;
        final byte[] buf = s.getBytes(charset);

        // The factorization works on bytes: the length must be the byte count,
        // not s.length() (the two differ as soon as the encoding is not one
        // byte per char)
        this.chenFoxLyndonBreakpoints(buf, buf.length);

        // Create an array to hold the Lyndon words
        String[] res = new String[this.breakpoints.size()];
        int n = 0;
        int prev = 0;

        // Breakpoints are byte offsets, so cut the bytes rather than the string
        for (int bp : this.breakpoints) {
            res[n++] = new String(buf, prev, bp - prev, charset);
            prev = bp;
        }

        return res;
    }

    /**
     * Returns the positions of the breakpoints in the input string using the default character encoding.
     *
     * @param s the input string to be analyzed
     * @return an array of integers representing the positions of the Lyndon word breakpoints
     */
    public int[] getPositions(String s) {
        return this.getPositions(s, null); // relies on default encoding
    }

    /**
     * Returns the positions of the breakpoints in the input string using the specified character encoding.
     * The positions are byte offsets in the encoded string.
     *
     * @param s the input string to be analyzed
     * @param cs the charset to use for encoding the string, or {@code null} to use the default encoding
     * @return an array of integers representing the positions of the Lyndon word breakpoints
     */
    public int[] getPositions(String s, Charset cs) {
        byte[] buf = (cs == null) ? s.getBytes() : s.getBytes(cs);
        return this.getPositions(buf, buf.length);
    }

    /**
     * Returns the positions of the breakpoints in the byte array.
     *
     * @param buf the byte array representing the string
     * @param length the number of bytes of {@code buf} to analyze
     * @return an array of integers representing the positions of the Lyndon word breakpoints
     */
    public int[] getPositions(byte[] buf, int length) {
        this.chenFoxLyndonBreakpoints(buf, length);
        int[] res = new int[this.breakpoints.size()];
        int n = 0;

        // Fill the result array with the breakpoints
        for (Integer bp : this.breakpoints) {
            res[n++] = bp;
        }

        return res;
    }

    /**
     * Main method for testing the Lyndon word splitting functionality.
     *
     * <p>This method demonstrates the use of the {@code split} method to split a string into Lyndon words.</p>
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        String[] ss = new LyndonWords().split("TO_BE_OR_NOT_TO_BE");

        // Print the resulting Lyndon words
        for (String s : ss) {
            System.out.println(s);
        }
    }
}

XXHash32 uses a sequence of rounds with constant mixing primes to process 29 | * the input data and produce a 32-bit hash value. This class allows for an 30 | * optional user-defined seed, providing a degree of variability in the output. 31 | */ 32 | public class XXHash32 { 33 | 34 | // Constants used in the hashing algorithm 35 | private static final int PRIME32_1 = -1640531535; 36 | private static final int PRIME32_2 = -2048144777; 37 | private static final int PRIME32_3 = -1028477379; 38 | private static final int PRIME32_4 = 668265263; 39 | private static final int PRIME32_5 = 374761393; 40 | 41 | // The seed used for hashing 42 | private int seed; 43 | 44 | /** 45 | * Default constructor that initializes the hash function with a seed based on 46 | * the current system time in nanoseconds. 47 | */ 48 | public XXHash32() { 49 | this((int) (System.nanoTime())); 50 | } 51 | 52 | /** 53 | * Constructs an XXHash32 instance with a specified seed. 54 | * 55 | * @param seed The seed value to be used in the hash computation. 56 | */ 57 | public XXHash32(int seed) { 58 | this.seed = seed; 59 | } 60 | 61 | /** 62 | * Sets the seed value for the hash computation. This allows for custom seed values 63 | * to modify the output hash. 64 | * 65 | * @param seed The new seed value. 66 | */ 67 | public void setSeed(int seed) { 68 | this.seed = seed; 69 | } 70 | 71 | /** 72 | * Computes the 32-bit hash of the provided byte array. 73 | * This method uses the entire byte array, starting from index 0. 74 | * 75 | * @param data The byte array to be hashed. 76 | * @return The 32-bit hash value of the input data. 77 | */ 78 | public int hash(byte[] data) { 79 | return this.hash(data, 0, data.length); 80 | } 81 | 82 | /** 83 | * Computes the 32-bit hash of the provided byte array, with the option to specify 84 | * an offset and length of the data to be used. 85 | * 86 | * @param data The byte array to be hashed. 87 | * @param offset The starting index within the byte array. 
88 | * @param length The number of bytes to hash. 89 | * @return The 32-bit hash value of the input data. 90 | */ 91 | public int hash(byte[] data, int offset, int length) { 92 | final int end = offset + length; 93 | int h32; 94 | int idx = offset; 95 | 96 | if (length >= 16) { 97 | final int end16 = end - 16; 98 | int v1 = this.seed + PRIME32_1 + PRIME32_2; 99 | int v2 = this.seed + PRIME32_2; 100 | int v3 = this.seed; 101 | int v4 = this.seed - PRIME32_1; 102 | 103 | // Process 16-byte blocks 104 | do { 105 | v1 = round(v1, Memory.LittleEndian.readInt32(data, idx)); 106 | v2 = round(v2, Memory.LittleEndian.readInt32(data, idx + 4)); 107 | v3 = round(v3, Memory.LittleEndian.readInt32(data, idx + 8)); 108 | v4 = round(v4, Memory.LittleEndian.readInt32(data, idx + 12)); 109 | idx += 16; 110 | } while (idx <= end16); 111 | 112 | h32 = ((v1 << 1) | (v1 >>> 31)) + ((v2 << 7) | (v2 >>> 25)) + 113 | ((v3 << 12) | (v3 >>> 20)) + ((v4 << 18) | (v4 >>> 14)); 114 | } else { 115 | h32 = this.seed + PRIME32_5; 116 | } 117 | 118 | h32 += length; 119 | 120 | // Process remaining data (less than 16 bytes) 121 | while (idx <= end - 4) { 122 | h32 += (Memory.LittleEndian.readInt32(data, idx) * PRIME32_3); 123 | h32 = ((h32 << 17) | (h32 >>> 15)) * PRIME32_4; 124 | idx += 4; 125 | } 126 | 127 | while (idx < end) { 128 | h32 += ((data[idx] & 0xFF) * PRIME32_5); 129 | h32 = ((h32 << 11) | (h32 >>> 21)) * PRIME32_1; 130 | idx++; 131 | } 132 | 133 | // Finalization step 134 | h32 ^= (h32 >>> 15); 135 | h32 *= PRIME32_2; 136 | h32 ^= (h32 >>> 13); 137 | h32 *= PRIME32_3; 138 | return h32 ^ (h32 >>> 16); 139 | } 140 | 141 | /** 142 | * Performs a single round of mixing for the hash value. 143 | * 144 | * @param acc The accumulator value to be mixed. 145 | * @param val The value to be mixed with the accumulator. 146 | * @return The new mixed accumulator value. 
147 | */ 148 | private static int round(int acc, int val) { 149 | acc += (val * PRIME32_2); 150 | return ((acc << 13) | (acc >>> 19)) * PRIME32_1; 151 | } 152 | } 153 | 154 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/hash/XXHash64.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.hash; 17 | 18 | import io.github.flanglet.kanzi.Memory; 19 | 20 | /** 21 | * XXHash64 is an implementation of the 64-bit variant of the XXHash algorithm, 22 | * which is a fast non-cryptographic hash function. It is designed for high-speed 23 | * hashing, and is widely used for checksums and hashing large amounts of data. 24 | * This class allows for a configurable seed value, and provides methods for 25 | * hashing byte arrays of various lengths. 26 | * Port to Java of the original source code: https://github.com/Cyan4973/xxHash 27 | * 28 | *

The algorithm processes the input data in blocks and uses a combination of 29 | * mix functions and bitwise operations to produce a hash value. It is optimized 30 | * for 64-bit platforms and can be used for general-purpose hashing where 31 | * cryptographic security is not a concern. 32 | * 33 | */ 34 | public class XXHash64 { 35 | 36 | // Constants used in the hashing algorithm 37 | private static final long PRIME64_1 = 0x9E3779B185EBCA87L; 38 | private static final long PRIME64_2 = 0xC2B2AE3D27D4EB4FL; 39 | private static final long PRIME64_3 = 0x165667B19E3779F9L; 40 | private static final long PRIME64_4 = 0x85EBCA77C2B2AE63L; 41 | private static final long PRIME64_5 = 0x27D4EB2F165667C5L; 42 | 43 | // The seed used for hashing 44 | private long seed; 45 | 46 | /** 47 | * Default constructor that initializes the hash function with a seed based on the 48 | * current system time in nanoseconds. 49 | */ 50 | public XXHash64() { 51 | this(System.nanoTime()); 52 | } 53 | 54 | /** 55 | * Constructs an XXHash64 instance with a specified seed. 56 | * 57 | * @param seed The seed value to be used in the hash computation. 58 | */ 59 | public XXHash64(long seed) { 60 | this.seed = seed; 61 | } 62 | 63 | /** 64 | * Sets the seed value for the hash computation. This allows for custom seed values 65 | * to modify the output hash. 66 | * 67 | * @param seed The new seed value. 68 | */ 69 | public void setSeed(long seed) { 70 | this.seed = seed; 71 | } 72 | 73 | /** 74 | * Computes the 64-bit hash of the provided byte array. 75 | * This method uses the entire byte array, starting from index 0. 76 | * 77 | * @param data The byte array to be hashed. 78 | * @return The 64-bit hash value of the input data. 79 | */ 80 | public long hash(byte[] data) { 81 | return this.hash(data, 0, data.length); 82 | } 83 | 84 | /** 85 | * Computes the 64-bit hash of the provided byte array, with the option to specify 86 | * an offset and length of the data to be used. 
87 | * 88 | * @param data The byte array to be hashed. 89 | * @param offset The starting index within the byte array. 90 | * @param length The number of bytes to hash. 91 | * @return The 64-bit hash value of the input data. 92 | */ 93 | public long hash(byte[] data, int offset, int length) { 94 | final int end = offset + length; 95 | long h64; 96 | int idx = offset; 97 | 98 | if (length >= 32) { 99 | final int end32 = end - 32; 100 | long v1 = this.seed + PRIME64_1 + PRIME64_2; 101 | long v2 = this.seed + PRIME64_2; 102 | long v3 = this.seed; 103 | long v4 = this.seed - PRIME64_1; 104 | 105 | // Process 32-byte blocks 106 | do { 107 | v1 = round(v1, Memory.LittleEndian.readLong64(data, idx)); 108 | v2 = round(v2, Memory.LittleEndian.readLong64(data, idx + 8)); 109 | v3 = round(v3, Memory.LittleEndian.readLong64(data, idx + 16)); 110 | v4 = round(v4, Memory.LittleEndian.readLong64(data, idx + 24)); 111 | idx += 32; 112 | } while (idx <= end32); 113 | 114 | h64 = ((v1 << 1) | (v1 >>> 31)) + ((v2 << 7) | (v2 >>> 25)) + 115 | ((v3 << 12) | (v3 >>> 20)) + ((v4 << 18) | (v4 >>> 14)); 116 | 117 | // Finalization 118 | h64 = mergeRound(h64, v1); 119 | h64 = mergeRound(h64, v2); 120 | h64 = mergeRound(h64, v3); 121 | h64 = mergeRound(h64, v4); 122 | } else { 123 | h64 = this.seed + PRIME64_5; 124 | } 125 | 126 | h64 += length; 127 | 128 | // Process remaining data (less than 32 bytes) 129 | while (idx + 8 <= end) { 130 | h64 ^= round(0, Memory.LittleEndian.readLong64(data, idx)); 131 | h64 = ((h64 << 27) | (h64 >>> 37)) * PRIME64_1 + PRIME64_4; 132 | idx += 8; 133 | } 134 | 135 | while (idx + 4 <= end) { 136 | h64 ^= (Memory.LittleEndian.readInt32(data, idx) * PRIME64_1); 137 | h64 = ((h64 << 23) | (h64 >>> 41)) * PRIME64_2 + PRIME64_3; 138 | idx += 4; 139 | } 140 | 141 | while (idx < end) { 142 | h64 ^= ((data[idx] & 0xFF) * PRIME64_5); 143 | h64 = ((h64 << 11) | (h64 >>> 53)) * PRIME64_1; 144 | idx++; 145 | } 146 | 147 | // Finalization step 148 | h64 ^= (h64 >>> 33); 149 
| h64 *= PRIME64_2; 150 | h64 ^= (h64 >>> 29); 151 | h64 *= PRIME64_3; 152 | return h64 ^ (h64 >>> 32); 153 | } 154 | 155 | /** 156 | * Performs a single round of mixing for the hash value. 157 | * 158 | * @param acc The accumulator value to be mixed. 159 | * @param val The value to be mixed with the accumulator. 160 | * @return The new mixed accumulator value. 161 | */ 162 | private static long round(long acc, long val) { 163 | acc += (val * PRIME64_2); 164 | return ((acc << 31) | (acc >>> 33)) * PRIME64_1; 165 | } 166 | 167 | /** 168 | * Merges an additional value into the accumulator during the finalization phase. 169 | * 170 | * @param acc The current accumulator value. 171 | * @param val The value to be merged into the accumulator. 172 | * @return The updated accumulator value. 173 | */ 174 | private static long mergeRound(long acc, long val) { 175 | acc ^= round(0, val); 176 | return acc * PRIME64_1 + PRIME64_4; 177 | } 178 | } 179 | 180 | 181 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/BucketSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ByteSorter; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code BucketSort} class provides an implementation of the bucket sort algorithm for sorting integers and bytes. 23 | * Bucket sort is a simple and efficient sorting algorithm that works by distributing elements into a number of buckets, 24 | * then sorting the individual buckets. This implementation is optimized to handle small integer and byte values. 25 | * 26 | *

It is a simplified form of radix sort with buckets of width one, making it efficient for small integers (up to 0xFFFF).

27 | *

This implementation is not thread-safe due to the mutable state of its internal data structures.

28 | */ 29 | public class BucketSort implements IntSorter, ByteSorter { 30 | 31 | // Array to store the count of each value within the bucket range 32 | private final int[] count; 33 | 34 | /** 35 | * Constructs a {@code BucketSort} object using the default bucket size for byte values (0 to 255). 36 | */ 37 | public BucketSort() { 38 | this.count = new int[256]; 39 | } 40 | 41 | /** 42 | * Constructs a {@code BucketSort} object with a custom bucket size determined by the logarithm of the maximum value. 43 | * 44 | * @param logMaxValue the logarithm (base 2) of the maximum value to be sorted. 45 | * Must be between 2 and 16 (inclusive). 46 | * @throws IllegalArgumentException if the {@code logMaxValue} is less than 2 or greater than 16. 47 | */ 48 | public BucketSort(int logMaxValue) { 49 | if (logMaxValue < 2) 50 | throw new IllegalArgumentException("The log data size parameter must be at least 2"); 51 | 52 | if (logMaxValue > 16) 53 | throw new IllegalArgumentException("The log data size parameter must be at most 16"); 54 | 55 | this.count = new int[1 << logMaxValue]; // Array size determined by the max value (logMaxValue) 56 | } 57 | 58 | /** 59 | * Sorts an array of integers using the bucket sort algorithm. 60 | * 61 | *

The sorting works by counting the frequency of each integer in the input array, then placing the integers back into 62 | * the array in sorted order.

63 | * 64 | * @param input the array of integers to be sorted. 65 | * @param blkptr the starting index in the array to begin sorting. 66 | * @param len the length of the portion of the array to be sorted. 67 | * @return {@code true} if the sorting was successful; {@code false} if there were invalid parameters (e.g., 68 | * out-of-bounds indices or invalid length). 69 | */ 70 | @Override 71 | public boolean sort(int[] input, int blkptr, int len) { 72 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 73 | return false; 74 | 75 | if (len == 1) 76 | return true; 77 | 78 | final int len8 = len & -8; // Round down to the nearest multiple of 8 79 | final int end8 = blkptr + len8; 80 | final int[] c = this.count; // Bucket count array 81 | final int length = c.length; 82 | 83 | // Unrolled loop for efficient counting 84 | for (int i = blkptr; i < end8; i += 8) { 85 | c[input[i]]++; 86 | c[input[i + 1]]++; 87 | c[input[i + 2]]++; 88 | c[input[i + 3]]++; 89 | c[input[i + 4]]++; 90 | c[input[i + 5]]++; 91 | c[input[i + 6]]++; 92 | c[input[i + 7]]++; 93 | } 94 | 95 | // Handle remaining elements not divisible by 8 96 | for (int i = len8; i < len; i++) 97 | c[input[blkptr + i]]++; 98 | 99 | // Reconstruct the sorted array using the bucket counts 100 | for (int i = 0, j = blkptr; i < length; i++) { 101 | final int val = c[i]; 102 | 103 | if (val == 0) 104 | continue; 105 | 106 | c[i] = 0; 107 | int val8 = val & -8; 108 | 109 | for (int k = val; k > val8; k--) 110 | input[j++] = i; 111 | 112 | // Fill the remaining spots using the "8 at a time" optimization 113 | while (val8 > 0) { 114 | input[j] = i; 115 | input[j + 1] = i; 116 | input[j + 2] = i; 117 | input[j + 3] = i; 118 | input[j + 4] = i; 119 | input[j + 5] = i; 120 | input[j + 6] = i; 121 | input[j + 7] = i; 122 | j += 8; 123 | val8 -= 8; 124 | } 125 | } 126 | 127 | return true; 128 | } 129 | 130 | /** 131 | * Sorts an array of bytes using the bucket sort algorithm. 132 | * 133 | *

This method behaves similarly to the integer sort method, but operates on byte values (0 to 255).

134 | * 135 | * @param input the array of bytes to be sorted. 136 | * @param blkptr the starting index in the array to begin sorting. 137 | * @param len the length of the portion of the array to be sorted. 138 | * @return {@code true} if the sorting was successful; {@code false} if there were invalid parameters ( 139 | * out-of-bounds indices or invalid length). 140 | */ 141 | @Override 142 | public boolean sort(byte[] input, int blkptr, int len) { 143 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 144 | return false; 145 | 146 | if (len == 1) 147 | return true; 148 | 149 | final int len8 = len & -8; // Round down to the nearest multiple of 8 150 | final int end8 = blkptr + len8; 151 | final int[] c = this.count; // Bucket count array 152 | final int length = c.length; 153 | 154 | // Unrolled loop for efficient counting 155 | for (int i = blkptr; i < end8; i += 8) { 156 | c[input[i] & 0xFF]++; 157 | c[input[i + 1] & 0xFF]++; 158 | c[input[i + 2] & 0xFF]++; 159 | c[input[i + 3] & 0xFF]++; 160 | c[input[i + 4] & 0xFF]++; 161 | c[input[i + 5] & 0xFF]++; 162 | c[input[i + 6] & 0xFF]++; 163 | c[input[i + 7] & 0xFF]++; 164 | } 165 | 166 | // Handle remaining elements not divisible by 8 167 | for (int i = len8; i < len; i++) 168 | c[input[blkptr + i] & 0xFF]++; 169 | 170 | // Reconstruct the sorted array using the bucket counts 171 | for (int i = 0, j = blkptr; i < length; i++) { 172 | final int val = c[i]; 173 | 174 | if (val == 0) 175 | continue; 176 | 177 | int val8 = val & -8; 178 | c[i] = 0; 179 | 180 | for (int k = val; k > val8; k--) 181 | input[j++] = (byte) i; 182 | 183 | // Fill the remaining spots using the "8 at a time" optimization 184 | while (val8 > 0) { 185 | input[j] = (byte) i; 186 | input[j + 1] = (byte) i; 187 | input[j + 2] = (byte) i; 188 | input[j + 3] = (byte) i; 189 | input[j + 4] = (byte) i; 190 | input[j + 5] = (byte) i; 191 | input[j + 6] = (byte) i; 192 | input[j + 7] = (byte) i; 193 | j += 8; 194 | val8 -= 8; 195 | } 196 | 
} 197 | 198 | return true; 199 | } 200 | } 201 | 202 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/DefaultArrayComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | 20 | /** 21 | * A comparator for comparing elements in an integer array. This class implements the {@link ArrayComparator} interface 22 | * and provides a mechanism to compare two elements based on their values. The comparison also accounts for stable sorting 23 | * by considering their indices when the values are equal. 24 | * 25 | *

This class is immutable and thread-safe as it holds a reference to the input array but does not modify it.

26 | * 27 | *

Example usage:

28 | *
29 |  * int[] array = { 5, 2, 8, 1 };
30 |  * DefaultArrayComparator comparator = new DefaultArrayComparator(array);
31 |  * int result = comparator.compare(0, 1); // Compares array[0] (5) and array[1] (2)
32 |  * 
33 | * 34 | * @see ArrayComparator 35 | */ 36 | public final class DefaultArrayComparator implements ArrayComparator { 37 | 38 | private final int[] array; 39 | 40 | /** 41 | * Constructs a new {@code DefaultArrayComparator} using the specified integer array. 42 | * 43 | * @param array the array to compare elements in; must not be {@code null} 44 | * @throws NullPointerException if the provided array is {@code null} 45 | */ 46 | public DefaultArrayComparator(int[] array) { 47 | if (array == null) 48 | throw new NullPointerException("Invalid null array parameter"); 49 | 50 | this.array = array; 51 | } 52 | 53 | /** 54 | * Compares two elements of the array at the specified indices. 55 | *

56 | * The comparison is based on the values of the elements at the provided indices. If the values are equal, 57 | * the method returns a comparison based on their indices to maintain stability in sorting. 58 | *

59 | * 60 | * @param lidx the index of the first element to compare 61 | * @param ridx the index of the second element to compare 62 | * @return a negative integer if the element at {@code lidx} is less than the element at {@code ridx}, 63 | * a positive integer if the element at {@code lidx} is greater than the element at {@code ridx}, 64 | * or zero if they are equal 65 | */ 66 | @Override 67 | public int compare(int lidx, int ridx) { 68 | int res = this.array[lidx] - this.array[ridx]; 69 | 70 | // Make the sort stable 71 | if (res == 0) 72 | res = lidx - ridx; 73 | 74 | return res; 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/HeapSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code HeapSort} class implements the heap sort algorithm, a comparison-based sorting algorithm with an average and 23 | * worst-case time complexity of O(n log n). 24 | * 25 | *

Heap sort works by first building a binary heap from the input data, and then repeatedly extracting the maximum 26 | * (or minimum) element from the heap and reconstructing the heap. Although heap sort has O(n log n) time complexity, it is 27 | * often slower in practice compared to other O(n log n) algorithms such as QuickSort, due to larger constant factors.

28 | * 29 | *

This implementation allows an optional custom comparator to be used for comparing array elements. If no comparator is 30 | * provided, the natural ordering of the elements is used.

31 | * 32 | *

This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.

33 | */ 34 | public final class HeapSort implements IntSorter { 35 | 36 | // Comparator used for comparing elements in the array 37 | private final ArrayComparator cmp; 38 | 39 | /** 40 | * Constructs a {@code HeapSort} instance without a custom comparator. 41 | * This will use the natural ordering of the elements in the array. 42 | */ 43 | public HeapSort() { 44 | this(null); 45 | } 46 | 47 | /** 48 | * Constructs a {@code HeapSort} instance with the specified comparator. 49 | * If {@code cmp} is {@code null}, the natural ordering of the elements will be used. 50 | * 51 | * @param cmp the comparator to use for element comparisons, or {@code null} to use natural ordering. 52 | */ 53 | public HeapSort(ArrayComparator cmp) { 54 | this.cmp = cmp; 55 | } 56 | 57 | /** 58 | * Returns the comparator used by this {@code HeapSort} instance. 59 | * 60 | * @return the comparator used for element comparisons, or {@code null} if natural ordering is used. 61 | */ 62 | protected ArrayComparator getComparator() { 63 | return this.cmp; 64 | } 65 | 66 | /** 67 | * Sorts the specified portion of the input array using the heap sort algorithm. 68 | * 69 | *

The sorting begins at index {@code blkptr} and sorts {@code len} elements in the array. The array is rearranged 70 | * in-place, and the elements will be sorted in ascending order.

71 | * 72 | * @param input the array to be sorted. 73 | * @param blkptr the starting index of the portion to be sorted. 74 | * @param len the number of elements to sort. 75 | * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (out-of-bounds indices). 76 | */ 77 | @Override 78 | public boolean sort(int[] input, int blkptr, int len) { 79 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 80 | return false; 81 | 82 | if (len == 1) 83 | return true; 84 | 85 | // Build the heap by calling doSort on all non-leaf nodes 86 | for (int k = len >> 1; k > 0; k--) { 87 | doSort(input, blkptr, k, len, this.cmp); 88 | } 89 | 90 | // Repeatedly extract the maximum element and reconstruct the heap 91 | for (int i = len - 1; i > 0; i--) { 92 | final int temp = input[blkptr]; 93 | input[blkptr] = input[blkptr + i]; 94 | input[blkptr + i] = temp; 95 | doSort(input, blkptr, 1, i, this.cmp); 96 | } 97 | 98 | return true; 99 | } 100 | 101 | /** 102 | * Performs a single heap sort operation on the portion of the array specified by {@code blkptr}, {@code idx}, and {@code count}. 103 | * This method ensures that the subtree rooted at {@code idx} is a valid heap. 104 | * 105 | * @param array the array to be sorted. 106 | * @param blkptr the starting index of the array to be sorted. 107 | * @param idx the index of the current node to heapify. 108 | * @param count the total number of elements in the heap. 109 | * @param cmp the comparator used for comparisons, or {@code null} to use natural ordering. 
110 | */ 111 | private static void doSort(int[] array, int blkptr, int idx, int count, ArrayComparator cmp) { 112 | int k = idx; 113 | final int temp = array[blkptr + k - 1]; 114 | final int n = count >> 1; // Half the size of the heap 115 | 116 | // If a custom comparator is provided, use it for comparison 117 | if (cmp != null) { 118 | while (k <= n) { 119 | int j = k << 1; // Left child 120 | 121 | // If right child exists and is larger, use it instead 122 | if ((j < count) && (cmp.compare(array[blkptr + j - 1], array[blkptr + j]) < 0)) { 123 | j++; 124 | } 125 | 126 | // If the current node is larger than its child, break out of the loop 127 | if (temp >= array[blkptr + j - 1]) { 128 | break; 129 | } 130 | 131 | // Move the child up to the parent node 132 | array[blkptr + k - 1] = array[blkptr + j - 1]; 133 | k = j; 134 | } 135 | } 136 | // If no comparator is provided, use natural ordering (ascending order) 137 | else { 138 | while (k <= n) { 139 | int j = k << 1; // Left child 140 | 141 | // If right child exists and is larger, use it instead 142 | if ((j < count) && (array[blkptr + j - 1] < array[blkptr + j])) { 143 | j++; 144 | } 145 | 146 | // If the current node is larger than its child, break out of the loop 147 | if (temp >= array[blkptr + j - 1]) { 148 | break; 149 | } 150 | 151 | // Move the child up to the parent node 152 | array[blkptr + k - 1] = array[blkptr + j - 1]; 153 | k = j; 154 | } 155 | } 156 | 157 | // Place the original element in the correct position 158 | array[blkptr + k - 1] = temp; 159 | } 160 | } 161 | 162 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/InsertionSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code InsertionSort} class implements the insertion sort algorithm, a simple comparison-based sorting algorithm with 23 | * a worst-case time complexity of O(n²) and an average-case complexity of O(n+k), where k is the number of inversions. 24 | * This algorithm is efficient for small data sets or nearly sorted data, but is not suitable for large datasets due to its 25 | * quadratic time complexity. 26 | * 27 | *

Insertion sort works by iterating through the array and repeatedly inserting each element into its correct position 28 | * relative to the elements before it. The algorithm performs well when the data is already nearly sorted, making it ideal for 29 | * small datasets or nearly sorted data.

30 | * 31 | *

This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.

32 | */ 33 | public class InsertionSort implements IntSorter { 34 | 35 | // Comparator used for comparing elements in the array 36 | private final ArrayComparator cmp; 37 | 38 | /** 39 | * Constructs an {@code InsertionSort} instance without a custom comparator. 40 | * This will use the natural ordering of the elements in the array. 41 | */ 42 | public InsertionSort() { 43 | this(null); 44 | } 45 | 46 | /** 47 | * Constructs an {@code InsertionSort} instance with the specified comparator. 48 | * If {@code cmp} is {@code null}, the natural ordering of the elements will be used. 49 | * 50 | * @param cmp the comparator to use for element comparisons, or {@code null} to use natural ordering. 51 | */ 52 | public InsertionSort(ArrayComparator cmp) { 53 | this.cmp = cmp; 54 | } 55 | 56 | /** 57 | * Returns the comparator used by this {@code InsertionSort} instance. 58 | * 59 | * @return the comparator used for element comparisons, or {@code null} if natural ordering is used. 60 | */ 61 | protected ArrayComparator getComparator() { 62 | return this.cmp; 63 | } 64 | 65 | /** 66 | * Sorts the specified portion of the input array using the insertion sort algorithm. 67 | * 68 | *

The sorting begins at index {@code blkptr} and sorts {@code len} elements in the array. The array is rearranged 69 | * in-place, and the elements will be sorted in ascending order.

70 | * 71 | * @param input the array to be sorted. 72 | * @param blkptr the starting index of the portion to be sorted. 73 | * @param len the number of elements to sort. 74 | * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (e.g., out-of-bounds indices). 75 | */ 76 | @Override 77 | public boolean sort(int[] input, int blkptr, int len) { 78 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 79 | return false; 80 | 81 | if (len == 1) 82 | return true; 83 | 84 | // If no comparator is provided, sort using natural ordering 85 | if (this.cmp == null) 86 | sortNoComparator(input, blkptr, blkptr + len); 87 | else 88 | sortWithComparator(input, blkptr, blkptr + len, this.cmp); 89 | 90 | return true; 91 | } 92 | 93 | /** 94 | * Performs the insertion sort on the array using the provided comparator. 95 | * This method handles the sorting for small sub-arrays and larger arrays. 96 | * 97 | * @param array the array to be sorted. 98 | * @param blkptr the starting index of the portion to be sorted. 99 | * @param end the index where the sorting should end. 100 | * @param comp the comparator used for element comparisons. 
101 | */ 102 | private static void sortWithComparator(int[] array, int blkptr, int end, ArrayComparator comp) { 103 | // Shortcut for 2-element sub-array 104 | if (end == blkptr + 1) { 105 | if (comp.compare(array[blkptr], array[end]) > 0) { 106 | final int tmp = array[blkptr]; 107 | array[blkptr] = array[end]; 108 | array[end] = tmp; 109 | } 110 | return; 111 | } 112 | 113 | // Shortcut for 3-element sub-array 114 | if (end == blkptr + 2) { 115 | final int a1 = array[blkptr]; 116 | final int a2 = array[blkptr + 1]; 117 | final int a3 = array[end]; 118 | 119 | if (comp.compare(a1, a2) <= 0) { 120 | if (comp.compare(a2, a3) <= 0) 121 | return; 122 | 123 | if (comp.compare(a3, a1) <= 0) { 124 | array[blkptr] = a3; 125 | array[blkptr + 1] = a1; 126 | array[end] = a2; 127 | return; 128 | } 129 | 130 | array[blkptr + 1] = a3; 131 | array[end] = a2; 132 | } else { 133 | if (comp.compare(a1, a3) <= 0) { 134 | array[blkptr] = a2; 135 | array[blkptr + 1] = a1; 136 | return; 137 | } 138 | 139 | if (comp.compare(a3, a2) <= 0) { 140 | array[blkptr] = a3; 141 | array[end] = a1; 142 | return; 143 | } 144 | 145 | array[blkptr] = a2; 146 | array[blkptr + 1] = a3; 147 | array[end] = a1; 148 | } 149 | return; 150 | } 151 | 152 | // Regular case for arrays with more than 3 elements 153 | for (int i = blkptr; i < end; i++) { 154 | final int val = array[i]; 155 | int j = i; 156 | 157 | while ((j > blkptr) && (comp.compare(array[j - 1], val) > 0)) { 158 | array[j] = array[j - 1]; 159 | j--; 160 | } 161 | 162 | array[j] = val; 163 | } 164 | } 165 | 166 | /** 167 | * Performs the insertion sort on the array using natural ordering (i.e., no comparator). 168 | * This method handles the sorting for small sub-arrays and larger arrays without needing a custom comparator. 169 | * 170 | * @param array the array to be sorted. 171 | * @param blkptr the starting index of the portion to be sorted. 172 | * @param end the index where the sorting should end. 
173 | */ 174 | private static void sortNoComparator(int[] array, int blkptr, int end) { 175 | // Shortcut for 2-element sub-array 176 | if (end == blkptr + 1) { 177 | if (array[blkptr] > array[end]) { 178 | final int tmp = array[blkptr]; 179 | array[blkptr] = array[end]; 180 | array[end] = tmp; 181 | } 182 | return; 183 | } 184 | 185 | // Shortcut for 3-element sub-array 186 | if (end == blkptr + 2) { 187 | final int a1 = array[blkptr]; 188 | final int a2 = array[blkptr + 1]; 189 | final int a3 = array[end]; 190 | 191 | if (a1 <= a2) { 192 | if (a2 <= a3) 193 | return; 194 | 195 | if (a3 <= a1) { 196 | array[blkptr] = a3; 197 | array[blkptr + 1] = a1; 198 | array[end] = a2; 199 | return; 200 | } 201 | 202 | array[blkptr + 1] = a3; 203 | array[end] = a2; 204 | } else { 205 | if (a1 <= a3) { 206 | array[blkptr] = a2; 207 | array[blkptr + 1] = a1; 208 | return; 209 | } 210 | 211 | if (a3 <= a2) { 212 | array[blkptr] = a3; 213 | array[end] = a1; 214 | return; 215 | } 216 | 217 | array[blkptr] = a2; 218 | array[blkptr + 1] = a3; 219 | array[end] = a1; 220 | } 221 | return; 222 | } 223 | 224 | // Regular case for arrays with more than 3 elements 225 | for (int i = blkptr; i < end; i++) { 226 | final int val = array[i]; 227 | int j = i; 228 | 229 | while ((j > blkptr) && (array[j - 1] > val)) { 230 | array[j] = array[j - 1]; 231 | j--; 232 | } 233 | 234 | array[j] = val; 235 | } 236 | } 237 | } 238 | 239 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/MergeSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi.util.sort; 16 | 17 | import io.github.flanglet.kanzi.IntSorter; 18 | 19 | /** 20 | * The {@code MergeSort} class implements the merge sort algorithm, which is a divide-and-conquer comparison-based sorting 21 | * algorithm. Merge sort divides the input array into smaller sub-arrays, recursively sorts each sub-array, and then merges 22 | * the sorted sub-arrays back together. While conceptually simple, it is usually not very performant for smaller arrays due 23 | * to its recursive nature. However, merge sort is known for its stable sorting and predictable O(n log n) time complexity. 24 | * 25 | *

Merge sort is efficient for large datasets and nearly sorted data, but it can require significant memory overhead 26 | * due to the need for auxiliary space to store the merged sub-arrays. This implementation uses insertion sort for small 27 | * sub-arrays to improve performance on small or nearly sorted datasets.

28 | * 29 | *

This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.

30 | */ 31 | public class MergeSort implements IntSorter { 32 | 33 | // Threshold for switching to insertion sort on small arrays 34 | private static final int SMALL_ARRAY_THRESHOLD = 32; 35 | 36 | // Temporary buffer for merging 37 | private int[] buffer; 38 | 39 | // Insertion sort used for small arrays 40 | private final IntSorter insertionSort; 41 | 42 | /** 43 | * Constructs a new {@code MergeSort} instance. This constructor initializes an empty buffer for merging and 44 | * uses an {@code InsertionSort} instance for sorting small arrays. 45 | */ 46 | public MergeSort() { 47 | this.buffer = new int[0]; 48 | this.insertionSort = new InsertionSort(); 49 | } 50 | 51 | /** 52 | * Sorts the specified portion of the input array using the merge sort algorithm. 53 | * 54 | *

This method divides the array into smaller sub-arrays, recursively sorts them using merge sort, and then 55 | * merges the sorted sub-arrays back together. For small sub-arrays (less than {@code SMALL_ARRAY_THRESHOLD}), insertion 56 | * sort is used for efficiency.

57 | * 58 | * @param data the array to be sorted. 59 | * @param start the starting index of the portion to be sorted. 60 | * @param count the number of elements to sort. 61 | * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (out-of-bounds indices). 62 | */ 63 | @Override 64 | public boolean sort(int[] data, int start, int count) { 65 | if ((data == null) || (count < 0) || (start < 0)) 66 | return false; 67 | 68 | if (start + count > data.length) 69 | return false; 70 | 71 | if (count < 2) 72 | return true; 73 | 74 | // Ensure buffer is large enough to hold the array 75 | if (this.buffer.length < count) 76 | this.buffer = new int[count]; 77 | 78 | return this.mergesort(data, start, start + count - 1); 79 | } 80 | 81 | /** 82 | * Recursively performs merge sort on the specified sub-array. 83 | * 84 | *

This method splits the array into two halves and recursively sorts each half. Once the sub-arrays are sorted, 85 | * they are merged together using the {@code merge} method.

86 | * 87 | * @param data the array to be sorted. 88 | * @param low the starting index of the sub-array to sort. 89 | * @param high the ending index of the sub-array to sort. 90 | * @return {@code true} if the sorting was successful. 91 | */ 92 | private boolean mergesort(int[] data, int low, int high) { 93 | if (low < high) { 94 | int count = high - low + 1; 95 | 96 | // Use insertion sort for small sub-arrays 97 | if (count < SMALL_ARRAY_THRESHOLD) 98 | return this.insertionSort.sort(data, low, count); 99 | 100 | int middle = low + count / 2; 101 | this.mergesort(data, low, middle); 102 | this.mergesort(data, middle + 1, high); 103 | this.merge(data, low, middle, high); 104 | } 105 | 106 | return true; 107 | } 108 | 109 | /** 110 | * Merges two sorted sub-arrays into one sorted array. 111 | * 112 | *

This method performs the merging step of merge sort. It copies the sorted elements from the left and right halves 113 | * of the sub-array into a temporary buffer and then merges them back into the original array.

114 | * 115 | * @param data the array containing the sub-arrays to merge. 116 | * @param low the starting index of the left sub-array. 117 | * @param middle the ending index of the left sub-array. 118 | * @param high the ending index of the right sub-array. 119 | */ 120 | private void merge(int[] data, int low, int middle, int high) { 121 | int count = high - low + 1; 122 | 123 | // For small sub-arrays, copy the elements into the buffer 124 | if (count < 16) { 125 | for (int ii = low; ii <= high; ii++) 126 | this.buffer[ii] = data[ii]; 127 | } else { 128 | // For larger sub-arrays, use System.arraycopy for efficiency 129 | System.arraycopy(data, low, this.buffer, low, count); 130 | } 131 | 132 | int i = low; 133 | int j = middle + 1; 134 | int k = low; 135 | 136 | // Merge the two sorted sub-arrays 137 | while ((i <= middle) && (j <= high)) { 138 | if (this.buffer[i] <= this.buffer[j]) 139 | data[k] = this.buffer[i++]; 140 | else 141 | data[k] = this.buffer[j++]; 142 | 143 | k++; 144 | } 145 | 146 | count = middle - i + 1; 147 | 148 | // Copy the remaining elements of the left sub-array, if any 149 | if (count < 16) { 150 | while (i <= middle) 151 | data[k++] = this.buffer[i++]; 152 | } else { 153 | // Use System.arraycopy for efficiency 154 | System.arraycopy(this.buffer, i, data, k, count); 155 | } 156 | } 157 | } 158 | 159 | --------------------------------------------------------------------------------