├── .github
│   └── workflows
│       ├── ant.yml
│       ├── codeql.yml
│       ├── maven.yaml
│       └── sonar.yaml
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── SECURITY.md
└── java
    ├── .gitignore
    ├── MANIFEST.MF
    ├── README.md
    ├── build.xml
    ├── pom.xml
    └── src
        ├── main
        │   └── java
        │       └── io
        │           └── github
        │               └── flanglet
        │                   └── kanzi
        │                       ├── ArrayComparator.java
        │                       ├── BitStreamException.java
        │                       ├── ByteSorter.java
        │                       ├── ByteTransform.java
        │                       ├── EntropyDecoder.java
        │                       ├── EntropyEncoder.java
        │                       ├── Error.java
        │                       ├── Event.java
        │                       ├── Global.java
        │                       ├── InputBitStream.java
        │                       ├── IntSorter.java
        │                       ├── IntTransform.java
        │                       ├── Listener.java
        │                       ├── Magic.java
        │                       ├── Memory.java
        │                       ├── OutputBitStream.java
        │                       ├── Predictor.java
        │                       ├── SliceByteArray.java
        │                       ├── SliceIntArray.java
        │                       ├── app
        │                       │   ├── Benchmark.java
        │                       │   ├── BlockCompressor.java
        │                       │   ├── BlockDecompressor.java
        │                       │   ├── InfoPrinter.java
        │                       │   └── Kanzi.java
        │                       ├── bitstream
        │                       │   ├── DebugInputBitStream.java
        │                       │   ├── DebugOutputBitStream.java
        │                       │   ├── DefaultInputBitStream.java
        │                       │   └── DefaultOutputBitStream.java
        │                       ├── entropy
        │                       │   ├── ANSRangeDecoder.java
        │                       │   ├── ANSRangeEncoder.java
        │                       │   ├── BinaryEntropyDecoder.java
        │                       │   ├── BinaryEntropyEncoder.java
        │                       │   ├── CMDecoder.java
        │                       │   ├── CMEncoder.java
        │                       │   ├── CMPredictor.java
        │                       │   ├── EntropyCodecFactory.java
        │                       │   ├── EntropyUtils.java
        │                       │   ├── ExpGolombDecoder.java
        │                       │   ├── ExpGolombEncoder.java
        │                       │   ├── FPAQDecoder.java
        │                       │   ├── FPAQEncoder.java
        │                       │   ├── FastLogisticAdaptiveProbMap.java
        │                       │   ├── HuffmanCommon.java
        │                       │   ├── HuffmanDecoder.java
        │                       │   ├── HuffmanEncoder.java
        │                       │   ├── LinearAdaptiveProbMap.java
        │                       │   ├── LogisticAdaptiveProbMap.java
        │                       │   ├── NullEntropyDecoder.java
        │                       │   ├── NullEntropyEncoder.java
        │                       │   ├── RangeDecoder.java
        │                       │   ├── RangeEncoder.java
        │                       │   └── TPAQPredictor.java
        │                       ├── io
        │                       │   ├── CompressedInputStream.java
        │                       │   ├── CompressedOutputStream.java
        │                       │   ├── IOException.java
        │                       │   ├── IOUtil.java
        │                       │   └── NullOutputStream.java
        │                       ├── module-info.java
        │                       ├── transform
        │                       │   ├── AliasCodec.java
        │                       │   ├── BWT.java
        │                       │   ├── BWTBlockCodec.java
        │                       │   ├── BWTS.java
        │                       │   ├── DivSufSort.java
        │                       │   ├── EXECodec.java
        │                       │   ├── FSDCodec.java
        │                       │   ├── LZCodec.java
        │                       │   ├── NullTransform.java
        │                       │   ├── RLT.java
        │                       │   ├── ROLZCodec.java
        │                       │   ├── SA_IS.java
        │                       │   ├── SBRT.java
        │                       │   ├── SRT.java
        │                       │   ├── Sequence.java
        │                       │   ├── TextCodec.java
        │                       │   ├── TransformFactory.java
        │                       │   ├── UTFCodec.java
        │                       │   └── ZRLT.java
        │                       └── util
        │                           ├── LyndonWords.java
        │                           ├── hash
        │                           │   ├── XXHash32.java
        │                           │   └── XXHash64.java
        │                           └── sort
        │                               ├── BucketSort.java
        │                               ├── DefaultArrayComparator.java
        │                               ├── HeapSort.java
        │                               ├── InsertionSort.java
        │                               ├── MergeSort.java
        │                               ├── QuickSort.java
        │                               └── RadixSort.java
        └── test
            └── java
                └── io
                    └── github
                        └── flanglet
                            └── kanzi
                                └── test
                                    ├── TestBWT.java
                                    ├── TestCompressedStream.java
                                    ├── TestDefaultBitStream.java
                                    ├── TestEntropyCodec.java
                                    └── TestTransforms.java
/.github/workflows/ant.yml:
--------------------------------------------------------------------------------
1 | # This workflow will build a Java project with Ant
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-ant
3 |
4 | name: Java CI
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v5
19 | - name: Set up JDK 17
20 | uses: actions/setup-java@v5
21 | with:
22 | java-version: '17'
23 | distribution: 'adopt'
24 | - name: Build with Ant
25 | run: cd java && ant -noinput -buildfile build.xml
26 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "master" ]
8 | schedule:
9 | - cron: "35 16 * * 2"
10 |
11 | jobs:
12 | analyze:
13 | name: Analyze
14 | runs-on: ubuntu-latest
15 | permissions:
16 | actions: read
17 | contents: read
18 | security-events: write
19 |
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | language: [ java ]
24 |
25 | steps:
26 | - name: Checkout
27 | uses: actions/checkout@v5
28 |
29 | - name: Initialize CodeQL
30 | uses: github/codeql-action/init@v3
31 | with:
32 | languages: ${{ matrix.language }}
33 | queries: +security-and-quality
34 |
35 | - name: Build with Ant
36 | run: cd java && ant -noinput -buildfile build.xml
37 |
38 | - name: Perform CodeQL Analysis
39 | uses: github/codeql-action/analyze@v3
40 | with:
41 | category: "/language:${{ matrix.language }}"
42 |
--------------------------------------------------------------------------------
/.github/workflows/maven.yaml:
--------------------------------------------------------------------------------
1 | # This workflow will build a Java project with Maven
2 |
3 | name: Java CI
4 |
5 | on:
6 | push:
7 | branches: [ master ]
8 | pull_request:
9 | branches: [ master ]
10 |
11 | jobs:
12 | build:
13 |
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 | - uses: actions/checkout@v5
18 | - name: Set up JDK 17
19 | uses: actions/setup-java@v5
20 | with:
21 | java-version: '17'
22 | distribution: 'adopt'
23 | - name: Build with Maven
24 | run: cd java && mvn
25 |
--------------------------------------------------------------------------------
/.github/workflows/sonar.yaml:
--------------------------------------------------------------------------------
1 | name: SonarCloud
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | types: [opened, synchronize, reopened]
9 |
10 | jobs:
11 | build:
12 | name: Build and analyze
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v5
16 | with:
17 | fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
18 | - name: Set up JDK 17
19 | uses: actions/setup-java@v5
20 | with:
21 | java-version: 17
22 | distribution: 'zulu' # Alternative distribution options are available.
23 | - name: Cache SonarCloud packages
24 | uses: actions/cache@v3
25 | with:
26 | path: ~/.sonar/cache
27 | key: ${{ runner.os }}-sonar
28 | restore-keys: ${{ runner.os }}-sonar
29 | - name: Cache Maven packages
30 | uses: actions/cache@v3
31 | with:
32 | path: ~/.m2
33 | key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
34 | restore-keys: ${{ runner.os }}-m2
35 | - name: Build and analyze
36 | env:
37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any
38 | SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
39 | run: cd java && mvn -Dmaven.test.skip=true -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=flanglet_kanzi
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.zip
3 | *.xml
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | install: cd java && mvn compile
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Kanzi
3 |
4 | Kanzi is a modern, modular, expandable, and efficient lossless data compressor written in Java.
5 |
6 | * Modern: Kanzi implements state-of-the-art compression algorithms and is built to fully utilize multi-core CPUs via built-in multi-threading.
7 | * Modular: Entropy codecs and data transforms can be selected and combined at runtime to best suit the specific data being compressed.
8 | * Expandable: A clean, interface-driven design—with no external dependencies—makes Kanzi easy to integrate, extend, and customize.
9 | * Efficient: Carefully optimized to balance compression ratio and speed for practical, high-performance usage.
10 |
11 | Unlike most mainstream lossless compressors, Kanzi is not limited to a single compression paradigm. By combining multiple algorithms and techniques, it supports a broader range of compression ratios and adapts better to diverse data types.
12 |
13 | Most traditional compressors underutilize modern hardware by running single-threaded—even on machines with many cores. Kanzi, in contrast, is concurrent by design, compressing multiple blocks in parallel across threads for significant performance gains. However, it is not compatible with standard compression formats.
14 |
15 | It’s important to note that Kanzi is a data compressor, not an archiver. It includes optional checksums for verifying data integrity, but does not provide features like cross-file deduplication or data recovery mechanisms. That said, it produces a seekable bitstream—meaning one or more consecutive blocks can be decompressed independently, without needing to process the entire stream.
16 |
17 |
18 | For more details, check the [Wiki](https://github.com/flanglet/kanzi/wiki), the [Q&A](https://github.com/flanglet/kanzi/wiki/Q&A) and [DeepWiki](https://deepwiki.com/flanglet/kanzi).
19 |
20 | See how to reuse the code here: https://github.com/flanglet/kanzi/wiki/Using-and-extending-the-code
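The wiki page above documents the embedding API. As a rough sketch only, in-process compression and decompression revolve around `CompressedOutputStream` and `CompressedInputStream` (package `io.github.flanglet.kanzi.io` in the tree above); the constructor arguments and option keys below are illustrative assumptions, not the documented API:

```java
import io.github.flanglet.kanzi.io.CompressedInputStream;
import io.github.flanglet.kanzi.io.CompressedOutputStream;

import java.io.*;
import java.util.HashMap;
import java.util.Map;

public class RoundTrip {
    public static void main(String[] args) throws IOException {
        // Option keys ("transform", "entropy", "jobs") are assumed names; check the wiki.
        Map<String, Object> options = new HashMap<>();
        options.put("transform", "TEXT+UTF+RLT"); // chain of block transforms
        options.put("entropy", "TPAQ");           // entropy codec
        options.put("jobs", 4);                   // number of worker threads

        byte[] payload = "hello kanzi".getBytes();

        // Compress a small payload to a file.
        try (OutputStream os = new FileOutputStream("data.knz");
             CompressedOutputStream cos = new CompressedOutputStream(os, options)) {
            cos.write(payload);
        }

        // Decompress it back; the bitstream header carries the codec/transform choices.
        try (InputStream is = new FileInputStream("data.knz");
             CompressedInputStream cis = new CompressedInputStream(is, new HashMap<>())) {
            byte[] buf = new byte[payload.length];
            int n = cis.read(buf);

            if (n > 0)
                System.out.println(new String(buf, 0, n));
        }
    }
}
```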
21 |
22 | There is a C++ implementation available here: https://github.com/flanglet/kanzi-cpp
23 |
24 | There is a Go implementation available here: https://github.com/flanglet/kanzi-go
25 |
26 |
27 | 
28 | [Quality gate on SonarCloud](https://sonarcloud.io/summary/new_code?id=flanglet_kanzi)
29 |
30 |
32 |
33 | [License](LICENSE)
34 | [DeepWiki](https://deepwiki.com/flanglet/kanzi)
35 |
36 |
37 | ## Why Kanzi
38 |
39 |
40 | There are already many excellent, open-source lossless data compressors available.
41 |
42 | If gzip is beginning to show its age, modern alternatives like **zstd** and **brotli** offer compelling replacements. Both are open-source, standardized, and used daily by millions. **Zstd** is especially notable for its exceptional speed and is often the best choice in general-purpose compression.
43 |
44 | However, there are scenarios where **Kanzi** may offer superior performance:
45 |
46 | While gzip, LZMA, brotli, and zstd are all based on LZ (Lempel-Ziv) compression, they are inherently limited in the compression ratios they can achieve. **Kanzi** goes further by incorporating **BWT (Burrows-Wheeler Transform)** and **CM (Context Modeling)**, which can outperform traditional LZ-based methods in certain cases.
47 |
48 | LZ-based compressors are ideal for software distribution, where data is compressed once and decompressed many times, thanks to their fast decompression speeds—though they tend to be slower when compressing at higher ratios. But in other scenarios—such as real-time data generation, one-off data transfers, or backups—**compression speed becomes critical**. Here, Kanzi can shine.
49 |
50 | **Kanzi** also features a suite of built-in, customizable data transforms tailored for specific data types (e.g., multimedia, UTF, text, DNA, etc.), which can be selectively applied during compression for better efficiency.
51 |
52 | Furthermore, Kanzi is designed to **leverage modern multi-core CPUs** to boost performance.
53 |
54 | Finally, **extensibility** is a key strength: implementing new transforms or entropy codecs—whether for experimentation or to improve performance on niche data types—is straightforward and developer-friendly.
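For instance, a new block transform only has to implement the `ByteTransform` interface from the root package (visible in the source tree above). The sketch below assumes that interface exposes `forward`/`inverse` over `SliceByteArray` plus `getMaxEncodedLength`; treat it as an illustration rather than the exact contract:

```java
import io.github.flanglet.kanzi.ByteTransform;
import io.github.flanglet.kanzi.SliceByteArray;

// Toy transform: XOR every byte with a constant. Not useful for compression,
// but it shows the shape of a pluggable transform.
public class XorTransform implements ByteTransform {
    private static final byte KEY = (byte) 0x5A;

    @Override
    public boolean forward(SliceByteArray src, SliceByteArray dst) {
        if ((src == null) || (dst == null) || (dst.array.length - dst.index < src.length))
            return false;

        for (int i = 0; i < src.length; i++)
            dst.array[dst.index + i] = (byte) (src.array[src.index + i] ^ KEY);

        // Advance the slice indices past the processed data.
        src.index += src.length;
        dst.index += src.length;
        return true;
    }

    @Override
    public boolean inverse(SliceByteArray src, SliceByteArray dst) {
        // XOR is its own inverse.
        return this.forward(src, dst);
    }

    @Override
    public int getMaxEncodedLength(int srcLength) {
        return srcLength; // this transform never expands the data
    }
}
```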
55 |
56 | ## Benchmarks
57 |
58 | Test machine:
59 |
62 | Apple M3 24 GB Sonoma 14.6.1
63 |
64 | Kanzi version 2.4.0 Java implementation
65 |
66 | JDK 23.0.1+11-39
67 |
68 | On this machine, Kanzi uses 4 threads (half of CPUs by default).
69 |
70 | bzip3 runs with 4 threads.
71 |
72 | zstd and lz4 use 4 threads for compression and 1 for decompression; the other compressors are single-threaded.
73 |
74 | The default block size at level 9 is 32 MB, which severely limits the number of threads
75 | in use (especially with enwik8), but all tests are performed with default values.
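To put numbers on that: with 32 MB blocks, enwik8 (100,000,000 bytes) is split into only about ⌈100,000,000 / 33,554,432⌉ = 3 blocks, so at most three threads can be busy at level 9 regardless of how many are requested.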
76 |
77 |
78 | ### silesia.tar
79 |
80 | Download at http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
81 |
82 | | Compressor | Encoding (ms) | Decoding (ms) | Size |
83 | |---------------------------------|-----------------|-----------------|------------------|
84 | |Original | | | 211,957,760 |
85 | |s2 -cpu 4 | 179 | 294 | 86,892,891 |
86 | |**Kanzi -l 1** | **839** | **263** | 80,245,856 |
87 | |lz4 1.1.10 -T4 -4 | 527 | 121 | 79,919,901 |
88 | |zstd 1.5.8 -T4 -2 | 147 | 150 | 69,410,383 |
89 | |**Kanzi -l 2** | **701** | **437** | 68,860,099 |
90 | |brotli 1.1.0 -2 | 907 | 402 | 68,039,159 |
91 | |Apple gzip 430.140.2 -9 | 10406 | 273 | 67,648,481 |
92 | |**Kanzi -l 3** | **1258** | **503** | 64,266,936 |
93 | |zstd 1.5.8 -T4 -5 | 300 | 154 | 62,851,716 |
94 | |**Kanzi -l 4** | **1718** | **912** | 61,131,554 |
95 | |zstd 1.5.8 -T4 -9 | 752 | 137 | 59,190,090 |
96 | |brotli 1.1.0 -6 | 3596 | 340 | 58,557,128 |
97 | |zstd 1.5.8 -T4 -13 | 4537 | 138 | 57,814,719 |
98 | |brotli 1.1.0 -9 | 19809 | 329 | 56,414,012 |
99 | |bzip2 1.0.8 -9 | 9673 | 3140 | 54,602,583 |
100 | |**Kanzi -l 5** | **3431** | **1759** | 54,025,588 |
101 | |zstd 1.5.8 -T4 -19 | 20482 | 151 | 52,858,610 |
102 | |**Kanzi -l 6**                  | **4687**        | **3710**        |     49,521,392   |
103 | |xz 5.8.1 -9 | 48516 | 1594 | 48,774,000 |
104 | |bzip3 1.5.1.r3-g428f422 -j 4 | 8559 | 3948 | 47,256,794 |
105 | |**Kanzi -l 7** | **5248** | **3689** | 47,312,772 |
106 | |**Kanzi -l 8** | **16856** | **18060** | 43,260,254 |
107 | |**Kanzi -l 9** | **24852** | **27886** | 41,858,030 |
108 |
109 |
110 |
111 | ### enwik8
112 |
113 | Download at https://mattmahoney.net/dc/enwik8.zip
114 |
115 | | Compressor | Encoding (ms) | Decoding (ms) | Size |
116 | |--------------|---------------|---------------|--------------|
117 | |Original | | | 100,000,000 |
118 | |Kanzi -l 1 | 559 | 139 | 43,644,013 |
119 | |Kanzi -l 2 | 498 | 227 | 37,570,404 |
120 | |Kanzi -l 3 | 798 | 439 | 32,466,232 |
121 | |Kanzi -l 4 | 1060 | 662 | 29,536,517 |
122 | |Kanzi -l 5   |     1422      |     790       |  26,523,940  |
123 | |Kanzi -l 6 | 1965 | 1175 | 24,076,765 |
124 | |Kanzi -l 7 | 2606 | 1787 | 22,817,360 |
125 | |Kanzi -l 8 | 7377 | 7251 | 21,181,992 |
126 | |Kanzi -l 9 | 10031 | 11412 | 20,035,144 |
127 |
128 |
129 | ## Build
130 |
131 | First option (Ant), from the `java` directory:
132 |
133 | ```ant```
134 |
135 | Second option (Maven), from the `java` directory:
136 |
137 | ```mvn -Dmaven.test.skip=true```
138 |
139 |
140 | ## Credits
141 |
142 | Matt Mahoney,
143 | Yann Collet,
144 | Jan Ondrus,
145 | Yuta Mori,
146 | Ilya Muravyov,
147 | Neal Burns,
148 | Fabian Giesen,
149 | Jarek Duda,
150 | Ilya Grebnov
151 |
152 | ## Disclaimer
153 |
154 | Use at your own risk. Always keep a copy of your original files.
155 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | Security updates are applied only to the latest release.
4 |
5 | ## Vulnerability Definition
6 |
7 | A security vulnerability is a bug that, given a certain input, triggers a crash or an infinite loop. Compression and decompression failures do not belong in this category.
8 |
9 | ## Reporting a Vulnerability
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.** If you have discovered a security vulnerability in this project, report it privately.
12 |
13 | Please disclose it at [security advisory](https://github.com/flanglet/kanzi/security/advisories/new).
14 |
15 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
16 |
17 | * Operating system
18 | * Hardware: CPU, memory
19 | * Kanzi version
20 | * Command line invoked
21 | * Error reported/crash data/log output
22 |
23 | If possible, provide a minimal reproducer.
24 |
--------------------------------------------------------------------------------
/java/.gitignore:
--------------------------------------------------------------------------------
1 | target/
--------------------------------------------------------------------------------
/java/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Built-By: flanglet
3 | Main-Class: io.github.flanglet.kanzi.app.Kanzi
4 |
5 |
--------------------------------------------------------------------------------
/java/README.md:
--------------------------------------------------------------------------------
1 | Build Kanzi
2 | ===========
3 |
4 | Run 'ant' or 'ant build_compress' to generate a JAR file with compression classes only.
5 |
6 | Run 'ant build_lib' to generate a JAR file with all classes in the tree, excluding tests.
7 |
8 | Run 'ant build_all' to generate a JAR file with all classes in the tree, including tests.
9 |
10 | For Maven, run 'mvn clean install -DskipTests'.
11 |
12 | The generated jar file is under 'target'.
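The jar is runnable directly: the manifest's Main-Class is io.github.flanglet.kanzi.app.Kanzi, so 'java -jar target/<jar-name>.jar' starts the command-line tool. The exact jar file name depends on the version declared in pom.xml (e.g. kanzi-2.4.0.jar).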
13 |
14 |
--------------------------------------------------------------------------------
/java/build.xml:
--------------------------------------------------------------------------------
1 |
2 |
22 | * Classes that implement this interface are expected to provide an implementation 23 | * of the {@code processEvent} method, which will be invoked when an event occurs. 24 | *
25 | * 26 | */ 27 | public interface Listener { 28 | 29 | /** 30 | * Processes the given event. 31 | *32 | * This method will be called whenever an event occurs that the listener is 33 | * interested in. Implementations of this method should define how to handle 34 | * the event. 35 | *
36 | * 37 | * @param evt The event to be processed. Cannot be {@code null}. 38 | */ 39 | public void processEvent(Event evt); 40 | } 41 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/OutputBitStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code OutputBitStream} interface defines methods for writing bits 20 | * to a bit stream. 21 | */ 22 | public interface OutputBitStream { 23 | 24 | /** 25 | * Writes the least significant bit of the input integer to the bit stream. 26 | * 27 | * @param bit the bit to write (0 or 1) 28 | * @throws BitStreamException if the stream is closed or if an error occurs 29 | */ 30 | public void writeBit(int bit) throws BitStreamException; 31 | 32 | /** 33 | * Writes a specified number of bits from the input long value to the bit stream. 34 | * 35 | * @param bits the long value containing the bits to write 36 | * @param length the number of bits to write (must be between 1 and 64) 37 | * @return the number of bits written 38 | * @throws BitStreamException if the stream is closed or if an error occurs 39 | */ 40 | public int writeBits(long bits, int length) throws BitStreamException; 41 | 42 | /** 43 | * Writes bits from a byte array to the bit stream starting at the specified index. 44 | * 45 | * @param bits the byte array containing the bits to write 46 | * @param start the starting index in the byte array 47 | * @param nbBits the number of bits to write 48 | * @return the number of bits written 49 | * @throws BitStreamException if the stream is closed or if an error occurs 50 | */ 51 | public int writeBits(byte[] bits, int start, int nbBits) throws BitStreamException; 52 | 53 | /** 54 | * Closes the bit stream and releases any resources associated with it. 55 | * 56 | * @throws BitStreamException if an error occurs while closing the stream 57 | */ 58 | public void close() throws BitStreamException; 59 | 60 | /** 61 | * Returns the total number of bits that have been written to the stream. 62 | * 63 | * @return the number of bits written 64 | */ 65 | public long written(); 66 | } 67 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/Predictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code Predictor} interface is used by a binary entropy coder to 20 | * predict the probabilities of 0 and 1 symbols in the input signal. 21 | * 22 | *Implementations of this interface should maintain a probability model 23 | * that can be updated based on input bits and can provide a split value 24 | * representing the predicted probability of the next bit being 1.
25 | */ 26 | public interface Predictor { 27 | 28 | /** 29 | * Updates the probability model based on the provided bit. 30 | * 31 | * @param bit the bit to update the model with (0 or 1) 32 | */ 33 | public void update(int bit); 34 | 35 | /** 36 | * Returns a split value representing the probability of the next bit being 1. 37 | * The returned value is in the range of [0..4095], where a value of 38 | * 410 roughly corresponds to a probability of 10% for the next bit being 1. 39 | * 40 | * @return the split value representing the probability of 1 41 | */ 42 | public int get(); 43 | } 44 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/SliceByteArray.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | import java.util.Objects; 19 | 20 | 21 | /** 22 | * A lightweight implementation of a byte array slice. 23 | * 24 | *This class provides a way to manage a portion of a byte array, allowing 25 | * for the representation of a subset of the array with a specified length and 26 | * starting index. This can be useful for handling byte data efficiently without 27 | * creating multiple copies.
28 | */ 29 | public final class SliceByteArray { 30 | public byte[] array; // array.length is the slice capacity 31 | public int length; 32 | public int index; 33 | 34 | /** 35 | * Constructs an empty {@code SliceByteArray} with a zero-length array. 36 | */ 37 | public SliceByteArray() { 38 | this(new byte[0], 0, 0); 39 | } 40 | 41 | /** 42 | * Constructs a {@code SliceByteArray} with the specified array and index. 43 | * 44 | * @param array the byte array 45 | * @param idx the starting index of the slice 46 | * @throws NullPointerException if the provided array is null 47 | * @throws NullPointerException if the provided index is negative 48 | */ 49 | public SliceByteArray(byte[] array, int idx) { 50 | if (array == null) 51 | throw new NullPointerException("The array cannot be null"); 52 | if (idx < 0) 53 | throw new NullPointerException("The index cannot be negative"); 54 | 55 | this.array = array; 56 | this.length = array.length; 57 | this.index = idx; 58 | } 59 | 60 | /** 61 | * Constructs a {@code SliceByteArray} with the specified array, length, and index. 62 | * 63 | * @param array the byte array 64 | * @param length the length of the slice 65 | * @param idx the starting index of the slice 66 | * @throws NullPointerException if the provided array is null 67 | * @throws IllegalArgumentException if the provided length is negative 68 | * @throws NullPointerException if the provided index is negative 69 | */ 70 | public SliceByteArray(byte[] array, int length, int idx) { 71 | if (array == null) 72 | throw new NullPointerException("The array cannot be null"); 73 | if (length < 0) 74 | throw new IllegalArgumentException("The length cannot be negative"); 75 | if (idx < 0) 76 | throw new NullPointerException("The index cannot be negative"); 77 | 78 | this.array = array; 79 | this.length = length; 80 | this.index = idx; 81 | } 82 | 83 | @Override 84 | public boolean equals(Object o) { 85 | try { 86 | if (o == null) 87 | return false; 88 | if (this == o) 89 | return true; 90 | 91 | SliceByteArray sa = (SliceByteArray) o; 92 | return (this.array == sa.array) && 93 | (this.length == sa.length) && 94 | (this.index == sa.index); 95 | } catch (ClassCastException e) { 96 | return false; 97 | } 98 | } 99 | 100 | @Override 101 | public int hashCode() { 102 | return Objects.hashCode(this.array); 103 | } 104 | 105 | @Override 106 | @SuppressWarnings("lgtm [java/print-array]") 107 | public String toString() { 108 | StringBuilder builder = new StringBuilder(100); 109 | builder.append("[ data="); 110 | builder.append(String.valueOf(this.array)); 111 | builder.append(", len="); 112 | builder.append(this.length); 113 | builder.append(", idx="); 114 | builder.append(this.index); 115 | builder.append("]"); 116 | return builder.toString(); 117 | } 118 | 119 | /** 120 | * Validates the provided {@code SliceByteArray} instance. 
121 | * 122 | * @param sa the {@code SliceByteArray} to validate 123 | * @return {@code true} if the instance is valid, {@code false} otherwise 124 | */ 125 | public static boolean isValid(SliceByteArray sa) { 126 | if (sa == null) 127 | return false; 128 | if (sa.array == null) 129 | return false; 130 | if (sa.index < 0) 131 | return false; 132 | if (sa.length < 0) 133 | return false; 134 | 135 | return (sa.index <= sa.array.length); 136 | } 137 | } 138 | 139 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/SliceIntArray.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | import java.util.Objects; 19 | 20 | /** 21 | * A lightweight implementation of a slice for an integer array. 22 | * 23 | *This class allows for managing a portion of an integer array, providing 24 | * a means to represent a subset of the array with a specified length and 25 | * starting index. This can be useful for efficiently handling integer data 26 | * without creating multiple copies.
27 | */ 28 | public final class SliceIntArray { 29 | public int[] array; // array.length is the slice capacity 30 | public int index; 31 | public int length; 32 | 33 | /** 34 | * Constructs an empty {@code SliceIntArray} with a zero-length array. 35 | */ 36 | public SliceIntArray() { 37 | this(new int[0], 0, 0); 38 | } 39 | 40 | /** 41 | * Constructs a {@code SliceIntArray} with the specified array and index. 42 | * 43 | * @param array the integer array 44 | * @param idx the starting index of the slice 45 | * @throws NullPointerException if the provided array is null 46 | * @throws NullPointerException if the provided index is negative 47 | */ 48 | public SliceIntArray(int[] array, int idx) { 49 | if (array == null) 50 | throw new NullPointerException("The array cannot be null"); 51 | if (idx < 0) 52 | throw new NullPointerException("The index cannot be negative"); 53 | 54 | this.array = array; 55 | this.length = array.length; 56 | this.index = idx; 57 | } 58 | 59 | /** 60 | * Constructs a {@code SliceIntArray} with the specified array, length, and index. 61 | * 62 | * @param array the integer array 63 | * @param length the length of the slice 64 | * @param idx the starting index of the slice 65 | * @throws NullPointerException if the provided array is null 66 | * @throws IllegalArgumentException if the provided length is negative 67 | * @throws NullPointerException if the provided index is negative 68 | */ 69 | public SliceIntArray(int[] array, int length, int idx) { 70 | if (array == null) 71 | throw new NullPointerException("The array cannot be null"); 72 | if (length < 0) 73 | throw new IllegalArgumentException("The length cannot be negative"); 74 | if (idx < 0) 75 | throw new NullPointerException("The index cannot be negative"); 76 | 77 | this.array = array; 78 | this.length = length; 79 | this.index = idx; 80 | } 81 | 82 | @Override 83 | public boolean equals(Object o) { 84 | try { 85 | if (o == null) 86 | return false; 87 | if (this == o) 88 | return true; 89 | 90 | SliceIntArray sa = (SliceIntArray) o; 91 | return (this.array == sa.array) && 92 | (this.length == sa.length) && 93 | (this.index == sa.index); 94 | } catch (ClassCastException e) { 95 | return false; 96 | } 97 | } 98 | 99 | @Override 100 | public int hashCode() { 101 | return Objects.hashCode(this.array); 102 | } 103 | 104 | @Override 105 | @SuppressWarnings("lgtm [java/print-array]") 106 | public String toString() { 107 | StringBuilder builder = new StringBuilder(100); 108 | builder.append("[ data="); 109 | builder.append(String.valueOf(this.array)); 110 | builder.append(", len="); 111 | builder.append(this.length); 112 | builder.append(", idx="); 113 | builder.append(this.index); 114 | builder.append("]"); 115 | return builder.toString(); 116 | } 117 | 118 | /** 119 | * Validates the provided {@code SliceIntArray} instance. 
120 | * 121 | * @param sa the {@code SliceIntArray} to validate 122 | * @return {@code true} if the instance is valid, {@code false} otherwise 123 | */ 124 | public static boolean isValid(SliceIntArray sa) { 125 | if (sa == null) 126 | return false; 127 | if (sa.array == null) 128 | return false; 129 | if (sa.index < 0) 130 | return false; 131 | if (sa.length < 0) 132 | return false; 133 | 134 | return (sa.index <= sa.array.length); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/app/InfoPrinter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.app; 17 | 18 | import io.github.flanglet.kanzi.Event; 19 | import java.io.PrintStream; 20 | import java.util.Map; 21 | import java.util.concurrent.ConcurrentHashMap; 22 | import io.github.flanglet.kanzi.Listener; 23 | 24 | /** 25 | * The {@code InfoPrinter} class implements the {@code Listener} interface 26 | * and provides functionality to process events and print information 27 | * about encoding or decoding processes. 28 | */ 29 | public class InfoPrinter implements Listener 30 | { 31 | /** 32 | * Enum representing the type of information to be printed. 33 | */ 34 | public enum Type 35 | { 36 | /** Represents encoding information. */ 37 | ENCODING, 38 | /** Represents decoding information. */ 39 | DECODING 40 | } 41 | 42 | private final PrintStream ps; 43 | private final Map24 | * Implementation of a Context Model based predictor. 25 | * This predictor estimates the probability of the next bit being 1 based on 26 | * a combination of different contexts and adaptive learning rates. 27 | *
28 | * 29 | *30 | * It uses multiple probability counters that are updated based on the 31 | * actual decoded bit, allowing it to adapt to the characteristics of the 32 | * input data. 33 | *
34 | */ 35 | public class CMPredictor implements Predictor { 36 | /** 37 | * The rate at which the fastest probability counter adapts. 38 | */ 39 | private static final int FAST_RATE = 2; 40 | /** 41 | * The rate at which the medium probability counter adapts. 42 | */ 43 | private static final int MEDIUM_RATE = 4; 44 | /** 45 | * The rate at which the slowest probability counter adapts. 46 | */ 47 | private static final int SLOW_RATE = 6; 48 | /** 49 | * The scaling factor for probabilities, representing the maximum possible 50 | * probability value. 51 | */ 52 | private static final int PSCALE = 65536; 53 | 54 | /** 55 | * The first context variable, derived from the previous bit. 56 | */ 57 | private int c1; 58 | /** 59 | * The second context variable, derived from the bit before the previous one. 60 | */ 61 | private int c2; 62 | /** 63 | * The current context, formed by previous bits. 64 | */ 65 | private int ctx; 66 | /** 67 | * An index used for accessing probability counters. 68 | */ 69 | private int idx; 70 | /** 71 | * A mask used to differentiate between run contexts. 72 | */ 73 | private int runMask; 74 | /** 75 | * A 2D array of probability counters, used for general context modeling. 76 | * `counter1[i][j]` stores the probability for context `i` and sub-context `j`. 77 | */ 78 | private final int[][] counter1; 79 | /** 80 | * A 2D array of probability counters, used for more specific context modeling. 81 | * `counter2[i][j]` stores the probability for context `i` and sub-context `j`. 82 | */ 83 | private final int[][] counter2; 84 | /** 85 | * A flag indicating if the bitstream version is 3 or older, which affects 86 | * probability calculation. 87 | */ 88 | private final boolean isBsVersion3; 89 | 90 | /** 91 | * Creates a new {@code CMPredictor}. 92 | *93 | * The predictor is initialized with default probability values and can be 94 | * configured with a context map to handle different bitstream versions. 95 | *
96 | * 97 | * @param ctx A map containing context information for the predictor, 98 | * e.g., "bsVersion" to specify the bitstream version. 99 | */ 100 | public CMPredictor(Map129 | * The internal counters are adjusted based on the provided bit and adaptive 130 | * learning rates. 131 | * The context is also updated for the next prediction. 132 | *
133 | * 134 | * @param bit The actual bit that was decoded (0 or 1). 135 | */ 136 | @Override 137 | public void update(int bit) { 138 | final int[] counter1_ = this.counter1[this.ctx]; 139 | final int[] counter2_ = this.counter2[this.ctx | this.runMask]; 140 | 141 | if (bit == 0) { 142 | counter1_[256] -= (counter1_[256] >> FAST_RATE); 143 | counter1_[this.c1] -= (counter1_[this.c1] >> MEDIUM_RATE); 144 | counter2_[this.idx] -= (counter2_[this.idx] >> SLOW_RATE); 145 | counter2_[this.idx + 1] -= (counter2_[this.idx + 1] >> SLOW_RATE); 146 | this.ctx += this.ctx; 147 | } else { 148 | counter1_[256] -= ((counter1_[256] - PSCALE + 16) >> FAST_RATE); 149 | counter1_[this.c1] -= ((counter1_[this.c1] - PSCALE + 16) >> MEDIUM_RATE); 150 | counter2_[this.idx] -= ((counter2_[this.idx] - PSCALE + 16) >> SLOW_RATE); 151 | counter2_[this.idx + 1] -= ((counter2_[this.idx + 1] - PSCALE + 16) >> SLOW_RATE); 152 | this.ctx += (this.ctx + 1); 153 | } 154 | 155 | if (this.ctx > 255) { 156 | this.c2 = this.c1; 157 | this.c1 = this.ctx & 0xFF; 158 | this.ctx = 1; 159 | this.runMask = (this.c1 == this.c2) ? 0x100 : 0; 160 | } 161 | } 162 | 163 | /** 164 | * Returns the predicted probability of the next bit being 1. 165 | *166 | * The prediction is an integer value in the range [0, 4095], representing the 167 | * split point 168 | * in a range coding scheme. 169 | *
170 | * 171 | * @return The predicted probability of the next bit being 1, scaled to [0, 172 | * 4095]. 173 | */ 174 | @Override 175 | public int get() { 176 | final int[] pc1 = this.counter1[this.ctx]; 177 | final int p = (13 * (pc1[256] + pc1[this.c1]) + 6 * pc1[this.c2]) >> 5; 178 | this.idx = p >>> 12; 179 | final int[] pc2 = this.counter2[this.ctx | this.runMask]; 180 | final int x1 = pc2[this.idx]; 181 | final int x2 = pc2[this.idx + 1]; 182 | 183 | if (this.isBsVersion3 == true) { 184 | final int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12); 185 | return (p + 3 * ssep + 32) >>> 6; // rescale to [0..4095] 186 | } 187 | 188 | return (p + p + 3 * (x1 + x2) + 64) >>> 7; // rescale to [0..4095] 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/ExpGolombDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyDecoder; 19 | import io.github.flanglet.kanzi.InputBitStream; 20 | 21 | // Exponential Golomb Coder 22 | public final class ExpGolombDecoder implements EntropyDecoder { 23 | private final boolean signed; 24 | private final InputBitStream bitstream; 25 | 26 | public ExpGolombDecoder(InputBitStream bitstream, boolean signed) { 27 | if (bitstream == null) 28 | throw new NullPointerException("ExpGolomb codec: Invalid null bitstream parameter"); 29 | 30 | this.signed = signed; 31 | this.bitstream = bitstream; 32 | } 33 | 34 | public boolean isSigned() { 35 | return this.signed; 36 | } 37 | 38 | public byte decodeByte() { 39 | if (this.bitstream.readBit() == 1) 40 | return 0; 41 | 42 | int log2 = 1; 43 | 44 | while (this.bitstream.readBit() == 0) 45 | log2++; 46 | 47 | if (this.signed == true) { 48 | // Decode signed: read value + sign 49 | long res = this.bitstream.readBits(log2 + 1); 50 | final long sgn = res & 1; 51 | res = (res >>> 1) + (1 << log2) - 1; 52 | return (byte) ((res - sgn) ^ -sgn); // res or -res 53 | } 54 | 55 | // Decode unsigned 56 | return (byte) ((1 << log2) - 1 + this.bitstream.readBits(log2)); 57 | } 58 | 59 | @Override 60 | public InputBitStream getBitStream() { 61 | return this.bitstream; 62 | } 63 | 64 | @Override 65 | /** 66 | * Decodes a block of data by reading it directly from the bitstream. 67 | *68 | * This method reads {@code count} bytes from the bitstream into the provided 69 | * {@code block} array. 70 | *
71 | * 72 | * @param block The byte array to decode into. 73 | * @param blkptr The starting position in the block. 74 | * @param count The number of bytes to decode. 75 | * @return The number of bytes decoded, or -1 if an error occurs (e.g., invalid 76 | * parameters). 77 | */ 78 | public int decode(byte[] block, int blkptr, int count) { 79 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 80 | return -1; 81 | 82 | final int end = blkptr + count; 83 | 84 | for (int i = blkptr; i < end; i++) 85 | block[i] = this.decodeByte(); 86 | 87 | return count; 88 | } 89 | 90 | @Override 91 | public void dispose() { 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/ExpGolombEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyEncoder; 19 | import io.github.flanglet.kanzi.OutputBitStream; 20 | 21 | /** 22 | *23 | * Implementation of an Exponential Golomb encoder. 24 | *
25 | * This encoder supports both signed and unsigned encoding of byte values. 26 | * It uses a pre-computed cache for faster encoding of common values. 27 | */ 28 | public final class ExpGolombEncoder implements EntropyEncoder { 29 | private static final int[][] CACHE_VALUES = new int[][] { 30 | // Unsigned 31 | new int[] { 32 | 513, 1538, 1539, 2564, 2565, 2566, 2567, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 4624, 33 | 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 5664, 34 | 5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676, 5677, 5678, 5679, 5680, 35 | 5681, 5682, 5683, 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 6720, 36 | 6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 6729, 6730, 6731, 6732, 6733, 6734, 6735, 6736, 37 | 6737, 6738, 6739, 6740, 6741, 6742, 6743, 6744, 6745, 6746, 6747, 6748, 6749, 6750, 6751, 6752, 38 | 6753, 6754, 6755, 6756, 6757, 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, 6766, 6767, 6768, 39 | 6769, 6770, 6771, 6772, 6773, 6774, 6775, 6776, 6777, 6778, 6779, 6780, 6781, 6782, 6783, 7808, 40 | 7809, 7808, 6783, 6782, 6781, 6780, 6779, 6778, 6777, 6776, 6775, 6774, 6773, 6772, 6771, 6770, 41 | 6769, 6768, 6767, 6766, 6765, 6764, 6763, 6762, 6761, 6760, 6759, 6758, 6757, 6756, 6755, 6754, 42 | 6753, 6752, 6751, 6750, 6749, 6748, 6747, 6746, 6745, 6744, 6743, 6742, 6741, 6740, 6739, 6738, 43 | 6737, 6736, 6735, 6734, 6733, 6732, 6731, 6730, 6729, 6728, 6727, 6726, 6725, 6724, 6723, 6722, 44 | 6721, 6720, 5695, 5694, 5693, 5692, 5691, 5690, 5689, 5688, 5687, 5686, 5685, 5684, 5683, 5682, 45 | 5681, 5680, 5679, 5678, 5677, 5676, 5675, 5674, 5673, 5672, 5671, 5670, 5669, 5668, 5667, 5666, 46 | 5665, 5664, 4639, 4638, 4637, 4636, 4635, 4634, 4633, 4632, 4631, 4630, 4629, 4628, 4627, 4626, 47 | 4625, 4624, 3599, 3598, 3597, 3596, 3595, 3594, 3593, 3592, 2567, 2566, 2565, 2564, 1539, 1538 48 | }, 49 | // Signed 50 | new int[] { 51 | 513, 2052, 2054, 3080, 3082, 3084, 3086, 4112, 4114, 4116, 4118, 4120, 4122, 4124, 4126, 5152, 52 | 5154, 5156, 5158, 5160, 5162, 5164, 5166, 5168, 5170, 5172, 5174, 5176, 5178, 5180, 5182, 6208, 53 | 6210, 6212, 6214, 6216, 6218, 6220, 6222, 6224, 6226, 6228, 6230, 6232, 6234, 6236, 6238, 6240, 54 | 6242, 6244, 6246, 6248, 6250, 6252, 6254, 6256, 6258, 6260, 6262, 6264, 6266, 6268, 6270, 7296, 55 | 7298, 7300, 7302, 7304, 7306, 7308, 7310, 7312, 7314, 7316, 7318, 7320, 7322, 7324, 7326, 7328, 56 | 7330, 7332, 7334, 7336, 7338, 7340, 7342, 7344, 7346, 7348, 7350, 7352, 7354, 7356, 7358, 7360, 57 | 7362, 7364, 7366, 7368, 7370, 7372, 7374, 7376, 7378, 7380, 7382, 7384, 7386, 7388, 7390, 7392, 58 | 7394, 7396, 7398, 7400, 7402, 7404, 7406, 7408, 7410, 7412, 7414, 7416, 7418, 7420, 7422, 8448, 59 | 8451, 8449, 7423, 7421, 7419, 7417, 7415, 7413, 7411, 7409, 7407, 7405, 7403, 7401, 7399, 7397, 60 | 7395, 7393, 7391, 7389, 7387, 7385, 7383, 7381, 7379, 7377, 7375, 7373, 7371, 7369, 7367, 7365, 61 | 7363, 7361, 7359, 7357, 7355, 7353, 7351, 7349, 7347, 7345, 7343, 7341, 7339, 7337, 7335, 7333, 62 | 7331, 7329, 7327, 7325, 7323, 7321, 7319, 7317, 7315, 7313, 7311, 7309, 7307, 7305, 7303, 7301, 63 | 7299, 7297, 6271, 6269, 6267, 6265, 6263, 6261, 6259, 6257, 6255, 6253, 6251, 6249, 6247, 6245, 64 | 6243, 6241, 6239, 6237, 6235, 6233, 6231, 6229, 6227, 6225, 6223, 6221, 6219, 6217, 6215, 6213, 65 | 6211, 6209, 5183, 5181, 5179, 5177, 5175, 5173, 5171, 5169, 5167, 5165, 5163, 5161, 5159, 5157, 66 | 5155, 5153, 4127, 4125, 4123, 4121, 4119, 4117, 4115, 
4113, 3087, 3085, 3083, 3081, 2055, 2053 67 | } 68 | }; 69 | 70 | private final int[] cache; 71 | private final int signed; 72 | private final OutputBitStream bitstream; 73 | 74 | /** 75 | * Creates a new {@code ExpGolombEncoder}. 76 | * 77 | * @param bitstream The {@link OutputBitStream} to write the encoded data to. 78 | * @param signed If {@code true}, the encoder will encode signed values; 79 | * otherwise, unsigned. 80 | * @throws NullPointerException if {@code bitstream} is {@code null}. 81 | */ 82 | public ExpGolombEncoder(OutputBitStream bitstream, boolean signed) { 83 | if (bitstream == null) 84 | throw new NullPointerException("ExpGolomb codec: Invalid null bitstream parameter"); 85 | 86 | this.signed = (signed == true) ? 1 : 0; 87 | // The cache stores pre-computed values for faster encoding. 88 | // CACHE_VALUES[0] is for unsigned encoding. 89 | // CACHE_VALUES[1] is for signed encoding. 90 | // Each value in the cache is a packed integer: 91 | // - The lower 9 bits (emit & 0x1FF) represent the value to write. 92 | // - The upper bits (emit >>> 9) represent the number of bits to write. 93 | this.cache = CACHE_VALUES[this.signed]; 94 | this.bitstream = bitstream; 95 | } 96 | 97 | public boolean isSigned() { 98 | return this.signed == 1; 99 | } 100 | 101 | /** 102 | * Encodes a block of data. 103 | * 104 | * @param block The byte array containing the data to encode. 105 | * @param blkptr The starting position in the block. 106 | * @param count The number of bytes to encode. 107 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid 108 | * parameters). 109 | */ 110 | @Override 111 | public int encode(byte[] block, int blkptr, int count) { 112 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 113 | return -1; 114 | 115 | final int end = blkptr + count; 116 | 117 | for (int i = blkptr; i < end; i++) 118 | this.encodeByte(block[i]); 119 | 120 | return count; 121 | } 122 | 123 | public void encodeByte(byte val) { 124 | if (val == 0) { 125 | // shortcut when input is 0 126 | this.bitstream.writeBit(1); 127 | return; 128 | } 129 | 130 | final int emit = this.cache[val & 0xFF]; 131 | this.bitstream.writeBits(emit & 0x1FF, emit >>> 9); 132 | } 133 | 134 | @Override 135 | public OutputBitStream getBitStream() { 136 | return this.bitstream; 137 | } 138 | 139 | @Override 140 | public void dispose() { 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/FPAQEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import java.util.Arrays; 19 | import io.github.flanglet.kanzi.EntropyEncoder; 20 | import io.github.flanglet.kanzi.Memory; 21 | import io.github.flanglet.kanzi.OutputBitStream; 22 | import io.github.flanglet.kanzi.SliceByteArray; 23 | 24 | /** 25 | *
26 | * Implementation of an FPAQ encoder. This class is derived from fpaq0r by 27 | * Matt Mahoney and Alexander Ratushnyak, and is a simple (and fast) adaptive 28 | * entropy bit coder. 29 | *
30 | * 31 | *32 | * It uses a range coding approach where the current range is updated based on 33 | * the predicted probability of the next bit. The prediction is based on a 34 | * context formed by previous bits. 35 | *
36 | * 37 | *38 | * The encoding process involves updating the range and normalizing it by 39 | * writing 40 | * bits to an {@link OutputBitStream} when the range becomes too small. 41 | *
42 | * 43 | * @see fpaq0 by Matt Mahoney 44 | */ 45 | public class FPAQEncoder implements EntropyEncoder { 46 | /** 47 | * The top value for the range, used in range coding. 48 | * This value defines the maximum possible range. 49 | */ 50 | private static final long TOP = 0x00FFFFFFFFFFFFFFL; 51 | /** 52 | * A mask used to check if the most significant bits of the low and 53 | * (low + range) values are the same, indicating that bits can be 54 | * shifted out. 55 | */ 56 | private static final long MASK_24_56 = 0x00FFFFFFFF000000L; 57 | /** 58 | * A mask used to keep the lower 24 bits of a long. 59 | */ 60 | private static final long MASK_0_24 = 0x0000000000FFFFFFL; 61 | /** 62 | * A mask used to keep the lower 32 bits of a long. 63 | */ 64 | private static final long MASK_0_32 = 0x00000000FFFFFFFFL; 65 | /** 66 | * The default chunk size for processing data. 67 | */ 68 | private static final int DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024; 69 | /** 70 | * The maximum allowed block size. 71 | */ 72 | private static final int MAX_BLOCK_SIZE = 1 << 30; 73 | /** 74 | * The scaling factor for probabilities. 75 | */ 76 | private static final int PSCALE = 65536; 77 | 78 | /** 79 | * The lower bound of the current range. 80 | */ 81 | private long low; 82 | /** 83 | * The upper bound of the current range. 84 | */ 85 | private long high; 86 | /** 87 | * The output bitstream to which compressed data is written. 88 | */ 89 | private final OutputBitStream bitstream; 90 | private boolean disposed; 91 | private SliceByteArray sba; 92 | private final int[][] probs; // probability of bit=1 93 | private int[] p; // pointer to current prob 94 | 95 | /** 96 | * Creates a new {@code FPAQEncoder}. 97 | * 98 | * @param bitstream The {@link OutputBitStream} to write compressed data to. 99 | * @throws NullPointerException if {@code bitstream} is {@code null}. 100 | */ 101 | public FPAQEncoder(OutputBitStream bitstream) { 102 | if (bitstream == null) 103 | throw new NullPointerException("FPAQ codec: Invalid null bitstream parameter"); 104 | 105 | this.low = 0L; 106 | this.high = TOP; 107 | this.bitstream = bitstream; 108 | this.sba = new SliceByteArray(new byte[0], 0); 109 | this.probs = new int[4][256]; 110 | this.p = this.probs[0]; 111 | 112 | for (int i = 0; i < 4; i++) 113 | Arrays.fill(this.probs[i], PSCALE >> 1); 114 | } 115 | 116 | /** 117 | * Encodes a block of data. 118 | *119 | * This method reads data from the provided byte array, encodes it using the 120 | * FPAQ model, and writes the compressed data to the internal bitstream. 121 | *
122 | * 123 | * @param block The byte array containing the data to encode. 124 | * @param blkptr The starting position in the block. 125 | * @param count The number of bytes to encode. 126 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid 127 | * parameters). 128 | */ 129 | @Override 130 | public int encode(byte[] block, int blkptr, int count) { 131 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0) || (count > MAX_BLOCK_SIZE)) 132 | return -1; 133 | 134 | if (count == 0) 135 | return 0; 136 | 137 | int startChunk = blkptr; 138 | final int end = blkptr + count; 139 | 140 | // Split block into chunks, encode chunk and write bit array to bitstream 141 | while (startChunk < end) { 142 | final int chunkSize = Math.min(DEFAULT_CHUNK_SIZE, end - startChunk); 143 | 144 | if (this.sba.array.length < (chunkSize + (chunkSize >> 3))) 145 | this.sba.array = new byte[chunkSize + (chunkSize >> 3)]; 146 | 147 | this.sba.index = 0; 148 | final int endChunk = startChunk + chunkSize; 149 | this.p = this.probs[0]; 150 | 151 | for (int i = startChunk; i < endChunk; i++) { 152 | final byte val = block[i]; 153 | final int bits = (val & 0xFF) + 256; 154 | this.encodeBit(val & 0x80, 1); 155 | this.encodeBit(val & 0x40, bits >> 7); 156 | this.encodeBit(val & 0x20, bits >> 6); 157 | this.encodeBit(val & 0x10, bits >> 5); 158 | this.encodeBit(val & 0x08, bits >> 4); 159 | this.encodeBit(val & 0x04, bits >> 3); 160 | this.encodeBit(val & 0x02, bits >> 2); 161 | this.encodeBit(val & 0x01, bits >> 1); 162 | this.p = this.probs[(val & 0xFF) >>> 6]; 163 | } 164 | 165 | EntropyUtils.writeVarInt(this.bitstream, this.sba.index); 166 | this.bitstream.writeBits(this.sba.array, 0, 8 * this.sba.index); 167 | startChunk += chunkSize; 168 | 169 | if (startChunk < end) 170 | this.bitstream.writeBits(this.low | MASK_0_24, 56); 171 | } 172 | 173 | return count; 174 | } 175 | 176 | /** 177 | * Encodes a single bit based on a given prediction. 178 | *179 | * The range is split according to the prediction, and the bit is encoded by 180 | * updating the range. The probability model for the current context is then 181 | * updated based on the encoded bit. 182 | *
183 | */ 184 | private void encodeBit(int bit, int pIdx) { 185 | // Calculate interval split 186 | // Written in a way to maximize accuracy of multiplication/division 187 | final long split = (((this.high - this.low) >>> 8) * this.p[pIdx]) >>> 8; 188 | 189 | // Update probabilities 190 | if (bit == 0) { 191 | this.low += (split + 1); 192 | this.p[pIdx] -= (this.p[pIdx] >> 6); 193 | } else { 194 | this.high = this.low + split; 195 | this.p[pIdx] -= ((this.p[pIdx] - PSCALE + 64) >> 6); 196 | } 197 | 198 | // Write unchanged first 32 bits to bitstream 199 | while (((this.low ^ this.high) & MASK_24_56) == 0) 200 | this.flush(); 201 | } 202 | 203 | /** 204 | * Flushes the current range to the bitstream. 205 | *206 | * This method is called when the range becomes too small and needs to be 207 | * normalized. It writes the most significant bits of the range to the 208 | * bitstream. 209 | *
210 | */ 211 | private void flush() { 212 | Memory.BigEndian.writeInt32(this.sba.array, this.sba.index, (int) (this.high >>> 24)); 213 | this.sba.index += 4; 214 | this.low <<= 32; 215 | this.high = (this.high << 32) | MASK_0_32; 216 | } 217 | 218 | /** 219 | * Returns the {@link OutputBitStream} used by this encoder. 220 | * 221 | * @return The {@link OutputBitStream}. 222 | */ 223 | @Override 224 | public OutputBitStream getBitStream() { 225 | return this.bitstream; 226 | } 227 | 228 | /** 229 | * Disposes of any resources used by the encoder. 230 | *231 | * This method flushes any remaining bits in the range to the bitstream. 232 | *
233 | */ 234 | @Override 235 | public void dispose() { 236 | if (this.disposed == true) 237 | return; 238 | 239 | this.disposed = true; 240 | this.bitstream.writeBits(this.low | MASK_0_24, 56); 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/FastLogisticAdaptiveProbMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi.entropy; 16 | 17 | import io.github.flanglet.kanzi.Global; 18 | 19 | /** 20 | *21 | * Implementation of an Adaptive Probability Map (APM) with fast logistic 22 | * function. 23 | * This class maps a probability and a context into a new probability that the 24 | * next bit will be 1. 25 | * After each guess, it updates its state to improve future guesses. 26 | *
27 |  *
28 |  * <p>
29 |  * It uses a logistic function to squash the prediction and adapts its internal
30 |  * probabilities based on the actual bit observed and a learning rate.
31 |  * </p>
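The update performed inside get() below is an exponential moving average toward a bit-dependent target. A self-contained sketch of that arithmetic (not taken from the Kanzi sources; the constants mirror the expression used by the adaptive probability map classes in this package):

    public class ApmUpdateDemo {
        public static void main(String[] args) {
            final int rate = 6;
            int entry = 32768;                            // roughly p(1) = 0.5 on a 16-bit scale
            final int bit = 1;                            // the bit actually observed
            final int g = (-bit & 65528) + (bit << rate); // 65592 when bit == 1, 0 when bit == 0
            entry += (g - entry) >> rate;                 // move 1/64 of the remaining error
            System.out.println(entry);                    // prints 33280: nudged toward "certain 1"
        }
    }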
32 | */ 33 | /* package */ final class FastLogisticAdaptiveProbMap { 34 | /** 35 | * The index into the {@code data} array, representing the last probability and 36 | * context. 37 | */ 38 | private int index; 39 | 40 | /** 41 | * The update rate for adapting probabilities. A smaller rate means faster 42 | * adaptation. 43 | */ 44 | private final int rate; 45 | 46 | /** 47 | * The internal data array storing probabilities for different contexts. 48 | * Each entry is a packed integer representing a probability. 49 | */ 50 | private final int[] data; 51 | 52 | /** 53 | * Creates a new {@code FastLogisticAdaptiveProbMap}. 54 | * 55 | * @param n The number of contexts to support. 56 | * @param rate The update rate for adapting probabilities. 57 | */ 58 | FastLogisticAdaptiveProbMap(int n, int rate) { 59 | this.data = new int[n * 32]; 60 | this.rate = rate; 61 | 62 | for (int j = 0; j < 32; j++) { 63 | this.data[j] = Global.squash((j - 16) << 7) << 4; 64 | } 65 | 66 | for (int i = 1; i < n; i++) { 67 | System.arraycopy(this.data, 0, this.data, i * 32, 32); 68 | } 69 | } 70 | 71 | /** 72 | * Returns an improved prediction given the current bit, prediction, and 73 | * context. 74 | * 75 | * @param bit The actual bit observed (0 or 1). 76 | * @param pr The current prediction (probability of 1). 77 | * @param ctx The current context. 78 | * @return The improved prediction (probability of 1), scaled. 79 | */ 80 | int get(int bit, int pr, int ctx) { 81 | // Update probability based on error and learning rate 82 | final int g = (-bit & 65528) + (bit << this.rate); 83 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 84 | 85 | // Find index: 32*ctx + quantized prediction in [0..32[ 86 | this.index = ((Global.STRETCH[pr] + 2048) >> 7) + (ctx << 5); 87 | return (this.data[this.index]) >> 4; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/HuffmanCommon.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | /** 19 | *20 | * Utility class for common Huffman coding operations. 21 | *
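As a concrete illustration of the canonical code assignment performed by the generateCanonicalCodes method shown below, consider four symbols with hypothetical code lengths of 1, 2, 3 and 3 bits (any lengths produced by a Huffman tree would do):

    short[] sizes = new short[256];
    sizes['A'] = 2; sizes['B'] = 1; sizes['C'] = 3; sizes['D'] = 3;
    int[] codes = new int[256];
    int[] symbols = { 'A', 'B', 'C', 'D' };   // sorted in place by (length, value)
    int n = HuffmanCommon.generateCanonicalCodes(sizes, codes, symbols, 4,
            HuffmanCommon.MAX_SYMBOL_SIZE_V4);
    // n == 4; the canonical codes are 'B' -> 0, 'A' -> 10, 'C' -> 110, 'D' -> 111

Shorter codes come first and, within a length, codes are assigned in increasing symbol order, which is what lets a decoder rebuild the exact same table from the code lengths alone.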
22 | */ 23 | public final class HuffmanCommon { 24 | /** 25 | * The logarithm base 2 of the maximum chunk size. 26 | */ 27 | public static final int LOG_MAX_CHUNK_SIZE = 14; 28 | 29 | /** 30 | * The minimum chunk size for Huffman encoding/decoding. 31 | */ 32 | public static final int MIN_CHUNK_SIZE = 1024; 33 | 34 | /** 35 | * The maximum chunk size for Huffman encoding/decoding. 36 | */ 37 | public static final int MAX_CHUNK_SIZE = 1 << LOG_MAX_CHUNK_SIZE; 38 | 39 | /** 40 | * The maximum symbol size (number of bits) for Huffman codes in bitstream 41 | * version 3. 42 | */ 43 | public static final int MAX_SYMBOL_SIZE_V3 = 14; 44 | 45 | /** 46 | * The maximum symbol size (number of bits) for Huffman codes in bitstream 47 | * version 4. 48 | */ 49 | public static final int MAX_SYMBOL_SIZE_V4 = 12; 50 | 51 | /** 52 | * The size of the internal buffer used for sorting symbols. 53 | */ 54 | private static final int BUFFER_SIZE = (MAX_SYMBOL_SIZE_V3 << 8) + 256; 55 | 56 | /** 57 | * Generates canonical Huffman codes based on the provided symbol sizes. 58 | * Symbols are sorted first by increasing size, then by increasing value. 59 | * 60 | * @param sizes An array where `sizes[symbol]` stores the bit length of 61 | * the Huffman code for that symbol. 62 | * @param codes An array where the generated canonical code for each 63 | * symbol will be stored. 64 | * @param symbols An array containing the symbols to be processed. This 65 | * array will be sorted in place. 66 | * @param count The number of symbols to process. 67 | * @param maxSymbolSize The maximum allowed bit length for any symbol's Huffman 68 | * code. 69 | * @return The number of codes generated (which should be equal to `count`), or 70 | * -1 if an error occurs 71 | * (e.g., invalid symbol or code size). 72 | */ 73 | public static int generateCanonicalCodes(short[] sizes, int[] codes, int[] symbols, 74 | int count, final int maxSymbolSize) { 75 | // Sort symbols by increasing size (first key) and increasing value (second key) 76 | if (count > 1) { 77 | byte[] buf = new byte[BUFFER_SIZE]; 78 | 79 | for (int i = 0; i < count; i++) { 80 | final int s = symbols[i]; 81 | 82 | if (((s & 0xFF) != s) || (sizes[s] > maxSymbolSize)) 83 | return -1; 84 | 85 | buf[((sizes[s] - 1) << 8) | s] = 1; 86 | } 87 | 88 | int n = 0; 89 | 90 | for (int i = 0; i < BUFFER_SIZE; i++) { 91 | if (buf[i] == 0) 92 | continue; 93 | 94 | symbols[n++] = i & 0xFF; 95 | 96 | if (n == count) 97 | break; 98 | } 99 | } 100 | 101 | int code = 0; 102 | int curLen = sizes[symbols[0]]; 103 | 104 | for (int i = 0; i < count; i++) { 105 | final int s = symbols[i]; 106 | code <<= (sizes[s] - curLen); 107 | curLen = sizes[s]; 108 | codes[s] = code; 109 | code++; 110 | } 111 | 112 | return count; 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/LinearAdaptiveProbMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | package io.github.flanglet.kanzi.entropy;
16 | 
17 | /**
18 |  * <p>
19 |  * Implementation of an Adaptive Probability Map (APM) with linear
20 |  * interpolation.
21 |  * This class maps a probability and a context into a new probability that the
22 |  * next bit will be 1. After each guess, it updates its state to improve future
23 |  * guesses.
24 |  * </p>
25 |  *
26 |  * <p>
27 |  * It uses linear interpolation to squash the prediction and adapts its internal
28 |  * probabilities based on the actual bit observed and a learning rate.
29 |  * </p>
30 | */ 31 | /* package */ final class LinearAdaptiveProbMap { 32 | /** 33 | * The index into the {@code data} array, representing the last probability and 34 | * context. 35 | */ 36 | private int index; 37 | 38 | /** 39 | * The update rate for adapting probabilities. A smaller rate means faster 40 | * adaptation. 41 | */ 42 | private final int rate; 43 | 44 | /** 45 | * The internal data array storing probabilities for different contexts. 46 | * Each entry is a packed integer representing a probability. 47 | */ 48 | private final int[] data; 49 | 50 | /** 51 | * Creates a new {@code LinearAdaptiveProbMap}. 52 | * 53 | * @param n The number of contexts to support. 54 | * @param rate The update rate for adapting probabilities. 55 | */ 56 | LinearAdaptiveProbMap(int n, int rate) { 57 | final int size = (n == 0) ? 65 : n * 65; 58 | this.data = new int[size]; 59 | this.rate = rate; 60 | 61 | for (int j = 0; j <= 64; j++) 62 | this.data[j] = (j << 6) << 4; 63 | 64 | for (int i = 1; i < n; i++) 65 | System.arraycopy(this.data, 0, this.data, i * 65, 65); 66 | } 67 | 68 | /** 69 | * Returns an improved prediction given the current bit, prediction, and 70 | * context. 71 | * 72 | * @param bit The actual bit observed (0 or 1). 73 | * @param pr The current prediction (probability of 1). 74 | * @param ctx The current context. 75 | * @return The improved prediction (probability of 1), scaled. 76 | */ 77 | int get(int bit, int pr, int ctx) { 78 | // Update probability based on error and learning rate 79 | final int g = (-bit & 65528) + (bit << this.rate); 80 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 81 | this.data[this.index + 1] += ((g - this.data[this.index + 1]) >> this.rate); 82 | 83 | // Find index: 65*ctx + quantized prediction in [0..64] 84 | this.index = (pr >> 6) + (ctx << 6) + ctx; 85 | 86 | // Return interpolated probability 87 | final int w = pr & 127; 88 | return (this.data[this.index] * (128 - w) + this.data[this.index + 1] * w) >> 11; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/LogisticAdaptiveProbMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi.entropy; 16 | 17 | import io.github.flanglet.kanzi.Global; 18 | 19 | /** 20 | *21 | * Implementation of an Adaptive Probability Map (APM) with logistic function. 22 | * This class maps a probability and a context into a new probability that the 23 | * next bit will be 1. After each guess, it updates its state to improve future 24 | * guesses. 25 | *
26 |  *
27 |  * <p>
28 |  * It uses a logistic function to squash the prediction and adapts its internal
29 |  * probabilities based on the actual bit observed and a learning rate.
30 |  * </p>
31 | */ 32 | /* package */ final class LogisticAdaptiveProbMap { 33 | /** 34 | * The index into the {@code data} array, representing the last probability and 35 | * context. 36 | */ 37 | private int index; 38 | 39 | /** 40 | * The update rate for adapting probabilities. A smaller rate means faster 41 | * adaptation. 42 | */ 43 | private final int rate; 44 | 45 | /** 46 | * The internal data array storing probabilities for different contexts. 47 | * Each entry is a packed integer representing a probability. 48 | */ 49 | private final int[] data; 50 | 51 | /** 52 | * Creates a new {@code LogisticAdaptiveProbMap}. 53 | * 54 | * @param n The number of contexts to support. 55 | * @param rate The update rate for adapting probabilities. 56 | */ 57 | 58 | LogisticAdaptiveProbMap(int n, int rate) { 59 | final int size = (n == 0) ? 33 : n * 33; 60 | this.data = new int[size]; 61 | this.rate = rate; 62 | 63 | for (int j = 0; j <= 32; j++) 64 | this.data[j] = Global.squash((j - 16) << 7) << 4; 65 | 66 | for (int i = 1; i < n; i++) 67 | System.arraycopy(this.data, 0, this.data, i * 33, 33); 68 | } 69 | 70 | /** 71 | * Returns an improved prediction given the current bit, prediction, and 72 | * context. 73 | * 74 | * @param bit The actual bit observed (0 or 1). 75 | * @param pr The current prediction (probability of 1). 76 | * @param ctx The current context. 77 | * @return The improved prediction (probability of 1), scaled. 78 | */ 79 | int get(int bit, int pr, int ctx) { 80 | // Update probability based on error and learning rate 81 | final int g = (-bit & 65528) + (bit << this.rate); 82 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 83 | this.data[this.index + 1] += ((g - this.data[this.index + 1]) >> this.rate); 84 | pr = Global.STRETCH[pr]; 85 | 86 | // Find index: 33*ctx + quantized prediction in [0..32] 87 | this.index = ((pr + 2048) >> 7) + (ctx << 5) + ctx; 88 | 89 | // Return interpolated probability 90 | final int w = pr & 127; 91 | return (this.data[this.index] * (128 - w) + this.data[this.index + 1] * w) >> 11; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/NullEntropyDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyDecoder; 19 | import io.github.flanglet.kanzi.InputBitStream; 20 | 21 | 22 | /** 23 | *Null entropy decoder. 24 | * This decoder does not perform any actual decompression; it simply reads 25 | * the data directly from the provided {@link InputBitStream}.
26 |  *
27 |  * <p>It acts as a pass-through mechanism, useful when no entropy coding
28 |  * is applied to the data, or when the data is already in its final form.</p>
29 |  */
30 | public final class NullEntropyDecoder implements EntropyDecoder
31 | {
32 |    private final InputBitStream bitstream;
33 | 
34 | 
35 |    /**
36 |     * Creates a new {@code NullEntropyDecoder}.
37 |     *
38 |     * @param bitstream The {@link InputBitStream} to read data from.
39 |     * @throws NullPointerException if {@code bitstream} is {@code null}.
40 |     */
41 |    public NullEntropyDecoder(InputBitStream bitstream)
42 |    {
43 |       if (bitstream == null)
44 |          throw new NullPointerException("Invalid null bitstream parameter");
45 | 
46 |       this.bitstream = bitstream;
47 |    }
48 | 
49 |    /**
50 |     * Decodes a block of data by reading it directly from the bitstream.
51 |     * <p>
52 |     * This method reads {@code count} bytes from the bitstream into the provided {@code block} array.
53 |     * </p>
54 | * @param block The byte array to decode into. 55 | * @param blkptr The starting position in the block. 56 | * @param count The number of bytes to decode. 57 | * @return The number of bytes decoded, or -1 if an error occurs (e.g., invalid parameters). 58 | */ 59 | @Override 60 | public int decode(byte[] block, int blkptr, int count) 61 | { 62 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 63 | return -1; 64 | 65 | int res = 0; 66 | 67 | while (count > 0) 68 | { 69 | final int ckSize = (count < 1<<23) ? count : 1<<23; 70 | res += (this.bitstream.readBits(block, blkptr, 8*ckSize) >> 3); 71 | blkptr += ckSize; 72 | count -= ckSize; 73 | } 74 | 75 | return res; 76 | } 77 | 78 | /** 79 | * Decodes a single byte by reading it directly from the bitstream. 80 | * @return The decoded byte. 81 | */ 82 | public byte decodeByte() 83 | { 84 | return (byte) this.bitstream.readBits(8); 85 | } 86 | 87 | /** 88 | * Returns the {@link InputBitStream} used by this decoder. 89 | * @return The {@link InputBitStream}. 90 | */ 91 | @Override 92 | public InputBitStream getBitStream() 93 | { 94 | return this.bitstream; 95 | } 96 | 97 | /** 98 | * Disposes of any resources used by the decoder. 99 | * This method currently does nothing as there are no specific resources to release. 100 | */ 101 | @Override 102 | public void dispose() 103 | { 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/NullEntropyEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyEncoder; 19 | import io.github.flanglet.kanzi.OutputBitStream; 20 | 21 | 22 | /** 23 | *Null entropy encoder. 24 | * This encoder does not perform any actual compression; it simply writes 25 | * the data directly to the provided {@link OutputBitStream}.
26 |  *
27 |  * <p>It acts as a pass-through mechanism, useful when no entropy coding
28 |  * is applied to the data, or when the data is already in its final form.</p>
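A round-trip sketch for the two null codecs (illustrative only; it assumes DefaultOutputBitStream(OutputStream, int) and DefaultInputBitStream(InputStream, int) constructors from the bitstream package, plus the usual java.io stream classes):

    byte[] original = "pass-through example".getBytes();
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    OutputBitStream obs = new DefaultOutputBitStream(os, 4096);
    new NullEntropyEncoder(obs).encode(original, 0, original.length);
    obs.close();

    InputBitStream ibs = new DefaultInputBitStream(new ByteArrayInputStream(os.toByteArray()), 4096);
    byte[] restored = new byte[original.length];
    new NullEntropyDecoder(ibs).decode(restored, 0, restored.length);
    // restored now holds the same bytes as original: no entropy coding was applied in either direction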
29 |  */
30 | public final class NullEntropyEncoder implements EntropyEncoder
31 | {
32 |    private final OutputBitStream bitstream;
33 | 
34 | 
35 |    /**
36 |     * Creates a new {@code NullEntropyEncoder}.
37 |     * @param bitstream The {@link OutputBitStream} to write data to.
38 |     * @throws NullPointerException if {@code bitstream} is {@code null}.
39 |     */
40 |    public NullEntropyEncoder(OutputBitStream bitstream)
41 |    {
42 |       if (bitstream == null)
43 |          throw new NullPointerException("Invalid null bitstream parameter");
44 | 
45 |       this.bitstream = bitstream;
46 |    }
47 | 
48 | 
49 |    /**
50 |     * Encodes a block of data by writing it directly to the bitstream.
51 |     * <p>
52 |     * This method writes {@code count} bytes from the provided {@code block} array to the bitstream.
53 |     * </p>
54 | * @param block The byte array containing the data to encode. 55 | * @param blkptr The starting position in the block. 56 | * @param count The number of bytes to encode. 57 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid parameters). 58 | */ 59 | @Override 60 | public int encode(byte[] block, int blkptr, int count) 61 | { 62 | if ((block == null) || (blkptr+count > block.length) || (blkptr < 0) || (count < 0)) 63 | return -1; 64 | 65 | int res = 0; 66 | 67 | while (count > 0) 68 | { 69 | final int ckSize = (count < 1<<23) ? count : 1<<23; 70 | res += (this.bitstream.writeBits(block, blkptr, 8*ckSize) >> 3); 71 | blkptr += ckSize; 72 | count -= ckSize; 73 | } 74 | 75 | return res; 76 | } 77 | 78 | 79 | /** 80 | * Encodes a single byte by writing it directly to the bitstream. 81 | * @param val The byte to encode. 82 | */ 83 | public void encodeByte(byte val) 84 | { 85 | this.bitstream.writeBits(val, 8); 86 | } 87 | 88 | 89 | /** 90 | * Returns the {@link OutputBitStream} used by this encoder. 91 | * @return The {@link OutputBitStream}. 92 | */ 93 | @Override 94 | public OutputBitStream getBitStream() 95 | { 96 | return this.bitstream; 97 | } 98 | 99 | 100 | /** 101 | * Disposes of any resources used by the encoder. 102 | * This method currently does nothing as there are no specific resources to release. 103 | */ 104 | @Override 105 | public void dispose() 106 | { 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/io/IOException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.io; 17 | 18 | 19 | /** 20 | * Custom exception class that extends {@link java.io.IOException}. 21 | * This exception includes an error code to provide more specific information 22 | * about the nature of the I/O error that occurred. 23 | */ 24 | public class IOException extends java.io.IOException { 25 | private static final long serialVersionUID = -9153775235137373283L; 26 | 27 | private final int code; 28 | 29 | /** 30 | * Constructs a new {@code IOException} with the specified detail message 31 | * and error code. 32 | * 33 | * @param msg the detail message explaining the reason for the exception 34 | * @param code an integer error code that provides additional context about the error 35 | */ 36 | public IOException(String msg, int code) { 37 | super(msg); 38 | this.code = code; 39 | } 40 | 41 | /** 42 | * Returns the error code associated with this exception. 
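A small sketch of how callers can use the error code carried by this exception (the numeric code 13 below is arbitrary and only for illustration):

    try {
        throw new io.github.flanglet.kanzi.io.IOException("Cannot read block header", 13);
    } catch (io.github.flanglet.kanzi.io.IOException e) {
        System.err.println("I/O error " + e.getErrorCode() + ": " + e.getMessage());
    }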
43 | * 44 | * @return the error code indicating the type of I/O error 45 | */ 46 | public int getErrorCode() { 47 | return this.code; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/io/IOUtil.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2011-2025 Frederic Langlet 2 | Licensed under the Apache License, Version 2.0 (the "License"); 3 | you may not use this file except in compliance with the License. 4 | you may obtain a copy of the License at 5 | 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and 12 | limitations under the License. 13 | */ 14 | 15 | package io.github.flanglet.kanzi.io; 16 | 17 | import java.io.File; 18 | import java.io.IOException; 19 | import java.nio.file.DirectoryIteratorException; 20 | import java.nio.file.DirectoryStream; 21 | import java.nio.file.Files; 22 | import java.nio.file.Path; 23 | import java.nio.file.Paths; 24 | import java.util.List; 25 | 26 | 27 | /** 28 | * Utility class for performing I/O operations related to file management. 29 | */ 30 | public class IOUtil { 31 | 32 | /** 33 | * Creates a list of files from the specified target path. The method can 34 | * traverse directories recursively and can ignore symbolic links and 35 | * dot files based on the provided flags. 36 | * 37 | * @param target the target path from which to list files 38 | * @param files the list to populate with found file paths 39 | * @param isRecursive flag indicating whether to search directories recursively 40 | * @param ignoreLinks flag indicating whether to ignore symbolic links 41 | * @param ignoreDotFiles flag indicating whether to ignore dot files (files starting with a dot) 42 | * @throws IOException if an I/O error occurs or the target path is invalid 43 | */ 44 | public static void createFileList(String target, List27 | * BWT stream format: Header (mode + primary index(es)) | Data (n bytes) 28 | *
Note: This class is not thread-safe due to the mutable state of its breakpoints list.
28 | */ 29 | public class LyndonWords { 30 | 31 | // List of breakpoints for the Lyndon words 32 | private final ListThis method uses the Chen-Fox algorithm to find the breakpoints where the string 45 | * can be split into Lyndon words. It is not thread-safe.
46 | * 47 | * @param buf the byte array representing the string 48 | * @param length the length of the byte array 49 | * @return a list of breakpoints where Lyndon words occur 50 | */ 51 | private ListThis method demonstrates the use of the {@code split} method to split a string into Lyndon words.
156 | * 157 | * @param args command-line arguments (not used) 158 | */ 159 | public static void main(String[] args) { 160 | String[] ss = new LyndonWords().split("TO_BE_OR_NOT_TO_BE"); 161 | 162 | // Print the resulting Lyndon words 163 | for (String s : ss) { 164 | System.out.println(s); 165 | } 166 | } 167 | } 168 | 169 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/hash/XXHash32.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.hash; 17 | 18 | 19 | import io.github.flanglet.kanzi.Memory; 20 | 21 | /** 22 | * XXHash32 is an implementation of the 32-bit variant of the XXHash algorithm, 23 | * which is a fast non-cryptographic hash function. It is designed for high-speed 24 | * hashing, commonly used in applications where performance is critical, such as 25 | * checksums, hash tables, and data integrity verification. 26 | * Port to Java of the original source code: https://github.com/Cyan4973/xxHash 27 | * 28 | *XXHash32 uses a sequence of rounds with constant mixing primes to process 29 | * the input data and produce a 32-bit hash value. This class allows for an 30 | * optional user-defined seed, providing a degree of variability in the output. 31 | */ 32 | public class XXHash32 { 33 | 34 | // Constants used in the hashing algorithm 35 | private static final int PRIME32_1 = -1640531535; 36 | private static final int PRIME32_2 = -2048144777; 37 | private static final int PRIME32_3 = -1028477379; 38 | private static final int PRIME32_4 = 668265263; 39 | private static final int PRIME32_5 = 374761393; 40 | 41 | // The seed used for hashing 42 | private int seed; 43 | 44 | /** 45 | * Default constructor that initializes the hash function with a seed based on 46 | * the current system time in nanoseconds. 47 | */ 48 | public XXHash32() { 49 | this((int) (System.nanoTime())); 50 | } 51 | 52 | /** 53 | * Constructs an XXHash32 instance with a specified seed. 54 | * 55 | * @param seed The seed value to be used in the hash computation. 56 | */ 57 | public XXHash32(int seed) { 58 | this.seed = seed; 59 | } 60 | 61 | /** 62 | * Sets the seed value for the hash computation. This allows for custom seed values 63 | * to modify the output hash. 64 | * 65 | * @param seed The new seed value. 66 | */ 67 | public void setSeed(int seed) { 68 | this.seed = seed; 69 | } 70 | 71 | /** 72 | * Computes the 32-bit hash of the provided byte array. 73 | * This method uses the entire byte array, starting from index 0. 74 | * 75 | * @param data The byte array to be hashed. 76 | * @return The 32-bit hash value of the input data. 
77 | */ 78 | public int hash(byte[] data) { 79 | return this.hash(data, 0, data.length); 80 | } 81 | 82 | /** 83 | * Computes the 32-bit hash of the provided byte array, with the option to specify 84 | * an offset and length of the data to be used. 85 | * 86 | * @param data The byte array to be hashed. 87 | * @param offset The starting index within the byte array. 88 | * @param length The number of bytes to hash. 89 | * @return The 32-bit hash value of the input data. 90 | */ 91 | public int hash(byte[] data, int offset, int length) { 92 | final int end = offset + length; 93 | int h32; 94 | int idx = offset; 95 | 96 | if (length >= 16) { 97 | final int end16 = end - 16; 98 | int v1 = this.seed + PRIME32_1 + PRIME32_2; 99 | int v2 = this.seed + PRIME32_2; 100 | int v3 = this.seed; 101 | int v4 = this.seed - PRIME32_1; 102 | 103 | // Process 16-byte blocks 104 | do { 105 | v1 = round(v1, Memory.LittleEndian.readInt32(data, idx)); 106 | v2 = round(v2, Memory.LittleEndian.readInt32(data, idx + 4)); 107 | v3 = round(v3, Memory.LittleEndian.readInt32(data, idx + 8)); 108 | v4 = round(v4, Memory.LittleEndian.readInt32(data, idx + 12)); 109 | idx += 16; 110 | } while (idx <= end16); 111 | 112 | h32 = ((v1 << 1) | (v1 >>> 31)) + ((v2 << 7) | (v2 >>> 25)) + 113 | ((v3 << 12) | (v3 >>> 20)) + ((v4 << 18) | (v4 >>> 14)); 114 | } else { 115 | h32 = this.seed + PRIME32_5; 116 | } 117 | 118 | h32 += length; 119 | 120 | // Process remaining data (less than 16 bytes) 121 | while (idx <= end - 4) { 122 | h32 += (Memory.LittleEndian.readInt32(data, idx) * PRIME32_3); 123 | h32 = ((h32 << 17) | (h32 >>> 15)) * PRIME32_4; 124 | idx += 4; 125 | } 126 | 127 | while (idx < end) { 128 | h32 += ((data[idx] & 0xFF) * PRIME32_5); 129 | h32 = ((h32 << 11) | (h32 >>> 21)) * PRIME32_1; 130 | idx++; 131 | } 132 | 133 | // Finalization step 134 | h32 ^= (h32 >>> 15); 135 | h32 *= PRIME32_2; 136 | h32 ^= (h32 >>> 13); 137 | h32 *= PRIME32_3; 138 | return h32 ^ (h32 >>> 16); 139 | } 140 | 141 | /** 142 | * Performs a single round of mixing for the hash value. 143 | * 144 | * @param acc The accumulator value to be mixed. 145 | * @param val The value to be mixed with the accumulator. 146 | * @return The new mixed accumulator value. 147 | */ 148 | private static int round(int acc, int val) { 149 | acc += (val * PRIME32_2); 150 | return ((acc << 13) | (acc >>> 19)) * PRIME32_1; 151 | } 152 | } 153 | 154 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/hash/XXHash64.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.hash; 17 | 18 | import io.github.flanglet.kanzi.Memory; 19 | 20 | /** 21 | * XXHash64 is an implementation of the 64-bit variant of the XXHash algorithm, 22 | * which is a fast non-cryptographic hash function. 
It is designed for high-speed 23 | * hashing, and is widely used for checksums and hashing large amounts of data. 24 | * This class allows for a configurable seed value, and provides methods for 25 | * hashing byte arrays of various lengths. 26 | * Port to Java of the original source code: https://github.com/Cyan4973/xxHash 27 | * 28 | *
The algorithm processes the input data in blocks and uses a combination of 29 | * mix functions and bitwise operations to produce a hash value. It is optimized 30 | * for 64-bit platforms and can be used for general-purpose hashing where 31 | * cryptographic security is not a concern. 32 | * 33 | */ 34 | public class XXHash64 { 35 | 36 | // Constants used in the hashing algorithm 37 | private static final long PRIME64_1 = 0x9E3779B185EBCA87L; 38 | private static final long PRIME64_2 = 0xC2B2AE3D27D4EB4FL; 39 | private static final long PRIME64_3 = 0x165667B19E3779F9L; 40 | private static final long PRIME64_4 = 0x85EBCA77C2B2AE63L; 41 | private static final long PRIME64_5 = 0x27D4EB2F165667C5L; 42 | 43 | // The seed used for hashing 44 | private long seed; 45 | 46 | /** 47 | * Default constructor that initializes the hash function with a seed based on the 48 | * current system time in nanoseconds. 49 | */ 50 | public XXHash64() { 51 | this(System.nanoTime()); 52 | } 53 | 54 | /** 55 | * Constructs an XXHash64 instance with a specified seed. 56 | * 57 | * @param seed The seed value to be used in the hash computation. 58 | */ 59 | public XXHash64(long seed) { 60 | this.seed = seed; 61 | } 62 | 63 | /** 64 | * Sets the seed value for the hash computation. This allows for custom seed values 65 | * to modify the output hash. 66 | * 67 | * @param seed The new seed value. 68 | */ 69 | public void setSeed(long seed) { 70 | this.seed = seed; 71 | } 72 | 73 | /** 74 | * Computes the 64-bit hash of the provided byte array. 75 | * This method uses the entire byte array, starting from index 0. 76 | * 77 | * @param data The byte array to be hashed. 78 | * @return The 64-bit hash value of the input data. 79 | */ 80 | public long hash(byte[] data) { 81 | return this.hash(data, 0, data.length); 82 | } 83 | 84 | /** 85 | * Computes the 64-bit hash of the provided byte array, with the option to specify 86 | * an offset and length of the data to be used. 87 | * 88 | * @param data The byte array to be hashed. 89 | * @param offset The starting index within the byte array. 90 | * @param length The number of bytes to hash. 91 | * @return The 64-bit hash value of the input data. 
92 | */ 93 | public long hash(byte[] data, int offset, int length) { 94 | final int end = offset + length; 95 | long h64; 96 | int idx = offset; 97 | 98 | if (length >= 32) { 99 | final int end32 = end - 32; 100 | long v1 = this.seed + PRIME64_1 + PRIME64_2; 101 | long v2 = this.seed + PRIME64_2; 102 | long v3 = this.seed; 103 | long v4 = this.seed - PRIME64_1; 104 | 105 | // Process 32-byte blocks 106 | do { 107 | v1 = round(v1, Memory.LittleEndian.readLong64(data, idx)); 108 | v2 = round(v2, Memory.LittleEndian.readLong64(data, idx + 8)); 109 | v3 = round(v3, Memory.LittleEndian.readLong64(data, idx + 16)); 110 | v4 = round(v4, Memory.LittleEndian.readLong64(data, idx + 24)); 111 | idx += 32; 112 | } while (idx <= end32); 113 | 114 | h64 = ((v1 << 1) | (v1 >>> 31)) + ((v2 << 7) | (v2 >>> 25)) + 115 | ((v3 << 12) | (v3 >>> 20)) + ((v4 << 18) | (v4 >>> 14)); 116 | 117 | // Finalization 118 | h64 = mergeRound(h64, v1); 119 | h64 = mergeRound(h64, v2); 120 | h64 = mergeRound(h64, v3); 121 | h64 = mergeRound(h64, v4); 122 | } else { 123 | h64 = this.seed + PRIME64_5; 124 | } 125 | 126 | h64 += length; 127 | 128 | // Process remaining data (less than 32 bytes) 129 | while (idx + 8 <= end) { 130 | h64 ^= round(0, Memory.LittleEndian.readLong64(data, idx)); 131 | h64 = ((h64 << 27) | (h64 >>> 37)) * PRIME64_1 + PRIME64_4; 132 | idx += 8; 133 | } 134 | 135 | while (idx + 4 <= end) { 136 | h64 ^= (Memory.LittleEndian.readInt32(data, idx) * PRIME64_1); 137 | h64 = ((h64 << 23) | (h64 >>> 41)) * PRIME64_2 + PRIME64_3; 138 | idx += 4; 139 | } 140 | 141 | while (idx < end) { 142 | h64 ^= ((data[idx] & 0xFF) * PRIME64_5); 143 | h64 = ((h64 << 11) | (h64 >>> 53)) * PRIME64_1; 144 | idx++; 145 | } 146 | 147 | // Finalization step 148 | h64 ^= (h64 >>> 33); 149 | h64 *= PRIME64_2; 150 | h64 ^= (h64 >>> 29); 151 | h64 *= PRIME64_3; 152 | return h64 ^ (h64 >>> 32); 153 | } 154 | 155 | /** 156 | * Performs a single round of mixing for the hash value. 157 | * 158 | * @param acc The accumulator value to be mixed. 159 | * @param val The value to be mixed with the accumulator. 160 | * @return The new mixed accumulator value. 161 | */ 162 | private static long round(long acc, long val) { 163 | acc += (val * PRIME64_2); 164 | return ((acc << 31) | (acc >>> 33)) * PRIME64_1; 165 | } 166 | 167 | /** 168 | * Merges an additional value into the accumulator during the finalization phase. 169 | * 170 | * @param acc The current accumulator value. 171 | * @param val The value to be merged into the accumulator. 172 | * @return The updated accumulator value. 173 | */ 174 | private static long mergeRound(long acc, long val) { 175 | acc ^= round(0, val); 176 | return acc * PRIME64_1 + PRIME64_4; 177 | } 178 | } 179 | 180 | 181 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/BucketSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ByteSorter; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code BucketSort} class provides an implementation of the bucket sort algorithm for sorting integers and bytes. 23 | * Bucket sort is a simple and efficient sorting algorithm that works by distributing elements into a number of buckets, 24 | * then sorting the individual buckets. This implementation is optimized to handle small integer and byte values. 25 | * 26 | *
It is a simplified form of radix sort with buckets of width one, making it efficient for small integers (up to 0xFFFF).
27 | *This implementation is not thread-safe due to the mutable state of its internal data structures.
28 | */ 29 | public class BucketSort implements IntSorter, ByteSorter { 30 | 31 | // Array to store the count of each value within the bucket range 32 | private final int[] count; 33 | 34 | /** 35 | * Constructs a {@code BucketSort} object using the default bucket size for byte values (0 to 255). 36 | */ 37 | public BucketSort() { 38 | this.count = new int[256]; 39 | } 40 | 41 | /** 42 | * Constructs a {@code BucketSort} object with a custom bucket size determined by the logarithm of the maximum value. 43 | * 44 | * @param logMaxValue the logarithm (base 2) of the maximum value to be sorted. 45 | * Must be between 2 and 16 (inclusive). 46 | * @throws IllegalArgumentException if the {@code logMaxValue} is less than 2 or greater than 16. 47 | */ 48 | public BucketSort(int logMaxValue) { 49 | if (logMaxValue < 2) 50 | throw new IllegalArgumentException("The log data size parameter must be at least 2"); 51 | 52 | if (logMaxValue > 16) 53 | throw new IllegalArgumentException("The log data size parameter must be at most 16"); 54 | 55 | this.count = new int[1 << logMaxValue]; // Array size determined by the max value (logMaxValue) 56 | } 57 | 58 | /** 59 | * Sorts an array of integers using the bucket sort algorithm. 60 | * 61 | *The sorting works by counting the frequency of each integer in the input array, then placing the integers back into 62 | * the array in sorted order.
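A minimal usage sketch for this sorter; note that the counting loops index the count array directly with the input values, so the values must be non-negative and smaller than 1 << logMaxValue:

    int[] values = { 3, 7, 1, 0, 7, 2, 5 };
    BucketSort sorter = new BucketSort(3);             // buckets for values in [0..7]
    boolean ok = sorter.sort(values, 0, values.length);
    // ok is true and values is now { 0, 1, 2, 3, 5, 7, 7 }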
63 | * 64 | * @param input the array of integers to be sorted. 65 | * @param blkptr the starting index in the array to begin sorting. 66 | * @param len the length of the portion of the array to be sorted. 67 | * @return {@code true} if the sorting was successful; {@code false} if there were invalid parameters (e.g., 68 | * out-of-bounds indices or invalid length). 69 | */ 70 | @Override 71 | public boolean sort(int[] input, int blkptr, int len) { 72 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 73 | return false; 74 | 75 | if (len == 1) 76 | return true; 77 | 78 | final int len8 = len & -8; // Round down to the nearest multiple of 8 79 | final int end8 = blkptr + len8; 80 | final int[] c = this.count; // Bucket count array 81 | final int length = c.length; 82 | 83 | // Unrolled loop for efficient counting 84 | for (int i = blkptr; i < end8; i += 8) { 85 | c[input[i]]++; 86 | c[input[i + 1]]++; 87 | c[input[i + 2]]++; 88 | c[input[i + 3]]++; 89 | c[input[i + 4]]++; 90 | c[input[i + 5]]++; 91 | c[input[i + 6]]++; 92 | c[input[i + 7]]++; 93 | } 94 | 95 | // Handle remaining elements not divisible by 8 96 | for (int i = len8; i < len; i++) 97 | c[input[blkptr + i]]++; 98 | 99 | // Reconstruct the sorted array using the bucket counts 100 | for (int i = 0, j = blkptr; i < length; i++) { 101 | final int val = c[i]; 102 | 103 | if (val == 0) 104 | continue; 105 | 106 | c[i] = 0; 107 | int val8 = val & -8; 108 | 109 | for (int k = val; k > val8; k--) 110 | input[j++] = i; 111 | 112 | // Fill the remaining spots using the "8 at a time" optimization 113 | while (val8 > 0) { 114 | input[j] = i; 115 | input[j + 1] = i; 116 | input[j + 2] = i; 117 | input[j + 3] = i; 118 | input[j + 4] = i; 119 | input[j + 5] = i; 120 | input[j + 6] = i; 121 | input[j + 7] = i; 122 | j += 8; 123 | val8 -= 8; 124 | } 125 | } 126 | 127 | return true; 128 | } 129 | 130 | /** 131 | * Sorts an array of bytes using the bucket sort algorithm. 132 | * 133 | *This method behaves similarly to the integer sort method, but operates on byte values (0 to 255).
134 | * 135 | * @param input the array of bytes to be sorted. 136 | * @param blkptr the starting index in the array to begin sorting. 137 | * @param len the length of the portion of the array to be sorted. 138 | * @return {@code true} if the sorting was successful; {@code false} if there were invalid parameters ( 139 | * out-of-bounds indices or invalid length). 140 | */ 141 | @Override 142 | public boolean sort(byte[] input, int blkptr, int len) { 143 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 144 | return false; 145 | 146 | if (len == 1) 147 | return true; 148 | 149 | final int len8 = len & -8; // Round down to the nearest multiple of 8 150 | final int end8 = blkptr + len8; 151 | final int[] c = this.count; // Bucket count array 152 | final int length = c.length; 153 | 154 | // Unrolled loop for efficient counting 155 | for (int i = blkptr; i < end8; i += 8) { 156 | c[input[i] & 0xFF]++; 157 | c[input[i + 1] & 0xFF]++; 158 | c[input[i + 2] & 0xFF]++; 159 | c[input[i + 3] & 0xFF]++; 160 | c[input[i + 4] & 0xFF]++; 161 | c[input[i + 5] & 0xFF]++; 162 | c[input[i + 6] & 0xFF]++; 163 | c[input[i + 7] & 0xFF]++; 164 | } 165 | 166 | // Handle remaining elements not divisible by 8 167 | for (int i = len8; i < len; i++) 168 | c[input[blkptr + i] & 0xFF]++; 169 | 170 | // Reconstruct the sorted array using the bucket counts 171 | for (int i = 0, j = blkptr; i < length; i++) { 172 | final int val = c[i]; 173 | 174 | if (val == 0) 175 | continue; 176 | 177 | int val8 = val & -8; 178 | c[i] = 0; 179 | 180 | for (int k = val; k > val8; k--) 181 | input[j++] = (byte) i; 182 | 183 | // Fill the remaining spots using the "8 at a time" optimization 184 | while (val8 > 0) { 185 | input[j] = (byte) i; 186 | input[j + 1] = (byte) i; 187 | input[j + 2] = (byte) i; 188 | input[j + 3] = (byte) i; 189 | input[j + 4] = (byte) i; 190 | input[j + 5] = (byte) i; 191 | input[j + 6] = (byte) i; 192 | input[j + 7] = (byte) i; 193 | j += 8; 194 | val8 -= 8; 195 | } 196 | } 197 | 198 | return true; 199 | } 200 | } 201 | 202 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/DefaultArrayComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | 20 | /** 21 | * A comparator for comparing elements in an integer array. This class implements the {@link ArrayComparator} interface 22 | * and provides a mechanism to compare two elements based on their values. The comparison also accounts for stable sorting 23 | * by considering their indices when the values are equal. 24 | * 25 | *This class is immutable and thread-safe as it holds a reference to the input array but does not modify it.
26 | * 27 | *Example usage:
28 | *
29 | * int[] array = { 5, 2, 8, 1 };
30 | * DefaultArrayComparator comparator = new DefaultArrayComparator(array);
31 | * int result = comparator.compare(0, 1); // Compares array[0] (5) and array[1] (2)
32 | *
33 | *
34 | * @see ArrayComparator
35 | */
36 | public final class DefaultArrayComparator implements ArrayComparator {
37 |
38 | private final int[] array;
39 |
40 | /**
41 | * Constructs a new {@code DefaultArrayComparator} using the specified integer array.
42 | *
43 | * @param array the array to compare elements in; must not be {@code null}
44 | * @throws NullPointerException if the provided array is {@code null}
45 | */
46 | public DefaultArrayComparator(int[] array) {
47 | if (array == null)
48 | throw new NullPointerException("Invalid null array parameter");
49 |
50 | this.array = array;
51 | }
52 |
53 | /**
54 | * Compares two elements of the array at the specified indices.
55 | * 56 | * The comparison is based on the values of the elements at the provided indices. If the values are equal, 57 | * the method returns a comparison based on their indices to maintain stability in sorting. 58 | *
59 | * 60 | * @param lidx the index of the first element to compare 61 | * @param ridx the index of the second element to compare 62 | * @return a negative integer if the element at {@code lidx} is less than the element at {@code ridx}, 63 | * a positive integer if the element at {@code lidx} is greater than the element at {@code ridx}, 64 | * or zero if they are equal 65 | */ 66 | @Override 67 | public int compare(int lidx, int ridx) { 68 | int res = this.array[lidx] - this.array[ridx]; 69 | 70 | // Make the sort stable 71 | if (res == 0) 72 | res = lidx - ridx; 73 | 74 | return res; 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/HeapSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code HeapSort} class implements the heap sort algorithm, a comparison-based sorting algorithm with an average and 23 | * worst-case time complexity of O(n log n). 24 | * 25 | *Heap sort works by first building a binary heap from the input data, and then repeatedly extracting the maximum 26 | * (or minimum) element from the heap and reconstructing the heap. Although heap sort has O(n log n) time complexity, it is 27 | * often slower in practice compared to other O(n log n) algorithms such as QuickSort, due to larger constant factors.
28 | * 29 | *This implementation allows an optional custom comparator to be used for comparing array elements. If no comparator is 30 | * provided, the natural ordering of the elements is used.
31 | * 32 | *This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.
33 | */ 34 | public final class HeapSort implements IntSorter { 35 | 36 | // Comparator used for comparing elements in the array 37 | private final ArrayComparator cmp; 38 | 39 | /** 40 | * Constructs a {@code HeapSort} instance without a custom comparator. 41 | * This will use the natural ordering of the elements in the array. 42 | */ 43 | public HeapSort() { 44 | this(null); 45 | } 46 | 47 | /** 48 | * Constructs a {@code HeapSort} instance with the specified comparator. 49 | * If {@code cmp} is {@code null}, the natural ordering of the elements will be used. 50 | * 51 | * @param cmp the comparator to use for element comparisons, or {@code null} to use natural ordering. 52 | */ 53 | public HeapSort(ArrayComparator cmp) { 54 | this.cmp = cmp; 55 | } 56 | 57 | /** 58 | * Returns the comparator used by this {@code HeapSort} instance. 59 | * 60 | * @return the comparator used for element comparisons, or {@code null} if natural ordering is used. 61 | */ 62 | protected ArrayComparator getComparator() { 63 | return this.cmp; 64 | } 65 | 66 | /** 67 | * Sorts the specified portion of the input array using the heap sort algorithm. 68 | * 69 | *The sorting begins at index {@code blkptr} and sorts {@code len} elements in the array. The array is rearranged 70 | * in-place, and the elements will be sorted in ascending order.
71 | * 72 | * @param input the array to be sorted. 73 | * @param blkptr the starting index of the portion to be sorted. 74 | * @param len the number of elements to sort. 75 | * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (out-of-bounds indices). 76 | */ 77 | @Override 78 | public boolean sort(int[] input, int blkptr, int len) { 79 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 80 | return false; 81 | 82 | if (len == 1) 83 | return true; 84 | 85 | // Build the heap by calling doSort on all non-leaf nodes 86 | for (int k = len >> 1; k > 0; k--) { 87 | doSort(input, blkptr, k, len, this.cmp); 88 | } 89 | 90 | // Repeatedly extract the maximum element and reconstruct the heap 91 | for (int i = len - 1; i > 0; i--) { 92 | final int temp = input[blkptr]; 93 | input[blkptr] = input[blkptr + i]; 94 | input[blkptr + i] = temp; 95 | doSort(input, blkptr, 1, i, this.cmp); 96 | } 97 | 98 | return true; 99 | } 100 | 101 | /** 102 | * Performs a single heap sort operation on the portion of the array specified by {@code blkptr}, {@code idx}, and {@code count}. 103 | * This method ensures that the subtree rooted at {@code idx} is a valid heap. 104 | * 105 | * @param array the array to be sorted. 106 | * @param blkptr the starting index of the array to be sorted. 107 | * @param idx the index of the current node to heapify. 108 | * @param count the total number of elements in the heap. 109 | * @param cmp the comparator used for comparisons, or {@code null} to use natural ordering. 110 | */ 111 | private static void doSort(int[] array, int blkptr, int idx, int count, ArrayComparator cmp) { 112 | int k = idx; 113 | final int temp = array[blkptr + k - 1]; 114 | final int n = count >> 1; // Half the size of the heap 115 | 116 | // If a custom comparator is provided, use it for comparison 117 | if (cmp != null) { 118 | while (k <= n) { 119 | int j = k << 1; // Left child 120 | 121 | // If right child exists and is larger, use it instead 122 | if ((j < count) && (cmp.compare(array[blkptr + j - 1], array[blkptr + j]) < 0)) { 123 | j++; 124 | } 125 | 126 | // If the current node is larger than its child, break out of the loop 127 | if (temp >= array[blkptr + j - 1]) { 128 | break; 129 | } 130 | 131 | // Move the child up to the parent node 132 | array[blkptr + k - 1] = array[blkptr + j - 1]; 133 | k = j; 134 | } 135 | } 136 | // If no comparator is provided, use natural ordering (ascending order) 137 | else { 138 | while (k <= n) { 139 | int j = k << 1; // Left child 140 | 141 | // If right child exists and is larger, use it instead 142 | if ((j < count) && (array[blkptr + j - 1] < array[blkptr + j])) { 143 | j++; 144 | } 145 | 146 | // If the current node is larger than its child, break out of the loop 147 | if (temp >= array[blkptr + j - 1]) { 148 | break; 149 | } 150 | 151 | // Move the child up to the parent node 152 | array[blkptr + k - 1] = array[blkptr + j - 1]; 153 | k = j; 154 | } 155 | } 156 | 157 | // Place the original element in the correct position 158 | array[blkptr + k - 1] = temp; 159 | } 160 | } 161 | 162 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/InsertionSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code InsertionSort} class implements the insertion sort algorithm, a simple comparison-based sorting algorithm with 23 | * a worst-case time complexity of O(n²) and an average-case complexity of O(n+k), where k is the number of inversions. 24 | * This algorithm is efficient for small data sets or nearly sorted data, but is not suitable for large datasets due to its 25 | * quadratic time complexity. 26 | * 27 | *Insertion sort works by iterating through the array and repeatedly inserting each element into its correct position 28 | * relative to the elements before it. The algorithm performs well when the data is already nearly sorted, making it ideal for 29 | * small datasets or nearly sorted data.
30 | * 31 | *This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.
32 | */ 33 | public class InsertionSort implements IntSorter { 34 | 35 | // Comparator used for comparing elements in the array 36 | private final ArrayComparator cmp; 37 | 38 | /** 39 | * Constructs an {@code InsertionSort} instance without a custom comparator. 40 | * This will use the natural ordering of the elements in the array. 41 | */ 42 | public InsertionSort() { 43 | this(null); 44 | } 45 | 46 | /** 47 | * Constructs an {@code InsertionSort} instance with the specified comparator. 48 | * If {@code cmp} is {@code null}, the natural ordering of the elements will be used. 49 | * 50 | * @param cmp the comparator to use for element comparisons, or {@code null} to use natural ordering. 51 | */ 52 | public InsertionSort(ArrayComparator cmp) { 53 | this.cmp = cmp; 54 | } 55 | 56 | /** 57 | * Returns the comparator used by this {@code InsertionSort} instance. 58 | * 59 | * @return the comparator used for element comparisons, or {@code null} if natural ordering is used. 60 | */ 61 | protected ArrayComparator getComparator() { 62 | return this.cmp; 63 | } 64 | 65 | /** 66 | * Sorts the specified portion of the input array using the insertion sort algorithm. 67 | * 68 | *The sorting begins at index {@code blkptr} and sorts {@code len} elements in the array. The array is rearranged 69 | * in-place, and the elements will be sorted in ascending order.
70 | * 71 | * @param input the array to be sorted. 72 | * @param blkptr the starting index of the portion to be sorted. 73 | * @param len the number of elements to sort. 74 | * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (e.g., out-of-bounds indices). 75 | */ 76 | @Override 77 | public boolean sort(int[] input, int blkptr, int len) { 78 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 79 | return false; 80 | 81 | if (len == 1) 82 | return true; 83 | 84 | // If no comparator is provided, sort using natural ordering 85 | if (this.cmp == null) 86 | sortNoComparator(input, blkptr, blkptr + len); 87 | else 88 | sortWithComparator(input, blkptr, blkptr + len, this.cmp); 89 | 90 | return true; 91 | } 92 | 93 | /** 94 | * Performs the insertion sort on the array using the provided comparator. 95 | * This method handles the sorting for small sub-arrays and larger arrays. 96 | * 97 | * @param array the array to be sorted. 98 | * @param blkptr the starting index of the portion to be sorted. 99 | * @param end the index where the sorting should end. 100 | * @param comp the comparator used for element comparisons. 101 | */ 102 | private static void sortWithComparator(int[] array, int blkptr, int end, ArrayComparator comp) { 103 | // Shortcut for 2-element sub-array 104 | if (end == blkptr + 1) { 105 | if (comp.compare(array[blkptr], array[end]) > 0) { 106 | final int tmp = array[blkptr]; 107 | array[blkptr] = array[end]; 108 | array[end] = tmp; 109 | } 110 | return; 111 | } 112 | 113 | // Shortcut for 3-element sub-array 114 | if (end == blkptr + 2) { 115 | final int a1 = array[blkptr]; 116 | final int a2 = array[blkptr + 1]; 117 | final int a3 = array[end]; 118 | 119 | if (comp.compare(a1, a2) <= 0) { 120 | if (comp.compare(a2, a3) <= 0) 121 | return; 122 | 123 | if (comp.compare(a3, a1) <= 0) { 124 | array[blkptr] = a3; 125 | array[blkptr + 1] = a1; 126 | array[end] = a2; 127 | return; 128 | } 129 | 130 | array[blkptr + 1] = a3; 131 | array[end] = a2; 132 | } else { 133 | if (comp.compare(a1, a3) <= 0) { 134 | array[blkptr] = a2; 135 | array[blkptr + 1] = a1; 136 | return; 137 | } 138 | 139 | if (comp.compare(a3, a2) <= 0) { 140 | array[blkptr] = a3; 141 | array[end] = a1; 142 | return; 143 | } 144 | 145 | array[blkptr] = a2; 146 | array[blkptr + 1] = a3; 147 | array[end] = a1; 148 | } 149 | return; 150 | } 151 | 152 | // Regular case for arrays with more than 3 elements 153 | for (int i = blkptr; i < end; i++) { 154 | final int val = array[i]; 155 | int j = i; 156 | 157 | while ((j > blkptr) && (comp.compare(array[j - 1], val) > 0)) { 158 | array[j] = array[j - 1]; 159 | j--; 160 | } 161 | 162 | array[j] = val; 163 | } 164 | } 165 | 166 | /** 167 | * Performs the insertion sort on the array using natural ordering (i.e., no comparator). 168 | * This method handles the sorting for small sub-arrays and larger arrays without needing a custom comparator. 169 | * 170 | * @param array the array to be sorted. 171 | * @param blkptr the starting index of the portion to be sorted. 172 | * @param end the index where the sorting should end. 
173 |      */
174 |     private static void sortNoComparator(int[] array, int blkptr, int end) {
175 |         // Shortcut for 2-element sub-array
176 |         if (end == blkptr + 2) {
177 |             if (array[blkptr] > array[end - 1]) {
178 |                 final int tmp = array[blkptr];
179 |                 array[blkptr] = array[end - 1];
180 |                 array[end - 1] = tmp;
181 |             }
182 |             return;
183 |         }
184 | 
185 |         // Shortcut for 3-element sub-array
186 |         if (end == blkptr + 3) {
187 |             final int a1 = array[blkptr];
188 |             final int a2 = array[blkptr + 1];
189 |             final int a3 = array[end - 1];
190 | 
191 |             if (a1 <= a2) {
192 |                 if (a2 <= a3)
193 |                     return;
194 | 
195 |                 if (a3 <= a1) {
196 |                     array[blkptr] = a3;
197 |                     array[blkptr + 1] = a1;
198 |                     array[end - 1] = a2;
199 |                     return;
200 |                 }
201 | 
202 |                 array[blkptr + 1] = a3;
203 |                 array[end - 1] = a2;
204 |             } else {
205 |                 if (a1 <= a3) {
206 |                     array[blkptr] = a2;
207 |                     array[blkptr + 1] = a1;
208 |                     return;
209 |                 }
210 | 
211 |                 if (a3 <= a2) {
212 |                     array[blkptr] = a3;
213 |                     array[end - 1] = a1;
214 |                     return;
215 |                 }
216 | 
217 |                 array[blkptr] = a2;
218 |                 array[blkptr + 1] = a3;
219 |                 array[end - 1] = a1;
220 |             }
221 |             return;
222 |         }
223 | 
224 |         // Regular case for arrays with more than 3 elements
225 |         for (int i = blkptr; i < end; i++) {
226 |             final int val = array[i];
227 |             int j = i;
228 | 
229 |             while ((j > blkptr) && (array[j - 1] > val)) {
230 |                 array[j] = array[j - 1];
231 |                 j--;
232 |             }
233 | 
234 |             array[j] = val;
235 |         }
236 |     }
237 | }
238 | 
239 | 
--------------------------------------------------------------------------------
/java/src/main/java/io/github/flanglet/kanzi/util/sort/MergeSort.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2011-2025 Frederic Langlet
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | you may obtain a copy of the License at
6 | 
7 |     http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | package io.github.flanglet.kanzi.util.sort;
16 | 
17 | import io.github.flanglet.kanzi.IntSorter;
18 | 
19 | /**
20 |  * The {@code MergeSort} class implements the merge sort algorithm, which is a divide-and-conquer comparison-based sorting
21 |  * algorithm. Merge sort divides the input array into smaller sub-arrays, recursively sorts each sub-array, and then merges
22 |  * the sorted sub-arrays back together. While conceptually simple, it is usually not very performant for smaller arrays due
23 |  * to its recursive nature. However, merge sort is known for its stable sorting and predictable O(n log n) time complexity.
24 |  *
25 |  * <p>Merge sort is efficient for large datasets and nearly sorted data, but it can require significant memory overhead
26 |  * due to the need for auxiliary space to store the merged sub-arrays. This implementation uses insertion sort for small
27 |  * sub-arrays to improve performance on small or nearly sorted datasets.
28 |  *
29 |  * <p>This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.
30 |  */
31 | public class MergeSort implements IntSorter {
32 | 
33 |     // Threshold for switching to insertion sort on small arrays
34 |     private static final int SMALL_ARRAY_THRESHOLD = 32;
35 | 
36 |     // Temporary buffer for merging
37 |     private int[] buffer;
38 | 
39 |     // Insertion sort used for small arrays
40 |     private final IntSorter insertionSort;
41 | 
42 |     /**
43 |      * Constructs a new {@code MergeSort} instance. This constructor initializes an empty buffer for merging and
44 |      * uses an {@code InsertionSort} instance for sorting small arrays.
45 |      */
46 |     public MergeSort() {
47 |         this.buffer = new int[0];
48 |         this.insertionSort = new InsertionSort();
49 |     }
50 | 
51 |     /**
52 |      * Sorts the specified portion of the input array using the merge sort algorithm.
53 |      *
54 |      * <p>This method divides the array into smaller sub-arrays, recursively sorts them using merge sort, and then
55 |      * merges the sorted sub-arrays back together. For small sub-arrays (fewer than {@code SMALL_ARRAY_THRESHOLD} elements),
56 |      * insertion sort is used for efficiency.
57 |      *
58 |      * @param data the array to be sorted.
59 |      * @param start the starting index of the portion to be sorted.
60 |      * @param count the number of elements to sort.
61 |      * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (e.g., a {@code null} array or out-of-bounds indices).
62 |      */
63 |     @Override
64 |     public boolean sort(int[] data, int start, int count) {
65 |         if ((data == null) || (count < 0) || (start < 0))
66 |             return false;
67 | 
68 |         if (start + count > data.length)
69 |             return false;
70 | 
71 |         if (count < 2)
72 |             return true;
73 | 
74 |         // Ensure the buffer can hold the merged range: merge() addresses it with absolute indices up to start + count - 1
75 |         if (this.buffer.length < start + count)
76 |             this.buffer = new int[start + count];
77 | 
78 |         return this.mergesort(data, start, start + count - 1);
79 |     }
80 | 
81 |     /**
82 |      * Recursively performs merge sort on the specified sub-array.
83 |      *
84 |      * <p>This method splits the array into two halves and recursively sorts each half. Once the sub-arrays are sorted,
85 |      * they are merged together using the {@code merge} method.
86 |      *
87 |      * @param data the array to be sorted.
88 |      * @param low the starting index of the sub-array to sort.
89 |      * @param high the ending index (inclusive) of the sub-array to sort.
90 |      * @return {@code true} if the sorting was successful.
91 |      */
92 |     private boolean mergesort(int[] data, int low, int high) {
93 |         if (low < high) {
94 |             int count = high - low + 1;
95 | 
96 |             // Use insertion sort for small sub-arrays
97 |             if (count < SMALL_ARRAY_THRESHOLD)
98 |                 return this.insertionSort.sort(data, low, count);
99 | 
100 |             int middle = low + count / 2;
101 |             this.mergesort(data, low, middle);
102 |             this.mergesort(data, middle + 1, high);
103 |             this.merge(data, low, middle, high);
104 |         }
105 | 
106 |         return true;
107 |     }
108 | 
109 |     /**
110 |      * Merges two sorted sub-arrays into one sorted array.
111 |      *
112 |      * <p>This method performs the merging step of merge sort. It copies the sorted elements from the left and right halves
113 |      * of the sub-array into a temporary buffer and then merges them back into the original array.
114 |      *
115 |      * @param data the array containing the sub-arrays to merge.
116 |      * @param low the starting index of the left sub-array.
117 |      * @param middle the ending index of the left sub-array.
118 |      * @param high the ending index of the right sub-array.
119 |      */
120 |     private void merge(int[] data, int low, int middle, int high) {
121 |         int count = high - low + 1;
122 | 
123 |         // For small sub-arrays, copy the elements into the buffer
124 |         if (count < 16) {
125 |             for (int ii = low; ii <= high; ii++)
126 |                 this.buffer[ii] = data[ii];
127 |         } else {
128 |             // For larger sub-arrays, use System.arraycopy for efficiency
129 |             System.arraycopy(data, low, this.buffer, low, count);
130 |         }
131 | 
132 |         int i = low;
133 |         int j = middle + 1;
134 |         int k = low;
135 | 
136 |         // Merge the two sorted sub-arrays
137 |         while ((i <= middle) && (j <= high)) {
138 |             if (this.buffer[i] <= this.buffer[j])
139 |                 data[k] = this.buffer[i++];
140 |             else
141 |                 data[k] = this.buffer[j++];
142 | 
143 |             k++;
144 |         }
145 | 
146 |         count = middle - i + 1;
147 | 
148 |         // Copy the remaining elements of the left sub-array, if any (remaining right-half elements are already in place in data)
149 |         if (count < 16) {
150 |             while (i <= middle)
151 |                 data[k++] = this.buffer[i++];
152 |         } else {
153 |             // Use System.arraycopy for efficiency
154 |             System.arraycopy(this.buffer, i, data, k, count);
155 |         }
156 |     }
157 | }
158 | 
159 | 
--------------------------------------------------------------------------------
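
A minimal usage sketch (not a file from the repository) showing how the two sorters above are driven through the IntSorter interface, once with natural ordering and once with a custom comparator. The SortDemo class name is hypothetical, and the sketch assumes ArrayComparator declares a single int compare(int, int) method, which is how InsertionSort invokes it above.

import java.util.Arrays;

import io.github.flanglet.kanzi.ArrayComparator;
import io.github.flanglet.kanzi.IntSorter;
import io.github.flanglet.kanzi.util.sort.InsertionSort;
import io.github.flanglet.kanzi.util.sort.MergeSort;

// Hypothetical demo class, not part of the kanzi sources.
public class SortDemo {
    public static void main(String[] args) {
        int[] data = { 42, 7, 19, 3, 88, 23, 5, 61, 14, 9 };

        // Sort 6 elements starting at index 2 in ascending order with merge sort.
        IntSorter mergeSorter = new MergeSort();
        boolean ok = mergeSorter.sort(data, 2, 6);
        System.out.println(ok + " -> " + Arrays.toString(data));

        // Sort the whole array in descending order with insertion sort,
        // using a reversing comparator (assumed single-method interface).
        ArrayComparator descending = new ArrayComparator() {
            @Override
            public int compare(int a, int b) {
                return Integer.compare(b, a);
            }
        };
        IntSorter insertionSorter = new InsertionSort(descending);
        ok = insertionSorter.sort(data, 0, data.length);
        System.out.println(ok + " -> " + Arrays.toString(data));
    }
}

Both sort calls report invalid arguments by returning false rather than throwing, so the boolean result should be checked when the (start, count) range is computed at runtime.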