├── .github
│   └── workflows
│       ├── ant.yml
│       ├── codeql.yml
│       ├── maven.yaml
│       └── sonar.yaml
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── SECURITY.md
└── java
    ├── .gitignore
    ├── MANIFEST.MF
    ├── README.md
    ├── build.xml
    ├── pom.xml
    └── src
        ├── main
        │   └── java
        │       └── io
        │           └── github
        │               └── flanglet
        │                   └── kanzi
        │                       ├── ArrayComparator.java
        │                       ├── BitStreamException.java
        │                       ├── ByteSorter.java
        │                       ├── ByteTransform.java
        │                       ├── EntropyDecoder.java
        │                       ├── EntropyEncoder.java
        │                       ├── Error.java
        │                       ├── Event.java
        │                       ├── Global.java
        │                       ├── InputBitStream.java
        │                       ├── IntSorter.java
        │                       ├── IntTransform.java
        │                       ├── Listener.java
        │                       ├── Magic.java
        │                       ├── Memory.java
        │                       ├── OutputBitStream.java
        │                       ├── Predictor.java
        │                       ├── SliceByteArray.java
        │                       ├── SliceIntArray.java
        │                       ├── app
        │                       │   ├── Benchmark.java
        │                       │   ├── BlockCompressor.java
        │                       │   ├── BlockDecompressor.java
        │                       │   ├── InfoPrinter.java
        │                       │   └── Kanzi.java
        │                       ├── bitstream
        │                       │   ├── DebugInputBitStream.java
        │                       │   ├── DebugOutputBitStream.java
        │                       │   ├── DefaultInputBitStream.java
        │                       │   └── DefaultOutputBitStream.java
        │                       ├── entropy
        │                       │   ├── ANSRangeDecoder.java
        │                       │   ├── ANSRangeEncoder.java
        │                       │   ├── BinaryEntropyDecoder.java
        │                       │   ├── BinaryEntropyEncoder.java
        │                       │   ├── CMDecoder.java
        │                       │   ├── CMEncoder.java
        │                       │   ├── CMPredictor.java
        │                       │   ├── EntropyCodecFactory.java
        │                       │   ├── EntropyUtils.java
        │                       │   ├── ExpGolombDecoder.java
        │                       │   ├── ExpGolombEncoder.java
        │                       │   ├── FPAQDecoder.java
        │                       │   ├── FPAQEncoder.java
        │                       │   ├── FastLogisticAdaptiveProbMap.java
        │                       │   ├── HuffmanCommon.java
        │                       │   ├── HuffmanDecoder.java
        │                       │   ├── HuffmanEncoder.java
        │                       │   ├── LinearAdaptiveProbMap.java
        │                       │   ├── LogisticAdaptiveProbMap.java
        │                       │   ├── NullEntropyDecoder.java
        │                       │   ├── NullEntropyEncoder.java
        │                       │   ├── RangeDecoder.java
        │                       │   ├── RangeEncoder.java
        │                       │   └── TPAQPredictor.java
        │                       ├── io
        │                       │   ├── CompressedInputStream.java
        │                       │   ├── CompressedOutputStream.java
        │                       │   ├── IOException.java
        │                       │   ├── IOUtil.java
        │                       │   └── NullOutputStream.java
        │                       ├── module-info.java
        │                       ├── transform
        │                       │   ├── AliasCodec.java
        │                       │   ├── BWT.java
        │                       │   ├── BWTBlockCodec.java
        │                       │   ├── BWTS.java
        │                       │   ├── DivSufSort.java
        │                       │   ├── EXECodec.java
        │                       │   ├── FSDCodec.java
        │                       │   ├── LZCodec.java
        │                       │   ├── NullTransform.java
        │                       │   ├── RLT.java
        │                       │   ├── ROLZCodec.java
        │                       │   ├── SA_IS.java
        │                       │   ├── SBRT.java
        │                       │   ├── SRT.java
        │                       │   ├── Sequence.java
        │                       │   ├── TextCodec.java
        │                       │   ├── TransformFactory.java
        │                       │   ├── UTFCodec.java
        │                       │   └── ZRLT.java
        │                       └── util
        │                           ├── LyndonWords.java
        │                           ├── hash
        │                           │   ├── XXHash32.java
        │                           │   └── XXHash64.java
        │                           └── sort
        │                               ├── BucketSort.java
        │                               ├── DefaultArrayComparator.java
        │                               ├── HeapSort.java
        │                               ├── InsertionSort.java
        │                               ├── MergeSort.java
        │                               ├── QuickSort.java
        │                               └── RadixSort.java
        └── test
            └── java
                └── io
                    └── github
                        └── flanglet
                            └── kanzi
                                └── test
                                    ├── TestBWT.java
                                    ├── TestCompressedStream.java
                                    ├── TestDefaultBitStream.java
                                    ├── TestEntropyCodec.java
                                    └── TestTransforms.java
/.github/workflows/ant.yml:
--------------------------------------------------------------------------------
1 | # This workflow will build a Java project with Ant
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-ant
3 |
4 | name: Java CI
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v5
19 | - name: Set up JDK 17
20 | uses: actions/setup-java@v5
21 | with:
22 | java-version: '17'
23 | distribution: 'adopt'
24 | - name: Build with Ant
25 | run: cd java && ant -noinput -buildfile build.xml
26 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "master" ]
8 | schedule:
9 | - cron: "35 16 * * 2"
10 |
11 | jobs:
12 | analyze:
13 | name: Analyze
14 | runs-on: ubuntu-latest
15 | permissions:
16 | actions: read
17 | contents: read
18 | security-events: write
19 |
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | language: [ java ]
24 |
25 | steps:
26 | - name: Checkout
27 | uses: actions/checkout@v5
28 |
29 | - name: Initialize CodeQL
30 | uses: github/codeql-action/init@v3
31 | with:
32 | languages: ${{ matrix.language }}
33 | queries: +security-and-quality
34 |
35 | - name: Build with Ant
36 | run: cd java && ant -noinput -buildfile build.xml
37 |
38 | - name: Perform CodeQL Analysis
39 | uses: github/codeql-action/analyze@v3
40 | with:
41 | category: "/language:${{ matrix.language }}"
42 |
--------------------------------------------------------------------------------
/.github/workflows/maven.yaml:
--------------------------------------------------------------------------------
1 | # This workflow will build a Java project with Maven
2 |
3 | name: Java CI
4 |
5 | on:
6 | push:
7 | branches: [ master ]
8 | pull_request:
9 | branches: [ master ]
10 |
11 | jobs:
12 | build:
13 |
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 | - uses: actions/checkout@v5
18 | - name: Set up JDK 17
19 | uses: actions/setup-java@v5
20 | with:
21 | java-version: '17'
22 | distribution: 'adopt'
23 | - name: Build with Maven
24 | run: cd java && mvn
25 |
--------------------------------------------------------------------------------
/.github/workflows/sonar.yaml:
--------------------------------------------------------------------------------
1 | name: SonarCloud
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | types: [opened, synchronize, reopened]
9 |
10 | jobs:
11 | build:
12 | name: Build and analyze
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v5
16 | with:
17 | fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
18 | - name: Set up JDK 17
19 | uses: actions/setup-java@v5
20 | with:
21 | java-version: 17
22 | distribution: 'zulu' # Alternative distribution options are available.
23 | - name: Cache SonarCloud packages
24 | uses: actions/cache@v3
25 | with:
26 | path: ~/.sonar/cache
27 | key: ${{ runner.os }}-sonar
28 | restore-keys: ${{ runner.os }}-sonar
29 | - name: Cache Maven packages
30 | uses: actions/cache@v3
31 | with:
32 | path: ~/.m2
33 | key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
34 | restore-keys: ${{ runner.os }}-m2
35 | - name: Build and analyze
36 | env:
37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any
38 | SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
39 | run: cd java && mvn -Dmaven.test.skip=true -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=flanglet_kanzi
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.zip
3 | *.xml
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | install: cd java && mvn compile
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Kanzi
3 |
4 | Kanzi is a modern, modular, expandable, and efficient lossless data compressor written in Java.
5 |
6 | * Modern: Kanzi implements state-of-the-art compression algorithms and is built to fully utilize multi-core CPUs via built-in multi-threading.
7 | * Modular: Entropy codecs and data transforms can be selected and combined at runtime to best suit the specific data being compressed.
8 | * Expandable: A clean, interface-driven design—with no external dependencies—makes Kanzi easy to integrate, extend, and customize.
9 | * Efficient: Carefully optimized to balance compression ratio and speed for practical, high-performance usage.
10 |
11 | Unlike most mainstream lossless compressors, Kanzi is not limited to a single compression paradigm. By combining multiple algorithms and techniques, it supports a broader range of compression ratios and adapts better to diverse data types.
12 |
13 | Most traditional compressors underutilize modern hardware by running single-threaded—even on machines with many cores. Kanzi, in contrast, is concurrent by design, compressing multiple blocks in parallel across threads for significant performance gains. However, it is not compatible with standard compression formats.
14 |
15 | It’s important to note that Kanzi is a data compressor, not an archiver. It includes optional checksums for verifying data integrity, but does not provide features like cross-file deduplication or data recovery mechanisms. That said, it produces a seekable bitstream—meaning one or more consecutive blocks can be decompressed independently, without needing to process the entire stream.
16 |
17 |
18 | For more details, check the [Wiki](https://github.com/flanglet/kanzi/wiki), the [Q&A](https://github.com/flanglet/kanzi/wiki/Q&A) and [DeepWiki](https://deepwiki.com/flanglet/kanzi).
19 |
20 | See how to reuse the code here: https://github.com/flanglet/kanzi/wiki/Using-and-extending-the-code
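The wiki page above documents the embedding API. As a rough sketch only, in-process compression and decompression revolve around `CompressedOutputStream` and `CompressedInputStream` (package `io.github.flanglet.kanzi.io` in the tree above); the constructor arguments and option keys below are illustrative assumptions, not the documented API:

```java
import io.github.flanglet.kanzi.io.CompressedInputStream;
import io.github.flanglet.kanzi.io.CompressedOutputStream;

import java.io.*;
import java.util.HashMap;
import java.util.Map;

public class RoundTrip {
    public static void main(String[] args) throws IOException {
        // Option keys ("transform", "entropy", "jobs") are assumed names; check the wiki.
        Map<String, Object> options = new HashMap<>();
        options.put("transform", "TEXT+UTF+RLT"); // chain of block transforms
        options.put("entropy", "TPAQ");           // entropy codec
        options.put("jobs", 4);                   // number of worker threads

        byte[] payload = "hello kanzi".getBytes();

        // Compress a small payload to a file.
        try (OutputStream os = new FileOutputStream("data.knz");
             CompressedOutputStream cos = new CompressedOutputStream(os, options)) {
            cos.write(payload);
        }

        // Decompress it back; the bitstream header carries the codec/transform choices.
        try (InputStream is = new FileInputStream("data.knz");
             CompressedInputStream cis = new CompressedInputStream(is, new HashMap<>())) {
            byte[] buf = new byte[payload.length];
            int n = cis.read(buf);

            if (n > 0)
                System.out.println(new String(buf, 0, n));
        }
    }
}
```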
21 |
22 | There is a C++ implementation available here: https://github.com/flanglet/kanzi-cpp
23 |
24 | There is a Go implementation available here: https://github.com/flanglet/kanzi-go
25 |
26 |
27 | 
28 | [Quality gate on SonarCloud](https://sonarcloud.io/summary/new_code?id=flanglet_kanzi)
29 |
30 |
32 |
33 | [License](LICENSE)
34 | [DeepWiki](https://deepwiki.com/flanglet/kanzi)
35 |
36 |
37 | ## Why Kanzi
38 |
39 |
40 | There are already many excellent, open-source lossless data compressors available.
41 |
42 | If gzip is beginning to show its age, modern alternatives like **zstd** and **brotli** offer compelling replacements. Both are open-source, standardized, and used daily by millions. **Zstd** is especially notable for its exceptional speed and is often the best choice in general-purpose compression.
43 |
44 | However, there are scenarios where **Kanzi** may offer superior performance:
45 |
46 | While gzip, LZMA, brotli, and zstd are all based on LZ (Lempel-Ziv) compression, they are inherently limited in the compression ratios they can achieve. **Kanzi** goes further by incorporating **BWT (Burrows-Wheeler Transform)** and **CM (Context Modeling)**, which can outperform traditional LZ-based methods in certain cases.
47 |
48 | LZ-based compressors are ideal for software distribution, where data is compressed once and decompressed many times, thanks to their fast decompression speeds—though they tend to be slower when compressing at higher ratios. But in other scenarios—such as real-time data generation, one-off data transfers, or backups—**compression speed becomes critical**. Here, Kanzi can shine.
49 |
50 | **Kanzi** also features a suite of built-in, customizable data transforms tailored for specific data types (e.g., multimedia, UTF, text, DNA, etc.), which can be selectively applied during compression for better efficiency.
51 |
52 | Furthermore, Kanzi is designed to **leverage modern multi-core CPUs** to boost performance.
53 |
54 | Finally, **extensibility** is a key strength: implementing new transforms or entropy codecs—whether for experimentation or to improve performance on niche data types—is straightforward and developer-friendly.
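For instance, a new block transform only has to implement the `ByteTransform` interface from the root package (visible in the source tree above). The sketch below assumes that interface exposes `forward`/`inverse` over `SliceByteArray` plus `getMaxEncodedLength`; treat it as an illustration rather than the exact contract:

```java
import io.github.flanglet.kanzi.ByteTransform;
import io.github.flanglet.kanzi.SliceByteArray;

// Toy transform: XOR every byte with a constant. Not useful for compression,
// but it shows the shape of a pluggable transform.
public class XorTransform implements ByteTransform {
    private static final byte KEY = (byte) 0x5A;

    @Override
    public boolean forward(SliceByteArray src, SliceByteArray dst) {
        if ((src == null) || (dst == null) || (dst.array.length - dst.index < src.length))
            return false;

        for (int i = 0; i < src.length; i++)
            dst.array[dst.index + i] = (byte) (src.array[src.index + i] ^ KEY);

        // Advance the slice indices past the processed data.
        src.index += src.length;
        dst.index += src.length;
        return true;
    }

    @Override
    public boolean inverse(SliceByteArray src, SliceByteArray dst) {
        // XOR is its own inverse.
        return this.forward(src, dst);
    }

    @Override
    public int getMaxEncodedLength(int srcLength) {
        return srcLength; // this transform never expands the data
    }
}
```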
55 |
56 | ## Benchmarks
57 |
58 | Test machine:
59 |
62 | Apple M3 24 GB Sonoma 14.6.1
63 |
64 | Kanzi version 2.4.0 Java implementation
65 |
66 | JDK 23.0.1+11-39
67 |
68 | On this machine, Kanzi uses 4 threads (half of CPUs by default).
69 |
70 | bzip3 runs with 4 threads.
71 |
72 | zstd and lz4 use 4 threads for compression and 1 for decompression; the other compressors are single-threaded.
73 |
74 | The default block size at level 9 is 32 MB, which severely limits the number of threads
75 | in use (especially with enwik8), but all tests are performed with default values.
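To put numbers on that: with 32 MB blocks, enwik8 (100,000,000 bytes) is split into only about ⌈100,000,000 / 33,554,432⌉ = 3 blocks, so at most three threads can be busy at level 9 regardless of how many are requested.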
76 |
77 |
78 | ### silesia.tar
79 |
80 | Download at http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
81 |
82 | | Compressor | Encoding (ms) | Decoding (ms) | Size |
83 | |---------------------------------|-----------------|-----------------|------------------|
84 | |Original | | | 211,957,760 |
85 | |s2 -cpu 4 | 179 | 294 | 86,892,891 |
86 | |**Kanzi -l 1** | **839** | **263** | 80,245,856 |
87 | |lz4 1.1.10 -T4 -4 | 527 | 121 | 79,919,901 |
88 | |zstd 1.5.8 -T4 -2 | 147 | 150 | 69,410,383 |
89 | |**Kanzi -l 2** | **701** | **437** | 68,860,099 |
90 | |brotli 1.1.0 -2 | 907 | 402 | 68,039,159 |
91 | |Apple gzip 430.140.2 -9 | 10406 | 273 | 67,648,481 |
92 | |**Kanzi -l 3** | **1258** | **503** | 64,266,936 |
93 | |zstd 1.5.8 -T4 -5 | 300 | 154 | 62,851,716 |
94 | |**Kanzi -l 4** | **1718** | **912** | 61,131,554 |
95 | |zstd 1.5.8 -T4 -9 | 752 | 137 | 59,190,090 |
96 | |brotli 1.1.0 -6 | 3596 | 340 | 58,557,128 |
97 | |zstd 1.5.8 -T4 -13 | 4537 | 138 | 57,814,719 |
98 | |brotli 1.1.0 -9 | 19809 | 329 | 56,414,012 |
99 | |bzip2 1.0.8 -9 | 9673 | 3140 | 54,602,583 |
100 | |**Kanzi -l 5** | **3431** | **1759** | 54,025,588 |
101 | |zstd 1.5.8 -T4 -19 | 20482 | 151 | 52,858,610 |
102 | |**Kanzi -l 6**                  | **4687**        | **3710**        |     49,521,392   |
103 | |xz 5.8.1 -9 | 48516 | 1594 | 48,774,000 |
104 | |bzip3 1.5.1.r3-g428f422 -j 4 | 8559 | 3948 | 47,256,794 |
105 | |**Kanzi -l 7** | **5248** | **3689** | 47,312,772 |
106 | |**Kanzi -l 8** | **16856** | **18060** | 43,260,254 |
107 | |**Kanzi -l 9** | **24852** | **27886** | 41,858,030 |
108 |
109 |
110 |
111 | ### enwik8
112 |
113 | Download at https://mattmahoney.net/dc/enwik8.zip
114 |
115 | | Compressor | Encoding (ms) | Decoding (ms) | Size |
116 | |--------------|---------------|---------------|--------------|
117 | |Original | | | 100,000,000 |
118 | |Kanzi -l 1 | 559 | 139 | 43,644,013 |
119 | |Kanzi -l 2 | 498 | 227 | 37,570,404 |
120 | |Kanzi -l 3 | 798 | 439 | 32,466,232 |
121 | |Kanzi -l 4 | 1060 | 662 | 29,536,517 |
122 | |Kanzi -l 5   |     1422      |     790       |  26,523,940  |
123 | |Kanzi -l 6 | 1965 | 1175 | 24,076,765 |
124 | |Kanzi -l 7 | 2606 | 1787 | 22,817,360 |
125 | |Kanzi -l 8 | 7377 | 7251 | 21,181,992 |
126 | |Kanzi -l 9 | 10031 | 11412 | 20,035,144 |
127 |
128 |
129 | ## Build
130 |
131 | First option (Ant), from the `java` directory:
132 |
133 | ```ant```
134 |
135 | Second option (Maven), from the `java` directory:
136 |
137 | ```mvn -Dmaven.test.skip=true```
138 |
139 |
140 | ## Credits
141 |
142 | Matt Mahoney,
143 | Yann Collet,
144 | Jan Ondrus,
145 | Yuta Mori,
146 | Ilya Muravyov,
147 | Neal Burns,
148 | Fabian Giesen,
149 | Jarek Duda,
150 | Ilya Grebnov
151 |
152 | ## Disclaimer
153 |
154 | Use at your own risk. Always keep a copy of your original files.
155 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | Security updates are applied only to the latest release.
4 |
5 | ## Vulnerability Definition
6 |
7 | A security vulnerability is a bug that, given a certain input, triggers a crash or an infinite loop. Compression and decompression failures do not belong in this category.
8 |
9 | ## Reporting a Vulnerability
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.** If you have discovered a security vulnerability in this project, report it privately.
12 |
13 | Please disclose it at [security advisory](https://github.com/flanglet/kanzi/security/advisories/new).
14 |
15 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
16 |
17 | * Operating system
18 | * Hardware: CPU, memory
19 | * Kanzi version
20 | * Command line invoked
21 | * Error reported/crash data/log output
22 |
23 | If possible, provide a minimal reproducer.
24 |
--------------------------------------------------------------------------------
/java/.gitignore:
--------------------------------------------------------------------------------
1 | target/
--------------------------------------------------------------------------------
/java/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Built-By: flanglet
3 | Main-Class: io.github.flanglet.kanzi.app.Kanzi
4 |
5 |
--------------------------------------------------------------------------------
/java/README.md:
--------------------------------------------------------------------------------
1 | Build Kanzi
2 | ===========
3 |
4 | Run 'ant' or 'ant build_compress' to generate a JAR file with compression classes only.
5 |
6 | Run 'ant build_lib' to generate a JAR file with all classes in the tree, excluding tests.
7 |
8 | Run 'ant build_all' to generate a JAR file with all classes in the tree, including tests.
9 |
10 | For Maven, run 'mvn clean install -DskipTests'.
11 |
12 | The generated jar file is under 'target'.
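The jar is runnable directly: the manifest's Main-Class is io.github.flanglet.kanzi.app.Kanzi, so 'java -jar target/<jar-name>.jar' starts the command-line tool. The exact jar file name depends on the version declared in pom.xml (e.g. kanzi-2.4.0.jar).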
13 |
14 |
--------------------------------------------------------------------------------
/java/build.xml:
--------------------------------------------------------------------------------
1 |
2 |
22 | * Classes that implement this interface are expected to provide an implementation 23 | * of the {@code processEvent} method, which will be invoked when an event occurs. 24 | *
25 | * 26 | */ 27 | public interface Listener { 28 | 29 | /** 30 | * Processes the given event. 31 | *32 | * This method will be called whenever an event occurs that the listener is 33 | * interested in. Implementations of this method should define how to handle 34 | * the event. 35 | *
36 | * 37 | * @param evt The event to be processed. Cannot be {@code null}. 38 | */ 39 | public void processEvent(Event evt); 40 | } 41 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/OutputBitStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code OutputBitStream} interface defines methods for writing bits 20 | * to a bit stream. 21 | */ 22 | public interface OutputBitStream { 23 | 24 | /** 25 | * Writes the least significant bit of the input integer to the bit stream. 26 | * 27 | * @param bit the bit to write (0 or 1) 28 | * @throws BitStreamException if the stream is closed or if an error occurs 29 | */ 30 | public void writeBit(int bit) throws BitStreamException; 31 | 32 | /** 33 | * Writes a specified number of bits from the input long value to the bit stream. 34 | * 35 | * @param bits the long value containing the bits to write 36 | * @param length the number of bits to write (must be between 1 and 64) 37 | * @return the number of bits written 38 | * @throws BitStreamException if the stream is closed or if an error occurs 39 | */ 40 | public int writeBits(long bits, int length) throws BitStreamException; 41 | 42 | /** 43 | * Writes bits from a byte array to the bit stream starting at the specified index. 44 | * 45 | * @param bits the byte array containing the bits to write 46 | * @param start the starting index in the byte array 47 | * @param nbBits the number of bits to write 48 | * @return the number of bits written 49 | * @throws BitStreamException if the stream is closed or if an error occurs 50 | */ 51 | public int writeBits(byte[] bits, int start, int nbBits) throws BitStreamException; 52 | 53 | /** 54 | * Closes the bit stream and releases any resources associated with it. 55 | * 56 | * @throws BitStreamException if an error occurs while closing the stream 57 | */ 58 | public void close() throws BitStreamException; 59 | 60 | /** 61 | * Returns the total number of bits that have been written to the stream. 62 | * 63 | * @return the number of bits written 64 | */ 65 | public long written(); 66 | } 67 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/Predictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | /** 19 | * The {@code Predictor} interface is used by a binary entropy coder to 20 | * predict the probabilities of 0 and 1 symbols in the input signal. 21 | * 22 | *Implementations of this interface should maintain a probability model 23 | * that can be updated based on input bits and can provide a split value 24 | * representing the predicted probability of the next bit being 1.
25 | */ 26 | public interface Predictor { 27 | 28 | /** 29 | * Updates the probability model based on the provided bit. 30 | * 31 | * @param bit the bit to update the model with (0 or 1) 32 | */ 33 | public void update(int bit); 34 | 35 | /** 36 | * Returns a split value representing the probability of the next bit being 1. 37 | * The returned value is in the range of [0..4095], where a value of 38 | * 410 roughly corresponds to a probability of 10% for the next bit being 1. 39 | * 40 | * @return the split value representing the probability of 1 41 | */ 42 | public int get(); 43 | } 44 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/SliceByteArray.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | import java.util.Objects; 19 | 20 | 21 | /** 22 | * A lightweight implementation of a byte array slice. 23 | * 24 | *This class provides a way to manage a portion of a byte array, allowing 25 | * for the representation of a subset of the array with a specified length and 26 | * starting index. This can be useful for handling byte data efficiently without 27 | * creating multiple copies.
28 | */ 29 | public final class SliceByteArray { 30 | public byte[] array; // array.length is the slice capacity 31 | public int length; 32 | public int index; 33 | 34 | /** 35 | * Constructs an empty {@code SliceByteArray} with a zero-length array. 36 | */ 37 | public SliceByteArray() { 38 | this(new byte[0], 0, 0); 39 | } 40 | 41 | /** 42 | * Constructs a {@code SliceByteArray} with the specified array and index. 43 | * 44 | * @param array the byte array 45 | * @param idx the starting index of the slice 46 | * @throws NullPointerException if the provided array is null 47 | * @throws NullPointerException if the provided index is negative 48 | */ 49 | public SliceByteArray(byte[] array, int idx) { 50 | if (array == null) 51 | throw new NullPointerException("The array cannot be null"); 52 | if (idx < 0) 53 | throw new NullPointerException("The index cannot be negative"); 54 | 55 | this.array = array; 56 | this.length = array.length; 57 | this.index = idx; 58 | } 59 | 60 | /** 61 | * Constructs a {@code SliceByteArray} with the specified array, length, and index. 62 | * 63 | * @param array the byte array 64 | * @param length the length of the slice 65 | * @param idx the starting index of the slice 66 | * @throws NullPointerException if the provided array is null 67 | * @throws IllegalArgumentException if the provided length is negative 68 | * @throws NullPointerException if the provided index is negative 69 | */ 70 | public SliceByteArray(byte[] array, int length, int idx) { 71 | if (array == null) 72 | throw new NullPointerException("The array cannot be null"); 73 | if (length < 0) 74 | throw new IllegalArgumentException("The length cannot be negative"); 75 | if (idx < 0) 76 | throw new NullPointerException("The index cannot be negative"); 77 | 78 | this.array = array; 79 | this.length = length; 80 | this.index = idx; 81 | } 82 | 83 | @Override 84 | public boolean equals(Object o) { 85 | try { 86 | if (o == null) 87 | return false; 88 | if (this == o) 89 | return true; 90 | 91 | SliceByteArray sa = (SliceByteArray) o; 92 | return (this.array == sa.array) && 93 | (this.length == sa.length) && 94 | (this.index == sa.index); 95 | } catch (ClassCastException e) { 96 | return false; 97 | } 98 | } 99 | 100 | @Override 101 | public int hashCode() { 102 | return Objects.hashCode(this.array); 103 | } 104 | 105 | @Override 106 | @SuppressWarnings("lgtm [java/print-array]") 107 | public String toString() { 108 | StringBuilder builder = new StringBuilder(100); 109 | builder.append("[ data="); 110 | builder.append(String.valueOf(this.array)); 111 | builder.append(", len="); 112 | builder.append(this.length); 113 | builder.append(", idx="); 114 | builder.append(this.index); 115 | builder.append("]"); 116 | return builder.toString(); 117 | } 118 | 119 | /** 120 | * Validates the provided {@code SliceByteArray} instance. 
121 | * 122 | * @param sa the {@code SliceByteArray} to validate 123 | * @return {@code true} if the instance is valid, {@code false} otherwise 124 | */ 125 | public static boolean isValid(SliceByteArray sa) { 126 | if (sa == null) 127 | return false; 128 | if (sa.array == null) 129 | return false; 130 | if (sa.index < 0) 131 | return false; 132 | if (sa.length < 0) 133 | return false; 134 | 135 | return (sa.index <= sa.array.length); 136 | } 137 | } 138 | 139 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/SliceIntArray.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi; 17 | 18 | import java.util.Objects; 19 | 20 | /** 21 | * A lightweight implementation of a slice for an integer array. 22 | * 23 | *This class allows for managing a portion of an integer array, providing 24 | * a means to represent a subset of the array with a specified length and 25 | * starting index. This can be useful for efficiently handling integer data 26 | * without creating multiple copies.
27 | */ 28 | public final class SliceIntArray { 29 | public int[] array; // array.length is the slice capacity 30 | public int index; 31 | public int length; 32 | 33 | /** 34 | * Constructs an empty {@code SliceIntArray} with a zero-length array. 35 | */ 36 | public SliceIntArray() { 37 | this(new int[0], 0, 0); 38 | } 39 | 40 | /** 41 | * Constructs a {@code SliceIntArray} with the specified array and index. 42 | * 43 | * @param array the integer array 44 | * @param idx the starting index of the slice 45 | * @throws NullPointerException if the provided array is null 46 | * @throws NullPointerException if the provided index is negative 47 | */ 48 | public SliceIntArray(int[] array, int idx) { 49 | if (array == null) 50 | throw new NullPointerException("The array cannot be null"); 51 | if (idx < 0) 52 | throw new NullPointerException("The index cannot be negative"); 53 | 54 | this.array = array; 55 | this.length = array.length; 56 | this.index = idx; 57 | } 58 | 59 | /** 60 | * Constructs a {@code SliceIntArray} with the specified array, length, and index. 61 | * 62 | * @param array the integer array 63 | * @param length the length of the slice 64 | * @param idx the starting index of the slice 65 | * @throws NullPointerException if the provided array is null 66 | * @throws IllegalArgumentException if the provided length is negative 67 | * @throws NullPointerException if the provided index is negative 68 | */ 69 | public SliceIntArray(int[] array, int length, int idx) { 70 | if (array == null) 71 | throw new NullPointerException("The array cannot be null"); 72 | if (length < 0) 73 | throw new IllegalArgumentException("The length cannot be negative"); 74 | if (idx < 0) 75 | throw new NullPointerException("The index cannot be negative"); 76 | 77 | this.array = array; 78 | this.length = length; 79 | this.index = idx; 80 | } 81 | 82 | @Override 83 | public boolean equals(Object o) { 84 | try { 85 | if (o == null) 86 | return false; 87 | if (this == o) 88 | return true; 89 | 90 | SliceIntArray sa = (SliceIntArray) o; 91 | return (this.array == sa.array) && 92 | (this.length == sa.length) && 93 | (this.index == sa.index); 94 | } catch (ClassCastException e) { 95 | return false; 96 | } 97 | } 98 | 99 | @Override 100 | public int hashCode() { 101 | return Objects.hashCode(this.array); 102 | } 103 | 104 | @Override 105 | @SuppressWarnings("lgtm [java/print-array]") 106 | public String toString() { 107 | StringBuilder builder = new StringBuilder(100); 108 | builder.append("[ data="); 109 | builder.append(String.valueOf(this.array)); 110 | builder.append(", len="); 111 | builder.append(this.length); 112 | builder.append(", idx="); 113 | builder.append(this.index); 114 | builder.append("]"); 115 | return builder.toString(); 116 | } 117 | 118 | /** 119 | * Validates the provided {@code SliceIntArray} instance. 
120 | * 121 | * @param sa the {@code SliceIntArray} to validate 122 | * @return {@code true} if the instance is valid, {@code false} otherwise 123 | */ 124 | public static boolean isValid(SliceIntArray sa) { 125 | if (sa == null) 126 | return false; 127 | if (sa.array == null) 128 | return false; 129 | if (sa.index < 0) 130 | return false; 131 | if (sa.length < 0) 132 | return false; 133 | 134 | return (sa.index <= sa.array.length); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/app/InfoPrinter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.app; 17 | 18 | import io.github.flanglet.kanzi.Event; 19 | import java.io.PrintStream; 20 | import java.util.Map; 21 | import java.util.concurrent.ConcurrentHashMap; 22 | import io.github.flanglet.kanzi.Listener; 23 | 24 | /** 25 | * The {@code InfoPrinter} class implements the {@code Listener} interface 26 | * and provides functionality to process events and print information 27 | * about encoding or decoding processes. 28 | */ 29 | public class InfoPrinter implements Listener 30 | { 31 | /** 32 | * Enum representing the type of information to be printed. 33 | */ 34 | public enum Type 35 | { 36 | /** Represents encoding information. */ 37 | ENCODING, 38 | /** Represents decoding information. */ 39 | DECODING 40 | } 41 | 42 | private final PrintStream ps; 43 | private final Map24 | * Implementation of a Context Model based predictor. 25 | * This predictor estimates the probability of the next bit being 1 based on 26 | * a combination of different contexts and adaptive learning rates. 27 | *
28 | * 29 | *30 | * It uses multiple probability counters that are updated based on the 31 | * actual decoded bit, allowing it to adapt to the characteristics of the 32 | * input data. 33 | *
34 | */ 35 | public class CMPredictor implements Predictor { 36 | /** 37 | * The rate at which the fastest probability counter adapts. 38 | */ 39 | private static final int FAST_RATE = 2; 40 | /** 41 | * The rate at which the medium probability counter adapts. 42 | */ 43 | private static final int MEDIUM_RATE = 4; 44 | /** 45 | * The rate at which the slowest probability counter adapts. 46 | */ 47 | private static final int SLOW_RATE = 6; 48 | /** 49 | * The scaling factor for probabilities, representing the maximum possible 50 | * probability value. 51 | */ 52 | private static final int PSCALE = 65536; 53 | 54 | /** 55 | * The first context variable, derived from the previous bit. 56 | */ 57 | private int c1; 58 | /** 59 | * The second context variable, derived from the bit before the previous one. 60 | */ 61 | private int c2; 62 | /** 63 | * The current context, formed by previous bits. 64 | */ 65 | private int ctx; 66 | /** 67 | * An index used for accessing probability counters. 68 | */ 69 | private int idx; 70 | /** 71 | * A mask used to differentiate between run contexts. 72 | */ 73 | private int runMask; 74 | /** 75 | * A 2D array of probability counters, used for general context modeling. 76 | * `counter1[i][j]` stores the probability for context `i` and sub-context `j`. 77 | */ 78 | private final int[][] counter1; 79 | /** 80 | * A 2D array of probability counters, used for more specific context modeling. 81 | * `counter2[i][j]` stores the probability for context `i` and sub-context `j`. 82 | */ 83 | private final int[][] counter2; 84 | /** 85 | * A flag indicating if the bitstream version is 3 or older, which affects 86 | * probability calculation. 87 | */ 88 | private final boolean isBsVersion3; 89 | 90 | /** 91 | * Creates a new {@code CMPredictor}. 92 | *93 | * The predictor is initialized with default probability values and can be 94 | * configured with a context map to handle different bitstream versions. 95 | *
96 | * 97 | * @param ctx A map containing context information for the predictor, 98 | * e.g., "bsVersion" to specify the bitstream version. 99 | */ 100 | public CMPredictor(Map129 | * The internal counters are adjusted based on the provided bit and adaptive 130 | * learning rates. 131 | * The context is also updated for the next prediction. 132 | *
133 | * 134 | * @param bit The actual bit that was decoded (0 or 1). 135 | */ 136 | @Override 137 | public void update(int bit) { 138 | final int[] counter1_ = this.counter1[this.ctx]; 139 | final int[] counter2_ = this.counter2[this.ctx | this.runMask]; 140 | 141 | if (bit == 0) { 142 | counter1_[256] -= (counter1_[256] >> FAST_RATE); 143 | counter1_[this.c1] -= (counter1_[this.c1] >> MEDIUM_RATE); 144 | counter2_[this.idx] -= (counter2_[this.idx] >> SLOW_RATE); 145 | counter2_[this.idx + 1] -= (counter2_[this.idx + 1] >> SLOW_RATE); 146 | this.ctx += this.ctx; 147 | } else { 148 | counter1_[256] -= ((counter1_[256] - PSCALE + 16) >> FAST_RATE); 149 | counter1_[this.c1] -= ((counter1_[this.c1] - PSCALE + 16) >> MEDIUM_RATE); 150 | counter2_[this.idx] -= ((counter2_[this.idx] - PSCALE + 16) >> SLOW_RATE); 151 | counter2_[this.idx + 1] -= ((counter2_[this.idx + 1] - PSCALE + 16) >> SLOW_RATE); 152 | this.ctx += (this.ctx + 1); 153 | } 154 | 155 | if (this.ctx > 255) { 156 | this.c2 = this.c1; 157 | this.c1 = this.ctx & 0xFF; 158 | this.ctx = 1; 159 | this.runMask = (this.c1 == this.c2) ? 0x100 : 0; 160 | } 161 | } 162 | 163 | /** 164 | * Returns the predicted probability of the next bit being 1. 165 | *166 | * The prediction is an integer value in the range [0, 4095], representing the 167 | * split point 168 | * in a range coding scheme. 169 | *
170 | * 171 | * @return The predicted probability of the next bit being 1, scaled to [0, 172 | * 4095]. 173 | */ 174 | @Override 175 | public int get() { 176 | final int[] pc1 = this.counter1[this.ctx]; 177 | final int p = (13 * (pc1[256] + pc1[this.c1]) + 6 * pc1[this.c2]) >> 5; 178 | this.idx = p >>> 12; 179 | final int[] pc2 = this.counter2[this.ctx | this.runMask]; 180 | final int x1 = pc2[this.idx]; 181 | final int x2 = pc2[this.idx + 1]; 182 | 183 | if (this.isBsVersion3 == true) { 184 | final int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12); 185 | return (p + 3 * ssep + 32) >>> 6; // rescale to [0..4095] 186 | } 187 | 188 | return (p + p + 3 * (x1 + x2) + 64) >>> 7; // rescale to [0..4095] 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/ExpGolombDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyDecoder; 19 | import io.github.flanglet.kanzi.InputBitStream; 20 | 21 | // Exponential Golomb Coder 22 | public final class ExpGolombDecoder implements EntropyDecoder { 23 | private final boolean signed; 24 | private final InputBitStream bitstream; 25 | 26 | public ExpGolombDecoder(InputBitStream bitstream, boolean signed) { 27 | if (bitstream == null) 28 | throw new NullPointerException("ExpGolomb codec: Invalid null bitstream parameter"); 29 | 30 | this.signed = signed; 31 | this.bitstream = bitstream; 32 | } 33 | 34 | public boolean isSigned() { 35 | return this.signed; 36 | } 37 | 38 | public byte decodeByte() { 39 | if (this.bitstream.readBit() == 1) 40 | return 0; 41 | 42 | int log2 = 1; 43 | 44 | while (this.bitstream.readBit() == 0) 45 | log2++; 46 | 47 | if (this.signed == true) { 48 | // Decode signed: read value + sign 49 | long res = this.bitstream.readBits(log2 + 1); 50 | final long sgn = res & 1; 51 | res = (res >>> 1) + (1 << log2) - 1; 52 | return (byte) ((res - sgn) ^ -sgn); // res or -res 53 | } 54 | 55 | // Decode unsigned 56 | return (byte) ((1 << log2) - 1 + this.bitstream.readBits(log2)); 57 | } 58 | 59 | @Override 60 | public InputBitStream getBitStream() { 61 | return this.bitstream; 62 | } 63 | 64 | @Override 65 | /** 66 | * Decodes a block of data by reading it directly from the bitstream. 67 | *68 | * This method reads {@code count} bytes from the bitstream into the provided 69 | * {@code block} array. 70 | *
71 | * 72 | * @param block The byte array to decode into. 73 | * @param blkptr The starting position in the block. 74 | * @param count The number of bytes to decode. 75 | * @return The number of bytes decoded, or -1 if an error occurs (e.g., invalid 76 | * parameters). 77 | */ 78 | public int decode(byte[] block, int blkptr, int count) { 79 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 80 | return -1; 81 | 82 | final int end = blkptr + count; 83 | 84 | for (int i = blkptr; i < end; i++) 85 | block[i] = this.decodeByte(); 86 | 87 | return count; 88 | } 89 | 90 | @Override 91 | public void dispose() { 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/ExpGolombEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyEncoder; 19 | import io.github.flanglet.kanzi.OutputBitStream; 20 | 21 | /** 22 | *23 | * Implementation of an Exponential Golomb encoder. 24 | *
25 | * This encoder supports both signed and unsigned encoding of byte values. 26 | * It uses a pre-computed cache for faster encoding of common values. 27 | */ 28 | public final class ExpGolombEncoder implements EntropyEncoder { 29 | private static final int[][] CACHE_VALUES = new int[][] { 30 | // Unsigned 31 | new int[] { 32 | 513, 1538, 1539, 2564, 2565, 2566, 2567, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 4624, 33 | 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 5664, 34 | 5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676, 5677, 5678, 5679, 5680, 35 | 5681, 5682, 5683, 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 6720, 36 | 6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 6729, 6730, 6731, 6732, 6733, 6734, 6735, 6736, 37 | 6737, 6738, 6739, 6740, 6741, 6742, 6743, 6744, 6745, 6746, 6747, 6748, 6749, 6750, 6751, 6752, 38 | 6753, 6754, 6755, 6756, 6757, 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, 6766, 6767, 6768, 39 | 6769, 6770, 6771, 6772, 6773, 6774, 6775, 6776, 6777, 6778, 6779, 6780, 6781, 6782, 6783, 7808, 40 | 7809, 7808, 6783, 6782, 6781, 6780, 6779, 6778, 6777, 6776, 6775, 6774, 6773, 6772, 6771, 6770, 41 | 6769, 6768, 6767, 6766, 6765, 6764, 6763, 6762, 6761, 6760, 6759, 6758, 6757, 6756, 6755, 6754, 42 | 6753, 6752, 6751, 6750, 6749, 6748, 6747, 6746, 6745, 6744, 6743, 6742, 6741, 6740, 6739, 6738, 43 | 6737, 6736, 6735, 6734, 6733, 6732, 6731, 6730, 6729, 6728, 6727, 6726, 6725, 6724, 6723, 6722, 44 | 6721, 6720, 5695, 5694, 5693, 5692, 5691, 5690, 5689, 5688, 5687, 5686, 5685, 5684, 5683, 5682, 45 | 5681, 5680, 5679, 5678, 5677, 5676, 5675, 5674, 5673, 5672, 5671, 5670, 5669, 5668, 5667, 5666, 46 | 5665, 5664, 4639, 4638, 4637, 4636, 4635, 4634, 4633, 4632, 4631, 4630, 4629, 4628, 4627, 4626, 47 | 4625, 4624, 3599, 3598, 3597, 3596, 3595, 3594, 3593, 3592, 2567, 2566, 2565, 2564, 1539, 1538 48 | }, 49 | // Signed 50 | new int[] { 51 | 513, 2052, 2054, 3080, 3082, 3084, 3086, 4112, 4114, 4116, 4118, 4120, 4122, 4124, 4126, 5152, 52 | 5154, 5156, 5158, 5160, 5162, 5164, 5166, 5168, 5170, 5172, 5174, 5176, 5178, 5180, 5182, 6208, 53 | 6210, 6212, 6214, 6216, 6218, 6220, 6222, 6224, 6226, 6228, 6230, 6232, 6234, 6236, 6238, 6240, 54 | 6242, 6244, 6246, 6248, 6250, 6252, 6254, 6256, 6258, 6260, 6262, 6264, 6266, 6268, 6270, 7296, 55 | 7298, 7300, 7302, 7304, 7306, 7308, 7310, 7312, 7314, 7316, 7318, 7320, 7322, 7324, 7326, 7328, 56 | 7330, 7332, 7334, 7336, 7338, 7340, 7342, 7344, 7346, 7348, 7350, 7352, 7354, 7356, 7358, 7360, 57 | 7362, 7364, 7366, 7368, 7370, 7372, 7374, 7376, 7378, 7380, 7382, 7384, 7386, 7388, 7390, 7392, 58 | 7394, 7396, 7398, 7400, 7402, 7404, 7406, 7408, 7410, 7412, 7414, 7416, 7418, 7420, 7422, 8448, 59 | 8451, 8449, 7423, 7421, 7419, 7417, 7415, 7413, 7411, 7409, 7407, 7405, 7403, 7401, 7399, 7397, 60 | 7395, 7393, 7391, 7389, 7387, 7385, 7383, 7381, 7379, 7377, 7375, 7373, 7371, 7369, 7367, 7365, 61 | 7363, 7361, 7359, 7357, 7355, 7353, 7351, 7349, 7347, 7345, 7343, 7341, 7339, 7337, 7335, 7333, 62 | 7331, 7329, 7327, 7325, 7323, 7321, 7319, 7317, 7315, 7313, 7311, 7309, 7307, 7305, 7303, 7301, 63 | 7299, 7297, 6271, 6269, 6267, 6265, 6263, 6261, 6259, 6257, 6255, 6253, 6251, 6249, 6247, 6245, 64 | 6243, 6241, 6239, 6237, 6235, 6233, 6231, 6229, 6227, 6225, 6223, 6221, 6219, 6217, 6215, 6213, 65 | 6211, 6209, 5183, 5181, 5179, 5177, 5175, 5173, 5171, 5169, 5167, 5165, 5163, 5161, 5159, 5157, 66 | 5155, 5153, 4127, 4125, 4123, 4121, 4119, 4117, 4115, 
4113, 3087, 3085, 3083, 3081, 2055, 2053 67 | } 68 | }; 69 | 70 | private final int[] cache; 71 | private final int signed; 72 | private final OutputBitStream bitstream; 73 | 74 | /** 75 | * Creates a new {@code ExpGolombEncoder}. 76 | * 77 | * @param bitstream The {@link OutputBitStream} to write the encoded data to. 78 | * @param signed If {@code true}, the encoder will encode signed values; 79 | * otherwise, unsigned. 80 | * @throws NullPointerException if {@code bitstream} is {@code null}. 81 | */ 82 | public ExpGolombEncoder(OutputBitStream bitstream, boolean signed) { 83 | if (bitstream == null) 84 | throw new NullPointerException("ExpGolomb codec: Invalid null bitstream parameter"); 85 | 86 | this.signed = (signed == true) ? 1 : 0; 87 | // The cache stores pre-computed values for faster encoding. 88 | // CACHE_VALUES[0] is for unsigned encoding. 89 | // CACHE_VALUES[1] is for signed encoding. 90 | // Each value in the cache is a packed integer: 91 | // - The lower 9 bits (emit & 0x1FF) represent the value to write. 92 | // - The upper bits (emit >>> 9) represent the number of bits to write. 93 | this.cache = CACHE_VALUES[this.signed]; 94 | this.bitstream = bitstream; 95 | } 96 | 97 | public boolean isSigned() { 98 | return this.signed == 1; 99 | } 100 | 101 | /** 102 | * Encodes a block of data. 103 | * 104 | * @param block The byte array containing the data to encode. 105 | * @param blkptr The starting position in the block. 106 | * @param count The number of bytes to encode. 107 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid 108 | * parameters). 109 | */ 110 | @Override 111 | public int encode(byte[] block, int blkptr, int count) { 112 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 113 | return -1; 114 | 115 | final int end = blkptr + count; 116 | 117 | for (int i = blkptr; i < end; i++) 118 | this.encodeByte(block[i]); 119 | 120 | return count; 121 | } 122 | 123 | public void encodeByte(byte val) { 124 | if (val == 0) { 125 | // shortcut when input is 0 126 | this.bitstream.writeBit(1); 127 | return; 128 | } 129 | 130 | final int emit = this.cache[val & 0xFF]; 131 | this.bitstream.writeBits(emit & 0x1FF, emit >>> 9); 132 | } 133 | 134 | @Override 135 | public OutputBitStream getBitStream() { 136 | return this.bitstream; 137 | } 138 | 139 | @Override 140 | public void dispose() { 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/FPAQEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import java.util.Arrays; 19 | import io.github.flanglet.kanzi.EntropyEncoder; 20 | import io.github.flanglet.kanzi.Memory; 21 | import io.github.flanglet.kanzi.OutputBitStream; 22 | import io.github.flanglet.kanzi.SliceByteArray; 23 | 24 | /** 25 | *
26 | * Implementation of an FPAQ encoder. This class is derived from fpaq0r by 27 | * Matt Mahoney and Alexander Ratushnyak, and is a simple (and fast) adaptive 28 | * entropy bit coder. 29 | *
30 | * 31 | *32 | * It uses a range coding approach where the current range is updated based on 33 | * the predicted probability of the next bit. The prediction is based on a 34 | * context formed by previous bits. 35 | *
36 | * 37 | *38 | * The encoding process involves updating the range and normalizing it by 39 | * writing 40 | * bits to an {@link OutputBitStream} when the range becomes too small. 41 | *
42 | * 43 | * @see fpaq0 by Matt Mahoney 44 | */ 45 | public class FPAQEncoder implements EntropyEncoder { 46 | /** 47 | * The top value for the range, used in range coding. 48 | * This value defines the maximum possible range. 49 | */ 50 | private static final long TOP = 0x00FFFFFFFFFFFFFFL; 51 | /** 52 | * A mask used to check if the most significant bits of the low and 53 | * (low + range) values are the same, indicating that bits can be 54 | * shifted out. 55 | */ 56 | private static final long MASK_24_56 = 0x00FFFFFFFF000000L; 57 | /** 58 | * A mask used to keep the lower 24 bits of a long. 59 | */ 60 | private static final long MASK_0_24 = 0x0000000000FFFFFFL; 61 | /** 62 | * A mask used to keep the lower 32 bits of a long. 63 | */ 64 | private static final long MASK_0_32 = 0x00000000FFFFFFFFL; 65 | /** 66 | * The default chunk size for processing data. 67 | */ 68 | private static final int DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024; 69 | /** 70 | * The maximum allowed block size. 71 | */ 72 | private static final int MAX_BLOCK_SIZE = 1 << 30; 73 | /** 74 | * The scaling factor for probabilities. 75 | */ 76 | private static final int PSCALE = 65536; 77 | 78 | /** 79 | * The lower bound of the current range. 80 | */ 81 | private long low; 82 | /** 83 | * The upper bound of the current range. 84 | */ 85 | private long high; 86 | /** 87 | * The output bitstream to which compressed data is written. 88 | */ 89 | private final OutputBitStream bitstream; 90 | private boolean disposed; 91 | private SliceByteArray sba; 92 | private final int[][] probs; // probability of bit=1 93 | private int[] p; // pointer to current prob 94 | 95 | /** 96 | * Creates a new {@code FPAQEncoder}. 97 | * 98 | * @param bitstream The {@link OutputBitStream} to write compressed data to. 99 | * @throws NullPointerException if {@code bitstream} is {@code null}. 100 | */ 101 | public FPAQEncoder(OutputBitStream bitstream) { 102 | if (bitstream == null) 103 | throw new NullPointerException("FPAQ codec: Invalid null bitstream parameter"); 104 | 105 | this.low = 0L; 106 | this.high = TOP; 107 | this.bitstream = bitstream; 108 | this.sba = new SliceByteArray(new byte[0], 0); 109 | this.probs = new int[4][256]; 110 | this.p = this.probs[0]; 111 | 112 | for (int i = 0; i < 4; i++) 113 | Arrays.fill(this.probs[i], PSCALE >> 1); 114 | } 115 | 116 | /** 117 | * Encodes a block of data. 118 | *119 | * This method reads data from the provided byte array, encodes it using the 120 | * FPAQ model, and writes the compressed data to the internal bitstream. 121 | *
122 | * 123 | * @param block The byte array containing the data to encode. 124 | * @param blkptr The starting position in the block. 125 | * @param count The number of bytes to encode. 126 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid 127 | * parameters). 128 | */ 129 | @Override 130 | public int encode(byte[] block, int blkptr, int count) { 131 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0) || (count > MAX_BLOCK_SIZE)) 132 | return -1; 133 | 134 | if (count == 0) 135 | return 0; 136 | 137 | int startChunk = blkptr; 138 | final int end = blkptr + count; 139 | 140 | // Split block into chunks, encode chunk and write bit array to bitstream 141 | while (startChunk < end) { 142 | final int chunkSize = Math.min(DEFAULT_CHUNK_SIZE, end - startChunk); 143 | 144 | if (this.sba.array.length < (chunkSize + (chunkSize >> 3))) 145 | this.sba.array = new byte[chunkSize + (chunkSize >> 3)]; 146 | 147 | this.sba.index = 0; 148 | final int endChunk = startChunk + chunkSize; 149 | this.p = this.probs[0]; 150 | 151 | for (int i = startChunk; i < endChunk; i++) { 152 | final byte val = block[i]; 153 | final int bits = (val & 0xFF) + 256; 154 | this.encodeBit(val & 0x80, 1); 155 | this.encodeBit(val & 0x40, bits >> 7); 156 | this.encodeBit(val & 0x20, bits >> 6); 157 | this.encodeBit(val & 0x10, bits >> 5); 158 | this.encodeBit(val & 0x08, bits >> 4); 159 | this.encodeBit(val & 0x04, bits >> 3); 160 | this.encodeBit(val & 0x02, bits >> 2); 161 | this.encodeBit(val & 0x01, bits >> 1); 162 | this.p = this.probs[(val & 0xFF) >>> 6]; 163 | } 164 | 165 | EntropyUtils.writeVarInt(this.bitstream, this.sba.index); 166 | this.bitstream.writeBits(this.sba.array, 0, 8 * this.sba.index); 167 | startChunk += chunkSize; 168 | 169 | if (startChunk < end) 170 | this.bitstream.writeBits(this.low | MASK_0_24, 56); 171 | } 172 | 173 | return count; 174 | } 175 | 176 | /** 177 | * Encodes a single bit based on a given prediction. 178 | *179 | * The range is split according to the prediction, and the bit is encoded by 180 | * updating the range. The probability model for the current context is then 181 | * updated based on the encoded bit. 182 | *
183 | */ 184 | private void encodeBit(int bit, int pIdx) { 185 | // Calculate interval split 186 | // Written in a way to maximize accuracy of multiplication/division 187 | final long split = (((this.high - this.low) >>> 8) * this.p[pIdx]) >>> 8; 188 | 189 | // Update probabilities 190 | if (bit == 0) { 191 | this.low += (split + 1); 192 | this.p[pIdx] -= (this.p[pIdx] >> 6); 193 | } else { 194 | this.high = this.low + split; 195 | this.p[pIdx] -= ((this.p[pIdx] - PSCALE + 64) >> 6); 196 | } 197 | 198 | // Write unchanged first 32 bits to bitstream 199 | while (((this.low ^ this.high) & MASK_24_56) == 0) 200 | this.flush(); 201 | } 202 | 203 | /** 204 | * Flushes the current range to the bitstream. 205 | *206 | * This method is called when the range becomes too small and needs to be 207 | * normalized. It writes the most significant bits of the range to the 208 | * bitstream. 209 | *
210 | */ 211 | private void flush() { 212 | Memory.BigEndian.writeInt32(this.sba.array, this.sba.index, (int) (this.high >>> 24)); 213 | this.sba.index += 4; 214 | this.low <<= 32; 215 | this.high = (this.high << 32) | MASK_0_32; 216 | } 217 | 218 | /** 219 | * Returns the {@link OutputBitStream} used by this encoder. 220 | * 221 | * @return The {@link OutputBitStream}. 222 | */ 223 | @Override 224 | public OutputBitStream getBitStream() { 225 | return this.bitstream; 226 | } 227 | 228 | /** 229 | * Disposes of any resources used by the encoder. 230 | *231 | * This method flushes any remaining bits in the range to the bitstream. 232 | *
233 | */ 234 | @Override 235 | public void dispose() { 236 | if (this.disposed == true) 237 | return; 238 | 239 | this.disposed = true; 240 | this.bitstream.writeBits(this.low | MASK_0_24, 56); 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/FastLogisticAdaptiveProbMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi.entropy; 16 | 17 | import io.github.flanglet.kanzi.Global; 18 | 19 | /** 20 | *21 | * Implementation of an Adaptive Probability Map (APM) with fast logistic 22 | * function. 23 | * This class maps a probability and a context into a new probability that the 24 | * next bit will be 1. 25 | * After each guess, it updates its state to improve future guesses. 26 | *
27 |  *
28 |  * <p>
29 |  * It uses a logistic function to squash the prediction and adapts its internal
30 |  * probabilities based on the actual bit observed and a learning rate.
31 |  * </p>
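The update performed inside get() below is an exponential moving average toward a bit-dependent target. A self-contained sketch of that arithmetic (not taken from the Kanzi sources; the constants mirror the expression used by the adaptive probability map classes in this package):

    public class ApmUpdateDemo {
        public static void main(String[] args) {
            final int rate = 6;
            int entry = 32768;                            // roughly p(1) = 0.5 on a 16-bit scale
            final int bit = 1;                            // the bit actually observed
            final int g = (-bit & 65528) + (bit << rate); // 65592 when bit == 1, 0 when bit == 0
            entry += (g - entry) >> rate;                 // move 1/64 of the remaining error
            System.out.println(entry);                    // prints 33280: nudged toward "certain 1"
        }
    }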
32 | */ 33 | /* package */ final class FastLogisticAdaptiveProbMap { 34 | /** 35 | * The index into the {@code data} array, representing the last probability and 36 | * context. 37 | */ 38 | private int index; 39 | 40 | /** 41 | * The update rate for adapting probabilities. A smaller rate means faster 42 | * adaptation. 43 | */ 44 | private final int rate; 45 | 46 | /** 47 | * The internal data array storing probabilities for different contexts. 48 | * Each entry is a packed integer representing a probability. 49 | */ 50 | private final int[] data; 51 | 52 | /** 53 | * Creates a new {@code FastLogisticAdaptiveProbMap}. 54 | * 55 | * @param n The number of contexts to support. 56 | * @param rate The update rate for adapting probabilities. 57 | */ 58 | FastLogisticAdaptiveProbMap(int n, int rate) { 59 | this.data = new int[n * 32]; 60 | this.rate = rate; 61 | 62 | for (int j = 0; j < 32; j++) { 63 | this.data[j] = Global.squash((j - 16) << 7) << 4; 64 | } 65 | 66 | for (int i = 1; i < n; i++) { 67 | System.arraycopy(this.data, 0, this.data, i * 32, 32); 68 | } 69 | } 70 | 71 | /** 72 | * Returns an improved prediction given the current bit, prediction, and 73 | * context. 74 | * 75 | * @param bit The actual bit observed (0 or 1). 76 | * @param pr The current prediction (probability of 1). 77 | * @param ctx The current context. 78 | * @return The improved prediction (probability of 1), scaled. 79 | */ 80 | int get(int bit, int pr, int ctx) { 81 | // Update probability based on error and learning rate 82 | final int g = (-bit & 65528) + (bit << this.rate); 83 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 84 | 85 | // Find index: 32*ctx + quantized prediction in [0..32[ 86 | this.index = ((Global.STRETCH[pr] + 2048) >> 7) + (ctx << 5); 87 | return (this.data[this.index]) >> 4; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/HuffmanCommon.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | /** 19 | *20 | * Utility class for common Huffman coding operations. 21 | *
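As a concrete illustration of the canonical code assignment performed by the generateCanonicalCodes method shown below, consider four symbols with hypothetical code lengths of 1, 2, 3 and 3 bits (any lengths produced by a Huffman tree would do):

    short[] sizes = new short[256];
    sizes['A'] = 2; sizes['B'] = 1; sizes['C'] = 3; sizes['D'] = 3;
    int[] codes = new int[256];
    int[] symbols = { 'A', 'B', 'C', 'D' };   // sorted in place by (length, value)
    int n = HuffmanCommon.generateCanonicalCodes(sizes, codes, symbols, 4,
            HuffmanCommon.MAX_SYMBOL_SIZE_V4);
    // n == 4; the canonical codes are 'B' -> 0, 'A' -> 10, 'C' -> 110, 'D' -> 111

Shorter codes come first and, within a length, codes are assigned in increasing symbol order, which is what lets a decoder rebuild the exact same table from the code lengths alone.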
22 | */ 23 | public final class HuffmanCommon { 24 | /** 25 | * The logarithm base 2 of the maximum chunk size. 26 | */ 27 | public static final int LOG_MAX_CHUNK_SIZE = 14; 28 | 29 | /** 30 | * The minimum chunk size for Huffman encoding/decoding. 31 | */ 32 | public static final int MIN_CHUNK_SIZE = 1024; 33 | 34 | /** 35 | * The maximum chunk size for Huffman encoding/decoding. 36 | */ 37 | public static final int MAX_CHUNK_SIZE = 1 << LOG_MAX_CHUNK_SIZE; 38 | 39 | /** 40 | * The maximum symbol size (number of bits) for Huffman codes in bitstream 41 | * version 3. 42 | */ 43 | public static final int MAX_SYMBOL_SIZE_V3 = 14; 44 | 45 | /** 46 | * The maximum symbol size (number of bits) for Huffman codes in bitstream 47 | * version 4. 48 | */ 49 | public static final int MAX_SYMBOL_SIZE_V4 = 12; 50 | 51 | /** 52 | * The size of the internal buffer used for sorting symbols. 53 | */ 54 | private static final int BUFFER_SIZE = (MAX_SYMBOL_SIZE_V3 << 8) + 256; 55 | 56 | /** 57 | * Generates canonical Huffman codes based on the provided symbol sizes. 58 | * Symbols are sorted first by increasing size, then by increasing value. 59 | * 60 | * @param sizes An array where `sizes[symbol]` stores the bit length of 61 | * the Huffman code for that symbol. 62 | * @param codes An array where the generated canonical code for each 63 | * symbol will be stored. 64 | * @param symbols An array containing the symbols to be processed. This 65 | * array will be sorted in place. 66 | * @param count The number of symbols to process. 67 | * @param maxSymbolSize The maximum allowed bit length for any symbol's Huffman 68 | * code. 69 | * @return The number of codes generated (which should be equal to `count`), or 70 | * -1 if an error occurs 71 | * (e.g., invalid symbol or code size). 72 | */ 73 | public static int generateCanonicalCodes(short[] sizes, int[] codes, int[] symbols, 74 | int count, final int maxSymbolSize) { 75 | // Sort symbols by increasing size (first key) and increasing value (second key) 76 | if (count > 1) { 77 | byte[] buf = new byte[BUFFER_SIZE]; 78 | 79 | for (int i = 0; i < count; i++) { 80 | final int s = symbols[i]; 81 | 82 | if (((s & 0xFF) != s) || (sizes[s] > maxSymbolSize)) 83 | return -1; 84 | 85 | buf[((sizes[s] - 1) << 8) | s] = 1; 86 | } 87 | 88 | int n = 0; 89 | 90 | for (int i = 0; i < BUFFER_SIZE; i++) { 91 | if (buf[i] == 0) 92 | continue; 93 | 94 | symbols[n++] = i & 0xFF; 95 | 96 | if (n == count) 97 | break; 98 | } 99 | } 100 | 101 | int code = 0; 102 | int curLen = sizes[symbols[0]]; 103 | 104 | for (int i = 0; i < count; i++) { 105 | final int s = symbols[i]; 106 | code <<= (sizes[s] - curLen); 107 | curLen = sizes[s]; 108 | codes[s] = code; 109 | code++; 110 | } 111 | 112 | return count; 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/LinearAdaptiveProbMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | package io.github.flanglet.kanzi.entropy;
16 | 
17 | /**
18 |  * <p>
19 |  * Implementation of an Adaptive Probability Map (APM) with linear
20 |  * interpolation.
21 |  * This class maps a probability and a context into a new probability that the
22 |  * next bit will be 1. After each guess, it updates its state to improve future
23 |  * guesses.
24 |  * </p>
25 |  *
26 |  * <p>
27 |  * It uses linear interpolation to squash the prediction and adapts its internal
28 |  * probabilities based on the actual bit observed and a learning rate.
29 |  * </p>
30 | */ 31 | /* package */ final class LinearAdaptiveProbMap { 32 | /** 33 | * The index into the {@code data} array, representing the last probability and 34 | * context. 35 | */ 36 | private int index; 37 | 38 | /** 39 | * The update rate for adapting probabilities. A smaller rate means faster 40 | * adaptation. 41 | */ 42 | private final int rate; 43 | 44 | /** 45 | * The internal data array storing probabilities for different contexts. 46 | * Each entry is a packed integer representing a probability. 47 | */ 48 | private final int[] data; 49 | 50 | /** 51 | * Creates a new {@code LinearAdaptiveProbMap}. 52 | * 53 | * @param n The number of contexts to support. 54 | * @param rate The update rate for adapting probabilities. 55 | */ 56 | LinearAdaptiveProbMap(int n, int rate) { 57 | final int size = (n == 0) ? 65 : n * 65; 58 | this.data = new int[size]; 59 | this.rate = rate; 60 | 61 | for (int j = 0; j <= 64; j++) 62 | this.data[j] = (j << 6) << 4; 63 | 64 | for (int i = 1; i < n; i++) 65 | System.arraycopy(this.data, 0, this.data, i * 65, 65); 66 | } 67 | 68 | /** 69 | * Returns an improved prediction given the current bit, prediction, and 70 | * context. 71 | * 72 | * @param bit The actual bit observed (0 or 1). 73 | * @param pr The current prediction (probability of 1). 74 | * @param ctx The current context. 75 | * @return The improved prediction (probability of 1), scaled. 76 | */ 77 | int get(int bit, int pr, int ctx) { 78 | // Update probability based on error and learning rate 79 | final int g = (-bit & 65528) + (bit << this.rate); 80 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 81 | this.data[this.index + 1] += ((g - this.data[this.index + 1]) >> this.rate); 82 | 83 | // Find index: 65*ctx + quantized prediction in [0..64] 84 | this.index = (pr >> 6) + (ctx << 6) + ctx; 85 | 86 | // Return interpolated probability 87 | final int w = pr & 127; 88 | return (this.data[this.index] * (128 - w) + this.data[this.index + 1] * w) >> 11; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/LogisticAdaptiveProbMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | package io.github.flanglet.kanzi.entropy; 16 | 17 | import io.github.flanglet.kanzi.Global; 18 | 19 | /** 20 | *21 | * Implementation of an Adaptive Probability Map (APM) with logistic function. 22 | * This class maps a probability and a context into a new probability that the 23 | * next bit will be 1. After each guess, it updates its state to improve future 24 | * guesses. 25 | *
26 |  *
27 |  * <p>
28 |  * It uses a logistic function to squash the prediction and adapts its internal
29 |  * probabilities based on the actual bit observed and a learning rate.
30 |  * </p>
31 | */ 32 | /* package */ final class LogisticAdaptiveProbMap { 33 | /** 34 | * The index into the {@code data} array, representing the last probability and 35 | * context. 36 | */ 37 | private int index; 38 | 39 | /** 40 | * The update rate for adapting probabilities. A smaller rate means faster 41 | * adaptation. 42 | */ 43 | private final int rate; 44 | 45 | /** 46 | * The internal data array storing probabilities for different contexts. 47 | * Each entry is a packed integer representing a probability. 48 | */ 49 | private final int[] data; 50 | 51 | /** 52 | * Creates a new {@code LogisticAdaptiveProbMap}. 53 | * 54 | * @param n The number of contexts to support. 55 | * @param rate The update rate for adapting probabilities. 56 | */ 57 | 58 | LogisticAdaptiveProbMap(int n, int rate) { 59 | final int size = (n == 0) ? 33 : n * 33; 60 | this.data = new int[size]; 61 | this.rate = rate; 62 | 63 | for (int j = 0; j <= 32; j++) 64 | this.data[j] = Global.squash((j - 16) << 7) << 4; 65 | 66 | for (int i = 1; i < n; i++) 67 | System.arraycopy(this.data, 0, this.data, i * 33, 33); 68 | } 69 | 70 | /** 71 | * Returns an improved prediction given the current bit, prediction, and 72 | * context. 73 | * 74 | * @param bit The actual bit observed (0 or 1). 75 | * @param pr The current prediction (probability of 1). 76 | * @param ctx The current context. 77 | * @return The improved prediction (probability of 1), scaled. 78 | */ 79 | int get(int bit, int pr, int ctx) { 80 | // Update probability based on error and learning rate 81 | final int g = (-bit & 65528) + (bit << this.rate); 82 | this.data[this.index] += ((g - this.data[this.index]) >> this.rate); 83 | this.data[this.index + 1] += ((g - this.data[this.index + 1]) >> this.rate); 84 | pr = Global.STRETCH[pr]; 85 | 86 | // Find index: 33*ctx + quantized prediction in [0..32] 87 | this.index = ((pr + 2048) >> 7) + (ctx << 5) + ctx; 88 | 89 | // Return interpolated probability 90 | final int w = pr & 127; 91 | return (this.data[this.index] * (128 - w) + this.data[this.index + 1] * w) >> 11; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/NullEntropyDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyDecoder; 19 | import io.github.flanglet.kanzi.InputBitStream; 20 | 21 | 22 | /** 23 | *Null entropy decoder. 24 | * This decoder does not perform any actual decompression; it simply reads 25 | * the data directly from the provided {@link InputBitStream}.
26 |  *
27 |  * <p>It acts as a pass-through mechanism, useful when no entropy coding
28 |  * is applied to the data, or when the data is already in its final form.</p>
29 |  */
30 | public final class NullEntropyDecoder implements EntropyDecoder
31 | {
32 |    private final InputBitStream bitstream;
33 | 
34 | 
35 |    /**
36 |     * Creates a new {@code NullEntropyDecoder}.
37 |     *
38 |     * @param bitstream The {@link InputBitStream} to read data from.
39 |     * @throws NullPointerException if {@code bitstream} is {@code null}.
40 |     */
41 |    public NullEntropyDecoder(InputBitStream bitstream)
42 |    {
43 |       if (bitstream == null)
44 |          throw new NullPointerException("Invalid null bitstream parameter");
45 | 
46 |       this.bitstream = bitstream;
47 |    }
48 | 
49 |    /**
50 |     * Decodes a block of data by reading it directly from the bitstream.
51 |     * <p>
52 |     * This method reads {@code count} bytes from the bitstream into the provided {@code block} array.
53 |     * </p>
54 | * @param block The byte array to decode into. 55 | * @param blkptr The starting position in the block. 56 | * @param count The number of bytes to decode. 57 | * @return The number of bytes decoded, or -1 if an error occurs (e.g., invalid parameters). 58 | */ 59 | @Override 60 | public int decode(byte[] block, int blkptr, int count) 61 | { 62 | if ((block == null) || (blkptr + count > block.length) || (blkptr < 0) || (count < 0)) 63 | return -1; 64 | 65 | int res = 0; 66 | 67 | while (count > 0) 68 | { 69 | final int ckSize = (count < 1<<23) ? count : 1<<23; 70 | res += (this.bitstream.readBits(block, blkptr, 8*ckSize) >> 3); 71 | blkptr += ckSize; 72 | count -= ckSize; 73 | } 74 | 75 | return res; 76 | } 77 | 78 | /** 79 | * Decodes a single byte by reading it directly from the bitstream. 80 | * @return The decoded byte. 81 | */ 82 | public byte decodeByte() 83 | { 84 | return (byte) this.bitstream.readBits(8); 85 | } 86 | 87 | /** 88 | * Returns the {@link InputBitStream} used by this decoder. 89 | * @return The {@link InputBitStream}. 90 | */ 91 | @Override 92 | public InputBitStream getBitStream() 93 | { 94 | return this.bitstream; 95 | } 96 | 97 | /** 98 | * Disposes of any resources used by the decoder. 99 | * This method currently does nothing as there are no specific resources to release. 100 | */ 101 | @Override 102 | public void dispose() 103 | { 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/entropy/NullEntropyEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.entropy; 17 | 18 | import io.github.flanglet.kanzi.EntropyEncoder; 19 | import io.github.flanglet.kanzi.OutputBitStream; 20 | 21 | 22 | /** 23 | *Null entropy encoder. 24 | * This encoder does not perform any actual compression; it simply writes 25 | * the data directly to the provided {@link OutputBitStream}.
26 |  *
27 |  * <p>It acts as a pass-through mechanism, useful when no entropy coding
28 |  * is applied to the data, or when the data is already in its final form.</p>
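A round-trip sketch for the two null codecs (illustrative only; it assumes DefaultOutputBitStream(OutputStream, int) and DefaultInputBitStream(InputStream, int) constructors from the bitstream package, plus the usual java.io stream classes):

    byte[] original = "pass-through example".getBytes();
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    OutputBitStream obs = new DefaultOutputBitStream(os, 4096);
    new NullEntropyEncoder(obs).encode(original, 0, original.length);
    obs.close();

    InputBitStream ibs = new DefaultInputBitStream(new ByteArrayInputStream(os.toByteArray()), 4096);
    byte[] restored = new byte[original.length];
    new NullEntropyDecoder(ibs).decode(restored, 0, restored.length);
    // restored now holds the same bytes as original: no entropy coding was applied in either direction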
29 |  */
30 | public final class NullEntropyEncoder implements EntropyEncoder
31 | {
32 |    private final OutputBitStream bitstream;
33 | 
34 | 
35 |    /**
36 |     * Creates a new {@code NullEntropyEncoder}.
37 |     * @param bitstream The {@link OutputBitStream} to write data to.
38 |     * @throws NullPointerException if {@code bitstream} is {@code null}.
39 |     */
40 |    public NullEntropyEncoder(OutputBitStream bitstream)
41 |    {
42 |       if (bitstream == null)
43 |          throw new NullPointerException("Invalid null bitstream parameter");
44 | 
45 |       this.bitstream = bitstream;
46 |    }
47 | 
48 | 
49 |    /**
50 |     * Encodes a block of data by writing it directly to the bitstream.
51 |     * <p>
52 |     * This method writes {@code count} bytes from the provided {@code block} array to the bitstream.
53 |     * </p>
54 | * @param block The byte array containing the data to encode. 55 | * @param blkptr The starting position in the block. 56 | * @param count The number of bytes to encode. 57 | * @return The number of bytes encoded, or -1 if an error occurs (e.g., invalid parameters). 58 | */ 59 | @Override 60 | public int encode(byte[] block, int blkptr, int count) 61 | { 62 | if ((block == null) || (blkptr+count > block.length) || (blkptr < 0) || (count < 0)) 63 | return -1; 64 | 65 | int res = 0; 66 | 67 | while (count > 0) 68 | { 69 | final int ckSize = (count < 1<<23) ? count : 1<<23; 70 | res += (this.bitstream.writeBits(block, blkptr, 8*ckSize) >> 3); 71 | blkptr += ckSize; 72 | count -= ckSize; 73 | } 74 | 75 | return res; 76 | } 77 | 78 | 79 | /** 80 | * Encodes a single byte by writing it directly to the bitstream. 81 | * @param val The byte to encode. 82 | */ 83 | public void encodeByte(byte val) 84 | { 85 | this.bitstream.writeBits(val, 8); 86 | } 87 | 88 | 89 | /** 90 | * Returns the {@link OutputBitStream} used by this encoder. 91 | * @return The {@link OutputBitStream}. 92 | */ 93 | @Override 94 | public OutputBitStream getBitStream() 95 | { 96 | return this.bitstream; 97 | } 98 | 99 | 100 | /** 101 | * Disposes of any resources used by the encoder. 102 | * This method currently does nothing as there are no specific resources to release. 103 | */ 104 | @Override 105 | public void dispose() 106 | { 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/io/IOException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.io; 17 | 18 | 19 | /** 20 | * Custom exception class that extends {@link java.io.IOException}. 21 | * This exception includes an error code to provide more specific information 22 | * about the nature of the I/O error that occurred. 23 | */ 24 | public class IOException extends java.io.IOException { 25 | private static final long serialVersionUID = -9153775235137373283L; 26 | 27 | private final int code; 28 | 29 | /** 30 | * Constructs a new {@code IOException} with the specified detail message 31 | * and error code. 32 | * 33 | * @param msg the detail message explaining the reason for the exception 34 | * @param code an integer error code that provides additional context about the error 35 | */ 36 | public IOException(String msg, int code) { 37 | super(msg); 38 | this.code = code; 39 | } 40 | 41 | /** 42 | * Returns the error code associated with this exception. 
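A small sketch of how callers can use the error code carried by this exception (the numeric code 13 below is arbitrary and only for illustration):

    try {
        throw new io.github.flanglet.kanzi.io.IOException("Cannot read block header", 13);
    } catch (io.github.flanglet.kanzi.io.IOException e) {
        System.err.println("I/O error " + e.getErrorCode() + ": " + e.getMessage());
    }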
43 | * 44 | * @return the error code indicating the type of I/O error 45 | */ 46 | public int getErrorCode() { 47 | return this.code; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/io/IOUtil.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2011-2025 Frederic Langlet 2 | Licensed under the Apache License, Version 2.0 (the "License"); 3 | you may not use this file except in compliance with the License. 4 | you may obtain a copy of the License at 5 | 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and 12 | limitations under the License. 13 | */ 14 | 15 | package io.github.flanglet.kanzi.io; 16 | 17 | import java.io.File; 18 | import java.io.IOException; 19 | import java.nio.file.DirectoryIteratorException; 20 | import java.nio.file.DirectoryStream; 21 | import java.nio.file.Files; 22 | import java.nio.file.Path; 23 | import java.nio.file.Paths; 24 | import java.util.List; 25 | 26 | 27 | /** 28 | * Utility class for performing I/O operations related to file management. 29 | */ 30 | public class IOUtil { 31 | 32 | /** 33 | * Creates a list of files from the specified target path. The method can 34 | * traverse directories recursively and can ignore symbolic links and 35 | * dot files based on the provided flags. 36 | * 37 | * @param target the target path from which to list files 38 | * @param files the list to populate with found file paths 39 | * @param isRecursive flag indicating whether to search directories recursively 40 | * @param ignoreLinks flag indicating whether to ignore symbolic links 41 | * @param ignoreDotFiles flag indicating whether to ignore dot files (files starting with a dot) 42 | * @throws IOException if an I/O error occurs or the target path is invalid 43 | */ 44 | public static void createFileList(String target, List27 | * BWT stream format: Header (mode + primary index(es)) | Data (n bytes) 28 | *
Note: This class is not thread-safe due to the mutable state of its breakpoints list.
28 | */ 29 | public class LyndonWords { 30 | 31 | // List of breakpoints for the Lyndon words 32 | private final ListThis method uses the Chen-Fox algorithm to find the breakpoints where the string 45 | * can be split into Lyndon words. It is not thread-safe.
46 | * 47 | * @param buf the byte array representing the string 48 | * @param length the length of the byte array 49 | * @return a list of breakpoints where Lyndon words occur 50 | */ 51 | private ListThis method demonstrates the use of the {@code split} method to split a string into Lyndon words.
156 | * 157 | * @param args command-line arguments (not used) 158 | */ 159 | public static void main(String[] args) { 160 | String[] ss = new LyndonWords().split("TO_BE_OR_NOT_TO_BE"); 161 | 162 | // Print the resulting Lyndon words 163 | for (String s : ss) { 164 | System.out.println(s); 165 | } 166 | } 167 | } 168 | 169 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/hash/XXHash32.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.hash; 17 | 18 | 19 | import io.github.flanglet.kanzi.Memory; 20 | 21 | /** 22 | * XXHash32 is an implementation of the 32-bit variant of the XXHash algorithm, 23 | * which is a fast non-cryptographic hash function. It is designed for high-speed 24 | * hashing, commonly used in applications where performance is critical, such as 25 | * checksums, hash tables, and data integrity verification. 26 | * Port to Java of the original source code: https://github.com/Cyan4973/xxHash 27 | * 28 | *XXHash32 uses a sequence of rounds with constant mixing primes to process 29 | * the input data and produce a 32-bit hash value. This class allows for an 30 | * optional user-defined seed, providing a degree of variability in the output. 31 | */ 32 | public class XXHash32 { 33 | 34 | // Constants used in the hashing algorithm 35 | private static final int PRIME32_1 = -1640531535; 36 | private static final int PRIME32_2 = -2048144777; 37 | private static final int PRIME32_3 = -1028477379; 38 | private static final int PRIME32_4 = 668265263; 39 | private static final int PRIME32_5 = 374761393; 40 | 41 | // The seed used for hashing 42 | private int seed; 43 | 44 | /** 45 | * Default constructor that initializes the hash function with a seed based on 46 | * the current system time in nanoseconds. 47 | */ 48 | public XXHash32() { 49 | this((int) (System.nanoTime())); 50 | } 51 | 52 | /** 53 | * Constructs an XXHash32 instance with a specified seed. 54 | * 55 | * @param seed The seed value to be used in the hash computation. 56 | */ 57 | public XXHash32(int seed) { 58 | this.seed = seed; 59 | } 60 | 61 | /** 62 | * Sets the seed value for the hash computation. This allows for custom seed values 63 | * to modify the output hash. 64 | * 65 | * @param seed The new seed value. 66 | */ 67 | public void setSeed(int seed) { 68 | this.seed = seed; 69 | } 70 | 71 | /** 72 | * Computes the 32-bit hash of the provided byte array. 73 | * This method uses the entire byte array, starting from index 0. 74 | * 75 | * @param data The byte array to be hashed. 76 | * @return The 32-bit hash value of the input data. 
77 | */ 78 | public int hash(byte[] data) { 79 | return this.hash(data, 0, data.length); 80 | } 81 | 82 | /** 83 | * Computes the 32-bit hash of the provided byte array, with the option to specify 84 | * an offset and length of the data to be used. 85 | * 86 | * @param data The byte array to be hashed. 87 | * @param offset The starting index within the byte array. 88 | * @param length The number of bytes to hash. 89 | * @return The 32-bit hash value of the input data. 90 | */ 91 | public int hash(byte[] data, int offset, int length) { 92 | final int end = offset + length; 93 | int h32; 94 | int idx = offset; 95 | 96 | if (length >= 16) { 97 | final int end16 = end - 16; 98 | int v1 = this.seed + PRIME32_1 + PRIME32_2; 99 | int v2 = this.seed + PRIME32_2; 100 | int v3 = this.seed; 101 | int v4 = this.seed - PRIME32_1; 102 | 103 | // Process 16-byte blocks 104 | do { 105 | v1 = round(v1, Memory.LittleEndian.readInt32(data, idx)); 106 | v2 = round(v2, Memory.LittleEndian.readInt32(data, idx + 4)); 107 | v3 = round(v3, Memory.LittleEndian.readInt32(data, idx + 8)); 108 | v4 = round(v4, Memory.LittleEndian.readInt32(data, idx + 12)); 109 | idx += 16; 110 | } while (idx <= end16); 111 | 112 | h32 = ((v1 << 1) | (v1 >>> 31)) + ((v2 << 7) | (v2 >>> 25)) + 113 | ((v3 << 12) | (v3 >>> 20)) + ((v4 << 18) | (v4 >>> 14)); 114 | } else { 115 | h32 = this.seed + PRIME32_5; 116 | } 117 | 118 | h32 += length; 119 | 120 | // Process remaining data (less than 16 bytes) 121 | while (idx <= end - 4) { 122 | h32 += (Memory.LittleEndian.readInt32(data, idx) * PRIME32_3); 123 | h32 = ((h32 << 17) | (h32 >>> 15)) * PRIME32_4; 124 | idx += 4; 125 | } 126 | 127 | while (idx < end) { 128 | h32 += ((data[idx] & 0xFF) * PRIME32_5); 129 | h32 = ((h32 << 11) | (h32 >>> 21)) * PRIME32_1; 130 | idx++; 131 | } 132 | 133 | // Finalization step 134 | h32 ^= (h32 >>> 15); 135 | h32 *= PRIME32_2; 136 | h32 ^= (h32 >>> 13); 137 | h32 *= PRIME32_3; 138 | return h32 ^ (h32 >>> 16); 139 | } 140 | 141 | /** 142 | * Performs a single round of mixing for the hash value. 143 | * 144 | * @param acc The accumulator value to be mixed. 145 | * @param val The value to be mixed with the accumulator. 146 | * @return The new mixed accumulator value. 147 | */ 148 | private static int round(int acc, int val) { 149 | acc += (val * PRIME32_2); 150 | return ((acc << 13) | (acc >>> 19)) * PRIME32_1; 151 | } 152 | } 153 | 154 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/hash/XXHash64.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.hash; 17 | 18 | import io.github.flanglet.kanzi.Memory; 19 | 20 | /** 21 | * XXHash64 is an implementation of the 64-bit variant of the XXHash algorithm, 22 | * which is a fast non-cryptographic hash function. 
It is designed for high-speed 23 | * hashing, and is widely used for checksums and hashing large amounts of data. 24 | * This class allows for a configurable seed value, and provides methods for 25 | * hashing byte arrays of various lengths. 26 | * Port to Java of the original source code: https://github.com/Cyan4973/xxHash 27 | * 28 | *
The algorithm processes the input data in blocks and uses a combination of 29 | * mix functions and bitwise operations to produce a hash value. It is optimized 30 | * for 64-bit platforms and can be used for general-purpose hashing where 31 | * cryptographic security is not a concern. 32 | * 33 | */ 34 | public class XXHash64 { 35 | 36 | // Constants used in the hashing algorithm 37 | private static final long PRIME64_1 = 0x9E3779B185EBCA87L; 38 | private static final long PRIME64_2 = 0xC2B2AE3D27D4EB4FL; 39 | private static final long PRIME64_3 = 0x165667B19E3779F9L; 40 | private static final long PRIME64_4 = 0x85EBCA77C2B2AE63L; 41 | private static final long PRIME64_5 = 0x27D4EB2F165667C5L; 42 | 43 | // The seed used for hashing 44 | private long seed; 45 | 46 | /** 47 | * Default constructor that initializes the hash function with a seed based on the 48 | * current system time in nanoseconds. 49 | */ 50 | public XXHash64() { 51 | this(System.nanoTime()); 52 | } 53 | 54 | /** 55 | * Constructs an XXHash64 instance with a specified seed. 56 | * 57 | * @param seed The seed value to be used in the hash computation. 58 | */ 59 | public XXHash64(long seed) { 60 | this.seed = seed; 61 | } 62 | 63 | /** 64 | * Sets the seed value for the hash computation. This allows for custom seed values 65 | * to modify the output hash. 66 | * 67 | * @param seed The new seed value. 68 | */ 69 | public void setSeed(long seed) { 70 | this.seed = seed; 71 | } 72 | 73 | /** 74 | * Computes the 64-bit hash of the provided byte array. 75 | * This method uses the entire byte array, starting from index 0. 76 | * 77 | * @param data The byte array to be hashed. 78 | * @return The 64-bit hash value of the input data. 79 | */ 80 | public long hash(byte[] data) { 81 | return this.hash(data, 0, data.length); 82 | } 83 | 84 | /** 85 | * Computes the 64-bit hash of the provided byte array, with the option to specify 86 | * an offset and length of the data to be used. 87 | * 88 | * @param data The byte array to be hashed. 89 | * @param offset The starting index within the byte array. 90 | * @param length The number of bytes to hash. 91 | * @return The 64-bit hash value of the input data. 
92 | */ 93 | public long hash(byte[] data, int offset, int length) { 94 | final int end = offset + length; 95 | long h64; 96 | int idx = offset; 97 | 98 | if (length >= 32) { 99 | final int end32 = end - 32; 100 | long v1 = this.seed + PRIME64_1 + PRIME64_2; 101 | long v2 = this.seed + PRIME64_2; 102 | long v3 = this.seed; 103 | long v4 = this.seed - PRIME64_1; 104 | 105 | // Process 32-byte blocks 106 | do { 107 | v1 = round(v1, Memory.LittleEndian.readLong64(data, idx)); 108 | v2 = round(v2, Memory.LittleEndian.readLong64(data, idx + 8)); 109 | v3 = round(v3, Memory.LittleEndian.readLong64(data, idx + 16)); 110 | v4 = round(v4, Memory.LittleEndian.readLong64(data, idx + 24)); 111 | idx += 32; 112 | } while (idx <= end32); 113 | 114 | h64 = ((v1 << 1) | (v1 >>> 31)) + ((v2 << 7) | (v2 >>> 25)) + 115 | ((v3 << 12) | (v3 >>> 20)) + ((v4 << 18) | (v4 >>> 14)); 116 | 117 | // Finalization 118 | h64 = mergeRound(h64, v1); 119 | h64 = mergeRound(h64, v2); 120 | h64 = mergeRound(h64, v3); 121 | h64 = mergeRound(h64, v4); 122 | } else { 123 | h64 = this.seed + PRIME64_5; 124 | } 125 | 126 | h64 += length; 127 | 128 | // Process remaining data (less than 32 bytes) 129 | while (idx + 8 <= end) { 130 | h64 ^= round(0, Memory.LittleEndian.readLong64(data, idx)); 131 | h64 = ((h64 << 27) | (h64 >>> 37)) * PRIME64_1 + PRIME64_4; 132 | idx += 8; 133 | } 134 | 135 | while (idx + 4 <= end) { 136 | h64 ^= (Memory.LittleEndian.readInt32(data, idx) * PRIME64_1); 137 | h64 = ((h64 << 23) | (h64 >>> 41)) * PRIME64_2 + PRIME64_3; 138 | idx += 4; 139 | } 140 | 141 | while (idx < end) { 142 | h64 ^= ((data[idx] & 0xFF) * PRIME64_5); 143 | h64 = ((h64 << 11) | (h64 >>> 53)) * PRIME64_1; 144 | idx++; 145 | } 146 | 147 | // Finalization step 148 | h64 ^= (h64 >>> 33); 149 | h64 *= PRIME64_2; 150 | h64 ^= (h64 >>> 29); 151 | h64 *= PRIME64_3; 152 | return h64 ^ (h64 >>> 32); 153 | } 154 | 155 | /** 156 | * Performs a single round of mixing for the hash value. 157 | * 158 | * @param acc The accumulator value to be mixed. 159 | * @param val The value to be mixed with the accumulator. 160 | * @return The new mixed accumulator value. 161 | */ 162 | private static long round(long acc, long val) { 163 | acc += (val * PRIME64_2); 164 | return ((acc << 31) | (acc >>> 33)) * PRIME64_1; 165 | } 166 | 167 | /** 168 | * Merges an additional value into the accumulator during the finalization phase. 169 | * 170 | * @param acc The current accumulator value. 171 | * @param val The value to be merged into the accumulator. 172 | * @return The updated accumulator value. 173 | */ 174 | private static long mergeRound(long acc, long val) { 175 | acc ^= round(0, val); 176 | return acc * PRIME64_1 + PRIME64_4; 177 | } 178 | } 179 | 180 | 181 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/BucketSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ByteSorter; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code BucketSort} class provides an implementation of the bucket sort algorithm for sorting integers and bytes. 23 | * Bucket sort is a simple and efficient sorting algorithm that works by distributing elements into a number of buckets, 24 | * then sorting the individual buckets. This implementation is optimized to handle small integer and byte values. 25 | * 26 | *
It is a simplified form of radix sort with buckets of width one, making it efficient for small integers (up to 0xFFFF).
27 | *This implementation is not thread-safe due to the mutable state of its internal data structures.
28 | */ 29 | public class BucketSort implements IntSorter, ByteSorter { 30 | 31 | // Array to store the count of each value within the bucket range 32 | private final int[] count; 33 | 34 | /** 35 | * Constructs a {@code BucketSort} object using the default bucket size for byte values (0 to 255). 36 | */ 37 | public BucketSort() { 38 | this.count = new int[256]; 39 | } 40 | 41 | /** 42 | * Constructs a {@code BucketSort} object with a custom bucket size determined by the logarithm of the maximum value. 43 | * 44 | * @param logMaxValue the logarithm (base 2) of the maximum value to be sorted. 45 | * Must be between 2 and 16 (inclusive). 46 | * @throws IllegalArgumentException if the {@code logMaxValue} is less than 2 or greater than 16. 47 | */ 48 | public BucketSort(int logMaxValue) { 49 | if (logMaxValue < 2) 50 | throw new IllegalArgumentException("The log data size parameter must be at least 2"); 51 | 52 | if (logMaxValue > 16) 53 | throw new IllegalArgumentException("The log data size parameter must be at most 16"); 54 | 55 | this.count = new int[1 << logMaxValue]; // Array size determined by the max value (logMaxValue) 56 | } 57 | 58 | /** 59 | * Sorts an array of integers using the bucket sort algorithm. 60 | * 61 | *The sorting works by counting the frequency of each integer in the input array, then placing the integers back into 62 | * the array in sorted order.
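A minimal usage sketch for this sorter; note that the counting loops index the count array directly with the input values, so the values must be non-negative and smaller than 1 << logMaxValue:

    int[] values = { 3, 7, 1, 0, 7, 2, 5 };
    BucketSort sorter = new BucketSort(3);             // buckets for values in [0..7]
    boolean ok = sorter.sort(values, 0, values.length);
    // ok is true and values is now { 0, 1, 2, 3, 5, 7, 7 }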
63 | * 64 | * @param input the array of integers to be sorted. 65 | * @param blkptr the starting index in the array to begin sorting. 66 | * @param len the length of the portion of the array to be sorted. 67 | * @return {@code true} if the sorting was successful; {@code false} if there were invalid parameters (e.g., 68 | * out-of-bounds indices or invalid length). 69 | */ 70 | @Override 71 | public boolean sort(int[] input, int blkptr, int len) { 72 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 73 | return false; 74 | 75 | if (len == 1) 76 | return true; 77 | 78 | final int len8 = len & -8; // Round down to the nearest multiple of 8 79 | final int end8 = blkptr + len8; 80 | final int[] c = this.count; // Bucket count array 81 | final int length = c.length; 82 | 83 | // Unrolled loop for efficient counting 84 | for (int i = blkptr; i < end8; i += 8) { 85 | c[input[i]]++; 86 | c[input[i + 1]]++; 87 | c[input[i + 2]]++; 88 | c[input[i + 3]]++; 89 | c[input[i + 4]]++; 90 | c[input[i + 5]]++; 91 | c[input[i + 6]]++; 92 | c[input[i + 7]]++; 93 | } 94 | 95 | // Handle remaining elements not divisible by 8 96 | for (int i = len8; i < len; i++) 97 | c[input[blkptr + i]]++; 98 | 99 | // Reconstruct the sorted array using the bucket counts 100 | for (int i = 0, j = blkptr; i < length; i++) { 101 | final int val = c[i]; 102 | 103 | if (val == 0) 104 | continue; 105 | 106 | c[i] = 0; 107 | int val8 = val & -8; 108 | 109 | for (int k = val; k > val8; k--) 110 | input[j++] = i; 111 | 112 | // Fill the remaining spots using the "8 at a time" optimization 113 | while (val8 > 0) { 114 | input[j] = i; 115 | input[j + 1] = i; 116 | input[j + 2] = i; 117 | input[j + 3] = i; 118 | input[j + 4] = i; 119 | input[j + 5] = i; 120 | input[j + 6] = i; 121 | input[j + 7] = i; 122 | j += 8; 123 | val8 -= 8; 124 | } 125 | } 126 | 127 | return true; 128 | } 129 | 130 | /** 131 | * Sorts an array of bytes using the bucket sort algorithm. 132 | * 133 | *This method behaves similarly to the integer sort method, but operates on byte values (0 to 255).
134 | * 135 | * @param input the array of bytes to be sorted. 136 | * @param blkptr the starting index in the array to begin sorting. 137 | * @param len the length of the portion of the array to be sorted. 138 | * @return {@code true} if the sorting was successful; {@code false} if there were invalid parameters ( 139 | * out-of-bounds indices or invalid length). 140 | */ 141 | @Override 142 | public boolean sort(byte[] input, int blkptr, int len) { 143 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 144 | return false; 145 | 146 | if (len == 1) 147 | return true; 148 | 149 | final int len8 = len & -8; // Round down to the nearest multiple of 8 150 | final int end8 = blkptr + len8; 151 | final int[] c = this.count; // Bucket count array 152 | final int length = c.length; 153 | 154 | // Unrolled loop for efficient counting 155 | for (int i = blkptr; i < end8; i += 8) { 156 | c[input[i] & 0xFF]++; 157 | c[input[i + 1] & 0xFF]++; 158 | c[input[i + 2] & 0xFF]++; 159 | c[input[i + 3] & 0xFF]++; 160 | c[input[i + 4] & 0xFF]++; 161 | c[input[i + 5] & 0xFF]++; 162 | c[input[i + 6] & 0xFF]++; 163 | c[input[i + 7] & 0xFF]++; 164 | } 165 | 166 | // Handle remaining elements not divisible by 8 167 | for (int i = len8; i < len; i++) 168 | c[input[blkptr + i] & 0xFF]++; 169 | 170 | // Reconstruct the sorted array using the bucket counts 171 | for (int i = 0, j = blkptr; i < length; i++) { 172 | final int val = c[i]; 173 | 174 | if (val == 0) 175 | continue; 176 | 177 | int val8 = val & -8; 178 | c[i] = 0; 179 | 180 | for (int k = val; k > val8; k--) 181 | input[j++] = (byte) i; 182 | 183 | // Fill the remaining spots using the "8 at a time" optimization 184 | while (val8 > 0) { 185 | input[j] = (byte) i; 186 | input[j + 1] = (byte) i; 187 | input[j + 2] = (byte) i; 188 | input[j + 3] = (byte) i; 189 | input[j + 4] = (byte) i; 190 | input[j + 5] = (byte) i; 191 | input[j + 6] = (byte) i; 192 | input[j + 7] = (byte) i; 193 | j += 8; 194 | val8 -= 8; 195 | } 196 | } 197 | 198 | return true; 199 | } 200 | } 201 | 202 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/DefaultArrayComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | 20 | /** 21 | * A comparator for comparing elements in an integer array. This class implements the {@link ArrayComparator} interface 22 | * and provides a mechanism to compare two elements based on their values. The comparison also accounts for stable sorting 23 | * by considering their indices when the values are equal. 24 | * 25 | *This class is immutable and thread-safe as it holds a reference to the input array but does not modify it.
26 | * 27 | *Example usage:
28 | *
29 | * int[] array = { 5, 2, 8, 1 };
30 | * DefaultArrayComparator comparator = new DefaultArrayComparator(array);
31 | * int result = comparator.compare(0, 1); // Compares array[0] (5) and array[1] (2)
32 | *
33 | *
34 | * @see ArrayComparator
35 | */
36 | public final class DefaultArrayComparator implements ArrayComparator {
37 |
38 | private final int[] array;
39 |
40 | /**
41 | * Constructs a new {@code DefaultArrayComparator} using the specified integer array.
42 | *
43 | * @param array the array to compare elements in; must not be {@code null}
44 | * @throws NullPointerException if the provided array is {@code null}
45 | */
46 | public DefaultArrayComparator(int[] array) {
47 | if (array == null)
48 | throw new NullPointerException("Invalid null array parameter");
49 |
50 | this.array = array;
51 | }
52 |
53 | /**
54 | * Compares two elements of the array at the specified indices.
55 | * 56 | * The comparison is based on the values of the elements at the provided indices. If the values are equal, 57 | * the method returns a comparison based on their indices to maintain stability in sorting. 58 | *
59 | * 60 | * @param lidx the index of the first element to compare 61 | * @param ridx the index of the second element to compare 62 | * @return a negative integer if the element at {@code lidx} is less than the element at {@code ridx}, 63 | * a positive integer if the element at {@code lidx} is greater than the element at {@code ridx}, 64 | * or zero if they are equal 65 | */ 66 | @Override 67 | public int compare(int lidx, int ridx) { 68 | int res = this.array[lidx] - this.array[ridx]; 69 | 70 | // Make the sort stable 71 | if (res == 0) 72 | res = lidx - ridx; 73 | 74 | return res; 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/HeapSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code HeapSort} class implements the heap sort algorithm, a comparison-based sorting algorithm with an average and 23 | * worst-case time complexity of O(n log n). 24 | * 25 | *Heap sort works by first building a binary heap from the input data, and then repeatedly extracting the maximum 26 | * (or minimum) element from the heap and reconstructing the heap. Although heap sort has O(n log n) time complexity, it is 27 | * often slower in practice compared to other O(n log n) algorithms such as QuickSort, due to larger constant factors.
28 | * 29 | *This implementation allows an optional custom comparator to be used for comparing array elements. If no comparator is 30 | * provided, the natural ordering of the elements is used.
31 | * 32 | *This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.
33 | */ 34 | public final class HeapSort implements IntSorter { 35 | 36 | // Comparator used for comparing elements in the array 37 | private final ArrayComparator cmp; 38 | 39 | /** 40 | * Constructs a {@code HeapSort} instance without a custom comparator. 41 | * This will use the natural ordering of the elements in the array. 42 | */ 43 | public HeapSort() { 44 | this(null); 45 | } 46 | 47 | /** 48 | * Constructs a {@code HeapSort} instance with the specified comparator. 49 | * If {@code cmp} is {@code null}, the natural ordering of the elements will be used. 50 | * 51 | * @param cmp the comparator to use for element comparisons, or {@code null} to use natural ordering. 52 | */ 53 | public HeapSort(ArrayComparator cmp) { 54 | this.cmp = cmp; 55 | } 56 | 57 | /** 58 | * Returns the comparator used by this {@code HeapSort} instance. 59 | * 60 | * @return the comparator used for element comparisons, or {@code null} if natural ordering is used. 61 | */ 62 | protected ArrayComparator getComparator() { 63 | return this.cmp; 64 | } 65 | 66 | /** 67 | * Sorts the specified portion of the input array using the heap sort algorithm. 68 | * 69 | *The sorting begins at index {@code blkptr} and sorts {@code len} elements in the array. The array is rearranged 70 | * in-place, and the elements will be sorted in ascending order.
71 | * 72 | * @param input the array to be sorted. 73 | * @param blkptr the starting index of the portion to be sorted. 74 | * @param len the number of elements to sort. 75 | * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (out-of-bounds indices). 76 | */ 77 | @Override 78 | public boolean sort(int[] input, int blkptr, int len) { 79 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 80 | return false; 81 | 82 | if (len == 1) 83 | return true; 84 | 85 | // Build the heap by calling doSort on all non-leaf nodes 86 | for (int k = len >> 1; k > 0; k--) { 87 | doSort(input, blkptr, k, len, this.cmp); 88 | } 89 | 90 | // Repeatedly extract the maximum element and reconstruct the heap 91 | for (int i = len - 1; i > 0; i--) { 92 | final int temp = input[blkptr]; 93 | input[blkptr] = input[blkptr + i]; 94 | input[blkptr + i] = temp; 95 | doSort(input, blkptr, 1, i, this.cmp); 96 | } 97 | 98 | return true; 99 | } 100 | 101 | /** 102 | * Performs a single heap sort operation on the portion of the array specified by {@code blkptr}, {@code idx}, and {@code count}. 103 | * This method ensures that the subtree rooted at {@code idx} is a valid heap. 104 | * 105 | * @param array the array to be sorted. 106 | * @param blkptr the starting index of the array to be sorted. 107 | * @param idx the index of the current node to heapify. 108 | * @param count the total number of elements in the heap. 109 | * @param cmp the comparator used for comparisons, or {@code null} to use natural ordering. 110 | */ 111 | private static void doSort(int[] array, int blkptr, int idx, int count, ArrayComparator cmp) { 112 | int k = idx; 113 | final int temp = array[blkptr + k - 1]; 114 | final int n = count >> 1; // Half the size of the heap 115 | 116 | // If a custom comparator is provided, use it for comparison 117 | if (cmp != null) { 118 | while (k <= n) { 119 | int j = k << 1; // Left child 120 | 121 | // If right child exists and is larger, use it instead 122 | if ((j < count) && (cmp.compare(array[blkptr + j - 1], array[blkptr + j]) < 0)) { 123 | j++; 124 | } 125 | 126 | // If the current node is larger than its child, break out of the loop 127 | if (temp >= array[blkptr + j - 1]) { 128 | break; 129 | } 130 | 131 | // Move the child up to the parent node 132 | array[blkptr + k - 1] = array[blkptr + j - 1]; 133 | k = j; 134 | } 135 | } 136 | // If no comparator is provided, use natural ordering (ascending order) 137 | else { 138 | while (k <= n) { 139 | int j = k << 1; // Left child 140 | 141 | // If right child exists and is larger, use it instead 142 | if ((j < count) && (array[blkptr + j - 1] < array[blkptr + j])) { 143 | j++; 144 | } 145 | 146 | // If the current node is larger than its child, break out of the loop 147 | if (temp >= array[blkptr + j - 1]) { 148 | break; 149 | } 150 | 151 | // Move the child up to the parent node 152 | array[blkptr + k - 1] = array[blkptr + j - 1]; 153 | k = j; 154 | } 155 | } 156 | 157 | // Place the original element in the correct position 158 | array[blkptr + k - 1] = temp; 159 | } 160 | } 161 | 162 | -------------------------------------------------------------------------------- /java/src/main/java/io/github/flanglet/kanzi/util/sort/InsertionSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2025 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package io.github.flanglet.kanzi.util.sort; 17 | 18 | import io.github.flanglet.kanzi.ArrayComparator; 19 | import io.github.flanglet.kanzi.IntSorter; 20 | 21 | /** 22 | * The {@code InsertionSort} class implements the insertion sort algorithm, a simple comparison-based sorting algorithm with 23 | * a worst-case time complexity of O(n²) and an average-case complexity of O(n+k), where k is the number of inversions. 24 | * This algorithm is efficient for small data sets or nearly sorted data, but is not suitable for large datasets due to its 25 | * quadratic time complexity. 26 | * 27 | *Insertion sort works by iterating through the array and repeatedly inserting each element into its correct position 28 | * relative to the elements before it. The algorithm performs well when the data is already nearly sorted, making it ideal for 29 | * small datasets or nearly sorted data.
30 | * 31 | *This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.
32 | */ 33 | public class InsertionSort implements IntSorter { 34 | 35 | // Comparator used for comparing elements in the array 36 | private final ArrayComparator cmp; 37 | 38 | /** 39 | * Constructs an {@code InsertionSort} instance without a custom comparator. 40 | * This will use the natural ordering of the elements in the array. 41 | */ 42 | public InsertionSort() { 43 | this(null); 44 | } 45 | 46 | /** 47 | * Constructs an {@code InsertionSort} instance with the specified comparator. 48 | * If {@code cmp} is {@code null}, the natural ordering of the elements will be used. 49 | * 50 | * @param cmp the comparator to use for element comparisons, or {@code null} to use natural ordering. 51 | */ 52 | public InsertionSort(ArrayComparator cmp) { 53 | this.cmp = cmp; 54 | } 55 | 56 | /** 57 | * Returns the comparator used by this {@code InsertionSort} instance. 58 | * 59 | * @return the comparator used for element comparisons, or {@code null} if natural ordering is used. 60 | */ 61 | protected ArrayComparator getComparator() { 62 | return this.cmp; 63 | } 64 | 65 | /** 66 | * Sorts the specified portion of the input array using the insertion sort algorithm. 67 | * 68 | *The sorting begins at index {@code blkptr} and sorts {@code len} elements in the array. The array is rearranged 69 | * in-place, and the elements will be sorted in ascending order.
70 | * 71 | * @param input the array to be sorted. 72 | * @param blkptr the starting index of the portion to be sorted. 73 | * @param len the number of elements to sort. 74 | * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (e.g., out-of-bounds indices). 75 | */ 76 | @Override 77 | public boolean sort(int[] input, int blkptr, int len) { 78 | if ((blkptr < 0) || (len <= 0) || (blkptr + len > input.length)) 79 | return false; 80 | 81 | if (len == 1) 82 | return true; 83 | 84 | // If no comparator is provided, sort using natural ordering 85 | if (this.cmp == null) 86 | sortNoComparator(input, blkptr, blkptr + len); 87 | else 88 | sortWithComparator(input, blkptr, blkptr + len, this.cmp); 89 | 90 | return true; 91 | } 92 | 93 | /** 94 | * Performs the insertion sort on the array using the provided comparator. 95 | * This method handles the sorting for small sub-arrays and larger arrays. 96 | * 97 | * @param array the array to be sorted. 98 | * @param blkptr the starting index of the portion to be sorted. 99 | * @param end the index where the sorting should end. 100 | * @param comp the comparator used for element comparisons. 101 | */ 102 | private static void sortWithComparator(int[] array, int blkptr, int end, ArrayComparator comp) { 103 | // Shortcut for 2-element sub-array 104 | if (end == blkptr + 1) { 105 | if (comp.compare(array[blkptr], array[end]) > 0) { 106 | final int tmp = array[blkptr]; 107 | array[blkptr] = array[end]; 108 | array[end] = tmp; 109 | } 110 | return; 111 | } 112 | 113 | // Shortcut for 3-element sub-array 114 | if (end == blkptr + 2) { 115 | final int a1 = array[blkptr]; 116 | final int a2 = array[blkptr + 1]; 117 | final int a3 = array[end]; 118 | 119 | if (comp.compare(a1, a2) <= 0) { 120 | if (comp.compare(a2, a3) <= 0) 121 | return; 122 | 123 | if (comp.compare(a3, a1) <= 0) { 124 | array[blkptr] = a3; 125 | array[blkptr + 1] = a1; 126 | array[end] = a2; 127 | return; 128 | } 129 | 130 | array[blkptr + 1] = a3; 131 | array[end] = a2; 132 | } else { 133 | if (comp.compare(a1, a3) <= 0) { 134 | array[blkptr] = a2; 135 | array[blkptr + 1] = a1; 136 | return; 137 | } 138 | 139 | if (comp.compare(a3, a2) <= 0) { 140 | array[blkptr] = a3; 141 | array[end] = a1; 142 | return; 143 | } 144 | 145 | array[blkptr] = a2; 146 | array[blkptr + 1] = a3; 147 | array[end] = a1; 148 | } 149 | return; 150 | } 151 | 152 | // Regular case for arrays with more than 3 elements 153 | for (int i = blkptr; i < end; i++) { 154 | final int val = array[i]; 155 | int j = i; 156 | 157 | while ((j > blkptr) && (comp.compare(array[j - 1], val) > 0)) { 158 | array[j] = array[j - 1]; 159 | j--; 160 | } 161 | 162 | array[j] = val; 163 | } 164 | } 165 | 166 | /** 167 | * Performs the insertion sort on the array using natural ordering (i.e., no comparator). 168 | * This method handles the sorting for small sub-arrays and larger arrays without needing a custom comparator. 169 | * 170 | * @param array the array to be sorted. 171 | * @param blkptr the starting index of the portion to be sorted. 172 | * @param end the index where the sorting should end. 
173 |      */
174 |     private static void sortNoComparator(int[] array, int blkptr, int end) {
175 |         // Shortcut for 2-element sub-array
176 |         if (end == blkptr + 2) {
177 |             if (array[blkptr] > array[end - 1]) {
178 |                 final int tmp = array[blkptr];
179 |                 array[blkptr] = array[end - 1];
180 |                 array[end - 1] = tmp;
181 |             }
182 |             return;
183 |         }
184 | 
185 |         // Shortcut for 3-element sub-array
186 |         if (end == blkptr + 3) {
187 |             final int a1 = array[blkptr];
188 |             final int a2 = array[blkptr + 1];
189 |             final int a3 = array[end - 1];
190 | 
191 |             if (a1 <= a2) {
192 |                 if (a2 <= a3)
193 |                     return;
194 | 
195 |                 if (a3 <= a1) {
196 |                     array[blkptr] = a3;
197 |                     array[blkptr + 1] = a1;
198 |                     array[end - 1] = a2;
199 |                     return;
200 |                 }
201 | 
202 |                 array[blkptr + 1] = a3;
203 |                 array[end - 1] = a2;
204 |             } else {
205 |                 if (a1 <= a3) {
206 |                     array[blkptr] = a2;
207 |                     array[blkptr + 1] = a1;
208 |                     return;
209 |                 }
210 | 
211 |                 if (a3 <= a2) {
212 |                     array[blkptr] = a3;
213 |                     array[end - 1] = a1;
214 |                     return;
215 |                 }
216 | 
217 |                 array[blkptr] = a2;
218 |                 array[blkptr + 1] = a3;
219 |                 array[end - 1] = a1;
220 |             }
221 |             return;
222 |         }
223 | 
224 |         // Regular case for arrays with more than 3 elements
225 |         for (int i = blkptr; i < end; i++) {
226 |             final int val = array[i];
227 |             int j = i;
228 | 
229 |             while ((j > blkptr) && (array[j - 1] > val)) {
230 |                 array[j] = array[j - 1];
231 |                 j--;
232 |             }
233 | 
234 |             array[j] = val;
235 |         }
236 |     }
237 | }
238 | 
239 | 
--------------------------------------------------------------------------------
/java/src/main/java/io/github/flanglet/kanzi/util/sort/MergeSort.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2011-2025 Frederic Langlet
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | you may obtain a copy of the License at
6 | 
7 |     http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 | package io.github.flanglet.kanzi.util.sort;
16 | 
17 | import io.github.flanglet.kanzi.IntSorter;
18 | 
19 | /**
20 |  * The {@code MergeSort} class implements the merge sort algorithm, which is a divide-and-conquer comparison-based sorting
21 |  * algorithm. Merge sort divides the input array into smaller sub-arrays, recursively sorts each sub-array, and then merges
22 |  * the sorted sub-arrays back together. While conceptually simple, it is usually not very performant for smaller arrays due
23 |  * to its recursive nature. However, merge sort is known for its stable sorting and predictable O(n log n) time complexity.
24 |  *
25 |  * <p>Merge sort is efficient for large datasets and nearly sorted data, but it can require significant memory overhead
26 |  * due to the need for auxiliary space to store the merged sub-arrays. This implementation uses insertion sort for small
27 |  * sub-arrays to improve performance on small or nearly sorted datasets.
28 |  *
29 |  * <p>This class implements the {@code IntSorter} interface, which defines the {@code sort} method for sorting integer arrays.
30 |  */
31 | public class MergeSort implements IntSorter {
32 | 
33 |     // Threshold for switching to insertion sort on small arrays
34 |     private static final int SMALL_ARRAY_THRESHOLD = 32;
35 | 
36 |     // Temporary buffer for merging
37 |     private int[] buffer;
38 | 
39 |     // Insertion sort used for small arrays
40 |     private final IntSorter insertionSort;
41 | 
42 |     /**
43 |      * Constructs a new {@code MergeSort} instance. This constructor initializes an empty buffer for merging and
44 |      * uses an {@code InsertionSort} instance for sorting small arrays.
45 |      */
46 |     public MergeSort() {
47 |         this.buffer = new int[0];
48 |         this.insertionSort = new InsertionSort();
49 |     }
50 | 
51 |     /**
52 |      * Sorts the specified portion of the input array using the merge sort algorithm.
53 |      *
54 |      * <p>This method divides the array into smaller sub-arrays, recursively sorts them using merge sort, and then
55 |      * merges the sorted sub-arrays back together. For small sub-arrays (fewer than {@code SMALL_ARRAY_THRESHOLD} elements),
56 |      * insertion sort is used for efficiency.
57 |      *
58 |      * @param data the array to be sorted.
59 |      * @param start the starting index of the portion to be sorted.
60 |      * @param count the number of elements to sort.
61 |      * @return {@code true} if the sorting was successful, {@code false} if invalid parameters were provided (e.g., a {@code null} array or out-of-bounds indices).
62 |      */
63 |     @Override
64 |     public boolean sort(int[] data, int start, int count) {
65 |         if ((data == null) || (count < 0) || (start < 0))
66 |             return false;
67 | 
68 |         if (start + count > data.length)
69 |             return false;
70 | 
71 |         if (count < 2)
72 |             return true;
73 | 
74 |         // Ensure the buffer can hold the merged range: merge() addresses it with absolute indices up to start + count - 1
75 |         if (this.buffer.length < start + count)
76 |             this.buffer = new int[start + count];
77 | 
78 |         return this.mergesort(data, start, start + count - 1);
79 |     }
80 | 
81 |     /**
82 |      * Recursively performs merge sort on the specified sub-array.
83 |      *
84 |      * <p>This method splits the array into two halves and recursively sorts each half. Once the sub-arrays are sorted,
85 |      * they are merged together using the {@code merge} method.
86 |      *
87 |      * @param data the array to be sorted.
88 |      * @param low the starting index of the sub-array to sort.
89 |      * @param high the ending index (inclusive) of the sub-array to sort.
90 |      * @return {@code true} if the sorting was successful.
91 |      */
92 |     private boolean mergesort(int[] data, int low, int high) {
93 |         if (low < high) {
94 |             int count = high - low + 1;
95 | 
96 |             // Use insertion sort for small sub-arrays
97 |             if (count < SMALL_ARRAY_THRESHOLD)
98 |                 return this.insertionSort.sort(data, low, count);
99 | 
100 |             int middle = low + count / 2;
101 |             this.mergesort(data, low, middle);
102 |             this.mergesort(data, middle + 1, high);
103 |             this.merge(data, low, middle, high);
104 |         }
105 | 
106 |         return true;
107 |     }
108 | 
109 |     /**
110 |      * Merges two sorted sub-arrays into one sorted array.
111 |      *
112 |      * <p>This method performs the merging step of merge sort. It copies the sorted elements from the left and right halves
113 |      * of the sub-array into a temporary buffer and then merges them back into the original array.
114 |      *
115 |      * @param data the array containing the sub-arrays to merge.
116 |      * @param low the starting index of the left sub-array.
117 |      * @param middle the ending index of the left sub-array.
118 |      * @param high the ending index of the right sub-array.
119 |      */
120 |     private void merge(int[] data, int low, int middle, int high) {
121 |         int count = high - low + 1;
122 | 
123 |         // For small sub-arrays, copy the elements into the buffer
124 |         if (count < 16) {
125 |             for (int ii = low; ii <= high; ii++)
126 |                 this.buffer[ii] = data[ii];
127 |         } else {
128 |             // For larger sub-arrays, use System.arraycopy for efficiency
129 |             System.arraycopy(data, low, this.buffer, low, count);
130 |         }
131 | 
132 |         int i = low;
133 |         int j = middle + 1;
134 |         int k = low;
135 | 
136 |         // Merge the two sorted sub-arrays
137 |         while ((i <= middle) && (j <= high)) {
138 |             if (this.buffer[i] <= this.buffer[j])
139 |                 data[k] = this.buffer[i++];
140 |             else
141 |                 data[k] = this.buffer[j++];
142 | 
143 |             k++;
144 |         }
145 | 
146 |         count = middle - i + 1;
147 | 
148 |         // Copy the remaining elements of the left sub-array, if any (remaining right-half elements are already in place in data)
149 |         if (count < 16) {
150 |             while (i <= middle)
151 |                 data[k++] = this.buffer[i++];
152 |         } else {
153 |             // Use System.arraycopy for efficiency
154 |             System.arraycopy(this.buffer, i, data, k, count);
155 |         }
156 |     }
157 | }
158 | 
159 | 
--------------------------------------------------------------------------------
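
A minimal usage sketch (not a file from the repository) showing how the two sorters above are driven through the IntSorter interface, once with natural ordering and once with a custom comparator. The SortDemo class name is hypothetical, and the sketch assumes ArrayComparator declares a single int compare(int, int) method, which is how InsertionSort invokes it above.

import java.util.Arrays;

import io.github.flanglet.kanzi.ArrayComparator;
import io.github.flanglet.kanzi.IntSorter;
import io.github.flanglet.kanzi.util.sort.InsertionSort;
import io.github.flanglet.kanzi.util.sort.MergeSort;

// Hypothetical demo class, not part of the kanzi sources.
public class SortDemo {
    public static void main(String[] args) {
        int[] data = { 42, 7, 19, 3, 88, 23, 5, 61, 14, 9 };

        // Sort 6 elements starting at index 2 in ascending order with merge sort.
        IntSorter mergeSorter = new MergeSort();
        boolean ok = mergeSorter.sort(data, 2, 6);
        System.out.println(ok + " -> " + Arrays.toString(data));

        // Sort the whole array in descending order with insertion sort,
        // using a reversing comparator (assumed single-method interface).
        ArrayComparator descending = new ArrayComparator() {
            @Override
            public int compare(int a, int b) {
                return Integer.compare(b, a);
            }
        };
        IntSorter insertionSorter = new InsertionSort(descending);
        ok = insertionSorter.sort(data, 0, data.length);
        System.out.println(ok + " -> " + Arrays.toString(data));
    }
}

Both sort calls report invalid arguments by returning false rather than throwing, so the boolean result should be checked when the (start, count) range is computed at runtime.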