├── Readme.markdown ├── src ├── gunzip.java ├── gzip.java └── io │ └── nayuki │ └── deflate │ ├── DataFormatException.java │ ├── DeflaterOutputStream.java │ ├── GzipInputStream.java │ ├── GzipMetadata.java │ ├── GzipOutputStream.java │ ├── InflaterInputStream.java │ ├── MarkableFileInputStream.java │ ├── ZlibInputStream.java │ ├── ZlibMetadata.java │ ├── ZlibOutputStream.java │ ├── comp │ ├── BinarySplit.java │ ├── BitOutputStream.java │ ├── CountingBitOutputStream.java │ ├── Decision.java │ ├── Lz77Huffman.java │ ├── MultiStrategy.java │ ├── Strategy.java │ └── Uncompressed.java │ └── decomp │ ├── Closed.java │ ├── Open.java │ ├── State.java │ └── StickyException.java └── test └── io └── nayuki └── deflate ├── DeflaterOutputStreamTest.java ├── InflaterInputStreamTest.java └── StringInputStream.java /Readme.markdown: -------------------------------------------------------------------------------- 1 | DEFLATE library (Java) 2 | ====================== 3 | 4 | This project provides a correct and reasonably efficient DEFLATE decompressor in pure Java. 5 | It is an alternative to other Java libraries that wrap over native C code such as zlib. 6 | 7 | Home page with detailed description: [https://www.nayuki.io/page/deflate-library-java](https://www.nayuki.io/page/deflate-library-java) 8 | 9 | 10 | License 11 | ------- 12 | 13 | Copyright © 2023 Project Nayuki. 
(MIT License) 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy of 16 | this software and associated documentation files (the "Software"), to deal in 17 | the Software without restriction, including without limitation the rights to 18 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 19 | the Software, and to permit persons to whom the Software is furnished to do so, 20 | subject to the following conditions: 21 | 22 | * The above copyright notice and this permission notice shall be included in 23 | all copies or substantial portions of the Software. 24 | 25 | * The Software is provided "as is", without warranty of any kind, express or 26 | implied, including but not limited to the warranties of merchantability, 27 | fitness for a particular purpose and noninfringement. In no event shall the 28 | authors or copyright holders be liable for any claim, damages or other 29 | liability, whether in an action of contract, tort or otherwise, arising from, 30 | out of or in connection with the Software or the use or other dealings in the 31 | Software. 32 | -------------------------------------------------------------------------------- /src/gunzip.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | import java.io.File; 10 | import java.io.FileOutputStream; 11 | import java.io.IOException; 12 | import java.io.OutputStream; 13 | import java.time.Instant; 14 | import io.nayuki.deflate.GzipInputStream; 15 | import io.nayuki.deflate.GzipMetadata; 16 | import io.nayuki.deflate.MarkableFileInputStream; 17 | 18 | 19 | /** 20 | * Decompression application for the gzip file format. 21 | *

Usage: java gunzip InputFile.gz OutputFile

22 | *

This decompresses a single input gzip file into a single output file. The program also prints 23 | * some information about the gzip header and the processing speed to standard error, and error messages if the file is invalid/corrupt.

24 | */
public final class gunzip {

    /**
     * Program entry point. Delegates to {@link #submain(String[])}; if that returns
     * an error message, prints it to standard error and exits with status 1.
     * @param args the command line arguments: the input gzip path, then the output path
     */
    public static void main(String[] args) {
        String msg = submain(args);
        if (msg != null) {
            System.err.println(msg);
            System.exit(1);
        }
    }


    // Returns null if successful, otherwise returns an error message string.
    private static String submain(String[] args) {
        // Check arguments
        if (args.length != 2)
            return "Usage: java gunzip InputFile.gz OutputFile";

        var inFile = new File(args[0]);
        if (!inFile.exists())
            return "Input path does not exist: " + inFile;
        if (inFile.isDirectory())
            return "Input path is a directory: " + inFile;
        var outFile = new File(args[1]);
        if (outFile.isDirectory())
            return "Output path is a directory: " + outFile;

        try (var in = new GzipInputStream(new MarkableFileInputStream(inFile))) {
            // The gzip header has already been parsed by the constructor above
            printMetadata(in.getMetadata());

            // Start decompressing and writing output file
            long elapsedTime = -System.nanoTime();
            try (OutputStream out = new FileOutputStream(outFile)) {
                in.transferTo(out);
            }
            elapsedTime += System.nanoTime();
            System.err.printf("Input speed: %.2f MB/s%n", inFile.length() / 1e6 / elapsedTime * 1.0e9);
            System.err.printf("Output speed: %.2f MB/s%n", outFile.length() / 1e6 / elapsedTime * 1.0e9);

        } catch (io.nayuki.deflate.DataFormatException e) {
            // DataFormatException is unchecked (it extends RuntimeException), so it is not
            // covered by the IOException handler below. Without this clause a corrupt input
            // file would end the program with a stack trace instead of an error message.
            // The fully qualified name is used so that the file's import list is untouched.
            return "Invalid or corrupt gzip data: " + e.getMessage();
        } catch (IOException e) {
            return "I/O exception: " + e.getMessage();
        }
        return null;
    }


    // Prints each field of the given gzip header to standard error, one field per line.
    // Optional fields (extra field, file name, comment) are printed only if present.
    private static void printMetadata(GzipMetadata meta) {
        System.err.println("Last modified: " + meta.modificationTimeUnixS()
            .map(t -> Instant.EPOCH.plusSeconds(t).toString()).orElse("N/A"));

        int extraFlags = meta.extraFlags();
        System.err.println("Extra flags: " + switch (extraFlags) {
            case 2 -> "Maximum compression";
            case 4 -> "Fastest compression";
            default -> "Unknown (" + extraFlags + ")";
        });

        System.err.println("Operating system: " + switch (meta.operatingSystem()) {
            case FAT_FILESYSTEM  -> "FAT filesystem";
            case AMIGA           -> "Amiga";
            case VMS             -> "VMS";
            case UNIX            -> "Unix";
            case VM_CMS          -> "VM/CMS";
            case ATARI_TOS       -> "Atari TOS";
            case HPFS_FILESYSTEM -> "HPFS filesystem";
            case MACINTOSH       -> "Macintosh";
            case Z_SYSTEM        -> "Z-System";
            case CPM             -> "CP/M";
            case TOPS_20         -> "TOPS-20";
            case NTFS_FILESYSTEM -> "NTFS filesystem";
            case QDOS            -> "QDOS";
            case ACORN_RISCOS    -> "Acorn RISCOS";
            case UNKNOWN         -> "Unknown";
            default -> throw new AssertionError("Unreachable value");
        });

        System.err.println("File mode: " + (meta.isFileText() ? "Text" : "Binary"));

        meta.extraField().ifPresent(b ->
            System.err.println("Extra field: " + b.length + " bytes"));

        meta.fileName().ifPresent(s ->
            System.err.println("File name: " + s));

        meta.comment().ifPresent(s ->
            System.err.println("Comment: " + s));
    }

}
-------------------------------------------------------------------------------- /src/gzip.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | import java.io.File; 10 | import java.io.FileInputStream; 11 | import java.io.FileOutputStream; 12 | import java.io.IOException; 13 | import java.io.InputStream; 14 | import java.io.OutputStream; 15 | import java.util.Optional; 16 | import io.nayuki.deflate.GzipMetadata; 17 | import io.nayuki.deflate.GzipOutputStream; 18 | 19 | 20 | /** 21 | * Compression application for the gzip file format. 22 | *

Usage: java gzip InputFile OutputFile.gz

23 | *

This compresses a single input file into a single gzip output file.

24 | */
public final class gzip {

    /**
     * Program entry point. Runs the compression; when it reports a failure,
     * the message goes to standard error and the process exits with status 1.
     * @param args the command line arguments: the input path, then the output gzip path
     */
    public static void main(String[] args) {
        String error = submain(args);
        if (error == null)
            return;
        System.err.println(error);
        System.exit(1);
    }


    // Performs the whole job. The result is an error message
    // describing what went wrong, or null when everything succeeded.
    private static String submain(String[] args) {
        // Validate the command line and the two paths
        if (args.length != 2)
            return "Usage: java gzip InputFile OutputFile.gz";

        var source = new File(args[0]);
        if (!source.exists())
            return "Input path does not exist: " + source;
        if (source.isDirectory())
            return "Input path is a directory: " + source;
        var target = new File(args[1]);
        if (target.isDirectory())
            return "Output path is a directory: " + target;

        // Build the gzip header; a timestamp of zero is stored as "absent"
        int mtimeSeconds = (int)(source.lastModified() / 1000);
        Optional<Integer> mtime;
        if (mtimeSeconds == 0)
            mtime = Optional.empty();
        else
            mtime = Optional.of(mtimeSeconds);
        var header = new GzipMetadata(
            GzipMetadata.CompressionMethod.DEFLATE,
            false,                           // isFileText
            mtime,                           // modificationTimeUnixS
            0,                               // extraFlags
            GzipMetadata.OperatingSystem.UNIX,
            Optional.empty(),                // extraField
            Optional.of(source.getName()),   // fileName
            Optional.empty(),                // comment
            true);                           // hasHeaderCrc

        // Compress, timing everything from opening the streams to closing them
        final long startNanos = System.nanoTime();
        try (InputStream in = new FileInputStream(source);
                OutputStream out = new GzipOutputStream(new FileOutputStream(target), header)) {
            in.transferTo(out);
        } catch (IOException e) {
            return "I/O exception: " + e.getMessage();
        }
        final long elapsedNanos = System.nanoTime() - startNanos;

        // Report throughput relative to the uncompressed and compressed sizes
        System.err.printf("Input speed: %.2f MB/s%n", source.length() / 1e6 / elapsedNanos * 1.0e9);
        System.err.printf("Output speed: %.2f MB/s%n", target.length() / 1e6 / elapsedNanos * 1.0e9);
        return null;
    }

}
-------------------------------------------------------------------------------- /src/io/nayuki/deflate/DataFormatException.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.util.Objects; 12 | 13 | 14 |
/**
 * Unchecked exception signalling that compressed or container data is malformed.
 * Every instance carries a {@link Reason} code in addition to its text message.
 */
@SuppressWarnings("serial")
public final class DataFormatException extends RuntimeException {

    /*---- Field ----*/

    // Machine-readable cause of the failure; never null.
    private final Reason reason;


    /*---- Constructor ----*/

    /**
     * Creates an exception with the given reason code and detail message.
     * @param rsn the reason code (not {@code null})
     * @param msg the detail message
     * @throws NullPointerException if the reason is {@code null}
     */
    public DataFormatException(Reason rsn, String msg) {
        super(msg);
        this.reason = Objects.requireNonNull(rsn);
    }


    /*---- Function ----*/

    /*
     * This never returns normally; it always throws. The declared return type exists
     * only so that a caller can write "throw DataFormatException.throwUnexpectedEnd();"
     * where the compiler must see that control flow ends, for example:
     *   int foo;
     *   try {
     *       foo = bar();
     *   } catch (EOFException e) {
     *       throw DataFormatException.throwUnexpectedEnd();
     *   }
     *   print(foo);
     * Everywhere else, prefer the plain statement form:
     *   DataFormatException.throwUnexpectedEnd();
     */
    public static DataFormatException throwUnexpectedEnd() {
        var exception = new DataFormatException(
            Reason.UNEXPECTED_END_OF_STREAM,
            "Unexpected end of stream");
        throw exception;
    }


    /*---- Method ----*/

    /**
     * Returns the reason code that was given at construction.
     * @return the reason code (not {@code null})
     */
    public Reason getReason() {
        return this.reason;
    }



    /*---- Enumeration ----*/

    /** The kinds of format error that an exception of this class can report. */
    public enum Reason {
        UNEXPECTED_END_OF_STREAM,
        RESERVED_BLOCK_TYPE,
        UNCOMPRESSED_BLOCK_LENGTH_MISMATCH,
        HUFFMAN_CODE_UNDER_FULL,
        HUFFMAN_CODE_OVER_FULL,
        NO_PREVIOUS_CODE_LENGTH_TO_COPY,
        CODE_LENGTH_CODE_OVER_FULL,
        END_OF_BLOCK_CODE_ZERO_LENGTH,
        RESERVED_LENGTH_SYMBOL,
        RESERVED_DISTANCE_SYMBOL,
        LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE,
        COPY_FROM_BEFORE_DICTIONARY_START,

        HEADER_CHECKSUM_MISMATCH,
        UNSUPPORTED_COMPRESSION_METHOD,
        DECOMPRESSED_CHECKSUM_MISMATCH,
        DECOMPRESSED_SIZE_MISMATCH,

        GZIP_INVALID_MAGIC_NUMBER,
        GZIP_RESERVED_FLAGS_SET,
        GZIP_UNSUPPORTED_OPERATING_SYSTEM,
    }

}
-------------------------------------------------------------------------------- /src/io/nayuki/deflate/DeflaterOutputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.BufferedOutputStream; 12 | import java.io.IOException; 13 | import java.io.OutputStream; 14 | import java.util.Objects; 15 | import io.nayuki.deflate.comp.BitOutputStream; 16 | import io.nayuki.deflate.comp.Decision; 17 | import io.nayuki.deflate.comp.Lz77Huffman; 18 | import io.nayuki.deflate.comp.Strategy; 19 | 20 | 21 | /** 22 | * Compresses a byte stream into a DEFLATE data stream 23 | * (raw format without zlib or gzip headers or footers). 24 | *

The kind of DEFLATE blocks that are emitted is decided by the {@link Strategy} given to the constructor; 25 | * the single-argument constructor uses {@link Lz77Huffman#RLE_DYNAMIC}. The output always conforms to the DEFLATE format.

26 | *

This class performs its own buffering, so it is unnecessary to wrap a {@link 27 | * BufferedOutputStream} around the {@link OutputStream} given to the constructor.

28 | * @see InflaterInputStream 29 | */ 30 | public final class DeflaterOutputStream extends OutputStream { 31 | 32 | private static final int MAX_HISTORY_CAPACITY = 32 * 1024; 33 | 34 | 35 | private OutputStream output; 36 | private BitOut bitOutput; 37 | 38 | // [unused][history (historyLength)][data (dataLength)][unused] 39 | // ^0 ^historyStart combinedBuffer.length^ 40 | private byte[] combinedBuffer; 41 | private final int historyLookbehindLimit; 42 | private final int dataLookaheadLimit; 43 | private int historyStart = 0; 44 | private int historyLength = 0; 45 | private int dataLength = 0; 46 | 47 | private final Strategy strategy; 48 | 49 | 50 | public DeflaterOutputStream(OutputStream out) { 51 | this(out, 64 * 1024, MAX_HISTORY_CAPACITY, Lz77Huffman.RLE_DYNAMIC); 52 | } 53 | 54 | 55 | public DeflaterOutputStream(OutputStream out, int dataLookaheadLimit, int historyLookbehindLimit, Strategy strat) { 56 | output = Objects.requireNonNull(out); 57 | bitOutput = new BitOut(); 58 | if (dataLookaheadLimit < 1 || historyLookbehindLimit < 0 || historyLookbehindLimit > MAX_HISTORY_CAPACITY 59 | || (long)dataLookaheadLimit + historyLookbehindLimit > Integer.MAX_VALUE) 60 | throw new IllegalArgumentException("Invalid capacities"); 61 | combinedBuffer = new byte[historyLookbehindLimit + Math.max(dataLookaheadLimit, historyLookbehindLimit)]; 62 | this.historyLookbehindLimit = historyLookbehindLimit; 63 | this.dataLookaheadLimit = dataLookaheadLimit; 64 | strategy = Objects.requireNonNull(strat); 65 | } 66 | 67 | 68 | 69 | OutputStream getUnderlyingStream() { 70 | if (output == null) 71 | throw new IllegalStateException("Stream already closed"); 72 | return output; 73 | } 74 | 75 | 76 | @Override public void write(int b) throws IOException { 77 | if (bitOutput == null) 78 | throw new IllegalStateException("Stream already ended"); 79 | if (dataLength >= dataLookaheadLimit) 80 | writeBuffer(false); 81 | combinedBuffer[historyStart + historyLength + dataLength] = (byte)b; 
82 | dataLength++; 83 | } 84 | 85 | 86 | @Override public void write(byte[] b, int off, int len) throws IOException { 87 | if (bitOutput == null) 88 | throw new IllegalStateException("Stream already ended"); 89 | Objects.checkFromIndexSize(off, len, b.length); 90 | while (len > 0) { 91 | if (dataLength >= dataLookaheadLimit) 92 | writeBuffer(false); 93 | int n = Math.min(len, dataLookaheadLimit - dataLength); 94 | System.arraycopy(b, off, combinedBuffer, historyStart + historyLength + dataLength, n); 95 | off += n; 96 | len -= n; 97 | dataLength += n; 98 | } 99 | } 100 | 101 | 102 | public void finish() throws IOException { 103 | if (bitOutput == null) 104 | throw new IllegalStateException("Stream already ended"); 105 | writeBuffer(true); 106 | bitOutput.finish(); 107 | bitOutput = null; 108 | } 109 | 110 | 111 | @Override public void close() throws IOException { 112 | if (bitOutput != null) 113 | finish(); 114 | output.close(); 115 | output = null; 116 | } 117 | 118 | 119 | private void writeBuffer(boolean isFinal) throws IOException { 120 | if (bitOutput == null) 121 | throw new IllegalStateException("Stream already ended"); 122 | 123 | Decision dec = strategy.decide(combinedBuffer, historyStart, historyLength, dataLength); 124 | dec.compressTo(bitOutput, isFinal); 125 | if (isFinal) 126 | return; 127 | 128 | int dataEnd = historyStart + historyLength + dataLength; 129 | historyLength = Math.min(historyLength + dataLength, historyLookbehindLimit); 130 | dataLength = 0; 131 | if (combinedBuffer.length - dataEnd >= dataLookaheadLimit) 132 | historyStart = dataEnd - historyLength; 133 | else { 134 | System.arraycopy(combinedBuffer, dataEnd - historyLength, combinedBuffer, 0, historyLength); 135 | historyStart = 0; 136 | } 137 | } 138 | 139 | 140 | 141 | private final class BitOut implements BitOutputStream { 142 | 143 | private long bitBuffer = 0; 144 | private int bitBufferLength = 0; 145 | 146 | 147 | @Override public void writeBits(int value, int numBits) throws 
IOException { 148 | assert 0 <= numBits && numBits <= 31 && value >>> numBits == 0; 149 | if (numBits > 64 - bitBufferLength) { 150 | for (; bitBufferLength >= 8; bitBufferLength -= 8, bitBuffer >>>= 8) 151 | output.write((byte)bitBuffer); 152 | } 153 | assert numBits <= 64 - bitBufferLength; 154 | bitBuffer |= (long)value << bitBufferLength; 155 | bitBufferLength += numBits; 156 | } 157 | 158 | 159 | @Override public int getBitPosition() { 160 | return bitBufferLength % 8; 161 | } 162 | 163 | 164 | public void finish() throws IOException { 165 | writeBits(0, (8 - getBitPosition()) % 8); 166 | for (; bitBufferLength >= 8; bitBufferLength -= 8, bitBuffer >>>= 8) 167 | output.write((byte)bitBuffer); 168 | assert bitBufferLength == 0; 169 | } 170 | 171 | } 172 | 173 | } 174 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/GzipInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.BufferedInputStream; 12 | import java.io.DataInput; 13 | import java.io.DataInputStream; 14 | import java.io.EOFException; 15 | import java.io.IOException; 16 | import java.io.InputStream; 17 | import java.util.Objects; 18 | import java.util.zip.CRC32; 19 | import io.nayuki.deflate.DataFormatException.Reason; 20 | 21 | 22 | public final class GzipInputStream extends InputStream { 23 | 24 | /*---- Fields ----*/ 25 | 26 | private InputStream rawInput; 27 | private InputStream decompressedInput; 28 | 29 | private final GzipMetadata metadata; 30 | 31 | private long decompressedLength = 0; 32 | private CRC32 checksum = new CRC32(); 33 | 34 | 35 | 36 | /*---- Constructor ----*/ 37 | 38 | public GzipInputStream(InputStream in) throws IOException { 39 | Objects.requireNonNull(in); 40 | metadata = GzipMetadata.read(in); 41 | if (!in.markSupported()) 42 | in = new BufferedInputStream(in); 43 | rawInput = in; 44 | decompressedInput = new InflaterInputStream(in, true); 45 | } 46 | 47 | 48 | 49 | /*---- Methods ----*/ 50 | 51 | public GzipMetadata getMetadata() { 52 | return metadata; 53 | } 54 | 55 | 56 | @Override public int read() throws IOException { 57 | var b = new byte[1]; 58 | return switch (read(b)) { 59 | case 1 -> b[0] & 0xFF; 60 | case -1 -> -1; // EOF 61 | default -> throw new AssertionError("Unreachable value"); 62 | }; 63 | } 64 | 65 | 66 | @Override public int read(byte[] b, int off, int len) throws IOException { 67 | if (decompressedInput == null) 68 | return -1; 69 | int result = decompressedInput.read(b, off, len); 70 | if (result != -1) { 71 | decompressedLength += result; 72 | checksum.update(b, off, result); 73 | } else { 74 | decompressedInput = null; 75 | int expectChecksum, expectLength; 76 | DataInput din = new DataInputStream(rawInput); 77 | try { 78 | expectChecksum = Integer.reverseBytes(din.readInt()); 79 | expectLength = 
Integer.reverseBytes(din.readInt()); 80 | } catch (EOFException e) { 81 | throw DataFormatException.throwUnexpectedEnd(); 82 | } 83 | if ((int)checksum.getValue() != expectChecksum) 84 | throw new DataFormatException(Reason.DECOMPRESSED_CHECKSUM_MISMATCH, "Decompression CRC-32 mismatch"); 85 | checksum = null; 86 | if ((int)decompressedLength != expectLength) 87 | throw new DataFormatException(Reason.DECOMPRESSED_SIZE_MISMATCH, "Decompressed size mismatch"); 88 | } 89 | return result; 90 | } 91 | 92 | 93 | @Override public void close() throws IOException { 94 | rawInput.close(); 95 | rawInput = null; 96 | decompressedInput = null; 97 | checksum = null; 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/GzipMetadata.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.ByteArrayOutputStream; 12 | import java.io.DataInput; 13 | import java.io.DataInputStream; 14 | import java.io.DataOutput; 15 | import java.io.DataOutputStream; 16 | import java.io.EOFException; 17 | import java.io.IOException; 18 | import java.io.InputStream; 19 | import java.io.OutputStream; 20 | import java.nio.charset.StandardCharsets; 21 | import java.util.BitSet; 22 | import java.util.Objects; 23 | import java.util.Optional; 24 | import java.util.zip.CRC32; 25 | import java.util.zip.CheckedInputStream; 26 | import java.util.zip.CheckedOutputStream; 27 | import io.nayuki.deflate.DataFormatException.Reason; 28 | 29 | 30 | public record GzipMetadata( 31 | CompressionMethod compressionMethod, 32 | boolean isFileText, 33 | Optional modificationTimeUnixS, 34 | int extraFlags, 35 | OperatingSystem operatingSystem, 36 | Optional extraField, 37 | Optional fileName, 38 | Optional comment, 39 | boolean hasHeaderCrc) { 40 | 41 | 42 | /*---- Constructor ----*/ 43 | 44 | public GzipMetadata { 45 | Objects.requireNonNull(compressionMethod); 46 | 47 | Objects.requireNonNull(modificationTimeUnixS); 48 | modificationTimeUnixS.ifPresent(x -> { 49 | if (x == 0) 50 | throw new IllegalArgumentException("Modification timestamp is zero"); 51 | }); 52 | 53 | if (extraFlags >>> 8 != 0) 54 | throw new IllegalArgumentException("Invalid extra flags value"); 55 | 56 | Objects.requireNonNull(operatingSystem); 57 | 58 | Objects.requireNonNull(extraField); 59 | extraField.ifPresent(b -> { 60 | if (b.length > 0xFFFF) 61 | throw new IllegalArgumentException("Extra field too long"); 62 | }); 63 | 64 | Objects.requireNonNull(fileName); 65 | 66 | Objects.requireNonNull(comment); 67 | } 68 | 69 | 70 | 71 | /*---- Static factory ----*/ 72 | 73 | public static GzipMetadata read(InputStream in) throws IOException { 74 | Objects.requireNonNull(in); 75 | var in1 = new 
CheckedInputStream(in, new CRC32()); 76 | DataInput in2 = new DataInputStream(in1); 77 | 78 | try { 79 | // -- Read and process 10-byte header -- 80 | if (in2.readUnsignedShort() != 0x1F8B) 81 | throw new DataFormatException(Reason.GZIP_INVALID_MAGIC_NUMBER, "Invalid GZIP magic number"); 82 | 83 | int compMethodInt = in2.readUnsignedByte(); 84 | if (compMethodInt != 8) 85 | throw new DataFormatException(Reason.UNSUPPORTED_COMPRESSION_METHOD, "Unsupported compression method: " + compMethodInt); 86 | CompressionMethod compMethod = CompressionMethod.DEFLATE; 87 | 88 | var flagByte = new byte[1]; 89 | in2.readFully(flagByte); 90 | BitSet flags = BitSet.valueOf(flagByte); 91 | 92 | // Reserved flags 93 | if (flags.get(5) || flags.get(6) || flags.get(7)) 94 | throw new DataFormatException(Reason.GZIP_RESERVED_FLAGS_SET, "Reserved flags are set"); 95 | 96 | // Modification time 97 | int modTimeInt = Integer.reverseBytes(in2.readInt()); 98 | Optional modTime = modTimeInt != 0 ? Optional.of(modTimeInt) : Optional.empty(); 99 | 100 | // Extra flags 101 | int extraFlags = in2.readUnsignedByte(); 102 | 103 | // Operating system 104 | int operSystemInt = in2.readUnsignedByte(); 105 | OperatingSystem operSystem; 106 | if (operSystemInt < OperatingSystem.UNKNOWN.ordinal()) 107 | operSystem = OperatingSystem.values()[operSystemInt]; 108 | else if (operSystemInt == 0xFF) 109 | operSystem = OperatingSystem.UNKNOWN; 110 | else 111 | throw new DataFormatException(Reason.GZIP_UNSUPPORTED_OPERATING_SYSTEM, "Unsupported operating system value"); 112 | 113 | // -- Handle assorted flags and read more data -- 114 | boolean isFileText = flags.get(0); 115 | 116 | Optional extraField = Optional.empty(); 117 | if (flags.get(2)) { 118 | int len = Integer.reverseBytes(in2.readShort()) >>> 16; 119 | var b = new byte[len]; 120 | in2.readFully(b); 121 | extraField = Optional.of(b); 122 | } 123 | 124 | Optional fileName = Optional.empty(); 125 | if (flags.get(3)) 126 | fileName = 
Optional.of(readNullTerminatedString(in2)); 127 | 128 | Optional comment = Optional.empty(); 129 | if (flags.get(4)) 130 | comment = Optional.of(readNullTerminatedString(in2)); 131 | 132 | boolean hasHeaderCrc = flags.get(1); 133 | if (hasHeaderCrc) { 134 | int expect = (int)in1.getChecksum().getValue() & 0xFFFF; 135 | int actual = Integer.reverseBytes(in2.readShort()) >>> 16; 136 | if (actual != expect) 137 | throw new DataFormatException(Reason.HEADER_CHECKSUM_MISMATCH, "Header CRC-16 mismatch"); 138 | } 139 | 140 | return new GzipMetadata(compMethod, isFileText, modTime, extraFlags, 141 | operSystem, extraField, fileName, comment, hasHeaderCrc); 142 | 143 | } catch (EOFException e) { 144 | throw DataFormatException.throwUnexpectedEnd(); 145 | } 146 | } 147 | 148 | 149 | private static String readNullTerminatedString(DataInput in) throws IOException { 150 | var bout = new ByteArrayOutputStream(); 151 | while (true) { 152 | byte b = in.readByte(); 153 | if (b == 0) 154 | break; 155 | bout.write(b); 156 | } 157 | return new String(bout.toByteArray(), StandardCharsets.ISO_8859_1); 158 | } 159 | 160 | 161 | 162 | /*---- Method ----*/ 163 | 164 | public void write(OutputStream out) throws IOException { 165 | Objects.requireNonNull(out); 166 | var out1 = new CheckedOutputStream(out, new CRC32()); 167 | DataOutput out2 = new DataOutputStream(out1); 168 | 169 | out2.writeShort(0x1F8B); 170 | 171 | out2.writeByte(switch (compressionMethod) { 172 | case DEFLATE -> 8; 173 | default -> throw new AssertionError("Unreachable value"); 174 | }); 175 | 176 | boolean[] flags = { 177 | isFileText, 178 | hasHeaderCrc, 179 | extraField.isPresent(), 180 | fileName.isPresent(), 181 | comment.isPresent(), 182 | }; 183 | assert flags.length <= 8; 184 | int flagByte = 0; 185 | for (int i = 0; i < flags.length; i++) 186 | flagByte |= (flags[i] ? 
1 : 0) << i; 187 | out2.writeByte(flagByte); 188 | 189 | out2.writeInt(Integer.reverseBytes(modificationTimeUnixS.orElse(0))); 190 | 191 | out2.writeByte(extraFlags); 192 | 193 | out2.writeByte(switch (operatingSystem) { 194 | case UNKNOWN -> 0xFF; 195 | default -> operatingSystem.ordinal(); 196 | }); 197 | 198 | if (extraField.isPresent()) { 199 | byte[] b = extraField.get(); 200 | out2.writeShort(Integer.reverseBytes(b.length) >>> 16); 201 | out2.write(b); 202 | } 203 | 204 | if (fileName.isPresent()) 205 | out2.write((fileName.get() + "\0").getBytes(StandardCharsets.ISO_8859_1)); 206 | 207 | if (comment.isPresent()) 208 | out2.write((comment.get() + "\0").getBytes(StandardCharsets.ISO_8859_1)); 209 | 210 | if (hasHeaderCrc) 211 | out2.writeShort(Integer.reverseBytes((int)out1.getChecksum().getValue()) >>> 16); 212 | } 213 | 214 | 215 | 216 | /*---- Enumerations ----*/ 217 | 218 | public enum CompressionMethod { 219 | DEFLATE, 220 | } 221 | 222 | 223 | public enum OperatingSystem { 224 | FAT_FILESYSTEM, 225 | AMIGA, 226 | VMS, 227 | UNIX, 228 | VM_CMS, 229 | ATARI_TOS, 230 | HPFS_FILESYSTEM, 231 | MACINTOSH, 232 | Z_SYSTEM, 233 | CPM, 234 | TOPS_20, 235 | NTFS_FILESYSTEM, 236 | QDOS, 237 | ACORN_RISCOS, 238 | 239 | UNKNOWN, 240 | } 241 | 242 | } 243 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/GzipOutputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.DataOutput; 12 | import java.io.DataOutputStream; 13 | import java.io.IOException; 14 | import java.io.OutputStream; 15 | import java.util.Objects; 16 | import java.util.zip.CRC32; 17 | 18 |
/**
 * Compresses a byte stream into a gzip data stream. The gzip header is written
 * by the constructor, and the footer (CRC-32 and length) is written by {@link #finish()}.
 */
public final class GzipOutputStream extends OutputStream {

    /*---- Fields ----*/

    private DeflaterOutputStream output;  // Set to null by close()

    // Running totals over the uncompressed bytes written so far
    private CRC32 checksum = new CRC32();  // Set to null by finish()
    private long uncompressedLength = 0;



    /*---- Constructors ----*/

    /**
     * Constructs a gzip output stream over the given stream using a default
     * {@link DeflaterOutputStream}, immediately writing the given header.
     * @param out the underlying output stream (not {@code null})
     * @param meta the header to write (not {@code null})
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException if an I/O exception occurs while writing the header
     */
    public GzipOutputStream(OutputStream out, GzipMetadata meta) throws IOException {
        this(new DeflaterOutputStream(out), meta);
    }


    /**
     * Constructs a gzip output stream over the given deflater stream, immediately
     * writing the given header to the deflater's underlying stream.
     * @param out the deflater stream to compress with (not {@code null})
     * @param meta the header to write (not {@code null})
     * @throws NullPointerException if any argument is {@code null}
     * @throws IOException if an I/O exception occurs while writing the header
     */
    public GzipOutputStream(DeflaterOutputStream out, GzipMetadata meta) throws IOException {
        Objects.requireNonNull(out);
        Objects.requireNonNull(meta);
        meta.write(out.getUnderlyingStream());
        output = out;
    }



    /*---- Methods ----*/

    /**
     * Compresses one byte.
     * @param b the byte to write (only the low 8 bits are used)
     * @throws IOException if an I/O exception occurs
     * @throws IllegalStateException if the stream has already been finished
     */
    @Override public void write(int b) throws IOException {
        write(new byte[]{(byte)b});
    }


    /**
     * Compresses the given array subrange, and updates the running checksum and length.
     * @param b the array to read bytes from
     * @param off the starting offset in the array
     * @param len the number of bytes to write
     * @throws IOException if an I/O exception occurs
     * @throws IllegalStateException if the stream has already been finished
     */
    @Override public void write(byte[] b, int off, int len) throws IOException {
        if (checksum == null)
            throw new IllegalStateException("Stream already ended");
        output.write(b, off, len);  // Validates the subrange before the totals are updated
        checksum.update(b, off, len);
        uncompressedLength += len;
    }


    /**
     * Finishes the compressed data and writes the gzip footer, without
     * closing the underlying stream. No more data can be written afterward.
     * @throws IOException if an I/O exception occurs
     * @throws IllegalStateException if the stream has already been finished
     */
    public void finish() throws IOException {
        if (checksum == null)
            throw new IllegalStateException("Stream already ended");
        output.finish();
        DataOutput dout = new DataOutputStream(output.getUnderlyingStream());
        // Footer fields are little-endian, whereas DataOutput writes big-endian
        dout.writeInt(Integer.reverseBytes((int)checksum.getValue()));
        checksum = null;
        dout.writeInt(Integer.reverseBytes((int)uncompressedLength));  // Length modulo 2^32
    }


    /**
     * Finishes the gzip stream if that has not been done yet, then closes the deflater stream.
     * Calling this method on an already-closed stream has no effect, as required by
     * {@link java.io.Closeable#close()}. The deflater stream is closed even if finishing fails.
     * @throws IOException if an I/O exception occurs
     */
    @Override public void close() throws IOException {
        if (output == null)
            return;  // Already closed
        // The resource clause closes the deflater stream after the body, and records a
        // failure of that close as a suppressed exception if the body itself failed
        try (DeflaterOutputStream out = output) {
            if (checksum != null)
                finish();
        } finally {
            output = null;
            checksum = null;
        }
    }

}
-------------------------------------------------------------------------------- /src/io/nayuki/deflate/InflaterInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.IOException; 12 | import java.io.InputStream; 13 | import java.util.Objects; 14 | import io.nayuki.deflate.decomp.Closed; 15 | import io.nayuki.deflate.decomp.Open; 16 | import io.nayuki.deflate.decomp.State; 17 | import io.nayuki.deflate.decomp.StickyException; 18 | 19 | 20 | /** 21 | * Decompresses a DEFLATE data stream (raw format without zlib or gzip headers or footers) into 22 | * a byte stream. Objects only use memory and no operating system resources, so it is safe to discard 23 | * these objects without calling {@link #close()} in order to continue using the underlying streams. 24 | * @see DeflaterOutputStream 25 | */ 26 | public final class InflaterInputStream extends InputStream { 27 | 28 | /*---- Field ----*/ 29 | 30 | private State state; 31 | 32 | 33 | 34 | /*---- Constructors ----*/ 35 | 36 | /** 37 | * Constructs an inflater input stream over the specified underlying input stream. The underlying 38 | * stream must contain DEFLATE-compressed data with no headers or footers (e.g. must be unwrapped 39 | * from the zlib or gzip container formats). When this inflater stream reaches the end, the 40 | * underlying stream will be at an unspecified position at or after the end of the DEFLATE data. 
41 | * @param in the underlying input stream of raw DEFLATE-compressed data 42 | * @throws NullPointerException if the input stream is {@code null} 43 | */ 44 | public InflaterInputStream(InputStream in) { 45 | this(in, false); 46 | } 47 | 48 | 49 | /** 50 | * Constructs an inflater input stream over the specified underlying input stream, 51 | * and with the specified option for ending exactly. The underlying stream must 52 | * contain DEFLATE-compressed data with no headers or footers (e.g. must be unwrapped 53 | * from the zlib or gzip container formats). If ending exactly is requested, then 54 | * the underlying stream must support marking, and when this inflater stream reaches 55 | * the end, the underlying stream will be foremost byte position after the end of the 56 | * DEFLATE data. Otherwise (not ending exactly) when this inflater stream reaches the 57 | * end, the underlying stream will be at an unspecified position at or after the end 58 | * of the DEFLATE data. For end-exactly to be useful, discard this inflater stream 59 | * without calling {@link #close()} so that the underlying stream can still be used. 60 | * @param in the underlying input stream of raw DEFLATE-compressed data 61 | * @param endExactly whether to position the underlying stream at the exact 62 | * position after the end of the DEFLATE data when this inflater stream ends 63 | * @throws NullPointerException if the input stream is {@code null} 64 | * @throws IllegalArgumentException if {@code endExactly 65 | * == true} but {@code in.markSupported() == false} 66 | */ 67 | public InflaterInputStream(InputStream in, boolean endExactly) { 68 | this(in, endExactly, DEFAULT_INPUT_BUFFER_SIZE); 69 | } 70 | 71 | 72 | private static final int DEFAULT_INPUT_BUFFER_SIZE = 16 * 1024; 73 | 74 | 75 | /** 76 | * Constructs an inflater input stream over the specified underlying input stream, 77 | * with the specified options for ending exactly and input buffer size. 
The underlying 78 | * stream must contain DEFLATE-compressed data with no headers or footers (e.g. must 79 | * be unwrapped from the zlib or gzip container formats). If ending exactly is 80 | * requested, then the underlying stream must support marking, and when this inflater 81 | * stream reaches the end, the underlying stream will be foremost byte position after 82 | * the end of the DEFLATE data. Otherwise (not ending exactly) when this inflater 83 | * stream reaches the end, the underlying stream will be at an unspecified position 84 | * at or after the end of the DEFLATE data. For end-exactly to be useful, discard this 85 | * inflater stream without calling {@link #close()} so that the underlying stream can 86 | * still be used. 87 | * @param in the underlying input stream of raw DEFLATE-compressed data (not {@code null}) 88 | * @param endExactly whether to position the underlying stream at the exact 89 | * position after the end of the DEFLATE data when this inflater stream ends 90 | * @param inBufLen the size of the internal read buffer, which must be positive 91 | * @throws NullPointerException if the input stream is {@code null} 92 | * @throws IllegalArgumentException if {@code inBufLen < 1} 93 | * @throws IllegalArgumentException if {@code endExactly 94 | * == true} but {@code in.markSupported() == false} 95 | */ 96 | public InflaterInputStream(InputStream in, boolean endExactly, int inBufLen) { 97 | Objects.requireNonNull(in); 98 | if (inBufLen <= 0) 99 | throw new IllegalArgumentException("Non-positive input buffer size"); 100 | if (endExactly) { 101 | if (!in.markSupported()) 102 | throw new IllegalArgumentException("Input stream not markable, cannot support detachment"); 103 | in.mark(0); 104 | } 105 | state = new Open(in, endExactly, inBufLen); 106 | } 107 | 108 | 109 | 110 | /*---- Methods ----*/ 111 | 112 | /** 113 | * Reads the next byte of decompressed data from this stream. 
If data is 114 | * available then a number in the range [0, 255] is returned (blocking if 115 | * necessary); otherwise −1 is returned if the end of stream is reached. 116 | * @return the next unsigned byte of data, or −1 for the end of stream 117 | * @throws IOException if an I/O exception occurs in the underlying input stream, the end 118 | * of stream occurs at an unexpected position, or the compressed data has a format error 119 | * @throws IllegalStateException if the stream has already been closed 120 | */ 121 | @Override public int read() throws IOException { 122 | // In theory this method for reading a single byte could be implemented somewhat faster. 123 | // We could take the logic of read(byte[],int,int) and simplify it for the special case 124 | // of handling one byte. But if the caller chose to use this read() method instead of 125 | // the bulk read(byte[]) method, then they have already chosen to not care about speed. 126 | // Therefore speeding up this method would result in needless complexity. Instead, 127 | // we chose to optimize this method for simplicity and ease of verifying correctness. 128 | var b = new byte[1]; 129 | return switch (read(b)) { 130 | case 1 -> b[0] & 0xFF; 131 | case -1 -> -1; // EOF 132 | default -> throw new AssertionError("Unreachable value"); 133 | }; 134 | } 135 | 136 | 137 | /** 138 | * Reads some bytes from the decompressed data of this stream into the specified array's 139 | * subrange. This returns the number of data bytes that were stored into the array, and 140 | * is in the range [−1, len]. A return value of 0 is allowed iff {@code len} is 0. 
141 | * @throws NullPointerException if the array is {@code null} 142 | * @throws ArrayIndexOutOfBoundsException if the array subrange is out of bounds 143 | * @throws IOException if an I/O exception occurs in the underlying input stream, the end 144 | * of stream occurs at an unexpected position, or the compressed data has a format error 145 | * @throws IllegalStateException if the stream has already been closed 146 | */ 147 | @Override public int read(byte[] b, int off, int len) throws IOException { 148 | // Check arguments and state 149 | Objects.requireNonNull(b); 150 | Objects.checkFromIndexSize(off, len, b.length); 151 | if (state instanceof Open st) { 152 | try { 153 | return st.read(b, off, len); 154 | } catch (IOException e) { 155 | state = new StickyException(st.input, e); 156 | throw e; 157 | } 158 | } else if (state instanceof StickyException st) 159 | throw st.exception(); 160 | else if (state instanceof Closed) 161 | throw new IllegalStateException("Stream already closed"); 162 | else 163 | throw new AssertionError("Unreachable type"); 164 | } 165 | 166 | 167 | /** 168 | * Closes this input stream and the underlying stream. It is illegal 169 | * to call {@link #read()} or {@link #detach()} after closing. It is 170 | * idempotent to call this {@link #close()} method more than once. 171 | * @throws IOException if an I/O exception occurs in the underlying stream 172 | */ 173 | @Override public void close() throws IOException { 174 | if (state instanceof Open st) 175 | st.close(); 176 | else if (state instanceof StickyException st) 177 | st.input().close(); 178 | state = Closed.SINGLETON; 179 | } 180 | 181 | } 182 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/MarkableFileInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
/**
 * An input stream over a file that supports {@link #mark(int)} and {@link #reset()}
 * by remembering and seeking to a file position. The read limit argument of
 * {@code mark()} is ignored. Call {@link #close()} to release the file handle.
 */
public final class MarkableFileInputStream extends InputStream {
	
	private final RandomAccessFile raf;
	
	// File position saved by the latest mark(), or -1 if mark() has never been called.
	private long markPosition;
	
	
	
	/**
	 * Opens the file at the specified path for reading.
	 * @param path the path of the file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 */
	public MarkableFileInputStream(String path) throws FileNotFoundException {
		this(new File(path));
	}
	
	
	/**
	 * Opens the specified file for reading.
	 * @param file the file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 */
	public MarkableFileInputStream(File file) throws FileNotFoundException {
		raf = new RandomAccessFile(file, "r");
		markPosition = -1;
	}
	
	
	
	@Override public int read() throws IOException {
		return raf.read();
	}
	
	
	@Override public int read(byte[] b, int off, int len) throws IOException {
		return raf.read(b, off, len);
	}
	
	
	@Override public boolean markSupported() {
		return true;
	}
	
	
	/**
	 * Remembers the current file position for a later {@link #reset()}.
	 * @param readLimit ignored; any number of bytes may be read before resetting
	 * @throws RuntimeException wrapping the {@code IOException} if the position cannot be queried
	 */
	@Override public void mark(int readLimit) {
		try {
			markPosition = raf.getFilePointer();
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}
	
	
	// Normally after a reset(), rereading the same file section will yield the same bytes.
	// But this is not always true - e.g. due to concurrent writing. Thus this class does not
	// provide a hard guarantee for the mark()/reset() behavior like BufferedInputStream does.
	/**
	 * Seeks back to the position saved by the latest {@link #mark(int)}.
	 * @throws RuntimeException wrapping the {@code IOException} if seeking fails,
	 * which includes the case where {@code mark()} was never called
	 */
	@Override public void reset() {
		try {
			raf.seek(markPosition);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}
	
	
	/**
	 * Closes the underlying file and releases its handle. Without this override the
	 * inherited {@code InputStream.close()} does nothing and the file stays open.
	 * Closing more than once has no further effect.
	 * @throws IOException if an I/O exception occurs while closing the file
	 */
	@Override public void close() throws IOException {
		raf.close();
	}
	
}
(decompressedInput == null) 66 | return -1; 67 | int result = decompressedInput.read(b, off, len); 68 | if (result != -1) 69 | checksum.update(b, off, result); 70 | else { 71 | decompressedInput = null; 72 | int expectChecksum; 73 | try { 74 | expectChecksum = new DataInputStream(rawInput).readInt(); 75 | } catch (EOFException e) { 76 | throw DataFormatException.throwUnexpectedEnd(); 77 | } 78 | if ((int)checksum.getValue() != expectChecksum) 79 | throw new DataFormatException(Reason.DECOMPRESSED_CHECKSUM_MISMATCH, "Decompression Adler-32 mismatch"); 80 | checksum = null; 81 | } 82 | return result; 83 | } 84 | 85 | 86 | @Override public void close() throws IOException { 87 | rawInput.close(); 88 | rawInput = null; 89 | decompressedInput = null; 90 | checksum = null; 91 | } 92 | 93 | } 94 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/ZlibMetadata.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.IOException; 12 | import java.io.InputStream; 13 | import java.io.OutputStream; 14 | import java.util.Objects; 15 | import java.util.Optional; 16 | import io.nayuki.deflate.DataFormatException.Reason; 17 | 18 | 19 | public record ZlibMetadata( 20 | CompressionMethod compressionMethod, 21 | int compressionInfo, // Uint4 22 | Optional presetDictionary, 23 | CompressionLevel compressionLevel) { 24 | 25 | 26 | /*---- Constructor ----*/ 27 | 28 | public ZlibMetadata { 29 | Objects.requireNonNull(compressionMethod); 30 | 31 | if (compressionInfo >>> 4 != 0 || compressionMethod == CompressionMethod.DEFLATE && compressionInfo > 7) 32 | throw new IllegalArgumentException("Invalid compression info value"); 33 | 34 | Objects.requireNonNull(presetDictionary); 35 | 36 | Objects.requireNonNull(compressionLevel); 37 | } 38 | 39 | 40 | public static final ZlibMetadata DEFAULT = new ZlibMetadata( 41 | CompressionMethod.DEFLATE, 7, Optional.empty(), CompressionLevel.DEFAULT); 42 | 43 | 44 | 45 | /*---- Static factory ----*/ 46 | 47 | public static ZlibMetadata read(InputStream in) throws IOException { 48 | Objects.requireNonNull(in); 49 | int cmf = in.read(); 50 | int flg = in.read(); 51 | if (flg == -1) 52 | DataFormatException.throwUnexpectedEnd(); 53 | if ((cmf << 8 | flg) % CHECKSUM_MODULUS != 0) 54 | throw new DataFormatException(Reason.HEADER_CHECKSUM_MISMATCH, "Header checksum mismatch"); 55 | 56 | int compMethodInt = cmf & 0xF; 57 | CompressionMethod compMethod = switch (compMethodInt) { 58 | case 8 -> CompressionMethod.DEFLATE; 59 | case 15 -> CompressionMethod.RESERVED; 60 | default -> throw new DataFormatException(Reason.UNSUPPORTED_COMPRESSION_METHOD, "Unsupported compression method: " + compMethodInt); 61 | }; 62 | 63 | int compInfo = cmf >>> 4; 64 | 65 | Optional presetDict = Optional.empty(); 66 | if (((flg >>> 5) & 1) != 0) { 67 | int val = 0; 68 | 
for (int i = 0; i < 4; i++) { 69 | int b = in.read(); 70 | if (b == -1) 71 | DataFormatException.throwUnexpectedEnd(); 72 | val = (val << 8) | b; 73 | } 74 | presetDict = Optional.of(val); 75 | } 76 | 77 | CompressionLevel compLevel = CompressionLevel.values()[flg >>> 6]; 78 | 79 | return new ZlibMetadata(compMethod, compInfo, presetDict, compLevel); 80 | } 81 | 82 | 83 | 84 | /*---- Method ----*/ 85 | 86 | public void write(OutputStream out) throws IOException { 87 | Objects.requireNonNull(out); 88 | 89 | int compMethodInt = switch (compressionMethod) { 90 | case DEFLATE -> 8; 91 | case RESERVED -> 15; 92 | }; 93 | int cmf = (compMethodInt << 0) | (compressionInfo << 4); 94 | int flg = ((presetDictionary.isPresent() ? 1 : 0) << 5) | (compressionLevel.ordinal() << 6); 95 | flg |= (CHECKSUM_MODULUS - (cmf << 8 | flg) % CHECKSUM_MODULUS) % CHECKSUM_MODULUS; 96 | 97 | out.write(cmf); 98 | out.write(flg); 99 | if (presetDictionary.isPresent()) { 100 | int val = presetDictionary.get(); 101 | for (int i = 3; i >= 0; i--) 102 | out.write(val >>> (i * 8)); 103 | } 104 | } 105 | 106 | 107 | private static final int CHECKSUM_MODULUS = 31; 108 | 109 | 110 | 111 | /*---- Enumerations ----*/ 112 | 113 | public enum CompressionMethod { 114 | DEFLATE, 115 | RESERVED, 116 | } 117 | 118 | 119 | public enum CompressionLevel { 120 | FASTEST, 121 | FAST, 122 | DEFAULT, 123 | MAXIMUM, 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/ZlibOutputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.DataOutput; 12 | import java.io.DataOutputStream; 13 | import java.io.IOException; 14 | import java.io.OutputStream; 15 | import java.util.Objects; 16 | import java.util.zip.Adler32; 17 | 18 | 19 | public final class ZlibOutputStream extends OutputStream { 20 | 21 | /*---- Fields ----*/ 22 | 23 | private DeflaterOutputStream output; 24 | 25 | private Adler32 checksum = new Adler32(); 26 | 27 | 28 | 29 | /*---- Constructors ----*/ 30 | 31 | public ZlibOutputStream(OutputStream out, ZlibMetadata meta) throws IOException { 32 | this(new DeflaterOutputStream(out), meta); 33 | } 34 | 35 | 36 | public ZlibOutputStream(DeflaterOutputStream out, ZlibMetadata meta) throws IOException { 37 | Objects.requireNonNull(out); 38 | Objects.requireNonNull(meta); 39 | meta.write(out.getUnderlyingStream()); 40 | output = out; 41 | } 42 | 43 | 44 | 45 | /*---- Methods ----*/ 46 | 47 | @Override public void write(int b) throws IOException { 48 | write(new byte[]{(byte)b}); 49 | } 50 | 51 | 52 | @Override public void write(byte[] b, int off, int len) throws IOException { 53 | if (checksum == null) 54 | throw new IllegalStateException("Stream already ended"); 55 | output.write(b, off, len); 56 | checksum.update(b, off, len); 57 | } 58 | 59 | 60 | public void finish() throws IOException { 61 | if (checksum == null) 62 | throw new IllegalStateException("Stream already ended"); 63 | output.finish(); 64 | DataOutput dout = new DataOutputStream(output.getUnderlyingStream()); 65 | dout.writeInt((int)checksum.getValue()); 66 | checksum = null; 67 | } 68 | 69 | 70 | @Override public void close() throws IOException { 71 | if (checksum != null) 72 | finish(); 73 | output.close(); 74 | output = null; 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/comp/BinarySplit.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.comp; 10 | 11 | import java.io.IOException; 12 | import java.util.Arrays; 13 | import java.util.Objects; 14 | 15 | 16 | public final class BinarySplit implements Strategy { 17 | 18 | private final Strategy substrategy; 19 | private final int minimumBlockLength; 20 | 21 | 22 | public BinarySplit(Strategy strat, int minBlockLen) { 23 | substrategy = Objects.requireNonNull(strat); 24 | if (minBlockLen < 1) 25 | throw new IllegalArgumentException("Non-positive minimum block length"); 26 | minimumBlockLength = minBlockLen; 27 | } 28 | 29 | 30 | public Decision decide(byte[] b, int off, int historyLen, int dataLen) { 31 | return decide(b, off, historyLen, dataLen, 32 | substrategy.decide(b, off, historyLen, dataLen)); 33 | } 34 | 35 | 36 | private Decision decide(byte[] b, int off, int historyLen, int dataLen, Decision curDec) { 37 | var subdecisions = new Decision[8][]; 38 | Arrays.fill(subdecisions, new Decision[]{curDec}); 39 | long[] bitLengths = curDec.getBitLengths().clone(); 40 | 41 | int firstHalfLen = (dataLen + 1) / 2; 42 | int secondHalfLen = dataLen - firstHalfLen; 43 | if (Math.min(firstHalfLen, secondHalfLen) > minimumBlockLength) { 44 | Decision[] splitDecs = { 45 | substrategy.decide(b, off, historyLen, firstHalfLen), 46 | substrategy.decide(b, off, historyLen + firstHalfLen, secondHalfLen), 47 | }; 48 | boolean improved = false; 49 | for (int i = 0; i < bitLengths.length; i++) { 50 | long bitLen = 0; 51 | for (Decision dec : splitDecs) 52 | bitLen += dec.getBitLengths()[(int)(bitLen % 8)]; 53 | improved |= bitLen < bitLengths[i]; 54 | } 55 | 56 | if (improved) { 57 | splitDecs[0] = decide(b, off, historyLen, firstHalfLen, splitDecs[0]); 58 | splitDecs[1] = decide(b, off, 
/**
 * A sink that accepts data as groups of bits and reports its current bit position.
 */
public interface BitOutputStream {
	
	/**
	 * Writes the specified number of bits taken from the specified value.
	 * NOTE(review): the bit order and the valid range of {@code numBits}
	 * are defined by the implementations - confirm against them.
	 * @param value the integer holding the bits to write
	 * @param numBits the number of bits to write
	 * @throws IOException if an I/O exception occurs
	 */
	void writeBits(int value, int numBits) throws IOException;
	
	
	/**
	 * Returns the current bit position of this stream. Callers in this
	 * package use the result as an index into arrays of length 8.
	 * @return the current bit position
	 */
	int getBitPosition();
	
}
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.comp; 10 | 11 | import java.io.IOException; 12 | 13 | 14 | final class CountingBitOutputStream implements BitOutputStream { 15 | 16 | private long length = 0; 17 | 18 | 19 | @Override public void writeBits(int value, int numBits) throws IOException { 20 | length += numBits; 21 | } 22 | 23 | 24 | @Override public int getBitPosition() { 25 | return (int)length % 8; 26 | } 27 | 28 | 29 | public long getBitLength() { 30 | return length; 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/comp/Decision.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.comp; 10 | 11 | import java.io.IOException; 12 | 13 | 14 | public interface Decision { 15 | 16 | public long[] getBitLengths(); 17 | 18 | 19 | public void compressTo(BitOutputStream out, boolean isFinal) throws IOException; 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/comp/Lz77Huffman.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.comp; 10 | 11 | import java.io.IOException; 12 | import java.nio.ShortBuffer; 13 | import java.util.ArrayList; 14 | import java.util.Arrays; 15 | import java.util.Collections; 16 | import java.util.Iterator; 17 | import java.util.List; 18 | 19 | 20 | public record Lz77Huffman( 21 | boolean useDynamicHuffmanCodes, 22 | int searchMinimumRunLength, 23 | int searchMaximumRunLength, 24 | int searchMinimumDistance, 25 | int searchMaximumDistance) 26 | implements Strategy { 27 | 28 | 29 | public Lz77Huffman { 30 | int minRun = searchMinimumRunLength; 31 | int maxRun = searchMaximumRunLength; 32 | int minDist = searchMinimumDistance; 33 | int maxDist = searchMaximumDistance; 34 | if (minRun == 0 && maxRun == 0 && minDist == 0 && maxDist == 0); 35 | else if (ABSOLUTE_MINIMUM_RUN_LENGTH <= minRun && minRun <= maxRun && maxRun <= ABSOLUTE_MAXIMUM_RUN_LENGTH && 36 | ABSOLUTE_MINIMUM_DISTANCE <= minDist && minDist <= maxDist && maxDist <= ABSOLUTE_MAXIMUM_DISTANCE); 37 | else 38 | throw new IllegalArgumentException("Invalid minimum/maximum run-length/distance"); 39 | } 40 | 41 | 42 | @Override public Decision decide(byte[] b, int off, int historyLen, int dataLen) { 43 | return new Decision() { 44 | private final long[] bitLengths = new long[8]; 45 | { 46 | var temp = new CountingBitOutputStream(); 47 | try { 48 | compressTo(temp, false); 49 | } catch (IOException e) { 50 | throw new AssertionError("Caught impossible exception", e); 51 | } 52 | Arrays.fill(bitLengths, temp.getBitLength()); 53 | } 54 | 55 | 56 | @Override public long[] getBitLengths() { 57 | return bitLengths; 58 | } 59 | 60 | 61 | @Override public void compressTo(BitOutputStream out, boolean isFinal) throws IOException { 62 | int index = off + historyLen; 63 | final int end = index + dataLen; 64 | 65 | var symbolsAndExtraBits = ShortBuffer.allocate(Math.toIntExact(Math.ceilDiv(dataLen * 4L, 3) + 1)); 66 | var 
litLenHistogram = new int[286]; 67 | var distHistogram = new int[30]; 68 | while (index < end) { 69 | int bestRun = 0; 70 | int bestDist = 0; 71 | for (int dist = searchMinimumDistance, distEnd = Math.min(searchMaximumDistance, index - off); dist <= distEnd && bestRun < searchMaximumRunLength; dist++) { 72 | int run = 0; 73 | int historyIndex = index - dist; 74 | int dataIndex = index; 75 | for (; run < searchMaximumRunLength && dataIndex < end && b[dataIndex] == b[historyIndex]; run++, dataIndex++) { 76 | historyIndex++; 77 | if (historyIndex == index) 78 | historyIndex -= dist; 79 | } 80 | if (run > bestRun || run == bestRun && dist < bestDist) { 81 | bestRun = run; 82 | bestDist = dist; 83 | } 84 | } 85 | if (bestRun == 0 || bestRun < searchMinimumRunLength) { 86 | int sym = b[index] & 0xFF; // Literal 87 | index++; 88 | symbolsAndExtraBits.put((short)(sym << 4)); 89 | litLenHistogram[sym]++; 90 | } 91 | else { 92 | { 93 | int r = bestRun - 3; 94 | int numExtra, sym, extra; 95 | if (bestRun < 11) { // Actually works down to run < 7 96 | numExtra = 0; 97 | sym = r + 257; 98 | extra = 0; 99 | } else if (bestRun == 258) { 100 | numExtra = 0; 101 | sym = 285; 102 | extra = 0; 103 | } else { 104 | numExtra = 29 - Integer.numberOfLeadingZeros(r); 105 | sym = (numExtra << 2) + (r >>> numExtra) + 257; 106 | extra = r & ((1 << numExtra) - 1); 107 | } 108 | symbolsAndExtraBits.put((short)(sym << 4 | numExtra)); 109 | litLenHistogram[sym]++; 110 | symbolsAndExtraBits.put((short)extra); 111 | } 112 | { 113 | int d = bestDist - 1; 114 | int numExtra, sym, extra; 115 | if (bestDist < 5) { // Actually works down to bestDist < 3 116 | numExtra = 0; 117 | sym = d; 118 | extra = 0; 119 | } else { 120 | numExtra = 30 - Integer.numberOfLeadingZeros(d); 121 | sym = (numExtra << 1) + (d >>> numExtra); 122 | extra = d & ((1 << numExtra) - 1); 123 | } 124 | symbolsAndExtraBits.put((short)(sym << 4 | numExtra)); 125 | distHistogram[sym]++; 126 | symbolsAndExtraBits.put((short)extra); 
127 | } 128 | index += bestRun; 129 | } 130 | } 131 | symbolsAndExtraBits.put((short)(256 << 4)); 132 | litLenHistogram[256]++; 133 | 134 | out.writeBits((isFinal ? 1 : 0), 1); // bfinal 135 | out.writeBits((!useDynamicHuffmanCodes ? 1 : 2), 2); // btype 136 | 137 | int[] litLenCode; 138 | int[] distCode; 139 | if (!useDynamicHuffmanCodes) { 140 | litLenCode = STATIC_LITERAL_LENGTH_CODE; 141 | distCode = STATIC_DISTANCE_CODE; 142 | } 143 | else { // Further histogram processing and dynamic code generation 144 | 145 | { 146 | if (dataLen == 0) 147 | litLenHistogram[0]++; // Dummy value to fill the Huffman code tree 148 | int histoEnd = litLenHistogram.length; 149 | for (; histoEnd > 257 && litLenHistogram[histoEnd - 1] == 0; histoEnd--); 150 | if (histoEnd < litLenHistogram.length) 151 | litLenHistogram = Arrays.copyOf(litLenHistogram, histoEnd); 152 | } 153 | byte[] litLenCodeLen = calcHuffmanCodeLengths(litLenHistogram, 15); 154 | 155 | { 156 | int numDistCodesUsed = 0; 157 | for (int x : distHistogram) { 158 | if (x > 0) 159 | numDistCodesUsed++; 160 | } 161 | if (numDistCodesUsed == 1) { 162 | for (int i = 0; i < distHistogram.length; i++) { 163 | if (distHistogram[i] > 0) { 164 | if (distHistogram.length - i > 1) 165 | distHistogram[i + 1] = 1; 166 | else 167 | distHistogram[i - 1] = 1; 168 | break; 169 | } 170 | } 171 | } 172 | int histoEnd = distHistogram.length; 173 | for (; histoEnd > 1 && distHistogram[histoEnd - 1] == 0; histoEnd--); 174 | if (histoEnd < distHistogram.length) 175 | distHistogram = Arrays.copyOf(distHistogram, histoEnd); 176 | } 177 | byte[] distCodeLen; 178 | if (distHistogram.length == 1 && distHistogram[0] == 0) 179 | distCodeLen = new byte[]{0}; 180 | else 181 | distCodeLen = calcHuffmanCodeLengths(distHistogram, 15); 182 | 183 | var codeLens = new byte[litLenCodeLen.length + distCodeLen.length]; 184 | System.arraycopy(litLenCodeLen, 0, codeLens, 0, litLenCodeLen.length); 185 | System.arraycopy(distCodeLen, 0, codeLens, 
litLenCodeLen.length, distCodeLen.length); 186 | 187 | List codeLengthSymbols = new ArrayList<>(); 188 | List extraBits = new ArrayList<>(); 189 | for (int i = 0; i < codeLens.length; ) { // Greedy algorithm 190 | int val = codeLens[i]; 191 | if (val == 0) { 192 | int runLength = 1; 193 | for (; runLength < 138 && i + runLength < codeLens.length 194 | && codeLens[i + runLength] == 0; runLength++); 195 | if (runLength < 3) { 196 | codeLengthSymbols.add(val); 197 | i++; 198 | } else if (runLength < 11) { 199 | codeLengthSymbols.add(17); 200 | extraBits.add(runLength - 3); 201 | i += runLength; 202 | } else if (runLength < 139) { 203 | codeLengthSymbols.add(18); 204 | extraBits.add(runLength - 11); 205 | i += runLength; 206 | } else 207 | throw new AssertionError("Unreachable value"); 208 | continue; 209 | } 210 | if (i > 0) { 211 | int runLength = 0; 212 | for (; runLength < 6 && i + runLength < codeLens.length 213 | && codeLens[i + runLength] == codeLens[i - 1]; runLength++); 214 | if (runLength >= 3) { 215 | codeLengthSymbols.add(16); 216 | extraBits.add(runLength - 3); 217 | i += runLength; 218 | continue; 219 | } 220 | } 221 | codeLengthSymbols.add(val); 222 | i++; 223 | } 224 | 225 | var codeLenHistogram = new int[19]; 226 | for (int sym : codeLengthSymbols) 227 | codeLenHistogram[sym]++; 228 | byte[] codeLenCodeLen = calcHuffmanCodeLengths(codeLenHistogram, 7); 229 | 230 | var reordered = new int[codeLenCodeLen.length]; 231 | for (int i = 0; i < reordered.length; i++) 232 | reordered[i] = codeLenCodeLen[CODE_LENGTH_CODE_ORDER[i]]; 233 | int numCodeLenCodeLens = reordered.length; 234 | for (; numCodeLenCodeLens > 4 && reordered[numCodeLenCodeLens - 1] == 0; numCodeLenCodeLens--); 235 | 236 | out.writeBits(litLenCodeLen.length - 257, 5); // hlit 237 | out.writeBits(distCodeLen .length - 1, 5); // hdist 238 | out.writeBits(numCodeLenCodeLens - 4, 4); // hclen 239 | 240 | for (int i = 0; i < numCodeLenCodeLens; i++) 241 | out.writeBits(reordered[i], 3); 242 | 243 | 
int[] codeLenCode = codeLengthsToCodes(codeLenCodeLen, 7); 244 | Iterator extraBitsIter = extraBits.iterator(); 245 | for (int sym : codeLengthSymbols) { 246 | int pair = codeLenCode[sym]; 247 | out.writeBits(pair >>> 4, pair & 0xF); 248 | if (sym >= 16) { 249 | out.writeBits(extraBitsIter.next(), switch (sym) { 250 | case 16 -> 2; 251 | case 17 -> 3; 252 | case 18 -> 7; 253 | default -> throw new AssertionError("Unreachable value"); 254 | }); 255 | } 256 | } 257 | if (extraBitsIter.hasNext()) 258 | throw new AssertionError("Unreachable state"); 259 | 260 | litLenCode = codeLengthsToCodes(litLenCodeLen, 15); 261 | if (distCodeLen.length == 1 && distCodeLen[0] == 0) 262 | distCode = null; 263 | else 264 | distCode = codeLengthsToCodes(distCodeLen, 15); 265 | } 266 | 267 | symbolsAndExtraBits.flip(); 268 | while (symbolsAndExtraBits.hasRemaining()) { 269 | int litLenPair = symbolsAndExtraBits.get(); 270 | int litLenSym = litLenPair >>> 4; 271 | assert 0 <= litLenSym && litLenSym <= 285; 272 | int lenNumExtra = litLenPair & 0xF; 273 | int litLenCodePair = litLenCode[litLenSym]; 274 | out.writeBits(litLenCodePair >>> 4, litLenCodePair & 0xF); 275 | if (litLenSym > 256) { 276 | out.writeBits(symbolsAndExtraBits.get(), lenNumExtra); 277 | int distPair = symbolsAndExtraBits.get(); 278 | int distSym = distPair >>> 4; 279 | assert 0 <= distSym && distSym <= 29; 280 | int distNumExtra = distPair & 0xF; 281 | int distCodePair = distCode[distSym]; 282 | out.writeBits(distCodePair >>> 4, distCodePair & 0xF); 283 | out.writeBits(symbolsAndExtraBits.get(), distNumExtra); 284 | } 285 | } 286 | } 287 | }; 288 | } 289 | 290 | 291 | public static final int ABSOLUTE_MINIMUM_RUN_LENGTH = 3; 292 | public static final int ABSOLUTE_MAXIMUM_RUN_LENGTH = 258; 293 | 294 | public static final int ABSOLUTE_MINIMUM_DISTANCE = 1; 295 | public static final int ABSOLUTE_MAXIMUM_DISTANCE = 32 * 1024; 296 | 297 | 298 | public static final Lz77Huffman LITERAL_STATIC = new Lz77Huffman(false, 0, 0, 0, 
0); 299 | public static final Lz77Huffman LITERAL_DYNAMIC = new Lz77Huffman(true , 0, 0, 0, 0); 300 | 301 | public static final Lz77Huffman RLE_STATIC = new Lz77Huffman(false, ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, 1, 1); 302 | public static final Lz77Huffman RLE_DYNAMIC = new Lz77Huffman(true , ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, 1, 1); 303 | 304 | public static final Lz77Huffman FULL_STATIC = new Lz77Huffman(false, ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, ABSOLUTE_MINIMUM_DISTANCE, ABSOLUTE_MAXIMUM_DISTANCE); 305 | public static final Lz77Huffman FULL_DYNAMIC = new Lz77Huffman(true , ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, ABSOLUTE_MINIMUM_DISTANCE, ABSOLUTE_MAXIMUM_DISTANCE); 306 | 307 | 308 | 309 | private static byte[] calcHuffmanCodeLengths(int[] symbolHistogram, int maxLen) { 310 | List leaves = new ArrayList<>(); 311 | for (int sym = 0; sym < symbolHistogram.length; sym++) { 312 | int freq = symbolHistogram[sym]; 313 | if (freq > 0) 314 | leaves.add(new Leaf(freq, sym)); 315 | } 316 | 317 | // Package-merge algorithm 318 | List nodes = new ArrayList<>(); 319 | for (int i = 0; i < maxLen; i++) { 320 | nodes.addAll(leaves); 321 | Collections.sort(nodes, (x, y) -> Long.compare(x.frequency(), y.frequency())); 322 | List newNodes = new ArrayList<>(); 323 | for (int j = 0; j + 2 <= nodes.size(); j += 2) { 324 | Node a = nodes.get(j + 0); 325 | Node b = nodes.get(j + 1); 326 | newNodes.add(new InternalNode(a.frequency() + b.frequency(), a, b)); 327 | } 328 | nodes = newNodes; 329 | } 330 | 331 | var nodeHistogram = new byte[symbolHistogram.length]; 332 | for (int i = 0; i < leaves.size() - 1; i++) 333 | nodes.get(i).countOccurrences(nodeHistogram); 334 | return nodeHistogram; 335 | } 336 | 337 | 338 | 339 | private interface Node { 340 | 341 | public long frequency(); 342 | 343 | public void countOccurrences(byte[] nodeHistogram); 344 | 345 | } 346 | 347 | 348 | private record 
InternalNode(long frequency, Node... children) implements Node { 349 | 350 | public void countOccurrences(byte[] nodeHistogram) { 351 | for (Node node : children) 352 | node.countOccurrences(nodeHistogram); 353 | } 354 | 355 | } 356 | 357 | 358 | private record Leaf(long frequency, int symbol) implements Node { 359 | 360 | public void countOccurrences(byte[] nodeHistogram) { 361 | nodeHistogram[symbol]++; 362 | } 363 | 364 | } 365 | 366 | 367 | 368 | private static final int[] CODE_LENGTH_CODE_ORDER = 369 | {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; 370 | 371 | 372 | private static int[] codeLengthsToCodes(byte[] codeLengths, int maxCodeLength) { 373 | if (!(1 <= maxCodeLength && maxCodeLength <= 15)) 374 | throw new IllegalArgumentException("Invalid maximum code length"); 375 | var result = new int[codeLengths.length]; 376 | int nextCode = 0; 377 | for (int codeLength = 1; codeLength <= maxCodeLength; codeLength++) { 378 | nextCode <<= 1; 379 | for (int symbol = 0; symbol < codeLengths.length; symbol++) { 380 | if (codeLengths[symbol] != codeLength) 381 | continue; 382 | if (nextCode >>> codeLength != 0) 383 | throw new IllegalArgumentException("This canonical code produces an over-full Huffman code tree"); 384 | result[symbol] = Integer.reverse(nextCode) >>> (32 - codeLength) << 4 | codeLength; 385 | nextCode++; 386 | } 387 | } 388 | if (nextCode != 1 << maxCodeLength) 389 | throw new IllegalArgumentException("This canonical code produces an under-full Huffman code tree"); 390 | return result; 391 | } 392 | 393 | 394 | private static final int[] STATIC_LITERAL_LENGTH_CODE; 395 | static { 396 | var codeLens = new byte[288]; 397 | int i = 0; 398 | for (; i < 144; i++) codeLens[i] = 8; 399 | for (; i < 256; i++) codeLens[i] = 9; 400 | for (; i < 280; i++) codeLens[i] = 7; 401 | for (; i < 288; i++) codeLens[i] = 8; 402 | STATIC_LITERAL_LENGTH_CODE = codeLengthsToCodes(codeLens, 9); 403 | } 404 | 405 | private static final int[] 
STATIC_DISTANCE_CODE; 406 | static { 407 | var codeLens = new byte[32]; 408 | Arrays.fill(codeLens, (byte)5); 409 | STATIC_DISTANCE_CODE = codeLengthsToCodes(codeLens, 5); 410 | } 411 | 412 | } 413 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/comp/MultiStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.comp; 10 | 11 | import java.io.IOException; 12 | import java.util.Arrays; 13 | import java.util.Objects; 14 | 15 | 16 | public final class MultiStrategy implements Strategy { 17 | 18 | private Strategy[] substrategies; 19 | 20 | 21 | public MultiStrategy(Strategy... strats) { 22 | Objects.requireNonNull(strats); 23 | for (Strategy st : strats) 24 | Objects.requireNonNull(st); 25 | if (strats.length == 0) 26 | throw new IllegalArgumentException("Empty list of strategies"); 27 | substrategies = strats; 28 | } 29 | 30 | 31 | public Decision decide(byte[] b, int off, int historyLen, int dataLen) { 32 | var bitLengths = new long[8]; 33 | var subdecisions = new Decision[bitLengths.length]; 34 | Arrays.fill(bitLengths, Long.MAX_VALUE); 35 | for (Strategy st : substrategies) { 36 | Decision dec = st.decide(b, off, historyLen, dataLen); 37 | long[] bitLens = dec.getBitLengths(); 38 | for (int i = 0; i < bitLengths.length; i++) { 39 | if (bitLens[i] < bitLengths[i]) { 40 | bitLengths[i] = bitLens[i]; 41 | subdecisions[i] = dec; 42 | } 43 | } 44 | } 45 | for (Decision dec : subdecisions) 46 | Objects.requireNonNull(dec); 47 | 48 | return new Decision() { 49 | @Override public long[] getBitLengths() { 50 | return bitLengths; 51 | } 52 | 53 | @Override public void compressTo(BitOutputStream out, boolean isFinal) throws IOException { 54 | 
subdecisions[out.getBitPosition()].compressTo(out, isFinal); 55 | } 56 | }; 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/comp/Strategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.comp; 10 | 11 | 12 | public interface Strategy { 13 | 14 | public Decision decide(byte[] b, int off, int historyLen, int dataLen); 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/comp/Uncompressed.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.comp; 10 | 11 | import java.io.IOException; 12 | 13 | 14 | public enum Uncompressed implements Strategy { 15 | 16 | SINGLETON; 17 | 18 | 19 | @Override public Decision decide(byte[] b, int off, int historyLen, int dataLen) { 20 | return new Decision() { 21 | private final long[] bitLengths = new long[8]; 22 | { 23 | int numBlocks = Math.max(Math.ceilDiv(dataLen, MAX_BLOCK_LEN), 1); 24 | for (int i = 0; i < bitLengths.length; i++) 25 | bitLengths[i] = dataLen * 8L + numBlocks * 40L + ((13 - i) % 8 - 5); 26 | } 27 | 28 | @Override public long[] getBitLengths() { 29 | return bitLengths; 30 | } 31 | 32 | @Override public void compressTo(BitOutputStream out, boolean isFinal) throws IOException { 33 | int index = off + historyLen; 34 | final int end = index + dataLen; 35 | do { 36 | int n = Math.min(end - index, MAX_BLOCK_LEN); 37 | out.writeBits((isFinal && n == end - index) ? 
/**
 * A {@link State} implementation that has exactly one instance
 * and declares no fields or methods of its own.
 * NOTE(review): presumably the state a decompressor stream is put into once it
 * has been closed - confirm against the code that switches between State values.
 */
public enum Closed implements State {
    SINGLETON
}
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.decomp; 10 | 11 | import java.io.DataInputStream; 12 | import java.io.EOFException; 13 | import java.io.IOException; 14 | import java.io.InputStream; 15 | import java.nio.ByteBuffer; 16 | import java.nio.ByteOrder; 17 | import java.util.Arrays; 18 | import java.util.Optional; 19 | import io.nayuki.deflate.DataFormatException; 20 | import io.nayuki.deflate.DataFormatException.Reason; 21 | 22 | 23 | public final class Open implements State { 24 | 25 | /*---- Fields ----*/ 26 | 27 | // The underlying stream to read from 28 | public final InputStream input; 29 | 30 | // Indicates whether mark() should be called when the underlying 31 | // input stream is read, and whether calling detach() is allowed. 32 | private final boolean endExactly; 33 | 34 | 35 | // The typical data flow in this decompressor looks like: 36 | // input (the underlying input stream) -> input.read() 37 | // -> inputBuffer -> getLong() 38 | // -> inputButBuffer1 -> packing logic in readBits() 39 | // -> inputBitBuffer0 -> readBit() or equivalent 40 | // -> Huffman decoding logic for literal and length-distance symbols 41 | // -> LZ77 decoding logic -> dictionary 42 | // -> copying to the caller's array 43 | // -> b (the array passed into this.read(byte[],int,int)). 
// Creates a decompressor state that reads compressed bytes from the given stream.
//
// in: the underlying stream to read from; must not be null.
// endExact: whether the underlying stream is marked before it is read,
//   so that finish() can reposition it after the last block.
// inBufLen: the capacity of the input byte buffer, in bytes; must be positive,
//   because a zero-length buffer can never receive any input.
// Throws NullPointerException if the stream is null, and
// IllegalArgumentException if the buffer length is not positive.
public Open(InputStream in, boolean endExact, int inBufLen) {
    if (in == null)
        throw new NullPointerException("Null input stream");
    if (inBufLen < 1)
        throw new IllegalArgumentException("Non-positive input buffer length");
    input = in;
    endExactly = endExact;
    // Start with an empty buffer (position = limit = 0) so the first read triggers a fill
    inputBuffer = ByteBuffer.allocate(inBufLen)
        .order(ByteOrder.LITTLE_ENDIAN).position(0).limit(0);
}
UncompressedBlock(); 94 | case 1 -> new HuffmanBlock(false); 95 | case 2 -> new HuffmanBlock(true); 96 | case 3 -> throw new DataFormatException(Reason.RESERVED_BLOCK_TYPE, "Reserved block type"); 97 | default -> throw new AssertionError("Unreachable value"); 98 | }); 99 | } 100 | 101 | BlockDecoder dec = blockDecoder.get(); 102 | result += dec.read(b, off + result, len - result); 103 | if (dec.isDone()) { 104 | blockDecoder = Optional.empty(); 105 | if (isLastBlock && endExactly) 106 | finish(); 107 | } 108 | } 109 | return (result > 0 || blockDecoder.isPresent() || !isLastBlock) ? result : -1; 110 | } 111 | 112 | 113 | private void finish() throws IOException { 114 | // Rewind the underlying stream, then skip over bytes that were already consumed. 115 | // Note that a byte with some bits consumed is considered to be fully consumed. 116 | input.reset(); 117 | int skip = inputBuffer.position() - (inputBitBuffer0Length + inputBitBuffer1Length) / 8; 118 | assert skip >= 0; 119 | try { 120 | new DataInputStream(input).skipNBytes(skip); 121 | } catch (EOFException e) { 122 | DataFormatException.throwUnexpectedEnd(); 123 | } 124 | } 125 | 126 | 127 | public void close() throws IOException { 128 | input.close(); 129 | } 130 | 131 | 132 | 133 | /*---- Private methods ----*/ 134 | 135 | // Returns the given number of least significant bits from the bit buffer. 136 | // This updates the bit buffer state and possibly also the byte buffer state. 
137 | private int readBits(int numBits) throws IOException { 138 | // Check arguments and invariants 139 | assert 0 <= numBits && numBits <= 16; // Note: DEFLATE uses up to 16, but this method is correct up to 31 140 | assert isBitBufferValid(); 141 | 142 | // Ensure there is enough data in the bit buffer to satisfy the request 143 | while (inputBitBuffer0Length < numBits) { 144 | if (inputBitBuffer1Length > 0) { 145 | int n = Math.min(64 - inputBitBuffer0Length, inputBitBuffer1Length); 146 | inputBitBuffer0 |= inputBitBuffer1 << inputBitBuffer0Length; 147 | inputBitBuffer0Length += n; 148 | inputBitBuffer1 >>>= n; 149 | inputBitBuffer1Length -= n; 150 | } else { 151 | if (!inputBuffer.hasRemaining()) 152 | fillInputBuffer(); 153 | 154 | // Pack as many bytes as possible from input byte buffer into the bit buffer 155 | int numBytes = Math.min((64 - inputBitBuffer0Length) >>> 3, inputBuffer.remaining()); 156 | assert 0 <= numBytes && numBytes <= 8; 157 | for (int i = 0; i < numBytes; i++, inputBitBuffer0Length += 8) 158 | inputBitBuffer0 |= (inputBuffer.get() & 0xFFL) << inputBitBuffer0Length; 159 | assert isBitBufferValid(); 160 | } 161 | } 162 | 163 | // Extract the bits to return 164 | int result = (int)inputBitBuffer0 & ((1 << numBits) - 1); 165 | assert result >>> numBits == 0; 166 | inputBitBuffer0 >>>= numBits; 167 | inputBitBuffer0Length -= numBits; 168 | assert isBitBufferValid(); 169 | return result; 170 | } 171 | 172 | 173 | private boolean isBitBufferValid() { 174 | return 0 <= inputBitBuffer0Length && inputBitBuffer0Length <= 64 175 | && (inputBitBuffer0Length == 64 || inputBitBuffer0 >>> inputBitBuffer0Length == 0); 176 | } 177 | 178 | 179 | // Fills the empty input byte buffer with at least 180 | // one new byte read from the underlying input stream. 
181 | private void fillInputBuffer() throws IOException { 182 | assert !inputBuffer.hasRemaining(); 183 | if (endExactly) 184 | input.mark(inputBuffer.capacity()); 185 | int n = input.read(inputBuffer.array()); 186 | if (n == -1) 187 | DataFormatException.throwUnexpectedEnd(); 188 | else if (n == 0) 189 | throw new AssertionError("read() returned zero bytes"); 190 | else 191 | inputBuffer.position(0).limit(n); 192 | } 193 | 194 | 195 | 196 | /*---- Constants ----*/ 197 | 198 | // Must be a power of 2. Do not change this constant value. If the value is decreased, then 199 | // decompression may produce different data that violates the DEFLATE spec (but no crashes). 200 | // If the value is increased, the behavior stays the same but memory is wasted with no benefit. 201 | private static final int DICTIONARY_LENGTH = 32 * 1024; 202 | 203 | // This is why the above must be a power of 2. 204 | private static final int DICTIONARY_MASK = DICTIONARY_LENGTH - 1; 205 | 206 | static { 207 | if (DICTIONARY_LENGTH < 32 * 1024) 208 | throw new AssertionError("Dictionary length shorter than required by the specification"); 209 | if (Integer.bitCount(DICTIONARY_LENGTH) != 1) 210 | throw new AssertionError("Dictionary length not a power of 2"); // Required for mask-based modulo calculation 211 | } 212 | 213 | 214 | 215 | /*---- Block decoder types ----*/ 216 | 217 | private interface BlockDecoder { 218 | 219 | // Unlike InputStream.read(byte[]), this returns [0, len] but never -1. 
// Decoder for one uncompressed (stored) block. Construction consumes the
// block's length fields; read() then copies the raw payload bytes to the
// caller's array and into the dictionary.
private final class UncompressedBlock implements BlockDecoder {

    // Number of payload bytes of this block not yet delivered by read()
    private int numRemainingBytes;  // Non-negative


    // Aligns the bit stream to a byte boundary, then reads the 16-bit length
    // and checks it against the bitwise complement of the next 16 bits.
    // Throws DataFormatException if the two values do not match.
    public UncompressedBlock() throws IOException {
        // Discard bits to align to byte
        readBits((inputBitBuffer0Length + inputBitBuffer1Length) % 8);
        assert (inputBitBuffer0Length + inputBitBuffer1Length) % 8 == 0;

        numRemainingBytes = readBits(16);
        assert 0x0000 <= numRemainingBytes && numRemainingBytes <= 0xFFFF;
        if (numRemainingBytes != (readBits(16) ^ 0xFFFF))
            throw new DataFormatException(Reason.UNCOMPRESSED_BLOCK_LENGTH_MISMATCH, "len/nlen mismatch in uncompressed block");
    }


    // Copies up to len payload bytes into b starting at off, and returns the number
    // copied, which is min(len, bytes remaining in this block). The bytes are taken,
    // in order, from the bit buffers, then the input byte buffer, then directly
    // from the underlying stream.
    public int read(byte[] b, final int off, int len) throws IOException {
        if (numRemainingBytes < 0)
            throw new AssertionError("Unreachable state");

        // Check bit buffer invariants
        assert isBitBufferValid();
        assert (inputBitBuffer0Length + inputBitBuffer1Length) % 8 == 0;

        // Clamp the request to what is left in this block
        len = Math.min(numRemainingBytes, len);
        numRemainingBytes -= len;
        int index = off;
        final int end = off + len;
        assert off <= end && end <= b.length;

        // First unpack saved bits
        for (; inputBitBuffer0Length + inputBitBuffer1Length >= 8 && index < end; index++)
            b[index] = (byte)readBits(8);

        // Copy from input buffer
        {
            int n = Math.min(end - index, inputBuffer.remaining());
            assert inputBitBuffer0Length + inputBitBuffer1Length == 0 || n == 0;
            inputBuffer.get(b, index, n);
            index += n;
        }

        // Read directly from input stream, bypassing the input buffer
        if (index < end) {
            assert inputBitBuffer0Length + inputBitBuffer1Length == 0 && !inputBuffer.hasRemaining();
            if (endExactly) {
                // Empty the byte buffer and mark the current stream position
                inputBuffer.position(0).limit(0);
                input.mark(0);
            }
            do {
                int n = input.read(b, index, end - index);
                if (n == -1)
                    DataFormatException.throwUnexpectedEnd();
                index += n;
            } while (index < end);
            // Mark again so that the mark sits after the bytes just read directly
            if (endExactly)
                input.mark(0);
        }

        // Copy output bytes to dictionary
        for (index = off; index < end; ) {
            // Copy up to the end of the circular dictionary, then wrap around
            int n = Math.min(end - index, dictionary.length - dictionaryIndex);
            System.arraycopy(b, index, dictionary, dictionaryIndex, n);
            index += n;
            dictionaryIndex = (dictionaryIndex + n) & DICTIONARY_MASK;
        }
        // Saturating increment: the length never exceeds dictionary.length
        dictionaryLength += Math.min(len, dictionary.length - dictionaryLength);

        return len;
    }


    // Returns whether every payload byte of this block has been delivered.
    public boolean isDone() {
        if (numRemainingBytes < 0)
            throw new AssertionError("Unreachable state");
        return numRemainingBytes == 0;
    }

}
lengths and computes the code trees, and 333 | // ultimately set just the variables {literalLengthCodeTree, literalLengthCodeTable, 334 | // distanceCodeTree, distanceCodeTable}. This might throw an IOException for actual I/O 335 | // exceptions, unexpected end of stream, or a description of an invalid Huffman code. 336 | int numLitLenCodes = readBits(5) + 257; // hlit + 257 337 | int numDistCodes = readBits(5) + 1; // hdist + 1 338 | 339 | // Read the code length code lengths 340 | int numCodeLenCodes = readBits(4) + 4; // hclen + 4 341 | var codeLenCodeLen = new byte[CODE_LENGTH_CODE_ORDER.length]; 342 | for (int i = 0; i < numCodeLenCodes; i++) // Fill array in strange order 343 | codeLenCodeLen[CODE_LENGTH_CODE_ORDER[i]] = (byte)readBits(3); 344 | short[] codeLenCodeTree = codeLengthsToCodeTree(codeLenCodeLen); 345 | 346 | // Read the main code lengths and handle runs 347 | var codeLens = new byte[numLitLenCodes + numDistCodes]; 348 | byte runVal = -1; 349 | for (int i = 0; i < codeLens.length; ) { 350 | int sym = decodeSymbol(codeLenCodeTree); 351 | assert 0 <= sym && sym < codeLenCodeLen.length; 352 | if (sym < 16) { 353 | runVal = (byte)sym; 354 | codeLens[i] = runVal; 355 | i++; 356 | } else { 357 | int runLen = switch (sym) { 358 | case 16 -> { 359 | if (runVal == -1) 360 | throw new DataFormatException(Reason.NO_PREVIOUS_CODE_LENGTH_TO_COPY, "No code length value to copy"); 361 | yield readBits(2) + 3; 362 | } 363 | case 17 -> { 364 | runVal = 0; 365 | yield readBits(3) + 3; 366 | } 367 | case 18 -> { 368 | runVal = 0; 369 | yield readBits(7) + 11; 370 | } 371 | default -> throw new AssertionError("Unreachable value"); 372 | }; 373 | for (; runLen > 0; runLen--, i++) { 374 | if (i >= codeLens.length) 375 | throw new DataFormatException(Reason.CODE_LENGTH_CODE_OVER_FULL, "Run exceeds number of codes"); 376 | codeLens[i] = runVal; 377 | } 378 | } 379 | } 380 | 381 | // Create literal-length code tree 382 | byte[] litLenCodeLen = Arrays.copyOf(codeLens, 
numLitLenCodes); 383 | if (litLenCodeLen[256] == 0) 384 | throw new DataFormatException(Reason.END_OF_BLOCK_CODE_ZERO_LENGTH, "End-of-block symbol has zero code length"); 385 | literalLengthCodeTree = codeLengthsToCodeTree(litLenCodeLen); 386 | literalLengthCodeTable = codeTreeToCodeTable(literalLengthCodeTree); 387 | int maxBitsPerLitLen = 0; 388 | for (int sym = 0; sym < litLenCodeLen.length; sym++) { 389 | int numBits = litLenCodeLen[sym]; 390 | if (sym >= 257 && numBits > 0) 391 | numBits += RUN_LENGTH_TABLE[sym - 257] & 0x7; // Extra bits 392 | maxBitsPerLitLen = Math.max(numBits, maxBitsPerLitLen); 393 | } 394 | 395 | // Create distance code tree with some extra processing 396 | byte[] distCodeLen = Arrays.copyOfRange(codeLens, numLitLenCodes, codeLens.length); 397 | int maxBitsPerDist = 0; 398 | if (distCodeLen.length == 1 && distCodeLen[0] == 0) { 399 | // Empty distance code; the block shall be all literal symbols 400 | distanceCodeTree = null; 401 | distanceCodeTable = null; 402 | } else { 403 | for (int sym = 0; sym < distCodeLen.length; sym++) { 404 | int numBits = distCodeLen[sym]; 405 | if (numBits > 0 && sym < DISTANCE_TABLE.length) 406 | numBits += DISTANCE_TABLE[sym] & 0xF; // Extra bits 407 | maxBitsPerDist = Math.max(numBits, maxBitsPerDist); 408 | } 409 | 410 | // Get statistics for upcoming logic 411 | int oneCount = 0; 412 | int otherPositiveCount = 0; 413 | for (byte x : distCodeLen) { 414 | if (x == 1) 415 | oneCount++; 416 | else if (x > 1) 417 | otherPositiveCount++; 418 | } 419 | 420 | // Handle the case where only one distance code is defined 421 | if (oneCount == 1 && otherPositiveCount == 0) { 422 | // Add a dummy invalid code to make the Huffman tree complete 423 | distCodeLen = Arrays.copyOf(distCodeLen, 32); 424 | distCodeLen[31] = 1; 425 | } 426 | distanceCodeTree = codeLengthsToCodeTree(distCodeLen); 427 | distanceCodeTable = codeTreeToCodeTable(distanceCodeTree); 428 | } 429 | 430 | maxBitsPerIteration = maxBitsPerLitLen + 
// Decodes symbols of this Huffman-coded block and writes up to len output bytes
// into b starting at off; returns the number of bytes written. Bytes of a run
// that do not fit in the caller's range are still written to the dictionary and
// counted in numPendingOutputBytes, to be delivered first on the next call.
// Sets isDone when the end-of-block symbol (256) is decoded.
public int read(byte[] b, final int off, final int len) throws IOException {
    int index = off;
    final int end = off + len;
    assert off <= end && end <= b.length;

    // Deliver bytes left over from a run that overflowed the previous call;
    // they are the most recent numPendingOutputBytes bytes in the dictionary
    for (; numPendingOutputBytes > 0 && index < end; numPendingOutputBytes--, index++)
        b[index] = dictionary[(dictionaryIndex - numPendingOutputBytes) & DICTIONARY_MASK];

    while (index < end) {
        assert numPendingOutputBytes == 0;
        assert isBitBufferValid();

        // Try to fill the input bit buffer (somewhat similar to logic in readBits())
        if (inputBitBuffer0Length < maxBitsPerIteration) {
            if (inputBitBuffer1Length > 0) {
                // Move as many bits as fit from the secondary bit buffer
                int n = Math.min(64 - inputBitBuffer0Length, inputBitBuffer1Length);
                inputBitBuffer0 |= inputBitBuffer1 << inputBitBuffer0Length;
                inputBitBuffer0Length += n;
                inputBitBuffer1 >>>= n;
                inputBitBuffer1Length -= n;
            }
            if (inputBitBuffer0Length < maxBitsPerIteration) {
                assert inputBitBuffer1Length == 0;
                if (inputBuffer.remaining() >= 8) {
                    // Load 8 bytes at once into the secondary buffer, then move what fits
                    inputBitBuffer1 = inputBuffer.getLong();
                    inputBitBuffer1Length = 64;
                    int n = Math.min(64 - inputBitBuffer0Length, inputBitBuffer1Length);
                    inputBitBuffer0 |= inputBitBuffer1 << inputBitBuffer0Length;
                    inputBitBuffer0Length += n;
                    inputBitBuffer1 >>>= n;
                    inputBitBuffer1Length -= n;
                } else {
                    // Fewer than 8 bytes buffered: append them one at a time
                    for (; inputBitBuffer0Length <= 56 && inputBuffer.hasRemaining(); inputBitBuffer0Length += 8)
                        inputBitBuffer0 |= (inputBuffer.get() & 0xFFL) << inputBitBuffer0Length;
                }
            }
            assert isBitBufferValid();
        }

        int run, dist;

        if (inputBitBuffer0Length >= maxBitsPerIteration) {  // Fast path entirely from bit buffer
            // Decode next literal/length symbol (a customized version of decodeSymbol())
            final int sym;
            {
                // Table entry: low 4 bits = number of bits consumed, upper bits = resulting tree node
                int temp = literalLengthCodeTable[(int)inputBitBuffer0 & CODE_TABLE_MASK];
                int consumed = temp & 0xF;
                inputBitBuffer0 >>>= consumed;
                inputBitBuffer0Length -= consumed;
                int node = temp >> 4;
                // A non-negative node is still internal; keep walking the tree bit by bit
                while (node >= 0) {
                    node = literalLengthCodeTree[node + ((int)inputBitBuffer0 & 1)];
                    inputBitBuffer0 >>>= 1;
                    inputBitBuffer0Length--;
                }
                sym = ~node;
                assert isBitBufferValid();
            }

            // Handle the symbol by ranges
            assert 0 <= sym && sym <= 287;
            if (sym < 256) {  // Literal byte
                b[index] = (byte)sym;
                index++;
                dictionary[dictionaryIndex] = (byte)sym;
                dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
                if (dictionaryLength < dictionary.length)
                    dictionaryLength++;
                continue;

            } else if (sym > 256) {  // Length and distance for copying
                // Decode the run length (a customized version of decodeRunLength())
                assert 257 <= sym && sym <= 287;
                {
                    int temp;
                    try {
                        temp = RUN_LENGTH_TABLE[sym - 257];
                    } catch (ArrayIndexOutOfBoundsException e) {
                        throw new DataFormatException(Reason.RESERVED_LENGTH_SYMBOL, "Reserved run length symbol: " + sym);
                    }
                    // Table entry: low 3 bits = number of extra bits, upper bits = base run length
                    run = temp >>> 3;
                    int numExtraBits = temp & 7;
                    run += (int)inputBitBuffer0 & ((1 << numExtraBits) - 1);
                    inputBitBuffer0 >>>= numExtraBits;
                    inputBitBuffer0Length -= numExtraBits;
                }

                // Decode next distance symbol (a customized version of decodeSymbol())
                if (distanceCodeTree == null)
                    throw new DataFormatException(Reason.LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE, "Length symbol encountered with empty distance code");
                final int distSym;
                {
                    int temp = distanceCodeTable[(int)inputBitBuffer0 & CODE_TABLE_MASK];
                    int consumed = temp & 0xF;
                    inputBitBuffer0 >>>= consumed;
                    inputBitBuffer0Length -= consumed;
                    int node = temp >> 4;
                    while (node >= 0) {
                        node = distanceCodeTree[node + ((int)inputBitBuffer0 & 1)];
                        inputBitBuffer0 >>>= 1;
                        inputBitBuffer0Length--;
                    }
                    distSym = ~node;
                }

                // Decode the distance (a customized version of decodeDistance())
                assert 0 <= distSym && distSym <= 31;
                {
                    int temp;
                    try {
                        temp = DISTANCE_TABLE[distSym];
                    } catch (ArrayIndexOutOfBoundsException e) {
                        throw new DataFormatException(Reason.RESERVED_DISTANCE_SYMBOL, "Reserved distance symbol: " + distSym);
                    }
                    // Table entry: low 4 bits = number of extra bits, upper bits = base distance
                    dist = temp >>> 4;
                    int numExtraBits = temp & 0xF;
                    dist += (int)inputBitBuffer0 & ((1 << numExtraBits) - 1);
                    inputBitBuffer0 >>>= numExtraBits;
                    inputBitBuffer0Length -= numExtraBits;
                }
                assert isBitBufferValid();

            } else {  // sym == 256, end of block
                isDone = true;
                break;
            }

        } else {  // General case (always correct), when not enough bits in buffer to guarantee reading
            int sym = decodeSymbol(literalLengthCodeTree);
            assert 0 <= sym && sym <= 287;
            if (sym < 256) {  // Literal byte
                b[index] = (byte)sym;
                index++;
                dictionary[dictionaryIndex] = (byte)sym;
                dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
                if (dictionaryLength < dictionary.length)
                    dictionaryLength++;
                continue;
            } else if (sym > 256) {  // Length and distance for copying
                run = decodeRunLength(sym);
                if (distanceCodeTree == null)
                    throw new DataFormatException(Reason.LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE, "Length symbol encountered with empty distance code");
                int distSym = decodeSymbol(distanceCodeTree);
                assert 0 <= distSym && distSym <= 31;
                dist = decodeDistance(distSym);
            } else {  // sym == 256, end of block
                isDone = true;
                break;
            }
        }

        // Copy bytes to output and dictionary
        assert 3 <= run && run <= MAX_RUN_LENGTH;
        assert 1 <= dist && dist <= 32768;
        if (dist > dictionaryLength)
            throw new DataFormatException(Reason.COPY_FROM_BEFORE_DICTIONARY_START, "Attempting to copy from before start of dictionary");
        int dictReadIndex = (dictionaryIndex - dist) & DICTIONARY_MASK;
        if (run <= end - index) {  // Nice case with less branching
            // Byte-by-byte copy: source and destination may overlap when dist < run
            for (int i = 0; i < run; i++) {
                byte bb = dictionary[dictReadIndex];
                dictReadIndex = (dictReadIndex + 1) & DICTIONARY_MASK;
                dictionary[dictionaryIndex] = bb;
                dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
                b[index] = bb;
                index++;
            }
        } else {  // General case
            // The run exceeds the caller's remaining space: always update the
            // dictionary, and count the bytes that could not be delivered
            for (int i = 0; i < run; i++) {
                byte bb = dictionary[dictReadIndex];
                dictReadIndex = (dictReadIndex + 1) & DICTIONARY_MASK;
                dictionary[dictionaryIndex] = bb;
                dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
                if (index < end) {
                    b[index] = bb;
                    index++;
                } else
                    numPendingOutputBytes++;
            }
        }
        // Saturating increment: the length never exceeds dictionary.length
        dictionaryLength += Math.min(run, dictionary.length - dictionaryLength);
    }
    return index - off;
}


// Returns whether the end-of-block symbol has been decoded
// and no pending output bytes remain to be delivered.
public boolean isDone() {
    return numPendingOutputBytes == 0 && isDone;
}
634 | private int decodeSymbol(short[] codeTree) throws IOException { 635 | int node = 0; // An index into the codeTree array which signifies the current tree node 636 | while (node >= 0) { 637 | if (inputBitBuffer0Length > 0) { // Medium path using buffered bits 638 | node = codeTree[node + ((int)inputBitBuffer0 & 1)]; 639 | inputBitBuffer0 >>>= 1; 640 | inputBitBuffer0Length--; 641 | } else // Slow path with potential I/O operations 642 | node = codeTree[node + readBits(1)]; 643 | } 644 | assert isBitBufferValid(); 645 | return ~node; // Symbol was encoded as bitwise complement 646 | } 647 | 648 | 649 | // Takes the given run length symbol in the range [257, 287], possibly 650 | // reads some more input bits, and returns a number in the range [3, 258]. 651 | // This throws an IOException if bits needed to be read but the end of 652 | // stream was reached or the underlying stream experienced an I/O exception. 653 | private int decodeRunLength(int sym) throws IOException { 654 | assert 257 <= sym && sym <= 287; 655 | try { 656 | int temp = RUN_LENGTH_TABLE[sym - 257]; 657 | return (temp >>> 3) + readBits(temp & 7); 658 | } catch (ArrayIndexOutOfBoundsException e) { 659 | throw new DataFormatException(Reason.RESERVED_LENGTH_SYMBOL, "Reserved run length symbol: " + sym); 660 | } 661 | } 662 | 663 | 664 | // Takes the given distance symbol in the range [0, 31], possibly reads 665 | // some more input bits, and returns a number in the range [1, 32768]. 666 | // This throws an IOException if bits needed to be read but the end of 667 | // stream was reached or the underlying stream experienced an I/O exception. 
668 | private int decodeDistance(int sym) throws IOException { 669 | assert 0 <= sym && sym <= 31; 670 | try { 671 | int temp = DISTANCE_TABLE[sym]; 672 | return (temp >>> 4) + readBits(temp & 0xF); 673 | } catch (ArrayIndexOutOfBoundsException e) { 674 | throw new DataFormatException(Reason.RESERVED_DISTANCE_SYMBOL, "Reserved distance symbol: " + sym); 675 | } 676 | } 677 | 678 | 679 | /* 680 | * Converts the given array of symbol code lengths into a canonical code tree. 681 | * A symbol code length is either zero (absent from the tree) or a positive integer. 682 | * 683 | * A code tree is an array of integers, where each pair represents a node. 684 | * Each pair is adjacent and starts on an even index. The earlier element of 685 | * the pair represents the left child and the later element represents the 686 | * right child. The root node is at index 0. If an element is non-negative, 687 | * then it is the index of the child node in the array. Otherwise it is the 688 | * bitwise complement of the leaf symbol. This tree is used in decodeSymbol() 689 | * and codeTreeToCodeTable(). Not every element of the array needs to be 690 | * used, nor do used elements need to be contiguous. 691 | * 692 | * For example, this Huffman tree: 693 | * /\ 694 | * 0 1 695 | * / \ 696 | * /\ 'c' 697 | * 0 1 698 | * / \ 699 | * 'a' 'b' 700 | * is serialized as this array: 701 | * [2, ~'c', ~'a', ~'b'] 702 | * because the root is located at index 0 and 703 | * the other internal node is located at index 2. 
704 | */ 705 | private static short[] codeLengthsToCodeTree(byte[] codeLengths) throws DataFormatException { 706 | var codeLengthsAndSymbols = new short[codeLengths.length]; 707 | for (int i = 0; i < codeLengths.length; i++) { 708 | byte cl = codeLengths[i]; 709 | if (cl < 0) 710 | throw new IllegalArgumentException("Negative code length"); 711 | if (cl > 15) 712 | throw new AssertionError("Maximum code length exceeds DEFLATE specification"); 713 | int pair = cl << 11 | i; // uint15 714 | assert pair >>> 15 == 0; 715 | codeLengthsAndSymbols[i] = (short)pair; 716 | } 717 | Arrays.sort(codeLengthsAndSymbols); 718 | 719 | int codeLenSymIndex = 0; 720 | // Skip unused symbols (code length 0) 721 | while (codeLenSymIndex < codeLengthsAndSymbols.length && codeLengthsAndSymbols[codeLenSymIndex] >>> 11 == 0) 722 | codeLenSymIndex++; 723 | 724 | int numCodes = codeLengthsAndSymbols.length - codeLenSymIndex; 725 | if (numCodes < 2) 726 | throw new DataFormatException(Reason.HUFFMAN_CODE_UNDER_FULL, "This canonical code produces an under-full Huffman code tree"); 727 | if (numCodes > 16385) // Because some indexes would overflow int16 728 | throw new IllegalArgumentException("Too many codes"); 729 | 730 | var result = new short[(numCodes - 1) * 2]; 731 | int resultNext = 0; 732 | int resultEnd = 2; // Start with root node already allocated; always even 733 | int curCodeLen = 1; 734 | for (; codeLenSymIndex < codeLengthsAndSymbols.length; codeLenSymIndex++) { 735 | int pair = codeLengthsAndSymbols[codeLenSymIndex]; 736 | for (int codeLen = pair >>> 11; curCodeLen < codeLen; curCodeLen++) { 737 | // Double every open slot 738 | for (int end = resultEnd; resultNext < end; resultNext++) { 739 | if (resultEnd >= result.length) 740 | throw new DataFormatException(Reason.HUFFMAN_CODE_UNDER_FULL, "This canonical code produces an under-full Huffman code tree"); 741 | result[resultNext] = (short)resultEnd; 742 | resultEnd += 2; 743 | } 744 | } 745 | if (resultNext >= resultEnd) 746 | 
throw new DataFormatException(Reason.HUFFMAN_CODE_OVER_FULL, "This canonical code produces an over-full Huffman code tree"); 747 | int symbol = pair & ((1 << 11) - 1); 748 | result[resultNext] = (short)~symbol; 749 | resultNext++; 750 | } 751 | if (resultEnd != result.length) 752 | throw new AssertionError("Unreachable state"); 753 | if (resultNext < resultEnd) 754 | throw new DataFormatException(Reason.HUFFMAN_CODE_UNDER_FULL, "This canonical code produces an under-full Huffman code tree"); 755 | return result; 756 | } 757 | 758 | 759 | /* 760 | * Converts a code tree array into a fast look-up table that consumes up to 761 | * CODE_TABLE_BITS at once. Each entry i in the table encodes the result of 762 | * decoding starting from the root and consuming the bits of i starting from 763 | * the lowest-order bits. 764 | * 765 | * Each array element encodes (node << 4) | numBitsConsumed, where: 766 | * - numBitsConsumed is a 4-bit unsigned integer in the range [1, CODE_TABLE_BITS]. 767 | * - node is an 12-bit signed integer representing either the current node 768 | * (which is a non-negative number) after consuming all the available bits 769 | * from i, or the bitwise complement of the decoded symbol (so it's negative). 
770 | */ 771 | private static short[] codeTreeToCodeTable(short[] codeTree) { 772 | assert 1 <= CODE_TABLE_BITS && CODE_TABLE_BITS <= 15; 773 | var result = new short[1 << CODE_TABLE_BITS]; 774 | for (int i = 0; i < result.length; i++) { 775 | // Simulate decodeSymbol() using the bits of i 776 | int node = 0; 777 | int consumed = 0; 778 | do { 779 | assert node % 2 == 0; 780 | node = codeTree[node + ((i >>> consumed) & 1)]; 781 | consumed++; 782 | } while (node >= 0 && consumed < CODE_TABLE_BITS); 783 | 784 | assert 1 <= consumed && consumed <= 15; // uint4 785 | assert -2048 <= node && node <= 2047; // int12 786 | result[i] = (short)(node << 4 | consumed); 787 | } 788 | return result; 789 | } 790 | 791 | 792 | /*---- Constants and tables ----*/ 793 | 794 | private static final int[] CODE_LENGTH_CODE_ORDER = 795 | {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; 796 | 797 | private static final short[] FIXED_LITERAL_LENGTH_CODE_TREE; 798 | private static final short[] FIXED_LITERAL_LENGTH_CODE_TABLE; 799 | private static final short[] FIXED_DISTANCE_CODE_TREE; 800 | private static final short[] FIXED_DISTANCE_CODE_TABLE; 801 | 802 | // Any integer from 1 to 15 is valid. Affects speed but produces same output. 
private static final int CODE_TABLE_BITS = 9;
private static final int CODE_TABLE_MASK = (1 << CODE_TABLE_BITS) - 1;

// Sanity check that holds even when assertions are disabled
static {
    if (CODE_TABLE_BITS < 1 || CODE_TABLE_BITS > 15)
        throw new AssertionError("Value out of range");
}


// Builds the code trees and look-up tables of the fixed Huffman codes
static {
    // Literal/length code lengths: 8 bits for symbols [0, 144), 9 bits for
    // [144, 256), 7 bits for [256, 280), and 8 bits for [280, 288)
    byte[] litLenCodeLens = new byte[288];
    for (int sym = 0; sym < litLenCodeLens.length; sym++) {
        int len;
        if (sym < 144)
            len = 8;
        else if (sym < 256)
            len = 9;
        else if (sym < 280)
            len = 7;
        else
            len = 8;
        litLenCodeLens[sym] = (byte)len;
    }

    // Every one of the 32 distance symbols has a 5-bit code
    byte[] distCodeLens = new byte[32];
    for (int sym = 0; sym < distCodeLens.length; sym++)
        distCodeLens[sym] = (byte)5;

    short[] litLenTree;
    short[] distTree;
    try {
        litLenTree = codeLengthsToCodeTree(litLenCodeLens);
        distTree = codeLengthsToCodeTree(distCodeLens);
    } catch (DataFormatException e) {
        // The fixed code lengths are hard-coded above, so this indicates a programming error
        throw new AssertionError(e);
    }
    FIXED_LITERAL_LENGTH_CODE_TREE = litLenTree;
    FIXED_DISTANCE_CODE_TREE = distTree;
    FIXED_LITERAL_LENGTH_CODE_TABLE = codeTreeToCodeTable(litLenTree);
    FIXED_DISTANCE_CODE_TABLE = codeTreeToCodeTable(distTree);
}


private static final int MAX_RUN_LENGTH = 258;  // Required by the specification, do not modify

// Sanity check that holds even when assertions are disabled
static {
    if (DICTIONARY_LENGTH < MAX_RUN_LENGTH - 1)
        throw new AssertionError("Cannot guarantee all pending run bytes can be buffered in dictionary");
}


// For length symbols from 257 to 285 (inclusive). RUN_LENGTH_TABLE[i]
// = (base of run length) << 3 | (number of extra bits to read).
843 | private static final short[] RUN_LENGTH_TABLE = new short[29]; 844 | 845 | static { 846 | for (int i = 0; i < RUN_LENGTH_TABLE.length; i++) { 847 | int sym = i + 257; 848 | int run, extraBits; 849 | if (sym <= 264) { 850 | extraBits = 0; 851 | run = sym - 254; 852 | } else if (sym <= 284) { 853 | extraBits = (sym - 261) / 4; 854 | run = (((sym - 1) % 4 + 4) << extraBits) + 3; 855 | } else if (sym == 285) { 856 | extraBits = 0; 857 | run = 258; 858 | } else 859 | throw new AssertionError("Unreachable value"); 860 | assert run >>> 12 == 0; 861 | assert extraBits >>> 3 == 0; 862 | RUN_LENGTH_TABLE[i] = (short)(run << 3 | extraBits); 863 | } 864 | } 865 | 866 | 867 | // For distance symbols from 0 to 29 (inclusive). DISTANCE_TABLE[i] 868 | // = (base of distance) << 4 | (number of extra bits to read). 869 | private static final int[] DISTANCE_TABLE = new int[30]; 870 | 871 | static { 872 | for (int sym = 0; sym < DISTANCE_TABLE.length; sym++) { 873 | int dist, extraBits; 874 | if (sym <= 3) { 875 | extraBits = 0; 876 | dist = sym + 1; 877 | } else if (sym <= 29) { 878 | extraBits = sym / 2 - 1; 879 | dist = ((sym % 2 + 2) << extraBits) + 1; 880 | } else 881 | throw new AssertionError("Unreachable value"); 882 | assert dist >>> 27 == 0; 883 | assert extraBits >>> 4 == 0; 884 | DISTANCE_TABLE[sym] = dist << 4 | extraBits; 885 | } 886 | } 887 | 888 | } 889 | 890 | } 891 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/decomp/State.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.decomp; 10 | 11 | 12 | public sealed interface State permits Open, StickyException, Closed {} 13 | -------------------------------------------------------------------------------- /src/io/nayuki/deflate/decomp/StickyException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate.decomp; 10 | 11 | import java.io.IOException; 12 | import java.io.InputStream; 13 | import java.util.Objects; 14 | 15 | 16 | // A saved exception that is thrown on every read() or detach(). Both the input stream and the exception must be non-null. 17 | public record StickyException( 18 | InputStream input, 19 | IOException exception) 20 | implements State { 21 | 22 | 23 | public StickyException { 24 | Objects.requireNonNull(input); 25 | Objects.requireNonNull(exception); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /test/io/nayuki/deflate/DeflaterOutputStreamTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.ByteArrayInputStream; 12 | import java.io.ByteArrayOutputStream; 13 | import java.io.IOException; 14 | import java.io.InputStream; 15 | import java.io.OutputStream; 16 | import java.util.Arrays; 17 | import java.util.Random; 18 | import org.junit.Assert; 19 | import org.junit.Test; 20 | 21 | 22 | public class DeflaterOutputStreamTest { 23 | 24 | @Test public void testEmpty() throws IOException { 25 | byte[] data = {}; 26 | var bout = new ByteArrayOutputStream(); 27 | try (OutputStream dout = new DeflaterOutputStream(bout)) { 28 | dout.write(data); 29 | } 30 | checkInflate(data, bout.toByteArray()); 31 | } 32 | 33 | 34 | @Test public void testShortSingleWriteRandomly() throws IOException { 35 | for (int i = 0; i < 1000; i++) { 36 | var data = new byte[rand.nextInt(100)]; 37 | rand.nextBytes(data); 38 | var bout = new ByteArrayOutputStream(); 39 | try (OutputStream dout = new DeflaterOutputStream(bout)) { 40 | dout.write(data); 41 | } 42 | checkInflate(data, bout.toByteArray()); 43 | } 44 | } 45 | 46 | 47 | @Test public void testShortMultiWriteRandomly() throws IOException { 48 | for (int i = 0; i < 1000; i++) { 49 | var data = new byte[rand.nextInt(1000)]; 50 | rand.nextBytes(data); 51 | var bout = new ByteArrayOutputStream(); 52 | try (OutputStream dout = new DeflaterOutputStream(bout)) { 53 | for (int off = 0; off < data.length; ) { 54 | if (rand.nextDouble() < 0.1) { 55 | dout.write(data[off]); 56 | off++; 57 | } else { 58 | int n = rand.nextInt(Math.min(100, data.length - off)) + 1; 59 | dout.write(data, off, n); 60 | off += n; 61 | } 62 | } 63 | } 64 | checkInflate(data, bout.toByteArray()); 65 | } 66 | } 67 | 68 | 69 | @Test public void testByteRunsRandomly() throws IOException { 70 | var bout = new ByteArrayOutputStream(); 71 | for (int i = 0; i < 1000; i++) { 72 | var b = new byte[rand.nextInt(1000) + 1]; 73 | Arrays.fill(b, 
(byte)rand.nextInt(1 << 8)); 74 | bout.write(b); 75 | } 76 | byte[] data = bout.toByteArray(); 77 | 78 | bout = new ByteArrayOutputStream(); 79 | try (OutputStream dout = new DeflaterOutputStream(bout)) { 80 | dout.write(data); 81 | } 82 | checkInflate(data, bout.toByteArray()); 83 | } 84 | 85 | 86 | @Test public void testLongRandomly() throws IOException { 87 | for (int i = 0; i < 1000; i++) { 88 | var data = new byte[rand.nextInt(1_000_000)]; 89 | rand.nextBytes(data); 90 | var bout = new ByteArrayOutputStream(); 91 | try (OutputStream dout = new DeflaterOutputStream(bout)) { 92 | for (int off = 0; off < data.length; ) { 93 | if (rand.nextDouble() < 0.9) { 94 | dout.write(data[off]); 95 | off++; 96 | } else { 97 | int n = rand.nextInt(Math.min(300_000, data.length - off)) + 1; 98 | dout.write(data, off, n); 99 | off += n; 100 | } 101 | } 102 | } 103 | checkInflate(data, bout.toByteArray()); 104 | } 105 | } 106 | 107 | 108 | 109 | private static void checkInflate(byte[] uncomp, byte[] comp) throws IOException { 110 | var bout = new ByteArrayOutputStream(); 111 | try (InputStream in = new InflaterInputStream(new ByteArrayInputStream(comp))) { 112 | in.transferTo(bout); 113 | } 114 | Assert.assertArrayEquals(uncomp, bout.toByteArray()); 115 | } 116 | 117 | 118 | private static Random rand = new Random(); 119 | 120 | } 121 | -------------------------------------------------------------------------------- /test/io/nayuki/deflate/InflaterInputStreamTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 
6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.ByteArrayOutputStream; 12 | import java.io.IOException; 13 | import java.util.Objects; 14 | import java.util.Random; 15 | import org.junit.Assert; 16 | import org.junit.Test; 17 | import io.nayuki.deflate.DataFormatException.Reason; 18 | 19 | 20 | public final class InflaterInputStreamTest { 21 | 22 | /*---- Block header ----*/ 23 | 24 | @Test public void testHeaderEndBeforeFinal() { 25 | testFail("", 26 | Reason.UNEXPECTED_END_OF_STREAM); 27 | } 28 | 29 | 30 | @Test public void testHeaderEndBeforeType() { 31 | // Fixed Huffman block: 90 91 92 93 94 End 32 | testFail("0 10 110010000 110010001 110010010 110010011 110010100 0000000" 33 | + "1", 34 | Reason.UNEXPECTED_END_OF_STREAM); 35 | } 36 | 37 | 38 | @Test public void testHeaderEndInType() { 39 | // Fixed Huffman block: 95 96 97 98 End 40 | testFail("0 10 110010101 110010110 110010111 110011000 0000000" 41 | + "1 0", 42 | Reason.UNEXPECTED_END_OF_STREAM); 43 | } 44 | 45 | 46 | 47 | /*---- Block type 0b00 ----*/ 48 | 49 | @Test public void testUncompressedEmpty() { 50 | // Uncompressed block len=0: (empty) 51 | test("1 00 00000 0000000000000000 1111111111111111", 52 | ""); 53 | } 54 | 55 | 56 | @Test public void testUncompressedThreeBytes() { 57 | // Uncompressed block len=3: 05 14 23 58 | test("1 00 00000 1100000000000000 0011111111111111 10100000 00101000 11000100", 59 | "05 14 23"); 60 | } 61 | 62 | 63 | @Test public void testUncompressedTwoBlocks() { 64 | // Uncompressed block len=2: 05 14 65 | // Uncompressed block len=1: 23 66 | test("0 00 00000 0100000000000000 1011111111111111 10100000 00101000" 67 | + "1 00 00000 1000000000000000 0111111111111111 11000100", 68 | "05 14 23"); 69 | } 70 | 71 | 72 | @Test public void testUncompressedEndBeforeLength() { 73 | // Uncompressed block (partial padding) (no length) 74 | testFail("1 00 000", 75 | Reason.UNEXPECTED_END_OF_STREAM); 76 | } 77 | 78 | 
79 | @Test public void testUncompressedEndInLength() { 80 | // Uncompressed block (partial length) 81 | testFail("1 00 00000 0000000000", 82 | Reason.UNEXPECTED_END_OF_STREAM); 83 | } 84 | 85 | 86 | @Test public void testUncompressedEndInNegatedLength() { 87 | // Uncompressed block (len) (partial nlen) 88 | testFail("1 00 00000 0000000000000000 11111111", 89 | Reason.UNEXPECTED_END_OF_STREAM); 90 | } 91 | 92 | 93 | @Test public void testUncompressedLengthNegatedMismatch() { 94 | // Uncompressed block (mismatched len and nlen) 95 | testFail("1 00 00000 0010000000010000 1111100100110101", 96 | Reason.UNCOMPRESSED_BLOCK_LENGTH_MISMATCH); 97 | } 98 | 99 | 100 | @Test public void testUncompressedEndBeforeData() { 101 | // Uncompressed block len=6: (End) 102 | testFail("1 00 11111 0110000000000000 1001111111111111", 103 | Reason.UNEXPECTED_END_OF_STREAM); 104 | } 105 | 106 | 107 | @Test public void testUncompressedEndInData() { 108 | // Uncompressed block len=6: 55 EE (End) 109 | testFail("1 00 11111 0110000000000000 1001111111111111 10101010 01110111", 110 | Reason.UNEXPECTED_END_OF_STREAM); 111 | } 112 | 113 | 114 | @Test public void testUncompressedEndBeforeFinalBlock() { 115 | // Uncompressed block len=0: (empty) 116 | // No final block 117 | testFail("0 00 00000 0000000000000000 1111111111111111", 118 | Reason.UNEXPECTED_END_OF_STREAM); 119 | } 120 | 121 | 122 | @Test public void testUncompressedAlreadyByteAligned() { 123 | // Fixed Huffman block: 90 A1 FF End 124 | // Uncompressed block len=2: AB CD 125 | test("0 10 110010000 110100001 111111111 0000000 " 126 | + "1 00 0100000000000000 1011111111111111 11010101 10110011", 127 | "90 A1 FF AB CD"); 128 | } 129 | 130 | 131 | @Test public void testUncompressedRandom() { 132 | final int TRIALS = 100; 133 | for (int i = 0; i < TRIALS; i++) { 134 | int numBlocks = rand.nextInt(30) + 1; 135 | var inBits = new StringBuilder(); 136 | var outBytes = new StringBuilder(); 137 | for (int j = 0; j < numBlocks; j++) { 138 | 
inBits.append(j + 1 < numBlocks ? "0" : "1"); // bfinal 139 | inBits.append("00"); // btype 140 | for (int k = 0; k < 5; k++) // Padding 141 | inBits.append(rand.nextInt(2)); 142 | 143 | // A quasi log-uniform distribution 144 | int len = rand.nextInt(17); 145 | if (len > 0) { 146 | len = 1 << (len - 1); 147 | len |= rand.nextInt(len); 148 | } 149 | int temp = len | ((~len) << 16); 150 | for (int k = 0; k < 32; k++) 151 | inBits.append((temp >>> k) & 1); 152 | 153 | var data = new byte[len]; 154 | rand.nextBytes(data); 155 | for (byte b : data) { 156 | outBytes.append(String.format("%02x", b)); 157 | for (int k = 0; k < 8; k++, b >>>= 1) 158 | inBits.append(b & 1); 159 | } 160 | } 161 | test(inBits.toString(), outBytes.toString()); 162 | } 163 | } 164 | 165 | 166 | @Test public void testUncompressedRandomAndShortFixedHuffman() { 167 | final int TRIALS = 100; 168 | for (int i = 0; i < TRIALS; i++) { 169 | int numBlocks = rand.nextInt(30) + 1; 170 | var inBits = new StringBuilder(); 171 | var outBytes = new StringBuilder(); 172 | for (int j = 0; j < numBlocks; j++) { 173 | inBits.append(j + 1 < numBlocks ? 
"0" : "1"); // bfinal 174 | if (rand.nextDouble() < 0.5) { 175 | inBits.append("00"); // btype 176 | while (inBits.length() % 8 != 0) // Padding 177 | inBits.append(rand.nextInt(2)); 178 | 179 | // A quasi log-uniform distribution 180 | int len = rand.nextInt(17); 181 | if (len > 0) { 182 | len = 1 << (len - 1); 183 | len |= rand.nextInt(len); 184 | } 185 | int temp = len | ((~len) << 16); 186 | for (int k = 0; k < 32; k++) 187 | inBits.append((temp >>> k) & 1); 188 | 189 | var data = new byte[len]; 190 | rand.nextBytes(data); 191 | for (byte b : data) { 192 | outBytes.append(String.format("%02x", b)); 193 | for (int k = 0; k < 8; k++, b >>>= 1) 194 | inBits.append(b & 1); 195 | } 196 | } else { 197 | inBits.append("10"); // btype 198 | inBits.append("111111111"); // Symbol #255 (0xFF) 199 | outBytes.append("FF"); 200 | inBits.append("0000000"); // End of block 201 | // Including bfinal, this writes a total of 19 bits, which is 3 202 | // modulo 8. By writing many consecutive blocks of this type, the 203 | // starting position of the next block can be any number mod 8. 
204 | } 205 | } 206 | test(inBits.toString(), outBytes.toString()); 207 | } 208 | } 209 | 210 | 211 | 212 | /*---- Block type 0b01 ----*/ 213 | 214 | @Test public void testFixedHuffmanEmpty() { 215 | // Fixed Huffman block: End 216 | test("1 10 0000000", 217 | ""); 218 | } 219 | 220 | 221 | @Test public void testFixedHuffmanLiterals() { 222 | // Fixed Huffman block: 00 80 8F 90 C0 FF End 223 | test("1 10 00110000 10110000 10111111 110010000 111000000 111111111 0000000", 224 | "00 80 8F 90 C0 FF"); 225 | } 226 | 227 | 228 | @Test public void testFixedHuffmanNonOverlappingRun() { 229 | // Fixed Huffman block: 00 01 02 (3,3) End 230 | test("1 10 00110000 00110001 00110010 0000001 00010 0000000", 231 | "00 01 02 00 01 02"); 232 | } 233 | 234 | 235 | @Test public void testFixedHuffmanOverlappingRun1() { 236 | // Fixed Huffman block: 01 (1,4) End 237 | test("1 10 00110001 0000010 00000 0000000", 238 | "01 01 01 01 01"); 239 | } 240 | 241 | 242 | @Test 243 | public void testFixedHuffmanOverlappingRun2() { 244 | // Fixed Huffman block: 8E 8F (2,5) End 245 | test("1 10 10111110 10111111 0000011 00001 0000000", 246 | "8E 8F 8E 8F 8E 8F 8E"); 247 | } 248 | 249 | 250 | @Test public void testFixedHuffmanInvalidLengthCode286() { 251 | // Fixed Huffman block: #286 252 | testFail("1 10 11000110", 253 | Reason.RESERVED_LENGTH_SYMBOL); 254 | } 255 | 256 | 257 | @Test public void testFixedHuffmanInvalidLengthCode287() { 258 | // Fixed Huffman block: #287 259 | testFail("1 10 11000111", 260 | Reason.RESERVED_LENGTH_SYMBOL); 261 | } 262 | 263 | 264 | @Test public void testFixedHuffmanInvalidDistanceCode30() { 265 | // Fixed Huffman block: 00 #257 #30 266 | testFail("1 10 00110000 0000001 11110", 267 | Reason.RESERVED_DISTANCE_SYMBOL); 268 | } 269 | 270 | 271 | @Test public void testFixedHuffmanInvalidDistanceCode31() { 272 | // Fixed Huffman block: 00 #257 #31 273 | testFail("1 10 00110000 0000001 11111", 274 | Reason.RESERVED_DISTANCE_SYMBOL); 275 | } 276 | 277 | 278 | @Test public 
void testFixedHuffmanEndInSymbol() { 279 | // Fixed Huffman block: (partial symbol) 280 | testFail("1 10 00000", 281 | Reason.UNEXPECTED_END_OF_STREAM); 282 | } 283 | 284 | 285 | @Test public void testFixedHuffmanEndBeforeSymbol() { 286 | // Fixed Huffman block: 93 91 94 90 92 287 | testFail("1 10 110010011 110010001 110010100 110010000 110010010", 288 | Reason.UNEXPECTED_END_OF_STREAM); 289 | } 290 | 291 | 292 | @Test public void testFixedHuffmanEofInRunExtensionBits() { 293 | // Fixed Huffman block: 00 #269+1(partial) 294 | testFail("1 10 00110000 0001101 1", 295 | Reason.UNEXPECTED_END_OF_STREAM); 296 | } 297 | 298 | 299 | @Test public void testFixedHuffmanEofInDistanceExtensionBits() { 300 | // Fixed Huffman block: 00 #285 #0 #257 #8+00(partial) 301 | testFail("1 10 00110000 11000101 00000 0000001 01000 00", 302 | Reason.UNEXPECTED_END_OF_STREAM); 303 | } 304 | 305 | 306 | @Test public void testFixedHuffmanLiteralsRandom() { 307 | final int TRIALS = 100; 308 | for (int i = 0; i < TRIALS; i++) { 309 | int numBlocks = rand.nextInt(100) + 1; 310 | var inBits = new StringBuilder(); 311 | var outBytes = new StringBuilder(); 312 | for (int j = 0; j < numBlocks; j++) { 313 | inBits.append(j + 1 < numBlocks ? 
"0" : "1"); // bfinal 314 | inBits.append("10"); // btype 315 | 316 | // A quasi log-uniform distribution 317 | int len = rand.nextInt(16); 318 | if (len > 0) { 319 | len = 1 << (len - 1); 320 | len |= rand.nextInt(len); 321 | } 322 | 323 | for (int k = 0; k < len; k++) { 324 | int b = rand.nextInt(256); 325 | if (b < 144) { 326 | for (int l = 7; l >= 0; l--) 327 | inBits.append(((b - 0 + 48) >>> l) & 1); 328 | } else { 329 | for (int l = 8; l >= 0; l--) 330 | inBits.append(((b - 144 + 400) >>> l) & 1); 331 | } 332 | outBytes.append(String.format("%02x", b)); 333 | } 334 | inBits.append("0000000"); 335 | } 336 | test(inBits.toString(), outBytes.toString()); 337 | } 338 | } 339 | 340 | 341 | 342 | /*---- Block type 0b10 ----*/ 343 | 344 | @Test public void testDynamicHuffmanEmpty() { 345 | // Dynamic Huffman block: 346 | // numCodeLen=19 347 | // codeLenCodeLen = 0:0, 1:1, 2:0, ..., 15:0, 16:0, 17:0, 18:1 348 | // numLitLen=257, numDist=2 349 | // litLenCodeLen = 0:1, 1:0, ..., 255:0, 256:1 350 | // distCodeLen = 0:1, 1:1 351 | // Data: End 352 | String blockHeader = "1 01"; 353 | String codeCounts = "00000 10000 1111"; 354 | String codeLenCodeLens = "000 000 100 000 000 000 000 000 000 000 000 000 000 000 000 000 000 100 000"; 355 | String codeLens = "0 11111111 10101011 0 0 0"; 356 | String data = "1"; 357 | test(blockHeader + codeCounts + codeLenCodeLens + codeLens + data, 358 | ""); 359 | } 360 | 361 | 362 | @Test public void testDynamicHuffmanEmptyNoDistanceCode() { 363 | // Dynamic Huffman block: 364 | // numCodeLen=18 365 | // codeLenCodeLen = 0:2, 1:2, 2:0, ..., 15:0, 16:0, 17:0, 18:1 366 | // numLitLen=257, numDist=1 367 | // litLenCodeLen = 0:0, ..., 254:0, 255:1, 256:1 368 | // distCodeLen = 0:0 369 | // Data: End 370 | String blockHeader = "1 01"; 371 | String codeCounts = "00000 00000 0111"; 372 | String codeLenCodeLens = "000 000 100 010 000 000 000 000 000 000 000 000 000 000 000 000 000 010"; 373 | String codeLens = "01111111 00101011 11 11 10"; 374 
| String data = "1"; 375 | test(blockHeader + codeCounts + codeLenCodeLens + codeLens + data, 376 | ""); 377 | } 378 | 379 | 380 | @Test public void testDynamicHuffmanCodeLengthRepeatAtStart() { 381 | // Dynamic Huffman block: 382 | // numLitLen=257, numDist=1, numCodeLen=18 383 | // codeLenCodeLen = 0:0, 1:1, 2:0, ..., 15:0, 16:1, 17:0, 18:0 384 | // Literal/length/distance code lengths: #16+00 385 | String blockHeader = "1 01"; 386 | String codeCounts = "00000 00000 0111"; 387 | String codeLenCodeLens = "100 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 100"; 388 | String codeLens = "1"; 389 | testFail(blockHeader + codeCounts + codeLenCodeLens + codeLens, 390 | Reason.NO_PREVIOUS_CODE_LENGTH_TO_COPY); 391 | } 392 | 393 | 394 | @Test public void testDynamicHuffmanTooManyCodeLengthItems() { 395 | // Dynamic Huffman block: 396 | // numLitLen=257, numDist=1, numCodeLen=18 397 | // codeLenCodeLen = 0:0, 1:1, 2:0, ..., 15:0, 16:0, 17:0, 18:1 398 | // Literal/length/distance code lengths: 1 1 #18+1111111 #18+1101100 399 | String blockHeader = "1 01"; 400 | String codeCounts = "00000 00000 0111"; 401 | String codeLenCodeLens = "000 000 100 000 000 000 000 000 000 000 000 000 000 000 000 000 000 100"; 402 | String codeLens = "0 0 11111111 10011011"; 403 | testFail(blockHeader + codeCounts + codeLenCodeLens + codeLens, 404 | Reason.CODE_LENGTH_CODE_OVER_FULL); 405 | } 406 | 407 | 408 | @Test public void testDynamicHuffmanOverfullCode0() { 409 | // Dynamic Huffman block: 410 | // numLitLen=257, numDist=1, numCodeLen=4 411 | // codeLenCodeLen = 0:1, 1:1, 2:1, 3:0 412 | String blockHeader = "1 01"; 413 | String codeCounts = "00000 00000 0000"; 414 | String codeLenCodeLens = "100 100 100 000"; 415 | String padding = "0000000000000000000"; 416 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding, 417 | Reason.HUFFMAN_CODE_OVER_FULL); 418 | } 419 | 420 | 421 | @Test public void testDynamicHuffmanOverfullCode1() { 422 | // Dynamic Huffman block: 423 
| // numLitLen=257, numDist=1, numCodeLen=4 424 | // codeLenCodeLen = 0:1, 1:1, 2:1, 3:1 425 | String blockHeader = "1 01"; 426 | String codeCounts = "00000 00000 0000"; 427 | String codeLenCodeLens = "100 100 100 100"; 428 | String padding = "0000000000000000000"; 429 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding, 430 | Reason.HUFFMAN_CODE_OVER_FULL); 431 | } 432 | 433 | 434 | @Test public void testDynamicHuffmanUnpairedCode() { 435 | // Dynamic Huffman block: 436 | // numLitLen=257, numDist=1, numCodeLen=4 437 | // codeLenCodeLen = 0:1, 1:2, 2:3, 3:0 438 | String blockHeader = "1 01"; 439 | String codeCounts = "00000 00000 0000"; 440 | String codeLenCodeLens = "100 010 110 000"; 441 | String padding = "0000000000000000000"; 442 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding, 443 | Reason.HUFFMAN_CODE_UNDER_FULL); 444 | } 445 | 446 | 447 | @Test public void testDynamicHuffmanEmptyCode() { 448 | // Dynamic Huffman block: 449 | // numLitLen=257, numDist=1, numCodeLen=4 450 | // codeLenCodeLen = 0:0, 1:0, 2:0, 3:0 451 | String blockHeader = "1 01"; 452 | String codeCounts = "00000 00000 0000"; 453 | String codeLenCodeLens = "000 000 000 000"; 454 | String padding = "0000000000000000000"; 455 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding, 456 | Reason.HUFFMAN_CODE_UNDER_FULL); 457 | } 458 | 459 | 460 | @Test public void testDynamicHuffmanUnderfullCode0() { 461 | // Dynamic Huffman block: 462 | // numLitLen=257, numDist=1, numCodeLen=4 463 | // codeLenCodeLen = 0:0, 1:0, 2:1, 3:0 464 | String blockHeader = "1 01"; 465 | String codeCounts = "00000 00000 0000"; 466 | String codeLenCodeLens = "000 000 100 000"; 467 | String padding = "0000000000000000000"; 468 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding, 469 | Reason.HUFFMAN_CODE_UNDER_FULL); 470 | } 471 | 472 | 473 | @Test public void testDynamicHuffmanUnderfullCode1() { 474 | // Dynamic Huffman block: 475 | // numLitLen=257, numDist=1, numCodeLen=4 
476 | // codeLenCodeLen = 0:2, 1:1, 2:0, 3:0 477 | String blockHeader = "1 01"; 478 | String codeCounts = "00000 00000 0000"; 479 | String codeLenCodeLens = "010 100 000 000"; 480 | String padding = "0000000000000000000"; 481 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding, 482 | Reason.HUFFMAN_CODE_UNDER_FULL); 483 | } 484 | 485 | 486 | @Test public void testDynamicHuffmanUseOfNullDistanceCode() { 487 | // Dynamic Huffman block: 488 | // numLitLen=258, numDist=1, numCodeLen=18 489 | // codeLenCodeLen = 0:2, 1:2, 2:2, ..., 15:0, 16:0, 17:0, 18:2 490 | // Literal/length/distance code lengths: 2 #18+1111111 #18+1101100 1 2 0 491 | // Data: 00 #257 492 | String blockHeader = "1 01"; 493 | String codeCounts = "10000 00000 0111"; 494 | String codeLenCodeLens = "000 000 010 010 000 000 000 000 000 000 000 000 000 000 000 010 000 010"; 495 | String codeLens = "10 111111111 110101011 01 10 00"; 496 | String data = "10 11"; 497 | String padding = "0000000000000000"; 498 | testFail(blockHeader + codeCounts + codeLenCodeLens + codeLens + data + padding, 499 | Reason.LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE); 500 | } 501 | 502 | 503 | 504 | /*---- Block type 0b11 ----*/ 505 | 506 | @Test public void testReservedBlockType() { 507 | // Reserved block type 508 | testFail("1 11 00000", 509 | Reason.RESERVED_BLOCK_TYPE); 510 | } 511 | 512 | 513 | 514 | /*---- Utilities ----*/ 515 | 516 | // `inputBits` has 0s and 1s, and optional spaces; its length need not be 517 | // a multiple of 8. `refOutputHex` has pairs of hexadecimal digits (with 518 | // optional spaces) representing the expected decompressed output byte sequence. 
519 | private static void test(String inputBits, String refOutputHex) { 520 | // Process the input bit string 521 | Objects.requireNonNull(inputBits); 522 | inputBits = inputBits.replace(" ", ""); 523 | int padMode = rand.nextInt(3); 524 | while (inputBits.length() % 8 != 0) { 525 | inputBits += switch (padMode) { 526 | case 0 -> 0; 527 | case 1 -> 1; 528 | case 2 -> rand.nextInt(2); 529 | default -> throw new AssertionError("Unreachable value"); 530 | }; 531 | } 532 | 533 | // Convert the reference output hex string 534 | Objects.requireNonNull(refOutputHex); 535 | refOutputHex = refOutputHex.replace(" ", ""); 536 | if (refOutputHex.length() % 2 != 0) 537 | throw new IllegalArgumentException(); 538 | var refOut = new byte[refOutputHex.length() / 2]; 539 | for (int i = 0; i < refOut.length; i++) 540 | refOut[i] = (byte)Integer.parseInt(refOutputHex.substring(i * 2, (i + 1) * 2), 16); 541 | 542 | // Perform decompression with single-byte reads and check output 543 | var bout = new ByteArrayOutputStream(); 544 | var sin = new StringInputStream(inputBits); 545 | try { 546 | @SuppressWarnings("resource") 547 | var iin = new InflaterInputStream(sin, true); 548 | while (true) { 549 | int b = iin.read(); 550 | if (b == -1) 551 | break; 552 | bout.write(b); 553 | } 554 | } catch (IOException e) { 555 | throw new AssertionError("Unexpected exception", e); 556 | } 557 | if (sin.read() != -1) 558 | throw new IllegalArgumentException(); 559 | Assert.assertArrayEquals(refOut, bout.toByteArray()); 560 | 561 | // Perform decompression with block reads and check output 562 | bout.reset(); 563 | sin = new StringInputStream(inputBits); 564 | try { 565 | @SuppressWarnings("resource") 566 | var iin = new InflaterInputStream(sin, true); 567 | while (true) { 568 | var buf = new byte[rand.nextInt(100) + 1]; 569 | int off = rand.nextInt(buf.length + 1); 570 | int len = rand.nextInt(buf.length - off + 1); 571 | int n = iin.read(buf, off, len); 572 | if (!(-1 <= n && n <= len)) 573 | throw 
new IllegalArgumentException(); 574 | if (n == -1) 575 | break; 576 | if (n == 0 && len != 0) 577 | throw new IllegalArgumentException(); 578 | bout.write(buf, off, n); 579 | } 580 | } catch (IOException e) { 581 | throw new AssertionError("Unexpected exception", e); 582 | } 583 | Assert.assertArrayEquals(refOut, bout.toByteArray()); 584 | } 585 | 586 | 587 | private static void testFail(String inputBits, Reason reason) { 588 | try { 589 | test(inputBits, ""); 590 | } catch (DataFormatException e) { 591 | Assert.assertEquals(reason, e.getReason()); 592 | } 593 | } 594 | 595 | 596 | private static Random rand = new Random(); 597 | 598 | } 599 | -------------------------------------------------------------------------------- /test/io/nayuki/deflate/StringInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DEFLATE library (Java) 3 | * 4 | * Copyright (c) Project Nayuki 5 | * MIT License. See readme file. 6 | * https://www.nayuki.io/page/deflate-library-java 7 | */ 8 | 9 | package io.nayuki.deflate; 10 | 11 | import java.io.InputStream; 12 | import java.util.Objects; 13 | 14 | 15 | final class StringInputStream extends InputStream { 16 | 17 | /*---- Fields ----*/ 18 | 19 | private final String bits; 20 | private int index = 0; 21 | private int mark = -1; 22 | 23 | 24 | 25 | /*---- Constructor ----*/ 26 | 27 | public StringInputStream(String s) { 28 | Objects.requireNonNull(s); 29 | if (!s.matches("[01]*")) 30 | throw new IllegalArgumentException("String has characters other than 0 and 1"); 31 | if (s.length() % 8 != 0) 32 | throw new IllegalArgumentException("String length not a multiple of 8"); 33 | bits = s; 34 | } 35 | 36 | 37 | 38 | /*---- Methods ----*/ 39 | 40 | @Override public int read() { 41 | if (index >= bits.length()) 42 | return -1; 43 | int result = Integer.parseInt(bits.substring(index, index + 8), 2); 44 | result = Integer.reverse(result) >>> 24; 45 | index += 8; 46 | return result; 47 | } 48 | 
49 | 50 | @Override public boolean markSupported() { 51 | return true; 52 | } 53 | 54 | 55 | @Override public void mark(int limit) { 56 | mark = index; 57 | } 58 | 59 | 60 | @Override public void reset() { 61 | if (mark == -1) 62 | throw new IllegalStateException("No mark set"); 63 | index = mark; 64 | } 65 | 66 | } 67 | --------------------------------------------------------------------------------