├── Readme.markdown
├── src
├── gunzip.java
├── gzip.java
└── io
│ └── nayuki
│ └── deflate
│ ├── DataFormatException.java
│ ├── DeflaterOutputStream.java
│ ├── GzipInputStream.java
│ ├── GzipMetadata.java
│ ├── GzipOutputStream.java
│ ├── InflaterInputStream.java
│ ├── MarkableFileInputStream.java
│ ├── ZlibInputStream.java
│ ├── ZlibMetadata.java
│ ├── ZlibOutputStream.java
│ ├── comp
│ ├── BinarySplit.java
│ ├── BitOutputStream.java
│ ├── CountingBitOutputStream.java
│ ├── Decision.java
│ ├── Lz77Huffman.java
│ ├── MultiStrategy.java
│ ├── Strategy.java
│ └── Uncompressed.java
│ └── decomp
│ ├── Closed.java
│ ├── Open.java
│ ├── State.java
│ └── StickyException.java
└── test
└── io
└── nayuki
└── deflate
├── DeflaterOutputStreamTest.java
├── InflaterInputStreamTest.java
└── StringInputStream.java
/Readme.markdown:
--------------------------------------------------------------------------------
1 | DEFLATE library (Java)
2 | ======================
3 |
4 | This project provides a correct and reasonably efficient DEFLATE decompressor in pure Java.
5 | It is an alternative to other Java libraries that wrap over native C code such as zlib.
6 |
7 | Home page with detailed description: [https://www.nayuki.io/page/deflate-library-java](https://www.nayuki.io/page/deflate-library-java)
8 |
9 |
10 | License
11 | -------
12 |
13 | Copyright © 2023 Project Nayuki. (MIT License)
14 |
15 | Permission is hereby granted, free of charge, to any person obtaining a copy of
16 | this software and associated documentation files (the "Software"), to deal in
17 | the Software without restriction, including without limitation the rights to
18 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
19 | the Software, and to permit persons to whom the Software is furnished to do so,
20 | subject to the following conditions:
21 |
22 | * The above copyright notice and this permission notice shall be included in
23 | all copies or substantial portions of the Software.
24 |
25 | * The Software is provided "as is", without warranty of any kind, express or
26 | implied, including but not limited to the warranties of merchantability,
27 | fitness for a particular purpose and noninfringement. In no event shall the
28 | authors or copyright holders be liable for any claim, damages or other
29 | liability, whether in an action of contract, tort or otherwise, arising from,
30 | out of or in connection with the Software or the use or other dealings in the
31 | Software.
32 |
--------------------------------------------------------------------------------
/src/gunzip.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | import java.io.File;
10 | import java.io.FileOutputStream;
11 | import java.io.IOException;
12 | import java.io.OutputStream;
13 | import java.time.Instant;
14 | import io.nayuki.deflate.GzipInputStream;
15 | import io.nayuki.deflate.GzipMetadata;
16 | import io.nayuki.deflate.MarkableFileInputStream;
17 |
18 |
19 | /**
20 | * Decompression application for the gzip file format.
21 | * Usage: java gunzip InputFile.gz OutputFile
22 | * This decompresses a single input gzip file into a single output file. The program also prints
23 | * some information to standard output, and error messages if the file is invalid/corrupt.
24 | */
25 | public final class gunzip {
26 |
27 | public static void main(String[] args) {
28 | String msg = submain(args);
29 | if (msg != null) {
30 | System.err.println(msg);
31 | System.exit(1);
32 | }
33 | }
34 |
35 |
36 | // Returns null if successful, otherwise returns an error message string.
37 | private static String submain(String[] args) {
38 | // Check arguments
39 | if (args.length != 2)
40 | return "Usage: java gunzip InputFile.gz OutputFile";
41 |
42 | var inFile = new File(args[0]);
43 | if (!inFile.exists())
44 | return "Input path does not exist: " + inFile;
45 | if (inFile.isDirectory())
46 | return "Input path is a directory: " + inFile;
47 | var outFile = new File(args[1]);
48 | if (outFile.isDirectory())
49 | return "Output path is a directory: " + outFile;
50 |
51 | try (var in = new GzipInputStream(new MarkableFileInputStream(inFile))) {
52 | {
53 | GzipMetadata meta = in.getMetadata();
54 |
55 | System.err.println("Last modified: " + meta.modificationTimeUnixS()
56 | .map(t -> Instant.EPOCH.plusSeconds(t).toString()).orElse("N/A"));
57 |
58 | int extraFlags = meta.extraFlags();
59 | System.err.println("Extra flags: " + switch (extraFlags) {
60 | case 2 -> "Maximum compression";
61 | case 4 -> "Fastest compression";
62 | default -> "Unknown (" + extraFlags + ")";
63 | });
64 |
65 | System.err.println("Operating system: " + switch (meta.operatingSystem()) {
66 | case FAT_FILESYSTEM -> "FAT filesystem";
67 | case AMIGA -> "Amiga";
68 | case VMS -> "VMS";
69 | case UNIX -> "Unix";
70 | case VM_CMS -> "VM/CMS";
71 | case ATARI_TOS -> "Atari TOS";
72 | case HPFS_FILESYSTEM -> "HPFS filesystem";
73 | case MACINTOSH -> "Macintosh";
74 | case Z_SYSTEM -> "Z-System";
75 | case CPM -> "CP/M";
76 | case TOPS_20 -> "TOPS-20";
77 | case NTFS_FILESYSTEM -> "NTFS filesystem";
78 | case QDOS -> "QDOS";
79 | case ACORN_RISCOS -> "Acorn RISCOS";
80 | case UNKNOWN -> "Unknown";
81 | default -> throw new AssertionError("Unreachable value");
82 | });
83 |
84 | System.err.println("File mode: " + (meta.isFileText() ? "Text" : "Binary"));
85 |
86 | meta.extraField().ifPresent(b ->
87 | System.err.println("Extra field: " + b.length + " bytes"));
88 |
89 | meta.fileName().ifPresent(s ->
90 | System.err.println("File name: " + s));
91 |
92 | meta.comment().ifPresent(s ->
93 | System.err.println("Comment: " + s));
94 | }
95 |
96 | // Start decompressing and writing output file
97 | long elapsedTime = -System.nanoTime();
98 | try (OutputStream out = new FileOutputStream(outFile)) {
99 | in.transferTo(out);
100 | }
101 | elapsedTime += System.nanoTime();
102 | System.err.printf("Input speed: %.2f MB/s%n", inFile.length() / 1e6 / elapsedTime * 1.0e9);
103 | System.err.printf("Output speed: %.2f MB/s%n", outFile.length() / 1e6 / elapsedTime * 1.0e9);
104 |
105 | } catch (IOException e) {
106 | return "I/O exception: " + e.getMessage();
107 | }
108 | return null;
109 | }
110 |
111 | }
112 |
--------------------------------------------------------------------------------
/src/gzip.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | import java.io.File;
10 | import java.io.FileInputStream;
11 | import java.io.FileOutputStream;
12 | import java.io.IOException;
13 | import java.io.InputStream;
14 | import java.io.OutputStream;
15 | import java.util.Optional;
16 | import io.nayuki.deflate.GzipMetadata;
17 | import io.nayuki.deflate.GzipOutputStream;
18 |
19 |
20 | /**
21 | * Compression application for the gzip file format.
22 | * Usage: java gzip InputFile OutputFile.gz
23 | * This compresses a single input file into a single gzip output file.
24 | */
25 | public final class gzip {
26 |
27 | public static void main(String[] args) {
28 | String msg = submain(args);
29 | if (msg != null) {
30 | System.err.println(msg);
31 | System.exit(1);
32 | }
33 | }
34 |
35 |
36 | // Returns null if successful, otherwise returns an error message string.
37 | private static String submain(String[] args) {
38 | // Check arguments
39 | if (args.length != 2)
40 | return "Usage: java gzip InputFile OutputFile.gz";
41 |
42 | var inFile = new File(args[0]);
43 | if (!inFile.exists())
44 | return "Input path does not exist: " + inFile;
45 | if (inFile.isDirectory())
46 | return "Input path is a directory: " + inFile;
47 | var outFile = new File(args[1]);
48 | if (outFile.isDirectory())
49 | return "Output path is a directory: " + outFile;
50 |
51 | // Create the metadata structure
52 | int modTime = (int)(inFile.lastModified() / 1000);
53 | var meta = new GzipMetadata(
54 | GzipMetadata.CompressionMethod.DEFLATE,
55 | false,
56 | modTime != 0 ? Optional.of(modTime) : Optional.empty(),
57 | 0,
58 | GzipMetadata.OperatingSystem.UNIX,
59 | Optional.empty(),
60 | Optional.of(inFile.getName()),
61 | Optional.empty(),
62 | true);
63 |
64 | // Start compressing and writing output file
65 | long elapsedTime = -System.nanoTime();
66 | try (InputStream in = new FileInputStream(inFile);
67 | OutputStream out = new GzipOutputStream(new FileOutputStream(outFile), meta)) {
68 | in.transferTo(out);
69 | } catch (IOException e) {
70 | return "I/O exception: " + e.getMessage();
71 | }
72 | elapsedTime += System.nanoTime();
73 | System.err.printf("Input speed: %.2f MB/s%n", inFile.length() / 1e6 / elapsedTime * 1.0e9);
74 | System.err.printf("Output speed: %.2f MB/s%n", outFile.length() / 1e6 / elapsedTime * 1.0e9);
75 |
76 | return null;
77 | }
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/DataFormatException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.util.Objects;
12 |
13 |
/**
 * Unchecked exception signalling that compressed data being read is malformed.
 * Each instance carries a {@link Reason} code in addition to its text message.
 */
@SuppressWarnings("serial")
public final class DataFormatException extends RuntimeException {

    /*---- Enumeration ----*/

    /** Machine-readable classification of what was wrong with the data. */
    public enum Reason {
        UNEXPECTED_END_OF_STREAM,
        RESERVED_BLOCK_TYPE,
        UNCOMPRESSED_BLOCK_LENGTH_MISMATCH,
        HUFFMAN_CODE_UNDER_FULL,
        HUFFMAN_CODE_OVER_FULL,
        NO_PREVIOUS_CODE_LENGTH_TO_COPY,
        CODE_LENGTH_CODE_OVER_FULL,
        END_OF_BLOCK_CODE_ZERO_LENGTH,
        RESERVED_LENGTH_SYMBOL,
        RESERVED_DISTANCE_SYMBOL,
        LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE,
        COPY_FROM_BEFORE_DICTIONARY_START,

        HEADER_CHECKSUM_MISMATCH,
        UNSUPPORTED_COMPRESSION_METHOD,
        DECOMPRESSED_CHECKSUM_MISMATCH,
        DECOMPRESSED_SIZE_MISMATCH,

        GZIP_INVALID_MAGIC_NUMBER,
        GZIP_RESERVED_FLAGS_SET,
        GZIP_UNSUPPORTED_OPERATING_SYSTEM,
    }



    /*---- Field ----*/

    private final Reason reason;


    /*---- Constructor ----*/

    /**
     * Creates an exception with the given reason code and detail message.
     * @param rsn the reason code (not {@code null})
     * @param msg the detail message
     * @throws NullPointerException if the reason is {@code null}
     */
    public DataFormatException(Reason rsn, String msg) {
        super(msg);
        Objects.requireNonNull(rsn);
        this.reason = rsn;
    }


    /*---- Function ----*/

    /*
     * Never returns normally; it always throws an exception whose reason is
     * UNEXPECTED_END_OF_STREAM. The plain statement form is preferred:
     *   DataFormatException.throwUnexpectedEnd();
     * The declared return type exists so that a call can be written after "throw"
     * where the compiler needs to see that control flow definitely ends, e.g.:
     *   int foo;
     *   try {
     *       foo = bar();
     *   } catch (EOFException e) {
     *       throw DataFormatException.throwUnexpectedEnd();
     *   }
     *   print(foo);
     */
    public static DataFormatException throwUnexpectedEnd() {
        var ex = new DataFormatException(Reason.UNEXPECTED_END_OF_STREAM, "Unexpected end of stream");
        throw ex;
    }


    /*---- Method ----*/

    /**
     * Returns the reason code supplied at construction.
     * @return the reason code (not {@code null})
     */
    public Reason getReason() {
        return this.reason;
    }

}
86 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/DeflaterOutputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.BufferedOutputStream;
12 | import java.io.IOException;
13 | import java.io.OutputStream;
14 | import java.util.Objects;
15 | import io.nayuki.deflate.comp.BitOutputStream;
16 | import io.nayuki.deflate.comp.Decision;
17 | import io.nayuki.deflate.comp.Lz77Huffman;
18 | import io.nayuki.deflate.comp.Strategy;
19 |
20 |
21 | /**
22 | * Compresses a byte stream into a DEFLATE data stream
23 | * (raw format without zlib or gzip headers or footers).
24 | * Currently only supports uncompressed blocks for simplicity, which actually
25 | * expands the data slightly, but still conforms to the DEFLATE format.
26 | * This class performs its own buffering, so it is unnecessary to wrap a {@link
27 | * BufferedOutputStream} around the {@link OutputStream} given to the constructor.
28 | * @see InflaterInputStream
29 | */
30 | public final class DeflaterOutputStream extends OutputStream {
31 |
32 | private static final int MAX_HISTORY_CAPACITY = 32 * 1024;
33 |
34 |
35 | private OutputStream output;
36 | private BitOut bitOutput;
37 |
38 | // [unused][history (historyLength)][data (dataLength)][unused]
39 | // ^0 ^historyStart combinedBuffer.length^
40 | private byte[] combinedBuffer;
41 | private final int historyLookbehindLimit;
42 | private final int dataLookaheadLimit;
43 | private int historyStart = 0;
44 | private int historyLength = 0;
45 | private int dataLength = 0;
46 |
47 | private final Strategy strategy;
48 |
49 |
50 | public DeflaterOutputStream(OutputStream out) {
51 | this(out, 64 * 1024, MAX_HISTORY_CAPACITY, Lz77Huffman.RLE_DYNAMIC);
52 | }
53 |
54 |
55 | public DeflaterOutputStream(OutputStream out, int dataLookaheadLimit, int historyLookbehindLimit, Strategy strat) {
56 | output = Objects.requireNonNull(out);
57 | bitOutput = new BitOut();
58 | if (dataLookaheadLimit < 1 || historyLookbehindLimit < 0 || historyLookbehindLimit > MAX_HISTORY_CAPACITY
59 | || (long)dataLookaheadLimit + historyLookbehindLimit > Integer.MAX_VALUE)
60 | throw new IllegalArgumentException("Invalid capacities");
61 | combinedBuffer = new byte[historyLookbehindLimit + Math.max(dataLookaheadLimit, historyLookbehindLimit)];
62 | this.historyLookbehindLimit = historyLookbehindLimit;
63 | this.dataLookaheadLimit = dataLookaheadLimit;
64 | strategy = Objects.requireNonNull(strat);
65 | }
66 |
67 |
68 |
69 | OutputStream getUnderlyingStream() {
70 | if (output == null)
71 | throw new IllegalStateException("Stream already closed");
72 | return output;
73 | }
74 |
75 |
76 | @Override public void write(int b) throws IOException {
77 | if (bitOutput == null)
78 | throw new IllegalStateException("Stream already ended");
79 | if (dataLength >= dataLookaheadLimit)
80 | writeBuffer(false);
81 | combinedBuffer[historyStart + historyLength + dataLength] = (byte)b;
82 | dataLength++;
83 | }
84 |
85 |
86 | @Override public void write(byte[] b, int off, int len) throws IOException {
87 | if (bitOutput == null)
88 | throw new IllegalStateException("Stream already ended");
89 | Objects.checkFromIndexSize(off, len, b.length);
90 | while (len > 0) {
91 | if (dataLength >= dataLookaheadLimit)
92 | writeBuffer(false);
93 | int n = Math.min(len, dataLookaheadLimit - dataLength);
94 | System.arraycopy(b, off, combinedBuffer, historyStart + historyLength + dataLength, n);
95 | off += n;
96 | len -= n;
97 | dataLength += n;
98 | }
99 | }
100 |
101 |
102 | public void finish() throws IOException {
103 | if (bitOutput == null)
104 | throw new IllegalStateException("Stream already ended");
105 | writeBuffer(true);
106 | bitOutput.finish();
107 | bitOutput = null;
108 | }
109 |
110 |
111 | @Override public void close() throws IOException {
112 | if (bitOutput != null)
113 | finish();
114 | output.close();
115 | output = null;
116 | }
117 |
118 |
119 | private void writeBuffer(boolean isFinal) throws IOException {
120 | if (bitOutput == null)
121 | throw new IllegalStateException("Stream already ended");
122 |
123 | Decision dec = strategy.decide(combinedBuffer, historyStart, historyLength, dataLength);
124 | dec.compressTo(bitOutput, isFinal);
125 | if (isFinal)
126 | return;
127 |
128 | int dataEnd = historyStart + historyLength + dataLength;
129 | historyLength = Math.min(historyLength + dataLength, historyLookbehindLimit);
130 | dataLength = 0;
131 | if (combinedBuffer.length - dataEnd >= dataLookaheadLimit)
132 | historyStart = dataEnd - historyLength;
133 | else {
134 | System.arraycopy(combinedBuffer, dataEnd - historyLength, combinedBuffer, 0, historyLength);
135 | historyStart = 0;
136 | }
137 | }
138 |
139 |
140 |
141 | private final class BitOut implements BitOutputStream {
142 |
143 | private long bitBuffer = 0;
144 | private int bitBufferLength = 0;
145 |
146 |
147 | @Override public void writeBits(int value, int numBits) throws IOException {
148 | assert 0 <= numBits && numBits <= 31 && value >>> numBits == 0;
149 | if (numBits > 64 - bitBufferLength) {
150 | for (; bitBufferLength >= 8; bitBufferLength -= 8, bitBuffer >>>= 8)
151 | output.write((byte)bitBuffer);
152 | }
153 | assert numBits <= 64 - bitBufferLength;
154 | bitBuffer |= (long)value << bitBufferLength;
155 | bitBufferLength += numBits;
156 | }
157 |
158 |
159 | @Override public int getBitPosition() {
160 | return bitBufferLength % 8;
161 | }
162 |
163 |
164 | public void finish() throws IOException {
165 | writeBits(0, (8 - getBitPosition()) % 8);
166 | for (; bitBufferLength >= 8; bitBufferLength -= 8, bitBuffer >>>= 8)
167 | output.write((byte)bitBuffer);
168 | assert bitBufferLength == 0;
169 | }
170 |
171 | }
172 |
173 | }
174 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/GzipInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.BufferedInputStream;
12 | import java.io.DataInput;
13 | import java.io.DataInputStream;
14 | import java.io.EOFException;
15 | import java.io.IOException;
16 | import java.io.InputStream;
17 | import java.util.Objects;
18 | import java.util.zip.CRC32;
19 | import io.nayuki.deflate.DataFormatException.Reason;
20 |
21 |
22 | public final class GzipInputStream extends InputStream {
23 |
24 | /*---- Fields ----*/
25 |
26 | private InputStream rawInput;
27 | private InputStream decompressedInput;
28 |
29 | private final GzipMetadata metadata;
30 |
31 | private long decompressedLength = 0;
32 | private CRC32 checksum = new CRC32();
33 |
34 |
35 |
36 | /*---- Constructor ----*/
37 |
38 | public GzipInputStream(InputStream in) throws IOException {
39 | Objects.requireNonNull(in);
40 | metadata = GzipMetadata.read(in);
41 | if (!in.markSupported())
42 | in = new BufferedInputStream(in);
43 | rawInput = in;
44 | decompressedInput = new InflaterInputStream(in, true);
45 | }
46 |
47 |
48 |
49 | /*---- Methods ----*/
50 |
51 | public GzipMetadata getMetadata() {
52 | return metadata;
53 | }
54 |
55 |
56 | @Override public int read() throws IOException {
57 | var b = new byte[1];
58 | return switch (read(b)) {
59 | case 1 -> b[0] & 0xFF;
60 | case -1 -> -1; // EOF
61 | default -> throw new AssertionError("Unreachable value");
62 | };
63 | }
64 |
65 |
66 | @Override public int read(byte[] b, int off, int len) throws IOException {
67 | if (decompressedInput == null)
68 | return -1;
69 | int result = decompressedInput.read(b, off, len);
70 | if (result != -1) {
71 | decompressedLength += result;
72 | checksum.update(b, off, result);
73 | } else {
74 | decompressedInput = null;
75 | int expectChecksum, expectLength;
76 | DataInput din = new DataInputStream(rawInput);
77 | try {
78 | expectChecksum = Integer.reverseBytes(din.readInt());
79 | expectLength = Integer.reverseBytes(din.readInt());
80 | } catch (EOFException e) {
81 | throw DataFormatException.throwUnexpectedEnd();
82 | }
83 | if ((int)checksum.getValue() != expectChecksum)
84 | throw new DataFormatException(Reason.DECOMPRESSED_CHECKSUM_MISMATCH, "Decompression CRC-32 mismatch");
85 | checksum = null;
86 | if ((int)decompressedLength != expectLength)
87 | throw new DataFormatException(Reason.DECOMPRESSED_SIZE_MISMATCH, "Decompressed size mismatch");
88 | }
89 | return result;
90 | }
91 |
92 |
93 | @Override public void close() throws IOException {
94 | rawInput.close();
95 | rawInput = null;
96 | decompressedInput = null;
97 | checksum = null;
98 | }
99 |
100 | }
101 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/GzipMetadata.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.ByteArrayOutputStream;
12 | import java.io.DataInput;
13 | import java.io.DataInputStream;
14 | import java.io.DataOutput;
15 | import java.io.DataOutputStream;
16 | import java.io.EOFException;
17 | import java.io.IOException;
18 | import java.io.InputStream;
19 | import java.io.OutputStream;
20 | import java.nio.charset.StandardCharsets;
21 | import java.util.BitSet;
22 | import java.util.Objects;
23 | import java.util.Optional;
24 | import java.util.zip.CRC32;
25 | import java.util.zip.CheckedInputStream;
26 | import java.util.zip.CheckedOutputStream;
27 | import io.nayuki.deflate.DataFormatException.Reason;
28 |
29 |
30 | public record GzipMetadata(
31 | CompressionMethod compressionMethod,
32 | boolean isFileText,
33 | Optional modificationTimeUnixS,
34 | int extraFlags,
35 | OperatingSystem operatingSystem,
36 | Optional extraField,
37 | Optional fileName,
38 | Optional comment,
39 | boolean hasHeaderCrc) {
40 |
41 |
42 | /*---- Constructor ----*/
43 |
44 | public GzipMetadata {
45 | Objects.requireNonNull(compressionMethod);
46 |
47 | Objects.requireNonNull(modificationTimeUnixS);
48 | modificationTimeUnixS.ifPresent(x -> {
49 | if (x == 0)
50 | throw new IllegalArgumentException("Modification timestamp is zero");
51 | });
52 |
53 | if (extraFlags >>> 8 != 0)
54 | throw new IllegalArgumentException("Invalid extra flags value");
55 |
56 | Objects.requireNonNull(operatingSystem);
57 |
58 | Objects.requireNonNull(extraField);
59 | extraField.ifPresent(b -> {
60 | if (b.length > 0xFFFF)
61 | throw new IllegalArgumentException("Extra field too long");
62 | });
63 |
64 | Objects.requireNonNull(fileName);
65 |
66 | Objects.requireNonNull(comment);
67 | }
68 |
69 |
70 |
71 | /*---- Static factory ----*/
72 |
73 | public static GzipMetadata read(InputStream in) throws IOException {
74 | Objects.requireNonNull(in);
75 | var in1 = new CheckedInputStream(in, new CRC32());
76 | DataInput in2 = new DataInputStream(in1);
77 |
78 | try {
79 | // -- Read and process 10-byte header --
80 | if (in2.readUnsignedShort() != 0x1F8B)
81 | throw new DataFormatException(Reason.GZIP_INVALID_MAGIC_NUMBER, "Invalid GZIP magic number");
82 |
83 | int compMethodInt = in2.readUnsignedByte();
84 | if (compMethodInt != 8)
85 | throw new DataFormatException(Reason.UNSUPPORTED_COMPRESSION_METHOD, "Unsupported compression method: " + compMethodInt);
86 | CompressionMethod compMethod = CompressionMethod.DEFLATE;
87 |
88 | var flagByte = new byte[1];
89 | in2.readFully(flagByte);
90 | BitSet flags = BitSet.valueOf(flagByte);
91 |
92 | // Reserved flags
93 | if (flags.get(5) || flags.get(6) || flags.get(7))
94 | throw new DataFormatException(Reason.GZIP_RESERVED_FLAGS_SET, "Reserved flags are set");
95 |
96 | // Modification time
97 | int modTimeInt = Integer.reverseBytes(in2.readInt());
98 | Optional modTime = modTimeInt != 0 ? Optional.of(modTimeInt) : Optional.empty();
99 |
100 | // Extra flags
101 | int extraFlags = in2.readUnsignedByte();
102 |
103 | // Operating system
104 | int operSystemInt = in2.readUnsignedByte();
105 | OperatingSystem operSystem;
106 | if (operSystemInt < OperatingSystem.UNKNOWN.ordinal())
107 | operSystem = OperatingSystem.values()[operSystemInt];
108 | else if (operSystemInt == 0xFF)
109 | operSystem = OperatingSystem.UNKNOWN;
110 | else
111 | throw new DataFormatException(Reason.GZIP_UNSUPPORTED_OPERATING_SYSTEM, "Unsupported operating system value");
112 |
113 | // -- Handle assorted flags and read more data --
114 | boolean isFileText = flags.get(0);
115 |
116 | Optional extraField = Optional.empty();
117 | if (flags.get(2)) {
118 | int len = Integer.reverseBytes(in2.readShort()) >>> 16;
119 | var b = new byte[len];
120 | in2.readFully(b);
121 | extraField = Optional.of(b);
122 | }
123 |
124 | Optional fileName = Optional.empty();
125 | if (flags.get(3))
126 | fileName = Optional.of(readNullTerminatedString(in2));
127 |
128 | Optional comment = Optional.empty();
129 | if (flags.get(4))
130 | comment = Optional.of(readNullTerminatedString(in2));
131 |
132 | boolean hasHeaderCrc = flags.get(1);
133 | if (hasHeaderCrc) {
134 | int expect = (int)in1.getChecksum().getValue() & 0xFFFF;
135 | int actual = Integer.reverseBytes(in2.readShort()) >>> 16;
136 | if (actual != expect)
137 | throw new DataFormatException(Reason.HEADER_CHECKSUM_MISMATCH, "Header CRC-16 mismatch");
138 | }
139 |
140 | return new GzipMetadata(compMethod, isFileText, modTime, extraFlags,
141 | operSystem, extraField, fileName, comment, hasHeaderCrc);
142 |
143 | } catch (EOFException e) {
144 | throw DataFormatException.throwUnexpectedEnd();
145 | }
146 | }
147 |
148 |
149 | private static String readNullTerminatedString(DataInput in) throws IOException {
150 | var bout = new ByteArrayOutputStream();
151 | while (true) {
152 | byte b = in.readByte();
153 | if (b == 0)
154 | break;
155 | bout.write(b);
156 | }
157 | return new String(bout.toByteArray(), StandardCharsets.ISO_8859_1);
158 | }
159 |
160 |
161 |
162 | /*---- Method ----*/
163 |
164 | public void write(OutputStream out) throws IOException {
165 | Objects.requireNonNull(out);
166 | var out1 = new CheckedOutputStream(out, new CRC32());
167 | DataOutput out2 = new DataOutputStream(out1);
168 |
169 | out2.writeShort(0x1F8B);
170 |
171 | out2.writeByte(switch (compressionMethod) {
172 | case DEFLATE -> 8;
173 | default -> throw new AssertionError("Unreachable value");
174 | });
175 |
176 | boolean[] flags = {
177 | isFileText,
178 | hasHeaderCrc,
179 | extraField.isPresent(),
180 | fileName.isPresent(),
181 | comment.isPresent(),
182 | };
183 | assert flags.length <= 8;
184 | int flagByte = 0;
185 | for (int i = 0; i < flags.length; i++)
186 | flagByte |= (flags[i] ? 1 : 0) << i;
187 | out2.writeByte(flagByte);
188 |
189 | out2.writeInt(Integer.reverseBytes(modificationTimeUnixS.orElse(0)));
190 |
191 | out2.writeByte(extraFlags);
192 |
193 | out2.writeByte(switch (operatingSystem) {
194 | case UNKNOWN -> 0xFF;
195 | default -> operatingSystem.ordinal();
196 | });
197 |
198 | if (extraField.isPresent()) {
199 | byte[] b = extraField.get();
200 | out2.writeShort(Integer.reverseBytes(b.length) >>> 16);
201 | out2.write(b);
202 | }
203 |
204 | if (fileName.isPresent())
205 | out2.write((fileName.get() + "\0").getBytes(StandardCharsets.ISO_8859_1));
206 |
207 | if (comment.isPresent())
208 | out2.write((comment.get() + "\0").getBytes(StandardCharsets.ISO_8859_1));
209 |
210 | if (hasHeaderCrc)
211 | out2.writeShort(Integer.reverseBytes((int)out1.getChecksum().getValue()) >>> 16);
212 | }
213 |
214 |
215 |
216 | /*---- Enumerations ----*/
217 |
218 | public enum CompressionMethod {
219 | DEFLATE,
220 | }
221 |
222 |
223 | public enum OperatingSystem {
224 | FAT_FILESYSTEM,
225 | AMIGA,
226 | VMS,
227 | UNIX,
228 | VM_CMS,
229 | ATARI_TOS,
230 | HPFS_FILESYSTEM,
231 | MACINTOSH,
232 | Z_SYSTEM,
233 | CPM,
234 | TOPS_20,
235 | NTFS_FILESYSTEM,
236 | QDOS,
237 | ACORN_RISCOS,
238 |
239 | UNKNOWN,
240 | }
241 |
242 | }
243 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/GzipOutputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.DataOutput;
12 | import java.io.DataOutputStream;
13 | import java.io.IOException;
14 | import java.io.OutputStream;
15 | import java.util.Objects;
16 | import java.util.zip.CRC32;
17 |
18 |
19 | public final class GzipOutputStream extends OutputStream {
20 |
21 | /*---- Fields ----*/
22 |
23 | private DeflaterOutputStream output;
24 |
25 | private CRC32 checksum = new CRC32();
26 | private long uncompressedLength = 0;
27 |
28 |
29 |
30 | /*---- Constructors ----*/
31 |
32 | public GzipOutputStream(OutputStream out, GzipMetadata meta) throws IOException {
33 | this(new DeflaterOutputStream(out), meta);
34 | }
35 |
36 |
37 | public GzipOutputStream(DeflaterOutputStream out, GzipMetadata meta) throws IOException {
38 | Objects.requireNonNull(out);
39 | Objects.requireNonNull(meta);
40 | meta.write(out.getUnderlyingStream());
41 | output = out;
42 | }
43 |
44 |
45 |
46 | /*---- Methods ----*/
47 |
48 | @Override public void write(int b) throws IOException {
49 | write(new byte[]{(byte)b});
50 | }
51 |
52 |
53 | @Override public void write(byte[] b, int off, int len) throws IOException {
54 | if (checksum == null)
55 | throw new IllegalStateException("Stream already ended");
56 | output.write(b, off, len);
57 | checksum.update(b, off, len);
58 | uncompressedLength += len;
59 | }
60 |
61 |
62 | public void finish() throws IOException {
63 | if (checksum == null)
64 | throw new IllegalStateException("Stream already ended");
65 | output.finish();
66 | DataOutput dout = new DataOutputStream(output.getUnderlyingStream());
67 | dout.writeInt(Integer.reverseBytes((int)checksum.getValue()));
68 | checksum = null;
69 | dout.writeInt(Integer.reverseBytes((int)uncompressedLength));
70 | }
71 |
72 |
73 | @Override public void close() throws IOException {
74 | if (checksum != null)
75 | finish();
76 | output.close();
77 | output = null;
78 | }
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/InflaterInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.IOException;
12 | import java.io.InputStream;
13 | import java.util.Objects;
14 | import io.nayuki.deflate.decomp.Closed;
15 | import io.nayuki.deflate.decomp.Open;
16 | import io.nayuki.deflate.decomp.State;
17 | import io.nayuki.deflate.decomp.StickyException;
18 |
19 |
20 | /**
21 | * Decompresses a DEFLATE data stream (raw format without zlib or gzip headers or footers) into
22 | * a byte stream. Objects only use memory and no operating system resources, so it is safe to discard
23 | * these objects without calling {@link #close()} in order to continue using the underlying streams.
24 | * @see DeflaterOutputStream
25 | */
26 | public final class InflaterInputStream extends InputStream {
27 |
28 | /*---- Field ----*/
29 |
30 | private State state;
31 |
32 |
33 |
34 | /*---- Constructors ----*/
35 |
36 | /**
37 | * Constructs an inflater input stream over the specified underlying input stream. The underlying
38 | * stream must contain DEFLATE-compressed data with no headers or footers (e.g. must be unwrapped
39 | * from the zlib or gzip container formats). When this inflater stream reaches the end, the
40 | * underlying stream will be at an unspecified position at or after the end of the DEFLATE data.
41 | * @param in the underlying input stream of raw DEFLATE-compressed data
42 | * @throws NullPointerException if the input stream is {@code null}
43 | */
44 | public InflaterInputStream(InputStream in) {
45 | this(in, false);
46 | }
47 |
48 |
49 | /**
50 | * Constructs an inflater input stream over the specified underlying input stream,
51 | * and with the specified option for ending exactly. The underlying stream must
52 | * contain DEFLATE-compressed data with no headers or footers (e.g. must be unwrapped
53 | * from the zlib or gzip container formats). If ending exactly is requested, then
54 | * the underlying stream must support marking, and when this inflater stream reaches
55 | * the end, the underlying stream will be foremost byte position after the end of the
56 | * DEFLATE data. Otherwise (not ending exactly) when this inflater stream reaches the
57 | * end, the underlying stream will be at an unspecified position at or after the end
58 | * of the DEFLATE data. For end-exactly to be useful, discard this inflater stream
59 | * without calling {@link #close()} so that the underlying stream can still be used.
60 | * @param in the underlying input stream of raw DEFLATE-compressed data
61 | * @param endExactly whether to position the underlying stream at the exact
62 | * position after the end of the DEFLATE data when this inflater stream ends
63 | * @throws NullPointerException if the input stream is {@code null}
64 | * @throws IllegalArgumentException if {@code endExactly
65 | * == true} but {@code in.markSupported() == false}
66 | */
67 | public InflaterInputStream(InputStream in, boolean endExactly) {
68 | this(in, endExactly, DEFAULT_INPUT_BUFFER_SIZE);
69 | }
70 |
71 |
72 | private static final int DEFAULT_INPUT_BUFFER_SIZE = 16 * 1024;
73 |
74 |
75 | /**
76 | * Constructs an inflater input stream over the specified underlying input stream,
77 | * with the specified options for ending exactly and input buffer size. The underlying
78 | * stream must contain DEFLATE-compressed data with no headers or footers (e.g. must
79 | * be unwrapped from the zlib or gzip container formats). If ending exactly is
80 | * requested, then the underlying stream must support marking, and when this inflater
81 | * stream reaches the end, the underlying stream will be foremost byte position after
82 | * the end of the DEFLATE data. Otherwise (not ending exactly) when this inflater
83 | * stream reaches the end, the underlying stream will be at an unspecified position
84 | * at or after the end of the DEFLATE data. For end-exactly to be useful, discard this
85 | * inflater stream without calling {@link #close()} so that the underlying stream can
86 | * still be used.
87 | * @param in the underlying input stream of raw DEFLATE-compressed data (not {@code null})
88 | * @param endExactly whether to position the underlying stream at the exact
89 | * position after the end of the DEFLATE data when this inflater stream ends
90 | * @param inBufLen the size of the internal read buffer, which must be positive
91 | * @throws NullPointerException if the input stream is {@code null}
92 | * @throws IllegalArgumentException if {@code inBufLen < 1}
93 | * @throws IllegalArgumentException if {@code endExactly
94 | * == true} but {@code in.markSupported() == false}
95 | */
96 | public InflaterInputStream(InputStream in, boolean endExactly, int inBufLen) {
97 | Objects.requireNonNull(in);
98 | if (inBufLen <= 0)
99 | throw new IllegalArgumentException("Non-positive input buffer size");
100 | if (endExactly) {
101 | if (!in.markSupported())
102 | throw new IllegalArgumentException("Input stream not markable, cannot support detachment");
103 | in.mark(0);
104 | }
105 | state = new Open(in, endExactly, inBufLen);
106 | }
107 |
108 |
109 |
110 | /*---- Methods ----*/
111 |
112 | /**
113 | * Reads the next byte of decompressed data from this stream. If data is
114 | * available then a number in the range [0, 255] is returned (blocking if
115 | * necessary); otherwise −1 is returned if the end of stream is reached.
116 | * @return the next unsigned byte of data, or −1 for the end of stream
117 | * @throws IOException if an I/O exception occurs in the underlying input stream, the end
118 | * of stream occurs at an unexpected position, or the compressed data has a format error
119 | * @throws IllegalStateException if the stream has already been closed
120 | */
121 | @Override public int read() throws IOException {
122 | // In theory this method for reading a single byte could be implemented somewhat faster.
123 | // We could take the logic of read(byte[],int,int) and simplify it for the special case
124 | // of handling one byte. But if the caller chose to use this read() method instead of
125 | // the bulk read(byte[]) method, then they have already chosen to not care about speed.
126 | // Therefore speeding up this method would result in needless complexity. Instead,
127 | // we chose to optimize this method for simplicity and ease of verifying correctness.
128 | var b = new byte[1];
129 | return switch (read(b)) {
130 | case 1 -> b[0] & 0xFF;
131 | case -1 -> -1; // EOF
132 | default -> throw new AssertionError("Unreachable value");
133 | };
134 | }
135 |
136 |
137 | /**
138 | * Reads some bytes from the decompressed data of this stream into the specified array's
139 | * subrange. This returns the number of data bytes that were stored into the array, and
140 | * is in the range [−1, len]. A return value of 0 is allowed iff {@code len} is 0.
141 | * @throws NullPointerException if the array is {@code null}
142 | * @throws ArrayIndexOutOfBoundsException if the array subrange is out of bounds
143 | * @throws IOException if an I/O exception occurs in the underlying input stream, the end
144 | * of stream occurs at an unexpected position, or the compressed data has a format error
145 | * @throws IllegalStateException if the stream has already been closed
146 | */
147 | @Override public int read(byte[] b, int off, int len) throws IOException {
148 | // Check arguments and state
149 | Objects.requireNonNull(b);
150 | Objects.checkFromIndexSize(off, len, b.length);
151 | if (state instanceof Open st) {
152 | try {
153 | return st.read(b, off, len);
154 | } catch (IOException e) {
155 | state = new StickyException(st.input, e);
156 | throw e;
157 | }
158 | } else if (state instanceof StickyException st)
159 | throw st.exception();
160 | else if (state instanceof Closed)
161 | throw new IllegalStateException("Stream already closed");
162 | else
163 | throw new AssertionError("Unreachable type");
164 | }
165 |
166 |
167 | /**
168 | * Closes this input stream and the underlying stream. It is illegal
169 | * to call {@link #read()} or {@link #detach()} after closing. It is
170 | * idempotent to call this {@link #close()} method more than once.
171 | * @throws IOException if an I/O exception occurs in the underlying stream
172 | */
173 | @Override public void close() throws IOException {
174 | if (state instanceof Open st)
175 | st.close();
176 | else if (state instanceof StickyException st)
177 | st.input().close();
178 | state = Closed.SINGLETON;
179 | }
180 |
181 | }
182 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/MarkableFileInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.File;
12 | import java.io.FileNotFoundException;
13 | import java.io.IOException;
14 | import java.io.InputStream;
15 | import java.io.RandomAccessFile;
16 |
17 |
/**
 * An input stream over a file that supports {@link #mark(int)} and {@link #reset()}
 * by remembering a file position and seeking back to it. The file is opened
 * read-only through a {@link RandomAccessFile}, which is released by {@link #close()}.
 */
public final class MarkableFileInputStream extends InputStream {
	
	private final RandomAccessFile raf;
	
	// File position saved by the latest mark(), or -1 if mark() was never called.
	private long markPosition;
	
	
	
	/**
	 * Opens the file at the specified path for reading.
	 * @param path the path of the file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 */
	public MarkableFileInputStream(String path) throws FileNotFoundException {
		this(new File(path));
	}
	
	
	/**
	 * Opens the specified file for reading.
	 * @param file the file to read
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 */
	public MarkableFileInputStream(File file) throws FileNotFoundException {
		raf = new RandomAccessFile(file, "r");
		markPosition = -1;
	}
	
	
	
	@Override public int read() throws IOException {
		return raf.read();
	}
	
	
	@Override public int read(byte[] b, int off, int len) throws IOException {
		return raf.read(b, off, len);
	}
	
	
	@Override public boolean markSupported() {
		return true;
	}
	
	
	/**
	 * Remembers the current file position. The read limit is ignored
	 * because any earlier position of a file can be sought to.
	 * @throws java.io.UncheckedIOException if the file position cannot be queried
	 */
	@Override public void mark(int readLimit) {
		try {
			markPosition = raf.getFilePointer();
		} catch (IOException e) {
			// mark() cannot declare a checked exception
			throw new java.io.UncheckedIOException(e);
		}
	}
	
	
	// Normally after a reset(), rereading the same file section will yield the same bytes.
	// But this is not always true - e.g. due to concurrent writing. Thus this class does not
	// provide a hard guarantee for the mark()/reset() behavior like BufferedInputStream does.
	/**
	 * Seeks back to the position remembered by the latest {@link #mark(int)} call.
	 * @throws java.io.UncheckedIOException if no mark has been set or the seek fails
	 */
	@Override public void reset() {
		if (markPosition == -1)
			throw new java.io.UncheckedIOException(new IOException("Mark not set"));
		try {
			raf.seek(markPosition);
		} catch (IOException e) {
			// Kept unchecked so that existing callers of this concrete type still compile
			throw new java.io.UncheckedIOException(e);
		}
	}
	
	
	/**
	 * Closes the underlying file, releasing its handle.
	 * Closing an already closed stream has no effect.
	 * @throws IOException if an I/O exception occurs while closing the file
	 */
	@Override public void close() throws IOException {
		raf.close();
	}
	
}
73 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/ZlibInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.BufferedInputStream;
12 | import java.io.DataInputStream;
13 | import java.io.EOFException;
14 | import java.io.IOException;
15 | import java.io.InputStream;
16 | import java.util.Objects;
17 | import java.util.zip.Adler32;
18 | import io.nayuki.deflate.DataFormatException.Reason;
19 |
20 |
21 | public final class ZlibInputStream extends InputStream {
22 |
23 | /*---- Fields ----*/
24 |
25 | private InputStream rawInput;
26 | private InputStream decompressedInput;
27 |
28 | private final ZlibMetadata metadata;
29 |
30 | private Adler32 checksum = new Adler32();
31 |
32 |
33 |
34 | /*---- Constructor ----*/
35 |
36 | public ZlibInputStream(InputStream in) throws IOException {
37 | Objects.requireNonNull(in);
38 | metadata = ZlibMetadata.read(in);
39 | if (!in.markSupported())
40 | in = new BufferedInputStream(in);
41 | rawInput = in;
42 | decompressedInput = new InflaterInputStream(in, true);
43 | }
44 |
45 |
46 |
47 | /*---- Methods ----*/
48 |
49 | public ZlibMetadata getMetadata() {
50 | return metadata;
51 | }
52 |
53 |
54 | @Override public int read() throws IOException {
55 | var b = new byte[1];
56 | return switch (read(b)) {
57 | case 1 -> b[0] & 0xFF;
58 | case -1 -> -1; // EOF
59 | default -> throw new AssertionError("Unreachable value");
60 | };
61 | }
62 |
63 |
64 | @Override public int read(byte[] b, int off, int len) throws IOException {
65 | if (decompressedInput == null)
66 | return -1;
67 | int result = decompressedInput.read(b, off, len);
68 | if (result != -1)
69 | checksum.update(b, off, result);
70 | else {
71 | decompressedInput = null;
72 | int expectChecksum;
73 | try {
74 | expectChecksum = new DataInputStream(rawInput).readInt();
75 | } catch (EOFException e) {
76 | throw DataFormatException.throwUnexpectedEnd();
77 | }
78 | if ((int)checksum.getValue() != expectChecksum)
79 | throw new DataFormatException(Reason.DECOMPRESSED_CHECKSUM_MISMATCH, "Decompression Adler-32 mismatch");
80 | checksum = null;
81 | }
82 | return result;
83 | }
84 |
85 |
86 | @Override public void close() throws IOException {
87 | rawInput.close();
88 | rawInput = null;
89 | decompressedInput = null;
90 | checksum = null;
91 | }
92 |
93 | }
94 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/ZlibMetadata.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.IOException;
12 | import java.io.InputStream;
13 | import java.io.OutputStream;
14 | import java.util.Objects;
15 | import java.util.Optional;
16 | import io.nayuki.deflate.DataFormatException.Reason;
17 |
18 |
19 | public record ZlibMetadata(
20 | CompressionMethod compressionMethod,
21 | int compressionInfo, // Uint4
22 | Optional presetDictionary,
23 | CompressionLevel compressionLevel) {
24 |
25 |
26 | /*---- Constructor ----*/
27 |
28 | public ZlibMetadata {
29 | Objects.requireNonNull(compressionMethod);
30 |
31 | if (compressionInfo >>> 4 != 0 || compressionMethod == CompressionMethod.DEFLATE && compressionInfo > 7)
32 | throw new IllegalArgumentException("Invalid compression info value");
33 |
34 | Objects.requireNonNull(presetDictionary);
35 |
36 | Objects.requireNonNull(compressionLevel);
37 | }
38 |
39 |
40 | public static final ZlibMetadata DEFAULT = new ZlibMetadata(
41 | CompressionMethod.DEFLATE, 7, Optional.empty(), CompressionLevel.DEFAULT);
42 |
43 |
44 |
45 | /*---- Static factory ----*/
46 |
47 | public static ZlibMetadata read(InputStream in) throws IOException {
48 | Objects.requireNonNull(in);
49 | int cmf = in.read();
50 | int flg = in.read();
51 | if (flg == -1)
52 | DataFormatException.throwUnexpectedEnd();
53 | if ((cmf << 8 | flg) % CHECKSUM_MODULUS != 0)
54 | throw new DataFormatException(Reason.HEADER_CHECKSUM_MISMATCH, "Header checksum mismatch");
55 |
56 | int compMethodInt = cmf & 0xF;
57 | CompressionMethod compMethod = switch (compMethodInt) {
58 | case 8 -> CompressionMethod.DEFLATE;
59 | case 15 -> CompressionMethod.RESERVED;
60 | default -> throw new DataFormatException(Reason.UNSUPPORTED_COMPRESSION_METHOD, "Unsupported compression method: " + compMethodInt);
61 | };
62 |
63 | int compInfo = cmf >>> 4;
64 |
65 | Optional presetDict = Optional.empty();
66 | if (((flg >>> 5) & 1) != 0) {
67 | int val = 0;
68 | for (int i = 0; i < 4; i++) {
69 | int b = in.read();
70 | if (b == -1)
71 | DataFormatException.throwUnexpectedEnd();
72 | val = (val << 8) | b;
73 | }
74 | presetDict = Optional.of(val);
75 | }
76 |
77 | CompressionLevel compLevel = CompressionLevel.values()[flg >>> 6];
78 |
79 | return new ZlibMetadata(compMethod, compInfo, presetDict, compLevel);
80 | }
81 |
82 |
83 |
84 | /*---- Method ----*/
85 |
86 | public void write(OutputStream out) throws IOException {
87 | Objects.requireNonNull(out);
88 |
89 | int compMethodInt = switch (compressionMethod) {
90 | case DEFLATE -> 8;
91 | case RESERVED -> 15;
92 | };
93 | int cmf = (compMethodInt << 0) | (compressionInfo << 4);
94 | int flg = ((presetDictionary.isPresent() ? 1 : 0) << 5) | (compressionLevel.ordinal() << 6);
95 | flg |= (CHECKSUM_MODULUS - (cmf << 8 | flg) % CHECKSUM_MODULUS) % CHECKSUM_MODULUS;
96 |
97 | out.write(cmf);
98 | out.write(flg);
99 | if (presetDictionary.isPresent()) {
100 | int val = presetDictionary.get();
101 | for (int i = 3; i >= 0; i--)
102 | out.write(val >>> (i * 8));
103 | }
104 | }
105 |
106 |
107 | private static final int CHECKSUM_MODULUS = 31;
108 |
109 |
110 |
111 | /*---- Enumerations ----*/
112 |
113 | public enum CompressionMethod {
114 | DEFLATE,
115 | RESERVED,
116 | }
117 |
118 |
119 | public enum CompressionLevel {
120 | FASTEST,
121 | FAST,
122 | DEFAULT,
123 | MAXIMUM,
124 | }
125 |
126 | }
127 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/ZlibOutputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.DataOutput;
12 | import java.io.DataOutputStream;
13 | import java.io.IOException;
14 | import java.io.OutputStream;
15 | import java.util.Objects;
16 | import java.util.zip.Adler32;
17 |
18 |
19 | public final class ZlibOutputStream extends OutputStream {
20 |
21 | /*---- Fields ----*/
22 |
23 | private DeflaterOutputStream output;
24 |
25 | private Adler32 checksum = new Adler32();
26 |
27 |
28 |
29 | /*---- Constructors ----*/
30 |
31 | public ZlibOutputStream(OutputStream out, ZlibMetadata meta) throws IOException {
32 | this(new DeflaterOutputStream(out), meta);
33 | }
34 |
35 |
36 | public ZlibOutputStream(DeflaterOutputStream out, ZlibMetadata meta) throws IOException {
37 | Objects.requireNonNull(out);
38 | Objects.requireNonNull(meta);
39 | meta.write(out.getUnderlyingStream());
40 | output = out;
41 | }
42 |
43 |
44 |
45 | /*---- Methods ----*/
46 |
47 | @Override public void write(int b) throws IOException {
48 | write(new byte[]{(byte)b});
49 | }
50 |
51 |
52 | @Override public void write(byte[] b, int off, int len) throws IOException {
53 | if (checksum == null)
54 | throw new IllegalStateException("Stream already ended");
55 | output.write(b, off, len);
56 | checksum.update(b, off, len);
57 | }
58 |
59 |
60 | public void finish() throws IOException {
61 | if (checksum == null)
62 | throw new IllegalStateException("Stream already ended");
63 | output.finish();
64 | DataOutput dout = new DataOutputStream(output.getUnderlyingStream());
65 | dout.writeInt((int)checksum.getValue());
66 | checksum = null;
67 | }
68 |
69 |
70 | @Override public void close() throws IOException {
71 | if (checksum != null)
72 | finish();
73 | output.close();
74 | output = null;
75 | }
76 |
77 | }
78 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/BinarySplit.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 | import java.io.IOException;
12 | import java.util.Arrays;
13 | import java.util.Objects;
14 |
15 |
16 | public final class BinarySplit implements Strategy {
17 |
18 | private final Strategy substrategy;
19 | private final int minimumBlockLength;
20 |
21 |
22 | public BinarySplit(Strategy strat, int minBlockLen) {
23 | substrategy = Objects.requireNonNull(strat);
24 | if (minBlockLen < 1)
25 | throw new IllegalArgumentException("Non-positive minimum block length");
26 | minimumBlockLength = minBlockLen;
27 | }
28 |
29 |
30 | public Decision decide(byte[] b, int off, int historyLen, int dataLen) {
31 | return decide(b, off, historyLen, dataLen,
32 | substrategy.decide(b, off, historyLen, dataLen));
33 | }
34 |
35 |
36 | private Decision decide(byte[] b, int off, int historyLen, int dataLen, Decision curDec) {
37 | var subdecisions = new Decision[8][];
38 | Arrays.fill(subdecisions, new Decision[]{curDec});
39 | long[] bitLengths = curDec.getBitLengths().clone();
40 |
41 | int firstHalfLen = (dataLen + 1) / 2;
42 | int secondHalfLen = dataLen - firstHalfLen;
43 | if (Math.min(firstHalfLen, secondHalfLen) > minimumBlockLength) {
44 | Decision[] splitDecs = {
45 | substrategy.decide(b, off, historyLen, firstHalfLen),
46 | substrategy.decide(b, off, historyLen + firstHalfLen, secondHalfLen),
47 | };
48 | boolean improved = false;
49 | for (int i = 0; i < bitLengths.length; i++) {
50 | long bitLen = 0;
51 | for (Decision dec : splitDecs)
52 | bitLen += dec.getBitLengths()[(int)(bitLen % 8)];
53 | improved |= bitLen < bitLengths[i];
54 | }
55 |
56 | if (improved) {
57 | splitDecs[0] = decide(b, off, historyLen, firstHalfLen, splitDecs[0]);
58 | splitDecs[1] = decide(b, off, historyLen + firstHalfLen, secondHalfLen, splitDecs[1]);
59 | }
60 | for (int i = 0; i < bitLengths.length; i++) {
61 | long bitLen = 0;
62 | for (Decision dec : splitDecs)
63 | bitLen += dec.getBitLengths()[(int)(bitLen % 8)];
64 | if (bitLen < bitLengths[i]) {
65 | bitLengths[i] = bitLen;
66 | subdecisions[i] = splitDecs;
67 | }
68 | }
69 | }
70 |
71 | return new Decision() {
72 | @Override public long[] getBitLengths() {
73 | return bitLengths;
74 | }
75 |
76 | @Override public void compressTo(BitOutputStream out, boolean isFinal) throws IOException {
77 | Decision[] decs = subdecisions[out.getBitPosition()];
78 | for (int i = 0; i < decs.length; i++)
79 | decs[i].compressTo(out, isFinal && i == decs.length - 1);
80 | }
81 | };
82 | }
83 |
84 | }
85 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/BitOutputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 | import java.io.IOException;
12 |
13 |
/**
 * A sink that accepts data in units of bits and reports its current bit position.
 */
public interface BitOutputStream {
	
	/**
	 * Returns the current bit position of this stream. Callers in
	 * this package use it as an index in the range [0, 7].
	 * @return the current bit position
	 */
	int getBitPosition();
	
	
	/**
	 * Writes the specified number of bits taken from the specified value.
	 * @param value the integer holding the bits to write
	 * @param numBits the number of bits to write
	 * @throws IOException if an I/O exception occurs
	 */
	void writeBits(int value, int numBits) throws IOException;
	
}
22 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/CountingBitOutputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 | import java.io.IOException;
12 |
13 |
14 | final class CountingBitOutputStream implements BitOutputStream {
15 |
16 | private long length = 0;
17 |
18 |
19 | @Override public void writeBits(int value, int numBits) throws IOException {
20 | length += numBits;
21 | }
22 |
23 |
24 | @Override public int getBitPosition() {
25 | return (int)length % 8;
26 | }
27 |
28 |
29 | public long getBitLength() {
30 | return length;
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/Decision.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 | import java.io.IOException;
12 |
13 |
14 | public interface Decision {
15 |
16 | public long[] getBitLengths();
17 |
18 |
19 | public void compressTo(BitOutputStream out, boolean isFinal) throws IOException;
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/Lz77Huffman.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 | import java.io.IOException;
12 | import java.nio.ShortBuffer;
13 | import java.util.ArrayList;
14 | import java.util.Arrays;
15 | import java.util.Collections;
16 | import java.util.Iterator;
17 | import java.util.List;
18 |
19 |
20 | public record Lz77Huffman(
21 | boolean useDynamicHuffmanCodes,
22 | int searchMinimumRunLength,
23 | int searchMaximumRunLength,
24 | int searchMinimumDistance,
25 | int searchMaximumDistance)
26 | implements Strategy {
27 |
28 |
29 | public Lz77Huffman {
30 | int minRun = searchMinimumRunLength;
31 | int maxRun = searchMaximumRunLength;
32 | int minDist = searchMinimumDistance;
33 | int maxDist = searchMaximumDistance;
34 | if (minRun == 0 && maxRun == 0 && minDist == 0 && maxDist == 0);
35 | else if (ABSOLUTE_MINIMUM_RUN_LENGTH <= minRun && minRun <= maxRun && maxRun <= ABSOLUTE_MAXIMUM_RUN_LENGTH &&
36 | ABSOLUTE_MINIMUM_DISTANCE <= minDist && minDist <= maxDist && maxDist <= ABSOLUTE_MAXIMUM_DISTANCE);
37 | else
38 | throw new IllegalArgumentException("Invalid minimum/maximum run-length/distance");
39 | }
40 |
41 |
42 | @Override public Decision decide(byte[] b, int off, int historyLen, int dataLen) {
43 | return new Decision() {
44 | private final long[] bitLengths = new long[8];
45 | {
46 | var temp = new CountingBitOutputStream();
47 | try {
48 | compressTo(temp, false);
49 | } catch (IOException e) {
50 | throw new AssertionError("Caught impossible exception", e);
51 | }
52 | Arrays.fill(bitLengths, temp.getBitLength());
53 | }
54 |
55 |
56 | @Override public long[] getBitLengths() {
57 | return bitLengths;
58 | }
59 |
60 |
61 | @Override public void compressTo(BitOutputStream out, boolean isFinal) throws IOException {
62 | int index = off + historyLen;
63 | final int end = index + dataLen;
64 |
65 | var symbolsAndExtraBits = ShortBuffer.allocate(Math.toIntExact(Math.ceilDiv(dataLen * 4L, 3) + 1));
66 | var litLenHistogram = new int[286];
67 | var distHistogram = new int[30];
68 | while (index < end) {
69 | int bestRun = 0;
70 | int bestDist = 0;
71 | for (int dist = searchMinimumDistance, distEnd = Math.min(searchMaximumDistance, index - off); dist <= distEnd && bestRun < searchMaximumRunLength; dist++) {
72 | int run = 0;
73 | int historyIndex = index - dist;
74 | int dataIndex = index;
75 | for (; run < searchMaximumRunLength && dataIndex < end && b[dataIndex] == b[historyIndex]; run++, dataIndex++) {
76 | historyIndex++;
77 | if (historyIndex == index)
78 | historyIndex -= dist;
79 | }
80 | if (run > bestRun || run == bestRun && dist < bestDist) {
81 | bestRun = run;
82 | bestDist = dist;
83 | }
84 | }
85 | if (bestRun == 0 || bestRun < searchMinimumRunLength) {
86 | int sym = b[index] & 0xFF; // Literal
87 | index++;
88 | symbolsAndExtraBits.put((short)(sym << 4));
89 | litLenHistogram[sym]++;
90 | }
91 | else {
92 | {
93 | int r = bestRun - 3;
94 | int numExtra, sym, extra;
95 | if (bestRun < 11) { // Actually works down to run < 7
96 | numExtra = 0;
97 | sym = r + 257;
98 | extra = 0;
99 | } else if (bestRun == 258) {
100 | numExtra = 0;
101 | sym = 285;
102 | extra = 0;
103 | } else {
104 | numExtra = 29 - Integer.numberOfLeadingZeros(r);
105 | sym = (numExtra << 2) + (r >>> numExtra) + 257;
106 | extra = r & ((1 << numExtra) - 1);
107 | }
108 | symbolsAndExtraBits.put((short)(sym << 4 | numExtra));
109 | litLenHistogram[sym]++;
110 | symbolsAndExtraBits.put((short)extra);
111 | }
112 | {
113 | int d = bestDist - 1;
114 | int numExtra, sym, extra;
115 | if (bestDist < 5) { // Actually works down to bestDist < 3
116 | numExtra = 0;
117 | sym = d;
118 | extra = 0;
119 | } else {
120 | numExtra = 30 - Integer.numberOfLeadingZeros(d);
121 | sym = (numExtra << 1) + (d >>> numExtra);
122 | extra = d & ((1 << numExtra) - 1);
123 | }
124 | symbolsAndExtraBits.put((short)(sym << 4 | numExtra));
125 | distHistogram[sym]++;
126 | symbolsAndExtraBits.put((short)extra);
127 | }
128 | index += bestRun;
129 | }
130 | }
131 | symbolsAndExtraBits.put((short)(256 << 4));
132 | litLenHistogram[256]++;
133 |
134 | out.writeBits((isFinal ? 1 : 0), 1); // bfinal
135 | out.writeBits((!useDynamicHuffmanCodes ? 1 : 2), 2); // btype
136 |
137 | int[] litLenCode;
138 | int[] distCode;
139 | if (!useDynamicHuffmanCodes) {
140 | litLenCode = STATIC_LITERAL_LENGTH_CODE;
141 | distCode = STATIC_DISTANCE_CODE;
142 | }
143 | else { // Further histogram processing and dynamic code generation
144 |
145 | {
146 | if (dataLen == 0)
147 | litLenHistogram[0]++; // Dummy value to fill the Huffman code tree
148 | int histoEnd = litLenHistogram.length;
149 | for (; histoEnd > 257 && litLenHistogram[histoEnd - 1] == 0; histoEnd--);
150 | if (histoEnd < litLenHistogram.length)
151 | litLenHistogram = Arrays.copyOf(litLenHistogram, histoEnd);
152 | }
153 | byte[] litLenCodeLen = calcHuffmanCodeLengths(litLenHistogram, 15);
154 |
155 | {
156 | int numDistCodesUsed = 0;
157 | for (int x : distHistogram) {
158 | if (x > 0)
159 | numDistCodesUsed++;
160 | }
161 | if (numDistCodesUsed == 1) {
162 | for (int i = 0; i < distHistogram.length; i++) {
163 | if (distHistogram[i] > 0) {
164 | if (distHistogram.length - i > 1)
165 | distHistogram[i + 1] = 1;
166 | else
167 | distHistogram[i - 1] = 1;
168 | break;
169 | }
170 | }
171 | }
172 | int histoEnd = distHistogram.length;
173 | for (; histoEnd > 1 && distHistogram[histoEnd - 1] == 0; histoEnd--);
174 | if (histoEnd < distHistogram.length)
175 | distHistogram = Arrays.copyOf(distHistogram, histoEnd);
176 | }
177 | byte[] distCodeLen;
178 | if (distHistogram.length == 1 && distHistogram[0] == 0)
179 | distCodeLen = new byte[]{0};
180 | else
181 | distCodeLen = calcHuffmanCodeLengths(distHistogram, 15);
182 |
183 | var codeLens = new byte[litLenCodeLen.length + distCodeLen.length];
184 | System.arraycopy(litLenCodeLen, 0, codeLens, 0, litLenCodeLen.length);
185 | System.arraycopy(distCodeLen, 0, codeLens, litLenCodeLen.length, distCodeLen.length);
186 |
187 | List codeLengthSymbols = new ArrayList<>();
188 | List extraBits = new ArrayList<>();
189 | for (int i = 0; i < codeLens.length; ) { // Greedy algorithm
190 | int val = codeLens[i];
191 | if (val == 0) {
192 | int runLength = 1;
193 | for (; runLength < 138 && i + runLength < codeLens.length
194 | && codeLens[i + runLength] == 0; runLength++);
195 | if (runLength < 3) {
196 | codeLengthSymbols.add(val);
197 | i++;
198 | } else if (runLength < 11) {
199 | codeLengthSymbols.add(17);
200 | extraBits.add(runLength - 3);
201 | i += runLength;
202 | } else if (runLength < 139) {
203 | codeLengthSymbols.add(18);
204 | extraBits.add(runLength - 11);
205 | i += runLength;
206 | } else
207 | throw new AssertionError("Unreachable value");
208 | continue;
209 | }
210 | if (i > 0) {
211 | int runLength = 0;
212 | for (; runLength < 6 && i + runLength < codeLens.length
213 | && codeLens[i + runLength] == codeLens[i - 1]; runLength++);
214 | if (runLength >= 3) {
215 | codeLengthSymbols.add(16);
216 | extraBits.add(runLength - 3);
217 | i += runLength;
218 | continue;
219 | }
220 | }
221 | codeLengthSymbols.add(val);
222 | i++;
223 | }
224 |
225 | var codeLenHistogram = new int[19];
226 | for (int sym : codeLengthSymbols)
227 | codeLenHistogram[sym]++;
228 | byte[] codeLenCodeLen = calcHuffmanCodeLengths(codeLenHistogram, 7);
229 |
230 | var reordered = new int[codeLenCodeLen.length];
231 | for (int i = 0; i < reordered.length; i++)
232 | reordered[i] = codeLenCodeLen[CODE_LENGTH_CODE_ORDER[i]];
233 | int numCodeLenCodeLens = reordered.length;
234 | for (; numCodeLenCodeLens > 4 && reordered[numCodeLenCodeLens - 1] == 0; numCodeLenCodeLens--);
235 |
236 | out.writeBits(litLenCodeLen.length - 257, 5); // hlit
237 | out.writeBits(distCodeLen .length - 1, 5); // hdist
238 | out.writeBits(numCodeLenCodeLens - 4, 4); // hclen
239 |
240 | for (int i = 0; i < numCodeLenCodeLens; i++)
241 | out.writeBits(reordered[i], 3);
242 |
243 | int[] codeLenCode = codeLengthsToCodes(codeLenCodeLen, 7);
244 | Iterator extraBitsIter = extraBits.iterator();
245 | for (int sym : codeLengthSymbols) {
246 | int pair = codeLenCode[sym];
247 | out.writeBits(pair >>> 4, pair & 0xF);
248 | if (sym >= 16) {
249 | out.writeBits(extraBitsIter.next(), switch (sym) {
250 | case 16 -> 2;
251 | case 17 -> 3;
252 | case 18 -> 7;
253 | default -> throw new AssertionError("Unreachable value");
254 | });
255 | }
256 | }
257 | if (extraBitsIter.hasNext())
258 | throw new AssertionError("Unreachable state");
259 |
260 | litLenCode = codeLengthsToCodes(litLenCodeLen, 15);
261 | if (distCodeLen.length == 1 && distCodeLen[0] == 0)
262 | distCode = null;
263 | else
264 | distCode = codeLengthsToCodes(distCodeLen, 15);
265 | }
266 |
267 | symbolsAndExtraBits.flip();
268 | while (symbolsAndExtraBits.hasRemaining()) {
269 | int litLenPair = symbolsAndExtraBits.get();
270 | int litLenSym = litLenPair >>> 4;
271 | assert 0 <= litLenSym && litLenSym <= 285;
272 | int lenNumExtra = litLenPair & 0xF;
273 | int litLenCodePair = litLenCode[litLenSym];
274 | out.writeBits(litLenCodePair >>> 4, litLenCodePair & 0xF);
275 | if (litLenSym > 256) {
276 | out.writeBits(symbolsAndExtraBits.get(), lenNumExtra);
277 | int distPair = symbolsAndExtraBits.get();
278 | int distSym = distPair >>> 4;
279 | assert 0 <= distSym && distSym <= 29;
280 | int distNumExtra = distPair & 0xF;
281 | int distCodePair = distCode[distSym];
282 | out.writeBits(distCodePair >>> 4, distCodePair & 0xF);
283 | out.writeBits(symbolsAndExtraBits.get(), distNumExtra);
284 | }
285 | }
286 | }
287 | };
288 | }
289 |
290 |
// Bounds on the length of one LZ77 copy (run), in bytes.
291 | public static final int ABSOLUTE_MINIMUM_RUN_LENGTH = 3;
292 | public static final int ABSOLUTE_MAXIMUM_RUN_LENGTH = 258;
293 |
// Bounds on how far back an LZ77 copy may reach, in bytes.
294 | public static final int ABSOLUTE_MINIMUM_DISTANCE = 1;
295 | public static final int ABSOLUTE_MAXIMUM_DISTANCE = 32 * 1024;
296 |
297 |
// Ready-made strategy instances. NOTE(review): the constructor is not visible in this part of
// the file; judging by the FULL_* constants the arguments are presumably
// (dynamic Huffman?, min run, max run, min distance, max distance) - confirm against the constructor.

// All-zero run/distance limits; presumably literal symbols only (no LZ77 matching).
298 | public static final Lz77Huffman LITERAL_STATIC = new Lz77Huffman(false, 0, 0, 0, 0);
299 | public static final Lz77Huffman LITERAL_DYNAMIC = new Lz77Huffman(true , 0, 0, 0, 0);
300 |
// Full run-length range, but both distance limits are 1.
301 | public static final Lz77Huffman RLE_STATIC = new Lz77Huffman(false, ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, 1, 1);
302 | public static final Lz77Huffman RLE_DYNAMIC = new Lz77Huffman(true , ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, 1, 1);
303 |
// Full run-length range and full distance range.
304 | public static final Lz77Huffman FULL_STATIC = new Lz77Huffman(false, ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, ABSOLUTE_MINIMUM_DISTANCE, ABSOLUTE_MAXIMUM_DISTANCE);
305 | public static final Lz77Huffman FULL_DYNAMIC = new Lz77Huffman(true , ABSOLUTE_MINIMUM_RUN_LENGTH, ABSOLUTE_MAXIMUM_RUN_LENGTH, ABSOLUTE_MINIMUM_DISTANCE, ABSOLUTE_MAXIMUM_DISTANCE);
306 |
307 |
308 |
309 | private static byte[] calcHuffmanCodeLengths(int[] symbolHistogram, int maxLen) {
310 | List leaves = new ArrayList<>();
311 | for (int sym = 0; sym < symbolHistogram.length; sym++) {
312 | int freq = symbolHistogram[sym];
313 | if (freq > 0)
314 | leaves.add(new Leaf(freq, sym));
315 | }
316 |
317 | // Package-merge algorithm
318 | List nodes = new ArrayList<>();
319 | for (int i = 0; i < maxLen; i++) {
320 | nodes.addAll(leaves);
321 | Collections.sort(nodes, (x, y) -> Long.compare(x.frequency(), y.frequency()));
322 | List newNodes = new ArrayList<>();
323 | for (int j = 0; j + 2 <= nodes.size(); j += 2) {
324 | Node a = nodes.get(j + 0);
325 | Node b = nodes.get(j + 1);
326 | newNodes.add(new InternalNode(a.frequency() + b.frequency(), a, b));
327 | }
328 | nodes = newNodes;
329 | }
330 |
331 | var nodeHistogram = new byte[symbolHistogram.length];
332 | for (int i = 0; i < leaves.size() - 1; i++)
333 | nodes.get(i).countOccurrences(nodeHistogram);
334 | return nodeHistogram;
335 | }
336 |
337 |
338 |
339 | private interface Node {
340 |
341 | public long frequency();
342 |
343 | public void countOccurrences(byte[] nodeHistogram);
344 |
345 | }
346 |
347 |
348 | private record InternalNode(long frequency, Node... children) implements Node {
349 |
350 | public void countOccurrences(byte[] nodeHistogram) {
351 | for (Node node : children)
352 | node.countOccurrences(nodeHistogram);
353 | }
354 |
355 | }
356 |
357 |
358 | private record Leaf(long frequency, int symbol) implements Node {
359 |
360 | public void countOccurrences(byte[] nodeHistogram) {
361 | nodeHistogram[symbol]++;
362 | }
363 |
364 | }
365 |
366 |
367 |
// The order in which the code lengths of the 19 code-length symbols are written to the
// block header: entry i names the symbol whose length goes into the i-th 3-bit slot.
// Symbols that are often unused come last so that trailing zero lengths can be omitted.
368 | private static final int[] CODE_LENGTH_CODE_ORDER =
369 | {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
370 |
371 |
372 | private static int[] codeLengthsToCodes(byte[] codeLengths, int maxCodeLength) {
373 | if (!(1 <= maxCodeLength && maxCodeLength <= 15))
374 | throw new IllegalArgumentException("Invalid maximum code length");
375 | var result = new int[codeLengths.length];
376 | int nextCode = 0;
377 | for (int codeLength = 1; codeLength <= maxCodeLength; codeLength++) {
378 | nextCode <<= 1;
379 | for (int symbol = 0; symbol < codeLengths.length; symbol++) {
380 | if (codeLengths[symbol] != codeLength)
381 | continue;
382 | if (nextCode >>> codeLength != 0)
383 | throw new IllegalArgumentException("This canonical code produces an over-full Huffman code tree");
384 | result[symbol] = Integer.reverse(nextCode) >>> (32 - codeLength) << 4 | codeLength;
385 | nextCode++;
386 | }
387 | }
388 | if (nextCode != 1 << maxCodeLength)
389 | throw new IllegalArgumentException("This canonical code produces an under-full Huffman code tree");
390 | return result;
391 | }
392 |
393 |
// Packed codes (see codeLengthsToCodes()) for the fixed literal/length code:
// symbols 0-143 use 8 bits, 144-255 use 9 bits, 256-279 use 7 bits, 280-287 use 8 bits.
private static final int[] STATIC_LITERAL_LENGTH_CODE;
static {
    var lengths = new byte[288];
    Arrays.fill(lengths,   0, 144, (byte)8);
    Arrays.fill(lengths, 144, 256, (byte)9);
    Arrays.fill(lengths, 256, 280, (byte)7);
    Arrays.fill(lengths, 280, 288, (byte)8);
    STATIC_LITERAL_LENGTH_CODE = codeLengthsToCodes(lengths, 9);
}
404 |
// Packed codes (see codeLengthsToCodes()) for the fixed distance code:
// all 32 symbols use 5 bits.
private static final int[] STATIC_DISTANCE_CODE;
static {
    var lengths = new byte[32];
    for (int sym = 0; sym < lengths.length; sym++)
        lengths[sym] = 5;
    STATIC_DISTANCE_CODE = codeLengthsToCodes(lengths, 5);
}
411 |
412 | }
413 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/MultiStrategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 | import java.io.IOException;
12 | import java.util.Arrays;
13 | import java.util.Objects;
14 |
15 |
16 | public final class MultiStrategy implements Strategy {
17 |
18 | private Strategy[] substrategies;
19 |
20 |
21 | public MultiStrategy(Strategy... strats) {
22 | Objects.requireNonNull(strats);
23 | for (Strategy st : strats)
24 | Objects.requireNonNull(st);
25 | if (strats.length == 0)
26 | throw new IllegalArgumentException("Empty list of strategies");
27 | substrategies = strats;
28 | }
29 |
30 |
31 | public Decision decide(byte[] b, int off, int historyLen, int dataLen) {
32 | var bitLengths = new long[8];
33 | var subdecisions = new Decision[bitLengths.length];
34 | Arrays.fill(bitLengths, Long.MAX_VALUE);
35 | for (Strategy st : substrategies) {
36 | Decision dec = st.decide(b, off, historyLen, dataLen);
37 | long[] bitLens = dec.getBitLengths();
38 | for (int i = 0; i < bitLengths.length; i++) {
39 | if (bitLens[i] < bitLengths[i]) {
40 | bitLengths[i] = bitLens[i];
41 | subdecisions[i] = dec;
42 | }
43 | }
44 | }
45 | for (Decision dec : subdecisions)
46 | Objects.requireNonNull(dec);
47 |
48 | return new Decision() {
49 | @Override public long[] getBitLengths() {
50 | return bitLengths;
51 | }
52 |
53 | @Override public void compressTo(BitOutputStream out, boolean isFinal) throws IOException {
54 | subdecisions[out.getBitPosition()].compressTo(out, isFinal);
55 | }
56 | };
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/Strategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 |
12 | public interface Strategy {
13 |
14 | public Decision decide(byte[] b, int off, int historyLen, int dataLen);
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/comp/Uncompressed.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.comp;
10 |
11 | import java.io.IOException;
12 |
13 |
14 | public enum Uncompressed implements Strategy {
15 |
16 | SINGLETON;
17 |
18 |
19 | @Override public Decision decide(byte[] b, int off, int historyLen, int dataLen) {
20 | return new Decision() {
21 | private final long[] bitLengths = new long[8];
22 | {
23 | int numBlocks = Math.max(Math.ceilDiv(dataLen, MAX_BLOCK_LEN), 1);
24 | for (int i = 0; i < bitLengths.length; i++)
25 | bitLengths[i] = dataLen * 8L + numBlocks * 40L + ((13 - i) % 8 - 5);
26 | }
27 |
28 | @Override public long[] getBitLengths() {
29 | return bitLengths;
30 | }
31 |
32 | @Override public void compressTo(BitOutputStream out, boolean isFinal) throws IOException {
33 | int index = off + historyLen;
34 | final int end = index + dataLen;
35 | do {
36 | int n = Math.min(end - index, MAX_BLOCK_LEN);
37 | out.writeBits((isFinal && n == end - index) ? 1 : 0, 1);
38 | out.writeBits(0, 2);
39 | out.writeBits(0, (8 - out.getBitPosition()) % 8);
40 | out.writeBits(n ^ 0x0000, 16);
41 | out.writeBits(n ^ 0xFFFF, 16);
42 | int e = index + n;
43 | for (; index < e; index++)
44 | out.writeBits(b[index] & 0xFF, 8);
45 | } while (index < end);
46 | }
47 | };
48 | }
49 |
50 |
51 | private static final int MAX_BLOCK_LEN = (1 << 16) - 1; // Configurable in the range [1, 65535]
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/decomp/Closed.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.decomp;
10 |
11 |
12 | public enum Closed implements State {
13 | SINGLETON
14 | }
15 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/decomp/Open.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.decomp;
10 |
11 | import java.io.DataInputStream;
12 | import java.io.EOFException;
13 | import java.io.IOException;
14 | import java.io.InputStream;
15 | import java.nio.ByteBuffer;
16 | import java.nio.ByteOrder;
17 | import java.util.Arrays;
18 | import java.util.Optional;
19 | import io.nayuki.deflate.DataFormatException;
20 | import io.nayuki.deflate.DataFormatException.Reason;
21 |
22 |
23 | public final class Open implements State {
24 |
25 | /*---- Fields ----*/
26 |
27 | // The underlying stream to read from
28 | public final InputStream input;
29 |
30 | // Indicates whether mark() should be called when the underlying
31 | // input stream is read, and whether calling detach() is allowed.
32 | private final boolean endExactly;
33 |
34 |
35 | // The typical data flow in this decompressor looks like:
36 | // input (the underlying input stream) -> input.read()
37 | // -> inputBuffer -> getLong()
38 | // -> inputBitBuffer1 -> packing logic in readBits()
39 | // -> inputBitBuffer0 -> readBit() or equivalent
40 | // -> Huffman decoding logic for literal and length-distance symbols
41 | // -> LZ77 decoding logic -> dictionary
42 | // -> copying to the caller's array
43 | // -> b (the array passed into this.read(byte[],int,int)).
44 |
45 | // Buffer of bytes read from input.read() (the underlying input stream)
46 | private final ByteBuffer inputBuffer; // Can have any positive length (but longer means less overhead)
47 |
48 | // Buffer of bits packed from the bytes in `inputBuffer`
49 | private long inputBitBuffer0 = 0; // Always in the range [0, 2^inputBitBuffer0Length)
50 | private int inputBitBuffer0Length = 0; // Always in the range [0, 64]
51 |
52 | private long inputBitBuffer1 = 0; // Always in the range [0, 2^inputBitBuffer1Length)
53 | private int inputBitBuffer1Length = 0; // Always in the range [0, 64]
54 |
55 |
56 | private Optional blockDecoder = Optional.empty();
57 |
58 | // Indicates whether a block header with the `bfinal` flag has been seen.
59 | // This starts as false, should eventually become true, and never changes back to false.
60 | private boolean isLastBlock = false;
61 |
62 |
63 | // Buffer of last 32 KiB of decoded data, for LZ77 decompression
64 | private final byte[] dictionary = new byte[DICTIONARY_LENGTH];
65 | private int dictionaryIndex = 0; // Always in the range [0, dictionary.length)
66 | private int dictionaryLength = 0; // Number of bytes written, in the range [0, dictionary.length], saturating at the maximum
67 |
68 |
69 |
70 | /*---- Constructor ----*/
71 |
72 | public Open(InputStream in, boolean endExact, int inBufLen) {
73 | input = in;
74 | endExactly = endExact;
75 | inputBuffer = ByteBuffer.allocate(inBufLen)
76 | .order(ByteOrder.LITTLE_ENDIAN).position(0).limit(0);
77 | }
78 |
79 |
80 |
81 | /*---- Public methods ----*/
82 |
83 | public int read(byte[] b, int off, int len) throws IOException {
84 | int result = 0; // Number of bytes filled in the array `b`
85 | while (result < len) {
86 | if (blockDecoder.isEmpty()) { // Between blocks
87 | if (isLastBlock)
88 | break;
89 |
90 | // Read and process the block header
91 | isLastBlock = readBits(1) == 1;
92 | blockDecoder = Optional.of(switch (readBits(2)) { // Type
93 | case 0 -> new UncompressedBlock();
94 | case 1 -> new HuffmanBlock(false);
95 | case 2 -> new HuffmanBlock(true);
96 | case 3 -> throw new DataFormatException(Reason.RESERVED_BLOCK_TYPE, "Reserved block type");
97 | default -> throw new AssertionError("Unreachable value");
98 | });
99 | }
100 |
101 | BlockDecoder dec = blockDecoder.get();
102 | result += dec.read(b, off + result, len - result);
103 | if (dec.isDone()) {
104 | blockDecoder = Optional.empty();
105 | if (isLastBlock && endExactly)
106 | finish();
107 | }
108 | }
109 | return (result > 0 || blockDecoder.isPresent() || !isLastBlock) ? result : -1;
110 | }
111 |
112 |
113 | private void finish() throws IOException {
114 | // Rewind the underlying stream, then skip over bytes that were already consumed.
115 | // Note that a byte with some bits consumed is considered to be fully consumed.
116 | input.reset();
117 | int skip = inputBuffer.position() - (inputBitBuffer0Length + inputBitBuffer1Length) / 8;
118 | assert skip >= 0;
119 | try {
120 | new DataInputStream(input).skipNBytes(skip);
121 | } catch (EOFException e) {
122 | DataFormatException.throwUnexpectedEnd();
123 | }
124 | }
125 |
126 |
127 | public void close() throws IOException {
128 | input.close();
129 | }
130 |
131 |
132 |
133 | /*---- Private methods ----*/
134 |
135 | // Returns the given number of least significant bits from the bit buffer.
136 | // This updates the bit buffer state and possibly also the byte buffer state.
// The bit consumed first ends up in bit 0 of the result. If the underlying stream
// ends before enough bits are available, fillInputBuffer() reports it through
// DataFormatException.throwUnexpectedEnd().
137 | private int readBits(int numBits) throws IOException {
138 | // Check arguments and invariants
139 | assert 0 <= numBits && numBits <= 16; // Note: DEFLATE uses up to 16, but this method is correct up to 31
140 | assert isBitBufferValid();
141 |
142 | // Ensure there is enough data in the bit buffer to satisfy the request
143 | while (inputBitBuffer0Length < numBits) {
144 | if (inputBitBuffer1Length > 0) {
// Move as many bits as fit from the overflow buffer (1) into the primary buffer (0).
// Bits shifted beyond position 63 are dropped here but remain in buffer 1.
145 | int n = Math.min(64 - inputBitBuffer0Length, inputBitBuffer1Length);
146 | inputBitBuffer0 |= inputBitBuffer1 << inputBitBuffer0Length;
147 | inputBitBuffer0Length += n;
148 | inputBitBuffer1 >>>= n;
149 | inputBitBuffer1Length -= n;
150 | } else {
151 | if (!inputBuffer.hasRemaining())
152 | fillInputBuffer();
153 |
154 | // Pack as many bytes as possible from input byte buffer into the bit buffer
// Only whole bytes are packed, so at most floor((64 - length) / 8) of them fit.
155 | int numBytes = Math.min((64 - inputBitBuffer0Length) >>> 3, inputBuffer.remaining());
156 | assert 0 <= numBytes && numBytes <= 8;
157 | for (int i = 0; i < numBytes; i++, inputBitBuffer0Length += 8)
158 | inputBitBuffer0 |= (inputBuffer.get() & 0xFFL) << inputBitBuffer0Length;
159 | assert isBitBufferValid();
160 | }
161 | }
162 |
163 | // Extract the bits to return
164 | int result = (int)inputBitBuffer0 & ((1 << numBits) - 1);
165 | assert result >>> numBits == 0;
166 | inputBitBuffer0 >>>= numBits;
167 | inputBitBuffer0Length -= numBits;
168 | assert isBitBufferValid();
169 | return result;
170 | }
171 |
172 |
173 | private boolean isBitBufferValid() {
174 | return 0 <= inputBitBuffer0Length && inputBitBuffer0Length <= 64
175 | && (inputBitBuffer0Length == 64 || inputBitBuffer0 >>> inputBitBuffer0Length == 0);
176 | }
177 |
178 |
179 | // Fills the empty input byte buffer with at least
180 | // one new byte read from the underlying input stream.
181 | private void fillInputBuffer() throws IOException {
182 | assert !inputBuffer.hasRemaining();
183 | if (endExactly)
184 | input.mark(inputBuffer.capacity());
185 | int n = input.read(inputBuffer.array());
186 | if (n == -1)
187 | DataFormatException.throwUnexpectedEnd();
188 | else if (n == 0)
189 | throw new AssertionError("read() returned zero bytes");
190 | else
191 | inputBuffer.position(0).limit(n);
192 | }
193 |
194 |
195 |
196 | /*---- Constants ----*/
197 |
// Size of the sliding window in bytes; must be a power of 2. Do not change this value:
// a smaller window may make decompression produce different data that violates the
// DEFLATE spec (but no crashes), and a larger one behaves the same while wasting memory.
private static final int DICTIONARY_LENGTH = 1 << 15;

// Index wrap-around is done with this mask, which is why the length must be a power of 2.
private static final int DICTIONARY_MASK = DICTIONARY_LENGTH - 1;

static {
    final boolean longEnough = DICTIONARY_LENGTH >= 32 * 1024;
    if (!longEnough)
        throw new AssertionError("Dictionary length shorter than required by the specification");
    final boolean powerOfTwo = Integer.bitCount(DICTIONARY_LENGTH) == 1;
    if (!powerOfTwo)
        throw new AssertionError("Dictionary length not a power of 2");  // Required for mask-based modulo calculation
}
212 |
213 |
214 |
215 | /*---- Block decoder types ----*/
216 |
217 | private interface BlockDecoder {
218 |
219 | // Unlike InputStream.read(byte[]), this returns [0, len] but never -1.
220 | public int read(byte[] b, int off, int len) throws IOException;
221 |
222 | public boolean isDone();
223 |
224 | }
225 |
226 |
// Decoder for a stored (type 0) block: the payload bytes are copied
// unchanged from the input to the caller's array and to the dictionary.
227 | private final class UncompressedBlock implements BlockDecoder {
228 |
// Payload bytes of this block not yet delivered to the caller
229 | private int numRemainingBytes; // Non-negative
230 |
231 |
// Reads the rest of the block header: skips to the next byte boundary, then reads the
// 16-bit length and checks that the following 16 bits are its ones' complement.
232 | public UncompressedBlock() throws IOException {
233 | // Discard bits to align to byte
// The bit buffers are only ever filled with whole bytes, so the buffered bit
// count modulo 8 is the number of bits left over from the partly consumed byte.
234 | readBits((inputBitBuffer0Length + inputBitBuffer1Length) % 8);
235 | assert (inputBitBuffer0Length + inputBitBuffer1Length) % 8 == 0;
236 |
237 | numRemainingBytes = readBits(16);
238 | assert 0x0000 <= numRemainingBytes && numRemainingBytes <= 0xFFFF;
239 | if (numRemainingBytes != (readBits(16) ^ 0xFFFF))
240 | throw new DataFormatException(Reason.UNCOMPRESSED_BLOCK_LENGTH_MISMATCH, "len/nlen mismatch in uncompressed block");
241 | }
242 |
243 |
// Delivers min(len, remaining payload) bytes, taking them in turn from the bit
// buffers, the input byte buffer and finally the underlying stream itself.
// Returns the number of bytes delivered, which is 0 once the block is exhausted.
244 | public int read(byte[] b, final int off, int len) throws IOException {
245 | if (numRemainingBytes < 0)
246 | throw new AssertionError("Unreachable state");
247 |
248 | // Check bit buffer invariants
249 | assert isBitBufferValid();
250 | assert (inputBitBuffer0Length + inputBitBuffer1Length) % 8 == 0;
251 |
// From here on, len is the exact number of bytes this call delivers
252 | len = Math.min(numRemainingBytes, len);
253 | numRemainingBytes -= len;
254 | int index = off;
255 | final int end = off + len;
256 | assert off <= end && end <= b.length;
257 |
258 | // First unpack saved bits
259 | for (; inputBitBuffer0Length + inputBitBuffer1Length >= 8 && index < end; index++)
260 | b[index] = (byte)readBits(8);
261 |
262 | // Copy from input buffer
263 | {
264 | int n = Math.min(end - index, inputBuffer.remaining());
265 | assert inputBitBuffer0Length + inputBitBuffer1Length == 0 || n == 0;
266 | inputBuffer.get(b, index, n);
267 | index += n;
268 | }
269 |
270 | // Read directly from input stream, bypassing the input buffer
271 | if (index < end) {
272 | assert inputBitBuffer0Length + inputBitBuffer1Length == 0 && !inputBuffer.hasRemaining();
// The byte buffer is reset to empty at position 0 and the stream is re-marked, apparently
// so that finish() (reset, then skip inputBuffer.position() bytes) stays consistent
// with the bytes that bypass the buffer below.
273 | if (endExactly) {
274 | inputBuffer.position(0).limit(0);
275 | input.mark(0);
276 | }
277 | do {
278 | int n = input.read(b, index, end - index);
279 | if (n == -1)
280 | DataFormatException.throwUnexpectedEnd();
281 | index += n;
282 | } while (index < end);
// Mark again so that the mark sits right after the bytes just read
283 | if (endExactly)
284 | input.mark(0);
285 | }
286 |
287 | // Copy output bytes to dictionary
// Done in chunks because the dictionary is circular: each chunk
// ends at the latest at the end of the array, then the index wraps to 0.
288 | for (index = off; index < end; ) {
289 | int n = Math.min(end - index, dictionary.length - dictionaryIndex);
290 | System.arraycopy(b, index, dictionary, dictionaryIndex, n);
291 | index += n;
292 | dictionaryIndex = (dictionaryIndex + n) & DICTIONARY_MASK;
293 | }
// Saturating add: dictionaryLength never exceeds dictionary.length
294 | dictionaryLength += Math.min(len, dictionary.length - dictionaryLength);
295 |
296 | return len;
297 | }
298 |
299 |
// The block is done once its whole payload has been delivered
300 | public boolean isDone() {
301 | if (numRemainingBytes < 0)
302 | throw new AssertionError("Unreachable state");
303 | return numRemainingBytes == 0;
304 | }
305 |
306 | }
307 |
308 |
309 |
310 | private final class HuffmanBlock implements BlockDecoder {
311 |
312 | private final short[] literalLengthCodeTree; // Not null
313 | private final short[] literalLengthCodeTable; // Derived from literalLengthCodeTree; not null
314 | private final short[] distanceCodeTree; // Can be null
315 | private final short[] distanceCodeTable; // Derived from distanceCodeTree; same nullness
316 | private final int maxBitsPerIteration; // In the range [1, 48]
317 |
318 | private int numPendingOutputBytes = 0; // Always in the range [0, MAX_RUN_LENGTH-1]
319 | private boolean isDone = false;
320 |
321 |
322 | public HuffmanBlock(boolean dynamic) throws IOException {
323 | if (!dynamic) {
324 | literalLengthCodeTree = FIXED_LITERAL_LENGTH_CODE_TREE;
325 | literalLengthCodeTable = FIXED_LITERAL_LENGTH_CODE_TABLE;
326 | distanceCodeTree = FIXED_DISTANCE_CODE_TREE;
327 | distanceCodeTable = FIXED_DISTANCE_CODE_TABLE;
328 | maxBitsPerIteration = 9 + 5 + 5 + 13;
329 | }
330 | else {
331 | // Read the current block's dynamic Huffman code tables from from the input
332 | // buffers/stream, process the code lengths and computes the code trees, and
333 | // ultimately set just the variables {literalLengthCodeTree, literalLengthCodeTable,
334 | // distanceCodeTree, distanceCodeTable}. This might throw an IOException for actual I/O
335 | // exceptions, unexpected end of stream, or a description of an invalid Huffman code.
336 | int numLitLenCodes = readBits(5) + 257; // hlit + 257
337 | int numDistCodes = readBits(5) + 1; // hdist + 1
338 |
339 | // Read the code length code lengths
340 | int numCodeLenCodes = readBits(4) + 4; // hclen + 4
341 | var codeLenCodeLen = new byte[CODE_LENGTH_CODE_ORDER.length];
342 | for (int i = 0; i < numCodeLenCodes; i++) // Fill array in strange order
343 | codeLenCodeLen[CODE_LENGTH_CODE_ORDER[i]] = (byte)readBits(3);
344 | short[] codeLenCodeTree = codeLengthsToCodeTree(codeLenCodeLen);
345 |
346 | // Read the main code lengths and handle runs
347 | var codeLens = new byte[numLitLenCodes + numDistCodes];
348 | byte runVal = -1;
349 | for (int i = 0; i < codeLens.length; ) {
350 | int sym = decodeSymbol(codeLenCodeTree);
351 | assert 0 <= sym && sym < codeLenCodeLen.length;
352 | if (sym < 16) {
353 | runVal = (byte)sym;
354 | codeLens[i] = runVal;
355 | i++;
356 | } else {
357 | int runLen = switch (sym) {
358 | case 16 -> {
359 | if (runVal == -1)
360 | throw new DataFormatException(Reason.NO_PREVIOUS_CODE_LENGTH_TO_COPY, "No code length value to copy");
361 | yield readBits(2) + 3;
362 | }
363 | case 17 -> {
364 | runVal = 0;
365 | yield readBits(3) + 3;
366 | }
367 | case 18 -> {
368 | runVal = 0;
369 | yield readBits(7) + 11;
370 | }
371 | default -> throw new AssertionError("Unreachable value");
372 | };
373 | for (; runLen > 0; runLen--, i++) {
374 | if (i >= codeLens.length)
375 | throw new DataFormatException(Reason.CODE_LENGTH_CODE_OVER_FULL, "Run exceeds number of codes");
376 | codeLens[i] = runVal;
377 | }
378 | }
379 | }
380 |
381 | // Create literal-length code tree
382 | byte[] litLenCodeLen = Arrays.copyOf(codeLens, numLitLenCodes);
383 | if (litLenCodeLen[256] == 0)
384 | throw new DataFormatException(Reason.END_OF_BLOCK_CODE_ZERO_LENGTH, "End-of-block symbol has zero code length");
385 | literalLengthCodeTree = codeLengthsToCodeTree(litLenCodeLen);
386 | literalLengthCodeTable = codeTreeToCodeTable(literalLengthCodeTree);
387 | int maxBitsPerLitLen = 0;
388 | for (int sym = 0; sym < litLenCodeLen.length; sym++) {
389 | int numBits = litLenCodeLen[sym];
390 | if (sym >= 257 && numBits > 0)
391 | numBits += RUN_LENGTH_TABLE[sym - 257] & 0x7; // Extra bits
392 | maxBitsPerLitLen = Math.max(numBits, maxBitsPerLitLen);
393 | }
394 |
395 | // Create distance code tree with some extra processing
396 | byte[] distCodeLen = Arrays.copyOfRange(codeLens, numLitLenCodes, codeLens.length);
397 | int maxBitsPerDist = 0;
398 | if (distCodeLen.length == 1 && distCodeLen[0] == 0) {
399 | // Empty distance code; the block shall be all literal symbols
400 | distanceCodeTree = null;
401 | distanceCodeTable = null;
402 | } else {
403 | for (int sym = 0; sym < distCodeLen.length; sym++) {
404 | int numBits = distCodeLen[sym];
405 | if (numBits > 0 && sym < DISTANCE_TABLE.length)
406 | numBits += DISTANCE_TABLE[sym] & 0xF; // Extra bits
407 | maxBitsPerDist = Math.max(numBits, maxBitsPerDist);
408 | }
409 |
410 | // Get statistics for upcoming logic
411 | int oneCount = 0;
412 | int otherPositiveCount = 0;
413 | for (byte x : distCodeLen) {
414 | if (x == 1)
415 | oneCount++;
416 | else if (x > 1)
417 | otherPositiveCount++;
418 | }
419 |
420 | // Handle the case where only one distance code is defined
421 | if (oneCount == 1 && otherPositiveCount == 0) {
422 | // Add a dummy invalid code to make the Huffman tree complete
423 | distCodeLen = Arrays.copyOf(distCodeLen, 32);
424 | distCodeLen[31] = 1;
425 | }
426 | distanceCodeTree = codeLengthsToCodeTree(distCodeLen);
427 | distanceCodeTable = codeTreeToCodeTable(distanceCodeTree);
428 | }
429 |
430 | maxBitsPerIteration = maxBitsPerLitLen + maxBitsPerDist;
431 | }
432 |
433 | if (!(1 <= maxBitsPerIteration && maxBitsPerIteration <= 48))
434 | throw new AssertionError("Unreachable value");
435 | }
436 |
437 |
// Decodes symbols of this block until len bytes have been stored in b (starting at off)
// or the end-of-block symbol is reached, and returns the number of bytes stored.
// Every decoded byte is also appended to the circular dictionary. If a copy run does not
// fit in the caller's array, the whole run is still written to the dictionary and the
// undelivered tail is counted in numPendingOutputBytes for the next call.
438 | public int read(byte[] b, final int off, final int len) throws IOException {
439 | int index = off;
440 | final int end = off + len;
441 | assert off <= end && end <= b.length;
442 |
// Deliver bytes left over from a run that was cut short by the previous call.
// They are the most recent numPendingOutputBytes bytes in the dictionary.
443 | for (; numPendingOutputBytes > 0 && index < end; numPendingOutputBytes--, index++)
444 | b[index] = dictionary[(dictionaryIndex - numPendingOutputBytes) & DICTIONARY_MASK];
445 |
446 | while (index < end) {
447 | assert numPendingOutputBytes == 0;
448 | assert isBitBufferValid();
449 |
450 | // Try to fill the input bit buffer (somewhat similar to logic in readBits())
// Only bytes already in inputBuffer are used here; the underlying stream is not read.
451 | if (inputBitBuffer0Length < maxBitsPerIteration) {
452 | if (inputBitBuffer1Length > 0) {
453 | int n = Math.min(64 - inputBitBuffer0Length, inputBitBuffer1Length);
454 | inputBitBuffer0 |= inputBitBuffer1 << inputBitBuffer0Length;
455 | inputBitBuffer0Length += n;
456 | inputBitBuffer1 >>>= n;
457 | inputBitBuffer1Length -= n;
458 | }
459 | if (inputBitBuffer0Length < maxBitsPerIteration) {
460 | assert inputBitBuffer1Length == 0;
461 | if (inputBuffer.remaining() >= 8) {
// Load 8 bytes at once into buffer 1, then move as many bits as fit into buffer 0.
// NOTE(review): if inputBitBuffer0Length is 0 then n is 64 and the shift by n below is a
// no-op in Java, leaving stale bits in inputBitBuffer1 with length 0. This looks harmless
// because buffer 1 is only read when its length is positive - confirm.
462 | inputBitBuffer1 = inputBuffer.getLong();
463 | inputBitBuffer1Length = 64;
464 | int n = Math.min(64 - inputBitBuffer0Length, inputBitBuffer1Length);
465 | inputBitBuffer0 |= inputBitBuffer1 << inputBitBuffer0Length;
466 | inputBitBuffer0Length += n;
467 | inputBitBuffer1 >>>= n;
468 | inputBitBuffer1Length -= n;
469 | } else {
// Fewer than 8 bytes left in the byte buffer: pack them one at a time
470 | for (; inputBitBuffer0Length <= 56 && inputBuffer.hasRemaining(); inputBitBuffer0Length += 8)
471 | inputBitBuffer0 |= (inputBuffer.get() & 0xFFL) << inputBitBuffer0Length;
472 | }
473 | }
474 | assert isBitBufferValid();
475 | }
476 |
// Set by either path below when a length-distance pair was decoded
477 | int run, dist;
478 |
// maxBitsPerIteration bounds the bits one literal or length-distance pair can take,
// so with that many bits buffered everything below can be decoded without refilling.
479 | if (inputBitBuffer0Length >= maxBitsPerIteration) { // Fast path entirely from bit buffer
480 | // Decode next literal/length symbol (a customized version of decodeSymbol())
481 | final int sym;
482 | {
// Table entry: low 4 bits = bits consumed by the lookup, upper bits = resulting tree node.
// A negative node is a leaf holding the bitwise complement of the symbol.
483 | int temp = literalLengthCodeTable[(int)inputBitBuffer0 & CODE_TABLE_MASK];
484 | int consumed = temp & 0xF;
485 | inputBitBuffer0 >>>= consumed;
486 | inputBitBuffer0Length -= consumed;
487 | int node = temp >> 4;
// Codes longer than the table lookup are finished by walking the tree bit by bit
488 | while (node >= 0) {
489 | node = literalLengthCodeTree[node + ((int)inputBitBuffer0 & 1)];
490 | inputBitBuffer0 >>>= 1;
491 | inputBitBuffer0Length--;
492 | }
493 | sym = ~node;
494 | assert isBitBufferValid();
495 | }
496 |
497 | // Handle the symbol by ranges
498 | assert 0 <= sym && sym <= 287;
499 | if (sym < 256) { // Literal byte
500 | b[index] = (byte)sym;
501 | index++;
502 | dictionary[dictionaryIndex] = (byte)sym;
503 | dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
504 | if (dictionaryLength < dictionary.length)
505 | dictionaryLength++;
506 | continue;
507 |
508 | } else if (sym > 256) { // Length and distance for copying
509 | // Decode the run length (a customized version of decodeRunLength())
510 | assert 257 <= sym && sym <= 287;
511 | {
512 | int temp;
// Symbols beyond the end of the table are reserved; the failed array
// access is turned into a data format error.
513 | try {
514 | temp = RUN_LENGTH_TABLE[sym - 257];
515 | } catch (ArrayIndexOutOfBoundsException e) {
516 | throw new DataFormatException(Reason.RESERVED_LENGTH_SYMBOL, "Reserved run length symbol: " + sym);
517 | }
// Table entry: low 3 bits = number of extra bits, upper bits = base run length
518 | run = temp >>> 3;
519 | int numExtraBits = temp & 7;
520 | run += (int)inputBitBuffer0 & ((1 << numExtraBits) - 1);
521 | inputBitBuffer0 >>>= numExtraBits;
522 | inputBitBuffer0Length -= numExtraBits;
523 | }
524 |
525 | // Decode next distance symbol (a customized version of decodeSymbol())
526 | if (distanceCodeTree == null)
527 | throw new DataFormatException(Reason.LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE, "Length symbol encountered with empty distance code");
528 | final int distSym;
529 | {
530 | int temp = distanceCodeTable[(int)inputBitBuffer0 & CODE_TABLE_MASK];
531 | int consumed = temp & 0xF;
532 | inputBitBuffer0 >>>= consumed;
533 | inputBitBuffer0Length -= consumed;
534 | int node = temp >> 4;
535 | while (node >= 0) {
536 | node = distanceCodeTree[node + ((int)inputBitBuffer0 & 1)];
537 | inputBitBuffer0 >>>= 1;
538 | inputBitBuffer0Length--;
539 | }
540 | distSym = ~node;
541 | }
542 |
543 | // Decode the distance (a customized version of decodeDistance())
544 | assert 0 <= distSym && distSym <= 31;
545 | {
546 | int temp;
547 | try {
548 | temp = DISTANCE_TABLE[distSym];
549 | } catch (ArrayIndexOutOfBoundsException e) {
550 | throw new DataFormatException(Reason.RESERVED_DISTANCE_SYMBOL, "Reserved distance symbol: " + distSym);
551 | }
// Table entry: low 4 bits = number of extra bits, upper bits = base distance
552 | dist = temp >>> 4;
553 | int numExtraBits = temp & 0xF;
554 | dist += (int)inputBitBuffer0 & ((1 << numExtraBits) - 1);
555 | inputBitBuffer0 >>>= numExtraBits;
556 | inputBitBuffer0Length -= numExtraBits;
557 | }
558 | assert isBitBufferValid();
559 |
560 | } else { // sym == 256, end of block
561 | isDone = true;
562 | break;
563 | }
564 |
565 | } else { // General case (always correct), when not enough bits in buffer to guarantee reading
566 | int sym = decodeSymbol(literalLengthCodeTree);
567 | assert 0 <= sym && sym <= 287;
568 | if (sym < 256) { // Literal byte
569 | b[index] = (byte)sym;
570 | index++;
571 | dictionary[dictionaryIndex] = (byte)sym;
572 | dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
573 | if (dictionaryLength < dictionary.length)
574 | dictionaryLength++;
575 | continue;
576 | } else if (sym > 256) { // Length and distance for copying
577 | run = decodeRunLength(sym);
578 | if (distanceCodeTree == null)
579 | throw new DataFormatException(Reason.LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE, "Length symbol encountered with empty distance code");
580 | int distSym = decodeSymbol(distanceCodeTree);
581 | assert 0 <= distSym && distSym <= 31;
582 | dist = decodeDistance(distSym);
583 | } else { // sym == 256, end of block
584 | isDone = true;
585 | break;
586 | }
587 | }
588 |
589 | // Copy bytes to output and dictionary
590 | assert 3 <= run && run <= MAX_RUN_LENGTH;
591 | assert 1 <= dist && dist <= 32768;
592 | if (dist > dictionaryLength)
593 | throw new DataFormatException(Reason.COPY_FROM_BEFORE_DICTIONARY_START, "Attempting to copy from before start of dictionary");
// The copy goes byte by byte, reading behind the write position, so a run
// longer than its distance repeats the bytes it has just written.
594 | int dictReadIndex = (dictionaryIndex - dist) & DICTIONARY_MASK;
595 | if (run <= end - index) { // Nice case with less branching
596 | for (int i = 0; i < run; i++) {
597 | byte bb = dictionary[dictReadIndex];
598 | dictReadIndex = (dictReadIndex + 1) & DICTIONARY_MASK;
599 | dictionary[dictionaryIndex] = bb;
600 | dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
601 | b[index] = bb;
602 | index++;
603 | }
604 | } else { // General case
// The run does not fit in the caller's array: finish it in the dictionary
// anyway and count the bytes that still have to be handed out.
605 | for (int i = 0; i < run; i++) {
606 | byte bb = dictionary[dictReadIndex];
607 | dictReadIndex = (dictReadIndex + 1) & DICTIONARY_MASK;
608 | dictionary[dictionaryIndex] = bb;
609 | dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
610 | if (index < end) {
611 | b[index] = bb;
612 | index++;
613 | } else
614 | numPendingOutputBytes++;
615 | }
616 | }
// Saturating add: dictionaryLength never exceeds dictionary.length
617 | dictionaryLength += Math.min(run, dictionary.length - dictionaryLength);
618 | }
619 | return index - off;
620 | }
621 |
622 |
623 | public boolean isDone() {
624 | return numPendingOutputBytes == 0 && isDone;
625 | }
626 |
627 |
628 | /*---- Huffman coding methods ----*/
629 |
630 | // Reads bits from the input buffers/stream and uses the given code tree to
631 | // decode the next symbol. The returned symbol value is a non-negative integer.
632 | // This throws an IOException if the end of stream is reached before a symbol
633 | // is decoded, or if the underlying stream experiences an I/O exception.
634 | private int decodeSymbol(short[] codeTree) throws IOException {
635 | int node = 0; // An index into the codeTree array which signifies the current tree node
636 | while (node >= 0) {
637 | if (inputBitBuffer0Length > 0) { // Medium path using buffered bits
638 | node = codeTree[node + ((int)inputBitBuffer0 & 1)];
639 | inputBitBuffer0 >>>= 1;
640 | inputBitBuffer0Length--;
641 | } else // Slow path with potential I/O operations
642 | node = codeTree[node + readBits(1)];
643 | }
644 | assert isBitBufferValid();
645 | return ~node; // Symbol was encoded as bitwise complement
646 | }
647 |
648 |
649 | // Takes the given run length symbol in the range [257, 287], possibly
650 | // reads some more input bits, and returns a number in the range [3, 258].
651 | // This throws an IOException if bits needed to be read but the end of
652 | // stream was reached or the underlying stream experienced an I/O exception.
653 | private int decodeRunLength(int sym) throws IOException {
654 | assert 257 <= sym && sym <= 287;
655 | try {
656 | int temp = RUN_LENGTH_TABLE[sym - 257];
657 | return (temp >>> 3) + readBits(temp & 7);
658 | } catch (ArrayIndexOutOfBoundsException e) {
659 | throw new DataFormatException(Reason.RESERVED_LENGTH_SYMBOL, "Reserved run length symbol: " + sym);
660 | }
661 | }
662 |
663 |
664 | // Takes the given distance symbol in the range [0, 31], possibly reads
665 | // some more input bits, and returns a number in the range [1, 32768].
666 | // This throws an IOException if bits needed to be read but the end of
667 | // stream was reached or the underlying stream experienced an I/O exception.
668 | private int decodeDistance(int sym) throws IOException {
669 | assert 0 <= sym && sym <= 31;
670 | try {
671 | int temp = DISTANCE_TABLE[sym];
672 | return (temp >>> 4) + readBits(temp & 0xF);
673 | } catch (ArrayIndexOutOfBoundsException e) {
674 | throw new DataFormatException(Reason.RESERVED_DISTANCE_SYMBOL, "Reserved distance symbol: " + sym);
675 | }
676 | }
677 |
678 |
679 | /*
680 | * Converts the given array of symbol code lengths into a canonical code tree.
681 | * A symbol code length is either zero (absent from the tree) or a positive integer.
682 | *
683 | * A code tree is an array of integers, where each pair represents a node.
684 | * Each pair is adjacent and starts on an even index. The earlier element of
685 | * the pair represents the left child and the later element represents the
686 | * right child. The root node is at index 0. If an element is non-negative,
687 | * then it is the index of the child node in the array. Otherwise it is the
688 | * bitwise complement of the leaf symbol. This tree is used in decodeSymbol()
689 | * and codeTreeToCodeTable(). Not every element of the array needs to be
690 | * used, nor do used elements need to be contiguous.
691 | *
692 | * For example, this Huffman tree:
693 | * /\
694 | * 0 1
695 | * / \
696 | * /\ 'c'
697 | * 0 1
698 | * / \
699 | * 'a' 'b'
700 | * is serialized as this array:
701 | * [2, ~'c', ~'a', ~'b']
702 | * because the root is located at index 0 and
703 | * the other internal node is located at index 2.
704 | */
705 | private static short[] codeLengthsToCodeTree(byte[] codeLengths) throws DataFormatException {
706 | var codeLengthsAndSymbols = new short[codeLengths.length];
707 | for (int i = 0; i < codeLengths.length; i++) {
708 | byte cl = codeLengths[i];
709 | if (cl < 0)
710 | throw new IllegalArgumentException("Negative code length");
711 | if (cl > 15)
712 | throw new AssertionError("Maximum code length exceeds DEFLATE specification");
713 | int pair = cl << 11 | i; // uint15
714 | assert pair >>> 15 == 0;
715 | codeLengthsAndSymbols[i] = (short)pair;
716 | }
717 | Arrays.sort(codeLengthsAndSymbols);
718 |
719 | int codeLenSymIndex = 0;
720 | // Skip unused symbols (code length 0)
721 | while (codeLenSymIndex < codeLengthsAndSymbols.length && codeLengthsAndSymbols[codeLenSymIndex] >>> 11 == 0)
722 | codeLenSymIndex++;
723 |
724 | int numCodes = codeLengthsAndSymbols.length - codeLenSymIndex;
725 | if (numCodes < 2)
726 | throw new DataFormatException(Reason.HUFFMAN_CODE_UNDER_FULL, "This canonical code produces an under-full Huffman code tree");
727 | if (numCodes > 16385) // Because some indexes would overflow int16
728 | throw new IllegalArgumentException("Too many codes");
729 |
730 | var result = new short[(numCodes - 1) * 2];
731 | int resultNext = 0;
732 | int resultEnd = 2; // Start with root node already allocated; always even
733 | int curCodeLen = 1;
734 | for (; codeLenSymIndex < codeLengthsAndSymbols.length; codeLenSymIndex++) {
735 | int pair = codeLengthsAndSymbols[codeLenSymIndex];
736 | for (int codeLen = pair >>> 11; curCodeLen < codeLen; curCodeLen++) {
737 | // Double every open slot
738 | for (int end = resultEnd; resultNext < end; resultNext++) {
739 | if (resultEnd >= result.length)
740 | throw new DataFormatException(Reason.HUFFMAN_CODE_UNDER_FULL, "This canonical code produces an under-full Huffman code tree");
741 | result[resultNext] = (short)resultEnd;
742 | resultEnd += 2;
743 | }
744 | }
745 | if (resultNext >= resultEnd)
746 | throw new DataFormatException(Reason.HUFFMAN_CODE_OVER_FULL, "This canonical code produces an over-full Huffman code tree");
747 | int symbol = pair & ((1 << 11) - 1);
748 | result[resultNext] = (short)~symbol;
749 | resultNext++;
750 | }
751 | if (resultEnd != result.length)
752 | throw new AssertionError("Unreachable state");
753 | if (resultNext < resultEnd)
754 | throw new DataFormatException(Reason.HUFFMAN_CODE_UNDER_FULL, "This canonical code produces an under-full Huffman code tree");
755 | return result;
756 | }
757 |
758 |
759 | /*
760 | * Converts a code tree array into a fast look-up table that consumes up to
761 | * CODE_TABLE_BITS at once. Each entry i in the table encodes the result of
762 | * decoding starting from the root and consuming the bits of i starting from
763 | * the lowest-order bits.
764 | *
765 | * Each array element encodes (node << 4) | numBitsConsumed, where:
766 | * - numBitsConsumed is a 4-bit unsigned integer in the range [1, CODE_TABLE_BITS].
767 | * - node is a 12-bit signed integer representing either the current node
768 | * (which is a non-negative number) after consuming all the available bits
769 | * from i, or the bitwise complement of the decoded symbol (so it's negative).
770 | */
771 | private static short[] codeTreeToCodeTable(short[] codeTree) {
772 | assert 1 <= CODE_TABLE_BITS && CODE_TABLE_BITS <= 15;
773 | var result = new short[1 << CODE_TABLE_BITS];
774 | for (int i = 0; i < result.length; i++) {
775 | // Simulate decodeSymbol() using the bits of i
776 | int node = 0;
777 | int consumed = 0;
778 | do {
779 | assert node % 2 == 0;
780 | node = codeTree[node + ((i >>> consumed) & 1)];
781 | consumed++;
782 | } while (node >= 0 && consumed < CODE_TABLE_BITS);
783 |
784 | assert 1 <= consumed && consumed <= 15; // uint4
785 | assert -2048 <= node && node <= 2047; // int12
786 | result[i] = (short)(node << 4 | consumed);
787 | }
788 | return result;
789 | }
790 |
791 |
792 | /*---- Constants and tables ----*/
793 |
// A permutation of the integers 0 to 18 (presumably the code length code
// order of the DEFLATE specification; its use lies outside this section)
private static final int[] CODE_LENGTH_CODE_ORDER = {
	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
};

// Code trees and look-up tables of the fixed Huffman codes,
// assigned exactly once by a static initializer
private static final short[] FIXED_LITERAL_LENGTH_CODE_TREE;
private static final short[] FIXED_LITERAL_LENGTH_CODE_TABLE;
private static final short[] FIXED_DISTANCE_CODE_TREE;
private static final short[] FIXED_DISTANCE_CODE_TABLE;

// Any integer from 1 to 15 is valid. Affects speed but produces same output.
private static final int CODE_TABLE_BITS = 9;
// Bit mask selecting the lowest CODE_TABLE_BITS bits of a value
private static final int CODE_TABLE_MASK = (1 << CODE_TABLE_BITS) - 1;

static {
	// Fail at class initialization if the constant is edited to an unsupported value
	if (CODE_TABLE_BITS < 1 || CODE_TABLE_BITS > 15)
		throw new AssertionError("Value out of range");
}
810 |
811 |
812 | static {
813 | var llcodelens = new byte[288];
814 | Arrays.fill(llcodelens, 0, 144, (byte)8);
815 | Arrays.fill(llcodelens, 144, 256, (byte)9);
816 | Arrays.fill(llcodelens, 256, 280, (byte)7);
817 | Arrays.fill(llcodelens, 280, 288, (byte)8);
818 |
819 | var distcodelens = new byte[32];
820 | Arrays.fill(distcodelens, (byte)5);
821 |
822 | try {
823 | FIXED_LITERAL_LENGTH_CODE_TREE = codeLengthsToCodeTree(llcodelens);
824 | FIXED_DISTANCE_CODE_TREE = codeLengthsToCodeTree(distcodelens);
825 | } catch (DataFormatException e) {
826 | throw new AssertionError(e);
827 | }
828 | FIXED_LITERAL_LENGTH_CODE_TABLE = codeTreeToCodeTable(FIXED_LITERAL_LENGTH_CODE_TREE);
829 | FIXED_DISTANCE_CODE_TABLE = codeTreeToCodeTable(FIXED_DISTANCE_CODE_TREE);
830 | }
831 |
832 |
private static final int MAX_RUN_LENGTH = 258;  // Required by the specification, do not modify

static {
	// Sanity check at class initialization: the dictionary must be able to
	// hold up to MAX_RUN_LENGTH - 1 bytes of a run
	final int maxBufferedRunBytes = MAX_RUN_LENGTH - 1;
	if (DICTIONARY_LENGTH < maxBufferedRunBytes)
		throw new AssertionError("Cannot guarantee all pending run bytes can be buffered in dictionary");
}
839 |
840 |
841 | // For length symbols from 257 to 285 (inclusive). RUN_LENGTH_TABLE[i]
842 | // = (base of run length) << 3 | (number of extra bits to read).
843 | private static final short[] RUN_LENGTH_TABLE = new short[29];
844 |
845 | static {
846 | for (int i = 0; i < RUN_LENGTH_TABLE.length; i++) {
847 | int sym = i + 257;
848 | int run, extraBits;
849 | if (sym <= 264) {
850 | extraBits = 0;
851 | run = sym - 254;
852 | } else if (sym <= 284) {
853 | extraBits = (sym - 261) / 4;
854 | run = (((sym - 1) % 4 + 4) << extraBits) + 3;
855 | } else if (sym == 285) {
856 | extraBits = 0;
857 | run = 258;
858 | } else
859 | throw new AssertionError("Unreachable value");
860 | assert run >>> 12 == 0;
861 | assert extraBits >>> 3 == 0;
862 | RUN_LENGTH_TABLE[i] = (short)(run << 3 | extraBits);
863 | }
864 | }
865 |
866 |
867 | // For distance symbols from 0 to 29 (inclusive). DISTANCE_TABLE[i]
868 | // = (base of distance) << 4 | (number of extra bits to read).
869 | private static final int[] DISTANCE_TABLE = new int[30];
870 |
871 | static {
872 | for (int sym = 0; sym < DISTANCE_TABLE.length; sym++) {
873 | int dist, extraBits;
874 | if (sym <= 3) {
875 | extraBits = 0;
876 | dist = sym + 1;
877 | } else if (sym <= 29) {
878 | extraBits = sym / 2 - 1;
879 | dist = ((sym % 2 + 2) << extraBits) + 1;
880 | } else
881 | throw new AssertionError("Unreachable value");
882 | assert dist >>> 27 == 0;
883 | assert extraBits >>> 4 == 0;
884 | DISTANCE_TABLE[sym] = dist << 4 | extraBits;
885 | }
886 | }
887 |
888 | }
889 |
890 | }
891 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/decomp/State.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.decomp;
10 |
11 |
12 | public sealed interface State permits Open, StickyException, Closed {}
13 |
--------------------------------------------------------------------------------
/src/io/nayuki/deflate/decomp/StickyException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate.decomp;
10 |
11 | import java.io.IOException;
12 | import java.io.InputStream;
13 | import java.util.Objects;
14 |
15 |
16 | // A saved exception that is thrown on every read() or detach().
17 | public record StickyException(
18 | InputStream input,
19 | IOException exception)
20 | implements State {
21 |
22 |
23 | public StickyException {
24 | Objects.requireNonNull(input);
25 | Objects.requireNonNull(exception);
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/test/io/nayuki/deflate/DeflaterOutputStreamTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.ByteArrayInputStream;
12 | import java.io.ByteArrayOutputStream;
13 | import java.io.IOException;
14 | import java.io.InputStream;
15 | import java.io.OutputStream;
16 | import java.util.Arrays;
17 | import java.util.Random;
18 | import org.junit.Assert;
19 | import org.junit.Test;
20 |
21 |
22 | public class DeflaterOutputStreamTest {
23 |
24 | @Test public void testEmpty() throws IOException {
25 | byte[] data = {};
26 | var bout = new ByteArrayOutputStream();
27 | try (OutputStream dout = new DeflaterOutputStream(bout)) {
28 | dout.write(data);
29 | }
30 | checkInflate(data, bout.toByteArray());
31 | }
32 |
33 |
34 | @Test public void testShortSingleWriteRandomly() throws IOException {
35 | for (int i = 0; i < 1000; i++) {
36 | var data = new byte[rand.nextInt(100)];
37 | rand.nextBytes(data);
38 | var bout = new ByteArrayOutputStream();
39 | try (OutputStream dout = new DeflaterOutputStream(bout)) {
40 | dout.write(data);
41 | }
42 | checkInflate(data, bout.toByteArray());
43 | }
44 | }
45 |
46 |
47 | @Test public void testShortMultiWriteRandomly() throws IOException {
48 | for (int i = 0; i < 1000; i++) {
49 | var data = new byte[rand.nextInt(1000)];
50 | rand.nextBytes(data);
51 | var bout = new ByteArrayOutputStream();
52 | try (OutputStream dout = new DeflaterOutputStream(bout)) {
53 | for (int off = 0; off < data.length; ) {
54 | if (rand.nextDouble() < 0.1) {
55 | dout.write(data[off]);
56 | off++;
57 | } else {
58 | int n = rand.nextInt(Math.min(100, data.length - off)) + 1;
59 | dout.write(data, off, n);
60 | off += n;
61 | }
62 | }
63 | }
64 | checkInflate(data, bout.toByteArray());
65 | }
66 | }
67 |
68 |
69 | @Test public void testByteRunsRandomly() throws IOException {
70 | var bout = new ByteArrayOutputStream();
71 | for (int i = 0; i < 1000; i++) {
72 | var b = new byte[rand.nextInt(1000) + 1];
73 | Arrays.fill(b, (byte)rand.nextInt(1 << 8));
74 | bout.write(b);
75 | }
76 | byte[] data = bout.toByteArray();
77 |
78 | bout = new ByteArrayOutputStream();
79 | try (OutputStream dout = new DeflaterOutputStream(bout)) {
80 | dout.write(data);
81 | }
82 | checkInflate(data, bout.toByteArray());
83 | }
84 |
85 |
86 | @Test public void testLongRandomly() throws IOException {
87 | for (int i = 0; i < 1000; i++) {
88 | var data = new byte[rand.nextInt(1_000_000)];
89 | rand.nextBytes(data);
90 | var bout = new ByteArrayOutputStream();
91 | try (OutputStream dout = new DeflaterOutputStream(bout)) {
92 | for (int off = 0; off < data.length; ) {
93 | if (rand.nextDouble() < 0.9) {
94 | dout.write(data[off]);
95 | off++;
96 | } else {
97 | int n = rand.nextInt(Math.min(300_000, data.length - off)) + 1;
98 | dout.write(data, off, n);
99 | off += n;
100 | }
101 | }
102 | }
103 | checkInflate(data, bout.toByteArray());
104 | }
105 | }
106 |
107 |
108 |
109 | private static void checkInflate(byte[] uncomp, byte[] comp) throws IOException {
110 | var bout = new ByteArrayOutputStream();
111 | try (InputStream in = new InflaterInputStream(new ByteArrayInputStream(comp))) {
112 | in.transferTo(bout);
113 | }
114 | Assert.assertArrayEquals(uncomp, bout.toByteArray());
115 | }
116 |
117 |
118 | private static Random rand = new Random();
119 |
120 | }
121 |
--------------------------------------------------------------------------------
/test/io/nayuki/deflate/InflaterInputStreamTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.ByteArrayOutputStream;
12 | import java.io.IOException;
13 | import java.util.Objects;
14 | import java.util.Random;
15 | import org.junit.Assert;
16 | import org.junit.Test;
17 | import io.nayuki.deflate.DataFormatException.Reason;
18 |
19 |
20 | public final class InflaterInputStreamTest {
21 |
22 | /*---- Block header ----*/
23 |
24 | @Test public void testHeaderEndBeforeFinal() {
25 | testFail("",
26 | Reason.UNEXPECTED_END_OF_STREAM);
27 | }
28 |
29 |
30 | @Test public void testHeaderEndBeforeType() {
31 | // Fixed Huffman block: 90 91 92 93 94 End
32 | testFail("0 10 110010000 110010001 110010010 110010011 110010100 0000000"
33 | + "1",
34 | Reason.UNEXPECTED_END_OF_STREAM);
35 | }
36 |
37 |
38 | @Test public void testHeaderEndInType() {
39 | // Fixed Huffman block: 95 96 97 98 End
40 | testFail("0 10 110010101 110010110 110010111 110011000 0000000"
41 | + "1 0",
42 | Reason.UNEXPECTED_END_OF_STREAM);
43 | }
44 |
45 |
46 |
47 | /*---- Block type 0b00 ----*/
48 |
49 | @Test public void testUncompressedEmpty() {
50 | // Uncompressed block len=0: (empty)
51 | test("1 00 00000 0000000000000000 1111111111111111",
52 | "");
53 | }
54 |
55 |
56 | @Test public void testUncompressedThreeBytes() {
57 | // Uncompressed block len=3: 05 14 23
58 | test("1 00 00000 1100000000000000 0011111111111111 10100000 00101000 11000100",
59 | "05 14 23");
60 | }
61 |
62 |
63 | @Test public void testUncompressedTwoBlocks() {
64 | // Uncompressed block len=2: 05 14
65 | // Uncompressed block len=1: 23
66 | test("0 00 00000 0100000000000000 1011111111111111 10100000 00101000"
67 | + "1 00 00000 1000000000000000 0111111111111111 11000100",
68 | "05 14 23");
69 | }
70 |
71 |
72 | @Test public void testUncompressedEndBeforeLength() {
73 | // Uncompressed block (partial padding) (no length)
74 | testFail("1 00 000",
75 | Reason.UNEXPECTED_END_OF_STREAM);
76 | }
77 |
78 |
79 | @Test public void testUncompressedEndInLength() {
80 | // Uncompressed block (partial length)
81 | testFail("1 00 00000 0000000000",
82 | Reason.UNEXPECTED_END_OF_STREAM);
83 | }
84 |
85 |
86 | @Test public void testUncompressedEndInNegatedLength() {
87 | // Uncompressed block (len) (partial nlen)
88 | testFail("1 00 00000 0000000000000000 11111111",
89 | Reason.UNEXPECTED_END_OF_STREAM);
90 | }
91 |
92 |
93 | @Test public void testUncompressedLengthNegatedMismatch() {
94 | // Uncompressed block (mismatched len and nlen)
95 | testFail("1 00 00000 0010000000010000 1111100100110101",
96 | Reason.UNCOMPRESSED_BLOCK_LENGTH_MISMATCH);
97 | }
98 |
99 |
100 | @Test public void testUncompressedEndBeforeData() {
101 | // Uncompressed block len=6: (End)
102 | testFail("1 00 11111 0110000000000000 1001111111111111",
103 | Reason.UNEXPECTED_END_OF_STREAM);
104 | }
105 |
106 |
107 | @Test public void testUncompressedEndInData() {
108 | // Uncompressed block len=6: 55 EE (End)
109 | testFail("1 00 11111 0110000000000000 1001111111111111 10101010 01110111",
110 | Reason.UNEXPECTED_END_OF_STREAM);
111 | }
112 |
113 |
114 | @Test public void testUncompressedEndBeforeFinalBlock() {
115 | // Uncompressed block len=0: (empty)
116 | // No final block
117 | testFail("0 00 00000 0000000000000000 1111111111111111",
118 | Reason.UNEXPECTED_END_OF_STREAM);
119 | }
120 |
121 |
122 | @Test public void testUncompressedAlreadyByteAligned() {
123 | // Fixed Huffman block: 90 A1 FF End
124 | // Uncompressed block len=2: AB CD
125 | test("0 10 110010000 110100001 111111111 0000000 "
126 | + "1 00 0100000000000000 1011111111111111 11010101 10110011",
127 | "90 A1 FF AB CD");
128 | }
129 |
130 |
131 | @Test public void testUncompressedRandom() {
132 | final int TRIALS = 100;
133 | for (int i = 0; i < TRIALS; i++) {
134 | int numBlocks = rand.nextInt(30) + 1;
135 | var inBits = new StringBuilder();
136 | var outBytes = new StringBuilder();
137 | for (int j = 0; j < numBlocks; j++) {
138 | inBits.append(j + 1 < numBlocks ? "0" : "1"); // bfinal
139 | inBits.append("00"); // btype
140 | for (int k = 0; k < 5; k++) // Padding
141 | inBits.append(rand.nextInt(2));
142 |
143 | // A quasi log-uniform distribution
144 | int len = rand.nextInt(17);
145 | if (len > 0) {
146 | len = 1 << (len - 1);
147 | len |= rand.nextInt(len);
148 | }
149 | int temp = len | ((~len) << 16);
150 | for (int k = 0; k < 32; k++)
151 | inBits.append((temp >>> k) & 1);
152 |
153 | var data = new byte[len];
154 | rand.nextBytes(data);
155 | for (byte b : data) {
156 | outBytes.append(String.format("%02x", b));
157 | for (int k = 0; k < 8; k++, b >>>= 1)
158 | inBits.append(b & 1);
159 | }
160 | }
161 | test(inBits.toString(), outBytes.toString());
162 | }
163 | }
164 |
165 |
166 | @Test public void testUncompressedRandomAndShortFixedHuffman() {
167 | final int TRIALS = 100;
168 | for (int i = 0; i < TRIALS; i++) {
169 | int numBlocks = rand.nextInt(30) + 1;
170 | var inBits = new StringBuilder();
171 | var outBytes = new StringBuilder();
172 | for (int j = 0; j < numBlocks; j++) {
173 | inBits.append(j + 1 < numBlocks ? "0" : "1"); // bfinal
174 | if (rand.nextDouble() < 0.5) {
175 | inBits.append("00"); // btype
176 | while (inBits.length() % 8 != 0) // Padding
177 | inBits.append(rand.nextInt(2));
178 |
179 | // A quasi log-uniform distribution
180 | int len = rand.nextInt(17);
181 | if (len > 0) {
182 | len = 1 << (len - 1);
183 | len |= rand.nextInt(len);
184 | }
185 | int temp = len | ((~len) << 16);
186 | for (int k = 0; k < 32; k++)
187 | inBits.append((temp >>> k) & 1);
188 |
189 | var data = new byte[len];
190 | rand.nextBytes(data);
191 | for (byte b : data) {
192 | outBytes.append(String.format("%02x", b));
193 | for (int k = 0; k < 8; k++, b >>>= 1)
194 | inBits.append(b & 1);
195 | }
196 | } else {
197 | inBits.append("10"); // btype
198 | inBits.append("111111111"); // Symbol #255 (0xFF)
199 | outBytes.append("FF");
200 | inBits.append("0000000"); // End of block
201 | // Including bfinal, this writes a total of 19 bits, which is 3
202 | // modulo 8. By writing many consecutive blocks of this type, the
203 | // starting position of the next block can be any number mod 8.
204 | }
205 | }
206 | test(inBits.toString(), outBytes.toString());
207 | }
208 | }
209 |
210 |
211 |
212 | /*---- Block type 0b01 ----*/
213 |
214 | @Test public void testFixedHuffmanEmpty() {
215 | // Fixed Huffman block: End
216 | test("1 10 0000000",
217 | "");
218 | }
219 |
220 |
221 | @Test public void testFixedHuffmanLiterals() {
222 | // Fixed Huffman block: 00 80 8F 90 C0 FF End
223 | test("1 10 00110000 10110000 10111111 110010000 111000000 111111111 0000000",
224 | "00 80 8F 90 C0 FF");
225 | }
226 |
227 |
228 | @Test public void testFixedHuffmanNonOverlappingRun() {
229 | // Fixed Huffman block: 00 01 02 (3,3) End
230 | test("1 10 00110000 00110001 00110010 0000001 00010 0000000",
231 | "00 01 02 00 01 02");
232 | }
233 |
234 |
235 | @Test public void testFixedHuffmanOverlappingRun1() {
236 | // Fixed Huffman block: 01 (1,4) End
237 | test("1 10 00110001 0000010 00000 0000000",
238 | "01 01 01 01 01");
239 | }
240 |
241 |
242 | @Test
243 | public void testFixedHuffmanOverlappingRun2() {
244 | // Fixed Huffman block: 8E 8F (2,5) End
245 | test("1 10 10111110 10111111 0000011 00001 0000000",
246 | "8E 8F 8E 8F 8E 8F 8E");
247 | }
248 |
249 |
250 | @Test public void testFixedHuffmanInvalidLengthCode286() {
251 | // Fixed Huffman block: #286
252 | testFail("1 10 11000110",
253 | Reason.RESERVED_LENGTH_SYMBOL);
254 | }
255 |
256 |
257 | @Test public void testFixedHuffmanInvalidLengthCode287() {
258 | // Fixed Huffman block: #287
259 | testFail("1 10 11000111",
260 | Reason.RESERVED_LENGTH_SYMBOL);
261 | }
262 |
263 |
264 | @Test public void testFixedHuffmanInvalidDistanceCode30() {
265 | // Fixed Huffman block: 00 #257 #30
266 | testFail("1 10 00110000 0000001 11110",
267 | Reason.RESERVED_DISTANCE_SYMBOL);
268 | }
269 |
270 |
271 | @Test public void testFixedHuffmanInvalidDistanceCode31() {
272 | // Fixed Huffman block: 00 #257 #31
273 | testFail("1 10 00110000 0000001 11111",
274 | Reason.RESERVED_DISTANCE_SYMBOL);
275 | }
276 |
277 |
278 | @Test public void testFixedHuffmanEndInSymbol() {
279 | // Fixed Huffman block: (partial symbol)
280 | testFail("1 10 00000",
281 | Reason.UNEXPECTED_END_OF_STREAM);
282 | }
283 |
284 |
285 | @Test public void testFixedHuffmanEndBeforeSymbol() {
286 | // Fixed Huffman block: 93 91 94 90 92
287 | testFail("1 10 110010011 110010001 110010100 110010000 110010010",
288 | Reason.UNEXPECTED_END_OF_STREAM);
289 | }
290 |
291 |
292 | @Test public void testFixedHuffmanEofInRunExtensionBits() {
293 | // Fixed Huffman block: 00 #269+1(partial)
294 | testFail("1 10 00110000 0001101 1",
295 | Reason.UNEXPECTED_END_OF_STREAM);
296 | }
297 |
298 |
299 | @Test public void testFixedHuffmanEofInDistanceExtensionBits() {
300 | // Fixed Huffman block: 00 #285 #0 #257 #8+00(partial)
301 | testFail("1 10 00110000 11000101 00000 0000001 01000 00",
302 | Reason.UNEXPECTED_END_OF_STREAM);
303 | }
304 |
305 |
306 | @Test public void testFixedHuffmanLiteralsRandom() {
307 | final int TRIALS = 100;
308 | for (int i = 0; i < TRIALS; i++) {
309 | int numBlocks = rand.nextInt(100) + 1;
310 | var inBits = new StringBuilder();
311 | var outBytes = new StringBuilder();
312 | for (int j = 0; j < numBlocks; j++) {
313 | inBits.append(j + 1 < numBlocks ? "0" : "1"); // bfinal
314 | inBits.append("10"); // btype
315 |
316 | // A quasi log-uniform distribution
317 | int len = rand.nextInt(16);
318 | if (len > 0) {
319 | len = 1 << (len - 1);
320 | len |= rand.nextInt(len);
321 | }
322 |
323 | for (int k = 0; k < len; k++) {
324 | int b = rand.nextInt(256);
325 | if (b < 144) {
326 | for (int l = 7; l >= 0; l--)
327 | inBits.append(((b - 0 + 48) >>> l) & 1);
328 | } else {
329 | for (int l = 8; l >= 0; l--)
330 | inBits.append(((b - 144 + 400) >>> l) & 1);
331 | }
332 | outBytes.append(String.format("%02x", b));
333 | }
334 | inBits.append("0000000");
335 | }
336 | test(inBits.toString(), outBytes.toString());
337 | }
338 | }
339 |
340 |
341 |
342 | /*---- Block type 0b10 ----*/
343 |
344 | @Test public void testDynamicHuffmanEmpty() {
345 | // Dynamic Huffman block:
346 | // numCodeLen=19
347 | // codeLenCodeLen = 0:0, 1:1, 2:0, ..., 15:0, 16:0, 17:0, 18:1
348 | // numLitLen=257, numDist=2
349 | // litLenCodeLen = 0:1, 1:0, ..., 255:0, 256:1
350 | // distCodeLen = 0:1, 1:1
351 | // Data: End
352 | String blockHeader = "1 01";
353 | String codeCounts = "00000 10000 1111";
354 | String codeLenCodeLens = "000 000 100 000 000 000 000 000 000 000 000 000 000 000 000 000 000 100 000";
355 | String codeLens = "0 11111111 10101011 0 0 0";
356 | String data = "1";
357 | test(blockHeader + codeCounts + codeLenCodeLens + codeLens + data,
358 | "");
359 | }
360 |
361 |
362 | @Test public void testDynamicHuffmanEmptyNoDistanceCode() {
363 | // Dynamic Huffman block:
364 | // numCodeLen=18
365 | // codeLenCodeLen = 0:2, 1:2, 2:0, ..., 15:0, 16:0, 17:0, 18:1
366 | // numLitLen=257, numDist=1
367 | // litLenCodeLen = 0:0, ..., 254:0, 255:1, 256:1
368 | // distCodeLen = 0:0
369 | // Data: End
370 | String blockHeader = "1 01";
371 | String codeCounts = "00000 00000 0111";
372 | String codeLenCodeLens = "000 000 100 010 000 000 000 000 000 000 000 000 000 000 000 000 000 010";
373 | String codeLens = "01111111 00101011 11 11 10";
374 | String data = "1";
375 | test(blockHeader + codeCounts + codeLenCodeLens + codeLens + data,
376 | "");
377 | }
378 |
379 |
380 | @Test public void testDynamicHuffmanCodeLengthRepeatAtStart() {
381 | // Dynamic Huffman block:
382 | // numLitLen=257, numDist=1, numCodeLen=18
383 | // codeLenCodeLen = 0:0, 1:1, 2:0, ..., 15:0, 16:1, 17:0, 18:0
384 | // Literal/length/distance code lengths: #16+00
385 | String blockHeader = "1 01";
386 | String codeCounts = "00000 00000 0111";
387 | String codeLenCodeLens = "100 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 100";
388 | String codeLens = "1";
389 | testFail(blockHeader + codeCounts + codeLenCodeLens + codeLens,
390 | Reason.NO_PREVIOUS_CODE_LENGTH_TO_COPY);
391 | }
392 |
393 |
394 | @Test public void testDynamicHuffmanTooManyCodeLengthItems() {
395 | // Dynamic Huffman block:
396 | // numLitLen=257, numDist=1, numCodeLen=18
397 | // codeLenCodeLen = 0:0, 1:1, 2:0, ..., 15:0, 16:0, 17:0, 18:1
398 | // Literal/length/distance code lengths: 1 1 #18+1111111 #18+1101100
399 | String blockHeader = "1 01";
400 | String codeCounts = "00000 00000 0111";
401 | String codeLenCodeLens = "000 000 100 000 000 000 000 000 000 000 000 000 000 000 000 000 000 100";
402 | String codeLens = "0 0 11111111 10011011";
403 | testFail(blockHeader + codeCounts + codeLenCodeLens + codeLens,
404 | Reason.CODE_LENGTH_CODE_OVER_FULL);
405 | }
406 |
407 |
408 | @Test public void testDynamicHuffmanOverfullCode0() {
409 | // Dynamic Huffman block:
410 | // numLitLen=257, numDist=1, numCodeLen=4
411 | // codeLenCodeLen = 0:1, 1:1, 2:1, 3:0
412 | String blockHeader = "1 01";
413 | String codeCounts = "00000 00000 0000";
414 | String codeLenCodeLens = "100 100 100 000";
415 | String padding = "0000000000000000000";
416 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding,
417 | Reason.HUFFMAN_CODE_OVER_FULL);
418 | }
419 |
420 |
421 | @Test public void testDynamicHuffmanOverfullCode1() {
422 | // Dynamic Huffman block:
423 | // numLitLen=257, numDist=1, numCodeLen=4
424 | // codeLenCodeLen = 0:1, 1:1, 2:1, 3:1
425 | String blockHeader = "1 01";
426 | String codeCounts = "00000 00000 0000";
427 | String codeLenCodeLens = "100 100 100 100";
428 | String padding = "0000000000000000000";
429 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding,
430 | Reason.HUFFMAN_CODE_OVER_FULL);
431 | }
432 |
433 |
434 | @Test public void testDynamicHuffmanUnpairedCode() {
435 | // Dynamic Huffman block:
436 | // numLitLen=257, numDist=1, numCodeLen=4
437 | // codeLenCodeLen = 0:1, 1:2, 2:3, 3:0
438 | String blockHeader = "1 01";
439 | String codeCounts = "00000 00000 0000";
440 | String codeLenCodeLens = "100 010 110 000";
441 | String padding = "0000000000000000000";
442 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding,
443 | Reason.HUFFMAN_CODE_UNDER_FULL);
444 | }
445 |
446 |
447 | @Test public void testDynamicHuffmanEmptyCode() {
448 | // Dynamic Huffman block:
449 | // numLitLen=257, numDist=1, numCodeLen=4
450 | // codeLenCodeLen = 0:0, 1:0, 2:0, 3:0
451 | String blockHeader = "1 01";
452 | String codeCounts = "00000 00000 0000";
453 | String codeLenCodeLens = "000 000 000 000";
454 | String padding = "0000000000000000000";
455 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding,
456 | Reason.HUFFMAN_CODE_UNDER_FULL);
457 | }
458 |
459 |
460 | @Test public void testDynamicHuffmanUnderfullCode0() {
461 | // Dynamic Huffman block:
462 | // numLitLen=257, numDist=1, numCodeLen=4
463 | // codeLenCodeLen = 0:0, 1:0, 2:1, 3:0
464 | String blockHeader = "1 01";
465 | String codeCounts = "00000 00000 0000";
466 | String codeLenCodeLens = "000 000 100 000";
467 | String padding = "0000000000000000000";
468 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding,
469 | Reason.HUFFMAN_CODE_UNDER_FULL);
470 | }
471 |
472 |
473 | @Test public void testDynamicHuffmanUnderfullCode1() {
474 | // Dynamic Huffman block:
475 | // numLitLen=257, numDist=1, numCodeLen=4
476 | // codeLenCodeLen = 0:2, 1:1, 2:0, 3:0
477 | String blockHeader = "1 01";
478 | String codeCounts = "00000 00000 0000";
479 | String codeLenCodeLens = "010 100 000 000";
480 | String padding = "0000000000000000000";
481 | testFail(blockHeader + codeCounts + codeLenCodeLens + padding,
482 | Reason.HUFFMAN_CODE_UNDER_FULL);
483 | }
484 |
485 |
486 | @Test public void testDynamicHuffmanUseOfNullDistanceCode() {
487 | // Dynamic Huffman block:
488 | // numLitLen=258, numDist=1, numCodeLen=18
489 | // codeLenCodeLen = 0:2, 1:2, 2:2, ..., 15:0, 16:0, 17:0, 18:2
490 | // Literal/length/distance code lengths: 2 #18+1111111 #18+1101100 1 2 0
491 | // Data: 00 #257
492 | String blockHeader = "1 01";
493 | String codeCounts = "10000 00000 0111";
494 | String codeLenCodeLens = "000 000 010 010 000 000 000 000 000 000 000 000 000 000 000 010 000 010";
495 | String codeLens = "10 111111111 110101011 01 10 00";
496 | String data = "10 11";
497 | String padding = "0000000000000000";
498 | testFail(blockHeader + codeCounts + codeLenCodeLens + codeLens + data + padding,
499 | Reason.LENGTH_ENCOUNTERED_WITH_EMPTY_DISTANCE_CODE);
500 | }
501 |
502 |
503 |
504 | /*---- Block type 0b11 ----*/
505 |
506 | @Test public void testReservedBlockType() {
507 | // Reserved block type
508 | testFail("1 11 00000",
509 | Reason.RESERVED_BLOCK_TYPE);
510 | }
511 |
512 |
513 |
514 | /*---- Utilities ----*/
515 |
516 | // `inputBits` has 0s and 1s, and optional spaces; its length need not be
517 | // a multiple of 8. `refOutputHex` has pairs of hexadecimal digits (with
518 | // optional spaces) representing the expected decompressed output byte sequence.
519 | private static void test(String inputBits, String refOutputHex) {
520 | // Process the input bit string
521 | Objects.requireNonNull(inputBits);
522 | inputBits = inputBits.replace(" ", "");
523 | int padMode = rand.nextInt(3);
524 | while (inputBits.length() % 8 != 0) {
525 | inputBits += switch (padMode) {
526 | case 0 -> 0;
527 | case 1 -> 1;
528 | case 2 -> rand.nextInt(2);
529 | default -> throw new AssertionError("Unreachable value");
530 | };
531 | }
532 |
533 | // Convert the reference output hex string
534 | Objects.requireNonNull(refOutputHex);
535 | refOutputHex = refOutputHex.replace(" ", "");
536 | if (refOutputHex.length() % 2 != 0)
537 | throw new IllegalArgumentException();
538 | var refOut = new byte[refOutputHex.length() / 2];
539 | for (int i = 0; i < refOut.length; i++)
540 | refOut[i] = (byte)Integer.parseInt(refOutputHex.substring(i * 2, (i + 1) * 2), 16);
541 |
542 | // Perform decompression with single-byte reads and check output
543 | var bout = new ByteArrayOutputStream();
544 | var sin = new StringInputStream(inputBits);
545 | try {
546 | @SuppressWarnings("resource")
547 | var iin = new InflaterInputStream(sin, true);
548 | while (true) {
549 | int b = iin.read();
550 | if (b == -1)
551 | break;
552 | bout.write(b);
553 | }
554 | } catch (IOException e) {
555 | throw new AssertionError("Unexpected exception", e);
556 | }
557 | if (sin.read() != -1)
558 | throw new IllegalArgumentException();
559 | Assert.assertArrayEquals(refOut, bout.toByteArray());
560 |
561 | // Perform decompression with block reads and check output
562 | bout.reset();
563 | sin = new StringInputStream(inputBits);
564 | try {
565 | @SuppressWarnings("resource")
566 | var iin = new InflaterInputStream(sin, true);
567 | while (true) {
568 | var buf = new byte[rand.nextInt(100) + 1];
569 | int off = rand.nextInt(buf.length + 1);
570 | int len = rand.nextInt(buf.length - off + 1);
571 | int n = iin.read(buf, off, len);
572 | if (!(-1 <= n && n <= len))
573 | throw new IllegalArgumentException();
574 | if (n == -1)
575 | break;
576 | if (n == 0 && len != 0)
577 | throw new IllegalArgumentException();
578 | bout.write(buf, off, n);
579 | }
580 | } catch (IOException e) {
581 | throw new AssertionError("Unexpected exception", e);
582 | }
583 | Assert.assertArrayEquals(refOut, bout.toByteArray());
584 | }
585 |
586 |
587 | private static void testFail(String inputBits, Reason reason) {
588 | try {
589 | test(inputBits, "");
590 | } catch (DataFormatException e) {
591 | Assert.assertEquals(reason, e.getReason());
592 | }
593 | }
594 |
595 |
596 | private static Random rand = new Random();
597 |
598 | }
599 |
--------------------------------------------------------------------------------
/test/io/nayuki/deflate/StringInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DEFLATE library (Java)
3 | *
4 | * Copyright (c) Project Nayuki
5 | * MIT License. See readme file.
6 | * https://www.nayuki.io/page/deflate-library-java
7 | */
8 |
9 | package io.nayuki.deflate;
10 |
11 | import java.io.InputStream;
12 | import java.util.Objects;
13 |
14 |
/**
 * An input stream that serves bytes from a string of '0' and '1' characters.
 * Each group of 8 characters becomes one byte, with the first character of
 * the group as the least significant bit. Supports mark() and reset().
 */
final class StringInputStream extends InputStream {

    /*---- Fields ----*/

    // Validated text: only '0'/'1' characters, length a multiple of 8.
    private final String bits;

    // Offset in `bits` of the next unread character; moves in steps of 8.
    private int index = 0;

    // Offset remembered by mark(), or -1 while no mark has been set.
    private int mark = -1;



    /*---- Constructor ----*/

    /**
     * Creates a stream over the given bit string.
     * @param s the text to read, consisting solely of '0' and '1'
     * @throws NullPointerException if {@code s} is {@code null}
     * @throws IllegalArgumentException if {@code s} contains any other
     * character, or if its length is not a multiple of 8
     */
    public StringInputStream(String s) {
        Objects.requireNonNull(s);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c != '0' && c != '1')
                throw new IllegalArgumentException("String has characters other than 0 and 1");
        }
        if (s.length() % 8 != 0)
            throw new IllegalArgumentException("String length not a multiple of 8");
        bits = s;
    }



    /*---- Methods ----*/

    /**
     * Returns the next byte (0 to 255), or -1 once the string is exhausted.
     */
    @Override public int read() {
        if (index >= bits.length())
            return -1;
        // The i-th character of the current group supplies bit i of the byte
        int value = 0;
        for (int i = 0; i < 8; i++) {
            if (bits.charAt(index + i) == '1')
                value |= 1 << i;
        }
        index += 8;
        return value;
    }


    /**
     * Always returns {@code true}.
     */
    @Override public boolean markSupported() {
        return true;
    }


    /**
     * Remembers the current read position; {@code limit} is ignored.
     */
    @Override public void mark(int limit) {
        mark = index;
    }


    /**
     * Rewinds to the position saved by the most recent mark().
     * @throws IllegalStateException if mark() has never been called
     */
    @Override public void reset() {
        if (mark == -1)
            throw new IllegalStateException("No mark set");
        index = mark;
    }

}
67 |
--------------------------------------------------------------------------------