├── .mvn └── wrapper │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── src ├── test │ ├── resources │ │ ├── binary │ │ │ ├── help.bin │ │ │ └── word.doc │ │ ├── shakespeare.tar │ │ └── shakespeare │ │ │ └── play.dtd │ └── java │ │ ├── com │ │ └── ning │ │ │ └── compress │ │ │ ├── lzf │ │ │ ├── TestLZFCompressingInputStream.java │ │ │ ├── util │ │ │ │ └── TestFileStreams.java │ │ │ ├── ManualTestLZF.java │ │ │ ├── TestLZFUncompressor.java │ │ │ ├── TestLZFOutputStream.java │ │ │ ├── TestLZFDecoder.java │ │ │ ├── TestLZFRoundTrip.java │ │ │ ├── LZFEncoderTest.java │ │ │ ├── TestLZFInputStream.java │ │ │ └── TestFuzzUnsafeLZF.java │ │ │ ├── gzip │ │ │ ├── TestGzipStreams.java │ │ │ └── TestGzipUncompressor.java │ │ │ └── BaseForTests.java │ │ └── perf │ │ ├── ManualSkipComparison.java │ │ ├── ManualUncompressComparison.java │ │ └── ManualUnsafePerf.java ├── main │ ├── java │ │ └── com │ │ │ └── ning │ │ │ └── compress │ │ │ ├── lzf │ │ │ ├── util │ │ │ │ ├── package-info.java │ │ │ │ ├── ChunkDecoderFactory.java │ │ │ │ └── ChunkEncoderFactory.java │ │ │ ├── impl │ │ │ │ ├── package-info.java │ │ │ │ ├── UnsafeChunkEncoders.java │ │ │ │ ├── VanillaChunkEncoder.java │ │ │ │ ├── UnsafeChunkEncoderBE.java │ │ │ │ ├── UnsafeChunkEncoderLE.java │ │ │ │ └── UnsafeChunkEncoder.java │ │ │ ├── package-info.java │ │ │ ├── parallel │ │ │ │ ├── package-info.java │ │ │ │ ├── WriteTask.java │ │ │ │ ├── BlockManager.java │ │ │ │ ├── CompressTask.java │ │ │ │ └── PLZFOutputStream.java │ │ │ ├── LZFException.java │ │ │ ├── LZF.java │ │ │ ├── LZFDecoder.java │ │ │ ├── LZFChunk.java │ │ │ ├── LZFCompressingInputStream.java │ │ │ └── LZFOutputStream.java │ │ │ ├── package-info.java │ │ │ ├── gzip │ │ │ ├── package-info.java │ │ │ ├── GZIPException.java │ │ │ ├── GZIPRecycler.java │ │ │ └── OptimizedGZIPOutputStream.java │ │ │ ├── CompressionFormatException.java │ │ │ ├── DataHandler.java │ │ │ ├── UncompressorOutputStream.java │ │ │ ├── Uncompressor.java │ │ │ └── BufferRecycler.java │ └── resources │ │ └── META-INF │ │ ├── LICENSE │ │ └── NOTICE └── moditect │ └── module-info.java ├── run-skip ├── run-comp-perf ├── run-uncomp-perf ├── .gitattributes ├── profile-skip ├── profile-comp-perf ├── profile-uncomp-perf ├── .github ├── dependabot.yml └── workflows │ └── main.yml ├── .gitignore ├── LICENSE ├── VERSION.txt └── README.md /.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /src/test/resources/binary/help.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/src/test/resources/binary/help.bin -------------------------------------------------------------------------------- /src/test/resources/binary/word.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/src/test/resources/binary/word.doc -------------------------------------------------------------------------------- /src/test/resources/shakespeare.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/src/test/resources/shakespeare.tar -------------------------------------------------------------------------------- /run-skip: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | perf.ManualSkipComparison \ 6 | $* 7 | -------------------------------------------------------------------------------- /run-comp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx200m -server \ 4 | -cp target/classes:target/test-classes \ 5 | perf.ManualCompressComparison \ 6 | $* 7 | 8 | -------------------------------------------------------------------------------- /run-uncomp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx200m -server \ 4 | -cp target/classes:target/test-classes \ 5 | perf.ManualUncompressComparison \ 6 | $* 7 | 8 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/util/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains helper classes uses by LZF codec. 3 | */ 4 | 5 | package com.ning.compress.lzf.util; 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Mark all Jazzer inputs as binary, to avoid bytes in them being misinterpreted as line terminators and being 2 | # changed on checkout 3 | /src/test/resources/**/*Inputs/** binary 4 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains part of public API that is shared between all different 3 | compression codecs. 4 | */ 5 | 6 | package com.ning.compress; 7 | -------------------------------------------------------------------------------- /profile-skip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | -Xrunhprof:cpu=samples,depth=10,verbose=n,interval=2 \ 6 | perf.ManualSkipComparison \ 7 | $* 8 | 9 | -------------------------------------------------------------------------------- /profile-comp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | -Xrunhprof:cpu=samples,depth=10,verbose=n,interval=2 \ 6 | perf.ManualCompressComparison \ 7 | $* 8 | 9 | -------------------------------------------------------------------------------- /profile-uncomp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | -Xrunhprof:cpu=samples,depth=10,verbose=n,interval=2 \ 6 | perf.ManualUncompressComparison \ 7 | $* 8 | 9 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/impl/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains implementation classes that are not part 3 | of public interface of LZF codec. 
4 | */ 5 | 6 | package com.ning.compress.lzf.impl; 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | groups: 8 | github-actions: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains public API of the LZF codec, as well as some 3 | of the implementation (specifically parts that are designed to be overridable). 4 | */ 5 | 6 | package com.ning.compress.lzf; 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | /.idea 3 | *.ipr 4 | *.iws 5 | *.log 6 | .DS_Store 7 | .classpath 8 | .settings 9 | .project 10 | target 11 | pom.xml.releaseBackup 12 | release.properties 13 | *~ 14 | temp-testng-customsuite.xml 15 | test-output 16 | .externalToolBuilders 17 | server/logs 18 | runtime 19 | logs 20 | 21 | # Jazzer fuzzing corpus 22 | /.cifuzz-corpus/ 23 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/LICENSE: -------------------------------------------------------------------------------- 1 | This copy of Compress-LZF library is licensed under the 2 | Apache (Software) License, version 2.0 ("the License"). 3 | See the License for details about distribution rights, and the 4 | specific rights regarding derivate works. 5 | 6 | You may obtain a copy of the License at: 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | -------------------------------------------------------------------------------- /src/moditect/module-info.java: -------------------------------------------------------------------------------- 1 | // Hand-crafted 06-Jan-2021 by tatu.saloranta@iki.fi 2 | module com.ning.compress.lzf { 3 | requires transitive java.xml; 4 | requires jdk.unsupported; 5 | 6 | exports com.ning.compress; 7 | exports com.ning.compress.gzip; 8 | exports com.ning.compress.lzf; 9 | // Not sure if this needs to be exported but... 10 | exports com.ning.compress.lzf.impl; 11 | exports com.ning.compress.lzf.parallel; 12 | exports com.ning.compress.lzf.util; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/parallel/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains parallel implementation of LZF compressor: granularity 3 | is at chunk-level, so that each processing thread operates on a single chunk 4 | at a time (and conversely, no chunk is "split" across threads). 5 |
6 | The main abstraction to use is {@link com.ning.compress.lzf.parallel.PLZFOutputStream},
7 | which orchestrates the operation of multi-threaded compression.
8 | */
9 |
10 | package com.ning.compress.lzf.parallel;
11 |
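A minimal usage sketch (not part of the repository source; the single-argument PLZFOutputStream constructor and the example class name are assumptions based on the package description above):

import java.io.FileOutputStream;
import java.io.OutputStream;

import com.ning.compress.lzf.parallel.PLZFOutputStream;

public class ParallelCompressExample {
    public static void main(String[] args) throws Exception {
        byte[] data = new byte[1 << 20]; // content to be compressed
        // Chunks are compressed on worker threads but written out in their
        // original order, so the result is an ordinary LZF stream.
        try (OutputStream out = new PLZFOutputStream(new FileOutputStream("data.lzf"))) {
            out.write(data);
        }
    }
}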
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/gzip/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | Package that contains optimized stream implementations for working
3 | with GZIP. Internally, the efficient ZLIB codec provided by the JDK is used
4 | for the actual encoding and decoding.
5 | Code here
6 | adds appropriate object reuse to specifically improve handling of relatively
7 | short compressed data, and may also have better support for alternate
8 | operating modes such as "push-style" handling that is needed for
9 | non-blocking ("async") stream processing.
10 | */
11 |
12 | package com.ning.compress.gzip;
13 |
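A small round-trip sketch (not part of the repository source; OptimizedGZIPOutputStream is constructed the same way as in the test helper later in this dump, and since its output is standard GZIP the JDK stream is used here for read-back; the example class name is hypothetical):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;

import com.ning.compress.gzip.OptimizedGZIPOutputStream;

public class GzipRoundTrip {
    public static void main(String[] args) throws Exception {
        byte[] input = "short payload that benefits from Deflater reuse".getBytes(StandardCharsets.UTF_8);

        // Compress: Deflater instances are recycled behind the scenes
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        OptimizedGZIPOutputStream gz = new OptimizedGZIPOutputStream(compressed);
        gz.write(input);
        gz.close();

        // Output is plain GZIP, so the standard JDK stream can read it back
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(compressed.toByteArray()))) {
            byte[] buf = new byte[4096];
            int n;
            while ((n = in.read(buf)) != -1) {
                restored.write(buf, 0, n);
            }
        }
    }
}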
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/LZFException.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import com.ning.compress.CompressionFormatException;
4 |
5 | public class LZFException extends CompressionFormatException
6 | {
7 | private static final long serialVersionUID = 1L;
8 |
9 | public LZFException(String message) {
10 | super(message);
11 | }
12 |
13 | public LZFException(Throwable t) {
14 | super(t);
15 | }
16 |
17 | public LZFException(String message, Throwable t) {
18 | super(message, t);
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/gzip/GZIPException.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.gzip;
2 |
3 | import com.ning.compress.CompressionFormatException;
4 |
5 | public class GZIPException extends CompressionFormatException
6 | {
7 | private static final long serialVersionUID = 1L;
8 |
9 | public GZIPException(String message) {
10 | super(message);
11 | }
12 |
13 | public GZIPException(Throwable t) {
14 | super(t);
15 | }
16 |
17 | public GZIPException(String message, Throwable t) {
18 | super(message, t);
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2009-2010 Ning, Inc.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 | use this file except in compliance with the License. You may obtain a copy of
5 | the License at http://www.apache.org/licenses/LICENSE-2.0
6 |
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10 | License for the specific language governing permissions and limitations under
11 | the License.
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/CompressionFormatException.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress;
2 |
3 | import java.io.IOException;
4 |
5 | /**
6 | * Base exception used by compression codecs when encountering a problem
7 | * with underlying data format, usually due to data corruption.
8 | */
9 | public class CompressionFormatException extends IOException
10 | {
11 | private static final long serialVersionUID = 1L;
12 |
13 | protected CompressionFormatException(String message) {
14 | super(message);
15 | }
16 |
17 | protected CompressionFormatException(Throwable t) {
18 | super();
19 | initCause(t);
20 | }
21 |
22 | protected CompressionFormatException(String message, Throwable t) {
23 | super(message);
24 | initCause(t);
25 | }
26 | }
27 |
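A small sketch of how the exception hierarchy is meant to be used (not part of the repository source; it assumes the static LZFDecoder.decode(byte[]) method of this library, and the wrapper class and method names are hypothetical):

import com.ning.compress.CompressionFormatException;
import com.ning.compress.lzf.LZFDecoder;

public class DecodeWithErrorHandling {
    public static byte[] decodeOrNull(byte[] maybeLzf) {
        try {
            return LZFDecoder.decode(maybeLzf);
        } catch (CompressionFormatException e) {
            // LZFException (and GZIPException for the gzip codec) extend this type,
            // so corrupt input from either codec can be handled in one place
            System.err.println("Corrupt or unrecognized compressed data: " + e.getMessage());
            return null;
        }
    }
}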
--------------------------------------------------------------------------------
/src/main/resources/META-INF/NOTICE:
--------------------------------------------------------------------------------
1 | # Compress LZF
2 |
3 | This library contains an efficient implementation of the LZF compression format,
4 | as well as additional helper classes that build on the JDK-provided gzip (deflate)
5 | codec.
6 |
7 | ## Licensing
8 |
9 | The library is licensed under the Apache License 2.0, as per the accompanying LICENSE file.
10 |
11 | ## Credit
12 |
13 | The library was written by Tatu Saloranta (tatu.saloranta@iki.fi).
14 | It was started at Ning, Inc., as part of an official Open Source process used by
15 | the platform backend, but after the initial versions it has been developed outside of
16 | Ning by the supporting community.
17 |
18 | Other contributors include:
19 |
20 | * Jon Hartlaub (first versions of streaming reader/writer; unit tests)
21 | * Cedrik Lime: parallel LZF implementation
22 |
23 | Various community members have contributed bug reports and suggested minor
24 | fixes; these can be found in the file "VERSION.txt" in SCM.
25 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.6/apache-maven-3.8.6-bin.zip
18 | wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.0/maven-wrapper-3.1.0.jar
19 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/parallel/WriteTask.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.parallel;
2 |
3 | import java.io.OutputStream;
4 | import java.util.concurrent.Future;
5 |
6 | import com.ning.compress.lzf.LZFChunk;
7 |
8 | /**
9 | * @author Cédrik LIME
10 | */
11 | class WriteTask implements Runnable {
12 | private final OutputStream output;
13 | private final Future
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/DataHandler.java:
--------------------------------------------------------------------------------
14 | * NOTE: return value was added (from void to boolean) in 0.9.9
15 | *
16 | * @return True, if caller should process and feed more data; false if
17 | * caller is not interested in more data and processing should be terminated
18 | * (and {@link #allDataHandled} should be called immediately)
19 | */
20 | public boolean handleData(byte[] buffer, int offset, int len) throws IOException;
21 |
22 | /**
23 | * Method called after last call to {@link #handleData}, for successful
24 | * operation, if and when caller is informed about end of content.
25 | * Note that if an exception thrown by {@link #handleData} has caused processing
26 | * to be aborted, this method might not get called.
27 | * Implementation may choose to free resources, flush state, or perform
28 | * validation at this point.
29 | */
30 | public void allDataHandled() throws IOException;
31 | }
32 |
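A sketch of a handler that stops the producer early (not part of the repository source; it only illustrates the boolean return value described above, and the class name is hypothetical):

import java.io.IOException;

import com.ning.compress.DataHandler;

public class FirstBytesHandler implements DataHandler {
    private final byte[] prefix = new byte[64];
    private int filled;

    @Override
    public boolean handleData(byte[] buffer, int offset, int len) throws IOException {
        int toCopy = Math.min(len, prefix.length - filled);
        System.arraycopy(buffer, offset, prefix, filled, toCopy);
        filled += toCopy;
        // Returning false tells the caller to stop feeding data and to
        // call allDataHandled() right away
        return filled < prefix.length;
    }

    @Override
    public void allDataHandled() throws IOException {
        // nothing to flush; the first 'filled' bytes of 'prefix' are ready
    }
}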
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/parallel/CompressTask.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.parallel;
2 |
3 | import java.util.concurrent.Callable;
4 |
5 | import com.ning.compress.lzf.ChunkEncoder;
6 | import com.ning.compress.lzf.LZFChunk;
7 | import com.ning.compress.lzf.util.ChunkEncoderFactory;
8 |
9 | /**
10 | * @author Cédrik LIME
11 | */
12 | class CompressTask implements Callable
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/Uncompressor.java:
--------------------------------------------------------------------------------
12 | * Note that the API does not define how the listener is attached: this is
13 | * typically passed through to constructor of the implementation.
14 | *
15 | * @author Tatu Saloranta (tatu.saloranta@iki.fi)
16 | */
17 | public abstract class Uncompressor
18 | {
19 | /**
20 | * Method called to feed more compressed data to be uncompressed, and
21 | * sent to possible listeners.
22 | *
23 | * NOTE: return value was added (from void to boolean) in 0.9.9
24 | *
25 | * @return True, if caller should process and feed more data; false if
26 | * caller is not interested in more data and processing should be terminated.
27 | * (and {@link #complete} should be called immediately)
28 | */
29 | public abstract boolean feedCompressedData(byte[] comp, int offset, int len)
30 | throws IOException;
31 |
32 | /**
33 | * Method called to indicate that all data to uncompress has already been fed.
34 | * This typically results in last block of data being uncompressed, and results
35 | * being sent to listener(s); but may also throw an exception if incomplete
36 | * block was passed.
37 | */
38 | public abstract void complete() throws IOException;
39 | }
40 |
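A sketch of the push-style feeding loop an I/O layer would run (not part of the repository source; LZFUncompressor is constructed around a DataHandler in the same way as in the tests elsewhere in this dump, and the class and method names here are hypothetical):

import java.io.IOException;
import java.io.InputStream;

import com.ning.compress.DataHandler;
import com.ning.compress.Uncompressor;
import com.ning.compress.lzf.LZFUncompressor;

public class PushDecompress {
    public static void pump(InputStream compressed, DataHandler sink) throws IOException {
        Uncompressor uncomp = new LZFUncompressor(sink);
        byte[] buf = new byte[8192];
        int n;
        while ((n = compressed.read(buf)) != -1) {
            // feedCompressedData() returns false once the handler wants no more data
            if (!uncomp.feedCompressedData(buf, 0, n)) {
                break;
            }
        }
        // signal end of input so the last block gets uncompressed
        uncomp.complete();
    }
}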
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/lzf/util/TestFileStreams.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.util;
2 |
3 | import java.io.*;
4 | import java.nio.charset.StandardCharsets;
5 | import java.nio.file.Path;
6 |
7 | import com.ning.compress.BaseForTests;
8 | import org.junit.jupiter.api.Test;
9 | import org.junit.jupiter.api.io.TempDir;
10 |
11 | import static org.junit.jupiter.api.Assertions.assertArrayEquals;
12 | import static org.junit.jupiter.api.Assertions.assertEquals;
13 |
14 | public class TestFileStreams extends BaseForTests
15 | {
16 | @TempDir
17 | Path tempDir;
18 |
19 | @Test
20 | public void testStreams() throws Exception
21 | {
22 | File f = tempDir.resolve("lzf-test.lzf").toFile();
23 |
24 | // First, write encoded stuff (won't compress, but produces something)
25 | byte[] input = "Whatever stuff...".getBytes(StandardCharsets.UTF_8);
26 |
27 | try (LZFFileOutputStream out = new LZFFileOutputStream(f)) {
28 | out.write(input);
29 | }
30 |
31 | long len = f.length();
32 | // happens to be 22; 17 bytes uncompressed, with 5 byte header
33 | assertEquals(22L, len);
34 |
35 | try (LZFFileInputStream in = new LZFFileInputStream(f)) {
36 | for (byte b : input) {
37 | assertEquals(b & 0xFF, in.read());
38 | }
39 | assertEquals(-1, in.read());
40 | }
41 | }
42 |
43 | @Test
44 | public void testReadAndWrite() throws Exception
45 | {
46 | File f = tempDir.resolve("lzf-test.lzf").toFile();
47 |
48 | byte[] fluff = constructFluff(132000);
49 | try (LZFFileOutputStream fout = new LZFFileOutputStream(f)) {
50 | fout.write(fluff);
51 | }
52 |
53 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(fluff.length);
54 | try (LZFFileInputStream in = new LZFFileInputStream(f)) {
55 | in.readAndWrite(bytes);
56 | }
57 | byte[] actual = bytes.toByteArray();
58 | assertArrayEquals(fluff, actual);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/gzip/GZIPRecycler.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.gzip;
2 |
3 | import java.lang.ref.SoftReference;
4 | import java.util.zip.Deflater;
5 | import java.util.zip.Inflater;
6 |
7 | /**
8 | * GZIP-codec-specific "extension" to {@link com.ning.compress.BufferRecycler},
9 | * used for recycling expensive objects.
10 | *
11 | * @author Tatu Saloranta (tatu.saloranta@iki.fi)
12 | */
13 | public final class GZIPRecycler
14 | {
15 | final protected static ThreadLocal
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/util/ChunkDecoderFactory.java:
--------------------------------------------------------------------------------
12 | * Yes, it looks butt-ugly, but does the job. Nonetheless, if anyone
13 | * has lipstick for this pig, let me know.
14 | */
15 | public class ChunkDecoderFactory
16 | {
17 | private final static ChunkDecoderFactory _instance;
18 | static {
19 | Class<?> impl = null;
20 | try {
21 | // first, try loading optimal one, which uses Sun JDK Unsafe...
22 | impl = (Class<?>) Class.forName(UnsafeChunkDecoder.class.getName());
23 | } catch (Throwable t) { }
24 | if (impl == null) {
25 | impl = VanillaChunkDecoder.class;
26 | }
27 | _instance = new ChunkDecoderFactory(impl);
28 | }
29 |
30 | private final Class<? extends ChunkDecoder> _implClass;
31 |
32 | @SuppressWarnings("unchecked")
33 | private ChunkDecoderFactory(Class<?> imp)
34 | {
35 | _implClass = (Class<? extends ChunkDecoder>) imp;
36 | }
37 |
38 | /*
39 | ///////////////////////////////////////////////////////////////////////
40 | // Public API
41 | ///////////////////////////////////////////////////////////////////////
42 | */
43 |
44 | /**
45 | * Method to use for getting decoder instance that uses the most optimal
46 | * available methods for underlying data access. It should be safe to call
47 | * this method as implementations are dynamically loaded; however, on some
48 | * non-standard platforms it may be necessary to either directly load
49 | * instances, or use {@link #safeInstance()}.
50 | */
51 | public static ChunkDecoder optimalInstance() {
52 | try {
53 | return _instance._implClass.newInstance();
54 | } catch (Exception e) {
55 | throw new IllegalStateException("Failed to load a ChunkDecoder instance ("+e.getClass().getName()+"): "
56 | +e.getMessage(), e);
57 | }
58 | }
59 |
60 | /**
61 | * Method that can be used to ensure that a "safe" decoder instance is loaded.
62 | * Safe here means that it should work on any and all Java platforms.
63 | */
64 | public static ChunkDecoder safeInstance() {
65 | // this will always succeed loading; no need to use dynamic class loading or instantiation
66 | return new VanillaChunkDecoder();
67 | }
68 | }
69 |
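A short sketch of the intended selection pattern (not part of the repository source; ChunkDecoder.decode(byte[]) is assumed to decode a complete LZF-encoded byte array, and the example class name is hypothetical):

import com.ning.compress.lzf.ChunkDecoder;
import com.ning.compress.lzf.LZFException;
import com.ning.compress.lzf.util.ChunkDecoderFactory;

public class DecoderSelection {
    public static byte[] decode(byte[] lzfData, boolean requirePortable) throws LZFException {
        // optimalInstance() may pick the sun.misc.Unsafe-backed decoder;
        // safeInstance() always returns the pure-Java VanillaChunkDecoder
        ChunkDecoder decoder = requirePortable
                ? ChunkDecoderFactory.safeInstance()
                : ChunkDecoderFactory.optimalInstance();
        return decoder.decode(lzfData);
    }
}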
--------------------------------------------------------------------------------
/src/test/java/perf/ManualSkipComparison.java:
--------------------------------------------------------------------------------
1 | package perf;
2 |
3 | import java.io.*;
4 |
5 | import com.ning.compress.lzf.*;
6 | import com.ning.compress.lzf.util.LZFFileInputStream;
7 | import com.ning.compress.lzf.util.LZFFileOutputStream;
8 |
9 | /**
10 | * Micro-benchmark for testing performance of skip alternatives.
11 | */
12 | public class ManualSkipComparison
13 | {
14 | private int size = 0;
15 |
16 | private void test(File file, int origSize) throws Exception
17 | {
18 | // Let's try to guestimate suitable size... to get to 50 megs to process
19 | final int REPS = (int) ((double) (50 * 1000 * 1000) / (double) file.length());
20 |
21 | System.out.printf("Skipping %d bytes of compressed data, %d reps.\n",
22 | file.length(), REPS);
23 |
24 | int i = 0;
25 | while (true) {
26 | try { Thread.sleep(100L); } catch (InterruptedException ie) { }
27 | int round = (i++ % 2);
28 |
29 | String msg;
30 | boolean lf = (round == 0);
31 |
32 | long msecs;
33 |
34 | switch (round) {
35 |
36 | case 0:
37 | msg = "LZF skip/old";
38 | msecs = testSkip(REPS, file, false);
39 | break;
40 | case 1:
41 | msg = "LZF skip/NEW";
42 | msecs = testSkip(REPS, file, true);
43 | break;
44 | default:
45 | throw new Error();
46 | }
47 | if (lf) {
48 | System.out.println();
49 | }
50 | System.out.println("Test '"+msg+"' ["+size+" bytes] -> "+msecs+" msecs");
51 | if (size != origSize) { // sanity check
52 | throw new Error("Wrong skip count!!!");
53 | }
54 | }
55 | }
56 |
57 | private final long testSkip(int REPS, File file, boolean newSkip) throws Exception
58 | {
59 | long start = System.currentTimeMillis();
60 | long len = -1L;
61 |
62 | // final byte[] buffer = new byte[16000];
63 |
64 | while (--REPS >= 0) {
65 | InputStream in = newSkip ? new LZFFileInputStream(file)
66 | : new LZFInputStream(new FileInputStream(file));
67 | len = 0;
68 | long skipped;
69 |
70 | while ((skipped = in.skip(Integer.MAX_VALUE)) >= 0L) {
71 | len += skipped;
72 | }
73 | in.close();
74 | }
75 | size = (int) len;
76 | return System.currentTimeMillis() - start;
77 | }
78 |
79 | public static void main(String[] args) throws Exception
80 | {
81 | if (args.length != 1) {
82 | System.err.println("Usage: java ... [file]");
83 | System.exit(1);
84 | }
85 | File in = new File(args[0]);
86 | System.out.printf("Reading input, %d bytes...\n", in.length());
87 | File out = File.createTempFile("skip-perf", ".lzf");
88 | System.out.printf("(writing as file '%s')\n", out.getPath());
89 |
90 | byte[] buffer = new byte[4000];
91 | int count;
92 | FileInputStream ins = new FileInputStream(in);
93 | LZFFileOutputStream outs = new LZFFileOutputStream(out);
94 |
95 | while ((count = ins.read(buffer)) > 0) {
96 | outs.write(buffer, 0, count);
97 | }
98 | ins.close();
99 | outs.close();
100 | System.out.printf("Compressed as file '%s', %d bytes\n", out.getPath(), out.length());
101 |
102 | new ManualSkipComparison().test(out, (int) in.length());
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/lzf/TestLZFUncompressor.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import java.io.*;
4 | import java.util.Random;
5 |
6 | import com.ning.compress.BaseForTests;
7 | import com.ning.compress.DataHandler;
8 | import com.ning.compress.UncompressorOutputStream;
9 | import org.junit.jupiter.api.Test;
10 |
11 | import static org.junit.jupiter.api.Assertions.assertArrayEquals;
12 |
13 | public class TestLZFUncompressor extends BaseForTests
14 | {
15 | @Test
16 | public void testSimpleSmall1by1() throws IOException
17 | {
18 | byte[] fluff = constructFluff(4000);
19 | byte[] comp = LZFEncoder.encode(fluff);
20 |
21 | Collector co = new Collector();
22 | LZFUncompressor uncomp = new LZFUncompressor(co);
23 | for (int i = 0, end = comp.length; i < end; ++i) {
24 | uncomp.feedCompressedData(comp, i, 1);
25 | }
26 | uncomp.complete();
27 | byte[] result = co.getBytes();
28 |
29 | assertArrayEquals(fluff, result);
30 | }
31 |
32 | @Test
33 | public void testSimpleSmallAsChunk() throws IOException
34 | {
35 | byte[] fluff = constructFluff(4000);
36 | byte[] comp = LZFEncoder.encode(fluff);
37 |
38 | // and then uncompress, first byte by bytes
39 | Collector co = new Collector();
40 | LZFUncompressor uncomp = new LZFUncompressor(co);
41 | uncomp.feedCompressedData(comp, 0, comp.length);
42 | uncomp.complete();
43 | byte[] result = co.getBytes();
44 | assertArrayEquals(fluff, result);
45 | }
46 |
47 | @Test
48 | public void testSimpleBiggerVarLength() throws IOException
49 | {
50 | byte[] fluff = constructFluff(190000);
51 | byte[] comp = LZFEncoder.encode(fluff);
52 |
53 | // and then uncompress with arbitrary-sized blocks...
54 | Random rnd = new Random(123);
55 | Collector co = new Collector();
56 | LZFUncompressor uncomp = new LZFUncompressor(co);
57 | for (int i = 0, end = comp.length; i < end; ) {
58 | int size = Math.min(end-i, 1+rnd.nextInt(7));
59 | uncomp.feedCompressedData(comp, i, size);
60 | i += size;
61 | }
62 | uncomp.complete();
63 | byte[] result = co.getBytes();
64 |
65 | assertArrayEquals(fluff, result);
66 | }
67 |
68 | @Test
69 | public void testSimpleBiggerOneChunk() throws IOException
70 | {
71 | byte[] fluff = constructFluff(275000);
72 | byte[] comp = LZFEncoder.encode(fluff);
73 |
74 | // and then uncompress in one chunk
75 | Collector co = new Collector();
76 | LZFUncompressor uncomp = new LZFUncompressor(co);
77 | uncomp.feedCompressedData(comp, 0, comp.length);
78 | uncomp.complete();
79 | byte[] result = co.getBytes();
80 |
81 | assertArrayEquals(fluff, result);
82 | }
83 |
84 |
85 | @Test
86 | public void testSimpleBiggerAsStream() throws IOException
87 | {
88 | byte[] fluff = constructFluff(277000);
89 | byte[] comp = LZFEncoder.encode(fluff);
90 | Collector co = new Collector();
91 | UncompressorOutputStream out = new UncompressorOutputStream(new LZFUncompressor(co));
92 | out.write(comp, 0, comp.length);
93 | out.close();
94 | byte[] result = co.getBytes();
95 |
96 | assertArrayEquals(fluff, result);
97 | }
98 |
99 | private final static class Collector implements DataHandler
100 | {
101 | private final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
102 |
103 | @Override
104 | public boolean handleData(byte[] buffer, int offset, int len) throws IOException {
105 | bytes.write(buffer, offset, len);
106 | return true;
107 | }
108 | @Override
109 | public void allDataHandled() throws IOException { }
110 | public byte[] getBytes() { return bytes.toByteArray(); }
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/LZF.java:
--------------------------------------------------------------------------------
1 | /* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
2 | * file except in compliance with the License. You may obtain a copy of the License at
3 | *
4 | * http://www.apache.org/licenses/LICENSE-2.0
5 | *
6 | * Unless required by applicable law or agreed to in writing, software distributed under
7 | * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
8 | * OF ANY KIND, either express or implied. See the License for the specific language
9 | * governing permissions and limitations under the License.
10 | */
11 |
12 | package com.ning.compress.lzf;
13 |
14 | import java.io.*;
15 |
16 | import com.ning.compress.lzf.util.LZFFileInputStream;
17 | import com.ning.compress.lzf.util.LZFFileOutputStream;
18 |
19 | /**
20 | * Simple command-line utility that can be used for testing LZF
21 | * compression, or as rudimentary command-line tool.
22 | * Arguments are the same as used by the "standard" lzf command line tool
23 | *
24 | * @author Tatu Saloranta (tatu@ning.com)
25 | */
26 | public class LZF
27 | {
28 | public final static String SUFFIX = ".lzf";
29 |
30 | protected void process(String[] args) throws IOException
31 | {
32 | if (args.length == 2) {
33 | String oper = args[0];
34 | boolean compress = "-c".equals(oper);
35 | boolean toSystemOutput = !compress && "-o".equals(oper);
36 | if (compress || toSystemOutput || "-d".equals(oper)) {
37 | String filename = args[1];
38 | File src = new File(filename);
39 | if (!src.exists()) {
40 | System.err.println("File '"+filename+"' does not exist.");
41 | System.exit(1);
42 | }
43 | if (!compress && !filename.endsWith(SUFFIX)) {
44 | System.err.println("File '"+filename+"' does end with expected suffix ('"+SUFFIX+"', won't decompress.");
45 | System.exit(1);
46 | }
47 |
48 | if (compress) {
49 | int inputLength = 0;
50 | File resultFile = new File(filename+SUFFIX);
51 | InputStream in = new FileInputStream(src);
52 | OutputStream out = new LZFFileOutputStream(resultFile);
53 | byte[] buffer = new byte[8192];
54 | int bytesRead;
55 | while ((bytesRead = in.read(buffer, 0, buffer.length)) != -1) {
56 | inputLength += bytesRead;
57 | out.write(buffer, 0, bytesRead);
58 | }
59 | in.close();
60 | out.flush();
61 | out.close();
62 | System.out.printf("Compressed '%s' into '%s' (%d->%d bytes)\n",
63 | src.getPath(), resultFile.getPath(),
64 | inputLength, resultFile.length());
65 | } else {
66 | OutputStream out;
67 | LZFFileInputStream in = new LZFFileInputStream(src);
68 | File resultFile = null;
69 | if (toSystemOutput) {
70 | out = System.out;
71 | } else {
72 | resultFile = new File(filename.substring(0, filename.length() - SUFFIX.length()));
73 | out = new FileOutputStream(resultFile);
74 | }
75 | int uncompLen = in.readAndWrite(out);
76 | in.close();
77 | out.flush();
78 | out.close();
79 | if (resultFile != null) {
80 | System.out.printf("Uncompressed '%s' into '%s' (%d->%d bytes)\n",
81 | src.getPath(), resultFile.getPath(),
82 | src.length(), uncompLen);
83 | }
84 | }
85 | return;
86 | }
87 | }
88 | System.err.println("Usage: java "+getClass().getName()+" -c/-d/-o source-file");
89 | System.err.println(" -d parameter: decompress to file");
90 | System.err.println(" -c parameter: compress to file");
91 | System.err.println(" -o parameter: decompress to stdout");
92 | System.exit(1);
93 | }
94 |
95 | public static void main(String[] args) throws IOException {
96 | new LZF().process(args);
97 | }
98 | }
99 |
100 |
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/gzip/TestGzipUncompressor.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.gzip;
2 |
3 | import java.io.*;
4 | import java.util.Random;
5 |
6 | import com.ning.compress.BaseForTests;
7 | import com.ning.compress.DataHandler;
8 | import com.ning.compress.UncompressorOutputStream;
9 | import org.junit.jupiter.api.Test;
10 |
11 | import static org.junit.jupiter.api.Assertions.assertArrayEquals;
12 |
13 | public class TestGzipUncompressor extends BaseForTests
14 | {
15 | @Test
16 | public void testSimpleSmall1by1() throws IOException
17 | {
18 | byte[] fluff = constructFluff(4000);
19 | byte[] comp = gzipAll(fluff);
20 |
21 | Collector co = new Collector();
22 | GZIPUncompressor uncomp = new GZIPUncompressor(co);
23 | for (int i = 0, end = comp.length; i < end; ++i) {
24 | uncomp.feedCompressedData(comp, i, 1);
25 | }
26 | uncomp.complete();
27 | byte[] result = co.getBytes();
28 |
29 | assertArrayEquals(fluff, result);
30 | }
31 |
32 | @Test
33 | public void testSimpleSmallAsChunk() throws IOException
34 | {
35 | byte[] fluff = constructFluff(4000);
36 | byte[] comp = gzipAll(fluff);
37 |
38 | // and then uncompress, first byte by bytes
39 | Collector co = new Collector();
40 | GZIPUncompressor uncomp = new GZIPUncompressor(co);
41 | uncomp.feedCompressedData(comp, 0, comp.length);
42 | uncomp.complete();
43 | byte[] result = co.getBytes();
44 | assertArrayEquals(fluff, result);
45 | }
46 |
47 | @Test
48 | public void testSimpleBiggerVarLength() throws IOException
49 | {
50 | byte[] fluff = constructFluff(190000);
51 | byte[] comp = gzipAll(fluff);
52 |
53 | // and then uncompress with arbitrary-sized blocks...
54 | Random rnd = new Random(123);
55 | Collector co = new Collector();
56 | GZIPUncompressor uncomp = new GZIPUncompressor(co);
57 | for (int i = 0, end = comp.length; i < end; ) {
58 | int size = Math.min(end-i, 1+rnd.nextInt(7));
59 | uncomp.feedCompressedData(comp, i, size);
60 | i += size;
61 | }
62 | uncomp.complete();
63 | byte[] result = co.getBytes();
64 |
65 | assertArrayEquals(fluff, result);
66 | }
67 |
68 | @Test
69 | public void testSimpleBiggerOneChunk() throws IOException
70 | {
71 | byte[] fluff = constructFluff(275000);
72 | byte[] comp = gzipAll(fluff);
73 |
74 | // and then uncompress in one chunk
75 | Collector co = new Collector();
76 | GZIPUncompressor uncomp = new GZIPUncompressor(co);
77 | uncomp.feedCompressedData(comp, 0, comp.length);
78 | uncomp.complete();
79 | byte[] result = co.getBytes();
80 |
81 | assertArrayEquals(fluff, result);
82 | }
83 |
84 | @Test
85 | public void testSimpleBiggerAsStream() throws IOException
86 | {
87 | byte[] fluff = constructFluff(277000);
88 | byte[] comp = gzipAll(fluff);
89 | Collector co = new Collector();
90 | UncompressorOutputStream out = new UncompressorOutputStream(new GZIPUncompressor(co));
91 | out.write(comp, 0, comp.length);
92 | out.close();
93 | byte[] result = co.getBytes();
94 |
95 | assertArrayEquals(fluff, result);
96 | }
97 |
98 | /*
99 | ///////////////////////////////////////////////////////////////////////
100 | // Helper methods
101 | ///////////////////////////////////////////////////////////////////////
102 | */
103 |
104 | private byte[] gzipAll(byte[] input) throws IOException
105 | {
106 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(16 + input.length>>2);
107 | OptimizedGZIPOutputStream gz = new OptimizedGZIPOutputStream(bytes);
108 | gz.write(input);
109 | gz.close();
110 | return bytes.toByteArray();
111 | }
112 |
113 | private final static class Collector implements DataHandler
114 | {
115 | private final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
116 |
117 | @Override
118 | public boolean handleData(byte[] buffer, int offset, int len) throws IOException {
119 | bytes.write(buffer, offset, len);
120 | return true;
121 | }
122 | @Override
123 | public void allDataHandled() throws IOException { }
124 | public byte[] getBytes() { return bytes.toByteArray(); }
125 | }
126 | }
127 |
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/lzf/TestLZFOutputStream.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.ByteArrayOutputStream;
5 | import java.io.OutputStream;
6 | import java.nio.charset.StandardCharsets;
7 |
8 | import org.junit.jupiter.api.BeforeEach;
9 |
10 | import com.ning.compress.BaseForTests;
11 | import org.junit.jupiter.api.Test;
12 |
13 | import static org.junit.jupiter.api.Assertions.*;
14 |
15 | public class TestLZFOutputStream extends BaseForTests
16 | {
17 | private static final int BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN * 64;
18 | private byte[] nonEncodableBytesToWrite;
19 | private byte[] bytesToWrite;
20 |
21 | @BeforeEach
22 | public void setUp() {
23 | nonEncodableBytesToWrite = constructUncompressable(BUFFER_SIZE);
24 | String phrase = "all work and no play make Jack a dull boy";
25 | bytesToWrite = new byte[BUFFER_SIZE];
26 | byte[] bytes = phrase.getBytes(StandardCharsets.UTF_8);
27 | int cursor = 0;
28 | while(cursor <= bytesToWrite.length) {
29 | System.arraycopy(bytes, 0, bytesToWrite, cursor, (bytes.length+cursor < bytesToWrite.length)?bytes.length:bytesToWrite.length-cursor);
30 | cursor += bytes.length;
31 | }
32 | }
33 |
34 | @Test
35 | public void testUnencodable() throws Exception
36 | {
37 | ByteArrayOutputStream bos = new ByteArrayOutputStream();
38 | OutputStream os = new LZFOutputStream(bos);
39 | os.write(nonEncodableBytesToWrite);
40 | os.close();
41 | assertTrue(bos.toByteArray().length > nonEncodableBytesToWrite.length);
42 | verifyOutputStream(bos, nonEncodableBytesToWrite);
43 | }
44 |
45 | @Test
46 | public void testStreaming() throws Exception
47 | {
48 | ByteArrayOutputStream bos = new ByteArrayOutputStream();
49 | OutputStream os = new LZFOutputStream(bos);
50 | os.write(bytesToWrite);
51 | os.close();
52 | int len = bos.toByteArray().length;
53 | int max = bytesToWrite.length/2;
54 | if (len <= 10 || len >= max) {
55 | fail("Sanity check: should have 10 < len < "+max+"; len = "+len);
56 | }
57 | verifyOutputStream(bos, bytesToWrite);
58 | }
59 |
60 | @Test
61 | public void testSingleByte() throws Exception
62 | {
63 | ByteArrayOutputStream bos = new ByteArrayOutputStream();
64 | OutputStream os = new LZFOutputStream(bos);
65 | int idx = 0;
66 | for(; idx < BUFFER_SIZE; idx++) {
67 | os.write(bytesToWrite[idx]);
68 | if(idx % 1023 == 0 && idx > BUFFER_SIZE/2) {
69 | os.flush();
70 | }
71 | }
72 | os.close();
73 | int len = bos.toByteArray().length;
74 | int max = bytesToWrite.length/2;
75 | if (len <= 10 || len >= max) {
76 | fail("Sanity check: should have 10 < len < "+max+"; len = "+len);
77 | }
78 | verifyOutputStream(bos, bytesToWrite);
79 | }
80 |
81 | @Test
82 | public void testPartialBuffer() throws Exception
83 | {
84 | int offset = 255;
85 | int len = 1<<17;
86 | ByteArrayOutputStream bos = new ByteArrayOutputStream();
87 | OutputStream os = new LZFOutputStream(bos);
88 | os.write(bytesToWrite, offset, len);
89 | os.close();
90 | assertTrue(bos.toByteArray().length > 10);
91 | assertTrue(bos.toByteArray().length < bytesToWrite.length*.5);
92 | int bytesToCopy = Math.min(len, bytesToWrite.length);
93 | byte[] compareBytes = new byte[bytesToCopy];
94 | System.arraycopy(bytesToWrite, offset, compareBytes, 0, bytesToCopy);
95 | verifyOutputStream(bos, compareBytes);
96 | }
97 |
98 | @Test
99 | public void testEmptyBuffer() throws Exception
100 | {
101 | byte[] input = new byte[0];
102 | ByteArrayOutputStream bos = new ByteArrayOutputStream();
103 | OutputStream os = new LZFOutputStream(bos);
104 | os.write(input);
105 | os.close();
106 | int len = bos.toByteArray().length;
107 | if (len != 0) {
108 | fail("Sanity check: should have len == 0; len = "+len);
109 | }
110 | verifyOutputStream(bos, input);
111 | }
112 |
113 | private void verifyOutputStream(ByteArrayOutputStream bos, byte[] reference) throws Exception
114 | {
115 | ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
116 | LZFInputStream lzfi = new LZFInputStream(bis);
117 | int val;
118 | int idx = 0;
119 | while((val = lzfi.read()) != -1) {
120 | int refVal = ((int)reference[idx++]) & 255;
121 | assertEquals(refVal, val);
122 | }
123 | lzfi.close();
124 | }
125 | }
126 |
--------------------------------------------------------------------------------
/VERSION.txt:
--------------------------------------------------------------------------------
1 | 1.2.0 (not yet released)
2 |
3 | #54: Validate arguments for `Unsafe` codecs
4 | (contributed by @Marcono1234)
5 | #60: Convert tests to JUnit 5 & refactor tests
6 | (contributed by @Marcono1234)
7 | #61: Prevent user code from subclassing `UnsafeChunkEncoder`
8 | (contributed by @Marcono1234)
9 | #64: Fix differences between big- and little-endian encoder
10 | (contributed by @Marcono1234)
11 | #68: Improve bounds checks
12 | (contributed by @Marcono1234)
13 | - Updated `oss-parent` dep to latest (v72)
14 |
15 | 1.1.3 (26-Sep-2025)
16 |
17 | - Updated plug-in dependencies
18 | - Published via Sonatype Central repository
19 |
20 | 1.1.2 (29-Jan-2023)
21 |
22 | #53: Treat `sun.misc` as an optional OSGi dependency
23 | (contributed by @Mailaender)
24 |
25 | 1.1.1 (25-Jan-2023)
26 |
27 | #50: `Unsafe` needs support in `module-info.java`
28 | (contributed by @robertvazan)
29 |
30 | 1.1 (07-Jan-2021)
31 |
32 | #48: Fix issues outlined by "lgtm.com"'s static analysis
33 | #49: Add Java 9+ module info using Moditect
34 | - Now requires Java 8
35 |
36 | 1.0.4 (12-Mar-2017)
37 |
38 | #43: estimateMaxWorkspaceSize() is too small
39 | (reported by Roman L, leventow@github)
40 |
41 | 1.0.3 (15-Aug-2014)
42 |
43 | #37: Incorrect de-serialization on Big Endian systems, due to incorrect usage of #numberOfTrailingZeroes
44 | (pointed out by Gireesh P, gireeshpunathil@github)
45 |
46 | 1.0.2 (09-Aug-2014)
47 |
48 | #38: Overload of factory methods and constructors in Encoders and Streams
49 | to allow specifying custom `BufferRecycler` instance
50 | (contributed by `serverperformance@github`)
51 | #39: VanillaChunkEncoder.tryCompress() not using 'inPos' as it should, potentially
52 | causing corruption in rare cases
53 | (contributed by Ryan E, rjerns@github)
54 |
55 | 1.0.1 (08-Apr-2014)
56 |
57 | #35: Fix a problem with closing of `DeflaterOutputStream` (for gzip output)
58 | that could cause corrupt state for reusable `Deflater`
59 | (contributed by thmd@github)
60 |
61 | 1.0.0 (02-Dec-2013)
62 |
63 | #34: Add `ChunkEncoder.appendEncodedIfCompresses()` for conditional compression;
64 | useful for building efficient "compress but only if it makes enough difference"
65 | processing systems
66 |
67 | 0.9.9 (25-Sep-2013)
68 |
69 | #14: Added parallel LZF compression, contributed by Cedrik
70 | (javabean@github)
71 | #25: Allow early termination of push-style `Uncompressor` operation
72 | #32: Fix for a rare NPE
73 | (suggested by francoisforster@github)
74 |
75 | 0.9.8 (09-Mar-2013)
76 |
77 | #24: Problems uncompressing certain types of binary documents
78 | - Minor perf improvement for 'appendEncoded', was not reusing buffers
79 |
80 | 0.9.7 (06-Mar-2013)
81 |
82 | #23: Add UnsafeChunkEncoder that uses 'sun.misc.Unsafe' for additional Oomph.
83 | * Add LZFEncoder.estimateMaxWorkspaceSize() to help allocate work buffers.
84 | #22: Add method(s) to allow encoding into caller-provided (pre-allocated) buffer.
85 |
86 | 0.9.6 (05-Sep-2012)
87 |
88 | #17: Add IOException subtypes 'LZFException' and 'GZIPException' (with
90 | common supertype of 'CompressionFormatException') to allow for better
90 | catching of decompression errors
91 | #19: (more) Efficient skipping with LZFInputStream, LZFFileInputStream;
92 | can skip full chunks without decoding -- much faster (as per simple tests)
93 |
94 | 0.9.5 (25-May-2012)
95 |
96 | * Add 'LZFCompressingInputStream' to allow streaming compression
97 | "in reverse" (compared to LZFOutputStream)
98 | * Add GZIP support functionality:
99 | * 'OptimizedGZIPInputStream', 'OptimizedGZIPOutputStream' which add buffer
100 | (and Inflater/Deflater) recycling for improved performance compared to
101 | default JDK implementations (uses same native ZLIB library for actual
102 | decompression)
103 | * Add "push-mode" handler, 'Uncompressor' to be used for un-/decompression
104 | with non-blocking push-style data sources (like async-http-client)
105 | * Implementations for LZF (LZFUncompressor) and GZIP (GZIPUncompressor)
106 | * 'UncompressorOutputStream' convenience wrapper to expose 'Uncompressor'
107 | as 'OutputStream'
108 |
109 | 0.9.3
110 |
111 | * Fixed Issue #12: Command-line tool out of memory
112 | (reported by nodarret@github)
113 | * Implemented Issue #16: Add LZFInputStream.readAndWrite(...) method for copying
114 | uncompressed data, avoiding an intermediate copy.
115 | * Fix for Issue #15: LZFDecoder not passing 'offset', 'length' params
116 | (reported by T.Effland)
117 | * Fix for Issue #13: problems with Unsafe decoder on some platforms
118 |
119 | 0.9.0 (and prior)
120 |
121 | * Rewrote decoder to allow ChunkDecoder variants, to allow optional use of
122 | sun.misc.Unsafe (which can boost uncompression speed by up to +50%)
123 | * #11: Input/OutputStreams not throwing IOException if reading/writing
124 | after close() called, should be.
125 | (reported by Dain S)
126 | * Fix an NPE in BufferRecycler
127 | (reported by Matt Abrams, abramsm@gmail.com)
128 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/BufferRecycler.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress;
2 |
3 | import java.lang.ref.SoftReference;
4 |
5 | /**
6 | * Simple helper class to encapsulate details of basic buffer
7 | * recycling scheme, which helps a lot (as per profiling) for
8 | * smaller encoding cases.
9 | *
10 | * @author Tatu Saloranta (tatu.saloranta@iki.fi)
11 | */
12 | public final class BufferRecycler
13 | {
14 | private final static int MIN_ENCODING_BUFFER = 4000;
15 |
16 | private final static int MIN_OUTPUT_BUFFER = 8000;
17 |
18 | /**
19 | * This
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/util/ChunkEncoderFactory.java:
--------------------------------------------------------------------------------
41 | * Uses a ThreadLocal soft-referenced BufferRecycler instance.
42 | *
43 | * @param totalLength Expected total length of content to compress; only matters
44 | * for content that is smaller than maximum chunk size (64k), to optimize
45 | * encoding hash tables
46 | *
47 | * @return ChunkEncoder constructed
48 | */
49 | public static ChunkEncoder optimalInstance(int totalLength) {
50 | try {
51 | return UnsafeChunkEncoders.createEncoder(totalLength);
52 | } catch (Exception e) {
53 | return safeInstance(totalLength);
54 | }
55 | }
56 |
57 | /**
58 | * Factory method for constructing encoder that is always passed buffer
59 | * externally, so that it will not (nor need) allocate encoding buffer.
60 | *
61 | * Uses a ThreadLocal soft-referenced BufferRecycler instance.
62 | *
63 | * @return ChunkEncoder constructed
64 | */
65 | public static ChunkEncoder optimalNonAllocatingInstance(int totalLength) {
66 | try {
67 | return UnsafeChunkEncoders.createNonAllocatingEncoder(totalLength);
68 | } catch (Exception e) {
69 | return safeNonAllocatingInstance(totalLength);
70 | }
71 | }
72 |
73 | /**
74 | * Convenience method, equivalent to:
75 | *
89 | * Uses a ThreadLocal soft-referenced BufferRecycler instance.
90 | *
91 | * @param totalLength Expected total length of content to compress; only matters
92 | * for content that is smaller than maximum chunk size (64k), to optimize
93 | * encoding hash tables
94 | *
95 | * @return ChunkEncoder constructed
96 | */
97 | public static ChunkEncoder safeInstance(int totalLength) {
98 | return new VanillaChunkEncoder(totalLength);
99 | }
100 |
101 | /**
102 | * Factory method for constructing encoder that is always passed buffer
103 | * externally, so that it will not (nor need) allocate encoding buffer.
104 | * Uses a ThreadLocal soft-referenced BufferRecycler instance.
105 | *
106 | * @return ChunkEncoder constructed
107 | */
108 | public static ChunkEncoder safeNonAllocatingInstance(int totalLength) {
109 | return VanillaChunkEncoder.nonAllocatingEncoder(totalLength);
110 | }
111 |
112 | /**
113 | * Convenience method, equivalent to:
114 | *
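A short sketch of picking an encoder from this factory (not part of the repository source; it combines the optimalInstance/safeInstance factory methods shown above with the LZFOutputStream(ChunkEncoder, OutputStream) constructor that appears further below; the wrapper class and method names are hypothetical):

import java.io.OutputStream;

import com.ning.compress.lzf.ChunkEncoder;
import com.ning.compress.lzf.LZFChunk;
import com.ning.compress.lzf.LZFOutputStream;
import com.ning.compress.lzf.util.ChunkEncoderFactory;

public class EncoderSelection {
    public static LZFOutputStream newCompressingStream(OutputStream sink, boolean requirePortable) {
        // the totalLength hint only matters for content below the 64k chunk size
        int lengthHint = LZFChunk.MAX_CHUNK_LEN;
        ChunkEncoder encoder = requirePortable
                ? ChunkEncoderFactory.safeInstance(lengthHint)
                : ChunkEncoderFactory.optimalInstance(lengthHint);
        return new LZFOutputStream(encoder, sink);
    }
}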
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/LZFOutputStream.java:
--------------------------------------------------------------------------------
43 | * Default value is 'true'
44 | */
45 | protected boolean _cfgFinishBlockOnFlush = true;
46 |
47 | /**
48 | * Flag that indicates if we have already called '_outputStream.close()'
49 | * (to avoid calling it multiple times)
50 | */
51 | protected boolean _outputStreamClosed;
52 |
53 | /*
54 | ///////////////////////////////////////////////////////////////////////
55 | // Construction, configuration
56 | ///////////////////////////////////////////////////////////////////////
57 | */
58 |
59 | public LZFOutputStream(final OutputStream outputStream)
60 | {
61 | this(ChunkEncoderFactory.optimalInstance(DEFAULT_OUTPUT_BUFFER_SIZE), outputStream);
62 | }
63 |
64 | public LZFOutputStream(final ChunkEncoder encoder, final OutputStream outputStream)
65 | {
66 | this(encoder, outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, encoder._recycler);
67 | }
68 |
69 | public LZFOutputStream(final OutputStream outputStream, final BufferRecycler bufferRecycler)
70 | {
71 | this(ChunkEncoderFactory.optimalInstance(bufferRecycler), outputStream, bufferRecycler);
72 | }
73 |
74 | public LZFOutputStream(final ChunkEncoder encoder, final OutputStream outputStream, final BufferRecycler bufferRecycler)
75 | {
76 | this(encoder, outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, bufferRecycler);
77 | }
78 |
79 | public LZFOutputStream(final ChunkEncoder encoder, final OutputStream outputStream,
80 | final int bufferSize, BufferRecycler bufferRecycler)
81 | {
82 | super(outputStream);
83 | _encoder = encoder;
84 | if (bufferRecycler==null) {
85 | bufferRecycler = _encoder._recycler;
86 | }
87 | _recycler = bufferRecycler;
88 | _outputBuffer = bufferRecycler.allocOutputBuffer(bufferSize);
89 | _outputStreamClosed = false;
90 | }
91 |
92 | /**
93 | * Method for defining whether call to {@link #flush} will also complete
94 | * current block (similar to calling {@link #finishBlock()}) or not.
95 | */
96 | public LZFOutputStream setFinishBlockOnFlush(boolean b) {
97 | _cfgFinishBlockOnFlush = b;
98 | return this;
99 | }
100 |
101 | /*
102 | ///////////////////////////////////////////////////////////////////////
103 | // OutputStream impl
104 | ///////////////////////////////////////////////////////////////////////
105 | */
106 |
107 | @Override
108 | public void write(final int singleByte) throws IOException
109 | {
110 | checkNotClosed();
111 | if (_position >= _outputBuffer.length) {
112 | writeCompressedBlock();
113 | }
114 | _outputBuffer[_position++] = (byte) singleByte;
115 | }
116 |
117 | @Override
118 | public void write(final byte[] buffer, int offset, int length) throws IOException
119 | {
120 | checkNotClosed();
121 |
122 | final int BUFFER_LEN = _outputBuffer.length;
123 |
124 | // simple case first: empty _outputBuffer and "big" input buffer: write first full blocks, if any, without copying
125 | while (_position == 0 && length >= BUFFER_LEN) {
126 | _encoder.encodeAndWriteChunk(buffer, offset, BUFFER_LEN, out);
127 | offset += BUFFER_LEN;
128 | length -= BUFFER_LEN;
129 | }
130 |
131 | // simple case first: buffering only (for trivially short writes)
132 | int free = BUFFER_LEN - _position;
133 | if (free > length) {
134 | System.arraycopy(buffer, offset, _outputBuffer, _position, length);
135 | _position += length;
136 | return;
137 | }
138 | // otherwise, copy whatever we can, flush
139 | System.arraycopy(buffer, offset, _outputBuffer, _position, free);
140 | offset += free;
141 | length -= free;
142 | _position += free;
143 | writeCompressedBlock();
144 |
145 | // then write intermediate full blocks, if any, without copying:
146 | while (length >= BUFFER_LEN) {
147 | _encoder.encodeAndWriteChunk(buffer, offset, BUFFER_LEN, out);
148 | offset += BUFFER_LEN;
149 | length -= BUFFER_LEN;
150 | }
151 |
152 | // and finally, copy leftovers in buffer, if any
153 | if (length > 0) {
154 | System.arraycopy(buffer, offset, _outputBuffer, 0, length);
155 | }
156 | _position = length;
157 | }
158 |
159 | public void write(final InputStream in) throws IOException {
160 | writeCompressedBlock(); // will flush _outputBuffer
161 | int read;
162 | while ((read = in.read(_outputBuffer)) >= 0) {
163 | _position = read;
164 | writeCompressedBlock();
165 | }
166 | }
167 |
168 | public void write(final FileChannel in) throws IOException {
169 | MappedByteBuffer src = in.map(MapMode.READ_ONLY, 0, in.size());
170 | write(src);
171 | }
172 |
173 | @Override
174 | public synchronized int write(final ByteBuffer src) throws IOException {
175 | int r = src.remaining();
176 | if (r <= 0) {
177 | return r;
178 | }
179 | writeCompressedBlock(); // will flush _outputBuffer
180 | if (src.hasArray()) {
181 | // direct compression from backing array
182 | write(src.array(), src.arrayOffset(), src.limit() - src.arrayOffset());
183 | } else {
184 | // need to copy to heap array first
185 | while (src.hasRemaining()) {
186 | int toRead = Math.min(src.remaining(), _outputBuffer.length);
187 | src.get(_outputBuffer, 0, toRead);
188 | _position = toRead;
189 | writeCompressedBlock();
190 | }
191 | }
192 | return r;
193 | }
194 |
195 | @Override
196 | public void flush() throws IOException
197 | {
198 | checkNotClosed();
199 | if (_cfgFinishBlockOnFlush && _position > 0) {
200 | writeCompressedBlock();
201 | }
202 | super.flush();
203 | }
204 |
205 | @Override
206 | public boolean isOpen() {
207 | return ! _outputStreamClosed;
208 | }
209 |
210 | @Override
211 | public void close() throws IOException
212 | {
213 | if (!_outputStreamClosed) {
214 | if (_position > 0) {
215 | writeCompressedBlock();
216 | }
217 | super.close(); // will flush beforehand
218 | _encoder.close();
219 | _outputStreamClosed = true;
220 | byte[] buf = _outputBuffer;
221 | if (buf != null) {
222 | _outputBuffer = null;
223 | _recycler.releaseOutputBuffer(buf);
224 | }
225 | }
226 | }
227 |
228 | /*
229 | ///////////////////////////////////////////////////////////////////////
230 | // Additional public methods
231 | ///////////////////////////////////////////////////////////////////////
232 | */
233 |
234 | /**
235 | * Method that can be used to find underlying {@link OutputStream} that
236 | * we write LZF-encoded data into, after compressing it.
237 | * Will never return null, although the underlying stream may be closed
238 | * (if this stream has been closed).
239 | */
240 | public OutputStream getUnderlyingOutputStream() {
241 | return out;
242 | }
243 |
244 | /**
245 | * Accessor for checking whether call to "flush()" will first finish the
246 | * current block or not.
247 | */
248 | public boolean getFinishBlockOnFlush() {
249 | return _cfgFinishBlockOnFlush;
250 | }
251 |
252 | /**
253 | * Method that can be used to force completion of the current block,
254 | * which means that all buffered data will be compressed into an
255 | * LZF block. This typically results in lower compression ratio
256 | * as larger blocks compress better; but may be necessary for
257 | * network connections to ensure timely sending of data.
258 | */
259 | public LZFOutputStream finishBlock() throws IOException
260 | {
261 | checkNotClosed();
262 | if (_position > 0) {
263 | writeCompressedBlock();
264 | }
265 | return this;
266 | }
267 |
268 | /*
269 | ///////////////////////////////////////////////////////////////////////
270 | // Internal methods
271 | ///////////////////////////////////////////////////////////////////////
272 | */
273 |
274 | /**
275 | * Compress and write the current block to the OutputStream
276 | */
277 | protected void writeCompressedBlock() throws IOException
278 | {
279 | int left = _position;
280 | _position = 0;
281 | int offset = 0;
282 |
283 | while (left > 0) {
284 | int chunkLen = Math.min(LZFChunk.MAX_CHUNK_LEN, left);
285 | _encoder.encodeAndWriteChunk(_outputBuffer, offset, chunkLen, out);
286 | offset += chunkLen;
287 | left -= chunkLen;
288 | }
289 | }
290 |
291 | protected void checkNotClosed() throws IOException
292 | {
293 | if (_outputStreamClosed) {
294 | throw new IOException(getClass().getName()+" already closed");
295 | }
296 | }
297 | }
298 |
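
A minimal usage sketch of the stream above (illustrative, not part of the repository source; the class name and payload are made up). Writes are buffered and emitted as LZF chunks of at most LZFChunk.MAX_CHUNK_LEN bytes; finishBlock() forces the buffered data out as a complete chunk, e.g. for timely delivery over a socket.

import java.io.ByteArrayOutputStream;
import com.ning.compress.lzf.LZFOutputStream;

public class LZFOutputStreamSketch {
    public static void main(String[] args) throws Exception {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (LZFOutputStream lzfOut = new LZFOutputStream(sink)) {
            lzfOut.write("hello hello hello hello".getBytes("UTF-8"));
            // Complete the current block now (may lower compression ratio,
            // since larger blocks compress better):
            lzfOut.finishBlock();
            lzfOut.write("more data".getBytes("UTF-8"));
        } // close() compresses and writes any remaining buffered bytes
        System.out.println("compressed size: " + sink.size());
    }
}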
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/lzf/TestFuzzUnsafeLZF.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import com.code_intelligence.jazzer.junit.FuzzTest;
4 | import com.code_intelligence.jazzer.mutation.annotation.InRange;
5 | import com.code_intelligence.jazzer.mutation.annotation.NotNull;
6 | import com.code_intelligence.jazzer.mutation.annotation.WithLength;
7 | import com.ning.compress.BufferRecycler;
8 | import com.ning.compress.lzf.impl.*;
9 |
10 | import java.io.ByteArrayInputStream;
11 | import java.io.IOException;
12 | import java.io.OutputStream;
13 | import java.lang.annotation.Retention;
14 | import java.lang.annotation.RetentionPolicy;
15 | import java.util.Arrays;
16 | import java.util.stream.Stream;
17 |
18 | import static org.junit.jupiter.api.Assertions.assertArrayEquals;
19 | import static org.junit.jupiter.api.Assertions.assertEquals;
20 |
21 | /**
22 | * Fuzzing test using Jazzer (https://github.com/CodeIntelligenceTesting/jazzer/) for
23 | * LZF decoder and encoder implementations that use {@link sun.misc.Unsafe}.
24 | *
25 | * By default the tests are run in 'regression mode' where no fuzzing is performed.
26 | * To run in 'fuzzing mode' set the environment variable {@code JAZZER_FUZZ=1}, see
27 | * also the {@code pom.xml} of this project.
28 | *
29 | * See the Jazzer README for more information.
30 | */
31 | public class TestFuzzUnsafeLZF {
32 | /*
33 | * Important:
34 | * These fuzz test methods all have to be listed separately in the `pom.xml` to
35 | * support running them in fuzzing mode, see https://github.com/CodeIntelligenceTesting/jazzer/issues/599
36 | */
37 |
38 | @FuzzTest(maxDuration = "30s")
39 | @Retention(RetentionPolicy.RUNTIME)
40 | @interface LZFFuzzTest {
41 | }
42 |
43 | // This fuzz test performs decoding twice and verifies that the result is the same (either the same decoded value, or an exception in both cases)
44 | @LZFFuzzTest
45 | void decode(byte @NotNull @WithLength(min = 0, max = 32767) [] input, byte @NotNull [] suffix, @InRange(min = 0, max = 32767) int outputSize) {
46 | byte[] output = new byte[outputSize];
47 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder();
48 |
49 | byte[] input1 = input.clone();
50 |
51 | // For the second decoding, append a suffix which should be ignored
52 | byte[] input2 = new byte[input.length + suffix.length];
53 | System.arraycopy(input, 0, input2, 0, input.length);
54 | // Append suffix
55 | System.arraycopy(suffix, 0, input2, input.length, suffix.length);
56 |
57 | byte[] decoded1 = null;
58 | try {
59 | int decodedLen = decoder.decode(input1, 0, input.length, output);
60 | decoded1 = Arrays.copyOf(output, decodedLen);
61 | } catch (LZFException | ArrayIndexOutOfBoundsException ignored) {
62 | }
63 |
64 | // Repeat decoding, this time with (ignored) suffix and prefilled output
65 | // Should lead to same decoded result
66 | Arrays.fill(output, (byte) 0xFF);
67 | byte[] decoded2 = null;
68 | try {
69 | int decodedLen = decoder.decode(input2, 0, input.length, output);
70 | decoded2 = Arrays.copyOf(output, decodedLen);
71 | } catch (LZFException | ArrayIndexOutOfBoundsException ignored) {
72 | }
73 |
74 | assertArrayEquals(decoded1, decoded2);
75 |
76 | // Compare with result of vanilla decoder
77 | byte[] decodedVanilla = null;
78 | try {
79 | int decodedLen = new VanillaChunkDecoder().decode(input, output);
80 | decodedVanilla = Arrays.copyOf(output, decodedLen);
81 | } catch (Exception ignored) {
82 | }
83 | assertArrayEquals(decodedVanilla, decoded1);
84 |
85 | }
86 |
87 | @LZFFuzzTest
88 | // `boolean dummy` parameter is a workaround for https://github.com/CodeIntelligenceTesting/jazzer/issues/1022
89 | void roundtrip(byte @NotNull @WithLength(min = 1, max = 32767) [] input, boolean dummy) throws LZFException {
90 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder();
91 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) {
92 | byte[] decoded = decoder.decode(LZFEncoder.encode(encoder, input.clone(), input.length));
93 | assertArrayEquals(input, decoded);
94 | }
95 | }
96 |
97 |
98 | // Note: These encoder fuzz tests only cover the encoder implementation matching the platform endianness;
99 | // we don't cover the other endianness here because that could lead to failures due simply to endianness
100 | // mismatch rather than an actual bug in the implementation
101 |
102 | @LZFFuzzTest
103 | void encode(byte @NotNull @WithLength(min = 1, max = 32767) [] input, byte @NotNull [] suffix) {
104 | byte[] input1 = input.clone();
105 |
106 | // For the second encoding, append a suffix which should be ignored
107 | byte[] input2 = new byte[input.length + suffix.length];
108 | System.arraycopy(input, 0, input2, 0, input.length);
109 | // Append suffix
110 | System.arraycopy(suffix, 0, input2, input.length, suffix.length);
111 |
112 | byte[] encoded1;
113 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) {
114 | encoded1 = LZFEncoder.encode(encoder, input1, input.length);
115 | }
116 |
117 | byte[] encoded2;
118 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) {
119 | encoded2 = LZFEncoder.encode(encoder, input2, input.length);
120 | }
121 | assertArrayEquals(encoded1, encoded2);
122 |
123 | // Compare with result of vanilla encoder
124 | byte[] encodedVanilla;
125 | try (VanillaChunkEncoder encoder = new VanillaChunkEncoder(input.length, new BufferRecycler())) {
126 | encodedVanilla = LZFEncoder.encode(encoder, input, input.length);
127 | }
128 | assertArrayEquals(encodedVanilla, encoded1);
129 | }
130 |
131 | @LZFFuzzTest
132 | void encodeAppend(byte @NotNull @WithLength(min = 1, max = 32767) [] input, @InRange(min = 0, max = 32767) int outputSize) {
133 | byte[] output = new byte[outputSize];
134 | // Prefill output; should have no effect on encoded result
135 | Arrays.fill(output, (byte) 0xFF);
136 | int encodedLen;
137 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) {
138 | encodedLen = LZFEncoder.appendEncoded(encoder, input.clone(), 0, input.length, output, 0);
139 | } catch (ArrayIndexOutOfBoundsException | IllegalArgumentException ignored) {
140 | // Skip comparison with vanilla encoder
141 | return;
142 | }
143 |
144 | byte[] encodedUnsafe = Arrays.copyOf(output, encodedLen);
145 |
146 | // Compare with result of vanilla encoder
147 | Arrays.fill(output, (byte) 0);
148 | try (VanillaChunkEncoder encoder = new VanillaChunkEncoder(input.length, new BufferRecycler())) {
149 | encodedLen = LZFEncoder.appendEncoded(encoder, input, 0, input.length, output, 0);
150 | }
151 | // TODO: VanillaChunkEncoder performs out-of-bounds array index whereas UnsafeChunkEncoder does not (not sure which one is correct)
152 | // Why do they even have different `_handleTail` implementations, UnsafeChunkEncoder is not using Unsafe there?
153 | catch (ArrayIndexOutOfBoundsException ignored) {
154 | return;
155 | }
156 | byte[] encodedVanilla = Arrays.copyOf(output, encodedLen);
157 | assertArrayEquals(encodedVanilla, encodedUnsafe);
158 | }
159 |
160 | // Note: Also cover LZFInputStream and LZFOutputStream because they partly use methods of the decoder and encoder
161 | // which are otherwise not reachable
162 |
163 | @LZFFuzzTest
164 | void inputStreamRead(byte @NotNull @WithLength(min = 0, max = 32767) [] input, @InRange(min = 1, max = 32767) int readBufferSize) throws IOException {
165 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder();
166 | try (LZFInputStream inputStream = new LZFInputStream(decoder, new ByteArrayInputStream(input), new BufferRecycler(), false)) {
167 | byte[] readBuffer = new byte[readBufferSize];
168 | while (inputStream.read(readBuffer) != -1) {
169 | // Do nothing, just consume the data
170 | }
171 | } catch (LZFException | ArrayIndexOutOfBoundsException ignored) {
172 | }
173 | // TODO: This IndexOutOfBoundsException occurs because LZFInputStream makes an invalid call to ByteArrayInputStream
174 | // The reason seems to be that `_inputBuffer` is only MAX_CHUNK_LEN large, but should be `2 + MAX_CHUNK_LEN` to
175 | // account for first two bytes encoding the length? (might affect more places in code)
176 | catch (IndexOutOfBoundsException ignored) {
177 | }
178 | }
179 |
180 | @LZFFuzzTest
181 | void inputStreamSkip(byte @NotNull @WithLength(min = 0, max = 32767) [] input, @InRange(min = 1, max = 32767) int skipCount) throws IOException {
182 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder();
183 | try (LZFInputStream inputStream = new LZFInputStream(decoder, new ByteArrayInputStream(input), new BufferRecycler(), false)) {
184 | while (inputStream.skip(skipCount) > 0) {
185 | // Do nothing, just consume the data
186 | }
187 | } catch (LZFException ignored) {
188 | }
189 | // TODO: This IndexOutOfBoundsException occurs because LZFInputStream makes an invalid call to ByteArrayInputStream
190 | // The reason seems to be that `_inputBuffer` is only MAX_CHUNK_LEN large, but should be `2 + MAX_CHUNK_LEN` to
191 | // account for first two bytes encoding the length? (might affect more places in code)
192 | catch (IndexOutOfBoundsException ignored) {
193 | }
194 | }
195 |
196 | private static class NullOutputStream extends OutputStream {
197 | public static final OutputStream INSTANCE = new NullOutputStream();
198 |
199 | private NullOutputStream() {
200 | }
201 |
202 | @Override
203 | public void write(int b) {
204 | // Do nothing
205 | }
206 |
207 | @Override
208 | public void write(byte[] b, int off, int len) {
209 | // Do nothing
210 | }
211 | }
212 |
213 | @LZFFuzzTest
214 | // Generates multiple arrays and writes them separately
215 | void outputStream(byte @NotNull @WithLength(min = 1, max = 10) [] @NotNull @WithLength(min = 1) [] arrays, @InRange(min = 1, max = 32767) int bufferSize) throws IOException {
216 | int totalLength = Stream.of(arrays).mapToInt(a -> a.length).sum();
217 |
218 | UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(totalLength, new BufferRecycler());
219 | try (LZFOutputStream outputStream = new LZFOutputStream(encoder, NullOutputStream.INSTANCE, bufferSize, null)) {
220 | for (byte[] array : arrays) {
221 | outputStream.write(array);
222 | }
223 | }
224 | }
225 | }
226 |
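
The decode tests above hinge on a differential invariant: the Unsafe-backed and vanilla decoders must agree on every input. A standalone sketch of that invariant (the seed input here is hand-picked and hypothetical; real coverage comes from Jazzer's mutation engine):

import java.util.Arrays;
import com.ning.compress.lzf.LZFEncoder;
import com.ning.compress.lzf.impl.UnsafeChunkDecoder;
import com.ning.compress.lzf.impl.VanillaChunkDecoder;

public class DifferentialDecodeSketch {
    public static void main(String[] args) throws Exception {
        byte[] data = new byte[1000];
        Arrays.fill(data, (byte) 'a');        // highly compressible seed
        byte[] lzf = LZFEncoder.encode(data); // well-formed chunk to decode
        byte[] a = new UnsafeChunkDecoder().decode(lzf);
        byte[] b = new VanillaChunkDecoder().decode(lzf);
        if (!Arrays.equals(a, b)) {
            throw new AssertionError("decoder implementations disagree");
        }
    }
}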
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/parallel/PLZFOutputStream.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.parallel;
2 |
3 | import java.io.FilterOutputStream;
4 | import java.io.IOException;
5 | import java.io.InputStream;
6 | import java.io.OutputStream;
7 | import java.lang.management.ManagementFactory;
8 | import java.lang.management.OperatingSystemMXBean;
9 | import java.nio.ByteBuffer;
10 | import java.nio.MappedByteBuffer;
11 | import java.nio.channels.FileChannel;
12 | import java.nio.channels.FileChannel.MapMode;
13 | import java.nio.channels.WritableByteChannel;
14 | import java.util.ArrayList;
15 | import java.util.Collection;
16 | import java.util.concurrent.ExecutorService;
17 | import java.util.concurrent.Executors;
18 | import java.util.concurrent.Future;
19 | import java.util.concurrent.LinkedBlockingQueue;
20 | import java.util.concurrent.ThreadPoolExecutor;
21 | import java.util.concurrent.TimeUnit;
22 |
23 | import com.ning.compress.lzf.LZFChunk;
24 |
25 | /**
26 | * Decorator {@link OutputStream} implementation that will compress
27 | * output using the LZF compression algorithm, given uncompressed input
28 | * to write. Its counterpart is {@link com.ning.compress.lzf.LZFInputStream}; although
29 | * in some ways {@link com.ning.compress.lzf.LZFCompressingInputStream} can be seen
30 | * as the opposite.
31 | *
32 | * This class uses a parallel implementation to make use of all available cores,
33 | * modulo system load.
34 | *
35 | * @author Tatu Saloranta
36 | * @author Cédrik Lime
37 | *
38 | * @see com.ning.compress.lzf.LZFInputStream
39 | * @see com.ning.compress.lzf.LZFCompressingInputStream
40 | * @see com.ning.compress.lzf.LZFOutputStream
41 | */
42 | public class PLZFOutputStream extends FilterOutputStream implements WritableByteChannel
43 | {
44 | private static final int DEFAULT_OUTPUT_BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN;
45 |
46 | protected byte[] _outputBuffer;
47 | protected int _position = 0;
48 |
49 | /**
50 | * Flag that indicates if we have already called '_outputStream.close()'
51 | * (to avoid calling it multiple times)
52 | */
53 | protected boolean _outputStreamClosed;
54 |
55 | private BlockManager blockManager;
56 | private final ExecutorService compressExecutor;
57 | private final ExecutorService writeExecutor;
58 | volatile Exception writeException = null;
59 |
60 |
61 | /*
62 | ///////////////////////////////////////////////////////////////////////
63 | // Construction, configuration
64 | ///////////////////////////////////////////////////////////////////////
65 | */
66 |
67 | public PLZFOutputStream(final OutputStream outputStream) {
68 | this(outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, getNThreads());
69 | }
70 |
71 | protected PLZFOutputStream(final OutputStream outputStream, int nThreads) {
72 | this(outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, nThreads);
73 | }
74 |
75 | protected PLZFOutputStream(final OutputStream outputStream, final int bufferSize, int nThreads) {
76 | super(outputStream);
77 | _outputStreamClosed = false;
78 | compressExecutor = new ThreadPoolExecutor(nThreads, nThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/LZFDecoder.java:
--------------------------------------------------------------------------------
18 | /**
19 | * Each <code>ThreadLocal</code> contains a {@link java.lang.ref.SoftReference}
20 | * to a {@link BufferRecycler} used to provide a low-cost
21 | * buffer recycling for buffers we need for encoding, decoding.
22 | */
23 | final protected static ThreadLocal<SoftReference<BufferRecycler>> _recyclerRef = new ThreadLocal<SoftReference<BufferRecycler>>();
33 | /**
34 | * Lazily initialized decoder instance that may use <code>sun.misc.Unsafe</code>
35 | * to speed up decompression
36 | */
37 | protected final static AtomicReference<ChunkDecoder> _fastDecoderRef = new AtomicReference<ChunkDecoder>();
39 | /**
40 | * Lazily initialized decoder instance that does not use <code>sun.misc.Unsafe</code>
41 | * for decompression, just standard JDK functionality.
42 | */
43 | protected final static AtomicReference<ChunkDecoder> _safeDecoderRef = new AtomicReference<ChunkDecoder>();
52 | /**
53 | * Accessor method that can be used to obtain {@link ChunkDecoder}
54 | * that uses the fastest implementation available, one that may use <code>sun.misc.Unsafe</code> for memory access.
55 | */
56 | public static ChunkDecoder fastDecoder() {
57 | // race conditions are ok here, we don't really mind
58 | ChunkDecoder dec = _fastDecoderRef.get();
59 | if (dec == null) { //
60 | dec = ChunkDecoderFactory.optimalInstance();
61 | _fastDecoderRef.compareAndSet(null, dec);
62 | }
63 | return dec;
64 | }
65 |
66 | /**
67 | * Accessor method that can be used to obtain {@link ChunkDecoder}
68 | * that only uses standard JDK access methods, and should work on
69 | * all Java platforms and JVMs.
70 | */
71 | public static ChunkDecoder safeDecoder() {
72 | // race conditions are ok here, we don't really mind
73 | ChunkDecoder dec = _safeDecoderRef.get();
74 | if (dec == null) { //
75 | dec = ChunkDecoderFactory.safeInstance();
76 | _safeDecoderRef.compareAndSet(null, dec);
77 | }
78 | return dec;
79 | }
80 |
81 | /*
82 | ///////////////////////////////////////////////////////////////////////
83 | // Basic API, general
84 | ///////////////////////////////////////////////////////////////////////
85 | */
86 |
87 | /**
88 | * Helper method that calculates the uncompressed size of an LZF chunk, regardless of
89 | * whether it contains compressed or uncompressed contents.
90 | */
91 | public static int calculateUncompressedSize(byte[] data, int offset, int length) throws LZFException {
92 | return ChunkDecoder.calculateUncompressedSize(data, offset, length);
93 | }
94 |
95 | /*
96 | ///////////////////////////////////////////////////////////////////////
97 | // Basic API, fast decode methods
98 | ///////////////////////////////////////////////////////////////////////
99 | */
100 |
101 | public static byte[] decode(final byte[] inputBuffer) throws LZFException {
102 | return fastDecoder().decode(inputBuffer, 0, inputBuffer.length);
103 | }
104 |
105 | public static byte[] decode(final byte[] inputBuffer, int offset, int length) throws LZFException {
106 | return fastDecoder().decode(inputBuffer, offset, length);
107 | }
108 |
109 | public static int decode(final byte[] inputBuffer, final byte[] targetBuffer) throws LZFException {
110 | return fastDecoder().decode(inputBuffer, 0, inputBuffer.length, targetBuffer);
111 | }
112 |
113 | public static int decode(final byte[] sourceBuffer, int offset, int length, final byte[] targetBuffer)
114 | throws LZFException {
115 | return fastDecoder().decode(sourceBuffer, offset, length, targetBuffer);
116 | }
117 |
118 | /*
119 | ///////////////////////////////////////////////////////////////////////
120 | // Basic API, "safe" decode methods
121 | ///////////////////////////////////////////////////////////////////////
122 | */
123 |
124 | public static byte[] safeDecode(final byte[] inputBuffer) throws LZFException {
125 | return safeDecoder().decode(inputBuffer, 0, inputBuffer.length);
126 | }
127 |
128 | public static byte[] safeDecode(final byte[] inputBuffer, int offset, int length) throws LZFException {
129 | return safeDecoder().decode(inputBuffer, offset, length);
130 | }
131 |
132 | public static int safeDecode(final byte[] inputBuffer, final byte[] targetBuffer) throws LZFException {
133 | return safeDecoder().decode(inputBuffer, 0, inputBuffer.length, targetBuffer);
134 | }
135 |
136 | public static int safeDecode(final byte[] sourceBuffer, int offset, int length, final byte[] targetBuffer)
137 | throws LZFException {
138 | return safeDecoder().decode(sourceBuffer, offset, length, targetBuffer);
139 | }
140 | }
141 |
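
A round-trip sketch using the static helpers above (illustrative, not repository source): decode() goes through fastDecoder(), safeDecode() through the pure-JDK decoder, and both must reproduce the original bytes.

import java.util.Arrays;
import com.ning.compress.lzf.LZFDecoder;
import com.ning.compress.lzf.LZFEncoder;
import com.ning.compress.lzf.LZFException;

public class LZFDecoderSketch {
    public static void main(String[] args) throws LZFException {
        byte[] original = "payload payload payload payload".getBytes();
        byte[] compressed = LZFEncoder.encode(original);
        byte[] fast = LZFDecoder.decode(compressed);     // optimal implementation
        byte[] safe = LZFDecoder.safeDecode(compressed); // JDK-only implementation
        System.out.println(Arrays.equals(original, fast)
                && Arrays.equals(original, safe));       // prints true
    }
}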
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/LZFChunk.java:
--------------------------------------------------------------------------------
1 | /* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
2 | * file except in compliance with the License. You may obtain a copy of the License at
3 | *
4 | * http://www.apache.org/licenses/LICENSE-2.0
5 | *
6 | * Unless required by applicable law or agreed to in writing, software distributed under
7 | * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
8 | * OF ANY KIND, either express or implied. See the License for the specific language
9 | * governing permissions and limitations under the License.
10 | */
11 |
12 | package com.ning.compress.lzf;
13 |
14 | import java.io.*;
15 |
16 | /**
17 | * Helper class used to store LZF encoded segments (compressed and non-compressed)
18 | * that can be sequenced to produce LZF files/streams.
19 | *
20 | * @author Tatu Saloranta
21 | */
22 | public class LZFChunk
23 | {
24 | /**
25 | * Maximum length of literal run for LZF encoding.
26 | */
27 | public static final int MAX_LITERAL = 1 << 5; // 32
28 |
29 | /**
30 | * Chunk length is limited by 2-byte length indicator, to 64k
31 | */
32 | public static final int MAX_CHUNK_LEN = 0xFFFF;
33 |
34 | /**
35 | * Header can be either 7 bytes (compressed) or 5 bytes (uncompressed)
36 | * long
37 | */
38 | public static final int MAX_HEADER_LEN = 7;
39 |
40 | public static final int HEADER_LEN_COMPRESSED = 7;
41 | public static final int HEADER_LEN_NOT_COMPRESSED = 5;
42 |
43 | public final static byte BYTE_Z = 'Z';
44 | public final static byte BYTE_V = 'V';
45 |
46 | public final static int BLOCK_TYPE_NON_COMPRESSED = 0;
47 | public final static int BLOCK_TYPE_COMPRESSED = 1;
48 |
49 |
50 | protected final byte[] _data;
51 | protected LZFChunk _next;
52 |
53 | private LZFChunk(byte[] data) { _data = data; }
54 |
55 | /**
56 | * Factory method for constructing compressed chunk
57 | */
58 | public static LZFChunk createCompressed(int origLen, byte[] encData, int encPtr, int encLen)
59 | {
60 | byte[] result = new byte[encLen + HEADER_LEN_COMPRESSED];
61 | result[0] = BYTE_Z;
62 | result[1] = BYTE_V;
63 | result[2] = BLOCK_TYPE_COMPRESSED;
64 | result[3] = (byte) (encLen >> 8);
65 | result[4] = (byte) encLen;
66 | result[5] = (byte) (origLen >> 8);
67 | result[6] = (byte) origLen;
68 | System.arraycopy(encData, encPtr, result, HEADER_LEN_COMPRESSED, encLen);
69 | return new LZFChunk(result);
70 | }
71 |
72 | public static int appendCompressedHeader(int origLen, int encLen, byte[] headerBuffer, int offset)
73 | {
74 | headerBuffer[offset++] = BYTE_Z;
75 | headerBuffer[offset++] = BYTE_V;
76 | headerBuffer[offset++] = BLOCK_TYPE_COMPRESSED;
77 | headerBuffer[offset++] = (byte) (encLen >> 8);
78 | headerBuffer[offset++] = (byte) encLen;
79 | headerBuffer[offset++] = (byte) (origLen >> 8);
80 | headerBuffer[offset++] = (byte) origLen;
81 | return offset;
82 | }
83 |
84 | public static void writeCompressedHeader(int origLen, int encLen, OutputStream out, byte[] headerBuffer)
85 | throws IOException
86 | {
87 | headerBuffer[0] = BYTE_Z;
88 | headerBuffer[1] = BYTE_V;
89 | headerBuffer[2] = BLOCK_TYPE_COMPRESSED;
90 | headerBuffer[3] = (byte) (encLen >> 8);
91 | headerBuffer[4] = (byte) encLen;
92 | headerBuffer[5] = (byte) (origLen >> 8);
93 | headerBuffer[6] = (byte) origLen;
94 | out.write(headerBuffer, 0, HEADER_LEN_COMPRESSED);
95 | }
96 |
97 | /**
98 | * Factory method for constructing non-compressed chunk
99 | */
100 | public static LZFChunk createNonCompressed(byte[] plainData, int ptr, int len)
101 | {
102 | byte[] result = new byte[len + HEADER_LEN_NOT_COMPRESSED];
103 | result[0] = BYTE_Z;
104 | result[1] = BYTE_V;
105 | result[2] = BLOCK_TYPE_NON_COMPRESSED;
106 | result[3] = (byte) (len >> 8);
107 | result[4] = (byte) len;
108 | System.arraycopy(plainData, ptr, result, HEADER_LEN_NOT_COMPRESSED, len);
109 | return new LZFChunk(result);
110 | }
111 |
112 | /**
113 | * Method for appending specific content as non-compressed chunk, in
114 | * given buffer.
115 | */
116 | public static int appendNonCompressed(byte[] plainData, int ptr, int len,
117 | byte[] outputBuffer, int outputPtr)
118 | {
119 | outputBuffer[outputPtr++] = BYTE_Z;
120 | outputBuffer[outputPtr++] = BYTE_V;
121 | outputBuffer[outputPtr++] = BLOCK_TYPE_NON_COMPRESSED;
122 | outputBuffer[outputPtr++] = (byte) (len >> 8);
123 | outputBuffer[outputPtr++] = (byte) len;
124 | System.arraycopy(plainData, ptr, outputBuffer, outputPtr, len);
125 | return outputPtr + len;
126 | }
127 |
128 | public static int appendNonCompressedHeader(int len, byte[] headerBuffer, int offset)
129 | {
130 | headerBuffer[offset++] = BYTE_Z;
131 | headerBuffer[offset++] = BYTE_V;
132 | headerBuffer[offset++] = BLOCK_TYPE_NON_COMPRESSED;
133 | headerBuffer[offset++] = (byte) (len >> 8);
134 | headerBuffer[offset++] = (byte) len;
135 | return offset;
136 | }
137 |
138 | public static void writeNonCompressedHeader(int len, OutputStream out, byte[] headerBuffer)
139 | throws IOException
140 | {
141 | headerBuffer[0] = BYTE_Z;
142 | headerBuffer[1] = BYTE_V;
143 | headerBuffer[2] = BLOCK_TYPE_NON_COMPRESSED;
144 | headerBuffer[3] = (byte) (len >> 8);
145 | headerBuffer[4] = (byte) len;
146 | out.write(headerBuffer, 0, HEADER_LEN_NOT_COMPRESSED);
147 | }
148 |
149 | public void setNext(LZFChunk next) { _next = next; }
150 |
151 | public LZFChunk next() { return _next; }
152 | public int length() { return _data.length; }
153 | public byte[] getData() { return _data; }
154 |
155 | public int copyTo(byte[] dst, int ptr) {
156 | int len = _data.length;
157 | System.arraycopy(_data, 0, dst, ptr, len);
158 | return ptr+len;
159 | }
160 | }
161 |
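
A worked example of the framing defined above (illustrative, not repository source): a non-compressed chunk wrapping the three bytes {1, 2, 3} gets the 5-byte header 'Z', 'V', 0 (block type), then 0, 3 (big-endian length).

import com.ning.compress.lzf.LZFChunk;

public class LZFChunkSketch {
    public static void main(String[] args) {
        LZFChunk chunk = LZFChunk.createNonCompressed(new byte[] {1, 2, 3}, 0, 3);
        byte[] raw = chunk.getData(); // {90, 86, 0, 0, 3, 1, 2, 3} ('Z'=90, 'V'=86)
        System.out.println(raw.length); // prints 8: HEADER_LEN_NOT_COMPRESSED + 3
    }
}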
--------------------------------------------------------------------------------
/src/test/java/perf/ManualUnsafePerf.java:
--------------------------------------------------------------------------------
1 | package perf;
2 |
3 | import java.lang.reflect.Field;
4 |
5 | import sun.misc.Unsafe;
6 |
7 | @SuppressWarnings("restriction")
8 | public class ManualUnsafePerf
9 | {
10 | protected static final Unsafe unsafe;
11 | static {
12 | try {
13 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
14 | theUnsafe.setAccessible(true);
15 | unsafe = (Unsafe) theUnsafe.get(null);
16 | }
17 | catch (Exception e) {
18 | throw new RuntimeException(e);
19 | }
20 | }
21 |
22 | protected static final long BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
23 |
24 | protected static final long CHAR_ARRAY_OFFSET = unsafe.arrayBaseOffset(char[].class);
25 |
26 | static final int INPUT_LEN = 48;
27 |
28 | private void test() throws Exception
29 | {
30 | // Let's try to guesstimate suitable size... to get to 10 megs to process
31 | // but, with more docs, give more time
32 | final int REPS = 2500 * 1000;
33 |
34 | final int WARMUP_ROUNDS = 5;
35 | int roundTotal = 0;
36 | int roundsDone = 0;
37 | final String[] names = new String[] {"Decode/JDK", "Decode/Unsafe" };
38 | final int TYPES = names.length;
39 | final long[] times = new long[TYPES];
40 |
41 | StringBuilder sb = new StringBuilder();
42 | for (int i = 0; i < INPUT_LEN; ++i) {
43 | sb.append((char) ('A'+i));
44 | }
45 | byte[] INPUT = new byte[INPUT_LEN + 8];
46 | {
47 | byte[] b = sb.toString().getBytes("UTF-8");
48 | System.arraycopy(b, 0, INPUT, 4, INPUT_LEN);
49 | }
50 |
51 | for (;; ++roundTotal) {
52 | try { Thread.sleep(100L); } catch (InterruptedException ie) { }
53 | int round = (roundTotal % TYPES);
54 | String msg = names[round];
55 | long msec;
56 |
57 | switch (round) {
58 | case 0:
59 | msec = testDecodeJDK(REPS, INPUT, 4, INPUT_LEN);
60 | break;
61 | case 1:
62 | msec = testDecodeUnsafe(REPS, INPUT, 4, INPUT_LEN);
63 | break;
64 | default:
65 | throw new Error();
66 | }
67 |
68 | boolean roundDone = (round == 1);
69 |
70 | // skip first 5 rounds to let results stabilize
71 | if (roundsDone >= WARMUP_ROUNDS) {
72 | times[round] += msec;
73 | }
74 | System.out.printf("Test '%s' -> %d msecs\n", msg, msec);
75 | if (roundDone) {
76 | roundDone = false;
77 | ++roundsDone;
78 | if ((roundsDone % 7) == 0 && roundsDone > WARMUP_ROUNDS) {
79 | _printResults((roundsDone - WARMUP_ROUNDS), names, times);
80 | }
81 | }
82 | if ((roundTotal % 17) == 0) {
83 | System.out.println("[GC]");
84 | Thread.sleep(100L);
85 | System.gc();
86 | Thread.sleep(100L);
87 | }
88 | }
89 | }
90 |
91 | public long testDecodeJDK(int reps, byte[] input, final int offset, final int len)
92 | {
93 | final long mainStart = System.currentTimeMillis();
94 | char[] result = new char[64];
95 | while (--reps >= 0) {
96 | for (int i = 0; i < len; ++i) {
97 | result[i] = (char) input[offset+i];
98 | }
99 | }
100 | long time = System.currentTimeMillis() - mainStart;
101 | return time;
102 | }
103 |
104 | public long testDecodeUnsafe(int reps, byte[] input, final int offset, final int len)
105 | {
106 | final long mainStart = System.currentTimeMillis();
107 | char[] result = new char[100];
108 |
109 | while (--reps >= 0) {
110 | // long inBase = BYTE_ARRAY_OFFSET + offset;
111 | // long outBase = CHAR_ARRAY_OFFSET;
112 |
113 | // final long inEnd = inBase + len;
114 | for (int i = 0; i < len; ++i) {
115 | result[i] = (char) input[offset+i];
116 |
117 | /*
118 | int quad = unsafe.getInt(input, inBase);
119 | inBase += 4;
120 |
121 | result[i++] = (char) (quad >>> 24);
122 | result[i++] = (char) ((quad >> 16) & 0xFF);
123 | result[i++] = (char) ((quad >> 8) & 0xFF);
124 | result[i++] = (char) (quad & 0xFF);
125 | */
126 |
127 | /*
128 | int q1 = ((quad >>> 24) << 16) + ((quad >> 16) & 0xFF);
129 |
130 | unsafe.putInt(result, outBase, q1);
131 | outBase += 4;
132 |
133 | int q2 = (quad & 0xFFFF);
134 | q2 = ((q2 >> 8) << 16) | (q2 & 0xFF);
135 |
136 | unsafe.putInt(result, outBase, q2);
137 | outBase += 4;
138 |
139 | long l = q1;
140 | l = (l << 32) | q2;
141 |
142 | unsafe.putLong(result, outBase, l);
143 | outBase += 8;
144 | */
145 | }
146 | }
147 | long time = System.currentTimeMillis() - mainStart;
148 | /*
149 | String str = new String(result, 0, len);
150 | System.out.println("("+str.length()+") '"+str+"'");
151 | */
152 | return time;
153 | }
154 |
155 | protected void _printResults(int rounds, String[] names, long[] times)
156 | {
157 | System.out.printf(" Averages after %d rounds:", rounds);
158 | double den = (double) rounds;
159 | for (int file = 0; file < names.length; ++file) {
160 | if (file > 0) {
161 | System.out.print(" / ");
162 | }
163 | System.out.printf(" %s(", names[file]);
164 | long time = times[file];
165 | double msecs = time / den;
166 | System.out.printf("%.1f)", msecs);
167 | }
168 | System.out.println();
169 | }
170 |
171 | public static void main(String[] args) throws Exception
172 | {
173 | new ManualUnsafePerf().test();
174 | }
175 | }
176 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/impl/VanillaChunkEncoder.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.impl;
2 |
3 | import com.ning.compress.BufferRecycler;
4 | import com.ning.compress.lzf.ChunkEncoder;
5 | import com.ning.compress.lzf.LZFChunk;
6 |
7 | public class VanillaChunkEncoder
8 | extends ChunkEncoder
9 | {
10 | /**
11 | * @param totalLength Total encoded length; used for calculating size
12 | * of hash table to use
13 | */
14 | public VanillaChunkEncoder(int totalLength) {
15 | super(totalLength);
16 | }
17 |
18 | /**
19 | * Alternate constructor used when we want to avoid allocating encoding
20 | * buffer, in cases where caller wants full control over allocations.
21 | */
22 | protected VanillaChunkEncoder(int totalLength, boolean bogus) {
23 | super(totalLength, bogus);
24 | }
25 |
26 | /**
27 | * @param totalLength Total encoded length; used for calculating size
28 | * of hash table to use
29 | * @param bufferRecycler The BufferRecycler instance
30 | */
31 | public VanillaChunkEncoder(int totalLength, BufferRecycler bufferRecycler) {
32 | super(totalLength, bufferRecycler);
33 | }
34 |
35 | /**
36 | * Alternate constructor used when we want to avoid allocating encoding
37 | * buffer, in cases where caller wants full control over allocations.
38 | */
39 | protected VanillaChunkEncoder(int totalLength, BufferRecycler bufferRecycler, boolean bogus) {
40 | super(totalLength, bufferRecycler, bogus);
41 | }
42 |
43 | public static VanillaChunkEncoder nonAllocatingEncoder(int totalLength) {
44 | return new VanillaChunkEncoder(totalLength, true);
45 | }
46 |
47 | public static VanillaChunkEncoder nonAllocatingEncoder(int totalLength, BufferRecycler bufferRecycler) {
48 | return new VanillaChunkEncoder(totalLength, bufferRecycler, true);
49 | }
50 |
51 | /*
52 | ///////////////////////////////////////////////////////////////////////
53 | // Abstract method implementations
54 | ///////////////////////////////////////////////////////////////////////
55 | */
56 |
57 | /**
58 | * Main workhorse method that will try to compress given chunk, and return
59 | * end position (offset to byte after last included byte)
60 | *
61 | * @return Output pointer after handling content, such that result - originalOutPos
62 | * is the actual length of compressed chunk (without header)
63 | */
64 | @Override
65 | protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos)
66 | {
67 | final int[] hashTable = _hashTable;
68 | ++outPos; // To leave one byte for literal-length indicator
69 | int seen = first(in, inPos); // past 4 bytes we have seen... (last one is LSB)
70 | int literals = 0;
71 | inEnd -= TAIL_LENGTH;
72 | final int firstPos = inPos; // so that we won't have back references across block boundary
73 |
74 | while (inPos < inEnd) {
75 | byte p2 = in[inPos + 2];
76 | // next
77 | seen = (seen << 8) + (p2 & 255);
78 | int off = hash(seen);
79 | int ref = hashTable[off];
80 | hashTable[off] = inPos;
81 |
82 | // First expected common case: no back-ref (for whatever reason)
83 | if (ref >= inPos // can't refer forward (i.e. leftovers)
84 | || (ref < firstPos) // or to previous block
85 | || (off = inPos - ref) > MAX_OFF
86 | || in[ref+2] != p2 // must match hash
87 | || in[ref+1] != (byte) (seen >> 8)
88 | || in[ref] != (byte) (seen >> 16)) {
89 | out[outPos++] = in[inPos++];
90 | literals++;
91 | if (literals == LZFChunk.MAX_LITERAL) {
92 | out[outPos - 33] = (byte) 31; // <= out[outPos - literals - 1] = MAX_LITERAL_MINUS_1;
93 | literals = 0;
94 | outPos++; // To leave one byte for literal-length indicator
95 | }
96 | continue;
97 | }
98 | // match
99 | int maxLen = inEnd - inPos + 2;
100 | if (maxLen > MAX_REF) {
101 | maxLen = MAX_REF;
102 | }
103 | if (literals == 0) {
104 | outPos--; // We do not need literal length indicator, go back
105 | } else {
106 | out[outPos - literals - 1] = (byte) (literals - 1);
107 | literals = 0;
108 | }
109 | int len = 3;
110 | // find match length
111 | while (len < maxLen && in[ref + len] == in[inPos + len]) {
112 | len++;
113 | }
114 | len -= 2;
115 | --off; // was off by one earlier
116 | if (len < 7) {
117 | out[outPos++] = (byte) ((off >> 8) + (len << 5));
118 | } else {
119 | out[outPos++] = (byte) ((off >> 8) + (7 << 5));
120 | out[outPos++] = (byte) (len - 7);
121 | }
122 | out[outPos++] = (byte) off;
123 | outPos++;
124 | inPos += len;
125 | seen = first(in, inPos);
126 | seen = (seen << 8) + (in[inPos + 2] & 255);
127 | hashTable[hash(seen)] = inPos;
128 | ++inPos;
129 | seen = (seen << 8) + (in[inPos + 2] & 255); // hash = next(hash, in, inPos);
130 | hashTable[hash(seen)] = inPos;
131 | ++inPos;
132 | }
133 | // Should never happen but verify:
134 | if (inPos > inEnd + TAIL_LENGTH) {
135 | throw new IllegalStateException("Internal error: consumed input past end, `inPos` > "+(inEnd + TAIL_LENGTH));
136 | }
137 | // try offlining the tail
138 | return _handleTail(in, inPos, inEnd+TAIL_LENGTH, out, outPos, literals);
139 | }
140 |
141 | private final int _handleTail(byte[] in, int inPos, int inEnd, byte[] out, int outPos,
142 | int literals)
143 | {
144 | while (inPos < inEnd) {
145 | out[outPos++] = in[inPos++];
146 | literals++;
147 | if (literals == LZFChunk.MAX_LITERAL) {
148 | out[outPos - literals - 1] = (byte) (literals - 1);
149 | literals = 0;
150 | outPos++;
151 | }
152 | }
153 | out[outPos - literals - 1] = (byte) (literals - 1);
154 | if (literals == 0) {
155 | outPos--;
156 | }
157 | return outPos;
158 | }
159 |
160 | /*
161 | ///////////////////////////////////////////////////////////////////////
162 | // Internal methods
163 | ///////////////////////////////////////////////////////////////////////
164 | */
165 |
166 | private final int first(byte[] in, int inPos) {
167 | return (in[inPos] << 8) + (in[inPos + 1] & 0xFF);
168 | }
169 | }
170 |
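
A sketch of the non-allocating path (illustrative; the output-buffer sizing below is a hedge of roughly input + input/32 plus per-chunk header room, not an exact bound taken from the library): the caller owns the output array, and LZFEncoder.appendEncoded() returns the end offset of the encoded bytes.

import com.ning.compress.lzf.LZFEncoder;
import com.ning.compress.lzf.impl.VanillaChunkEncoder;

public class AppendEncodedSketch {
    public static void main(String[] args) {
        byte[] input = "abcabcabcabcabcabcabcabc".getBytes();
        byte[] output = new byte[input.length + (input.length >> 5) + 64];
        try (VanillaChunkEncoder encoder = VanillaChunkEncoder.nonAllocatingEncoder(input.length)) {
            int end = LZFEncoder.appendEncoded(encoder, input, 0, input.length, output, 0);
            System.out.println("encoded length: " + end);
        }
    }
}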
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/impl/UnsafeChunkEncoderBE.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.impl;
2 |
3 | import com.ning.compress.BufferRecycler;
4 | import com.ning.compress.lzf.LZFChunk;
5 |
6 | /**
7 | * Implementation to use on Big-Endian architectures.
8 | */
9 | @SuppressWarnings("restriction")
10 | public final class UnsafeChunkEncoderBE
11 | extends UnsafeChunkEncoder
12 | {
13 | public UnsafeChunkEncoderBE(int totalLength) {
14 | super(totalLength);
15 | }
16 |
17 | public UnsafeChunkEncoderBE(int totalLength, boolean bogus) {
18 | super(totalLength, bogus);
19 | }
20 |
21 | public UnsafeChunkEncoderBE(int totalLength, BufferRecycler bufferRecycler) {
22 | super(totalLength, bufferRecycler);
23 | }
24 |
25 | public UnsafeChunkEncoderBE(int totalLength, BufferRecycler bufferRecycler, boolean bogus) {
26 | super(totalLength, bufferRecycler, bogus);
27 | }
28 |
29 | @Override
30 | protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos)
31 | {
32 | // Sanity checks; otherwise if any of the arguments are invalid `Unsafe` might corrupt memory
33 | _checkArrayIndices(in, inPos, inEnd);
34 | _checkArrayIndices(out, outPos, out.length);
35 | _checkOutputLength(inEnd - inPos, out.length - outPos);
36 |
37 | final int[] hashTable = _hashTable;
38 | int literals = 0;
39 | inEnd -= TAIL_LENGTH;
40 | final int firstPos = inPos; // so that we won't have back references across block boundary
41 |
42 | int seen = _getInt(in, inPos) >> 16;
43 |
44 | while (inPos < inEnd) {
45 | seen = (seen << 8) + (in[inPos + 2] & 255);
46 |
47 | int off = hash(seen);
48 | int ref = hashTable[off];
49 | hashTable[off] = inPos;
50 |
51 | // First expected common case: no back-ref (for whatever reason)
52 | if ((ref >= inPos) // can't refer forward (i.e. leftovers)
53 | || (ref < firstPos) // or to previous block
54 | || (off = inPos - ref) > MAX_OFF
55 | || ((seen << 8) != _getShifted3Bytes(in, ref))) {
56 | ++inPos;
57 | ++literals;
58 | if (literals == LZFChunk.MAX_LITERAL) {
59 | outPos = _copyFullLiterals(in, inPos, out, outPos);
60 | literals = 0;
61 | }
62 | continue;
63 | }
64 |
65 | if (literals > 0) {
66 | outPos = _copyPartialLiterals(in, inPos, out, outPos, literals);
67 | literals = 0;
68 | }
69 | // match
70 | final int maxLen = Math.min(MAX_REF, inEnd - inPos + 2);
71 | int len = _findMatchLength(in, ref+3, inPos+3, ref+maxLen);
72 |
73 | --off; // was off by one earlier
74 | if (len < 7) {
75 | out[outPos++] = (byte) ((off >> 8) + (len << 5));
76 | } else {
77 | out[outPos++] = (byte) ((off >> 8) + (7 << 5));
78 | out[outPos++] = (byte) (len - 7);
79 | }
80 | out[outPos++] = (byte) off;
81 | inPos += len;
82 | seen = _getInt(in, inPos);
83 | hashTable[hash(seen >> 8)] = inPos;
84 | ++inPos;
85 | hashTable[hash(seen)] = inPos;
86 | ++inPos;
87 | }
88 | // Should never happen but verify:
89 | if (inPos > inEnd + TAIL_LENGTH) {
90 | throw new IllegalStateException("Internal error: consumed input past end, `inPos` > "+(inEnd + TAIL_LENGTH));
91 | }
92 | // offline the tail handling
93 | return _handleTail(in, inPos, inEnd+TAIL_LENGTH, out, outPos, literals);
94 | }
95 |
96 | private final static int _getInt(final byte[] in, final int inPos) {
97 | return unsafe.getInt(in, BYTE_ARRAY_OFFSET + inPos);
98 | }
99 |
100 | /**
101 | * Reads 3 bytes, shifted to the left by 8.
102 | */
103 | private static int _getShifted3Bytes(byte[] in, int inPos) {
104 | // For inPos 0 have to read bytes manually to avoid Unsafe out-of-bounds access at `inPos - 1`
105 | // But for higher inPos values can use Unsafe to read as int and discard first byte
106 | if (inPos == 0) {
107 | return ((in[0] & 0xFF) << 24) | ((in[1] & 0xFF) << 16) | ((in[2] & 0xFF) << 8);
108 | } else {
109 | return _getInt(in, inPos - 1) << 8;
110 | }
111 | }
112 |
113 | /*
114 | ///////////////////////////////////////////////////////////////////////
115 | // Methods for finding length of a back-reference
116 | ///////////////////////////////////////////////////////////////////////
117 | */
118 |
119 | private final static int _findMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1)
120 | {
121 | // Expect at least 8 bytes to check for fast case; offline others
122 | if ((ptr1 + 8) >= maxPtr1) { // rare case, offline
123 | return _findTailMatchLength(in, ptr1, ptr2, maxPtr1);
124 | }
125 | // short matches common, so start with specialized comparison
126 | // NOTE: we know that we have 4 bytes of slack before end, so this is safe:
127 | int i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1);
128 | int i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2);
129 | if (i1 != i2) {
130 | return 1 + _leadingBytes(i1, i2);
131 | }
132 | ptr1 += 4;
133 | ptr2 += 4;
134 |
135 | i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1);
136 | i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2);
137 | if (i1 != i2) {
138 | return 5 + _leadingBytes(i1, i2);
139 | }
140 | return _findLongMatchLength(in, ptr1+4, ptr2+4, maxPtr1);
141 | }
142 |
143 | private final static int _findLongMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1)
144 | {
145 | final int base = ptr1 - 9;
146 | // and then just loop with longs if we get that far
147 | final int longEnd = maxPtr1-8;
148 | while (ptr1 <= longEnd) {
149 | long l1 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr1);
150 | long l2 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr2);
151 | if (l1 != l2) {
152 | return ptr1 - base + _leadingBytes(l1, l2);
153 | }
154 | ptr1 += 8;
155 | ptr2 += 8;
156 | }
157 | // or, if running out of runway, handle last bytes with loop-de-loop...
158 | while (ptr1 < maxPtr1 && in[ptr1] == in[ptr2]) {
159 | ++ptr1;
160 | ++ptr2;
161 | }
162 | return ptr1 - base; // i.e.
163 | }
164 |
165 | /* With Big-Endian, in-memory layout is "natural", so what we consider
166 | * leading is also leading for in-register.
167 | */
168 |
169 | private final static int _leadingBytes(int i1, int i2) {
170 | return Integer.numberOfLeadingZeros(i1 ^ i2) >> 3;
171 | }
172 |
173 | private final static int _leadingBytes(long l1, long l2) {
174 | return Long.numberOfLeadingZeros(l1 ^ l2) >> 3;
175 | }
176 | }
177 |
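
A worked example of the byte-counting trick used by _findMatchLength() above (standalone and illustrative): XOR two big-endian words, then divide the leading zero bit count by 8 to get the number of identical leading bytes.

public class LeadingBytesSketch {
    public static void main(String[] args) {
        int i1 = 0x11223344;
        int i2 = 0x11223355;    // first three bytes match
        int xor = i1 ^ i2;      // 0x00000011
        int matchingBytes = Integer.numberOfLeadingZeros(xor) >> 3; // 27 bits -> 3
        System.out.println(matchingBytes); // prints 3
    }
}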
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/lzf/TestLZFRoundTrip.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import java.io.*;
4 | import java.nio.file.Files;
5 | import java.nio.file.Path;
6 |
7 | import com.ning.compress.lzf.impl.UnsafeChunkDecoder;
8 | import com.ning.compress.lzf.impl.VanillaChunkDecoder;
9 | import com.ning.compress.lzf.util.ChunkEncoderFactory;
10 | import org.junit.jupiter.api.Test;
11 | import org.junit.jupiter.api.io.TempDir;
12 |
13 | import static org.junit.jupiter.api.Assertions.*;
14 |
15 | public class TestLZFRoundTrip
16 | {
17 | private final static String[] FILES = {
18 | "/shakespeare.tar",
19 | "/shakespeare/hamlet.xml",
20 | "/shakespeare/macbeth.xml",
21 | "/shakespeare/play.dtd",
22 | "/shakespeare/r_and_j.xml"
23 | ,"/binary/help.bin"
24 | ,"/binary/word.doc"
25 | };
26 |
27 | @TempDir
28 | Path tempDir;
29 |
30 | @Test
31 | public void testVanillaCodec() throws Exception
32 | {
33 | _testUsingBlock(new VanillaChunkDecoder());
34 | _testUsingReader(new VanillaChunkDecoder());
35 | }
36 |
37 | @Test
38 | public void testUnsafeCodec() throws IOException
39 | {
40 | _testUsingBlock(new UnsafeChunkDecoder());
41 | _testUsingReader(new UnsafeChunkDecoder());
42 | }
43 |
44 | @Test
45 | public void testLZFCompressionOnTestFiles() throws IOException {
46 | for (int i = 0; i < 100; i++) {
47 | testLZFCompressionOnDir(new File("src/test/resources/shakespeare"));
48 | }
49 | }
50 |
51 | private void testLZFCompressionOnDir(File dir) throws IOException
52 | {
53 | File[] files = dir.listFiles();
54 | for (File file : files) {
55 | if (!file.isDirectory()) {
56 | testLZFCompressionOnFile(file.toPath());
57 | } else {
58 | testLZFCompressionOnDir(file);
59 | }
60 | }
61 | }
62 |
63 | private void testLZFCompressionOnFile(Path file) throws IOException
64 | {
65 | final ChunkDecoder decoder = new UnsafeChunkDecoder();
66 | byte[] buf = new byte[64 * 1024];
67 |
68 | Path compressedFile = Files.createTempFile(tempDir, "test", ".lzf");
69 | try (InputStream in = new BufferedInputStream(Files.newInputStream(file));
70 | OutputStream out = new LZFOutputStream(new BufferedOutputStream(
71 | Files.newOutputStream(compressedFile)))) {
72 | int len;
73 | while ((len = in.read(buf, 0, buf.length)) >= 0) {
74 | out.write(buf, 0, len);
75 | }
76 | }
77 |
78 | // decompress and verify bytes haven't changed
79 | try (InputStream in = new BufferedInputStream(Files.newInputStream(file));
80 | DataInputStream compressedIn = new DataInputStream(new LZFInputStream(decoder,
81 | Files.newInputStream(compressedFile), false))) {
82 | int len;
83 | while ((len = in.read(buf, 0, buf.length)) >= 0) {
84 | byte[] buf2 = new byte[len];
85 | compressedIn.readFully(buf2, 0, len);
86 | byte[] trimmedBuf = new byte[len];
87 | System.arraycopy(buf, 0, trimmedBuf, 0, len);
88 | assertArrayEquals(trimmedBuf, buf2);
89 | }
90 | assertEquals(-1, compressedIn.read());
91 | }
92 | }
93 |
94 | @Test
95 | public void testHashCollision() throws IOException
96 | {
97 | // this test generates a hash collision: [0,1,153,64] hashes the same as [1,153,64,64]
98 | // and then leverages the bug s/inPos/0/ to corrupt the array
99 | // the first array is used to insert a reference from this hash to offset 6
100 | // and then the hash table is reused and still thinks that there is such a hash at position 6
101 | // and at position 7, it finds a sequence with the same hash
102 | // so it inserts a buggy reference
103 | final byte[] b1 = new byte[] {0,1,2,3,4,(byte)153,64,64,64,9,9,9,9,9,9,9,9,9,9};
104 | final byte[] b2 = new byte[] {1,(byte)153,0,0,0,0,(byte)153,64,64,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
105 | final int off = 6;
106 |
107 | ChunkEncoder encoder = ChunkEncoderFactory.safeInstance();
108 | ChunkDecoder decoder = new VanillaChunkDecoder();
109 | _testCollision(encoder, decoder, b1, 0, b1.length);
110 | _testCollision(encoder, decoder, b2, off, b2.length - off);
111 |
112 | encoder = ChunkEncoderFactory.optimalInstance();
113 | decoder = new UnsafeChunkDecoder();
114 | _testCollision(encoder, decoder, b1, 0, b1.length);
115 | _testCollision(encoder, decoder, b2, off, b2.length - off);
116 | }
117 |
118 | private void _testCollision(ChunkEncoder encoder, ChunkDecoder decoder, byte[] bytes, int offset, int length) throws IOException
119 | {
120 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
121 | byte[] expected = new byte[length];
122 | byte[] buffer = new byte[LZFChunk.MAX_CHUNK_LEN];
123 | byte[] output = new byte[length];
124 | System.arraycopy(bytes, offset, expected, 0, length);
125 | encoder.encodeAndWriteChunk(bytes, offset, length, outputStream);
126 | InputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
127 | assertEquals(length, decoder.decodeChunk(inputStream, buffer, output));
128 | assertArrayEquals(expected, output);
129 | }
130 |
131 | /*
132 | ///////////////////////////////////////////////////////////////////////
133 | // Helper method
134 | ///////////////////////////////////////////////////////////////////////
135 | */
136 |
137 |
138 | protected void _testUsingBlock(ChunkDecoder decoder) throws IOException
139 | {
140 | for (String name : FILES) {
141 | byte[] data = readResource(name);
142 | byte[] lzf = LZFEncoder.encode(data);
143 | byte[] decoded = decoder.decode(lzf);
144 |
145 | assertArrayEquals(data, decoded,
146 | String.format("File '%s', %d->%d bytes", name, data.length, lzf.length));
147 | }
148 | }
149 |
150 | protected void _testUsingReader(ChunkDecoder decoder) throws IOException
151 | {
152 | for (String name : FILES) {
153 | byte[] data = readResource(name);
154 | byte[] lzf = LZFEncoder.encode(data);
155 | LZFInputStream comp = new LZFInputStream(decoder, new ByteArrayInputStream(lzf), false);
156 | byte[] decoded = readAll(comp);
157 |
158 | assertArrayEquals(data, decoded);
159 | }
160 | }
161 |
162 | protected byte[] readResource(String name) throws IOException
163 | {
164 | return readAll(getClass().getResourceAsStream(name));
165 | }
166 |
167 | protected byte[] readAll(InputStream in) throws IOException
168 | {
169 | assertNotNull(in);
170 | byte[] buffer = new byte[4000];
171 | int count;
172 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(4000);
173 |
174 | while ((count = in.read(buffer)) > 0) {
175 | bytes.write(buffer, 0, count);
176 | }
177 | in.close();
178 | return bytes.toByteArray();
179 | }
180 | }
181 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/impl/UnsafeChunkEncoderLE.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.impl;
2 |
3 | import com.ning.compress.BufferRecycler;
4 | import com.ning.compress.lzf.LZFChunk;
5 |
6 | /**
7 | * Implementation to use on Little-Endian architectures.
8 | */
9 | @SuppressWarnings("restriction")
10 | public final class UnsafeChunkEncoderLE
11 | extends UnsafeChunkEncoder
12 | {
13 | public UnsafeChunkEncoderLE(int totalLength) {
14 | super(totalLength);
15 | }
16 |
17 | public UnsafeChunkEncoderLE(int totalLength, boolean bogus) {
18 | super(totalLength, bogus);
19 | }
20 |
21 | public UnsafeChunkEncoderLE(int totalLength, BufferRecycler bufferRecycler) {
22 | super(totalLength, bufferRecycler);
23 | }
24 |
25 | public UnsafeChunkEncoderLE(int totalLength, BufferRecycler bufferRecycler, boolean bogus) {
26 | super(totalLength, bufferRecycler, bogus);
27 | }
28 |
29 | @Override
30 | protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos)
31 | {
32 | // Sanity checks; otherwise if any of the arguments are invalid `Unsafe` might corrupt memory
33 | _checkArrayIndices(in, inPos, inEnd);
34 | _checkArrayIndices(out, outPos, out.length);
35 | _checkOutputLength(inEnd - inPos, out.length - outPos);
36 |
37 | final int[] hashTable = _hashTable;
38 | int literals = 0;
39 | inEnd -= TAIL_LENGTH;
40 | final int firstPos = inPos; // so that we won't have back references across block boundary
41 |
42 | int seen = _getInt(in, inPos) >> 16;
43 |
44 | while (inPos < inEnd) {
45 | seen = (seen << 8) + (in[inPos + 2] & 255);
46 |
47 | int off = hash(seen);
48 | int ref = hashTable[off];
49 | hashTable[off] = inPos;
50 |
51 | // First expected common case: no back-ref (for whatever reason)
52 | if ((ref >= inPos) // can't refer forward (i.e. leftovers)
53 | || (ref < firstPos) // or to previous block
54 | || (off = inPos - ref) > MAX_OFF
55 | || ((seen << 8) != _getShifted3Bytes(in, ref))) {
56 | ++inPos;
57 | ++literals;
58 | if (literals == LZFChunk.MAX_LITERAL) {
59 | outPos = _copyFullLiterals(in, inPos, out, outPos);
60 | literals = 0;
61 | }
62 | continue;
63 | }
64 |
65 | if (literals > 0) {
66 | outPos = _copyPartialLiterals(in, inPos, out, outPos, literals);
67 | literals = 0;
68 | }
69 | // match
70 | final int maxLen = Math.min(MAX_REF, inEnd - inPos + 2);
71 | int len = _findMatchLength(in, ref+3, inPos+3, ref+maxLen);
72 |
73 | --off; // was off by one earlier
74 | if (len < 7) {
75 | out[outPos++] = (byte) ((off >> 8) + (len << 5));
76 | } else {
77 | out[outPos++] = (byte) ((off >> 8) + (7 << 5));
78 | out[outPos++] = (byte) (len - 7);
79 | }
80 | out[outPos++] = (byte) off;
81 | inPos += len;
82 | seen = _getInt(in, inPos);
83 | hashTable[hash(seen >> 8)] = inPos;
84 | ++inPos;
85 | hashTable[hash(seen)] = inPos;
86 | ++inPos;
87 | }
88 | // Should never happen but verify:
89 | if (inPos > inEnd + TAIL_LENGTH) {
90 | throw new IllegalStateException("Internal error: consumed input past end, `inPos` > "+(inEnd + TAIL_LENGTH));
91 | }
92 | // offline the tail handling
93 | return _handleTail(in, inPos, inEnd+TAIL_LENGTH, out, outPos, literals);
94 | }
95 |
96 | private final static int _getInt(final byte[] in, final int inPos) {
97 | return Integer.reverseBytes(unsafe.getInt(in, BYTE_ARRAY_OFFSET + inPos));
98 | }
99 |
100 | /**
101 | * Reads 3 bytes, shifted to the left by 8.
102 | */
103 | private static int _getShifted3Bytes(byte[] in, int inPos) {
104 | // For inPos 0 have to read bytes manually to avoid Unsafe out-of-bounds access at `inPos - 1`
105 | // But for higher inPos values can use Unsafe to read as int and discard first byte
106 | if (inPos == 0) {
107 | return ((in[0] & 0xFF) << 24) | ((in[1] & 0xFF) << 16) | ((in[2] & 0xFF) << 8);
108 | } else {
109 | return _getInt(in, inPos - 1) << 8;
110 | }
111 | }
112 |
113 | /*
114 | ///////////////////////////////////////////////////////////////////////
115 | // Methods for finding length of a back-reference
116 | ///////////////////////////////////////////////////////////////////////
117 | */
118 |
119 | private final static int _findMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1)
120 | {
121 | // Expect at least 8 bytes to check for fast case; offline others
122 | if ((ptr1 + 8) >= maxPtr1) { // rare case, offline
123 | return _findTailMatchLength(in, ptr1, ptr2, maxPtr1);
124 | }
125 | // short matches common, so start with specialized comparison
126 | // NOTE: we know that we have 4 bytes of slack before end, so this is safe:
127 | int i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1);
128 | int i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2);
129 | if (i1 != i2) {
130 | return 1 + _leadingBytes(i1, i2);
131 | }
132 | ptr1 += 4;
133 | ptr2 += 4;
134 |
135 | i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1);
136 | i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2);
137 | if (i1 != i2) {
138 | return 5 + _leadingBytes(i1, i2);
139 | }
140 | return _findLongMatchLength(in, ptr1+4, ptr2+4, maxPtr1);
141 | }
142 |
143 | private final static int _findLongMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1)
144 | {
145 | final int base = ptr1 - 9;
146 | // and then just loop with longs if we get that far
147 | final int longEnd = maxPtr1-8;
148 | while (ptr1 <= longEnd) {
149 | long l1 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr1);
150 | long l2 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr2);
151 | if (l1 != l2) {
152 | return ptr1 - base + _leadingBytes(l1, l2);
153 | }
154 | ptr1 += 8;
155 | ptr2 += 8;
156 | }
157 | // or, if running out of runway, handle last bytes with loop-de-loop...
158 | while (ptr1 < maxPtr1 && in[ptr1] == in[ptr2]) {
159 | ++ptr1;
160 | ++ptr2;
161 | }
162 | return ptr1 - base; // i.e.
163 | }
164 |
165 | /* With Little-Endian, in-memory layout is reverse of what we expect for
166 | * in-register, so we either have to reverse bytes, or, simpler,
167 | * calculate trailing zeroes instead.
168 | */
169 |
170 | private final static int _leadingBytes(int i1, int i2) {
171 | return Integer.numberOfTrailingZeros(i1 ^ i2) >> 3;
172 | }
173 |
174 | private final static int _leadingBytes(long l1, long l2) {
175 | return Long.numberOfTrailingZeros(l1 ^ l2) >> 3;
176 | }
177 | }
178 |
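
The little-endian counterpart, worked through (standalone and illustrative): bytes that come first in memory occupy the low-order bits of the register, so trailing rather than leading zero bits of the XOR count the matching prefix bytes.

public class TrailingBytesSketch {
    public static void main(String[] args) {
        // in-memory bytes {0x44, 0x33, 0x22, 0x11} read as a little-endian int:
        int i1 = 0x11223344;
        int i2 = 0x99223344; // first three in-memory bytes (0x44, 0x33, 0x22) match
        int matchingBytes = Integer.numberOfTrailingZeros(i1 ^ i2) >> 3;
        System.out.println(matchingBytes); // prints 3
    }
}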
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/impl/UnsafeChunkEncoder.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.impl;
2 |
3 | import com.ning.compress.BufferRecycler;
4 | import java.lang.reflect.Field;
5 |
6 | import sun.misc.Unsafe;
7 |
8 | import com.ning.compress.lzf.ChunkEncoder;
9 | import com.ning.compress.lzf.LZFChunk;
10 |
11 | /**
12 | * {@link ChunkEncoder} implementation that handles actual encoding of individual chunks,
13 |  * using Sun's sun.misc.Unsafe functionality, which gives a
14 |  * nice extra boost in speed.
15 | *
16 | * @author Tatu Saloranta (tatu.saloranta@iki.fi)
17 | */
18 | @SuppressWarnings("restriction")
19 | public abstract class UnsafeChunkEncoder
20 | extends ChunkEncoder
21 | {
22 | // // Our Nitro Booster, mr. Unsafe!
23 |
24 | static final Unsafe unsafe;
25 | static {
26 | try {
27 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
28 | theUnsafe.setAccessible(true);
29 | unsafe = (Unsafe) theUnsafe.get(null);
30 | }
31 | catch (Exception e) {
32 | throw new RuntimeException(e);
33 | }
34 | }
35 |
36 | // All members here (fields, constructors, methods) are at most package-private; users are
37 | // not supposed to subclass this class
38 |
39 | static final long BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
40 |
41 | UnsafeChunkEncoder(int totalLength) {
42 | super(totalLength);
43 | }
44 |
45 | UnsafeChunkEncoder(int totalLength, boolean bogus) {
46 | super(totalLength, bogus);
47 | }
48 |
49 | UnsafeChunkEncoder(int totalLength, BufferRecycler bufferRecycler) {
50 | super(totalLength, bufferRecycler);
51 | }
52 |
53 | UnsafeChunkEncoder(int totalLength, BufferRecycler bufferRecycler, boolean bogus) {
54 | super(totalLength, bufferRecycler, bogus);
55 | }
56 |
57 | /*
58 | ///////////////////////////////////////////////////////////////////////
59 | // Shared helper methods
60 | ///////////////////////////////////////////////////////////////////////
61 | */
62 |
63 | /**
64 | * @param start start index, inclusive
65 | * @param end end index, exclusive
66 | */
67 | static void _checkArrayIndices(byte[] array, int start, int end) {
68 | if (start < 0 || end < start || end > array.length) {
69 | throw new ArrayIndexOutOfBoundsException();
70 | }
71 | }
72 |
73 | static void _checkOutputLength(int inputLen, int outputLen) {
74 | int maxEncoded = inputLen + ((inputLen + 31) >> 5);
75 |
76 | if (maxEncoded < 0 || maxEncoded > outputLen) {
77 | throw new IllegalArgumentException("Output length " + outputLen + " is too small for input length " + inputLen);
78 | }
79 | }
80 |
81 | final static int _copyPartialLiterals(byte[] in, int inPos, byte[] out, int outPos,
82 | int literals)
83 | {
84 | if (out.length - outPos < literals + 1) {
85 | throw new IllegalArgumentException("Not enough space in output array");
86 | }
87 |
88 | out[outPos++] = (byte) (literals-1);
89 |
90 | // Here use of Unsafe is clear win:
91 | // System.arraycopy(in, inPos-literals, out, outPos, literals);
92 |
93 | long rawInPtr = BYTE_ARRAY_OFFSET + inPos - literals;
94 | long rawOutPtr= BYTE_ARRAY_OFFSET + outPos;
95 |
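        // (Editorial note: the switch below falls through intentionally -- 'literals' is at most 31
        //  here, so at most three 8-byte Unsafe copies are needed before the remaining
        //  (literals & 7) bytes are handled by System.arraycopy.)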
96 | switch (literals >> 3) {
97 | case 3:
98 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
99 | rawInPtr += 8;
100 | rawOutPtr += 8;
101 | case 2:
102 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
103 | rawInPtr += 8;
104 | rawOutPtr += 8;
105 | case 1:
106 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
107 | rawInPtr += 8;
108 | rawOutPtr += 8;
109 | }
110 | int left = (literals & 7);
111 | if (left > 0) {
112 | System.arraycopy(in, (int) (rawInPtr - BYTE_ARRAY_OFFSET), out, (int) (rawOutPtr - BYTE_ARRAY_OFFSET), left);
113 | }
114 |
115 | return outPos+literals;
116 | }
117 |
118 | final static int _copyLongLiterals(byte[] in, int inPos, byte[] out, int outPos,
119 | int literals)
120 | {
121 | inPos -= literals;
122 |
123 | long rawInPtr = BYTE_ARRAY_OFFSET + inPos;
124 | long rawOutPtr = BYTE_ARRAY_OFFSET + outPos;
125 |
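        // (Editorial note: each loop iteration emits one full 32-byte (MAX_LITERAL) literal run --
        //  a control byte of 31, i.e. MAX_LITERAL - 1, followed by four 8-byte Unsafe copies.)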
126 | while (literals >= LZFChunk.MAX_LITERAL) {
127 | out[outPos++] = (byte) 31;
128 | ++rawOutPtr;
129 |
130 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
131 | rawInPtr += 8;
132 | rawOutPtr += 8;
133 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
134 | rawInPtr += 8;
135 | rawOutPtr += 8;
136 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
137 | rawInPtr += 8;
138 | rawOutPtr += 8;
139 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
140 | rawInPtr += 8;
141 | rawOutPtr += 8;
142 |
143 | inPos += LZFChunk.MAX_LITERAL;
144 | outPos += LZFChunk.MAX_LITERAL;
145 | literals -= LZFChunk.MAX_LITERAL;
146 | }
147 | if (literals > 0) {
148 | return _copyPartialLiterals(in, inPos+literals, out, outPos, literals);
149 | }
150 | return outPos;
151 | }
152 |
153 | final static int _copyFullLiterals(byte[] in, int inPos, byte[] out, int outPos)
154 | {
155 | if (out.length - outPos < 32 + 1) {
156 | throw new IllegalArgumentException("Not enough space in output array");
157 | }
158 |
159 | // literals == 32
160 | out[outPos++] = (byte) 31;
161 |
162 | long rawInPtr = BYTE_ARRAY_OFFSET + inPos - 32;
163 | long rawOutPtr = BYTE_ARRAY_OFFSET + outPos;
164 |
165 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
166 | rawInPtr += 8;
167 | rawOutPtr += 8;
168 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
169 | rawInPtr += 8;
170 | rawOutPtr += 8;
171 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
172 | rawInPtr += 8;
173 | rawOutPtr += 8;
174 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr));
175 |
176 | return (outPos + 32);
177 | }
178 |
179 | final static int _handleTail(byte[] in, int inPos, int inEnd, byte[] out, int outPos,
180 | int literals)
181 | {
182 | while (inPos < inEnd) {
183 | ++inPos;
184 | ++literals;
185 | if (literals == LZFChunk.MAX_LITERAL) {
186 | out[outPos++] = (byte) (literals-1); // <= out[outPos - literals - 1] = MAX_LITERAL_MINUS_1;
187 | System.arraycopy(in, inPos-literals, out, outPos, literals);
188 | outPos += literals;
189 | literals = 0;
190 | }
191 | }
192 | if (literals > 0) {
193 | out[outPos++] = (byte) (literals - 1);
194 | System.arraycopy(in, inPos-literals, out, outPos, literals);
195 | outPos += literals;
196 | }
197 | return outPos;
198 | }
199 |
200 | final static int _findTailMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1)
201 | {
202 | final int start1 = ptr1;
203 | while (ptr1 < maxPtr1 && in[ptr1] == in[ptr2]) {
204 | ++ptr1;
205 | ++ptr2;
206 | }
207 |         return ptr1 - start1 + 1; // i.e. number of matching bytes, plus one
208 | }
209 | }
210 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/util/ChunkEncoderFactory.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf.util;
2 |
3 | import com.ning.compress.BufferRecycler;
4 | import com.ning.compress.lzf.ChunkEncoder;
5 | import com.ning.compress.lzf.LZFChunk;
6 | import com.ning.compress.lzf.impl.UnsafeChunkEncoders;
7 | import com.ning.compress.lzf.impl.VanillaChunkEncoder;
8 |
9 | /**
10 | * Simple helper class used for loading
11 | * {@link ChunkEncoder} implementations, based on criteria
12 | * such as "fastest available" or "safe to run anywhere".
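 *<p>
 * A minimal usage sketch (editorial example, not part of the original Javadoc):
 *<pre>{@code
 * ChunkEncoder encoder = ChunkEncoderFactory.optimalInstance();
 * byte[] input = "some repetitive content, some repetitive content".getBytes(StandardCharsets.UTF_8);
 * LZFChunk chunk = encoder.encodeChunk(input, 0, input.length);
 * byte[] lzfBytes = chunk.getData(); // LZF-framed chunk bytes
 * encoder.close();
 * }</pre>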
13 | */
14 | public class ChunkEncoderFactory
15 | {
16 | /*
17 | ///////////////////////////////////////////////////////////////////////
18 | // Public API
19 | ///////////////////////////////////////////////////////////////////////
20 | */
21 |
22 | /**
23 | * Convenience method, equivalent to:
24 | *
25 | * return optimalInstance(LZFChunk.MAX_CHUNK_LEN);
26 | *
27 | *
28 | * @return ChunkEncoder constructed
29 | */
30 | public static ChunkEncoder optimalInstance() {
31 | return optimalInstance(LZFChunk.MAX_CHUNK_LEN);
32 | }
33 |
34 | /**
35 |  * Method to use for getting a compressor instance that uses the fastest
36 | * available methods for underlying data access. It should be safe to call
37 | * this method as implementations are dynamically loaded; however, on some
38 | * non-standard platforms it may be necessary to either directly load
39 | * instances, or use {@link #safeInstance}.
40 | *
76 | * return safeInstance(LZFChunk.MAX_CHUNK_LEN);
77 | *
78 | *
79 | * @return ChunkEncoder constructed
80 | */
81 | public static ChunkEncoder safeInstance() {
82 | return safeInstance(LZFChunk.MAX_CHUNK_LEN);
83 | }
84 |
85 | /**
86 | * Method that can be used to ensure that a "safe" compressor instance is loaded.
87 | * Safe here means that it should work on any and all Java platforms.
88 | *
115 | * return optimalInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler);
116 | *
117 | *
118 | * @return ChunkEncoder constructed
119 | */
120 | public static ChunkEncoder optimalInstance(BufferRecycler bufferRecycler) {
121 | return optimalInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler);
122 | }
123 |
124 | /**
125 |  * Method to use for getting a compressor instance that uses the fastest
126 | * available methods for underlying data access. It should be safe to call
127 | * this method as implementations are dynamically loaded; however, on some
128 | * non-standard platforms it may be necessary to either directly load
129 | * instances, or use {@link #safeInstance}.
130 | *
131 | * @param totalLength Expected total length of content to compress; only matters
132 | * for content that is smaller than maximum chunk size (64k), to optimize
133 | * encoding hash tables
134 | * @param bufferRecycler The BufferRecycler instance
135 | *
136 | * @return ChunkEncoder constructed
137 | */
138 | public static ChunkEncoder optimalInstance(int totalLength, BufferRecycler bufferRecycler) {
139 | try {
140 | return UnsafeChunkEncoders.createEncoder(totalLength, bufferRecycler);
141 | } catch (Exception e) {
142 | return safeInstance(totalLength, bufferRecycler);
143 | }
144 | }
145 |
146 | /**
147 |  * Factory method for constructing an encoder that is always passed its buffer
148 |  * externally, so that it will not (and need not) allocate an encoding buffer.
149 | *
150 | * @return ChunkEncoder constructed
151 | */
152 | public static ChunkEncoder optimalNonAllocatingInstance(int totalLength, BufferRecycler bufferRecycler) {
153 | try {
154 | return UnsafeChunkEncoders.createNonAllocatingEncoder(totalLength, bufferRecycler);
155 | } catch (Exception e) {
156 | return safeNonAllocatingInstance(totalLength, bufferRecycler);
157 | }
158 | }
159 |
160 | /**
161 | * Convenience method, equivalent to:
162 | *
163 | * return safeInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler);
164 | *
165 | *
166 | * @return ChunkEncoder constructed
167 | */
168 | public static ChunkEncoder safeInstance(BufferRecycler bufferRecycler) {
169 | return safeInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler);
170 | }
171 | /**
172 | * Method that can be used to ensure that a "safe" compressor instance is loaded.
173 | * Safe here means that it should work on any and all Java platforms.
174 | *
175 | * @param totalLength Expected total length of content to compress; only matters
176 | * for content that is smaller than maximum chunk size (64k), to optimize
177 | * encoding hash tables
178 | * @param bufferRecycler The BufferRecycler instance
179 | *
180 | * @return ChunkEncoder constructed
181 | */
182 | public static ChunkEncoder safeInstance(int totalLength, BufferRecycler bufferRecycler) {
183 | return new VanillaChunkEncoder(totalLength, bufferRecycler);
184 | }
185 |
186 | /**
187 |  * Factory method for constructing an encoder that is always passed its buffer
188 |  * externally, so that it will not (and need not) allocate an encoding buffer.
189 | */
190 | public static ChunkEncoder safeNonAllocatingInstance(int totalLength, BufferRecycler bufferRecycler) {
191 | return VanillaChunkEncoder.nonAllocatingEncoder(totalLength, bufferRecycler);
192 | }
193 | }
194 |
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/lzf/LZFEncoderTest.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import java.io.*;
4 | import java.util.Arrays;
5 |
6 | import com.ning.compress.BaseForTests;
7 | import com.ning.compress.lzf.impl.UnsafeChunkEncoder;
8 | import com.ning.compress.lzf.impl.UnsafeChunkEncoderBE;
9 | import com.ning.compress.lzf.impl.UnsafeChunkEncoderLE;
10 | import com.ning.compress.lzf.util.ChunkEncoderFactory;
11 | import org.junit.jupiter.api.Test;
12 |
13 | import static org.junit.jupiter.api.Assertions.*;
14 |
15 | public class LZFEncoderTest extends BaseForTests
16 | {
17 | @Test
18 | public void testBigSizeEstimate()
19 | {
20 | for (int amt : new int[] {
21 | 100, 250, 600,
22 | 10000, 50000, 65000, 120000, 130000,
23 | 3 * 0x10000 + 4,
24 | 15 * 0x10000 + 4,
25 | 1000 * 0x10000 + 4,
26 | }) {
27 | int estimate = LZFEncoder.estimateMaxWorkspaceSize(amt);
28 | int chunks = ((amt + 0xFFFE) / 0xFFFF);
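            // (Editorial note: chunks == ceil(amt / 0xFFFF), i.e. one chunk per 64k-1 bytes of input)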
29 | int expMin = 2 + amt + (chunks * 5); // 5-byte header for uncompressed; however, not enough workspace
30 | int expMax = ((int) (0.05 * 0xFFFF)) + amt + (chunks * 7);
31 | if (estimate < expMin || estimate > expMax) {
32 | fail("Expected ratio for "+amt+" to be "+expMin+" <= x <= "+expMax+", was: "+estimate);
33 | }
34 | //System.err.printf("%d < %d < %d\n", expMin, estimate, expMax);
35 | }
36 | }
37 |
38 | // as per [compress-lzf#43]
39 | @Test
40 | public void testSmallSizeEstimate()
41 | {
42 |         // and here we ensure that a specific uncompressible case won't fail
43 | byte[] in = new byte[] {0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0};
44 | int outSize = LZFEncoder.estimateMaxWorkspaceSize(in.length);
45 | LZFEncoder.appendEncoded(in, 0, in.length, new byte[outSize], 0);
46 | }
47 |
48 | @Test
49 | public void testCompressableChunksSingle() throws Exception
50 | {
51 | byte[] source = constructFluff(55000);
52 | _testCompressableChunksSingle(source, ChunkEncoderFactory.safeInstance());
53 | _testCompressableChunksSingle(source, ChunkEncoderFactory.optimalInstance());
54 | }
55 |
56 | private void _testCompressableChunksSingle(byte[] source, ChunkEncoder encoder) throws Exception
57 | {
58 | byte[] buffer = new byte[LZFEncoder.estimateMaxWorkspaceSize(source.length)];
59 | int compLen = LZFEncoder.appendEncoded(encoder, source, 0, source.length, buffer, 0);
60 |
61 | // and make sure we get identical compression
62 | byte[] bufferAsBlock = Arrays.copyOf(buffer, compLen);
63 | byte[] asBlockStd = LZFEncoder.encode(source);
64 | assertArrayEquals(bufferAsBlock, asBlockStd);
65 |
66 | // then uncompress, verify
67 | byte[] uncomp = uncompress(buffer, 0, compLen);
68 |
69 | assertArrayEquals(source, uncomp);
70 | }
71 |
72 | @Test
73 | public void testCompressableChunksMulti() throws Exception
74 | {
75 |         // let's do a bit over 256k, to get multiple chunks
76 | byte[] source = constructFluff(4 * 0xFFFF + 4000);
77 | _testCompressableChunksMulti(source, ChunkEncoderFactory.safeInstance());
78 | _testCompressableChunksMulti(source, ChunkEncoderFactory.optimalInstance());
79 | }
80 |
81 | private void _testCompressableChunksMulti(byte[] source, ChunkEncoder encoder) throws Exception
82 | {
83 | byte[] buffer = new byte[LZFEncoder.estimateMaxWorkspaceSize(source.length)];
84 | int compLen = LZFEncoder.appendEncoded(encoder, source, 0, source.length, buffer, 0);
85 |
86 | // and make sure we get identical compression
87 | byte[] bufferAsBlock = Arrays.copyOf(buffer, compLen);
88 | byte[] asBlockStd = LZFEncoder.encode(encoder, source, 0, source.length);
89 | assertArrayEquals(bufferAsBlock, asBlockStd);
90 |
91 | // then uncompress, verify
92 | byte[] uncomp = uncompress(buffer, 0, compLen);
93 |
94 | assertArrayEquals(source, uncomp);
95 | }
96 |
97 | @Test
98 | public void testNonCompressableChunksSingle() throws Exception
99 | {
100 | byte[] source = constructUncompressable(4000);
101 | _testNonCompressableChunksSingle(source, ChunkEncoderFactory.safeInstance());
102 | _testNonCompressableChunksSingle(source, ChunkEncoderFactory.optimalInstance());
103 | }
104 |
105 | private void _testNonCompressableChunksSingle(byte[] source, ChunkEncoder encoder) throws Exception
106 | {
107 | byte[] buffer = new byte[LZFEncoder.estimateMaxWorkspaceSize(source.length)];
108 | int compLen = LZFEncoder.appendEncoded(source, 0, source.length, buffer, 0);
109 |
110 | // and make sure we get identical compression
111 | byte[] bufferAsBlock = Arrays.copyOf(buffer, compLen);
112 | byte[] asBlockStd = LZFEncoder.encode(encoder, source, 0, source.length);
113 | assertArrayEquals(bufferAsBlock, asBlockStd);
114 |
115 | // then uncompress, verify
116 | byte[] uncomp = uncompress(buffer, 0, compLen);
117 |
118 | assertArrayEquals(source, uncomp);
119 | }
120 |
121 | @Test
122 | public void testConditionalCompression() throws Exception
123 | {
124 | final byte[] input = constructFluff(52000);
125 |
126 | _testConditionalCompression(ChunkEncoderFactory.safeInstance(), input);
127 | _testConditionalCompression(ChunkEncoderFactory.optimalInstance(), input);
128 | }
129 |
130 | private void _testConditionalCompression(ChunkEncoder enc, final byte[] input) throws IOException
131 | {
132 | // double-check expected compression ratio
133 | byte[] comp = enc.encodeChunk(input, 0, input.length).getData();
134 | int pct = (int) (100.0 * comp.length / input.length);
135 | // happens to compress to about 61%, good
136 | assertEquals(61, pct);
137 |
138 | // should be ok if we only require down to 70% compression
139 | byte[] buf = new byte[60000];
140 | int offset = enc.appendEncodedIfCompresses(input, 0.70, 0, input.length, buf, 0);
141 | assertEquals(comp.length, offset);
142 |
143 | // but not to 60%
144 | offset = enc.appendEncodedIfCompresses(input, 0.60, 0, input.length, buf, 0);
145 | assertEquals(-1, offset);
146 |
147 | // // // Second part: OutputStream alternatives
148 |
149 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(60000);
150 | assertTrue(enc.encodeAndWriteChunkIfCompresses(input, 0, input.length, bytes, 0.70));
151 | assertEquals(comp.length, bytes.size());
152 | byte[] output = bytes.toByteArray();
153 | assertArrayEquals(comp, output);
154 |
155 | bytes = new ByteArrayOutputStream(60000);
156 | assertFalse(enc.encodeAndWriteChunkIfCompresses(input, 0, input.length, bytes, 0.60));
157 | assertEquals(0, bytes.size());
158 |
159 | // // // Third part: chunk creation
160 |
161 | LZFChunk chunk = enc.encodeChunkIfCompresses(input, 0, input.length, 0.70);
162 | assertNotNull(chunk);
163 | assertEquals(comp.length, chunk.length());
164 | assertArrayEquals(comp, chunk.getData());
165 |
166 | chunk = enc.encodeChunkIfCompresses(input, 0, input.length, 0.60);
167 | assertNull(chunk);
168 | }
169 |
170 | @Test
171 | public void testUnsafeValidation() {
172 | _testUnsafeValidation(new UnsafeChunkEncoderBE(10));
173 | _testUnsafeValidation(new UnsafeChunkEncoderLE(10));
174 |
175 | }
176 |
177 | private void _testUnsafeValidation(UnsafeChunkEncoder encoder) {
178 | byte[] array = new byte[10];
179 | int goodStart = 2;
180 | int goodEnd = 5;
181 |
182 | assertThrows(NullPointerException.class, () -> encoder.tryCompress(null, goodStart, goodEnd, array, goodStart));
183 | assertThrows(NullPointerException.class, () -> encoder.tryCompress(array, goodStart, goodEnd, null, goodStart));
184 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, -1, goodEnd, array, goodStart));
185 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, array.length + 1, goodEnd, array, goodStart));
186 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, goodStart - 1, array, goodStart));
187 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, array.length + 1, array, goodStart));
188 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, goodEnd, array, -1));
189 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, goodEnd, array, array.length + 1));
190 | }
191 | }
192 |
--------------------------------------------------------------------------------
/src/test/java/com/ning/compress/lzf/TestLZFInputStream.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import java.io.*;
4 | import java.nio.charset.StandardCharsets;
5 | import java.util.Random;
6 | import java.security.SecureRandom;
7 |
8 | import com.ning.compress.BaseForTests;
9 | import org.junit.jupiter.api.BeforeEach;
10 | import org.junit.jupiter.api.Test;
11 |
12 | import static org.junit.jupiter.api.Assertions.*;
13 |
14 | public class TestLZFInputStream extends BaseForTests
15 | {
16 | private static final int BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN * 64;
17 | private final byte[] nonEncodableBytesToWrite = new byte[BUFFER_SIZE];
18 | private final byte[] bytesToWrite = new byte[BUFFER_SIZE];
19 | private byte[] nonCompressableBytes;
20 | private final int compressableInputLength = BUFFER_SIZE;
21 | private byte[] compressedBytes;
22 |
23 | @BeforeEach
24 | public void setUp() throws Exception
25 | {
26 | SecureRandom.getInstance("SHA1PRNG").nextBytes(nonEncodableBytesToWrite);
27 | String phrase = "all work and no play make Jack a dull boy";
28 | byte[] bytes = phrase.getBytes(StandardCharsets.UTF_8);
29 | int cursor = 0;
30 | while(cursor <= bytesToWrite.length) {
31 | System.arraycopy(bytes, 0, bytesToWrite, cursor, (bytes.length+cursor < bytesToWrite.length)?bytes.length:bytesToWrite.length-cursor);
32 | cursor += bytes.length;
33 | }
34 | ByteArrayOutputStream nonCompressed = new ByteArrayOutputStream();
35 | OutputStream os = new LZFOutputStream(nonCompressed);
36 | os.write(nonEncodableBytesToWrite);
37 | os.close();
38 | nonCompressableBytes = nonCompressed.toByteArray();
39 |
40 | ByteArrayOutputStream compressed = new ByteArrayOutputStream();
41 | os = new LZFOutputStream(compressed);
42 | os.write(bytesToWrite);
43 | os.close();
44 | compressedBytes = compressed.toByteArray();
45 | }
46 |
47 | @Test
48 | public void testDecompressNonEncodableReadByte() throws IOException {
49 | doDecompressReadByte(nonCompressableBytes, nonEncodableBytesToWrite);
50 | }
51 |
52 | @Test
53 | public void testDecompressNonEncodableReadBlock() throws IOException {
54 | doDecompressReadBlock(nonCompressableBytes, nonEncodableBytesToWrite);
55 | }
56 |
57 | @Test
58 | public void testDecompressEncodableReadByte() throws IOException {
59 | doDecompressReadByte(compressedBytes, bytesToWrite);
60 | }
61 |
62 | @Test
63 | public void testDecompressEncodableReadBlock() throws IOException {
64 | doDecompressReadBlock(compressedBytes, bytesToWrite);
65 | }
66 |
67 | @Test
68 | public void testRead0() throws IOException
69 | {
70 | ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes);
71 | InputStream is = new LZFInputStream(bis);
72 | assertEquals(0, is.available());
73 | byte[] buffer = new byte[65536+23];
74 | int val = is.read(buffer, 0, 0);
75 | // read of 0 or less should return a 0-byte read.
76 | assertEquals(0, val);
77 | val = is.read(buffer, 0, -1);
78 | assertEquals(0, val);
79 | // close should work.
80 | is.close();
81 | }
82 |
83 | @Test
84 | public void testAvailable() throws IOException
85 | {
86 | ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes);
87 | LZFInputStream is = new LZFInputStream(bis);
88 | assertSame(bis, is.getUnderlyingInputStream());
89 | assertEquals(0, is.available());
90 |         // read one byte; should decode a bunch more, making it available
91 | assertNotEquals(-1, is.read());
92 | int total = 1; // since we read one byte already
93 | assertEquals(65534, is.available());
94 | // and after we skip through all of it, end with -1 for EOF
95 | long count;
96 | while ((count = is.skip(16384L)) > 0L) {
97 | total += (int) count;
98 | }
99 | // nothing more available; but we haven't yet closed so:
100 | assertEquals(0, is.available());
101 | // and then we close it:
102 | is.close();
103 | assertEquals(0, is.available());
104 | assertEquals(compressableInputLength, total);
105 | }
106 |
107 | @Test void testIncrementalWithFullReads() throws IOException {
108 | doTestIncremental(true);
109 | }
110 |
111 | @Test void testIncrementalWithMinimalReads() throws IOException {
112 | doTestIncremental(false);
113 | }
114 |
115 | @Test
116 | public void testReadAndWrite() throws Exception
117 | {
118 | byte[] fluff = constructFluff(132000);
119 | byte[] comp = LZFEncoder.encode(fluff);
120 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(fluff.length);
121 | LZFInputStream in = new LZFInputStream(new ByteArrayInputStream(comp));
122 | in.readAndWrite(bytes);
123 | in.close();
124 | byte[] actual = bytes.toByteArray();
125 | assertArrayEquals(fluff, actual);
126 | }
127 |
128 | // Mostly for [Issue#19]
129 | @Test
130 | public void testLongSkips() throws Exception
131 | {
132 | // 64k per block, 200k gives 3 full, one small
133 | byte[] fluff = constructFluff(200000);
134 | byte[] comp = LZFEncoder.encode(fluff);
135 |
136 |         // we get about 200k, maybe a byte or two more, so:
137 | final int LENGTH = fluff.length;
138 |
139 | LZFInputStream in = new LZFInputStream(new ByteArrayInputStream(comp));
140 | // read one byte for fun
141 | assertEquals(fluff[0] & 0xFF, in.read());
142 | // then skip all but one
143 | long amt = in.skip(LENGTH-2);
144 | assertEquals(LENGTH-2, amt);
145 | assertEquals(fluff[LENGTH-1] & 0xFF, in.read());
146 |
147 | assertEquals(-1, in.read());
148 | in.close();
149 | }
150 |
151 | /*
152 | ///////////////////////////////////////////////////////////////////
153 | // Helper methods
154 | ///////////////////////////////////////////////////////////////////
155 | */
156 |
157 | /**
158 | * Test that creates a longer piece of content, compresses it, and reads
159 | * back in arbitrary small reads.
160 | */
161 | private void doTestIncremental(boolean fullReads) throws IOException
162 | {
163 | // first need to compress something...
164 | String[] words = new String[] { "what", "ever", "some", "other", "words", "too" };
165 | StringBuilder sb = new StringBuilder(258000);
166 | Random rnd = new Random(123);
167 | while (sb.length() < 256000) {
168 | int i = (rnd.nextInt() & 31);
169 | if (i < words.length) {
170 | sb.append(words[i]);
171 | } else {
172 | sb.append(i);
173 | }
174 | }
175 | byte[] uncomp = sb.toString().getBytes(StandardCharsets.UTF_8);
176 | ByteArrayOutputStream bytes = new ByteArrayOutputStream();
177 | LZFOutputStream lzOut = new LZFOutputStream(bytes);
178 | lzOut.write(uncomp);
179 | lzOut.close();
180 | byte[] comp = bytes.toByteArray();
181 |
182 | // read back, in chunks
183 | bytes = new ByteArrayOutputStream(uncomp.length);
184 | byte[] buffer = new byte[500];
185 | LZFInputStream lzIn = new LZFInputStream(new ByteArrayInputStream(comp), fullReads);
186 | int pos = 0;
187 |
188 | while (true) {
189 | int len = 1 + ((rnd.nextInt() & 0x7FFFFFFF) % buffer.length);
190 | int offset = buffer.length - len;
191 |
192 | int count = lzIn.read(buffer, offset, len);
193 | if (count < 0) {
194 | break;
195 | }
196 | if (count > len) {
197 | fail("Requested "+len+" bytes (offset "+offset+", array length "+buffer.length+"), got "+count);
198 | }
199 | pos += count;
200 | // with full reads, ought to get full results
201 | if (count != len) {
202 | if (fullReads) {
203 | // Except at the end, with last incomplete chunk
204 | if (pos != uncomp.length) {
205 | fail("Got partial read (when requested full read!), position "+pos+" (of full "+uncomp.length+")");
206 | }
207 | }
208 | }
209 | bytes.write(buffer, offset, count);
210 | }
211 | byte[] result = bytes.toByteArray();
212 | assertArrayEquals(uncomp, result);
213 | lzIn.close();
214 | }
215 |
216 | private void doDecompressReadByte(byte[] bytes, byte[] reference) throws IOException
217 | {
218 | ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
219 | InputStream is = new LZFInputStream(bis);
220 | int i = 0;
221 | int testVal;
222 | while((testVal=is.read()) != -1) {
223 | int rVal = ((int)reference[i]) & 255;
224 | assertEquals(rVal, testVal);
225 | ++i;
226 | }
227 | is.close();
228 | }
229 |
230 | private void doDecompressReadBlock(byte[] bytes, byte[] reference) throws IOException
231 | {
232 | ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
233 | int outputBytes = 0;
234 | InputStream is = new LZFInputStream(bis);
235 | int val;
236 | byte[] buffer = new byte[65536+23];
237 | while((val=is.read(buffer)) != -1) {
238 | for(int i = 0; i < val; i++) {
239 | byte testVal = buffer[i];
240 | assertEquals(reference[outputBytes], testVal);
241 | ++outputBytes;
242 | }
243 | }
244 | assertEquals(reference.length, outputBytes);
245 | is.close();
246 | }
247 | }
248 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/LZFCompressingInputStream.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 |
6 | import com.ning.compress.BufferRecycler;
7 | import com.ning.compress.lzf.util.ChunkEncoderFactory;
8 |
9 | /**
10 | * Decorator {@link InputStream} implementation used for
11 | * reading uncompressed data
12 | * and compressing it on the fly, such that reads return compressed
13 | * data.
14 |  * It is the reverse of {@link LZFInputStream} (which instead uncompresses data).
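 *<p>
 * Illustrative use (editorial sketch, not part of the original Javadoc; assumes Java 9+ for readAllBytes):
 *<pre>{@code
 * byte[] raw = "plain content to be compressed on the fly".getBytes(StandardCharsets.UTF_8);
 * InputStream in = new LZFCompressingInputStream(new ByteArrayInputStream(raw));
 * byte[] lzfBytes = in.readAllBytes(); // LZF-framed output, readable back via LZFInputStream
 * in.close();
 * }</pre>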
15 | *
16 | * @author Tatu Saloranta
17 | *
18 | * @see com.ning.compress.lzf.LZFInputStream
19 | */
20 | public class LZFCompressingInputStream extends InputStream
21 | {
22 | private final BufferRecycler _recycler;
23 |
24 | private ChunkEncoder _encoder;
25 |
26 | /**
27 | * Stream used for reading data to be compressed
28 | */
29 | protected final InputStream _inputStream;
30 |
31 | /**
32 | * Flag that indicates if we have already called 'inputStream.close()'
33 | * (to avoid calling it multiple times)
34 | */
35 | protected boolean _inputStreamClosed;
36 |
37 | /**
38 | * Flag that indicates whether we force full reads (reading of as many
39 | * bytes as requested), or 'optimal' reads (up to as many as available,
40 | * but at least one). Default is false, meaning that 'optimal' read
41 | * is used.
42 | */
43 | protected boolean _cfgFullReads = false;
44 |
45 | /**
46 | * Buffer in which uncompressed input is first read, before getting
47 | * encoded in {@link #_encodedBytes}.
48 | */
49 | protected byte[] _inputBuffer;
50 |
51 | /**
52 | * Buffer that contains compressed data that is returned to readers.
53 | */
54 | protected byte[] _encodedBytes;
55 |
56 | /**
57 |      * The current position (next byte to output) in the encoded-bytes buffer.
58 | */
59 | protected int _bufferPosition = 0;
60 |
61 | /**
62 | * Length of the current uncompressed bytes buffer
63 | */
64 | protected int _bufferLength = 0;
65 |
66 | /**
67 | * Number of bytes read from the underlying {@link #_inputStream}
68 | */
69 | protected int _readCount = 0;
70 |
71 | /*
72 | ///////////////////////////////////////////////////////////////////////
73 | // Construction, configuration
74 | ///////////////////////////////////////////////////////////////////////
75 | */
76 |
77 | public LZFCompressingInputStream(InputStream in)
78 | {
79 | this(null, in, BufferRecycler.instance());
80 | }
81 |
82 | public LZFCompressingInputStream(final ChunkEncoder encoder, InputStream in)
83 | {
84 | this(encoder, in, null);
85 | }
86 |
87 | public LZFCompressingInputStream(final ChunkEncoder encoder, InputStream in, BufferRecycler bufferRecycler)
88 | {
89 | // may be passed by caller, or could be null
90 | _encoder = encoder;
91 | _inputStream = in;
92 | if (bufferRecycler==null) {
93 | bufferRecycler = (encoder!=null) ? _encoder._recycler : BufferRecycler.instance();
94 | }
95 | _recycler = bufferRecycler;
96 | _inputBuffer = bufferRecycler.allocInputBuffer(LZFChunk.MAX_CHUNK_LEN);
97 | // let's not yet allocate encoding buffer; don't know optimal size
98 | }
99 |
100 | /**
101 |      * Method that can be used to define whether reads should be "full" or
102 |      * "optimal": the former means that full compressed blocks are read right
103 |      * away as needed, the latter that only smaller chunks are read at a time,
104 |      * with more being read as needed.
105 | */
106 | public void setUseFullReads(boolean b) {
107 | _cfgFullReads = b;
108 | }
109 |
110 | /*
111 | ///////////////////////////////////////////////////////////////////////
112 | // InputStream implementation
113 | ///////////////////////////////////////////////////////////////////////
114 | */
115 |
116 | @Override
117 | public int available()
118 | {
119 | if (_inputStreamClosed) { // javadocs suggest 0 for closed as well (not -1)
120 | return 0;
121 | }
122 | int left = (_bufferLength - _bufferPosition);
123 | return (left <= 0) ? 0 : left;
124 | }
125 |
126 | @Override
127 | public int read() throws IOException
128 | {
129 | if (!readyBuffer()) {
130 | return -1;
131 | }
132 | return _encodedBytes[_bufferPosition++] & 255;
133 | }
134 |
135 | @Override
136 | public int read(final byte[] buffer) throws IOException
137 | {
138 | return read(buffer, 0, buffer.length);
139 | }
140 |
141 | @Override
142 | public int read(final byte[] buffer, int offset, int length) throws IOException
143 | {
144 | if (length < 1) {
145 | return 0;
146 | }
147 | if (!readyBuffer()) {
148 | return -1;
149 | }
150 | // First let's read however much data we happen to have...
151 | int chunkLength = Math.min(_bufferLength - _bufferPosition, length);
152 | System.arraycopy(_encodedBytes, _bufferPosition, buffer, offset, chunkLength);
153 | _bufferPosition += chunkLength;
154 |
155 | if (chunkLength == length || !_cfgFullReads) {
156 | return chunkLength;
157 | }
158 | // Need more data, then
159 | int totalRead = chunkLength;
160 | do {
161 | offset += chunkLength;
162 | if (!readyBuffer()) {
163 | break;
164 | }
165 | chunkLength = Math.min(_bufferLength - _bufferPosition, (length - totalRead));
166 | System.arraycopy(_encodedBytes, _bufferPosition, buffer, offset, chunkLength);
167 | _bufferPosition += chunkLength;
168 | totalRead += chunkLength;
169 | } while (totalRead < length);
170 |
171 | return totalRead;
172 | }
173 |
174 | @Override
175 | public void close() throws IOException
176 | {
177 | _bufferPosition = _bufferLength = 0;
178 | byte[] buf = _encodedBytes;
179 | if (buf != null) {
180 | _encodedBytes = null;
181 | _recycler.releaseEncodeBuffer(buf);
182 | }
183 | if (_encoder != null) {
184 | _encoder.close();
185 | }
186 | _closeInput();
187 | }
188 |
189 | private void _closeInput() throws IOException
190 | {
191 | byte[] buf = _inputBuffer;
192 | if (buf != null) {
193 | _inputBuffer = null;
194 | _recycler.releaseInputBuffer(buf);
195 | }
196 | if (!_inputStreamClosed) {
197 | _inputStreamClosed = true;
198 | _inputStream.close();
199 | }
200 | }
201 |
202 | /**
203 | * Overridden to just skip at most a single chunk at a time
204 | */
205 | @Override
206 | public long skip(long n) throws IOException
207 | {
208 | if (_inputStreamClosed) {
209 | return -1;
210 | }
211 | int left = (_bufferLength - _bufferPosition);
212 | // if none left, must read more:
213 | if (left <= 0) {
214 | // otherwise must read more to skip...
215 | int b = read();
216 | if (b < 0) { // EOF
217 | return -1;
218 | }
219 | // push it back to get accurate skip count
220 | --_bufferPosition;
221 | left = (_bufferLength - _bufferPosition);
222 | }
223 | // either way, just skip whatever we have decoded
224 | if (left > n) {
225 | left = (int) n;
226 | }
227 | _bufferPosition += left;
228 | return left;
229 | }
230 | /*
231 | ///////////////////////////////////////////////////////////////////////
232 | // Internal methods
233 | ///////////////////////////////////////////////////////////////////////
234 | */
235 |
236 | /**
237 |      * Fill the buffer of encoded bytes by reading data from the underlying input stream and compressing it.
238 |      * @throws IOException if reading from the underlying input stream fails
239 | */
240 | protected boolean readyBuffer() throws IOException
241 | {
242 | if (_bufferPosition < _bufferLength) {
243 | return true;
244 | }
245 | if (_inputStreamClosed) {
246 | return false;
247 | }
248 | // Ok: read as much as we can from input source first
249 | int count = _inputStream.read(_inputBuffer, 0, _inputBuffer.length);
250 | if (count < 0) { // if no input read, it's EOF
251 | _closeInput(); // and we can close input source as well
252 | return false;
253 | }
254 | int chunkLength = count;
255 | int left = _inputBuffer.length - count;
256 |
257 | while ((count = _inputStream.read(_inputBuffer, chunkLength, left)) > 0) {
258 | chunkLength += count;
259 | left -= count;
260 | if (left < 1) {
261 | break;
262 | }
263 | }
264 |
265 | _bufferPosition = 0;
266 | // Ok: if we don't yet have an encoder (and buffer for it), let's get one
267 | if (_encoder == null) {
268 | // need 7 byte header, plus regular max buffer size:
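            // (Editorial note: (chunkLength + 31) >> 5 == ceil(chunkLength / 32), the worst-case LZF
            //  expansion of one control byte per 32 literal bytes; the +7 leaves room for the chunk header.)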
269 | int bufferLen = chunkLength + ((chunkLength + 31) >> 5) + 7;
270 | _encoder = ChunkEncoderFactory.optimalNonAllocatingInstance(bufferLen, _recycler);
271 | }
272 | if (_encodedBytes == null) {
273 | int bufferLen = chunkLength + ((chunkLength + 31) >> 5) + 7;
274 | _encodedBytes = _recycler.allocEncodingBuffer(bufferLen);
275 | }
276 | // offset of 7 so we can prepend header as necessary
277 | int encodeEnd = _encoder.tryCompress(_inputBuffer, 0, chunkLength, _encodedBytes, 7);
278 | // but did it compress?
279 | if (encodeEnd < (chunkLength + 5)) { // yes! (compared to 5 byte uncomp prefix, data)
280 | // prepend header in situ
281 | LZFChunk.appendCompressedHeader(chunkLength, encodeEnd-7, _encodedBytes, 0);
282 | _bufferLength = encodeEnd;
283 | } else { // no -- so sad...
284 | int ptr = LZFChunk.appendNonCompressedHeader(chunkLength, _encodedBytes, 0);
285 | // TODO: figure out a way to avoid this copy; need a header
286 | System.arraycopy(_inputBuffer, 0, _encodedBytes, ptr, chunkLength);
287 | _bufferLength = ptr + chunkLength;
288 | }
289 | if (count < 0) { // did we get end-of-input?
290 | _closeInput();
291 | }
292 | return true;
293 | }
294 | }
295 |
--------------------------------------------------------------------------------
/src/main/java/com/ning/compress/lzf/LZFOutputStream.java:
--------------------------------------------------------------------------------
1 | package com.ning.compress.lzf;
2 |
3 | import java.io.FilterOutputStream;
4 | import java.io.IOException;
5 | import java.io.InputStream;
6 | import java.io.OutputStream;
7 | import java.nio.ByteBuffer;
8 | import java.nio.MappedByteBuffer;
9 | import java.nio.channels.FileChannel;
10 | import java.nio.channels.FileChannel.MapMode;
11 | import java.nio.channels.WritableByteChannel;
12 |
13 | import com.ning.compress.BufferRecycler;
14 | import com.ning.compress.lzf.util.ChunkEncoderFactory;
15 |
16 | /**
17 | * Decorator {@link OutputStream} implementation that will compress
18 |  * output using the LZF compression algorithm, given uncompressed input
19 | * to write. Its counterpart is {@link LZFInputStream}; although
20 | * in some ways {@link LZFCompressingInputStream} can be seen
21 | * as the opposite.
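 *<p>
 * Illustrative use (editorial sketch, not part of the original Javadoc):
 *<pre>{@code
 * ByteArrayOutputStream sink = new ByteArrayOutputStream();
 * LZFOutputStream out = new LZFOutputStream(sink);
 * out.write("some repetitive content, some repetitive content".getBytes(StandardCharsets.UTF_8));
 * out.close(); // completes and writes the final chunk
 * byte[] lzfBytes = sink.toByteArray();
 * }</pre>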
22 | *
23 | * @author jon hartlaub
24 | * @author Tatu Saloranta
25 | *
26 | * @see LZFInputStream
27 | * @see LZFCompressingInputStream
28 | */
29 | public class LZFOutputStream extends FilterOutputStream implements WritableByteChannel
30 | {
31 | private static final int DEFAULT_OUTPUT_BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN;
32 |
33 | private final ChunkEncoder _encoder;
34 | private final BufferRecycler _recycler;
35 |
36 | protected byte[] _outputBuffer;
37 | protected int _position = 0;
38 |
39 | /**
40 | * Configuration setting that governs whether basic 'flush()' should
41 | * first complete a block or not.
42 | *flush method does nothing.
194 | */
195 | @Override
196 | public void flush() throws IOException
197 | {
198 | checkNotClosed();
199 | }
200 |
201 | @Override
202 | public boolean isOpen() {
203 | return ! _outputStreamClosed;
204 | }
205 |
206 | @Override
207 | public void close() throws IOException
208 | {
209 | if (!_outputStreamClosed) {
210 | if (_position > 0) {
211 | writeCompressedBlock();
212 | }
213 | byte[] buf = _outputBuffer;
214 | if (buf != null) {
215 | blockManager.releaseBlockToPool(_outputBuffer);
216 | _outputBuffer = null;
217 | }
218 | writeExecutor.shutdown();
219 | try {
220 | writeExecutor.awaitTermination(1, TimeUnit.HOURS);
221 | // at this point compressExecutor should have no pending tasks: cleanup ThreadLocal's
222 | // we don't know how many threads; go to the max for now. This will change once we get a proper configuration bean.
223 | int maxThreads = Runtime.getRuntime().availableProcessors();
224 | Collection