├── .mvn └── wrapper │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── src ├── test │ ├── resources │ │ ├── binary │ │ │ ├── help.bin │ │ │ └── word.doc │ │ ├── shakespeare.tar │ │ └── shakespeare │ │ │ └── play.dtd │ └── java │ │ ├── com │ │ └── ning │ │ │ └── compress │ │ │ ├── lzf │ │ │ ├── TestLZFCompressingInputStream.java │ │ │ ├── util │ │ │ │ └── TestFileStreams.java │ │ │ ├── ManualTestLZF.java │ │ │ ├── TestLZFUncompressor.java │ │ │ ├── TestLZFOutputStream.java │ │ │ ├── TestLZFDecoder.java │ │ │ ├── TestLZFRoundTrip.java │ │ │ ├── LZFEncoderTest.java │ │ │ ├── TestLZFInputStream.java │ │ │ └── TestFuzzUnsafeLZF.java │ │ │ ├── gzip │ │ │ ├── TestGzipStreams.java │ │ │ └── TestGzipUncompressor.java │ │ │ └── BaseForTests.java │ │ └── perf │ │ ├── ManualSkipComparison.java │ │ ├── ManualUncompressComparison.java │ │ └── ManualUnsafePerf.java ├── main │ ├── java │ │ └── com │ │ │ └── ning │ │ │ └── compress │ │ │ ├── lzf │ │ │ ├── util │ │ │ │ ├── package-info.java │ │ │ │ ├── ChunkDecoderFactory.java │ │ │ │ └── ChunkEncoderFactory.java │ │ │ ├── impl │ │ │ │ ├── package-info.java │ │ │ │ ├── UnsafeChunkEncoders.java │ │ │ │ ├── VanillaChunkEncoder.java │ │ │ │ ├── UnsafeChunkEncoderBE.java │ │ │ │ ├── UnsafeChunkEncoderLE.java │ │ │ │ └── UnsafeChunkEncoder.java │ │ │ ├── package-info.java │ │ │ ├── parallel │ │ │ │ ├── package-info.java │ │ │ │ ├── WriteTask.java │ │ │ │ ├── BlockManager.java │ │ │ │ ├── CompressTask.java │ │ │ │ └── PLZFOutputStream.java │ │ │ ├── LZFException.java │ │ │ ├── LZF.java │ │ │ ├── LZFDecoder.java │ │ │ ├── LZFChunk.java │ │ │ ├── LZFCompressingInputStream.java │ │ │ └── LZFOutputStream.java │ │ │ ├── package-info.java │ │ │ ├── gzip │ │ │ ├── package-info.java │ │ │ ├── GZIPException.java │ │ │ ├── GZIPRecycler.java │ │ │ └── OptimizedGZIPOutputStream.java │ │ │ ├── CompressionFormatException.java │ │ │ ├── DataHandler.java │ │ │ ├── UncompressorOutputStream.java │ │ │ ├── Uncompressor.java │ │ │ └── BufferRecycler.java │ └── resources │ │ └── META-INF │ │ ├── LICENSE │ │ └── NOTICE └── moditect │ └── module-info.java ├── run-skip ├── run-comp-perf ├── run-uncomp-perf ├── .gitattributes ├── profile-skip ├── profile-comp-perf ├── profile-uncomp-perf ├── .github ├── dependabot.yml └── workflows │ └── main.yml ├── .gitignore ├── LICENSE ├── VERSION.txt └── README.md /.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /src/test/resources/binary/help.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/src/test/resources/binary/help.bin -------------------------------------------------------------------------------- /src/test/resources/binary/word.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/src/test/resources/binary/word.doc -------------------------------------------------------------------------------- /src/test/resources/shakespeare.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ning/compress/HEAD/src/test/resources/shakespeare.tar -------------------------------------------------------------------------------- /run-skip: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | perf.ManualSkipComparison \ 6 | $* 7 | -------------------------------------------------------------------------------- /run-comp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx200m -server \ 4 | -cp target/classes:target/test-classes \ 5 | perf.ManualCompressComparison \ 6 | $* 7 | 8 | -------------------------------------------------------------------------------- /run-uncomp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx200m -server \ 4 | -cp target/classes:target/test-classes \ 5 | perf.ManualUncompressComparison \ 6 | $* 7 | 8 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/util/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains helper classes uses by LZF codec. 3 | */ 4 | 5 | package com.ning.compress.lzf.util; 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Mark all Jazzer inputs as binary, to avoid bytes in them being misinterpreted as line terminators and being 2 | # changed on checkout 3 | /src/test/resources/**/*Inputs/** binary 4 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains part of public API that is shared between all different 3 | compression codecs. 4 | */ 5 | 6 | package com.ning.compress; 7 | -------------------------------------------------------------------------------- /profile-skip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | -Xrunhprof:cpu=samples,depth=10,verbose=n,interval=2 \ 6 | perf.ManualSkipComparison \ 7 | $* 8 | 9 | -------------------------------------------------------------------------------- /profile-comp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | -Xrunhprof:cpu=samples,depth=10,verbose=n,interval=2 \ 6 | perf.ManualCompressComparison \ 7 | $* 8 | 9 | -------------------------------------------------------------------------------- /profile-uncomp-perf: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -Xmx64m -server \ 4 | -cp target/classes:target/test-classes \ 5 | -Xrunhprof:cpu=samples,depth=10,verbose=n,interval=2 \ 6 | perf.ManualUncompressComparison \ 7 | $* 8 | 9 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/impl/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains implementation classes that are not part 3 | of public interface of LZF codec. 
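<p>
For example, {@link com.ning.compress.lzf.impl.UnsafeChunkEncoders} selects a
byte-order-specific encoder; an illustrative condensation of the dispatch in
its {@code createEncoder(int totalLength)} method:
<pre>
{@code
boolean le = (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN);
UnsafeChunkEncoder enc = le
    ? new UnsafeChunkEncoderLE(totalLength)
    : new UnsafeChunkEncoderBE(totalLength);
}
</pre>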
4 | */ 5 | 6 | package com.ning.compress.lzf.impl; 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | groups: 8 | github-actions: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains public API of the LZF codec, as well as some 3 | of the implementation (specifically parts that are designed to be overridable). 4 | */ 5 | 6 | package com.ning.compress.lzf; 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | /.idea 3 | *.ipr 4 | *.iws 5 | *.log 6 | .DS_Store 7 | .classpath 8 | .settings 9 | .project 10 | target 11 | pom.xml.releaseBackup 12 | release.properties 13 | *~ 14 | temp-testng-customsuite.xml 15 | test-output 16 | .externalToolBuilders 17 | server/logs 18 | runtime 19 | logs 20 | 21 | # Jazzer fuzzing corpus 22 | /.cifuzz-corpus/ 23 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/LICENSE: -------------------------------------------------------------------------------- 1 | This copy of Compress-LZF library is licensed under the 2 | Apache (Software) License, version 2.0 ("the License"). 3 | See the License for details about distribution rights, and the 4 | specific rights regarding derivate works. 5 | 6 | You may obtain a copy of the License at: 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | -------------------------------------------------------------------------------- /src/moditect/module-info.java: -------------------------------------------------------------------------------- 1 | // Hand-crafted 06-Jan-2021 by tatu.saloranta@iki.fi 2 | module com.ning.compress.lzf { 3 | requires transitive java.xml; 4 | requires jdk.unsupported; 5 | 6 | exports com.ning.compress; 7 | exports com.ning.compress.gzip; 8 | exports com.ning.compress.lzf; 9 | // Not sure if this needs to be exported but... 10 | exports com.ning.compress.lzf.impl; 11 | exports com.ning.compress.lzf.parallel; 12 | exports com.ning.compress.lzf.util; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/parallel/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains parallel implementation of LZF compressor: granularity 3 | is at chunk-level, so that each processing thread operates on a single chunk 4 | at a time (and conversely, no chunk is "split" across threads). 5 |
<p>
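The stream described below can be used as a drop-in {@link java.io.OutputStream};
an illustrative sketch (assuming the one-argument constructor that wraps the
target stream, and a hypothetical {@code uncompressedBytes} input array):
<pre>
{@code
OutputStream file = new FileOutputStream("data.lzf");
PLZFOutputStream out = new PLZFOutputStream(file); // assumption: 1-arg constructor
out.write(uncompressedBytes); // chunks are compressed by worker threads
out.close();                  // drains pending chunks
}
</pre>
<p>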
6 | The main abstraction to use is {@link com.ning.compress.lzf.parallel.PLZFOutputStream}, 7 | which orchestrates operation of multi-thread compression. 8 | */ 9 | 10 | package com.ning.compress.lzf.parallel; 11 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/gzip/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | Package that contains optimized stream implementations for working 3 | with GZIP. Internally JDK provided efficient ZLIB codec is used for 4 | actual encoding and decoding. 5 | Code here 6 | adds appropriate reuse to specifically improve handling of relatively 7 | short compressed data; and may also have better support for alternate 8 | operating modes such as "push-style" handling that is needed for 9 | non-blocking ("async") stream processing. 10 | */ 11 | 12 | package com.ning.compress.gzip; 13 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/LZFException.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import com.ning.compress.CompressionFormatException; 4 | 5 | public class LZFException extends CompressionFormatException 6 | { 7 | private static final long serialVersionUID = 1L; 8 | 9 | public LZFException(String message) { 10 | super(message); 11 | } 12 | 13 | public LZFException(Throwable t) { 14 | super(t); 15 | } 16 | 17 | public LZFException(String message, Throwable t) { 18 | super(message, t); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/gzip/GZIPException.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.gzip; 2 | 3 | import com.ning.compress.CompressionFormatException; 4 | 5 | public class GZIPException extends CompressionFormatException 6 | { 7 | private static final long serialVersionUID = 1L; 8 | 9 | public GZIPException(String message) { 10 | super(message); 11 | } 12 | 13 | public GZIPException(Throwable t) { 14 | super(t); 15 | } 16 | 17 | public GZIPException(String message, Throwable t) { 18 | super(message, t); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2009-2010 Ning, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | use this file except in compliance with the License. You may obtain a copy of 5 | the License at http://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS,WITHOUT 9 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | License for the specific language governing permissions and limitations under 11 | the License. -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/CompressionFormatException.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress; 2 | 3 | import java.io.IOException; 4 | 5 | /** 6 | * Base exception used by compression codecs when encountering a problem 7 | * with underlying data format, usually due to data corruption. 
8 | */ 9 | public class CompressionFormatException extends IOException 10 | { 11 | private static final long serialVersionUID = 1L; 12 | 13 | protected CompressionFormatException(String message) { 14 | super(message); 15 | } 16 | 17 | protected CompressionFormatException(Throwable t) { 18 | super(); 19 | initCause(t); 20 | } 21 | 22 | protected CompressionFormatException(String message, Throwable t) { 23 | super(message); 24 | initCause(t); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/NOTICE: -------------------------------------------------------------------------------- 1 | # Compress LZF 2 | 3 | This library contains efficient implementation of LZF compression format, 4 | as well as additional helper classes that build on JDK-provided gzip (deflat) 5 | codec. 6 | 7 | ## Licensing 8 | 9 | Library is licensed under Apache License 2.0, as per accompanying LICENSE file. 10 | 11 | ## Credit 12 | 13 | Library has been written by Tatu Saloranta (tatu.saloranta@iki.fi). 14 | It was started at Ning, inc., as an official Open Source process used by 15 | platform backend, but after initial versions has been developed outside of 16 | Ning by supporting community. 17 | 18 | Other contributors include: 19 | 20 | * Jon Hartlaub (first versions of streaming reader/writer; unit tests) 21 | * Cedrik Lime: parallel LZF implementation 22 | 23 | Various community members have contributed bug reports, and suggested minor 24 | fixes; these can be found from file "VERSION.txt" in SCM. 25 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.6/apache-maven-3.8.6-bin.zip 18 | wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.0/maven-wrapper-3.1.0.jar 19 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/parallel/WriteTask.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.parallel; 2 | 3 | import java.io.OutputStream; 4 | import java.util.concurrent.Future; 5 | 6 | import com.ning.compress.lzf.LZFChunk; 7 | 8 | /** 9 | * @author Cédrik LIME 10 | */ 11 | class WriteTask implements Runnable { 12 | private final OutputStream output; 13 | private final Future lzfFuture; 14 | private final PLZFOutputStream caller; 15 | 16 | public WriteTask(OutputStream output, Future lzfFuture, PLZFOutputStream caller) { 17 | super(); 18 | this.output = output; 19 | this.lzfFuture = lzfFuture; 20 | this.caller = caller; 21 | } 22 | 23 | /** {@inheritDoc} */ 24 | @Override 25 | public void run() { 26 | try { 27 | LZFChunk lzfChunk = lzfFuture.get(); 28 | while (lzfChunk != null) { 29 | output.write(lzfChunk.getData()); 30 | lzfChunk = lzfChunk.next(); 31 | } 32 | } catch (Exception e) { 33 | caller.writeException = e; 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/parallel/BlockManager.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.parallel; 2 | 3 | import java.util.concurrent.BlockingDeque; 4 | import java.util.concurrent.LinkedBlockingDeque; 5 | 6 | /** 7 | * @author Cédrik LIME 8 | */ 9 | class BlockManager { 10 | /* used as a blocking Stack (FIFO) */ 11 | private final BlockingDeque blockPool; 12 | 13 | public BlockManager(int blockPoolSize, int blockSize) { 14 | // log.debug("Using block pool size of " + blockPoolSize); 15 | blockPool = new LinkedBlockingDeque(blockPoolSize); 16 | for (int i = 0; i < blockPoolSize; ++i) { 17 | blockPool.addFirst(new byte[blockSize]); 18 | } 19 | } 20 | 21 | public byte[] getBlockFromPool() { 22 | byte[] block = null; 23 | try { 24 | block = blockPool.takeFirst(); 25 | } catch (InterruptedException e) { 26 | throw new RuntimeException(e); 27 | } 28 | return block; 29 | } 30 | 31 | public void releaseBlockToPool(byte[] block) { 32 | // Arrays.fill(block, (byte)0); 33 | try { 34 | blockPool.putLast(block); 35 | } catch (InterruptedException e) { 36 | throw new RuntimeException(e); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/test/resources/shakespeare/play.dtd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/DataHandler.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress; 2 | 3 | import java.io.IOException; 4 | 5 | /** 6 | * Interface used by {@link Uncompressor} implementations: receives 7 | * uncompressed data and processes it appropriately. 8 | */ 9 | public interface DataHandler 10 | { 11 | /** 12 | * Method called with uncompressed data as it becomes available. 13 | *
<p>
14 | * NOTE: return value was added (from void to boolean) in 0.9.9 15 | * 16 | * @return True, if caller should process and feed more data; false if 17 | * caller is not interested in more data and processing should be terminated 18 | * (and {@link #allDataHandled} should be called immediately) 19 | */ 20 | public boolean handleData(byte[] buffer, int offset, int len) throws IOException; 21 | 22 | /** 23 | * Method called after last call to {@link #handleData}, for successful 24 | * operation, if and when caller is informed about end of content 25 | * Note that if an exception thrown by {@link #handleData} has caused processing 26 | * to be aborted, this method might not get called. 27 | * Implementation may choose to free resources, flush state, or perform 28 | * validation at this point. 29 | */ 30 | public void allDataHandled() throws IOException; 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/parallel/CompressTask.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.parallel; 2 | 3 | import java.util.concurrent.Callable; 4 | 5 | import com.ning.compress.lzf.ChunkEncoder; 6 | import com.ning.compress.lzf.LZFChunk; 7 | import com.ning.compress.lzf.util.ChunkEncoderFactory; 8 | 9 | /** 10 | * @author Cédrik LIME 11 | */ 12 | class CompressTask implements Callable { 13 | private static final ThreadLocal ENCODER = new ThreadLocal() { 14 | @Override 15 | protected ChunkEncoder initialValue() { 16 | return ChunkEncoderFactory.optimalInstance(); 17 | } 18 | }; 19 | 20 | protected byte[] data; 21 | protected int offset, length; 22 | protected BlockManager blockManager; 23 | 24 | public CompressTask(byte[] input, int offset, int length, BlockManager blockManager) { 25 | super(); 26 | this.data = input; 27 | this.offset = offset; 28 | this.length = length; 29 | this.blockManager = blockManager; 30 | } 31 | public CompressTask(byte[] input, BlockManager blockManager) { 32 | this(input, 0, input.length, blockManager); 33 | } 34 | 35 | /** {@inheritDoc} */ 36 | @Override 37 | public LZFChunk call() { 38 | if (data != null) { 39 | LZFChunk lzfChunk = ENCODER.get().encodeChunk(data, offset, length); 40 | // input data is fully processed, we can now discard it 41 | blockManager.releaseBlockToPool(data); 42 | return lzfChunk; 43 | } else { 44 | // cleanup time! 45 | ENCODER.remove(); 46 | return null; 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/UncompressorOutputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress; 2 | 3 | import java.io.*; 4 | 5 | /** 6 | * Simple wrapper or wrapper around {@link Uncompressor}, to help 7 | * with inter-operability. 8 | */ 9 | public class UncompressorOutputStream extends OutputStream 10 | { 11 | protected final Uncompressor _uncompressor; 12 | 13 | private byte[] _singleByte = null; 14 | 15 | public UncompressorOutputStream(Uncompressor uncomp) 16 | { 17 | _uncompressor = uncomp; 18 | } 19 | 20 | /** 21 | * Call to this method will result in call to 22 | * {@link Uncompressor#complete()}, which is idempotent 23 | * (i.e. can be called multiple times without ill effects). 
24 | */ 25 | @Override 26 | public void close() throws IOException { 27 | _uncompressor.complete(); 28 | } 29 | 30 | @Override 31 | public void flush() { } 32 | 33 | @Override 34 | public void write(byte[] b) throws IOException { 35 | _uncompressor.feedCompressedData(b, 0, b.length); 36 | } 37 | 38 | @Override 39 | public void write(byte[] b, int off, int len) throws IOException { 40 | _uncompressor.feedCompressedData(b, off, len); 41 | } 42 | 43 | @Override 44 | public void write(int b) throws IOException 45 | { 46 | if (_singleByte == null) { 47 | _singleByte = new byte[1]; 48 | } 49 | _singleByte[0] = (byte) b; 50 | _uncompressor.feedCompressedData(_singleByte, 0, 1); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/TestLZFCompressingInputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.*; 4 | 5 | import com.ning.compress.BaseForTests; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 9 | import static org.junit.jupiter.api.Assertions.assertEquals; 10 | 11 | public class TestLZFCompressingInputStream extends BaseForTests 12 | { 13 | @Test 14 | public void testSimpleCompression() throws IOException 15 | { 16 | // produce multiple chunks, about 3 here: 17 | byte[] source = constructFluff(140000); 18 | LZFCompressingInputStream compIn = new LZFCompressingInputStream(new ByteArrayInputStream(source)); 19 | byte[] comp = readAll(compIn); 20 | byte[] uncomp = uncompress(comp); 21 | assertArrayEquals(source, uncomp); 22 | 23 | // and then check that size is about same as with static methods 24 | byte[] comp2 = compress(source); 25 | assertEquals(comp.length, comp2.length); 26 | } 27 | 28 | @Test 29 | public void testSimpleNonCompressed() throws IOException 30 | { 31 | // produce two chunks as well 32 | byte[] source = this.constructUncompressable(89000); 33 | LZFCompressingInputStream compIn = new LZFCompressingInputStream(new ByteArrayInputStream(source)); 34 | byte[] comp = readAll(compIn); 35 | // 2 non-compressed chunks with headers: 36 | assertEquals(89000 + 5 + 5, comp.length); 37 | byte[] uncomp = uncompress(comp); 38 | assertArrayEquals(source, uncomp); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/Uncompressor.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress; 2 | 3 | import java.io.IOException; 4 | 5 | /** 6 | * Abstract class that defines "push" style API for various uncompressors 7 | * (aka decompressors or decoders). Implementations are alternatives to stream 8 | * based uncompressors (such as {@link com.ning.compress.lzf.LZFInputStream}) 9 | * in cases where "push" operation is important and/or blocking is not allowed; 10 | * for example, when handling asynchronous HTTP responses. 11 | *
<p>
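 * A typical feed loop looks like this (illustrative sketch; {@code in} stands
 * for any source of compressed bytes):
 * <pre>
 * byte[] buf = new byte[8192];
 * int count;
 * while ((count = in.read(buf)) > 0) {
 *     if (!uncompressor.feedCompressedData(buf, 0, count)) {
 *         break; // listener wants no more data
 *     }
 * }
 * uncompressor.complete();
 * </pre>
 * <p>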
12 | * Note that API does not define the way that listener is attached: this is 13 | * typically passed through to constructor of the implementation. 14 | * 15 | * @author Tatu Saloranta (tatu.saloranta@iki.fi) 16 | */ 17 | public abstract class Uncompressor 18 | { 19 | /** 20 | * Method called to feed more compressed data to be uncompressed, and 21 | * sent to possible listeners. 22 | *
<p>
23 | * NOTE: return value was added (from void to boolean) in 0.9.9 24 | * 25 | * @return True, if caller should process and feed more data; false if 26 | * caller is not interested in more data and processing should be terminated. 27 | * (and {@link #complete} should be called immediately) 28 | */ 29 | public abstract boolean feedCompressedData(byte[] comp, int offset, int len) 30 | throws IOException; 31 | 32 | /** 33 | * Method called to indicate that all data to uncompress has already been fed. 34 | * This typically results in last block of data being uncompressed, and results 35 | * being sent to listener(s); but may also throw an exception if incomplete 36 | * block was passed. 37 | */ 38 | public abstract void complete() throws IOException; 39 | } 40 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/util/TestFileStreams.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.util; 2 | 3 | import java.io.*; 4 | import java.nio.charset.StandardCharsets; 5 | import java.nio.file.Path; 6 | 7 | import com.ning.compress.BaseForTests; 8 | import org.junit.jupiter.api.Test; 9 | import org.junit.jupiter.api.io.TempDir; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 12 | import static org.junit.jupiter.api.Assertions.assertEquals; 13 | 14 | public class TestFileStreams extends BaseForTests 15 | { 16 | @TempDir 17 | Path tempDir; 18 | 19 | @Test 20 | public void testStreams() throws Exception 21 | { 22 | File f = tempDir.resolve("lzf-test.lzf").toFile(); 23 | 24 | // First, write encoded stuff (won't compress, but produces something) 25 | byte[] input = "Whatever stuff...".getBytes(StandardCharsets.UTF_8); 26 | 27 | try (LZFFileOutputStream out = new LZFFileOutputStream(f)) { 28 | out.write(input); 29 | } 30 | 31 | long len = f.length(); 32 | // happens to be 22; 17 bytes uncompressed, with 5 byte header 33 | assertEquals(22L, len); 34 | 35 | try (LZFFileInputStream in = new LZFFileInputStream(f)) { 36 | for (byte b : input) { 37 | assertEquals(b & 0xFF, in.read()); 38 | } 39 | assertEquals(-1, in.read()); 40 | } 41 | } 42 | 43 | @Test 44 | public void testReadAndWrite() throws Exception 45 | { 46 | File f = tempDir.resolve("lzf-test.lzf").toFile(); 47 | 48 | byte[] fluff = constructFluff(132000); 49 | try (LZFFileOutputStream fout = new LZFFileOutputStream(f)) { 50 | fout.write(fluff); 51 | } 52 | 53 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(fluff.length); 54 | try (LZFFileInputStream in = new LZFFileInputStream(f)) { 55 | in.readAndWrite(bytes); 56 | } 57 | byte[] actual = bytes.toByteArray(); 58 | assertArrayEquals(fluff, actual); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/gzip/GZIPRecycler.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.gzip; 2 | 3 | import java.lang.ref.SoftReference; 4 | import java.util.zip.Deflater; 5 | import java.util.zip.Inflater; 6 | 7 | /** 8 | * GZIP-codec-specific "extension" to {@link com.ning.compress.BufferRecycler}, 9 | * used for recycling expensive objects. 
10 | * 11 | * @author Tatu Saloranta (tatu.saloranta@iki.fi) 12 | */ 13 | public final class GZIPRecycler 14 | { 15 | final protected static ThreadLocal> _recyclerRef 16 | = new ThreadLocal>(); 17 | 18 | protected Inflater _inflater; 19 | 20 | protected Deflater _deflater; 21 | 22 | /** 23 | * Accessor to get thread-local recycler instance 24 | */ 25 | public static GZIPRecycler instance() 26 | { 27 | SoftReference ref = _recyclerRef.get(); 28 | GZIPRecycler br = (ref == null) ? null : ref.get(); 29 | if (br == null) { 30 | br = new GZIPRecycler(); 31 | _recyclerRef.set(new SoftReference(br)); 32 | } 33 | return br; 34 | } 35 | 36 | /* 37 | /////////////////////////////////////////////////////////////////////// 38 | // API 39 | /////////////////////////////////////////////////////////////////////// 40 | */ 41 | 42 | public Deflater allocDeflater() 43 | { 44 | Deflater d = _deflater; 45 | if (d == null) { // important: true means 'dont add zlib header'; gzip has its own 46 | d = new Deflater(Deflater.DEFAULT_COMPRESSION, true); 47 | } else { 48 | _deflater = null; 49 | } 50 | return d; 51 | } 52 | 53 | public void releaseDeflater(Deflater d) 54 | { 55 | if (d != null) { 56 | d.reset(); 57 | _deflater = d; 58 | } 59 | } 60 | 61 | public Inflater allocInflater() 62 | { 63 | Inflater i = _inflater; 64 | if (i == null) { // important: true means 'dont add zlib header'; gzip has its own 65 | i = new Inflater(true); 66 | } else { 67 | _inflater = null; 68 | } 69 | return i; 70 | } 71 | 72 | public void releaseInflater(Inflater i) 73 | { 74 | if (i != null) { 75 | i.reset(); 76 | _inflater = i; 77 | } 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/gzip/TestGzipStreams.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.gzip; 2 | 3 | import java.io.*; 4 | import java.nio.charset.StandardCharsets; 5 | import java.util.zip.*; 6 | 7 | import com.ning.compress.BaseForTests; 8 | import org.junit.jupiter.api.Test; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 11 | 12 | public class TestGzipStreams extends BaseForTests 13 | { 14 | private final static String INPUT_STR = "Some somewhat short text string -- but enough repetition to overcome shortness of input"; 15 | private final static byte[] INPUT_BYTES; 16 | static { 17 | try { 18 | INPUT_BYTES = INPUT_STR.getBytes(StandardCharsets.UTF_8); 19 | } catch (Exception e) { 20 | throw new RuntimeException(e); 21 | } 22 | } 23 | 24 | @Test 25 | public void testReusableInputStreams() throws IOException 26 | { 27 | // Create known good gzip via JDK 28 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 29 | GZIPOutputStream comp = new GZIPOutputStream(bytes); 30 | comp.write(INPUT_BYTES); 31 | comp.close(); 32 | 33 | // then decode with 'our' thing, twice: 34 | byte[] raw = bytes.toByteArray(); 35 | OptimizedGZIPInputStream re = new OptimizedGZIPInputStream(new ByteArrayInputStream(raw)); 36 | byte[] b = _readAll(re); 37 | assertArrayEquals(INPUT_BYTES, b); 38 | re.close(); 39 | } 40 | 41 | @Test 42 | public void testReusableOutputStreams() throws IOException 43 | { 44 | // first use custom stream 45 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 46 | OptimizedGZIPOutputStream re = new OptimizedGZIPOutputStream(bytes); 47 | re.write(INPUT_BYTES); 48 | re.close(); 49 | 50 | byte[] raw = bytes.toByteArray(); 51 | byte[] b = _readAll(new GZIPInputStream(new 
ByteArrayInputStream(raw))); 52 | assertArrayEquals(INPUT_BYTES, b); 53 | } 54 | 55 | private byte[] _readAll(InputStream in) throws IOException 56 | { 57 | byte[] buffer = new byte[1000]; 58 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(1000); 59 | int count; 60 | 61 | while ((count = in.read(buffer)) > 0) { 62 | bytes.write(buffer, 0, count); 63 | } 64 | in.close(); 65 | return bytes.toByteArray(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/impl/UnsafeChunkEncoders.java: -------------------------------------------------------------------------------- 1 | /* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this 2 | * file except in compliance with the License. You may obtain a copy of the License at 3 | * 4 | * http://www.apache.org/licenses/LICENSE-2.0 5 | * 6 | * Unless required by applicable law or agreed to in writing, software distributed under 7 | * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 8 | * OF ANY KIND, either express or implied. See the License for the specific language 9 | * governing permissions and limitations under the License. 10 | */ 11 | 12 | package com.ning.compress.lzf.impl; 13 | 14 | import com.ning.compress.BufferRecycler; 15 | import java.nio.ByteOrder; 16 | 17 | 18 | /** 19 | * Class that handles actual encoding of individual chunks. 20 | * Resulting chunks can be compressed or non-compressed; compression 21 | * is only used if it actually reduces chunk size (including overhead 22 | * of additional header bytes) 23 | * 24 | * @author Tatu Saloranta (tatu.saloranta@iki.fi) 25 | */ 26 | public final class UnsafeChunkEncoders 27 | { 28 | private final static boolean LITTLE_ENDIAN = (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN); 29 | 30 | public static UnsafeChunkEncoder createEncoder(int totalLength) { 31 | if (LITTLE_ENDIAN) { 32 | return new UnsafeChunkEncoderLE(totalLength); 33 | } 34 | return new UnsafeChunkEncoderBE(totalLength); 35 | } 36 | 37 | public static UnsafeChunkEncoder createNonAllocatingEncoder(int totalLength) { 38 | if (LITTLE_ENDIAN) { 39 | return new UnsafeChunkEncoderLE(totalLength, false); 40 | } 41 | return new UnsafeChunkEncoderBE(totalLength, false); 42 | } 43 | 44 | public static UnsafeChunkEncoder createEncoder(int totalLength, BufferRecycler bufferRecycler) { 45 | if (LITTLE_ENDIAN) { 46 | return new UnsafeChunkEncoderLE(totalLength, bufferRecycler); 47 | } 48 | return new UnsafeChunkEncoderBE(totalLength, bufferRecycler); 49 | } 50 | 51 | public static UnsafeChunkEncoder createNonAllocatingEncoder(int totalLength, BufferRecycler bufferRecycler) { 52 | if (LITTLE_ENDIAN) { 53 | return new UnsafeChunkEncoderLE(totalLength, bufferRecycler, false); 54 | } 55 | return new UnsafeChunkEncoderBE(totalLength, bufferRecycler, false); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | paths-ignore: 7 | - 'README.md' 8 | - 'VERSION.txt' 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | build: 18 | runs-on: 'ubuntu-latest' 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | java_version: ['8', '11', '17', '21'] 23 | env: 24 | JAVA_OPTS: '-XX:+TieredCompilation -XX:TieredStopAtLevel=1' 25 
| steps: 26 | - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 27 | - name: Set up JDK 28 | uses: actions/setup-java@f2beeb24e141e01a676f977032f5a29d81c9e27e # v5.1.0 29 | with: 30 | distribution: 'temurin' 31 | java-version: ${{ matrix.java_version }} 32 | cache: 'maven' 33 | - name: Build 34 | run: ./mvnw -B -q -ff -ntp verify 35 | - name: Generate code coverage 36 | if: ${{ github.event_name != 'pull_request' && matrix.java_version == '8' }} 37 | run: ./mvnw -B -q -ff -ntp test 38 | - name: Publish code coverage 39 | if: ${{ github.event_name != 'pull_request' && matrix.java_version == '8' }} 40 | uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 41 | with: 42 | token: ${{ secrets.CODECOV_TOKEN }} 43 | files: ./target/site/jacoco/jacoco.xml 44 | flags: unittests 45 | 46 | # TODO: Maybe consider caching Jazzer `.cifuzz-corpus` directory if that improves fuzzing performance? 47 | # But could become outdated when fuzz test methods are changed 48 | fuzz: 49 | runs-on: ubuntu-latest 50 | steps: 51 | - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 52 | - name: Set up JDK 53 | uses: actions/setup-java@f2beeb24e141e01a676f977032f5a29d81c9e27e # v5.1.0 54 | with: 55 | distribution: 'temurin' 56 | java-version: 17 57 | cache: 'maven' 58 | - name: Run tests 59 | id: fuzz-tests 60 | # Don't run with `-q`, to see fuzzing progress 61 | run: ./mvnw -B -ff -ntp --activate-profiles fuzz test 62 | - name: Upload fuzz test inputs 63 | if: always() && steps.fuzz-tests.outcome == 'failure' 64 | uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 65 | with: 66 | name: fuzz-test-inputs 67 | path: src/test/resources/**/*Inputs/** 68 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/ManualTestLZF.java: -------------------------------------------------------------------------------- 1 | /* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this 2 | * file except in compliance with the License. You may obtain a copy of the License at 3 | * 4 | * http://www.apache.org/licenses/LICENSE-2.0 5 | * 6 | * Unless required by applicable law or agreed to in writing, software distributed under 7 | * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 8 | * OF ANY KIND, either express or implied. See the License for the specific language 9 | * governing permissions and limitations under the License. 10 | */ 11 | 12 | package com.ning.compress.lzf; 13 | 14 | import java.io.*; 15 | 16 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 17 | 18 | /** 19 | * Semi-automatic non-unit test: will use all files on current working 20 | * directory (and its subdirs) for testing that LZF encode+decode 21 | * will correctly round-trip content. 22 | */ 23 | public class ManualTestLZF 24 | { 25 | public static void main(String... 
args) throws Exception 26 | { 27 | File currDir = new File("").getAbsoluteFile(); 28 | int count = _handleFiles(currDir); 29 | System.out.println("OK: tested with "+count+" files."); 30 | } 31 | 32 | private static int _handleFiles(File dir) throws IOException 33 | { 34 | System.out.println("Testing files from dir '"+dir.getAbsolutePath()+"'..."); 35 | int count = 0; 36 | for (File f : dir.listFiles()) { 37 | if (f.isDirectory()) { 38 | count += _handleFiles(f); 39 | } else { 40 | byte[] data = _readData(f); 41 | byte[] enc = LZFEncoder.encode(data); 42 | byte[] dec = LZFDecoder.decode(enc); 43 | assertArrayEquals(data, dec, "File '"+f.getAbsolutePath()+"'"); 44 | ++count; 45 | } 46 | } 47 | return count; 48 | } 49 | 50 | private static byte[] _readData(File in) throws IOException 51 | { 52 | int len = (int) in.length(); 53 | byte[] result = new byte[len]; 54 | int offset = 0; 55 | FileInputStream fis = new FileInputStream(in); 56 | 57 | while (len > 0) { 58 | int count = fis.read(result, offset, len); 59 | if (count < 0) break; 60 | len -= count; 61 | offset += count; 62 | } 63 | fis.close(); 64 | return result; 65 | } 66 | 67 | } -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/BaseForTests.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress; 2 | 3 | import java.io.ByteArrayOutputStream; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.util.Random; 7 | 8 | import com.ning.compress.lzf.LZFDecoder; 9 | import com.ning.compress.lzf.LZFEncoder; 10 | import com.ning.compress.lzf.LZFException; 11 | 12 | public class BaseForTests 13 | { 14 | private final static byte[] ABCD = new byte[] { 'a', 'b', 'c', 'd' }; 15 | 16 | protected byte[] constructFluff(int length) 17 | { 18 | Random rnd = new Random(length); 19 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(length + 100); 20 | while (bytes.size() < length) { 21 | int num = rnd.nextInt(); 22 | switch (num & 3) { 23 | case 0: 24 | try { 25 | bytes.write(ABCD); 26 | } catch (IOException e) { 27 | throw new RuntimeException(e); 28 | } 29 | break; 30 | case 1: 31 | bytes.write(num); 32 | break; 33 | default: 34 | bytes.write((num >> 3) & 0x7); 35 | break; 36 | } 37 | } 38 | return bytes.toByteArray(); 39 | } 40 | 41 | protected byte[] constructUncompressable(int length) 42 | { 43 | byte[] result = new byte[length]; 44 | Random rnd = new Random(length); 45 | // SecureRandom is "more random", but not reproduceable, so use default instead: 46 | // SecureRandom.getInstance("SHA1PRNG").nextBytes(result); 47 | rnd.nextBytes(result); 48 | return result; 49 | } 50 | 51 | protected byte[] readAll(InputStream in) throws IOException 52 | { 53 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(1024); 54 | byte[] buf = new byte[1024]; 55 | int count; 56 | 57 | while ((count = in.read(buf)) > 0) { 58 | bytes.write(buf, 0, count); 59 | } 60 | in.close(); 61 | return bytes.toByteArray(); 62 | } 63 | 64 | protected byte[] compress(byte[] input) { 65 | return LZFEncoder.encode(input); 66 | } 67 | 68 | protected byte[] compress(byte[] input, int offset, int len) { 69 | return LZFEncoder.encode(input, offset, len); 70 | } 71 | 72 | protected byte[] uncompress(byte[] input) throws LZFException { 73 | return LZFDecoder.safeDecode(input); 74 | } 75 | 76 | protected byte[] uncompress(byte[] input, int offset, int len) throws LZFException { 77 | return LZFDecoder.safeDecode(input, offset, len); 78 | 
} 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/util/ChunkDecoderFactory.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.util; 2 | 3 | import com.ning.compress.lzf.ChunkDecoder; 4 | import com.ning.compress.lzf.impl.VanillaChunkDecoder; 5 | import com.ning.compress.lzf.impl.UnsafeChunkDecoder; 6 | 7 | /** 8 | * Simple helper class used for loading 9 | * {@link ChunkDecoder} implementations, based on criteria 10 | * such as "fastest available". 11 | *
<p>
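 * Typical lookup is a one-liner (illustrative):
 * <pre>
 * ChunkDecoder decoder = ChunkDecoderFactory.optimalInstance();
 * // or, if Unsafe-based implementations must be avoided:
 * ChunkDecoder safeDecoder = ChunkDecoderFactory.safeInstance();
 * </pre>
 * <p>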
12 | * Yes, it looks butt-ugly, but does the job. Nonetheless, if anyone 13 | * has lipstick for this pig, let me know. 14 | */ 15 | public class ChunkDecoderFactory 16 | { 17 | private final static ChunkDecoderFactory _instance; 18 | static { 19 | Class impl = null; 20 | try { 21 | // first, try loading optimal one, which uses Sun JDK Unsafe... 22 | impl = (Class) Class.forName(UnsafeChunkDecoder.class.getName()); 23 | } catch (Throwable t) { } 24 | if (impl == null) { 25 | impl = VanillaChunkDecoder.class; 26 | } 27 | _instance = new ChunkDecoderFactory(impl); 28 | } 29 | 30 | private final Class _implClass; 31 | 32 | @SuppressWarnings("unchecked") 33 | private ChunkDecoderFactory(Class imp) 34 | { 35 | _implClass = (Class) imp; 36 | } 37 | 38 | /* 39 | /////////////////////////////////////////////////////////////////////// 40 | // Public API 41 | /////////////////////////////////////////////////////////////////////// 42 | */ 43 | 44 | /** 45 | * Method to use for getting decoder instance that uses the most optimal 46 | * available methods for underlying data access. It should be safe to call 47 | * this method as implementations are dynamically loaded; however, on some 48 | * non-standard platforms it may be necessary to either directly load 49 | * instances, or use {@link #safeInstance()}. 50 | */ 51 | public static ChunkDecoder optimalInstance() { 52 | try { 53 | return _instance._implClass.newInstance(); 54 | } catch (Exception e) { 55 | throw new IllegalStateException("Failed to load a ChunkDecoder instance ("+e.getClass().getName()+"): " 56 | +e.getMessage(), e); 57 | } 58 | } 59 | 60 | /** 61 | * Method that can be used to ensure that a "safe" decoder instance is loaded. 62 | * Safe here means that it should work on any and all Java platforms. 63 | */ 64 | public static ChunkDecoder safeInstance() { 65 | // this will always succeed loading; no need to use dynamic class loading or instantiation 66 | return new VanillaChunkDecoder(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/test/java/perf/ManualSkipComparison.java: -------------------------------------------------------------------------------- 1 | package perf; 2 | 3 | import java.io.*; 4 | 5 | import com.ning.compress.lzf.*; 6 | import com.ning.compress.lzf.util.LZFFileInputStream; 7 | import com.ning.compress.lzf.util.LZFFileOutputStream; 8 | 9 | /** 10 | * Micro-benchmark for testing performance of skip alternatives. 11 | */ 12 | public class ManualSkipComparison 13 | { 14 | private int size = 0; 15 | 16 | private void test(File file, int origSize) throws Exception 17 | { 18 | // Let's try to guestimate suitable size... 
to get to 50 megs to process 19 | final int REPS = (int) ((double) (50 * 1000 * 1000) / (double) file.length()); 20 | 21 | System.out.printf("Skipping %d bytes of compressed data, %d reps.\n", 22 | file.length(), REPS); 23 | 24 | int i = 0; 25 | while (true) { 26 | try { Thread.sleep(100L); } catch (InterruptedException ie) { } 27 | int round = (i++ % 2); 28 | 29 | String msg; 30 | boolean lf = (round == 0); 31 | 32 | long msecs; 33 | 34 | switch (round) { 35 | 36 | case 0: 37 | msg = "LZF skip/old"; 38 | msecs = testSkip(REPS, file, false); 39 | break; 40 | case 1: 41 | msg = "LZF skip/NEW"; 42 | msecs = testSkip(REPS, file, true); 43 | break; 44 | default: 45 | throw new Error(); 46 | } 47 | if (lf) { 48 | System.out.println(); 49 | } 50 | System.out.println("Test '"+msg+"' ["+size+" bytes] -> "+msecs+" msecs"); 51 | if (size != origSize) { // sanity check 52 | throw new Error("Wrong skip count!!!"); 53 | } 54 | } 55 | } 56 | 57 | private final long testSkip(int REPS, File file, boolean newSkip) throws Exception 58 | { 59 | long start = System.currentTimeMillis(); 60 | long len = -1L; 61 | 62 | // final byte[] buffer = new byte[16000]; 63 | 64 | while (--REPS >= 0) { 65 | InputStream in = newSkip ? new LZFFileInputStream(file) 66 | : new LZFInputStream(new FileInputStream(file)); 67 | len = 0; 68 | long skipped; 69 | 70 | while ((skipped = in.skip(Integer.MAX_VALUE)) >= 0L) { 71 | len += skipped; 72 | } 73 | in.close(); 74 | } 75 | size = (int) len; 76 | return System.currentTimeMillis() - start; 77 | } 78 | 79 | public static void main(String[] args) throws Exception 80 | { 81 | if (args.length != 1) { 82 | System.err.println("Usage: java ... [file]"); 83 | System.exit(1); 84 | } 85 | File in = new File(args[0]); 86 | System.out.printf("Reading input, %d bytes...\n", in.length()); 87 | File out = File.createTempFile("skip-perf", ".lzf"); 88 | System.out.printf("(writing as file '%s')\n", out.getPath()); 89 | 90 | byte[] buffer = new byte[4000]; 91 | int count; 92 | FileInputStream ins = new FileInputStream(in); 93 | LZFFileOutputStream outs = new LZFFileOutputStream(out); 94 | 95 | while ((count = ins.read(buffer)) > 0) { 96 | outs.write(buffer, 0, count); 97 | } 98 | ins.close(); 99 | outs.close(); 100 | System.out.printf("Compressed as file '%s', %d bytes\n", out.getPath(), out.length()); 101 | 102 | new ManualSkipComparison().test(out, (int) in.length()); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/TestLZFUncompressor.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.*; 4 | import java.util.Random; 5 | 6 | import com.ning.compress.BaseForTests; 7 | import com.ning.compress.DataHandler; 8 | import com.ning.compress.UncompressorOutputStream; 9 | import org.junit.jupiter.api.Test; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 12 | 13 | public class TestLZFUncompressor extends BaseForTests 14 | { 15 | @Test 16 | public void testSimpleSmall1by1() throws IOException 17 | { 18 | byte[] fluff = constructFluff(4000); 19 | byte[] comp = LZFEncoder.encode(fluff); 20 | 21 | Collector co = new Collector(); 22 | LZFUncompressor uncomp = new LZFUncompressor(co); 23 | for (int i = 0, end = comp.length; i < end; ++i) { 24 | uncomp.feedCompressedData(comp, i, 1); 25 | } 26 | uncomp.complete(); 27 | byte[] result = co.getBytes(); 28 | 29 | assertArrayEquals(fluff, result); 30 | } 31 | 
32 | @Test 33 | public void testSimpleSmallAsChunk() throws IOException 34 | { 35 | byte[] fluff = constructFluff(4000); 36 | byte[] comp = LZFEncoder.encode(fluff); 37 | 38 | // and then uncompress, first byte by bytes 39 | Collector co = new Collector(); 40 | LZFUncompressor uncomp = new LZFUncompressor(co); 41 | uncomp.feedCompressedData(comp, 0, comp.length); 42 | uncomp.complete(); 43 | byte[] result = co.getBytes(); 44 | assertArrayEquals(fluff, result); 45 | } 46 | 47 | @Test 48 | public void testSimpleBiggerVarLength() throws IOException 49 | { 50 | byte[] fluff = constructFluff(190000); 51 | byte[] comp = LZFEncoder.encode(fluff); 52 | 53 | // and then uncompress with arbitrary-sized blocks... 54 | Random rnd = new Random(123); 55 | Collector co = new Collector(); 56 | LZFUncompressor uncomp = new LZFUncompressor(co); 57 | for (int i = 0, end = comp.length; i < end; ) { 58 | int size = Math.min(end-i, 1+rnd.nextInt(7)); 59 | uncomp.feedCompressedData(comp, i, size); 60 | i += size; 61 | } 62 | uncomp.complete(); 63 | byte[] result = co.getBytes(); 64 | 65 | assertArrayEquals(fluff, result); 66 | } 67 | 68 | @Test 69 | public void testSimpleBiggerOneChunk() throws IOException 70 | { 71 | byte[] fluff = constructFluff(275000); 72 | byte[] comp = LZFEncoder.encode(fluff); 73 | 74 | // and then uncompress in one chunk 75 | Collector co = new Collector(); 76 | LZFUncompressor uncomp = new LZFUncompressor(co); 77 | uncomp.feedCompressedData(comp, 0, comp.length); 78 | uncomp.complete(); 79 | byte[] result = co.getBytes(); 80 | 81 | assertArrayEquals(fluff, result); 82 | } 83 | 84 | 85 | @Test 86 | public void testSimpleBiggerAsStream() throws IOException 87 | { 88 | byte[] fluff = constructFluff(277000); 89 | byte[] comp = LZFEncoder.encode(fluff); 90 | Collector co = new Collector(); 91 | UncompressorOutputStream out = new UncompressorOutputStream(new LZFUncompressor(co)); 92 | out.write(comp, 0, comp.length); 93 | out.close(); 94 | byte[] result = co.getBytes(); 95 | 96 | assertArrayEquals(fluff, result); 97 | } 98 | 99 | private final static class Collector implements DataHandler 100 | { 101 | private final ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 102 | 103 | @Override 104 | public boolean handleData(byte[] buffer, int offset, int len) throws IOException { 105 | bytes.write(buffer, offset, len); 106 | return true; 107 | } 108 | @Override 109 | public void allDataHandled() throws IOException { } 110 | public byte[] getBytes() { return bytes.toByteArray(); } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/LZF.java: -------------------------------------------------------------------------------- 1 | /* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this 2 | * file except in compliance with the License. You may obtain a copy of the License at 3 | * 4 | * http://www.apache.org/licenses/LICENSE-2.0 5 | * 6 | * Unless required by applicable law or agreed to in writing, software distributed under 7 | * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 8 | * OF ANY KIND, either express or implied. See the License for the specific language 9 | * governing permissions and limitations under the License. 
10 | */ 11 | 12 | package com.ning.compress.lzf; 13 | 14 | import java.io.*; 15 | 16 | import com.ning.compress.lzf.util.LZFFileInputStream; 17 | import com.ning.compress.lzf.util.LZFFileOutputStream; 18 | 19 | /** 20 | * Simple command-line utility that can be used for testing LZF 21 | * compression, or as rudimentary command-line tool. 22 | * Arguments are the same as used by the "standard" lzf command line tool 23 | * 24 | * @author Tatu Saloranta (tatu@ning.com) 25 | */ 26 | public class LZF 27 | { 28 | public final static String SUFFIX = ".lzf"; 29 | 30 | protected void process(String[] args) throws IOException 31 | { 32 | if (args.length == 2) { 33 | String oper = args[0]; 34 | boolean compress = "-c".equals(oper); 35 | boolean toSystemOutput = !compress && "-o".equals(oper); 36 | if (compress || toSystemOutput || "-d".equals(oper)) { 37 | String filename = args[1]; 38 | File src = new File(filename); 39 | if (!src.exists()) { 40 | System.err.println("File '"+filename+"' does not exist."); 41 | System.exit(1); 42 | } 43 | if (!compress && !filename.endsWith(SUFFIX)) { 44 | System.err.println("File '"+filename+"' does end with expected suffix ('"+SUFFIX+"', won't decompress."); 45 | System.exit(1); 46 | } 47 | 48 | if (compress) { 49 | int inputLength = 0; 50 | File resultFile = new File(filename+SUFFIX); 51 | InputStream in = new FileInputStream(src); 52 | OutputStream out = new LZFFileOutputStream(resultFile); 53 | byte[] buffer = new byte[8192]; 54 | int bytesRead; 55 | while ((bytesRead = in.read(buffer, 0, buffer.length)) != -1) { 56 | inputLength += bytesRead; 57 | out.write(buffer, 0, bytesRead); 58 | } 59 | in.close(); 60 | out.flush(); 61 | out.close(); 62 | System.out.printf("Compressed '%s' into '%s' (%d->%d bytes)\n", 63 | src.getPath(), resultFile.getPath(), 64 | inputLength, resultFile.length()); 65 | } else { 66 | OutputStream out; 67 | LZFFileInputStream in = new LZFFileInputStream(src); 68 | File resultFile = null; 69 | if (toSystemOutput) { 70 | out = System.out; 71 | } else { 72 | resultFile = new File(filename.substring(0, filename.length() - SUFFIX.length())); 73 | out = new FileOutputStream(resultFile); 74 | } 75 | int uncompLen = in.readAndWrite(out); 76 | in.close(); 77 | out.flush(); 78 | out.close(); 79 | if (resultFile != null) { 80 | System.out.printf("Uncompressed '%s' into '%s' (%d->%d bytes)\n", 81 | src.getPath(), resultFile.getPath(), 82 | src.length(), uncompLen); 83 | } 84 | } 85 | return; 86 | } 87 | } 88 | System.err.println("Usage: java "+getClass().getName()+" -c/-d/-o source-file"); 89 | System.err.println(" -d parameter: decompress to file"); 90 | System.err.println(" -c parameter: compress to file"); 91 | System.err.println(" -o parameter: decompress to stdout"); 92 | System.exit(1); 93 | } 94 | 95 | public static void main(String[] args) throws IOException { 96 | new LZF().process(args); 97 | } 98 | } 99 | 100 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/gzip/TestGzipUncompressor.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.gzip; 2 | 3 | import java.io.*; 4 | import java.util.Random; 5 | 6 | import com.ning.compress.BaseForTests; 7 | import com.ning.compress.DataHandler; 8 | import com.ning.compress.UncompressorOutputStream; 9 | import org.junit.jupiter.api.Test; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 12 | 13 | public class TestGzipUncompressor extends BaseForTests 
14 | { 15 | @Test 16 | public void testSimpleSmall1by1() throws IOException 17 | { 18 | byte[] fluff = constructFluff(4000); 19 | byte[] comp = gzipAll(fluff); 20 | 21 | Collector co = new Collector(); 22 | GZIPUncompressor uncomp = new GZIPUncompressor(co); 23 | for (int i = 0, end = comp.length; i < end; ++i) { 24 | uncomp.feedCompressedData(comp, i, 1); 25 | } 26 | uncomp.complete(); 27 | byte[] result = co.getBytes(); 28 | 29 | assertArrayEquals(fluff, result); 30 | } 31 | 32 | @Test 33 | public void testSimpleSmallAsChunk() throws IOException 34 | { 35 | byte[] fluff = constructFluff(4000); 36 | byte[] comp = gzipAll(fluff); 37 | 38 | // and then uncompress, as a single chunk 39 | Collector co = new Collector(); 40 | GZIPUncompressor uncomp = new GZIPUncompressor(co); 41 | uncomp.feedCompressedData(comp, 0, comp.length); 42 | uncomp.complete(); 43 | byte[] result = co.getBytes(); 44 | assertArrayEquals(fluff, result); 45 | } 46 | 47 | @Test 48 | public void testSimpleBiggerVarLength() throws IOException 49 | { 50 | byte[] fluff = constructFluff(190000); 51 | byte[] comp = gzipAll(fluff); 52 | 53 | // and then uncompress with arbitrary-sized blocks... 54 | Random rnd = new Random(123); 55 | Collector co = new Collector(); 56 | GZIPUncompressor uncomp = new GZIPUncompressor(co); 57 | for (int i = 0, end = comp.length; i < end; ) { 58 | int size = Math.min(end-i, 1+rnd.nextInt(7)); 59 | uncomp.feedCompressedData(comp, i, size); 60 | i += size; 61 | } 62 | uncomp.complete(); 63 | byte[] result = co.getBytes(); 64 | 65 | assertArrayEquals(fluff, result); 66 | } 67 | 68 | @Test 69 | public void testSimpleBiggerOneChunk() throws IOException 70 | { 71 | byte[] fluff = constructFluff(275000); 72 | byte[] comp = gzipAll(fluff); 73 | 74 | // and then uncompress in one chunk 75 | Collector co = new Collector(); 76 | GZIPUncompressor uncomp = new GZIPUncompressor(co); 77 | uncomp.feedCompressedData(comp, 0, comp.length); 78 | uncomp.complete(); 79 | byte[] result = co.getBytes(); 80 | 81 | assertArrayEquals(fluff, result); 82 | } 83 | 84 | @Test 85 | public void testSimpleBiggerAsStream() throws IOException 86 | { 87 | byte[] fluff = constructFluff(277000); 88 | byte[] comp = gzipAll(fluff); 89 | Collector co = new Collector(); 90 | UncompressorOutputStream out = new UncompressorOutputStream(new GZIPUncompressor(co)); 91 | out.write(comp, 0, comp.length); 92 | out.close(); 93 | byte[] result = co.getBytes(); 94 | 95 | assertArrayEquals(fluff, result); 96 | } 97 | 98 | /* 99 | /////////////////////////////////////////////////////////////////////// 100 | // Helper methods 101 | /////////////////////////////////////////////////////////////////////// 102 | */ 103 | 104 | private byte[] gzipAll(byte[] input) throws IOException 105 | { 106 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(16 + (input.length>>2)); 107 | OptimizedGZIPOutputStream gz = new OptimizedGZIPOutputStream(bytes); 108 | gz.write(input); 109 | gz.close(); 110 | return bytes.toByteArray(); 111 | } 112 | 113 | private final static class Collector implements DataHandler 114 | { 115 | private final ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 116 | 117 | @Override 118 | public boolean handleData(byte[] buffer, int offset, int len) throws IOException { 119 | bytes.write(buffer, offset, len); 120 | return true; 121 | } 122 | @Override 123 | public void allDataHandled() throws IOException { } 124 | public byte[] getBytes() { return bytes.toByteArray(); } 125 | } 126 | } 127 | 
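Side note (not part of the test suite): the tests above cover the full surface of the push-style API: construct a `GZIPUncompressor` around a `DataHandler`, feed compressed bytes in arbitrarily-sized slices, then call `complete()`. A minimal self-contained sketch of the same pattern outside the test harness, assuming gzip input produced by the plain JDK stream; class and payload names here are illustrative:

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;

import com.ning.compress.DataHandler;
import com.ning.compress.gzip.GZIPUncompressor;

public class PushGzipSketch
{
    public static void main(String[] args) throws IOException
    {
        // Produce some gzip input with the plain JDK stream
        ByteArrayOutputStream raw = new ByteArrayOutputStream();
        GZIPOutputStream gz = new GZIPOutputStream(raw);
        gz.write("sample payload sample payload sample payload".getBytes("UTF-8"));
        gz.close();
        byte[] compressed = raw.toByteArray();

        // Handler collects uncompressed bytes as they are pushed out
        final ByteArrayOutputStream result = new ByteArrayOutputStream();
        GZIPUncompressor uncomp = new GZIPUncompressor(new DataHandler() {
            @Override
            public boolean handleData(byte[] buffer, int offset, int len) throws IOException {
                result.write(buffer, offset, len);
                return true; // returning false requests early termination
            }
            @Override
            public void allDataHandled() throws IOException { }
        });

        // Chunk boundaries are arbitrary; the result is identical regardless
        for (int i = 0; i < compressed.length; i += 3) {
            uncomp.feedCompressedData(compressed, i, Math.min(3, compressed.length - i));
        }
        uncomp.complete();
        System.out.println("Uncompressed to " + result.size() + " bytes");
    }
}
```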
-------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/TestLZFOutputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.OutputStream; 6 | import java.nio.charset.StandardCharsets; 7 | 8 | import org.junit.jupiter.api.BeforeEach; 9 | 10 | import com.ning.compress.BaseForTests; 11 | import org.junit.jupiter.api.Test; 12 | 13 | import static org.junit.jupiter.api.Assertions.*; 14 | 15 | public class TestLZFOutputStream extends BaseForTests 16 | { 17 | private static final int BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN * 64; 18 | private byte[] nonEncodableBytesToWrite; 19 | private byte[] bytesToWrite; 20 | 21 | @BeforeEach 22 | public void setUp() { 23 | nonEncodableBytesToWrite = constructUncompressable(BUFFER_SIZE); 24 | String phrase = "all work and no play make Jack a dull boy"; 25 | bytesToWrite = new byte[BUFFER_SIZE]; 26 | byte[] bytes = phrase.getBytes(StandardCharsets.UTF_8); 27 | int cursor = 0; 28 | while(cursor <= bytesToWrite.length) { 29 | System.arraycopy(bytes, 0, bytesToWrite, cursor, (bytes.length+cursor < bytesToWrite.length)?bytes.length:bytesToWrite.length-cursor); 30 | cursor += bytes.length; 31 | } 32 | } 33 | 34 | @Test 35 | public void testUnencodable() throws Exception 36 | { 37 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 38 | OutputStream os = new LZFOutputStream(bos); 39 | os.write(nonEncodableBytesToWrite); 40 | os.close(); 41 | assertTrue(bos.toByteArray().length > nonEncodableBytesToWrite.length); 42 | verifyOutputStream(bos, nonEncodableBytesToWrite); 43 | } 44 | 45 | @Test 46 | public void testStreaming() throws Exception 47 | { 48 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 49 | OutputStream os = new LZFOutputStream(bos); 50 | os.write(bytesToWrite); 51 | os.close(); 52 | int len = bos.toByteArray().length; 53 | int max = bytesToWrite.length/2; 54 | if (len <= 10 || len >= max) { 55 | fail("Sanity check: should have 10 < len < "+max+"; len = "+len); 56 | } 57 | verifyOutputStream(bos, bytesToWrite); 58 | } 59 | 60 | @Test 61 | public void testSingleByte() throws Exception 62 | { 63 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 64 | OutputStream os = new LZFOutputStream(bos); 65 | int idx = 0; 66 | for(; idx < BUFFER_SIZE; idx++) { 67 | os.write(bytesToWrite[idx]); 68 | if(idx % 1023 == 0 && idx > BUFFER_SIZE/2) { 69 | os.flush(); 70 | } 71 | } 72 | os.close(); 73 | int len = bos.toByteArray().length; 74 | int max = bytesToWrite.length/2; 75 | if (len <= 10 || len >= max) { 76 | fail("Sanity check: should have 10 < len < "+max+"; len = "+len); 77 | } 78 | verifyOutputStream(bos, bytesToWrite); 79 | } 80 | 81 | @Test 82 | public void testPartialBuffer() throws Exception 83 | { 84 | int offset = 255; 85 | int len = 1<<17; 86 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 87 | OutputStream os = new LZFOutputStream(bos); 88 | os.write(bytesToWrite, offset, len); 89 | os.close(); 90 | assertTrue(bos.toByteArray().length > 10); 91 | assertTrue(bos.toByteArray().length < bytesToWrite.length*.5); 92 | int bytesToCopy = Math.min(len, bytesToWrite.length); 93 | byte[] compareBytes = new byte[bytesToCopy]; 94 | System.arraycopy(bytesToWrite, offset, compareBytes, 0, bytesToCopy); 95 | verifyOutputStream(bos, compareBytes); 96 | } 97 | 98 | @Test 99 | public void testEmptyBuffer() 
throws Exception 100 | { 101 | byte[] input = new byte[0]; 102 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 103 | OutputStream os = new LZFOutputStream(bos); 104 | os.write(input); 105 | os.close(); 106 | int len = bos.toByteArray().length; 107 | if (len != 0) { 108 | fail("Sanity check: should have len == 0; len = "+len); 109 | } 110 | verifyOutputStream(bos, input); 111 | } 112 | 113 | private void verifyOutputStream(ByteArrayOutputStream bos, byte[] reference) throws Exception 114 | { 115 | ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); 116 | LZFInputStream lzfi = new LZFInputStream(bis); 117 | int val; 118 | int idx = 0; 119 | while((val = lzfi.read()) != -1) { 120 | int refVal = ((int)reference[idx++]) & 255; 121 | assertEquals(refVal, val); 122 | } 123 | lzfi.close(); 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /VERSION.txt: -------------------------------------------------------------------------------- 1 | 1.2.0 (not yet released) 2 | 3 | #54: Validate arguments for `Unsafe` codecs 4 | (contributed by @Marcono1234) 5 | #60: Convert tests to JUnit 5 & refactor tests 6 | (contributed by @Marcono1234) 7 | #61: Prevent user code from subclassing `UnsafeChunkEncoder` 8 | (contributed by @Marcono1234) 9 | #64: Fix differences between big- and little-endian encoder 10 | (contributed by @Marcono1234) 11 | #68: Improve bounds checks 12 | (contributed by @Marcono1234) 13 | - Updated `oss-parent` dep to latest (v72) 14 | 15 | 1.1.3 (26-Sep-2025) 16 | 17 | - Updated plug-in dependencies 18 | - Published via Sonatype Central repository 19 | 20 | 1.1.2 (29-Jan-2023) 21 | 22 | #53: Treat `sun.misc` as an optional OSGi dependency 23 | (contributed by @Mailaender) 24 | 25 | 1.1.1 (25-Jan-2023) 26 | 27 | #50: `Unsafe` needs support in `module-info.java` 28 | (contributed by @robertvazan) 29 | 30 | 1.1 (07-Jan-2021) 31 | 32 | #48: Fix issues outlined by "lgtm.com"'s static analysis 33 | #49: Add Java 9+ module info using Moditect 34 | - Now requires Java 8 35 | 36 | 1.0.4 (12-Mar-2017) 37 | 38 | #43: estimateMaxWorkspaceSize() is too small 39 | (reported by Roman L, leventow@github) 40 | 41 | 1.0.3 (15-Aug-2014) 42 | 43 | #37: Incorrect de-serialization on Big Endian systems, due to incorrect usage of #numberOfTrailingZeroes 44 | (pointed out by Gireesh P, gireeshpunathil@github) 45 | 46 | 1.0.2 (09-Aug-2014) 47 | 48 | #38: Overload of factory methods and constructors in Encoders and Streams 49 | to allow specifying custom `BufferRecycler` instance 50 | (contributed by `serverperformance@github`) 51 | #39: VanillaChunkEncoder.tryCompress() not using 'inPos' as it should, potentially 52 | causing corruption in rare cases 53 | (contributed by Ryan E, rjerns@github) 54 | 55 | 1.0.1 (08-Apr-2014) 56 | 57 | #35: Fix a problem with closing of `DeflaterOutputStream` (for gzip output) 58 | that could cause corrupt state for reusable `Deflater` 59 | (contributed by thmd@github) 60 | 61 | 1.0.0 (02-Dec-2013) 62 | 63 | #34: Add `ChunkEncoder.appendEncodedIfCompresses()` for conditional compression; 64 | useful for building efficient "compress but only if it makes enough difference" 65 | processing systems 66 | 67 | 0.9.9 (25-Sep-2013) 68 | 69 | #14: Added parallel LZF compression, contributed by Cedrik 70 | (javabean@github) 71 | #25: Allow early termination of push-style `Uncompressor` operation 72 | #32: Fix for a rare NPE 73 | (suggested by francoisforster@github) 74 | 75 | 0.9.8 (09-Mar-2013) 76 | 77 | #24:
Problems uncompressing certain types of binary documents 78 | - Minor perf improvement for 'appendEncoded', was not reusing buffers 79 | 80 | 0.9.7 (06-Mar-2013) 81 | 82 | #23: Add UnsafeChunkEncoder that uses 'sun.misc.Unsafe' for additional Oomph. 83 | * Add LZFEncoder.estimateMaxWorkspaceSize() to help allocate work buffers. 84 | #22: Add method(s) to allow encoding into caller-provided (pre-allocated) buffer. 85 | 86 | 0.9.6 (05-Sep-2012) 87 | 88 | #17: Add IOException subtypes 'LZFException' and 'GZIPException' (with 89 | common supertype of 'CompressionFormatException') to allow for better 90 | catching of decompression errors 91 | #19: (more) Efficient skipping with LZFInputStream, LZFFileInputStream; 92 | can skip full chunks without decoding -- much faster (as per simple tests) 93 | 94 | 0.9.5 (25-May-2012) 95 | 96 | * Add 'LZFCompressingInputStream' to allow streaming compression 97 | "in reverse" (compared to LZFOutputStream) 98 | * Add GZIP support functionality: 99 | * 'OptimizedGZIPInputStream', 'OptimizedGZIPOutputStream' which add buffer 100 | (and Inflater/Deflater) recycling for improved performance compared to 101 | default JDK implementations (uses same native ZLIB library for actual 102 | decompression) 103 | * Add "push-mode" handler, 'Uncompressor' to be used for un-/decompression 104 | with non-blocking push-style data sources (like async-http-client) 105 | * Implementations for LZF (LZFUncompressor) and GZIP (GZIPUncompressor) 106 | * 'UncompressorOutputStream' convenience wrapper to expose 'Uncompressor' 107 | as 'OutputStream' 108 | 109 | 0.9.3 110 | 111 | * Fixed Issue #12: Command-line tool out of memory 112 | (reported by nodarret@github) 113 | * Implemented Issue #16: Add LZFInputStream.readAndWrite(...) method for copying 114 | uncompressed data, avoiding an intermediate copy. 115 | * Fix for Issue #15: LZFDecoder not passing 'offset', 'length' params 116 | (reported by T.Effland) 117 | * Fix for Issue #13: problems with Unsafe decoder on some platforms 118 | 119 | 0.9.0 (and prior) 120 | 121 | * Rewrote decoder to allow ChunkDecoder variants, to allow optional use of 122 | sun.misc.Unsafe (which can boost uncompression speed by up to +50%) 123 | * #11: Input/OutputStreams not throwing IOException if reading/writing 124 | after close() called, should be. 125 | (reported by Dain S) 126 | * Fix an NPE in BufferRecycler 127 | (reported by Matt Abrams, abramsm@gmail.com) 128 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/BufferRecycler.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress; 2 | 3 | import java.lang.ref.SoftReference; 4 | 5 | /** 6 | * Simple helper class to encapsulate details of basic buffer 7 | * recycling scheme, which helps a lot (as per profiling) for 8 | * smaller encoding cases. 9 | * 10 | * @author Tatu Saloranta (tatu.saloranta@iki.fi) 11 | */ 12 | public final class BufferRecycler 13 | { 14 | private final static int MIN_ENCODING_BUFFER = 4000; 15 | 16 | private final static int MIN_OUTPUT_BUFFER = 8000; 17 | 18 | /** 19 | * This ThreadLocal contains a {@link java.lang.ref.SoftReference} 20 | * to a {@link BufferRecycler} used to provide a low-cost 21 | * buffer recycling for buffers we need for encoding, decoding.
22 | */ 23 | final protected static ThreadLocal<SoftReference<BufferRecycler>> _recyclerRef 24 | = new ThreadLocal<SoftReference<BufferRecycler>>(); 25 | 26 | private byte[] _inputBuffer; 27 | private byte[] _outputBuffer; 28 | 29 | private byte[] _decodingBuffer; 30 | private byte[] _encodingBuffer; 31 | 32 | private int[] _encodingHash; 33 | 34 | /** 35 | * Accessor to get thread-local recycler instance 36 | */ 37 | public static BufferRecycler instance() 38 | { 39 | SoftReference<BufferRecycler> ref = _recyclerRef.get(); 40 | BufferRecycler br = (ref == null) ? null : ref.get(); 41 | if (br == null) { 42 | br = new BufferRecycler(); 43 | _recyclerRef.set(new SoftReference<BufferRecycler>(br)); 44 | } 45 | return br; 46 | } 47 | 48 | /* 49 | /////////////////////////////////////////////////////////////////////// 50 | // Buffers for encoding (output) 51 | /////////////////////////////////////////////////////////////////////// 52 | */ 53 | 54 | public byte[] allocEncodingBuffer(int minSize) 55 | { 56 | byte[] buf = _encodingBuffer; 57 | if (buf == null || buf.length < minSize) { 58 | buf = new byte[Math.max(minSize, MIN_ENCODING_BUFFER)]; 59 | } else { 60 | _encodingBuffer = null; 61 | } 62 | return buf; 63 | } 64 | 65 | public void releaseEncodeBuffer(byte[] buffer) 66 | { 67 | if (_encodingBuffer == null || (buffer != null && buffer.length > _encodingBuffer.length)) { 68 | _encodingBuffer = buffer; 69 | } 70 | } 71 | 72 | public byte[] allocOutputBuffer(int minSize) 73 | { 74 | byte[] buf = _outputBuffer; 75 | if (buf == null || buf.length < minSize) { 76 | buf = new byte[Math.max(minSize, MIN_OUTPUT_BUFFER)]; 77 | } else { 78 | _outputBuffer = null; 79 | } 80 | return buf; 81 | } 82 | 83 | public void releaseOutputBuffer(byte[] buffer) 84 | { 85 | if (_outputBuffer == null || (buffer != null && buffer.length > _outputBuffer.length)) { 86 | _outputBuffer = buffer; 87 | } 88 | } 89 | 90 | public int[] allocEncodingHash(int suggestedSize) 91 | { 92 | int[] buf = _encodingHash; 93 | if (buf == null || buf.length < suggestedSize) { 94 | buf = new int[suggestedSize]; 95 | } else { 96 | _encodingHash = null; 97 | } 98 | return buf; 99 | } 100 | 101 | public void releaseEncodingHash(int[] buffer) 102 | { 103 | if (_encodingHash == null || (buffer != null && buffer.length > _encodingHash.length)) { 104 | _encodingHash = buffer; 105 | } 106 | } 107 | 108 | /* 109 | /////////////////////////////////////////////////////////////////////// 110 | // Buffers for decoding (input) 111 | /////////////////////////////////////////////////////////////////////// 112 | */ 113 | 114 | public byte[] allocInputBuffer(int minSize) 115 | { 116 | byte[] buf = _inputBuffer; 117 | if (buf == null || buf.length < minSize) { 118 | buf = new byte[Math.max(minSize, MIN_OUTPUT_BUFFER)]; 119 | } else { 120 | _inputBuffer = null; 121 | } 122 | return buf; 123 | } 124 | 125 | public void releaseInputBuffer(byte[] buffer) 126 | { 127 | if (_inputBuffer == null || (buffer != null && buffer.length > _inputBuffer.length)) { 128 | _inputBuffer = buffer; 129 | } 130 | } 131 | 132 | public byte[] allocDecodeBuffer(int size) 133 | { 134 | byte[] buf = _decodingBuffer; 135 | if (buf == null || buf.length < size) { 136 | buf = new byte[size]; 137 | } else { 138 | _decodingBuffer = null; 139 | } 140 | return buf; 141 | } 142 | 143 | public void releaseDecodeBuffer(byte[] buffer) 144 | { 145 | if (_decodingBuffer == null || (buffer != null && buffer.length > _decodingBuffer.length)) { 146 | _decodingBuffer = buffer; 147 | } 148 | } 149 | } 150 | --------------------------------------------------------------------------------
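Aside: the alloc/release pairs in `BufferRecycler` follow a simple borrow-and-return discipline. A minimal sketch of the intended calling pattern; the helper class and method are hypothetical, shown only to illustrate usage:

```java
import com.ning.compress.BufferRecycler;

public final class RecyclerSketch
{
    // Hypothetical helper: borrow a per-thread work buffer, use it, return it
    public static int workWithRecycledBuffer(byte[] src)
    {
        BufferRecycler recycler = BufferRecycler.instance(); // thread-local instance
        byte[] work = recycler.allocOutputBuffer(src.length); // guaranteed >= src.length
        try {
            // stand-in for real encoding work
            System.arraycopy(src, 0, work, 0, src.length);
            return src.length;
        } finally {
            // hand the buffer back so later calls on this thread can reuse it
            recycler.releaseOutputBuffer(work);
        }
    }
}
```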
/src/main/java/com/ning/compress/gzip/OptimizedGZIPOutputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.gzip; 2 | 3 | import java.io.*; 4 | import java.util.zip.CRC32; 5 | import java.util.zip.Deflater; 6 | import java.util.zip.DeflaterOutputStream; 7 | 8 | /** 9 | * Optimized variant of {@link java.util.zip.GZIPOutputStream} that 10 | * reuses the underlying {@link java.util.zip.Deflater} instance. 11 | */ 12 | public class OptimizedGZIPOutputStream 13 | extends OutputStream 14 | { 15 | /** 16 | * GZIP header magic number; written out LSB like most everything 17 | * else (i.e. as 0x1f 0x8b) 18 | */ 19 | private final static int GZIP_MAGIC = 0x8b1f; 20 | 21 | /** 22 | * For now, static header seems fine, since JDK default gzip writer 23 | * does it too: 24 | */ 25 | final static byte[] DEFAULT_HEADER = new byte[] { 26 | (byte) GZIP_MAGIC, // Magic number (short) 27 | (byte)(GZIP_MAGIC >> 8), // Magic number (short) 28 | Deflater.DEFLATED, // Compression method (CM) 29 | 0, // Flags (FLG) 30 | 0, // Modification time MTIME (int) 31 | 0, // Modification time MTIME (int) 32 | 0, // Modification time MTIME (int) 33 | 0, // Modification time MTIME (int) 34 | 0, // Extra flags (XFLG) 35 | (byte) 0xff // Operating system (OS), UNKNOWN 36 | }; 37 | 38 | /* 39 | /////////////////////////////////////////////////////////////////////// 40 | // Helper objects 41 | /////////////////////////////////////////////////////////////////////// 42 | */ 43 | 44 | protected Deflater _deflater; 45 | 46 | protected final GZIPRecycler _gzipRecycler; 47 | 48 | protected final byte[] _eightByteBuffer = new byte[8]; 49 | 50 | /* 51 | /////////////////////////////////////////////////////////////////////// 52 | // State 53 | /////////////////////////////////////////////////////////////////////// 54 | */ 55 | 56 | /** 57 | * Underlying output stream that header, compressed content and 58 | * footer go to 59 | */ 60 | protected OutputStream _rawOut; 61 | 62 | // TODO: write this out, not strictly needed...
63 | protected DeflaterOutputStream _deflaterOut; 64 | 65 | protected CRC32 _crc; 66 | 67 | /* 68 | /////////////////////////////////////////////////////////////////////// 69 | // Construction 70 | /////////////////////////////////////////////////////////////////////// 71 | */ 72 | 73 | public OptimizedGZIPOutputStream(OutputStream out) throws IOException 74 | { 75 | super(); 76 | _gzipRecycler = GZIPRecycler.instance(); 77 | _rawOut = out; 78 | // write header: 79 | _rawOut.write(DEFAULT_HEADER); 80 | _deflater = _gzipRecycler.allocDeflater(); 81 | _deflaterOut = new DeflaterOutputStream(_rawOut, _deflater, 4000); 82 | _crc = new CRC32(); 83 | } 84 | 85 | /* 86 | /////////////////////////////////////////////////////////////////////// 87 | // OutputStream implementation 88 | /////////////////////////////////////////////////////////////////////// 89 | */ 90 | 91 | @Override 92 | public void close() throws IOException 93 | { 94 | _deflaterOut.finish(); 95 | _deflaterOut = null; 96 | _writeTrailer(_rawOut); 97 | _rawOut.close(); 98 | Deflater d = _deflater; 99 | if (d != null) { 100 | _deflater = null; 101 | _gzipRecycler.releaseDeflater(d); 102 | } 103 | } 104 | 105 | @Override 106 | public void flush() throws IOException { 107 | _deflaterOut.flush(); 108 | } 109 | 110 | @Override 111 | public final void write(byte[] buf) throws IOException { 112 | write(buf, 0, buf.length); 113 | } 114 | 115 | @Override 116 | public final void write(int c) throws IOException { 117 | _eightByteBuffer[0] = (byte) c; 118 | write(_eightByteBuffer, 0, 1); 119 | } 120 | 121 | @Override 122 | public void write(byte[] buf, int off, int len) throws IOException { 123 | _deflaterOut.write(buf, off, len); 124 | _crc.update(buf, off, len); 125 | } 126 | 127 | /* 128 | /////////////////////////////////////////////////////////////////////// 129 | // Internal methods 130 | /////////////////////////////////////////////////////////////////////// 131 | */ 132 | 133 | private void _writeTrailer(OutputStream out) throws IOException 134 | { 135 | _putInt(_eightByteBuffer, 0, (int) _crc.getValue()); 136 | _putInt(_eightByteBuffer, 4, _deflater.getTotalIn()); 137 | out.write(_eightByteBuffer, 0, 8); 138 | } 139 | 140 | /** 141 | * Stupid GZIP, writes stuff in wrong order (not network, but x86) 142 | */ 143 | private final static void _putInt(byte[] buf, int offset, int value) 144 | { 145 | buf[offset++] = (byte) (value); 146 | buf[offset++] = (byte) (value >> 8); 147 | buf[offset++] = (byte) (value >> 16); 148 | buf[offset] = (byte) (value >> 24); 149 | } 150 | } -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/TestLZFDecoder.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.*; 4 | import java.nio.charset.StandardCharsets; 5 | 6 | import com.ning.compress.BaseForTests; 7 | import com.ning.compress.lzf.impl.UnsafeChunkDecoder; 8 | import com.ning.compress.lzf.util.ChunkDecoderFactory; 9 | import org.junit.jupiter.api.Test; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 12 | import static org.junit.jupiter.api.Assertions.assertThrows; 13 | 14 | public class TestLZFDecoder extends BaseForTests 15 | { 16 | @Test 17 | public void testSimple() throws IOException { 18 | _testSimple(ChunkDecoderFactory.safeInstance()); 19 | _testSimple(ChunkDecoderFactory.optimalInstance()); 20 | } 21 | 22 | @Test 23 | public void testLonger() throws IOException { 
24 | _testLonger(ChunkDecoderFactory.safeInstance()); 25 | _testLonger(ChunkDecoderFactory.optimalInstance()); 26 | } 27 | 28 | @Test 29 | public void testChunks() throws IOException { 30 | _testChunks(ChunkDecoderFactory.safeInstance()); 31 | _testChunks(ChunkDecoderFactory.optimalInstance()); 32 | } 33 | 34 | @Test 35 | public void testUnsafeValidation() { 36 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder(); 37 | 38 | byte[] array = new byte[10]; 39 | int goodStart = 2; 40 | int goodEnd = 5; 41 | assertThrows(NullPointerException.class, () -> decoder.decodeChunk(null, goodStart, goodEnd, array, goodStart, goodEnd)); 42 | assertThrows(NullPointerException.class, () -> decoder.decodeChunk(array, goodStart, goodEnd, null, goodStart, goodEnd)); 43 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, -1, goodEnd, array, goodStart, goodEnd)); 44 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, goodStart, goodStart - 1, array, goodStart, goodEnd)); 45 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, goodStart, -1, array, goodStart, goodEnd)); 46 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, goodStart, array.length + 1, array, goodStart, goodEnd)); 47 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, goodStart, goodEnd, array, -1, goodEnd)); 48 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, goodStart, goodEnd, array, goodStart, goodStart - 1)); 49 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, goodStart, goodEnd, array, goodStart, -1)); 50 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> decoder.decodeChunk(array, goodStart, goodEnd, array, goodStart, array.length + 1)); 51 | } 52 | 53 | /* 54 | /////////////////////////////////////////////////////////////////////// 55 | // Second-level test methods 56 | /////////////////////////////////////////////////////////////////////// 57 | */ 58 | 59 | private void _testSimple(ChunkDecoder decoder) throws IOException 60 | { 61 | byte[] orig = "Another trivial test".getBytes(StandardCharsets.UTF_8); 62 | byte[] compressed = compress(orig); 63 | byte[] result = decoder.decode(compressed); 64 | assertArrayEquals(orig, result); 65 | 66 | // also, ensure that offset, length are passed 67 | byte[] compressed2 = new byte[compressed.length + 4]; 68 | System.arraycopy(compressed, 0, compressed2, 2, compressed.length); 69 | 70 | result = decoder.decode(compressed2, 2, compressed.length); 71 | assertArrayEquals(orig, result); 72 | 73 | // two ways to do that as well: 74 | result = LZFDecoder.decode(compressed2, 2, compressed.length); 75 | assertArrayEquals(orig, result); 76 | } 77 | 78 | private void _testLonger(ChunkDecoder decoder) throws IOException 79 | { 80 | byte[] orig = this.constructFluff(250000); // 250k 81 | byte[] compressed = compress(orig); 82 | byte[] result = decoder.decode(compressed); 83 | assertArrayEquals(orig, result); 84 | 85 | // also, ensure that offset, length are passed 86 | byte[] compressed2 = new byte[compressed.length + 4]; 87 | System.arraycopy(compressed, 0, compressed2, 2, compressed.length); 88 | 89 | result = decoder.decode(compressed2, 2, compressed.length); 90 | assertArrayEquals(orig, result); 91 | 92 | // two ways to do that as well: 93 | result = LZFDecoder.decode(compressed2, 2, compressed.length); 94 | assertArrayEquals(orig, result); 95 | } 96 | 97 | 
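    /* Illustrative note (not part of the original test): the instance and static
     * call styles exercised above are interchangeable, because the static
     * LZFDecoder entry points delegate to a lazily-created ChunkDecoder:
     *
     *   byte[] a = ChunkDecoderFactory.optimalInstance().decode(compressed);
     *   byte[] b = LZFDecoder.decode(compressed); // same result
     */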
private void _testChunks(ChunkDecoder decoder) throws IOException 98 | { 99 | byte[] orig1 = "Another trivial test".getBytes(StandardCharsets.UTF_8); 100 | byte[] orig2 = " with some of repepepepepetitition too!".getBytes(StandardCharsets.UTF_8); 101 | ByteArrayOutputStream out = new ByteArrayOutputStream(); 102 | out.write(orig1); 103 | out.write(orig2); 104 | byte[] orig = out.toByteArray(); 105 | 106 | byte[] compressed1 = compress(orig1); 107 | byte[] compressed2 = compress(orig2); 108 | out = new ByteArrayOutputStream(); 109 | out.write(compressed1); 110 | out.write(compressed2); 111 | byte[] compressed = out.toByteArray(); 112 | 113 | byte[] result = decoder.decode(compressed); 114 | assertArrayEquals(orig, result); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LZF Compressor 2 | 3 | ## Overview 4 | 5 | LZF-compress is a Java library for encoding and decoding data in LZF format, 6 | written by Tatu Saloranta (tatu.saloranta@iki.fi). 7 | 8 | Data format and algorithm based on original [LZF library](https://oldhome.schmorp.de/marc/liblzf.html) by Marc A Lehmann. 9 | See [LZF Format Specification](https://github.com/ning/compress/wiki/LZFFormat) for full description. 10 | 11 | Format differs slightly from some other adaptations, such as the one used 12 | by [H2 database project](http://www.h2database.com) (by Thomas Mueller); 13 | although internal block compression structure is the same, block identifiers differ. 14 | This package uses the original LZF identifiers to be 100% compatible with existing command-line `lzf` tool(s). 15 | 16 | LZF algorithm itself is optimized for speed, with somewhat more modest compression. 17 | Compared to the standard `Deflate` (the algorithm gzip uses) LZF can be 5-6 times as fast to compress, 18 | and twice as fast to decompress. Compression ratio is lower since no Huffman encoding is used 19 | after Lempel-Ziv substring elimination. 20 | 21 | ## License 22 | 23 | [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0) 24 | 25 | ## Requirements 26 | 27 | Versions up to 1.0.4 require JDK 6; versions from 1.1 on require JDK 8. 28 | 29 | Library has no external dependencies. 30 | 31 | ## Usage 32 | 33 | See [Wiki](https://github.com/ning/compress/wiki) for more details; here's a "TL;DR" version. 34 | 35 | Both compression and decompression can be done either via a streaming approach: 36 | 37 | ```java 38 | InputStream in = new LZFInputStream(new FileInputStream("data.lzf")); 39 | OutputStream out = new LZFOutputStream(new FileOutputStream("results.lzf")); 40 | InputStream compIn = new LZFCompressingInputStream(new FileInputStream("stuff.txt")); 41 | ``` 42 | 43 | or via block operations: 44 | 45 | ```java 46 | byte[] compressed = LZFEncoder.encode(uncompressedData); 47 | byte[] uncompressed = LZFDecoder.decode(compressedData); 48 | ``` 49 | 50 | and you can even use the LZF jar as a command-line tool (its manifest points to 'com.ning.compress.lzf.LZF' as the class with the main() method to call), like so: 51 | 52 | java -jar compress-lzf-1.1.3.jar 53 | 54 | (which will display necessary usage arguments for `-c`(ompressing) or `-d`(ecompressing) files.)
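For example (file names here are illustrative):

    java -jar compress-lzf-1.1.3.jar -c data.txt      # compresses to data.txt.lzf
    java -jar compress-lzf-1.1.3.jar -d data.txt.lzf  # decompresses back to data.txt
    java -jar compress-lzf-1.1.3.jar -o data.txt.lzf  # decompresses to stdout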
55 | 56 | ### Adding as Dependency 57 | 58 | #### Maven 59 | 60 | ```xml 61 | <dependency> 62 | <groupId>com.ning</groupId> 63 | <artifactId>compress-lzf</artifactId> 64 | <version>1.1.3</version> 65 | </dependency> 66 | ``` 67 | 68 | #### Module info (JPMS) 69 | 70 | Starting with version 1.1, `module-info.class` is included; module name is `com.ning.compress.lzf`, so you will need to use: 71 | 72 | requires com.ning.compress.lzf 73 | 74 | ### Parallel processing 75 | 76 | Since the compression is more CPU-heavy than decompression, it could benefit from concurrent operation. 77 | This works well with LZF because of its block-oriented nature, so that although there is need for 78 | sequential processing within a block (of up to 64kB), encoding of separate blocks can be done completely 79 | independently: there are no dependencies on earlier blocks. 80 | 81 | The main abstraction to use is `PLZFOutputStream`, which is a `FilterOutputStream` and implements 82 | `java.nio.channels.WritableByteChannel` as well. Its use is like that of any `OutputStream`: 83 | 84 | ```java 85 | PLZFOutputStream output = new PLZFOutputStream(new FileOutputStream("stuff.lzf")); 86 | // then write contents: 87 | output.write(buffer); 88 | // ... 89 | output.close(); 90 | 91 | ``` 92 | 93 | ## Interoperability 94 | 95 | Besides Java support, LZF codecs / bindings exist for non-JVM languages as well: 96 | 97 | * C: [liblzf](http://oldhome.schmorp.de/marc/liblzf.html) (the original LZF package!) 98 | * C#: [C# LZF](https://csharplzfcompression.codeplex.com/) 99 | * Go: [Golly](https://github.com/tav/golly) 100 | * Javascript(!): [freecode LZF](http://freecode.com/projects/lzf) (or via [SourceForge](http://sourceforge.net/projects/lzf/)) 101 | * Perl: [Compress::LZF](http://search.cpan.org/dist/Compress-LZF/LZF.pm) 102 | * Python: [Python-LZF](https://github.com/teepark/python-lzf) 103 | * Ruby: [glebtv/lzf](https://github.com/glebtv/lzf), [LZF/Ruby](https://rubyforge.org/projects/lzfruby/) 104 | 105 | ## Related 106 | 107 | Check out [jvm-compressor-benchmark](https://github.com/ning/jvm-compressor-benchmark) for comparison of space- and time-efficiency of this LZF implementation, relative to other available Java-accessible compression libraries. 108 | 109 | ## More 110 | 111 | [Project Wiki](https://github.com/ning/compress/wiki). 112 | 113 | ## Alternative High-Speed Lempel-Ziv Compressors 114 | 115 | LZF belongs to a family of compression codecs called "simple Lempel-Ziv" codecs. 116 | Since LZ compression is also the first part of `deflate` compression (which is used, 117 | along with simple framing, for `gzip`), it can be viewed as "first-part of gzip" 118 | (second part being Huffman-encoding of compressed content). 119 | 120 | There are many other codecs in this category, most notable (and competitive) being: 121 | 122 | * [Snappy](http://en.wikipedia.org/wiki/Snappy_%28software%29) 123 | * [LZ4](http://en.wikipedia.org/wiki/LZ4_%28compression_algorithm%29) 124 | 125 | all of which have very similar compression ratios (due to same underlying algorithm, 126 | differences coming from slight encoding variations, and efficiency differences in 127 | back-reference matching), and similar performance profiles regarding ratio of 128 | compression vs uncompression speeds.
129 | -------------------------------------------------------------------------------- /src/test/java/perf/ManualUncompressComparison.java: -------------------------------------------------------------------------------- 1 | package perf; 2 | 3 | import java.io.*; 4 | 5 | import com.ning.compress.lzf.*; 6 | import com.ning.compress.lzf.util.ChunkDecoderFactory; 7 | 8 | /** 9 | * Simple manual performance micro-benchmark that compares compress and 10 | * decompress speeds of this LZF implementation with other codecs. 11 | */ 12 | public class ManualUncompressComparison 13 | { 14 | protected int size = 0; 15 | 16 | protected byte[] _lzfEncoded; 17 | 18 | private void test(byte[] input) throws Exception 19 | { 20 | _lzfEncoded = LZFEncoder.encode(input); 21 | 22 | // Let's try to guestimate suitable size... to get to 20 megs to process 23 | final int REPS = Math.max(1, (int) ((double) (20 * 1000 * 1000) / (double) input.length)); 24 | 25 | // final int TYPES = 1; 26 | final int TYPES = 2; 27 | final int WARMUP_ROUNDS = 5; 28 | int i = 0; 29 | int roundsDone = 0; 30 | final long[] times = new long[TYPES]; 31 | 32 | System.out.println("Read "+input.length+" bytes to compress, uncompress; will do "+REPS+" repetitions"); 33 | 34 | // But first, validate! 35 | _preValidate(_lzfEncoded); 36 | 37 | while (true) { 38 | try { Thread.sleep(100L); } catch (InterruptedException ie) { } 39 | int round = (i++ % TYPES); 40 | 41 | String msg; 42 | boolean lf = (round == 0); 43 | 44 | long msecs; 45 | 46 | switch (round) { 47 | 48 | case 0: 49 | msg = "LZF decompress/block/safe"; 50 | msecs = testLZFDecompress(REPS, _lzfEncoded, ChunkDecoderFactory.safeInstance()); 51 | break; 52 | case 1: 53 | msg = "LZF decompress/block/UNSAFE"; 54 | msecs = testLZFDecompress(REPS, _lzfEncoded, ChunkDecoderFactory.optimalInstance()); 55 | break; 56 | case 2: 57 | msg = "LZF decompress/stream"; 58 | msecs = testLZFDecompressStream(REPS, _lzfEncoded); 59 | break; 60 | default: 61 | throw new Error(); 62 | } 63 | 64 | // skip first 5 rounds to let results stabilize 65 | if (roundsDone >= WARMUP_ROUNDS) { 66 | times[round] += msecs; 67 | } 68 | System.out.printf("Test '%s' [%d bytes] -> %d msecs\n", msg, size, msecs); 69 | if (lf) { 70 | ++roundsDone; 71 | if ((roundsDone % 3) == 0 && roundsDone > WARMUP_ROUNDS) { 72 | double den = (double) (roundsDone - WARMUP_ROUNDS); 73 | if (times.length == 1) { 74 | System.out.printf("Averages after %d rounds: %.1f msecs\n", 75 | (int) den, times[0] / den); 76 | } else { 77 | System.out.printf("Averages after %d rounds (safe / UNSAFE): %.1f / %.1f msecs\n", 78 | (int) den, 79 | times[0] / den, times[1] / den); 80 | } 81 | System.out.println(); 82 | } 83 | } 84 | if ((i % 17) == 0) { 85 | System.out.println("[GC]"); 86 | Thread.sleep(100L); 87 | System.gc(); 88 | Thread.sleep(100L); 89 | } 90 | } 91 | } 92 | 93 | protected void _preValidate(byte[] compressed) throws LZFException 94 | { 95 | byte[] decoded1 = LZFDecoder.decode(compressed); 96 | byte[] decoded2 = LZFDecoder.safeDecode(compressed); 97 | 98 | if (decoded1.length == decoded2.length) { 99 | for (int i = 0, len = decoded1.length; i < len; ++i) { 100 | if (decoded1[i] != decoded2[i]) { 101 | throw new IllegalStateException("Uncompressed contents differ at "+i+"/"+len); 102 | } 103 | } 104 | } else { 105 | throw new IllegalStateException("Uncompressed content lengths diff: expected "+decoded1.length+", got "+decoded2.length); 106 | } 107 | } 108 | 109 | protected final long testLZFDecompress(int REPS, byte[] encoded, ChunkDecoder 
decoder) throws Exception 110 | { 111 | size = encoded.length; 112 | long start = System.currentTimeMillis(); 113 | byte[] uncomp = null; 114 | 115 | while (--REPS >= 0) { 116 | uncomp = decoder.decode(encoded); 117 | } 118 | size = uncomp.length; 119 | return System.currentTimeMillis() - start; 120 | } 121 | 122 | protected final long testLZFDecompressStream(int REPS, byte[] encoded) throws Exception 123 | { 124 | final byte[] buffer = new byte[8000]; 125 | size = 0; 126 | long start = System.currentTimeMillis(); 127 | while (--REPS >= 0) { 128 | int total = 0; 129 | LZFInputStream in = new LZFInputStream(new ByteArrayInputStream(encoded)); 130 | int count; 131 | while ((count = in.read(buffer)) > 0) { 132 | total += count; 133 | } 134 | size = total; 135 | in.close(); 136 | } 137 | return System.currentTimeMillis() - start; 138 | } 139 | 140 | public static void main(String[] args) throws Exception 141 | { 142 | if (args.length != 1) { 143 | System.err.println("Usage: java ... [file]"); 144 | System.exit(1); 145 | } 146 | File f = new File(args[0]); 147 | ByteArrayOutputStream bytes = new ByteArrayOutputStream((int) f.length()); 148 | byte[] buffer = new byte[4000]; 149 | int count; 150 | FileInputStream in = new FileInputStream(f); 151 | 152 | while ((count = in.read(buffer)) > 0) { 153 | bytes.write(buffer, 0, count); 154 | } 155 | in.close(); 156 | new ManualUncompressComparison().test(bytes.toByteArray()); 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/LZFDecoder.java: -------------------------------------------------------------------------------- 1 | /* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this 2 | * file except in compliance with the License. You may obtain a copy of the License at 3 | * 4 | * http://www.apache.org/licenses/LICENSE-2.0 5 | * 6 | * Unless required by applicable law or agreed to in writing, software distributed under 7 | * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 8 | * OF ANY KIND, either express or implied. See the License for the specific language 9 | * governing permissions and limitations under the License. 10 | */ 11 | 12 | package com.ning.compress.lzf; 13 | 14 | import java.util.concurrent.atomic.AtomicReference; 15 | 16 | import com.ning.compress.lzf.util.ChunkDecoderFactory; 17 | 18 | /** 19 | * Decoder that handles decoding of sequence of encoded LZF chunks, 20 | * combining them into a single contiguous result byte array. 21 | * This class has been mostly replaced by 22 | * {@link ChunkDecoder}, although static methods are left here 23 | * and may still be used for convenience. 24 | * All static methods use {@link ChunkDecoderFactory#optimalInstance} 25 | * to find actual {@link ChunkDecoder} instance to use. 26 | * 27 | * @author Tatu Saloranta (tatu.saloranta@iki.fi) 28 | * 29 | * @see com.ning.compress.lzf.ChunkDecoder 30 | */ 31 | public class LZFDecoder 32 | { 33 | /** 34 | * Lazily initialized "fast" instance that may use sun.misc.Unsafe 35 | * to speed up decompression 36 | */ 37 | protected final static AtomicReference<ChunkDecoder> _fastDecoderRef = new AtomicReference<ChunkDecoder>(); 38 | 39 | /** 40 | * Lazily initialized "safe" instance that DOES NOT use sun.misc.Unsafe 41 | * for decompression, just standard JDK functionality.
42 | */ 43 | protected final static AtomicReference<ChunkDecoder> _safeDecoderRef = new AtomicReference<ChunkDecoder>(); 44 | 45 | /* 46 | /////////////////////////////////////////////////////////////////////// 47 | // Factory methods for ChunkDecoders 48 | /////////////////////////////////////////////////////////////////////// 49 | */ 50 | 51 | /** 52 | * Accessor method that can be used to obtain {@link ChunkDecoder} 53 | * that uses all possible optimization methods available, including 54 | * sun.misc.Unsafe for memory access. 55 | */ 56 | public static ChunkDecoder fastDecoder() { 57 | // race conditions are ok here, we don't really mind 58 | ChunkDecoder dec = _fastDecoderRef.get(); 59 | if (dec == null) { // 60 | dec = ChunkDecoderFactory.optimalInstance(); 61 | _fastDecoderRef.compareAndSet(null, dec); 62 | } 63 | return dec; 64 | } 65 | 66 | /** 67 | * Accessor method that can be used to obtain {@link ChunkDecoder} 68 | * that only uses standard JDK access methods, and should work on 69 | * all Java platforms and JVMs. 70 | */ 71 | public static ChunkDecoder safeDecoder() { 72 | // race conditions are ok here, we don't really mind 73 | ChunkDecoder dec = _safeDecoderRef.get(); 74 | if (dec == null) { // 75 | dec = ChunkDecoderFactory.safeInstance(); 76 | _safeDecoderRef.compareAndSet(null, dec); 77 | } 78 | return dec; 79 | } 80 | 81 | /* 82 | /////////////////////////////////////////////////////////////////////// 83 | // Basic API, general 84 | /////////////////////////////////////////////////////////////////////// 85 | */ 86 | 87 | /** 88 | * Helper method that checks resulting size of an LZF chunk, regardless of 89 | * whether it contains compressed or uncompressed contents. 90 | */ 91 | public static int calculateUncompressedSize(byte[] data, int offset, int length) throws LZFException { 92 | return ChunkDecoder.calculateUncompressedSize(data, offset, length); 93 | } 94 | 95 | /* 96 | /////////////////////////////////////////////////////////////////////// 97 | // Basic API, fast decode methods 98 | /////////////////////////////////////////////////////////////////////// 99 | */ 100 | 101 | public static byte[] decode(final byte[] inputBuffer) throws LZFException { 102 | return fastDecoder().decode(inputBuffer, 0, inputBuffer.length); 103 | } 104 | 105 | public static byte[] decode(final byte[] inputBuffer, int offset, int length) throws LZFException { 106 | return fastDecoder().decode(inputBuffer, offset, length); 107 | } 108 | 109 | public static int decode(final byte[] inputBuffer, final byte[] targetBuffer) throws LZFException { 110 | return fastDecoder().decode(inputBuffer, 0, inputBuffer.length, targetBuffer); 111 | } 112 | 113 | public static int decode(final byte[] sourceBuffer, int offset, int length, final byte[] targetBuffer) 114 | throws LZFException { 115 | return fastDecoder().decode(sourceBuffer, offset, length, targetBuffer); 116 | } 117 | 118 | /* 119 | /////////////////////////////////////////////////////////////////////// 120 | // Basic API, "safe" decode methods 121 | /////////////////////////////////////////////////////////////////////// 122 | */ 123 | 124 | public static byte[] safeDecode(final byte[] inputBuffer) throws LZFException { 125 | return safeDecoder().decode(inputBuffer, 0, inputBuffer.length); 126 | } 127 | 128 | public static byte[] safeDecode(final byte[] inputBuffer, int offset, int length) throws LZFException { 129 | return safeDecoder().decode(inputBuffer, offset, length); 130 | } 131 | 132 | public static int safeDecode(final byte[] inputBuffer, final byte[]
targetBuffer) throws LZFException { 133 | return safeDecoder().decode(inputBuffer, 0, inputBuffer.length, targetBuffer); 134 | } 135 | 136 | public static int safeDecode(final byte[] sourceBuffer, int offset, int length, final byte[] targetBuffer) 137 | throws LZFException { 138 | return safeDecoder().decode(sourceBuffer, offset, length, targetBuffer); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/LZFChunk.java: -------------------------------------------------------------------------------- 1 | /* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this 2 | * file except in compliance with the License. You may obtain a copy of the License at 3 | * 4 | * http://www.apache.org/licenses/LICENSE-2.0 5 | * 6 | * Unless required by applicable law or agreed to in writing, software distributed under 7 | * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 8 | * OF ANY KIND, either express or implied. See the License for the specific language 9 | * governing permissions and limitations under the License. 10 | */ 11 | 12 | package com.ning.compress.lzf; 13 | 14 | import java.io.*; 15 | 16 | /** 17 | * Helper class used to store LZF encoded segments (compressed and non-compressed) 18 | * that can be sequenced to produce LZF files/streams. 19 | * 20 | * @author Tatu Saloranta 21 | */ 22 | public class LZFChunk 23 | { 24 | /** 25 | * Maximum length of literal run for LZF encoding. 26 | */ 27 | public static final int MAX_LITERAL = 1 << 5; // 32 28 | 29 | /** 30 | * Chunk length is limited by 2-byte length indicator, to 64k 31 | */ 32 | public static final int MAX_CHUNK_LEN = 0xFFFF; 33 | 34 | /** 35 | * Header can be either 7 bytes (compressed) or 5 bytes (uncompressed) 36 | * long 37 | */ 38 | public static final int MAX_HEADER_LEN = 7; 39 | 40 | public static final int HEADER_LEN_COMPRESSED = 7; 41 | public static final int HEADER_LEN_NOT_COMPRESSED = 5; 42 | 43 | public final static byte BYTE_Z = 'Z'; 44 | public final static byte BYTE_V = 'V'; 45 | 46 | public final static int BLOCK_TYPE_NON_COMPRESSED = 0; 47 | public final static int BLOCK_TYPE_COMPRESSED = 1; 48 | 49 | 50 | protected final byte[] _data; 51 | protected LZFChunk _next; 52 | 53 | private LZFChunk(byte[] data) { _data = data; } 54 | 55 | /** 56 | * Factory method for constructing compressed chunk 57 | */ 58 | public static LZFChunk createCompressed(int origLen, byte[] encData, int encPtr, int encLen) 59 | { 60 | byte[] result = new byte[encLen + HEADER_LEN_COMPRESSED]; 61 | result[0] = BYTE_Z; 62 | result[1] = BYTE_V; 63 | result[2] = BLOCK_TYPE_COMPRESSED; 64 | result[3] = (byte) (encLen >> 8); 65 | result[4] = (byte) encLen; 66 | result[5] = (byte) (origLen >> 8); 67 | result[6] = (byte) origLen; 68 | System.arraycopy(encData, encPtr, result, HEADER_LEN_COMPRESSED, encLen); 69 | return new LZFChunk(result); 70 | } 71 | 72 | public static int appendCompressedHeader(int origLen, int encLen, byte[] headerBuffer, int offset) 73 | { 74 | headerBuffer[offset++] = BYTE_Z; 75 | headerBuffer[offset++] = BYTE_V; 76 | headerBuffer[offset++] = BLOCK_TYPE_COMPRESSED; 77 | headerBuffer[offset++] = (byte) (encLen >> 8); 78 | headerBuffer[offset++] = (byte) encLen; 79 | headerBuffer[offset++] = (byte) (origLen >> 8); 80 | headerBuffer[offset++] = (byte) origLen; 81 | return offset; 82 | } 83 | 84 | public static void writeCompressedHeader(int origLen, int encLen, OutputStream out, byte[] 
headerBuffer) 85 | throws IOException 86 | { 87 | headerBuffer[0] = BYTE_Z; 88 | headerBuffer[1] = BYTE_V; 89 | headerBuffer[2] = BLOCK_TYPE_COMPRESSED; 90 | headerBuffer[3] = (byte) (encLen >> 8); 91 | headerBuffer[4] = (byte) encLen; 92 | headerBuffer[5] = (byte) (origLen >> 8); 93 | headerBuffer[6] = (byte) origLen; 94 | out.write(headerBuffer, 0, HEADER_LEN_COMPRESSED); 95 | } 96 | 97 | /** 98 | * Factory method for constructing non-compressed chunk 99 | */ 100 | public static LZFChunk createNonCompressed(byte[] plainData, int ptr, int len) 101 | { 102 | byte[] result = new byte[len + HEADER_LEN_NOT_COMPRESSED]; 103 | result[0] = BYTE_Z; 104 | result[1] = BYTE_V; 105 | result[2] = BLOCK_TYPE_NON_COMPRESSED; 106 | result[3] = (byte) (len >> 8); 107 | result[4] = (byte) len; 108 | System.arraycopy(plainData, ptr, result, HEADER_LEN_NOT_COMPRESSED, len); 109 | return new LZFChunk(result); 110 | } 111 | 112 | /** 113 | * Method for appending specific content as non-compressed chunk, in 114 | * given buffer. 115 | */ 116 | public static int appendNonCompressed(byte[] plainData, int ptr, int len, 117 | byte[] outputBuffer, int outputPtr) 118 | { 119 | outputBuffer[outputPtr++] = BYTE_Z; 120 | outputBuffer[outputPtr++] = BYTE_V; 121 | outputBuffer[outputPtr++] = BLOCK_TYPE_NON_COMPRESSED; 122 | outputBuffer[outputPtr++] = (byte) (len >> 8); 123 | outputBuffer[outputPtr++] = (byte) len; 124 | System.arraycopy(plainData, ptr, outputBuffer, outputPtr, len); 125 | return outputPtr + len; 126 | } 127 | 128 | public static int appendNonCompressedHeader(int len, byte[] headerBuffer, int offset) 129 | { 130 | headerBuffer[offset++] = BYTE_Z; 131 | headerBuffer[offset++] = BYTE_V; 132 | headerBuffer[offset++] = BLOCK_TYPE_NON_COMPRESSED; 133 | headerBuffer[offset++] = (byte) (len >> 8); 134 | headerBuffer[offset++] = (byte) len; 135 | return offset; 136 | } 137 | 138 | public static void writeNonCompressedHeader(int len, OutputStream out, byte[] headerBuffer) 139 | throws IOException 140 | { 141 | headerBuffer[0] = BYTE_Z; 142 | headerBuffer[1] = BYTE_V; 143 | headerBuffer[2] = BLOCK_TYPE_NON_COMPRESSED; 144 | headerBuffer[3] = (byte) (len >> 8); 145 | headerBuffer[4] = (byte) len; 146 | out.write(headerBuffer, 0, HEADER_LEN_NOT_COMPRESSED); 147 | } 148 | 149 | public void setNext(LZFChunk next) { _next = next; } 150 | 151 | public LZFChunk next() { return _next; } 152 | public int length() { return _data.length; } 153 | public byte[] getData() { return _data; } 154 | 155 | public int copyTo(byte[] dst, int ptr) { 156 | int len = _data.length; 157 | System.arraycopy(_data, 0, dst, ptr, len); 158 | return ptr+len; 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/test/java/perf/ManualUnsafePerf.java: -------------------------------------------------------------------------------- 1 | package perf; 2 | 3 | import java.lang.reflect.Field; 4 | 5 | import sun.misc.Unsafe; 6 | 7 | @SuppressWarnings("restriction") 8 | public class ManualUnsafePerf 9 | { 10 | protected static final Unsafe unsafe; 11 | static { 12 | try { 13 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); 14 | theUnsafe.setAccessible(true); 15 | unsafe = (Unsafe) theUnsafe.get(null); 16 | } 17 | catch (Exception e) { 18 | throw new RuntimeException(e); 19 | } 20 | } 21 | 22 | protected static final long BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class); 23 | 24 | protected static final long CHAR_ARRAY_OFFSET = unsafe.arrayBaseOffset(char[].class); 25 | 26 | static
final int INPUT_LEN = 48; 27 | 28 | private void test() throws Exception 29 | { 30 | // Let's try to guestimate suitable size... to get to 10 megs to process 31 | // but, with more docs, give more time 32 | final int REPS = 2500 * 1000; 33 | 34 | final int WARMUP_ROUNDS = 5; 35 | int roundTotal = 0; 36 | int roundsDone = 0; 37 | final String[] names = new String[] {"Decode/JDK", "Decode/Unsafe" }; 38 | final int TYPES = names.length; 39 | final long[] times = new long[TYPES]; 40 | 41 | StringBuilder sb = new StringBuilder(); 42 | for (int i = 0; i < INPUT_LEN; ++i) { 43 | sb.append((char) ('A'+i)); 44 | } 45 | byte[] INPUT = new byte[INPUT_LEN + 8]; 46 | { 47 | byte[] b = sb.toString().getBytes("UTF-8"); 48 | System.arraycopy(b, 0, INPUT, 4, INPUT_LEN); 49 | } 50 | 51 | for (;; ++roundTotal) { 52 | try { Thread.sleep(100L); } catch (InterruptedException ie) { } 53 | int round = (roundTotal % TYPES); 54 | String msg = names[round]; 55 | long msec; 56 | 57 | switch (round) { 58 | case 0: 59 | msec = testDecodeJDK(REPS, INPUT, 4, INPUT_LEN); 60 | break; 61 | case 1: 62 | msec = testDecodeUnsafe(REPS, INPUT, 4, INPUT_LEN); 63 | break; 64 | default: 65 | throw new Error(); 66 | } 67 | 68 | boolean roundDone = (round == 1); 69 | 70 | // skip first 5 rounds to let results stabilize 71 | if (roundsDone >= WARMUP_ROUNDS) { 72 | times[round] += msec; 73 | } 74 | System.out.printf("Test '%s' -> %d msecs\n", msg, msec); 75 | if (roundDone) { 76 | roundDone = false; 77 | ++roundsDone; 78 | if ((roundsDone % 7) == 0 && roundsDone > WARMUP_ROUNDS) { 79 | _printResults((roundsDone - WARMUP_ROUNDS), names, times); 80 | } 81 | } 82 | if ((roundTotal % 17) == 0) { 83 | System.out.println("[GC]"); 84 | Thread.sleep(100L); 85 | System.gc(); 86 | Thread.sleep(100L); 87 | } 88 | } 89 | } 90 | 91 | public long testDecodeJDK(int reps, byte[] input, final int offset, final int len) 92 | { 93 | final long mainStart = System.currentTimeMillis(); 94 | char[] result = new char[64]; 95 | while (--reps >= 0) { 96 | for (int i = 0; i < len; ++i) { 97 | result[i] = (char) input[offset+i]; 98 | } 99 | } 100 | long time = System.currentTimeMillis() - mainStart; 101 | return time; 102 | } 103 | 104 | public long testDecodeUnsafe(int reps, byte[] input, final int offset, final int len) 105 | { 106 | final long mainStart = System.currentTimeMillis(); 107 | char[] result = new char[100]; 108 | 109 | while (--reps >= 0) { 110 | // long inBase = BYTE_ARRAY_OFFSET + offset; 111 | // long outBase = CHAR_ARRAY_OFFSET; 112 | 113 | // final long inEnd = inBase + len; 114 | for (int i = 0; i < len; ) { 115 | result[i] = (char) input[offset+i]; ++i; // was input[offset+1]: off-by-typo, decoded wrong byte 116 | 117 | /* 118 | int quad = unsafe.getInt(input, inBase); 119 | inBase += 4; 120 | 121 | result[i++] = (char) (quad >>> 24); 122 | result[i++] = (char) ((quad >> 16) & 0xFF); 123 | result[i++] = (char) ((quad >> 8) & 0xFF); 124 | result[i++] = (char) (quad & 0xFF); 125 | */ 126 | 127 | /* 128 | int q1 = ((quad >>> 24) << 16) + ((quad >> 16) & 0xFF); 129 | 130 | unsafe.putInt(result, outBase, q1); 131 | outBase += 4; 132 | 133 | int q2 = (quad & 0xFFFF); 134 | q2 = ((q2 >> 8) << 16) | (q2 & 0xFF); 135 | 136 | unsafe.putInt(result, outBase, q2); 137 | outBase += 4; 138 | 139 | long l = q1; 140 | l = (l << 32) | q2; 141 | 142 | unsafe.putLong(result, outBase, l); 143 | outBase += 8; 144 | */ 145 | } 146 | } 147 | long time = System.currentTimeMillis() - mainStart; 148 | /* 149 | String str = new String(result, 0, len); 150 | System.out.println("("+str.length()+") '"+str+"'"); 151 | */ 152 | 
return time; 153 | } 154 | 155 | protected void _printResults(int rounds, String[] names, long[] times) 156 | { 157 | System.out.printf(" Averages after %d rounds:", rounds); 158 | double den = (double) rounds; 159 | for (int file = 0; file < names.length; ++file) { 160 | if (file > 0) { 161 | System.out.print(" / "); 162 | } 163 | System.out.printf(" %s(", names[file]); 164 | long time = times[file]; 165 | double msecs = time / den; 166 | System.out.printf("%.1f)", msecs); 167 | } 168 | System.out.println(); 169 | } 170 | 171 | public static void main(String[] args) throws Exception 172 | { 173 | new ManualUnsafePerf().test(); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/impl/VanillaChunkEncoder.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.impl; 2 | 3 | import com.ning.compress.BufferRecycler; 4 | import com.ning.compress.lzf.ChunkEncoder; 5 | import com.ning.compress.lzf.LZFChunk; 6 | 7 | public class VanillaChunkEncoder 8 | extends ChunkEncoder 9 | { 10 | /** 11 | * @param totalLength Total encoded length; used for calculating size 12 | * of hash table to use 13 | */ 14 | public VanillaChunkEncoder(int totalLength) { 15 | super(totalLength); 16 | } 17 | 18 | /** 19 | * Alternate constructor used when we want to avoid allocating encoding 20 | * buffer, in cases where caller wants full control over allocations. 21 | */ 22 | protected VanillaChunkEncoder(int totalLength, boolean bogus) { 23 | super(totalLength, bogus); 24 | } 25 | 26 | /** 27 | * @param totalLength Total encoded length; used for calculating size 28 | * of hash table to use 29 | * @param bufferRecycler The BufferRecycler instance 30 | */ 31 | public VanillaChunkEncoder(int totalLength, BufferRecycler bufferRecycler) { 32 | super(totalLength, bufferRecycler); 33 | } 34 | 35 | /** 36 | * Alternate constructor used when we want to avoid allocating encoding 37 | * buffer, in cases where caller wants full control over allocations. 38 | */ 39 | protected VanillaChunkEncoder(int totalLength, BufferRecycler bufferRecycler, boolean bogus) { 40 | super(totalLength, bufferRecycler, bogus); 41 | } 42 | 43 | public static VanillaChunkEncoder nonAllocatingEncoder(int totalLength) { 44 | return new VanillaChunkEncoder(totalLength, true); 45 | } 46 | 47 | public static VanillaChunkEncoder nonAllocatingEncoder(int totalLength, BufferRecycler bufferRecycler) { 48 | return new VanillaChunkEncoder(totalLength, bufferRecycler, true); 49 | } 50 | 51 | /* 52 | /////////////////////////////////////////////////////////////////////// 53 | // Abstract method implementations 54 | /////////////////////////////////////////////////////////////////////// 55 | */ 56 | 57 | /** 58 | * Main workhorse method that will try to compress given chunk, and return 59 | * end position (offset to byte after last included byte) 60 | * 61 | * @return Output pointer after handling content, such that result - originalOutPos 62 | * is the actual length of compressed chunk (without header) 63 | */ 64 | @Override 65 | protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos) 66 | { 67 | final int[] hashTable = _hashTable; 68 | ++outPos; // To leave one byte for literal-length indicator 69 | int seen = first(in, inPos); // past 4 bytes we have seen...
(last one is LSB) 70 | int literals = 0; 71 | inEnd -= TAIL_LENGTH; 72 | final int firstPos = inPos; // so that we won't have back references across block boundary 73 | 74 | while (inPos < inEnd) { 75 | byte p2 = in[inPos + 2]; 76 | // next 77 | seen = (seen << 8) + (p2 & 255); 78 | int off = hash(seen); 79 | int ref = hashTable[off]; 80 | hashTable[off] = inPos; 81 | 82 | // First expected common case: no back-ref (for whatever reason) 83 | if (ref >= inPos // can't refer forward (i.e. leftovers) 84 | || (ref < firstPos) // or to previous block 85 | || (off = inPos - ref) > MAX_OFF 86 | || in[ref+2] != p2 // must match hash 87 | || in[ref+1] != (byte) (seen >> 8) 88 | || in[ref] != (byte) (seen >> 16)) { 89 | out[outPos++] = in[inPos++]; 90 | literals++; 91 | if (literals == LZFChunk.MAX_LITERAL) { 92 | out[outPos - 33] = (byte) 31; // <= out[outPos - literals - 1] = MAX_LITERAL_MINUS_1; 93 | literals = 0; 94 | outPos++; // To leave one byte for literal-length indicator 95 | } 96 | continue; 97 | } 98 | // match 99 | int maxLen = inEnd - inPos + 2; 100 | if (maxLen > MAX_REF) { 101 | maxLen = MAX_REF; 102 | } 103 | if (literals == 0) { 104 | outPos--; // We do not need literal length indicator, go back 105 | } else { 106 | out[outPos - literals - 1] = (byte) (literals - 1); 107 | literals = 0; 108 | } 109 | int len = 3; 110 | // find match length 111 | while (len < maxLen && in[ref + len] == in[inPos + len]) { 112 | len++; 113 | } 114 | len -= 2; 115 | --off; // was off by one earlier 116 | if (len < 7) { 117 | out[outPos++] = (byte) ((off >> 8) + (len << 5)); 118 | } else { 119 | out[outPos++] = (byte) ((off >> 8) + (7 << 5)); 120 | out[outPos++] = (byte) (len - 7); 121 | } 122 | out[outPos++] = (byte) off; 123 | outPos++; 124 | inPos += len; 125 | seen = first(in, inPos); 126 | seen = (seen << 8) + (in[inPos + 2] & 255); 127 | hashTable[hash(seen)] = inPos; 128 | ++inPos; 129 | seen = (seen << 8) + (in[inPos + 2] & 255); // hash = next(hash, in, inPos); 130 | hashTable[hash(seen)] = inPos; 131 | ++inPos; 132 | } 133 | // Should never happen but verify: 134 | if (inPos > inEnd + TAIL_LENGTH) { 135 | throw new IllegalStateException("Internal error: consumed input past end, `inPos` > "+(inEnd + TAIL_LENGTH)); 136 | } 137 | // try offlining the tail 138 | return _handleTail(in, inPos, inEnd+TAIL_LENGTH, out, outPos, literals); 139 | } 140 | 141 | private final int _handleTail(byte[] in, int inPos, int inEnd, byte[] out, int outPos, 142 | int literals) 143 | { 144 | while (inPos < inEnd) { 145 | out[outPos++] = in[inPos++]; 146 | literals++; 147 | if (literals == LZFChunk.MAX_LITERAL) { 148 | out[outPos - literals - 1] = (byte) (literals - 1); 149 | literals = 0; 150 | outPos++; 151 | } 152 | } 153 | out[outPos - literals - 1] = (byte) (literals - 1); 154 | if (literals == 0) { 155 | outPos--; 156 | } 157 | return outPos; 158 | } 159 | 160 | /* 161 | /////////////////////////////////////////////////////////////////////// 162 | // Internal methods 163 | /////////////////////////////////////////////////////////////////////// 164 | */ 165 | 166 | private final int first(byte[] in, int inPos) { 167 | return (in[inPos] << 8) + (in[inPos + 1] & 0xFF); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/impl/UnsafeChunkEncoderBE.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.impl; 2 | 3 | import com.ning.compress.BufferRecycler; 4 | import 
com.ning.compress.lzf.LZFChunk; 5 | 6 | /** 7 | * Implementation to use on Big-Endian architectures. 8 | */ 9 | @SuppressWarnings("restriction") 10 | public final class UnsafeChunkEncoderBE 11 | extends UnsafeChunkEncoder 12 | { 13 | public UnsafeChunkEncoderBE(int totalLength) { 14 | super(totalLength); 15 | } 16 | 17 | public UnsafeChunkEncoderBE(int totalLength, boolean bogus) { 18 | super(totalLength, bogus); 19 | } 20 | 21 | public UnsafeChunkEncoderBE(int totalLength, BufferRecycler bufferRecycler) { 22 | super(totalLength, bufferRecycler); 23 | } 24 | 25 | public UnsafeChunkEncoderBE(int totalLength, BufferRecycler bufferRecycler, boolean bogus) { 26 | super(totalLength, bufferRecycler, bogus); 27 | } 28 | 29 | @Override 30 | protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos) 31 | { 32 | // Sanity checks; otherwise if any of the arguments are invalid `Unsafe` might corrupt memory 33 | _checkArrayIndices(in, inPos, inEnd); 34 | _checkArrayIndices(out, outPos, out.length); 35 | _checkOutputLength(inEnd - inPos, out.length - outPos); 36 | 37 | final int[] hashTable = _hashTable; 38 | int literals = 0; 39 | inEnd -= TAIL_LENGTH; 40 | final int firstPos = inPos; // so that we won't have back references across block boundary 41 | 42 | int seen = _getInt(in, inPos) >> 16; 43 | 44 | while (inPos < inEnd) { 45 | seen = (seen << 8) + (in[inPos + 2] & 255); 46 | 47 | int off = hash(seen); 48 | int ref = hashTable[off]; 49 | hashTable[off] = inPos; 50 | 51 | // First expected common case: no back-ref (for whatever reason) 52 | if ((ref >= inPos) // can't refer forward (i.e. leftovers) 53 | || (ref < firstPos) // or to previous block 54 | || (off = inPos - ref) > MAX_OFF 55 | || ((seen << 8) != _getShifted3Bytes(in, ref))) { 56 | ++inPos; 57 | ++literals; 58 | if (literals == LZFChunk.MAX_LITERAL) { 59 | outPos = _copyFullLiterals(in, inPos, out, outPos); 60 | literals = 0; 61 | } 62 | continue; 63 | } 64 | 65 | if (literals > 0) { 66 | outPos = _copyPartialLiterals(in, inPos, out, outPos, literals); 67 | literals = 0; 68 | } 69 | // match 70 | final int maxLen = Math.min(MAX_REF, inEnd - inPos + 2); 71 | int len = _findMatchLength(in, ref+3, inPos+3, ref+maxLen); 72 | 73 | --off; // was off by one earlier 74 | if (len < 7) { 75 | out[outPos++] = (byte) ((off >> 8) + (len << 5)); 76 | } else { 77 | out[outPos++] = (byte) ((off >> 8) + (7 << 5)); 78 | out[outPos++] = (byte) (len - 7); 79 | } 80 | out[outPos++] = (byte) off; 81 | inPos += len; 82 | seen = _getInt(in, inPos); 83 | hashTable[hash(seen >> 8)] = inPos; 84 | ++inPos; 85 | hashTable[hash(seen)] = inPos; 86 | ++inPos; 87 | } 88 | // Should never happen but verify: 89 | if (inPos > inEnd + TAIL_LENGTH) { 90 | throw new IllegalStateException("Internal error: consumed input past end, `inPos` > "+(inEnd + TAIL_LENGTH)); 91 | } 92 | // offline the tail handling 93 | return _handleTail(in, inPos, inEnd+TAIL_LENGTH, out, outPos, literals); 94 | } 95 | 96 | private final static int _getInt(final byte[] in, final int inPos) { 97 | return unsafe.getInt(in, BYTE_ARRAY_OFFSET + inPos); 98 | } 99 | 100 | /** 101 | * Reads 3 bytes, shifted to the left by 8. 
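* (Illustrative example, not in the original javadoc: for input bytes
* {0x01, 0x02, 0x03} starting at {@code inPos}, the result is
* {@code 0x01020300}, which the caller compares against {@code seen << 8}.)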
102 | */ 103 | private static int _getShifted3Bytes(byte[] in, int inPos) { 104 | // For inPos 0 have to read bytes manually to avoid Unsafe out-of-bounds access at `inPos - 1` 105 | // But for higher inPos values can use Unsafe to read as int and discard first byte 106 | if (inPos == 0) { 107 | return ((in[0] & 0xFF) << 24) | ((in[1] & 0xFF) << 16) | ((in[2] & 0xFF) << 8); 108 | } else { 109 | return _getInt(in, inPos - 1) << 8; 110 | } 111 | } 112 | 113 | /* 114 | /////////////////////////////////////////////////////////////////////// 115 | // Methods for finding length of a back-reference 116 | /////////////////////////////////////////////////////////////////////// 117 | */ 118 | 119 | private final static int _findMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1) 120 | { 121 | // Expect at least 8 bytes to check for fast case; offline others 122 | if ((ptr1 + 8) >= maxPtr1) { // rare case, offline 123 | return _findTailMatchLength(in, ptr1, ptr2, maxPtr1); 124 | } 125 | // short matches common, so start with specialized comparison 126 | // NOTE: we know that we have 4 bytes of slack before end, so this is safe: 127 | int i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1); 128 | int i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2); 129 | if (i1 != i2) { 130 | return 1 + _leadingBytes(i1, i2); 131 | } 132 | ptr1 += 4; 133 | ptr2 += 4; 134 | 135 | i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1); 136 | i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2); 137 | if (i1 != i2) { 138 | return 5 + _leadingBytes(i1, i2); 139 | } 140 | return _findLongMatchLength(in, ptr1+4, ptr2+4, maxPtr1); 141 | } 142 | 143 | private final static int _findLongMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1) 144 | { 145 | final int base = ptr1 - 9; 146 | // and then just loop with longs if we get that far 147 | final int longEnd = maxPtr1-8; 148 | while (ptr1 <= longEnd) { 149 | long l1 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr1); 150 | long l2 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr2); 151 | if (l1 != l2) { 152 | return ptr1 - base + _leadingBytes(l1, l2); 153 | } 154 | ptr1 += 8; 155 | ptr2 += 8; 156 | } 157 | // or, if running out of runway, handle last bytes with loop-de-loop... 158 | while (ptr1 < maxPtr1 && in[ptr1] == in[ptr2]) { 159 | ++ptr1; 160 | ++ptr2; 161 | } 162 | return ptr1 - base; // i.e. 163 | } 164 | 165 | /* With Big-Endian, in-memory layout is "natural", so what we consider 166 | * leading is also leading for in-register. 
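*
* Illustrative example: for i1 = 0x11223344 and i2 = 0x11223399 only the
* last byte differs, so i1 ^ i2 == 0x000000DD; numberOfLeadingZeros gives
* 24, and 24 >> 3 == 3 matching leading bytes.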
167 | */ 168 | 169 | private final static int _leadingBytes(int i1, int i2) { 170 | return Integer.numberOfLeadingZeros(i1 ^ i2) >> 3; 171 | } 172 | 173 | private final static int _leadingBytes(long l1, long l2) { 174 | return Long.numberOfLeadingZeros(l1 ^ l2) >> 3; 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/TestLZFRoundTrip.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.*; 4 | import java.nio.file.Files; 5 | import java.nio.file.Path; 6 | 7 | import com.ning.compress.lzf.impl.UnsafeChunkDecoder; 8 | import com.ning.compress.lzf.impl.VanillaChunkDecoder; 9 | import com.ning.compress.lzf.util.ChunkEncoderFactory; 10 | import org.junit.jupiter.api.Test; 11 | import org.junit.jupiter.api.io.TempDir; 12 | 13 | import static org.junit.jupiter.api.Assertions.*; 14 | 15 | public class TestLZFRoundTrip 16 | { 17 | private final static String[] FILES = { 18 | "/shakespeare.tar", 19 | "/shakespeare/hamlet.xml", 20 | "/shakespeare/macbeth.xml", 21 | "/shakespeare/play.dtd", 22 | "/shakespeare/r_and_j.xml" 23 | ,"/binary/help.bin" 24 | ,"/binary/word.doc" 25 | }; 26 | 27 | @TempDir 28 | Path tempDir; 29 | 30 | @Test 31 | public void testVanillaCodec() throws Exception 32 | { 33 | _testUsingBlock(new VanillaChunkDecoder()); 34 | _testUsingReader(new VanillaChunkDecoder()); 35 | } 36 | 37 | @Test 38 | public void testUnsafeCodec() throws IOException 39 | { 40 | _testUsingBlock(new UnsafeChunkDecoder()); 41 | _testUsingReader(new UnsafeChunkDecoder()); 42 | } 43 | 44 | @Test 45 | public void testLZFCompressionOnTestFiles() throws IOException { 46 | for (int i = 0; i < 100; i++) { 47 | testLZFCompressionOnDir(new File("src/test/resources/shakespeare")); 48 | } 49 | } 50 | 51 | private void testLZFCompressionOnDir(File dir) throws IOException 52 | { 53 | File[] files = dir.listFiles(); 54 | for (File file : files) { 55 | if (!file.isDirectory()) { 56 | testLZFCompressionOnFile(file.toPath()); 57 | } else { 58 | testLZFCompressionOnDir(file); 59 | } 60 | } 61 | } 62 | 63 | private void testLZFCompressionOnFile(Path file) throws IOException 64 | { 65 | final ChunkDecoder decoder = new UnsafeChunkDecoder(); 66 | byte[] buf = new byte[64 * 1024]; 67 | 68 | Path compressedFile = Files.createTempFile(tempDir, "test", ".lzf"); 69 | try (InputStream in = new BufferedInputStream(Files.newInputStream(file)); 70 | OutputStream out = new LZFOutputStream(new BufferedOutputStream( 71 | Files.newOutputStream(compressedFile)))) { 72 | int len; 73 | while ((len = in.read(buf, 0, buf.length)) >= 0) { 74 | out.write(buf, 0, len); 75 | } 76 | } 77 | 78 | // decompress and verify bytes haven't changed 79 | try (InputStream in = new BufferedInputStream(Files.newInputStream(file)); 80 | DataInputStream compressedIn = new DataInputStream(new LZFInputStream(decoder, 81 | Files.newInputStream(compressedFile), false))) { 82 | int len; 83 | while ((len = in.read(buf, 0, buf.length)) >= 0) { 84 | byte[] buf2 = new byte[len]; 85 | compressedIn.readFully(buf2, 0, len); 86 | byte[] trimmedBuf = new byte[len]; 87 | System.arraycopy(buf, 0, trimmedBuf, 0, len); 88 | assertArrayEquals(trimmedBuf, buf2); 89 | } 90 | assertEquals(-1, compressedIn.read()); 91 | } 92 | } 93 | 94 | @Test 95 | public void testHashCollision() throws IOException 96 | { 97 | // this test generates a hash collision: [0,1,153,64] hashes the same as [1,153,64,64] 98 | // and 
then leverages the bug s/inPos/0/ to corrupt the array 99 | // the first array is used to insert a reference from this hash to offset 6 100 | // and then the hash table is reused and still thinks that there is such a hash at position 6 101 | // and at position 7, it finds a sequence with the same hash 102 | // so it inserts a buggy reference 103 | final byte[] b1 = new byte[] {0,1,2,3,4,(byte)153,64,64,64,9,9,9,9,9,9,9,9,9,9}; 104 | final byte[] b2 = new byte[] {1,(byte)153,0,0,0,0,(byte)153,64,64,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; 105 | final int off = 6; 106 | 107 | ChunkEncoder encoder = ChunkEncoderFactory.safeInstance(); 108 | ChunkDecoder decoder = new VanillaChunkDecoder(); 109 | _testCollision(encoder, decoder, b1, 0, b1.length); 110 | _testCollision(encoder, decoder, b2, off, b2.length - off); 111 | 112 | encoder = ChunkEncoderFactory.optimalInstance(); 113 | decoder = new UnsafeChunkDecoder(); 114 | _testCollision(encoder, decoder, b1, 0, b1.length); 115 | _testCollision(encoder, decoder, b2, off, b2.length - off); 116 | } 117 | 118 | private void _testCollision(ChunkEncoder encoder, ChunkDecoder decoder, byte[] bytes, int offset, int length) throws IOException 119 | { 120 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 121 | byte[] expected = new byte[length]; 122 | byte[] buffer = new byte[LZFChunk.MAX_CHUNK_LEN]; 123 | byte[] output = new byte[length]; 124 | System.arraycopy(bytes, offset, expected, 0, length); 125 | encoder.encodeAndWriteChunk(bytes, offset, length, outputStream); 126 | InputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); 127 | assertEquals(length, decoder.decodeChunk(inputStream, buffer, output)); 128 | assertArrayEquals(expected, output); 129 | } 130 | 131 | /* 132 | /////////////////////////////////////////////////////////////////////// 133 | // Helper method 134 | /////////////////////////////////////////////////////////////////////// 135 | */ 136 | 137 | 138 | protected void _testUsingBlock(ChunkDecoder decoder) throws IOException 139 | { 140 | for (String name : FILES) { 141 | byte[] data = readResource(name); 142 | byte[] lzf = LZFEncoder.encode(data); 143 | byte[] decoded = decoder.decode(lzf); 144 | 145 | assertArrayEquals(data, decoded, 146 | String.format("File '%s', %d->%d bytes", name, data.length, lzf.length)); 147 | } 148 | } 149 | 150 | protected void _testUsingReader(ChunkDecoder decoder) throws IOException 151 | { 152 | for (String name : FILES) { 153 | byte[] data = readResource(name); 154 | byte[] lzf = LZFEncoder.encode(data); 155 | LZFInputStream comp = new LZFInputStream(decoder, new ByteArrayInputStream(lzf), false); 156 | byte[] decoded = readAll(comp); 157 | 158 | assertArrayEquals(data, decoded); 159 | } 160 | } 161 | 162 | protected byte[] readResource(String name) throws IOException 163 | { 164 | return readAll(getClass().getResourceAsStream(name)); 165 | } 166 | 167 | protected byte[] readAll(InputStream in) throws IOException 168 | { 169 | assertNotNull(in); 170 | byte[] buffer = new byte[4000]; 171 | int count; 172 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(4000); 173 | 174 | while ((count = in.read(buffer)) > 0) { 175 | bytes.write(buffer, 0, count); 176 | } 177 | in.close(); 178 | return bytes.toByteArray(); 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/impl/UnsafeChunkEncoderLE.java: -------------------------------------------------------------------------------- 1 | 
package com.ning.compress.lzf.impl; 2 | 3 | import com.ning.compress.BufferRecycler; 4 | import com.ning.compress.lzf.LZFChunk; 5 | 6 | /** 7 | * Implementation to use on Little-Endian architectures. 8 | */ 9 | @SuppressWarnings("restriction") 10 | public final class UnsafeChunkEncoderLE 11 | extends UnsafeChunkEncoder 12 | { 13 | public UnsafeChunkEncoderLE(int totalLength) { 14 | super(totalLength); 15 | } 16 | 17 | public UnsafeChunkEncoderLE(int totalLength, boolean bogus) { 18 | super(totalLength, bogus); 19 | } 20 | 21 | public UnsafeChunkEncoderLE(int totalLength, BufferRecycler bufferRecycler) { 22 | super(totalLength, bufferRecycler); 23 | } 24 | 25 | public UnsafeChunkEncoderLE(int totalLength, BufferRecycler bufferRecycler, boolean bogus) { 26 | super(totalLength, bufferRecycler, bogus); 27 | } 28 | 29 | @Override 30 | protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos) 31 | { 32 | // Sanity checks; otherwise if any of the arguments are invalid `Unsafe` might corrupt memory 33 | _checkArrayIndices(in, inPos, inEnd); 34 | _checkArrayIndices(out, outPos, out.length); 35 | _checkOutputLength(inEnd - inPos, out.length - outPos); 36 | 37 | final int[] hashTable = _hashTable; 38 | int literals = 0; 39 | inEnd -= TAIL_LENGTH; 40 | final int firstPos = inPos; // so that we won't have back references across block boundary 41 | 42 | int seen = _getInt(in, inPos) >> 16; 43 | 44 | while (inPos < inEnd) { 45 | seen = (seen << 8) + (in[inPos + 2] & 255); 46 | 47 | int off = hash(seen); 48 | int ref = hashTable[off]; 49 | hashTable[off] = inPos; 50 | 51 | // First expected common case: no back-ref (for whatever reason) 52 | if ((ref >= inPos) // can't refer forward (i.e. leftovers) 53 | || (ref < firstPos) // or to previous block 54 | || (off = inPos - ref) > MAX_OFF 55 | || ((seen << 8) != _getShifted3Bytes(in, ref))) { 56 | ++inPos; 57 | ++literals; 58 | if (literals == LZFChunk.MAX_LITERAL) { 59 | outPos = _copyFullLiterals(in, inPos, out, outPos); 60 | literals = 0; 61 | } 62 | continue; 63 | } 64 | 65 | if (literals > 0) { 66 | outPos = _copyPartialLiterals(in, inPos, out, outPos, literals); 67 | literals = 0; 68 | } 69 | // match 70 | final int maxLen = Math.min(MAX_REF, inEnd - inPos + 2); 71 | int len = _findMatchLength(in, ref+3, inPos+3, ref+maxLen); 72 | 73 | --off; // was off by one earlier 74 | if (len < 7) { 75 | out[outPos++] = (byte) ((off >> 8) + (len << 5)); 76 | } else { 77 | out[outPos++] = (byte) ((off >> 8) + (7 << 5)); 78 | out[outPos++] = (byte) (len - 7); 79 | } 80 | out[outPos++] = (byte) off; 81 | inPos += len; 82 | seen = _getInt(in, inPos); 83 | hashTable[hash(seen >> 8)] = inPos; 84 | ++inPos; 85 | hashTable[hash(seen)] = inPos; 86 | ++inPos; 87 | } 88 | // Should never happen but verify: 89 | if (inPos > inEnd + TAIL_LENGTH) { 90 | throw new IllegalStateException("Internal error: consumed input past end, `inPos` > "+(inEnd + TAIL_LENGTH)); 91 | } 92 | // offline the tail handling 93 | return _handleTail(in, inPos, inEnd+TAIL_LENGTH, out, outPos, literals); 94 | } 95 | 96 | private final static int _getInt(final byte[] in, final int inPos) { 97 | return Integer.reverseBytes(unsafe.getInt(in, BYTE_ARRAY_OFFSET + inPos)); 98 | } 99 | 100 | /** 101 | * Reads 3 bytes, shifted to the left by 8. 
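* (Illustrative example, not in the original javadoc: as in the big-endian
* variant, bytes {0x01, 0x02, 0x03} at {@code inPos} yield {@code 0x01020300};
* here {@code _getInt} byte-swaps the raw little-endian read first.)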
102 | */ 103 | private static int _getShifted3Bytes(byte[] in, int inPos) { 104 | // For inPos 0 have to read bytes manually to avoid Unsafe out-of-bounds access at `inPos - 1` 105 | // But for higher inPos values can use Unsafe to read as int and discard first byte 106 | if (inPos == 0) { 107 | return ((in[0] & 0xFF) << 24) | ((in[1] & 0xFF) << 16) | ((in[2] & 0xFF) << 8); 108 | } else { 109 | return _getInt(in, inPos - 1) << 8; 110 | } 111 | } 112 | 113 | /* 114 | /////////////////////////////////////////////////////////////////////// 115 | // Methods for finding length of a back-reference 116 | /////////////////////////////////////////////////////////////////////// 117 | */ 118 | 119 | private final static int _findMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1) 120 | { 121 | // Expect at least 8 bytes to check for fast case; offline others 122 | if ((ptr1 + 8) >= maxPtr1) { // rare case, offline 123 | return _findTailMatchLength(in, ptr1, ptr2, maxPtr1); 124 | } 125 | // short matches common, so start with specialized comparison 126 | // NOTE: we know that we have 4 bytes of slack before end, so this is safe: 127 | int i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1); 128 | int i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2); 129 | if (i1 != i2) { 130 | return 1 + _leadingBytes(i1, i2); 131 | } 132 | ptr1 += 4; 133 | ptr2 += 4; 134 | 135 | i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1); 136 | i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2); 137 | if (i1 != i2) { 138 | return 5 + _leadingBytes(i1, i2); 139 | } 140 | return _findLongMatchLength(in, ptr1+4, ptr2+4, maxPtr1); 141 | } 142 | 143 | private final static int _findLongMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1) 144 | { 145 | final int base = ptr1 - 9; 146 | // and then just loop with longs if we get that far 147 | final int longEnd = maxPtr1-8; 148 | while (ptr1 <= longEnd) { 149 | long l1 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr1); 150 | long l2 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr2); 151 | if (l1 != l2) { 152 | return ptr1 - base + _leadingBytes(l1, l2); 153 | } 154 | ptr1 += 8; 155 | ptr2 += 8; 156 | } 157 | // or, if running out of runway, handle last bytes with loop-de-loop... 158 | while (ptr1 < maxPtr1 && in[ptr1] == in[ptr2]) { 159 | ++ptr1; 160 | ++ptr2; 161 | } 162 | return ptr1 - base; // i.e. 163 | } 164 | 165 | /* With Little-Endian, in-memory layout is reverse of what we expect for 166 | * in-register, so we either have to reverse bytes, or, simpler, 167 | * calculate trailing zeroes instead. 168 | */ 169 | 170 | private final static int _leadingBytes(int i1, int i2) { 171 | return Integer.numberOfTrailingZeros(i1 ^ i2) >> 3; 172 | } 173 | 174 | private final static int _leadingBytes(long l1, long l2) { 175 | return Long.numberOfTrailingZeros(l1 ^ l2) >> 3; 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/impl/UnsafeChunkEncoder.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.impl; 2 | 3 | import com.ning.compress.BufferRecycler; 4 | import java.lang.reflect.Field; 5 | 6 | import sun.misc.Unsafe; 7 | 8 | import com.ning.compress.lzf.ChunkEncoder; 9 | import com.ning.compress.lzf.LZFChunk; 10 | 11 | /** 12 | * {@link ChunkEncoder} implementation that handles actual encoding of individual chunks, 13 | * using Sun's sun.misc.Unsafe functionality, which gives 14 | * nice extra boost for speed. 
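* Instances are meant to be obtained through
* {@link com.ning.compress.lzf.util.ChunkEncoderFactory} rather than
* constructed directly, since all members are at most package-private.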
15 | * 16 | * @author Tatu Saloranta (tatu.saloranta@iki.fi) 17 | */ 18 | @SuppressWarnings("restriction") 19 | public abstract class UnsafeChunkEncoder 20 | extends ChunkEncoder 21 | { 22 | // // Our Nitro Booster, mr. Unsafe! 23 | 24 | static final Unsafe unsafe; 25 | static { 26 | try { 27 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); 28 | theUnsafe.setAccessible(true); 29 | unsafe = (Unsafe) theUnsafe.get(null); 30 | } 31 | catch (Exception e) { 32 | throw new RuntimeException(e); 33 | } 34 | } 35 | 36 | // All members here (fields, constructors, methods) are at most package-private; users are 37 | // not supposed to subclass this class 38 | 39 | static final long BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class); 40 | 41 | UnsafeChunkEncoder(int totalLength) { 42 | super(totalLength); 43 | } 44 | 45 | UnsafeChunkEncoder(int totalLength, boolean bogus) { 46 | super(totalLength, bogus); 47 | } 48 | 49 | UnsafeChunkEncoder(int totalLength, BufferRecycler bufferRecycler) { 50 | super(totalLength, bufferRecycler); 51 | } 52 | 53 | UnsafeChunkEncoder(int totalLength, BufferRecycler bufferRecycler, boolean bogus) { 54 | super(totalLength, bufferRecycler, bogus); 55 | } 56 | 57 | /* 58 | /////////////////////////////////////////////////////////////////////// 59 | // Shared helper methods 60 | /////////////////////////////////////////////////////////////////////// 61 | */ 62 | 63 | /** 64 | * @param start start index, inclusive 65 | * @param end end index, exclusive 66 | */ 67 | static void _checkArrayIndices(byte[] array, int start, int end) { 68 | if (start < 0 || end < start || end > array.length) { 69 | throw new ArrayIndexOutOfBoundsException(); 70 | } 71 | } 72 | 73 | static void _checkOutputLength(int inputLen, int outputLen) { 74 | int maxEncoded = inputLen + ((inputLen + 31) >> 5); 75 | 76 | if (maxEncoded < 0 || maxEncoded > outputLen) { 77 | throw new IllegalArgumentException("Output length " + outputLen + " is too small for input length " + inputLen); 78 | } 79 | } 80 | 81 | final static int _copyPartialLiterals(byte[] in, int inPos, byte[] out, int outPos, 82 | int literals) 83 | { 84 | if (out.length - outPos < literals + 1) { 85 | throw new IllegalArgumentException("Not enough space in output array"); 86 | } 87 | 88 | out[outPos++] = (byte) (literals-1); 89 | 90 | // Here use of Unsafe is clear win: 91 | // System.arraycopy(in, inPos-literals, out, outPos, literals); 92 | 93 | long rawInPtr = BYTE_ARRAY_OFFSET + inPos - literals; 94 | long rawOutPtr= BYTE_ARRAY_OFFSET + outPos; 95 | 96 | switch (literals >> 3) { 97 | case 3: 98 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 99 | rawInPtr += 8; 100 | rawOutPtr += 8; 101 | case 2: 102 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 103 | rawInPtr += 8; 104 | rawOutPtr += 8; 105 | case 1: 106 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 107 | rawInPtr += 8; 108 | rawOutPtr += 8; 109 | } 110 | int left = (literals & 7); 111 | if (left > 0) { 112 | System.arraycopy(in, (int) (rawInPtr - BYTE_ARRAY_OFFSET), out, (int) (rawOutPtr - BYTE_ARRAY_OFFSET), left); 113 | } 114 | 115 | return outPos+literals; 116 | } 117 | 118 | final static int _copyLongLiterals(byte[] in, int inPos, byte[] out, int outPos, 119 | int literals) 120 | { 121 | inPos -= literals; 122 | 123 | long rawInPtr = BYTE_ARRAY_OFFSET + inPos; 124 | long rawOutPtr = BYTE_ARRAY_OFFSET + outPos; 125 | 126 | while (literals >= LZFChunk.MAX_LITERAL) { 127 | out[outPos++] = (byte) 31; 128 | 
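// (31 == LZFChunk.MAX_LITERAL - 1: header byte that marks a full 32-byte literal run)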
++rawOutPtr; 129 | 130 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 131 | rawInPtr += 8; 132 | rawOutPtr += 8; 133 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 134 | rawInPtr += 8; 135 | rawOutPtr += 8; 136 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 137 | rawInPtr += 8; 138 | rawOutPtr += 8; 139 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 140 | rawInPtr += 8; 141 | rawOutPtr += 8; 142 | 143 | inPos += LZFChunk.MAX_LITERAL; 144 | outPos += LZFChunk.MAX_LITERAL; 145 | literals -= LZFChunk.MAX_LITERAL; 146 | } 147 | if (literals > 0) { 148 | return _copyPartialLiterals(in, inPos+literals, out, outPos, literals); 149 | } 150 | return outPos; 151 | } 152 | 153 | final static int _copyFullLiterals(byte[] in, int inPos, byte[] out, int outPos) 154 | { 155 | if (out.length - outPos < 32 + 1) { 156 | throw new IllegalArgumentException("Not enough space in output array"); 157 | } 158 | 159 | // literals == 32 160 | out[outPos++] = (byte) 31; 161 | 162 | long rawInPtr = BYTE_ARRAY_OFFSET + inPos - 32; 163 | long rawOutPtr = BYTE_ARRAY_OFFSET + outPos; 164 | 165 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 166 | rawInPtr += 8; 167 | rawOutPtr += 8; 168 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 169 | rawInPtr += 8; 170 | rawOutPtr += 8; 171 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 172 | rawInPtr += 8; 173 | rawOutPtr += 8; 174 | unsafe.putLong(out, rawOutPtr, unsafe.getLong(in, rawInPtr)); 175 | 176 | return (outPos + 32); 177 | } 178 | 179 | final static int _handleTail(byte[] in, int inPos, int inEnd, byte[] out, int outPos, 180 | int literals) 181 | { 182 | while (inPos < inEnd) { 183 | ++inPos; 184 | ++literals; 185 | if (literals == LZFChunk.MAX_LITERAL) { 186 | out[outPos++] = (byte) (literals-1); // <= out[outPos - literals - 1] = MAX_LITERAL_MINUS_1; 187 | System.arraycopy(in, inPos-literals, out, outPos, literals); 188 | outPos += literals; 189 | literals = 0; 190 | } 191 | } 192 | if (literals > 0) { 193 | out[outPos++] = (byte) (literals - 1); 194 | System.arraycopy(in, inPos-literals, out, outPos, literals); 195 | outPos += literals; 196 | } 197 | return outPos; 198 | } 199 | 200 | final static int _findTailMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1) 201 | { 202 | final int start1 = ptr1; 203 | while (ptr1 < maxPtr1 && in[ptr1] == in[ptr2]) { 204 | ++ptr1; 205 | ++ptr2; 206 | } 207 | return ptr1 - start1 + 1; // i.e. 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/util/ChunkEncoderFactory.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.util; 2 | 3 | import com.ning.compress.BufferRecycler; 4 | import com.ning.compress.lzf.ChunkEncoder; 5 | import com.ning.compress.lzf.LZFChunk; 6 | import com.ning.compress.lzf.impl.UnsafeChunkEncoders; 7 | import com.ning.compress.lzf.impl.VanillaChunkEncoder; 8 | 9 | /** 10 | * Simple helper class used for loading 11 | * {@link ChunkEncoder} implementations, based on criteria 12 | * such as "fastest available" or "safe to run anywhere". 
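*
* A minimal usage sketch (illustrative; {@code data} stands for caller-supplied input):
*
*   ChunkEncoder enc = ChunkEncoderFactory.optimalInstance();
*   LZFChunk chunk = enc.encodeChunk(data, 0, data.length);
*   byte[] compressed = chunk.getData();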
13 | */ 14 | public class ChunkEncoderFactory 15 | { 16 | /* 17 | /////////////////////////////////////////////////////////////////////// 18 | // Public API 19 | /////////////////////////////////////////////////////////////////////// 20 | */ 21 | 22 | /** 23 | * Convenience method, equivalent to: 24 | * 25 | * return optimalInstance(LZFChunk.MAX_CHUNK_LEN); 26 | * 27 | * 28 | * @return ChunkEncoder constructed 29 | */ 30 | public static ChunkEncoder optimalInstance() { 31 | return optimalInstance(LZFChunk.MAX_CHUNK_LEN); 32 | } 33 | 34 | /** 35 | * Method to use for getting compressor instance that uses the most optimal 36 | * available methods for underlying data access. It should be safe to call 37 | * this method as implementations are dynamically loaded; however, on some 38 | * non-standard platforms it may be necessary to either directly load 39 | * instances, or use {@link #safeInstance}. 40 | *
41 | * Uses a ThreadLocal soft-referenced BufferRecycler instance. 42 | * 43 | * @param totalLength Expected total length of content to compress; only matters 44 | * for content that is smaller than maximum chunk size (64k), to optimize 45 | * encoding hash tables 46 | * 47 | * @return ChunkEncoder constructed 48 | */ 49 | public static ChunkEncoder optimalInstance(int totalLength) { 50 | try { 51 | return UnsafeChunkEncoders.createEncoder(totalLength); 52 | } catch (Exception e) { 53 | return safeInstance(totalLength); 54 | } 55 | } 56 | 57 | /** 58 | * Factory method for constructing encoder that is always passed buffer 59 | * externally, so that it will not (nor need) allocate encoding buffer. 60 | *
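* A non-allocating encoder is meant to be combined with caller-managed output
* buffers, for example (illustrative sketch; {@code src} is caller-supplied):
*
*   ChunkEncoder enc = ChunkEncoderFactory.optimalNonAllocatingInstance(src.length);
*   byte[] outBuf = new byte[LZFEncoder.estimateMaxWorkspaceSize(src.length)];
*   int end = LZFEncoder.appendEncoded(enc, src, 0, src.length, outBuf, 0);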
61 | * Uses a ThreadLocal soft-referenced BufferRecycler instance. 62 | * 63 | * @return ChunkEncoder constructed 64 | */ 65 | public static ChunkEncoder optimalNonAllocatingInstance(int totalLength) { 66 | try { 67 | return UnsafeChunkEncoders.createNonAllocatingEncoder(totalLength); 68 | } catch (Exception e) { 69 | return safeNonAllocatingInstance(totalLength); 70 | } 71 | } 72 | 73 | /** 74 | * Convenience method, equivalent to: 75 | * 76 | * return safeInstance(LZFChunk.MAX_CHUNK_LEN); 77 | * 78 | * 79 | * @return ChunkEncoder constructed 80 | */ 81 | public static ChunkEncoder safeInstance() { 82 | return safeInstance(LZFChunk.MAX_CHUNK_LEN); 83 | } 84 | 85 | /** 86 | * Method that can be used to ensure that a "safe" compressor instance is loaded. 87 | * Safe here means that it should work on any and all Java platforms. 88 | *
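* "Safe" instances are plain {@link VanillaChunkEncoder}s, which use only
* standard array access and no {@code sun.misc.Unsafe}.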
89 | * Uses a ThreadLocal soft-referenced BufferRecycler instance. 90 | * 91 | * @param totalLength Expected total length of content to compress; only matters 92 | * for content that is smaller than maximum chunk size (64k), to optimize 93 | * encoding hash tables 94 | * 95 | * @return ChunkEncoder constructed 96 | */ 97 | public static ChunkEncoder safeInstance(int totalLength) { 98 | return new VanillaChunkEncoder(totalLength); 99 | } 100 | 101 | /** 102 | * Factory method for constructing encoder that is always passed buffer 103 | * externally, so that it will not (nor need) allocate encoding buffer. 104 | *
Uses a ThreadLocal soft-referenced BufferRecycler instance. 105 | * 106 | * @return ChunkEncoder constructed 107 | */ 108 | public static ChunkEncoder safeNonAllocatingInstance(int totalLength) { 109 | return VanillaChunkEncoder.nonAllocatingEncoder(totalLength); 110 | } 111 | 112 | /** 113 | * Convenience method, equivalent to: 114 | * 115 | * return optimalInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler); 116 | * 117 | * 118 | * @return ChunkEncoder constructed 119 | */ 120 | public static ChunkEncoder optimalInstance(BufferRecycler bufferRecycler) { 121 | return optimalInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler); 122 | } 123 | 124 | /** 125 | * Method to use for getting compressor instance that uses the most optimal 126 | * available methods for underlying data access. It should be safe to call 127 | * this method as implementations are dynamically loaded; however, on some 128 | * non-standard platforms it may be necessary to either directly load 129 | * instances, or use {@link #safeInstance}. 130 | * 131 | * @param totalLength Expected total length of content to compress; only matters 132 | * for content that is smaller than maximum chunk size (64k), to optimize 133 | * encoding hash tables 134 | * @param bufferRecycler The BufferRecycler instance 135 | * 136 | * @return ChunkEncoder constructed 137 | */ 138 | public static ChunkEncoder optimalInstance(int totalLength, BufferRecycler bufferRecycler) { 139 | try { 140 | return UnsafeChunkEncoders.createEncoder(totalLength, bufferRecycler); 141 | } catch (Exception e) { 142 | return safeInstance(totalLength, bufferRecycler); 143 | } 144 | } 145 | 146 | /** 147 | * Factory method for constructing encoder that is always passed buffer 148 | * externally, so that it will not (nor need) allocate encoding buffer. 149 | * 150 | * @return ChunkEncoder constructed 151 | */ 152 | public static ChunkEncoder optimalNonAllocatingInstance(int totalLength, BufferRecycler bufferRecycler) { 153 | try { 154 | return UnsafeChunkEncoders.createNonAllocatingEncoder(totalLength, bufferRecycler); 155 | } catch (Exception e) { 156 | return safeNonAllocatingInstance(totalLength, bufferRecycler); 157 | } 158 | } 159 | 160 | /** 161 | * Convenience method, equivalent to: 162 | * 163 | * return safeInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler); 164 | * 165 | * 166 | * @return ChunkEncoder constructed 167 | */ 168 | public static ChunkEncoder safeInstance(BufferRecycler bufferRecycler) { 169 | return safeInstance(LZFChunk.MAX_CHUNK_LEN, bufferRecycler); 170 | } 171 | /** 172 | * Method that can be used to ensure that a "safe" compressor instance is loaded. 173 | * Safe here means that it should work on any and all Java platforms. 174 | * 175 | * @param totalLength Expected total length of content to compress; only matters 176 | * for content that is smaller than maximum chunk size (64k), to optimize 177 | * encoding hash tables 178 | * @param bufferRecycler The BufferRecycler instance 179 | * 180 | * @return ChunkEncoder constructed 181 | */ 182 | public static ChunkEncoder safeInstance(int totalLength, BufferRecycler bufferRecycler) { 183 | return new VanillaChunkEncoder(totalLength, bufferRecycler); 184 | } 185 | 186 | /** 187 | * Factory method for constructing encoder that is always passed buffer 188 | * externally, so that it will not (nor need) allocate encoding buffer. 
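* (Safe counterpart of the optimal non-allocating factory methods above.)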
189 | */ 190 | public static ChunkEncoder safeNonAllocatingInstance(int totalLength, BufferRecycler bufferRecycler) { 191 | return VanillaChunkEncoder.nonAllocatingEncoder(totalLength, bufferRecycler); 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/LZFEncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.*; 4 | import java.util.Arrays; 5 | 6 | import com.ning.compress.BaseForTests; 7 | import com.ning.compress.lzf.impl.UnsafeChunkEncoder; 8 | import com.ning.compress.lzf.impl.UnsafeChunkEncoderBE; 9 | import com.ning.compress.lzf.impl.UnsafeChunkEncoderLE; 10 | import com.ning.compress.lzf.util.ChunkEncoderFactory; 11 | import org.junit.jupiter.api.Test; 12 | 13 | import static org.junit.jupiter.api.Assertions.*; 14 | 15 | public class LZFEncoderTest extends BaseForTests 16 | { 17 | @Test 18 | public void testBigSizeEstimate() 19 | { 20 | for (int amt : new int[] { 21 | 100, 250, 600, 22 | 10000, 50000, 65000, 120000, 130000, 23 | 3 * 0x10000 + 4, 24 | 15 * 0x10000 + 4, 25 | 1000 * 0x10000 + 4, 26 | }) { 27 | int estimate = LZFEncoder.estimateMaxWorkspaceSize(amt); 28 | int chunks = ((amt + 0xFFFE) / 0xFFFF); 29 | int expMin = 2 + amt + (chunks * 5); // 5-byte header for uncompressed; however, not enough workspace 30 | int expMax = ((int) (0.05 * 0xFFFF)) + amt + (chunks * 7); 31 | if (estimate < expMin || estimate > expMax) { 32 | fail("Expected ratio for "+amt+" to be "+expMin+" <= x <= "+expMax+", was: "+estimate); 33 | } 34 | //System.err.printf("%d < %d < %d\n", expMin, estimate, expMax); 35 | } 36 | } 37 | 38 | // as per [compress-lzf#43] 39 | @Test 40 | public void testSmallSizeEstimate() 41 | { 42 | // and here we ensure that specific uncompressable case won't fail 43 | byte[] in = new byte[] {0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0}; 44 | int outSize = LZFEncoder.estimateMaxWorkspaceSize(in.length); 45 | LZFEncoder.appendEncoded(in, 0, in.length, new byte[outSize], 0); 46 | } 47 | 48 | @Test 49 | public void testCompressableChunksSingle() throws Exception 50 | { 51 | byte[] source = constructFluff(55000); 52 | _testCompressableChunksSingle(source, ChunkEncoderFactory.safeInstance()); 53 | _testCompressableChunksSingle(source, ChunkEncoderFactory.optimalInstance()); 54 | } 55 | 56 | private void _testCompressableChunksSingle(byte[] source, ChunkEncoder encoder) throws Exception 57 | { 58 | byte[] buffer = new byte[LZFEncoder.estimateMaxWorkspaceSize(source.length)]; 59 | int compLen = LZFEncoder.appendEncoded(encoder, source, 0, source.length, buffer, 0); 60 | 61 | // and make sure we get identical compression 62 | byte[] bufferAsBlock = Arrays.copyOf(buffer, compLen); 63 | byte[] asBlockStd = LZFEncoder.encode(source); 64 | assertArrayEquals(bufferAsBlock, asBlockStd); 65 | 66 | // then uncompress, verify 67 | byte[] uncomp = uncompress(buffer, 0, compLen); 68 | 69 | assertArrayEquals(source, uncomp); 70 | } 71 | 72 | @Test 73 | public void testCompressableChunksMulti() throws Exception 74 | { 75 | // let's do bit over 256k, to get multiple chunks 76 | byte[] source = constructFluff(4 * 0xFFFF + 4000); 77 | _testCompressableChunksMulti(source, ChunkEncoderFactory.safeInstance()); 78 | _testCompressableChunksMulti(source, ChunkEncoderFactory.optimalInstance()); 79 | } 80 | 81 | private void _testCompressableChunksMulti(byte[] source, ChunkEncoder encoder) throws 
Exception 82 | { 83 | byte[] buffer = new byte[LZFEncoder.estimateMaxWorkspaceSize(source.length)]; 84 | int compLen = LZFEncoder.appendEncoded(encoder, source, 0, source.length, buffer, 0); 85 | 86 | // and make sure we get identical compression 87 | byte[] bufferAsBlock = Arrays.copyOf(buffer, compLen); 88 | byte[] asBlockStd = LZFEncoder.encode(encoder, source, 0, source.length); 89 | assertArrayEquals(bufferAsBlock, asBlockStd); 90 | 91 | // then uncompress, verify 92 | byte[] uncomp = uncompress(buffer, 0, compLen); 93 | 94 | assertArrayEquals(source, uncomp); 95 | } 96 | 97 | @Test 98 | public void testNonCompressableChunksSingle() throws Exception 99 | { 100 | byte[] source = constructUncompressable(4000); 101 | _testNonCompressableChunksSingle(source, ChunkEncoderFactory.safeInstance()); 102 | _testNonCompressableChunksSingle(source, ChunkEncoderFactory.optimalInstance()); 103 | } 104 | 105 | private void _testNonCompressableChunksSingle(byte[] source, ChunkEncoder encoder) throws Exception 106 | { 107 | byte[] buffer = new byte[LZFEncoder.estimateMaxWorkspaceSize(source.length)]; 108 | int compLen = LZFEncoder.appendEncoded(source, 0, source.length, buffer, 0); 109 | 110 | // and make sure we get identical compression 111 | byte[] bufferAsBlock = Arrays.copyOf(buffer, compLen); 112 | byte[] asBlockStd = LZFEncoder.encode(encoder, source, 0, source.length); 113 | assertArrayEquals(bufferAsBlock, asBlockStd); 114 | 115 | // then uncompress, verify 116 | byte[] uncomp = uncompress(buffer, 0, compLen); 117 | 118 | assertArrayEquals(source, uncomp); 119 | } 120 | 121 | @Test 122 | public void testConditionalCompression() throws Exception 123 | { 124 | final byte[] input = constructFluff(52000); 125 | 126 | _testConditionalCompression(ChunkEncoderFactory.safeInstance(), input); 127 | _testConditionalCompression(ChunkEncoderFactory.optimalInstance(), input); 128 | } 129 | 130 | private void _testConditionalCompression(ChunkEncoder enc, final byte[] input) throws IOException 131 | { 132 | // double-check expected compression ratio 133 | byte[] comp = enc.encodeChunk(input, 0, input.length).getData(); 134 | int pct = (int) (100.0 * comp.length / input.length); 135 | // happens to compress to about 61%, good 136 | assertEquals(61, pct); 137 | 138 | // should be ok if we only require down to 70% compression 139 | byte[] buf = new byte[60000]; 140 | int offset = enc.appendEncodedIfCompresses(input, 0.70, 0, input.length, buf, 0); 141 | assertEquals(comp.length, offset); 142 | 143 | // but not to 60% 144 | offset = enc.appendEncodedIfCompresses(input, 0.60, 0, input.length, buf, 0); 145 | assertEquals(-1, offset); 146 | 147 | // // // Second part: OutputStream alternatives 148 | 149 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(60000); 150 | assertTrue(enc.encodeAndWriteChunkIfCompresses(input, 0, input.length, bytes, 0.70)); 151 | assertEquals(comp.length, bytes.size()); 152 | byte[] output = bytes.toByteArray(); 153 | assertArrayEquals(comp, output); 154 | 155 | bytes = new ByteArrayOutputStream(60000); 156 | assertFalse(enc.encodeAndWriteChunkIfCompresses(input, 0, input.length, bytes, 0.60)); 157 | assertEquals(0, bytes.size()); 158 | 159 | // // // Third part: chunk creation 160 | 161 | LZFChunk chunk = enc.encodeChunkIfCompresses(input, 0, input.length, 0.70); 162 | assertNotNull(chunk); 163 | assertEquals(comp.length, chunk.length()); 164 | assertArrayEquals(comp, chunk.getData()); 165 | 166 | chunk = enc.encodeChunkIfCompresses(input, 0, input.length, 0.60); 167 | 
assertNull(chunk); 168 | } 169 | 170 | @Test 171 | public void testUnsafeValidation() { 172 | _testUnsafeValidation(new UnsafeChunkEncoderBE(10)); 173 | _testUnsafeValidation(new UnsafeChunkEncoderLE(10)); 174 | 175 | } 176 | 177 | private void _testUnsafeValidation(UnsafeChunkEncoder encoder) { 178 | byte[] array = new byte[10]; 179 | int goodStart = 2; 180 | int goodEnd = 5; 181 | 182 | assertThrows(NullPointerException.class, () -> encoder.tryCompress(null, goodStart, goodEnd, array, goodStart)); 183 | assertThrows(NullPointerException.class, () -> encoder.tryCompress(array, goodStart, goodEnd, null, goodStart)); 184 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, -1, goodEnd, array, goodStart)); 185 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, array.length + 1, goodEnd, array, goodStart)); 186 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, goodStart - 1, array, goodStart)); 187 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, array.length + 1, array, goodStart)); 188 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, goodEnd, array, -1)); 189 | assertThrows(ArrayIndexOutOfBoundsException.class, () -> encoder.tryCompress(array, goodStart, goodEnd, array, array.length + 1)); 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/TestLZFInputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.*; 4 | import java.nio.charset.StandardCharsets; 5 | import java.util.Random; 6 | import java.security.SecureRandom; 7 | 8 | import com.ning.compress.BaseForTests; 9 | import org.junit.jupiter.api.BeforeEach; 10 | import org.junit.jupiter.api.Test; 11 | 12 | import static org.junit.jupiter.api.Assertions.*; 13 | 14 | public class TestLZFInputStream extends BaseForTests 15 | { 16 | private static final int BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN * 64; 17 | private final byte[] nonEncodableBytesToWrite = new byte[BUFFER_SIZE]; 18 | private final byte[] bytesToWrite = new byte[BUFFER_SIZE]; 19 | private byte[] nonCompressableBytes; 20 | private final int compressableInputLength = BUFFER_SIZE; 21 | private byte[] compressedBytes; 22 | 23 | @BeforeEach 24 | public void setUp() throws Exception 25 | { 26 | SecureRandom.getInstance("SHA1PRNG").nextBytes(nonEncodableBytesToWrite); 27 | String phrase = "all work and no play make Jack a dull boy"; 28 | byte[] bytes = phrase.getBytes(StandardCharsets.UTF_8); 29 | int cursor = 0; 30 | while(cursor <= bytesToWrite.length) { 31 | System.arraycopy(bytes, 0, bytesToWrite, cursor, (bytes.length+cursor < bytesToWrite.length)?bytes.length:bytesToWrite.length-cursor); 32 | cursor += bytes.length; 33 | } 34 | ByteArrayOutputStream nonCompressed = new ByteArrayOutputStream(); 35 | OutputStream os = new LZFOutputStream(nonCompressed); 36 | os.write(nonEncodableBytesToWrite); 37 | os.close(); 38 | nonCompressableBytes = nonCompressed.toByteArray(); 39 | 40 | ByteArrayOutputStream compressed = new ByteArrayOutputStream(); 41 | os = new LZFOutputStream(compressed); 42 | os.write(bytesToWrite); 43 | os.close(); 44 | compressedBytes = compressed.toByteArray(); 45 | } 46 | 47 | @Test 48 | public void testDecompressNonEncodableReadByte() throws IOException { 49 | 
doDecompressReadByte(nonCompressableBytes, nonEncodableBytesToWrite); 50 | } 51 | 52 | @Test 53 | public void testDecompressNonEncodableReadBlock() throws IOException { 54 | doDecompressReadBlock(nonCompressableBytes, nonEncodableBytesToWrite); 55 | } 56 | 57 | @Test 58 | public void testDecompressEncodableReadByte() throws IOException { 59 | doDecompressReadByte(compressedBytes, bytesToWrite); 60 | } 61 | 62 | @Test 63 | public void testDecompressEncodableReadBlock() throws IOException { 64 | doDecompressReadBlock(compressedBytes, bytesToWrite); 65 | } 66 | 67 | @Test 68 | public void testRead0() throws IOException 69 | { 70 | ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes); 71 | InputStream is = new LZFInputStream(bis); 72 | assertEquals(0, is.available()); 73 | byte[] buffer = new byte[65536+23]; 74 | int val = is.read(buffer, 0, 0); 75 | // read of 0 or less should return a 0-byte read. 76 | assertEquals(0, val); 77 | val = is.read(buffer, 0, -1); 78 | assertEquals(0, val); 79 | // close should work. 80 | is.close(); 81 | } 82 | 83 | @Test 84 | public void testAvailable() throws IOException 85 | { 86 | ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes); 87 | LZFInputStream is = new LZFInputStream(bis); 88 | assertSame(bis, is.getUnderlyingInputStream()); 89 | assertEquals(0, is.available()); 90 | // read one byte; should decode bunch more, make available 91 | assertNotEquals(-1, is.read()); 92 | int total = 1; // since we read one byte already 93 | assertEquals(65534, is.available()); 94 | // and after we skip through all of it, end with -1 for EOF 95 | long count; 96 | while ((count = is.skip(16384L)) > 0L) { 97 | total += (int) count; 98 | } 99 | // nothing more available; but we haven't yet closed so: 100 | assertEquals(0, is.available()); 101 | // and then we close it: 102 | is.close(); 103 | assertEquals(0, is.available()); 104 | assertEquals(compressableInputLength, total); 105 | } 106 | 107 | @Test void testIncrementalWithFullReads() throws IOException { 108 | doTestIncremental(true); 109 | } 110 | 111 | @Test void testIncrementalWithMinimalReads() throws IOException { 112 | doTestIncremental(false); 113 | } 114 | 115 | @Test 116 | public void testReadAndWrite() throws Exception 117 | { 118 | byte[] fluff = constructFluff(132000); 119 | byte[] comp = LZFEncoder.encode(fluff); 120 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(fluff.length); 121 | LZFInputStream in = new LZFInputStream(new ByteArrayInputStream(comp)); 122 | in.readAndWrite(bytes); 123 | in.close(); 124 | byte[] actual = bytes.toByteArray(); 125 | assertArrayEquals(fluff, actual); 126 | } 127 | 128 | // Mostly for [Issue#19] 129 | @Test 130 | public void testLongSkips() throws Exception 131 | { 132 | // 64k per block, 200k gives 3 full, one small 133 | byte[] fluff = constructFluff(200000); 134 | byte[] comp = LZFEncoder.encode(fluff); 135 | 136 | // we get about 200k, maybe byte or two more, so: 137 | final int LENGTH = fluff.length; 138 | 139 | LZFInputStream in = new LZFInputStream(new ByteArrayInputStream(comp)); 140 | // read one byte for fun 141 | assertEquals(fluff[0] & 0xFF, in.read()); 142 | // then skip all but one 143 | long amt = in.skip(LENGTH-2); 144 | assertEquals(LENGTH-2, amt); 145 | assertEquals(fluff[LENGTH-1] & 0xFF, in.read()); 146 | 147 | assertEquals(-1, in.read()); 148 | in.close(); 149 | } 150 | 151 | /* 152 | /////////////////////////////////////////////////////////////////// 153 | // Helper methods 154 | 
/////////////////////////////////////////////////////////////////// 155 | */ 156 | 157 | /** 158 | * Test that creates a longer piece of content, compresses it, and reads 159 | * back in arbitrary small reads. 160 | */ 161 | private void doTestIncremental(boolean fullReads) throws IOException 162 | { 163 | // first need to compress something... 164 | String[] words = new String[] { "what", "ever", "some", "other", "words", "too" }; 165 | StringBuilder sb = new StringBuilder(258000); 166 | Random rnd = new Random(123); 167 | while (sb.length() < 256000) { 168 | int i = (rnd.nextInt() & 31); 169 | if (i < words.length) { 170 | sb.append(words[i]); 171 | } else { 172 | sb.append(i); 173 | } 174 | } 175 | byte[] uncomp = sb.toString().getBytes(StandardCharsets.UTF_8); 176 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 177 | LZFOutputStream lzOut = new LZFOutputStream(bytes); 178 | lzOut.write(uncomp); 179 | lzOut.close(); 180 | byte[] comp = bytes.toByteArray(); 181 | 182 | // read back, in chunks 183 | bytes = new ByteArrayOutputStream(uncomp.length); 184 | byte[] buffer = new byte[500]; 185 | LZFInputStream lzIn = new LZFInputStream(new ByteArrayInputStream(comp), fullReads); 186 | int pos = 0; 187 | 188 | while (true) { 189 | int len = 1 + ((rnd.nextInt() & 0x7FFFFFFF) % buffer.length); 190 | int offset = buffer.length - len; 191 | 192 | int count = lzIn.read(buffer, offset, len); 193 | if (count < 0) { 194 | break; 195 | } 196 | if (count > len) { 197 | fail("Requested "+len+" bytes (offset "+offset+", array length "+buffer.length+"), got "+count); 198 | } 199 | pos += count; 200 | // with full reads, ought to get full results 201 | if (count != len) { 202 | if (fullReads) { 203 | // Except at the end, with last incomplete chunk 204 | if (pos != uncomp.length) { 205 | fail("Got partial read (when requested full read!), position "+pos+" (of full "+uncomp.length+")"); 206 | } 207 | } 208 | } 209 | bytes.write(buffer, offset, count); 210 | } 211 | byte[] result = bytes.toByteArray(); 212 | assertArrayEquals(uncomp, result); 213 | lzIn.close(); 214 | } 215 | 216 | private void doDecompressReadByte(byte[] bytes, byte[] reference) throws IOException 217 | { 218 | ByteArrayInputStream bis = new ByteArrayInputStream(bytes); 219 | InputStream is = new LZFInputStream(bis); 220 | int i = 0; 221 | int testVal; 222 | while((testVal=is.read()) != -1) { 223 | int rVal = ((int)reference[i]) & 255; 224 | assertEquals(rVal, testVal); 225 | ++i; 226 | } 227 | is.close(); 228 | } 229 | 230 | private void doDecompressReadBlock(byte[] bytes, byte[] reference) throws IOException 231 | { 232 | ByteArrayInputStream bis = new ByteArrayInputStream(bytes); 233 | int outputBytes = 0; 234 | InputStream is = new LZFInputStream(bis); 235 | int val; 236 | byte[] buffer = new byte[65536+23]; 237 | while((val=is.read(buffer)) != -1) { 238 | for(int i = 0; i < val; i++) { 239 | byte testVal = buffer[i]; 240 | assertEquals(reference[outputBytes], testVal); 241 | ++outputBytes; 242 | } 243 | } 244 | assertEquals(reference.length, outputBytes); 245 | is.close(); 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/LZFCompressingInputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | 6 | import com.ning.compress.BufferRecycler; 7 | import 
com.ning.compress.lzf.util.ChunkEncoderFactory; 8 | 9 | /** 10 | * Decorator {@link InputStream} implementation used for 11 | * reading uncompressed data 12 | * and compressing it on the fly, such that reads return compressed 13 | * data. 14 | * It is reverse of {@link LZFInputStream} (which instead uncompresses data). 15 | * 16 | * @author Tatu Saloranta 17 | * 18 | * @see com.ning.compress.lzf.LZFInputStream 19 | */ 20 | public class LZFCompressingInputStream extends InputStream 21 | { 22 | private final BufferRecycler _recycler; 23 | 24 | private ChunkEncoder _encoder; 25 | 26 | /** 27 | * Stream used for reading data to be compressed 28 | */ 29 | protected final InputStream _inputStream; 30 | 31 | /** 32 | * Flag that indicates if we have already called 'inputStream.close()' 33 | * (to avoid calling it multiple times) 34 | */ 35 | protected boolean _inputStreamClosed; 36 | 37 | /** 38 | * Flag that indicates whether we force full reads (reading of as many 39 | * bytes as requested), or 'optimal' reads (up to as many as available, 40 | * but at least one). Default is false, meaning that 'optimal' read 41 | * is used. 42 | */ 43 | protected boolean _cfgFullReads = false; 44 | 45 | /** 46 | * Buffer in which uncompressed input is first read, before getting 47 | * encoded in {@link #_encodedBytes}. 48 | */ 49 | protected byte[] _inputBuffer; 50 | 51 | /** 52 | * Buffer that contains compressed data that is returned to readers. 53 | */ 54 | protected byte[] _encodedBytes; 55 | 56 | /** 57 | * The current position (next char to output) in the uncompressed bytes buffer. 58 | */ 59 | protected int _bufferPosition = 0; 60 | 61 | /** 62 | * Length of the current uncompressed bytes buffer 63 | */ 64 | protected int _bufferLength = 0; 65 | 66 | /** 67 | * Number of bytes read from the underlying {@link #_inputStream} 68 | */ 69 | protected int _readCount = 0; 70 | 71 | /* 72 | /////////////////////////////////////////////////////////////////////// 73 | // Construction, configuration 74 | /////////////////////////////////////////////////////////////////////// 75 | */ 76 | 77 | public LZFCompressingInputStream(InputStream in) 78 | { 79 | this(null, in, BufferRecycler.instance()); 80 | } 81 | 82 | public LZFCompressingInputStream(final ChunkEncoder encoder, InputStream in) 83 | { 84 | this(encoder, in, null); 85 | } 86 | 87 | public LZFCompressingInputStream(final ChunkEncoder encoder, InputStream in, BufferRecycler bufferRecycler) 88 | { 89 | // may be passed by caller, or could be null 90 | _encoder = encoder; 91 | _inputStream = in; 92 | if (bufferRecycler==null) { 93 | bufferRecycler = (encoder!=null) ? _encoder._recycler : BufferRecycler.instance(); 94 | } 95 | _recycler = bufferRecycler; 96 | _inputBuffer = bufferRecycler.allocInputBuffer(LZFChunk.MAX_CHUNK_LEN); 97 | // let's not yet allocate encoding buffer; don't know optimal size 98 | } 99 | 100 | /** 101 | * Method that can be used define whether reads should be "full" or 102 | * "optimal": former means that full compressed blocks are read right 103 | * away as needed, optimal that only smaller chunks are read at a time, 104 | * more being read as needed. 
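* For example (illustrative): with full reads enabled, {@code read(buf, 0, 1000)}
* keeps encoding and copying until 1000 bytes have been returned or end-of-input
* is reached; in the default mode it may return fewer bytes after the first chunk.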
105 | */ 106 | public void setUseFullReads(boolean b) { 107 | _cfgFullReads = b; 108 | } 109 | 110 | /* 111 | /////////////////////////////////////////////////////////////////////// 112 | // InputStream implementation 113 | /////////////////////////////////////////////////////////////////////// 114 | */ 115 | 116 | @Override 117 | public int available() 118 | { 119 | if (_inputStreamClosed) { // javadocs suggest 0 for closed as well (not -1) 120 | return 0; 121 | } 122 | int left = (_bufferLength - _bufferPosition); 123 | return (left <= 0) ? 0 : left; 124 | } 125 | 126 | @Override 127 | public int read() throws IOException 128 | { 129 | if (!readyBuffer()) { 130 | return -1; 131 | } 132 | return _encodedBytes[_bufferPosition++] & 255; 133 | } 134 | 135 | @Override 136 | public int read(final byte[] buffer) throws IOException 137 | { 138 | return read(buffer, 0, buffer.length); 139 | } 140 | 141 | @Override 142 | public int read(final byte[] buffer, int offset, int length) throws IOException 143 | { 144 | if (length < 1) { 145 | return 0; 146 | } 147 | if (!readyBuffer()) { 148 | return -1; 149 | } 150 | // First let's read however much data we happen to have... 151 | int chunkLength = Math.min(_bufferLength - _bufferPosition, length); 152 | System.arraycopy(_encodedBytes, _bufferPosition, buffer, offset, chunkLength); 153 | _bufferPosition += chunkLength; 154 | 155 | if (chunkLength == length || !_cfgFullReads) { 156 | return chunkLength; 157 | } 158 | // Need more data, then 159 | int totalRead = chunkLength; 160 | do { 161 | offset += chunkLength; 162 | if (!readyBuffer()) { 163 | break; 164 | } 165 | chunkLength = Math.min(_bufferLength - _bufferPosition, (length - totalRead)); 166 | System.arraycopy(_encodedBytes, _bufferPosition, buffer, offset, chunkLength); 167 | _bufferPosition += chunkLength; 168 | totalRead += chunkLength; 169 | } while (totalRead < length); 170 | 171 | return totalRead; 172 | } 173 | 174 | @Override 175 | public void close() throws IOException 176 | { 177 | _bufferPosition = _bufferLength = 0; 178 | byte[] buf = _encodedBytes; 179 | if (buf != null) { 180 | _encodedBytes = null; 181 | _recycler.releaseEncodeBuffer(buf); 182 | } 183 | if (_encoder != null) { 184 | _encoder.close(); 185 | } 186 | _closeInput(); 187 | } 188 | 189 | private void _closeInput() throws IOException 190 | { 191 | byte[] buf = _inputBuffer; 192 | if (buf != null) { 193 | _inputBuffer = null; 194 | _recycler.releaseInputBuffer(buf); 195 | } 196 | if (!_inputStreamClosed) { 197 | _inputStreamClosed = true; 198 | _inputStream.close(); 199 | } 200 | } 201 | 202 | /** 203 | * Overridden to just skip at most a single chunk at a time 204 | */ 205 | @Override 206 | public long skip(long n) throws IOException 207 | { 208 | if (_inputStreamClosed) { 209 | return -1; 210 | } 211 | int left = (_bufferLength - _bufferPosition); 212 | // if none left, must read more: 213 | if (left <= 0) { 214 | // otherwise must read more to skip... 
215 | int b = read(); 216 | if (b < 0) { // EOF 217 | return -1; 218 | } 219 | // push it back to get accurate skip count 220 | --_bufferPosition; 221 | left = (_bufferLength - _bufferPosition); 222 | } 223 | // either way, just skip whatever we have decoded 224 | if (left > n) { 225 | left = (int) n; 226 | } 227 | _bufferPosition += left; 228 | return left; 229 | } 230 | /* 231 | /////////////////////////////////////////////////////////////////////// 232 | // Internal methods 233 | /////////////////////////////////////////////////////////////////////// 234 | */ 235 | 236 | /** 237 | * Fill the uncompressed bytes buffer by reading the underlying inputStream. 238 | * @throws IOException 239 | */ 240 | protected boolean readyBuffer() throws IOException 241 | { 242 | if (_bufferPosition < _bufferLength) { 243 | return true; 244 | } 245 | if (_inputStreamClosed) { 246 | return false; 247 | } 248 | // Ok: read as much as we can from input source first 249 | int count = _inputStream.read(_inputBuffer, 0, _inputBuffer.length); 250 | if (count < 0) { // if no input read, it's EOF 251 | _closeInput(); // and we can close input source as well 252 | return false; 253 | } 254 | int chunkLength = count; 255 | int left = _inputBuffer.length - count; 256 | 257 | while ((count = _inputStream.read(_inputBuffer, chunkLength, left)) > 0) { 258 | chunkLength += count; 259 | left -= count; 260 | if (left < 1) { 261 | break; 262 | } 263 | } 264 | 265 | _bufferPosition = 0; 266 | // Ok: if we don't yet have an encoder (and buffer for it), let's get one 267 | if (_encoder == null) { 268 | // need 7 byte header, plus regular max buffer size: 269 | int bufferLen = chunkLength + ((chunkLength + 31) >> 5) + 7; 270 | _encoder = ChunkEncoderFactory.optimalNonAllocatingInstance(bufferLen, _recycler); 271 | } 272 | if (_encodedBytes == null) { 273 | int bufferLen = chunkLength + ((chunkLength + 31) >> 5) + 7; 274 | _encodedBytes = _recycler.allocEncodingBuffer(bufferLen); 275 | } 276 | // offset of 7 so we can prepend header as necessary 277 | int encodeEnd = _encoder.tryCompress(_inputBuffer, 0, chunkLength, _encodedBytes, 7); 278 | // but did it compress? 279 | if (encodeEnd < (chunkLength + 5)) { // yes! (compared to 5 byte uncomp prefix, data) 280 | // prepend header in situ 281 | LZFChunk.appendCompressedHeader(chunkLength, encodeEnd-7, _encodedBytes, 0); 282 | _bufferLength = encodeEnd; 283 | } else { // no -- so sad... 284 | int ptr = LZFChunk.appendNonCompressedHeader(chunkLength, _encodedBytes, 0); 285 | // TODO: figure out a way to avoid this copy; need a header 286 | System.arraycopy(_inputBuffer, 0, _encodedBytes, ptr, chunkLength); 287 | _bufferLength = ptr + chunkLength; 288 | } 289 | if (count < 0) { // did we get end-of-input? 
290 | _closeInput(); 291 | } 292 | return true; 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/LZFOutputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import java.io.FilterOutputStream; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.OutputStream; 7 | import java.nio.ByteBuffer; 8 | import java.nio.MappedByteBuffer; 9 | import java.nio.channels.FileChannel; 10 | import java.nio.channels.FileChannel.MapMode; 11 | import java.nio.channels.WritableByteChannel; 12 | 13 | import com.ning.compress.BufferRecycler; 14 | import com.ning.compress.lzf.util.ChunkEncoderFactory; 15 | 16 | /** 17 | * Decorator {@link OutputStream} implementation that will compress 18 | * output using LZF compression algorithm, given uncompressed input 19 | * to write. Its counterpart is {@link LZFInputStream}; although 20 | * in some ways {@link LZFCompressingInputStream} can be seen 21 | * as the opposite. 22 | * 23 | * @author jon hartlaub 24 | * @author Tatu Saloranta 25 | * 26 | * @see LZFInputStream 27 | * @see LZFCompressingInputStream 28 | */ 29 | public class LZFOutputStream extends FilterOutputStream implements WritableByteChannel 30 | { 31 | private static final int DEFAULT_OUTPUT_BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN; 32 | 33 | private final ChunkEncoder _encoder; 34 | private final BufferRecycler _recycler; 35 | 36 | protected byte[] _outputBuffer; 37 | protected int _position = 0; 38 | 39 | /** 40 | * Configuration setting that governs whether basic 'flush()' should 41 | * first complete a block or not. 42 | *
43 | * Default value is 'true' 44 | */ 45 | protected boolean _cfgFinishBlockOnFlush = true; 46 | 47 | /** 48 | * Flag that indicates if we have already called '_outputStream.close()' 49 | * (to avoid calling it multiple times) 50 | */ 51 | protected boolean _outputStreamClosed; 52 | 53 | /* 54 | /////////////////////////////////////////////////////////////////////// 55 | // Construction, configuration 56 | /////////////////////////////////////////////////////////////////////// 57 | */ 58 | 59 | public LZFOutputStream(final OutputStream outputStream) 60 | { 61 | this(ChunkEncoderFactory.optimalInstance(DEFAULT_OUTPUT_BUFFER_SIZE), outputStream); 62 | } 63 | 64 | public LZFOutputStream(final ChunkEncoder encoder, final OutputStream outputStream) 65 | { 66 | this(encoder, outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, encoder._recycler); 67 | } 68 | 69 | public LZFOutputStream(final OutputStream outputStream, final BufferRecycler bufferRecycler) 70 | { 71 | this(ChunkEncoderFactory.optimalInstance(bufferRecycler), outputStream, bufferRecycler); 72 | } 73 | 74 | public LZFOutputStream(final ChunkEncoder encoder, final OutputStream outputStream, final BufferRecycler bufferRecycler) 75 | { 76 | this(encoder, outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, bufferRecycler); 77 | } 78 | 79 | public LZFOutputStream(final ChunkEncoder encoder, final OutputStream outputStream, 80 | final int bufferSize, BufferRecycler bufferRecycler) 81 | { 82 | super(outputStream); 83 | _encoder = encoder; 84 | if (bufferRecycler==null) { 85 | bufferRecycler = _encoder._recycler; 86 | } 87 | _recycler = bufferRecycler; 88 | _outputBuffer = bufferRecycler.allocOutputBuffer(bufferSize); 89 | _outputStreamClosed = false; 90 | } 91 | 92 | /** 93 | * Method for defining whether a call to {@link #flush} will also complete 94 | * the current block (similar to calling {@link #finishBlock()}) or not.
95 | */ 96 | public LZFOutputStream setFinishBlockOnFlush(boolean b) { 97 | _cfgFinishBlockOnFlush = b; 98 | return this; 99 | } 100 | 101 | /* 102 | /////////////////////////////////////////////////////////////////////// 103 | // OutputStream impl 104 | /////////////////////////////////////////////////////////////////////// 105 | */ 106 | 107 | @Override 108 | public void write(final int singleByte) throws IOException 109 | { 110 | checkNotClosed(); 111 | if (_position >= _outputBuffer.length) { 112 | writeCompressedBlock(); 113 | } 114 | _outputBuffer[_position++] = (byte) singleByte; 115 | } 116 | 117 | @Override 118 | public void write(final byte[] buffer, int offset, int length) throws IOException 119 | { 120 | checkNotClosed(); 121 | 122 | final int BUFFER_LEN = _outputBuffer.length; 123 | 124 | // simple case first: empty _outputBuffer and "big" input buffer: write first full blocks, if any, without copying 125 | while (_position == 0 && length >= BUFFER_LEN) { 126 | _encoder.encodeAndWriteChunk(buffer, offset, BUFFER_LEN, out); 127 | offset += BUFFER_LEN; 128 | length -= BUFFER_LEN; 129 | } 130 | 131 | // simple case first: buffering only (for trivially short writes) 132 | int free = BUFFER_LEN - _position; 133 | if (free > length) { 134 | System.arraycopy(buffer, offset, _outputBuffer, _position, length); 135 | _position += length; 136 | return; 137 | } 138 | // otherwise, copy whatever we can, flush 139 | System.arraycopy(buffer, offset, _outputBuffer, _position, free); 140 | offset += free; 141 | length -= free; 142 | _position += free; 143 | writeCompressedBlock(); 144 | 145 | // then write intermediate full blocks, if any, without copying: 146 | while (length >= BUFFER_LEN) { 147 | _encoder.encodeAndWriteChunk(buffer, offset, BUFFER_LEN, out); 148 | offset += BUFFER_LEN; 149 | length -= BUFFER_LEN; 150 | } 151 | 152 | // and finally, copy leftovers in buffer, if any 153 | if (length > 0) { 154 | System.arraycopy(buffer, offset, _outputBuffer, 0, length); 155 | } 156 | _position = length; 157 | } 158 | 159 | public void write(final InputStream in) throws IOException { 160 | writeCompressedBlock(); // will flush _outputBuffer 161 | int read; 162 | while ((read = in.read(_outputBuffer)) >= 0) { 163 | _position = read; 164 | writeCompressedBlock(); 165 | } 166 | } 167 | 168 | public void write(final FileChannel in) throws IOException { 169 | MappedByteBuffer src = in.map(MapMode.READ_ONLY, 0, in.size()); 170 | write(src); 171 | } 172 | 173 | @Override 174 | public synchronized int write(final ByteBuffer src) throws IOException { 175 | int r = src.remaining(); 176 | if (r <= 0) { 177 | return r; 178 | } 179 | writeCompressedBlock(); // will flush _outputBuffer 180 | if (src.hasArray()) { 181 | // direct compression from backing array 182 | write(src.array(), src.arrayOffset(), src.limit() - src.arrayOffset()); 183 | } else { 184 | // need to copy to heap array first 185 | while (src.hasRemaining()) { 186 | int toRead = Math.min(src.remaining(), _outputBuffer.length); 187 | src.get(_outputBuffer, 0, toRead); 188 | _position = toRead; 189 | writeCompressedBlock(); 190 | } 191 | } 192 | return r; 193 | } 194 | 195 | @Override 196 | public void flush() throws IOException 197 | { 198 | checkNotClosed(); 199 | if (_cfgFinishBlockOnFlush && _position > 0) { 200 | writeCompressedBlock(); 201 | } 202 | super.flush(); 203 | } 204 | 205 | @Override 206 | public boolean isOpen() { 207 | return ! 
_outputStreamClosed; 208 | } 209 | 210 | @Override 211 | public void close() throws IOException 212 | { 213 | if (!_outputStreamClosed) { 214 | if (_position > 0) { 215 | writeCompressedBlock(); 216 | } 217 | super.close(); // will flush beforehand 218 | _encoder.close(); 219 | _outputStreamClosed = true; 220 | byte[] buf = _outputBuffer; 221 | if (buf != null) { 222 | _outputBuffer = null; 223 | _recycler.releaseOutputBuffer(buf); 224 | } 225 | } 226 | } 227 | 228 | /* 229 | /////////////////////////////////////////////////////////////////////// 230 | // Additional public methods 231 | /////////////////////////////////////////////////////////////////////// 232 | */ 233 | 234 | /** 235 | * Method that can be used to find the underlying {@link OutputStream} that 236 | * we write LZF-encoded data into, after compressing it. 237 | * Will never return null, although the underlying stream may be closed 238 | * (if this stream has been closed). 239 | */ 240 | public OutputStream getUnderlyingOutputStream() { 241 | return out; 242 | } 243 | 244 | /** 245 | * Accessor for checking whether a call to "flush()" will first finish the 246 | * current block or not. 247 | */ 248 | public boolean getFinishBlockOnFlush() { 249 | return _cfgFinishBlockOnFlush; 250 | } 251 | 252 | /** 253 | * Method that can be used to force completion of the current block, 254 | * which means that all buffered data will be compressed into an 255 | * LZF block. This typically results in a lower compression ratio, 256 | * as larger blocks compress better, but may be necessary for 257 | * network connections to ensure timely sending of data. 258 | */ 259 | public LZFOutputStream finishBlock() throws IOException 260 | { 261 | checkNotClosed(); 262 | if (_position > 0) { 263 | writeCompressedBlock(); 264 | } 265 | return this; 266 | } 267 | 268 | /* 269 | /////////////////////////////////////////////////////////////////////// 270 | // Internal methods 271 | /////////////////////////////////////////////////////////////////////// 272 | */ 273 | 274 | /** 275 | * Compress and write the current block to the OutputStream 276 | */ 277 | protected void writeCompressedBlock() throws IOException 278 | { 279 | int left = _position; 280 | _position = 0; 281 | int offset = 0; 282 | 283 | while (left > 0) { 284 | int chunkLen = Math.min(LZFChunk.MAX_CHUNK_LEN, left); 285 | _encoder.encodeAndWriteChunk(_outputBuffer, offset, chunkLen, out); 286 | offset += chunkLen; 287 | left -= chunkLen; 288 | } 289 | } 290 | 291 | protected void checkNotClosed() throws IOException 292 | { 293 | if (_outputStreamClosed) { 294 | throw new IOException(getClass().getName()+" already closed"); 295 | } 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /src/test/java/com/ning/compress/lzf/TestFuzzUnsafeLZF.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf; 2 | 3 | import com.code_intelligence.jazzer.junit.FuzzTest; 4 | import com.code_intelligence.jazzer.mutation.annotation.InRange; 5 | import com.code_intelligence.jazzer.mutation.annotation.NotNull; 6 | import com.code_intelligence.jazzer.mutation.annotation.WithLength; 7 | import com.ning.compress.BufferRecycler; 8 | import com.ning.compress.lzf.impl.*; 9 | 10 | import java.io.ByteArrayInputStream; 11 | import java.io.IOException; 12 | import java.io.OutputStream; 13 | import java.lang.annotation.Retention; 14 | import java.lang.annotation.RetentionPolicy; 15 | import java.util.Arrays; 16 | 
import java.util.stream.Stream; 17 | 18 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | 21 | /** 22 | * Fuzzing test using Jazzer (https://github.com/CodeIntelligenceTesting/jazzer/) for 23 | * the LZF decoder and encoder which use {@link sun.misc.Unsafe}. 24 | * 25 | *
By default the tests are run in 'regression mode', where no fuzzing is performed. 26 | * To run in 'fuzzing mode', set the environment variable {@code JAZZER_FUZZ=1}; see 27 | * also the {@code pom.xml} of this project. 28 | * 29 | *
See the Jazzer README for more information. 30 | */ 31 | public class TestFuzzUnsafeLZF { 32 | /* 33 | * Important: 34 | * These fuzz test methods all have to be listed separately in the `pom.xml` to 35 | * support running them in fuzzing mode, see https://github.com/CodeIntelligenceTesting/jazzer/issues/599 36 | */ 37 | 38 | @FuzzTest(maxDuration = "30s") 39 | @Retention(RetentionPolicy.RUNTIME) 40 | @interface LZFFuzzTest { 41 | } 42 | 43 | // This fuzz test performs decoding twice and verifies that the result is the same (either same decoded value or both exception) 44 | @LZFFuzzTest 45 | void decode(byte @NotNull @WithLength(min = 0, max = 32767) [] input, byte @NotNull [] suffix, @InRange(min = 0, max = 32767) int outputSize) { 46 | byte[] output = new byte[outputSize]; 47 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder(); 48 | 49 | byte[] input1 = input.clone(); 50 | 51 | // For the second decoding, append a suffix which should be ignored 52 | byte[] input2 = new byte[input.length + suffix.length]; 53 | System.arraycopy(input, 0, input2, 0, input.length); 54 | // Append suffix 55 | System.arraycopy(suffix, 0, input2, input.length, suffix.length); 56 | 57 | byte[] decoded1 = null; 58 | try { 59 | int decodedLen = decoder.decode(input1, 0, input.length, output); 60 | decoded1 = Arrays.copyOf(output, decodedLen); 61 | } catch (LZFException | ArrayIndexOutOfBoundsException ignored) { 62 | } 63 | 64 | // Repeat decoding, this time with (ignored) suffix and prefilled output 65 | // Should lead to same decoded result 66 | Arrays.fill(output, (byte) 0xFF); 67 | byte[] decoded2 = null; 68 | try { 69 | int decodedLen = decoder.decode(input2, 0, input.length, output); 70 | decoded2 = Arrays.copyOf(output, decodedLen); 71 | } catch (LZFException | ArrayIndexOutOfBoundsException ignored) { 72 | } 73 | 74 | assertArrayEquals(decoded1, decoded2); 75 | 76 | // Compare with result of vanilla decoder 77 | byte[] decodedVanilla = null; 78 | try { 79 | int decodedLen = new VanillaChunkDecoder().decode(input, output); 80 | decodedVanilla = Arrays.copyOf(output, decodedLen); 81 | } catch (Exception ignored) { 82 | } 83 | assertArrayEquals(decodedVanilla, decoded1); 84 | 85 | } 86 | 87 | @LZFFuzzTest 88 | // `boolean dummy` parameter is as workaround for https://github.com/CodeIntelligenceTesting/jazzer/issues/1022 89 | void roundtrip(byte @NotNull @WithLength(min = 1, max = 32767) [] input, boolean dummy) throws LZFException { 90 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder(); 91 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) { 92 | byte[] decoded = decoder.decode(LZFEncoder.encode(encoder, input.clone(), input.length)); 93 | assertArrayEquals(input, decoded); 94 | } 95 | } 96 | 97 | 98 | // Note: These encoder fuzz tests only cover the encoder implementation matching the platform endianness; 99 | // don't cover the other endianness here because that could lead to failures simply due to endianness 100 | // mismatch, and not due to an actual bug in the implementation 101 | 102 | @LZFFuzzTest 103 | void encode(byte @NotNull @WithLength(min = 1, max = 32767) [] input, byte @NotNull [] suffix) { 104 | byte[] input1 = input.clone(); 105 | 106 | // For the second encoding, append a suffix which should be ignored 107 | byte[] input2 = new byte[input.length + suffix.length]; 108 | System.arraycopy(input, 0, input2, 0, input.length); 109 | // Append suffix 110 | System.arraycopy(suffix, 0, input2, input.length, suffix.length); 111 | 112 | 
byte[] encoded1; 113 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) { 114 | encoded1 = LZFEncoder.encode(encoder, input1, input.length); 115 | } 116 | 117 | byte[] encoded2; 118 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) { 119 | encoded2 = LZFEncoder.encode(encoder, input2, input.length); 120 | } 121 | assertArrayEquals(encoded1, encoded2); 122 | 123 | // Compare with result of vanilla encoder 124 | byte[] encodedVanilla; 125 | try (VanillaChunkEncoder encoder = new VanillaChunkEncoder(input.length, new BufferRecycler())) { 126 | encodedVanilla = LZFEncoder.encode(encoder, input, input.length); 127 | } 128 | assertArrayEquals(encodedVanilla, encoded1); 129 | } 130 | 131 | @LZFFuzzTest 132 | void encodeAppend(byte @NotNull @WithLength(min = 1, max = 32767) [] input, @InRange(min = 0, max = 32767) int outputSize) { 133 | byte[] output = new byte[outputSize]; 134 | // Prefill output; should have no effect on encoded result 135 | Arrays.fill(output, (byte) 0xFF); 136 | int encodedLen; 137 | try (UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(input.length, new BufferRecycler())) { 138 | encodedLen = LZFEncoder.appendEncoded(encoder, input.clone(), 0, input.length, output, 0); 139 | } catch (ArrayIndexOutOfBoundsException | IllegalArgumentException ignored) { 140 | // Skip comparison with vanilla encoder 141 | return; 142 | } 143 | 144 | byte[] encodedUnsafe = Arrays.copyOf(output, encodedLen); 145 | 146 | // Compare with result of vanilla encoder 147 | Arrays.fill(output, (byte) 0); 148 | try (VanillaChunkEncoder encoder = new VanillaChunkEncoder(input.length, new BufferRecycler())) { 149 | encodedLen = LZFEncoder.appendEncoded(encoder, input, 0, input.length, output, 0); 150 | } 151 | // TODO: VanillaChunkEncoder performs out-of-bounds array index whereas UnsafeChunkEncoder does not (not sure which one is correct) 152 | // Why do they even have different `_handleTail` implementations, UnsafeChunkEncoder is not using Unsafe there? 153 | catch (ArrayIndexOutOfBoundsException ignored) { 154 | return; 155 | } 156 | byte[] encodedVanilla = Arrays.copyOf(output, encodedLen); 157 | assertArrayEquals(encodedVanilla, encodedUnsafe); 158 | } 159 | 160 | /// Note: Also cover LZFInputStream and LZFOutputStream because they in parts use methods of the decoder and encoder 161 | /// which are otherwise not reachable 162 | 163 | @LZFFuzzTest 164 | void inputStreamRead(byte @NotNull @WithLength(min = 0, max = 32767) [] input, @InRange(min = 1, max = 32767) int readBufferSize) throws IOException { 165 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder(); 166 | try (LZFInputStream inputStream = new LZFInputStream(decoder, new ByteArrayInputStream(input), new BufferRecycler(), false)) { 167 | byte[] readBuffer = new byte[readBufferSize]; 168 | while (inputStream.read(readBuffer) != -1) { 169 | // Do nothing, just consume the data 170 | } 171 | } catch (LZFException | ArrayIndexOutOfBoundsException ignored) { 172 | } 173 | // TODO: This IndexOutOfBoundsException occurs because LZFInputStream makes an invalid call to ByteArrayInputStream 174 | // The reason seems to be that `_inputBuffer` is only MAX_CHUNK_LEN large, but should be `2 + MAX_CHUNK_LEN` to 175 | // account for first two bytes encoding the length? 
(might affect more places in code) 176 | catch (IndexOutOfBoundsException ignored) { 177 | } 178 | } 179 | 180 | @LZFFuzzTest 181 | void inputStreamSkip(byte @NotNull @WithLength(min = 0, max = 32767) [] input, @InRange(min = 1, max = 32767) int skipCount) throws IOException { 182 | UnsafeChunkDecoder decoder = new UnsafeChunkDecoder(); 183 | try (LZFInputStream inputStream = new LZFInputStream(decoder, new ByteArrayInputStream(input), new BufferRecycler(), false)) { 184 | while (inputStream.skip(skipCount) > 0) { 185 | // Do nothing, just consume the data 186 | } 187 | } catch (LZFException ignored) { 188 | } 189 | // TODO: This IndexOutOfBoundsException occurs because LZFInputStream makes an invalid call to ByteArrayInputStream 190 | // The reason seems to be that `_inputBuffer` is only MAX_CHUNK_LEN large, but should be `2 + MAX_CHUNK_LEN` to 191 | // account for first two bytes encoding the length? (might affect more places in code) 192 | catch (IndexOutOfBoundsException ignored) { 193 | } 194 | } 195 | 196 | private static class NullOutputStream extends OutputStream { 197 | public static final OutputStream INSTANCE = new NullOutputStream(); 198 | 199 | private NullOutputStream() { 200 | } 201 | 202 | @Override 203 | public void write(int b) { 204 | // Do nothing 205 | } 206 | 207 | @Override 208 | public void write(byte[] b, int off, int len) { 209 | // Do nothing 210 | } 211 | } 212 | 213 | @LZFFuzzTest 214 | // Generates multiple arrays and writes them separately 215 | void outputStream(byte @NotNull @WithLength(min = 1, max = 10) [] @NotNull @WithLength(min = 1) [] arrays, @InRange(min = 1, max = 32767) int bufferSize) throws IOException { 216 | int totalLength = Stream.of(arrays).mapToInt(a -> a.length).sum(); 217 | 218 | UnsafeChunkEncoder encoder = UnsafeChunkEncoders.createEncoder(totalLength, new BufferRecycler()); 219 | try (LZFOutputStream outputStream = new LZFOutputStream(encoder, NullOutputStream.INSTANCE, bufferSize, null)) { 220 | for (byte[] array : arrays) { 221 | outputStream.write(array); 222 | } 223 | } 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/main/java/com/ning/compress/lzf/parallel/PLZFOutputStream.java: -------------------------------------------------------------------------------- 1 | package com.ning.compress.lzf.parallel; 2 | 3 | import java.io.FilterOutputStream; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.OutputStream; 7 | import java.lang.management.ManagementFactory; 8 | import java.lang.management.OperatingSystemMXBean; 9 | import java.nio.ByteBuffer; 10 | import java.nio.MappedByteBuffer; 11 | import java.nio.channels.FileChannel; 12 | import java.nio.channels.FileChannel.MapMode; 13 | import java.nio.channels.WritableByteChannel; 14 | import java.util.ArrayList; 15 | import java.util.Collection; 16 | import java.util.concurrent.ExecutorService; 17 | import java.util.concurrent.Executors; 18 | import java.util.concurrent.Future; 19 | import java.util.concurrent.LinkedBlockingQueue; 20 | import java.util.concurrent.ThreadPoolExecutor; 21 | import java.util.concurrent.TimeUnit; 22 | 23 | import com.ning.compress.lzf.LZFChunk; 24 | 25 | /** 26 | * Decorator {@link OutputStream} implementation that will compress 27 | * output using LZF compression algorithm, given uncompressed input 28 | * to write. 
Its counterpart is {@link com.ning.compress.lzf.LZFInputStream}, although 29 | * in some ways {@link com.ning.compress.lzf.LZFCompressingInputStream} can be seen 30 | * as the opposite. 31 | *
32 | * This class uses a parallel implementation to make use of all available cores, 33 | * modulo system load. 34 | * 35 | * @author Tatu Saloranta 36 | * @author Cédrik Lime 37 | * 38 | * @see com.ning.compress.lzf.LZFInputStream 39 | * @see com.ning.compress.lzf.LZFCompressingInputStream 40 | * @see com.ning.compress.lzf.LZFOutputStream 41 | */ 42 | public class PLZFOutputStream extends FilterOutputStream implements WritableByteChannel 43 | { 44 | private static final int DEFAULT_OUTPUT_BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN; 45 | 46 | protected byte[] _outputBuffer; 47 | protected int _position = 0; 48 | 49 | /** 50 | * Flag that indicates if we have already called '_outputStream.close()' 51 | * (to avoid calling it multiple times) 52 | */ 53 | protected boolean _outputStreamClosed; 54 | 55 | private BlockManager blockManager; 56 | private final ExecutorService compressExecutor; 57 | private final ExecutorService writeExecutor; 58 | volatile Exception writeException = null; 59 | 60 | 61 | /* 62 | /////////////////////////////////////////////////////////////////////// 63 | // Construction, configuration 64 | /////////////////////////////////////////////////////////////////////// 65 | */ 66 | 67 | public PLZFOutputStream(final OutputStream outputStream) { 68 | this(outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, getNThreads()); 69 | } 70 | 71 | protected PLZFOutputStream(final OutputStream outputStream, int nThreads) { 72 | this(outputStream, DEFAULT_OUTPUT_BUFFER_SIZE, nThreads); 73 | } 74 | 75 | protected PLZFOutputStream(final OutputStream outputStream, final int bufferSize, int nThreads) { 76 | super(outputStream); 77 | _outputStreamClosed = false; 78 | compressExecutor = new ThreadPoolExecutor(nThreads, nThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue()); // unbounded 79 | ((ThreadPoolExecutor)compressExecutor).allowCoreThreadTimeOut(true); 80 | writeExecutor = Executors.newSingleThreadExecutor(); // unbounded 81 | blockManager = new BlockManager(nThreads * 2, bufferSize); // this is where the bounds will be enforced! 82 | _outputBuffer = blockManager.getBlockFromPool(); 83 | } 84 | 85 | protected static int getNThreads() { 86 | int nThreads = Runtime.getRuntime().availableProcessors(); 87 | OperatingSystemMXBean jmx = ManagementFactory.getOperatingSystemMXBean(); 88 | 89 | if (jmx != null) { 90 | int loadAverage = (int) jmx.getSystemLoadAverage(); 91 | if (nThreads > 1 && loadAverage >= 1) { 92 | nThreads = Math.max(1, nThreads - loadAverage); 93 | } 94 | } 95 | return nThreads; 96 | } 97 | 98 | /* 99 | /////////////////////////////////////////////////////////////////////// 100 | // OutputStream impl 101 | /////////////////////////////////////////////////////////////////////// 102 | */ 103 | 104 | /** 105 | * {@inheritDoc} 106 | * WARNING: using this method will lead to very poor performance! 
107 | */ 108 | @Override 109 | public void write(final int singleByte) throws IOException 110 | { 111 | checkNotClosed(); 112 | if (_position >= _outputBuffer.length) { 113 | writeCompressedBlock(); 114 | } 115 | _outputBuffer[_position++] = (byte) singleByte; 116 | } 117 | 118 | @Override 119 | public void write(final byte[] buffer, int offset, int length) throws IOException 120 | { 121 | checkNotClosed(); 122 | 123 | final int BUFFER_LEN = _outputBuffer.length; 124 | 125 | // simple case first: buffering only (for trivially short writes) 126 | int free = BUFFER_LEN - _position; 127 | if (free > length) { 128 | System.arraycopy(buffer, offset, _outputBuffer, _position, length); 129 | _position += length; 130 | return; 131 | } 132 | // otherwise, copy whatever we can, flush 133 | System.arraycopy(buffer, offset, _outputBuffer, _position, free); 134 | offset += free; 135 | length -= free; 136 | _position += free; 137 | writeCompressedBlock(); 138 | 139 | // then write intermediate full blocks, if any: 140 | while (length >= BUFFER_LEN) { 141 | System.arraycopy(buffer, offset, _outputBuffer, 0, BUFFER_LEN); 142 | _position = BUFFER_LEN; 143 | writeCompressedBlock(); 144 | offset += BUFFER_LEN; 145 | length -= BUFFER_LEN; 146 | } 147 | 148 | // and finally, copy leftovers in buffer, if any 149 | if (length > 0) { 150 | System.arraycopy(buffer, offset, _outputBuffer, 0, length); 151 | } 152 | _position = length; 153 | } 154 | 155 | public void write(final InputStream in) throws IOException { 156 | writeCompressedBlock(); // will flush _outputBuffer 157 | int read; 158 | while ((read = in.read(_outputBuffer)) >= 0) { 159 | _position = read; 160 | writeCompressedBlock(); 161 | } 162 | } 163 | 164 | public void write(final FileChannel in) throws IOException { 165 | MappedByteBuffer src = in.map(MapMode.READ_ONLY, 0, in.size()); 166 | write(src); 167 | } 168 | 169 | @Override 170 | public synchronized int write(final ByteBuffer src) throws IOException { 171 | int r = src.remaining(); 172 | if (r <= 0) { 173 | return r; 174 | } 175 | writeCompressedBlock(); // will flush _outputBuffer 176 | if (src.hasArray()) { 177 | // direct compression from backing array 178 | write(src.array(), src.arrayOffset(), src.limit() - src.arrayOffset()); 179 | } else { 180 | // need to copy to heap array first 181 | while (src.hasRemaining()) { 182 | int toRead = Math.min(src.remaining(), _outputBuffer.length); 183 | src.get(_outputBuffer, 0, toRead); 184 | _position = toRead; 185 | writeCompressedBlock(); 186 | } 187 | } 188 | return r; 189 | } 190 | 191 | 192 | /** 193 | * This flush method does nothing. 194 | */ 195 | @Override 196 | public void flush() throws IOException 197 | { 198 | checkNotClosed(); 199 | } 200 | 201 | @Override 202 | public boolean isOpen() { 203 | return ! _outputStreamClosed; 204 | } 205 | 206 | @Override 207 | public void close() throws IOException 208 | { 209 | if (!_outputStreamClosed) { 210 | if (_position > 0) { 211 | writeCompressedBlock(); 212 | } 213 | byte[] buf = _outputBuffer; 214 | if (buf != null) { 215 | blockManager.releaseBlockToPool(_outputBuffer); 216 | _outputBuffer = null; 217 | } 218 | writeExecutor.shutdown(); 219 | try { 220 | writeExecutor.awaitTermination(1, TimeUnit.HOURS); 221 | // at this point compressExecutor should have no pending tasks: cleanup ThreadLocal's 222 | // we don't know how many threads; go to the max for now. This will change once we get a proper configuration bean. 
223 | int maxThreads = Runtime.getRuntime().availableProcessors(); 224 | Collection<CompressTask> cleanupTasks = new ArrayList<CompressTask>(maxThreads); 225 | for (int i = 0; i < maxThreads; ++i) { 226 | cleanupTasks.add(new CompressTask(null, -1, -1, null)); 227 | } 228 | compressExecutor.invokeAll(cleanupTasks); 229 | compressExecutor.shutdown(); 230 | compressExecutor.awaitTermination(1, TimeUnit.MINUTES); 231 | } catch (InterruptedException e) { 232 | throw new IOException(e); 233 | } finally { 234 | super.flush(); 235 | super.close(); 236 | _outputStreamClosed = true; 237 | compressExecutor.shutdownNow(); 238 | writeExecutor.shutdownNow(); 239 | blockManager = null; 240 | checkWriteException(); 241 | } 242 | } 243 | } 244 | 245 | /* 246 | /////////////////////////////////////////////////////////////////////// 247 | // Additional public methods 248 | /////////////////////////////////////////////////////////////////////// 249 | */ 250 | 251 | /** 252 | * Method that can be used to find the underlying {@link OutputStream} that 253 | * we write LZF-encoded data into, after compressing it. 254 | * Will never return null, although the underlying stream may be closed 255 | * (if this stream has been closed). 256 | */ 257 | public OutputStream getUnderlyingOutputStream() { 258 | return out; 259 | } 260 | 261 | /* 262 | /////////////////////////////////////////////////////////////////////// 263 | // Internal methods 264 | /////////////////////////////////////////////////////////////////////// 265 | */ 266 | 267 | /** 268 | * Compress and write the current block to the OutputStream 269 | */ 270 | protected void writeCompressedBlock() throws IOException 271 | { 272 | if (_position == 0) { 273 | return; 274 | } 275 | Future<byte[]> lzfFuture = compressExecutor.submit(new CompressTask(_outputBuffer, 0, _position, blockManager)); 276 | writeExecutor.execute(new WriteTask(out, lzfFuture, this)); 277 | _outputBuffer = blockManager.getBlockFromPool(); 278 | _position = 0; 279 | checkWriteException(); 280 | } 281 | 282 | protected void checkWriteException() throws IOException { 283 | if (writeException != null) { 284 | IOException ioe = (writeException instanceof IOException) ? (IOException) writeException : new IOException(writeException); 285 | writeException = null; 286 | throw ioe; 287 | } 288 | } 289 | 290 | protected void checkNotClosed() throws IOException 291 | { 292 | if (_outputStreamClosed) { 293 | throw new IOException(getClass().getName()+" already closed"); 294 | } 295 | } 296 | } 297 | --------------------------------------------------------------------------------
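Usage sketch: the stream classes dumped above compose into a simple round trip. This is a minimal, hedged example, not part of the repository sources; the class name LZFStreamExample, the sample data, and the buffer sizes are illustrative assumptions, while the constructors and read/write calls are the ones shown in the files above.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    import com.ning.compress.lzf.LZFInputStream;
    import com.ning.compress.lzf.LZFOutputStream;
    import com.ning.compress.lzf.parallel.PLZFOutputStream;

    // Illustrative class name; not part of the repository
    public class LZFStreamExample
    {
        public static void main(String[] args) throws IOException
        {
            // Build some compressible input, similar to what the tests above do
            StringBuilder sb = new StringBuilder(64000);
            while (sb.length() < 60000) {
                sb.append("what ever some other words too ");
            }
            byte[] original = sb.toString().getBytes(StandardCharsets.UTF_8);

            // Compress: LZFOutputStream buffers up to LZFChunk.MAX_CHUNK_LEN bytes
            // and writes each full buffer as one LZF chunk; close() finishes the
            // last partial block
            ByteArrayOutputStream compressed = new ByteArrayOutputStream();
            try (LZFOutputStream out = new LZFOutputStream(compressed)) {
                out.write(original);
            }

            // Uncompress: with the default 'optimal' reads, read() may return fewer
            // bytes than requested (one decoded chunk at a time), so loop until EOF
            ByteArrayOutputStream restored = new ByteArrayOutputStream();
            try (LZFInputStream in = new LZFInputStream(new ByteArrayInputStream(compressed.toByteArray()))) {
                byte[] buf = new byte[4096];
                int count;
                while ((count = in.read(buf)) >= 0) {
                    restored.write(buf, 0, count);
                }
            }
            System.out.println("round trip ok: " + java.util.Arrays.equals(original, restored.toByteArray()));

            // For large payloads, PLZFOutputStream offers the same write() API but
            // compresses blocks on a pool sized from available cores minus current
            // load, while a single writer thread keeps the chunks in order
            ByteArrayOutputStream parallel = new ByteArrayOutputStream();
            try (PLZFOutputStream pout = new PLZFOutputStream(parallel)) {
                pout.write(original);
            }
        }
    }

Note that PLZFOutputStream.flush() does nothing (as its sources above state), so output is only guaranteed to reach the underlying stream after close().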