├── gradle.properties ├── settings.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── src ├── main │ ├── ghpages │ │ └── index.html │ └── java │ │ └── org │ │ └── anarres │ │ └── parallelgzip │ │ ├── ParallelGZIPInputStream.java │ │ ├── ParallelGZIPEnvironment.java │ │ └── ParallelGZIPOutputStream.java └── test │ └── java │ └── org │ └── anarres │ └── parallelgzip │ ├── ParallelGZIPPerformanceTest.java │ └── ParallelGZIPOutputStreamTest.java ├── codequality ├── HEADER └── checkstyle.xml ├── .gitignore ├── README.md ├── gradlew.bat ├── gradlew └── LICENSE /gradle.properties: -------------------------------------------------------------------------------- 1 | version=1.0.6-SNAPSHOT 2 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name='parallelgzip' 2 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shevek/parallelgzip/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /src/main/ghpages/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Javadoc 4 | Coverage 5 | 6 | 7 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-5.6-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- 
/codequality/HEADER: -------------------------------------------------------------------------------- 1 | Copyright ${year} Shevek 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /src/main/java/org/anarres/parallelgzip/ParallelGZIPInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package org.anarres.parallelgzip; 6 | 7 | import java.io.IOException; 8 | import java.io.InputStream; 9 | import java.util.zip.GZIPInputStream; 10 | 11 | /** 12 | * 13 | * @author shevek 14 | */ 15 | public class ParallelGZIPInputStream extends GZIPInputStream { 16 | 17 | public ParallelGZIPInputStream(InputStream in, int size) throws IOException { 18 | super(in, size); 19 | } 20 | 21 | public ParallelGZIPInputStream(InputStream in) throws IOException { 22 | super(in); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.rar 19 | *.tar 20 | *.zip 21 | 22 | # Logs and databases # 23 | ###################### 24 | *.log 25 | 26 | # OS generated files # 27 | ###################### 28 | .DS_Store* 29 | ehthumbs.db 30 | Icon? 
31 | Thumbs.db 32 | 33 | # Editor Files # 34 | ################ 35 | *~ 36 | *.swp 37 | 38 | # Gradle Files # 39 | ################ 40 | .gradle 41 | 42 | # Build output directories 43 | /target 44 | */target 45 | /build 46 | */build 47 | 48 | # IntelliJ specific files/directories 49 | out 50 | .idea 51 | *.ipr 52 | *.iws 53 | *.iml 54 | atlassian-ide-plugin.xml 55 | 56 | # Eclipse specific files/directories 57 | .classpath 58 | .project 59 | .settings 60 | .metadata 61 | 62 | # NetBeans specific files/directories 63 | .nbattrs 64 | .nb-gradle 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Parallel GZIPOutputStream and GZIPInputStream 2 | ============================================= 3 | 4 | This library contains a parallelized GZIP implementation which is a 5 | high performance drop-in replacement for the standard java.util.zip 6 | classes. It is a pure Java equivalent of the pigz parallel compressor. 7 | 8 | The performance of ParallelGZIPOutputStream is excellent: it scales 9 | linearly with the number of cores, and spends 95% of all thread time 10 | in the native compression routines on 24- and 32-core Xeon systems. 11 | 12 | ParallelGZIPOutputStream has exactly the same memory contract as 13 | GZIPOutputStream: it extends FilterOutputStream. If a single thread 14 | writes data into the compressor, it will write compressed data to the 15 | underlying output stream on the same thread without any externally 16 | visible synchronization or flush calls. The user never needs to know 17 | or change anything as a result of the parallelism. 18 | 19 | Currently, ParallelGZIPInputStream is a subclass of the standard 20 | GZIPInputStream, but parallelism may be added in future. 21 | 22 | API Documentation 23 | ================= 24 | 25 | The [JavaDoc API](http://shevek.github.io/parallelgzip/docs/javadoc/) 26 | is available. 
27 | 28 | 29 | References 30 | ========== 31 | 32 | * http://zlib.net/pigz/pigz.pdf 33 | * http://www.gzip.org/zlib/rfc-gzip.html 34 | 35 | Credits 36 | ======= 37 | 38 | I needed this at work, but I was inspired to 39 | publish it by Paul Eggert's CS131 coursework at 40 | http://www.cs.ucla.edu/classes/fall11/cs131/hw/hw3.html - I wonder 41 | what grade I will get. 42 | 43 | -------------------------------------------------------------------------------- /src/test/java/org/anarres/parallelgzip/ParallelGZIPPerformanceTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package org.anarres.parallelgzip; 7 | 8 | import com.google.common.base.Stopwatch; 9 | import com.google.common.io.ByteStreams; 10 | import java.io.OutputStream; 11 | import java.util.Random; 12 | import java.util.concurrent.TimeUnit; 13 | import org.junit.Ignore; 14 | import org.junit.Test; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | /** 19 | * 20 | * @author shevek 21 | */ 22 | @Ignore 23 | public class ParallelGZIPPerformanceTest { 24 | 25 | private static final Logger LOG = LoggerFactory.getLogger(ParallelGZIPPerformanceTest.class); 26 | 27 | @Test 28 | public void testThreads() throws Exception { 29 | LOG.info("AvailableProcessors = " + Runtime.getRuntime().availableProcessors()); 30 | Random r = new Random(); 31 | byte[] data = new byte[10 * 1024 * 1024]; 32 | r.nextBytes(data); 33 | for (int i = 0; i < data.length; i++) 34 | data[i] = (byte) (data[i] & 0x7f); // Strip the top bit to make it amenable to Huffman compression. 
35 | 36 | OutputStream out = ByteStreams.nullOutputStream(); 37 | ParallelGZIPOutputStream gzip = new ParallelGZIPOutputStream(out); 38 | Stopwatch stopwatch = Stopwatch.createStarted(); 39 | 40 | for (int i = 0; i < 1024; i++) { 41 | LOG.debug("Write iteration " + i); 42 | gzip.write(data); 43 | } 44 | gzip.close(); 45 | 46 | long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS); 47 | LOG.info("elapsed=" + elapsed); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/org/anarres/parallelgzip/ParallelGZIPEnvironment.java: -------------------------------------------------------------------------------- 1 | package org.anarres.parallelgzip; 2 | 3 | import java.util.concurrent.ArrayBlockingQueue; 4 | import java.util.concurrent.ExecutorService; 5 | import java.util.concurrent.Executors; 6 | import java.util.concurrent.ThreadFactory; 7 | import java.util.concurrent.ThreadPoolExecutor; 8 | import java.util.concurrent.TimeUnit; 9 | import java.util.concurrent.atomic.AtomicLong; 10 | import javax.annotation.Nonnegative; 11 | import javax.annotation.Nonnull; 12 | 13 | /** 14 | * 15 | * @author shevek 16 | */ 17 | public class ParallelGZIPEnvironment { 18 | 19 | private static class ThreadFactoryHolder { 20 | 21 | private static final ThreadFactory THREAD_FACTORY = new ThreadFactory() { 22 | private final ThreadFactory defaultThreadFactory = Executors.defaultThreadFactory(); 23 | private final AtomicLong counter = new AtomicLong(0); 24 | 25 | @Override 26 | public Thread newThread(@Nonnull Runnable r) { 27 | Thread thread = defaultThreadFactory.newThread(r); 28 | thread.setName("parallelgzip-" + counter.getAndIncrement()); 29 | thread.setDaemon(true); 30 | return thread; 31 | } 32 | }; 33 | } 34 | 35 | @Nonnull 36 | public static ThreadPoolExecutor newThreadPoolExecutor(@Nonnegative int nthreads) { 37 | ThreadPoolExecutor executor = new ThreadPoolExecutor(nthreads, nthreads, 38 | 1L, TimeUnit.SECONDS, 39 | new 
ArrayBlockingQueue(nthreads * 20), 40 | ThreadFactoryHolder.THREAD_FACTORY, 41 | new ThreadPoolExecutor.CallerRunsPolicy()); 42 | executor.allowCoreThreadTimeOut(true); 43 | return executor; 44 | } 45 | 46 | private static class ThreadPoolHolder { 47 | 48 | private static final ExecutorService EXECUTOR = newThreadPoolExecutor(Runtime.getRuntime().availableProcessors()); 49 | } 50 | 51 | @Nonnull 52 | public static ExecutorService getSharedThreadPool() { 53 | return ThreadPoolHolder.EXECUTOR; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Add default JVM options here. 
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 33 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 34 | 35 | @rem Find java.exe 36 | if defined JAVA_HOME goto findJavaFromJavaHome 37 | 38 | set JAVA_EXE=java.exe 39 | %JAVA_EXE% -version >NUL 2>&1 40 | if "%ERRORLEVEL%" == "0" goto init 41 | 42 | echo. 43 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 44 | echo. 45 | echo Please set the JAVA_HOME variable in your environment to match the 46 | echo location of your Java installation. 47 | 48 | goto fail 49 | 50 | :findJavaFromJavaHome 51 | set JAVA_HOME=%JAVA_HOME:"=% 52 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 53 | 54 | if exist "%JAVA_EXE%" goto init 55 | 56 | echo. 57 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 58 | echo. 59 | echo Please set the JAVA_HOME variable in your environment to match the 60 | echo location of your Java installation. 61 | 62 | goto fail 63 | 64 | :init 65 | @rem Get command-line arguments, handling Windows variants 66 | 67 | if not "%OS%" == "Windows_NT" goto win9xME_args 68 | 69 | :win9xME_args 70 | @rem Slurp the command line arguments. 71 | set CMD_LINE_ARGS= 72 | set _SKIP=2 73 | 74 | :win9xME_args_slurp 75 | if "x%~1" == "x" goto execute 76 | 77 | set CMD_LINE_ARGS=%* 78 | 79 | :execute 80 | @rem Setup the command line 81 | 82 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 83 | 84 | @rem Execute Gradle 85 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 86 | 87 | :end 88 | @rem End local scope for the variables with windows NT shell 89 | if "%ERRORLEVEL%"=="0" goto mainEnd 90 | 91 | :fail 92 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 93 | rem the _cmd.exe /c_ return code! 
94 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 95 | exit /b 1 96 | 97 | :mainEnd 98 | if "%OS%"=="Windows_NT" endlocal 99 | 100 | :omega 101 | -------------------------------------------------------------------------------- /src/test/java/org/anarres/parallelgzip/ParallelGZIPOutputStreamTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package org.anarres.parallelgzip; 6 | 7 | import com.google.common.base.Stopwatch; 8 | import com.google.common.io.ByteStreams; 9 | import java.io.ByteArrayInputStream; 10 | import java.io.ByteArrayOutputStream; 11 | import java.util.Random; 12 | import java.util.concurrent.ThreadPoolExecutor; 13 | import java.util.concurrent.TimeUnit; 14 | import java.util.zip.GZIPOutputStream; 15 | import javax.annotation.Nonnegative; 16 | import javax.annotation.Nonnull; 17 | import org.junit.Test; 18 | import org.slf4j.Logger; 19 | import org.slf4j.LoggerFactory; 20 | import static org.junit.Assert.*; 21 | 22 | /** 23 | * 24 | * @author shevek 25 | */ 26 | public class ParallelGZIPOutputStreamTest { 27 | 28 | private static final Logger LOG = LoggerFactory.getLogger(ParallelGZIPOutputStreamTest.class); 29 | 30 | private static class ByteArrayOutputBuffer extends ByteArrayOutputStream { 31 | 32 | public ByteArrayOutputBuffer(@Nonnegative int size) { 33 | super(size); 34 | } 35 | 36 | @Nonnull 37 | public ByteArrayInputStream toInput() { 38 | return new ByteArrayInputStream(buf, 0, count); 39 | } 40 | } 41 | 42 | private void testPerformance(int len) throws Exception { 43 | Random r = new Random(); 44 | byte[] data = new byte[len]; 45 | r.nextBytes(data); 46 | LOG.info("Data is " + data.length + " bytes."); 47 | 48 | ByteArrayOutputBuffer out = new ByteArrayOutputBuffer(data.length); // Reallocation will occur on the first iteration. 
49 | 50 | final int serialCount = 10; 51 | long serialTotal = 0; 52 | for (int i = -2; i < serialCount; i++) { 53 | out.reset(); 54 | Stopwatch stopwatch = Stopwatch.createStarted(); 55 | GZIPOutputStream gzip = new GZIPOutputStream(out); 56 | gzip.write(data); 57 | gzip.close(); 58 | gzip.close(); // Again, for testing. 59 | long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS); 60 | LOG.info("i=" + i + ", size=" + data.length + "; serial=" + elapsed); 61 | if (i < 0) 62 | continue; 63 | serialTotal += elapsed; 64 | } 65 | 66 | double serialTime = serialTotal / (double) serialCount; 67 | LOG.info("size=" + data.length + "; serialavg=" + (long) serialTime); 68 | 69 | final int parallelCount = 25; 70 | long parallelTotal = 0; 71 | for (int i = -2; i < parallelCount; i++) { 72 | out.reset(); 73 | Stopwatch stopwatch = Stopwatch.createStarted(); 74 | ParallelGZIPOutputStream gzip = new ParallelGZIPOutputStream(out); 75 | gzip.write(data); 76 | gzip.close(); 77 | long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS); 78 | double perc = serialTime * 100d / elapsed; 79 | LOG.info("i=" + i + ", size=" + data.length + "; parallel=" + elapsed + "; perf=" + (int) perc + "%"); 80 | if (i < 0) 81 | continue; 82 | parallelTotal += elapsed; 83 | } 84 | 85 | double parallelTime = parallelTotal / (double) parallelCount; 86 | LOG.info("size=" + data.length + "; parallelavg=" + (long) parallelTime); 87 | 88 | ParallelGZIPInputStream in = new ParallelGZIPInputStream(out.toInput()); 89 | byte[] copy = ByteStreams.toByteArray(in); 90 | assertArrayEquals(data, copy); 91 | } 92 | 93 | @Test 94 | public void testPerformance() throws Exception { 95 | testPerformance(0); 96 | testPerformance(1); 97 | testPerformance(4); 98 | testPerformance(16); 99 | testPerformance(64 * 1024 - 1); 100 | testPerformance(64 * 1024); 101 | testPerformance(64 * 1024 + 1); 102 | testPerformance(4096 * 1024 + 17); 103 | testPerformance(16384 * 1024 + 17); 104 | testPerformance(65536 * 1024 + 17); 105 | } 106 
| 107 | // This routine has been updated to be a lot more of a fuzzer. 108 | private void testThreads(@Nonnull ByteArrayOutputBuffer out, @Nonnegative int nthreads) throws Exception { 109 | Random r = new Random(); 110 | 111 | ThreadPoolExecutor executor = ParallelGZIPEnvironment.newThreadPoolExecutor(nthreads); 112 | try { 113 | for (int i = 0; i < 3; i++) { 114 | out.reset(); 115 | // The randomness throws the perf results off a bit, but fuzzes the block sizes. 116 | byte[] data = new byte[256 * 1024 * 1024 + r.nextInt(1048576)]; 117 | r.nextBytes(data); 118 | LOG.info("Data is " + data.length + " bytes."); 119 | { 120 | Stopwatch stopwatch = Stopwatch.createStarted(); 121 | ParallelGZIPOutputStream gzip = new ParallelGZIPOutputStream(out, executor); 122 | gzip.write(data); 123 | gzip.close(); 124 | long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS); 125 | LOG.info("nthreads=" + nthreads + "; parallel=" + elapsed); 126 | gzip = null; 127 | } 128 | ParallelGZIPInputStream in = new ParallelGZIPInputStream(out.toInput()); 129 | byte[] copy = ByteStreams.toByteArray(in); 130 | assertArrayEquals(data, copy); 131 | } 132 | } finally { 133 | executor.shutdown(); 134 | executor.awaitTermination(10, TimeUnit.SECONDS); 135 | } 136 | } 137 | 138 | @Test 139 | public void testThreads() throws Exception { 140 | LOG.info("AvailableProcessors = " + Runtime.getRuntime().availableProcessors()); 141 | // Reallocation will occur on the first iteration. 142 | // Sharing this will help the tests run without killing the JVM. 143 | ByteArrayOutputBuffer out = new ByteArrayOutputBuffer(280 * 1000 * 1000); 144 | for (int i = 1; i < 32; i += 3) 145 | testThreads(out, i); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 
5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 
64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | # Determine the Java command to use to start the JVM. 86 | if [ -n "$JAVA_HOME" ] ; then 87 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 88 | # IBM's JDK on AIX uses strange locations for the executables 89 | JAVACMD="$JAVA_HOME/jre/sh/java" 90 | else 91 | JAVACMD="$JAVA_HOME/bin/java" 92 | fi 93 | if [ ! -x "$JAVACMD" ] ; then 94 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 95 | 96 | Please set the JAVA_HOME variable in your environment to match the 97 | location of your Java installation." 98 | fi 99 | else 100 | JAVACMD="java" 101 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 102 | 103 | Please set the JAVA_HOME variable in your environment to match the 104 | location of your Java installation." 105 | fi 106 | 107 | # Increase the maximum file descriptors if we can. 108 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 109 | MAX_FD_LIMIT=`ulimit -H -n` 110 | if [ $? -eq 0 ] ; then 111 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 112 | MAX_FD="$MAX_FD_LIMIT" 113 | fi 114 | ulimit -n $MAX_FD 115 | if [ $? 
-ne 0 ] ; then 116 | warn "Could not set maximum file descriptor limit: $MAX_FD" 117 | fi 118 | else 119 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 120 | fi 121 | fi 122 | 123 | # For Darwin, add options to specify how the application appears in the dock 124 | if $darwin; then 125 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 126 | fi 127 | 128 | # For Cygwin or MSYS, switch paths to Windows format before running java 129 | if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then 130 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 131 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 132 | JAVACMD=`cygpath --unix "$JAVACMD"` 133 | 134 | # We build the pattern for arguments to be converted via cygpath 135 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 136 | SEP="" 137 | for dir in $ROOTDIRSRAW ; do 138 | ROOTDIRS="$ROOTDIRS$SEP$dir" 139 | SEP="|" 140 | done 141 | OURCYGPATTERN="(^($ROOTDIRS))" 142 | # Add a user-defined pattern to the cygpath arguments 143 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 144 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 145 | fi 146 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 147 | i=0 148 | for arg in "$@" ; do 149 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 150 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 151 | 152 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 153 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 154 | else 155 | eval `echo args$i`="\"$arg\"" 156 | fi 157 | i=$((i+1)) 158 | done 159 | case $i in 160 | (0) set -- ;; 161 | (1) set -- "$args0" ;; 162 | (2) set -- "$args0" "$args1" ;; 163 | (3) set -- "$args0" "$args1" "$args2" ;; 164 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 165 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 166 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 167 | (7) 
set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 168 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 169 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 170 | esac 171 | fi 172 | 173 | # Escape application args 174 | save () { 175 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 176 | echo " " 177 | } 178 | APP_ARGS=$(save "$@") 179 | 180 | # Collect all arguments for the java command, following the shell quoting and substitution rules 181 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 182 | 183 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 184 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 185 | cd "$(dirname "$0")" 186 | fi 187 | 188 | exec "$JAVACMD" "$@" 189 | -------------------------------------------------------------------------------- /codequality/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 
155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2012-2014 Shevek 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /src/main/java/org/anarres/parallelgzip/ParallelGZIPOutputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package org.anarres.parallelgzip; 6 | 7 | import java.io.ByteArrayOutputStream; 8 | import java.io.FilterOutputStream; 9 | import java.io.IOException; 10 | import java.io.InterruptedIOException; 11 | import java.io.OutputStream; 12 | import java.nio.ByteBuffer; 13 | import java.nio.ByteOrder; 14 | import java.util.concurrent.ArrayBlockingQueue; 15 | import java.util.concurrent.BlockingQueue; 16 | import java.util.concurrent.Callable; 17 | import java.util.concurrent.ExecutionException; 18 | import java.util.concurrent.ExecutorService; 19 | import java.util.concurrent.Future; 20 | import java.util.concurrent.ThreadPoolExecutor; 21 | import java.util.zip.CRC32; 22 | import java.util.zip.Deflater; 23 | import java.util.zip.DeflaterOutputStream; 24 | import java.util.zip.GZIPOutputStream; 25 | import javax.annotation.CheckForNull; 26 | import javax.annotation.Nonnegative; 27 | import javax.annotation.Nonnull; 28 | 29 | /** 30 | * A multi-threaded version of {@link GZIPOutputStream}. 
31 | * 32 | * @author shevek 33 | */ 34 | public class ParallelGZIPOutputStream extends FilterOutputStream { 35 | 36 | private static final int GZIP_MAGIC = 0x8b1f; 37 | private static final int SIZE = 64 * 1024; 38 | 39 | @Nonnull 40 | private static Deflater newDeflater() { 41 | return new Deflater(Deflater.DEFAULT_COMPRESSION, true); 42 | } 43 | 44 | @Nonnull 45 | private static DeflaterOutputStream newDeflaterOutputStream(@Nonnull OutputStream out, @Nonnull Deflater deflater) { 46 | return new DeflaterOutputStream(out, deflater, 512, true); 47 | } 48 | 49 | /* Allow write into byte[] directly */ 50 | private static class ByteArrayOutputStreamExposed extends ByteArrayOutputStream { 51 | 52 | public ByteArrayOutputStreamExposed(int size) { 53 | super(size); 54 | } 55 | 56 | public void writeTo(@Nonnull byte[] buf) throws IOException { 57 | System.arraycopy(this.buf, 0, buf, 0, count); 58 | } 59 | } 60 | 61 | private static class State { 62 | 63 | private final Deflater def = newDeflater(); 64 | private final ByteArrayOutputStreamExposed buf = new ByteArrayOutputStreamExposed(SIZE + (SIZE >> 3)); 65 | private final DeflaterOutputStream str = newDeflaterOutputStream(buf, def); 66 | } 67 | 68 | /** This ThreadLocal avoids the recycling of a lot of memory, causing lumpy performance. 
*/ 69 | private static final ThreadLocal STATE = new ThreadLocal() { 70 | @Override 71 | protected State initialValue() { 72 | return new State(); 73 | } 74 | }; 75 | 76 | private static class Block implements Callable { 77 | 78 | // private final int index; 79 | private byte[] buf = new byte[SIZE + (SIZE >> 3)]; 80 | private int buf_length = 0; 81 | 82 | /* 83 | public Block(@Nonnegative int index) { 84 | this.index = index; 85 | } 86 | */ 87 | // Only on worker thread 88 | @Override 89 | public Block call() throws IOException { 90 | // LOG.info("Processing " + this + " on " + Thread.currentThread()); 91 | 92 | State state = STATE.get(); 93 | // ByteArrayOutputStream buf = new ByteArrayOutputStream(in.length); // Overestimate output size required. 94 | // DeflaterOutputStream def = newDeflaterOutputStream(buf); 95 | state.def.reset(); 96 | state.buf.reset(); 97 | state.str.write(buf, 0, buf_length); 98 | state.str.flush(); 99 | 100 | // int in_length = buf_length; 101 | int out_length = state.buf.size(); 102 | if (out_length > buf.length) 103 | this.buf = new byte[out_length]; 104 | // System.out.println("Compressed " + in_length + " to " + out_length + " bytes."); 105 | this.buf_length = out_length; 106 | state.buf.writeTo(buf); 107 | 108 | // return Arrays.copyOf(in, in_length); 109 | return this; 110 | } 111 | 112 | @Override 113 | public String toString() { 114 | return "Block" /* + index */ + "(" + buf_length + "/" + buf.length + " bytes)"; 115 | } 116 | } 117 | 118 | @Nonnegative 119 | private static int getThreadCount(@Nonnull ExecutorService executor) { 120 | if (executor instanceof ThreadPoolExecutor) 121 | return ((ThreadPoolExecutor) executor).getMaximumPoolSize(); 122 | return Runtime.getRuntime().availableProcessors(); 123 | } 124 | 125 | // TODO: Share, daemonize. 
126 | private final ExecutorService executor; 127 | private final CRC32 crc = new CRC32(); 128 | private final int emitQueueSize; 129 | private final BlockingQueue> emitQueue; 130 | @Nonnull 131 | private Block block = new Block(); 132 | @CheckForNull 133 | private Block freeBlock = null; 134 | /** Used as a sentinel for 'closed'. */ 135 | private long bytesWritten = 0; 136 | 137 | // Master thread only 138 | @Deprecated // Doesn't really use the given number of threads. 139 | public ParallelGZIPOutputStream(@Nonnull OutputStream out, @Nonnull ExecutorService executor, @Nonnegative int nthreads) throws IOException { 140 | super(out); 141 | this.executor = executor; 142 | // Some blocks compress faster than others; allow a long enough queue to keep all CPUs busy at least for a bit. 143 | this.emitQueueSize = nthreads * 3; 144 | this.emitQueue = new ArrayBlockingQueue>(emitQueueSize); 145 | writeHeader(); 146 | } 147 | 148 | /** 149 | * Creates a ParallelGZIPOutputStream 150 | * using {@link ParallelGZIPEnvironment#getSharedThreadPool()}. 151 | * 152 | * @param out the eventual output stream for the compressed data. 153 | * @throws IOException if it all goes wrong. 154 | */ 155 | @Deprecated // Doesn't really use the given number of threads. 156 | public ParallelGZIPOutputStream(@Nonnull OutputStream out, @Nonnegative int nthreads) throws IOException { 157 | this(out, ParallelGZIPEnvironment.getSharedThreadPool(), nthreads); 158 | } 159 | 160 | public ParallelGZIPOutputStream(@Nonnull OutputStream out, @Nonnull ExecutorService executor) throws IOException { 161 | this(out, executor, getThreadCount(executor)); 162 | } 163 | 164 | /** 165 | * Creates a ParallelGZIPOutputStream 166 | * using {@link ParallelGZIPEnvironment#getSharedThreadPool()}. 167 | * 168 | * @param out the eventual output stream for the compressed data. 169 | * @throws IOException if it all goes wrong. 
170 | */ 171 | public ParallelGZIPOutputStream(@Nonnull OutputStream out) throws IOException { 172 | this(out, ParallelGZIPEnvironment.getSharedThreadPool()); 173 | } 174 | 175 | /* 176 | * @see http://www.gzip.org/zlib/rfc-gzip.html#file-format 177 | */ 178 | private void writeHeader() throws IOException { 179 | out.write(new byte[]{ 180 | (byte) GZIP_MAGIC, // ID1: Magic number (little-endian short) 181 | (byte) (GZIP_MAGIC >> 8), // ID2: Magic number (little-endian short) 182 | Deflater.DEFLATED, // CM: Compression method 183 | 0, // FLG: Flags (byte) 184 | 0, 0, 0, 0, // MTIME: Modification time (int) 185 | 0, // XFL: Extra flags 186 | 3 // OS: Operating system (3 = Linux) 187 | }); 188 | } 189 | 190 | // Master thread only 191 | @Override 192 | public void write(int b) throws IOException { 193 | byte[] single = new byte[1]; 194 | single[0] = (byte) (b & 0xFF); 195 | write(single); 196 | } 197 | 198 | // Master thread only 199 | @Override 200 | public void write(byte[] b) throws IOException { 201 | write(b, 0, b.length); 202 | } 203 | 204 | // Master thread only 205 | @Override 206 | public void write(byte[] b, int off, int len) throws IOException { 207 | crc.update(b, off, len); 208 | bytesWritten += len; 209 | 210 | while (len > 0) { 211 | final byte[] blockBuf = block.buf; 212 | // assert block.in_length < block.in.length 213 | int capacity = SIZE - block.buf_length; // Make sure we don't grow the block buf repeatedly. 
214 | if (len >= capacity) { 215 | System.arraycopy(b, off, blockBuf, block.buf_length, capacity); 216 | block.buf_length += capacity; // == block.in.length 217 | off += capacity; 218 | len -= capacity; 219 | submit(); 220 | } else { 221 | System.arraycopy(b, off, blockBuf, block.buf_length, len); 222 | block.buf_length += len; 223 | // off += len; 224 | // len = 0; 225 | break; 226 | } 227 | } 228 | } 229 | 230 | // Master thread only 231 | private void submit() throws IOException { 232 | emitUntil(emitQueueSize - 1); 233 | emitQueue.add(executor.submit(block)); 234 | Block b = freeBlock; 235 | if (b != null) 236 | freeBlock = null; 237 | else 238 | b = new Block(); 239 | block = b; 240 | } 241 | 242 | // Emit If Available - submit always 243 | // Emit At Least one - submit when executor is full 244 | // Emit All Remaining - flush(), close() 245 | // Master thread only 246 | private void tryEmit() throws IOException, InterruptedException, ExecutionException { 247 | for (;;) { 248 | Future future = emitQueue.peek(); 249 | // LOG.info("Peeked future " + future); 250 | if (future == null) 251 | return; 252 | if (!future.isDone()) 253 | return; 254 | // It's an ordered queue. This MUST be the same element as above. 255 | Block b = emitQueue.remove().get(); 256 | // System.out.println("Chance-emitting block " + b); 257 | out.write(b.buf, 0, b.buf_length); 258 | b.buf_length = 0; 259 | freeBlock = b; 260 | } 261 | } 262 | 263 | // Master thread only 264 | /** Emits any opportunistically available blocks. Furthermore, emits blocks until the number of executing tasks is less than taskCountAllowed. 
*/ 265 | private void emitUntil(@Nonnegative int taskCountAllowed) throws IOException { 266 | try { 267 | while (emitQueue.size() > taskCountAllowed) { 268 | // LOG.info("Waiting for taskCount=" + emitQueue.size() + " -> " + taskCountAllowed); 269 | Block b = emitQueue.remove().get(); // Valid because emitQueue.size() > 0 270 | // System.out.println("Force-emitting block " + b); 271 | out.write(b.buf, 0, b.buf_length); // Blocks until this task is done. 272 | b.buf_length = 0; 273 | freeBlock = b; 274 | } 275 | // We may have achieved more opportunistically available blocks 276 | // while waiting for a block above. Let's emit them here. 277 | tryEmit(); 278 | } catch (ExecutionException e) { 279 | throw new IOException(e); 280 | } catch (InterruptedException e) { 281 | throw new InterruptedIOException(); 282 | } 283 | } 284 | 285 | // Master thread only 286 | @Override 287 | public void flush() throws IOException { 288 | // LOG.info("Flush: " + block); 289 | if (block.buf_length > 0) 290 | submit(); 291 | emitUntil(0); 292 | super.flush(); 293 | } 294 | 295 | // Master thread only 296 | @Override 297 | public void close() throws IOException { 298 | // LOG.info("Closing: bytesWritten=" + bytesWritten); 299 | if (bytesWritten >= 0) { 300 | flush(); 301 | 302 | newDeflaterOutputStream(out, newDeflater()).finish(); 303 | 304 | ByteBuffer buf = ByteBuffer.allocate(8); 305 | buf.order(ByteOrder.LITTLE_ENDIAN); 306 | // LOG.info("CRC is " + crc.getValue()); 307 | buf.putInt((int) crc.getValue()); 308 | buf.putInt((int) (bytesWritten % 4294967296L)); 309 | out.write(buf.array()); // allocate() guarantees a backing array. 310 | // LOG.info("trailer is " + Arrays.toString(buf.array())); 311 | 312 | out.flush(); 313 | out.close(); 314 | 315 | bytesWritten = Integer.MIN_VALUE; 316 | // } else { 317 | // LOG.warn("Already closed."); 318 | 319 | freeBlock = null; 320 | } 321 | } 322 | } 323 | --------------------------------------------------------------------------------