├── LICENSE.txt ├── NOTICE.txt ├── README.txt ├── maven ├── build-assemblynative.xml └── build-compilenative.xml ├── pom.xml └── src ├── main ├── assemblies │ └── hadoop-snappy.xml ├── java │ └── org │ │ └── apache │ │ └── hadoop │ │ └── io │ │ └── compress │ │ ├── SnappyCodec.java │ │ └── snappy │ │ ├── LoadSnappy.java │ │ ├── SnappyCompressor.java │ │ └── SnappyDecompressor.java └── native │ ├── Makefile.am │ ├── acinclude.m4 │ ├── configure.ac │ ├── packageNativeHadoop.sh │ └── src │ ├── org │ └── apache │ │ └── hadoop │ │ └── io │ │ └── compress │ │ └── snappy │ │ ├── SnappyCompressor.c │ │ ├── SnappyDecompressor.c │ │ └── org_apache_hadoop_io_compress_snappy.h │ ├── org_apache_hadoop.h │ ├── org_apache_hadoop_io_compress_snappy_SnappyCompressor.h │ └── org_apache_hadoop_io_compress_snappy_SnappyDecompressor.h └── test ├── java └── org │ └── apache │ └── hadoop │ └── io │ └── compress │ └── snappy │ └── TestSnappyCodec.java └── resources └── test.txt /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Hadoop Snappy uses and includes Snappy 1.0.2 2 | 3 | http://code.google.com/p/snappy 4 | 5 | Snappy 1.0.2 is licensed under Apache 2.0 License 6 | 7 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | Hadoop-Snappy enables Snappy compression for Hadoop. 2 | 3 | http://code.google.com/p/hadoop-snappy/ 4 | 5 | This project is integrated into Hadoop Common (JUN 2011). 6 | 7 | Hadoop-Snappy can be used as an add-on for recent (released) versions 8 | of Hadoop that do not provide Snappy Codec support yet. 9 | 10 | Hadoop-Snappy is being kept in synch with Hadoop Common. 11 | 12 | Build Hadoop Snappy 13 | ===== 14 | 1. Requirements: gcc c++, autoconf, automake, libtool, Java 6, 15 | JAVA_HOME set, Maven 3 16 | 17 | 2. Build/install Snappy (http://code.google.com/p/snappy/) 18 | 19 | 3. 
Build Hadoop Snappy 20 | 21 | $ mvn package [-Dsnappy.prefix=SNAPPY_INSTALLATION_DIR] 22 | 23 | 'snappy.prefix' by default is '/usr/local'. If Snappy is installed in 24 | other location than user local set 'snappy.prefix' to the right location. 25 | 26 | The built tarball is at target/hadoop-snappy-0.0.1-SNAPSHOT.tar.gz. The 27 | tarball includes snappy native library 28 | 29 | 30 | Install Hadoop Snappy in Hadoop 31 | ===== 32 | 33 | 1. Expand hadoop-snappy-0.0.1-SNAPSHOT.tar.gz file 34 | 35 | Copy (recursively) the lib directory of the expanded tarball in 36 | the /lib of all Hadoop nodes 37 | 38 | $ cp -r hadoop-snappy-0.0.1-SNAPSHOT/lib/* /lib 39 | 40 | IMPORTANT: Hadoop Snappy 0.0.1-SNAPSHOT tarball includes Snappy native 41 | library. 42 | 43 | 2. Add the following key/value pairs into core-site.xml 44 | 45 | 46 | io.compression.codecs 47 | 48 | org.apache.hadoop.io.compress.GzipCodec, 49 | org.apache.hadoop.io.compress.DefaultCodec, 50 | org.apache.hadoop.io.compress.BZip2Codec, 51 | org.apache.hadoop.io.compress.SnappyCodec 52 | 53 | 54 | 55 | 3. Restart Hadoop. 56 | 57 | 58 | License 59 | ======= 60 | Hadoop Snappy is licensed under the Apache License, Version 2.0. 61 | Snappy is licensed under the Apache License, Version 2.0. 62 | 63 | 64 | Origins 65 | ======= 66 | This project is based on the Hadoop LZO codec classes. 67 | 68 | The authors of the Hadoop LZO codec classes gave permission to 69 | relicense as the Apache License 2.0 (Todd Lipcon, Kevin Weil, 70 | Owen O'Malley, Hong Tang, Chris Douglas, Arun C Murthy) 71 | Thanks! 
72 | 73 | http://code.google.com/p/hadoop-gpl-compression 74 | https://github.com/kevinweil/hadoop-lzo 75 | -------------------------------------------------------------------------------- /maven/build-assemblynative.xml: -------------------------------------------------------------------------------- 1 | 14 | 15 | 16 | 17 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | cp -PR ${native.build.dir}/* ${native.assembly.lib.dir}/${native.build.platform} 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /maven/build-compilenative.xml: -------------------------------------------------------------------------------- 1 | 14 | 15 | 16 | 17 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 4.0.0 17 | org.apache.hadoop 18 | hadoop-snappy 19 | 0.0.1-SNAPSHOT 20 | jar 21 | Hadoop Snappy 22 | Hadoop Snappy 23 | 24 | 25 | 26 | The Apache Software License, Version 2.0 27 | http://www.apache.org/licenses/LICENSE-2.0.txt 28 | 29 | 30 | 31 | 32 | /usr/local 33 | ${snappy.prefix}/lib 34 | ${snappy.prefix}/include 35 | 36 | ${os.name}-${os.arch}-${sun.arch.data.model} 37 | so 38 | 39 | 40 | UTF-8 41 | UTF-8 42 | 43 | 44 | 45 | 46 | ${project.build.directory}/${project.artifactId}-${project.version}/${project.artifactId}-${project.version}/lib/native/${build.platform} 47 | 48 | 49 | 50 | 51 | 1.1.0 52 | 1 53 | 0.0.1 54 | 0 55 | 56 | 57 | 58 | 59 | org.apache.hadoop 60 | hadoop-core 61 | provided 62 | 63 | 64 | 
commons-logging 65 | commons-logging-api 66 | provided 67 | 68 | 69 | junit 70 | junit 71 | test 72 | 73 | 74 | 75 | 76 | 77 | 78 | org.apache.hadoop 79 | hadoop-core 80 | 0.20.2 81 | 82 | 83 | commons-logging 84 | commons-logging-api 85 | 1.1 86 | 87 | 88 | junit 89 | junit 90 | 4.8.1 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | org.apache.maven.plugins 100 | maven-compiler-plugin 101 | 2.3.2 102 | 103 | 104 | org.apache.maven.plugins 105 | maven-surefire-plugin 106 | 2.6 107 | 108 | 109 | org.apache.maven.plugins 110 | maven-jar-plugin 111 | 2.3.1 112 | 113 | 114 | org.apache.maven.plugins 115 | maven-assembly-plugin 116 | 2.2-beta-3 117 | 118 | 119 | org.apache.maven.plugins 120 | maven-antrun-plugin 121 | 1.6 122 | 123 | 124 | 125 | 126 | 127 | 128 | org.apache.maven.plugins 129 | maven-compiler-plugin 130 | 131 | 1.6 132 | 1.6 133 | 134 | 135 | 136 | maven-assembly-plugin 137 | 138 | 139 | package 140 | 141 | single 142 | 143 | 144 | 145 | 146 | 147 | src/main/assemblies/hadoop-snappy.xml 148 | 149 | 150 | 151 | 152 | org.apache.maven.plugins 153 | maven-antrun-plugin 154 | 155 | 156 | compile 157 | compile 158 | 159 | run 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | Snappy native library not found at ${snappy.lib}, use -Dsnappy.prefix=PATH 170 | 171 | 172 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | package 184 | package 185 | 186 | run 187 | 188 | 189 | 190 | 191 | 192 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | org.codehaus.mojo 237 | build-helper-maven-plugin 238 | 1.5 239 | 240 | 241 | abc 242 | package 243 | 244 | attach-artifact 245 | 246 | 247 | 248 | 249 | ${project.build.directory}/${project.artifactId}-${project.version}-${build.platform}.tar 250 | tar 251 | ${build.platform}${os.distro.classifier} 252 | 253 | 254 | 255 | 256 | 257 | 258 | 
259 | org.apache.maven.plugins 260 | maven-surefire-plugin 261 | 262 | once 263 | -Djava.library.path=${snappy.lib}:${project.build.directory}/native-build/usr/local/lib 264 | 265 | **/${test.exclude}.java 266 | ${test.exclude.pattern} 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | os.name.mac 276 | 277 | 278 | Mac 279 | 280 | 281 | 282 | Mac_OS_X-${os.arch}-${sun.arch.data.model} 283 | dylib 284 | 285 | 286 | 287 | 288 | 289 | 290 | -------------------------------------------------------------------------------- /src/main/assemblies/hadoop-snappy.xml: -------------------------------------------------------------------------------- 1 | 14 | 15 | tar 16 | 17 | dir 18 | 19 | true 20 | hadoop-snappy-${project.version} 21 | 22 | 23 | ${basedir} 24 | /src 25 | 26 | maven/** 27 | src/** 28 | pom.xml 29 | LICENSE.txt 30 | README.txt 31 | 32 | 33 | 34 | 35 | 36 | ${project.build.directory}/hadoop-snappy-${project.version}.jar 37 | /lib 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.hadoop.io.compress; 20 | 21 | import java.io.IOException; 22 | import java.io.InputStream; 23 | import java.io.OutputStream; 24 | 25 | import org.apache.hadoop.conf.Configurable; 26 | import org.apache.hadoop.conf.Configuration; 27 | import org.apache.hadoop.io.compress.snappy.LoadSnappy; 28 | import org.apache.hadoop.io.compress.snappy.SnappyCompressor; 29 | import org.apache.hadoop.io.compress.snappy.SnappyDecompressor; 30 | 31 | /** 32 | * This class creates snappy compressors/decompressors. 33 | */ 34 | public class SnappyCodec implements Configurable, CompressionCodec { 35 | 36 | /** Internal buffer size for Snappy compressor/decompressors */ 37 | public static final String IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY = 38 | "io.compression.codec.snappy.buffersize"; 39 | 40 | /** Default value for IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY */ 41 | public static final int IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT = 42 | 256 * 1024; 43 | 44 | static { 45 | LoadSnappy.isLoaded(); 46 | } 47 | 48 | Configuration conf; 49 | 50 | /** 51 | * Set the configuration to be used by this object. 52 | * 53 | * @param conf the configuration object. 54 | */ 55 | @Override 56 | public void setConf(Configuration conf) { 57 | this.conf = conf; 58 | } 59 | 60 | /** 61 | * Return the configuration used by this object. 62 | * 63 | * @return the configuration object used by this objec. 64 | */ 65 | @Override 66 | public Configuration getConf() { 67 | return conf; 68 | } 69 | 70 | /** 71 | * Are the native snappy libraries loaded & initialized? 
72 | * 73 | * @param conf configuration 74 | * @return true if loaded & initialized, otherwise false 75 | */ 76 | public static boolean isNativeSnappyLoaded(Configuration conf) { 77 | return LoadSnappy.isLoaded(); 78 | } 79 | 80 | /** 81 | * Create a {@link CompressionOutputStream} that will write to the given 82 | * {@link OutputStream}. 83 | * 84 | * @param out the location for the final output stream 85 | * @return a stream the user can write uncompressed data to have it compressed 86 | * @throws IOException 87 | */ 88 | @Override 89 | public CompressionOutputStream createOutputStream(OutputStream out) 90 | throws IOException { 91 | return createOutputStream(out, createCompressor()); 92 | } 93 | 94 | /** 95 | * Create a {@link CompressionOutputStream} that will write to the given 96 | * {@link OutputStream} with the given {@link Compressor}. 97 | * 98 | * @param out the location for the final output stream 99 | * @param compressor compressor to use 100 | * @return a stream the user can write uncompressed data to have it compressed 101 | * @throws IOException 102 | */ 103 | @Override 104 | public CompressionOutputStream createOutputStream(OutputStream out, 105 | Compressor compressor) 106 | throws IOException { 107 | if (!isNativeSnappyLoaded(conf)) { 108 | throw new RuntimeException("native snappy library not available"); 109 | } 110 | int bufferSize = conf.getInt( 111 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, 112 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); 113 | 114 | int compressionOverhead = (bufferSize / 6) + 32; 115 | 116 | return new BlockCompressorStream(out, compressor, bufferSize, 117 | compressionOverhead); 118 | } 119 | 120 | /** 121 | * Get the type of {@link Compressor} needed by this {@link CompressionCodec}. 122 | * 123 | * @return the type of compressor needed by this codec. 
124 | */ 125 | @Override 126 | public Class getCompressorType() { 127 | if (!isNativeSnappyLoaded(conf)) { 128 | throw new RuntimeException("native snappy library not available"); 129 | } 130 | 131 | return SnappyCompressor.class; 132 | } 133 | 134 | /** 135 | * Create a new {@link Compressor} for use by this {@link CompressionCodec}. 136 | * 137 | * @return a new compressor for use by this codec 138 | */ 139 | @Override 140 | public Compressor createCompressor() { 141 | if (!isNativeSnappyLoaded(conf)) { 142 | throw new RuntimeException("native snappy library not available"); 143 | } 144 | int bufferSize = conf.getInt( 145 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, 146 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); 147 | return new SnappyCompressor(bufferSize); 148 | } 149 | 150 | /** 151 | * Create a {@link CompressionInputStream} that will read from the given 152 | * input stream. 153 | * 154 | * @param in the stream to read compressed bytes from 155 | * @return a stream to read uncompressed bytes from 156 | * @throws IOException 157 | */ 158 | @Override 159 | public CompressionInputStream createInputStream(InputStream in) 160 | throws IOException { 161 | return createInputStream(in, createDecompressor()); 162 | } 163 | 164 | /** 165 | * Create a {@link CompressionInputStream} that will read from the given 166 | * {@link InputStream} with the given {@link Decompressor}. 
167 | * 168 | * @param in the stream to read compressed bytes from 169 | * @param decompressor decompressor to use 170 | * @return a stream to read uncompressed bytes from 171 | * @throws IOException 172 | */ 173 | @Override 174 | public CompressionInputStream createInputStream(InputStream in, 175 | Decompressor decompressor) 176 | throws IOException { 177 | if (!isNativeSnappyLoaded(conf)) { 178 | throw new RuntimeException("native snappy library not available"); 179 | } 180 | 181 | return new BlockDecompressorStream(in, decompressor, conf.getInt( 182 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, 183 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT)); 184 | } 185 | 186 | /** 187 | * Get the type of {@link Decompressor} needed by this {@link CompressionCodec}. 188 | * 189 | * @return the type of decompressor needed by this codec. 190 | */ 191 | @Override 192 | public Class getDecompressorType() { 193 | if (!isNativeSnappyLoaded(conf)) { 194 | throw new RuntimeException("native snappy library not available"); 195 | } 196 | 197 | return SnappyDecompressor.class; 198 | } 199 | 200 | /** 201 | * Create a new {@link Decompressor} for use by this {@link CompressionCodec}. 202 | * 203 | * @return a new decompressor for use by this codec 204 | */ 205 | @Override 206 | public Decompressor createDecompressor() { 207 | if (!isNativeSnappyLoaded(conf)) { 208 | throw new RuntimeException("native snappy library not available"); 209 | } 210 | int bufferSize = conf.getInt( 211 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, 212 | IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); 213 | return new SnappyDecompressor(bufferSize); 214 | } 215 | 216 | /** 217 | * Get the default filename extension for this kind of compression. 218 | * 219 | * @return .snappy. 
220 | */ 221 | @Override 222 | public String getDefaultExtension() { 223 | return ".snappy"; 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/main/java/org/apache/hadoop/io/compress/snappy/LoadSnappy.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package org.apache.hadoop.io.compress.snappy; 19 | 20 | import java.io.File; 21 | 22 | import org.apache.commons.logging.Log; 23 | import org.apache.commons.logging.LogFactory; 24 | 25 | /** 26 | * Determines if Snappy native library is available and loads it if available. 27 | */ 28 | public class LoadSnappy { 29 | private static final Log LOG = LogFactory.getLog(LoadSnappy.class.getName()); 30 | 31 | private static boolean LOADED = false; 32 | 33 | static { 34 | try { 35 | System.loadLibrary("snappy"); 36 | System.loadLibrary("hadoopsnappy"); 37 | 38 | // Find the path to the native library 39 | String snappyPath = findLibrary("snappy"); 40 | 41 | // Initialize the native library. This causes hadoopsnappy to 42 | // dynamically load the symbols from the native snappy library. 
43 | // If this fails, the library can't be used, and attempting to 44 | // use it will cause the JVM to crash. 45 | SnappyCompressor.initIDs(snappyPath); 46 | SnappyDecompressor.initIDs(snappyPath); 47 | 48 | LOADED = true; 49 | } catch (UnsatisfiedLinkError ex) { 50 | LOG.warn("Failed to load library from " + 51 | System.getProperty("java.library.path") + ": " + ex.getMessage()); 52 | } 53 | if (LOADED) { 54 | LOG.info("Snappy native library loaded"); 55 | } else { 56 | LOG.warn("Snappy native library not loaded"); 57 | } 58 | } 59 | 60 | private static String findLibrary(String name) { 61 | name = System.mapLibraryName(name).replace(".jnilib", ".dylib"); 62 | String paths[] = System.getProperty("java.library.path").split(":"); 63 | for (String path : paths) { 64 | File file = new File(path, name); 65 | if (file.exists()) { 66 | return file.toString(); 67 | } 68 | } 69 | throw new UnsatisfiedLinkError("cannot find path to " + name); 70 | } 71 | 72 | /** 73 | * Returns if Snappy native library is loaded. 74 | * 75 | * @return true if Snappy native library is loaded, 76 | * false if not. 77 | */ 78 | public static boolean isLoaded() { 79 | return LOADED; 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.hadoop.io.compress.snappy; 20 | 21 | import java.io.IOException; 22 | import java.nio.Buffer; 23 | import java.nio.ByteBuffer; 24 | 25 | import org.apache.commons.logging.Log; 26 | import org.apache.commons.logging.LogFactory; 27 | import org.apache.hadoop.conf.Configuration; 28 | import org.apache.hadoop.io.compress.Compressor; 29 | 30 | /** 31 | * A {@link Compressor} based on the snappy compression algorithm. 32 | * http://code.google.com/p/snappy/ 33 | */ 34 | public class SnappyCompressor implements Compressor { 35 | private static final Log LOG = 36 | LogFactory.getLog(SnappyCompressor.class.getName()); 37 | private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024; 38 | 39 | // HACK - Use this as a global lock in the JNI layer 40 | @SuppressWarnings({"unchecked", "unused"}) 41 | private static Class clazz = SnappyCompressor.class; 42 | 43 | private int directBufferSize; 44 | private Buffer compressedDirectBuf = null; 45 | private int uncompressedDirectBufLen; 46 | private Buffer uncompressedDirectBuf = null; 47 | private byte[] userBuf = null; 48 | private int userBufOff = 0, userBufLen = 0; 49 | private boolean finish, finished; 50 | 51 | private long bytesRead = 0L; 52 | private long bytesWritten = 0L; 53 | 54 | 55 | /** 56 | * Creates a new compressor. 57 | * 58 | * @param directBufferSize size of the direct buffer to be used. 
59 | */ 60 | public SnappyCompressor(int directBufferSize) { 61 | // The JVM crashes if the library is not loaded 62 | if (!LoadSnappy.isLoaded()) { 63 | throw new RuntimeException("native snappy library not available"); 64 | } 65 | 66 | this.directBufferSize = directBufferSize; 67 | 68 | uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); 69 | compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); 70 | compressedDirectBuf.position(directBufferSize); 71 | } 72 | 73 | /** 74 | * Creates a new compressor with the default buffer size. 75 | */ 76 | public SnappyCompressor() { 77 | this(DEFAULT_DIRECT_BUFFER_SIZE); 78 | } 79 | 80 | /** 81 | * Sets input data for compression. 82 | * This should be called whenever #needsInput() returns 83 | * true indicating that more input data is required. 84 | * 85 | * @param b Input data 86 | * @param off Start offset 87 | * @param len Length 88 | */ 89 | @Override 90 | public synchronized void setInput(byte[] b, int off, int len) { 91 | if (b == null) { 92 | throw new NullPointerException(); 93 | } 94 | if (off < 0 || len < 0 || off > b.length - len) { 95 | throw new ArrayIndexOutOfBoundsException(); 96 | } 97 | finished = false; 98 | 99 | if (len > uncompressedDirectBuf.remaining()) { 100 | // save data; now !needsInput 101 | this.userBuf = b; 102 | this.userBufOff = off; 103 | this.userBufLen = len; 104 | } else { 105 | ((ByteBuffer) uncompressedDirectBuf).put(b, off, len); 106 | uncompressedDirectBufLen = uncompressedDirectBuf.position(); 107 | } 108 | 109 | bytesRead += len; 110 | } 111 | 112 | /** 113 | * If a write would exceed the capacity of the direct buffers, it is set 114 | * aside to be loaded by this function while the compressed data are 115 | * consumed. 
116 | */ 117 | synchronized void setInputFromSavedData() { 118 | if (0 >= userBufLen) { 119 | return; 120 | } 121 | finished = false; 122 | 123 | uncompressedDirectBufLen = Math.min(userBufLen, directBufferSize); 124 | ((ByteBuffer) uncompressedDirectBuf).put(userBuf, userBufOff, 125 | uncompressedDirectBufLen); 126 | 127 | // Note how much data is being fed to snappy 128 | userBufOff += uncompressedDirectBufLen; 129 | userBufLen -= uncompressedDirectBufLen; 130 | } 131 | 132 | /** 133 | * Does nothing. 134 | */ 135 | @Override 136 | public synchronized void setDictionary(byte[] b, int off, int len) { 137 | // do nothing 138 | } 139 | 140 | /** 141 | * Returns true if the input data buffer is empty and 142 | * #setInput() should be called to provide more input. 143 | * 144 | * @return true if the input data buffer is empty and 145 | * #setInput() should be called in order to provide more input. 146 | */ 147 | @Override 148 | public synchronized boolean needsInput() { 149 | return !(compressedDirectBuf.remaining() > 0 150 | || uncompressedDirectBuf.remaining() == 0 || userBufLen > 0); 151 | } 152 | 153 | /** 154 | * When called, indicates that compression should end 155 | * with the current contents of the input buffer. 156 | */ 157 | @Override 158 | public synchronized void finish() { 159 | finish = true; 160 | } 161 | 162 | /** 163 | * Returns true if the end of the compressed 164 | * data output stream has been reached. 165 | * 166 | * @return true if the end of the compressed 167 | * data output stream has been reached. 168 | */ 169 | @Override 170 | public synchronized boolean finished() { 171 | // Check if all uncompressed data has been consumed 172 | return (finish && finished && compressedDirectBuf.remaining() == 0); 173 | } 174 | 175 | /** 176 | * Fills specified buffer with compressed data. Returns actual number 177 | * of bytes of compressed data. 
A return value of 0 indicates that 178 | * needsInput() should be called in order to determine if more input 179 | * data is required. 180 | * 181 | * @param b Buffer for the compressed data 182 | * @param off Start offset of the data 183 | * @param len Size of the buffer 184 | * @return The actual number of bytes of compressed data. 185 | */ 186 | @Override 187 | public synchronized int compress(byte[] b, int off, int len) 188 | throws IOException { 189 | if (b == null) { 190 | throw new NullPointerException(); 191 | } 192 | if (off < 0 || len < 0 || off > b.length - len) { 193 | throw new ArrayIndexOutOfBoundsException(); 194 | } 195 | 196 | // Check if there is compressed data 197 | int n = compressedDirectBuf.remaining(); 198 | if (n > 0) { 199 | n = Math.min(n, len); 200 | ((ByteBuffer) compressedDirectBuf).get(b, off, n); 201 | bytesWritten += n; 202 | return n; 203 | } 204 | 205 | // Re-initialize the snappy's output direct-buffer 206 | compressedDirectBuf.clear(); 207 | compressedDirectBuf.limit(0); 208 | if (0 == uncompressedDirectBuf.position()) { 209 | // No compressed data, so we should have !needsInput or !finished 210 | setInputFromSavedData(); 211 | if (0 == uncompressedDirectBuf.position()) { 212 | // Called without data; write nothing 213 | finished = true; 214 | return 0; 215 | } 216 | } 217 | 218 | // Compress data 219 | n = compressBytesDirect(); 220 | compressedDirectBuf.limit(n); 221 | uncompressedDirectBuf.clear(); // snappy consumes all buffer input 222 | 223 | // Set 'finished' if snapy has consumed all user-data 224 | if (0 == userBufLen) { 225 | finished = true; 226 | } 227 | 228 | // Get atmost 'len' bytes 229 | n = Math.min(n, len); 230 | bytesWritten += n; 231 | ((ByteBuffer) compressedDirectBuf).get(b, off, n); 232 | 233 | return n; 234 | } 235 | 236 | /** 237 | * Resets compressor so that a new set of input data can be processed. 
238 | */ 239 | @Override 240 | public synchronized void reset() { 241 | finish = false; 242 | finished = false; 243 | uncompressedDirectBuf.clear(); 244 | uncompressedDirectBufLen = 0; 245 | compressedDirectBuf.clear(); 246 | compressedDirectBuf.limit(0); 247 | userBufOff = userBufLen = 0; 248 | bytesRead = bytesWritten = 0L; 249 | } 250 | 251 | /** 252 | * Prepare the compressor to be used in a new stream with settings defined in 253 | * the given Configuration 254 | * 255 | * @param conf Configuration from which new setting are fetched 256 | */ 257 | public synchronized void reinit(Configuration conf) { 258 | reset(); 259 | } 260 | 261 | /** 262 | * Return number of bytes given to this compressor since last reset. 263 | */ 264 | @Override 265 | public synchronized long getBytesRead() { 266 | return bytesRead; 267 | } 268 | 269 | /** 270 | * Return number of bytes consumed by callers of compress since last reset. 271 | */ 272 | @Override 273 | public synchronized long getBytesWritten() { 274 | return bytesWritten; 275 | } 276 | 277 | /** 278 | * Closes the compressor and discards any unprocessed input. 279 | */ 280 | @Override 281 | public synchronized void end() { 282 | } 283 | 284 | native static void initIDs(String snappyPath); 285 | 286 | private native int compressBytesDirect(); 287 | } 288 | -------------------------------------------------------------------------------- /src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.hadoop.io.compress.snappy; 20 | 21 | import java.io.IOException; 22 | import java.nio.Buffer; 23 | import java.nio.ByteBuffer; 24 | 25 | import org.apache.commons.logging.Log; 26 | import org.apache.commons.logging.LogFactory; 27 | import org.apache.hadoop.io.compress.Decompressor; 28 | 29 | /** 30 | * A {@link Decompressor} based on the snappy compression algorithm. 31 | * http://code.google.com/p/snappy/ 32 | */ 33 | public class SnappyDecompressor implements Decompressor { 34 | private static final Log LOG = 35 | LogFactory.getLog(SnappyCompressor.class.getName()); 36 | private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024; 37 | 38 | // HACK - Use this as a global lock in the JNI layer 39 | @SuppressWarnings({"unchecked", "unused"}) 40 | private static Class clazz = SnappyDecompressor.class; 41 | 42 | private int directBufferSize; 43 | private Buffer compressedDirectBuf = null; 44 | private int compressedDirectBufLen; 45 | private Buffer uncompressedDirectBuf = null; 46 | private byte[] userBuf = null; 47 | private int userBufOff = 0, userBufLen = 0; 48 | private boolean finished; 49 | 50 | /** 51 | * Creates a new compressor. 52 | * 53 | * @param directBufferSize size of the direct buffer to be used. 
54 | */ 55 | public SnappyDecompressor(int directBufferSize) { 56 | // The JVM crashes if the library is not loaded 57 | if (!LoadSnappy.isLoaded()) { 58 | throw new RuntimeException("native snappy library not available"); 59 | } 60 | 61 | this.directBufferSize = directBufferSize; 62 | 63 | compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); 64 | uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); 65 | uncompressedDirectBuf.position(directBufferSize); 66 | 67 | } 68 | 69 | /** 70 | * Creates a new decompressor with the default buffer size. 71 | */ 72 | public SnappyDecompressor() { 73 | this(DEFAULT_DIRECT_BUFFER_SIZE); 74 | } 75 | 76 | /** 77 | * Sets input data for decompression. 78 | * This should be called if and only if {@link #needsInput()} returns 79 | * true indicating that more input data is required. 80 | * (Both native and non-native versions of various Decompressors require 81 | * that the data passed in via b[] remain unmodified until 82 | * the caller is explicitly notified--via {@link #needsInput()}--that the 83 | * buffer may be safely modified. With this requirement, an extra 84 | * buffer-copy can be avoided.) 
85 | * 86 | * @param b Input data 87 | * @param off Start offset 88 | * @param len Length 89 | */ 90 | @Override 91 | public synchronized void setInput(byte[] b, int off, int len) { 92 | if (b == null) { 93 | throw new NullPointerException(); 94 | } 95 | if (off < 0 || len < 0 || off > b.length - len) { 96 | throw new ArrayIndexOutOfBoundsException(); 97 | } 98 | 99 | this.userBuf = b; 100 | this.userBufOff = off; 101 | this.userBufLen = len; 102 | 103 | setInputFromSavedData(); 104 | 105 | // Reinitialize snappy's output direct-buffer 106 | uncompressedDirectBuf.limit(directBufferSize); 107 | uncompressedDirectBuf.position(directBufferSize); 108 | } 109 | 110 | /** 111 | * If a write would exceed the capacity of the direct buffers, it is set 112 | * aside to be loaded by this function while the compressed data are 113 | * consumed. 114 | */ 115 | synchronized void setInputFromSavedData() { 116 | compressedDirectBufLen = Math.min(userBufLen, directBufferSize); 117 | 118 | // Reinitialize snappy's input direct buffer 119 | compressedDirectBuf.rewind(); 120 | ((ByteBuffer) compressedDirectBuf).put(userBuf, userBufOff, 121 | compressedDirectBufLen); 122 | 123 | // Note how much data is being fed to snappy 124 | userBufOff += compressedDirectBufLen; 125 | userBufLen -= compressedDirectBufLen; 126 | } 127 | 128 | /** 129 | * Does nothing. 130 | */ 131 | @Override 132 | public synchronized void setDictionary(byte[] b, int off, int len) { 133 | // do nothing 134 | } 135 | 136 | /** 137 | * Returns true if the input data buffer is empty and 138 | * {@link #setInput(byte[], int, int)} should be called to 139 | * provide more input. 140 | * 141 | * @return true if the input data buffer is empty and 142 | * {@link #setInput(byte[], int, int)} should be called in 143 | * order to provide more input. 144 | */ 145 | @Override 146 | public synchronized boolean needsInput() { 147 | // Consume remaining compressed data? 
148 | if (uncompressedDirectBuf.remaining() > 0) { 149 | return false; 150 | } 151 | 152 | // Check if snappy has consumed all input 153 | if (compressedDirectBufLen <= 0) { 154 | // Check if we have consumed all user-input 155 | if (userBufLen <= 0) { 156 | return true; 157 | } else { 158 | setInputFromSavedData(); 159 | } 160 | } 161 | 162 | return false; 163 | } 164 | 165 | /** 166 | * Returns false. 167 | * 168 | * @return false. 169 | */ 170 | @Override 171 | public synchronized boolean needsDictionary() { 172 | return false; 173 | } 174 | 175 | /** 176 | * Returns true if the end of the decompressed 177 | * data output stream has been reached. 178 | * 179 | * @return true if the end of the decompressed 180 | * data output stream has been reached. 181 | */ 182 | @Override 183 | public synchronized boolean finished() { 184 | return (finished && uncompressedDirectBuf.remaining() == 0); 185 | } 186 | 187 | /** 188 | * Fills specified buffer with uncompressed data. Returns actual number 189 | * of bytes of uncompressed data. A return value of 0 indicates that 190 | * {@link #needsInput()} should be called in order to determine if more 191 | * input data is required. 192 | * 193 | * @param b Buffer for the compressed data 194 | * @param off Start offset of the data 195 | * @param len Size of the buffer 196 | * @return The actual number of bytes of compressed data. 
197 | * @throws IOException 198 | */ 199 | @Override 200 | public synchronized int decompress(byte[] b, int off, int len) 201 | throws IOException { 202 | if (b == null) { 203 | throw new NullPointerException(); 204 | } 205 | if (off < 0 || len < 0 || off > b.length - len) { 206 | throw new ArrayIndexOutOfBoundsException(); 207 | } 208 | 209 | int n = 0; 210 | 211 | // Check if there is uncompressed data 212 | n = uncompressedDirectBuf.remaining(); 213 | if (n > 0) { 214 | n = Math.min(n, len); 215 | ((ByteBuffer) uncompressedDirectBuf).get(b, off, n); 216 | return n; 217 | } 218 | if (compressedDirectBufLen > 0) { 219 | // Re-initialize the snappy's output direct buffer 220 | uncompressedDirectBuf.rewind(); 221 | uncompressedDirectBuf.limit(directBufferSize); 222 | 223 | // Decompress data 224 | n = decompressBytesDirect(); 225 | uncompressedDirectBuf.limit(n); 226 | 227 | if (userBufLen <= 0) { 228 | finished = true; 229 | } 230 | 231 | // Get atmost 'len' bytes 232 | n = Math.min(n, len); 233 | ((ByteBuffer) uncompressedDirectBuf).get(b, off, n); 234 | } 235 | 236 | return n; 237 | } 238 | 239 | /** 240 | * Resets decompressor and input and output buffers so that a new set of 241 | * input data can be processed. 242 | */ 243 | @Override 244 | public synchronized void reset() { 245 | finished = false; 246 | compressedDirectBufLen = 0; 247 | uncompressedDirectBuf.limit(directBufferSize); 248 | uncompressedDirectBuf.position(directBufferSize); 249 | userBufOff = userBufLen = 0; 250 | } 251 | 252 | /** 253 | * Closes the decompressor and discards any unprocessed input. 
254 | */ 255 | @Override 256 | public synchronized void end() { 257 | // do nothing 258 | } 259 | 260 | native static void initIDs(String snappyPath); 261 | 262 | private native int decompressBytesDirect(); 263 | } 264 | -------------------------------------------------------------------------------- /src/main/native/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | ACLOCAL_AMFLAGS = -I m4 19 | 20 | AM_CPPFLAGS = @JNI_CPPFLAGS@ -I$(HADOOP_NATIVE_SRCDIR)/src -Isrc/org/apache/hadoop/io/compress/snappy 21 | AM_LDFLAGS = @JNI_LDFLAGS@ 22 | AM_CFLAGS = -g -Wall -fPIC -O2 -m$(JVM_DATA_MODEL) 23 | 24 | lib_LTLIBRARIES = libhadoopsnappy.la 25 | 26 | libhadoopsnappy_la_SOURCES = src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c 27 | libhadoopsnappy_la_LDFLAGS = $(AM_LDFLAGS) -version-info 0:1:0 28 | -------------------------------------------------------------------------------- /src/main/native/acinclude.m4: -------------------------------------------------------------------------------- 1 | # AC_COMPUTE_NEEDED_DSO(LIBRARY, PREPROC_SYMBOL) 2 | # -------------------------------------------------- 3 | # Compute the 'actual' dynamic-library used 4 | # for LIBRARY and set it to PREPROC_SYMBOL 5 | AC_DEFUN([AC_COMPUTE_NEEDED_DSO], 6 | [ 7 | AC_CACHE_CHECK([Checking for the 'actual' dynamic-library for '-l$1'], ac_cv_libname_$1, 8 | [ 9 | echo 'int main(int argc, char **argv){return 0;}' > conftest.c 10 | if test -z "`${CC} ${LDFLAGS} -o conftest conftest.c -l$1 2>&1`"; then 11 | dnl Try objdump and ldd in that order to get the dynamic library 12 | if test ! -z "`which objdump | grep -v 'no objdump'`"; then 13 | ac_cv_libname_$1="`objdump -p conftest | grep NEEDED | grep $1 | sed 's/\W*NEEDED\W*\(.*\)\W*$/\"\1\"/'`" 14 | elif test ! -z "`which ldd | grep -v 'no ldd'`"; then 15 | ac_cv_libname_$1="`ldd conftest | grep $1 | sed 's/^[[[^A-Za-z0-9]]]*\([[[A-Za-z0-9\.]]]*\)[[[^A-Za-z0-9]]]*=>.*$/\"\1\"/'`" 16 | elif test ! 
-z "`which otool | grep -v 'no otool'`"; then 17 | ac_cv_libname_$1=\"`otool -L conftest | grep $1 | sed -e 's/^[ ]*//' -e 's/ .*//' -e 's/.*\/\(.*\)$/\1/'`\"; 18 | else 19 | AC_MSG_ERROR(Can't find either 'objdump' or 'ldd' or 'otool' to compute the dynamic library for '-l$1') 20 | fi 21 | else 22 | ac_cv_libname_$1=libnotfound.so 23 | fi 24 | rm -f conftest* 25 | ] 26 | ) 27 | AC_DEFINE_UNQUOTED($2, ${ac_cv_libname_$1}, [The 'actual' dynamic-library for '-l$1']) 28 | ])# AC_COMPUTE_NEEDED_DSO 29 | -------------------------------------------------------------------------------- /src/main/native/configure.ac: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # -*- Autoconf -*- 20 | # Process this file with autoconf to produce a configure script. 21 | 22 | AC_PREREQ(2.59) 23 | AC_INIT(hadoop-snappy, 0.0.1, issaymk2@gmail.com) 24 | AC_CONFIG_SRCDIR([src/org_apache_hadoop.h]) 25 | AC_CONFIG_AUX_DIR([config]) 26 | AC_CONFIG_MACRO_DIR([m4]) 27 | AC_CONFIG_HEADER([config.h]) 28 | AC_SYS_LARGEFILE 29 | AM_INIT_AUTOMAKE([subdir-objects foreign]) 30 | 31 | # Checks for programs. 
32 | AC_PROG_CC 33 | AC_PROG_LIBTOOL 34 | AM_PROG_CC_C_O 35 | 36 | # Checks for libraries. 37 | AC_CHECK_LIB([dl], [dlopen]) 38 | 39 | # Checks for header files. 40 | AC_HEADER_STDC 41 | AC_CHECK_HEADERS([stdio.h stddef.h], [], AC_MSG_ERROR(Some system headers not found... please ensure their presence on your platform.)) 42 | AC_CHECK_HEADERS([snappy-c.h], AC_COMPUTE_NEEDED_DSO(snappy,HADOOP_SNAPPY_LIBRARY), AC_MSG_ERROR([Native snappy headers not found.])) 43 | 44 | # Checks for typedefs, structures, and compiler characteristics. 45 | AC_C_CONST 46 | AC_TYPE_SIZE_T 47 | 48 | # Checks for library functions. 49 | dnl Check for '-ljvm' 50 | JNI_LDFLAGS="" 51 | if test "x$JAVA_HOME" != "x" 52 | then 53 | JNI_LDFLAGS="-L$JAVA_HOME/jre/lib/$OS_ARCH/server" 54 | fi 55 | ldflags_bak=$LDFLAGS 56 | LDFLAGS="$LDFLAGS $JNI_LDFLAGS" 57 | AC_CHECK_LIB([jvm], [JNI_GetCreatedJavaVMs]) 58 | LDFLAGS=$ldflags_bak 59 | AC_SUBST([JNI_LDFLAGS]) 60 | 61 | dnl Check for JNI headers 62 | JNI_CPPFLAGS="" 63 | if test "x$JAVA_HOME" != "x" 64 | then 65 | for dir in `find $JAVA_HOME/include -follow -type d` 66 | do 67 | JNI_CPPFLAGS="$JNI_CPPFLAGS -I$dir" 68 | done 69 | fi 70 | cppflags_bak=$CPPFLAGS 71 | CPPFLAGS="$CPPFLAGS $JNI_CPPFLAGS" 72 | AC_CHECK_HEADERS([jni.h], [], AC_MSG_ERROR([Native java headers not found. Is \$JAVA_HOME set correctly?])) 73 | CPPFLAGS=$cppflags_bak 74 | AC_SUBST([JNI_CPPFLAGS]) 75 | 76 | 77 | AC_CONFIG_FILES([Makefile]) 78 | AC_OUTPUT 79 | -------------------------------------------------------------------------------- /src/main/native/packageNativeHadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 
6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # packageNativeHadoop.sh - A simple script to help package native-hadoop libraries 20 | 21 | # 22 | # Note: 23 | # This script relies on the following environment variables to function correctly: 24 | # * BASE_NATIVE_LIB_DIR 25 | # * BUILD_NATIVE_DIR 26 | # * DIST_LIB_DIR 27 | # All these are setup by build.xml. 28 | # 29 | 30 | TAR='tar cf -' 31 | UNTAR='tar xfBp -' 32 | 33 | # Copy the pre-built libraries in $BASE_NATIVE_LIB_DIR 34 | if [ -d $BASE_NATIVE_LIB_DIR ] 35 | then 36 | for platform in `ls $BASE_NATIVE_LIB_DIR` 37 | do 38 | if [ ! -d $DIST_LIB_DIR/$platform ] 39 | then 40 | mkdir -p $DIST_LIB_DIR/$platform 41 | echo "Created $DIST_LIB_DIR/$platform" 42 | fi 43 | echo "Copying libraries in $BASE_NATIVE_LIB_DIR/$platform to $DIST_LIB_DIR/$platform/" 44 | cd $BASE_NATIVE_LIB_DIR/$platform/ 45 | $TAR . | (cd $DIST_LIB_DIR/$platform/; $UNTAR) 46 | done 47 | fi 48 | 49 | # Copy the custom-built libraries in $BUILD_DIR 50 | if [ -d $BUILD_NATIVE_DIR ] 51 | then 52 | for platform in `ls $BUILD_NATIVE_DIR` 53 | do 54 | if [ ! -d $DIST_LIB_DIR/$platform ] 55 | then 56 | mkdir -p $DIST_LIB_DIR/$platform 57 | echo "Created $DIST_LIB_DIR/$platform" 58 | fi 59 | echo "Copying libraries in $BUILD_NATIVE_DIR/$platform/lib to $DIST_LIB_DIR/$platform/" 60 | cd $BUILD_NATIVE_DIR/$platform/lib 61 | $TAR . 
| (cd $DIST_LIB_DIR/$platform/; $UNTAR) 62 | done 63 | fi 64 | 65 | #vim: ts=2: sw=2: et 66 | -------------------------------------------------------------------------------- /src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | #if defined HAVE_CONFIG_H 20 | #include 21 | #endif 22 | 23 | #if defined HADOOP_SNAPPY_LIBRARY 24 | 25 | #if defined HAVE_STDIO_H 26 | #include 27 | #else 28 | #error 'stdio.h not found' 29 | #endif 30 | 31 | #if defined HAVE_STDLIB_H 32 | #include 33 | #else 34 | #error 'stdlib.h not found' 35 | #endif 36 | 37 | #if defined HAVE_STRING_H 38 | #include 39 | #else 40 | #error 'string.h not found' 41 | #endif 42 | 43 | #if defined HAVE_DLFCN_H 44 | #include 45 | #else 46 | #error 'dlfcn.h not found' 47 | #endif 48 | 49 | #include "org_apache_hadoop_io_compress_snappy.h" 50 | #include "org_apache_hadoop_io_compress_snappy_SnappyCompressor.h" 51 | 52 | static jfieldID SnappyCompressor_clazz; 53 | static jfieldID SnappyCompressor_uncompressedDirectBuf; 54 | static jfieldID SnappyCompressor_uncompressedDirectBufLen; 55 | static jfieldID SnappyCompressor_compressedDirectBuf; 56 | static jfieldID SnappyCompressor_directBufferSize; 57 | 58 | static snappy_status (*dlsym_snappy_compress)(const char*, size_t, char*, size_t*); 59 | 60 | JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_initIDs 61 | (JNIEnv *env, jclass clazz, jstring libsnappy_path){ 62 | 63 | // Load libsnappy.so 64 | const char *path = (*env)->GetStringUTFChars(env, libsnappy_path, NULL); 65 | if (path == NULL) { 66 | return; // OutOfMemoryError already thrown 67 | } 68 | void *libsnappy = dlopen(path, RTLD_NOW); 69 | if (!libsnappy) { 70 | char msg[2000]; 71 | snprintf(msg, sizeof(msg), "Cannot load %s (%s)", path, dlerror()); 72 | (*env)->ReleaseStringUTFChars(env, libsnappy_path, path); 73 | THROW(env, "java/lang/UnsatisfiedLinkError", msg); 74 | return; 75 | } 76 | (*env)->ReleaseStringUTFChars(env, libsnappy_path, path); 77 | 78 | // Locate the requisite symbols from libsnappy.so 79 | dlerror(); // Clear any existing error 80 | LOAD_DYNAMIC_SYMBOL(dlsym_snappy_compress, env, libsnappy, "snappy_compress"); 81 | 82 | SnappyCompressor_clazz = 
(*env)->GetStaticFieldID(env, clazz, "clazz", 83 | "Ljava/lang/Class;"); 84 | SnappyCompressor_uncompressedDirectBuf = (*env)->GetFieldID(env, clazz, 85 | "uncompressedDirectBuf", 86 | "Ljava/nio/Buffer;"); 87 | SnappyCompressor_uncompressedDirectBufLen = (*env)->GetFieldID(env, clazz, 88 | "uncompressedDirectBufLen", "I"); 89 | SnappyCompressor_compressedDirectBuf = (*env)->GetFieldID(env, clazz, 90 | "compressedDirectBuf", 91 | "Ljava/nio/Buffer;"); 92 | SnappyCompressor_directBufferSize = (*env)->GetFieldID(env, clazz, 93 | "directBufferSize", "I"); 94 | } 95 | 96 | JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_compressBytesDirect 97 | (JNIEnv *env, jobject thisj){ 98 | // Get members of SnappyCompressor 99 | jobject clazz = (*env)->GetStaticObjectField(env, thisj, SnappyCompressor_clazz); 100 | jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_uncompressedDirectBuf); 101 | jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen); 102 | jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_compressedDirectBuf); 103 | size_t compressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_directBufferSize); 104 | 105 | // Get the input direct buffer 106 | LOCK_CLASS(env, clazz, "SnappyCompressor"); 107 | const char* uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); 108 | UNLOCK_CLASS(env, clazz, "SnappyCompressor"); 109 | 110 | if (uncompressed_bytes == 0) { 111 | return (jint)0; 112 | } 113 | 114 | // Get the output direct buffer 115 | LOCK_CLASS(env, clazz, "SnappyCompressor"); 116 | char* compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); 117 | UNLOCK_CLASS(env, clazz, "SnappyCompressor"); 118 | 119 | if (compressed_bytes == 0) { 120 | return (jint)0; 121 | } 122 | 123 | snappy_status ret = 
dlsym_snappy_compress(uncompressed_bytes, uncompressed_direct_buf_len, compressed_bytes, &compressed_direct_buf_len); 124 | if (ret != SNAPPY_OK){ 125 | THROW(env, "Ljava/lang/InternalError", "Could not compress data. Buffer length is too small."); 126 | } 127 | 128 | (*env)->SetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen, 0); 129 | 130 | return (jint)compressed_direct_buf_len; 131 | } 132 | 133 | #endif //define HADOOP_SNAPPY_LIBRARY 134 | -------------------------------------------------------------------------------- /src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | #if defined HAVE_CONFIG_H 20 | #include 21 | #endif 22 | 23 | #if defined HADOOP_SNAPPY_LIBRARY 24 | 25 | #if defined HAVE_STDIO_H 26 | #include 27 | #else 28 | #error 'stdio.h not found' 29 | #endif 30 | 31 | #if defined HAVE_STDLIB_H 32 | #include 33 | #else 34 | #error 'stdlib.h not found' 35 | #endif 36 | 37 | #if defined HAVE_STRING_H 38 | #include 39 | #else 40 | #error 'string.h not found' 41 | #endif 42 | 43 | #if defined HAVE_DLFCN_H 44 | #include 45 | #else 46 | #error 'dlfcn.h not found' 47 | #endif 48 | 49 | #include "org_apache_hadoop_io_compress_snappy.h" 50 | #include "org_apache_hadoop_io_compress_snappy_SnappyDecompressor.h" 51 | 52 | static jfieldID SnappyDecompressor_clazz; 53 | static jfieldID SnappyDecompressor_compressedDirectBuf; 54 | static jfieldID SnappyDecompressor_compressedDirectBufLen; 55 | static jfieldID SnappyDecompressor_uncompressedDirectBuf; 56 | static jfieldID SnappyDecompressor_directBufferSize; 57 | 58 | static snappy_status (*dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); 59 | 60 | JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_initIDs 61 | (JNIEnv *env, jclass clazz, jstring libsnappy_path){ 62 | 63 | // Load libsnappy.so 64 | const char *path = (*env)->GetStringUTFChars(env, libsnappy_path, NULL); 65 | if (path == NULL) { 66 | return; // OutOfMemoryError already thrown 67 | } 68 | void *libsnappy = dlopen(path, RTLD_NOW); 69 | if (!libsnappy) { 70 | char msg[2000]; 71 | snprintf(msg, sizeof(msg), "Cannot load %s (%s)", path, dlerror()); 72 | (*env)->ReleaseStringUTFChars(env, libsnappy_path, path); 73 | THROW(env, "java/lang/UnsatisfiedLinkError", msg); 74 | return; 75 | } 76 | (*env)->ReleaseStringUTFChars(env, libsnappy_path, path); 77 | 78 | // Locate the requisite symbols from libsnappy.so 79 | dlerror(); // Clear any existing error 80 | LOAD_DYNAMIC_SYMBOL(dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); 81 | 82 | 
SnappyDecompressor_clazz = (*env)->GetStaticFieldID(env, clazz, "clazz", 83 | "Ljava/lang/Class;"); 84 | SnappyDecompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz, 85 | "compressedDirectBuf", 86 | "Ljava/nio/Buffer;"); 87 | SnappyDecompressor_compressedDirectBufLen = (*env)->GetFieldID(env,clazz, 88 | "compressedDirectBufLen", "I"); 89 | SnappyDecompressor_uncompressedDirectBuf = (*env)->GetFieldID(env,clazz, 90 | "uncompressedDirectBuf", 91 | "Ljava/nio/Buffer;"); 92 | SnappyDecompressor_directBufferSize = (*env)->GetFieldID(env, clazz, 93 | "directBufferSize", "I"); 94 | } 95 | 96 | JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_decompressBytesDirect 97 | (JNIEnv *env, jobject thisj){ 98 | // Get members of SnappyDecompressor 99 | jobject clazz = (*env)->GetStaticObjectField(env,thisj, SnappyDecompressor_clazz); 100 | jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_compressedDirectBuf); 101 | jint compressed_direct_buf_len = (*env)->GetIntField(env,thisj, SnappyDecompressor_compressedDirectBufLen); 102 | jobject uncompressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_uncompressedDirectBuf); 103 | size_t uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyDecompressor_directBufferSize); 104 | 105 | // Get the input direct buffer 106 | LOCK_CLASS(env, clazz, "SnappyDecompressor"); 107 | const char* compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); 108 | UNLOCK_CLASS(env, clazz, "SnappyDecompressor"); 109 | 110 | if (compressed_bytes == 0) { 111 | return (jint)0; 112 | } 113 | 114 | // Get the output direct buffer 115 | LOCK_CLASS(env, clazz, "SnappyDecompressor"); 116 | char* uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); 117 | UNLOCK_CLASS(env, clazz, "SnappyDecompressor"); 118 | 119 | if (uncompressed_bytes == 0) { 120 | return (jint)0; 121 | } 122 | 123 | 
snappy_status ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len, uncompressed_bytes, &uncompressed_direct_buf_len); 124 | if (ret == SNAPPY_BUFFER_TOO_SMALL){ 125 | THROW(env, "Ljava/lang/InternalError", "Could not decompress data. Buffer length is too small."); 126 | } else if (ret == SNAPPY_INVALID_INPUT){ 127 | THROW(env, "Ljava/lang/InternalError", "Could not decompress data. Input is invalid."); 128 | } else if (ret != SNAPPY_OK){ 129 | THROW(env, "Ljava/lang/InternalError", "Could not decompress data."); 130 | } 131 | 132 | (*env)->SetIntField(env, thisj, SnappyDecompressor_compressedDirectBufLen, 0); 133 | 134 | return (jint)uncompressed_direct_buf_len; 135 | } 136 | 137 | #endif //define HADOOP_SNAPPY_LIBRARY 138 | -------------------------------------------------------------------------------- /src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | 20 | #if !defined ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H 21 | #define ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H 22 | 23 | 24 | #if defined HAVE_CONFIG_H 25 | #include 26 | #endif 27 | 28 | #if defined HADOOP_SNAPPY_LIBRARY 29 | 30 | #if defined HAVE_STDDEF_H 31 | #include 32 | #else 33 | #error 'stddef.h not found' 34 | #endif 35 | 36 | #if defined HAVE_SNAPPY_C_H 37 | #include 38 | #else 39 | #error 'Please install snappy-development packages for your platform.' 40 | #endif 41 | 42 | #if defined HAVE_DLFCN_H 43 | #include 44 | #else 45 | #error "dlfcn.h not found" 46 | #endif 47 | 48 | #if defined HAVE_JNI_H 49 | #include 50 | #else 51 | #error 'jni.h not found' 52 | #endif 53 | 54 | #include "org_apache_hadoop.h" 55 | 56 | #endif //define HADOOP_SNAPPY_LIBRARY 57 | 58 | #endif //ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H 59 | -------------------------------------------------------------------------------- /src/main/native/src/org_apache_hadoop.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | /** 20 | * This file includes some common utilities 21 | * for all native code used in hadoop. 22 | */ 23 | 24 | #if !defined ORG_APACHE_HADOOP_H 25 | #define ORG_APACHE_HADOOP_H 26 | 27 | #if defined HAVE_CONFIG_H 28 | #include 29 | #endif 30 | 31 | #if defined HAVE_DLFCN_H 32 | #include 33 | #else 34 | #error "dlfcn.h not found" 35 | #endif 36 | 37 | #if defined HAVE_JNI_H 38 | #include 39 | #else 40 | #error 'jni.h not found' 41 | #endif 42 | 43 | /* A helper macro to 'throw' a java exception. */ 44 | #define THROW(env, exception_name, message) \ 45 | { \ 46 | jclass ecls = (*env)->FindClass(env, exception_name); \ 47 | if (ecls) { \ 48 | (*env)->ThrowNew(env, ecls, message); \ 49 | (*env)->DeleteLocalRef(env, ecls); \ 50 | } \ 51 | } 52 | 53 | /* Helper macro to return if an exception is pending */ 54 | #define PASS_EXCEPTIONS(env) \ 55 | { \ 56 | if ((*env)->ExceptionCheck(env)) return; \ 57 | } 58 | 59 | #define PASS_EXCEPTIONS_GOTO(env, target) \ 60 | { \ 61 | if ((*env)->ExceptionCheck(env)) goto target; \ 62 | } 63 | 64 | #define PASS_EXCEPTIONS_RET(env, ret) \ 65 | { \ 66 | if ((*env)->ExceptionCheck(env)) return (ret); \ 67 | } 68 | 69 | /** 70 | * A helper function to dlsym a 'symbol' from a given library-handle. 71 | * 72 | * @param env jni handle to report contingencies. 73 | * @param handle handle to the dlopen'ed library. 74 | * @param symbol symbol to load. 75 | * @return returns the address where the symbol is loaded in memory, 76 | * NULL on error. 
77 | */ 78 | static __attribute__ ((unused)) 79 | void *do_dlsym(JNIEnv *env, void *handle, const char *symbol) { 80 | if (!env || !handle || !symbol) { 81 | THROW(env, "java/lang/InternalError", NULL); 82 | return NULL; 83 | } 84 | char *error = NULL; 85 | void *func_ptr = dlsym(handle, symbol); 86 | if ((error = dlerror()) != NULL) { 87 | THROW(env, "java/lang/UnsatisfiedLinkError", symbol); 88 | return NULL; 89 | } 90 | return func_ptr; 91 | } 92 | 93 | /* A helper macro to dlsym the requisite dynamic symbol and bail-out on error. */ 94 | #define LOAD_DYNAMIC_SYMBOL(func_ptr, env, handle, symbol) \ 95 | if ((func_ptr = do_dlsym(env, handle, symbol)) == NULL) { \ 96 | return; \ 97 | } 98 | 99 | #define LOCK_CLASS(env, clazz, classname) \ 100 | if ((*env)->MonitorEnter(env, clazz) != 0) { \ 101 | char exception_msg[128]; \ 102 | snprintf(exception_msg, 128, "Failed to lock %s", classname); \ 103 | THROW(env, "java/lang/InternalError", exception_msg); \ 104 | } 105 | 106 | #define UNLOCK_CLASS(env, clazz, classname) \ 107 | if ((*env)->MonitorExit(env, clazz) != 0) { \ 108 | char exception_msg[128]; \ 109 | snprintf(exception_msg, 128, "Failed to unlock %s", classname); \ 110 | THROW(env, "java/lang/InternalError", exception_msg); \ 111 | } 112 | 113 | #endif 114 | 115 | //vim: sw=2: ts=2: et 116 | -------------------------------------------------------------------------------- /src/main/native/src/org_apache_hadoop_io_compress_snappy_SnappyCompressor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | /* DO NOT EDIT THIS FILE - it is machine generated */ 20 | #include 21 | /* Header for class org_apache_hadoop_io_compress_snappy_SnappyCompressor */ 22 | 23 | #ifndef _Included_org_apache_hadoop_io_compress_snappy_SnappyCompressor 24 | #define _Included_org_apache_hadoop_io_compress_snappy_SnappyCompressor 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | #undef org_apache_hadoop_io_compress_snappy_SnappyCompressor_DEFAULT_DIRECT_BUFFER_SIZE 29 | #define org_apache_hadoop_io_compress_snappy_SnappyCompressor_DEFAULT_DIRECT_BUFFER_SIZE 65536L 30 | /* 31 | * Class: org_apache_hadoop_io_compress_snappy_SnappyCompressor 32 | * Method: initIDs 33 | * Signature: (Ljava/lang/String;)V 34 | */ 35 | JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_initIDs 36 | (JNIEnv *, jclass, jstring); 37 | 38 | /* 39 | * Class: org_apache_hadoop_io_compress_snappy_SnappyCompressor 40 | * Method: compressBytesDirect 41 | * Signature: ()I 42 | */ 43 | JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_compressBytesDirect 44 | (JNIEnv *, jobject); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | #endif 50 | -------------------------------------------------------------------------------- /src/main/native/src/org_apache_hadoop_io_compress_snappy_SnappyDecompressor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | /* DO NOT EDIT THIS FILE - it is machine generated */ 20 | #include 21 | /* Header for class org_apache_hadoop_io_compress_snappy_SnappyDecompressor */ 22 | 23 | #ifndef _Included_org_apache_hadoop_io_compress_snappy_SnappyDecompressor 24 | #define _Included_org_apache_hadoop_io_compress_snappy_SnappyDecompressor 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | #undef org_apache_hadoop_io_compress_snappy_SnappyDecompressor_DEFAULT_DIRECT_BUFFER_SIZE 29 | #define org_apache_hadoop_io_compress_snappy_SnappyDecompressor_DEFAULT_DIRECT_BUFFER_SIZE 65536L 30 | /* 31 | * Class: org_apache_hadoop_io_compress_snappy_SnappyDecompressor 32 | * Method: initIDs 33 | * Signature: (Ljava/lang/String;)V 34 | */ 35 | JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_initIDs 36 | (JNIEnv *, jclass, jstring); 37 | 38 | /* 39 | * Class: org_apache_hadoop_io_compress_snappy_SnappyDecompressor 40 | * Method: decompressBytesDirect 41 | * Signature: ()I 42 | */ 43 | JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_decompressBytesDirect 44 | (JNIEnv *, jobject); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | #endif 50 | 
-------------------------------------------------------------------------------- /src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.apache.hadoop.io.compress.snappy; 20 | 21 | import java.io.BufferedReader; 22 | import java.io.File; 23 | import java.io.InputStream; 24 | import java.io.FileInputStream; 25 | import java.io.FileNotFoundException; 26 | import java.io.FileOutputStream; 27 | import java.io.IOException; 28 | import java.io.InputStreamReader; 29 | 30 | import junit.framework.TestCase; 31 | 32 | import org.apache.hadoop.conf.Configuration; 33 | import org.apache.hadoop.io.compress.CompressionCodec; 34 | import org.apache.hadoop.io.compress.CompressionInputStream; 35 | import org.apache.hadoop.io.compress.CompressionOutputStream; 36 | import org.apache.hadoop.io.compress.SnappyCodec; 37 | import org.apache.hadoop.util.ReflectionUtils; 38 | 39 | public class TestSnappyCodec extends TestCase { 40 | private String inputDir; 41 | 42 | @Override 43 | protected void setUp() throws Exception { 44 | super.setUp(); 45 | inputDir = System.getProperty("test.build.data", "target"); 46 | } 47 | 48 | public void testFile() throws Exception { 49 | run("test.txt"); 50 | } 51 | 52 | private void run(String filename) throws FileNotFoundException, IOException{ 53 | File snappyFile = new File(inputDir, filename + new SnappyCodec().getDefaultExtension()); 54 | if (snappyFile.exists()) { 55 | snappyFile.delete(); 56 | } 57 | 58 | Configuration conf = new Configuration(); 59 | CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf); 60 | 61 | // Compress 62 | InputStream is = this.getClass().getClassLoader().getResourceAsStream("test.txt"); 63 | FileOutputStream os = new FileOutputStream(snappyFile); 64 | CompressionOutputStream cos = codec.createOutputStream(os); 65 | 66 | byte buffer[] = new byte[8192]; 67 | try { 68 | int bytesRead = 0; 69 | while ((bytesRead = is.read(buffer)) > 0) { 70 | cos.write(buffer, 0, bytesRead); 71 | } 72 | } catch (IOException e) { 73 | System.err.println("Compress Error"); 74 | e.printStackTrace(); 75 | } 
finally { 76 | is.close(); 77 | cos.close(); 78 | os.close(); 79 | } 80 | 81 | // Decompress 82 | is = this.getClass().getClassLoader().getResourceAsStream("test.txt"); 83 | FileInputStream is2 = new FileInputStream(snappyFile); 84 | CompressionInputStream cis = codec.createInputStream(is2); 85 | BufferedReader r = new BufferedReader(new InputStreamReader(is)); 86 | BufferedReader cr = new BufferedReader(new InputStreamReader(cis)); 87 | 88 | 89 | try { 90 | String line, rline; 91 | int lineNum = 0; 92 | while ((line = r.readLine()) != null) { 93 | lineNum++; 94 | rline = cr.readLine(); 95 | if (!rline.equals(line)) { 96 | System.err.println("Decompress error at line " + line + " of file " + filename); 97 | System.err.println("Original: [" + line + "]"); 98 | System.err.println("Decompressed: [" + rline + "]"); 99 | } 100 | assertEquals(rline, line); 101 | } 102 | assertNull(cr.readLine()); 103 | } catch (IOException e) { 104 | System.err.println("Decompress Error"); 105 | e.printStackTrace(); 106 | } finally { 107 | cis.close(); 108 | is.close(); 109 | os.close(); 110 | } 111 | } 112 | } 113 | --------------------------------------------------------------------------------