├── snappy ├── AUTHORS ├── cmake │ ├── SnappyConfig.cmake │ └── config.h.in ├── testdata │ ├── fireworks.jpeg │ ├── geo.protodata │ ├── paper-100k.pdf │ ├── baddata1.snappy │ ├── baddata2.snappy │ └── baddata3.snappy ├── .appveyor.yml ├── CONTRIBUTING.md ├── snappy-stubs-internal.cc ├── .travis.yml ├── COPYING ├── snappy-stubs-public.h ├── snappy-stubs-public.h.in ├── snappy-sinksource.cc ├── snappy-c.cc ├── format_description.txt ├── snappy.vcproj ├── snappy-test.vcproj ├── framing_format.txt ├── snappy-c.h ├── CMakeLists.txt ├── NEWS ├── README.md ├── snappy-sinksource.h ├── snappy-internal.h ├── snappy.h ├── snappy-test.h ├── snappy-stubs-internal.h ├── snappy-test.cc └── snappy_unittest.cc ├── snatshot.png └── ReadMe.md /snappy/AUTHORS: -------------------------------------------------------------------------------- 1 | opensource@google.com 2 | -------------------------------------------------------------------------------- /snatshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanttobeno/snappy/HEAD/snatshot.png -------------------------------------------------------------------------------- /snappy/cmake/SnappyConfig.cmake: -------------------------------------------------------------------------------- 1 | include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake") -------------------------------------------------------------------------------- /snappy/testdata/fireworks.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanttobeno/snappy/HEAD/snappy/testdata/fireworks.jpeg -------------------------------------------------------------------------------- /snappy/testdata/geo.protodata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanttobeno/snappy/HEAD/snappy/testdata/geo.protodata 
-------------------------------------------------------------------------------- /snappy/testdata/paper-100k.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanttobeno/snappy/HEAD/snappy/testdata/paper-100k.pdf -------------------------------------------------------------------------------- /snappy/testdata/baddata1.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanttobeno/snappy/HEAD/snappy/testdata/baddata1.snappy -------------------------------------------------------------------------------- /snappy/testdata/baddata2.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanttobeno/snappy/HEAD/snappy/testdata/baddata2.snappy -------------------------------------------------------------------------------- /snappy/testdata/baddata3.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanttobeno/snappy/HEAD/snappy/testdata/baddata3.snappy -------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | 2 |

Snappy

3 | 4 | 谷歌2011年开始开发的一款专注于速度的压缩,解压库,速度完胜zlib。 5 | 6 | 百度的[sofa-pbrpc](https://github.com/baidu/sofa-pbrpc)也使用到了。 7 | 8 | Snappy v1.1.7, August 24th 2017: 9 | 10 | * Improved CMake build support for 64-bit Linux distributions. 11 | 12 | * MSVC builds now use MSVC-specific intrinsics that map to clzll. 13 | 14 | * ARM64 (AArch64) builds use the code paths optimized for 64-bit processors. 15 | 16 | 17 | --- 18 | 19 | 最简单用法: 20 | 21 | snappy::Compress(input.data(), input.size(), &output); 22 | 23 | 24 | 25 | snappy::Uncompress(input.data(), input.size(), &output); 26 | 27 | 28 | where "input" and "output" are both instances of std::string. 29 | 30 | --- 31 | This Is A Debug Test!!! 32 | 33 | ![snatshot.png](snatshot.png) 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /snappy/.appveyor.yml: -------------------------------------------------------------------------------- 1 | # Build matrix / environment variables are explained on: 2 | # https://www.appveyor.com/docs/appveyor-yml/ 3 | # This file can be validated on: https://ci.appveyor.com/tools/validate-yaml 4 | 5 | version: "{build}" 6 | 7 | environment: 8 | matrix: 9 | # AppVeyor currently has no custom job name feature. 10 | # http://help.appveyor.com/discussions/questions/1623-can-i-provide-a-friendly-name-for-jobs 11 | - JOB: Visual Studio 2017 12 | APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 13 | CMAKE_GENERATOR: Visual Studio 15 2017 14 | 15 | platform: 16 | - x86 17 | - x64 18 | 19 | configuration: 20 | - RelWithDebInfo 21 | - Debug 22 | 23 | build: 24 | verbosity: minimal 25 | 26 | build_script: 27 | - git submodule update --init --recursive 28 | - mkdir out 29 | - cd out 30 | - if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64 31 | - cmake --version 32 | - cmake .. -G "%CMAKE_GENERATOR%" 33 | -DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%" 34 | - cmake --build . --config %CONFIGURATION% 35 | - cd .. 
36 | 37 | test_script: 38 | - out\%CONFIGURATION%\snappy_unittest 39 | -------------------------------------------------------------------------------- /snappy/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | Please make sure that all the automated checks (CLA, AppVeyor, Travis) pass for 26 | your pull requests. Pull requests whose checks fail may be ignored. 27 | -------------------------------------------------------------------------------- /snappy/snappy-stubs-internal.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | /* NOTE(review): the targets of the two bare #include directives below are missing from this listing (apparently stripped during extraction); restore them from the original file. */ 29 | #include 30 | #include 31 | 32 | #include "snappy-stubs-internal.h" 33 | 34 | namespace snappy { 35 | /* Appends the Varint::Encode32 encoding of value to the string s: the value is encoded into a stack buffer of Varint::kMax32 bytes and only the bytes actually written (p - buf) are appended. */ 36 | void Varint::Append32(string* s, uint32 value) { 37 | char buf[Varint::kMax32]; 38 | const char* p = Varint::Encode32(buf, value); 39 | s->append(buf, p - buf); 40 | } 41 | 42 | } // namespace snappy 43 | -------------------------------------------------------------------------------- /snappy/.travis.yml: -------------------------------------------------------------------------------- 1 | # Build matrix / environment variables are explained on: 2 | # http://about.travis-ci.org/docs/user/build-configuration/ 3 | # This file can be validated on: http://lint.travis-ci.org/ 4 | 5 | sudo: false 6 | dist: trusty 7 | language: cpp 8 | 9 | compiler: 10 | - gcc 11 | - clang 12 | os: 13 | - linux 14 | - osx 15 | 16 | env: 17 | - BUILD_TYPE=Debug 18 | - BUILD_TYPE=RelWithDebInfo 19 | 20 | matrix: 21 | allow_failures: 22 | - compiler: clang 23 | env: BUILD_TYPE=RelWithDebInfo 24 | 25 | addons: 26 | apt: 27 | # List of whitelisted in travis packages for ubuntu-trusty can be found here: 28 | # https://github.com/travis-ci/apt-package-whitelist/blob/master/ubuntu-trusty 29 | # List of whitelisted in travis apt-sources: 30 | # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json 31 | sources: 32 | - ubuntu-toolchain-r-test 33 | - llvm-toolchain-trusty-5.0 34 | packages: 35 | - cmake 36 | - gcc-7 37 | - g++-7 38 | - clang-5.0 39 | 40 | install: 41 | # Travis doesn't have a DSL for installing homebrew packages yet. Status tracked 42 | # in https://github.com/travis-ci/travis-ci/issues/5377 43 | # The Travis VM image for Mac already has a link at /usr/local/include/c++, 44 | # causing Homebrew's gcc@7 installation to error out. This was reported to 45 | # Homebrew maintainers at https://github.com/Homebrew/brew/issues/1742 and 46 | # removing the link emerged as a workaround. 
47 | - if [ "$TRAVIS_OS_NAME" == "osx" ]; then 48 | brew update; 49 | if [ -L /usr/local/include/c++ ]; then rm /usr/local/include/c++; fi; 50 | brew install gcc@7; 51 | fi 52 | # /usr/bin/gcc is stuck to old versions on both Linux and OSX. 53 | - if [ "$CXX" = "g++" ]; then export CXX="g++-7" CC="gcc-7"; fi 54 | - echo ${CC} 55 | - echo ${CXX} 56 | - ${CXX} --version 57 | - cmake --version 58 | 59 | before_script: 60 | - mkdir -p build && cd build 61 | - cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE 62 | - cmake --build . 63 | - cd .. 64 | 65 | script: 66 | - build/snappy_unittest 67 | -------------------------------------------------------------------------------- /snappy/cmake/config.h.in: -------------------------------------------------------------------------------- 1 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ 2 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ 3 | 4 | /* Define to 1 if the compiler supports __builtin_ctz and friends. */ 5 | #cmakedefine HAVE_BUILTIN_CTZ 1 6 | 7 | /* Define to 1 if the compiler supports __builtin_expect. */ 8 | #cmakedefine HAVE_BUILTIN_EXPECT 1 9 | 10 | /* Define to 1 if you have the <byteswap.h> header file. */ 11 | #cmakedefine HAVE_BYTESWAP_H 1 12 | 13 | /* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */ 14 | #cmakedefine HAVE_FUNC_MMAP 1 15 | 16 | /* Define to 1 if you have a definition for sysconf() in <unistd.h>. */ 17 | #cmakedefine HAVE_FUNC_SYSCONF 1 18 | 19 | /* Define to 1 to use the gflags package for command-line parsing. */ 20 | #cmakedefine HAVE_GFLAGS 1 21 | 22 | /* Define to 1 if you have Google Test. */ 23 | #cmakedefine HAVE_GTEST 1 24 | 25 | /* Define to 1 if you have the `lzo2' library (-llzo2). */ 26 | #cmakedefine HAVE_LIBLZO2 1 27 | 28 | /* Define to 1 if you have the `z' library (-lz). */ 29 | #cmakedefine HAVE_LIBZ 1 30 | 31 | /* Define to 1 if you have the <stddef.h> header file. */ 32 | #cmakedefine HAVE_STDDEF_H 1 33 | 34 | /* Define to 1 if you have the <stdint.h> header file. 
*/ 35 | #cmakedefine HAVE_STDINT_H 1 36 | 37 | /* Define to 1 if you have the <sys/endian.h> header file. */ 38 | #cmakedefine HAVE_SYS_ENDIAN_H 1 39 | 40 | /* Define to 1 if you have the <sys/mman.h> header file. */ 41 | #cmakedefine HAVE_SYS_MMAN_H 1 42 | 43 | /* Define to 1 if you have the <sys/resource.h> header file. */ 44 | #cmakedefine HAVE_SYS_RESOURCE_H 1 45 | 46 | /* Define to 1 if you have the <sys/time.h> header file. */ 47 | #cmakedefine HAVE_SYS_TIME_H 1 48 | 49 | /* Define to 1 if you have the <sys/uio.h> header file. */ 50 | #cmakedefine HAVE_SYS_UIO_H 1 51 | 52 | /* Define to 1 if you have the <unistd.h> header file. */ 53 | #cmakedefine HAVE_UNISTD_H 1 54 | 55 | /* Define to 1 if you have the <windows.h> header file. */ 56 | #cmakedefine HAVE_WINDOWS_H 1 57 | 58 | /* Define to 1 if your processor stores words with the most significant byte 59 | first (like Motorola and SPARC, unlike Intel and VAX). */ 60 | #cmakedefine SNAPPY_IS_BIG_ENDIAN 1 61 | 62 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ 63 | -------------------------------------------------------------------------------- /snappy/COPYING: -------------------------------------------------------------------------------- 1 | Copyright 2011, Google Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | === 31 | 32 | Some of the benchmark data in testdata/ is licensed differently: 33 | 34 | - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and 35 | is licensed under the Creative Commons Attribution 3.0 license 36 | (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ 37 | for more information. 38 | 39 | - kppkn.gtb is taken from the Gaviota chess tablebase set, and 40 | is licensed under the MIT License. See 41 | https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 42 | for more information. 43 | 44 | - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper 45 | “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA 46 | Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, 47 | which is licensed under the CC-BY license. See 48 | http://www.ploscompbiol.org/static/license for more information. 49 | 50 | - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project 51 | Gutenberg. 
The first three have expired copyrights and are in the public 52 | domain; the latter does not have expired copyright, but is still in the 53 | public domain according to the license information 54 | (http://www.gutenberg.org/ebooks/53). 55 | -------------------------------------------------------------------------------- /snappy/snappy-stubs-public.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // Author: sesse@google.com (Steinar H. Gunderson) 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | // 30 | // Various type stubs for the open-source version of Snappy. 31 | // 32 | // This file cannot include config.h, as it is included from snappy.h, 33 | // which is a public header. Instead, snappy-stubs-public.h is generated 34 | // from snappy-stubs-public.h.in at configure time. 35 | 36 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 37 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 38 | 39 | #if 1 40 | #include <stdint.h> 41 | #endif 42 | 43 | #if 1 44 | #include <stddef.h> 45 | #endif 46 | 47 | #if 0 48 | #include <sys/uio.h> 49 | #endif 50 | 51 | #define SNAPPY_MAJOR 1 52 | #define SNAPPY_MINOR 1 53 | #define SNAPPY_PATCHLEVEL 3 54 | #define SNAPPY_VERSION \ 55 | ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) 56 | 57 | #include <string> 58 | 59 | namespace snappy { 60 | 61 | #if 1 62 | typedef int8_t int8; 63 | typedef uint8_t uint8; 64 | typedef int16_t int16; 65 | typedef uint16_t uint16; 66 | typedef int32_t int32; 67 | typedef uint32_t uint32; 68 | typedef int64_t int64; 69 | typedef uint64_t uint64; 70 | #else 71 | typedef signed char int8; 72 | typedef unsigned char uint8; 73 | typedef short int16; 74 | typedef unsigned short uint16; 75 | typedef int int32; 76 | typedef unsigned int uint32; 77 | typedef long long int64; 78 | typedef unsigned long long uint64; 79 | #endif 80 | 81 | typedef std::string string; 82 | /* Declares, without defining, the copy constructor and copy-assignment operator of TypeName. */ 83 | #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ 84 | 
TypeName(const TypeName&); \ 85 | void operator=(const TypeName&) 86 | 87 | #if !0 88 | // Windows does not have an iovec type, yet the concept is universally useful. 89 | // It is simple to define it ourselves, so we put it inside our own namespace. 90 | struct iovec { 91 | void* iov_base; 92 | size_t iov_len; 93 | }; 94 | #endif 95 | 96 | } // namespace snappy 97 | 98 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 99 | -------------------------------------------------------------------------------- /snappy/snappy-stubs-public.h.in: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // Author: sesse@google.com (Steinar H. Gunderson) 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | // 30 | // Various type stubs for the open-source version of Snappy. 31 | // 32 | // This file cannot include config.h, as it is included from snappy.h, 33 | // which is a public header. Instead, snappy-stubs-public.h is generated 34 | // from snappy-stubs-public.h.in at configure time. 35 | 36 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 37 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 38 | 39 | #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H 40 | #include <stdint.h> 41 | #endif // HAVE_STDINT_H 42 | 43 | #if ${HAVE_STDDEF_H_01} // HAVE_STDDEF_H 44 | #include <stddef.h> 45 | #endif // HAVE_STDDEF_H 46 | 47 | #if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H 48 | #include <sys/uio.h> 49 | #endif // HAVE_SYS_UIO_H 50 | 51 | #define SNAPPY_MAJOR ${PROJECT_VERSION_MAJOR} 52 | #define SNAPPY_MINOR ${PROJECT_VERSION_MINOR} 53 | #define SNAPPY_PATCHLEVEL ${PROJECT_VERSION_PATCH} 54 | #define SNAPPY_VERSION \ 55 | ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) 56 | 57 | #include <string> 58 | 59 | namespace snappy { 60 | 61 | #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H 62 | typedef int8_t int8; 63 | typedef uint8_t uint8; 64 | typedef int16_t int16; 65 | typedef uint16_t uint16; 66 | typedef int32_t int32; 67 | typedef uint32_t uint32; 68 | typedef int64_t int64; 69 | typedef uint64_t uint64; 70 | #else 71 | typedef signed char int8; 72 | typedef unsigned char uint8; 73 | typedef short int16; 74 | typedef 
unsigned short uint16; 75 | typedef int int32; 76 | typedef unsigned int uint32; 77 | typedef long long int64; 78 | typedef unsigned long long uint64; 79 | #endif // HAVE_STDINT_H 80 | 81 | typedef std::string string; 82 | 83 | #if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H 84 | // Windows does not have an iovec type, yet the concept is universally useful. 85 | // It is simple to define it ourselves, so we put it inside our own namespace. 86 | struct iovec { 87 | void* iov_base; 88 | size_t iov_len; 89 | }; 90 | #endif // !HAVE_SYS_UIO_H 91 | 92 | } // namespace snappy 93 | 94 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 95 | -------------------------------------------------------------------------------- /snappy/snappy-sinksource.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | /* NOTE(review): the target of the bare #include on the next line is missing from this listing; presumably the C string header that declares memcpy - confirm against the original file. */ 29 | #include 30 | 31 | #include "snappy-sinksource.h" 32 | 33 | namespace snappy { 34 | 35 | Source::~Source() { } 36 | 37 | Sink::~Sink() { } 38 | /* Base-class default: ignores length and returns the caller-supplied scratch buffer. */ 39 | char* Sink::GetAppendBuffer(size_t length, char* scratch) { 40 | return scratch; 41 | } 42 | /* Base-class default: reports scratch_size through allocated_size and returns scratch; min_size and desired_size_hint are unused. */ 43 | char* Sink::GetAppendBufferVariable( 44 | size_t min_size, size_t desired_size_hint, char* scratch, 45 | size_t scratch_size, size_t* allocated_size) { 46 | *allocated_size = scratch_size; 47 | return scratch; 48 | } 49 | /* Base-class default: forwards bytes to Append(), then invokes deleter(deleter_arg, bytes, n). */ 50 | void Sink::AppendAndTakeOwnership( 51 | char* bytes, size_t n, 52 | void (*deleter)(void*, const char*, size_t), 53 | void *deleter_arg) { 54 | Append(bytes, n); 55 | (*deleter)(deleter_arg, bytes, n); 56 | } 57 | 58 | ByteArraySource::~ByteArraySource() { } 59 | /* Returns left_, the count that Skip() decrements. */ 60 | size_t ByteArraySource::Available() const { return left_; } 61 | /* Stores left_ through len and returns ptr_; neither member is modified. */ 62 | const char* ByteArraySource::Peek(size_t* len) { 63 | *len = left_; 64 | return ptr_; 65 | } 66 | /* Advances ptr_ by n and reduces left_ by n; no bounds check is performed here. */ 67 | void ByteArraySource::Skip(size_t n) { 68 | left_ -= n; 69 | ptr_ += n; 70 | } 71 | 72 | UncheckedByteArraySink::~UncheckedByteArraySink() { } 73 | /* Copies n bytes from data to dest_ (skipped when data already equals dest_), then advances dest_ by n. */ 74 | void UncheckedByteArraySink::Append(const char* data, size_t n) { 75 | // Do no copying if the caller filled in the result of GetAppendBuffer() 76 | if (data != dest_) { 77 | memcpy(dest_, data, n); 78 | } 79 | dest_ += n; 80 | } 81 | /* Returns dest_ itself; len and scratch are unused. */ 82 | char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { 83 | return dest_; 84 | } 85 | /* Same copy-and-advance as Append(), plus a call to deleter(deleter_arg, data, n) after the copy; when data equals dest_ neither the copy nor the deleter call happens. */ 86 | 
void UncheckedByteArraySink::AppendAndTakeOwnership( 87 | char* data, size_t n, 88 | void (*deleter)(void*, const char*, size_t), 89 | void *deleter_arg) { 90 | if (data != dest_) { 91 | memcpy(dest_, data, n); 92 | (*deleter)(deleter_arg, data, n); 93 | } 94 | dest_ += n; 95 | } 96 | /* Returns dest_ and reports desired_size_hint through allocated_size; min_size, scratch and scratch_size are unused. */ 97 | char* UncheckedByteArraySink::GetAppendBufferVariable( 98 | size_t min_size, size_t desired_size_hint, char* scratch, 99 | size_t scratch_size, size_t* allocated_size) { 100 | *allocated_size = desired_size_hint; 101 | return dest_; 102 | } 103 | 104 | } // namespace snappy 105 | -------------------------------------------------------------------------------- /snappy/snappy-c.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Martin Gieseking . 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | #include "snappy.h" 30 | #include "snappy-c.h" 31 | 32 | extern "C" { 33 | 34 | snappy_status snappy_compress(const char* input, 35 | size_t input_length, 36 | char* compressed, 37 | size_t *compressed_length) { 38 | if (*compressed_length < snappy_max_compressed_length(input_length)) { 39 | return SNAPPY_BUFFER_TOO_SMALL; 40 | } 41 | snappy::RawCompress(input, input_length, compressed, compressed_length); 42 | return SNAPPY_OK; 43 | } 44 | 45 | snappy_status snappy_uncompress(const char* compressed, 46 | size_t compressed_length, 47 | char* uncompressed, 48 | size_t* uncompressed_length) { 49 | size_t real_uncompressed_length; 50 | if (!snappy::GetUncompressedLength(compressed, 51 | compressed_length, 52 | &real_uncompressed_length)) { 53 | return SNAPPY_INVALID_INPUT; 54 | } 55 | if (*uncompressed_length < real_uncompressed_length) { 56 | return SNAPPY_BUFFER_TOO_SMALL; 57 | } 58 | if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) { 59 | return SNAPPY_INVALID_INPUT; 60 | } 61 | *uncompressed_length = real_uncompressed_length; 62 | return SNAPPY_OK; 63 | } 64 | 65 | size_t snappy_max_compressed_length(size_t source_length) { 66 | return snappy::MaxCompressedLength(source_length); 67 | } 68 | 69 | snappy_status snappy_uncompressed_length(const char *compressed, 70 | size_t compressed_length, 71 | size_t *result) { 72 | if (snappy::GetUncompressedLength(compressed, 73 | 
compressed_length, 74 | result)) { 75 | return SNAPPY_OK; 76 | } else { 77 | return SNAPPY_INVALID_INPUT; 78 | } 79 | } 80 | 81 | snappy_status snappy_validate_compressed_buffer(const char *compressed, 82 | size_t compressed_length) { 83 | if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) { 84 | return SNAPPY_OK; 85 | } else { 86 | return SNAPPY_INVALID_INPUT; 87 | } 88 | } 89 | 90 | } // extern "C" 91 | -------------------------------------------------------------------------------- /snappy/format_description.txt: -------------------------------------------------------------------------------- 1 | Snappy compressed format description 2 | Last revised: 2011-10-05 3 | 4 | 5 | This is not a formal specification, but should suffice to explain most 6 | relevant parts of how the Snappy format works. It is originally based on 7 | text by Zeev Tarantov. 8 | 9 | Snappy is a LZ77-type compressor with a fixed, byte-oriented encoding. 10 | There is no entropy encoder backend nor framing layer -- the latter is 11 | assumed to be handled by other parts of the system. 12 | 13 | This document only describes the format, not how the Snappy compressor nor 14 | decompressor actually works. The correctness of the decompressor should not 15 | depend on implementation details of the compressor, and vice versa. 16 | 17 | 18 | 1. Preamble 19 | 20 | The stream starts with the uncompressed length (up to a maximum of 2^32 - 1), 21 | stored as a little-endian varint. Varints consist of a series of bytes, 22 | where the lower 7 bits are data and the upper bit is set iff there are 23 | more bytes to be read. In other words, an uncompressed length of 64 would 24 | be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE) 25 | would be stored as 0xFE 0xFF 0x7F. 26 | 27 | 28 | 2. The compressed stream itself 29 | 30 | There are two types of elements in a Snappy stream: Literals and 31 | copies (backreferences). 
There is no restriction on the order of elements, 32 | except that the stream naturally cannot start with a copy. (Having 33 | two literals in a row is never optimal from a compression point of 34 | view, but nevertheless fully permitted.) Each element starts with a tag byte, 35 | and the lower two bits of this tag byte signal what type of element will 36 | follow: 37 | 38 | 00: Literal 39 | 01: Copy with 1-byte offset 40 | 10: Copy with 2-byte offset 41 | 11: Copy with 4-byte offset 42 | 43 | The interpretation of the upper six bits is element-dependent. 44 | 45 | 46 | 2.1. Literals (00) 47 | 48 | Literals are uncompressed data stored directly in the byte stream. 49 | The literal length is stored differently depending on the length 50 | of the literal: 51 | 52 | - For literals up to and including 60 bytes in length, the upper 53 | six bits of the tag byte contain (len-1). The literal follows 54 | immediately thereafter in the bytestream. 55 | - For longer literals, the (len-1) value is stored after the tag byte, 56 | little-endian. The upper six bits of the tag byte describe how 57 | many bytes are used for the length; 60, 61, 62 or 63 for 58 | 1-4 bytes, respectively. The literal itself follows after the 59 | length. 60 | 61 | 62 | 2.2. Copies 63 | 64 | Copies are references back into previous decompressed data, telling 65 | the decompressor to reuse data it has previously decoded. 66 | They encode two values: The _offset_, saying how many bytes back 67 | from the current position to read, and the _length_, how many bytes 68 | to copy. Offsets of zero can be encoded, but are not legal; 69 | similarly, it is possible to encode backreferences that would 70 | go past the end of the block (offset > current decompressed position), 71 | which is also nonsensical and thus not allowed. 72 | 73 | As in most LZ77-based compressors, the length can be larger than the offset, 74 | yielding a form of run-length encoding (RLE). 
For instance, 75 | "xababab" could be encoded as 76 | 77 | <literal: "xab"> <copy: offset=2 length=4> 78 | 79 | Note that since the current Snappy compressor works in 32 kB 80 | blocks and does not do matching across blocks, it will never produce 81 | a bitstream with offsets larger than about 32768. However, the 82 | decompressor should not rely on this, as it may change in the future. 83 | 84 | There are several different kinds of copy elements, depending on 85 | the number of bytes to be copied (length), and how far back the 86 | data to be copied is (offset). 87 | 88 | 89 | 2.2.1. Copy with 1-byte offset (01) 90 | 91 | These elements can encode lengths between [4..11] bytes and offsets 92 | between [0..2047] bytes. (len-4) occupies three bits and is stored 93 | in bits [2..4] of the tag byte. The offset occupies 11 bits, of which the 94 | upper three are stored in the upper three bits ([5..7]) of the tag byte, 95 | and the lower eight are stored in a byte following the tag byte. 96 | 97 | 98 | 2.2.2. Copy with 2-byte offset (10) 99 | 100 | These elements can encode lengths between [1..64] and offsets from 101 | [0..65535]. (len-1) occupies six bits and is stored in the upper 102 | six bits ([2..7]) of the tag byte. The offset is stored as a 103 | little-endian 16-bit integer in the two bytes following the tag byte. 104 | 105 | 106 | 2.2.3. Copy with 4-byte offset (11) 107 | 108 | These are like the copies with 2-byte offsets (see previous subsection), 109 | except that the offset is stored as a 32-bit integer instead of a 110 | 16-bit integer (and thus will occupy four bytes). 
111 | -------------------------------------------------------------------------------- /snappy/snappy.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 65 | 68 | 71 | 74 | 77 | 80 | 81 | 89 | 92 | 95 | 98 | 101 | 104 | 116 | 119 | 122 | 125 | 128 | 131 | 134 | 137 | 140 | 143 | 144 | 145 | 146 | 147 | 148 | 153 | 156 | 157 | 160 | 161 | 164 | 165 | 168 | 169 | 170 | 175 | 178 | 179 | 182 | 183 | 186 | 187 | 190 | 191 | 194 | 195 | 198 | 199 | 200 | 205 | 206 | 207 | 208 | 209 | 210 | -------------------------------------------------------------------------------- /snappy/snappy-test.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 70 | 73 | 76 | 79 | 82 | 85 | 88 | 91 | 92 | 100 | 103 | 106 | 109 | 112 | 115 | 127 | 130 | 133 | 136 | 146 | 149 | 152 | 155 | 158 | 161 | 164 | 167 | 168 | 169 | 170 | 171 | 172 | 177 | 180 | 181 | 184 | 185 | 186 | 191 | 194 | 195 | 196 | 201 | 202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /snappy/framing_format.txt: -------------------------------------------------------------------------------- 1 | Snappy framing format description 2 | Last revised: 2013-10-25 3 | 4 | This format describes a framing format for Snappy, allowing compressing to 5 | files or streams that can then more easily be decompressed without having 6 | to hold the entire stream in memory. It also provides data checksums to 7 | help verify integrity. It does not provide metadata checksums, so it does 8 | not protect against e.g. all forms of truncations. 9 | 10 | Implementation of the framing format is optional for Snappy compressors and 11 | decompressors; it is not part of the Snappy core specification. 12 | 13 | 14 | 1. 
General structure 15 | 16 | The file consists solely of chunks, lying back-to-back with no padding 17 | in between. Each chunk consists first of a single byte of chunk identifier, 18 | then a three-byte little-endian length of the chunk in bytes (from 0 to 19 | 16777215, inclusive), and then the data if any. The four bytes of the chunk 20 | header are not counted in the data length. 21 | 22 | The different chunk types are listed below. The first chunk must always 23 | be the stream identifier chunk (see section 4.1, below). The stream 24 | ends when the file ends -- there is no explicit end-of-file marker. 25 | 26 | 27 | 2. File type identification 28 | 29 | The following identifiers for this format are recommended where appropriate. 30 | However, note that none have been registered officially, so this is only to 31 | be taken as a guideline. We use "Snappy framed" to distinguish between this 32 | format and raw Snappy data. 33 | 34 | File extension: .sz 35 | MIME type: application/x-snappy-framed 36 | HTTP Content-Encoding: x-snappy-framed 37 | 38 | 39 | 3. Checksum format 40 | 41 | Some chunks have data protected by a checksum (the ones that do will say so 42 | explicitly). The checksums are always masked CRC-32Cs. 43 | 44 | A description of CRC-32C can be found in RFC 3720, section 12.1, with 45 | examples in section B.4. 46 | 47 | Checksums are not stored directly, but masked, as checksumming data and 48 | then its own checksum can be problematic. The masking is the same as used 49 | in Apache Hadoop: Rotate the checksum right by 15 bits, then add the constant 50 | 0xa282ead8 (using wraparound as normal for unsigned integers). This is 51 | equivalent to the following C code: 52 | 53 | uint32_t mask_checksum(uint32_t x) { 54 | return ((x >> 15) | (x << 17)) + 0xa282ead8; 55 | } 56 | 57 | Note that the masking is reversible. 58 | 59 | The checksum is always stored as a four-byte integer, in little-endian byte order. 60 | 61 | 62 | 4. 
Chunk types 63 | 64 | The currently supported chunk types are described below. The list may 65 | be extended in the future. 66 | 67 | 68 | 4.1. Stream identifier (chunk type 0xff) 69 | 70 | The stream identifier is always the first element in the stream. 71 | It is exactly six bytes long and contains "sNaPpY" in ASCII. This means that 72 | a valid Snappy framed stream always starts with the bytes 73 | 74 | 0xff 0x06 0x00 0x00 0x73 0x4e 0x61 0x50 0x70 0x59 75 | 76 | The stream identifier chunk can come multiple times in the stream besides 77 | the first; if such a chunk shows up, it should simply be ignored, assuming 78 | it has the right length and contents. This allows for easy concatenation of 79 | compressed files without the need for re-framing. 80 | 81 | 82 | 4.2. Compressed data (chunk type 0x00) 83 | 84 | Compressed data chunks contain a normal Snappy compressed bitstream; 85 | see the compressed format specification. The compressed data is preceded by 86 | the CRC-32C (see section 3) of the _uncompressed_ data. 87 | 88 | Note that the data portion of the chunk, i.e., the compressed contents, 89 | can be at most 16777211 bytes (2^24 - 1, minus the checksum). 90 | However, we place an additional restriction that the uncompressed data 91 | in a chunk must be no longer than 65536 bytes. This allows consumers to 92 | easily use small fixed-size buffers. 93 | 94 | 95 | 4.3. Uncompressed data (chunk type 0x01) 96 | 97 | Uncompressed data chunks allow a compressor to send uncompressed, 98 | raw data; this is useful if, for instance, uncompressible or 99 | near-incompressible data is detected, and faster decompression is desired. 100 | 101 | As in the compressed chunks, the data is preceded by its own masked 102 | CRC-32C (see section 3). 103 | 104 | An uncompressed data chunk, like compressed data chunks, should contain 105 | no more than 65536 data bytes, so the maximum legal chunk length with the 106 | checksum is 65540. 107 | 108 | 109 | 4.4. 
Padding (chunk type 0xfe) 110 | 111 | Padding chunks allow a compressor to increase the size of the data stream 112 | so that it complies with external demands, e.g. that the total number of 113 | bytes is a multiple of some value. 114 | 115 | All bytes of the padding chunk, except the chunk byte itself and the length, 116 | should be zero, but decompressors must not try to interpret or verify the 117 | padding data in any way. 118 | 119 | 120 | 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f) 121 | 122 | These are reserved for future expansion. A decoder that sees such a chunk 123 | should immediately return an error, as it must assume it cannot decode the 124 | stream correctly. 125 | 126 | Future versions of this specification may define meanings for these chunks. 127 | 128 | 129 | 4.6. Reserved skippable chunks (chunk types 0x80-0xfd) 130 | 131 | These are also reserved for future expansion, but unlike the chunks 132 | described in 4.5, a decoder seeing these must skip them and continue 133 | decoding. 134 | 135 | Future versions of this specification may define meanings for these chunks. 136 | -------------------------------------------------------------------------------- /snappy/snappy-c.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Martin Gieseking . 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. 
nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * Plain C interface (a wrapper around the C++ implementation). 31 | */ 32 | 33 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ 34 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | #include <stddef.h> 41 | 42 | /* 43 | * Return values; see the documentation for each function to know 44 | * what each can return. 45 | */ 46 | typedef enum { 47 | SNAPPY_OK = 0, 48 | SNAPPY_INVALID_INPUT = 1, 49 | SNAPPY_BUFFER_TOO_SMALL = 2 50 | } snappy_status; 51 | 52 | /* 53 | * Takes the data stored in "input[0..input_length-1]" and stores 54 | * it in the array pointed to by "compressed". 55 | * 56 | * <compressed_length> signals the space available in "compressed". 57 | * If it is not at least equal to "snappy_max_compressed_length(input_length)", 58 | * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression, 59 | * <compressed_length> contains the true length of the compressed output, 60 | * and SNAPPY_OK is returned. 
61 | * 62 | * Example: 63 | * size_t output_length = snappy_max_compressed_length(input_length); 64 | * char* output = (char*)malloc(output_length); 65 | * if (snappy_compress(input, input_length, output, &output_length) 66 | * == SNAPPY_OK) { 67 | * ... Process(output, output_length) ... 68 | * } 69 | * free(output); 70 | */ 71 | snappy_status snappy_compress(const char* input, 72 | size_t input_length, 73 | char* compressed, 74 | size_t* compressed_length); 75 | 76 | /* 77 | * Given data in "compressed[0..compressed_length-1]" generated by 78 | * calling the snappy_compress routine, this routine stores 79 | * the uncompressed data to 80 | * uncompressed[0..uncompressed_length-1]. 81 | * Returns failure (a value not equal to SNAPPY_OK) if the message 82 | * is corrupted and could not be decompressed. 83 | * 84 | * <uncompressed_length> signals the space available in "uncompressed". 85 | * If it is not at least equal to the value returned by 86 | * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL 87 | * is returned. After successful decompression, 88 | * <uncompressed_length> contains the true length of the decompressed output. 89 | * 90 | * Example: 91 | * size_t output_length; 92 | * if (snappy_uncompressed_length(input, input_length, &output_length) 93 | * != SNAPPY_OK) { 94 | * ... fail ... 95 | * } 96 | * char* output = (char*)malloc(output_length); 97 | * if (snappy_uncompress(input, input_length, output, &output_length) 98 | * == SNAPPY_OK) { 99 | * ... Process(output, output_length) ... 100 | * } 101 | * free(output); 102 | */ 103 | snappy_status snappy_uncompress(const char* compressed, 104 | size_t compressed_length, 105 | char* uncompressed, 106 | size_t* uncompressed_length); 107 | 108 | /* 109 | * Returns the maximal size of the compressed representation of 110 | * input data that is "source_length" bytes in length. 
111 | */ 112 | size_t snappy_max_compressed_length(size_t source_length); 113 | 114 | /* 115 | * REQUIRES: "compressed[]" was produced by snappy_compress() 116 | * Returns SNAPPY_OK and stores the length of the uncompressed data in 117 | * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error. 118 | * This operation takes O(1) time. 119 | */ 120 | snappy_status snappy_uncompressed_length(const char* compressed, 121 | size_t compressed_length, 122 | size_t* result); 123 | 124 | /* 125 | * Check if the contents of "compressed[]" can be uncompressed successfully. 126 | * Does not return the uncompressed data; if so, returns SNAPPY_OK, 127 | * or if not, returns SNAPPY_INVALID_INPUT. 128 | * Takes time proportional to compressed_length, but is usually at least a 129 | * factor of four faster than actual decompression. 130 | */ 131 | snappy_status snappy_validate_compressed_buffer(const char* compressed, 132 | size_t compressed_length); 133 | 134 | #ifdef __cplusplus 135 | } // extern "C" 136 | #endif 137 | 138 | #endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */ 139 | -------------------------------------------------------------------------------- /snappy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | project(Snappy VERSION 1.1.7 LANGUAGES C CXX) 3 | 4 | # BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make 5 | # it prominent in the GUI. 6 | option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) 7 | 8 | option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." 
ON) 9 | 10 | include(TestBigEndian) 11 | test_big_endian(SNAPPY_IS_BIG_ENDIAN) 12 | 13 | include(CheckIncludeFile) 14 | check_include_file("byteswap.h" HAVE_BYTESWAP_H) 15 | check_include_file("stddef.h" HAVE_STDDEF_H) 16 | check_include_file("stdint.h" HAVE_STDINT_H) 17 | check_include_file("sys/endian.h" HAVE_SYS_ENDIAN_H) 18 | check_include_file("sys/mman.h" HAVE_SYS_MMAN_H) 19 | check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H) 20 | check_include_file("sys/time.h" HAVE_SYS_TIME_H) 21 | check_include_file("sys/uio.h" HAVE_SYS_UIO_H) 22 | check_include_file("unistd.h" HAVE_UNISTD_H) 23 | check_include_file("windows.h" HAVE_WINDOWS_H) 24 | 25 | include(CheckLibraryExists) 26 | check_library_exists(z zlibVersion "" HAVE_LIBZ) 27 | check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) 28 | 29 | include(CheckCXXSourceCompiles) 30 | check_cxx_source_compiles( 31 | "int main(void) { return __builtin_expect(0, 1); }" HAVE_BUILTIN_EXPECT) 32 | 33 | check_cxx_source_compiles( 34 | "int main(void) { return __builtin_ctzll(0); }" HAVE_BUILTIN_CTZ) 35 | 36 | include(CheckSymbolExists) 37 | check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP) 38 | check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF) 39 | 40 | find_package(GTest QUIET) 41 | if(GTEST_FOUND) 42 | set(HAVE_GTEST 1) 43 | endif(GTEST_FOUND) 44 | 45 | find_package(Gflags QUIET) 46 | if(GFLAGS_FOUND) 47 | set(HAVE_GFLAGS 1) 48 | endif(GFLAGS_FOUND) 49 | 50 | configure_file( 51 | "${PROJECT_SOURCE_DIR}/cmake/config.h.in" 52 | "${PROJECT_BINARY_DIR}/config.h" 53 | ) 54 | 55 | # We don't want to define HAVE_ macros in public headers. Instead, we use 56 | # CMake's variable substitution with 0/1 variables, which will be seen by the 57 | # preprocessor as constants. 
set(HAVE_STDINT_H_01 ${HAVE_STDINT_H})
set(HAVE_STDDEF_H_01 ${HAVE_STDDEF_H})
set(HAVE_SYS_UIO_H_01 ${HAVE_SYS_UIO_H})
if(NOT HAVE_STDINT_H_01)
  set(HAVE_STDINT_H_01 0)
endif(NOT HAVE_STDINT_H_01)
if(NOT HAVE_STDDEF_H_01)
  set(HAVE_STDDEF_H_01 0)
endif(NOT HAVE_STDDEF_H_01)
if(NOT HAVE_SYS_UIO_H_01)
  set(HAVE_SYS_UIO_H_01 0)
endif(NOT HAVE_SYS_UIO_H_01)

configure_file(
  "${PROJECT_SOURCE_DIR}/snappy-stubs-public.h.in"
  "${PROJECT_BINARY_DIR}/snappy-stubs-public.h")

add_library(snappy "")
target_sources(snappy
  PRIVATE
    "${PROJECT_SOURCE_DIR}/snappy-internal.h"
    "${PROJECT_SOURCE_DIR}/snappy-stubs-internal.h"
    "${PROJECT_SOURCE_DIR}/snappy-c.cc"
    "${PROJECT_SOURCE_DIR}/snappy-sinksource.cc"
    "${PROJECT_SOURCE_DIR}/snappy-stubs-internal.cc"
    "${PROJECT_SOURCE_DIR}/snappy.cc"
    "${PROJECT_BINARY_DIR}/config.h"

  # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install".
  # NOTE(review): the generator expressions below were stripped from this copy
  # of the file and have been reconstructed from the public-header list in the
  # install(FILES ...) call further down -- confirm against upstream.
  $<$<VERSION_GREATER:CMAKE_VERSION,3.2>:PUBLIC>
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-c.h>
    $<INSTALL_INTERFACE:include/snappy-c.h>
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-sinksource.h>
    $<INSTALL_INTERFACE:include/snappy-sinksource.h>
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy.h>
    $<INSTALL_INTERFACE:include/snappy.h>
    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/snappy-stubs-public.h>
    $<INSTALL_INTERFACE:include/snappy-stubs-public.h>
)
target_include_directories(snappy
  PUBLIC
    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
    $<INSTALL_INTERFACE:include>
)
set_target_properties(snappy
  PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})

target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H)
if(BUILD_SHARED_LIBS)
  set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif(BUILD_SHARED_LIBS)

if(SNAPPY_BUILD_TESTS)
  enable_testing()

  add_executable(snappy_unittest "")
  target_sources(snappy_unittest
    PRIVATE
      "${PROJECT_SOURCE_DIR}/snappy_unittest.cc"
      "${PROJECT_SOURCE_DIR}/snappy-test.cc"
  )
  target_compile_definitions(snappy_unittest PRIVATE -DHAVE_CONFIG_H)
  target_link_libraries(snappy_unittest snappy ${GFLAGS_LIBRARIES})

  if(HAVE_LIBZ)
    target_link_libraries(snappy_unittest z)
  endif(HAVE_LIBZ)
  if(HAVE_LIBLZO2)
    target_link_libraries(snappy_unittest lzo2)
  endif(HAVE_LIBLZO2)

  target_include_directories(snappy_unittest
    BEFORE PRIVATE
      "${PROJECT_SOURCE_DIR}"
      "${GTEST_INCLUDE_DIRS}"
      "${GFLAGS_INCLUDE_DIRS}"
  )

  add_test(
    NAME snappy_unittest
    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
    COMMAND "${PROJECT_BINARY_DIR}/snappy_unittest")
endif(SNAPPY_BUILD_TESTS)

include(GNUInstallDirs)
install(TARGETS snappy
  EXPORT SnappyTargets
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(
  FILES
    "${PROJECT_SOURCE_DIR}/snappy-c.h"
    "${PROJECT_SOURCE_DIR}/snappy-sinksource.h"
    "${PROJECT_SOURCE_DIR}/snappy.h"
    "${PROJECT_BINARY_DIR}/snappy-stubs-public.h"
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

include(CMakePackageConfigHelpers) 160 | write_basic_package_version_file( 161 | "${PROJECT_BINARY_DIR}/SnappyConfigVersion.cmake" 162 | COMPATIBILITY SameMajorVersion 163 | ) 164 | install( 165 | EXPORT SnappyTargets 166 | NAMESPACE Snappy:: 167 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/Snappy" 168 | ) 169 | install( 170 | FILES 171 | "${PROJECT_SOURCE_DIR}/cmake/SnappyConfig.cmake" 172 | "${PROJECT_BINARY_DIR}/SnappyConfigVersion.cmake" 173 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/Snappy" 174 | ) 175 | -------------------------------------------------------------------------------- /snappy/NEWS: -------------------------------------------------------------------------------- 1 | Snappy v1.1.7, August 24th 2017: 2 | 3 | * Improved CMake build support for 64-bit Linux distributions. 4 | 5 | * MSVC builds now use MSVC-specific intrinsics that map to clzll. 6 | 7 | * ARM64 (AArch64) builds use the code paths optimized for 64-bit processors. 8 | 9 | Snappy v1.1.6, July 12th 2017: 10 | 11 | This is a re-release of v1.1.5 with proper SONAME / SOVERSION values. 12 | 13 | Snappy v1.1.5, June 28th 2017: 14 | 15 | This release has broken SONAME / SOVERSION values. Users of snappy as a shared 16 | library should avoid 1.1.5 and use 1.1.6 instead. SONAME / SOVERSION errors will 17 | manifest as the dynamic library loader complaining that it cannot find snappy's 18 | shared library file (libsnappy.so / libsnappy.dylib), or that the library it 19 | found does not have the required version. 1.1.6 has the same code as 1.1.5, but 20 | carries build configuration fixes for the issues above. 21 | 22 | * Add CMake build support. The autoconf build support is now deprecated, and 23 | will be removed in the next release. 24 | 25 | * Add AppVeyor configuration, for Windows CI coverage. 26 | 27 | * Small performance improvement on little-endian PowerPC. 28 | 29 | * Small performance improvement on LLVM with position-independent executables. 
30 | 31 | * Fix a few issues with various build environments. 32 | 33 | Snappy v1.1.4, January 25th 2017: 34 | 35 | * Fix a 1% performance regression when snappy is used in PIE executables. 36 | 37 | * Improve compression performance by 5%. 38 | 39 | * Improve decompression performance by 20%. 40 | 41 | Snappy v1.1.3, July 6th 2015: 42 | 43 | This is the first release to be done from GitHub, which means that 44 | some minor things like the ChangeLog format has changed (git log 45 | format instead of svn log). 46 | 47 | * Add support for Uncompress() from a Source to a Sink. 48 | 49 | * Various minor changes to improve MSVC support; in particular, 50 | the unit tests now compile and run under MSVC. 51 | 52 | 53 | Snappy v1.1.2, February 28th 2014: 54 | 55 | This is a maintenance release with no changes to the actual library 56 | source code. 57 | 58 | * Stop distributing benchmark data files that have unclear 59 | or unsuitable licensing. 60 | 61 | * Add support for padding chunks in the framing format. 62 | 63 | 64 | Snappy v1.1.1, October 15th 2013: 65 | 66 | * Add support for uncompressing to iovecs (scatter I/O). 67 | The bulk of this patch was contributed by Mohit Aron. 68 | 69 | * Speed up decompression by ~2%; much more so (~13-20%) on 70 | a few benchmarks on given compilers and CPUs. 71 | 72 | * Fix a few issues with MSVC compilation. 73 | 74 | * Support truncated test data in the benchmark. 75 | 76 | 77 | Snappy v1.1.0, January 18th 2013: 78 | 79 | * Snappy now uses 64 kB block size instead of 32 kB. On average, 80 | this means it compresses about 3% denser (more so for some 81 | inputs), at the same or better speeds. 82 | 83 | * libsnappy no longer depends on iostream. 84 | 85 | * Some small performance improvements in compression on x86 86 | (0.5–1%). 87 | 88 | * Various portability fixes for ARM-based platforms, for MSVC, 89 | and for GNU/Hurd. 90 | 91 | 92 | Snappy v1.0.5, February 24th 2012: 93 | 94 | * More speed improvements. 
Exactly how big will depend on 95 | the architecture: 96 | 97 | - 3–10% faster decompression for the base case (x86-64). 98 | 99 | - ARMv7 and higher can now use unaligned accesses, 100 | and will see about 30% faster decompression and 101 | 20–40% faster compression. 102 | 103 | - 32-bit platforms (ARM and 32-bit x86) will see 2–5% 104 | faster compression. 105 | 106 | These are all cumulative (e.g., ARM gets all three speedups). 107 | 108 | * Fixed an issue where the unit test would crash on system 109 | with less than 256 MB address space available, 110 | e.g. some embedded platforms. 111 | 112 | * Added a framing format description, for use over e.g. HTTP, 113 | or for a command-line compressor. We do not have any 114 | implementations of this at the current point, but there seems 115 | to be enough of a general interest in the topic. 116 | Also make the format description slightly clearer. 117 | 118 | * Remove some compile-time warnings in -Wall 119 | (mostly signed/unsigned comparisons), for easier embedding 120 | into projects that use -Wall -Werror. 121 | 122 | 123 | Snappy v1.0.4, September 15th 2011: 124 | 125 | * Speeded up the decompressor somewhat; typically about 2–8% 126 | for Core i7, in 64-bit mode (comparable for Opteron). 127 | Somewhat more for some tests, almost no gain for others. 128 | 129 | * Make Snappy compile on certain platforms it didn't before 130 | (Solaris with SunPro C++, HP-UX, AIX). 131 | 132 | * Correct some minor errors in the format description. 133 | 134 | 135 | Snappy v1.0.3, June 2nd 2011: 136 | 137 | * Speeded up the decompressor somewhat; about 3-6% for Core 2, 138 | 6-13% for Core i7, and 5-12% for Opteron (all in 64-bit mode). 139 | 140 | * Added compressed format documentation. This text is new, 141 | but an earlier version from Zeev Tarantov was used as reference. 142 | 143 | * Only link snappy_unittest against -lz and other autodetected 144 | libraries, not libsnappy.so (which doesn't need any such dependency). 
145 | 146 | * Fixed some display issues in the microbenchmarks, one of which would 147 | frequently make the test crash on GNU/Hurd. 148 | 149 | 150 | Snappy v1.0.2, April 29th 2011: 151 | 152 | * Relicense to a BSD-type license. 153 | 154 | * Added C bindings, contributed by Martin Gieseking. 155 | 156 | * More Win32 fixes, in particular for MSVC. 157 | 158 | * Replace geo.protodata with a newer version. 159 | 160 | * Fix timing inaccuracies in the unit test when comparing Snappy 161 | to other algorithms. 162 | 163 | 164 | Snappy v1.0.1, March 25th 2011: 165 | 166 | This is a maintenance release, mostly containing minor fixes. 167 | There is no new functionality. The most important fixes include: 168 | 169 | * The COPYING file and all licensing headers now correctly state that 170 | Snappy is licensed under the Apache 2.0 license. 171 | 172 | * snappy_unittest should now compile natively under Windows, 173 | as well as on embedded systems with no mmap(). 174 | 175 | * Various autotools nits have been fixed. 176 | 177 | 178 | Snappy v1.0, March 17th 2011: 179 | 180 | * Initial version. 181 | -------------------------------------------------------------------------------- /snappy/README.md: -------------------------------------------------------------------------------- 1 | Snappy, a fast compressor/decompressor. 2 | 3 | 4 | Introduction 5 | ============ 6 | 7 | Snappy is a compression/decompression library. It does not aim for maximum 8 | compression, or compatibility with any other compression library; instead, 9 | it aims for very high speeds and reasonable compression. For instance, 10 | compared to the fastest mode of zlib, Snappy is an order of magnitude faster 11 | for most inputs, but the resulting compressed files are anywhere from 20% to 12 | 100% bigger. (For more information, see "Performance", below.) 13 | 14 | Snappy has the following properties: 15 | 16 | * Fast: Compression speeds at 250 MB/sec and beyond, with no assembler code. 
17 | See "Performance" below. 18 | * Stable: Over the last few years, Snappy has compressed and decompressed 19 | petabytes of data in Google's production environment. The Snappy bitstream 20 | format is stable and will not change between versions. 21 | * Robust: The Snappy decompressor is designed not to crash in the face of 22 | corrupted or malicious input. 23 | * Free and open source software: Snappy is licensed under a BSD-type license. 24 | For more information, see the included COPYING file. 25 | 26 | Snappy has previously been called "Zippy" in some Google presentations 27 | and the like. 28 | 29 | 30 | Performance 31 | =========== 32 | 33 | Snappy is intended to be fast. On a single core of a Core i7 processor 34 | in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at 35 | about 500 MB/sec or more. (These numbers are for the slowest inputs in our 36 | benchmark suite; others are much faster.) In our tests, Snappy usually 37 | is faster than algorithms in the same class (e.g. LZO, LZF, QuickLZ, 38 | etc.) while achieving comparable compression ratios. 39 | 40 | Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x 41 | for plain text, about 2-4x for HTML, and of course 1.0x for JPEGs, PNGs and 42 | other already-compressed data. Similar numbers for zlib in its fastest mode 43 | are 2.6-2.8x, 3-7x and 1.0x, respectively. More sophisticated algorithms are 44 | capable of achieving yet higher compression rates, although usually at the 45 | expense of speed. Of course, compression ratio will vary significantly with 46 | the input. 47 | 48 | Although Snappy should be fairly portable, it is primarily optimized 49 | for 64-bit x86-compatible processors, and may run slower in other environments. 50 | In particular: 51 | 52 | - Snappy uses 64-bit operations in several places to process more data at 53 | once than would otherwise be possible. 54 | - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap. 
55 | On some platforms, these must be emulated with single-byte loads 56 | and stores, which is much slower. 57 | - Snappy assumes little-endian throughout, and needs to byte-swap data in 58 | several places if running on a big-endian platform. 59 | 60 | Experience has shown that even heavily tuned code can be improved. 61 | Performance optimizations, whether for 64-bit x86 or other platforms, 62 | are of course most welcome; see "Contact", below. 63 | 64 | 65 | Building 66 | ======== 67 | 68 | CMake is supported and autotools will soon be deprecated. 69 | You need CMake 3.4 or above to build: 70 | 71 | mkdir build 72 | cd build && cmake ../ && make 73 | 74 | 75 | Usage 76 | ===== 77 | 78 | Note that Snappy, both the implementation and the main interface, 79 | is written in C++. However, several third-party bindings to other languages 80 | are available; see the home page at http://google.github.io/snappy/ 81 | for more information. Also, if you want to use Snappy from C code, you can 82 | use the included C bindings in snappy-c.h. 83 | 84 | To use Snappy from your own C++ program, include the file "snappy.h" from 85 | your calling file, and link against the compiled library. 86 | 87 | There are many ways to call Snappy, but the simplest possible is 88 | 89 | snappy::Compress(input.data(), input.size(), &output); 90 | 91 | and similarly 92 | 93 | snappy::Uncompress(input.data(), input.size(), &output); 94 | 95 | where "input" and "output" are both instances of std::string. 96 | 97 | There are other interfaces that are more flexible in various ways, including 98 | support for custom (non-array) input sources. See the header file for more 99 | information. 100 | 101 | 102 | Tests and benchmarks 103 | ==================== 104 | 105 | When you compile Snappy, snappy_unittest is compiled in addition to the 106 | library itself. You do not need it to use the compressor from your own library, 107 | but it contains several useful components for Snappy development. 
108 | 109 | First of all, it contains unit tests, verifying correctness on your machine in 110 | various scenarios. If you want to change or optimize Snappy, please run the 111 | tests to verify you have not broken anything. Note that if you have the 112 | Google Test library installed, unit test behavior (especially failures) will be 113 | significantly more user-friendly. You can find Google Test at 114 | 115 | http://github.com/google/googletest 116 | 117 | You probably also want the gflags library for handling of command-line flags; 118 | you can find it at 119 | 120 | http://gflags.github.io/gflags/ 121 | 122 | In addition to the unit tests, snappy_unittest contains microbenchmarks used to 123 | tune compression and decompression performance. These are automatically run 124 | before the unit tests, but you can disable them using the flag 125 | --run_microbenchmarks=false if you have gflags installed (otherwise you will 126 | need to edit the source). 127 | 128 | Finally, snappy_unittest can benchmark Snappy against a few other compression libraries 129 | (zlib, LZO, LZF, and QuickLZ), if they were detected at configure time. 130 | To benchmark using a given file, give the compression algorithm you want to test 131 | Snappy against (e.g. --zlib) and then a list of one or more file names on the 132 | command line. The testdata/ directory contains the files used by the 133 | microbenchmark, which should provide a reasonably balanced starting point for 134 | benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they 135 | are used to verify correctness in the presence of corrupted data in the unit 136 | test.) 137 | 138 | 139 | Contact 140 | ======= 141 | 142 | Snappy is distributed through GitHub.
For the latest version, a bug tracker, 143 | and other information, see 144 | 145 | http://google.github.io/snappy/ 146 | 147 | or the repository at 148 | 149 | https://github.com/google/snappy 150 | -------------------------------------------------------------------------------- /snappy/snappy-sinksource.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | #ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ 30 | #define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ 31 | 32 | #include 33 | 34 | namespace snappy { 35 | 36 | // A Sink is an interface that consumes a sequence of bytes. 37 | class Sink { 38 | public: 39 | Sink() { } 40 | virtual ~Sink(); 41 | 42 | // Append "bytes[0,n-1]" to this. 43 | virtual void Append(const char* bytes, size_t n) = 0; 44 | 45 | // Returns a writable buffer of the specified length for appending. 46 | // May return a pointer to the caller-owned scratch buffer which 47 | // must have at least the indicated length. The returned buffer is 48 | // only valid until the next operation on this Sink. 49 | // 50 | // After writing at most "length" bytes, call Append() with the 51 | // pointer returned from this function and the number of bytes 52 | // written. Many Append() implementations will avoid copying 53 | // bytes if this function returned an internal buffer. 54 | // 55 | // If a non-scratch buffer is returned, the caller may only pass a 56 | // prefix of it to Append(). That is, it is not correct to pass an 57 | // interior pointer of the returned array to Append(). 58 | // 59 | // The default implementation always returns the scratch buffer. 
60 | virtual char* GetAppendBuffer(size_t length, char* scratch); 61 | 62 | // For higher performance, Sink implementations can provide custom 63 | // AppendAndTakeOwnership() and GetAppendBufferVariable() methods. 64 | // These methods can reduce the number of copies done during 65 | // compression/decompression. 66 | 67 | // Append "bytes[0,n-1]" to the sink. Takes ownership of "bytes" 68 | // and calls the deleter function as (*deleter)(deleter_arg, bytes, n) 69 | // to free the buffer. The deleter function must be non-NULL. 70 | // 71 | // The default implementation just calls Append and frees "bytes". 72 | // Other implementations may avoid a copy while appending the buffer. 73 | virtual void AppendAndTakeOwnership( 74 | char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), 75 | void *deleter_arg); 76 | 77 | // Returns a writable buffer for appending and writes the buffer's capacity to 78 | // *allocated_size. Guarantees *allocated_size >= min_size. 79 | // May return a pointer to the caller-owned scratch buffer which must have 80 | // scratch_size >= min_size. 81 | // 82 | // The returned buffer is only valid until the next operation 83 | // on this Sink. 84 | // 85 | // After writing at most *allocated_size bytes, call Append() with the 86 | // pointer returned from this function and the number of bytes written. 87 | // Many Append() implementations will avoid copying bytes if this function 88 | // returned an internal buffer. 89 | // 90 | // If the sink implementation allocates or reallocates an internal buffer, 91 | // it should use the desired_size_hint if appropriate. If a caller cannot 92 | // provide a reasonable guess at the desired capacity, it should set 93 | // desired_size_hint = 0. 94 | // 95 | // If a non-scratch buffer is returned, the caller may only pass 96 | // a prefix of it to Append(). That is, it is not correct to pass an 97 | // interior pointer to Append().
98 | // 99 | // The default implementation always returns the scratch buffer. 100 | virtual char* GetAppendBufferVariable( 101 | size_t min_size, size_t desired_size_hint, char* scratch, 102 | size_t scratch_size, size_t* allocated_size); 103 | 104 | private: 105 | // No copying 106 | Sink(const Sink&); 107 | void operator=(const Sink&); 108 | }; 109 | 110 | // A Source is an interface that yields a sequence of bytes 111 | class Source { 112 | public: 113 | Source() { } 114 | virtual ~Source(); 115 | 116 | // Return the number of bytes left to read from the source 117 | virtual size_t Available() const = 0; 118 | 119 | // Peek at the next flat region of the source. Does not reposition 120 | // the source. The returned region is empty iff Available()==0. 121 | // 122 | // Returns a pointer to the beginning of the region and stores its 123 | // length in *len. 124 | // 125 | // The returned region is valid until the next call to Skip() or 126 | // until this object is destroyed, whichever occurs first. 127 | // 128 | // The returned region may be larger than Available() (for example 129 | // if this Source is a view on a substring of a larger source). 130 | // The caller is responsible for ensuring that it only reads the 131 | // Available() bytes. 132 | virtual const char* Peek(size_t* len) = 0; 133 | 134 | // Skip the next n bytes. Invalidates any buffer returned by 135 | // a previous call to Peek().
136 | // REQUIRES: Available() >= n 137 | virtual void Skip(size_t n) = 0; 138 | 139 | private: 140 | // No copying 141 | Source(const Source&); 142 | void operator=(const Source&); 143 | }; 144 | 145 | // A Source implementation that yields the contents of a flat array 146 | class ByteArraySource : public Source { 147 | public: 148 | ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } 149 | virtual ~ByteArraySource(); 150 | virtual size_t Available() const; 151 | virtual const char* Peek(size_t* len); 152 | virtual void Skip(size_t n); 153 | private: 154 | const char* ptr_; 155 | size_t left_; 156 | }; 157 | 158 | // A Sink implementation that writes to a flat array without any bound checks. 159 | class UncheckedByteArraySink : public Sink { 160 | public: 161 | explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } 162 | virtual ~UncheckedByteArraySink(); 163 | virtual void Append(const char* data, size_t n); 164 | virtual char* GetAppendBuffer(size_t len, char* scratch); 165 | virtual char* GetAppendBufferVariable( 166 | size_t min_size, size_t desired_size_hint, char* scratch, 167 | size_t scratch_size, size_t* allocated_size); 168 | virtual void AppendAndTakeOwnership( 169 | char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), 170 | void *deleter_arg); 171 | 172 | // Return the current output pointer so that a caller can see how 173 | // many bytes were produced. 174 | // Note: this is not a Sink method. 175 | char* CurrentDestination() const { return dest_; } 176 | private: 177 | char* dest_; 178 | }; 179 | 180 | } // namespace snappy 181 | 182 | #endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ 183 | -------------------------------------------------------------------------------- /snappy/snappy-internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2008 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Internals shared between the Snappy implementation and its unittest. 
30 | 31 | #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ 32 | #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ 33 | 34 | #include "snappy-stubs-internal.h" 35 | 36 | namespace snappy { 37 | namespace internal { 38 | 39 | class WorkingMemory { 40 | public: 41 | WorkingMemory() : large_table_(NULL) { } 42 | ~WorkingMemory() { delete[] large_table_; } 43 | 44 | // Allocates and clears a hash table using memory in "*this", 45 | // stores the number of buckets in "*table_size" and returns a pointer to 46 | // the base of the hash table. 47 | uint16* GetHashTable(size_t input_size, int* table_size); 48 | 49 | private: 50 | uint16 small_table_[1<<10]; // 2KB 51 | uint16* large_table_; // Allocated only when needed 52 | 53 | // No copying 54 | WorkingMemory(const WorkingMemory&); 55 | void operator=(const WorkingMemory&); 56 | }; 57 | 58 | // Flat array compression that does not emit the "uncompressed length" 59 | // prefix. Compresses "input" string to the "*op" buffer. 60 | // 61 | // REQUIRES: "input_length <= kBlockSize" 62 | // REQUIRES: "op" points to an array of memory that is at least 63 | // "MaxCompressedLength(input_length)" in size. 64 | // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. 65 | // REQUIRES: "table_size" is a power of two 66 | // 67 | // Returns an "end" pointer into "op" buffer. 68 | // "end - op" is the compressed size of "input". 69 | char* CompressFragment(const char* input, 70 | size_t input_length, 71 | char* op, 72 | uint16* table, 73 | const int table_size); 74 | 75 | // Find the largest n such that 76 | // 77 | // s1[0,n-1] == s2[0,n-1] 78 | // and n <= (s2_limit - s2). 79 | // 80 | // Return make_pair(n, n < 8). 81 | // Does not read *s2_limit or beyond. 82 | // Does not read *(s1 + (s2_limit - s2)) or beyond. 83 | // Requires that s2_limit >= s2. 84 | // 85 | // Separate implementation for 64-bit, little-endian cpus. 
86 | #if !defined(SNAPPY_IS_BIG_ENDIAN) && \ 87 | (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)) 88 | static inline std::pair FindMatchLength(const char* s1, 89 | const char* s2, 90 | const char* s2_limit) { 91 | assert(s2_limit >= s2); 92 | size_t matched = 0; 93 | 94 | // This block isn't necessary for correctness; we could just start looping 95 | // immediately. As an optimization though, it is useful. It creates some not 96 | // uncommon code paths that determine, without extra effort, whether the match 97 | // length is less than 8. In short, we are hoping to avoid a conditional 98 | // branch, and perhaps get better code layout from the C++ compiler. 99 | if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { 100 | uint64 a1 = UNALIGNED_LOAD64(s1); 101 | uint64 a2 = UNALIGNED_LOAD64(s2); 102 | if (a1 != a2) { 103 | return std::pair(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3, 104 | true); 105 | } else { 106 | matched = 8; 107 | s2 += 8; 108 | } 109 | } 110 | 111 | // Find out how long the match is. We loop over the data 64 bits at a 112 | // time until we find a 64-bit block that doesn't match; then we find 113 | // the first non-matching bit and use that to calculate the total 114 | // length of the match. 
115 | while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { 116 | if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { 117 | s2 += 8; 118 | matched += 8; 119 | } else { 120 | uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); 121 | int matching_bits = Bits::FindLSBSetNonZero64(x); 122 | matched += matching_bits >> 3; 123 | assert(matched >= 8); 124 | return std::pair(matched, false); 125 | } 126 | } 127 | while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { 128 | if (s1[matched] == *s2) { 129 | ++s2; 130 | ++matched; 131 | } else { 132 | return std::pair(matched, matched < 8); 133 | } 134 | } 135 | return std::pair(matched, matched < 8); 136 | } 137 | #else 138 | static inline std::pair FindMatchLength(const char* s1, 139 | const char* s2, 140 | const char* s2_limit) { 141 | // Implementation based on the x86-64 version, above. 142 | assert(s2_limit >= s2); 143 | int matched = 0; 144 | 145 | while (s2 <= s2_limit - 4 && 146 | UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { 147 | s2 += 4; 148 | matched += 4; 149 | } 150 | if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { 151 | uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); 152 | int matching_bits = Bits::FindLSBSetNonZero(x); 153 | matched += matching_bits >> 3; 154 | } else { 155 | while ((s2 < s2_limit) && (s1[matched] == *s2)) { 156 | ++s2; 157 | ++matched; 158 | } 159 | } 160 | return std::pair(matched, matched < 8); 161 | } 162 | #endif 163 | 164 | // Lookup tables for decompression code. Give --snappy_dump_decompression_table 165 | // to the unit test to recompute char_table. 166 | 167 | enum { 168 | LITERAL = 0, 169 | COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode 170 | COPY_2_BYTE_OFFSET = 2, 171 | COPY_4_BYTE_OFFSET = 3 172 | }; 173 | static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset. 
174 | 175 | // Data stored per entry in lookup table: 176 | // Range Bits-used Description 177 | // ------------------------------------ 178 | // 1..64 0..7 Literal/copy length encoded in opcode byte 179 | // 0..7 8..10 Copy offset encoded in opcode byte / 256 180 | // 0..4 11..13 Extra bytes after opcode 181 | // 182 | // We use eight bits for the length even though 7 would have sufficed 183 | // because of efficiency reasons: 184 | // (1) Extracting a byte is faster than a bit-field 185 | // (2) It properly aligns copy offset so we do not need a <<8 186 | static const uint16 char_table[256] = { 187 | 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 188 | 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 189 | 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, 190 | 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, 191 | 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, 192 | 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, 193 | 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, 194 | 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, 195 | 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, 196 | 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, 197 | 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, 198 | 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, 199 | 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, 200 | 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, 201 | 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, 202 | 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, 203 | 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, 204 | 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, 205 | 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, 206 | 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 
0x2028, 207 | 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, 208 | 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, 209 | 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, 210 | 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, 211 | 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, 212 | 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, 213 | 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, 214 | 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, 215 | 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 216 | 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 217 | 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, 218 | 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 219 | }; 220 | 221 | } // end namespace internal 222 | } // end namespace snappy 223 | 224 | #endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ 225 | -------------------------------------------------------------------------------- /snappy/snappy.h: -------------------------------------------------------------------------------- 1 | // Copyright 2005 and onwards Google Inc. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 
16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // A light-weight compression algorithm. It is designed for speed of 30 | // compression and decompression, rather than for the utmost in space 31 | // savings. 32 | // 33 | // For getting better compression ratios when you are compressing data 34 | // with long repeated sequences or compressing data that is similar to 35 | // other data, while still compressing fast, you might look at first 36 | // using BMDiff and then compressing the output of BMDiff with 37 | // Snappy. 38 | 39 | #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ 40 | #define THIRD_PARTY_SNAPPY_SNAPPY_H__ 41 | 42 | #include 43 | #include 44 | 45 | #include "snappy-stubs-public.h" 46 | 47 | namespace snappy { 48 | class Source; 49 | class Sink; 50 | 51 | // ------------------------------------------------------------------------ 52 | // Generic compression/decompression routines. 53 | // ------------------------------------------------------------------------ 54 | 55 | // Compress the bytes read from "*source" and append to "*sink". Return the 56 | // number of bytes written. 
57 | size_t Compress(Source* source, Sink* sink); 58 | 59 | // Find the uncompressed length of the given stream, as given by the header. 60 | // Note that the true length could deviate from this; the stream could e.g. 61 | // be truncated. 62 | // 63 | // Also note that this leaves "*source" in a state that is unsuitable for 64 | // further operations, such as RawUncompress(). You will need to rewind 65 | // or recreate the source yourself before attempting any further calls. 66 | bool GetUncompressedLength(Source* source, uint32* result); 67 | 68 | // ------------------------------------------------------------------------ 69 | // Higher-level string based routines (should be sufficient for most users) 70 | // ------------------------------------------------------------------------ 71 | 72 | // Sets "*output" to the compressed version of "input[0,input_length-1]". 73 | // Original contents of *output are lost. 74 | // 75 | // REQUIRES: "input[]" is not an alias of "*output". 76 | size_t Compress(const char* input, size_t input_length, string* output); 77 | 78 | // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". 79 | // Original contents of "*uncompressed" are lost. 80 | // 81 | // REQUIRES: "compressed[]" is not an alias of "*uncompressed". 82 | // 83 | // returns false if the message is corrupted and could not be decompressed 84 | bool Uncompress(const char* compressed, size_t compressed_length, 85 | string* uncompressed); 86 | 87 | // Decompresses "compressed" to "*uncompressed". 88 | // 89 | // returns false if the message is corrupted and could not be decompressed 90 | bool Uncompress(Source* compressed, Sink* uncompressed); 91 | 92 | // This routine uncompresses as much of the "compressed" as possible 93 | // into sink. It returns the number of valid bytes added to sink 94 | // (extra invalid bytes may have been added due to errors; the caller 95 | // should ignore those). 
The emitted data typically has length 96 | // GetUncompressedLength(), but may be shorter if an error is 97 | // encountered. 98 | size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed); 99 | 100 | // ------------------------------------------------------------------------ 101 | // Lower-level character array based routines. May be useful for 102 | // efficiency reasons in certain circumstances. 103 | // ------------------------------------------------------------------------ 104 | 105 | // REQUIRES: "compressed" must point to an area of memory that is at 106 | // least "MaxCompressedLength(input_length)" bytes in length. 107 | // 108 | // Takes the data stored in "input[0..input_length-1]" and stores 109 | // it in the array pointed to by "compressed". 110 | // 111 | // "*compressed_length" is set to the length of the compressed output. 112 | // 113 | // Example: 114 | // char* output = new char[snappy::MaxCompressedLength(input_length)]; 115 | // size_t output_length; 116 | // RawCompress(input, input_length, output, &output_length); 117 | // ... Process(output, output_length) ...
118 | // delete [] output; 119 | void RawCompress(const char* input, 120 | size_t input_length, 121 | char* compressed, 122 | size_t* compressed_length); 123 | 124 | // Given data in "compressed[0..compressed_length-1]" generated by 125 | // calling the snappy::Compress routine, this routine 126 | // stores the uncompressed data to 127 | // uncompressed[0..GetUncompressedLength(compressed)-1] 128 | // returns false if the message is corrupted and could not be decompressed 129 | bool RawUncompress(const char* compressed, size_t compressed_length, 130 | char* uncompressed); 131 | 132 | // Given data from the byte source 'compressed' generated by calling 133 | // the snappy::Compress routine, this routine stores the uncompressed 134 | // data to 135 | // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] 136 | // returns false if the message is corrupted and could not be decompressed 137 | bool RawUncompress(Source* compressed, char* uncompressed); 138 | 139 | // Given data in "compressed[0..compressed_length-1]" generated by 140 | // calling the snappy::Compress routine, this routine 141 | // stores the uncompressed data to the iovec "iov". The number of physical 142 | // buffers in "iov" is given by iov_cnt and their cumulative size 143 | // must be at least GetUncompressedLength(compressed). The individual buffers 144 | // in "iov" must not overlap with each other. 145 | // 146 | // returns false if the message is corrupted and could not be decompressed 147 | bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, 148 | const struct iovec* iov, size_t iov_cnt); 149 | 150 | // Given data from the byte source 'compressed' generated by calling 151 | // the snappy::Compress routine, this routine stores the uncompressed 152 | // data to the iovec "iov". The number of physical 153 | // buffers in "iov" is given by iov_cnt and their cumulative size 154 | // must be at least GetUncompressedLength(compressed).
The individual buffers 155 | // in "iov" must not overlap with each other. 156 | // 157 | // Returns false if the message is corrupted and could not be decompressed. 158 | bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, 159 | size_t iov_cnt); 160 | 161 | // Returns the maximal size of the compressed representation of 162 | // input data that is "source_bytes" bytes in length. 163 | size_t MaxCompressedLength(size_t source_bytes); 164 | 165 | // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() 166 | // Returns true and stores the length of the uncompressed data in 167 | // *result normally. Returns false on parsing error. 168 | // This operation takes O(1) time. 169 | bool GetUncompressedLength(const char* compressed, size_t compressed_length, 170 | size_t* result); 171 | 172 | // Returns true iff the contents of "compressed[]" can be uncompressed 173 | // successfully. Does not return the uncompressed data. Takes 174 | // time proportional to compressed_length, but is usually at least 175 | // a factor of four faster than actual decompression. 176 | bool IsValidCompressedBuffer(const char* compressed, 177 | size_t compressed_length); 178 | 179 | // Returns true iff the contents of "compressed" can be uncompressed 180 | // successfully. Does not return the uncompressed data. Takes 181 | // time proportional to *compressed length, but is usually at least 182 | // a factor of four faster than actual decompression. 183 | // On success, consumes all of *compressed. On failure, consumes an 184 | // unspecified prefix of *compressed. 185 | bool IsValidCompressed(Source* compressed); 186 | 187 | // The size of a compression block. Note that many parts of the compression 188 | // code assume that kBlockSize <= 65536; in particular, the hash table 189 | // can only store 16-bit offsets, and EmitCopy() also assumes the offset 190 | // is 65535 bytes or less. 
Note also that if you change this, it will 191 | // affect the framing format (see framing_format.txt). 192 | // 193 | // Note that there might be older data around that is compressed with larger 194 | // block sizes, so the decompression code should not rely on the 195 | // non-existence of long backreferences. 196 | static const int kBlockLog = 16; 197 | static const size_t kBlockSize = 1 << kBlockLog; 198 | 199 | static const int kMaxHashTableBits = 14; 200 | static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; 201 | } // end namespace snappy 202 | 203 | #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ 204 | -------------------------------------------------------------------------------- /snappy/snappy-test.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Various stubs for the unit tests for the open-source version of Snappy. 30 | 31 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ 32 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ 33 | 34 | #include <iostream> 35 | #include <string> 36 | 37 | #include "snappy-stubs-internal.h" 38 | 39 | #include <stdio.h> 40 | #include <stdarg.h> 41 | 42 | #ifdef HAVE_SYS_MMAN_H 43 | #include <sys/mman.h> 44 | #endif 45 | 46 | #ifdef HAVE_SYS_RESOURCE_H 47 | #include <sys/resource.h> 48 | #endif 49 | 50 | #ifdef HAVE_SYS_TIME_H 51 | #include <sys/time.h> 52 | #endif 53 | 54 | #ifdef HAVE_WINDOWS_H 55 | #include <windows.h> 56 | #endif 57 | 58 | #include <string> 59 | 60 | #ifdef HAVE_GTEST 61 | 62 | #include <gtest/gtest.h> 63 | #undef TYPED_TEST 64 | #define TYPED_TEST TEST 65 | #define INIT_GTEST(argc, argv) ::testing::InitGoogleTest(argc, *argv) 66 | 67 | #else 68 | 69 | // Stubs for if the user doesn't have Google Test installed. 70 | 71 | #define TEST(test_case, test_subcase) \ 72 | void Test_ ## test_case ## _ ## test_subcase() 73 | #define INIT_GTEST(argc, argv) 74 | 75 | #define TYPED_TEST TEST 76 | #define EXPECT_EQ CHECK_EQ 77 | #define EXPECT_NE CHECK_NE 78 | #define EXPECT_FALSE(cond) CHECK(!(cond)) 79 | 80 | #endif 81 | 82 | #ifdef HAVE_GFLAGS 83 | 84 | #include <gflags/gflags.h> 85 | 86 | // This is tricky; both gflags and Google Test want to look at the command line 87 | // arguments. Google Test seems to be the most happy with unknown arguments, 88 | // though, so we call it first and hope for the best. 
89 | #define InitGoogle(argv0, argc, argv, remove_flags) \ 90 | INIT_GTEST(argc, argv); \ 91 | google::ParseCommandLineFlags(argc, argv, remove_flags); 92 | 93 | #else 94 | 95 | // If we don't have the gflags package installed, these can only be 96 | // changed at compile time. 97 | #define DEFINE_int32(flag_name, default_value, description) \ 98 | static int FLAGS_ ## flag_name = default_value; 99 | 100 | #define InitGoogle(argv0, argc, argv, remove_flags) \ 101 | INIT_GTEST(argc, argv) 102 | 103 | #endif 104 | 105 | #ifdef HAVE_LIBZ 106 | #include "zlib.h" 107 | #endif 108 | 109 | #ifdef HAVE_LIBLZO2 110 | #include "lzo/lzo1x.h" 111 | #endif 112 | 113 | namespace { 114 | 115 | namespace file { 116 | int Defaults() { return 0; } 117 | 118 | class DummyStatus { 119 | public: 120 | void CheckSuccess() { } 121 | }; 122 | 123 | DummyStatus GetContents( 124 | const std::string& filename, std::string* data, int unused) { 125 | FILE* fp = fopen(filename.c_str(), "rb"); 126 | if (fp == NULL) { 127 | perror(filename.c_str()); 128 | exit(1); 129 | } 130 | 131 | data->clear(); 132 | while (!feof(fp)) { 133 | char buf[4096]; 134 | size_t ret = fread(buf, 1, 4096, fp); 135 | if (ret == 0 && ferror(fp)) { 136 | perror("fread"); 137 | exit(1); 138 | } 139 | data->append(std::string(buf, ret)); 140 | } 141 | 142 | fclose(fp); 143 | 144 | return DummyStatus(); 145 | } 146 | 147 | inline DummyStatus SetContents( 148 | const std::string& filename, const std::string& str, int unused) { 149 | FILE* fp = fopen(filename.c_str(), "wb"); 150 | if (fp == NULL) { 151 | perror(filename.c_str()); 152 | exit(1); 153 | } 154 | 155 | int ret = fwrite(str.data(), str.size(), 1, fp); 156 | if (ret != 1) { 157 | perror("fwrite"); 158 | exit(1); 159 | } 160 | 161 | fclose(fp); 162 | 163 | return DummyStatus(); 164 | } 165 | } // namespace file 166 | 167 | } // namespace 168 | 169 | namespace snappy { 170 | 171 | #define FLAGS_test_random_seed 301 172 | typedef string TypeParam; 173 | 174 | void 
Test_CorruptedTest_VerifyCorrupted(); 175 | void Test_Snappy_SimpleTests(); 176 | void Test_Snappy_MaxBlowup(); 177 | void Test_Snappy_RandomData(); 178 | void Test_Snappy_FourByteOffset(); 179 | void Test_SnappyCorruption_TruncatedVarint(); 180 | void Test_SnappyCorruption_UnterminatedVarint(); 181 | void Test_SnappyCorruption_OverflowingVarint(); 182 | void Test_Snappy_ReadPastEndOfBuffer(); 183 | void Test_Snappy_FindMatchLength(); 184 | void Test_Snappy_FindMatchLengthRandom(); 185 | 186 | string ReadTestDataFile(const string& base, size_t size_limit); 187 | 188 | string ReadTestDataFile(const string& base); 189 | 190 | // A sprintf() variant that returns a std::string. 191 | // Not safe for general use due to truncation issues. 192 | string StringPrintf(const char* format, ...); 193 | 194 | // A simple, non-cryptographically-secure random generator. 195 | class ACMRandom { 196 | public: 197 | explicit ACMRandom(uint32 seed) : seed_(seed) {} 198 | 199 | int32 Next(); 200 | // Returns Next() reduced modulo n; n must be non-zero. 201 | int32 Uniform(int32 n) { 202 | return Next() % n; 203 | } 204 | uint8 Rand8() { 205 | return static_cast<uint8>((Next() >> 1) & 0x000000ff); 206 | } 207 | bool OneIn(int X) { return Uniform(X) == 0; } 208 | 209 | // Skewed: pick "base" uniformly from range [0,max_log] and then 210 | // return "base" random bits. The effect is to pick a number in the 211 | // range [0,2^max_log-1] with bias towards smaller numbers. 212 | int32 Skewed(int max_log); 213 | 214 | private: 215 | static const uint32 M = 2147483647L; // 2^31-1 216 | uint32 seed_; 217 | }; 218 | 219 | inline int32 ACMRandom::Next() { 220 | static const uint64 A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 221 | // We are computing 222 | // seed_ = (seed_ * A) % M, where M = 2^31-1 223 | // 224 | // seed_ must not be zero or M, or else all subsequent computed values 225 | // will be zero or M respectively. 
For all other values, seed_ will end 226 | // up cycling through every number in [1,M-1] 227 | uint64 product = seed_ * A; 228 | 229 | // Compute (product % M) using the fact that ((x << 31) % M) == x. 230 | seed_ = (product >> 31) + (product & M); 231 | // The first reduction may overflow by 1 bit, so we may need to repeat. 232 | // mod == M is not possible; using > allows the faster sign-bit-based test. 233 | if (seed_ > M) { 234 | seed_ -= M; 235 | } 236 | return seed_; 237 | } 238 | 239 | inline int32 ACMRandom::Skewed(int max_log) { 240 | const int32 base = (Next() - 1) % (max_log+1); 241 | return (Next() - 1) & ((1u << base)-1); 242 | } 243 | 244 | // A wall-time clock. This stub is not super-accurate, nor resistant to the 245 | // system time changing. 246 | class CycleTimer { 247 | public: 248 | CycleTimer() : real_time_us_(0) {} 249 | 250 | void Start() { 251 | #ifdef WIN32 252 | QueryPerformanceCounter(&start_); 253 | #else 254 | gettimeofday(&start_, NULL); 255 | #endif 256 | } 257 | // Adds the time elapsed since the last Start() to the running total. 258 | void Stop() { 259 | #ifdef WIN32 260 | LARGE_INTEGER stop; 261 | LARGE_INTEGER frequency; 262 | QueryPerformanceCounter(&stop); 263 | QueryPerformanceFrequency(&frequency); 264 | 265 | double elapsed = static_cast<double>(stop.QuadPart - start_.QuadPart) / 266 | frequency.QuadPart; 267 | real_time_us_ += elapsed * 1e6 + 0.5; 268 | #else 269 | struct timeval stop; 270 | gettimeofday(&stop, NULL); 271 | // Widen before multiplying so the product cannot overflow a 32-bit time_t. 272 | real_time_us_ += static_cast<int64>(1000000) * (stop.tv_sec - start_.tv_sec); 273 | real_time_us_ += (stop.tv_usec - start_.tv_usec); 274 | #endif 275 | } 276 | // Returns the accumulated time in seconds. 277 | double Get() { 278 | return real_time_us_ * 1e-6; 279 | } 280 | 281 | private: 282 | int64 real_time_us_; 283 | #ifdef WIN32 284 | LARGE_INTEGER start_; 285 | #else 286 | struct timeval start_; 287 | #endif 288 | }; 289 | 290 | // Minimalistic microbenchmark framework. 
291 | 292 | typedef void (*BenchmarkFunction)(int, int); 293 | 294 | class Benchmark { 295 | public: 296 | Benchmark(const string& name, BenchmarkFunction function) : 297 | name_(name), function_(function) {} 298 | 299 | Benchmark* DenseRange(int start, int stop) { 300 | start_ = start; 301 | stop_ = stop; 302 | return this; 303 | } 304 | 305 | void Run(); 306 | 307 | private: 308 | const string name_; 309 | const BenchmarkFunction function_; 310 | int start_, stop_; 311 | }; 312 | #define BENCHMARK(benchmark_name) \ 313 | Benchmark* Benchmark_ ## benchmark_name = \ 314 | (new Benchmark(#benchmark_name, benchmark_name)) 315 | 316 | extern Benchmark* Benchmark_BM_UFlat; 317 | extern Benchmark* Benchmark_BM_UIOVec; 318 | extern Benchmark* Benchmark_BM_UValidate; 319 | extern Benchmark* Benchmark_BM_ZFlat; 320 | 321 | void ResetBenchmarkTiming(); 322 | void StartBenchmarkTiming(); 323 | void StopBenchmarkTiming(); 324 | void SetBenchmarkLabel(const string& str); 325 | void SetBenchmarkBytesProcessed(int64 bytes); 326 | 327 | #ifdef HAVE_LIBZ 328 | 329 | // Object-oriented wrapper around zlib. 330 | class ZLib { 331 | public: 332 | ZLib(); 333 | ~ZLib(); 334 | 335 | // Wipe a ZLib object to a virgin state. This differs from Reset() 336 | // in that it also breaks any state. 337 | void Reinit(); 338 | 339 | // Call this to make a zlib buffer as good as new. Here's the only 340 | // case where they differ: 341 | // CompressChunk(a); CompressChunk(b); CompressChunkDone(); vs 342 | // CompressChunk(a); Reset(); CompressChunk(b); CompressChunkDone(); 343 | // You'll want to use Reset(), then, when you interrupt a compress 344 | // (or uncompress) in the middle of a chunk and want to start over. 345 | void Reset(); 346 | 347 | // According to the zlib manual, when you Compress, the destination 348 | // buffer must have size at least src + .1%*src + 12. This function 349 | // helps you calculate that. 
Augment this to account for a potential 350 | // gzip header and footer, plus a few bytes of slack. 351 | static int MinCompressbufSize(int uncompress_size) { 352 | return uncompress_size + uncompress_size/1000 + 40; 353 | } 354 | 355 | // Compresses the source buffer into the destination buffer. 356 | // sourceLen is the byte length of the source buffer. 357 | // Upon entry, destLen is the total size of the destination buffer, 358 | // which must be of size at least MinCompressbufSize(sourceLen). 359 | // Upon exit, destLen is the actual size of the compressed buffer. 360 | // 361 | // This function can be used to compress a whole file at once if the 362 | // input file is mmap'ed. 363 | // 364 | // Returns Z_OK if success, Z_MEM_ERROR if there was not 365 | // enough memory, Z_BUF_ERROR if there was not enough room in the 366 | // output buffer. Note that if the output buffer is exactly the same 367 | // size as the compressed result, we still return Z_BUF_ERROR. 368 | // (check CL#1936076) 369 | int Compress(Bytef *dest, uLongf *destLen, 370 | const Bytef *source, uLong sourceLen); 371 | 372 | // Uncompresses the source buffer into the destination buffer. 373 | // The destination buffer must be long enough to hold the entire 374 | // decompressed contents. 375 | // 376 | // Returns Z_OK on success, otherwise, it returns a zlib error code. 377 | int Uncompress(Bytef *dest, uLongf *destLen, 378 | const Bytef *source, uLong sourceLen); 379 | 380 | // Uncompress data one chunk at a time -- ie you can call this 381 | // more than once. To get this to work you need to call per-chunk 382 | // and "done" routines. 383 | // 384 | // Returns Z_OK if success, Z_MEM_ERROR if there was not 385 | // enough memory, Z_BUF_ERROR if there was not enough room in the 386 | // output buffer. 387 | 388 | int UncompressAtMost(Bytef *dest, uLongf *destLen, 389 | const Bytef *source, uLong *sourceLen); 390 | 391 | // Checks gzip footer information, as needed. 
Mostly this just 392 | // makes sure the checksums match. Whenever you call this, it 393 | // will assume the last 8 bytes from the previous UncompressChunk 394 | // call are the footer. Returns true iff everything looks ok. 395 | bool UncompressChunkDone(); 396 | 397 | private: 398 | int InflateInit(); // sets up the zlib inflate structure 399 | int DeflateInit(); // sets up the zlib deflate structure 400 | 401 | // These init the zlib data structures for compressing/uncompressing 402 | int CompressInit(Bytef *dest, uLongf *destLen, 403 | const Bytef *source, uLong *sourceLen); 404 | int UncompressInit(Bytef *dest, uLongf *destLen, 405 | const Bytef *source, uLong *sourceLen); 406 | // Initialization method to be called if we hit an error while 407 | // uncompressing. On hitting an error, call this method before 408 | // returning the error. 409 | void UncompressErrorInit(); 410 | 411 | // Helper function for Compress 412 | int CompressChunkOrAll(Bytef *dest, uLongf *destLen, 413 | const Bytef *source, uLong sourceLen, 414 | int flush_mode); 415 | int CompressAtMostOrAll(Bytef *dest, uLongf *destLen, 416 | const Bytef *source, uLong *sourceLen, 417 | int flush_mode); 418 | 419 | // Likewise for UncompressAndUncompressChunk 420 | int UncompressChunkOrAll(Bytef *dest, uLongf *destLen, 421 | const Bytef *source, uLong sourceLen, 422 | int flush_mode); 423 | 424 | int UncompressAtMostOrAll(Bytef *dest, uLongf *destLen, 425 | const Bytef *source, uLong *sourceLen, 426 | int flush_mode); 427 | 428 | // Initialization method to be called if we hit an error while 429 | // compressing. On hitting an error, call this method before 430 | // returning the error. 
431 | void CompressErrorInit(); 432 | 433 | int compression_level_; // compression level 434 | int window_bits_; // log base 2 of the window size used in compression 435 | int mem_level_; // specifies the amount of memory to be used by 436 | // compressor (1-9) 437 | z_stream comp_stream_; // Zlib stream data structure 438 | bool comp_init_; // True if we have initialized comp_stream_ 439 | z_stream uncomp_stream_; // Zlib stream data structure 440 | bool uncomp_init_; // True if we have initialized uncomp_stream_ 441 | 442 | // These are used only with chunked compression. 443 | bool first_chunk_; // true if we need to emit headers with this chunk 444 | }; 445 | 446 | #endif // HAVE_LIBZ 447 | 448 | } // namespace snappy 449 | 450 | DECLARE_bool(run_microbenchmarks); 451 | 452 | static inline void RunSpecifiedBenchmarks() { 453 | if (!FLAGS_run_microbenchmarks) { 454 | return; 455 | } 456 | 457 | fprintf(stderr, "Running microbenchmarks.\n"); 458 | #ifndef NDEBUG 459 | fprintf(stderr, "WARNING: Compiled with assertions enabled, will be slow.\n"); 460 | #endif 461 | #ifndef __OPTIMIZE__ 462 | fprintf(stderr, "WARNING: Compiled without optimization, will be slow.\n"); 463 | #endif 464 | fprintf(stderr, "Benchmark Time(ns) CPU(ns) Iterations\n"); 465 | fprintf(stderr, "---------------------------------------------------\n"); 466 | 467 | snappy::Benchmark_BM_UFlat->Run(); 468 | snappy::Benchmark_BM_UIOVec->Run(); 469 | snappy::Benchmark_BM_UValidate->Run(); 470 | snappy::Benchmark_BM_ZFlat->Run(); 471 | 472 | fprintf(stderr, "\n"); 473 | } 474 | 475 | #ifndef HAVE_GTEST 476 | 477 | static inline int RUN_ALL_TESTS() { 478 | fprintf(stderr, "Running correctness tests.\n"); 479 | snappy::Test_CorruptedTest_VerifyCorrupted(); 480 | snappy::Test_Snappy_SimpleTests(); 481 | snappy::Test_Snappy_MaxBlowup(); 482 | snappy::Test_Snappy_RandomData(); 483 | snappy::Test_Snappy_FourByteOffset(); 484 | snappy::Test_SnappyCorruption_TruncatedVarint(); 485 | 
snappy::Test_SnappyCorruption_UnterminatedVarint(); 486 | snappy::Test_SnappyCorruption_OverflowingVarint(); 487 | snappy::Test_Snappy_ReadPastEndOfBuffer(); 488 | snappy::Test_Snappy_FindMatchLength(); 489 | snappy::Test_Snappy_FindMatchLengthRandom(); 490 | fprintf(stderr, "All tests passed.\n"); 491 | 492 | return 0; 493 | } 494 | 495 | #endif // HAVE_GTEST 496 | 497 | // For main(). 498 | namespace snappy { 499 | 500 | // Logging. 501 | 502 | #define LOG(level) LogMessage() 503 | #define VLOG(level) true ? (void)0 : \ 504 | snappy::LogMessageVoidify() & snappy::LogMessage() 505 | 506 | class LogMessage { 507 | public: 508 | LogMessage() { } 509 | ~LogMessage() { 510 | std::cerr << std::endl; 511 | } 512 | 513 | LogMessage& operator<<(const std::string& msg) { 514 | std::cerr << msg; 515 | return *this; 516 | } 517 | LogMessage& operator<<(int x) { 518 | std::cerr << x; 519 | return *this; 520 | } 521 | }; 522 | 523 | // Asserts, both versions activated in debug mode only, 524 | // and ones that are always active. 525 | 526 | #define CRASH_UNLESS(condition) \ 527 | SNAPPY_PREDICT_TRUE(condition) ? (void)0 : \ 528 | snappy::LogMessageVoidify() & snappy::LogMessageCrash() 529 | 530 | #ifdef _MSC_VER 531 | // ~LogMessageCrash calls abort() and therefore never exits. This is by design 532 | // so temporarily disable warning C4722. 533 | #pragma warning(push) 534 | #pragma warning(disable:4722) 535 | #endif 536 | 537 | class LogMessageCrash : public LogMessage { 538 | public: 539 | LogMessageCrash() { } 540 | ~LogMessageCrash() { 541 | std::cerr << std::endl; 542 | abort(); 543 | } 544 | }; 545 | 546 | #ifdef _MSC_VER 547 | #pragma warning(pop) 548 | #endif 549 | 550 | // This class is used to explicitly ignore values in the conditional 551 | // logging macros. This avoids compiler warnings like "value computed 552 | // is not used" and "statement has no effect". 
553 | 554 | class LogMessageVoidify { 555 | public: 556 | LogMessageVoidify() { } 557 | // This has to be an operator with a precedence lower than << but 558 | // higher than ?: 559 | void operator&(const LogMessage&) { } 560 | }; 561 | 562 | #define CHECK(cond) CRASH_UNLESS(cond) 563 | #define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) 564 | #define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) 565 | #define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) 566 | #define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) 567 | #define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) 568 | #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) 569 | #define CHECK_OK(cond) (cond).CheckSuccess() 570 | 571 | } // namespace snappy 572 | 573 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ 574 | -------------------------------------------------------------------------------- /snappy/snappy-stubs-internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 
16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Various stubs for the open-source version of Snappy. 30 | 31 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ 32 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ 33 | 34 | #ifdef HAVE_CONFIG_H 35 | #include "config.h" 36 | #endif 37 | 38 | #include <string> 39 | 40 | #include <assert.h> 41 | #include <stdlib.h> 42 | #include <string.h> 43 | 44 | #ifdef HAVE_SYS_MMAN_H 45 | #include <sys/mman.h> 46 | #endif 47 | 48 | #ifdef HAVE_UNISTD_H 49 | #include <unistd.h> 50 | #endif 51 | 52 | #if defined(_MSC_VER) 53 | #include <intrin.h> 54 | #endif // defined(_MSC_VER) 55 | 56 | #include "snappy-stubs-public.h" 57 | 58 | #if defined(__x86_64__) 59 | 60 | // Enable 64-bit optimized versions of some routines. 61 | #define ARCH_K8 1 62 | 63 | #elif defined(__ppc64__) 64 | 65 | #define ARCH_PPC 1 66 | 67 | #elif defined(__aarch64__) 68 | 69 | #define ARCH_ARM 1 70 | 71 | #endif 72 | 73 | // Needed by OS X, among others. 74 | #ifndef MAP_ANONYMOUS 75 | #define MAP_ANONYMOUS MAP_ANON 76 | #endif 77 | 78 | // The size of an array, if known at compile-time. 79 | // Will give unexpected results if used on a pointer. 
80 | // We undefine it first, since some compilers already have a definition. 81 | #ifdef ARRAYSIZE 82 | #undef ARRAYSIZE 83 | #endif 84 | #define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) 85 | 86 | // Static prediction hints. 87 | #ifdef HAVE_BUILTIN_EXPECT 88 | #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) 89 | #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) 90 | #else 91 | #define SNAPPY_PREDICT_FALSE(x) (x) 92 | #define SNAPPY_PREDICT_TRUE(x) (x) 93 | #endif 94 | 95 | // This is only used for recomputing the tag byte table used during 96 | // decompression; for simplicity we just remove it from the open-source 97 | // version (anyone who wants to regenerate it can just do the call 98 | // themselves within main()). 99 | #define DEFINE_bool(flag_name, default_value, description) \ 100 | bool FLAGS_ ## flag_name = default_value 101 | #define DECLARE_bool(flag_name) \ 102 | extern bool FLAGS_ ## flag_name 103 | 104 | namespace snappy { 105 | 106 | static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF); 107 | static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL); 108 | 109 | // Potentially unaligned loads and stores. 110 | 111 | // x86, PowerPC, and ARM64 can simply do these loads and stores native. 
112 | 113 | #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ 114 | defined(__aarch64__) 115 | 116 | #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p)) 117 | #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) 118 | #define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p)) 119 | 120 | #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val)) 121 | #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val)) 122 | #define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val)) 123 | 124 | // ARMv7 and newer support native unaligned accesses, but only of 16-bit 125 | // and 32-bit values (not 64-bit); older versions either raise a fatal signal, 126 | // do an unaligned read and rotate the words around a bit, or do the reads very 127 | // slowly (trip through kernel mode). There's no simple #define that says just 128 | // “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6 129 | // sub-architectures. 130 | // 131 | // This is a mess, but there's not much we can do about it. 132 | // 133 | // To further complicate matters, only LDR instructions (single reads) are 134 | // allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we 135 | // explicitly tell the compiler that these accesses can be unaligned, it can and 136 | // will combine accesses. On armcc, the way to signal this is done by accessing 137 | // through the type (uint32 __packed *), but GCC has no such attribute 138 | // (it ignores __attribute__((packed)) on individual variables). However, 139 | // we can tell it that a _struct_ is unaligned, which has the same effect, 140 | // so we do that. 
141 | 142 | #elif defined(__arm__) && \ 143 | !defined(__ARM_ARCH_4__) && \ 144 | !defined(__ARM_ARCH_4T__) && \ 145 | !defined(__ARM_ARCH_5__) && \ 146 | !defined(__ARM_ARCH_5T__) && \ 147 | !defined(__ARM_ARCH_5TE__) && \ 148 | !defined(__ARM_ARCH_5TEJ__) && \ 149 | !defined(__ARM_ARCH_6__) && \ 150 | !defined(__ARM_ARCH_6J__) && \ 151 | !defined(__ARM_ARCH_6K__) && \ 152 | !defined(__ARM_ARCH_6Z__) && \ 153 | !defined(__ARM_ARCH_6ZK__) && \ 154 | !defined(__ARM_ARCH_6T2__) 155 | 156 | #if __GNUC__ 157 | #define ATTRIBUTE_PACKED __attribute__((__packed__)) 158 | #else 159 | #define ATTRIBUTE_PACKED 160 | #endif 161 | 162 | namespace base { 163 | namespace internal { 164 | 165 | struct Unaligned16Struct { 166 | uint16 value; 167 | uint8 dummy; // To make the size non-power-of-two. 168 | } ATTRIBUTE_PACKED; 169 | 170 | struct Unaligned32Struct { 171 | uint32 value; 172 | uint8 dummy; // To make the size non-power-of-two. 173 | } ATTRIBUTE_PACKED; 174 | 175 | } // namespace internal 176 | } // namespace base 177 | 178 | #define UNALIGNED_LOAD16(_p) \ 179 | ((reinterpret_cast<const ::snappy::base::internal::Unaligned16Struct *>(_p))->value) 180 | #define UNALIGNED_LOAD32(_p) \ 181 | ((reinterpret_cast<const ::snappy::base::internal::Unaligned32Struct *>(_p))->value) 182 | 183 | #define UNALIGNED_STORE16(_p, _val) \ 184 | ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \ 185 | (_val)) 186 | #define UNALIGNED_STORE32(_p, _val) \ 187 | ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \ 188 | (_val)) 189 | 190 | // TODO(user): NEON supports unaligned 64-bit loads and stores. 191 | // See if that would be more efficient on platforms supporting it, 192 | // at least for copies. 
193 | 194 | inline uint64 UNALIGNED_LOAD64(const void *p) { 195 | uint64 t; 196 | memcpy(&t, p, sizeof t); 197 | return t; 198 | } 199 | 200 | inline void UNALIGNED_STORE64(void *p, uint64 v) { 201 | memcpy(p, &v, sizeof v); 202 | } 203 | 204 | #else 205 | 206 | // These functions are provided for architectures that don't support 207 | // unaligned loads and stores. 208 | 209 | inline uint16 UNALIGNED_LOAD16(const void *p) { 210 | uint16 t; 211 | memcpy(&t, p, sizeof t); 212 | return t; 213 | } 214 | 215 | inline uint32 UNALIGNED_LOAD32(const void *p) { 216 | uint32 t; 217 | memcpy(&t, p, sizeof t); 218 | return t; 219 | } 220 | 221 | inline uint64 UNALIGNED_LOAD64(const void *p) { 222 | uint64 t; 223 | memcpy(&t, p, sizeof t); 224 | return t; 225 | } 226 | 227 | inline void UNALIGNED_STORE16(void *p, uint16 v) { 228 | memcpy(p, &v, sizeof v); 229 | } 230 | 231 | inline void UNALIGNED_STORE32(void *p, uint32 v) { 232 | memcpy(p, &v, sizeof v); 233 | } 234 | 235 | inline void UNALIGNED_STORE64(void *p, uint64 v) { 236 | memcpy(p, &v, sizeof v); 237 | } 238 | 239 | #endif 240 | 241 | // The following guarantees declaration of the byte swap functions. 242 | #if defined(SNAPPY_IS_BIG_ENDIAN) 243 | 244 | #ifdef HAVE_SYS_BYTEORDER_H 245 | #include <sys/byteorder.h> 246 | #endif 247 | 248 | #ifdef HAVE_SYS_ENDIAN_H 249 | #include <sys/endian.h> 250 | #endif 251 | 252 | #ifdef _MSC_VER 253 | #include <stdlib.h> 254 | #define bswap_16(x) _byteswap_ushort(x) 255 | #define bswap_32(x) _byteswap_ulong(x) 256 | #define bswap_64(x) _byteswap_uint64(x) 257 | 258 | #elif defined(__APPLE__) 259 | // Mac OS X / Darwin features 260 | #include <libkern/OSByteOrder.h> 261 | #define bswap_16(x) OSSwapInt16(x) 262 | #define bswap_32(x) OSSwapInt32(x) 263 | #define bswap_64(x) OSSwapInt64(x) 264 | 265 | #elif defined(HAVE_BYTESWAP_H) 266 | #include <byteswap.h> 267 | 268 | #elif defined(bswap32) 269 | // FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included). 
270 | #define bswap_16(x) bswap16(x) 271 | #define bswap_32(x) bswap32(x) 272 | #define bswap_64(x) bswap64(x) 273 | 274 | #elif defined(BSWAP_64) 275 | // Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included). 276 | #define bswap_16(x) BSWAP_16(x) 277 | #define bswap_32(x) BSWAP_32(x) 278 | #define bswap_64(x) BSWAP_64(x) 279 | 280 | #else 281 | 282 | inline uint16 bswap_16(uint16 x) { 283 | return (x << 8) | (x >> 8); 284 | } 285 | 286 | inline uint32 bswap_32(uint32 x) { 287 | x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); 288 | return (x >> 16) | (x << 16); 289 | } 290 | 291 | inline uint64 bswap_64(uint64 x) { 292 | x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); 293 | x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); 294 | return (x >> 32) | (x << 32); 295 | } 296 | 297 | #endif 298 | 299 | #endif // defined(SNAPPY_IS_BIG_ENDIAN) 300 | 301 | // Convert to little-endian storage, opposite of network format. 302 | // Convert x from host to little endian: x = LittleEndian::FromHost32(x); 303 | // convert x from little endian to host: x = LittleEndian::ToHost32(x); 304 | // 305 | // Store values into unaligned memory converting to little endian order: 306 | // LittleEndian::Store16(p, x); 307 | // 308 | // Load unaligned values stored in little endian converting to host order: 309 | // x = LittleEndian::Load16(p); 310 | class LittleEndian { 311 | public: 312 | // Conversion functions. 
313 | #if defined(SNAPPY_IS_BIG_ENDIAN) 314 | 315 | static uint16 FromHost16(uint16 x) { return bswap_16(x); } 316 | static uint16 ToHost16(uint16 x) { return bswap_16(x); } 317 | 318 | static uint32 FromHost32(uint32 x) { return bswap_32(x); } 319 | static uint32 ToHost32(uint32 x) { return bswap_32(x); } 320 | 321 | static bool IsLittleEndian() { return false; } 322 | 323 | #else // !defined(SNAPPY_IS_BIG_ENDIAN) 324 | 325 | static uint16 FromHost16(uint16 x) { return x; } 326 | static uint16 ToHost16(uint16 x) { return x; } 327 | 328 | static uint32 FromHost32(uint32 x) { return x; } 329 | static uint32 ToHost32(uint32 x) { return x; } 330 | 331 | static bool IsLittleEndian() { return true; } 332 | 333 | #endif // !defined(SNAPPY_IS_BIG_ENDIAN) 334 | 335 | // Functions to do unaligned loads and stores in little-endian order. 336 | static uint16 Load16(const void *p) { 337 | return ToHost16(UNALIGNED_LOAD16(p)); 338 | } 339 | 340 | static void Store16(void *p, uint16 v) { 341 | UNALIGNED_STORE16(p, FromHost16(v)); 342 | } 343 | 344 | static uint32 Load32(const void *p) { 345 | return ToHost32(UNALIGNED_LOAD32(p)); 346 | } 347 | 348 | static void Store32(void *p, uint32 v) { 349 | UNALIGNED_STORE32(p, FromHost32(v)); 350 | } 351 | }; 352 | 353 | // Some bit-manipulation functions. 354 | class Bits { 355 | public: 356 | // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. 357 | static int Log2Floor(uint32 n); 358 | 359 | // Return the first set least / most significant bit, 0-indexed. Returns an 360 | // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except 361 | // that it's 0-indexed. 
362 | static int FindLSBSetNonZero(uint32 n); 363 | 364 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 365 | static int FindLSBSetNonZero64(uint64 n); 366 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 367 | 368 | private: 369 | // No copying 370 | Bits(const Bits&); 371 | void operator=(const Bits&); 372 | }; 373 | 374 | #ifdef HAVE_BUILTIN_CTZ 375 | 376 | inline int Bits::Log2Floor(uint32 n) { 377 | return n == 0 ? -1 : 31 ^ __builtin_clz(n); 378 | } 379 | 380 | inline int Bits::FindLSBSetNonZero(uint32 n) { 381 | return __builtin_ctz(n); 382 | } 383 | 384 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 385 | inline int Bits::FindLSBSetNonZero64(uint64 n) { 386 | return __builtin_ctzll(n); 387 | } 388 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 389 | 390 | #elif defined(_MSC_VER) 391 | 392 | inline int Bits::Log2Floor(uint32 n) { 393 | unsigned long where; 394 | if (_BitScanReverse(&where, n)) { 395 | return where; 396 | } else { 397 | return -1; 398 | } 399 | } 400 | 401 | inline int Bits::FindLSBSetNonZero(uint32 n) { 402 | unsigned long where; 403 | if (_BitScanForward(&where, n)) return static_cast(where); 404 | return 32; 405 | } 406 | 407 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 408 | inline int Bits::FindLSBSetNonZero64(uint64 n) { 409 | unsigned long where; 410 | if (_BitScanForward64(&where, n)) return static_cast(where); 411 | return 64; 412 | } 413 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 414 | 415 | #else // Portable versions. 
416 | 417 | inline int Bits::Log2Floor(uint32 n) { 418 | if (n == 0) 419 | return -1; 420 | int log = 0; 421 | uint32 value = n; 422 | for (int i = 4; i >= 0; --i) { 423 | int shift = (1 << i); 424 | uint32 x = value >> shift; 425 | if (x != 0) { 426 | value = x; 427 | log += shift; 428 | } 429 | } 430 | assert(value == 1); 431 | return log; 432 | } 433 | 434 | inline int Bits::FindLSBSetNonZero(uint32 n) { 435 | int rc = 31; 436 | for (int i = 4, shift = 1 << 4; i >= 0; --i) { 437 | const uint32 x = n << shift; 438 | if (x != 0) { 439 | n = x; 440 | rc -= shift; 441 | } 442 | shift >>= 1; 443 | } 444 | return rc; 445 | } 446 | 447 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 448 | // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). 449 | inline int Bits::FindLSBSetNonZero64(uint64 n) { 450 | const uint32 bottombits = static_cast(n); 451 | if (bottombits == 0) { 452 | // Bottom bits are zero, so scan in top bits 453 | return 32 + FindLSBSetNonZero(static_cast(n >> 32)); 454 | } else { 455 | return FindLSBSetNonZero(bottombits); 456 | } 457 | } 458 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) 459 | 460 | #endif // End portable versions. 461 | 462 | // Variable-length integer encoding. 463 | class Varint { 464 | public: 465 | // Maximum lengths of varint encoding of uint32. 466 | static const int kMax32 = 5; 467 | 468 | // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. 469 | // Never reads a character at or beyond limit. If a valid/terminated varint32 470 | // was found in the range, stores it in *OUTPUT and returns a pointer just 471 | // past the last byte of the varint32. Else returns NULL. On success, 472 | // "result <= limit". 473 | static const char* Parse32WithLimit(const char* ptr, const char* limit, 474 | uint32* OUTPUT); 475 | 476 | // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". 
477 | // EFFECTS Encodes "v" into "ptr" and returns a pointer to the 478 | // byte just past the last encoded byte. 479 | static char* Encode32(char* ptr, uint32 v); 480 | 481 | // EFFECTS Appends the varint representation of "value" to "*s". 482 | static void Append32(string* s, uint32 value); 483 | }; 484 | 485 | inline const char* Varint::Parse32WithLimit(const char* p, 486 | const char* l, 487 | uint32* OUTPUT) { 488 | const unsigned char* ptr = reinterpret_cast(p); 489 | const unsigned char* limit = reinterpret_cast(l); 490 | uint32 b, result; 491 | if (ptr >= limit) return NULL; 492 | b = *(ptr++); result = b & 127; if (b < 128) goto done; 493 | if (ptr >= limit) return NULL; 494 | b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; 495 | if (ptr >= limit) return NULL; 496 | b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; 497 | if (ptr >= limit) return NULL; 498 | b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; 499 | if (ptr >= limit) return NULL; 500 | b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; 501 | return NULL; // Value is too long to be a varint32 502 | done: 503 | *OUTPUT = result; 504 | return reinterpret_cast(ptr); 505 | } 506 | 507 | inline char* Varint::Encode32(char* sptr, uint32 v) { 508 | // Operate on characters as unsigneds 509 | unsigned char* ptr = reinterpret_cast(sptr); 510 | static const int B = 128; 511 | if (v < (1<<7)) { 512 | *(ptr++) = v; 513 | } else if (v < (1<<14)) { 514 | *(ptr++) = v | B; 515 | *(ptr++) = v>>7; 516 | } else if (v < (1<<21)) { 517 | *(ptr++) = v | B; 518 | *(ptr++) = (v>>7) | B; 519 | *(ptr++) = v>>14; 520 | } else if (v < (1<<28)) { 521 | *(ptr++) = v | B; 522 | *(ptr++) = (v>>7) | B; 523 | *(ptr++) = (v>>14) | B; 524 | *(ptr++) = v>>21; 525 | } else { 526 | *(ptr++) = v | B; 527 | *(ptr++) = (v>>7) | B; 528 | *(ptr++) = (v>>14) | B; 529 | *(ptr++) = (v>>21) | B; 530 | *(ptr++) = v>>28; 531 | } 532 | return reinterpret_cast(ptr); 533 | } 534 | 535 
| // If you know the internal layout of the std::string in use, you can 536 | // replace this function with one that resizes the string without 537 | // filling the new space with zeros (if applicable) -- 538 | // it will be non-portable but faster. 539 | inline void STLStringResizeUninitialized(string* s, size_t new_size) { 540 | s->resize(new_size); 541 | } 542 | 543 | // Return a mutable char* pointing to a string's internal buffer, 544 | // which may not be null-terminated. Writing through this pointer will 545 | // modify the string. 546 | // 547 | // string_as_array(&str)[i] is valid for 0 <= i < str.size() until the 548 | // next call to a string method that invalidates iterators. 549 | // 550 | // As of 2006-04, there is no standard-blessed way of getting a 551 | // mutable reference to a string's internal buffer. However, issue 530 552 | // (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) 553 | // proposes this as the method. It will officially be part of the standard 554 | // for C++0x. This should already work on all current implementations. 555 | inline char* string_as_array(string* str) { 556 | return str->empty() ? NULL : &*str->begin(); 557 | } 558 | 559 | } // namespace snappy 560 | 561 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ 562 | -------------------------------------------------------------------------------- /snappy/snappy-test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 
9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Various stubs for the unit tests for the open-source version of Snappy. 30 | 31 | #ifdef HAVE_CONFIG_H 32 | #include "config.h" 33 | #endif 34 | 35 | #ifdef HAVE_WINDOWS_H 36 | // Needed to be able to use std::max without workarounds in the source code. 
37 | // https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts 38 | #define NOMINMAX 39 | #include 40 | #endif 41 | 42 | #include "snappy-test.h" 43 | 44 | #include 45 | 46 | DEFINE_bool(run_microbenchmarks, true, 47 | "Run microbenchmarks before doing anything else."); 48 | 49 | namespace snappy { 50 | 51 | string ReadTestDataFile(const string& base, size_t size_limit) { 52 | string contents; 53 | const char* srcdir = getenv("srcdir"); // This is set by Automake. 54 | string prefix; 55 | if (srcdir) { 56 | prefix = string(srcdir) + "/"; 57 | } 58 | file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults() 59 | ).CheckSuccess(); 60 | if (size_limit > 0) { 61 | contents = contents.substr(0, size_limit); 62 | } 63 | return contents; 64 | } 65 | 66 | string ReadTestDataFile(const string& base) { 67 | return ReadTestDataFile(base, 0); 68 | } 69 | 70 | string StringPrintf(const char* format, ...) { 71 | char buf[4096]; 72 | va_list ap; 73 | va_start(ap, format); 74 | vsnprintf(buf, sizeof(buf), format, ap); 75 | va_end(ap); 76 | return buf; 77 | } 78 | 79 | bool benchmark_running = false; 80 | int64 benchmark_real_time_us = 0; 81 | int64 benchmark_cpu_time_us = 0; 82 | string *benchmark_label = NULL; 83 | int64 benchmark_bytes_processed = 0; 84 | 85 | void ResetBenchmarkTiming() { 86 | benchmark_real_time_us = 0; 87 | benchmark_cpu_time_us = 0; 88 | } 89 | 90 | #ifdef WIN32 91 | LARGE_INTEGER benchmark_start_real; 92 | FILETIME benchmark_start_cpu; 93 | #else // WIN32 94 | struct timeval benchmark_start_real; 95 | struct rusage benchmark_start_cpu; 96 | #endif // WIN32 97 | 98 | void StartBenchmarkTiming() { 99 | #ifdef WIN32 100 | QueryPerformanceCounter(&benchmark_start_real); 101 | FILETIME dummy; 102 | CHECK(GetProcessTimes( 103 | GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_start_cpu)); 104 | #else 105 | gettimeofday(&benchmark_start_real, NULL); 106 | if (getrusage(RUSAGE_SELF, 
&benchmark_start_cpu) == -1) { 107 | perror("getrusage(RUSAGE_SELF)"); 108 | exit(1); 109 | } 110 | #endif 111 | benchmark_running = true; 112 | } 113 | 114 | void StopBenchmarkTiming() { 115 | if (!benchmark_running) { 116 | return; 117 | } 118 | 119 | #ifdef WIN32 120 | LARGE_INTEGER benchmark_stop_real; 121 | LARGE_INTEGER benchmark_frequency; 122 | QueryPerformanceCounter(&benchmark_stop_real); 123 | QueryPerformanceFrequency(&benchmark_frequency); 124 | 125 | double elapsed_real = static_cast( 126 | benchmark_stop_real.QuadPart - benchmark_start_real.QuadPart) / 127 | benchmark_frequency.QuadPart; 128 | benchmark_real_time_us += elapsed_real * 1e6 + 0.5; 129 | 130 | FILETIME benchmark_stop_cpu, dummy; 131 | CHECK(GetProcessTimes( 132 | GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_stop_cpu)); 133 | 134 | ULARGE_INTEGER start_ulargeint; 135 | start_ulargeint.LowPart = benchmark_start_cpu.dwLowDateTime; 136 | start_ulargeint.HighPart = benchmark_start_cpu.dwHighDateTime; 137 | 138 | ULARGE_INTEGER stop_ulargeint; 139 | stop_ulargeint.LowPart = benchmark_stop_cpu.dwLowDateTime; 140 | stop_ulargeint.HighPart = benchmark_stop_cpu.dwHighDateTime; 141 | 142 | benchmark_cpu_time_us += 143 | (stop_ulargeint.QuadPart - start_ulargeint.QuadPart + 5) / 10; 144 | #else // WIN32 145 | struct timeval benchmark_stop_real; 146 | gettimeofday(&benchmark_stop_real, NULL); 147 | benchmark_real_time_us += 148 | 1000000 * (benchmark_stop_real.tv_sec - benchmark_start_real.tv_sec); 149 | benchmark_real_time_us += 150 | (benchmark_stop_real.tv_usec - benchmark_start_real.tv_usec); 151 | 152 | struct rusage benchmark_stop_cpu; 153 | if (getrusage(RUSAGE_SELF, &benchmark_stop_cpu) == -1) { 154 | perror("getrusage(RUSAGE_SELF)"); 155 | exit(1); 156 | } 157 | benchmark_cpu_time_us += 1000000 * (benchmark_stop_cpu.ru_utime.tv_sec - 158 | benchmark_start_cpu.ru_utime.tv_sec); 159 | benchmark_cpu_time_us += (benchmark_stop_cpu.ru_utime.tv_usec - 160 | 
benchmark_start_cpu.ru_utime.tv_usec); 161 | #endif // WIN32 162 | 163 | benchmark_running = false; 164 | } 165 | 166 | void SetBenchmarkLabel(const string& str) { 167 | if (benchmark_label) { 168 | delete benchmark_label; 169 | } 170 | benchmark_label = new string(str); 171 | } 172 | 173 | void SetBenchmarkBytesProcessed(int64 bytes) { 174 | benchmark_bytes_processed = bytes; 175 | } 176 | 177 | struct BenchmarkRun { 178 | int64 real_time_us; 179 | int64 cpu_time_us; 180 | }; 181 | 182 | struct BenchmarkCompareCPUTime { 183 | bool operator() (const BenchmarkRun& a, const BenchmarkRun& b) const { 184 | return a.cpu_time_us < b.cpu_time_us; 185 | } 186 | }; 187 | 188 | void Benchmark::Run() { 189 | for (int test_case_num = start_; test_case_num <= stop_; ++test_case_num) { 190 | // Run a few iterations first to find out approximately how fast 191 | // the benchmark is. 192 | const int kCalibrateIterations = 100; 193 | ResetBenchmarkTiming(); 194 | StartBenchmarkTiming(); 195 | (*function_)(kCalibrateIterations, test_case_num); 196 | StopBenchmarkTiming(); 197 | 198 | // Let each test case run for about 200ms, but at least as many 199 | // as we used to calibrate. 200 | // Run five times and pick the median. 
201 | const int kNumRuns = 5; 202 | const int kMedianPos = kNumRuns / 2; 203 | int num_iterations = 0; 204 | if (benchmark_real_time_us > 0) { 205 | num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us; 206 | } 207 | num_iterations = std::max(num_iterations, kCalibrateIterations); 208 | BenchmarkRun benchmark_runs[kNumRuns]; 209 | 210 | for (int run = 0; run < kNumRuns; ++run) { 211 | ResetBenchmarkTiming(); 212 | StartBenchmarkTiming(); 213 | (*function_)(num_iterations, test_case_num); 214 | StopBenchmarkTiming(); 215 | 216 | benchmark_runs[run].real_time_us = benchmark_real_time_us; 217 | benchmark_runs[run].cpu_time_us = benchmark_cpu_time_us; 218 | } 219 | 220 | string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num); 221 | string human_readable_speed; 222 | 223 | std::nth_element(benchmark_runs, 224 | benchmark_runs + kMedianPos, 225 | benchmark_runs + kNumRuns, 226 | BenchmarkCompareCPUTime()); 227 | int64 real_time_us = benchmark_runs[kMedianPos].real_time_us; 228 | int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us; 229 | if (cpu_time_us <= 0) { 230 | human_readable_speed = "?"; 231 | } else { 232 | int64 bytes_per_second = 233 | benchmark_bytes_processed * 1000000 / cpu_time_us; 234 | if (bytes_per_second < 1024) { 235 | human_readable_speed = StringPrintf("%dB/s", bytes_per_second); 236 | } else if (bytes_per_second < 1024 * 1024) { 237 | human_readable_speed = StringPrintf( 238 | "%.1fkB/s", bytes_per_second / 1024.0f); 239 | } else if (bytes_per_second < 1024 * 1024 * 1024) { 240 | human_readable_speed = StringPrintf( 241 | "%.1fMB/s", bytes_per_second / (1024.0f * 1024.0f)); 242 | } else { 243 | human_readable_speed = StringPrintf( 244 | "%.1fGB/s", bytes_per_second / (1024.0f * 1024.0f * 1024.0f)); 245 | } 246 | } 247 | 248 | fprintf(stderr, 249 | #ifdef WIN32 250 | "%-18s %10I64d %10I64d %10d %s %s\n", 251 | #else 252 | "%-18s %10lld %10lld %10d %s %s\n", 253 | #endif 254 | heading.c_str(), 255 | 
static_cast(real_time_us * 1000 / num_iterations), 256 | static_cast(cpu_time_us * 1000 / num_iterations), 257 | num_iterations, 258 | human_readable_speed.c_str(), 259 | benchmark_label->c_str()); 260 | } 261 | } 262 | 263 | #ifdef HAVE_LIBZ 264 | 265 | ZLib::ZLib() 266 | : comp_init_(false), 267 | uncomp_init_(false) { 268 | Reinit(); 269 | } 270 | 271 | ZLib::~ZLib() { 272 | if (comp_init_) { deflateEnd(&comp_stream_); } 273 | if (uncomp_init_) { inflateEnd(&uncomp_stream_); } 274 | } 275 | 276 | void ZLib::Reinit() { 277 | compression_level_ = Z_DEFAULT_COMPRESSION; 278 | window_bits_ = MAX_WBITS; 279 | mem_level_ = 8; // DEF_MEM_LEVEL 280 | if (comp_init_) { 281 | deflateEnd(&comp_stream_); 282 | comp_init_ = false; 283 | } 284 | if (uncomp_init_) { 285 | inflateEnd(&uncomp_stream_); 286 | uncomp_init_ = false; 287 | } 288 | first_chunk_ = true; 289 | } 290 | 291 | void ZLib::Reset() { 292 | first_chunk_ = true; 293 | } 294 | 295 | // --------- COMPRESS MODE 296 | 297 | // Initialization method to be called if we hit an error while 298 | // compressing. On hitting an error, call this method before returning 299 | // the error. 
300 | void ZLib::CompressErrorInit() { 301 | deflateEnd(&comp_stream_); 302 | comp_init_ = false; 303 | Reset(); 304 | } 305 | 306 | int ZLib::DeflateInit() { 307 | return deflateInit2(&comp_stream_, 308 | compression_level_, 309 | Z_DEFLATED, 310 | window_bits_, 311 | mem_level_, 312 | Z_DEFAULT_STRATEGY); 313 | } 314 | 315 | int ZLib::CompressInit(Bytef *dest, uLongf *destLen, 316 | const Bytef *source, uLong *sourceLen) { 317 | int err; 318 | 319 | comp_stream_.next_in = (Bytef*)source; 320 | comp_stream_.avail_in = (uInt)*sourceLen; 321 | if ((uLong)comp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR; 322 | comp_stream_.next_out = dest; 323 | comp_stream_.avail_out = (uInt)*destLen; 324 | if ((uLong)comp_stream_.avail_out != *destLen) return Z_BUF_ERROR; 325 | 326 | if ( !first_chunk_ ) // only need to set up stream the first time through 327 | return Z_OK; 328 | 329 | if (comp_init_) { // we've already initted it 330 | err = deflateReset(&comp_stream_); 331 | if (err != Z_OK) { 332 | LOG(WARNING) << "ERROR: Can't reset compress object; creating a new one"; 333 | deflateEnd(&comp_stream_); 334 | comp_init_ = false; 335 | } 336 | } 337 | if (!comp_init_) { // first use 338 | comp_stream_.zalloc = (alloc_func)0; 339 | comp_stream_.zfree = (free_func)0; 340 | comp_stream_.opaque = (voidpf)0; 341 | err = DeflateInit(); 342 | if (err != Z_OK) return err; 343 | comp_init_ = true; 344 | } 345 | return Z_OK; 346 | } 347 | 348 | // In a perfect world we'd always have the full buffer to compress 349 | // when the time came, and we could just call Compress(). Alas, we 350 | // want to do chunked compression on our webserver. In this 351 | // application, we compress the header, send it off, then compress the 352 | // results, send them off, then compress the footer. Thus we need to 353 | // use the chunked compression features of zlib. 
354 | int ZLib::CompressAtMostOrAll(Bytef *dest, uLongf *destLen, 355 | const Bytef *source, uLong *sourceLen, 356 | int flush_mode) { // Z_FULL_FLUSH or Z_FINISH 357 | int err; 358 | 359 | if ( (err=CompressInit(dest, destLen, source, sourceLen)) != Z_OK ) 360 | return err; 361 | 362 | // This is used to figure out how many bytes we wrote *this chunk* 363 | int compressed_size = comp_stream_.total_out; 364 | 365 | // Some setup happens only for the first chunk we compress in a run 366 | if ( first_chunk_ ) { 367 | first_chunk_ = false; 368 | } 369 | 370 | // flush_mode is Z_FINISH for all mode, Z_SYNC_FLUSH for incremental 371 | // compression. 372 | err = deflate(&comp_stream_, flush_mode); 373 | 374 | *sourceLen = comp_stream_.avail_in; 375 | 376 | if ((err == Z_STREAM_END || err == Z_OK) 377 | && comp_stream_.avail_in == 0 378 | && comp_stream_.avail_out != 0 ) { 379 | // we processed everything ok and the output buffer was large enough. 380 | ; 381 | } else if (err == Z_STREAM_END && comp_stream_.avail_in > 0) { 382 | return Z_BUF_ERROR; // should never happen 383 | } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) { 384 | // an error happened 385 | CompressErrorInit(); 386 | return err; 387 | } else if (comp_stream_.avail_out == 0) { // not enough space 388 | err = Z_BUF_ERROR; 389 | } 390 | 391 | assert(err == Z_OK || err == Z_STREAM_END || err == Z_BUF_ERROR); 392 | if (err == Z_STREAM_END) 393 | err = Z_OK; 394 | 395 | // update the crc and other metadata 396 | compressed_size = comp_stream_.total_out - compressed_size; // delta 397 | *destLen = compressed_size; 398 | 399 | return err; 400 | } 401 | 402 | int ZLib::CompressChunkOrAll(Bytef *dest, uLongf *destLen, 403 | const Bytef *source, uLong sourceLen, 404 | int flush_mode) { // Z_FULL_FLUSH or Z_FINISH 405 | const int ret = 406 | CompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode); 407 | if (ret == Z_BUF_ERROR) 408 | CompressErrorInit(); 409 | return ret; 410 | } 411 
| 412 | // This routine only initializes the compression stream once. Thereafter, it 413 | // just does a deflateReset on the stream, which should be faster. 414 | int ZLib::Compress(Bytef *dest, uLongf *destLen, 415 | const Bytef *source, uLong sourceLen) { 416 | int err; 417 | if ( (err=CompressChunkOrAll(dest, destLen, source, sourceLen, 418 | Z_FINISH)) != Z_OK ) 419 | return err; 420 | Reset(); // reset for next call to Compress 421 | 422 | return Z_OK; 423 | } 424 | 425 | 426 | // --------- UNCOMPRESS MODE 427 | 428 | int ZLib::InflateInit() { 429 | return inflateInit2(&uncomp_stream_, MAX_WBITS); 430 | } 431 | 432 | // Initialization method to be called if we hit an error while 433 | // uncompressing. On hitting an error, call this method before 434 | // returning the error. 435 | void ZLib::UncompressErrorInit() { 436 | inflateEnd(&uncomp_stream_); 437 | uncomp_init_ = false; 438 | Reset(); 439 | } 440 | 441 | int ZLib::UncompressInit(Bytef *dest, uLongf *destLen, 442 | const Bytef *source, uLong *sourceLen) { 443 | int err; 444 | 445 | uncomp_stream_.next_in = (Bytef*)source; 446 | uncomp_stream_.avail_in = (uInt)*sourceLen; 447 | // Check for source > 64K on 16-bit machine: 448 | if ((uLong)uncomp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR; 449 | 450 | uncomp_stream_.next_out = dest; 451 | uncomp_stream_.avail_out = (uInt)*destLen; 452 | if ((uLong)uncomp_stream_.avail_out != *destLen) return Z_BUF_ERROR; 453 | 454 | if ( !first_chunk_ ) // only need to set up stream the first time through 455 | return Z_OK; 456 | 457 | if (uncomp_init_) { // we've already initted it 458 | err = inflateReset(&uncomp_stream_); 459 | if (err != Z_OK) { 460 | LOG(WARNING) 461 | << "ERROR: Can't reset uncompress object; creating a new one"; 462 | UncompressErrorInit(); 463 | } 464 | } 465 | if (!uncomp_init_) { 466 | uncomp_stream_.zalloc = (alloc_func)0; 467 | uncomp_stream_.zfree = (free_func)0; 468 | uncomp_stream_.opaque = (voidpf)0; 469 | err = InflateInit(); 470 
| if (err != Z_OK) return err; 471 | uncomp_init_ = true; 472 | } 473 | return Z_OK; 474 | } 475 | 476 | // If you compressed your data a chunk at a time, with CompressChunk, 477 | // you can uncompress it a chunk at a time with UncompressChunk. 478 | // Only difference bewteen chunked and unchunked uncompression 479 | // is the flush mode we use: Z_SYNC_FLUSH (chunked) or Z_FINISH (unchunked). 480 | int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen, 481 | const Bytef *source, uLong *sourceLen, 482 | int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH 483 | int err = Z_OK; 484 | 485 | if ( (err=UncompressInit(dest, destLen, source, sourceLen)) != Z_OK ) { 486 | LOG(WARNING) << "UncompressInit: Error: " << err << " SourceLen: " 487 | << *sourceLen; 488 | return err; 489 | } 490 | 491 | // This is used to figure out how many output bytes we wrote *this chunk*: 492 | const uLong old_total_out = uncomp_stream_.total_out; 493 | 494 | // This is used to figure out how many input bytes we read *this chunk*: 495 | const uLong old_total_in = uncomp_stream_.total_in; 496 | 497 | // Some setup happens only for the first chunk we compress in a run 498 | if ( first_chunk_ ) { 499 | first_chunk_ = false; // so we don't do this again 500 | 501 | // For the first chunk *only* (to avoid infinite troubles), we let 502 | // there be no actual data to uncompress. This sometimes triggers 503 | // when the input is only the gzip header, say. 504 | if ( *sourceLen == 0 ) { 505 | *destLen = 0; 506 | return Z_OK; 507 | } 508 | } 509 | 510 | // We'll uncompress as much as we can. If we end OK great, otherwise 511 | // if we get an error that seems to be the gzip footer, we store the 512 | // gzip footer and return OK, otherwise we return the error. 513 | 514 | // flush_mode is Z_SYNC_FLUSH for chunked mode, Z_FINISH for all mode. 
515 | err = inflate(&uncomp_stream_, flush_mode); 516 | 517 | // Figure out how many bytes of the input zlib slurped up: 518 | const uLong bytes_read = uncomp_stream_.total_in - old_total_in; 519 | CHECK_LE(source + bytes_read, source + *sourceLen); 520 | *sourceLen = uncomp_stream_.avail_in; 521 | 522 | if ((err == Z_STREAM_END || err == Z_OK) // everything went ok 523 | && uncomp_stream_.avail_in == 0) { // and we read it all 524 | ; 525 | } else if (err == Z_STREAM_END && uncomp_stream_.avail_in > 0) { 526 | LOG(WARNING) 527 | << "UncompressChunkOrAll: Received some extra data, bytes total: " 528 | << uncomp_stream_.avail_in << " bytes: " 529 | << std::string(reinterpret_cast(uncomp_stream_.next_in), 530 | std::min(int(uncomp_stream_.avail_in), 20)); 531 | UncompressErrorInit(); 532 | return Z_DATA_ERROR; // what's the extra data for? 533 | } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) { 534 | // an error happened 535 | LOG(WARNING) << "UncompressChunkOrAll: Error: " << err 536 | << " avail_out: " << uncomp_stream_.avail_out; 537 | UncompressErrorInit(); 538 | return err; 539 | } else if (uncomp_stream_.avail_out == 0) { 540 | err = Z_BUF_ERROR; 541 | } 542 | 543 | assert(err == Z_OK || err == Z_BUF_ERROR || err == Z_STREAM_END); 544 | if (err == Z_STREAM_END) 545 | err = Z_OK; 546 | 547 | *destLen = uncomp_stream_.total_out - old_total_out; // size for this call 548 | 549 | return err; 550 | } 551 | 552 | int ZLib::UncompressChunkOrAll(Bytef *dest, uLongf *destLen, 553 | const Bytef *source, uLong sourceLen, 554 | int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH 555 | const int ret = 556 | UncompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode); 557 | if (ret == Z_BUF_ERROR) 558 | UncompressErrorInit(); 559 | return ret; 560 | } 561 | 562 | int ZLib::UncompressAtMost(Bytef *dest, uLongf *destLen, 563 | const Bytef *source, uLong *sourceLen) { 564 | return UncompressAtMostOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH); 565 
| } 566 | 567 | // We make sure we've uncompressed everything, that is, the current 568 | // uncompress stream is at a compressed-buffer-EOF boundary. In gzip 569 | // mode, we also check the gzip footer to make sure we pass the gzip 570 | // consistency checks. We RETURN true iff both types of checks pass. 571 | bool ZLib::UncompressChunkDone() { 572 | assert(!first_chunk_ && uncomp_init_); 573 | // Make sure we're at the end-of-compressed-data point. This means 574 | // if we call inflate with Z_FINISH we won't consume any input or 575 | // write any output 576 | Bytef dummyin, dummyout; 577 | uLongf dummylen = 0; 578 | if ( UncompressChunkOrAll(&dummyout, &dummylen, &dummyin, 0, Z_FINISH) 579 | != Z_OK ) { 580 | return false; 581 | } 582 | 583 | // Make sure that when we exit, we can start a new round of chunks later 584 | Reset(); 585 | 586 | return true; 587 | } 588 | 589 | // Uncompresses the source buffer into the destination buffer. 590 | // The destination buffer must be long enough to hold the entire 591 | // decompressed contents. 592 | // 593 | // We only initialize the uncomp_stream once. Thereafter, we use 594 | // inflateReset, which should be faster. 595 | // 596 | // Returns Z_OK on success, otherwise, it returns a zlib error code. 597 | int ZLib::Uncompress(Bytef *dest, uLongf *destLen, 598 | const Bytef *source, uLong sourceLen) { 599 | int err; 600 | if ( (err=UncompressChunkOrAll(dest, destLen, source, sourceLen, 601 | Z_FINISH)) != Z_OK ) { 602 | Reset(); // let us try to compress again 603 | return err; 604 | } 605 | if ( !UncompressChunkDone() ) // calls Reset() 606 | return Z_DATA_ERROR; 607 | return Z_OK; // stream_end is ok 608 | } 609 | 610 | #endif // HAVE_LIBZ 611 | 612 | } // namespace snappy 613 | -------------------------------------------------------------------------------- /snappy/snappy_unittest.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2005 and onwards Google Inc. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | #include 30 | #include 31 | 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "snappy.h" 39 | #include "snappy-internal.h" 40 | #include "snappy-test.h" 41 | #include "snappy-sinksource.h" 42 | 43 | DEFINE_int32(start_len, -1, 44 | "Starting prefix size for testing (-1: just full file contents)"); 45 | DEFINE_int32(end_len, -1, 46 | "Starting prefix size for testing (-1: just full file contents)"); 47 | DEFINE_int32(bytes, 10485760, 48 | "How many bytes to compress/uncompress per file for timing"); 49 | 50 | DEFINE_bool(zlib, false, 51 | "Run zlib compression (http://www.zlib.net)"); 52 | DEFINE_bool(lzo, false, 53 | "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); 54 | DEFINE_bool(snappy, true, "Run snappy compression"); 55 | 56 | DEFINE_bool(write_compressed, false, 57 | "Write compressed versions of each file to .comp"); 58 | DEFINE_bool(write_uncompressed, false, 59 | "Write uncompressed versions of each file to .uncomp"); 60 | 61 | DEFINE_bool(snappy_dump_decompression_table, false, 62 | "If true, we print the decompression table during tests."); 63 | 64 | namespace snappy { 65 | 66 | #if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) 67 | 68 | // To test against code that reads beyond its input, this class copies a 69 | // string to a newly allocated group of pages, the last of which 70 | // is made unreadable via mprotect. Note that we need to allocate the 71 | // memory with mmap(), as POSIX allows mprotect() only on memory allocated 72 | // with mmap(), and some malloc/posix_memalign implementations expect to 73 | // be able to read previously allocated memory while doing heap allocations. 74 | class DataEndingAtUnreadablePage { 75 | public: 76 | explicit DataEndingAtUnreadablePage(const string& s) { 77 | const size_t page_size = sysconf(_SC_PAGESIZE); 78 | const size_t size = s.size(); 79 | // Round up space for string to a multiple of page_size. 
80 | size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); 81 | alloc_size_ = space_for_string + page_size; 82 | mem_ = mmap(NULL, alloc_size_, 83 | PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 84 | CHECK_NE(MAP_FAILED, mem_); 85 | protected_page_ = reinterpret_cast(mem_) + space_for_string; 86 | char* dst = protected_page_ - size; 87 | memcpy(dst, s.data(), size); 88 | data_ = dst; 89 | size_ = size; 90 | // Make guard page unreadable. 91 | CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE)); 92 | } 93 | 94 | ~DataEndingAtUnreadablePage() { 95 | const size_t page_size = sysconf(_SC_PAGESIZE); 96 | // Undo the mprotect. 97 | CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); 98 | CHECK_EQ(0, munmap(mem_, alloc_size_)); 99 | } 100 | 101 | const char* data() const { return data_; } 102 | size_t size() const { return size_; } 103 | 104 | private: 105 | size_t alloc_size_; 106 | void* mem_; 107 | char* protected_page_; 108 | const char* data_; 109 | size_t size_; 110 | }; 111 | 112 | #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) 113 | 114 | // Fallback for systems without mmap. 
115 | typedef string DataEndingAtUnreadablePage; 116 | 117 | #endif 118 | 119 | enum CompressorType { 120 | ZLIB, LZO, SNAPPY 121 | }; 122 | 123 | const char* names[] = { 124 | "ZLIB", "LZO", "SNAPPY" 125 | }; 126 | 127 | static size_t MinimumRequiredOutputSpace(size_t input_size, 128 | CompressorType comp) { 129 | switch (comp) { 130 | #ifdef ZLIB_VERSION 131 | case ZLIB: 132 | return ZLib::MinCompressbufSize(input_size); 133 | #endif // ZLIB_VERSION 134 | 135 | #ifdef LZO_VERSION 136 | case LZO: 137 | return input_size + input_size/64 + 16 + 3; 138 | #endif // LZO_VERSION 139 | 140 | case SNAPPY: 141 | return snappy::MaxCompressedLength(input_size); 142 | 143 | default: 144 | LOG(FATAL) << "Unknown compression type number " << comp; 145 | return 0; 146 | } 147 | } 148 | 149 | // Returns true if we successfully compressed, false otherwise. 150 | // 151 | // If compressed_is_preallocated is set, do not resize the compressed buffer. 152 | // This is typically what you want for a benchmark, in order to not spend 153 | // time in the memory allocator. If you do set this flag, however, 154 | // "compressed" must be preinitialized to at least MinCompressbufSize(comp) 155 | // number of bytes, and may contain junk bytes at the end after return. 
156 | static bool Compress(const char* input, size_t input_size, CompressorType comp, 157 | string* compressed, bool compressed_is_preallocated) { 158 | if (!compressed_is_preallocated) { 159 | compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); 160 | } 161 | 162 | switch (comp) { 163 | #ifdef ZLIB_VERSION 164 | case ZLIB: { 165 | ZLib zlib; 166 | uLongf destlen = compressed->size(); 167 | int ret = zlib.Compress( 168 | reinterpret_cast(string_as_array(compressed)), 169 | &destlen, 170 | reinterpret_cast(input), 171 | input_size); 172 | CHECK_EQ(Z_OK, ret); 173 | if (!compressed_is_preallocated) { 174 | compressed->resize(destlen); 175 | } 176 | return true; 177 | } 178 | #endif // ZLIB_VERSION 179 | 180 | #ifdef LZO_VERSION 181 | case LZO: { 182 | unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; 183 | lzo_uint destlen; 184 | int ret = lzo1x_1_15_compress( 185 | reinterpret_cast(input), 186 | input_size, 187 | reinterpret_cast(string_as_array(compressed)), 188 | &destlen, 189 | mem); 190 | CHECK_EQ(LZO_E_OK, ret); 191 | delete[] mem; 192 | if (!compressed_is_preallocated) { 193 | compressed->resize(destlen); 194 | } 195 | break; 196 | } 197 | #endif // LZO_VERSION 198 | 199 | case SNAPPY: { 200 | size_t destlen; 201 | snappy::RawCompress(input, input_size, 202 | string_as_array(compressed), 203 | &destlen); 204 | CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); 205 | if (!compressed_is_preallocated) { 206 | compressed->resize(destlen); 207 | } 208 | break; 209 | } 210 | 211 | default: { 212 | return false; // the asked-for library wasn't compiled in 213 | } 214 | } 215 | return true; 216 | } 217 | 218 | static bool Uncompress(const string& compressed, CompressorType comp, 219 | int size, string* output) { 220 | switch (comp) { 221 | #ifdef ZLIB_VERSION 222 | case ZLIB: { 223 | output->resize(size); 224 | ZLib zlib; 225 | uLongf destlen = output->size(); 226 | int ret = zlib.Uncompress( 227 | 
reinterpret_cast(string_as_array(output)), 228 | &destlen, 229 | reinterpret_cast(compressed.data()), 230 | compressed.size()); 231 | CHECK_EQ(Z_OK, ret); 232 | CHECK_EQ(static_cast(size), destlen); 233 | break; 234 | } 235 | #endif // ZLIB_VERSION 236 | 237 | #ifdef LZO_VERSION 238 | case LZO: { 239 | output->resize(size); 240 | lzo_uint destlen; 241 | int ret = lzo1x_decompress( 242 | reinterpret_cast(compressed.data()), 243 | compressed.size(), 244 | reinterpret_cast(string_as_array(output)), 245 | &destlen, 246 | NULL); 247 | CHECK_EQ(LZO_E_OK, ret); 248 | CHECK_EQ(static_cast(size), destlen); 249 | break; 250 | } 251 | #endif // LZO_VERSION 252 | 253 | case SNAPPY: { 254 | snappy::RawUncompress(compressed.data(), compressed.size(), 255 | string_as_array(output)); 256 | break; 257 | } 258 | 259 | default: { 260 | return false; // the asked-for library wasn't compiled in 261 | } 262 | } 263 | return true; 264 | } 265 | 266 | static void Measure(const char* data, 267 | size_t length, 268 | CompressorType comp, 269 | int repeats, 270 | int block_size) { 271 | // Run tests a few time and pick median running times 272 | static const int kRuns = 5; 273 | double ctime[kRuns]; 274 | double utime[kRuns]; 275 | int compressed_size = 0; 276 | 277 | { 278 | // Chop the input into blocks 279 | int num_blocks = (length + block_size - 1) / block_size; 280 | std::vector input(num_blocks); 281 | std::vector input_length(num_blocks); 282 | std::vector compressed(num_blocks); 283 | std::vector output(num_blocks); 284 | for (int b = 0; b < num_blocks; b++) { 285 | int input_start = b * block_size; 286 | int input_limit = std::min((b+1)*block_size, length); 287 | input[b] = data+input_start; 288 | input_length[b] = input_limit-input_start; 289 | 290 | // Pre-grow the output buffer so we don't measure string append time. 
291 | compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp)); 292 | } 293 | 294 | // First, try one trial compression to make sure the code is compiled in 295 | if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { 296 | LOG(WARNING) << "Skipping " << names[comp] << ": " 297 | << "library not compiled in"; 298 | return; 299 | } 300 | 301 | for (int run = 0; run < kRuns; run++) { 302 | CycleTimer ctimer, utimer; 303 | 304 | for (int b = 0; b < num_blocks; b++) { 305 | // Pre-grow the output buffer so we don't measure string append time. 306 | compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp)); 307 | } 308 | 309 | ctimer.Start(); 310 | for (int b = 0; b < num_blocks; b++) 311 | for (int i = 0; i < repeats; i++) 312 | Compress(input[b], input_length[b], comp, &compressed[b], true); 313 | ctimer.Stop(); 314 | 315 | // Compress once more, with resizing, so we don't leave junk 316 | // at the end that will confuse the decompressor. 317 | for (int b = 0; b < num_blocks; b++) { 318 | Compress(input[b], input_length[b], comp, &compressed[b], false); 319 | } 320 | 321 | for (int b = 0; b < num_blocks; b++) { 322 | output[b].resize(input_length[b]); 323 | } 324 | 325 | utimer.Start(); 326 | for (int i = 0; i < repeats; i++) 327 | for (int b = 0; b < num_blocks; b++) 328 | Uncompress(compressed[b], comp, input_length[b], &output[b]); 329 | utimer.Stop(); 330 | 331 | ctime[run] = ctimer.Get(); 332 | utime[run] = utimer.Get(); 333 | } 334 | 335 | compressed_size = 0; 336 | for (size_t i = 0; i < compressed.size(); i++) { 337 | compressed_size += compressed[i].size(); 338 | } 339 | } 340 | 341 | std::sort(ctime, ctime + kRuns); 342 | std::sort(utime, utime + kRuns); 343 | const int med = kRuns/2; 344 | 345 | float comp_rate = (length / ctime[med]) * repeats / 1048576.0; 346 | float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; 347 | string x = names[comp]; 348 | x += ":"; 349 | string urate = (uncomp_rate >= 0) 350 | ? 
StringPrintf("%.1f", uncomp_rate) 351 | : string("?"); 352 | printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " 353 | "comp %5.1f MB/s uncomp %5s MB/s\n", 354 | x.c_str(), 355 | block_size/(1<<20), 356 | static_cast(length), static_cast(compressed_size), 357 | (compressed_size * 100.0) / std::max(1, length), 358 | comp_rate, 359 | urate.c_str()); 360 | } 361 | 362 | static int VerifyString(const string& input) { 363 | string compressed; 364 | DataEndingAtUnreadablePage i(input); 365 | const size_t written = snappy::Compress(i.data(), i.size(), &compressed); 366 | CHECK_EQ(written, compressed.size()); 367 | CHECK_LE(compressed.size(), 368 | snappy::MaxCompressedLength(input.size())); 369 | CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 370 | 371 | string uncompressed; 372 | DataEndingAtUnreadablePage c(compressed); 373 | CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); 374 | CHECK_EQ(uncompressed, input); 375 | return uncompressed.size(); 376 | } 377 | 378 | static void VerifyStringSink(const string& input) { 379 | string compressed; 380 | DataEndingAtUnreadablePage i(input); 381 | const size_t written = snappy::Compress(i.data(), i.size(), &compressed); 382 | CHECK_EQ(written, compressed.size()); 383 | CHECK_LE(compressed.size(), 384 | snappy::MaxCompressedLength(input.size())); 385 | CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 386 | 387 | string uncompressed; 388 | uncompressed.resize(input.size()); 389 | snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed)); 390 | DataEndingAtUnreadablePage c(compressed); 391 | snappy::ByteArraySource source(c.data(), c.size()); 392 | CHECK(snappy::Uncompress(&source, &sink)); 393 | CHECK_EQ(uncompressed, input); 394 | } 395 | 396 | static void VerifyIOVec(const string& input) { 397 | string compressed; 398 | DataEndingAtUnreadablePage i(input); 399 | const size_t written = snappy::Compress(i.data(), i.size(), &compressed); 400 | 
CHECK_EQ(written, compressed.size()); 401 | CHECK_LE(compressed.size(), 402 | snappy::MaxCompressedLength(input.size())); 403 | CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 404 | 405 | // Try uncompressing into an iovec containing a random number of entries 406 | // ranging from 1 to 10. 407 | char* buf = new char[input.size()]; 408 | ACMRandom rnd(input.size()); 409 | size_t num = rnd.Next() % 10 + 1; 410 | if (input.size() < num) { 411 | num = input.size(); 412 | } 413 | struct iovec* iov = new iovec[num]; 414 | int used_so_far = 0; 415 | for (size_t i = 0; i < num; ++i) { 416 | iov[i].iov_base = buf + used_so_far; 417 | if (i == num - 1) { 418 | iov[i].iov_len = input.size() - used_so_far; 419 | } else { 420 | // Randomly choose to insert a 0 byte entry. 421 | if (rnd.OneIn(5)) { 422 | iov[i].iov_len = 0; 423 | } else { 424 | iov[i].iov_len = rnd.Uniform(input.size()); 425 | } 426 | } 427 | used_so_far += iov[i].iov_len; 428 | } 429 | CHECK(snappy::RawUncompressToIOVec( 430 | compressed.data(), compressed.size(), iov, num)); 431 | CHECK(!memcmp(buf, input.data(), input.size())); 432 | delete[] iov; 433 | delete[] buf; 434 | } 435 | 436 | // Test that data compressed by a compressor that does not 437 | // obey block sizes is uncompressed properly. 438 | static void VerifyNonBlockedCompression(const string& input) { 439 | if (input.length() > snappy::kBlockSize) { 440 | // We cannot test larger blocks than the maximum block size, obviously. 
441 | return; 442 | } 443 | 444 | string prefix; 445 | Varint::Append32(&prefix, input.size()); 446 | 447 | // Setup compression table 448 | snappy::internal::WorkingMemory wmem; 449 | int table_size; 450 | uint16* table = wmem.GetHashTable(input.size(), &table_size); 451 | 452 | // Compress entire input in one shot 453 | string compressed; 454 | compressed += prefix; 455 | compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size())); 456 | char* dest = string_as_array(&compressed) + prefix.size(); 457 | char* end = snappy::internal::CompressFragment(input.data(), input.size(), 458 | dest, table, table_size); 459 | compressed.resize(end - compressed.data()); 460 | 461 | // Uncompress into string 462 | string uncomp_str; 463 | CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str)); 464 | CHECK_EQ(uncomp_str, input); 465 | 466 | // Uncompress using source/sink 467 | string uncomp_str2; 468 | uncomp_str2.resize(input.size()); 469 | snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2)); 470 | snappy::ByteArraySource source(compressed.data(), compressed.size()); 471 | CHECK(snappy::Uncompress(&source, &sink)); 472 | CHECK_EQ(uncomp_str2, input); 473 | 474 | // Uncompress into iovec 475 | { 476 | static const int kNumBlocks = 10; 477 | struct iovec vec[kNumBlocks]; 478 | const int block_size = 1 + input.size() / kNumBlocks; 479 | string iovec_data(block_size * kNumBlocks, 'x'); 480 | for (int i = 0; i < kNumBlocks; i++) { 481 | vec[i].iov_base = string_as_array(&iovec_data) + i * block_size; 482 | vec[i].iov_len = block_size; 483 | } 484 | CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), 485 | vec, kNumBlocks)); 486 | CHECK_EQ(string(iovec_data.data(), input.size()), input); 487 | } 488 | } 489 | 490 | // Expand the input so that it is at least K times as big as block size 491 | static string Expand(const string& input) { 492 | static const int K = 3; 493 | string data = input; 494 | while 
(data.size() < K * snappy::kBlockSize) { 495 | data += input; 496 | } 497 | return data; 498 | } 499 | 500 | static int Verify(const string& input) { 501 | VLOG(1) << "Verifying input of size " << input.size(); 502 | 503 | // Compress using string based routines 504 | const int result = VerifyString(input); 505 | 506 | // Verify using sink based routines 507 | VerifyStringSink(input); 508 | 509 | VerifyNonBlockedCompression(input); 510 | VerifyIOVec(input); 511 | if (!input.empty()) { 512 | const string expanded = Expand(input); 513 | VerifyNonBlockedCompression(expanded); 514 | VerifyIOVec(input); 515 | } 516 | 517 | return result; 518 | } 519 | 520 | 521 | static bool IsValidCompressedBuffer(const string& c) { 522 | return snappy::IsValidCompressedBuffer(c.data(), c.size()); 523 | } 524 | static bool Uncompress(const string& c, string* u) { 525 | return snappy::Uncompress(c.data(), c.size(), u); 526 | } 527 | 528 | // This test checks to ensure that snappy doesn't coredump if it gets 529 | // corrupted data. 530 | TEST(CorruptedTest, VerifyCorrupted) { 531 | string source = "making sure we don't crash with corrupted input"; 532 | VLOG(1) << source; 533 | string dest; 534 | string uncmp; 535 | snappy::Compress(source.data(), source.size(), &dest); 536 | 537 | // Mess around with the data. It's hard to simulate all possible 538 | // corruptions; this is just one example ... 539 | CHECK_GT(dest.size(), 3); 540 | dest[1]--; 541 | dest[3]++; 542 | // this really ought to fail. 
543 | CHECK(!IsValidCompressedBuffer(dest)); 544 | CHECK(!Uncompress(dest, &uncmp)); 545 | 546 | // This is testing for a security bug - a buffer that decompresses to 100k 547 | // but we lie in the snappy header and only reserve 0 bytes of memory :) 548 | source.resize(100000); 549 | for (size_t i = 0; i < source.length(); ++i) { 550 | source[i] = 'A'; 551 | } 552 | snappy::Compress(source.data(), source.size(), &dest); 553 | dest[0] = dest[1] = dest[2] = dest[3] = 0; 554 | CHECK(!IsValidCompressedBuffer(dest)); 555 | CHECK(!Uncompress(dest, &uncmp)); 556 | 557 | if (sizeof(void *) == 4) { 558 | // Another security check; check a crazy big length can't DoS us with an 559 | // over-allocation. 560 | // Currently this is done only for 32-bit builds. On 64-bit builds, 561 | // where 3 GB might be an acceptable allocation size, Uncompress() 562 | // attempts to decompress, and sometimes causes the test to run out of 563 | // memory. 564 | dest[0] = dest[1] = dest[2] = dest[3] = '\xff'; 565 | // This decodes to a really large size, i.e., about 3 GB. 566 | dest[4] = 'k'; 567 | CHECK(!IsValidCompressedBuffer(dest)); 568 | CHECK(!Uncompress(dest, &uncmp)); 569 | } else { 570 | LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build"; 571 | } 572 | 573 | // This decodes to about 2 MB; much smaller, but should still fail. 574 | dest[0] = dest[1] = dest[2] = '\xff'; 575 | dest[3] = 0x00; 576 | CHECK(!IsValidCompressedBuffer(dest)); 577 | CHECK(!Uncompress(dest, &uncmp)); 578 | 579 | // try reading stuff in from a bad file. 
580 | for (int i = 1; i <= 3; ++i) { 581 | string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(), 582 | 0); 583 | string uncmp; 584 | // check that we don't return a crazy length 585 | size_t ulen; 586 | CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen) 587 | || (ulen < (1<<20))); 588 | uint32 ulen2; 589 | snappy::ByteArraySource source(data.data(), data.size()); 590 | CHECK(!snappy::GetUncompressedLength(&source, &ulen2) || 591 | (ulen2 < (1<<20))); 592 | CHECK(!IsValidCompressedBuffer(data)); 593 | CHECK(!Uncompress(data, &uncmp)); 594 | } 595 | } 596 | 597 | // Helper routines to construct arbitrary compressed strings. 598 | // These mirror the compression code in snappy.cc, but are copied 599 | // here so that we can bypass some limitations in the how snappy.cc 600 | // invokes these routines. 601 | static void AppendLiteral(string* dst, const string& literal) { 602 | if (literal.empty()) return; 603 | int n = literal.size() - 1; 604 | if (n < 60) { 605 | // Fit length in tag byte 606 | dst->push_back(0 | (n << 2)); 607 | } else { 608 | // Encode in upcoming bytes 609 | char number[4]; 610 | int count = 0; 611 | while (n > 0) { 612 | number[count++] = n & 0xff; 613 | n >>= 8; 614 | } 615 | dst->push_back(0 | ((59+count) << 2)); 616 | *dst += string(number, count); 617 | } 618 | *dst += literal; 619 | } 620 | 621 | static void AppendCopy(string* dst, int offset, int length) { 622 | while (length > 0) { 623 | // Figure out how much to copy in one shot 624 | int to_copy; 625 | if (length >= 68) { 626 | to_copy = 64; 627 | } else if (length > 64) { 628 | to_copy = 60; 629 | } else { 630 | to_copy = length; 631 | } 632 | length -= to_copy; 633 | 634 | if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) { 635 | assert(to_copy-4 < 8); // Must fit in 3 bits 636 | dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5)); 637 | dst->push_back(offset & 0xff); 638 | } else if (offset < 65536) { 639 | dst->push_back(2 | 
((to_copy-1) << 2)); 640 | dst->push_back(offset & 0xff); 641 | dst->push_back(offset >> 8); 642 | } else { 643 | dst->push_back(3 | ((to_copy-1) << 2)); 644 | dst->push_back(offset & 0xff); 645 | dst->push_back((offset >> 8) & 0xff); 646 | dst->push_back((offset >> 16) & 0xff); 647 | dst->push_back((offset >> 24) & 0xff); 648 | } 649 | } 650 | } 651 | 652 | TEST(Snappy, SimpleTests) { 653 | Verify(""); 654 | Verify("a"); 655 | Verify("ab"); 656 | Verify("abc"); 657 | 658 | Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc"); 659 | Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc"); 660 | Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc"); 661 | Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); 662 | Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); 663 | } 664 | 665 | // Verify max blowup (lots of four-byte copies) 666 | TEST(Snappy, MaxBlowup) { 667 | string input; 668 | for (int i = 0; i < 20000; i++) { 669 | ACMRandom rnd(i); 670 | uint32 bytes = static_cast(rnd.Next()); 671 | input.append(reinterpret_cast(&bytes), sizeof(bytes)); 672 | } 673 | for (int i = 19999; i >= 0; i--) { 674 | ACMRandom rnd(i); 675 | uint32 bytes = static_cast(rnd.Next()); 676 | input.append(reinterpret_cast(&bytes), sizeof(bytes)); 677 | } 678 | Verify(input); 679 | } 680 | 681 | TEST(Snappy, RandomData) { 682 | ACMRandom rnd(FLAGS_test_random_seed); 683 | 684 | const int num_ops = 20000; 685 | for (int i = 0; i < num_ops; i++) { 686 | if ((i % 1000) == 0) { 687 | VLOG(0) << "Random op " << i << " of " << num_ops; 688 | } 689 | 690 | string x; 691 | size_t len = rnd.Uniform(4096); 692 | if (i < 100) { 693 | len = 65536 + rnd.Uniform(65536); 694 | } 695 | while (x.size() < len) { 696 | int run_len = 1; 697 | if (rnd.OneIn(10)) { 698 | run_len = rnd.Skewed(8); 699 | } 700 | char c = (i < 100) ? 
rnd.Uniform(256) : rnd.Skewed(3); 701 | while (run_len-- > 0 && x.size() < len) { 702 | x += c; 703 | } 704 | } 705 | 706 | Verify(x); 707 | } 708 | } 709 | 710 | TEST(Snappy, FourByteOffset) { 711 | // The new compressor cannot generate four-byte offsets since 712 | // it chops up the input into 32KB pieces. So we hand-emit the 713 | // copy manually. 714 | 715 | // The two fragments that make up the input string. 716 | string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; 717 | string fragment2 = "some other string"; 718 | 719 | // How many times each fragment is emitted. 720 | const int n1 = 2; 721 | const int n2 = 100000 / fragment2.size(); 722 | const int length = n1 * fragment1.size() + n2 * fragment2.size(); 723 | 724 | string compressed; 725 | Varint::Append32(&compressed, length); 726 | 727 | AppendLiteral(&compressed, fragment1); 728 | string src = fragment1; 729 | for (int i = 0; i < n2; i++) { 730 | AppendLiteral(&compressed, fragment2); 731 | src += fragment2; 732 | } 733 | AppendCopy(&compressed, src.size(), fragment1.size()); 734 | src += fragment1; 735 | CHECK_EQ(length, src.size()); 736 | 737 | string uncompressed; 738 | CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 739 | CHECK(snappy::Uncompress(compressed.data(), compressed.size(), 740 | &uncompressed)); 741 | CHECK_EQ(uncompressed, src); 742 | } 743 | 744 | TEST(Snappy, IOVecEdgeCases) { 745 | // Test some tricky edge cases in the iovec output that are not necessarily 746 | // exercised by random tests. 
747 | 748 | // Our output blocks look like this initially (the last iovec is bigger 749 | // than depicted): 750 | // [ ] [ ] [ ] [ ] [ ] 751 | static const int kLengths[] = { 2, 1, 4, 8, 128 }; 752 | 753 | struct iovec iov[ARRAYSIZE(kLengths)]; 754 | for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 755 | iov[i].iov_base = new char[kLengths[i]]; 756 | iov[i].iov_len = kLengths[i]; 757 | } 758 | 759 | string compressed; 760 | Varint::Append32(&compressed, 22); 761 | 762 | // A literal whose output crosses three blocks. 763 | // [ab] [c] [123 ] [ ] [ ] 764 | AppendLiteral(&compressed, "abc123"); 765 | 766 | // A copy whose output crosses two blocks (source and destination 767 | // segments marked). 768 | // [ab] [c] [1231] [23 ] [ ] 769 | // ^--^ -- 770 | AppendCopy(&compressed, 3, 3); 771 | 772 | // A copy where the input is, at first, in the block before the output: 773 | // 774 | // [ab] [c] [1231] [231231 ] [ ] 775 | // ^--- ^--- 776 | // Then during the copy, the pointers move such that the input and 777 | // output pointers are in the same block: 778 | // 779 | // [ab] [c] [1231] [23123123] [ ] 780 | // ^- ^- 781 | // And then they move again, so that the output pointer is no longer 782 | // in the same block as the input pointer: 783 | // [ab] [c] [1231] [23123123] [123 ] 784 | // ^-- ^-- 785 | AppendCopy(&compressed, 6, 9); 786 | 787 | // Finally, a copy where the input is from several blocks back, 788 | // and it also crosses three blocks: 789 | // 790 | // [ab] [c] [1231] [23123123] [123b ] 791 | // ^ ^ 792 | // [ab] [c] [1231] [23123123] [123bc ] 793 | // ^ ^ 794 | // [ab] [c] [1231] [23123123] [123bc12 ] 795 | // ^- ^- 796 | AppendCopy(&compressed, 17, 4); 797 | 798 | CHECK(snappy::RawUncompressToIOVec( 799 | compressed.data(), compressed.size(), iov, ARRAYSIZE(iov))); 800 | CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2)); 801 | CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1)); 802 | CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4)); 803 | CHECK_EQ(0, 
memcmp(iov[3].iov_base, "23123123", 8)); 804 | CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7)); 805 | 806 | for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 807 | delete[] reinterpret_cast(iov[i].iov_base); 808 | } 809 | } 810 | 811 | TEST(Snappy, IOVecLiteralOverflow) { 812 | static const int kLengths[] = { 3, 4 }; 813 | 814 | struct iovec iov[ARRAYSIZE(kLengths)]; 815 | for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 816 | iov[i].iov_base = new char[kLengths[i]]; 817 | iov[i].iov_len = kLengths[i]; 818 | } 819 | 820 | string compressed; 821 | Varint::Append32(&compressed, 8); 822 | 823 | AppendLiteral(&compressed, "12345678"); 824 | 825 | CHECK(!snappy::RawUncompressToIOVec( 826 | compressed.data(), compressed.size(), iov, ARRAYSIZE(iov))); 827 | 828 | for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 829 | delete[] reinterpret_cast(iov[i].iov_base); 830 | } 831 | } 832 | 833 | TEST(Snappy, IOVecCopyOverflow) { 834 | static const int kLengths[] = { 3, 4 }; 835 | 836 | struct iovec iov[ARRAYSIZE(kLengths)]; 837 | for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 838 | iov[i].iov_base = new char[kLengths[i]]; 839 | iov[i].iov_len = kLengths[i]; 840 | } 841 | 842 | string compressed; 843 | Varint::Append32(&compressed, 8); 844 | 845 | AppendLiteral(&compressed, "123"); 846 | AppendCopy(&compressed, 3, 5); 847 | 848 | CHECK(!snappy::RawUncompressToIOVec( 849 | compressed.data(), compressed.size(), iov, ARRAYSIZE(iov))); 850 | 851 | for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 852 | delete[] reinterpret_cast(iov[i].iov_base); 853 | } 854 | } 855 | 856 | static bool CheckUncompressedLength(const string& compressed, 857 | size_t* ulength) { 858 | const bool result1 = snappy::GetUncompressedLength(compressed.data(), 859 | compressed.size(), 860 | ulength); 861 | 862 | snappy::ByteArraySource source(compressed.data(), compressed.size()); 863 | uint32 length; 864 | const bool result2 = snappy::GetUncompressedLength(&source, &length); 865 | CHECK_EQ(result1, result2); 866 
| return result1; 867 | } 868 | 869 | TEST(SnappyCorruption, TruncatedVarint) { 870 | string compressed, uncompressed; 871 | size_t ulength; 872 | compressed.push_back('\xf0'); 873 | CHECK(!CheckUncompressedLength(compressed, &ulength)); 874 | CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 875 | CHECK(!snappy::Uncompress(compressed.data(), compressed.size(), 876 | &uncompressed)); 877 | } 878 | 879 | TEST(SnappyCorruption, UnterminatedVarint) { 880 | string compressed, uncompressed; 881 | size_t ulength; 882 | compressed.push_back('\x80'); 883 | compressed.push_back('\x80'); 884 | compressed.push_back('\x80'); 885 | compressed.push_back('\x80'); 886 | compressed.push_back('\x80'); 887 | compressed.push_back(10); 888 | CHECK(!CheckUncompressedLength(compressed, &ulength)); 889 | CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 890 | CHECK(!snappy::Uncompress(compressed.data(), compressed.size(), 891 | &uncompressed)); 892 | } 893 | 894 | TEST(SnappyCorruption, OverflowingVarint) { 895 | string compressed, uncompressed; 896 | size_t ulength; 897 | compressed.push_back('\xfb'); 898 | compressed.push_back('\xff'); 899 | compressed.push_back('\xff'); 900 | compressed.push_back('\xff'); 901 | compressed.push_back('\x7f'); 902 | CHECK(!CheckUncompressedLength(compressed, &ulength)); 903 | CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 904 | CHECK(!snappy::Uncompress(compressed.data(), compressed.size(), 905 | &uncompressed)); 906 | } 907 | 908 | TEST(Snappy, ReadPastEndOfBuffer) { 909 | // Check that we do not read past end of input 910 | 911 | // Make a compressed string that ends with a single-byte literal 912 | string compressed; 913 | Varint::Append32(&compressed, 1); 914 | AppendLiteral(&compressed, "x"); 915 | 916 | string uncompressed; 917 | DataEndingAtUnreadablePage c(compressed); 918 | CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); 919 | 
CHECK_EQ(uncompressed, string("x")); 920 | } 921 | 922 | // Check for an infinite loop caused by a copy with offset==0 923 | TEST(Snappy, ZeroOffsetCopy) { 924 | const char* compressed = "\x40\x12\x00\x00"; 925 | // \x40 Length (must be > kMaxIncrementCopyOverflow) 926 | // \x12\x00\x00 Copy with offset==0, length==5 927 | char uncompressed[100]; 928 | EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed)); 929 | } 930 | 931 | TEST(Snappy, ZeroOffsetCopyValidation) { 932 | const char* compressed = "\x05\x12\x00\x00"; 933 | // \x05 Length 934 | // \x12\x00\x00 Copy with offset==0, length==5 935 | EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4)); 936 | } 937 | 938 | namespace { 939 | 940 | int TestFindMatchLength(const char* s1, const char *s2, unsigned length) { 941 | std::pair p = 942 | snappy::internal::FindMatchLength(s1, s2, s2 + length); 943 | CHECK_EQ(p.first < 8, p.second); 944 | return p.first; 945 | } 946 | 947 | } // namespace 948 | 949 | TEST(Snappy, FindMatchLength) { 950 | // Exercise all different code paths through the function. 951 | // 64-bit version: 952 | 953 | // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop. 954 | EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6)); 955 | EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11)); 956 | 957 | // Hit s1_limit in 64-bit loop, find a non-match in single-character loop. 958 | EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9)); 959 | 960 | // Same, but edge cases. 961 | EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11)); 962 | EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11)); 963 | 964 | // Find non-match at once in first loop. 
965 | EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16)); 966 | EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16)); 967 | EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16)); 968 | EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16)); 969 | 970 | // Find non-match in first loop after one block. 971 | EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 972 | "abcdefgh?1234567xxxxxxxx", 24)); 973 | EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 974 | "abcdefgh0?234567xxxxxxxx", 24)); 975 | EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 976 | "abcdefgh01237654xxxxxxxx", 24)); 977 | EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 978 | "abcdefgh0123456?xxxxxxxx", 24)); 979 | 980 | // 32-bit version: 981 | 982 | // Short matches. 983 | EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8)); 984 | EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8)); 985 | EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8)); 986 | EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8)); 987 | EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8)); 988 | EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8)); 989 | EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8)); 990 | EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8)); 991 | EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7)); 992 | EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7)); 993 | 994 | // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop. 995 | EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10)); 996 | EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10)); 997 | EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13)); 998 | 999 | // Same, but edge cases. 
1000 | EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12)); 1001 | EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12)); 1002 | 1003 | // Hit s1_limit in 32-bit loop, find a non-match in single-character loop. 1004 | EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13)); 1005 | 1006 | // Find non-match at once in first loop. 1007 | EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx", 1008 | "xxxxxx?123xxxxxxxx", 18)); 1009 | EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx", 1010 | "xxxxxx0?23xxxxxxxx", 18)); 1011 | EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx", 1012 | "xxxxxx0132xxxxxxxx", 18)); 1013 | EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx", 1014 | "xxxxxx012?xxxxxxxx", 18)); 1015 | 1016 | // Same, but edge cases. 1017 | EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10)); 1018 | EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10)); 1019 | EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10)); 1020 | EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10)); 1021 | 1022 | // Find non-match in first loop after one block. 1023 | EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx", 1024 | "xxxxxxabcd?123xx", 16)); 1025 | EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx", 1026 | "xxxxxxabcd0?23xx", 16)); 1027 | EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx", 1028 | "xxxxxxabcd0132xx", 16)); 1029 | EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx", 1030 | "xxxxxxabcd012?xx", 16)); 1031 | 1032 | // Same, but edge cases. 
1033 | EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14)); 1034 | EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14)); 1035 | EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14)); 1036 | EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14)); 1037 | } 1038 | 1039 | TEST(Snappy, FindMatchLengthRandom) { 1040 | const int kNumTrials = 10000; 1041 | const int kTypicalLength = 10; 1042 | ACMRandom rnd(FLAGS_test_random_seed); 1043 | 1044 | for (int i = 0; i < kNumTrials; i++) { 1045 | string s, t; 1046 | char a = rnd.Rand8(); 1047 | char b = rnd.Rand8(); 1048 | while (!rnd.OneIn(kTypicalLength)) { 1049 | s.push_back(rnd.OneIn(2) ? a : b); 1050 | t.push_back(rnd.OneIn(2) ? a : b); 1051 | } 1052 | DataEndingAtUnreadablePage u(s); 1053 | DataEndingAtUnreadablePage v(t); 1054 | int matched = TestFindMatchLength(u.data(), v.data(), t.size()); 1055 | if (matched == t.size()) { 1056 | EXPECT_EQ(s, t); 1057 | } else { 1058 | EXPECT_NE(s[matched], t[matched]); 1059 | for (int j = 0; j < matched; j++) { 1060 | EXPECT_EQ(s[j], t[j]); 1061 | } 1062 | } 1063 | } 1064 | } 1065 | 1066 | static uint16 MakeEntry(unsigned int extra, 1067 | unsigned int len, 1068 | unsigned int copy_offset) { 1069 | // Check that all of the fields fit within the allocated space 1070 | assert(extra == (extra & 0x7)); // At most 3 bits 1071 | assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits 1072 | assert(len == (len & 0x7f)); // At most 7 bits 1073 | return len | (copy_offset << 8) | (extra << 11); 1074 | } 1075 | 1076 | // Check that the decompression table is correct, and optionally print out 1077 | // the computed one. 
1078 | TEST(Snappy, VerifyCharTable) { 1079 | using snappy::internal::LITERAL; 1080 | using snappy::internal::COPY_1_BYTE_OFFSET; 1081 | using snappy::internal::COPY_2_BYTE_OFFSET; 1082 | using snappy::internal::COPY_4_BYTE_OFFSET; 1083 | using snappy::internal::char_table; 1084 | 1085 | uint16 dst[256]; 1086 | 1087 | // Place invalid entries in all places to detect missing initialization 1088 | int assigned = 0; 1089 | for (int i = 0; i < 256; i++) { 1090 | dst[i] = 0xffff; 1091 | } 1092 | 1093 | // Small LITERAL entries. We store (len-1) in the top 6 bits. 1094 | for (unsigned int len = 1; len <= 60; len++) { 1095 | dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0); 1096 | assigned++; 1097 | } 1098 | 1099 | // Large LITERAL entries. We use 60..63 in the high 6 bits to 1100 | // encode the number of bytes of length info that follow the opcode. 1101 | for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) { 1102 | // We set the length field in the lookup table to 1 because extra 1103 | // bytes encode len-1. 1104 | dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0); 1105 | assigned++; 1106 | } 1107 | 1108 | // COPY_1_BYTE_OFFSET. 1109 | // 1110 | // The tag byte in the compressed data stores len-4 in 3 bits, and 1111 | // offset/256 in 5 bits. offset%256 is stored in the next byte. 1112 | // 1113 | // This format is used for length in range [4..11] and offset in 1114 | // range [0..2047] 1115 | for (unsigned int len = 4; len < 12; len++) { 1116 | for (unsigned int offset = 0; offset < 2048; offset += 256) { 1117 | dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] = 1118 | MakeEntry(1, len, offset>>8); 1119 | assigned++; 1120 | } 1121 | } 1122 | 1123 | // COPY_2_BYTE_OFFSET. 1124 | // Tag contains len-1 in top 6 bits, and offset in next two bytes. 
1125 | for (unsigned int len = 1; len <= 64; len++) { 1126 | dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0); 1127 | assigned++; 1128 | } 1129 | 1130 | // COPY_4_BYTE_OFFSET. 1131 | // Tag contents len-1 in top 6 bits, and offset in next four bytes. 1132 | for (unsigned int len = 1; len <= 64; len++) { 1133 | dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0); 1134 | assigned++; 1135 | } 1136 | 1137 | // Check that each entry was initialized exactly once. 1138 | EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256"; 1139 | for (int i = 0; i < 256; i++) { 1140 | EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i; 1141 | } 1142 | 1143 | if (FLAGS_snappy_dump_decompression_table) { 1144 | printf("static const uint16 char_table[256] = {\n "); 1145 | for (int i = 0; i < 256; i++) { 1146 | printf("0x%04x%s", 1147 | dst[i], 1148 | ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", "))); 1149 | } 1150 | printf("};\n"); 1151 | } 1152 | 1153 | // Check that computed table matched recorded table. 
1154 | for (int i = 0; i < 256; i++) { 1155 | EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i; 1156 | } 1157 | } 1158 | 1159 | static void CompressFile(const char* fname) { 1160 | string fullinput; 1161 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); 1162 | 1163 | string compressed; 1164 | Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); 1165 | 1166 | CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed, 1167 | file::Defaults())); 1168 | } 1169 | 1170 | static void UncompressFile(const char* fname) { 1171 | string fullinput; 1172 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); 1173 | 1174 | size_t uncompLength; 1175 | CHECK(CheckUncompressedLength(fullinput, &uncompLength)); 1176 | 1177 | string uncompressed; 1178 | uncompressed.resize(uncompLength); 1179 | CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); 1180 | 1181 | CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed, 1182 | file::Defaults())); 1183 | } 1184 | 1185 | static void MeasureFile(const char* fname) { 1186 | string fullinput; 1187 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); 1188 | printf("%-40s :\n", fname); 1189 | 1190 | int start_len = (FLAGS_start_len < 0) ? 
fullinput.size() : FLAGS_start_len; 1191 | int end_len = fullinput.size(); 1192 | if (FLAGS_end_len >= 0) { 1193 | end_len = std::min(fullinput.size(), FLAGS_end_len); 1194 | } 1195 | for (int len = start_len; len <= end_len; len++) { 1196 | const char* const input = fullinput.data(); 1197 | int repeats = (FLAGS_bytes + len) / (len + 1); 1198 | if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10); 1199 | if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10); 1200 | if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10); 1201 | 1202 | // For block-size based measurements 1203 | if (0 && FLAGS_snappy) { 1204 | Measure(input, len, SNAPPY, repeats, 8<<10); 1205 | Measure(input, len, SNAPPY, repeats, 16<<10); 1206 | Measure(input, len, SNAPPY, repeats, 32<<10); 1207 | Measure(input, len, SNAPPY, repeats, 64<<10); 1208 | Measure(input, len, SNAPPY, repeats, 256<<10); 1209 | Measure(input, len, SNAPPY, repeats, 1024<<10); 1210 | } 1211 | } 1212 | } 1213 | 1214 | static struct { 1215 | const char* label; 1216 | const char* filename; 1217 | size_t size_limit; 1218 | } files[] = { 1219 | { "html", "html", 0 }, 1220 | { "urls", "urls.10K", 0 }, 1221 | { "jpg", "fireworks.jpeg", 0 }, 1222 | { "jpg_200", "fireworks.jpeg", 200 }, 1223 | { "pdf", "paper-100k.pdf", 0 }, 1224 | { "html4", "html_x_4", 0 }, 1225 | { "txt1", "alice29.txt", 0 }, 1226 | { "txt2", "asyoulik.txt", 0 }, 1227 | { "txt3", "lcet10.txt", 0 }, 1228 | { "txt4", "plrabn12.txt", 0 }, 1229 | { "pb", "geo.protodata", 0 }, 1230 | { "gaviota", "kppkn.gtb", 0 }, 1231 | }; 1232 | 1233 | static void BM_UFlat(int iters, int arg) { 1234 | StopBenchmarkTiming(); 1235 | 1236 | // Pick file to process based on "arg" 1237 | CHECK_GE(arg, 0); 1238 | CHECK_LT(arg, ARRAYSIZE(files)); 1239 | string contents = ReadTestDataFile(files[arg].filename, 1240 | files[arg].size_limit); 1241 | 1242 | string zcontents; 1243 | snappy::Compress(contents.data(), contents.size(), &zcontents); 1244 | char* dst = new 
char[contents.size()]; 1245 | 1246 | SetBenchmarkBytesProcessed(static_cast(iters) * 1247 | static_cast(contents.size())); 1248 | SetBenchmarkLabel(files[arg].label); 1249 | StartBenchmarkTiming(); 1250 | while (iters-- > 0) { 1251 | CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst)); 1252 | } 1253 | StopBenchmarkTiming(); 1254 | 1255 | delete[] dst; 1256 | } 1257 | BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1); 1258 | 1259 | static void BM_UValidate(int iters, int arg) { 1260 | StopBenchmarkTiming(); 1261 | 1262 | // Pick file to process based on "arg" 1263 | CHECK_GE(arg, 0); 1264 | CHECK_LT(arg, ARRAYSIZE(files)); 1265 | string contents = ReadTestDataFile(files[arg].filename, 1266 | files[arg].size_limit); 1267 | 1268 | string zcontents; 1269 | snappy::Compress(contents.data(), contents.size(), &zcontents); 1270 | 1271 | SetBenchmarkBytesProcessed(static_cast(iters) * 1272 | static_cast(contents.size())); 1273 | SetBenchmarkLabel(files[arg].label); 1274 | StartBenchmarkTiming(); 1275 | while (iters-- > 0) { 1276 | CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size())); 1277 | } 1278 | StopBenchmarkTiming(); 1279 | } 1280 | BENCHMARK(BM_UValidate)->DenseRange(0, 4); 1281 | 1282 | static void BM_UIOVec(int iters, int arg) { 1283 | StopBenchmarkTiming(); 1284 | 1285 | // Pick file to process based on "arg" 1286 | CHECK_GE(arg, 0); 1287 | CHECK_LT(arg, ARRAYSIZE(files)); 1288 | string contents = ReadTestDataFile(files[arg].filename, 1289 | files[arg].size_limit); 1290 | 1291 | string zcontents; 1292 | snappy::Compress(contents.data(), contents.size(), &zcontents); 1293 | 1294 | // Uncompress into an iovec containing ten entries. 
1295 | const int kNumEntries = 10; 1296 | struct iovec iov[kNumEntries]; 1297 | char *dst = new char[contents.size()]; 1298 | int used_so_far = 0; 1299 | for (int i = 0; i < kNumEntries; ++i) { 1300 | iov[i].iov_base = dst + used_so_far; 1301 | if (used_so_far == contents.size()) { 1302 | iov[i].iov_len = 0; 1303 | continue; 1304 | } 1305 | 1306 | if (i == kNumEntries - 1) { 1307 | iov[i].iov_len = contents.size() - used_so_far; 1308 | } else { 1309 | iov[i].iov_len = contents.size() / kNumEntries; 1310 | } 1311 | used_so_far += iov[i].iov_len; 1312 | } 1313 | 1314 | SetBenchmarkBytesProcessed(static_cast(iters) * 1315 | static_cast(contents.size())); 1316 | SetBenchmarkLabel(files[arg].label); 1317 | StartBenchmarkTiming(); 1318 | while (iters-- > 0) { 1319 | CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov, 1320 | kNumEntries)); 1321 | } 1322 | StopBenchmarkTiming(); 1323 | 1324 | delete[] dst; 1325 | } 1326 | BENCHMARK(BM_UIOVec)->DenseRange(0, 4); 1327 | 1328 | static void BM_UFlatSink(int iters, int arg) { 1329 | StopBenchmarkTiming(); 1330 | 1331 | // Pick file to process based on "arg" 1332 | CHECK_GE(arg, 0); 1333 | CHECK_LT(arg, ARRAYSIZE(files)); 1334 | string contents = ReadTestDataFile(files[arg].filename, 1335 | files[arg].size_limit); 1336 | 1337 | string zcontents; 1338 | snappy::Compress(contents.data(), contents.size(), &zcontents); 1339 | char* dst = new char[contents.size()]; 1340 | 1341 | SetBenchmarkBytesProcessed(static_cast(iters) * 1342 | static_cast(contents.size())); 1343 | SetBenchmarkLabel(files[arg].label); 1344 | StartBenchmarkTiming(); 1345 | while (iters-- > 0) { 1346 | snappy::ByteArraySource source(zcontents.data(), zcontents.size()); 1347 | snappy::UncheckedByteArraySink sink(dst); 1348 | CHECK(snappy::Uncompress(&source, &sink)); 1349 | } 1350 | StopBenchmarkTiming(); 1351 | 1352 | string s(dst, contents.size()); 1353 | CHECK_EQ(contents, s); 1354 | 1355 | delete[] dst; 1356 | } 1357 | 1358 | 
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1); 1359 | 1360 | static void BM_ZFlat(int iters, int arg) { 1361 | StopBenchmarkTiming(); 1362 | 1363 | // Pick file to process based on "arg" 1364 | CHECK_GE(arg, 0); 1365 | CHECK_LT(arg, ARRAYSIZE(files)); 1366 | string contents = ReadTestDataFile(files[arg].filename, 1367 | files[arg].size_limit); 1368 | 1369 | char* dst = new char[snappy::MaxCompressedLength(contents.size())]; 1370 | 1371 | SetBenchmarkBytesProcessed(static_cast(iters) * 1372 | static_cast(contents.size())); 1373 | StartBenchmarkTiming(); 1374 | 1375 | size_t zsize = 0; 1376 | while (iters-- > 0) { 1377 | snappy::RawCompress(contents.data(), contents.size(), dst, &zsize); 1378 | } 1379 | StopBenchmarkTiming(); 1380 | const double compression_ratio = 1381 | static_cast(zsize) / std::max(1, contents.size()); 1382 | SetBenchmarkLabel(StringPrintf("%s (%.2f %%)", 1383 | files[arg].label, 100.0 * compression_ratio)); 1384 | VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes", 1385 | files[arg].label, contents.size(), zsize); 1386 | delete[] dst; 1387 | } 1388 | BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1); 1389 | 1390 | } // namespace snappy 1391 | 1392 | int main(int argc, char** argv) { 1393 | InitGoogle(argv[0], &argc, &argv, true); 1394 | RunSpecifiedBenchmarks(); 1395 | 1396 | if (argc >= 2) { 1397 | for (int arg = 1; arg < argc; arg++) { 1398 | if (FLAGS_write_compressed) { 1399 | snappy::CompressFile(argv[arg]); 1400 | } else if (FLAGS_write_uncompressed) { 1401 | snappy::UncompressFile(argv[arg]); 1402 | } else { 1403 | snappy::MeasureFile(argv[arg]); 1404 | } 1405 | } 1406 | return 0; 1407 | } 1408 | 1409 | return RUN_ALL_TESTS(); 1410 | } 1411 | --------------------------------------------------------------------------------