├── .bazelrc ├── .github └── workflows │ └── build.yml ├── .gitignore ├── .gitmodules ├── AUTHORS ├── BUILD.bazel ├── CMakeLists.txt ├── CONTRIBUTING.md ├── COPYING ├── MODULE.bazel ├── NEWS ├── README.md ├── WORKSPACE ├── WORKSPACE.bzlmod ├── cmake ├── SnappyConfig.cmake.in └── config.h.in ├── docs └── README.md ├── format_description.txt ├── framing_format.txt ├── snappy-c.cc ├── snappy-c.h ├── snappy-internal.h ├── snappy-sinksource.cc ├── snappy-sinksource.h ├── snappy-stubs-internal.cc ├── snappy-stubs-internal.h ├── snappy-stubs-public.h.in ├── snappy-test.cc ├── snappy-test.h ├── snappy.cc ├── snappy.h ├── snappy_benchmark.cc ├── snappy_compress_fuzzer.cc ├── snappy_test_data.cc ├── snappy_test_data.h ├── snappy_test_tool.cc ├── snappy_uncompress_fuzzer.cc ├── snappy_unittest.cc └── testdata ├── alice29.txt ├── asyoulik.txt ├── baddata1.snappy ├── baddata2.snappy ├── baddata3.snappy ├── fireworks.jpeg ├── geo.protodata ├── html ├── html_x_4 ├── kppkn.gtb ├── lcet10.txt ├── paper-100k.pdf ├── plrabn12.txt └── urls.10K /.bazelrc: -------------------------------------------------------------------------------- 1 | # googletest requires C++14 or above 2 | build --cxxopt='-std=c++17' 3 | # Enable Bzlmod for every Bazel command 4 | common --enable_bzlmod 5 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are 5 | # met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 
9 | # * Redistributions in binary form must reproduce the above 10 | # copyright notice, this list of conditions and the following disclaimer 11 | # in the documentation and/or other materials provided with the 12 | # distribution. 13 | # * Neither the name of Google Inc. nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | name: ci 30 | on: [push, pull_request] 31 | 32 | permissions: 33 | contents: read 34 | 35 | jobs: 36 | build-and-test: 37 | name: >- 38 | CI 39 | ${{ matrix.os }} 40 | ${{ matrix.cpu_level }} 41 | ${{ matrix.compiler }} 42 | ${{ matrix.optimized && 'release' || 'debug' }} 43 | runs-on: ${{ matrix.os }} 44 | strategy: 45 | fail-fast: false 46 | matrix: 47 | compiler: [clang, gcc, msvc] 48 | os: [ubuntu-latest, macos-latest, windows-latest] 49 | cpu_level: [baseline, avx, avx2] 50 | optimized: [true, false] 51 | exclude: 52 | # MSVC only works on Windows. 53 | - os: ubuntu-latest 54 | compiler: msvc 55 | - os: macos-latest 56 | compiler: msvc 57 | # GitHub servers seem to run on pre-Haswell CPUs. 
Attempting to use AVX2 58 | # results in crashes. 59 | - os: macos-latest 60 | cpu_level: avx2 61 | # Not testing with GCC on macOS. 62 | - os: macos-latest 63 | compiler: gcc 64 | # Only testing with MSVC on Windows. 65 | - os: windows-latest 66 | compiler: clang 67 | - os: windows-latest 68 | compiler: gcc 69 | include: 70 | - compiler: clang 71 | CC: clang 72 | CXX: clang++ 73 | - compiler: gcc 74 | CC: gcc 75 | CXX: g++ 76 | - compiler: msvc 77 | CC: 78 | CXX: 79 | 80 | env: 81 | CMAKE_BUILD_DIR: ${{ github.workspace }}/build 82 | CMAKE_BUILD_TYPE: ${{ matrix.optimized && 'RelWithDebInfo' || 'Debug' }} 83 | CC: ${{ matrix.CC }} 84 | CXX: ${{ matrix.CXX }} 85 | SNAPPY_REQUIRE_AVX: ${{ matrix.cpu_level == 'baseline' && '0' || '1' }} 86 | SNAPPY_REQUIRE_AVX2: ${{ matrix.cpu_level == 'avx2' && '1' || '0' }} 87 | SNAPPY_FUZZING_BUILD: >- 88 | ${{ (startsWith(matrix.os, 'ubuntu') && matrix.compiler == 'clang' && 89 | !matrix.optimized) && '1' || '0' }} 90 | BINARY_SUFFIX: ${{ startsWith(matrix.os, 'windows') && '.exe' || '' }} 91 | BINARY_PATH: >- 92 | ${{ format( 93 | startsWith(matrix.os, 'windows') && '{0}\build\{1}\' || '{0}/build/', 94 | github.workspace, 95 | matrix.optimized && 'RelWithDebInfo' || 'Debug') }} 96 | 97 | steps: 98 | - uses: actions/checkout@v2 99 | with: 100 | submodules: true 101 | 102 | - name: Generate build config 103 | run: >- 104 | cmake -S "${{ github.workspace }}" -B "${{ env.CMAKE_BUILD_DIR }}" 105 | -DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }} 106 | -DCMAKE_INSTALL_PREFIX=${{ runner.temp }}/install_test/ 107 | -DSNAPPY_FUZZING_BUILD=${{ env.SNAPPY_FUZZING_BUILD }} 108 | -DSNAPPY_REQUIRE_AVX=${{ env.SNAPPY_REQUIRE_AVX }} 109 | -DSNAPPY_REQUIRE_AVX2=${{ env.SNAPPY_REQUIRE_AVX2 }} 110 | 111 | - name: Build 112 | run: >- 113 | cmake --build "${{ env.CMAKE_BUILD_DIR }}" 114 | --config "${{ env.CMAKE_BUILD_TYPE }}" 115 | 116 | - name: Run C++ API Tests 117 | run: ${{ env.BINARY_PATH }}snappy_unittest${{ env.BINARY_SUFFIX }} 118 | 119 | - 
name: Run Compression Fuzzer 120 | if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }} 121 | run: >- 122 | ${{ env.BINARY_PATH }}snappy_compress_fuzzer${{ env.BINARY_SUFFIX }} 123 | -runs=1000 -close_fd_mask=3 124 | 125 | - name: Run Decompression Fuzzer 126 | if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }} 127 | run: >- 128 | ${{ env.BINARY_PATH }}snappy_uncompress_fuzzer${{ env.BINARY_SUFFIX }} 129 | -runs=1000 -close_fd_mask=3 130 | 131 | - name: Run Benchmarks 132 | run: ${{ env.BINARY_PATH }}snappy_benchmark${{ env.BINARY_SUFFIX }} 133 | 134 | - name: Test CMake installation 135 | run: cmake --build "${{ env.CMAKE_BUILD_DIR }}" --target install 136 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Editors. 2 | *.sw* 3 | .vscode 4 | .DS_Store 5 | 6 | # Build directory. 7 | build/ 8 | /bazel-* 9 | MODULE.bazel.lock 10 | out/ 11 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/benchmark"] 2 | path = third_party/benchmark 3 | url = https://github.com/google/benchmark.git 4 | [submodule "third_party/googletest"] 5 | path = third_party/googletest 6 | url = https://github.com/google/googletest.git 7 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | opensource@google.com 2 | -------------------------------------------------------------------------------- /BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google Inc. All Rights Reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are 5 | # met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above 10 | # copyright notice, this list of conditions and the following disclaimer 11 | # in the documentation and/or other materials provided with the 12 | # distribution. 13 | # * Neither the name of Google Inc. nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | package(default_visibility = ["//visibility:public"]) 30 | 31 | licenses(["notice"]) 32 | 33 | SNAPPY_VERSION = (1, 2, 2) 34 | 35 | config_setting( 36 | name = "windows", 37 | constraint_values = ["@platforms//os:windows"], 38 | ) 39 | 40 | cc_library( 41 | name = "config", 42 | hdrs = ["config.h"], 43 | defines = ["HAVE_CONFIG_H"], 44 | ) 45 | 46 | cc_library( 47 | name = "snappy-stubs-public", 48 | hdrs = [":snappy-stubs-public.h"], 49 | ) 50 | 51 | cc_library( 52 | name = "snappy-stubs-internal", 53 | srcs = ["snappy-stubs-internal.cc"], 54 | hdrs = ["snappy-stubs-internal.h"], 55 | deps = [ 56 | ":config", 57 | ":snappy-stubs-public", 58 | ], 59 | ) 60 | 61 | cc_library( 62 | name = "snappy", 63 | srcs = [ 64 | "snappy.cc", 65 | "snappy-internal.h", 66 | "snappy-sinksource.cc", 67 | ], 68 | hdrs = [ 69 | "snappy.h", 70 | "snappy-sinksource.h", 71 | ], 72 | copts = select({ 73 | ":windows": [], 74 | "//conditions:default": [ 75 | "-Wno-sign-compare", 76 | ], 77 | }), 78 | deps = [ 79 | ":config", 80 | ":snappy-stubs-internal", 81 | ":snappy-stubs-public", 82 | ], 83 | ) 84 | 85 | cc_library( 86 | name = "snappy-c", 87 | srcs = ["snappy-c.cc"], 88 | hdrs = ["snappy-c.h"], 89 | deps = [":snappy"], 90 | ) 91 | 92 | filegroup( 93 | name = "testdata", 94 | srcs = glob(["testdata/*"]), 95 | ) 96 | 97 | cc_library( 98 | name = "snappy-test", 99 | testonly = True, 100 | srcs = [ 101 | "snappy-test.cc", 102 | "snappy_test_data.cc", 103 | ], 104 | hdrs = [ 105 | "snappy-test.h", 106 | "snappy_test_data.h", 107 | ], 108 | deps = [":snappy-stubs-internal"], 109 | ) 110 | 111 | cc_test( 112 | name = "snappy_benchmark", 113 | srcs = ["snappy_benchmark.cc"], 114 | data = [":testdata"], 115 | deps = [ 116 | ":snappy", 117 | ":snappy-test", 118 | "@com_google_benchmark//:benchmark_main", 119 | ], 120 | ) 121 | 122 | cc_test( 123 | name = "snappy_unittest", 124 | srcs = [ 125 | "snappy_unittest.cc", 126 | ], 127 | data = [":testdata"], 128 | deps = [ 129 | ":snappy", 
130 | ":snappy-test", 131 | "@com_google_googletest//:gtest_main", 132 | ], 133 | ) 134 | 135 | # Generate a config.h similar to what cmake would produce: a shell heredoc writes the HAVE_* feature macros, guarded by __has_builtin / __has_include probes with a GCC version fallback. 136 | genrule( 137 | name = "config_h", 138 | outs = ["config.h"], 139 | cmd = """cat <<EOF >$@ 140 | #define HAVE_STDDEF_H 1 141 | #define HAVE_STDINT_H 1 142 | #ifdef __has_builtin 143 | # if !defined(HAVE_BUILTIN_EXPECT) && __has_builtin(__builtin_expect) 144 | # define HAVE_BUILTIN_EXPECT 1 145 | # endif 146 | # if !defined(HAVE_BUILTIN_CTZ) && __has_builtin(__builtin_ctzll) 147 | # define HAVE_BUILTIN_CTZ 1 148 | # endif 149 | # if !defined(HAVE_BUILTIN_PREFETCH) && __has_builtin(__builtin_prefetch) 150 | # define HAVE_BUILTIN_PREFETCH 1 151 | # endif 152 | #elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4) 153 | # ifndef HAVE_BUILTIN_EXPECT 154 | # define HAVE_BUILTIN_EXPECT 1 155 | # endif 156 | # ifndef HAVE_BUILTIN_CTZ 157 | # define HAVE_BUILTIN_CTZ 1 158 | # endif 159 | # ifndef HAVE_BUILTIN_PREFETCH 160 | # define HAVE_BUILTIN_PREFETCH 1 161 | # endif 162 | #endif 163 | 164 | #if defined(_WIN32) && !defined(HAVE_WINDOWS_H) 165 | #define HAVE_WINDOWS_H 1 166 | #endif 167 | 168 | #ifdef __has_include 169 | # if !defined(HAVE_BYTESWAP_H) && __has_include(<byteswap.h>) 170 | # define HAVE_BYTESWAP_H 1 171 | # endif 172 | # if !defined(HAVE_UNISTD_H) && __has_include(<unistd.h>) 173 | # define HAVE_UNISTD_H 1 174 | # endif 175 | # if !defined(HAVE_SYS_ENDIAN_H) && __has_include(<sys/endian.h>) 176 | # define HAVE_SYS_ENDIAN_H 1 177 | # endif 178 | # if !defined(HAVE_SYS_MMAN_H) && __has_include(<sys/mman.h>) 179 | # define HAVE_SYS_MMAN_H 1 180 | # endif 181 | # if !defined(HAVE_SYS_UIO_H) && __has_include(<sys/uio.h>) 182 | # define HAVE_SYS_UIO_H 1 183 | # endif 184 | # if !defined(HAVE_SYS_TIME_H) && __has_include(<sys/time.h>) 185 | # define HAVE_SYS_TIME_H 1 186 | # endif 187 | #endif 188 | 189 | #ifndef SNAPPY_IS_BIG_ENDIAN 190 | # ifdef __s390x__ 191 | # define SNAPPY_IS_BIG_ENDIAN 1 192 | # elif defined(__BYTE_ORDER__) &&
defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 193 | # define SNAPPY_IS_BIG_ENDIAN 1 194 | # endif 195 | #endif 196 | EOF 197 | """, 198 | ) 199 | 200 | genrule( 201 | name = "snappy_stubs_public_h", 202 | srcs = ["snappy-stubs-public.h.in"], 203 | outs = ["snappy-stubs-public.h"], 204 | # Assume sys/uio.h is available on non-Windows. 205 | # Set the version numbers. 206 | cmd = ("""sed -e 's/$${HAVE_SYS_UIO_H_01}/!_WIN32/g' \ 207 | -e 's/$${PROJECT_VERSION_MAJOR}/%d/g' \ 208 | -e 's/$${PROJECT_VERSION_MINOR}/%d/g' \ 209 | -e 's/$${PROJECT_VERSION_PATCH}/%d/g' \ 210 | $< >$@""" % SNAPPY_VERSION), 211 | ) 212 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are 5 | # met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above 10 | # copyright notice, this list of conditions and the following disclaimer 11 | # in the documentation and/or other materials provided with the 12 | # distribution. 13 | # * Neither the name of Google Inc. nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | cmake_minimum_required(VERSION 3.10) 30 | project(Snappy VERSION 1.2.2 LANGUAGES C CXX) 31 | 32 | # C++ standard can be overridden when this is used as a sub-project. 33 | if(NOT CMAKE_CXX_STANDARD) 34 | # This project requires C++11. 35 | set(CMAKE_CXX_STANDARD 11) 36 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 37 | set(CMAKE_CXX_EXTENSIONS OFF) 38 | endif(NOT CMAKE_CXX_STANDARD) 39 | 40 | # https://github.com/izenecloud/cmake/blob/master/SetCompilerWarningAll.cmake 41 | if(MSVC) 42 | # Use the highest warning level for Visual Studio. 43 | set(CMAKE_CXX_WARNING_LEVEL 4) 44 | if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") 45 | string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 46 | else(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") 47 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") 48 | endif(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") 49 | 50 | # Disable C++ exceptions. 51 | string(REGEX REPLACE "/EH[a-z]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 52 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c-") 53 | add_definitions(-D_HAS_EXCEPTIONS=0) 54 | 55 | # Disable RTTI. 56 | string(REGEX REPLACE "/GR" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 57 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-") 58 | else(MSVC) 59 | # Use -Wall for clang and gcc. 
60 | if(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") 61 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") 62 | endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") 63 | 64 | # Use -Wextra for clang and gcc. 65 | if(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") 66 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") 67 | endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") 68 | 69 | # Use -Werror for clang only. 70 | if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") 71 | if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") 72 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") 73 | endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") 74 | endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") 75 | 76 | # Disable sign comparison warnings. Matches upcoming Bazel setup. 77 | if(NOT CMAKE_CXX_FLAGS MATCHES "-Wno-sign-compare") 78 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") 79 | endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wno-sign-compare") 80 | 81 | # Disable C++ exceptions. 82 | string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 83 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions") 84 | 85 | # Disable RTTI. 86 | string(REGEX REPLACE "-frtti" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 87 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") 88 | endif(MSVC) 89 | 90 | # BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make 91 | # it prominent in the GUI. 92 | option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) 93 | 94 | option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON) 95 | 96 | option(SNAPPY_BUILD_BENCHMARKS "Build Snappy's benchmarks" ON) 97 | 98 | option(SNAPPY_FUZZING_BUILD "Build Snappy for fuzzing." OFF) 99 | 100 | option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) 101 | 102 | option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." 
OFF) 103 | 104 | option(SNAPPY_INSTALL "Install Snappy's header and library" ON) 105 | 106 | include(TestBigEndian) 107 | test_big_endian(SNAPPY_IS_BIG_ENDIAN) 108 | 109 | include(CheckIncludeFile) 110 | check_include_file("sys/mman.h" HAVE_SYS_MMAN_H) 111 | check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H) 112 | check_include_file("sys/time.h" HAVE_SYS_TIME_H) 113 | check_include_file("sys/uio.h" HAVE_SYS_UIO_H) 114 | check_include_file("unistd.h" HAVE_UNISTD_H) 115 | check_include_file("windows.h" HAVE_WINDOWS_H) 116 | 117 | include(CheckLibraryExists) 118 | check_library_exists(z zlibVersion "" HAVE_LIBZ) 119 | check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) 120 | check_library_exists(lz4 LZ4_compress_default "" HAVE_LIBLZ4) 121 | 122 | include(CheckCXXCompilerFlag) 123 | CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX) 124 | CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2) 125 | CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX) 126 | CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2) 127 | if(SNAPPY_REQUIRE_AVX2) 128 | if(HAVE_VISUAL_STUDIO_ARCH_AVX2) 129 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") 130 | endif(HAVE_VISUAL_STUDIO_ARCH_AVX2) 131 | if(HAVE_CLANG_MAVX) 132 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") 133 | endif(HAVE_CLANG_MAVX) 134 | if(HAVE_CLANG_MBMI2) 135 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2") 136 | endif(HAVE_CLANG_MBMI2) 137 | elseif (SNAPPY_REQUIRE_AVX) 138 | if(HAVE_VISUAL_STUDIO_ARCH_AVX) 139 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") 140 | endif(HAVE_VISUAL_STUDIO_ARCH_AVX) 141 | if(HAVE_CLANG_MAVX) 142 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") 143 | endif(HAVE_CLANG_MAVX) 144 | endif(SNAPPY_REQUIRE_AVX2) 145 | 146 | # Used by googletest. 
147 | check_cxx_compiler_flag(-Wno-missing-field-initializers 148 | SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) 149 | check_cxx_compiler_flag(-Wno-implicit-int-float-conversion 150 | SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION) 151 | 152 | include(CheckCXXSourceCompiles) # Probe for compiler builtins and SIMD/CRC intrinsics by test-compiling small programs. 153 | check_cxx_source_compiles(" 154 | int main() { 155 | return __builtin_expect(0, 1); 156 | }" HAVE_BUILTIN_EXPECT) 157 | 158 | check_cxx_source_compiles(" 159 | int main() { 160 | return __builtin_ctzll(0); 161 | }" HAVE_BUILTIN_CTZ) 162 | 163 | check_cxx_source_compiles(" 164 | int main() { 165 | __builtin_prefetch(0, 0, 3); 166 | return 0; 167 | }" HAVE_BUILTIN_PREFETCH) 168 | 169 | check_cxx_source_compiles(" 170 | __attribute__((always_inline)) int zero() { return 0; } 171 | 172 | int main() { 173 | return zero(); 174 | }" HAVE_ATTRIBUTE_ALWAYS_INLINE) 175 | 176 | check_cxx_source_compiles(" 177 | #include <tmmintrin.h> 178 | 179 | int main() { 180 | const __m128i *src = 0; 181 | __m128i dest; 182 | const __m128i shuffle_mask = _mm_load_si128(src); 183 | const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask); 184 | _mm_storeu_si128(&dest, pattern); 185 | return 0; 186 | }" SNAPPY_HAVE_SSSE3) 187 | 188 | check_cxx_source_compiles(" 189 | #include <nmmintrin.h> 190 | int main() { 191 | return _mm_crc32_u32(0, 1); 192 | }" SNAPPY_HAVE_X86_CRC32) 193 | 194 | check_cxx_source_compiles(" 195 | #include <arm_neon.h> 196 | #include <arm_acle.h> 197 | int main() { 198 | return __crc32cw(0, 1); 199 | }" SNAPPY_HAVE_NEON_CRC32) 200 | 201 | check_cxx_source_compiles(" 202 | #include <immintrin.h> 203 | int main() { 204 | return _bzhi_u32(0, 1); 205 | }" SNAPPY_HAVE_BMI2) 206 | 207 | check_cxx_source_compiles(" 208 | #include <arm_neon.h> 209 | #include <stdint.h> 210 | int main() { 211 | uint8_t val = 3, dup[8]; 212 | uint8x16_t v1 = vld1q_dup_u8(&val); 213 | uint8x16_t v2 = vqtbl1q_u8(v1, v1); 214 | vst1q_u8(dup, v1); 215 | vst1q_u8(dup, v2); 216 | return 0; 217 | }" SNAPPY_HAVE_NEON) 218 | 219 | include(CheckSymbolExists) 220 | check_symbol_exists("mmap" "sys/mman.h"
HAVE_FUNC_MMAP) 221 | check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF) 222 | 223 | configure_file( 224 | "cmake/config.h.in" 225 | "${PROJECT_BINARY_DIR}/config.h" 226 | ) 227 | 228 | # We don't want to define HAVE_ macros in public headers. Instead, we use 229 | # CMake's variable substitution with 0/1 variables, which will be seen by the 230 | # preprocessor as constants. 231 | set(HAVE_SYS_UIO_H_01 ${HAVE_SYS_UIO_H}) 232 | if(NOT HAVE_SYS_UIO_H_01) 233 | set(HAVE_SYS_UIO_H_01 0) 234 | endif(NOT HAVE_SYS_UIO_H_01) 235 | 236 | if (SNAPPY_FUZZING_BUILD) 237 | if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") 238 | message(WARNING "Fuzzing builds are only supported with Clang") 239 | endif (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") 240 | 241 | if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address") 242 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") 243 | endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address") 244 | 245 | if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link") 246 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer-no-link") 247 | endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link") 248 | endif (SNAPPY_FUZZING_BUILD) 249 | 250 | configure_file( 251 | "snappy-stubs-public.h.in" 252 | "${PROJECT_BINARY_DIR}/snappy-stubs-public.h") 253 | 254 | add_library(snappy "") 255 | target_sources(snappy 256 | PRIVATE 257 | "snappy-internal.h" 258 | "snappy-stubs-internal.h" 259 | "snappy-c.cc" 260 | "snappy-sinksource.cc" 261 | "snappy-stubs-internal.cc" 262 | "snappy.cc" 263 | "${PROJECT_BINARY_DIR}/config.h" 264 | PUBLIC 265 | $ 266 | $ 267 | $ 268 | $ 269 | $ 270 | $ 271 | $ 272 | $ 273 | ) 274 | target_include_directories(snappy 275 | PUBLIC 276 | $ 277 | $ 278 | $ 279 | ) 280 | set_target_properties(snappy 281 | PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) 282 | 283 | target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H) 284 | if(BUILD_SHARED_LIBS) 285 | 
set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) 286 | endif(BUILD_SHARED_LIBS) 287 | 288 | if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS) 289 | add_library(snappy_test_support "") 290 | target_sources(snappy_test_support 291 | PRIVATE 292 | "snappy-test.cc" 293 | "snappy-test.h" 294 | "snappy_test_data.cc" 295 | "snappy_test_data.h" 296 | "${PROJECT_BINARY_DIR}/config.h" 297 | ) 298 | 299 | # Test files include snappy-test.h, HAVE_CONFIG_H must be defined. 300 | target_compile_definitions(snappy_test_support PUBLIC -DHAVE_CONFIG_H) 301 | if(BUILD_SHARED_LIBS) 302 | set_target_properties(snappy_test_support PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) 303 | endif(BUILD_SHARED_LIBS) 304 | 305 | target_link_libraries(snappy_test_support snappy) 306 | 307 | if(HAVE_LIBZ) 308 | target_link_libraries(snappy_test_support z) 309 | endif(HAVE_LIBZ) 310 | if(HAVE_LIBLZO2) 311 | target_link_libraries(snappy_test_support lzo2) 312 | endif(HAVE_LIBLZO2) 313 | if(HAVE_LIBLZ4) 314 | target_link_libraries(snappy_test_support lz4) 315 | endif(HAVE_LIBLZ4) 316 | 317 | target_include_directories(snappy_test_support 318 | BEFORE PUBLIC 319 | "${PROJECT_SOURCE_DIR}" 320 | ) 321 | endif(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS) 322 | 323 | if(SNAPPY_BUILD_TESTS) 324 | enable_testing() 325 | 326 | # Prevent overriding the parent project's compiler/linker settings on Windows. 327 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 328 | set(install_gtest OFF) 329 | set(install_gmock OFF) 330 | set(build_gmock ON) 331 | 332 | # This project is tested using GoogleTest. 333 | add_subdirectory("third_party/googletest") 334 | 335 | # GoogleTest triggers a missing field initializers warning. 
336 | if(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) 337 | set_property(TARGET gtest 338 | APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) 339 | set_property(TARGET gmock 340 | APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) 341 | endif(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS) 342 | 343 | if(SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION) 344 | set_property(TARGET gtest 345 | APPEND PROPERTY COMPILE_OPTIONS -Wno-implicit-int-float-conversion) 346 | endif(SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION) 347 | 348 | add_executable(snappy_unittest "") 349 | target_sources(snappy_unittest 350 | PRIVATE 351 | "snappy_unittest.cc" 352 | ) 353 | target_link_libraries(snappy_unittest snappy_test_support gmock_main gtest) 354 | 355 | add_test( 356 | NAME snappy_unittest 357 | WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" 358 | COMMAND "${PROJECT_BINARY_DIR}/snappy_unittest") 359 | 360 | add_executable(snappy_test_tool "") 361 | target_sources(snappy_test_tool 362 | PRIVATE 363 | "snappy_test_tool.cc" 364 | ) 365 | target_link_libraries(snappy_test_tool snappy_test_support) 366 | endif(SNAPPY_BUILD_TESTS) 367 | 368 | if(SNAPPY_BUILD_BENCHMARKS) 369 | add_executable(snappy_benchmark "") 370 | target_sources(snappy_benchmark 371 | PRIVATE 372 | "snappy_benchmark.cc" 373 | ) 374 | target_link_libraries(snappy_benchmark snappy_test_support benchmark_main) 375 | 376 | # This project uses Google benchmark for benchmarking. 
377 | set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) 378 | set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE) 379 | add_subdirectory("third_party/benchmark") 380 | endif(SNAPPY_BUILD_BENCHMARKS) 381 | 382 | if(SNAPPY_FUZZING_BUILD) 383 | add_executable(snappy_compress_fuzzer "") 384 | target_sources(snappy_compress_fuzzer 385 | PRIVATE "snappy_compress_fuzzer.cc" 386 | ) 387 | target_link_libraries(snappy_compress_fuzzer snappy) 388 | set_target_properties(snappy_compress_fuzzer 389 | PROPERTIES LINK_FLAGS "-fsanitize=fuzzer" 390 | ) 391 | 392 | add_executable(snappy_uncompress_fuzzer "") 393 | target_sources(snappy_uncompress_fuzzer 394 | PRIVATE "snappy_uncompress_fuzzer.cc" 395 | ) 396 | target_link_libraries(snappy_uncompress_fuzzer snappy) 397 | set_target_properties(snappy_uncompress_fuzzer 398 | PROPERTIES LINK_FLAGS "-fsanitize=fuzzer" 399 | ) 400 | endif(SNAPPY_FUZZING_BUILD) 401 | 402 | # Must be included before CMAKE_INSTALL_INCLUDEDIR is used. 403 | include(GNUInstallDirs) 404 | 405 | if(SNAPPY_INSTALL) 406 | install(TARGETS snappy 407 | EXPORT SnappyTargets 408 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 409 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 410 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 411 | ) 412 | install( 413 | FILES 414 | "snappy-c.h" 415 | "snappy-sinksource.h" 416 | "snappy.h" 417 | "${PROJECT_BINARY_DIR}/snappy-stubs-public.h" 418 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" 419 | ) 420 | 421 | include(CMakePackageConfigHelpers) 422 | configure_package_config_file( 423 | "cmake/${PROJECT_NAME}Config.cmake.in" 424 | "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" 425 | INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" 426 | ) 427 | write_basic_package_version_file( 428 | "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" 429 | COMPATIBILITY SameMajorVersion 430 | ) 431 | install( 432 | EXPORT SnappyTargets 433 | NAMESPACE Snappy:: 434 | DESTINATION 
"${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" 435 | ) 436 | install( 437 | FILES 438 | "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" 439 | "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" 440 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" 441 | ) 442 | endif(SNAPPY_INSTALL) 443 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | See [the README](README.md#contributing-to-the-snappy-project) for areas 26 | where we are likely to accept external contributions. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows [Google's Open Source Community 31 | Guidelines](https://opensource.google/conduct/).
32 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright 2011, Google Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | === 31 | 32 | Some of the benchmark data in testdata/ is licensed differently: 33 | 34 | - fireworks.jpeg is Copyright 2013 Steinar H. 
Gunderson, and 35 | is licensed under the Creative Commons Attribution 3.0 license 36 | (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ 37 | for more information. 38 | 39 | - kppkn.gtb is taken from the Gaviota chess tablebase set, and 40 | is licensed under the MIT License. See 41 | https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 42 | for more information. 43 | 44 | - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper 45 | “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA 46 | Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, 47 | which is licensed under the CC-BY license. See 48 | http://www.ploscompbiol.org/static/license for more information. 49 | 50 | - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project 51 | Gutenberg. The first three have expired copyrights and are in the public 52 | domain; the latter does not have expired copyright, but is still in the 53 | public domain according to the license information 54 | (http://www.gutenberg.org/ebooks/53). 
55 | -------------------------------------------------------------------------------- /MODULE.bazel: -------------------------------------------------------------------------------- 1 | module( 2 | name = "snappy", 3 | version = "1.2.2", 4 | compatibility_level = 1, 5 | ) 6 | 7 | bazel_dep( 8 | name = "googletest", 9 | version = "1.14.0.bcr.1", 10 | dev_dependency = True, 11 | repo_name = "com_google_googletest", 12 | ) 13 | bazel_dep( 14 | name = "google_benchmark", 15 | version = "1.9.0", 16 | dev_dependency = True, 17 | repo_name = "com_google_benchmark", 18 | ) 19 | 20 | bazel_dep( 21 | name = "platforms", 22 | version = "0.0.9", 23 | ) 24 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | Snappy v1.2.2, Mar 26th 2025: 2 | 3 | * We added a new compression level in v1.2.1 which compresses a bit 4 | denser but slower. Decompression speed should be even faster with it. 5 | 6 | * We fixed a very old issue of data corruption when compressed size 7 | exceeds 4GB. This can happen when you compress data close to 4GB 8 | and it's incompressible, for example, random data. 9 | 10 | * Started to use minimum CMake 3.10 because older ones are not 11 | planned to be supported. 12 | 13 | * Various other small fixes and performance improvements (especially 14 | for clang). 15 | 16 | Snappy v1.1.10, Mar 8th 2023: 17 | 18 | * Performance improvements 19 | 20 | * Compilation fixes for various environments 21 | 22 | Snappy v1.1.9, May 4th 2021: 23 | 24 | * Performance improvements. 25 | 26 | * Google Test and Google Benchmark are now bundled in third_party/. 27 | 28 | Snappy v1.1.8, January 15th 2020: 29 | 30 | * Small performance improvements. 31 | 32 | * Removed snappy::string alias for std::string. 33 | 34 | * Improved CMake configuration. 35 | 36 | Snappy v1.1.7, August 24th 2017: 37 | 38 | * Improved CMake build support for 64-bit Linux distributions. 
39 | 40 | * MSVC builds now use MSVC-specific intrinsics that map to clzll. 41 | 42 | * ARM64 (AArch64) builds use the code paths optimized for 64-bit processors. 43 | 44 | Snappy v1.1.6, July 12th 2017: 45 | 46 | This is a re-release of v1.1.5 with proper SONAME / SOVERSION values. 47 | 48 | Snappy v1.1.5, June 28th 2017: 49 | 50 | This release has broken SONAME / SOVERSION values. Users of snappy as a shared 51 | library should avoid 1.1.5 and use 1.1.6 instead. SONAME / SOVERSION errors will 52 | manifest as the dynamic library loader complaining that it cannot find snappy's 53 | shared library file (libsnappy.so / libsnappy.dylib), or that the library it 54 | found does not have the required version. 1.1.6 has the same code as 1.1.5, but 55 | carries build configuration fixes for the issues above. 56 | 57 | * Add CMake build support. The autoconf build support is now deprecated, and 58 | will be removed in the next release. 59 | 60 | * Add AppVeyor configuration, for Windows CI coverage. 61 | 62 | * Small performance improvement on little-endian PowerPC. 63 | 64 | * Small performance improvement on LLVM with position-independent executables. 65 | 66 | * Fix a few issues with various build environments. 67 | 68 | Snappy v1.1.4, January 25th 2017: 69 | 70 | * Fix a 1% performance regression when snappy is used in PIE executables. 71 | 72 | * Improve compression performance by 5%. 73 | 74 | * Improve decompression performance by 20%. 75 | 76 | Snappy v1.1.3, July 6th 2015: 77 | 78 | This is the first release to be done from GitHub, which means that 79 | some minor things like the ChangeLog format has changed (git log 80 | format instead of svn log). 81 | 82 | * Add support for Uncompress() from a Source to a Sink. 83 | 84 | * Various minor changes to improve MSVC support; in particular, 85 | the unit tests now compile and run under MSVC. 
86 | 87 | 88 | Snappy v1.1.2, February 28th 2014: 89 | 90 | This is a maintenance release with no changes to the actual library 91 | source code. 92 | 93 | * Stop distributing benchmark data files that have unclear 94 | or unsuitable licensing. 95 | 96 | * Add support for padding chunks in the framing format. 97 | 98 | 99 | Snappy v1.1.1, October 15th 2013: 100 | 101 | * Add support for uncompressing to iovecs (scatter I/O). 102 | The bulk of this patch was contributed by Mohit Aron. 103 | 104 | * Speed up decompression by ~2%; much more so (~13-20%) on 105 | a few benchmarks on given compilers and CPUs. 106 | 107 | * Fix a few issues with MSVC compilation. 108 | 109 | * Support truncated test data in the benchmark. 110 | 111 | 112 | Snappy v1.1.0, January 18th 2013: 113 | 114 | * Snappy now uses 64 kB block size instead of 32 kB. On average, 115 | this means it compresses about 3% denser (more so for some 116 | inputs), at the same or better speeds. 117 | 118 | * libsnappy no longer depends on iostream. 119 | 120 | * Some small performance improvements in compression on x86 121 | (0.5–1%). 122 | 123 | * Various portability fixes for ARM-based platforms, for MSVC, 124 | and for GNU/Hurd. 125 | 126 | 127 | Snappy v1.0.5, February 24th 2012: 128 | 129 | * More speed improvements. Exactly how big will depend on 130 | the architecture: 131 | 132 | - 3–10% faster decompression for the base case (x86-64). 133 | 134 | - ARMv7 and higher can now use unaligned accesses, 135 | and will see about 30% faster decompression and 136 | 20–40% faster compression. 137 | 138 | - 32-bit platforms (ARM and 32-bit x86) will see 2–5% 139 | faster compression. 140 | 141 | These are all cumulative (e.g., ARM gets all three speedups). 142 | 143 | * Fixed an issue where the unit test would crash on system 144 | with less than 256 MB address space available, 145 | e.g. some embedded platforms. 146 | 147 | * Added a framing format description, for use over e.g. 
HTTP, 148 | or for a command-line compressor. We do not have any 149 | implementations of this at the current point, but there seems 150 | to be enough of a general interest in the topic. 151 | Also make the format description slightly clearer. 152 | 153 | * Remove some compile-time warnings in -Wall 154 | (mostly signed/unsigned comparisons), for easier embedding 155 | into projects that use -Wall -Werror. 156 | 157 | 158 | Snappy v1.0.4, September 15th 2011: 159 | 160 | * Speeded up the decompressor somewhat; typically about 2–8% 161 | for Core i7, in 64-bit mode (comparable for Opteron). 162 | Somewhat more for some tests, almost no gain for others. 163 | 164 | * Make Snappy compile on certain platforms it didn't before 165 | (Solaris with SunPro C++, HP-UX, AIX). 166 | 167 | * Correct some minor errors in the format description. 168 | 169 | 170 | Snappy v1.0.3, June 2nd 2011: 171 | 172 | * Speeded up the decompressor somewhat; about 3-6% for Core 2, 173 | 6-13% for Core i7, and 5-12% for Opteron (all in 64-bit mode). 174 | 175 | * Added compressed format documentation. This text is new, 176 | but an earlier version from Zeev Tarantov was used as reference. 177 | 178 | * Only link snappy_unittest against -lz and other autodetected 179 | libraries, not libsnappy.so (which doesn't need any such dependency). 180 | 181 | * Fixed some display issues in the microbenchmarks, one of which would 182 | frequently make the test crash on GNU/Hurd. 183 | 184 | 185 | Snappy v1.0.2, April 29th 2011: 186 | 187 | * Relicense to a BSD-type license. 188 | 189 | * Added C bindings, contributed by Martin Gieseking. 190 | 191 | * More Win32 fixes, in particular for MSVC. 192 | 193 | * Replace geo.protodata with a newer version. 194 | 195 | * Fix timing inaccuracies in the unit test when comparing Snappy 196 | to other algorithms. 197 | 198 | 199 | Snappy v1.0.1, March 25th 2011: 200 | 201 | This is a maintenance release, mostly containing minor fixes. 
202 | There is no new functionality. The most important fixes include: 203 | 204 | * The COPYING file and all licensing headers now correctly state that 205 | Snappy is licensed under the Apache 2.0 license. 206 | 207 | * snappy_unittest should now compile natively under Windows, 208 | as well as on embedded systems with no mmap(). 209 | 210 | * Various autotools nits have been fixed. 211 | 212 | 213 | Snappy v1.0, March 17th 2011: 214 | 215 | * Initial version. 216 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Snappy, a fast compressor/decompressor. 2 | 3 | [![Build Status](https://github.com/google/snappy/actions/workflows/build.yml/badge.svg)](https://github.com/google/snappy/actions/workflows/build.yml) 4 | 5 | Introduction 6 | ============ 7 | 8 | Snappy is a compression/decompression library. It does not aim for maximum 9 | compression, or compatibility with any other compression library; instead, 10 | it aims for very high speeds and reasonable compression. For instance, 11 | compared to the fastest mode of zlib, Snappy is an order of magnitude faster 12 | for most inputs, but the resulting compressed files are anywhere from 20% to 13 | 100% bigger. (For more information, see "Performance", below.) 14 | 15 | Snappy has the following properties: 16 | 17 | * Fast: Compression speeds at 250 MB/sec and beyond, with no assembler code. 18 | See "Performance" below. 19 | * Stable: Over the last few years, Snappy has compressed and decompressed 20 | petabytes of data in Google's production environment. The Snappy bitstream 21 | format is stable and will not change between versions. 22 | * Robust: The Snappy decompressor is designed not to crash in the face of 23 | corrupted or malicious input. 24 | * Free and open source software: Snappy is licensed under a BSD-type license. 25 | For more information, see the included COPYING file. 
26 | 27 | Snappy has previously been called "Zippy" in some Google presentations 28 | and the like. 29 | 30 | 31 | Performance 32 | =========== 33 | 34 | Snappy is intended to be fast. On a single core of a Core i7 processor 35 | in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at 36 | about 500 MB/sec or more. (These numbers are for the slowest inputs in our 37 | benchmark suite; others are much faster.) In our tests, Snappy usually 38 | is faster than algorithms in the same class (e.g. LZO, LZF, QuickLZ, 39 | etc.) while achieving comparable compression ratios. 40 | 41 | Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x 42 | for plain text, about 2-4x for HTML, and of course 1.0x for JPEGs, PNGs and 43 | other already-compressed data. Similar numbers for zlib in its fastest mode 44 | are 2.6-2.8x, 3-7x and 1.0x, respectively. More sophisticated algorithms are 45 | capable of achieving yet higher compression rates, although usually at the 46 | expense of speed. Of course, compression ratio will vary significantly with 47 | the input. 48 | 49 | Although Snappy should be fairly portable, it is primarily optimized 50 | for 64-bit x86-compatible processors, and may run slower in other environments. 51 | In particular: 52 | 53 | - Snappy uses 64-bit operations in several places to process more data at 54 | once than would otherwise be possible. 55 | - Snappy assumes unaligned 32 and 64-bit loads and stores are cheap. 56 | On some platforms, these must be emulated with single-byte loads 57 | and stores, which is much slower. 58 | - Snappy assumes little-endian throughout, and needs to byte-swap data in 59 | several places if running on a big-endian platform. 60 | 61 | Experience has shown that even heavily tuned code can be improved. 62 | Performance optimizations, whether for 64-bit x86 or other platforms, 63 | are of course most welcome; see "Contact", below. 
64 | 65 | 66 | Building 67 | ======== 68 | 69 | You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt) 70 | or later to build: 71 | 72 | ```bash 73 | git submodule update --init 74 | mkdir build 75 | cd build && cmake ../ && make 76 | ``` 77 | 78 | Usage 79 | ===== 80 | 81 | Note that Snappy, both the implementation and the main interface, 82 | is written in C++. However, several third-party bindings to other languages 83 | are available; see the [home page](docs/README.md) for more information. 84 | Also, if you want to use Snappy from C code, you can use the included C 85 | bindings in snappy-c.h. 86 | 87 | To use Snappy from your own C++ program, include the file "snappy.h" from 88 | your calling file, and link against the compiled library. 89 | 90 | There are many ways to call Snappy, but the simplest possible is 91 | 92 | ```c++ 93 | snappy::Compress(input.data(), input.size(), &output); 94 | ``` 95 | 96 | and similarly 97 | 98 | ```c++ 99 | snappy::Uncompress(input.data(), input.size(), &output); 100 | ``` 101 | 102 | where "input" and "output" are both instances of std::string. 103 | 104 | There are other interfaces that are more flexible in various ways, including 105 | support for custom (non-array) input sources. See the header file for more 106 | information. 107 | 108 | 109 | Tests and benchmarks 110 | ==================== 111 | 112 | When you compile Snappy, the following binaries are compiled in addition to the 113 | library itself. You do not need them to use the compressor from your own 114 | library, but they are useful for Snappy development. 115 | 116 | * `snappy_benchmark` contains microbenchmarks used to tune compression and 117 | decompression performance. 118 | * `snappy_unittest` contains unit tests, verifying correctness on your machine 119 | in various scenarios. 
120 | * `snappy_test_tool` can benchmark Snappy against a few other compression 121 | libraries (zlib, LZO, LZF, and QuickLZ), if they were detected at configure 122 | time. To benchmark using a given file, give the compression algorithm you want 123 | to test Snappy against (e.g. --zlib) and then a list of one or more file names 124 | on the command line. 125 | 126 | If you want to change or optimize Snappy, please run the tests and benchmarks to 127 | verify you have not broken anything. 128 | 129 | The testdata/ directory contains the files used by the microbenchmarks, which 130 | should provide a reasonably balanced starting point for benchmarking. (Note that 131 | baddata[1-3].snappy are not intended as benchmarks; they are used to verify 132 | correctness in the presence of corrupted data in the unit test.) 133 | 134 | Contributing to the Snappy Project 135 | ================================== 136 | 137 | In addition to the aims listed at the top of the [README](README.md) Snappy 138 | explicitly supports the following: 139 | 140 | 1. C++11 141 | 2. Clang (gcc and MSVC are best-effort). 142 | 3. Low level optimizations (e.g. assembly or equivalent intrinsics) for: 143 | - [x86](https://en.wikipedia.org/wiki/X86) 144 | - [x86-64](https://en.wikipedia.org/wiki/X86-64) 145 | - ARMv7 (32-bit) 146 | - ARMv8 (AArch64) 147 | 4. Supports only the Snappy compression scheme as described in 148 | [format_description.txt](format_description.txt). 149 | 5. CMake for building 150 | 151 | Changes adding features or dependencies outside of the core area of focus listed 152 | above might not be accepted. If in doubt post a message to the 153 | [Snappy discussion mailing list](https://groups.google.com/g/snappy-compression). 154 | 155 | We are unlikely to accept contributions to the build configuration files, such 156 | as `CMakeLists.txt`. 
We are focused on maintaining a build configuration that 157 | allows us to test that the project works in a few supported configurations 158 | inside Google. We are not currently interested in supporting other requirements, 159 | such as different operating systems, compilers, or build systems. 160 | 161 | Contact 162 | ======= 163 | 164 | Snappy is distributed through GitHub. For the latest version and other 165 | information, see https://github.com/google/snappy. 166 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google Inc. All Rights Reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are 5 | # met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above 10 | # copyright notice, this list of conditions and the following disclaimer 11 | # in the documentation and/or other materials provided with the 12 | # distribution. 13 | # * Neither the name of Google Inc. nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /WORKSPACE.bzlmod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/snappy/6af9287fbdb913f0794d0148c6aa43b58e63c8e3/WORKSPACE.bzlmod -------------------------------------------------------------------------------- /cmake/SnappyConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are 5 | # met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above 10 | # copyright notice, this list of conditions and the following disclaimer 11 | # in the documentation and/or other materials provided with the 12 | # distribution. 13 | # * Neither the name of Google Inc. nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 
16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | @PACKAGE_INIT@ 30 | 31 | include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake") 32 | 33 | check_required_components(Snappy) -------------------------------------------------------------------------------- /cmake/config.h.in: -------------------------------------------------------------------------------- 1 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ 2 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ 3 | 4 | /* Define to 1 if the compiler supports __attribute__((always_inline)). */ 5 | #cmakedefine01 HAVE_ATTRIBUTE_ALWAYS_INLINE 6 | 7 | /* Define to 1 if the compiler supports __builtin_ctz and friends. */ 8 | #cmakedefine01 HAVE_BUILTIN_CTZ 9 | 10 | /* Define to 1 if the compiler supports __builtin_expect. */ 11 | #cmakedefine01 HAVE_BUILTIN_EXPECT 12 | 13 | /* Define to 1 if the compiler supports __builtin_prefetch. */ 14 | #cmakedefine01 HAVE_BUILTIN_PREFETCH 15 | 16 | /* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */ 17 | #cmakedefine01 HAVE_FUNC_MMAP 18 | 19 | /* Define to 1 if you have a definition for sysconf() in <unistd.h>. 
*/ 20 | #cmakedefine01 HAVE_FUNC_SYSCONF 21 | 22 | /* Define to 1 if you have the `lzo2' library (-llzo2). */ 23 | #cmakedefine01 HAVE_LIBLZO2 24 | 25 | /* Define to 1 if you have the `z' library (-lz). */ 26 | #cmakedefine01 HAVE_LIBZ 27 | 28 | /* Define to 1 if you have the `lz4' library (-llz4). */ 29 | #cmakedefine01 HAVE_LIBLZ4 30 | 31 | /* Define to 1 if you have the <sys/mman.h> header file. */ 32 | #cmakedefine01 HAVE_SYS_MMAN_H 33 | 34 | /* Define to 1 if you have the <sys/resource.h> header file. */ 35 | #cmakedefine01 HAVE_SYS_RESOURCE_H 36 | 37 | /* Define to 1 if you have the <sys/time.h> header file. */ 38 | #cmakedefine01 HAVE_SYS_TIME_H 39 | 40 | /* Define to 1 if you have the <sys/uio.h> header file. */ 41 | #cmakedefine01 HAVE_SYS_UIO_H 42 | 43 | /* Define to 1 if you have the <unistd.h> header file. */ 44 | #cmakedefine01 HAVE_UNISTD_H 45 | 46 | /* Define to 1 if you have the <windows.h> header file. */ 47 | #cmakedefine01 HAVE_WINDOWS_H 48 | 49 | /* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */ 50 | #cmakedefine01 SNAPPY_HAVE_SSSE3 51 | 52 | /* Define to 1 if you target processors with SSE4.2 and have <crc32intrin.h>. */ 53 | #cmakedefine01 SNAPPY_HAVE_X86_CRC32 54 | 55 | /* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */ 56 | #cmakedefine01 SNAPPY_HAVE_BMI2 57 | 58 | /* Define to 1 if you target processors with NEON and have <arm_neon.h>. */ 59 | #cmakedefine01 SNAPPY_HAVE_NEON 60 | 61 | /* Define to 1 if you have <arm_neon.h> and <arm_acle.h> and want to optimize 62 | compression speed by using __crc32cw from <arm_acle.h>. */ 63 | #cmakedefine01 SNAPPY_HAVE_NEON_CRC32 64 | 65 | /* Define to 1 if your processor stores words with the most significant byte 66 | first (like Motorola and SPARC, unlike Intel and VAX). 
*/ 67 | #cmakedefine01 SNAPPY_IS_BIG_ENDIAN 68 | 69 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ 70 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Snappy is a compression/decompression library. 
It does not aim for maximum 2 | compression, or compatibility with any other compression library; instead, it 3 | aims for very high speeds and reasonable compression. For instance, compared 4 | to the fastest mode of zlib, Snappy is an order of magnitude faster for most 5 | inputs, but the resulting compressed files are anywhere from 20% to 100% 6 | bigger. On a single core of a Core i7 processor in 64-bit mode, Snappy 7 | compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec 8 | or more. 9 | 10 | Snappy is widely used inside Google, in everything from BigTable and MapReduce 11 | to our internal RPC systems. (Snappy has previously been referred to as "Zippy" 12 | in some presentations and the likes.) 13 | 14 | For more information, please see the [README](../README.md). Benchmarks against 15 | a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are 16 | included in the source code distribution. The source code also contains a 17 | [formal format specification](../format_description.txt), as well 18 | as a specification for a [framing format](../framing_format.txt) useful for 19 | higher-level framing and encapsulation of Snappy data, e.g. for transporting 20 | Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy 21 | distribution currently has no code implementing the latter, but some of the 22 | ports do (see below). 
23 | 24 | Snappy is written in C++, but C bindings are included, and several bindings to 25 | other languages are maintained by third parties: 26 | 27 | * C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper), 28 | [Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper), 29 | [Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native 30 | reimplementation) 31 | * [C port](http://github.com/andikleen/snappy-c) 32 | * [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries, 33 | NuGet packages and command-line tool) 34 | * Common Lisp: [Library bindings](http://flambard.github.com/thnappy/), 35 | [native reimplementation](https://github.com/brown/snappy) 36 | * Erlang: [esnappy](https://github.com/thekvs/esnappy), 37 | [snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif) 38 | * [Go](https://github.com/golang/snappy/) 39 | * [Haskell](http://hackage.haskell.org/package/snappy) 40 | * [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko) 41 | * [iOS packaging](https://github.com/ideawu/snappy-ios) 42 | * Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the 43 | framing format), [native reimplementation](http://code.google.com/p/jsnappy/), 44 | [other native reimplementation](https://github.com/dain/snappy) (including 45 | the framing format) 46 | * [Lua](https://github.com/forhappy/lua-snappy) 47 | * [Node.js](https://github.com/kesla/node-snappy) (including the [framing 48 | format](https://github.com/kesla/node-snappy-stream)) 49 | * [Perl](http://search.cpan.org/dist/Compress-Snappy/) 50 | * [PHP](https://github.com/kjdev/php-ext-snappy) 51 | * [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line 52 | tool for the framing format) 53 | * [R](https://github.com/lulyon/R-snappy) 54 | * [Ruby](https://github.com/miyucy/snappy) 55 | * [Rust](https://github.com/BurntSushi/rust-snappy) 56 | * [Smalltalk](https://github.com/mumez/sqnappy) (including the 
framing format) 57 | 58 | Snappy is used or is available as an alternative in software such as 59 | 60 | * [MongoDB](https://www.mongodb.com/) 61 | * [Cassandra](http://cassandra.apache.org/) 62 | * [Couchbase](http://www.couchbase.com/) 63 | * [Hadoop](http://hadoop.apache.org/) 64 | * [LessFS](http://www.lessfs.com/wordpress/) 65 | * [LevelDB](https://github.com/google/leveldb) (which is in turn used by 66 | [Google Chrome](http://chrome.google.com/)) 67 | * [Lucene](http://lucene.apache.org/) 68 | * [VoltDB](http://voltdb.com/) 69 | 70 | If you know of more, do not hesitate to let us know. The easiest way to get in 71 | touch is via the 72 | [Snappy discussion mailing list](http://groups.google.com/group/snappy-compression). 73 | -------------------------------------------------------------------------------- /format_description.txt: -------------------------------------------------------------------------------- 1 | Snappy compressed format description 2 | Last revised: 2011-10-05 3 | 4 | 5 | This is not a formal specification, but should suffice to explain most 6 | relevant parts of how the Snappy format works. It is originally based on 7 | text by Zeev Tarantov. 8 | 9 | Snappy is a LZ77-type compressor with a fixed, byte-oriented encoding. 10 | There is no entropy encoder backend nor framing layer -- the latter is 11 | assumed to be handled by other parts of the system. 12 | 13 | This document only describes the format, not how the Snappy compressor nor 14 | decompressor actually works. The correctness of the decompressor should not 15 | depend on implementation details of the compressor, and vice versa. 16 | 17 | 18 | 1. Preamble 19 | 20 | The stream starts with the uncompressed length (up to a maximum of 2^32 - 1), 21 | stored as a little-endian varint. Varints consist of a series of bytes, 22 | where the lower 7 bits are data and the upper bit is set iff there are 23 | more bytes to be read. 
In other words, an uncompressed length of 64 would 24 | be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE) 25 | would be stored as 0xFE 0xFF 0x7F. 26 | 27 | 28 | 2. The compressed stream itself 29 | 30 | There are two types of elements in a Snappy stream: Literals and 31 | copies (backreferences). There is no restriction on the order of elements, 32 | except that the stream naturally cannot start with a copy. (Having 33 | two literals in a row is never optimal from a compression point of 34 | view, but nevertheless fully permitted.) Each element starts with a tag byte, 35 | and the lower two bits of this tag byte signal what type of element will 36 | follow: 37 | 38 | 00: Literal 39 | 01: Copy with 1-byte offset 40 | 10: Copy with 2-byte offset 41 | 11: Copy with 4-byte offset 42 | 43 | The interpretation of the upper six bits is element-dependent. 44 | 45 | 46 | 2.1. Literals (00) 47 | 48 | Literals are uncompressed data stored directly in the byte stream. 49 | The literal length is stored differently depending on the length 50 | of the literal: 51 | 52 | - For literals up to and including 60 bytes in length, the upper 53 | six bits of the tag byte contain (len-1). The literal follows 54 | immediately thereafter in the bytestream. 55 | - For longer literals, the (len-1) value is stored after the tag byte, 56 | little-endian. The upper six bits of the tag byte describe how 57 | many bytes are used for the length; 60, 61, 62 or 63 for 58 | 1-4 bytes, respectively. The literal itself follows after the 59 | length. 60 | 61 | 62 | 2.2. Copies 63 | 64 | Copies are references back into previous decompressed data, telling 65 | the decompressor to reuse data it has previously decoded. 66 | They encode two values: The _offset_, saying how many bytes back 67 | from the current position to read, and the _length_, how many bytes 68 | to copy. 
Offsets of zero can be encoded, but are not legal; 69 | similarly, it is possible to encode backreferences that would 70 | go past the end of the block (offset > current decompressed position), 71 | which is also nonsensical and thus not allowed. 72 | 73 | As in most LZ77-based compressors, the length can be larger than the offset, 74 | yielding a form of run-length encoding (RLE). For instance, 75 | "xababab" could be encoded as 76 | 77 | <literal: "xab"> <copy: offset=2 length=4> 78 | 79 | Note that since the current Snappy compressor works in 32 kB 80 | blocks and does not do matching across blocks, it will never produce 81 | a bitstream with offsets larger than about 32768. However, the 82 | decompressor should not rely on this, as it may change in the future. 83 | 84 | There are several different kinds of copy elements, depending on 85 | the amount of bytes to be copied (length), and how far back the 86 | data to be copied is (offset). 87 | 88 | 89 | 2.2.1. Copy with 1-byte offset (01) 90 | 91 | These elements can encode lengths between [4..11] bytes and offsets 92 | between [0..2047] bytes. (len-4) occupies three bits and is stored 93 | in bits [2..4] of the tag byte. The offset occupies 11 bits, of which the 94 | upper three are stored in the upper three bits ([5..7]) of the tag byte, 95 | and the lower eight are stored in a byte following the tag byte. 96 | 97 | 98 | 2.2.2. Copy with 2-byte offset (10) 99 | 100 | These elements can encode lengths between [1..64] and offsets from 101 | [0..65535]. (len-1) occupies six bits and is stored in the upper 102 | six bits ([2..7]) of the tag byte. The offset is stored as a 103 | little-endian 16-bit integer in the two bytes following the tag byte. 104 | 105 | 106 | 2.2.3. Copy with 4-byte offset (11) 107 | 108 | These are like the copies with 2-byte offsets (see previous subsection), 109 | except that the offset is stored as a 32-bit integer instead of a 110 | 16-bit integer (and thus will occupy four bytes). 
 111 | -------------------------------------------------------------------------------- /framing_format.txt: -------------------------------------------------------------------------------- 1 | Snappy framing format description 2 | Last revised: 2013-10-25 3 | 4 | This format describes a framing format for Snappy, allowing compressing to 5 | files or streams that can then more easily be decompressed without having 6 | to hold the entire stream in memory. It also provides data checksums to 7 | help verify integrity. It does not provide metadata checksums, so it does 8 | not protect against e.g. all forms of truncations. 9 | 10 | Implementation of the framing format is optional for Snappy compressors and 11 | decompressors; it is not part of the Snappy core specification. 12 | 13 | 14 | 1. General structure 15 | 16 | The file consists solely of chunks, lying back-to-back with no padding 17 | in between. Each chunk consists first of a single byte of chunk identifier, 18 | then a three-byte little-endian length of the chunk in bytes (from 0 to 19 | 16777215, inclusive), and then the data if any. The four bytes of chunk 20 | header are not counted in the data length. 21 | 22 | The different chunk types are listed below. The first chunk must always 23 | be the stream identifier chunk (see section 4.1, below). The stream 24 | ends when the file ends -- there is no explicit end-of-file marker. 25 | 26 | 27 | 2. File type identification 28 | 29 | The following identifiers for this format are recommended where appropriate. 30 | However, note that none have been registered officially, so this is only to 31 | be taken as a guideline. We use "Snappy framed" to distinguish between this 32 | format and raw Snappy data. 33 | 34 | File extension: .sz 35 | MIME type: application/x-snappy-framed 36 | HTTP Content-Encoding: x-snappy-framed 37 | 38 | 39 | 3. Checksum format 40 | 41 | Some chunks have data protected by a checksum (the ones that do will say so 42 | explicitly).
The checksums are always masked CRC-32Cs. 43 | 44 | A description of CRC-32C can be found in RFC 3720, section 12.1, with 45 | examples in section B.4. 46 | 47 | Checksums are not stored directly, but masked, as checksumming data and 48 | then its own checksum can be problematic. The masking is the same as used 49 | in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant 50 | 0xa282ead8 (using wraparound as normal for unsigned integers). This is 51 | equivalent to the following C code: 52 | 53 | uint32_t mask_checksum(uint32_t x) { 54 | return ((x >> 15) | (x << 17)) + 0xa282ead8; 55 | } 56 | 57 | Note that the masking is reversible. 58 | 59 | The checksum is always stored as a four bytes long integer, in little-endian. 60 | 61 | 62 | 4. Chunk types 63 | 64 | The currently supported chunk types are described below. The list may 65 | be extended in the future. 66 | 67 | 68 | 4.1. Stream identifier (chunk type 0xff) 69 | 70 | The stream identifier is always the first element in the stream. 71 | It is exactly six bytes long and contains "sNaPpY" in ASCII. This means that 72 | a valid Snappy framed stream always starts with the bytes 73 | 74 | 0xff 0x06 0x00 0x00 0x73 0x4e 0x61 0x50 0x70 0x59 75 | 76 | The stream identifier chunk can come multiple times in the stream besides 77 | the first; if such a chunk shows up, it should simply be ignored, assuming 78 | it has the right length and contents. This allows for easy concatenation of 79 | compressed files without the need for re-framing. 80 | 81 | 82 | 4.2. Compressed data (chunk type 0x00) 83 | 84 | Compressed data chunks contain a normal Snappy compressed bitstream; 85 | see the compressed format specification. The compressed data is preceded by 86 | the CRC-32C (see section 3) of the _uncompressed_ data. 87 | 88 | Note that the data portion of the chunk, i.e., the compressed contents, 89 | can be at most 16777211 bytes (2^24 - 1, minus the checksum). 
90 | However, we place an additional restriction that the uncompressed data 91 | in a chunk must be no longer than 65536 bytes. This allows consumers to 92 | easily use small fixed-size buffers. 93 | 94 | 95 | 4.3. Uncompressed data (chunk type 0x01) 96 | 97 | Uncompressed data chunks allow a compressor to send uncompressed, 98 | raw data; this is useful if, for instance, uncompressible or 99 | near-incompressible data is detected, and faster decompression is desired. 100 | 101 | As in the compressed chunks, the data is preceded by its own masked 102 | CRC-32C (see section 3). 103 | 104 | An uncompressed data chunk, like compressed data chunks, should contain 105 | no more than 65536 data bytes, so the maximum legal chunk length with the 106 | checksum is 65540. 107 | 108 | 109 | 4.4. Padding (chunk type 0xfe) 110 | 111 | Padding chunks allow a compressor to increase the size of the data stream 112 | so that it complies with external demands, e.g. that the total number of 113 | bytes is a multiple of some value. 114 | 115 | All bytes of the padding chunk, except the chunk byte itself and the length, 116 | should be zero, but decompressors must not try to interpret or verify the 117 | padding data in any way. 118 | 119 | 120 | 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f) 121 | 122 | These are reserved for future expansion. A decoder that sees such a chunk 123 | should immediately return an error, as it must assume it cannot decode the 124 | stream correctly. 125 | 126 | Future versions of this specification may define meanings for these chunks. 127 | 128 | 129 | 4.6. Reserved skippable chunks (chunk types 0x80-0xfd) 130 | 131 | These are also reserved for future expansion, but unlike the chunks 132 | described in 4.5, a decoder seeing these must skip them and continue 133 | decoding. 134 | 135 | Future versions of this specification may define meanings for these chunks. 
136 | -------------------------------------------------------------------------------- /snappy-c.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Martin Gieseking . 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | #include "snappy.h" 30 | #include "snappy-c.h" 31 | 32 | extern "C" { 33 | 34 | snappy_status snappy_compress(const char* input, 35 | size_t input_length, 36 | char* compressed, 37 | size_t *compressed_length) { 38 | if (*compressed_length < snappy_max_compressed_length(input_length)) { 39 | return SNAPPY_BUFFER_TOO_SMALL; 40 | } 41 | snappy::RawCompress(input, input_length, compressed, compressed_length); 42 | return SNAPPY_OK; 43 | } 44 | 45 | snappy_status snappy_uncompress(const char* compressed, 46 | size_t compressed_length, 47 | char* uncompressed, 48 | size_t* uncompressed_length) { 49 | size_t real_uncompressed_length; 50 | if (!snappy::GetUncompressedLength(compressed, 51 | compressed_length, 52 | &real_uncompressed_length)) { 53 | return SNAPPY_INVALID_INPUT; 54 | } 55 | if (*uncompressed_length < real_uncompressed_length) { 56 | return SNAPPY_BUFFER_TOO_SMALL; 57 | } 58 | if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) { 59 | return SNAPPY_INVALID_INPUT; 60 | } 61 | *uncompressed_length = real_uncompressed_length; 62 | return SNAPPY_OK; 63 | } 64 | 65 | size_t snappy_max_compressed_length(size_t source_length) { 66 | return snappy::MaxCompressedLength(source_length); 67 | } 68 | 69 | snappy_status snappy_uncompressed_length(const char *compressed, 70 | size_t compressed_length, 71 | size_t *result) { 72 | if (snappy::GetUncompressedLength(compressed, 73 | compressed_length, 74 | result)) { 75 | return SNAPPY_OK; 76 | } else { 77 | return SNAPPY_INVALID_INPUT; 78 | } 79 | } 80 | 81 | snappy_status snappy_validate_compressed_buffer(const char *compressed, 82 | size_t compressed_length) { 83 | if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) { 84 | return SNAPPY_OK; 85 | } else { 86 | return SNAPPY_INVALID_INPUT; 87 | } 88 | } 89 | 90 | } // extern "C" 91 | -------------------------------------------------------------------------------- /snappy-c.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Martin Gieseking . 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are 6 | * met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above 11 | * copyright notice, this list of conditions and the following disclaimer 12 | * in the documentation and/or other materials provided with the 13 | * distribution. 14 | * * Neither the name of Google Inc. nor the names of its 15 | * contributors may be used to endorse or promote products derived from 16 | * this software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * Plain C interface (a wrapper around the C++ implementation). 
 31 | */ 32 | 33 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ 34 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | #include <stddef.h> 41 | 42 | /* 43 | * Return values; see the documentation for each function to know 44 | * what each can return. 45 | */ 46 | typedef enum { 47 | SNAPPY_OK = 0, 48 | SNAPPY_INVALID_INPUT = 1, 49 | SNAPPY_BUFFER_TOO_SMALL = 2 50 | } snappy_status; 51 | 52 | /* 53 | * Takes the data stored in "input[0..input_length-1]" and stores 54 | * it in the array pointed to by "compressed". 55 | * 56 | * <compressed_length> signals the space available in "compressed". 57 | * If it is not at least equal to "snappy_max_compressed_length(input_length)", 58 | * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression, 59 | * <compressed_length> contains the true length of the compressed output, 60 | * and SNAPPY_OK is returned. 61 | * 62 | * Example: 63 | * size_t output_length = snappy_max_compressed_length(input_length); 64 | * char* output = (char*)malloc(output_length); 65 | * if (snappy_compress(input, input_length, output, &output_length) 66 | * == SNAPPY_OK) { 67 | * ... Process(output, output_length) ... 68 | * } 69 | * free(output); 70 | */ 71 | snappy_status snappy_compress(const char* input, 72 | size_t input_length, 73 | char* compressed, 74 | size_t* compressed_length); 75 | 76 | /* 77 | * Given data in "compressed[0..compressed_length-1]" generated by 78 | * calling the snappy_compress routine, this routine stores 79 | * the uncompressed data to 80 | * uncompressed[0..uncompressed_length-1]. 81 | * Returns failure (a value not equal to SNAPPY_OK) if the message 82 | * is corrupted and could not be decompressed. 83 | * 84 | * <uncompressed_length> signals the space available in "uncompressed". 85 | * If it is not at least equal to the value returned by 86 | * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL 87 | * is returned.
After successful decompression, <uncompressed_length> 88 | * contains the true length of the decompressed output. 89 | * 90 | * Example: 91 | * size_t output_length; 92 | * if (snappy_uncompressed_length(input, input_length, &output_length) 93 | * != SNAPPY_OK) { 94 | * ... fail ... 95 | * } 96 | * char* output = (char*)malloc(output_length); 97 | * if (snappy_uncompress(input, input_length, output, &output_length) 98 | * == SNAPPY_OK) { 99 | * ... Process(output, output_length) ... 100 | * } 101 | * free(output); 102 | */ 103 | snappy_status snappy_uncompress(const char* compressed, 104 | size_t compressed_length, 105 | char* uncompressed, 106 | size_t* uncompressed_length); 107 | 108 | /* 109 | * Returns the maximal size of the compressed representation of 110 | * input data that is "source_length" bytes in length. 111 | */ 112 | size_t snappy_max_compressed_length(size_t source_length); 113 | 114 | /* 115 | * REQUIRES: "compressed[]" was produced by snappy_compress() 116 | * Returns SNAPPY_OK and stores the length of the uncompressed data in 117 | * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error. 118 | * This operation takes O(1) time. 119 | */ 120 | snappy_status snappy_uncompressed_length(const char* compressed, 121 | size_t compressed_length, 122 | size_t* result); 123 | 124 | /* 125 | * Check if the contents of "compressed[]" can be uncompressed successfully. 126 | * Does not return the uncompressed data; if so, returns SNAPPY_OK, 127 | * or if not, returns SNAPPY_INVALID_INPUT. 128 | * Takes time proportional to compressed_length, but is usually at least a 129 | * factor of four faster than actual decompression.
130 | */ 131 | snappy_status snappy_validate_compressed_buffer(const char* compressed, 132 | size_t compressed_length); 133 | 134 | #ifdef __cplusplus 135 | } // extern "C" 136 | #endif 137 | 138 | #endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */ 139 | -------------------------------------------------------------------------------- /snappy-internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2008 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Internals shared between the Snappy implementation and its unittest. 30 | 31 | #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ 32 | #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ 33 | 34 | #include 35 | 36 | #include "snappy-stubs-internal.h" 37 | 38 | #if SNAPPY_HAVE_SSSE3 39 | // Please do not replace with or with headers that assume more 40 | // advanced SSE versions without checking with all the OWNERS. 41 | #include 42 | #include 43 | #endif 44 | 45 | #if SNAPPY_HAVE_NEON 46 | #include 47 | #endif 48 | 49 | #if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON 50 | #define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1 51 | #else 52 | #define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0 53 | #endif 54 | 55 | namespace snappy { 56 | namespace internal { 57 | 58 | #if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 59 | #if SNAPPY_HAVE_SSSE3 60 | using V128 = __m128i; 61 | #elif SNAPPY_HAVE_NEON 62 | using V128 = uint8x16_t; 63 | #endif 64 | 65 | // Load 128 bits of integer data. `src` must be 16-byte aligned. 66 | inline V128 V128_Load(const V128* src); 67 | 68 | // Load 128 bits of integer data. `src` does not need to be aligned. 69 | inline V128 V128_LoadU(const V128* src); 70 | 71 | // Store 128 bits of integer data. `dst` does not need to be aligned. 72 | inline void V128_StoreU(V128* dst, V128 val); 73 | 74 | // Shuffle packed 8-bit integers using a shuffle mask. 75 | // Each packed integer in the shuffle mask must be in [0,16). 
76 | inline V128 V128_Shuffle(V128 input, V128 shuffle_mask); 77 | 78 | // Constructs V128 with 16 chars |c|. 79 | inline V128 V128_DupChar(char c); 80 | 81 | #if SNAPPY_HAVE_SSSE3 82 | inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); } 83 | 84 | inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); } 85 | 86 | inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); } 87 | 88 | inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) { 89 | return _mm_shuffle_epi8(input, shuffle_mask); 90 | } 91 | 92 | inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); } 93 | 94 | #elif SNAPPY_HAVE_NEON 95 | inline V128 V128_Load(const V128* src) { 96 | return vld1q_u8(reinterpret_cast(src)); 97 | } 98 | 99 | inline V128 V128_LoadU(const V128* src) { 100 | return vld1q_u8(reinterpret_cast(src)); 101 | } 102 | 103 | inline void V128_StoreU(V128* dst, V128 val) { 104 | vst1q_u8(reinterpret_cast(dst), val); 105 | } 106 | 107 | inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) { 108 | assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15); 109 | return vqtbl1q_u8(input, shuffle_mask); 110 | } 111 | 112 | inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); } 113 | #endif 114 | #endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 115 | 116 | // Working memory performs a single allocation to hold all scratch space 117 | // required for compression. 118 | class WorkingMemory { 119 | public: 120 | explicit WorkingMemory(size_t input_size); 121 | ~WorkingMemory(); 122 | 123 | // Allocates and clears a hash table using memory in "*this", 124 | // stores the number of buckets in "*table_size" and returns a pointer to 125 | // the base of the hash table. 
126 | uint16_t* GetHashTable(size_t fragment_size, int* table_size) const; 127 | char* GetScratchInput() const { return input_; } 128 | char* GetScratchOutput() const { return output_; } 129 | 130 | private: 131 | char* mem_; // the allocated memory, never nullptr 132 | size_t size_; // the size of the allocated memory, never 0 133 | uint16_t* table_; // the pointer to the hashtable 134 | char* input_; // the pointer to the input scratch buffer 135 | char* output_; // the pointer to the output scratch buffer 136 | 137 | // No copying 138 | WorkingMemory(const WorkingMemory&); 139 | void operator=(const WorkingMemory&); 140 | }; 141 | 142 | // Flat array compression that does not emit the "uncompressed length" 143 | // prefix. Compresses "input" string to the "*op" buffer. 144 | // 145 | // REQUIRES: "input_length <= kBlockSize" 146 | // REQUIRES: "op" points to an array of memory that is at least 147 | // "MaxCompressedLength(input_length)" in size. 148 | // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. 149 | // REQUIRES: "table_size" is a power of two 150 | // 151 | // Returns an "end" pointer into "op" buffer. 152 | // "end - op" is the compressed size of "input". 153 | char* CompressFragment(const char* input, 154 | size_t input_length, 155 | char* op, 156 | uint16_t* table, 157 | const int table_size); 158 | 159 | // Find the largest n such that 160 | // 161 | // s1[0,n-1] == s2[0,n-1] 162 | // and n <= (s2_limit - s2). 163 | // 164 | // Return make_pair(n, n < 8). 165 | // Does not read *s2_limit or beyond. 166 | // Does not read *(s1 + (s2_limit - s2)) or beyond. 167 | // Requires that s2_limit >= s2. 168 | // 169 | // In addition populate *data with the next 5 bytes from the end of the match. 170 | // This is only done if 8 bytes are available (s2_limit - s2 >= 8). The point is 171 | // that on some arch's this can be done faster in this routine than subsequent 172 | // loading from s2 + n. 
173 | // 174 | // Separate implementation for 64-bit, little-endian cpus. 175 | #if !SNAPPY_IS_BIG_ENDIAN && \ 176 | (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \ 177 | defined(ARCH_ARM)) 178 | static inline std::pair FindMatchLength(const char* s1, 179 | const char* s2, 180 | const char* s2_limit, 181 | uint64_t* data) { 182 | assert(s2_limit >= s2); 183 | size_t matched = 0; 184 | 185 | // This block isn't necessary for correctness; we could just start looping 186 | // immediately. As an optimization though, it is useful. It creates some not 187 | // uncommon code paths that determine, without extra effort, whether the match 188 | // length is less than 8. In short, we are hoping to avoid a conditional 189 | // branch, and perhaps get better code layout from the C++ compiler. 190 | if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) { 191 | uint64_t a1 = UNALIGNED_LOAD64(s1); 192 | uint64_t a2 = UNALIGNED_LOAD64(s2); 193 | if (SNAPPY_PREDICT_TRUE(a1 != a2)) { 194 | // This code is critical for performance. The reason is that it determines 195 | // how much to advance `ip` (s2). This obviously depends on both the loads 196 | // from the `candidate` (s1) and `ip`. Furthermore the next `candidate` 197 | // depends on the advanced `ip` calculated here through a load, hash and 198 | // new candidate hash lookup (a lot of cycles). This makes s1 (ie. 199 | // `candidate`) the variable that limits throughput. This is the reason we 200 | // go through hoops to have this function update `data` for the next iter. 201 | // The straightforward code would use *data, given by 202 | // 203 | // *data = UNALIGNED_LOAD64(s2 + matched_bytes) (Latency of 5 cycles), 204 | // 205 | // as input for the hash table lookup to find next candidate. However 206 | // this forces the load on the data dependency chain of s1, because 207 | // matched_bytes directly depends on s1. 
However matched_bytes is 0..7, so 208 | // we can also calculate *data by 209 | // 210 | // *data = AlignRight(UNALIGNED_LOAD64(s2), UNALIGNED_LOAD64(s2 + 8), 211 | // matched_bytes); 212 | // 213 | // The loads do not depend on s1 anymore and are thus off the bottleneck. 214 | // The straightforward implementation on x86_64 would be to use 215 | // 216 | // shrd rax, rdx, cl (cl being matched_bytes * 8) 217 | // 218 | // unfortunately shrd with a variable shift has a 4 cycle latency. So this 219 | // only wins 1 cycle. The BMI2 shrx instruction is a 1 cycle variable 220 | // shift instruction but can only shift 64 bits. If we focus on just 221 | // obtaining the least significant 4 bytes, we can obtain this by 222 | // 223 | // *data = ConditionalMove(matched_bytes < 4, UNALIGNED_LOAD64(s2), 224 | // UNALIGNED_LOAD64(s2 + 4) >> ((matched_bytes & 3) * 8); 225 | // 226 | // Writen like above this is not a big win, the conditional move would be 227 | // a cmp followed by a cmov (2 cycles) followed by a shift (1 cycle). 228 | // However matched_bytes < 4 is equal to 229 | // static_cast(xorval) != 0. Writen that way, the conditional 230 | // move (2 cycles) can execute in parallel with FindLSBSetNonZero64 231 | // (tzcnt), which takes 3 cycles. 232 | uint64_t xorval = a1 ^ a2; 233 | int shift = Bits::FindLSBSetNonZero64(xorval); 234 | size_t matched_bytes = shift >> 3; 235 | uint64_t a3 = UNALIGNED_LOAD64(s2 + 4); 236 | #ifndef __x86_64__ 237 | a2 = static_cast(xorval) == 0 ? a3 : a2; 238 | #else 239 | // Ideally this would just be 240 | // 241 | // a2 = static_cast(xorval) == 0 ? a3 : a2; 242 | // 243 | // However clang correctly infers that the above statement participates on 244 | // a critical data dependency chain and thus, unfortunately, refuses to 245 | // use a conditional move (it's tuned to cut data dependencies). In this 246 | // case there is a longer parallel chain anyway AND this will be fairly 247 | // unpredictable. 
248 | asm("testl %k2, %k2\n\t" 249 | "cmovzq %1, %0\n\t" 250 | : "+r"(a2) 251 | : "r"(a3), "r"(xorval) 252 | : "cc"); 253 | #endif 254 | *data = a2 >> (shift & (3 * 8)); 255 | return std::pair(matched_bytes, true); 256 | } else { 257 | matched = 8; 258 | s2 += 8; 259 | } 260 | } 261 | SNAPPY_PREFETCH(s1 + 64); 262 | SNAPPY_PREFETCH(s2 + 64); 263 | 264 | // Find out how long the match is. We loop over the data 64 bits at a 265 | // time until we find a 64-bit block that doesn't match; then we find 266 | // the first non-matching bit and use that to calculate the total 267 | // length of the match. 268 | while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) { 269 | uint64_t a1 = UNALIGNED_LOAD64(s1 + matched); 270 | uint64_t a2 = UNALIGNED_LOAD64(s2); 271 | if (a1 == a2) { 272 | s2 += 8; 273 | matched += 8; 274 | } else { 275 | uint64_t xorval = a1 ^ a2; 276 | int shift = Bits::FindLSBSetNonZero64(xorval); 277 | size_t matched_bytes = shift >> 3; 278 | uint64_t a3 = UNALIGNED_LOAD64(s2 + 4); 279 | #ifndef __x86_64__ 280 | a2 = static_cast(xorval) == 0 ? a3 : a2; 281 | #else 282 | asm("testl %k2, %k2\n\t" 283 | "cmovzq %1, %0\n\t" 284 | : "+r"(a2) 285 | : "r"(a3), "r"(xorval) 286 | : "cc"); 287 | #endif 288 | *data = a2 >> (shift & (3 * 8)); 289 | matched += matched_bytes; 290 | assert(matched >= 8); 291 | return std::pair(matched, false); 292 | } 293 | } 294 | while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { 295 | if (s1[matched] == *s2) { 296 | ++s2; 297 | ++matched; 298 | } else { 299 | if (s2 <= s2_limit - 8) { 300 | *data = UNALIGNED_LOAD64(s2); 301 | } 302 | return std::pair(matched, matched < 8); 303 | } 304 | } 305 | return std::pair(matched, matched < 8); 306 | } 307 | #else 308 | static inline std::pair FindMatchLength(const char* s1, 309 | const char* s2, 310 | const char* s2_limit, 311 | uint64_t* data) { 312 | // Implementation based on the x86-64 version, above. 
313 | assert(s2_limit >= s2); 314 | int matched = 0; 315 | 316 | while (s2 <= s2_limit - 4 && 317 | UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { 318 | s2 += 4; 319 | matched += 4; 320 | } 321 | if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { 322 | uint32_t x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); 323 | int matching_bits = Bits::FindLSBSetNonZero(x); 324 | matched += matching_bits >> 3; 325 | s2 += matching_bits >> 3; 326 | } else { 327 | while ((s2 < s2_limit) && (s1[matched] == *s2)) { 328 | ++s2; 329 | ++matched; 330 | } 331 | } 332 | if (s2 <= s2_limit - 8) *data = LittleEndian::Load64(s2); 333 | return std::pair(matched, matched < 8); 334 | } 335 | #endif 336 | 337 | static inline size_t FindMatchLengthPlain(const char* s1, const char* s2, 338 | const char* s2_limit) { 339 | // Implementation based on the x86-64 version, above. 340 | assert(s2_limit >= s2); 341 | int matched = 0; 342 | 343 | while (s2 <= s2_limit - 8 && 344 | UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { 345 | s2 += 8; 346 | matched += 8; 347 | } 348 | if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) { 349 | uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); 350 | int matching_bits = Bits::FindLSBSetNonZero64(x); 351 | matched += matching_bits >> 3; 352 | s2 += matching_bits >> 3; 353 | } else { 354 | while ((s2 < s2_limit) && (s1[matched] == *s2)) { 355 | ++s2; 356 | ++matched; 357 | } 358 | } 359 | return matched; 360 | } 361 | 362 | // Lookup tables for decompression code. Give --snappy_dump_decompression_table 363 | // to the unit test to recompute char_table. 364 | 365 | enum { 366 | LITERAL = 0, 367 | COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode 368 | COPY_2_BYTE_OFFSET = 2, 369 | COPY_4_BYTE_OFFSET = 3 370 | }; 371 | static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset. 
372 | 373 | // Data stored per entry in lookup table: 374 | // Range Bits-used Description 375 | // ------------------------------------ 376 | // 1..64 0..7 Literal/copy length encoded in opcode byte 377 | // 0..7 8..10 Copy offset encoded in opcode byte / 256 378 | // 0..4 11..13 Extra bytes after opcode 379 | // 380 | // We use eight bits for the length even though 7 would have sufficed 381 | // because of efficiency reasons: 382 | // (1) Extracting a byte is faster than a bit-field 383 | // (2) It properly aligns copy offset so we do not need a <<8 384 | static constexpr uint16_t char_table[256] = { 385 | // clang-format off 386 | 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 387 | 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 388 | 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, 389 | 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, 390 | 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, 391 | 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, 392 | 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, 393 | 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, 394 | 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, 395 | 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, 396 | 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, 397 | 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, 398 | 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, 399 | 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, 400 | 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, 401 | 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, 402 | 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, 403 | 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, 404 | 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, 405 | 0x0027, 0x0c0a, 0x1027, 
0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, 406 | 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, 407 | 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, 408 | 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, 409 | 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, 410 | 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, 411 | 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, 412 | 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, 413 | 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, 414 | 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 415 | 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 416 | 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, 417 | 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040, 418 | // clang-format on 419 | }; 420 | 421 | } // end namespace internal 422 | } // end namespace snappy 423 | 424 | #endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ 425 | -------------------------------------------------------------------------------- /snappy-sinksource.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. 
nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | #include 30 | #include 31 | 32 | #include "snappy-sinksource.h" 33 | 34 | namespace snappy { 35 | 36 | Source::~Source() = default; 37 | 38 | Sink::~Sink() = default; 39 | 40 | char* Sink::GetAppendBuffer(size_t length, char* scratch) { 41 | // TODO: Switch to [[maybe_unused]] when we can assume C++17. 42 | (void)length; 43 | 44 | return scratch; 45 | } 46 | 47 | char* Sink::GetAppendBufferVariable( 48 | size_t min_size, size_t desired_size_hint, char* scratch, 49 | size_t scratch_size, size_t* allocated_size) { 50 | // TODO: Switch to [[maybe_unused]] when we can assume C++17. 
51 | (void)min_size; 52 | (void)desired_size_hint; 53 | 54 | *allocated_size = scratch_size; 55 | return scratch; 56 | } 57 | 58 | void Sink::AppendAndTakeOwnership( 59 | char* bytes, size_t n, 60 | void (*deleter)(void*, const char*, size_t), 61 | void *deleter_arg) { 62 | Append(bytes, n); 63 | (*deleter)(deleter_arg, bytes, n); 64 | } 65 | 66 | ByteArraySource::~ByteArraySource() = default; 67 | 68 | size_t ByteArraySource::Available() const { return left_; } 69 | 70 | const char* ByteArraySource::Peek(size_t* len) { 71 | *len = left_; 72 | return ptr_; 73 | } 74 | 75 | void ByteArraySource::Skip(size_t n) { 76 | left_ -= n; 77 | ptr_ += n; 78 | } 79 | 80 | UncheckedByteArraySink::~UncheckedByteArraySink() { } 81 | 82 | void UncheckedByteArraySink::Append(const char* data, size_t n) { 83 | // Do no copying if the caller filled in the result of GetAppendBuffer() 84 | if (data != dest_) { 85 | std::memcpy(dest_, data, n); 86 | } 87 | dest_ += n; 88 | } 89 | 90 | char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { 91 | // TODO: Switch to [[maybe_unused]] when we can assume C++17. 92 | (void)len; 93 | (void)scratch; 94 | 95 | return dest_; 96 | } 97 | 98 | void UncheckedByteArraySink::AppendAndTakeOwnership( 99 | char* bytes, size_t n, 100 | void (*deleter)(void*, const char*, size_t), 101 | void *deleter_arg) { 102 | if (bytes != dest_) { 103 | std::memcpy(dest_, bytes, n); 104 | (*deleter)(deleter_arg, bytes, n); 105 | } 106 | dest_ += n; 107 | } 108 | 109 | char* UncheckedByteArraySink::GetAppendBufferVariable( 110 | size_t min_size, size_t desired_size_hint, char* scratch, 111 | size_t scratch_size, size_t* allocated_size) { 112 | // TODO: Switch to [[maybe_unused]] when we can assume C++17. 
113 | (void)min_size; 114 | (void)scratch; 115 | (void)scratch_size; 116 | 117 | *allocated_size = desired_size_hint; 118 | return dest_; 119 | } 120 | 121 | } // namespace snappy 122 | -------------------------------------------------------------------------------- /snappy-sinksource.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | #ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ 30 | #define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ 31 | 32 | #include 33 | 34 | namespace snappy { 35 | 36 | // A Sink is an interface that consumes a sequence of bytes. 37 | class Sink { 38 | public: 39 | Sink() { } 40 | virtual ~Sink(); 41 | 42 | // Append "bytes[0,n-1]" to this. 43 | virtual void Append(const char* bytes, size_t n) = 0; 44 | 45 | // Returns a writable buffer of the specified length for appending. 46 | // May return a pointer to the caller-owned scratch buffer which 47 | // must have at least the indicated length. The returned buffer is 48 | // only valid until the next operation on this Sink. 49 | // 50 | // After writing at most "length" bytes, call Append() with the 51 | // pointer returned from this function and the number of bytes 52 | // written. Many Append() implementations will avoid copying 53 | // bytes if this function returned an internal buffer. 54 | // 55 | // If a non-scratch buffer is returned, the caller may only pass a 56 | // prefix of it to Append(). That is, it is not correct to pass an 57 | // interior pointer of the returned array to Append(). 58 | // 59 | // The default implementation always returns the scratch buffer. 
60 | virtual char* GetAppendBuffer(size_t length, char* scratch); 61 | 62 | // For higher performance, Sink implementations can provide custom 63 | // AppendAndTakeOwnership() and GetAppendBufferVariable() methods. 64 | // These methods can reduce the number of copies done during 65 | // compression/decompression. 66 | 67 | // Append "bytes[0,n-1] to the sink. Takes ownership of "bytes" 68 | // and calls the deleter function as (*deleter)(deleter_arg, bytes, n) 69 | // to free the buffer. deleter function must be non NULL. 70 | // 71 | // The default implementation just calls Append and frees "bytes". 72 | // Other implementations may avoid a copy while appending the buffer. 73 | virtual void AppendAndTakeOwnership( 74 | char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), 75 | void *deleter_arg); 76 | 77 | // Returns a writable buffer for appending and writes the buffer's capacity to 78 | // *allocated_size. Guarantees *allocated_size >= min_size. 79 | // May return a pointer to the caller-owned scratch buffer which must have 80 | // scratch_size >= min_size. 81 | // 82 | // The returned buffer is only valid until the next operation 83 | // on this ByteSink. 84 | // 85 | // After writing at most *allocated_size bytes, call Append() with the 86 | // pointer returned from this function and the number of bytes written. 87 | // Many Append() implementations will avoid copying bytes if this function 88 | // returned an internal buffer. 89 | // 90 | // If the sink implementation allocates or reallocates an internal buffer, 91 | // it should use the desired_size_hint if appropriate. If a caller cannot 92 | // provide a reasonable guess at the desired capacity, it should set 93 | // desired_size_hint = 0. 94 | // 95 | // If a non-scratch buffer is returned, the caller may only pass 96 | // a prefix to it to Append(). That is, it is not correct to pass an 97 | // interior pointer to Append(). 
98 | // 99 | // The default implementation always returns the scratch buffer. 100 | virtual char* GetAppendBufferVariable( 101 | size_t min_size, size_t desired_size_hint, char* scratch, 102 | size_t scratch_size, size_t* allocated_size); 103 | 104 | private: 105 | // No copying 106 | Sink(const Sink&); 107 | void operator=(const Sink&); 108 | }; 109 | 110 | // A Source is an interface that yields a sequence of bytes 111 | class Source { 112 | public: 113 | Source() { } 114 | virtual ~Source(); 115 | 116 | // Return the number of bytes left to read from the source 117 | virtual size_t Available() const = 0; 118 | 119 | // Peek at the next flat region of the source. Does not reposition 120 | // the source. The returned region is empty iff Available()==0. 121 | // 122 | // Returns a pointer to the beginning of the region and store its 123 | // length in *len. 124 | // 125 | // The returned region is valid until the next call to Skip() or 126 | // until this object is destroyed, whichever occurs first. 127 | // 128 | // The returned region may be larger than Available() (for example 129 | // if this ByteSource is a view on a substring of a larger source). 130 | // The caller is responsible for ensuring that it only reads the 131 | // Available() bytes. 132 | virtual const char* Peek(size_t* len) = 0; 133 | 134 | // Skip the next n bytes. Invalidates any buffer returned by 135 | // a previous call to Peek(). 
136 | // REQUIRES: Available() >= n 137 | virtual void Skip(size_t n) = 0; 138 | 139 | private: 140 | // No copying 141 | Source(const Source&); 142 | void operator=(const Source&); 143 | }; 144 | 145 | // A Source implementation that yields the contents of a flat array 146 | class ByteArraySource : public Source { 147 | public: 148 | ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } 149 | ~ByteArraySource() override; 150 | size_t Available() const override; 151 | const char* Peek(size_t* len) override; 152 | void Skip(size_t n) override; 153 | private: 154 | const char* ptr_; 155 | size_t left_; 156 | }; 157 | 158 | // A Sink implementation that writes to a flat array without any bound checks. 159 | class UncheckedByteArraySink : public Sink { 160 | public: 161 | explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } 162 | ~UncheckedByteArraySink() override; 163 | void Append(const char* data, size_t n) override; 164 | char* GetAppendBuffer(size_t len, char* scratch) override; 165 | char* GetAppendBufferVariable( 166 | size_t min_size, size_t desired_size_hint, char* scratch, 167 | size_t scratch_size, size_t* allocated_size) override; 168 | void AppendAndTakeOwnership( 169 | char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), 170 | void *deleter_arg) override; 171 | 172 | // Return the current output pointer so that a caller can see how 173 | // many bytes were produced. 174 | // Note: this is not a Sink method. 175 | char* CurrentDestination() const { return dest_; } 176 | private: 177 | char* dest_; 178 | }; 179 | 180 | } // namespace snappy 181 | 182 | #endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ 183 | -------------------------------------------------------------------------------- /snappy-stubs-internal.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | #include 30 | #include 31 | 32 | #include "snappy-stubs-internal.h" 33 | 34 | namespace snappy { 35 | 36 | void Varint::Append32(std::string* s, uint32_t value) { 37 | char buf[Varint::kMax32]; 38 | const char* p = Varint::Encode32(buf, value); 39 | s->append(buf, p - buf); 40 | } 41 | 42 | } // namespace snappy 43 | -------------------------------------------------------------------------------- /snappy-stubs-internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Various stubs for the open-source version of Snappy. 30 | 31 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ 32 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ 33 | 34 | #if HAVE_CONFIG_H 35 | #include "config.h" 36 | #endif 37 | 38 | #include 39 | 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #if HAVE_SYS_MMAN_H 47 | #include 48 | #endif 49 | 50 | #if HAVE_UNISTD_H 51 | #include 52 | #endif 53 | 54 | #if defined(_MSC_VER) 55 | #include 56 | #endif // defined(_MSC_VER) 57 | 58 | #ifndef __has_feature 59 | #define __has_feature(x) 0 60 | #endif 61 | 62 | #if __has_feature(memory_sanitizer) 63 | #include 64 | #define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ 65 | __msan_unpoison((address), (size)) 66 | #else 67 | #define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ 68 | #endif // __has_feature(memory_sanitizer) 69 | 70 | #include "snappy-stubs-public.h" 71 | 72 | // Used to enable 64-bit optimized versions of some routines. 73 | #if defined(__PPC64__) || defined(__powerpc64__) 74 | #define ARCH_PPC 1 75 | #elif defined(__aarch64__) || defined(_M_ARM64) 76 | #define ARCH_ARM 1 77 | #endif 78 | 79 | // Needed by OS X, among others. 80 | #ifndef MAP_ANONYMOUS 81 | #define MAP_ANONYMOUS MAP_ANON 82 | #endif 83 | 84 | // The size of an array, if known at compile-time. 
85 | // Will give unexpected results if used on a pointer. 86 | // We undefine it first, since some compilers already have a definition. 87 | #ifdef ARRAYSIZE 88 | #undef ARRAYSIZE 89 | #endif 90 | #define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))} 91 | 92 | // Static prediction hints. 93 | #if HAVE_BUILTIN_EXPECT 94 | #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) 95 | #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) 96 | #else 97 | #define SNAPPY_PREDICT_FALSE(x) x 98 | #define SNAPPY_PREDICT_TRUE(x) x 99 | #endif // HAVE_BUILTIN_EXPECT 100 | 101 | // Inlining hints. 102 | #if HAVE_ATTRIBUTE_ALWAYS_INLINE 103 | #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) 104 | #else 105 | #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE 106 | #endif // HAVE_ATTRIBUTE_ALWAYS_INLINE 107 | 108 | #if HAVE_BUILTIN_PREFETCH 109 | #define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3) 110 | #else 111 | #define SNAPPY_PREFETCH(ptr) (void)(ptr) 112 | #endif 113 | 114 | // Stubbed version of ABSL_FLAG. 115 | // 116 | // In the open source version, flags can only be changed at compile time. 117 | #define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \ 118 | flag_type FLAGS_ ## flag_name = default_value 119 | 120 | namespace snappy { 121 | 122 | // Stubbed version of absl::GetFlag(). 123 | template 124 | inline T GetFlag(T flag) { return flag; } 125 | 126 | static const uint32_t kuint32max = std::numeric_limits::max(); 127 | static const int64_t kint64max = std::numeric_limits::max(); 128 | 129 | // Potentially unaligned loads and stores. 130 | 131 | inline uint16_t UNALIGNED_LOAD16(const void *p) { 132 | // Compiles to a single movzx/ldrh on clang/gcc/msvc. 133 | uint16_t v; 134 | std::memcpy(&v, p, sizeof(v)); 135 | return v; 136 | } 137 | 138 | inline uint32_t UNALIGNED_LOAD32(const void *p) { 139 | // Compiles to a single mov/ldr on clang/gcc/msvc. 
140 | uint32_t v; 141 | std::memcpy(&v, p, sizeof(v)); 142 | return v; 143 | } 144 | 145 | inline uint64_t UNALIGNED_LOAD64(const void *p) { 146 | // Compiles to a single mov/ldr on clang/gcc/msvc. 147 | uint64_t v; 148 | std::memcpy(&v, p, sizeof(v)); 149 | return v; 150 | } 151 | 152 | inline void UNALIGNED_STORE16(void *p, uint16_t v) { 153 | // Compiles to a single mov/strh on clang/gcc/msvc. 154 | std::memcpy(p, &v, sizeof(v)); 155 | } 156 | 157 | inline void UNALIGNED_STORE32(void *p, uint32_t v) { 158 | // Compiles to a single mov/str on clang/gcc/msvc. 159 | std::memcpy(p, &v, sizeof(v)); 160 | } 161 | 162 | inline void UNALIGNED_STORE64(void *p, uint64_t v) { 163 | // Compiles to a single mov/str on clang/gcc/msvc. 164 | std::memcpy(p, &v, sizeof(v)); 165 | } 166 | 167 | // Convert to little-endian storage, opposite of network format. 168 | // Convert x from host to little endian: x = LittleEndian.FromHost(x); 169 | // convert x from little endian to host: x = LittleEndian.ToHost(x); 170 | // 171 | // Store values into unaligned memory converting to little endian order: 172 | // LittleEndian.Store16(p, x); 173 | // 174 | // Load unaligned values stored in little endian converting to host order: 175 | // x = LittleEndian.Load16(p); 176 | class LittleEndian { 177 | public: 178 | // Functions to do unaligned loads and stores in little-endian order. 179 | static inline uint16_t Load16(const void *ptr) { 180 | // Compiles to a single mov/str on recent clang and gcc. 181 | #if SNAPPY_IS_BIG_ENDIAN 182 | const uint8_t* const buffer = reinterpret_cast(ptr); 183 | return (static_cast(buffer[0])) | 184 | (static_cast(buffer[1]) << 8); 185 | #else 186 | // memcpy() turns into a single instruction early in the optimization 187 | // pipeline (relatively to a series of byte accesses). So, using memcpy 188 | // instead of byte accesses may lead to better decisions in more stages of 189 | // the optimization pipeline. 
190 | uint16_t value; 191 | std::memcpy(&value, ptr, 2); 192 | return value; 193 | #endif 194 | } 195 | 196 | static inline uint32_t Load32(const void *ptr) { 197 | // Compiles to a single mov/str on recent clang and gcc. 198 | #if SNAPPY_IS_BIG_ENDIAN 199 | const uint8_t* const buffer = reinterpret_cast(ptr); 200 | return (static_cast(buffer[0])) | 201 | (static_cast(buffer[1]) << 8) | 202 | (static_cast(buffer[2]) << 16) | 203 | (static_cast(buffer[3]) << 24); 204 | #else 205 | // See Load16() for the rationale of using memcpy(). 206 | uint32_t value; 207 | std::memcpy(&value, ptr, 4); 208 | return value; 209 | #endif 210 | } 211 | 212 | static inline uint64_t Load64(const void *ptr) { 213 | // Compiles to a single mov/str on recent clang and gcc. 214 | #if SNAPPY_IS_BIG_ENDIAN 215 | const uint8_t* const buffer = reinterpret_cast(ptr); 216 | return (static_cast(buffer[0])) | 217 | (static_cast(buffer[1]) << 8) | 218 | (static_cast(buffer[2]) << 16) | 219 | (static_cast(buffer[3]) << 24) | 220 | (static_cast(buffer[4]) << 32) | 221 | (static_cast(buffer[5]) << 40) | 222 | (static_cast(buffer[6]) << 48) | 223 | (static_cast(buffer[7]) << 56); 224 | #else 225 | // See Load16() for the rationale of using memcpy(). 226 | uint64_t value; 227 | std::memcpy(&value, ptr, 8); 228 | return value; 229 | #endif 230 | } 231 | 232 | static inline void Store16(void *dst, uint16_t value) { 233 | // Compiles to a single mov/str on recent clang and gcc. 234 | #if SNAPPY_IS_BIG_ENDIAN 235 | uint8_t* const buffer = reinterpret_cast(dst); 236 | buffer[0] = static_cast(value); 237 | buffer[1] = static_cast(value >> 8); 238 | #else 239 | // See Load16() for the rationale of using memcpy(). 240 | std::memcpy(dst, &value, 2); 241 | #endif 242 | } 243 | 244 | static void Store32(void *dst, uint32_t value) { 245 | // Compiles to a single mov/str on recent clang and gcc. 
246 | #if SNAPPY_IS_BIG_ENDIAN 247 | uint8_t* const buffer = reinterpret_cast(dst); 248 | buffer[0] = static_cast(value); 249 | buffer[1] = static_cast(value >> 8); 250 | buffer[2] = static_cast(value >> 16); 251 | buffer[3] = static_cast(value >> 24); 252 | #else 253 | // See Load16() for the rationale of using memcpy(). 254 | std::memcpy(dst, &value, 4); 255 | #endif 256 | } 257 | 258 | static void Store64(void* dst, uint64_t value) { 259 | // Compiles to a single mov/str on recent clang and gcc. 260 | #if SNAPPY_IS_BIG_ENDIAN 261 | uint8_t* const buffer = reinterpret_cast(dst); 262 | buffer[0] = static_cast(value); 263 | buffer[1] = static_cast(value >> 8); 264 | buffer[2] = static_cast(value >> 16); 265 | buffer[3] = static_cast(value >> 24); 266 | buffer[4] = static_cast(value >> 32); 267 | buffer[5] = static_cast(value >> 40); 268 | buffer[6] = static_cast(value >> 48); 269 | buffer[7] = static_cast(value >> 56); 270 | #else 271 | // See Load16() for the rationale of using memcpy(). 272 | std::memcpy(dst, &value, 8); 273 | #endif 274 | } 275 | 276 | static inline constexpr bool IsLittleEndian() { 277 | #if SNAPPY_IS_BIG_ENDIAN 278 | return false; 279 | #else 280 | return true; 281 | #endif // SNAPPY_IS_BIG_ENDIAN 282 | } 283 | }; 284 | 285 | // Some bit-manipulation functions. 286 | class Bits { 287 | public: 288 | // Return floor(log2(n)) for positive integer n. 289 | static int Log2FloorNonZero(uint32_t n); 290 | 291 | // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. 292 | static int Log2Floor(uint32_t n); 293 | 294 | // Return the first set least / most significant bit, 0-indexed. Returns an 295 | // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except 296 | // that it's 0-indexed. 
297 | static int FindLSBSetNonZero(uint32_t n); 298 | 299 | static int FindLSBSetNonZero64(uint64_t n); 300 | 301 | private: 302 | // No copying 303 | Bits(const Bits&); 304 | void operator=(const Bits&); 305 | }; 306 | 307 | #if HAVE_BUILTIN_CTZ 308 | 309 | inline int Bits::Log2FloorNonZero(uint32_t n) { 310 | assert(n != 0); 311 | // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof 312 | // represents subtraction in base 2 and observes that there's no carry. 313 | // 314 | // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). 315 | // Using "31 ^" here instead of "31 -" allows the optimizer to strip the 316 | // function body down to _bit_scan_reverse(x). 317 | return 31 ^ __builtin_clz(n); 318 | } 319 | 320 | inline int Bits::Log2Floor(uint32_t n) { 321 | return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); 322 | } 323 | 324 | inline int Bits::FindLSBSetNonZero(uint32_t n) { 325 | assert(n != 0); 326 | return __builtin_ctz(n); 327 | } 328 | 329 | #elif defined(_MSC_VER) 330 | 331 | inline int Bits::Log2FloorNonZero(uint32_t n) { 332 | assert(n != 0); 333 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. 334 | unsigned long where; 335 | _BitScanReverse(&where, n); 336 | return static_cast(where); 337 | } 338 | 339 | inline int Bits::Log2Floor(uint32_t n) { 340 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. 341 | unsigned long where; 342 | if (_BitScanReverse(&where, n)) 343 | return static_cast(where); 344 | return -1; 345 | } 346 | 347 | inline int Bits::FindLSBSetNonZero(uint32_t n) { 348 | assert(n != 0); 349 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. 350 | unsigned long where; 351 | if (_BitScanForward(&where, n)) 352 | return static_cast(where); 353 | return 32; 354 | } 355 | 356 | #else // Portable versions. 
357 | 358 | inline int Bits::Log2FloorNonZero(uint32_t n) { 359 | assert(n != 0); 360 | 361 | int log = 0; 362 | uint32_t value = n; 363 | for (int i = 4; i >= 0; --i) { 364 | int shift = (1 << i); 365 | uint32_t x = value >> shift; 366 | if (x != 0) { 367 | value = x; 368 | log += shift; 369 | } 370 | } 371 | assert(value == 1); 372 | return log; 373 | } 374 | 375 | inline int Bits::Log2Floor(uint32_t n) { 376 | return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); 377 | } 378 | 379 | inline int Bits::FindLSBSetNonZero(uint32_t n) { 380 | assert(n != 0); 381 | 382 | int rc = 31; 383 | for (int i = 4, shift = 1 << 4; i >= 0; --i) { 384 | const uint32_t x = n << shift; 385 | if (x != 0) { 386 | n = x; 387 | rc -= shift; 388 | } 389 | shift >>= 1; 390 | } 391 | return rc; 392 | } 393 | 394 | #endif // End portable versions. 395 | 396 | #if HAVE_BUILTIN_CTZ 397 | 398 | inline int Bits::FindLSBSetNonZero64(uint64_t n) { 399 | assert(n != 0); 400 | return __builtin_ctzll(n); 401 | } 402 | 403 | #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) 404 | // _BitScanForward64() is only available on x64 and ARM64. 405 | 406 | inline int Bits::FindLSBSetNonZero64(uint64_t n) { 407 | assert(n != 0); 408 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. 409 | unsigned long where; 410 | if (_BitScanForward64(&where, n)) 411 | return static_cast(where); 412 | return 64; 413 | } 414 | 415 | #else // Portable version. 416 | 417 | // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). 418 | inline int Bits::FindLSBSetNonZero64(uint64_t n) { 419 | assert(n != 0); 420 | 421 | const uint32_t bottombits = static_cast(n); 422 | if (bottombits == 0) { 423 | // Bottom bits are zero, so scan the top bits. 424 | return 32 + FindLSBSetNonZero(static_cast(n >> 32)); 425 | } else { 426 | return FindLSBSetNonZero(bottombits); 427 | } 428 | } 429 | 430 | #endif // HAVE_BUILTIN_CTZ 431 | 432 | // Variable-length integer encoding. 
class Varint {
 public:
  // Maximum length, in bytes, of the varint encoding of a uint32_t.
  static const int kMax32 = 5;

  // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
  // Never reads a character at or beyond limit.  If a valid/terminated varint32
  // was found in the range, stores it in *OUTPUT and returns a pointer just
  // past the last byte of the varint32. Else returns nullptr and leaves
  // *OUTPUT untouched.  On success, "result <= limit".
  static const char* Parse32WithLimit(const char* ptr, const char* limit,
                                      uint32_t* OUTPUT);

  // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v"
  //            (kMax32 bytes always suffice).
  // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
  //            byte just past the last encoded byte.
  static char* Encode32(char* ptr, uint32_t v);

  // EFFECTS    Appends the varint representation of "value" to "*s".
  static void Append32(std::string* s, uint32_t value);
};

inline const char* Varint::Parse32WithLimit(const char* p,
                                            const char* l,
                                            uint32_t* OUTPUT) {
  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
  const unsigned char* const limit = reinterpret_cast<const unsigned char*>(l);
  uint32_t result = 0;
  // Each byte contributes 7 payload bits, least-significant group first; a
  // byte without the continuation bit (0x80) terminates the value.
  for (int shift = 0; shift <= 28; shift += 7) {
    if (ptr >= limit) return nullptr;
    const uint32_t b = *(ptr++);
    result |= (b & 127) << shift;
    // The fifth byte may only carry the top 4 bits of a 32-bit value.
    const uint32_t terminator_bound = (shift == 28) ? 16 : 128;
    if (b < terminator_bound) {
      *OUTPUT = result;
      return reinterpret_cast<const char*>(ptr);
    }
  }
  return nullptr;  // Value is too long to be a varint32
}

inline char* Varint::Encode32(char* sptr, uint32_t v) {
  // Operate on characters as unsigneds
  uint8_t* ptr = reinterpret_cast<uint8_t*>(sptr);
  static const uint8_t B = 128;
  // Emit 7 bits at a time, low group first, flagging every byte except the
  // last with the continuation bit.  Writes between 1 and kMax32 bytes.
  while (v >= B) {
    *(ptr++) = static_cast<uint8_t>(v | B);
    v >>= 7;
  }
  *(ptr++) = static_cast<uint8_t>(v);
  return reinterpret_cast<char*>(ptr);
}

// If you know the internal layout of the std::string in use, you can
// replace this function with one that resizes the string without
// filling the new space with zeros (if applicable) --
// it will be non-portable but faster.
inline void STLStringResizeUninitialized(std::string* s, size_t new_size) {
  s->resize(new_size);
}

// Return a mutable char* pointing to a string's internal buffer,
// which may not be null-terminated. Writing through this pointer will
// modify the string.
//
// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
// next call to a string method that invalidates iterators.
//
// As of 2006-04, there is no standard-blessed way of getting a
// mutable reference to a string's internal buffer. However, issue 530
// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
// proposes this as the method. It will officially be part of the standard
// for C++0x. This should already work on all current implementations.
// Returns a mutable pointer to the first character of *str's internal buffer,
// or nullptr when the string is empty.  Writing through the pointer modifies
// the string.  The buffer may not be null-terminated from the caller's point
// of view, so treat it as exactly str->size() bytes.
//
// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the next
// call to a string method that invalidates iterators.
inline char* string_as_array(std::string* str) {
  // An empty string has no element to take the address of, so report "no
  // buffer" explicitly instead of dereferencing begin().
  return str->empty() ? nullptr : &*str->begin();
}
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Various type stubs for the open-source version of Snappy. 30 | // 31 | // This file cannot include config.h, as it is included from snappy.h, 32 | // which is a public header. Instead, snappy-stubs-public.h is generated 33 | // from snappy-stubs-public.h.in at configure time. 34 | 35 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 36 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ 37 | 38 | #include 39 | 40 | #if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H 41 | #include 42 | #endif // HAVE_SYS_UIO_H 43 | 44 | #define SNAPPY_MAJOR ${PROJECT_VERSION_MAJOR} 45 | #define SNAPPY_MINOR ${PROJECT_VERSION_MINOR} 46 | #define SNAPPY_PATCHLEVEL ${PROJECT_VERSION_PATCH} 47 | #define SNAPPY_VERSION \ 48 | ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) 49 | 50 | namespace snappy { 51 | 52 | #if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H 53 | // Windows does not have an iovec type, yet the concept is universally useful. 54 | // It is simple to define it ourselves, so we put it inside our own namespace.
// Fallback iovec definition, compiled only when the configure step reports
// that <sys/uio.h> is unavailable (the enclosing #if !HAVE_SYS_UIO_H).
struct iovec {
  void* iov_base;  // presumably the start of the buffer -- mirrors POSIX iovec
  size_t iov_len;  // presumably the buffer length in bytes -- mirrors POSIX iovec
};
#endif  // !HAVE_SYS_UIO_H

}  // namespace snappy

#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Various stubs for the unit tests for the open-source version of Snappy. 30 | 31 | #include "snappy-test.h" 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | namespace file { 41 | 42 | OptionsStub::OptionsStub() = default; 43 | OptionsStub::~OptionsStub() = default; 44 | 45 | const OptionsStub &Defaults() { 46 | static OptionsStub defaults; 47 | return defaults; 48 | } 49 | 50 | StatusStub::StatusStub() = default; 51 | StatusStub::StatusStub(const StatusStub &) = default; 52 | StatusStub &StatusStub::operator=(const StatusStub &) = default; 53 | StatusStub::~StatusStub() = default; 54 | 55 | bool StatusStub::ok() { return true; } 56 | 57 | StatusStub GetContents(const std::string &filename, std::string *output, 58 | const OptionsStub & /* options */) { 59 | std::FILE *fp = std::fopen(filename.c_str(), "rb"); 60 | if (fp == nullptr) { 61 | std::perror(filename.c_str()); 62 | std::exit(1); 63 | } 64 | 65 | output->clear(); 66 | while (!std::feof(fp)) { 67 | char buffer[4096]; 68 | size_t bytes_read = std::fread(buffer, 1, sizeof(buffer), fp); 69 | if (bytes_read == 0 && std::ferror(fp)) { 70 | std::perror("fread"); 71 | std::exit(1); 72 | } 73 | output->append(buffer, bytes_read); 74 | } 75 | 76 | std::fclose(fp); 77 | return StatusStub(); 78 | } 79 | 80 | StatusStub SetContents(const std::string &file_name, const std::string &content, 81 | 
const OptionsStub & /* options */) { 82 | std::FILE *fp = std::fopen(file_name.c_str(), "wb"); 83 | if (fp == nullptr) { 84 | std::perror(file_name.c_str()); 85 | std::exit(1); 86 | } 87 | 88 | size_t bytes_written = std::fwrite(content.data(), 1, content.size(), fp); 89 | if (bytes_written != content.size()) { 90 | std::perror("fwrite"); 91 | std::exit(1); 92 | } 93 | 94 | std::fclose(fp); 95 | return StatusStub(); 96 | } 97 | 98 | } // namespace file 99 | 100 | namespace snappy { 101 | 102 | std::string ReadTestDataFile(const std::string& base, size_t size_limit) { 103 | std::string contents; 104 | const char* srcdir = getenv("srcdir"); // This is set by Automake. 105 | std::string prefix; 106 | if (srcdir) { 107 | prefix = std::string(srcdir) + "/"; 108 | } 109 | file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults() 110 | ).ok(); 111 | if (size_limit > 0) { 112 | contents = contents.substr(0, size_limit); 113 | } 114 | return contents; 115 | } 116 | 117 | std::string StrFormat(const char* format, ...) { 118 | char buffer[4096]; 119 | std::va_list ap; 120 | va_start(ap, format); 121 | std::vsnprintf(buffer, sizeof(buffer), format, ap); 122 | va_end(ap); 123 | return buffer; 124 | } 125 | 126 | LogMessage::~LogMessage() { std::cerr << std::endl; } 127 | 128 | LogMessage &LogMessage::operator<<(const std::string &message) { 129 | std::cerr << message; 130 | return *this; 131 | } 132 | 133 | LogMessage &LogMessage::operator<<(int number) { 134 | std::cerr << number; 135 | return *this; 136 | } 137 | 138 | #ifdef _MSC_VER 139 | // ~LogMessageCrash calls std::abort() and therefore never exits. This is by 140 | // design, so temporarily disable warning C4722. 
141 | #pragma warning(push) 142 | #pragma warning(disable : 4722) 143 | #endif 144 | 145 | LogMessageCrash::~LogMessageCrash() { 146 | std::cerr << std::endl; 147 | std::abort(); 148 | } 149 | 150 | #ifdef _MSC_VER 151 | #pragma warning(pop) 152 | #endif 153 | 154 | #if HAVE_LIBZ 155 | 156 | ZLib::ZLib() 157 | : comp_init_(false), 158 | uncomp_init_(false) { 159 | Reinit(); 160 | } 161 | 162 | ZLib::~ZLib() { 163 | if (comp_init_) { deflateEnd(&comp_stream_); } 164 | if (uncomp_init_) { inflateEnd(&uncomp_stream_); } 165 | } 166 | 167 | void ZLib::Reinit() { 168 | compression_level_ = Z_DEFAULT_COMPRESSION; 169 | window_bits_ = MAX_WBITS; 170 | mem_level_ = 8; // DEF_MEM_LEVEL 171 | if (comp_init_) { 172 | deflateEnd(&comp_stream_); 173 | comp_init_ = false; 174 | } 175 | if (uncomp_init_) { 176 | inflateEnd(&uncomp_stream_); 177 | uncomp_init_ = false; 178 | } 179 | first_chunk_ = true; 180 | } 181 | 182 | void ZLib::Reset() { 183 | first_chunk_ = true; 184 | } 185 | 186 | // --------- COMPRESS MODE 187 | 188 | // Initialization method to be called if we hit an error while 189 | // compressing. On hitting an error, call this method before returning 190 | // the error. 
191 | void ZLib::CompressErrorInit() { 192 | deflateEnd(&comp_stream_); 193 | comp_init_ = false; 194 | Reset(); 195 | } 196 | 197 | int ZLib::DeflateInit() { 198 | return deflateInit2(&comp_stream_, 199 | compression_level_, 200 | Z_DEFLATED, 201 | window_bits_, 202 | mem_level_, 203 | Z_DEFAULT_STRATEGY); 204 | } 205 | 206 | int ZLib::CompressInit(Bytef *dest, uLongf *destLen, 207 | const Bytef *source, uLong *sourceLen) { 208 | int err; 209 | 210 | comp_stream_.next_in = (Bytef*)source; 211 | comp_stream_.avail_in = (uInt)*sourceLen; 212 | if ((uLong)comp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR; 213 | comp_stream_.next_out = dest; 214 | comp_stream_.avail_out = (uInt)*destLen; 215 | if ((uLong)comp_stream_.avail_out != *destLen) return Z_BUF_ERROR; 216 | 217 | if ( !first_chunk_ ) // only need to set up stream the first time through 218 | return Z_OK; 219 | 220 | if (comp_init_) { // we've already initted it 221 | err = deflateReset(&comp_stream_); 222 | if (err != Z_OK) { 223 | LOG(WARNING) << "ERROR: Can't reset compress object; creating a new one"; 224 | deflateEnd(&comp_stream_); 225 | comp_init_ = false; 226 | } 227 | } 228 | if (!comp_init_) { // first use 229 | comp_stream_.zalloc = (alloc_func)0; 230 | comp_stream_.zfree = (free_func)0; 231 | comp_stream_.opaque = (voidpf)0; 232 | err = DeflateInit(); 233 | if (err != Z_OK) return err; 234 | comp_init_ = true; 235 | } 236 | return Z_OK; 237 | } 238 | 239 | // In a perfect world we'd always have the full buffer to compress 240 | // when the time came, and we could just call Compress(). Alas, we 241 | // want to do chunked compression on our webserver. In this 242 | // application, we compress the header, send it off, then compress the 243 | // results, send them off, then compress the footer. Thus we need to 244 | // use the chunked compression features of zlib. 
245 | int ZLib::CompressAtMostOrAll(Bytef *dest, uLongf *destLen, 246 | const Bytef *source, uLong *sourceLen, 247 | int flush_mode) { // Z_FULL_FLUSH or Z_FINISH 248 | int err; 249 | 250 | if ( (err=CompressInit(dest, destLen, source, sourceLen)) != Z_OK ) 251 | return err; 252 | 253 | // This is used to figure out how many bytes we wrote *this chunk* 254 | int compressed_size = comp_stream_.total_out; 255 | 256 | // Some setup happens only for the first chunk we compress in a run 257 | if ( first_chunk_ ) { 258 | first_chunk_ = false; 259 | } 260 | 261 | // flush_mode is Z_FINISH for all mode, Z_SYNC_FLUSH for incremental 262 | // compression. 263 | err = deflate(&comp_stream_, flush_mode); 264 | 265 | *sourceLen = comp_stream_.avail_in; 266 | 267 | if ((err == Z_STREAM_END || err == Z_OK) 268 | && comp_stream_.avail_in == 0 269 | && comp_stream_.avail_out != 0 ) { 270 | // we processed everything ok and the output buffer was large enough. 271 | ; 272 | } else if (err == Z_STREAM_END && comp_stream_.avail_in > 0) { 273 | return Z_BUF_ERROR; // should never happen 274 | } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) { 275 | // an error happened 276 | CompressErrorInit(); 277 | return err; 278 | } else if (comp_stream_.avail_out == 0) { // not enough space 279 | err = Z_BUF_ERROR; 280 | } 281 | 282 | assert(err == Z_OK || err == Z_STREAM_END || err == Z_BUF_ERROR); 283 | if (err == Z_STREAM_END) 284 | err = Z_OK; 285 | 286 | // update the crc and other metadata 287 | compressed_size = comp_stream_.total_out - compressed_size; // delta 288 | *destLen = compressed_size; 289 | 290 | return err; 291 | } 292 | 293 | int ZLib::CompressChunkOrAll(Bytef *dest, uLongf *destLen, 294 | const Bytef *source, uLong sourceLen, 295 | int flush_mode) { // Z_FULL_FLUSH or Z_FINISH 296 | const int ret = 297 | CompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode); 298 | if (ret == Z_BUF_ERROR) 299 | CompressErrorInit(); 300 | return ret; 301 | } 302 
| 303 | // This routine only initializes the compression stream once. Thereafter, it 304 | // just does a deflateReset on the stream, which should be faster. 305 | int ZLib::Compress(Bytef *dest, uLongf *destLen, 306 | const Bytef *source, uLong sourceLen) { 307 | int err; 308 | if ( (err=CompressChunkOrAll(dest, destLen, source, sourceLen, 309 | Z_FINISH)) != Z_OK ) 310 | return err; 311 | Reset(); // reset for next call to Compress 312 | 313 | return Z_OK; 314 | } 315 | 316 | 317 | // --------- UNCOMPRESS MODE 318 | 319 | int ZLib::InflateInit() { 320 | return inflateInit2(&uncomp_stream_, MAX_WBITS); 321 | } 322 | 323 | // Initialization method to be called if we hit an error while 324 | // uncompressing. On hitting an error, call this method before 325 | // returning the error. 326 | void ZLib::UncompressErrorInit() { 327 | inflateEnd(&uncomp_stream_); 328 | uncomp_init_ = false; 329 | Reset(); 330 | } 331 | 332 | int ZLib::UncompressInit(Bytef *dest, uLongf *destLen, 333 | const Bytef *source, uLong *sourceLen) { 334 | int err; 335 | 336 | uncomp_stream_.next_in = (Bytef*)source; 337 | uncomp_stream_.avail_in = (uInt)*sourceLen; 338 | // Check for source > 64K on 16-bit machine: 339 | if ((uLong)uncomp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR; 340 | 341 | uncomp_stream_.next_out = dest; 342 | uncomp_stream_.avail_out = (uInt)*destLen; 343 | if ((uLong)uncomp_stream_.avail_out != *destLen) return Z_BUF_ERROR; 344 | 345 | if ( !first_chunk_ ) // only need to set up stream the first time through 346 | return Z_OK; 347 | 348 | if (uncomp_init_) { // we've already initted it 349 | err = inflateReset(&uncomp_stream_); 350 | if (err != Z_OK) { 351 | LOG(WARNING) 352 | << "ERROR: Can't reset uncompress object; creating a new one"; 353 | UncompressErrorInit(); 354 | } 355 | } 356 | if (!uncomp_init_) { 357 | uncomp_stream_.zalloc = (alloc_func)0; 358 | uncomp_stream_.zfree = (free_func)0; 359 | uncomp_stream_.opaque = (voidpf)0; 360 | err = InflateInit(); 361 
| if (err != Z_OK) return err; 362 | uncomp_init_ = true; 363 | } 364 | return Z_OK; 365 | } 366 | 367 | // If you compressed your data a chunk at a time, with CompressChunk, 368 | // you can uncompress it a chunk at a time with UncompressChunk. 369 | // Only difference bewteen chunked and unchunked uncompression 370 | // is the flush mode we use: Z_SYNC_FLUSH (chunked) or Z_FINISH (unchunked). 371 | int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen, 372 | const Bytef *source, uLong *sourceLen, 373 | int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH 374 | int err = Z_OK; 375 | 376 | if ( (err=UncompressInit(dest, destLen, source, sourceLen)) != Z_OK ) { 377 | LOG(WARNING) << "UncompressInit: Error: " << err << " SourceLen: " 378 | << *sourceLen; 379 | return err; 380 | } 381 | 382 | // This is used to figure out how many output bytes we wrote *this chunk*: 383 | const uLong old_total_out = uncomp_stream_.total_out; 384 | 385 | // This is used to figure out how many input bytes we read *this chunk*: 386 | const uLong old_total_in = uncomp_stream_.total_in; 387 | 388 | // Some setup happens only for the first chunk we compress in a run 389 | if ( first_chunk_ ) { 390 | first_chunk_ = false; // so we don't do this again 391 | 392 | // For the first chunk *only* (to avoid infinite troubles), we let 393 | // there be no actual data to uncompress. This sometimes triggers 394 | // when the input is only the gzip header, say. 395 | if ( *sourceLen == 0 ) { 396 | *destLen = 0; 397 | return Z_OK; 398 | } 399 | } 400 | 401 | // We'll uncompress as much as we can. If we end OK great, otherwise 402 | // if we get an error that seems to be the gzip footer, we store the 403 | // gzip footer and return OK, otherwise we return the error. 404 | 405 | // flush_mode is Z_SYNC_FLUSH for chunked mode, Z_FINISH for all mode. 
406 | err = inflate(&uncomp_stream_, flush_mode); 407 | 408 | // Figure out how many bytes of the input zlib slurped up: 409 | const uLong bytes_read = uncomp_stream_.total_in - old_total_in; 410 | CHECK_LE(source + bytes_read, source + *sourceLen); 411 | *sourceLen = uncomp_stream_.avail_in; 412 | 413 | if ((err == Z_STREAM_END || err == Z_OK) // everything went ok 414 | && uncomp_stream_.avail_in == 0) { // and we read it all 415 | ; 416 | } else if (err == Z_STREAM_END && uncomp_stream_.avail_in > 0) { 417 | LOG(WARNING) 418 | << "UncompressChunkOrAll: Received some extra data, bytes total: " 419 | << uncomp_stream_.avail_in << " bytes: " 420 | << std::string(reinterpret_cast(uncomp_stream_.next_in), 421 | std::min(int(uncomp_stream_.avail_in), 20)); 422 | UncompressErrorInit(); 423 | return Z_DATA_ERROR; // what's the extra data for? 424 | } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) { 425 | // an error happened 426 | LOG(WARNING) << "UncompressChunkOrAll: Error: " << err 427 | << " avail_out: " << uncomp_stream_.avail_out; 428 | UncompressErrorInit(); 429 | return err; 430 | } else if (uncomp_stream_.avail_out == 0) { 431 | err = Z_BUF_ERROR; 432 | } 433 | 434 | assert(err == Z_OK || err == Z_BUF_ERROR || err == Z_STREAM_END); 435 | if (err == Z_STREAM_END) 436 | err = Z_OK; 437 | 438 | *destLen = uncomp_stream_.total_out - old_total_out; // size for this call 439 | 440 | return err; 441 | } 442 | 443 | int ZLib::UncompressChunkOrAll(Bytef *dest, uLongf *destLen, 444 | const Bytef *source, uLong sourceLen, 445 | int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH 446 | const int ret = 447 | UncompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode); 448 | if (ret == Z_BUF_ERROR) 449 | UncompressErrorInit(); 450 | return ret; 451 | } 452 | 453 | int ZLib::UncompressAtMost(Bytef *dest, uLongf *destLen, 454 | const Bytef *source, uLong *sourceLen) { 455 | return UncompressAtMostOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH); 456 
| } 457 | 458 | // We make sure we've uncompressed everything, that is, the current 459 | // uncompress stream is at a compressed-buffer-EOF boundary. In gzip 460 | // mode, we also check the gzip footer to make sure we pass the gzip 461 | // consistency checks. We RETURN true iff both types of checks pass. 462 | bool ZLib::UncompressChunkDone() { 463 | assert(!first_chunk_ && uncomp_init_); 464 | // Make sure we're at the end-of-compressed-data point. This means 465 | // if we call inflate with Z_FINISH we won't consume any input or 466 | // write any output 467 | Bytef dummyin, dummyout; 468 | uLongf dummylen = 0; 469 | if ( UncompressChunkOrAll(&dummyout, &dummylen, &dummyin, 0, Z_FINISH) 470 | != Z_OK ) { 471 | return false; 472 | } 473 | 474 | // Make sure that when we exit, we can start a new round of chunks later 475 | Reset(); 476 | 477 | return true; 478 | } 479 | 480 | // Uncompresses the source buffer into the destination buffer. 481 | // The destination buffer must be long enough to hold the entire 482 | // decompressed contents. 483 | // 484 | // We only initialize the uncomp_stream once. Thereafter, we use 485 | // inflateReset, which should be faster. 486 | // 487 | // Returns Z_OK on success, otherwise, it returns a zlib error code. 488 | int ZLib::Uncompress(Bytef *dest, uLongf *destLen, 489 | const Bytef *source, uLong sourceLen) { 490 | int err; 491 | if ( (err=UncompressChunkOrAll(dest, destLen, source, sourceLen, 492 | Z_FINISH)) != Z_OK ) { 493 | Reset(); // let us try to compress again 494 | return err; 495 | } 496 | if ( !UncompressChunkDone() ) // calls Reset() 497 | return Z_DATA_ERROR; 498 | return Z_OK; // stream_end is ok 499 | } 500 | 501 | #endif // HAVE_LIBZ 502 | 503 | } // namespace snappy 504 | -------------------------------------------------------------------------------- /snappy-test.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Various stubs for the unit tests for the open-source version of Snappy. 
30 | 31 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ 32 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ 33 | 34 | #if HAVE_CONFIG_H 35 | #include "config.h" 36 | #endif 37 | 38 | #include "snappy-stubs-internal.h" 39 | 40 | #if HAVE_SYS_MMAN_H 41 | #include 42 | #endif 43 | 44 | #if HAVE_SYS_RESOURCE_H 45 | #include 46 | #endif 47 | 48 | #if HAVE_SYS_TIME_H 49 | #include 50 | #endif 51 | 52 | #if HAVE_WINDOWS_H 53 | // Needed to be able to use std::max without workarounds in the source code. 54 | // https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts 55 | #define NOMINMAX 56 | #include 57 | #endif 58 | 59 | #define InitGoogle(argv0, argc, argv, remove_flags) ((void)(0)) 60 | 61 | #if HAVE_LIBZ 62 | #include "zlib.h" 63 | #endif 64 | 65 | #if HAVE_LIBLZO2 66 | #include "lzo/lzo1x.h" 67 | #endif 68 | 69 | #if HAVE_LIBLZ4 70 | #include "lz4.h" 71 | #endif 72 | 73 | namespace file { 74 | 75 | // Stubs the class file::Options. 76 | // 77 | // This class should not be instantiated explicitly. It should only be used by 78 | // passing file::Defaults() to file::GetContents() / file::SetContents(). 79 | class OptionsStub { 80 | public: 81 | OptionsStub(); 82 | OptionsStub(const OptionsStub &) = delete; 83 | OptionsStub &operator=(const OptionsStub &) = delete; 84 | ~OptionsStub(); 85 | }; 86 | 87 | const OptionsStub &Defaults(); 88 | 89 | // Stubs the class absl::Status. 90 | // 91 | // This class should not be instantiated explicitly. It should only be used by 92 | // passing the result of file::GetContents() / file::SetContents() to 93 | // CHECK_OK(). 
94 | class StatusStub { 95 | public: 96 | StatusStub(); 97 | StatusStub(const StatusStub &); 98 | StatusStub &operator=(const StatusStub &); 99 | ~StatusStub(); 100 | 101 | bool ok(); 102 | }; 103 | 104 | StatusStub GetContents(const std::string &file_name, std::string *output, 105 | const OptionsStub & /* options */); 106 | 107 | StatusStub SetContents(const std::string &file_name, const std::string &content, 108 | const OptionsStub & /* options */); 109 | 110 | } // namespace file 111 | 112 | namespace snappy { 113 | 114 | #define FLAGS_test_random_seed 301 115 | 116 | std::string ReadTestDataFile(const std::string& base, size_t size_limit); 117 | 118 | // A std::sprintf() variant that returns a std::string. 119 | // Not safe for general use due to truncation issues. 120 | std::string StrFormat(const char* format, ...); 121 | 122 | // A wall-time clock. This stub is not super-accurate, nor resistant to the 123 | // system time changing. 124 | class CycleTimer { 125 | public: 126 | inline CycleTimer() : real_time_us_(0) {} 127 | inline ~CycleTimer() = default; 128 | 129 | inline void Start() { 130 | #ifdef WIN32 131 | QueryPerformanceCounter(&start_); 132 | #else 133 | ::gettimeofday(&start_, nullptr); 134 | #endif 135 | } 136 | 137 | inline void Stop() { 138 | #ifdef WIN32 139 | LARGE_INTEGER stop; 140 | LARGE_INTEGER frequency; 141 | QueryPerformanceCounter(&stop); 142 | QueryPerformanceFrequency(&frequency); 143 | 144 | double elapsed = static_cast(stop.QuadPart - start_.QuadPart) / 145 | frequency.QuadPart; 146 | real_time_us_ += elapsed * 1e6 + 0.5; 147 | #else 148 | struct ::timeval stop; 149 | ::gettimeofday(&stop, nullptr); 150 | 151 | real_time_us_ += 1000000 * (stop.tv_sec - start_.tv_sec); 152 | real_time_us_ += (stop.tv_usec - start_.tv_usec); 153 | #endif 154 | } 155 | 156 | inline double Get() { return real_time_us_ * 1e-6; } 157 | 158 | private: 159 | int64_t real_time_us_; 160 | #ifdef WIN32 161 | LARGE_INTEGER start_; 162 | #else 163 | struct 
::timeval start_; 164 | #endif 165 | }; 166 | 167 | // Logging. 168 | 169 | class LogMessage { 170 | public: 171 | inline LogMessage() = default; 172 | ~LogMessage(); 173 | 174 | LogMessage &operator<<(const std::string &message); 175 | LogMessage &operator<<(int number); 176 | }; 177 | 178 | class LogMessageCrash : public LogMessage { 179 | public: 180 | inline LogMessageCrash() = default; 181 | ~LogMessageCrash(); 182 | }; 183 | 184 | // This class is used to explicitly ignore values in the conditional 185 | // logging macros. This avoids compiler warnings like "value computed 186 | // is not used" and "statement has no effect". 187 | 188 | class LogMessageVoidify { 189 | public: 190 | inline LogMessageVoidify() = default; 191 | inline ~LogMessageVoidify() = default; 192 | 193 | // This has to be an operator with a precedence lower than << but 194 | // higher than ?: 195 | inline void operator&(const LogMessage &) {} 196 | }; 197 | 198 | // Asserts, both versions activated in debug mode only, 199 | // and ones that are always active. 200 | 201 | #define CRASH_UNLESS(condition) \ 202 | SNAPPY_PREDICT_TRUE(condition) \ 203 | ? (void)0 \ 204 | : snappy::LogMessageVoidify() & snappy::LogMessageCrash() 205 | 206 | #define LOG(level) LogMessage() 207 | #define VLOG(level) \ 208 | true ? (void)0 : snappy::LogMessageVoidify() & snappy::LogMessage() 209 | 210 | #define CHECK(cond) CRASH_UNLESS(cond) 211 | #define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) 212 | #define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) 213 | #define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) 214 | #define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) 215 | #define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) 216 | #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) 217 | #define CHECK_OK(cond) (cond).ok() 218 | 219 | #if HAVE_LIBZ 220 | 221 | // Object-oriented wrapper around zlib. 222 | class ZLib { 223 | public: 224 | ZLib(); 225 | ~ZLib(); 226 | 227 | // Wipe a ZLib object to a virgin state. 
This differs from Reset() 228 | // in that it also breaks any state. 229 | void Reinit(); 230 | 231 | // Call this to make a zlib buffer as good as new. Here's the only 232 | // case where they differ: 233 | // CompressChunk(a); CompressChunk(b); CompressChunkDone(); vs 234 | // CompressChunk(a); Reset(); CompressChunk(b); CompressChunkDone(); 235 | // You'll want to use Reset(), then, when you interrupt a compress 236 | // (or uncompress) in the middle of a chunk and want to start over. 237 | void Reset(); 238 | 239 | // According to the zlib manual, when you Compress, the destination 240 | // buffer must have size at least src + .1%*src + 12. This function 241 | // helps you calculate that. Augment this to account for a potential 242 | // gzip header and footer, plus a few bytes of slack. 243 | static int MinCompressbufSize(int uncompress_size) { 244 | return uncompress_size + uncompress_size/1000 + 40; 245 | } 246 | 247 | // Compresses the source buffer into the destination buffer. 248 | // sourceLen is the byte length of the source buffer. 249 | // Upon entry, destLen is the total size of the destination buffer, 250 | // which must be of size at least MinCompressbufSize(sourceLen). 251 | // Upon exit, destLen is the actual size of the compressed buffer. 252 | // 253 | // This function can be used to compress a whole file at once if the 254 | // input file is mmap'ed. 255 | // 256 | // Returns Z_OK if success, Z_MEM_ERROR if there was not 257 | // enough memory, Z_BUF_ERROR if there was not enough room in the 258 | // output buffer. Note that if the output buffer is exactly the same 259 | // size as the compressed result, we still return Z_BUF_ERROR. 260 | // (check CL#1936076) 261 | int Compress(Bytef *dest, uLongf *destLen, 262 | const Bytef *source, uLong sourceLen); 263 | 264 | // Uncompresses the source buffer into the destination buffer. 265 | // The destination buffer must be long enough to hold the entire 266 | // decompressed contents. 
267 | // 268 | // Returns Z_OK on success, otherwise, it returns a zlib error code. 269 | int Uncompress(Bytef *dest, uLongf *destLen, 270 | const Bytef *source, uLong sourceLen); 271 | 272 | // Uncompress data one chunk at a time -- ie you can call this 273 | // more than once. To get this to work you need to call per-chunk 274 | // and "done" routines. 275 | // 276 | // Returns Z_OK if success, Z_MEM_ERROR if there was not 277 | // enough memory, Z_BUF_ERROR if there was not enough room in the 278 | // output buffer. 279 | 280 | int UncompressAtMost(Bytef *dest, uLongf *destLen, 281 | const Bytef *source, uLong *sourceLen); 282 | 283 | // Checks gzip footer information, as needed. Mostly this just 284 | // makes sure the checksums match. Whenever you call this, it 285 | // will assume the last 8 bytes from the previous UncompressChunk 286 | // call are the footer. Returns true iff everything looks ok. 287 | bool UncompressChunkDone(); 288 | 289 | private: 290 | int InflateInit(); // sets up the zlib inflate structure 291 | int DeflateInit(); // sets up the zlib deflate structure 292 | 293 | // These init the zlib data structures for compressing/uncompressing 294 | int CompressInit(Bytef *dest, uLongf *destLen, 295 | const Bytef *source, uLong *sourceLen); 296 | int UncompressInit(Bytef *dest, uLongf *destLen, 297 | const Bytef *source, uLong *sourceLen); 298 | // Initialization method to be called if we hit an error while 299 | // uncompressing. On hitting an error, call this method before 300 | // returning the error. 
301 | void UncompressErrorInit(); 302 | 303 | // Helper function for Compress 304 | int CompressChunkOrAll(Bytef *dest, uLongf *destLen, 305 | const Bytef *source, uLong sourceLen, 306 | int flush_mode); 307 | int CompressAtMostOrAll(Bytef *dest, uLongf *destLen, 308 | const Bytef *source, uLong *sourceLen, 309 | int flush_mode); 310 | 311 | // Likewise for UncompressAndUncompressChunk 312 | int UncompressChunkOrAll(Bytef *dest, uLongf *destLen, 313 | const Bytef *source, uLong sourceLen, 314 | int flush_mode); 315 | 316 | int UncompressAtMostOrAll(Bytef *dest, uLongf *destLen, 317 | const Bytef *source, uLong *sourceLen, 318 | int flush_mode); 319 | 320 | // Initialization method to be called if we hit an error while 321 | // compressing. On hitting an error, call this method before 322 | // returning the error. 323 | void CompressErrorInit(); 324 | 325 | int compression_level_; // compression level 326 | int window_bits_; // log base 2 of the window size used in compression 327 | int mem_level_; // specifies the amount of memory to be used by 328 | // compressor (1-9) 329 | z_stream comp_stream_; // Zlib stream data structure 330 | bool comp_init_; // True if we have initialized comp_stream_ 331 | z_stream uncomp_stream_; // Zlib stream data structure 332 | bool uncomp_init_; // True if we have initialized uncomp_stream_ 333 | 334 | // These are used only with chunked compression. 335 | bool first_chunk_; // true if we need to emit headers with this chunk 336 | }; 337 | 338 | #endif // HAVE_LIBZ 339 | 340 | } // namespace snappy 341 | 342 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ 343 | -------------------------------------------------------------------------------- /snappy.h: -------------------------------------------------------------------------------- 1 | // Copyright 2005 and onwards Google Inc. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // A light-weight compression algorithm. It is designed for speed of 30 | // compression and decompression, rather than for the utmost in space 31 | // savings. 
32 | // 33 | // For getting better compression ratios when you are compressing data 34 | // with long repeated sequences or compressing data that is similar to 35 | // other data, while still compressing fast, you might look at first 36 | // using BMDiff and then compressing the output of BMDiff with 37 | // Snappy. 38 | 39 | #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ 40 | #define THIRD_PARTY_SNAPPY_SNAPPY_H__ 41 | 42 | #include 43 | #include 44 | 45 | #include 46 | 47 | #include "snappy-stubs-public.h" 48 | 49 | namespace snappy { 50 | class Source; 51 | class Sink; 52 | 53 | struct CompressionOptions { 54 | // Compression level. Supported values range from MinCompressionLevel() to MaxCompressionLevel(); the default is DefaultCompressionLevel(). 55 | // Level 1 is the fastest. 56 | // Level 2 is a little slower but provides better compression. Level 2 is 57 | // **EXPERIMENTAL** for the time being. It might happen that we decide to 58 | // fall back to level 1 in the future. 59 | // Levels 3+ are currently not supported. We plan to support levels up to 60 | // 9 in the future. 61 | // If you have used other compression algorithms, level 1 is equivalent to 62 | // fast mode (level 1) of LZ4, level 2 is equivalent to LZ4's level 2 mode 63 | // and compresses somewhere around zstd:-3 and zstd:-2 but generally with 64 | // faster decompression speeds than snappy:1 and zstd:-3. 65 | int level = DefaultCompressionLevel(); 66 | 67 | constexpr CompressionOptions() = default; 68 | constexpr CompressionOptions(int compression_level) 69 | : level(compression_level) {} 70 | static constexpr int MinCompressionLevel() { return 1; } 71 | static constexpr int MaxCompressionLevel() { return 2; } 72 | static constexpr int DefaultCompressionLevel() { return 1; } 73 | }; 74 | 75 | // ------------------------------------------------------------------------ 76 | // Generic compression/decompression routines. 77 | // ------------------------------------------------------------------------ 78 | 79 | // Compress the bytes read from "*reader" and append to "*writer". 
Return the 80 | // number of bytes written. 81 | // The overload without `options` is kept to preserve ABI. 82 | size_t Compress(Source* reader, Sink* writer); 83 | size_t Compress(Source* reader, Sink* writer, 84 | CompressionOptions options); 85 | 86 | // Find the uncompressed length of the given stream, as given by the header. 87 | // Note that the true length could deviate from this; the stream could e.g. 88 | // be truncated. 89 | // 90 | // Also note that this leaves "*source" in a state that is unsuitable for 91 | // further operations, such as RawUncompress(). You will need to rewind 92 | // or recreate the source yourself before attempting any further calls. 93 | bool GetUncompressedLength(Source* source, uint32_t* result); 94 | 95 | // ------------------------------------------------------------------------ 96 | // Higher-level string based routines (should be sufficient for most users) 97 | // ------------------------------------------------------------------------ 98 | 99 | // Sets "*compressed" to the compressed version of "input[0..input_length-1]". 100 | // Original contents of *compressed are lost. 101 | // 102 | // REQUIRES: "input[]" is not an alias of "*compressed". 103 | // The overload without `options` is kept to preserve ABI. 104 | size_t Compress(const char* input, size_t input_length, 105 | std::string* compressed); 106 | size_t Compress(const char* input, size_t input_length, 107 | std::string* compressed, CompressionOptions options); 108 | 109 | // Same as `Compress` above but taking an `iovec` array as input. Note that 110 | // this function preprocesses the inputs to compute the sum of 111 | // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use 112 | // `RawCompressFromIOVec` below. 113 | // The overload without `options` is kept to preserve ABI. 
114 | size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, 115 | std::string* compressed); 116 | size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, 117 | std::string* compressed, 118 | CompressionOptions options); 119 | 120 | // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed". 121 | // Original contents of "*uncompressed" are lost. 122 | // 123 | // REQUIRES: "compressed[]" is not an alias of "*uncompressed". 124 | // 125 | // Returns false if the message is corrupted and could not be decompressed. 126 | bool Uncompress(const char* compressed, size_t compressed_length, 127 | std::string* uncompressed); 128 | 129 | // Decompresses "compressed" to "*uncompressed". 130 | // 131 | // Returns false if the message is corrupted and could not be decompressed. 132 | bool Uncompress(Source* compressed, Sink* uncompressed); 133 | 134 | // This routine uncompresses as much of the "compressed" as possible 135 | // into sink. It returns the number of valid bytes added to sink 136 | // (extra invalid bytes may have been added due to errors; the caller 137 | // should ignore those). The emitted data typically has length 138 | // GetUncompressedLength(), but may be shorter if an error is 139 | // encountered. 140 | size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed); 141 | 142 | // ------------------------------------------------------------------------ 143 | // Lower-level character array based routines. May be useful for 144 | // efficiency reasons in certain circumstances. 145 | // ------------------------------------------------------------------------ 146 | 147 | // REQUIRES: "compressed" must point to an area of memory that is at 148 | // least "MaxCompressedLength(input_length)" bytes in length. 149 | // 150 | // Takes the data stored in "input[0..input_length-1]" and stores 151 | // it in the array pointed to by "compressed". 
152 | // 153 | // "*compressed_length" is set to the length of the compressed output. 154 | // 155 | // Example: 156 | // char* output = new char[snappy::MaxCompressedLength(input_length)]; 157 | // size_t output_length; 158 | // RawCompress(input, input_length, output, &output_length); 159 | // ... Process(output, output_length) ... 160 | // delete [] output; 161 | void RawCompress(const char* input, size_t input_length, char* compressed, 162 | size_t* compressed_length); 163 | void RawCompress(const char* input, size_t input_length, char* compressed, 164 | size_t* compressed_length, CompressionOptions options); 165 | 166 | // Same as `RawCompress` above but taking an `iovec` array as input. Note that 167 | // `uncompressed_length` is the total number of bytes to be read from the 168 | // elements of `iov` (_not_ the number of elements in `iov`). 169 | void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, 170 | char* compressed, size_t* compressed_length); 171 | void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, 172 | char* compressed, size_t* compressed_length, 173 | CompressionOptions options); 174 | 175 | // Given data in "compressed[0..compressed_length-1]" generated by 176 | // calling the snappy::Compress routine, this routine 177 | // stores the uncompressed data to 178 | // uncompressed[0..GetUncompressedLength(compressed)-1] 179 | // Returns false if the message is corrupted and could not be decompressed. 180 | bool RawUncompress(const char* compressed, size_t compressed_length, 181 | char* uncompressed); 182 | 183 | // Given data from the byte source 'compressed' generated by calling 184 | // the snappy::Compress routine, this routine stores the uncompressed 185 | // data to 186 | // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] 187 | // Returns false if the message is corrupted and could not be decompressed. 188 | bool RawUncompress(Source* compressed, char* uncompressed); 189 | 190 | 
// Given data in "compressed[0..compressed_length-1]" generated by 191 | // calling the snappy::Compress routine, this routine 192 | // stores the uncompressed data to the iovec "iov". The number of physical 193 | // buffers in "iov" is given by iov_cnt and their cumulative size 194 | // must be at least GetUncompressedLength(compressed). The individual buffers 195 | // in "iov" must not overlap with each other. 196 | // 197 | // Returns false if the message is corrupted and could not be decompressed. 198 | bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, 199 | const struct iovec* iov, size_t iov_cnt); 200 | 201 | // Given data from the byte source 'compressed' generated by calling 202 | // the snappy::Compress routine, this routine stores the uncompressed 203 | // data to the iovec "iov". The number of physical 204 | // buffers in "iov" is given by iov_cnt and their cumulative size 205 | // must be at least GetUncompressedLength(compressed). The individual buffers 206 | // in "iov" must not overlap with each other. 207 | // 208 | // Returns false if the message is corrupted and could not be decompressed. 209 | bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, 210 | size_t iov_cnt); 211 | 212 | // Returns the maximal size of the compressed representation of 213 | // input data that is "source_bytes" bytes in length. 214 | size_t MaxCompressedLength(size_t source_bytes); 215 | 216 | // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() 217 | // Returns true and stores the length of the uncompressed data in 218 | // *result normally. Returns false on parsing error. 219 | // This operation takes O(1) time. 220 | bool GetUncompressedLength(const char* compressed, size_t compressed_length, 221 | size_t* result); 222 | 223 | // Returns true iff the contents of "compressed[]" can be uncompressed 224 | // successfully. Does not return the uncompressed data. 
Takes 225 | // time proportional to compressed_length, but is usually at least 226 | // a factor of four faster than actual decompression. 227 | bool IsValidCompressedBuffer(const char* compressed, 228 | size_t compressed_length); 229 | 230 | // Returns true iff the contents of "compressed" can be uncompressed 231 | // successfully. Does not return the uncompressed data. Takes 232 | // time proportional to the length of *compressed, but is usually at least 233 | // a factor of four faster than actual decompression. 234 | // On success, consumes all of *compressed. On failure, consumes an 235 | // unspecified prefix of *compressed. 236 | bool IsValidCompressed(Source* compressed); 237 | 238 | // The size of a compression block. Note that many parts of the compression 239 | // code assume that kBlockSize <= 65536; in particular, the hash table 240 | // can only store 16-bit offsets, and EmitCopy() also assumes the offset 241 | // is 65535 bytes or less. Note also that if you change this, it will 242 | // affect the framing format (see framing_format.txt). 243 | // 244 | // Note that there might be older data around that is compressed with larger 245 | // block sizes, so the decompression code should not rely on the 246 | // non-existence of long backreferences. 247 | static constexpr int kBlockLog = 16; 248 | static constexpr size_t kBlockSize = 1 << kBlockLog; 249 | 250 | static constexpr int kMinHashTableBits = 8; 251 | static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits; 252 | 253 | static constexpr int kMaxHashTableBits = 15; 254 | static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits; 255 | } // end namespace snappy 256 | 257 | #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ 258 | -------------------------------------------------------------------------------- /snappy_benchmark.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include "benchmark/benchmark.h" 35 | #include "snappy-internal.h" 36 | #include "snappy-sinksource.h" 37 | #include "snappy-test.h" 38 | #include "snappy.h" 39 | #include "snappy_test_data.h" 40 | 41 | namespace snappy { 42 | 43 | namespace { 44 | 45 | void FilesAndLevels(benchmark::internal::Benchmark* benchmark) { 46 | for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) { 47 | for (int level = snappy::CompressionOptions::MinCompressionLevel(); 48 | level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) { 49 | benchmark->ArgPair(i, level); 50 | } 51 | } 52 | } 53 | 54 | void BM_UFlat(benchmark::State& state) { 55 | // Pick file to process based on state.range(0); state.range(1) is the compression level. 56 | int file_index = state.range(0); 57 | 58 | CHECK_GE(file_index, 0); 59 | CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); 60 | std::string contents = 61 | ReadTestDataFile(kTestDataFiles[file_index].filename, 62 | kTestDataFiles[file_index].size_limit); 63 | 64 | std::string zcontents; 65 | snappy::Compress( 66 | contents.data(), contents.size(), &zcontents, 67 | snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); 68 | char* dst = new char[contents.size()]; 69 | 70 | for (auto s : state) { 71 | CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst)); 72 | benchmark::DoNotOptimize(dst); 73 | } 74 | state.SetBytesProcessed(static_cast(state.iterations()) * 75 | static_cast(contents.size())); 76 | state.SetLabel(kTestDataFiles[file_index].label); 77 | 78 | delete[] dst; 79 | } 80 | BENCHMARK(BM_UFlat)->Apply(FilesAndLevels); 81 | 82 | struct SourceFiles { 83 | SourceFiles() { 84 | for (int i = 0; i < kFiles; i++) { 85 | std::string contents = ReadTestDataFile(kTestDataFiles[i].filename, 86 | kTestDataFiles[i].size_limit); 87 | max_size = std::max(max_size, contents.size()); 88 | sizes[i] = contents.size(); 89 | snappy::Compress(contents.data(), contents.size(), &zcontents[i]); 90 | } 91 | 
} 92 | static constexpr int kFiles = ARRAYSIZE(kTestDataFiles); 93 | std::string zcontents[kFiles]; 94 | size_t sizes[kFiles]; 95 | size_t max_size = 0; 96 | }; 97 | 98 | void BM_UFlatMedley(benchmark::State& state) { 99 | static const SourceFiles* const source = new SourceFiles(); 100 | 101 | std::vector dst(source->max_size); 102 | 103 | for (auto s : state) { 104 | for (int i = 0; i < SourceFiles::kFiles; i++) { 105 | CHECK(snappy::RawUncompress(source->zcontents[i].data(), 106 | source->zcontents[i].size(), dst.data())); 107 | benchmark::DoNotOptimize(dst); 108 | } 109 | } 110 | 111 | int64_t source_sizes = 0; 112 | for (int i = 0; i < SourceFiles::kFiles; i++) { 113 | source_sizes += static_cast(source->sizes[i]); 114 | } 115 | state.SetBytesProcessed(static_cast(state.iterations()) * 116 | source_sizes); 117 | } 118 | BENCHMARK(BM_UFlatMedley); 119 | 120 | void BM_UValidate(benchmark::State& state) { 121 | // Pick file to process based on state.range(0); state.range(1) is the compression level. 122 | int file_index = state.range(0); 123 | 124 | CHECK_GE(file_index, 0); 125 | CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); 126 | std::string contents = 127 | ReadTestDataFile(kTestDataFiles[file_index].filename, 128 | kTestDataFiles[file_index].size_limit); 129 | 130 | std::string zcontents; 131 | snappy::Compress( 132 | contents.data(), contents.size(), &zcontents, 133 | snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); 134 | 135 | for (auto s : state) { 136 | CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size())); 137 | } 138 | state.SetBytesProcessed(static_cast(state.iterations()) * 139 | static_cast(contents.size())); 140 | state.SetLabel(kTestDataFiles[file_index].label); 141 | } 142 | BENCHMARK(BM_UValidate)->Apply(FilesAndLevels); 143 | 144 | void BM_UValidateMedley(benchmark::State& state) { 145 | static const SourceFiles* const source = new SourceFiles(); 146 | 147 | for (auto s : state) { 148 | for (int i = 0; i < SourceFiles::kFiles; i++) { 149 | 
CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(), 150 | source->zcontents[i].size())); 151 | } 152 | } 153 | 154 | int64_t source_sizes = 0; 155 | for (int i = 0; i < SourceFiles::kFiles; i++) { 156 | source_sizes += static_cast(source->sizes[i]); 157 | } 158 | state.SetBytesProcessed(static_cast(state.iterations()) * 159 | source_sizes); 160 | } 161 | BENCHMARK(BM_UValidateMedley); 162 | 163 | void BM_UIOVecSource(benchmark::State& state) { 164 | // Pick file to process based on state.range(0); state.range(1) is the compression level. 165 | int file_index = state.range(0); 166 | int level = state.range(1); 167 | 168 | CHECK_GE(file_index, 0); 169 | CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); 170 | std::string contents = 171 | ReadTestDataFile(kTestDataFiles[file_index].filename, 172 | kTestDataFiles[file_index].size_limit); 173 | 174 | // Split `contents` across kNumEntries `iovec`s: each covers contents.size() / kNumEntries bytes and the last one takes the remainder. 175 | const int kNumEntries = 10; 176 | struct iovec iov[kNumEntries]; 177 | size_t used_so_far = 0; 178 | for (int i = 0; i < kNumEntries; ++i) { 179 | iov[i].iov_base = const_cast(contents.data()) + used_so_far; 180 | if (used_so_far == contents.size()) { 181 | iov[i].iov_len = 0; 182 | continue; 183 | } 184 | if (i == kNumEntries - 1) { 185 | iov[i].iov_len = contents.size() - used_so_far; 186 | } else { 187 | iov[i].iov_len = contents.size() / kNumEntries; 188 | } 189 | used_so_far += iov[i].iov_len; 190 | } 191 | 192 | char* dst = new char[snappy::MaxCompressedLength(contents.size())]; 193 | size_t zsize = 0; 194 | for (auto s : state) { 195 | snappy::RawCompressFromIOVec(iov, contents.size(), dst, &zsize, 196 | snappy::CompressionOptions{/*level=*/level}); 197 | benchmark::DoNotOptimize(iov); 198 | } 199 | state.SetBytesProcessed(static_cast(state.iterations()) * 200 | static_cast(contents.size())); 201 | const double compression_ratio = 202 | static_cast(zsize) / std::max(1, contents.size()); 203 | state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label, 204 | 100.0 * compression_ratio)); 
205 | VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", 206 | kTestDataFiles[file_index].label, contents.size(), 207 | zsize); 208 | 209 | delete[] dst; 210 | } 211 | BENCHMARK(BM_UIOVecSource)->Apply(FilesAndLevels); 212 | 213 | void BM_UIOVecSink(benchmark::State& state) { 214 | // Pick file to process based on state.range(0). 215 | int file_index = state.range(0); 216 | 217 | CHECK_GE(file_index, 0); 218 | CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); 219 | std::string contents = 220 | ReadTestDataFile(kTestDataFiles[file_index].filename, 221 | kTestDataFiles[file_index].size_limit); 222 | 223 | std::string zcontents; 224 | snappy::Compress(contents.data(), contents.size(), &zcontents); 225 | 226 | // Uncompress into an iovec array of kNumEntries entries: each covers contents.size() / kNumEntries bytes of dst and the last one takes the remainder. 227 | const int kNumEntries = 10; 228 | struct iovec iov[kNumEntries]; 229 | char* dst = new char[contents.size()]; 230 | size_t used_so_far = 0; 231 | for (int i = 0; i < kNumEntries; ++i) { 232 | iov[i].iov_base = dst + used_so_far; 233 | if (used_so_far == contents.size()) { 234 | iov[i].iov_len = 0; 235 | continue; 236 | } 237 | 238 | if (i == kNumEntries - 1) { 239 | iov[i].iov_len = contents.size() - used_so_far; 240 | } else { 241 | iov[i].iov_len = contents.size() / kNumEntries; 242 | } 243 | used_so_far += iov[i].iov_len; 244 | } 245 | 246 | for (auto s : state) { 247 | CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov, 248 | kNumEntries)); 249 | benchmark::DoNotOptimize(iov); 250 | } 251 | state.SetBytesProcessed(static_cast(state.iterations()) * 252 | static_cast(contents.size())); 253 | state.SetLabel(kTestDataFiles[file_index].label); 254 | 255 | delete[] dst; 256 | } 257 | BENCHMARK(BM_UIOVecSink)->DenseRange(0, 4); 258 | 259 | void BM_UFlatSink(benchmark::State& state) { 260 | // Pick file to process based on state.range(0); state.range(1) is the compression level. 
261 | int file_index = state.range(0); 262 | 263 | CHECK_GE(file_index, 0); 264 | CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); 265 | std::string contents = 266 | ReadTestDataFile(kTestDataFiles[file_index].filename, 267 | kTestDataFiles[file_index].size_limit); 268 | 269 | std::string zcontents; 270 | snappy::Compress( 271 | contents.data(), contents.size(), &zcontents, 272 | snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); 273 | char* dst = new char[contents.size()]; 274 | 275 | for (auto s : state) { 276 | snappy::ByteArraySource source(zcontents.data(), zcontents.size()); 277 | snappy::UncheckedByteArraySink sink(dst); 278 | CHECK(snappy::Uncompress(&source, &sink)); 279 | benchmark::DoNotOptimize(sink); 280 | } 281 | state.SetBytesProcessed(static_cast(state.iterations()) * 282 | static_cast(contents.size())); 283 | state.SetLabel(kTestDataFiles[file_index].label); 284 | 285 | std::string s(dst, contents.size()); 286 | CHECK_EQ(contents, s); 287 | 288 | delete[] dst; 289 | } 290 | 291 | BENCHMARK(BM_UFlatSink)->Apply(FilesAndLevels); 292 | 293 | void BM_ZFlat(benchmark::State& state) { 294 | // Pick file to process based on state.range(0); state.range(1) is the compression level. 
295 | int file_index = state.range(0); 296 | int level = state.range(1); 297 | 298 | CHECK_GE(file_index, 0); 299 | CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); 300 | std::string contents = 301 | ReadTestDataFile(kTestDataFiles[file_index].filename, 302 | kTestDataFiles[file_index].size_limit); 303 | char* dst = new char[snappy::MaxCompressedLength(contents.size())]; 304 | 305 | size_t zsize = 0; 306 | for (auto s : state) { 307 | snappy::RawCompress(contents.data(), contents.size(), dst, &zsize, 308 | snappy::CompressionOptions{/*level=*/level}); 309 | benchmark::DoNotOptimize(dst); 310 | } 311 | state.SetBytesProcessed(static_cast(state.iterations()) * 312 | static_cast(contents.size())); 313 | const double compression_ratio = 314 | static_cast(zsize) / std::max(1, contents.size()); 315 | state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label, 316 | 100.0 * compression_ratio)); 317 | VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", 318 | kTestDataFiles[file_index].label, contents.size(), 319 | zsize); 320 | delete[] dst; 321 | } 322 | 323 | BENCHMARK(BM_ZFlat)->Apply(FilesAndLevels); 324 | 325 | void BM_ZFlatAll(benchmark::State& state) { 326 | const int num_files = ARRAYSIZE(kTestDataFiles); 327 | int level = state.range(0); 328 | 329 | std::vector contents(num_files); 330 | std::vector dst(num_files); 331 | 332 | int64_t total_contents_size = 0; 333 | for (int i = 0; i < num_files; ++i) { 334 | contents[i] = ReadTestDataFile(kTestDataFiles[i].filename, 335 | kTestDataFiles[i].size_limit); 336 | dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())]; 337 | total_contents_size += contents[i].size(); 338 | } 339 | 340 | size_t zsize = 0; 341 | for (auto s : state) { 342 | for (int i = 0; i < num_files; ++i) { 343 | snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], 344 | &zsize, snappy::CompressionOptions{/*level=*/level}); 345 | benchmark::DoNotOptimize(dst); 346 | } 347 | } 348 | 349 | 
state.SetBytesProcessed(static_cast(state.iterations()) * 350 | total_contents_size); 351 | 352 | for (char* dst_item : dst) { 353 | delete[] dst_item; 354 | } 355 | state.SetLabel(StrFormat("%d kTestDataFiles", num_files)); 356 | } 357 | BENCHMARK(BM_ZFlatAll)->DenseRange(1, 2); 358 | 359 | void BM_ZFlatIncreasingTableSize(benchmark::State& state) { 360 | CHECK_GT(ARRAYSIZE(kTestDataFiles), 0); 361 | int level = state.range(0); 362 | const std::string base_content = ReadTestDataFile( 363 | kTestDataFiles[0].filename, kTestDataFiles[0].size_limit); 364 | 365 | std::vector contents; 366 | std::vector dst; 367 | int64_t total_contents_size = 0; 368 | for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits; 369 | ++table_bits) { 370 | std::string content = base_content; 371 | content.resize(1 << table_bits); 372 | dst.push_back(new char[snappy::MaxCompressedLength(content.size())]); 373 | total_contents_size += content.size(); 374 | contents.push_back(std::move(content)); 375 | } 376 | 377 | size_t zsize = 0; 378 | for (auto s : state) { 379 | for (size_t i = 0; i < contents.size(); ++i) { 380 | snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], 381 | &zsize, snappy::CompressionOptions{/*level=*/level}); 382 | benchmark::DoNotOptimize(dst); 383 | } 384 | } 385 | 386 | state.SetBytesProcessed(static_cast(state.iterations()) * 387 | total_contents_size); 388 | 389 | for (char* dst_item : dst) { 390 | delete[] dst_item; 391 | } 392 | state.SetLabel(StrFormat("%d tables", contents.size())); 393 | } 394 | BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(1, 2); 395 | 396 | } // namespace 397 | 398 | } // namespace snappy 399 | -------------------------------------------------------------------------------- /snappy_compress_fuzzer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // libFuzzer harness for fuzzing snappy compression code. 30 | 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | #include "snappy.h" 38 | 39 | // Entry point for LibFuzzer. 
40 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { 41 | std::string input(reinterpret_cast(data), size); 42 | for (int level = snappy::CompressionOptions::MinCompressionLevel(); 43 | level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) { 44 | std::string compressed; 45 | size_t compressed_size = 46 | snappy::Compress(input.data(), input.size(), &compressed, 47 | snappy::CompressionOptions{/*level=*/level}); 48 | 49 | (void)compressed_size; // Only read inside assert(), which is compiled out when NDEBUG is defined. 50 | assert(compressed_size == compressed.size()); 51 | assert(compressed.size() <= snappy::MaxCompressedLength(input.size())); 52 | assert( 53 | snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 54 | 55 | std::string uncompressed_after_compress; 56 | bool uncompress_succeeded = snappy::Uncompress( 57 | compressed.data(), compressed.size(), &uncompressed_after_compress); 58 | 59 | (void)uncompress_succeeded; // Only read inside assert(), which is compiled out when NDEBUG is defined. 60 | assert(uncompress_succeeded); 61 | assert(input == uncompressed_after_compress); 62 | } 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /snappy_test_data.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. 
nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // Support code for reading test data. 30 | 31 | #include "snappy_test_data.h" 32 | 33 | #include 34 | #include 35 | #include 36 | 37 | #include "snappy-test.h" 38 | 39 | namespace snappy { 40 | 41 | std::string ReadTestDataFile(const char* base, size_t size_limit) { 42 | std::string srcdir; 43 | const char* srcdir_env = std::getenv("srcdir"); // This is set by Automake. 44 | if (srcdir_env) { 45 | srcdir = std::string(srcdir_env) + "/"; 46 | } 47 | 48 | std::string contents; 49 | CHECK_OK(file::GetContents(srcdir + "testdata/" + base, &contents, 50 | file::Defaults())); 51 | if (size_limit > 0) { 52 | contents = contents.substr(0, size_limit); 53 | } 54 | return contents; 55 | } 56 | 57 | } // namespace snappy 58 | -------------------------------------------------------------------------------- /snappy_test_data.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // List of test case files. 
30 | 31 | #ifndef THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__ 32 | #define THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__ 33 | 34 | #include 35 | #include 36 | 37 | namespace snappy { 38 | 39 | std::string ReadTestDataFile(const char* base, size_t size_limit); 40 | 41 | // TODO: Replace anonymous namespace with inline variable when we can 42 | // rely on C++17. 43 | namespace { 44 | 45 | constexpr struct { 46 | const char* label; 47 | const char* filename; 48 | size_t size_limit; 49 | } kTestDataFiles[] = { 50 | { "html", "html", 0 }, 51 | { "urls", "urls.10K", 0 }, 52 | { "jpg", "fireworks.jpeg", 0 }, 53 | { "jpg_200", "fireworks.jpeg", 200 }, 54 | { "pdf", "paper-100k.pdf", 0 }, 55 | { "html4", "html_x_4", 0 }, 56 | { "txt1", "alice29.txt", 0 }, 57 | { "txt2", "asyoulik.txt", 0 }, 58 | { "txt3", "lcet10.txt", 0 }, 59 | { "txt4", "plrabn12.txt", 0 }, 60 | { "pb", "geo.protodata", 0 }, 61 | { "gaviota", "kppkn.gtb", 0 }, 62 | }; 63 | 64 | } // namespace 65 | 66 | } // namespace snappy 67 | 68 | #endif // THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__ 69 | -------------------------------------------------------------------------------- /snappy_test_tool.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. 
nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "snappy-test.h" 39 | 40 | #include "snappy-internal.h" 41 | #include "snappy-sinksource.h" 42 | #include "snappy.h" 43 | #include "snappy_test_data.h" 44 | 45 | SNAPPY_FLAG(int32_t, start_len, -1, 46 | "Starting prefix size for testing (-1: just full file contents)"); 47 | SNAPPY_FLAG(int32_t, end_len, -1, 48 | "Starting prefix size for testing (-1: just full file contents)"); 49 | SNAPPY_FLAG(int32_t, bytes, 10485760, 50 | "How many bytes to compress/uncompress per file for timing"); 51 | 52 | SNAPPY_FLAG(bool, zlib, true, 53 | "Run zlib compression (http://www.zlib.net)"); 54 | SNAPPY_FLAG(bool, lzo, true, 55 | "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); 56 | SNAPPY_FLAG(bool, lz4, true, 57 | "Run LZ4 compression (https://github.com/lz4/lz4)"); 58 | SNAPPY_FLAG(bool, snappy, true, "Run snappy compression"); 59 | 60 | SNAPPY_FLAG(bool, write_compressed, false, 61 | "Write compressed versions of each file to .comp"); 62 | SNAPPY_FLAG(bool, write_uncompressed, false, 63 | "Write uncompressed versions of each file to .uncomp"); 64 | 65 | namespace snappy { 66 | 67 | namespace { 68 | 69 | #if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF 70 | 71 | // To test against code that reads beyond its input, this class copies a 72 | // string to a newly allocated group of pages, the last of which 73 | // is made unreadable via mprotect. Note that we need to allocate the 74 | // memory with mmap(), as POSIX allows mprotect() only on memory allocated 75 | // with mmap(), and some malloc/posix_memalign implementations expect to 76 | // be able to read previously allocated memory while doing heap allocations. 
77 | class DataEndingAtUnreadablePage { 78 | public: 79 | explicit DataEndingAtUnreadablePage(const std::string& s) { 80 | const size_t page_size = sysconf(_SC_PAGESIZE); 81 | const size_t size = s.size(); 82 | // Round up space for string to a multiple of page_size. 83 | size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); 84 | alloc_size_ = space_for_string + page_size; 85 | mem_ = mmap(NULL, alloc_size_, 86 | PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 87 | CHECK_NE(MAP_FAILED, mem_); 88 | protected_page_ = reinterpret_cast(mem_) + space_for_string; 89 | char* dst = protected_page_ - size; 90 | std::memcpy(dst, s.data(), size); 91 | data_ = dst; 92 | size_ = size; 93 | // Make guard page unreadable. 94 | CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE)); 95 | } 96 | 97 | ~DataEndingAtUnreadablePage() { 98 | const size_t page_size = sysconf(_SC_PAGESIZE); 99 | // Undo the mprotect. 100 | CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); 101 | CHECK_EQ(0, munmap(mem_, alloc_size_)); 102 | } 103 | 104 | const char* data() const { return data_; } 105 | size_t size() const { return size_; } 106 | 107 | private: 108 | size_t alloc_size_; 109 | void* mem_; 110 | char* protected_page_; 111 | const char* data_; 112 | size_t size_; 113 | }; 114 | 115 | #else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF 116 | 117 | // Fallback for systems without mmap. 
118 | using DataEndingAtUnreadablePage = std::string; 119 | 120 | #endif 121 | 122 | enum CompressorType { ZLIB, LZO, LZ4, SNAPPY }; 123 | 124 | const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"}; 125 | 126 | size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) { 127 | switch (comp) { 128 | #ifdef ZLIB_VERSION 129 | case ZLIB: 130 | return ZLib::MinCompressbufSize(input_size); 131 | #endif // ZLIB_VERSION 132 | 133 | #ifdef LZO_VERSION 134 | case LZO: 135 | return input_size + input_size/64 + 16 + 3; 136 | #endif // LZO_VERSION 137 | 138 | #ifdef LZ4_VERSION_NUMBER 139 | case LZ4: 140 | return LZ4_compressBound(input_size); 141 | #endif // LZ4_VERSION_NUMBER 142 | 143 | case SNAPPY: 144 | return snappy::MaxCompressedLength(input_size); 145 | 146 | default: 147 | LOG(FATAL) << "Unknown compression type number " << comp; 148 | return 0; 149 | } 150 | } 151 | 152 | // Returns true if we successfully compressed, false otherwise. 153 | // 154 | // If compressed_is_preallocated is set, do not resize the compressed buffer. 155 | // This is typically what you want for a benchmark, in order to not spend 156 | // time in the memory allocator. If you do set this flag, however, 157 | // "compressed" must be preinitialized to at least MinCompressbufSize(comp) 158 | // number of bytes, and may contain junk bytes at the end after return. 
159 | bool Compress(const char* input, size_t input_size, CompressorType comp, 160 | std::string* compressed, bool compressed_is_preallocated) { 161 | if (!compressed_is_preallocated) { 162 | compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); 163 | } 164 | 165 | switch (comp) { 166 | #ifdef ZLIB_VERSION 167 | case ZLIB: { 168 | ZLib zlib; 169 | uLongf destlen = compressed->size(); 170 | int ret = zlib.Compress( 171 | reinterpret_cast(string_as_array(compressed)), 172 | &destlen, 173 | reinterpret_cast(input), 174 | input_size); 175 | CHECK_EQ(Z_OK, ret); 176 | if (!compressed_is_preallocated) { 177 | compressed->resize(destlen); 178 | } 179 | return true; 180 | } 181 | #endif // ZLIB_VERSION 182 | 183 | #ifdef LZO_VERSION 184 | case LZO: { 185 | unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; 186 | lzo_uint destlen; 187 | int ret = lzo1x_1_15_compress( 188 | reinterpret_cast(input), 189 | input_size, 190 | reinterpret_cast(string_as_array(compressed)), 191 | &destlen, 192 | mem); 193 | CHECK_EQ(LZO_E_OK, ret); 194 | delete[] mem; 195 | if (!compressed_is_preallocated) { 196 | compressed->resize(destlen); 197 | } 198 | break; 199 | } 200 | #endif // LZO_VERSION 201 | 202 | #ifdef LZ4_VERSION_NUMBER 203 | case LZ4: { 204 | int destlen = compressed->size(); 205 | destlen = LZ4_compress_default(input, string_as_array(compressed), 206 | input_size, destlen); 207 | CHECK_NE(destlen, 0); 208 | if (!compressed_is_preallocated) { 209 | compressed->resize(destlen); 210 | } 211 | break; 212 | } 213 | #endif // LZ4_VERSION_NUMBER 214 | 215 | case SNAPPY: { 216 | size_t destlen; 217 | snappy::RawCompress(input, input_size, 218 | string_as_array(compressed), 219 | &destlen); 220 | CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); 221 | if (!compressed_is_preallocated) { 222 | compressed->resize(destlen); 223 | } 224 | break; 225 | } 226 | 227 | default: { 228 | return false; // the asked-for library wasn't compiled in 229 | } 230 | } 231 
| return true; 232 | } 233 | 234 | bool Uncompress(const std::string& compressed, CompressorType comp, int size, 235 | std::string* output) { 236 | // TODO: Switch to [[maybe_unused]] when we can assume C++17. 237 | (void)size; 238 | switch (comp) { 239 | #ifdef ZLIB_VERSION 240 | case ZLIB: { 241 | output->resize(size); 242 | ZLib zlib; 243 | uLongf destlen = output->size(); 244 | int ret = zlib.Uncompress( 245 | reinterpret_cast(string_as_array(output)), 246 | &destlen, 247 | reinterpret_cast(compressed.data()), 248 | compressed.size()); 249 | CHECK_EQ(Z_OK, ret); 250 | CHECK_EQ(static_cast(size), destlen); 251 | break; 252 | } 253 | #endif // ZLIB_VERSION 254 | 255 | #ifdef LZO_VERSION 256 | case LZO: { 257 | output->resize(size); 258 | lzo_uint destlen; 259 | int ret = lzo1x_decompress( 260 | reinterpret_cast(compressed.data()), 261 | compressed.size(), 262 | reinterpret_cast(string_as_array(output)), 263 | &destlen, 264 | NULL); 265 | CHECK_EQ(LZO_E_OK, ret); 266 | CHECK_EQ(static_cast(size), destlen); 267 | break; 268 | } 269 | #endif // LZO_VERSION 270 | 271 | #ifdef LZ4_VERSION_NUMBER 272 | case LZ4: { 273 | output->resize(size); 274 | int destlen = output->size(); 275 | destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output), 276 | compressed.size(), destlen); 277 | CHECK_NE(destlen, 0); 278 | CHECK_EQ(size, destlen); 279 | break; 280 | } 281 | #endif // LZ4_VERSION_NUMBER 282 | case SNAPPY: { 283 | snappy::RawUncompress(compressed.data(), compressed.size(), 284 | string_as_array(output)); 285 | break; 286 | } 287 | 288 | default: { 289 | return false; // the asked-for library wasn't compiled in 290 | } 291 | } 292 | return true; 293 | } 294 | 295 | void Measure(const char* data, size_t length, CompressorType comp, int repeats, 296 | int block_size) { 297 | // Run tests a few time and pick median running times 298 | static const int kRuns = 5; 299 | double ctime[kRuns]; 300 | double utime[kRuns]; 301 | int compressed_size = 0; 302 | 303 | 
{ 304 | // Chop the input into blocks 305 | int num_blocks = (length + block_size - 1) / block_size; 306 | std::vector input(num_blocks); 307 | std::vector input_length(num_blocks); 308 | std::vector compressed(num_blocks); 309 | std::vector output(num_blocks); 310 | for (int b = 0; b < num_blocks; ++b) { 311 | int input_start = b * block_size; 312 | int input_limit = std::min((b+1)*block_size, length); 313 | input[b] = data+input_start; 314 | input_length[b] = input_limit-input_start; 315 | } 316 | 317 | // Pre-grow the output buffers so we don't measure string append time. 318 | for (std::string& compressed_block : compressed) { 319 | compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); 320 | } 321 | 322 | // First, try one trial compression to make sure the code is compiled in 323 | if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { 324 | LOG(WARNING) << "Skipping " << names[comp] << ": " 325 | << "library not compiled in"; 326 | return; 327 | } 328 | 329 | for (int run = 0; run < kRuns; ++run) { 330 | CycleTimer ctimer, utimer; 331 | 332 | // Pre-grow the output buffers so we don't measure string append time. 333 | for (std::string& compressed_block : compressed) { 334 | compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); 335 | } 336 | 337 | ctimer.Start(); 338 | for (int b = 0; b < num_blocks; ++b) { 339 | for (int i = 0; i < repeats; ++i) 340 | Compress(input[b], input_length[b], comp, &compressed[b], true); 341 | } 342 | ctimer.Stop(); 343 | 344 | // Compress once more, with resizing, so we don't leave junk 345 | // at the end that will confuse the decompressor. 
346 | for (int b = 0; b < num_blocks; ++b) { 347 | Compress(input[b], input_length[b], comp, &compressed[b], false); 348 | } 349 | 350 | for (int b = 0; b < num_blocks; ++b) { 351 | output[b].resize(input_length[b]); 352 | } 353 | 354 | utimer.Start(); 355 | for (int i = 0; i < repeats; ++i) { 356 | for (int b = 0; b < num_blocks; ++b) 357 | Uncompress(compressed[b], comp, input_length[b], &output[b]); 358 | } 359 | utimer.Stop(); 360 | 361 | ctime[run] = ctimer.Get(); 362 | utime[run] = utimer.Get(); 363 | } 364 | 365 | compressed_size = 0; 366 | for (const std::string& compressed_item : compressed) { 367 | compressed_size += compressed_item.size(); 368 | } 369 | } 370 | 371 | std::sort(ctime, ctime + kRuns); 372 | std::sort(utime, utime + kRuns); 373 | const int med = kRuns/2; 374 | 375 | float comp_rate = (length / ctime[med]) * repeats / 1048576.0; 376 | float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; 377 | std::string x = names[comp]; 378 | x += ":"; 379 | std::string urate = (uncomp_rate >= 0) ? 
StrFormat("%.1f", uncomp_rate) 380 | : std::string("?"); 381 | std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " 382 | "comp %5.1f MB/s uncomp %5s MB/s\n", 383 | x.c_str(), 384 | block_size/(1<<20), 385 | static_cast(length), static_cast(compressed_size), 386 | (compressed_size * 100.0) / std::max(1, length), 387 | comp_rate, 388 | urate.c_str()); 389 | } 390 | 391 | void CompressFile(const char* fname) { 392 | std::string fullinput; 393 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); 394 | 395 | std::string compressed; 396 | Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); 397 | 398 | CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed, 399 | file::Defaults())); 400 | } 401 | 402 | void UncompressFile(const char* fname) { 403 | std::string fullinput; 404 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); 405 | 406 | size_t uncompLength; 407 | CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(), 408 | &uncompLength)); 409 | 410 | std::string uncompressed; 411 | uncompressed.resize(uncompLength); 412 | CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); 413 | 414 | CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed, 415 | file::Defaults())); 416 | } 417 | 418 | void MeasureFile(const char* fname) { 419 | std::string fullinput; 420 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); 421 | std::printf("%-40s :\n", fname); 422 | 423 | int start_len = (snappy::GetFlag(FLAGS_start_len) < 0) 424 | ? 
fullinput.size() 425 | : snappy::GetFlag(FLAGS_start_len); 426 | int end_len = fullinput.size(); 427 | if (snappy::GetFlag(FLAGS_end_len) >= 0) { 428 | end_len = std::min(fullinput.size(), snappy::GetFlag(FLAGS_end_len)); 429 | } 430 | for (int len = start_len; len <= end_len; ++len) { 431 | const char* const input = fullinput.data(); 432 | int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1); 433 | if (snappy::GetFlag(FLAGS_zlib)) 434 | Measure(input, len, ZLIB, repeats, 1024 << 10); 435 | if (snappy::GetFlag(FLAGS_lzo)) 436 | Measure(input, len, LZO, repeats, 1024 << 10); 437 | if (snappy::GetFlag(FLAGS_lz4)) 438 | Measure(input, len, LZ4, repeats, 1024 << 10); 439 | if (snappy::GetFlag(FLAGS_snappy)) 440 | Measure(input, len, SNAPPY, repeats, 4096 << 10); 441 | 442 | // For block-size based measurements 443 | if (0 && snappy::GetFlag(FLAGS_snappy)) { 444 | Measure(input, len, SNAPPY, repeats, 8<<10); 445 | Measure(input, len, SNAPPY, repeats, 16<<10); 446 | Measure(input, len, SNAPPY, repeats, 32<<10); 447 | Measure(input, len, SNAPPY, repeats, 64<<10); 448 | Measure(input, len, SNAPPY, repeats, 256<<10); 449 | Measure(input, len, SNAPPY, repeats, 1024<<10); 450 | } 451 | } 452 | } 453 | 454 | } // namespace 455 | 456 | } // namespace snappy 457 | 458 | int main(int argc, char** argv) { 459 | InitGoogle(argv[0], &argc, &argv, true); 460 | 461 | for (int arg = 1; arg < argc; ++arg) { 462 | if (snappy::GetFlag(FLAGS_write_compressed)) { 463 | snappy::CompressFile(argv[arg]); 464 | } else if (snappy::GetFlag(FLAGS_write_uncompressed)) { 465 | snappy::UncompressFile(argv[arg]); 466 | } else { 467 | snappy::MeasureFile(argv[arg]); 468 | } 469 | } 470 | return 0; 471 | } 472 | -------------------------------------------------------------------------------- /snappy_uncompress_fuzzer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google Inc. All Rights Reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // libFuzzer harness for fuzzing snappy's decompression code. 30 | 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | #include "snappy.h" 38 | 39 | // Entry point for LibFuzzer. 
40 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { 41 | std::string input(reinterpret_cast(data), size); 42 | 43 | // Avoid self-crafted decompression bombs. 44 | size_t uncompressed_size; 45 | constexpr size_t kMaxUncompressedSize = 1 << 20; 46 | bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength( 47 | input.data(), input.size(), &uncompressed_size); 48 | if (!get_uncompressed_length_succeeded || 49 | (uncompressed_size > kMaxUncompressedSize)) { 50 | return 0; 51 | } 52 | 53 | std::string uncompressed; 54 | // The return value of snappy::Uncompress() is ignored because decompression 55 | // will fail on invalid inputs. 56 | snappy::Uncompress(input.data(), input.size(), &uncompressed); 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /testdata/baddata1.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/snappy/6af9287fbdb913f0794d0148c6aa43b58e63c8e3/testdata/baddata1.snappy -------------------------------------------------------------------------------- /testdata/baddata2.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/snappy/6af9287fbdb913f0794d0148c6aa43b58e63c8e3/testdata/baddata2.snappy -------------------------------------------------------------------------------- /testdata/baddata3.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/snappy/6af9287fbdb913f0794d0148c6aa43b58e63c8e3/testdata/baddata3.snappy -------------------------------------------------------------------------------- /testdata/fireworks.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/snappy/6af9287fbdb913f0794d0148c6aa43b58e63c8e3/testdata/fireworks.jpeg 
-------------------------------------------------------------------------------- /testdata/geo.protodata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/snappy/6af9287fbdb913f0794d0148c6aa43b58e63c8e3/testdata/geo.protodata -------------------------------------------------------------------------------- /testdata/paper-100k.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/snappy/6af9287fbdb913f0794d0148c6aa43b58e63c8e3/testdata/paper-100k.pdf --------------------------------------------------------------------------------