├── .travis.yml ├── travis.yml ├── Setup.hs ├── .gitignore ├── AUTHORS.txt ├── bench └── Bench1.hs ├── README.md ├── LICENSE.txt ├── lz4.cabal ├── tests └── Properties.hs └── src ├── Codec └── Compression │ └── LZ4.hsc └── cbits ├── lz4hc.h ├── lz4.h ├── lz4hc.c └── lz4.c /.travis.yml: -------------------------------------------------------------------------------- 1 | language: haskell 2 | -------------------------------------------------------------------------------- /travis.yml: -------------------------------------------------------------------------------- 1 | language: haskell 2 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist* 2 | cabal-dev* 3 | *.o 4 | *.hi 5 | *.chi 6 | *.chs.h 7 | *~ 8 | tests/Properties 9 | -------------------------------------------------------------------------------- /AUTHORS.txt: -------------------------------------------------------------------------------- 1 | Mark Wotton 2 | Simon Hengel 3 | Austin Seipp 4 | -------------------------------------------------------------------------------- /bench/Bench1.hs: -------------------------------------------------------------------------------- 1 | module Main 2 | ( main -- :: IO () 3 | ) where 4 | import Prelude hiding (words) 5 | 6 | import qualified Data.ByteString as S 7 | import qualified Codec.Compression.Snappy as Snappy 8 | import qualified Codec.Compression.QuickLZ as QuickLZ 9 | import qualified Codec.Compression.LZ4 as LZ4 10 | 11 | import Criterion.Main 12 | import Criterion.Config 13 | import Control.DeepSeq (NFData) 14 | 15 | 16 | instance NFData S.ByteString 17 | 18 | 19 | main :: IO () 20 | main = do 21 | words 
<- S.readFile "/usr/share/dict/words" 22 | 23 | let cfg = defaultConfig { cfgPerformGC = ljust True } 24 | defaultMainWith cfg (return ()) 25 | [ bgroup "/usr/share/dict/words" 26 | [ bench "snappy" $ nf Snappy.compress words 27 | , bench "quicklz" $ nf QuickLZ.compress words 28 | , bench "lz4" $ nf LZ4.compress words 29 | , bench "lz4 HC" $ nf LZ4.compressHC words 30 | , bench "lz4 ultra" $ nf LZ4.compressPlusHC words 31 | ] 32 | ] 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fast compression for Haskell ByteStrings 2 | 3 | This library implements Haskell bindings to [lz4][], a fast 4 | compression library. 5 | 6 | [travis-ci.org](http://travis-ci.org) results: [![Build Status](https://secure.travis-ci.org/mwotton/lz4hs.png?branch=master)](http://travis-ci.org/mwotton/lz4hs) 7 | 8 | # Installation 9 | 10 | It's just a `cabal install` away on [Hackage][]: 11 | 12 | ```bash 13 | $ cabal install lz4 14 | ``` 15 | 16 | # Join in 17 | 18 | File bugs in the GitHub [issue tracker][]. 19 | 20 | Master [git repository][gh]: 21 | 22 | * `git clone https://github.com/mwotton/lz4hs.git` 23 | 24 | # Authors 25 | 26 | See `AUTHORS.txt`. 27 | 28 | # License 29 | 30 | BSD3. See `LICENSE.txt` for terms of copyright and redistribution. 31 | 32 | [lz4]: http://code.google.com/p/lz4 33 | [issue tracker]: https://github.com/mwotton/lz4hs/issues 34 | [continuous integration]: https://travis-ci.org/mwotton/lz4hs 35 | [gh]: https://github.com/mwotton/lz4hs 36 | [bb]: http://bitbucket.org/mwotton/lz4hs 37 | [Hackage]: http://hackage.haskell.org/package/lz4 38 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Mark Wotton 2 | 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials provided 15 | with the distribution. 16 | 17 | * Neither the name of Mark Wotton nor the names of other 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /lz4.cabal: -------------------------------------------------------------------------------- 1 | name: lz4 2 | version: 0.2.3 3 | synopsis: LZ4 compression for ByteStrings 4 | description: 5 | High level bindings to the LZ4 compression library. 6 | . 7 | Currently based on lz4 r75. 
C sources are included and no external 8 | dependencies are needed other than @cereal@. 9 | homepage: http://github.com/mwotton/lz4hs 10 | bug-reports: https://github.com/mwotton/lz4hs/issues 11 | license: BSD3 12 | license-file: LICENSE.txt 13 | copyright: Copyright (c) Mark Wotton, Austin Seipp 2012 14 | author: Mark Wotton <mwotton@gmail.com> 15 | maintainer: mwotton@gmail.com 16 | category: Codec 17 | build-type: Simple 18 | cabal-version: >=1.10 19 | 20 | extra-source-files: 21 | src/cbits/lz4.h, src/cbits/lz4.c, 22 | src/cbits/lz4hc.h, src/cbits/lz4hc.c, 23 | tests/Properties.hs, 24 | README.md, AUTHORS.txt 25 | 26 | source-repository head 27 | type: git 28 | location: https://github.com/mwotton/lz4hs 29 | 30 | library 31 | hs-source-dirs: src 32 | exposed-modules: Codec.Compression.LZ4 33 | build-depends: 34 | base >= 3 && < 5, 35 | bytestring, 36 | cereal 37 | 38 | c-sources: src/cbits/lz4.c src/cbits/lz4hc.c 39 | include-dirs: src/cbits 40 | 41 | ghc-options: -Wall -O2 -fwarn-tabs 42 | default-language: Haskell2010 43 | 44 | test-suite properties 45 | hs-source-dirs: tests 46 | main-is: Properties.hs 47 | type: exitcode-stdio-1.0 48 | 49 | build-depends: 50 | base >= 3 && < 5, 51 | bytestring, 52 | hspec >= 1.3, QuickCheck, 53 | HUnit, 54 | lz4 55 | 56 | ghc-options: -fno-cse -fwarn-tabs 57 | default-language: Haskell2010 58 | 59 | benchmark bench1 60 | hs-source-dirs: bench 61 | main-is: Bench1.hs 62 | type: exitcode-stdio-1.0 63 | 64 | build-depends: 65 | base >= 4, 66 | bytestring, 67 | deepseq, 68 | criterion, 69 | lz4, quicklz, snappy 70 | 71 | ghc-options: -Wall -fno-warn-orphans 72 | default-language: Haskell2010 73 | -------------------------------------------------------------------------------- /tests/Properties.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, ViewPatterns #-} 2 | module Main (main) where 3 | import Control.Applicative 4 | import Test.Hspec 5 | import Test.Hspec.QuickCheck 6 | import Test.QuickCheck 7 | 8 | import Codec.Compression.LZ4 9 | import qualified Data.ByteString.Char8 
as S 10 | -- Hspec entry point: runs the purity and round-trip properties below against each compress/decompress pairing, plus one fixed regression input. 11 | main :: IO () 12 | main = hspec $ do 13 | describe "compression" $ do 14 | prop "is pure" $ prop_compress_pure compress decompress 15 | prop "(>>= decomp) = id" $ prop_compression_id compress decompress 16 | describe "high compression" $ do 17 | prop "is pure" $ prop_compress_pure compressHC decompress 18 | prop "(>>= decomp) = id" $ prop_compression_id compressHC decompress 19 | describe "ultra compression" $ do 20 | prop "is pure" $ prop_compress_pure compressPlusHC decompressPlusHC 21 | prop "(>>= decomp) = id" $ prop_compression_id compressPlusHC decompressPlusHC 22 | describe "decompression" $ do 23 | prop "is pure (normal)" $ prop_decompress_pure compress decompress 24 | prop "is pure (high)" $ prop_decompress_pure compressHC decompress 25 | prop "is pure (ultra)" $ prop_decompress_pure compressPlusHC decompressPlusHC 26 | describe "regression test" $ do 27 | let input = "\STXd\STX\SOH\NUL\NUL\NUL\NUL\NUL\NUL\NUL\vexample.com\SOH\NUL\NUL\NUL\NUL\NUL\NUL\NUL\ETX\NUL\NUL\NUL\NUL\NUL\NUL\NUL\SI\NUL\NUL\NUL\NUL\NUL\NUL\NUL\tWhirlpool\NUL\NUL\NUL\NUL\NUL\NUL\NUL\vexample.com\NUL\STXf\SOH\SOH\NUL\NUL\NUL\NUL\NUL\NUL\NUL\ffacebook.com\SOH\NUL\NUL\NUL\NUL\NUL\NUL\NUL\SOH\NUL\NUL\NUL\NUL\NUL\NUL\NUL\b\NUL\NUL\NUL\NUL\NUL\NUL\NUL\EOTSHA1\NUL\NUL\NUL\NUL\NUL\NUL\NUL\ffacebook.com\NUL\SOH\NUL\NUL\NUL\NUL\NUL\NUL\NUL\tgmail.com\SOH\NUL\NUL\NUL\NUL\NUL\NUL" 28 | it "can compress an oddly full-of-NULLs string" $ do 29 | (compress input >>= decompress) `shouldBe` Just input 30 | -- Compressing the same packed input twice gives equal results; the @decomp@ argument is accepted but not used. 31 | prop_compress_pure comp decomp (S.pack -> xs) = 32 | (comp xs) == (comp xs) 33 | -- Round trip: @comp@ followed by @decomp@ must succeed and give back the original input. 34 | prop_compression_id comp decomp (S.pack -> xs) = 35 | maybe False (== xs) (comp xs >>= decomp) 36 | -- Decompressing the same compressed value twice gives equal results. 37 | prop_decompress_pure comp decomp (S.pack -> xs) = 38 | let z = comp xs 39 | in (z >>= decomp) == (z >>= decomp) 40 | -------------------------------------------------------------------------------- /src/Codec/Compression/LZ4.hsc: 
-------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | 4 | -- | 5 | -- Module : Codec.Compression.LZ4 6 | -- Copyright : (c) Mark Wotton, Austin Seipp 2012 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : mwotton@gmail.com 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- This module provides a high level 'ByteString' interface to the 14 | -- lz4 library. More information about lz4 can be found here: 15 | -- <http://code.google.com/p/lz4>. 16 | -- 17 | -- This module prefixes the buffer that is compressed with the 18 | -- uncompressed length (as lz4 can't recover this information 19 | -- itself) and the compressed length. It also has this property: all functions when 20 | -- called with an empty string return @Just Data.ByteString.empty@ 21 | -- 22 | module Codec.Compression.LZ4 23 | ( -- * High level interface 24 | -- ** Compressing and decompressing strict 'ByteString's 25 | compress -- :: S.ByteString -> Maybe S.ByteString 26 | , decompress -- :: S.ByteString -> Maybe S.ByteString 27 | 28 | -- ** High-compression mode 29 | , compressHC -- :: S.ByteString -> Maybe S.ByteString 30 | 31 | -- ** Compression + HC mode 32 | , compressPlusHC -- :: S.ByteString -> Maybe S.ByteString 33 | , decompressPlusHC -- :: S.ByteString -> Maybe S.ByteString 34 | 35 | -- * FFI functions 36 | , c_LZ4_compress -- :: Ptr CChar -> Ptr Word8 -> CInt -> IO CInt 37 | , c_LZ4_compressHC -- :: Ptr CChar -> Ptr Word8 -> CInt -> IO CInt 38 | , c_LZ4_uncompress -- :: Ptr CChar -> Ptr Word8 -> CInt -> IO CInt 39 | , c_LZ4_compressBound -- :: CInt -> CInt 40 | ) where 41 | 42 | import Prelude hiding (max) 43 | import Data.Word 44 | import Foreign.Ptr 45 | import Foreign.C 46 | import System.IO.Unsafe (unsafePerformIO) 47 | import Control.Applicative 48 | 49 | import qualified Data.ByteString as S 50 | import qualified Data.ByteString.Internal as SI 51 | import qualified Data.ByteString.Unsafe as U 52 | 53 | import Data.Serialize 54 | 55 | #include 56 
| #include 57 | 58 | 59 | -------------------------------------------------------------------------------- 60 | -- Compression 61 | 62 | -- | Compress a strict 'ByteString' with the regular lz4 compressor. 63 | -- 64 | -- Yields 'Nothing' when compression fails; otherwise yields @Just@ the 65 | -- compressed string. Empty input is handled up front, so 66 | -- @compress empty == Just empty@. 67 | compress :: S.ByteString -> Maybe S.ByteString 68 | compress xs = 69 | if S.null xs then Just S.empty 70 | else compressor c_LZ4_compress xs 71 | {-# INLINEABLE compress #-} 72 | 73 | -- | Compress a strict 'ByteString' as much as possible, at the price of 74 | -- a massive drop in compression speed. Decompression is faster 75 | -- however, and is done with the ordinary 'decompress'. 76 | -- 77 | -- Yields 'Nothing' when compression fails; otherwise yields @Just@ the 78 | -- compressed string. Empty input is handled up front, so 79 | -- @compressHC empty == Just empty@. 80 | compressHC :: S.ByteString -> Maybe S.ByteString 81 | compressHC xs = 82 | if S.null xs then Just S.empty 83 | else compressor c_LZ4_compressHC xs 84 | {-# INLINEABLE compressHC #-} 85 | 86 | -- | Essentially defined as: 87 | -- 88 | -- > compressPlusHC xs = compress xs >>= compressHC 89 | -- 90 | -- 91 | -- This is an experimental interface. After regular compression, due 92 | -- to output encoding, things like relative offsets in the compression 93 | -- buffer or artifacts from number encoding can end up the same in the 94 | -- output buffer for often repeated data. Therefore, further savings 95 | -- are possible in the input buffer by compressing again. lz4 even in 96 | -- high compression mode will quickly ignore already-compressed data 97 | -- and remain quite fast. Thus, this interface is designed to give a 98 | -- better compression/speed tradeoff than 'compressHC': it doesn't 99 | -- compress as well, but is nowhere near as slow. 
Some context: 100 | -- 101 | -- 102 | -- Must be decompressed with 'decompressPlusHC'. 103 | -- 104 | -- Will return 'Nothing' if the compression fails. Otherwise, returns 105 | -- @Just xs@ with the compressed string (and additionally, if @xs == 106 | -- empty@ then @compressPlusHC empty == Just empty@.) 107 | compressPlusHC :: S.ByteString -> Maybe S.ByteString 108 | compressPlusHC xs 109 | | S.null xs = Just S.empty 110 | | otherwise = compress xs >>= compressHC 111 | {-# INLINEABLE compressPlusHC #-} 112 | 113 | 114 | -------------------------------------------------------------------------------- 115 | -- Decompression 116 | 117 | -- | Decompress a 'ByteString' produced by 'compress' or 'compressHC'. Empty input yields @Just empty@; the result is 'Nothing' when the length header cannot be parsed, when the recorded uncompressed length is not positive or does not fit in a 'CInt', or when lz4 reports an error. 118 | decompress :: S.ByteString -> Maybe S.ByteString 119 | decompress xs 120 | | S.null xs = Just S.empty 121 | | otherwise = 122 | -- Parse the header (uncompressed length, compressed payload), then decode. NOTE(review): the decoder is only told the expected output size, not the payload length -- presumably LZ4_uncompress is the legacy name of LZ4_decompress_fast, which lz4.h recommends for trusted input only; confirm before decoding untrusted data. 123 | either (const Nothing) (unsafePerformIO . go) $ runGet unformat xs 124 | where go (l, str) | l <= 0 || l > fromIntegral (maxBound :: CInt) = return Nothing -- such a length would wrap when narrowed to CInt below, so reject it before allocating 125 | | otherwise = U.unsafeUseAsCString str $ \cstr -> do 126 | out <- SI.createAndTrim l $ \p -> do 127 | r :: Int <- fromIntegral <$> c_LZ4_uncompress cstr p (fromIntegral l) 128 | --- NOTE: r is the count of bytes c_LZ4_uncompress read from input buffer, 129 | --- and NOT the count of bytes used in result buffer 130 | return $! if (r <= 0) then 0 else l 131 | return $! if (S.null out) then Nothing else (Just out) 132 | {-# INLINEABLE decompress #-} 133 | 134 | -- | Decompress a string compressed with 'compressPlusHC'. 
Essentially 135 | -- defined as: 136 | -- 137 | -- > decompressPlusHC xs = decompress xs >>= decompress 138 | -- 139 | decompressPlusHC :: S.ByteString -> Maybe S.ByteString 140 | decompressPlusHC xs 141 | | S.null xs = Just S.empty 142 | | otherwise = decompress xs >>= decompress 143 | {-# INLINEABLE decompressPlusHC #-} 144 | 145 | 146 | -------------------------------------------------------------------------------- 147 | -- Utilities 148 | 149 | -- Shared driver for the compressors: runs @f@ over the input into a buffer of 'c_LZ4_compressBound' bytes, trims it to the byte count @f@ returns, and frames the result with 'format'. Returns 'Nothing' for input larger than LZ4_MAX_INPUT_SIZE or when @f@ produces no output. 150 | compressor :: (Ptr CChar -> Ptr Word8 -> CInt -> IO CInt) 151 | -> S.ByteString 152 | -> Maybe S.ByteString 153 | compressor f xs 154 | | S.length xs > 0x7E000000 = Nothing -- beyond LZ4_MAX_INPUT_SIZE (lz4.h): the bound is 0 there, and larger lengths eventually wrap when narrowed to CInt 155 | | otherwise = unsafePerformIO $ U.unsafeUseAsCStringLen xs $ \(cstr,len) -> do 156 | let len' = fromIntegral len :: CInt 157 | bs <- SI.createAndTrim (fromIntegral (c_LZ4_compressBound len')) $ \output -> 158 | fromIntegral <$> f cstr output len' 159 | return $ if S.null bs then Nothing else 160 | -- Prefix the compressed string with the uncompressed and compressed lengths 161 | Just $ runPut $ format (fromIntegral len) bs 162 | {-# INLINEABLE compressor #-} 163 | 164 | -- Serialises one frame: the uncompressed length, then the compressed length, 165 | -- then the compressed bytes; both lengths are little-endian Word32s. 166 | format :: Word32 -> Putter S.ByteString 167 | format l xs = do 168 | putWord32le l 169 | putWord32le (fromIntegral $ S.length xs) 170 | putByteString xs 171 | 172 | -- Gets the uncompressed length and the compressed ByteString (using its stored length) from the compressed format. 173 | unformat :: Get (Int, S.ByteString) 174 | unformat = (,) <$> (fromIntegral <$> getWord32le) 175 | <*> (fromIntegral <$> getWord32le >>= getByteString) 176 | 177 | 178 | 179 | -------------------------------------------------------------------------------- 180 | -- FFI Bindings 181 | 182 | -- In lz4 r71, LZ4_compressBound was changed to a macro. This is identical to 183 | -- that macro so we don't have to go through C land just to get at it. 184 | -- Like the macro, it yields 0 for sizes outside [0, LZ4_MAX_INPUT_SIZE]. 185 | -- NB: MUST *ALWAYS* BE KEPT IN SYNC WITH lz4.h! 
186 | 187 | --foreign import ccall unsafe "lz4.h LZ4_compressBound" 188 | -- c_LZ4_compressBound :: CInt -> IO CInt 189 | -- | Worst case compression bounds on an input string. Mirrors LZ4_COMPRESSBOUND in lz4.h: a length that is negative or above LZ4_MAX_INPUT_SIZE (0x7E000000) gives 0. 190 | c_LZ4_compressBound :: CInt -- ^ String length 191 | -> CInt -- ^ Worst-case size, or 0 if the length is unsupported 192 | c_LZ4_compressBound sz | sz < 0 || sz > 0x7E000000 = 0 | otherwise = sz + (sz `div` 255) + 16 193 | {-# INLINE c_LZ4_compressBound #-} 194 | 195 | -- | Compresses a string. 196 | foreign import ccall unsafe "lz4.h LZ4_compress" 197 | c_LZ4_compress :: Ptr CChar -- ^ Source 198 | -> Ptr Word8 -- ^ Dest 199 | -> CInt -- ^ Input size 200 | -> IO CInt -- ^ Result 201 | 202 | -- | Compresses a string with very high compression. 203 | foreign import ccall unsafe "lz4hc.h LZ4_compressHC" 204 | c_LZ4_compressHC :: Ptr CChar -- ^ Source 205 | -> Ptr Word8 -- ^ Dest 206 | -> CInt -- ^ Input size 207 | -> IO CInt -- ^ Result 208 | 209 | -- | Decompresses a string. Works for both 'c_LZ4_compress' and 210 | -- 'c_LZ4_compressHC'. 211 | foreign import ccall unsafe "lz4.h LZ4_uncompress" 212 | c_LZ4_uncompress :: Ptr CChar -- ^ Source 213 | -> Ptr Word8 -- ^ Dest 214 | -> CInt -- ^ Size of ORIGINAL INPUT 215 | -> IO CInt -- ^ Result 216 | -------------------------------------------------------------------------------- /src/cbits/lz4hc.h: -------------------------------------------------------------------------------- 1 | /* 2 | LZ4 HC - High Compression Mode of LZ4 3 | Header File 4 | Copyright (C) 2011-2014, Yann Collet. 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 
13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following disclaimer 15 | in the documentation and/or other materials provided with the 16 | distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | You can contact the author at : 31 | - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32 | - LZ4 source repository : http://code.google.com/p/lz4/ 33 | */ 34 | #pragma once 35 | 36 | 37 | #if defined (__cplusplus) 38 | extern "C" { 39 | #endif 40 | 41 | 42 | int LZ4_compressHC (const char* source, char* dest, int inputSize); 43 | /* 44 | LZ4_compressHC : 45 | return : the number of bytes in compressed buffer dest 46 | or 0 if compression fails. 47 | note : destination buffer must be already allocated. 
48 | To avoid any problem, size it to handle worst case situations (input data not compressible) 49 | Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h") 50 | */ 51 | 52 | int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); 53 | /* 54 | LZ4_compressHC_limitedOutput() : 55 | Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. 56 | If it cannot achieve it, compression will stop, and result of the function will be zero. 57 | This function never writes outside of provided output buffer. 58 | 59 | inputSize : Max supported value is 1 GB 60 | maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated) 61 | return : the number of output bytes written in buffer 'dest' 62 | or 0 if compression fails. 63 | */ 64 | 65 | 66 | int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel); 67 | int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); 68 | /* 69 | Same functions as above, but with programmable 'compressionLevel'. 70 | Recommended values are between 4 and 9, although any value between 0 and 16 will work. 71 | 'compressionLevel'==0 means use default 'compressionLevel' value. 72 | Values above 16 behave the same as 16. 73 | Equivalent variants exist for all other compression functions below. 
74 | */ 75 | 76 | /* Note : 77 | Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license) 78 | */ 79 | 80 | 81 | /************************************** 82 | Using an external allocation 83 | **************************************/ 84 | int LZ4_sizeofStateHC(void); 85 | int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize); 86 | int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); 87 | 88 | int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel); 89 | int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); 90 | 91 | /* 92 | These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods. 93 | To know how much memory must be allocated for the compression tables, use : 94 | int LZ4_sizeofStateHC(); 95 | 96 | Note that tables must be aligned for pointer (32 or 64 bits), otherwise compression will fail (return code 0). 97 | 98 | The allocated memory can be provided to the compression functions using 'void* state' parameter. 99 | LZ4_compressHC_withStateHC() and LZ4_compressHC_limitedOutput_withStateHC() are equivalent to previously described functions. 100 | They just use the externally allocated memory area instead of allocating their own (on stack, or on heap). 
101 | */ 102 | 103 | 104 | /************************************** 105 | Streaming Functions 106 | **************************************/ 107 | /* Note : these streaming functions still follows the older model */ 108 | void* LZ4_createHC (const char* inputBuffer); 109 | int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize); 110 | int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize); 111 | char* LZ4_slideInputBufferHC (void* LZ4HC_Data); 112 | int LZ4_freeHC (void* LZ4HC_Data); 113 | 114 | int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel); 115 | int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); 116 | 117 | /* 118 | These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. 119 | In order to achieve this, it is necessary to start creating the LZ4HC Data Structure, thanks to the function : 120 | 121 | void* LZ4_createHC (const char* inputBuffer); 122 | The result of the function is the (void*) pointer on the LZ4HC Data Structure. 123 | This pointer will be needed in all other functions. 124 | If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. 125 | The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. 126 | The input buffer must be already allocated, and size at least 192KB. 127 | 'inputBuffer' will also be the 'const char* source' of the first block. 128 | 129 | All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'. 130 | To compress each block, use either LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(). 
131 | Their behavior are identical to LZ4_compressHC() or LZ4_compressHC_limitedOutput(), 132 | but require the LZ4HC Data Structure as their first argument, and check that each block starts right after the previous one. 133 | If next block does not begin immediately after the previous one, the compression will fail (return 0). 134 | 135 | When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to : 136 | char* LZ4_slideInputBufferHC(void* LZ4HC_Data); 137 | must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. 138 | Note that, for this function to work properly, minimum size of an input buffer must be 192KB. 139 | ==> The memory position where the next input data block must start is provided as the result of the function. 140 | 141 | Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual. 142 | 143 | When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure. 144 | */ 145 | 146 | int LZ4_sizeofStreamStateHC(void); 147 | int LZ4_resetStreamStateHC(void* state, const char* inputBuffer); 148 | 149 | /* 150 | These functions achieve the same result as : 151 | void* LZ4_createHC (const char* inputBuffer); 152 | 153 | They are provided here to allow the user program to allocate memory using its own routines. 154 | 155 | To know how much space must be allocated, use LZ4_sizeofStreamStateHC(); 156 | Note also that space must be aligned for pointers (32 or 64 bits). 157 | 158 | Once space is allocated, you must initialize it using : LZ4_resetStreamStateHC(void* state, const char* inputBuffer); 159 | void* state is a pointer to the space allocated. 160 | It must be aligned for pointers (32 or 64 bits), and be large enough. 161 | The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. 
162 | The input buffer must be already allocated, and size at least 192KB. 163 | 'inputBuffer' will also be the 'const char* source' of the first block. 164 | 165 | The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamStateHC(). 166 | The return value of LZ4_resetStreamStateHC() is 0 if OK. 167 | Any other value means there was an error (typically, state is not aligned for pointers (32 or 64 bits)). 168 | */ 169 | 170 | 171 | #if defined (__cplusplus) 172 | } 173 | #endif 174 | -------------------------------------------------------------------------------- /src/cbits/lz4.h: -------------------------------------------------------------------------------- 1 | /* 2 | LZ4 - Fast LZ compression algorithm 3 | Header File 4 | Copyright (C) 2011-2014, Yann Collet. 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following disclaimer 15 | in the documentation and/or other materials provided with the 16 | distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | You can contact the author at : 31 | - LZ4 source repository : http://code.google.com/p/lz4/ 32 | - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c 33 | */ 34 | #pragma once 35 | 36 | #if defined (__cplusplus) 37 | extern "C" { 38 | #endif 39 | 40 | 41 | /************************************** 42 | Version 43 | **************************************/ 44 | #define LZ4_VERSION_MAJOR 1 /* for major interface/format changes */ 45 | #define LZ4_VERSION_MINOR 2 /* for minor interface/format changes */ 46 | #define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ 47 | 48 | 49 | /************************************** 50 | Tuning parameter 51 | **************************************/ 52 | /* 53 | * LZ4_MEMORY_USAGE : 54 | * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
55 | * Increasing memory usage improves compression ratio 56 | * Reduced memory usage can improve speed, due to cache effect 57 | * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache 58 | */ 59 | #define LZ4_MEMORY_USAGE 14 60 | 61 | 62 | /************************************** 63 | Simple Functions 64 | **************************************/ 65 | 66 | int LZ4_compress (const char* source, char* dest, int inputSize); 67 | int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxOutputSize); 68 | 69 | /* 70 | LZ4_compress() : 71 | Compresses 'inputSize' bytes from 'source' into 'dest'. 72 | Destination buffer must be already allocated, 73 | and must be sized to handle worst case situations (input data not compressible) 74 | Worst case size evaluation is provided by function LZ4_compressBound() 75 | inputSize : Max supported value is LZ4_MAX_INPUT_SIZE 76 | return : the number of bytes written in buffer dest 77 | or 0 if the compression fails 78 | 79 | LZ4_decompress_safe() : 80 | compressedSize : is obviously the source size 81 | maxOutputSize : is the size of the destination buffer, which must be already allocated. 82 | return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) 83 | If the destination buffer is not large enough, decoding will stop and output an error code (<0). 84 | If the source stream is detected malformed, the function will stop decoding and return a negative result. 85 | This function is protected against buffer overflow exploits : 86 | it never writes outside of output buffer, and never reads outside of input buffer. 87 | Therefore, it is protected against malicious data packets. 
88 | */ 89 | 90 | 91 | /* 92 | Note : 93 | Should you prefer to explicitly allocate compression-table memory using your own allocation method, 94 | use the streaming functions provided below, simply reset the memory area between each call to LZ4_compress_continue() 95 | */ 96 | 97 | 98 | /************************************** 99 | Advanced Functions 100 | **************************************/ 101 | #define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ 102 | #define LZ4_COMPRESSBOUND(isize) ((unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) 103 | 104 | /* 105 | LZ4_compressBound() : 106 | Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) 107 | primarily useful for memory allocation of output buffer. 108 | macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). 109 | 110 | isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE 111 | return : maximum output size in a "worst case" scenario 112 | or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) 113 | */ 114 | int LZ4_compressBound(int isize); 115 | 116 | 117 | /* 118 | LZ4_compress_limitedOutput() : 119 | Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. 120 | If it cannot achieve it, compression will stop, and result of the function will be zero. 121 | This function never writes outside of provided output buffer. 
122 | 123 | inputSize : Max supported value is LZ4_MAX_INPUT_SIZE 124 | maxOutputSize : is the size of the destination buffer (which must be already allocated) 125 | return : the number of bytes written in buffer 'dest' 126 | or 0 if the compression fails 127 | */ 128 | int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); 129 | 130 | 131 | /* 132 | LZ4_decompress_fast() : 133 | originalSize : is the original and therefore uncompressed size 134 | return : the number of bytes read from the source buffer (in other words, the compressed size) 135 | If the source stream is malformed, the function will stop decoding and return a negative result. 136 | Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes. 137 | note : This function is a bit faster than LZ4_decompress_safe() 138 | It provides fast decompression and fully respects memory boundaries for properly formed compressed data. 139 | It does not provide full protection against intentionally modified data streams. 140 | Use this function in a trusted environment (data to decode comes from a trusted source). 141 | */ 142 | int LZ4_decompress_fast (const char* source, char* dest, int originalSize); 143 | 144 | 145 | /* 146 | LZ4_decompress_safe_partial() : 147 | This function decompresses a compressed block of size 'compressedSize' at position 'source' 148 | into output buffer 'dest' of size 'maxOutputSize'. 149 | The function tries to stop the decompression operation as soon as 'targetOutputSize' has been reached, 150 | reducing decompression time. 151 | return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) 152 | Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. 153 | Always check how many bytes were decoded. 154 | If the source stream is detected as malformed, the function will stop decoding and return a negative result. 
155 | This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets 156 | */ 157 | int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize); 158 | 159 | 160 | /*********************************************** 161 | Experimental Streaming Compression Functions 162 | ***********************************************/ 163 | 164 | #define LZ4_STREAMSIZE_U32 ((1 << (LZ4_MEMORY_USAGE-2)) + 8) 165 | #define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U32 * sizeof(unsigned int)) 166 | /* 167 | * LZ4_stream_t 168 | * information structure to track an LZ4 stream. 169 | * important : set this structure content to zero before first use ! 170 | */ 171 | typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t; 172 | 173 | /* 174 | * If you prefer dynamic allocation methods, 175 | * LZ4_createStream 176 | * provides a pointer (void*) towards an initialized LZ4_stream_t structure. 177 | * LZ4_free just frees it. 178 | */ 179 | void* LZ4_createStream(); 180 | int LZ4_free (void* LZ4_stream); 181 | 182 | 183 | /* 184 | * LZ4_loadDict 185 | * Use this function to load a static dictionary into LZ4_stream. 186 | * Any previous data will be forgotten, only 'dictionary' will remain in memory. 187 | * Loading a size of 0 is allowed (same effect as init). 188 | * Return : 1 if OK, 0 if error 189 | */ 190 | int LZ4_loadDict (void* LZ4_stream, const char* dictionary, int dictSize); 191 | 192 | /* 193 | * LZ4_compress_continue 194 | * Compress data block 'source', using blocks compressed before as dictionary to improve compression ratio 195 | * Previous data blocks are assumed to still be present at their previous location. 
196 | */ 197 | int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize); 198 | 199 | /* 200 | * LZ4_compress_limitedOutput_continue 201 | * Same as before, but also specify a maximum target compressed size (maxOutputSize) 202 | * If objective cannot be met, compression exits, and returns a zero. 203 | */ 204 | int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize); 205 | 206 | /* 207 | * LZ4_saveDict 208 | * If previously compressed data block is not guaranteed to remain at its previous memory location 209 | * save it into a safe place (char* safeBuffer) 210 | * Note : you don't need to call LZ4_loadDict() afterwards, 211 | * dictionary is immediately usable, you can therefore call again LZ4_compress_continue() 212 | * Return : 1 if OK, 0 if error 213 | * Note : any dictSize > 64 KB will be interpreted as 64KB. 214 | */ 215 | int LZ4_saveDict (void* LZ4_stream, char* safeBuffer, int dictSize); 216 | 217 | 218 | /************************************************ 219 | Experimental Streaming Decompression Functions 220 | ************************************************/ 221 | 222 | #define LZ4_STREAMDECODESIZE_U32 4 223 | #define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int)) 224 | /* 225 | * LZ4_streamDecode_t 226 | * information structure to track an LZ4 stream. 227 | * important : set this structure content to zero before first use ! 228 | */ 229 | typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t; 230 | 231 | /* 232 | * If you prefer dynamic allocation methods, 233 | * LZ4_createStreamDecode() 234 | * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. 235 | * LZ4_free just frees it. 
236 | */ 237 | void* LZ4_createStreamDecode(); 238 | int LZ4_free (void* LZ4_stream); /* yes, it's the same one as for compression */ 239 | 240 | /* 241 | *_continue() : 242 | These decoding functions allow decompression of multiple blocks in "streaming" mode. 243 | Previously decoded blocks must still be available at the memory position where they were decoded. 244 | If it's not possible, save the relevant part of decoded data into a safe buffer, 245 | and indicate where it stands using LZ4_setDictDecode() 246 | */ 247 | int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize); 248 | int LZ4_decompress_fast_continue (void* LZ4_streamDecode, const char* source, char* dest, int originalSize); 249 | 250 | /* 251 | * LZ4_setDictDecode 252 | * Use this function to instruct where to find the dictionary. 253 | * This function can be used to specify a static dictionary, 254 | * or to instruct where to find some previously decoded data saved into a different memory space. 255 | * Setting a size of 0 is allowed (same effect as no dictionary). 256 | * Return : 1 if OK, 0 if error 257 | */ 258 | int LZ4_setDictDecode (void* LZ4_streamDecode, const char* dictionary, int dictSize); 259 | 260 | 261 | /* 262 | Advanced decoding functions : 263 | *_usingDict() : 264 | These decoding functions work the same as 265 | a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue() 266 | all together into a single function call. 267 | It doesn't use nor update an LZ4_streamDecode_t structure. 
268 | */ 269 | int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize); 270 | int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize); 271 | 272 | 273 | 274 | 275 | /************************************** 276 | Obsolete Functions 277 | **************************************/ 278 | /* 279 | Obsolete decompression functions 280 | These function names are deprecated and should no longer be used. 281 | They are only provided here for compatibility with older user programs. 282 | - LZ4_uncompress is the same as LZ4_decompress_fast 283 | - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe 284 | */ 285 | int LZ4_uncompress (const char* source, char* dest, int outputSize); 286 | int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); 287 | 288 | /* Obsolete functions for externally allocated state; use streaming interface instead */ 289 | int LZ4_sizeofState(void); 290 | int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); 291 | int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); 292 | 293 | /* Obsolete streaming functions; use new streaming interface whenever possible */ 294 | void* LZ4_create (const char* inputBuffer); 295 | int LZ4_sizeofStreamState(void); 296 | int LZ4_resetStreamState(void* state, const char* inputBuffer); 297 | char* LZ4_slideInputBuffer (void* state); 298 | 299 | /* Obsolete streaming decoding functions */ 300 | int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int compressedSize, int maxOutputSize); 301 | int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int originalSize); 302 | 303 | 304 | #if defined (__cplusplus) 305 | } 306 | #endif 307 | 
-------------------------------------------------------------------------------- /src/cbits/lz4hc.c: -------------------------------------------------------------------------------- 1 | /* 2 | LZ4 HC - High Compression Mode of LZ4 3 | Copyright (C) 2011-2014, Yann Collet. 4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following disclaimer 14 | in the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | You can contact the author at : 30 | - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31 | - LZ4 source repository : http://code.google.com/p/lz4/ 32 | */ 33 | 34 | 35 | 36 | /************************************** 37 | Tuning Parameter 38 | **************************************/ 39 | #define LZ4HC_DEFAULT_COMPRESSIONLEVEL 8 40 | 41 | 42 | /************************************** 43 | Memory routines 44 | **************************************/ 45 | #include /* calloc, free */ 46 | #define ALLOCATOR(s) calloc(1,s) 47 | #define FREEMEM free 48 | #include /* memset, memcpy */ 49 | #define MEM_INIT memset 50 | 51 | 52 | /************************************** 53 | CPU Feature Detection 54 | **************************************/ 55 | /* 32 or 64 bits ? */ 56 | #if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ 57 | || defined(__powerpc64__) || defined(__powerpc64le__) \ 58 | || defined(__ppc64__) || defined(__ppc64le__) \ 59 | || defined(__PPC64__) || defined(__PPC64LE__) \ 60 | || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */ 61 | # define LZ4_ARCH64 1 62 | #else 63 | # define LZ4_ARCH64 0 64 | #endif 65 | 66 | /* 67 | * Little Endian or Big Endian ? 
68 | * Overwrite the #define below if you know your architecture's endianness 69 | */ 70 | #include /* Apparently required to detect endianness */ 71 | #if defined (__GLIBC__) 72 | # include 73 | # if (__BYTE_ORDER == __BIG_ENDIAN) 74 | # define LZ4_BIG_ENDIAN 1 75 | # endif 76 | #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) 77 | # define LZ4_BIG_ENDIAN 1 78 | #elif defined(__sparc) || defined(__sparc__) \ 79 | || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ 80 | || defined(__hpux) || defined(__hppa) \ 81 | || defined(_MIPSEB) || defined(__s390__) 82 | # define LZ4_BIG_ENDIAN 1 83 | #else 84 | /* Little Endian assumed. PDP Endian and other very rare endian formats are unsupported. */ 85 | #endif 86 | 87 | /* 88 | * Unaligned memory access is automatically enabled for "common" CPUs, such as x86. 89 | * For other CPUs, the compiler will be more cautious, and insert extra code to ensure aligned access is respected 90 | * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance 91 | */ 92 | #if defined(__ARM_FEATURE_UNALIGNED) 93 | # define LZ4_FORCE_UNALIGNED_ACCESS 1 94 | #endif 95 | 96 | /* Define this parameter if your target system or compiler does not support hardware bit count */ 97 | #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ 98 | # define LZ4_FORCE_SW_BITCOUNT 99 | #endif 100 | 101 | 102 | /************************************** 103 | Compiler Options 104 | **************************************/ 105 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ 106 | /* "restrict" is a known keyword */ 107 | #else 108 | # define restrict /* Disable restrict */ 109 | #endif 110 | 111 | #ifdef _MSC_VER /* Visual Studio */ 112 | # define FORCE_INLINE static __forceinline 113 | # 
include /* For Visual 2005 */ 114 | # if LZ4_ARCH64 /* 64-bits */ 115 | # pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ 116 | # pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ 117 | # else /* 32-bits */ 118 | # pragma intrinsic(_BitScanForward) /* For Visual 2005 */ 119 | # pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ 120 | # endif 121 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 122 | # pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable used */ 123 | #else 124 | # ifdef __GNUC__ 125 | # define FORCE_INLINE static inline __attribute__((always_inline)) 126 | # else 127 | # define FORCE_INLINE static inline 128 | # endif 129 | #endif 130 | 131 | #ifdef _MSC_VER /* Visual Studio */ 132 | # define lz4_bswap16(x) _byteswap_ushort(x) 133 | #else 134 | # define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) 135 | #endif 136 | 137 | 138 | /************************************** 139 | Includes 140 | **************************************/ 141 | #include "lz4hc.h" 142 | #include "lz4.h" 143 | 144 | 145 | /************************************** 146 | Basic Types 147 | **************************************/ 148 | #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ 149 | # include 150 | typedef uint8_t BYTE; 151 | typedef uint16_t U16; 152 | typedef uint32_t U32; 153 | typedef int32_t S32; 154 | typedef uint64_t U64; 155 | #else 156 | typedef unsigned char BYTE; 157 | typedef unsigned short U16; 158 | typedef unsigned int U32; 159 | typedef signed int S32; 160 | typedef unsigned long long U64; 161 | #endif 162 | 163 | #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) 164 | # define _PACKED __attribute__ ((packed)) 165 | #else 166 | # define _PACKED 167 | #endif 168 | 169 | #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 170 | # ifdef __IBMC__ 171 | # pragma pack(1) 172 | # 
else 173 | # pragma pack(push, 1) 174 | # endif 175 | #endif 176 | 177 | typedef struct _U16_S { U16 v; } _PACKED U16_S; 178 | typedef struct _U32_S { U32 v; } _PACKED U32_S; 179 | typedef struct _U64_S { U64 v; } _PACKED U64_S; 180 | 181 | #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 182 | # pragma pack(pop) 183 | #endif 184 | 185 | #define A64(x) (((U64_S *)(x))->v) 186 | #define A32(x) (((U32_S *)(x))->v) 187 | #define A16(x) (((U16_S *)(x))->v) 188 | 189 | 190 | /************************************** 191 | Constants 192 | **************************************/ 193 | #define MINMATCH 4 194 | 195 | #define DICTIONARY_LOGSIZE 16 196 | #define MAXD (1<> ((MINMATCH*8)-HASH_LOG)) 268 | #define HASH_VALUE(p) HASH_FUNCTION(A32(p)) 269 | #define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) 270 | #define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] 271 | #define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) 272 | 273 | 274 | /************************************** 275 | Private functions 276 | **************************************/ 277 | #if LZ4_ARCH64 278 | 279 | FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) 280 | { 281 | #if defined(LZ4_BIG_ENDIAN) 282 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 283 | unsigned long r = 0; 284 | _BitScanReverse64( &r, val ); 285 | return (int)(r>>3); 286 | # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 287 | return (__builtin_clzll(val) >> 3); 288 | # else 289 | int r; 290 | if (!(val>>32)) { r=4; } else { r=0; val>>=32; } 291 | if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } 292 | r += (!val); 293 | return r; 294 | # endif 295 | #else 296 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 297 | unsigned long r = 0; 298 | _BitScanForward64( &r, val ); 299 | return (int)(r>>3); 300 | # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 301 | return 
(__builtin_ctzll(val) >> 3); 302 | # else 303 | static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; 304 | return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; 305 | # endif 306 | #endif 307 | } 308 | 309 | #else 310 | 311 | FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) 312 | { 313 | #if defined(LZ4_BIG_ENDIAN) 314 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 315 | unsigned long r; 316 | _BitScanReverse( &r, val ); 317 | return (int)(r>>3); 318 | # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 319 | return (__builtin_clz(val) >> 3); 320 | # else 321 | int r; 322 | if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 323 | r += (!val); 324 | return r; 325 | # endif 326 | #else 327 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 328 | unsigned long r; 329 | _BitScanForward( &r, val ); 330 | return (int)(r>>3); 331 | # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 332 | return (__builtin_ctz(val) >> 3); 333 | # else 334 | static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; 335 | return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 336 | # endif 337 | #endif 338 | } 339 | 340 | #endif 341 | 342 | 343 | int LZ4_sizeofStreamStateHC() 344 | { 345 | return sizeof(LZ4HC_Data_Structure); 346 | } 347 | 348 | FORCE_INLINE void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base) 349 | { 350 | MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); 351 | MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); 352 | hc4->nextToUpdate = base + 1; 353 | hc4->base = base; 354 | hc4->inputBuffer = base; 355 | 
hc4->end = base; 356 | } 357 | 358 | int LZ4_resetStreamStateHC(void* state, const char* inputBuffer) 359 | { 360 | if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */ 361 | LZ4_initHC((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer); 362 | return 0; 363 | } 364 | 365 | 366 | void* LZ4_createHC (const char* inputBuffer) 367 | { 368 | void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); 369 | LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer); 370 | return hc4; 371 | } 372 | 373 | 374 | int LZ4_freeHC (void* LZ4HC_Data) 375 | { 376 | FREEMEM(LZ4HC_Data); 377 | return (0); 378 | } 379 | 380 | 381 | /* Update chains up to ip (excluded) */ 382 | FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) 383 | { 384 | U16* chainTable = hc4->chainTable; 385 | HTYPE* HashTable = hc4->hashTable; 386 | INITBASE(base,hc4->base); 387 | 388 | while(hc4->nextToUpdate < ip) 389 | { 390 | const BYTE* const p = hc4->nextToUpdate; 391 | size_t delta = (p) - HASH_POINTER(p); 392 | if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; 393 | DELTANEXT(p) = (U16)delta; 394 | HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base); 395 | hc4->nextToUpdate++; 396 | } 397 | } 398 | 399 | 400 | char* LZ4_slideInputBufferHC(void* LZ4HC_Data) 401 | { 402 | LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data; 403 | U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB; 404 | distance = (distance >> 16) << 16; /* Must be a multiple of 64 KB */ 405 | LZ4HC_Insert(hc4, hc4->end - MINMATCH); 406 | memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB); 407 | hc4->nextToUpdate -= distance; 408 | hc4->base -= distance; 409 | if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) /* Avoid overflow */ 410 | { 411 | int i; 412 | hc4->base += 1 GB; 413 | for (i=0; ihashTable[i] -= 1 GB; 414 | } 415 | hc4->end -= distance; 416 | return (char*)(hc4->end); 417 | } 418 | 
419 | 420 | FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) 421 | { 422 | const BYTE* p1t = p1; 423 | 424 | while (p1tchainTable; 441 | HTYPE* const HashTable = hc4->hashTable; 442 | const BYTE* ref; 443 | INITBASE(base,hc4->base); 444 | int nbAttempts=maxNbAttempts; 445 | size_t repl=0, ml=0; 446 | U16 delta=0; /* useless assignment, to remove an uninitialization warning */ 447 | 448 | /* HC4 match finder */ 449 | LZ4HC_Insert(hc4, ip); 450 | ref = HASH_POINTER(ip); 451 | 452 | #define REPEAT_OPTIMIZATION 453 | #ifdef REPEAT_OPTIMIZATION 454 | /* Detect repetitive sequences of length <= 4 */ 455 | if ((U32)(ip-ref) <= 4) /* potential repetition */ 456 | { 457 | if (A32(ref) == A32(ip)) /* confirmed */ 458 | { 459 | delta = (U16)(ip-ref); 460 | repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; 461 | *matchpos = ref; 462 | } 463 | ref = GETNEXT(ref); 464 | } 465 | #endif 466 | 467 | while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) 468 | { 469 | nbAttempts--; 470 | if (*(ref+ml) == *(ip+ml)) 471 | if (A32(ref) == A32(ip)) 472 | { 473 | size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; 474 | if (mlt > ml) { ml = mlt; *matchpos = ref; } 475 | } 476 | ref = GETNEXT(ref); 477 | } 478 | 479 | #ifdef REPEAT_OPTIMIZATION 480 | /* Complete table */ 481 | if (repl) 482 | { 483 | const BYTE* ptr = ip; 484 | const BYTE* end; 485 | 486 | end = ip + repl - (MINMATCH-1); 487 | while(ptr < end-delta) 488 | { 489 | DELTANEXT(ptr) = delta; /* Pre-Load */ 490 | ptr++; 491 | } 492 | do 493 | { 494 | DELTANEXT(ptr) = delta; 495 | HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); /* Head of chain */ 496 | ptr++; 497 | } while(ptr < end); 498 | hc4->nextToUpdate = end; 499 | } 500 | #endif 501 | 502 | return (int)ml; 503 | } 504 | 505 | 506 | FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const 
BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts) 507 | { 508 | U16* const chainTable = hc4->chainTable; 509 | HTYPE* const HashTable = hc4->hashTable; 510 | INITBASE(base,hc4->base); 511 | const BYTE* ref; 512 | int nbAttempts = maxNbAttempts; 513 | int delta = (int)(ip-startLimit); 514 | 515 | /* First Match */ 516 | LZ4HC_Insert(hc4, ip); 517 | ref = HASH_POINTER(ip); 518 | 519 | while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) 520 | { 521 | nbAttempts--; 522 | if (*(startLimit + longest) == *(ref - delta + longest)) 523 | if (A32(ref) == A32(ip)) 524 | { 525 | #if 1 526 | const BYTE* reft = ref+MINMATCH; 527 | const BYTE* ipt = ip+MINMATCH; 528 | const BYTE* startt = ip; 529 | 530 | while (iptstartLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;} 550 | 551 | if ((ipt-startt) > longest) 552 | { 553 | longest = (int)(ipt-startt); 554 | *matchpos = reft; 555 | *startpos = startt; 556 | } 557 | } 558 | ref = GETNEXT(ref); 559 | } 560 | 561 | return longest; 562 | } 563 | 564 | 565 | typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; 566 | 567 | FORCE_INLINE int LZ4HC_encodeSequence ( 568 | const BYTE** ip, 569 | BYTE** op, 570 | const BYTE** anchor, 571 | int matchLength, 572 | const BYTE* ref, 573 | limitedOutput_directive limitedOutputBuffer, 574 | BYTE* oend) 575 | { 576 | int length; 577 | BYTE* token; 578 | 579 | /* Encode Literal length */ 580 | length = (int)(*ip - *anchor); 581 | token = (*op)++; 582 | if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1; /* Check output limit */ 583 | if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } 584 | else *token = (BYTE)(length<>8) > oend)) return 1; /* Check output limit */ 595 | if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; 
*(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } 596 | else *token += (BYTE)(length); 597 | 598 | /* Prepare next loop */ 599 | *ip += matchLength; 600 | *anchor = *ip; 601 | 602 | return 0; 603 | } 604 | 605 | 606 | #define MAX_COMPRESSION_LEVEL 16 607 | static int LZ4HC_compress_generic ( 608 | void* ctxvoid, 609 | const char* source, 610 | char* dest, 611 | int inputSize, 612 | int maxOutputSize, 613 | int compressionLevel, 614 | limitedOutput_directive limit 615 | ) 616 | { 617 | LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid; 618 | const BYTE* ip = (const BYTE*) source; 619 | const BYTE* anchor = ip; 620 | const BYTE* const iend = ip + inputSize; 621 | const BYTE* const mflimit = iend - MFLIMIT; 622 | const BYTE* const matchlimit = (iend - LASTLITERALS); 623 | 624 | BYTE* op = (BYTE*) dest; 625 | BYTE* const oend = op + maxOutputSize; 626 | 627 | const int maxNbAttempts = compressionLevel > MAX_COMPRESSION_LEVEL ? 1 << MAX_COMPRESSION_LEVEL : compressionLevel ? 
1<<(compressionLevel-1) : 1<end) return 0; 640 | ctx->end += inputSize; 641 | 642 | ip++; 643 | 644 | /* Main Loop */ 645 | while (ip < mflimit) 646 | { 647 | ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); 648 | if (!ml) { ip++; continue; } 649 | 650 | /* saved, in case we would skip too much */ 651 | start0 = ip; 652 | ref0 = ref; 653 | ml0 = ml; 654 | 655 | _Search2: 656 | if (ip+ml < mflimit) 657 | ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts); 658 | else ml2 = ml; 659 | 660 | if (ml2 == ml) /* No better match */ 661 | { 662 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; 663 | continue; 664 | } 665 | 666 | if (start0 < ip) 667 | { 668 | if (start2 < ip + ml0) /* empirical */ 669 | { 670 | ip = start0; 671 | ref = ref0; 672 | ml = ml0; 673 | } 674 | } 675 | 676 | /* Here, start0==ip */ 677 | if ((start2 - ip) < 3) /* First Match too small : removed */ 678 | { 679 | ml = ml2; 680 | ip = start2; 681 | ref =ref2; 682 | goto _Search2; 683 | } 684 | 685 | _Search3: 686 | /* 687 | * Currently we have : 688 | * ml2 > ml1, and 689 | * ip1+3 <= ip2 (usually < ip1+ml1) 690 | */ 691 | if ((start2 - ip) < OPTIMAL_ML) 692 | { 693 | int correction; 694 | int new_ml = ml; 695 | if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; 696 | if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; 697 | correction = new_ml - (int)(start2 - ip); 698 | if (correction > 0) 699 | { 700 | start2 += correction; 701 | ref2 += correction; 702 | ml2 -= correction; 703 | } 704 | } 705 | /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ 706 | 707 | if (start2 + ml2 < mflimit) 708 | ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts); 709 | else ml3 = ml2; 710 | 711 | if (ml3 == ml2) /* No better match : 2 sequences to encode */ 712 | { 713 | /* ip & ref are 
known; Now for ml */ 714 | if (start2 < ip+ml) ml = (int)(start2 - ip); 715 | /* Now, encode 2 sequences */ 716 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; 717 | ip = start2; 718 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; 719 | continue; 720 | } 721 | 722 | if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */ 723 | { 724 | if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ 725 | { 726 | if (start2 < ip+ml) 727 | { 728 | int correction = (int)(ip+ml - start2); 729 | start2 += correction; 730 | ref2 += correction; 731 | ml2 -= correction; 732 | if (ml2 < MINMATCH) 733 | { 734 | start2 = start3; 735 | ref2 = ref3; 736 | ml2 = ml3; 737 | } 738 | } 739 | 740 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; 741 | ip = start3; 742 | ref = ref3; 743 | ml = ml3; 744 | 745 | start0 = start2; 746 | ref0 = ref2; 747 | ml0 = ml2; 748 | goto _Search2; 749 | } 750 | 751 | start2 = start3; 752 | ref2 = ref3; 753 | ml2 = ml3; 754 | goto _Search3; 755 | } 756 | 757 | /* 758 | * OK, now we have 3 ascending matches; let's write at least the first one 759 | * ip & ref are known; Now for ml 760 | */ 761 | if (start2 < ip+ml) 762 | { 763 | if ((start2 - ip) < (int)ML_MASK) 764 | { 765 | int correction; 766 | if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; 767 | if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; 768 | correction = ml - (int)(start2 - ip); 769 | if (correction > 0) 770 | { 771 | start2 += correction; 772 | ref2 += correction; 773 | ml2 -= correction; 774 | } 775 | } 776 | else 777 | { 778 | ml = (int)(start2 - ip); 779 | } 780 | } 781 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; 782 | 783 | ip = start2; 784 | ref = ref2; 785 | ml = ml2; 786 | 787 | start2 = start3; 788 | ref2 = ref3; 789 | ml2 = ml3; 790 | 791 | goto _Search3; 792 | 793 | } 794 | 795 | /* 
Encode Last Literals */ 796 | { 797 | int lastRun = (int)(iend - anchor); 798 | if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ 799 | if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 800 | else *op++ = (BYTE)(lastRun< /* Apparently required to detect endianess */ 64 | #if defined (__GLIBC__) 65 | # include 66 | # if (__BYTE_ORDER == __BIG_ENDIAN) 67 | # define LZ4_BIG_ENDIAN 1 68 | # endif 69 | #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) 70 | # define LZ4_BIG_ENDIAN 1 71 | #elif defined(__sparc) || defined(__sparc__) \ 72 | || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ 73 | || defined(__hpux) || defined(__hppa) \ 74 | || defined(_MIPSEB) || defined(__s390__) 75 | # define LZ4_BIG_ENDIAN 1 76 | #else 77 | /* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ 78 | #endif 79 | 80 | /* 81 | * Unaligned memory access is automatically enabled for "common" CPU, such as x86. 
82 | * For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property 83 | * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance 84 | */ 85 | #if defined(__ARM_FEATURE_UNALIGNED) 86 | # define LZ4_FORCE_UNALIGNED_ACCESS 1 87 | #endif 88 | 89 | /* Define this parameter if your target system or compiler does not support hardware bit count */ 90 | #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ 91 | # define LZ4_FORCE_SW_BITCOUNT 92 | #endif 93 | 94 | /* 95 | * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : 96 | * This option may provide a small boost to performance for some big endian cpu, although probably modest. 97 | * You may set this option to 1 if data will remain within closed environment. 98 | * This option is useless on Little_Endian CPU (such as x86) 99 | */ 100 | 101 | /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ 102 | 103 | 104 | /************************************** 105 | Compiler Options 106 | **************************************/ 107 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ 108 | /* "restrict" is a known keyword */ 109 | #else 110 | # define restrict /* Disable restrict */ 111 | #endif 112 | 113 | #ifdef _MSC_VER /* Visual Studio */ 114 | # define FORCE_INLINE static __forceinline 115 | # include /* For Visual 2005 */ 116 | # if LZ4_ARCH64 /* 64-bits */ 117 | # pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ 118 | # pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ 119 | # else /* 32-bits */ 120 | # pragma intrinsic(_BitScanForward) /* For Visual 2005 */ 121 | # pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ 122 | # endif 123 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 124 | #else 125 | # ifdef __GNUC__ 126 | # define FORCE_INLINE static 
inline __attribute__((always_inline)) 127 | # else 128 | # define FORCE_INLINE static inline 129 | # endif 130 | #endif 131 | 132 | #ifdef _MSC_VER /* Visual Studio */ 133 | # define lz4_bswap16(x) _byteswap_ushort(x) 134 | #else 135 | # define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) 136 | #endif 137 | 138 | #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 139 | 140 | #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 141 | # define expect(expr,value) (__builtin_expect ((expr),(value)) ) 142 | #else 143 | # define expect(expr,value) (expr) 144 | #endif 145 | 146 | #define likely(expr) expect((expr) != 0, 1) 147 | #define unlikely(expr) expect((expr) != 0, 0) 148 | 149 | 150 | /************************************** 151 | Memory routines 152 | **************************************/ 153 | #include /* malloc, calloc, free */ 154 | #define ALLOCATOR(n,s) calloc(n,s) 155 | #define FREEMEM free 156 | #include /* memset, memcpy */ 157 | #define MEM_INIT memset 158 | 159 | 160 | /************************************** 161 | Includes 162 | **************************************/ 163 | #include "lz4.h" 164 | 165 | 166 | /************************************** 167 | Basic Types 168 | **************************************/ 169 | #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ 170 | # include 171 | typedef uint8_t BYTE; 172 | typedef uint16_t U16; 173 | typedef uint32_t U32; 174 | typedef int32_t S32; 175 | typedef uint64_t U64; 176 | #else 177 | typedef unsigned char BYTE; 178 | typedef unsigned short U16; 179 | typedef unsigned int U32; 180 | typedef signed int S32; 181 | typedef unsigned long long U64; 182 | #endif 183 | 184 | #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) 185 | # define _PACKED __attribute__ ((packed)) 186 | #else 187 | # define _PACKED 188 | #endif 189 | 190 | #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 191 | # if 
defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) 192 | # pragma pack(1) 193 | # else 194 | # pragma pack(push, 1) 195 | # endif 196 | #endif 197 | 198 | typedef struct { U16 v; } _PACKED U16_S; 199 | typedef struct { U32 v; } _PACKED U32_S; 200 | typedef struct { U64 v; } _PACKED U64_S; 201 | typedef struct {size_t v;} _PACKED size_t_S; 202 | 203 | #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 204 | # if defined(__SUNPRO_C) || defined(__SUNPRO_CC) 205 | # pragma pack(0) 206 | # else 207 | # pragma pack(pop) 208 | # endif 209 | #endif 210 | 211 | #define A16(x) (((U16_S *)(x))->v) 212 | #define A32(x) (((U32_S *)(x))->v) 213 | #define A64(x) (((U64_S *)(x))->v) 214 | #define AARCH(x) (((size_t_S *)(x))->v) 215 | 216 | 217 | /************************************** 218 | Constants 219 | **************************************/ 220 | #define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) 221 | #define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) 222 | #define HASH_SIZE_U32 (1 << LZ4_HASHLOG) 223 | 224 | #define MINMATCH 4 225 | 226 | #define COPYLENGTH 8 227 | #define LASTLITERALS 5 228 | #define MFLIMIT (COPYLENGTH+MINMATCH) 229 | static const int LZ4_minLength = (MFLIMIT+1); 230 | 231 | #define KB *(1U<<10) 232 | #define MB *(1U<<20) 233 | #define GB *(1U<<30) 234 | 235 | #define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1)) 236 | #define SKIPSTRENGTH 6 /* Increasing this value will make the compression run slower on incompressible data */ 237 | 238 | #define MAXD_LOG 16 239 | #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 240 | 241 | #define ML_BITS 4 242 | #define ML_MASK ((1U<=e; */ 291 | #else 292 | # define LZ4_WILDCOPY(d,s,e) { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d>3); 308 | # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 309 | return (__builtin_clzll(val) >> 3); 310 | # else 311 | int r; 312 | if (!(val>>32)) { r=4; } else { r=0; val>>=32; } 313 | if (!(val>>16)) { r+=2; val>>=8; } else { 
val>>=24; } 314 | r += (!val); 315 | return r; 316 | # endif 317 | # else 318 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 319 | unsigned long r = 0; 320 | _BitScanForward64( &r, val ); 321 | return (int)(r>>3); 322 | # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 323 | return (__builtin_ctzll(val) >> 3); 324 | # else 325 | static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; 326 | return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; 327 | # endif 328 | # endif 329 | } 330 | 331 | #else 332 | 333 | int LZ4_NbCommonBytes (register U32 val) 334 | { 335 | # if defined(LZ4_BIG_ENDIAN) 336 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 337 | unsigned long r = 0; 338 | _BitScanReverse( &r, val ); 339 | return (int)(r>>3); 340 | # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 341 | return (__builtin_clz(val) >> 3); 342 | # else 343 | int r; 344 | if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 345 | r += (!val); 346 | return r; 347 | # endif 348 | # else 349 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 350 | unsigned long r; 351 | _BitScanForward( &r, val ); 352 | return (int)(r>>3); 353 | # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 354 | return (__builtin_ctz(val) >> 3); 355 | # else 356 | static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; 357 | return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 358 | # endif 359 | # endif 360 | } 361 | 362 | #endif 363 | 364 | 365 | /******************************** 366 | Compression functions 367 | ********************************/ 368 | int 
LZ4_compressBound(int isize) { /* function form of the LZ4_COMPRESSBOUND macro */ return LZ4_COMPRESSBOUND(isize); } 369 | 370 | /* Multiplicative hash of a 32-bit sequence : multiplies by 2654435761U and keeps the upper bits of the product. byU16 tables use LZ4_HASHLOG+1 bits, the other table types LZ4_HASHLOG bits (assumes a 32-bit product, MINMATCH*8 being 32). */ static int LZ4_hashSequence(U32 sequence, tableType_t tableType) 371 | { 372 | if (tableType == byU16) 373 | return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); 374 | else 375 | return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); 376 | } 377 | 378 | /* Hashes the 4 bytes read at p through A32() */ static int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } 379 | 380 | /* Records position p in slot h of the table : as a raw pointer (byPtr), or as an offset from srcBase truncated to U32 (byU32) or U16 (byU16) */ static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) 381 | { 382 | switch (tableType) 383 | { 384 | case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } 385 | case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } 386 | case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } 387 | } 388 | } 389 | 390 | /* Hashes the bytes at p, then records p in the corresponding slot */ static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) 391 | { 392 | U32 h = LZ4_hashPosition(p, tableType); 393 | LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); 394 | } 395 | 396 | /* Reads slot h back as a pointer : the stored pointer itself (byPtr), or srcBase plus the stored U32 / U16 offset */ static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) 397 | { 398 | if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } 399 | if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } 400 | { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ 401 | } 402 | 403 | /* Hashes the bytes at p and returns the position currently recorded in that slot */ static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) 404 | { 405 | U32 h = LZ4_hashPosition(p, tableType); 406 | return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); 407 | } 408 | 409 | static unsigned LZ4_count(const BYTE* pIn, const BYTE* pRef, const BYTE*
pInLimit) 410 | { 411 | const BYTE* const pStart = pIn; 412 | 413 | while (likely(pIndictSize; 446 | const BYTE* const dictionary = dictPtr->dictionary; 447 | const BYTE* const dictEnd = dictionary + dictPtr->dictSize; 448 | const size_t dictDelta = dictEnd - (const BYTE*)source; 449 | const BYTE* anchor = (const BYTE*) source; 450 | const BYTE* const iend = ip + inputSize; 451 | const BYTE* const mflimit = iend - MFLIMIT; 452 | const BYTE* const matchlimit = iend - LASTLITERALS; 453 | 454 | BYTE* op = (BYTE*) dest; 455 | BYTE* const olimit = op + maxOutputSize; 456 | 457 | const int skipStrength = SKIPSTRENGTH; 458 | U32 forwardH; 459 | size_t refDelta=0; 460 | 461 | /* Init conditions */ 462 | if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ 463 | switch(dict) 464 | { 465 | case noDict: 466 | default: 467 | base = (const BYTE*)source; 468 | lowLimit = (const BYTE*)source; 469 | break; 470 | case withPrefix64k: 471 | base = (const BYTE*)source - dictPtr->currentOffset; 472 | lowLimit = (const BYTE*)source - dictPtr->dictSize; 473 | break; 474 | case usingExtDict: 475 | base = (const BYTE*)source - dictPtr->currentOffset; 476 | lowLimit = (const BYTE*)source; 477 | break; 478 | } 479 | if ((tableType == byU16) && (inputSize>=(int)LZ4_64KLIMIT)) return 0; /* Size too large (not within 64K limit) */ 480 | if (inputSize> skipStrength; 502 | //if (step>8) step=8; // required for valid forwardIp ; slows down uncompressible data a bit 503 | 504 | if (unlikely(forwardIp > mflimit)) goto _last_literals; 505 | 506 | ref = LZ4_getPositionOnHash(h, ctx, tableType, base); 507 | if (dict==usingExtDict) 508 | { 509 | if (ref<(const BYTE*)source) 510 | { 511 | refDelta = dictDelta; 512 | lowLimit = dictionary; 513 | } 514 | else 515 | { 516 | refDelta = 0; 517 | lowLimit = (const BYTE*)source; 518 | } 519 | } 520 | forwardH = LZ4_hashPosition(forwardIp, tableType); 521 | LZ4_putPositionOnHash(ip, h, ctx, tableType, 
base); 522 | 523 | } while ( ((dictIssue==dictSmall) ? (ref < lowRefLimit) : 0) 524 | || ((tableType==byU16) ? 0 : (ref + MAX_DISTANCE < ip)) 525 | || (A32(ref+refDelta) != A32(ip)) ); 526 | } 527 | 528 | /* Catch up */ 529 | while ((ip>anchor) && (ref+refDelta > lowLimit) && (unlikely(ip[-1]==ref[refDelta-1]))) { ip--; ref--; } 530 | 531 | { 532 | /* Encode Literal length */ 533 | unsigned litLength = (unsigned)(ip - anchor); 534 | token = op++; 535 | if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) 536 | return 0; /* Check output limit */ 537 | if (litLength>=RUN_MASK) 538 | { 539 | int len = (int)litLength-RUN_MASK; 540 | *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; 542 | *op++ = (BYTE)len; 543 | } 544 | else *token = (BYTE)(litLength< matchlimit) limit = matchlimit; 564 | matchLength = LZ4_count(ip+MINMATCH, ref+MINMATCH, limit); 565 | ip += MINMATCH + matchLength; 566 | if (ip==limit) 567 | { 568 | unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit); 569 | matchLength += more; 570 | ip += more; 571 | } 572 | } 573 | else 574 | { 575 | matchLength = LZ4_count(ip+MINMATCH, ref+MINMATCH, matchlimit); 576 | ip += MINMATCH + matchLength; 577 | } 578 | 579 | if (matchLength>=ML_MASK) 580 | { 581 | if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit))) 582 | return 0; /* Check output limit */ 583 | *token += ML_MASK; 584 | matchLength -= ML_MASK; 585 | for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; } 586 | if (matchLength >= 255) { matchLength-=255; *op++ = 255; } 587 | *op++ = (BYTE)matchLength; 588 | } 589 | else *token += (BYTE)(matchLength); 590 | } 591 | 592 | anchor = ip; 593 | 594 | /* Test end of chunk */ 595 | if (ip > mflimit) break; 596 | 597 | /* Fill table */ 598 | LZ4_putPosition(ip-2, ctx, tableType, base); 599 | 600 | /* Test next position */ 601 | ref = LZ4_getPosition(ip, ctx, tableType, base); 602 | if (dict==usingExtDict) 
603 | { 604 | if (ref<(const BYTE*)source) 605 | { 606 | refDelta = dictDelta; 607 | lowLimit = dictionary; 608 | } 609 | else 610 | { 611 | refDelta = 0; 612 | lowLimit = (const BYTE*)source; 613 | } 614 | } 615 | LZ4_putPosition(ip, ctx, tableType, base); 616 | if ( ((dictIssue==dictSmall) ? (ref>=lowRefLimit) : 1) 617 | && (ref+MAX_DISTANCE>=ip) 618 | && (A32(ref+refDelta)==A32(ip)) ) 619 | { token=op++; *token=0; goto _next_match; } 620 | 621 | /* Prepare next loop */ 622 | forwardH = LZ4_hashPosition(++ip, tableType); 623 | } 624 | 625 | _last_literals: 626 | /* Encode Last Literals */ 627 | { 628 | int lastRun = (int)(iend - anchor); 629 | if ((outputLimited) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) 630 | return 0; /* Check output limit */ 631 | if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 632 | else *op++ = (BYTE)(lastRun<= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ 709 | if (dict->initCheck) MEM_INIT(dict, 0, sizeof(LZ4_stream_t_internal)); /* Uninitialized structure detected */ 710 | 711 | if (dictSize < MINMATCH) 712 | { 713 | dict->dictionary = NULL; 714 | dict->dictSize = 0; 715 | return 1; 716 | } 717 | 718 | if (p <= dictEnd - 64 KB) p = dictEnd - 64 KB; 719 | base = p - dict->currentOffset; 720 | dict->dictionary = p; 721 | dict->dictSize = (U32)(dictEnd - p); 722 | dict->currentOffset += dict->dictSize; 723 | 724 | while (p <= dictEnd-MINMATCH) 725 | { 726 | LZ4_putPosition(p, dict, byU32, base); 727 | p+=3; 728 | } 729 | 730 | return 1; 731 | } 732 | 733 | 734 | void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) 735 | { 736 | if ((LZ4_dict->currentOffset > 0x80000000) || 737 | ((size_t)LZ4_dict->currentOffset > (size_t)src)) /* address space overflow */ 738 | { 739 | /* rescale hash table */ 740 | U32 delta = LZ4_dict->currentOffset - 64 KB; 741 | const BYTE* 
dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; 742 | int i; 743 | for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; 746 | else LZ4_dict->hashTable[i] -= delta; 747 | } 748 | LZ4_dict->currentOffset = 64 KB; 749 | if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; 750 | LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; 751 | } 752 | } 753 | 754 | 755 | FORCE_INLINE int LZ4_compress_continue_generic (void* LZ4_stream, const char* source, char* dest, int inputSize, 756 | int maxOutputSize, limitedOutput_directive limit) 757 | { 758 | LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream; 759 | const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; 760 | 761 | const BYTE* smallest = (const BYTE*) source; 762 | if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */ 763 | if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd; 764 | LZ4_renormDictT(streamPtr, smallest); 765 | 766 | /* Check overlapping input/dictionary space */ 767 | { 768 | const BYTE* sourceEnd = (const BYTE*) source + inputSize; 769 | if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) 770 | { 771 | streamPtr->dictSize = (U32)(dictEnd - sourceEnd); 772 | if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; 773 | if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; 774 | streamPtr->dictionary = dictEnd - streamPtr->dictSize; 775 | } 776 | } 777 | 778 | /* prefix mode : source data follows dictionary */ 779 | if (dictEnd == (const BYTE*)source) 780 | { 781 | int result; 782 | if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) 783 | result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, dictSmall); 784 | else 785 | result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, noDictIssue); 786 | streamPtr->dictSize += (U32)inputSize; 787 | 
streamPtr->currentOffset += (U32)inputSize; 788 | return result; 789 | } 790 | 791 | /* external dictionary mode */ 792 | { 793 | int result; 794 | if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) 795 | result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, dictSmall); 796 | else 797 | result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, noDictIssue); 798 | streamPtr->dictionary = (const BYTE*)source; 799 | streamPtr->dictSize = (U32)inputSize; 800 | streamPtr->currentOffset += (U32)inputSize; 801 | return result; 802 | } 803 | } 804 | 805 | 806 | int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize) 807 | { 808 | return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, 0, notLimited); 809 | } 810 | 811 | int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize) 812 | { 813 | return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput); 814 | } 815 | 816 | 817 | // Hidden debug function, to force separate dictionary mode 818 | int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize) 819 | { 820 | LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict; 821 | int result; 822 | const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; 823 | 824 | const BYTE* smallest = dictEnd; 825 | if (smallest > (const BYTE*) source) smallest = (const BYTE*) source; 826 | LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest); 827 | 828 | result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue); 829 | 830 | streamPtr->dictionary = (const BYTE*)source; 831 | streamPtr->dictSize = (U32)inputSize; 832 | streamPtr->currentOffset += 
(U32)inputSize; 833 | 834 | return result; 835 | } 836 | 837 | 838 | /* LZ4_saveDict() : copies the last dictSize bytes of the stream's current dictionary into safeBuffer and makes the stream reference that copy. dictSize is capped at 64 KB and at the current dictionary size. Always returns 1. */ int LZ4_saveDict (void* LZ4_dict, char* safeBuffer, int dictSize) 839 | { 840 | LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; 841 | const BYTE* previousDictEnd = dict->dictionary + dict->dictSize; 842 | 843 | if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */ 844 | if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize; 845 | 846 | memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize); /* memmove, not memcpy : safeBuffer may overlap the current dictionary, e.g. when LZ4_slideInputBuffer() saves it at the start of the same input buffer */ 847 | 848 | dict->dictionary = (const BYTE*)safeBuffer; 849 | dict->dictSize = (U32)dictSize; 850 | 851 | return 1; 852 | } 853 | 854 | 855 | 856 | /**************************** 857 | Decompression functions 858 | ****************************/ 859 | /* 860 | * This generic decompression function covers all use cases. 861 | * It shall be instantiated several times, using different sets of directives 862 | * Note that it is essential this generic function is really inlined, 863 | * in order to remove useless branches during compilation optimisation. 864 | */ 865 | FORCE_INLINE int LZ4_decompress_generic( 866 | const char* source, 867 | char* dest, 868 | int inputSize, 869 | int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer.
*/ 870 | 871 | int endOnInput, /* endOnOutputSize, endOnInputSize */ 872 | int partialDecoding, /* full, partial */ 873 | int targetOutputSize, /* only used if partialDecoding==partial */ 874 | int dict, /* noDict, withPrefix64k, usingExtDict */ 875 | const char* dictStart, /* only if dict==usingExtDict */ 876 | int dictSize /* note : = 0 if noDict */ 877 | ) 878 | { 879 | /* Local Variables */ 880 | const BYTE* restrict ip = (const BYTE*) source; 881 | const BYTE* ref; 882 | const BYTE* const iend = ip + inputSize; 883 | 884 | BYTE* op = (BYTE*) dest; 885 | BYTE* const oend = op + outputSize; 886 | BYTE* cpy; 887 | BYTE* oexit = op + targetOutputSize; 888 | const BYTE* const lowLimit = (const BYTE*)dest - dictSize; 889 | 890 | const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; 891 | //#define OLD 892 | #ifdef OLD 893 | const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ 894 | #else 895 | const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ 896 | #endif 897 | static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; 898 | 899 | const int checkOffset = (endOnInput) && (dictSize < (int)(64 KB)); 900 | 901 | 902 | /* Special cases */ 903 | if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ 904 | if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; /* Empty output buffer */ 905 | if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); 906 | 907 | 908 | /* Main Loop */ 909 | while (1) 910 | { 911 | unsigned token; 912 | size_t length; 913 | 914 | /* get runlength */ 915 | token = *ip++; 916 | if ((length=(token>>ML_BITS)) == RUN_MASK) 917 | { 918 | unsigned s; 919 | do 920 | { 921 | s = *ip++; 922 | length += s; 923 | } 924 | while (likely((endOnInput)?ipLZ4_MAX_INPUT_SIZE)) goto _output_error; /* overflow detection */ 926 | if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* quickfix issue 134 */ 927 | if ((endOnInput) && (sizeof(void*)==4) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* quickfix issue 134 */ 928 | } 929 | 930 | /* copy literals */ 931 | cpy = op+length; 932 | if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) 933 | || ((!endOnInput) && (cpy>oend-COPYLENGTH))) 934 | { 935 | if (partialDecoding) 936 | { 937 | if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ 938 | if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ 939 | } 940 | else 941 | { 942 | if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ 943 | if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ 944 | } 945 | memcpy(op, ip, length); 946 | ip += length; 947 | op += length; 948 | break; /* Necessarily EOF, due to parsing restrictions */ 949 | } 950 | LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; 951 | 952 | /* get offset */ 953 | LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; 954 | if ((checkOffset) && (unlikely(ref < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */ 955 | 956 | /* get matchlength */ 957 | if ((length=(token&ML_MASK)) == 
ML_MASK) 958 | { 959 | unsigned s; 960 | do 961 | { 962 | if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error; 963 | s = *ip++; 964 | length += s; 965 | } while (s==255); 966 | //if ((sizeof(void*)==4) && unlikely(length>LZ4_MAX_INPUT_SIZE)) goto _output_error; /* overflow detection */ 967 | if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* quickfix issue 134 */ 968 | } 969 | 970 | /* check external dictionary */ 971 | if ((dict==usingExtDict) && (ref < (BYTE* const)dest)) 972 | { 973 | if (unlikely(op+length+MINMATCH > oend-LASTLITERALS)) goto _output_error; 974 | 975 | if (length+MINMATCH <= (size_t)(dest-(char*)ref)) 976 | { 977 | ref = dictEnd - (dest-(char*)ref); 978 | memcpy(op, ref, length+MINMATCH); 979 | op += length+MINMATCH; 980 | } 981 | else 982 | { 983 | size_t copySize = (size_t)(dest-(char*)ref); 984 | memcpy(op, dictEnd - copySize, copySize); 985 | op += copySize; 986 | copySize = length+MINMATCH - copySize; 987 | if (copySize > (size_t)((char*)op-dest)) /* overlap */ 988 | { 989 | BYTE* const cpy = op + copySize; 990 | const BYTE* ref = (BYTE*)dest; 991 | while (op < cpy) *op++ = *ref++; 992 | } 993 | else 994 | { 995 | memcpy(op, dest, copySize); 996 | op += copySize; 997 | } 998 | } 999 | continue; 1000 | } 1001 | 1002 | /* copy repeated sequence */ 1003 | if (unlikely((op-ref)<(int)STEPSIZE)) 1004 | { 1005 | const size_t dec64 = dec64table[(sizeof(void*)==4) ? 
0 : op-ref]; 1006 | op[0] = ref[0]; 1007 | op[1] = ref[1]; 1008 | op[2] = ref[2]; 1009 | op[3] = ref[3]; 1010 | #ifdef OLD 1011 | op += 4, ref += 4; ref -= dec32table[op-ref]; 1012 | A32(op) = A32(ref); 1013 | op += STEPSIZE-4; ref -= dec64; 1014 | #else 1015 | ref += dec32table[op-ref]; 1016 | A32(op+4) = A32(ref); 1017 | op += STEPSIZE; ref -= dec64; 1018 | #endif 1019 | } else { LZ4_COPYSTEP(op,ref); } 1020 | cpy = op + length - (STEPSIZE-4); 1021 | 1022 | if (unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4))) 1023 | { 1024 | if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last 5 bytes must be literals */ 1025 | if (opdictionary = dictionary; 1095 | lz4sd->dictSize = dictSize; 1096 | return 1; 1097 | } 1098 | 1099 | /* 1100 | *_continue() : 1101 | These decoding functions allow decompression of multiple blocks in "streaming" mode. 1102 | Previously decoded blocks must still be available at the memory position where they were decoded. 1103 | If it's not possible, save the relevant part of decoded data into a safe buffer, 1104 | and indicate where it stands using LZ4_setDictDecode() 1105 | */ 1106 | int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) 1107 | { 1108 | LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; 1109 | int result; 1110 | 1111 | result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, lz4sd->dictionary, lz4sd->dictSize); 1112 | if (result <= 0) return result; 1113 | if (lz4sd->dictionary + lz4sd->dictSize == dest) 1114 | { 1115 | lz4sd->dictSize += result; 1116 | } 1117 | else 1118 | { 1119 | lz4sd->dictionary = dest; 1120 | lz4sd->dictSize = result; 1121 | } 1122 | 1123 | return result; 1124 | } 1125 | 1126 | int LZ4_decompress_fast_continue (void* LZ4_streamDecode, const char* source, char* dest, int originalSize) 1127 | { 1128 | LZ4_streamDecode_t_internal* lz4sd = 
(LZ4_streamDecode_t_internal*) LZ4_streamDecode; 1129 | int result; 1130 | 1131 | result = LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, lz4sd->dictionary, lz4sd->dictSize); 1132 | if (result <= 0) return result; 1133 | /* In endOnOutputSize mode, result is the number of compressed bytes read from source, not the number of bytes written to dest : the tracked dictionary must therefore grow by originalSize, the amount of data actually decoded into dest. */ if (lz4sd->dictionary + lz4sd->dictSize == dest) 1134 | { 1135 | lz4sd->dictSize += originalSize; /* dest directly follows the previous data : extend the dictionary */ 1136 | } 1137 | else 1138 | { 1139 | lz4sd->dictionary = dest; /* non-contiguous output : the block just decoded becomes the dictionary */ 1140 | lz4sd->dictSize = originalSize; 1141 | } 1142 | 1143 | return result; 1144 | } 1145 | 1146 | 1147 | /* 1148 | Advanced decoding functions : 1149 | *_usingDict() : 1150 | These decoding functions work the same as "_continue" ones, 1151 | the dictionary must be explicitly provided within parameters 1152 | */ 1153 | 1154 | /* Input-bounded decoding (endOnInputSize) of one block against the external dictionary [dictStart, dictStart+dictSize) */ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) 1155 | { 1156 | return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize); 1157 | } 1158 | 1159 | /* Output-bounded decoding (endOnOutputSize) of one block of originalSize bytes against the external dictionary [dictStart, dictStart+dictSize) */ int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) 1160 | { 1161 | return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize); 1162 | } 1163 | 1164 | 1165 | /*************************************************** 1166 | Obsolete Functions 1167 | ***************************************************/ 1168 | /* 1169 | These function names are deprecated and should no longer be used. 1170 | They are only provided here for compatibility with older user programs.
1171 | - LZ4_uncompress is totally equivalent to LZ4_decompress_fast 1172 | - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe 1173 | */ 1174 | int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } 1175 | int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } 1176 | 1177 | 1178 | /* Obsolete Streaming functions */ 1179 | 1180 | int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } 1181 | 1182 | void LZ4_init(LZ4_stream_t_internal* lz4ds, const BYTE* base) 1183 | { 1184 | MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE); 1185 | lz4ds->bufferStart = base; 1186 | } 1187 | 1188 | int LZ4_resetStreamState(void* state, const char* inputBuffer) 1189 | { 1190 | if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ 1191 | LZ4_init((LZ4_stream_t_internal*)state, (const BYTE*)inputBuffer); 1192 | return 0; 1193 | } 1194 | 1195 | void* LZ4_create (const char* inputBuffer) 1196 | { 1197 | void* lz4ds = ALLOCATOR(4, LZ4_STREAMSIZE_U32); 1198 | LZ4_init ((LZ4_stream_t_internal*)lz4ds, (const BYTE*)inputBuffer); 1199 | return lz4ds; 1200 | } 1201 | 1202 | char* LZ4_slideInputBuffer (void* LZ4_Data) 1203 | { 1204 | LZ4_stream_t_internal* lz4ds = (LZ4_stream_t_internal*)LZ4_Data; 1205 | 1206 | LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)lz4ds->bufferStart, 64 KB); 1207 | 1208 | return (char*)(lz4ds->bufferStart + 64 KB); 1209 | } 1210 | 1211 | /* Obsolete compresson functions using User-allocated state */ 1212 | 1213 | int LZ4_sizeofState() { return LZ4_STREAMSIZE; } 1214 | 1215 | int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize) 1216 | { 1217 | if (((size_t)(state)&3) != 0) return 0; /* Error : state is not aligned on 4-bytes boundary */ 1218 | MEM_INIT(state, 0, LZ4_STREAMSIZE); 1219 | 1220 | if 
(inputSize < (int)LZ4_64KLIMIT) 1221 | return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue); 1222 | else 1223 | return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue); 1224 | } 1225 | 1226 | /* Output-limited variant of LZ4_compress_withState() : the caller-provided state must be 4-byte aligned (returns 0 otherwise) and is zeroed on every call. */ int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize) 1227 | { 1228 | if (((size_t)(state)&3) != 0) return 0; /* Error : state is not aligned on 4-bytes boundary */ 1229 | MEM_INIT(state, 0, LZ4_STREAMSIZE); 1230 | 1231 | /* Inputs below LZ4_64KLIMIT use the U16 table; larger inputs use U32 offsets when pointers are 8 bytes wide, raw pointers otherwise */ if (inputSize < (int)LZ4_64KLIMIT) 1232 | return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue); 1233 | else 1234 | return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue); 1235 | } 1236 | 1237 | /* Obsolete streaming decompression functions */ 1238 | 1239 | /* Input-bounded decoding (endOnInputSize) in withPrefix64k mode : passes a dictionary size of 64 KB and no external dictionary pointer */ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) 1240 | { 1241 | return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, NULL, 64 KB); 1242 | } 1243 | 1244 | /* Output-bounded decoding (endOnOutputSize) in withPrefix64k mode : passes a dictionary size of 64 KB and no external dictionary pointer */ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) 1245 | { 1246 | return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, NULL, 64 KB); 1247 | } 1248 | --------------------------------------------------------------------------------