├── .ghci ├── .gitignore ├── bench ├── .ghci ├── Benchmarks.hs └── GenericBenchmarks.hs ├── AUTHORS ├── LICENSE ├── blaze-binary.cabal ├── src └── Data │ └── Blaze │ ├── Binary │ ├── StreamDecoding.hs │ ├── Decoding.hs │ ├── Encoding.hs │ ├── IterDecoding.hs │ └── ParamDecoding.hs │ └── Binary.hs └── README.md /.ghci: -------------------------------------------------------------------------------- 1 | :set -isrc -Wall 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cabal-dev 2 | dist 3 | -------------------------------------------------------------------------------- /bench/.ghci: -------------------------------------------------------------------------------- 1 | :set -Lcabal-dev/packages-7.2.1.conf 2 | :set -Wall 3 | :set -isrc 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Author 2 | Simon Meier 3 | 4 | Contributors 5 | Bas van Dijk 6 | 7 | Building on the code in the `binary`, `cereal`, and `text` libraries. 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Lennart Kolmodin, Galois, Inc. 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the author nor the names of his contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS 21 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /blaze-binary.cabal: -------------------------------------------------------------------------------- 1 | name: blaze-binary 2 | version: 0.1.0.0 3 | license: BSD3 4 | license-file: LICENSE 5 | author: Simon Meier 6 | maintainer: Simon Meier 7 | category: Data 8 | stability: experimental 9 | build-type: Simple 10 | cabal-version: >= 1.6 11 | synopsis: A binary serialization library 12 | 13 | description: 14 | A binary serialization library, similar to binary and cereal, 15 | but with better performance and support for inspecting the primitive stream 16 | of values representing an encoded Haskell value. 17 | 18 | extra-source-files: 19 | LICENCE 20 | 21 | source-repository head 22 | type: git 23 | location: git://github.com/meiersi/blaze-binary.git 24 | 25 | flag genericBenchmarks 26 | Description: Build the generic benchmarks (This takes some time) 27 | Default: True 28 | 29 | library 30 | build-depends: base == 4.* 31 | , bytestring == 0.9.* 32 | , bytestring-builder == 0.1.* 33 | , containers == 0.4.* 34 | , array >= 0.3 && < 0.5 35 | , ghc-prim 36 | 37 | hs-source-dirs: src 38 | 39 | exposed-modules: Data.Blaze.Binary 40 | 41 | ghc-options: -Wall 42 | ghc-prof-options: -prof -auto-all 43 | 44 | if impl(ghc >= 7.2.1) 45 | cpp-options: -DGENERICS 46 | 47 | benchmark bench 48 | type: exitcode-stdio-1.0 49 | main-is: Benchmarks.hs 50 | hs-source-dirs: bench, src 51 | ghc-options: -Wall 52 | build-depends: base >= 4 && < 5 53 | , ghc-prim >= 0.2 && < 0.3 54 | , containers >= 0.4 && < 0.5 55 | , array >= 0.4 && < 0.5 56 | , bytestring >= 0.9 && < 0.10 57 | , criterion >= 0.6 && < 0.7 58 | , cereal >= 0.3 && < 0.4 59 | -- Use the 'cps' branch from https://github.com/kolmodin/binary.git 60 | -- for binary-0.6.0.0 61 | , binary >= 0.6 && < 0.7 62 | -- The 'master' branch from git://github.com/meiersi/bytestring-builder.git 63 | , bytestring-builder >= 0.1 && < 0.2 64 | , deepseq >= 1.3 && < 1.4 65 | , primitive >= 0.4 && < 0.5 66 | , attoparsec >= 0.10 && < 0.11 67 | 68 | benchmark bench-generic 69 | type: exitcode-stdio-1.0 70 | main-is: GenericBenchmarks.hs 71 | hs-source-dirs: bench, src 72 | ghc-options: -Wall 73 | build-depends: base >= 4 && < 5 74 | , ghc-prim >= 0.2 && < 0.3 75 | , containers >= 0.4 && < 0.5 76 | , array >= 0.4 && < 0.5 77 | , bytestring >= 0.9 && < 0.10 78 | , criterion >= 0.6 && < 0.7 79 | , bytestring-builder >= 0.1 && < 0.2 80 | , deepseq >= 1.3 && < 1.4 81 | , primitive >= 0.4 && < 0.5 82 | 83 | if impl(ghc >= 7.2.1) && flag(genericBenchmarks) 84 | cpp-options: -DGENERICS 85 | else 86 | buildable: False 87 | -------------------------------------------------------------------------------- /src/Data/Blaze/Binary/StreamDecoding.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, UnboxedTuples, BangPatterns #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Data.Blaze.Binary.Encoding 5 | -- Copyright : 2012, Simon Meier 6 | -- License : BSD3-style (see LICENSE) 7 | -- 8 | -- Maintainer : Simon Meier 9 | -- Stability : 10 | -- Portability : portable 11 | -- 12 | -- Stream based decoding of binary values. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Data.Blaze.Binary.StreamDecoding ( 16 | benchWord8s 17 | ) where 18 | 19 | import Data.Word 20 | import Control.Applicative 21 | 22 | import qualified Data.ByteString.Internal as S 23 | 24 | import Foreign.Ptr (plusPtr) 25 | import Foreign.ForeignPtr (touchForeignPtr) 26 | import Foreign.Storable (peek) 27 | 28 | #if __GLASGOW_HASKELL__ >= 702 29 | import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) 30 | #else 31 | import Foreign.ForeignPtr (unsafeForeignPtrToPtr) 32 | #endif 33 | 34 | ------------------------------------------------------------------------ 35 | 36 | -- | The representation for a stream of values to be serialized. 37 | data VStream = 38 | -- VChar {-# UNPACK #-} !Char VStreamRep 39 | -- | VWord {-# UNPACK #-} !Word VStreamRep 40 | VWord8 {-# UNPACK #-} !Word8 VStream 41 | -- | VWord16 {-# UNPACK #-} !Word16 VStreamRep 42 | -- | VWord32 {-# UNPACK #-} !Word32 VStreamRep 43 | -- | VWord64 {-# UNPACK #-} !Word64 VStreamRep 44 | -- | VInt {-# UNPACK #-} !Int VStreamRep 45 | -- | VInt8 {-# UNPACK #-} !Int8 VStreamRep 46 | -- | VInt16 {-# UNPACK #-} !Int16 VStreamRep 47 | -- | VInt32 {-# UNPACK #-} !Int32 VStreamRep 48 | -- | VInt64 {-# UNPACK #-} !Int64 VStreamRep 49 | -- | VFloat {-# UNPACK #-} !Float VStreamRep 50 | -- | VDouble {-# UNPACK #-} !Double VStreamRep 51 | -- | VInteger !Integer VStreamRep 52 | -- | VByteString !S.ByteString VStreamRep 53 | -- | VBuilder !B.Builder VStreamRep 54 | | VFail String 55 | | VEmpty 56 | 57 | newtype Decoder a = Decoder { unDecoder :: VStream -> (# a, VStream #) } 58 | 59 | toVStream :: S.ByteString -> VStream 60 | toVStream (S.PS fpbuf off len) = 61 | go ip0 62 | where 63 | pbuf = unsafeForeignPtrToPtr fpbuf 64 | ip0 = pbuf `plusPtr` off 65 | ipe = ip0 `plusPtr` len 66 | 67 | go !ip 68 | | ip < ipe = S.inlinePerformIO $ do 69 | w <- peek ip 70 | touchForeignPtr fpbuf 71 | return $ VWord8 w (go (ip `plusPtr` 1)) 72 | | otherwise = 73 | VEmpty 74 | 75 | runDecoder :: Decoder a -> S.ByteString -> Either String a 76 | runDecoder d bs = case unDecoder d (toVStream bs) of 77 | (# _, VFail msg #) -> Left msg 78 | (# x, _ #) -> Right x 79 | 80 | instance Functor Decoder where 81 | {-# INLINE fmap #-} 82 | fmap = \f d -> Decoder $ \vs0 -> case unDecoder d vs0 of 83 | (# x, vs1 #) -> (# f x, vs1 #) 84 | 85 | 86 | instance Applicative Decoder where 87 | {-# INLINE pure #-} 88 | pure = \x -> Decoder $ \vs -> (# x, vs #) 89 | 90 | {-# INLINE (<*>) #-} 91 | (<*>) = \fd xd -> Decoder $ \vs0 -> 92 | case unDecoder fd vs0 of 93 | (# f, vs1 #) -> case unDecoder xd vs1 of 94 | (# x, vs2 #) -> (# f x, vs2 #) 95 | 96 | instance Monad Decoder where 97 | {-# INLINE return #-} 98 | return = pure 99 | 100 | {-# INLINE (>>=) #-} 101 | (>>=) = \md f -> Decoder $ \vs0 -> 102 | case unDecoder md vs0 of 103 | (# _, vs1@(VFail _) #) -> (# error "impossible", vs1 #) 104 | (# m, vs1 #) -> unDecoder (f m) vs1 105 | 106 | -- We store the failure in the remainder of the stream to piggy-back failure 107 | -- detection on the pattern matching of the stream constructor.- 108 | fail msg = Decoder $ \_ -> (# error "Decoder:fail: impossible", VFail msg #) 109 | 110 | {-# INLINE word8 #-} 111 | word8 :: Decoder Word8 112 | word8 = Decoder $ \vs0 -> case vs0 of 113 | VWord8 w vs1 -> (# w, vs1 #) 114 | _ -> (# error "impossible", VFail "expected Word8, but got something else" #) 115 | 116 | word8s :: Decoder [Word8] 117 | word8s = do 118 | tag <- word8 119 | case tag of 120 | 0 -> return [] 121 | 1 -> (:) <$> word8 <*> word8s 122 | _ -> fail $ "word8s: unexpected tag " ++ show tag 123 | 124 | 125 | benchWord8s :: S.ByteString -> [Word8] 126 | benchWord8s bs = case runDecoder word8s bs of 127 | Left msg -> error msg 128 | Right x -> x 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | Projects such as cloud-haskell or acid-state crucially rely on performant, 5 | generic serialization. As I've developed the new bytestring builder, I 6 | wondered what speedup I could gain using it for encoding a Haskell value to an 7 | unambiguous binary representation. This library, `blaze-binary`, is the 8 | (current) result state of this experiment. 9 | 10 | In preliminary benchmarks on my i7, 64bit Linux machine, this library is 2 - 4 11 | times faster for binary encoding than both `binary-0.5.0.2` and 12 | `cereal-0.3.5.1`. Decoding is as fast as `binary-0.5.0.2`, but allows feeding 13 | the input in a chunkwise fashion, like `attoparsec`. Our decoder is at least 14 | 2x faster than using attoparsec directly. 15 | 16 | As an additional improvement over binary and cereal, this library can 17 | also output a textual representation of the sequence of primitive values 18 | (e.g., `Int`s, `Double`s, and strict `ByteString`s). Moreover, the encoding 19 | for this stream of primitive values can be chosen at runtime without any 20 | performance impact. This allows for example a developer of a CloudHaskell 21 | application to analyse the messages sent and received without having access to 22 | the type of the data being sent. This is especially interesting for displaying 23 | error messages in the case of a failed parse of a received message or to 24 | investigate the communication patterns of a running CloudHaskell application. 25 | 26 | 27 | Encoding Implementation 28 | ======================= 29 | 30 | The implementation uses a two step approach to encode a Haskell value to a 31 | sequence of bytes. In a first step, the Haskell value is converted to a 32 | stream of primitive values, where a primitive value is an `IntX` or `WordX` 33 | for `X` in `["", "8","16","32","64"]`, a `Float`, `Double`, `Integer`, a 34 | `ByteString`, or a `Text` value. The conversion uses a difference list 35 | representation of the primitive stream to ensure *O(1)*-concatentation. In 36 | the second step, the stream of primitive values is converted to a sequence of 37 | bytes using the new bytestring builder and its support for bounded encodings. 38 | 39 | This splitting of the encoding into a "flattening pass" and an "primitive 40 | encoding pass" results in the nice benefit that the encoding of the stream of 41 | primitive values can be chosen at runtime. Morover, it is more efficient, as 42 | the benchmarks demonstrate. In the beginning, I implemented a version that 43 | encodes the values directly using the new bytestring builder. This initial 44 | version did not result in any speedup with respect to binary and cereal. My 45 | current hypothesis is that the type of all of these builders leads to too many 46 | unknown and possibly even unsaturated calls, whereas the difference list for 47 | the stream of primitive values only results in calls to unknown THUNKs. 48 | Evaluating unknown THUNKs is the fastest unkwon call. 49 | 50 | 51 | High-level Encoding Format 52 | ========================== 53 | 54 | In contrast to binary and cereal, this library encodes lists in a streaming 55 | fashion, tagging `(:)` with 1 and `[]` with 0. This results in only one pass 56 | through a list and reduces GC pressure as it retains less memory than the list 57 | serialization used by binary and cereal, which prefixes the list with the 58 | number of elements. 59 | 60 | We also do not use the `Put` monad. The monadic value-passing is just not 61 | required. 62 | 63 | 64 | Encoding Primitive Values 65 | ========================= 66 | 67 | 68 | I plan to implement two different encoding formats: one format optimized for 69 | compactness and one optimized for throughput. Both of these formats come in a 70 | tagged variant that allows decoding the stream of primitive values without 71 | access to the type. 72 | 73 | All results are prefixed with a 4-byte identifier. Currently, we use the 74 | following assignment of identifiers to formats. 75 | 76 | 0xce,0xbb,0x2e,0x30 throughput, untagged 77 | 0xce,0xbb,0x2e,0x31 throughput, tagged 78 | 0xce,0xbb,0x2e,0x32 compact, untagged 79 | 0xce,0xbb,0x2e,0x33 compact, tagged 80 | 81 | The compact and the throughput format only differ in how they encode `IntX`s, 82 | `WordX`s, and `Integer`s. For the common primitive values they use the 83 | following encodings. 84 | 85 | - `Char`s are UTF-8 encoded. 86 | - `Float`s are encoded as IEEE 754 values with their octets in little-endian 87 | order. 88 | - `Double`s are encoded as IEEE 754 values with their octets in 89 | little-endian order. 90 | - `ByteString`s are encoded with their length prefixed according to the 91 | `Int` format. 92 | - `Text` values are encoded using a zero-terminated, modified UTF-8 format 93 | that works like UTF-8 except that it encodes `'\x0'` as `[0xC0,0x80]`. 94 | This format never outputs a '0x00' for any Unicode codepoint and can 95 | therefore be zero-terminated, which allows an efficient streaming 96 | encoding. 97 | 98 | 99 | The compact format 100 | ------------------ 101 | 102 | This is the default format. It trades some performance for compactness and 103 | portability. `Int`s and `Word`s wider than 2 bytes are encoded using a 104 | variable length base-128 encoding, as used by (Google's protocol bufffers)[https://developers.google.com/protocol-buffers/docs/encoding]. 105 | 106 | 107 | The throughput format 108 | --------------------- 109 | 110 | This format is optmized for maximum throughput on 64bit, x86 machines. I 111 | assume they are the future server machines of choice. All primitive values are 112 | therefore encoded using a little-endian encoding. 113 | 114 | 115 | The tagged format 116 | ----------------- 117 | 118 | Before every primitive value a tag-byte is written indicating the type of the 119 | following primitive value. This allows decoding a binary value to a 120 | human-readable stream of primitive values. 121 | 122 | 123 | API 124 | --- 125 | 126 | In the first releases, all low-level encoding and decoding support is kept 127 | internal. This simplifies experimentation. There is one abstract type for 128 | `Encoder`s and one for `Decoder`s. The `Monoid` and `Monad` typeclasses are 129 | provided as combintors for them. 130 | 131 | newtype Encoder a = ... 132 | newtype Decoder a = ... 133 | 134 | class Binary a where 135 | toBinary :: Encoder a 136 | fromBinary :: Decoder a 137 | 138 | Only one format is supported in the beginning. The untagged, throughput 139 | format. This format gives a good baseline for the possible speed of the 140 | implementation. We run an `Encoder` by converting it to a bytestring 141 | `Builder`. 142 | 143 | encode :: Encoder a -> a -> Builder 144 | 145 | We run a `Decoder` by converting it to an 146 | (`Data.Attoparsec.ByteString.Result`)[http://hackage.haskell.org/packages/archive/attoparsec/0.10.1.1/doc/html/Data-Attoparsec-ByteString.html#t:Result]. 147 | 148 | decode :: Decoder a -> Result a 149 | 150 | Note that the decoder selects the appropriate format based on the 4-byte 151 | prefix. 152 | 153 | We provide convenience functions for the conversion to and from bytestrings. 154 | 155 | toBinaryBuilder :: Binary a => a -> Builder 156 | toBinaryByteString :: Binary a => a -> S.ByteString 157 | toBinaryLazyByteString :: Binary a => a -> L.ByteString 158 | 159 | fromBinaryByteString :: Binary a => S.ByteString -> Either String a 160 | fromBinaryLazyByteString :: Binary a => L.ByteString -> Either String a 161 | 162 | 163 | 164 | Security Concerns 165 | ================= 166 | 167 | Note that the input of the decoder is *untrusted* and may be an arbitrary 168 | sequence of bytes. The decoding implementation must make sure that for *any* 169 | bytestring either an error is reported via `Left` or a Haskell value 170 | satisfying *all* invariants of its type is returned. This entails for example 171 | that we must validate every `Text` value. This also excludes using functions 172 | such as `fromAscList` without having validated their input first. 173 | 174 | The benefit of implementing fully validating decoders is that we can use them 175 | for implementing public interfaces. If the cost of validation is too high then 176 | we can consider implementing a second `UnsafeBinary` typeclass whose decoder 177 | is only guaranteed to be correct for bytestrings in the range of the encoder. 178 | 179 | Note that we must also take care to provide good bounds on the resource usage 180 | of our implementation. This concerns heap space and stack space. Some 181 | implementations require considerable stack space. They might profit from 182 | catching `StackOverflow` exceptions and report them politely to their caller 183 | using a `Left` result. 184 | 185 | Note also that we must report overflows when decoding `Int` and `Word` values, 186 | as we cannot guarantee that using a truncated 64-bit number will work. 187 | 188 | 189 | TODO for a first release 190 | ======================== 191 | 192 | Implement the above API for the throughput format and benchmark against 193 | binary, cereal, and attoparsec to catch regressions. 194 | 195 | 196 | Future Work 197 | =========== 198 | 199 | - Implement generic serialization (DONE by Bas van Dijk, needs benchmarking) 200 | - Implement all four suggested formats 201 | - Implement debugging decoder for tagged formats 202 | - Implement error reporting for tagged format that produces human-readable 203 | output. It will still be flattened though. 204 | 205 | 206 | -------------------------------------------------------------------------------- /src/Data/Blaze/Binary/Decoding.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, UnboxedTuples, MagicHash, ScopedTypeVariables, BangPatterns, DeriveDataTypeable, OverloadedStrings #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Data.Blaze.Binary.Encoding 5 | -- Copyright : 2012, Simon Meier 6 | -- License : BSD3-style (see LICENSE) 7 | -- 8 | -- Maintainer : Simon Meier 9 | -- Stability : 10 | -- Portability : portable 11 | -- 12 | -- Binary encoding of values. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Data.Blaze.Binary.Decoding where 16 | 17 | import Prelude hiding (catch) 18 | 19 | import Control.Applicative 20 | import Control.Exception 21 | 22 | import Data.Int (Int8, Int16, Int32, Int64) 23 | import Data.Typeable 24 | import qualified Data.ByteString.Internal as S 25 | import GHC.Prim 26 | import GHC.Ptr 27 | import GHC.Word 28 | import GHC.Exts 29 | import GHC.IO (IO(IO)) 30 | import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) 31 | import Foreign.Storable (Storable, sizeOf, peek) 32 | 33 | #if __GLASGOW_HASKELL__ >= 702 34 | import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) 35 | #else 36 | import Foreign.ForeignPtr (unsafeForeignPtrToPtr) 37 | #endif 38 | 39 | data ParseException = ParseException String -- {-# UNPACK #-} !(Ptr Word8) 40 | deriving( Show, Typeable ) 41 | 42 | instance Exception ParseException where 43 | 44 | newtype Decoder a = Decoder { 45 | -- unDecoder :: ForeignPtr Word8 -> Addr# -> Addr# 46 | unDecoder :: ForeignPtr Word8 -> Ptr Word8 -> Ptr Word8 47 | -> State# RealWorld -> (# State# RealWorld, Addr#, a #) 48 | } 49 | 50 | instance Functor Decoder where 51 | fmap f = \(Decoder io) -> Decoder $ \fpbuf ip0 ipe s0 -> 52 | case io fpbuf ip0 ipe s0 of 53 | (# s1, ip1, x #) -> (# s1, ip1, f x #) 54 | 55 | instance Applicative Decoder where 56 | {-# INLINE pure #-} 57 | pure x = Decoder $ \_ ip0 _ s0 -> (# s0, getAddr ip0, x #) 58 | 59 | {-# INLINE (<*>) #-} 60 | Decoder fIO <*> Decoder xIO = Decoder $ \fpbuf ip0 ipe s0 -> 61 | case fIO fpbuf ip0 ipe s0 of 62 | (# s1, ip1, f #) -> case xIO fpbuf (Ptr ip1) ipe s1 of 63 | (# s2, ip2, x #) -> (# s2, ip2, f x #) 64 | 65 | {-# INLINE liftIO #-} 66 | liftIO :: IO a -> Decoder a 67 | liftIO (IO io) = Decoder $ \_ !(Ptr ip0) _ s0 -> case io s0 of 68 | (# s1, x #) -> (# s1, ip0, x #) 69 | 70 | {-# INLINE runIO #-} 71 | runIO :: IO a -> State# RealWorld -> (# State# RealWorld, a #) 72 | runIO (IO io) = io 73 | 74 | instance Monad Decoder where 75 | {-# INLINE return #-} 76 | return = pure 77 | 78 | {-# INLINE (>>=) #-} 79 | Decoder xIO >>= f = Decoder $ \fpbuf ip0 ipe s0 -> 80 | case xIO fpbuf ip0 ipe s0 of 81 | (# s1, ip1, x #) -> unDecoder (f x) fpbuf (Ptr ip1) ipe s1 82 | 83 | {-# INLINE fail #-} 84 | fail msg = liftIO $ throw $ ParseException msg 85 | 86 | 87 | {- 88 | requires :: Int -> Decoder a -> Decoder a 89 | requires n p = Decoder $ \buf@(Buffer ip ipe) -> 90 | if ipe `minusPtr` ip >= n 91 | then unDecoder p buf 92 | else throw $ ParseException $ 93 | "required " ++ show n ++ 94 | " bytes, but there are only " ++ show (ipe `minusPtr` ip) ++ 95 | " bytes left." 96 | -} 97 | 98 | {-# INLINE storable #-} 99 | storable :: forall a. Storable a => Decoder a 100 | storable = Decoder $ \fpbuf ip0 ipe s0 -> 101 | let ip1 = ip0 `plusPtr` size in 102 | if ip1 <= ipe 103 | then case runIO (peek (castPtr ip0 :: Ptr a)) s0 of 104 | (# s1, x #) -> (# s1, getAddr ip1, x #) 105 | else unDecoder 106 | (fail $ "less than the required " ++ show size ++ " bytes left.") 107 | fpbuf ip0 ipe s0 108 | where 109 | size = sizeOf (undefined :: a) 110 | 111 | runDecoder :: Decoder a -> S.ByteString -> Either String a 112 | runDecoder p (S.PS fpbuf off len) = S.inlinePerformIO $ do 113 | withForeignPtr fpbuf $ \pbuf -> do 114 | let !ip = pbuf `plusPtr` off 115 | !ipe = ip `plusPtr` len 116 | (`catch` handler) $ do 117 | x <- IO $ \s0 -> case unDecoder p fpbuf ip ipe s0 of 118 | (# s1, _, x #) -> (# s1, x #) 119 | return (Right x) 120 | where 121 | handler :: ParseException -> IO (Either String a) 122 | handler (ParseException msg) = return $ Left msg 123 | 124 | -- Primitive parsers 125 | -------------------- 126 | 127 | {-# INLINE word8 #-} 128 | word8 :: Decoder Word8 129 | word8 = storable 130 | 131 | {-# INLINE word16 #-} 132 | word16 :: Decoder Word16 133 | word16 = storable 134 | 135 | {-# INLINE word32 #-} 136 | word32 :: Decoder Word32 137 | word32 = storable 138 | 139 | {-# INLINE word64 #-} 140 | word64 :: Decoder Word64 141 | word64 = storable 142 | 143 | {-# INLINE word #-} 144 | word :: Decoder Word 145 | word = storable 146 | 147 | {-# INLINE int8 #-} 148 | int8 :: Decoder Int8 149 | int8 = storable 150 | 151 | {-# INLINE int16 #-} 152 | int16 :: Decoder Int16 153 | int16 = storable 154 | 155 | {-# INLINE int32 #-} 156 | int32 :: Decoder Int32 157 | int32 = storable 158 | 159 | {-# INLINE int64 #-} 160 | int64 :: Decoder Int64 161 | int64 = storable 162 | 163 | {-# INLINE int #-} 164 | int :: Decoder Int 165 | int = storable 166 | 167 | {-# INLINE float #-} 168 | float :: Decoder Float 169 | float = storable 170 | 171 | {-# INLINE double #-} 172 | double :: Decoder Double 173 | double = storable 174 | 175 | {-# INLINE byteString #-} 176 | byteString :: Decoder S.ByteString 177 | byteString = int >>= byteStringSlice 178 | 179 | {-# INLINE byteStringSlice #-} 180 | byteStringSlice :: Int -> Decoder S.ByteString 181 | byteStringSlice len = Decoder $ \fpbuf ip0 ipe s0 -> 182 | let ip1 = ip0 `plusPtr` len 183 | in 184 | if ip1 <= ipe 185 | then (# s0 186 | , getAddr ip1 187 | , S.PS fpbuf (ip0 `minusPtr` unsafeForeignPtrToPtr fpbuf) len 188 | #) 189 | else unDecoder 190 | (fail $ "less than the required " ++ show len ++ " bytes left.") 191 | fpbuf ip0 ipe s0 192 | 193 | char :: Decoder Char 194 | char = do 195 | w0 <- word8 196 | case () of 197 | _ | w0 < 0x80 -> return (chr1 w0) 198 | | w0 < 0xe0 -> chr2 w0 <$> word8 199 | | w0 < 0xf0 -> chr3 w0 <$> word8 <*> word8 200 | | otherwise -> chr4 w0 <$> word8 <*> word8 <*> word8 201 | 202 | {-# INLINE getAddr #-} 203 | getAddr :: Ptr a -> Addr# 204 | getAddr (Ptr a) = a 205 | 206 | -- Decoder combinators 207 | -------------------- 208 | 209 | {-# INLINE decodeList #-} 210 | decodeList :: Decoder a -> Decoder [a] 211 | decodeList x = 212 | go 213 | where 214 | go = do tag <- word8 215 | case tag of 216 | 0 -> return [] 217 | 1 -> (:) <$> x <*> go 218 | _ -> fail $ "decodeList: unexpected tag " ++ show tag 219 | 220 | {-# INLINE decodeMaybe #-} 221 | decodeMaybe :: Decoder a -> Decoder (Maybe a) 222 | decodeMaybe just = 223 | go 224 | where 225 | go = do tag <- word8 226 | case tag of 227 | 0 -> return Nothing 228 | 1 -> Just <$> just 229 | _ -> fail $ "decodeMaybe: unexpected tag " ++ show tag 230 | 231 | {-# INLINE decodeEither #-} 232 | decodeEither :: Decoder a -> Decoder b -> Decoder (Either a b) 233 | decodeEither left right = 234 | go 235 | where 236 | go = do tag <- word8 237 | case tag of 238 | 0 -> Left <$> left 239 | 1 -> Right <$> right 240 | _ -> fail $ "decodeEither: unexpected tag " ++ show tag 241 | 242 | 243 | word8sSimple :: Decoder [Word8] 244 | word8sSimple = decodeList word8 245 | 246 | word8s :: Decoder [Word8] 247 | word8s = 248 | go [] 249 | where 250 | go xs = do 251 | tag <- word8 252 | case tag of 253 | 0 -> return (reverse xs) 254 | 1 -> do x <- word8 255 | go (x:xs) 256 | _ -> fail $ "word8s: unexpected tag " ++ show tag 257 | 258 | ------------------------------------------------------------------------------ 259 | -- UTF-8 decoding helpers 260 | ------------------------------------------------------------------------------ 261 | 262 | chr1 :: Word8 -> Char 263 | chr1 (W8# x#) = C# (chr# (word2Int# x#)) 264 | {-# INLINE chr1 #-} 265 | 266 | chr2 :: Word8 -> Word8 -> Char 267 | chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#)) 268 | where 269 | !y1# = word2Int# x1# 270 | !y2# = word2Int# x2# 271 | !z1# = uncheckedIShiftL# (y1# -# 0xC0#) 6# 272 | !z2# = y2# -# 0x80# 273 | {-# INLINE chr2 #-} 274 | 275 | chr3 :: Word8 -> Word8 -> Word8 -> Char 276 | chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#)) 277 | where 278 | !y1# = word2Int# x1# 279 | !y2# = word2Int# x2# 280 | !y3# = word2Int# x3# 281 | !z1# = uncheckedIShiftL# (y1# -# 0xE0#) 12# 282 | !z2# = uncheckedIShiftL# (y2# -# 0x80#) 6# 283 | !z3# = y3# -# 0x80# 284 | {-# INLINE chr3 #-} 285 | 286 | chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char 287 | chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) = 288 | C# (chr# (z1# +# z2# +# z3# +# z4#)) 289 | where 290 | !y1# = word2Int# x1# 291 | !y2# = word2Int# x2# 292 | !y3# = word2Int# x3# 293 | !y4# = word2Int# x4# 294 | !z1# = uncheckedIShiftL# (y1# -# 0xF0#) 18# 295 | !z2# = uncheckedIShiftL# (y2# -# 0x80#) 12# 296 | !z3# = uncheckedIShiftL# (y3# -# 0x80#) 6# 297 | !z4# = y4# -# 0x80# 298 | {-# INLINE chr4 #-} 299 | -------------------------------------------------------------------------------- /bench/Benchmarks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs, PackageImports, ScopedTypeVariables, BangPatterns #-} 2 | 3 | -- | 4 | -- Copyright : (c) 2012 Simon Meier, Bas van Dijk 5 | -- License : BSD3-style (see LICENSE) 6 | -- 7 | -- Maintainer : Simon Meier 8 | -- Stability : experimental 9 | -- Portability : tested on GHC only 10 | -- 11 | -- Benchmark encoding and decoding speed. 12 | module Main (main) where 13 | 14 | import Prelude hiding (words) 15 | import Data.Monoid ((<>)) 16 | import Criterion.Main 17 | import Control.DeepSeq 18 | import Control.Applicative 19 | 20 | import Data.Blaze.Binary.Encoding (renderTextualUtf8, renderTagged) 21 | import qualified Data.Blaze.Binary.Decoding as Blaze (Decoder, runDecoder) 22 | import qualified Data.Blaze.Binary.ParamDecoding as ParamBlaze (Decoder, runDecoder, word8s, string) 23 | import qualified Data.Blaze.Binary.IterDecoding as IterBlaze (DStream, decodeWith, word8s, string, listOfWord8s ) 24 | import qualified Data.Blaze.Binary.StreamDecoding as StreamBlaze (benchWord8s) 25 | import qualified Data.ByteString as S 26 | import qualified Data.ByteString.Internal as S 27 | import qualified Data.ByteString.Lazy as L 28 | import qualified Data.ByteString.Lazy.Char8 as LC8 29 | import Data.Serialize 30 | 31 | import Data.Binary (Binary) 32 | import qualified Data.Binary as Binary 33 | 34 | import qualified Data.Blaze.Binary as Blaze 35 | 36 | import qualified Data.Sequence as Seq 37 | import Data.Tree 38 | import Data.Word 39 | import qualified Data.Foldable as F (toList) 40 | 41 | import qualified Data.Attoparsec as A 42 | 43 | ------------------------------------------------------------------------------ 44 | -- Benchmark 45 | ------------------------------------------------------------------------------ 46 | 47 | -- | The number of repetitions to consider. 48 | nRepl :: Int 49 | nRepl = 1000 50 | 51 | -- We use NOINLINE to ensure that GHC has no chance of optimizing too much. 52 | 53 | {-# NOINLINE intData #-} 54 | intData :: Int -> [Int] 55 | intData n = take n [0..] 56 | 57 | {-# NOINLINE stringData #-} 58 | stringData :: Int -> [String] 59 | stringData n = take n $ cycle ["hello", "world"] 60 | 61 | {-# NOINLINE seqIntData #-} 62 | seqIntData :: Int -> Seq.Seq Int 63 | seqIntData = Seq.fromList . intData 64 | 65 | -- | Build a balanced binary tree. 66 | {-# NOINLINE treeIntData #-} 67 | treeIntData :: Int -> Tree Int 68 | treeIntData n = 69 | head $ go [0..n] -- assuming n >= 0 70 | where 71 | go [] = [] 72 | go [x] = [Node x []] 73 | go xs = 74 | [Node r $ concatMap go [ls, rs]] 75 | where 76 | (ls, r:rs) = splitAt (length xs `div` 2) xs 77 | 78 | testValue :: Int -> [Maybe (String, S.ByteString, [Int], Double)] 79 | testValue n = replicate n $ Just 80 | ("Haskell", S.pack [0xbe,0xef], [-2..1], 0.123 :: Double) 81 | 82 | word8Data :: Int -> [Word8] 83 | word8Data n = take n $ cycle [(0::Word8)..] 84 | 85 | word8sData :: Int -> [[Word8]] 86 | word8sData n = take n $ cycle [[1..5], [101..105]] 87 | 88 | charData :: Int -> String 89 | charData n = take n ['\0'..] 90 | 91 | -- benchmarks 92 | ------------- 93 | 94 | main :: IO () 95 | main = Criterion.Main.defaultMain $ 96 | [ bgroup ("decode (" ++ show nRepl ++ ")") 97 | -- [ bench "param-blaze-binary: word8s" $ nf 98 | -- (benchParamDecoder ParamBlaze.word8s . S.copy) 99 | -- (Blaze.toByteString $ word8Data nRepl) 100 | [ bench "iter-blaze-binary: word8s" $ nf 101 | (benchIterDecoder IterBlaze.word8s) 102 | (Blaze.toByteString $ word8Data nRepl) 103 | , bench "binary-cps: word8s" $ nf (Binary.decode :: L.ByteString -> [Word8]) (Binary.encode $ word8Data nRepl) 104 | , bench "iter-blaze-binary: [word8s]" $ nf 105 | (benchIterDecoder IterBlaze.listOfWord8s) 106 | (Blaze.toByteString $ word8sData nRepl) 107 | , bench "binary-cps: [word8s]" $ nf (Binary.decode :: L.ByteString -> [[Word8]]) (Binary.encode $ word8sData nRepl) 108 | -- , bench "attoparsec-noinline: word8s" $ nf 109 | -- (benchAttoparsec attoBinaryWord8sNoInline) 110 | -- (Blaze.toByteString $ word8Data nRepl) 111 | -- , bench "param-blaze-binary: string" $ nf 112 | -- (benchParamDecoder ParamBlaze.string) 113 | -- (Blaze.toByteString $ charData nRepl) 114 | , bench "iter-blaze-binary: string" $ nf 115 | (benchIterDecoder IterBlaze.string) 116 | (Blaze.toByteString $ charData nRepl) 117 | -- , bench "blaze-binary: word8sSimple" $ nf (benchDecoder Blaze.word8sSimple) (Blaze.toByteString $ word8Data nRepl) 118 | -- , bench "cereal: word8s" $ nf (decodeLazy :: L.ByteString -> Either String [Word8]) (encodeLazy $ word8Data nRepl) 119 | , bench "binary-cps: string" $ nf (Binary.decode :: L.ByteString -> String) (Binary.encode $ charData nRepl) 120 | -- , bench "stream-blaze-binary: word8s" $ nf 121 | -- (StreamBlaze.benchWord8s . S.copy) 122 | -- (Blaze.toByteString $ word8Data nRepl) 123 | -- , bench "blaze-binary: word8s" $ nf 124 | -- (benchDecoder (Blaze.decode :: Blaze.Decoder [Word8]) . S.copy) 125 | -- (Blaze.toByteString $ word8Data nRepl) 126 | 127 | -- , bench "blaze-binary: string" $ nf 128 | -- (benchDecoder (Blaze.decode :: Blaze.Decoder String)) 129 | -- (Blaze.toByteString $ charData nRepl) 130 | -- , bench "blaze-binary: word8sSimple" $ nf (benchDecoder Blaze.word8sSimple) (Blaze.toByteString $ word8Data nRepl) 131 | -- , bench "cereal: word8s" $ nf (decodeLazy :: L.ByteString -> Either String [Word8]) (encodeLazy $ word8Data nRepl) 132 | -- , bench "attoparsec-inlined: word8s" $ nf 133 | -- (benchAttoparsec attoBinaryWord8s) 134 | -- (Blaze.toByteString $ word8Data nRepl) 135 | {- ======= 136 | [ bench "param-blaze-binary: word8s" $ nf 137 | (benchParamDecoder ParamBlaze.word8s . S.copy) 138 | (Blaze.toByteString $ word8Data nRepl) 139 | , bench "iter-blaze-binary: word8s" $ nf 140 | (benchIterDecoder IterBlaze.word8s) 141 | (Blaze.toByteString $ word8Data nRepl) 142 | , bench "binary: word8s" $ nf (Binary.decode :: L.ByteString -> [Word8]) (Binary.encode $ word8Data nRepl) 143 | , bench "attoparsec-noinline: word8s" $ nf 144 | (benchAttoparsec attoBinaryWord8sNoInline) 145 | (Blaze.toByteString $ word8Data nRepl) 146 | , bench "param-blaze-binary: string" $ nf 147 | (benchParamDecoder ParamBlaze.string) 148 | (Blaze.toByteString $ charData nRepl) 149 | , bench "iter-blaze-binary: string" $ nf 150 | (benchIterDecoder IterBlaze.string) 151 | (Blaze.toByteString $ charData nRepl) 152 | -- , bench "blaze-binary: word8sSimple" $ nf (benchDecoder Blaze.word8sSimple) (Blaze.toByteString $ word8Data nRepl) 153 | -- , bench "cereal: word8s" $ nf (decodeLazy :: L.ByteString -> Either String [Word8]) (encodeLazy $ word8Data nRepl) 154 | , bench "binary: string" $ nf (Binary.decode :: L.ByteString -> String) (Binary.encode $ charData nRepl) 155 | , bench "stream-blaze-binary: word8s" $ nf 156 | (StreamBlaze.benchWord8s . S.copy) 157 | (Blaze.toByteString $ word8Data nRepl) 158 | , bench "blaze-binary: word8s" $ nf 159 | (benchDecoder (Blaze.decode :: Blaze.Decoder [Word8]) . S.copy) 160 | (Blaze.toByteString $ word8Data nRepl) 161 | 162 | , bench "blaze-binary: string" $ nf 163 | (benchDecoder (Blaze.decode :: Blaze.Decoder String)) 164 | (Blaze.toByteString $ charData nRepl) 165 | -- , bench "blaze-binary: word8sSimple" $ nf (benchDecoder Blaze.word8sSimple) (Blaze.toByteString $ word8Data nRepl) 166 | -- , bench "cereal: word8s" $ nf (decodeLazy :: L.ByteString -> Either String [Word8]) (encodeLazy $ word8Data nRepl) 167 | , bench "attoparsec-inlined: word8s" $ nf 168 | (benchAttoparsec attoBinaryWord8s) 169 | (Blaze.toByteString $ word8Data nRepl) 170 | >>>>>>> basvandijk/master -} 171 | ] 172 | 173 | , bgroup "encode" 174 | [ benchmarks "[Word8] " id (word8Data nRepl) 175 | , benchmarks "[[Word8]]" id (word8sData nRepl) 176 | , benchmarks "String " id (charData nRepl) 177 | , benchmarks "[String] " id (stringData nRepl) 178 | , benchmarks "testValue " id (testValue nRepl) 179 | , benchmarks "Tree Int " id (treeIntData nRepl) 180 | , benchmarks "Seq Int " id (seqIntData nRepl) 181 | , benchmarks "[Int] " id (intData nRepl) 182 | ] 183 | ] 184 | where 185 | benchAttoparsec :: A.Parser a -> S.ByteString -> a 186 | benchAttoparsec p bs = case A.eitherResult $ A.parse p bs of 187 | Left msg -> error msg 188 | Right x -> x 189 | 190 | benchDecoder :: Blaze.Decoder a -> S.ByteString -> a 191 | benchDecoder d bs = case Blaze.runDecoder d bs of 192 | Left msg -> error msg 193 | Right x -> x 194 | 195 | benchParamDecoder :: ParamBlaze.Decoder a -> S.ByteString -> a 196 | benchParamDecoder d bs = case ParamBlaze.runDecoder d bs of 197 | Left msg -> error msg 198 | Right x -> x 199 | 200 | benchIterDecoder :: IterBlaze.DStream a -> S.ByteString -> a 201 | benchIterDecoder d bs = case IterBlaze.decodeWith d bs of 202 | Left msg -> error msg 203 | Right x -> x 204 | 205 | benchmarks :: forall a b. (Binary a, Blaze.Binary a, Serialize a, NFData a) 206 | => String -> (b -> a) -> b -> Benchmark 207 | benchmarks name f x = bgroup (name ++ show nRepl) 208 | -- [ bgroup "decode" 209 | -- [ bench "blaze-binary" $ nf (benchDecoder Blaze.decode :: S.ByteString -> a) (Blaze.toByteString $ f x) 210 | -- -- , bench "blaze-binary tagged" $ whnf (L.length . renderTagged . Blaze.encode . f) x andrea 211 | -- , bench "cereal" $ nf (decodeLazy :: L.ByteString -> Either String a) (encodeLazy $ f x) 212 | -- , bench "binary" $ nf (Binary.decode :: L.ByteString -> a) (Binary.encode $ f x) 213 | -- ] 214 | --, bgroup "encode" 215 | [ bench "blaze-binary" $ nf (L.length . Blaze.toLazyByteString . f) x 216 | -- , bench "blaze-binary tagged" $ whnf (L.length . renderTagged . Blaze.encode . f) x andrea 217 | -- , bench "cereal" $ nf (L.length . encodeLazy . f) x 218 | , bench "binary" $ nf (L.length . Binary.encode . f) x 219 | ] 220 | --] 221 | 222 | {- 223 | -- | Testing the new binary encoding format. 224 | testNewBinary :: Blaze.Binary a => a -> IO () 225 | testNewBinary x = 226 | LC8.putStrLn $ renderTextualUtf8 $ Blaze.encode (x, Blaze.toLazyByteString x) 227 | -} 228 | 229 | instance NFData S.ByteString where 230 | rnf (S.PS _ _ _) = () 231 | 232 | instance NFData a => NFData (Seq.Seq a) where 233 | rnf = rnf . F.toList 234 | 235 | ------------------------------------------------------------------------------ 236 | -- Attoparsec 237 | ------------------------------------------------------------------------------ 238 | 239 | {-# INLINE genAttoBinaryWord8s #-} 240 | genAttoBinaryWord8s :: (A.Parser Word8) -> A.Parser [Word8] 241 | genAttoBinaryWord8s w8 = do 242 | go 243 | where 244 | go = do 245 | tag <- w8 246 | case tag of 247 | 0 -> return [] 248 | 1 -> (:) <$> w8 <*> go 249 | _ -> fail $ "parseBinaryWord8s: unknown tag " ++ show tag 250 | 251 | attoBinaryWord8s :: A.Parser [Word8] 252 | attoBinaryWord8s = genAttoBinaryWord8s A.anyWord8 253 | 254 | attoBinaryWord8sNoInline :: A.Parser [Word8] 255 | attoBinaryWord8sNoInline = genAttoBinaryWord8s attoWord8_noinline 256 | 257 | {-# NOINLINE attoWord8_noinline #-} 258 | attoWord8_noinline :: A.Parser Word8 259 | attoWord8_noinline = A.anyWord8 260 | -------------------------------------------------------------------------------- /src/Data/Blaze/Binary/Encoding.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Data.Blaze.Binary.Encoding 5 | -- Copyright : 2012, Simon Meier 6 | -- License : BSD3-style (see LICENSE) 7 | -- 8 | -- Maintainer : Simon Meier 9 | -- Stability : 10 | -- Portability : portable 11 | -- 12 | -- Binary encoding of values. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Data.Blaze.Binary.Encoding ( 16 | 17 | Encoding 18 | 19 | -- * Streams of values to be encoded 20 | , VStream 21 | , render 22 | , renderCompact 23 | , renderTagged 24 | , renderTextualUtf8 25 | 26 | -- ** Encoding combinators 27 | 28 | -- ** Construction 29 | , word 30 | , word8 31 | , word16 32 | , word32 33 | , word64 34 | 35 | , int 36 | , int8 37 | , int16 38 | , int32 39 | , int64 40 | 41 | , integer 42 | , float 43 | , double 44 | 45 | , char 46 | 47 | , byteString 48 | 49 | , builder 50 | 51 | , (<>) 52 | 53 | ) where 54 | 55 | import Prelude hiding (putChar) 56 | 57 | import qualified Data.ByteString as S 58 | import qualified Data.ByteString.Char8 as SC8 59 | import qualified Data.ByteString.Lazy as L 60 | import qualified Data.ByteString.Lazy.Builder as B 61 | import qualified Data.ByteString.Lazy.Builder.ASCII as B 62 | import qualified Data.ByteString.Lazy.Builder.Extras as B 63 | import qualified Data.ByteString.Lazy.Builder.Internal as B 64 | import qualified Data.ByteString.Lazy.Builder.BasicEncoding as E 65 | import qualified Data.ByteString.Lazy.Builder.BasicEncoding.Internal as E 66 | import Data.Monoid 67 | import Data.Word 68 | import Data.Int 69 | import Foreign.Ptr 70 | 71 | #if __GLASGOW_HASKELL__ < 704 72 | 73 | infixr 6 <> 74 | 75 | (<>) :: Monoid m => m -> m -> m 76 | (<>) = mappend 77 | 78 | #endif 79 | 80 | ------------------------------------------------------------------------ 81 | 82 | -- | The representation for a stream of values to be serialized. 83 | data VStreamRep = 84 | VChar {-# UNPACK #-} !Char VStreamRep 85 | | VWord {-# UNPACK #-} !Word VStreamRep 86 | | VWord8 {-# UNPACK #-} !Word8 VStreamRep 87 | | VWord16 {-# UNPACK #-} !Word16 VStreamRep 88 | | VWord32 {-# UNPACK #-} !Word32 VStreamRep 89 | | VWord64 {-# UNPACK #-} !Word64 VStreamRep 90 | | VInt {-# UNPACK #-} !Int VStreamRep 91 | | VInt8 {-# UNPACK #-} !Int8 VStreamRep 92 | | VInt16 {-# UNPACK #-} !Int16 VStreamRep 93 | | VInt32 {-# UNPACK #-} !Int32 VStreamRep 94 | | VInt64 {-# UNPACK #-} !Int64 VStreamRep 95 | | VFloat {-# UNPACK #-} !Float VStreamRep 96 | | VDouble {-# UNPACK #-} !Double VStreamRep 97 | | VInteger !Integer VStreamRep 98 | | VByteString !S.ByteString VStreamRep 99 | | VBuilder !B.Builder VStreamRep 100 | | VEmpty 101 | 102 | type Encoding t = t -> VStream 103 | 104 | -- | A stream of values to be encoded. 105 | newtype VStream = VStream { toVStreamRep :: VStreamRep -> VStreamRep } 106 | 107 | instance Monoid VStream where 108 | {-# INLINE mempty #-} 109 | mempty = VStream id 110 | {-# INLINE mappend #-} 111 | b1 `mappend` b2 = VStream (toVStreamRep b1 . toVStreamRep b2) 112 | {-# INLINE mconcat #-} 113 | mconcat = foldr mappend mempty 114 | 115 | -- | Binary encode a 'VStream' to a lazy bytestring 'B.Builder' using a 116 | -- compact Base-128 encoding for integers and words. 117 | renderCompact :: VStream -> B.Builder 118 | renderCompact = renderWith 119 | (E.fromF E.word8) E.word16Base128LE E.word32Base128LE E.word64Base128LE 120 | E.wordBase128LE 121 | (E.fromF E.int8) E.int16ZigZagBase128LE E.int32ZigZagBase128LE E.int64ZigZagBase128LE 122 | E.intZigZagBase128LE 123 | E.charUtf8 124 | (E.fromF E.floatLE) (E.fromF E.doubleLE) 125 | (error "render: integer: implement") 126 | (\x -> E.encodeWithB E.intZigZagBase128LE (S.length x) <> B.byteString x) 127 | id 128 | 129 | -- | Binary encode a 'VStream' to a lazy bytestring 'B.Builder'. 130 | render :: VStream -> B.Builder 131 | render = renderWith 132 | (fe E.word8) (fe E.word16LE) (fe E.word32LE) (fe E.word64LE) (fe (fromIntegral E.>$< E.word64LE)) 133 | (fe E.int8) (fe E.int16LE) (fe E.int32LE) (fe E.int64LE) (fe (fromIntegral E.>$< E.int64LE)) 134 | E.charUtf8 (fe E.floatLE) (fe E.doubleLE) 135 | (error "render: integer: implement") 136 | (\x -> B.int64LE (fromIntegral $ S.length x) <> B.byteString x) 137 | id 138 | where 139 | {-# INLINE fe #-} 140 | fe = E.fromF 141 | 142 | -- | Binary encode a 'VStream' to a lazy bytestring 'B.Builder' using a tagged 143 | -- format that allows to reconstruct the value stream. 144 | renderTagged :: VStream -> L.ByteString 145 | renderTagged = 146 | B.toLazyByteString 147 | . renderWith 148 | (tf 0 E.word8) (tf 1 E.word16LE) (tf 2 E.word32LE) (tf 3 E.word64LE) (tf 4 (fromIntegral E.>$< E.word64LE)) 149 | (tf 5 E.int8) (tf 6 E.int16LE) (tf 7 E.int32LE) (tf 8 E.int64LE) (tf 9 (fromIntegral E.>$< E.int64LE)) 150 | (tb 10 E.charUtf8) (tf 11 E.floatLE) (tf 12 E.doubleLE) 151 | (error "render: integer: implement") 152 | ((B.word8 14 <>) . B.byteString) 153 | (B.word8 15 <>) 154 | where 155 | {-# INLINE tf #-} 156 | tf t fe = tb t (E.fromF fe) 157 | 158 | {-# INLINE tb #-} 159 | tb t fb = (,) t E.>$< E.fromF E.word8 `E.pairB` fb 160 | 161 | -- | Binary encode a 'VStream' to a lazy bytestring 'B.Builder'. 162 | {-# INLINE renderWith #-} 163 | renderWith :: E.BoundedEncoding Word8 -> E.BoundedEncoding Word16 -> E.BoundedEncoding Word32 -> E.BoundedEncoding Word64 -> E.BoundedEncoding Word 164 | -> E.BoundedEncoding Int8 -> E.BoundedEncoding Int16 -> E.BoundedEncoding Int32 -> E.BoundedEncoding Int64 -> E.BoundedEncoding Int 165 | -> E.BoundedEncoding Char 166 | -> E.BoundedEncoding Float -> E.BoundedEncoding Double 167 | -> (Integer -> B.Builder) 168 | -> (S.ByteString -> B.Builder) 169 | -> (B.Builder -> B.Builder) 170 | -> VStream 171 | -> B.Builder 172 | renderWith w8 w16 w32 w64 w i8 i16 i32 i64 i c f d ibig bs b = 173 | -- take care that inlining is possible once all encodings are fixed 174 | \vs0 -> B.builder $ step (toVStreamRep vs0 VEmpty) 175 | where 176 | step vs1 k (B.BufferRange op0 ope0) = 177 | go vs1 op0 178 | where 179 | go vs !op 180 | | op `plusPtr` bound <= ope0 = case vs of 181 | VEmpty -> k (B.BufferRange op ope0) 182 | VWord8 x vs' -> E.runB w8 x op >>= go vs' 183 | VWord16 x vs' -> E.runB w16 x op >>= go vs' 184 | VWord32 x vs' -> E.runB w32 x op >>= go vs' 185 | VWord64 x vs' -> E.runB w64 x op >>= go vs' 186 | VWord x vs' -> E.runB w x op >>= go vs' 187 | VInt8 x vs' -> E.runB i8 x op >>= go vs' 188 | VInt16 x vs' -> E.runB i16 x op >>= go vs' 189 | VInt32 x vs' -> E.runB i32 x op >>= go vs' 190 | VInt64 x vs' -> E.runB i64 x op >>= go vs' 191 | VInt x vs' -> E.runB i x op >>= go vs' 192 | VChar x vs' -> E.runB c x op >>= go vs' 193 | VFloat x vs' -> E.runB f x op >>= go vs' 194 | VDouble x vs' -> E.runB d x op >>= go vs' 195 | VInteger x vs' -> B.runBuilderWith (ibig x) (step vs' k) (B.BufferRange op ope0) 196 | VByteString x vs' -> B.runBuilderWith (bs x) (step vs' k) (B.BufferRange op ope0) 197 | VBuilder x vs' -> B.runBuilderWith (b x) (step vs' k) (B.BufferRange op ope0) 198 | | otherwise = return $ B.bufferFull bound op (step vs k) 199 | 200 | bound = max' w8 $ max' w16 $ max' w32 $ max' w64 $ max' w $ 201 | max' i8 $ max' i16 $ max' i32 $ max' i64 $ max' i $ 202 | max' c $ max' f $ E.sizeBound d 203 | 204 | {-# INLINE max' #-} 205 | max' e = max (E.sizeBound e) 206 | 207 | 208 | renderTextualUtf8 :: VStream -> L.ByteString 209 | renderTextualUtf8 vs0 = 210 | B.toLazyByteString $ go (toVStreamRep vs0 VEmpty) 211 | where 212 | go VEmpty = mempty 213 | go (VWord8 1 (VChar x vs)) = line "w8,c 1," (B.charUtf8 x) vs 214 | go (VWord8 1 (VWord x vs)) = line "w8,w 1," (B.wordDec x) vs 215 | go (VWord8 1 (VInt x vs)) = line "w8,i 1," (B.intDec x) vs 216 | go (VInt l (VByteString x vs)) 217 | | l > 0 = line "i,bs " (B.intDec l <> B.char8 ',' <> B.byteStringHexFixed x) vs 218 | go (VWord8 x vs) = line "w8 " (B.word8Dec x) vs 219 | go (VWord16 x vs) = line "w16 " (B.word16Dec x) vs 220 | go (VWord32 x vs) = line "w32 " (B.word32Dec x) vs 221 | go (VWord64 x vs) = line "w64 " (B.word64Dec x) vs 222 | go (VWord x vs) = line "w " (B.wordDec x) vs 223 | go (VInt8 x vs) = line "i8 " (B.int8Dec x) vs 224 | go (VInt16 x vs) = line "i16 " (B.int16Dec x) vs 225 | go (VInt32 x vs) = line "i32 " (B.int32Dec x) vs 226 | go (VInt64 x vs) = line "i64 " (B.int64Dec x) vs 227 | go (VInt x vs) = line "i " (B.intDec x) vs 228 | go (VChar x vs) = line "c " (B.charUtf8 x) vs 229 | go (VFloat x vs) = line "f " (B.floatDec x) vs 230 | go (VDouble x vs) = line "d " (B.doubleDec x) vs 231 | go (VInteger x vs) = line "I " (B.integerDec x) vs 232 | go (VByteString x vs) = line "bs " (B.byteStringHexFixed x) vs 233 | go (VBuilder x vs) = line "B " (B.lazyByteStringHexFixed $ B.toLazyByteString x) vs 234 | 235 | line :: SC8.ByteString -> B.Builder -> VStreamRep -> B.Builder 236 | line pre b vs = B.byteStringCopy pre <> b <> B.char8 '\n' <> go vs 237 | 238 | 239 | -- VStream construction 240 | ------------------------------ 241 | 242 | {-# INLINE float #-} 243 | float :: Encoding Float 244 | float = VStream . VFloat 245 | 246 | {-# INLINE double #-} 247 | double :: Encoding Double 248 | double = VStream . VDouble 249 | 250 | {-# INLINE integer #-} 251 | integer :: Encoding Integer 252 | integer = VStream . VInteger 253 | 254 | {-# INLINE word #-} 255 | word :: Encoding Word 256 | word = VStream . VWord 257 | 258 | {-# INLINE word8 #-} 259 | word8 :: Encoding Word8 260 | word8 = VStream . VWord8 261 | 262 | {-# INLINE word16 #-} 263 | word16 :: Encoding Word16 264 | word16 = VStream . VWord16 265 | 266 | {-# INLINE word32 #-} 267 | word32 :: Encoding Word32 268 | word32 = VStream . VWord32 269 | 270 | {-# INLINE word64 #-} 271 | word64 :: Encoding Word64 272 | word64 = VStream . VWord64 273 | 274 | {-# INLINE int #-} 275 | int :: Encoding Int 276 | int = VStream . VInt 277 | 278 | {-# INLINE int8 #-} 279 | int8 :: Encoding Int8 280 | int8 = VStream . VInt8 281 | 282 | {-# INLINE int16 #-} 283 | int16 :: Encoding Int16 284 | int16 = VStream . VInt16 285 | 286 | {-# INLINE int32 #-} 287 | int32 :: Encoding Int32 288 | int32 = VStream . VInt32 289 | 290 | {-# INLINE int64 #-} 291 | int64 :: Encoding Int64 292 | int64 = VStream . VInt64 293 | 294 | {-# INLINE char #-} 295 | char :: Encoding Char 296 | char = VStream . VChar 297 | 298 | {-# INLINE byteString #-} 299 | byteString :: Encoding S.ByteString 300 | byteString = VStream . VByteString 301 | 302 | {-# INLINE builder #-} 303 | builder :: Encoding B.Builder 304 | builder = VStream . VBuilder 305 | -------------------------------------------------------------------------------- /src/Data/Blaze/Binary/IterDecoding.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MagicHash, RankNTypes, BangPatterns #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Data.Blaze.Binary.Encoding 5 | -- Copyright : 2012, Simon Meier 6 | -- License : BSD3-style (see LICENSE) 7 | -- 8 | -- Maintainer : Simon Meier 9 | -- Stability : 10 | -- Portability : portable 11 | -- 12 | -- Iteratee-style decoding of binary values. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Data.Blaze.Binary.IterDecoding where 16 | 17 | import Prelude hiding (catch) 18 | 19 | import Control.Applicative 20 | 21 | import qualified Data.ByteString.Internal as S 22 | 23 | import Foreign 24 | import GHC.Word 25 | import GHC.Int 26 | import GHC.Prim 27 | import GHC.Types 28 | 29 | data DStreamRep a = 30 | DWord8 (Word# -> DStreamRep a) 31 | | DInt (Int# -> DStreamRep a) 32 | | DChar (Char# -> DStreamRep a) 33 | | DByteString {-# UNPACK #-} !Int (S.ByteString -> DStreamRep a) 34 | | DWord (Word# -> DStreamRep a) 35 | | DFloat (Float# -> DStreamRep a) 36 | | DDouble (Double# -> DStreamRep a) 37 | | DWord64 (Word# -> DStreamRep a) 38 | | DInt64 (Int# -> DStreamRep a) 39 | | DWord32 (Word# -> DStreamRep a) 40 | | DInt32 (Int# -> DStreamRep a) 41 | | DWord16 (Word# -> DStreamRep a) 42 | | DInt16 (Int# -> DStreamRep a) 43 | | DInt8 (Int# -> DStreamRep a) 44 | | DSlowWord8 (Ptr Word8) String (Word8 -> DStreamRep a) 45 | -- ^ For reading a multi-byte primitive at the boundary. 46 | | DFail String 47 | | DReturn a 48 | 49 | newtype DStream a = DStream { 50 | unDStream :: forall r. (a -> DStreamRep r) -> DStreamRep r 51 | } 52 | 53 | instance Functor DStream where 54 | {-# INLINE fmap #-} 55 | fmap f = \s -> DStream $ \k -> unDStream s (k . f) 56 | 57 | instance Applicative DStream where 58 | {-# INLINE pure #-} 59 | pure = \x -> DStream $ \k -> k x 60 | 61 | {-# INLINE (<*>) #-} 62 | (<*>) = \sf sx -> DStream $ \k -> 63 | unDStream sf (\f -> unDStream sx (\x -> k (f x))) 64 | 65 | instance Monad DStream where 66 | return = pure 67 | 68 | {-# INLINE (>>=) #-} 69 | (>>=) = \sm f -> DStream $ \k -> unDStream sm (\m -> unDStream (f m) k) 70 | 71 | {-# INLINE (>>) #-} 72 | (>>) = \sm sn -> DStream $ \k -> unDStream sm (\_ -> unDStream sn k) 73 | 74 | fail msg = DStream $ \_ -> DFail msg 75 | 76 | word8 :: DStream Word8 77 | word8 = DStream $ \k -> DWord8 (\x -> k (W8# x)) 78 | 79 | char :: DStream Char 80 | char = DStream $ \k -> DChar (\x -> k (C# x)) 81 | 82 | {-# NOINLINE word8s #-} 83 | word8s :: DStream [Word8] 84 | word8s = decodeList word8 85 | 86 | int :: DStream Int 87 | int = DStream $ \k -> DInt (\x -> k (I# x)) 88 | 89 | string :: DStream String 90 | string = decodeList char 91 | 92 | listOfWord8s :: DStream [[Word8]] 93 | listOfWord8s = decodeList word8s 94 | 95 | {-# NOINLINE decodeList #-} 96 | decodeList :: DStream a -> DStream [a] 97 | decodeList decode = 98 | int >>= go [] 99 | where 100 | go xs !n 101 | | n <= 0 = return xs 102 | | otherwise = do x <- decode; go (x:xs) (n - 1) 103 | 104 | -- {-# NOINLINE decodeList #-} 105 | -- decodeList :: DStream a -> DStream [a] 106 | -- decodeList decode = 107 | -- int >>= go 108 | -- where 109 | -- go !n 110 | -- | n <= 0 = return [] 111 | -- | otherwise = force ((:) <$> decode <*> go (n - 1)) 112 | 113 | -- decodeList :: DStream a -> DStream [a] 114 | -- decodeList decode = 115 | -- go 116 | -- where 117 | -- go = do 118 | -- tag <- word8 119 | -- case tag of 120 | -- 0 -> return [] 121 | -- 1 -> force ((:) <$> decode <*> go) 122 | -- _ -> fail $ "decodeList: unexpected tag " ++ show tag 123 | 124 | -- | Use 'force' to ensure that the finally returned value is in WHNF. This 125 | -- reduces memory usage, as it flattens all the one-argument PAPS that were 126 | -- built up. Note that flattening too early may result in an increased 127 | -- runtime, as then some arguments are copied multiple times. 128 | {-# INLINE force #-} 129 | force :: DStream a -> DStream a 130 | force ds = DStream $ \k -> unDStream ds (\x -> x `seq` (k x)) 131 | 132 | decodeWith :: DStream a -> S.ByteString -> Either String a 133 | decodeWith ds0 (S.PS fpbuf off len) = S.inlinePerformIO $ do 134 | withForeignPtr fpbuf $ \pbuf -> do 135 | let !ip0 = pbuf `plusPtr` off 136 | !ipe = ip0 `plusPtr` len 137 | 138 | err :: String -> Ptr Word8 -> IO (Either String a) 139 | err msg ip = return $ Left $ msg ++ 140 | " (at byte " ++ show (ip `minusPtr` ip0) ++ 141 | " of " ++ show len ++ ")" 142 | 143 | unexpectedEOI loc = 144 | err ("unexpected end-of-input while decoding " ++ loc) 145 | 146 | go :: Ptr Word8 -> DStreamRep a -> IO (Either String a) 147 | go !ip ds = case ds of 148 | DReturn x -> return $ Right x 149 | 150 | DFail msg -> err msg ip 151 | 152 | DWord8 k -> readN 1 $ \ip' -> do (W8# x) <- peek $ castPtr ip 153 | go ip' (k x) 154 | 155 | DWord16 k -> readN 2 $ \ip' -> do (W16# x) <- peek $ castPtr ip 156 | go ip' (k x) 157 | 158 | DWord32 k -> readN 4 $ \ip' -> do (W32# x) <- peek $ castPtr ip 159 | go ip' (k x) 160 | 161 | DWord64 k -> readN 8 $ \ip' -> do (W64# x) <- peek $ castPtr ip 162 | go ip' (k x) 163 | 164 | DWord k -> readN (sizeOf (undefined :: Word)) $ \ip' -> do 165 | (W# x) <- peek $ castPtr ip 166 | go ip' (k x) 167 | 168 | DInt8 k -> readN 1 $ \ip' -> do (I8# x) <- peek $ castPtr ip 169 | go ip' (k x) 170 | DInt16 k -> readN 2 $ \ip' -> do (I16# x) <- peek $ castPtr ip 171 | go ip' (k x) 172 | DInt32 k -> readN 4 $ \ip' -> do (I32# x) <- peek $ castPtr ip 173 | go ip' (k x) 174 | DInt64 k -> readN 8 $ \ip' -> do (I64# x) <- peek $ castPtr ip 175 | go ip' (k x) 176 | 177 | DInt k -> readN (sizeOf (undefined :: Int)) $ \ip' -> do 178 | (I# x) <- peek $ castPtr ip 179 | go ip' (k x) 180 | 181 | DFloat k -> readN (sizeOf (undefined :: Float)) $ \ip' -> do 182 | (F# x) <- peek $ castPtr ip 183 | go ip' (k x) 184 | 185 | DDouble k -> readN (sizeOf (undefined :: Double)) $ \ip' -> do 186 | (D# x) <- peek $ castPtr ip 187 | go ip' (k x) 188 | 189 | DChar k 190 | | ip `plusPtr` 4 <= ipe -> do 191 | let peek8 = peekByteOff ip 192 | w0 <- peek ip 193 | case () of 194 | _ | w0 < 0x80 -> do 195 | let !c# = chr1# w0 196 | go (ip `plusPtr` 1) (k c#) 197 | 198 | | w0 < 0xe0 -> do 199 | w1 <- peek8 1 200 | let !c# = chr2# w0 w1 201 | go (ip `plusPtr` 2) (k c#) 202 | 203 | | w0 < 0xf0 -> do 204 | w1 <- peek8 1; w2 <- peek8 2 205 | let !c# = chr3# w0 w1 w2 206 | go (ip `plusPtr` 3) (k c#) 207 | 208 | | otherwise -> do 209 | w1 <- peek8 1; w2 <- peek8 2; w3 <- peek8 3 210 | let !c# = chr4# w0 w1 w2 w3 211 | go (ip `plusPtr` 4) (k c#) 212 | 213 | | otherwise -> 214 | go ip (unDStream (slowCharUtf8 ip) (\ !(C# c#) -> k c#)) 215 | 216 | DSlowWord8 ipErr locErr k 217 | | ip < ipe -> do x <- peek $ castPtr ip 218 | go (ip `plusPtr` 1) (k x) 219 | | otherwise -> unexpectedEOI locErr ipErr 220 | where 221 | {-# INLINE readN #-} 222 | readN :: Int 223 | -> (Ptr Word8 -> IO (Either String a)) 224 | -> IO (Either String a) 225 | readN n io = 226 | let ip' = ip `plusPtr` n in 227 | if ip' <= ipe 228 | then io ip' 229 | else unexpectedEOI ("reading " ++ show n ++ " bytes") ip 230 | 231 | -- start the decoding 232 | go ip0 (unDStream ds0 DReturn) 233 | {- 234 | {-# INLINE fastCharUtf8 #-} 235 | fastCharUtf8 :: Ptr Word8 -> State# RealWorld -> (# State RealWorld, Char# #) 236 | fastCharUtf8 ip = \s0 -> 237 | case runIO (peek ip0) s0 of 238 | (# s1, w0 #) 239 | | w0 < 0x80 -> (# s1, chr1 w0 #) 240 | 241 | | w0 < 0xe0 -> 242 | case runIO (peekByteOff ip0 1) s1 of 243 | (# s2, w1 #) -> (# s2, chr2 w0 w1 #) 244 | 245 | | w0 < 0xf0 -> 246 | case runIO (peekByteOff ip0 1) s1 of 247 | (# s2, w1 #) -> case runIO (peekByteOff ip0 2) s2 of 248 | (# s3, w2 #) -> (# s3, chr3 w0 w1 w2 #) 249 | 250 | | otherwise -> 251 | case runIO (peekByteOff ip0 1) s1 of 252 | (# s2, w1 #) -> case runIO (peekByteOff ip0 2) s2 of 253 | (# s3, w2 #) -> case runIO (peekByteOff ip0 3) s3 of 254 | (# s4, w3 #) -> (# s4, chr4 w0 w1 w2 w3 #) 255 | -} 256 | 257 | slowCharUtf8 :: Ptr Word8 -> DStream Char 258 | slowCharUtf8 ip = do 259 | w0 <- word8' 260 | case () of 261 | _ | w0 < 0x80 -> return (chr1 w0) 262 | | w0 < 0xe0 -> chr2 w0 <$> word8' 263 | | w0 < 0xf0 -> chr3 w0 <$> word8' <*> word8' 264 | | otherwise -> chr4 w0 <$> word8' <*> word8' <*> word8' 265 | where 266 | word8' = slowWord8 ip "char (UTF-8)" 267 | chr1 w0 = C# (chr1# w0) 268 | chr2 w0 w1 = C# (chr2# w0 w1) 269 | chr3 w0 w1 w2 = C# (chr3# w0 w1 w2) 270 | chr4 w0 w1 w2 w3 = C# (chr4# w0 w1 w2 w3) 271 | 272 | slowWord8 :: Ptr Word8 -> String -> DStream Word8 273 | slowWord8 ip msg = DStream (\k -> DSlowWord8 ip msg k) 274 | 275 | {- 276 | data Res a = Res !a {-# UNPACK #-} !(Ptr Word8) 277 | 278 | data Buffer = Buffer {-# UNPACK #-} !(Ptr Word8) -- ^ First input byte 279 | {-# UNPACK #-} !(Ptr Word8) -- ^ First byte after 280 | 281 | data ParseException = ParseException String {-# UNPACK #-} !(Ptr Word8) 282 | deriving( Show, Typeable ) 283 | 284 | instance Exception ParseException where 285 | 286 | newtype Parser a = Parser { unParser :: Buffer -> IO (Res a) } 287 | 288 | instance Functor Res where 289 | {-# INLINE fmap #-} 290 | fmap f (Res x ip) = Res (f x) ip 291 | 292 | instance Functor Parser where 293 | fmap f = Parser . fmap (fmap (fmap f)) . unParser 294 | 295 | instance Applicative Parser where 296 | {-# INLINE pure #-} 297 | pure x = Parser $ \(Buffer ip _) -> return (Res x ip) 298 | 299 | {-# INLINE (<*>) #-} 300 | Parser fIO <*> Parser xIO = Parser $ \ !buf@(Buffer _ ipe0) -> do 301 | Res f ip1 <- fIO buf 302 | Res x ip2 <- xIO (Buffer ip1 ipe0) 303 | evaluate (Res (f x) ip2) 304 | 305 | instance Monad Parser where 306 | {-# INLINE return #-} 307 | return = pure 308 | 309 | {-# INLINE (>>=) #-} 310 | Parser xIO >>= f = Parser $ \ !buf@(Buffer _ ipe0) -> do 311 | Res x ip1 <- xIO buf 312 | unParser (f x) (Buffer ip1 ipe0) 313 | 314 | {-# INLINE fail #-} 315 | fail msg = Parser $ \(Buffer ip _) -> throw $ ParseException msg ip 316 | 317 | 318 | requires :: Int -> Parser a -> Parser a 319 | requires n p = Parser $ \buf@(Buffer ip ipe) -> 320 | if ipe `minusPtr` ip >= n 321 | then unParser p buf 322 | else throw $ (`ParseException` ip) $ 323 | "required " ++ show n ++ 324 | " bytes, but there are only " ++ show (ipe `minusPtr` ip) ++ 325 | " bytes left." 326 | 327 | 328 | {-# INLINE word8 #-} 329 | word8 :: Parser Word8 330 | word8 = Parser $ \(Buffer ip ipe) -> do 331 | let ip' = ip `plusPtr` 1 332 | if ip' < ipe 333 | then do x <- peek ip 334 | return (Res x ip') 335 | else throw $ (`ParseException` (ip' `plusPtr` (-1))) $ 336 | "less than the one byte left" 337 | 338 | word8sSimple :: Parser [Word8] 339 | word8sSimple = do 340 | tag <- word8 341 | case tag of 342 | 0 -> return [] 343 | 1 -> (:) <$> word8 <*> word8s 344 | _ -> fail $ "word8s: unexpected tag " ++ show tag 345 | 346 | word8s :: Parser [Word8] 347 | word8s = 348 | go [] 349 | where 350 | go xs = do 351 | tag <- word8 352 | case tag of 353 | 0 -> return (reverse xs) 354 | 1 -> do x <- word8 355 | go (x:xs) 356 | _ -> fail $ "word8s: unexpected tag " ++ show tag 357 | 358 | runParser :: Parser a -> S.ByteString -> Either String a 359 | runParser p (S.PS fpbuf off len) = S.inlinePerformIO $ do 360 | withForeignPtr fpbuf $ \pbuf -> do 361 | let !ip = pbuf `plusPtr` off 362 | !ipe = ip `plusPtr` len 363 | (`catch` handler) $ do 364 | Res x _ <- unParser p (Buffer ip ipe) 365 | return (Right x) 366 | where 367 | handler :: ParseException -> IO (Either String a) 368 | handler (ParseException msg _) = return $ Left msg 369 | 370 | -} 371 | 372 | ------------------------------------------------------------------------------ 373 | -- UTF-8 decoding helpers 374 | ------------------------------------------------------------------------------ 375 | 376 | chr1# :: Word8 -> Char# 377 | chr1# (W8# x#) = (chr# (word2Int# x#)) 378 | {-# INLINE chr1# #-} 379 | 380 | chr2# :: Word8 -> Word8 -> Char# 381 | chr2# (W8# x1#) (W8# x2#) = 382 | (chr# (z1# +# z2#)) 383 | where 384 | !y1# = word2Int# x1# 385 | !y2# = word2Int# x2# 386 | !z1# = uncheckedIShiftL# (y1# -# 0xC0#) 6# 387 | !z2# = y2# -# 0x80# 388 | {-# INLINE chr2# #-} 389 | 390 | chr3# :: Word8 -> Word8 -> Word8 -> Char# 391 | chr3# (W8# x1#) (W8# x2#) (W8# x3#) = 392 | (chr# (z1# +# z2# +# z3#)) 393 | where 394 | !y1# = word2Int# x1# 395 | !y2# = word2Int# x2# 396 | !y3# = word2Int# x3# 397 | !z1# = uncheckedIShiftL# (y1# -# 0xE0#) 12# 398 | !z2# = uncheckedIShiftL# (y2# -# 0x80#) 6# 399 | !z3# = y3# -# 0x80# 400 | {-# INLINE chr3# #-} 401 | 402 | chr4# :: Word8 -> Word8 -> Word8 -> Word8 -> Char# 403 | chr4# (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) = 404 | (chr# (z1# +# z2# +# z3# +# z4#)) 405 | where 406 | !y1# = word2Int# x1# 407 | !y2# = word2Int# x2# 408 | !y3# = word2Int# x3# 409 | !y4# = word2Int# x4# 410 | !z1# = uncheckedIShiftL# (y1# -# 0xF0#) 18# 411 | !z2# = uncheckedIShiftL# (y2# -# 0x80#) 12# 412 | !z3# = uncheckedIShiftL# (y3# -# 0x80#) 6# 413 | !z4# = y4# -# 0x80# 414 | {-# INLINE chr4# #-} 415 | -------------------------------------------------------------------------------- /src/Data/Blaze/Binary.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, BangPatterns, FlexibleContexts, FlexibleInstances #-} 2 | 3 | #ifdef GENERICS 4 | {-# LANGUAGE DefaultSignatures 5 | , TypeOperators 6 | , BangPatterns 7 | , KindSignatures 8 | , ScopedTypeVariables 9 | #-} 10 | #endif 11 | 12 | ----------------------------------------------------------------------------- 13 | -- | 14 | -- Module : Data.Blaze.Binary 15 | -- Copyright : 2012, Simon Meier 16 | -- License : BSD3-style (see LICENSE) 17 | -- 18 | -- Maintainer : Simon Meier 19 | -- Stability : 20 | -- Portability : 21 | -- 22 | ----------------------------------------------------------------------------- 23 | 24 | module Data.Blaze.Binary ( 25 | 26 | -- * The Binary class 27 | Binary(..) 28 | , toByteString 29 | , toLazyByteString 30 | 31 | ) where 32 | 33 | import Control.Applicative 34 | 35 | import Data.Blaze.Binary.Encoding 36 | import qualified Data.Blaze.Binary.Decoding as D 37 | 38 | import Data.Word 39 | import Data.Monoid 40 | import Data.Foldable (foldMap) 41 | import Foreign 42 | 43 | -- And needed for the instances: 44 | import Data.Array.Unboxed 45 | import qualified Data.ByteString as S 46 | import qualified Data.ByteString.Lazy as L 47 | import qualified Data.ByteString.Lazy.Internal as L (foldrChunks, ByteString(..)) 48 | import qualified Data.ByteString.Lazy.Builder as B 49 | import qualified Data.Map as Map 50 | import qualified Data.Set as Set 51 | import qualified Data.IntMap as IntMap 52 | import qualified Data.IntSet as IntSet 53 | import qualified Data.Ratio as R 54 | import qualified Data.Tree as T 55 | import qualified Data.Sequence as Seq 56 | 57 | #ifdef GENERICS 58 | import GHC.Generics 59 | #endif 60 | 61 | ------------------------------------------------------------------------ 62 | 63 | -- | If your compiler has support for the @DeriveGeneric@ and 64 | -- @DefaultSignatures@ language extensions (@ghc >= 7.2.1@), the 'encode' and 'decode' 65 | -- methods will have default generic implementations. 66 | -- 67 | -- To use this option, simply add a @deriving 'Generic'@ clause to your datatype 68 | -- and declare a 'Binary' instance for it without giving a definition for 69 | -- 'encode' and 'decode'. 70 | class Binary t where 71 | -- | Encode a value in the Put monad. 72 | encode :: Encoding t 73 | decode :: D.Decoder t 74 | 75 | #ifdef GENERICS 76 | default encode :: (Generic t, GBinary (Rep t)) => Encoding t 77 | encode = gEncode . from 78 | {-# INLINE encode #-} 79 | 80 | default decode :: (Generic t, GBinary (Rep t)) => D.Decoder t 81 | decode = to <$> gDecode 82 | {-# INLINE decode #-} 83 | #endif 84 | 85 | -- | Encode a value to a strict 'S.ByteString'. 86 | toByteString :: Binary t => t -> S.ByteString 87 | -- FIXME: Use more efficient conversion. 88 | toByteString = S.concat . L.toChunks . toLazyByteString 89 | 90 | 91 | -- | Encode a value to a lazy 'L.ByteString'. 92 | toLazyByteString :: Binary t => t -> L.ByteString 93 | toLazyByteString = B.toLazyByteString . render . encode 94 | 95 | ------------------------------------------------------------------------ 96 | -- Simple instances 97 | 98 | wrongTag :: Show a => String -> a -> D.Decoder b 99 | wrongTag loc tag = fail $ "decode " ++ loc ++ ": could not parse tag " ++ show tag 100 | 101 | -- The () type need never be written to disk: values of singleton type 102 | -- can be reconstructed from the type alone 103 | instance Binary () where 104 | {-# INLINE encode #-} 105 | encode () = mempty 106 | {-# INLINE decode #-} 107 | decode = return () 108 | 109 | -- Bools are encoded as a byte in the range 0 .. 1 110 | instance Binary Bool where 111 | {-# INLINE encode #-} 112 | encode = \x -> word8 (if x then 1 else 0) 113 | decode = do tag <- D.word8 114 | case tag of 115 | 0 -> return False 116 | 1 -> return True 117 | _ -> wrongTag "Bool" tag 118 | 119 | -- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2 120 | instance Binary Ordering where 121 | {-# INLINE encode #-} 122 | encode = \x -> word8 (case x of LT -> 0; EQ -> 1; GT -> 2) 123 | decode = do tag <- D.word8 124 | case tag of 125 | 0 -> return LT 126 | 1 -> return EQ 127 | 2 -> return GT 128 | _ -> wrongTag "Ordering" tag 129 | 130 | ------------------------------------------------------------------------ 131 | -- Words and Ints 132 | 133 | -- Words8s are written as bytes 134 | instance Binary Word8 where 135 | {-# INLINE encode #-} 136 | encode = word8 137 | {-# INLINE decode #-} 138 | decode = D.word8 139 | 140 | -- Words16s are written as 2 bytes in big-endian (network) order 141 | instance Binary Word16 where 142 | {-# INLINE encode #-} 143 | encode = word16 144 | {-# INLINE decode #-} 145 | decode = D.word16 146 | 147 | -- Words32s are written as 4 bytes in big-endian (network) order 148 | instance Binary Word32 where 149 | {-# INLINE encode #-} 150 | encode = word32 151 | {-# INLINE decode #-} 152 | decode = D.word32 153 | 154 | -- Words64s are written as 8 bytes in big-endian (network) order 155 | instance Binary Word64 where 156 | {-# INLINE encode #-} 157 | encode = word64 158 | {-# INLINE decode #-} 159 | decode = D.word64 160 | 161 | -- Int8s are written as a single byte. 162 | instance Binary Int8 where 163 | {-# INLINE encode #-} 164 | encode = int8 165 | {-# INLINE decode #-} 166 | decode = D.int8 167 | 168 | -- Int16s are written as a 2 bytes in big endian format 169 | instance Binary Int16 where 170 | {-# INLINE encode #-} 171 | encode = int16 172 | {-# INLINE decode #-} 173 | decode = D.int16 174 | 175 | -- Int32s are written as a 4 bytes in big endian format 176 | instance Binary Int32 where 177 | {-# INLINE encode #-} 178 | encode = int32 179 | {-# INLINE decode #-} 180 | decode = D.int32 181 | 182 | -- Int64s are written as a 8 bytes in big endian format 183 | instance Binary Int64 where 184 | {-# INLINE encode #-} 185 | encode = int64 186 | {-# INLINE decode #-} 187 | decode = D.int64 188 | 189 | ------------------------------------------------------------------------ 190 | 191 | -- Words are are written as Word64s, that is, 8 bytes in big endian format 192 | instance Binary Word where 193 | {-# INLINE encode #-} 194 | encode = word 195 | {-# INLINE decode #-} 196 | decode = D.word 197 | 198 | -- Ints are are written as Int64s, that is, 8 bytes in big endian format 199 | instance Binary Int where 200 | {-# INLINE encode #-} 201 | encode = int 202 | {-# INLINE decode #-} 203 | decode = D.int 204 | 205 | instance Binary Integer where 206 | {-# INLINE encode #-} 207 | encode = integer 208 | decode = error "TODO: decode Integer!" 209 | 210 | instance (Binary a, Integral a) => Binary (R.Ratio a) where 211 | {-# INLINE encode #-} 212 | encode = \r -> encode (R.numerator r, R.denominator r) 213 | decode = error "TODO: decode Ratio!" 214 | 215 | instance Binary Char where 216 | {-# INLINE encode #-} 217 | encode = char 218 | {-# INLINE decode #-} 219 | decode = D.char 220 | 221 | instance (Binary a, Binary b) => Binary (a,b) where 222 | {-# INLINE encode #-} 223 | encode (a,b) = encode a <> encode b 224 | {-# INLINE decode #-} 225 | decode = (,) <$> decode <*> decode 226 | 227 | instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where 228 | {-# INLINE encode #-} 229 | encode (a,b,c) = encode a <> encode b <> encode c 230 | {-# INLINE decode #-} 231 | decode = (,,) <$> decode <*> decode <*> decode 232 | 233 | instance (Binary a, Binary b, Binary c, Binary d) 234 | => Binary (a,b,c,d) where 235 | encode (a,b,c,d) = encode a <> encode b <> encode c <> encode d 236 | decode = (,,,) <$> decode <*> decode <*> decode <*> decode 237 | 238 | instance (Binary a, Binary b, Binary c, Binary d, Binary e) 239 | => Binary (a,b,c,d,e) where 240 | encode (a,b,c,d,e) = encode a <> encode b <> encode c <> encode d <> encode e 241 | decode = (,,,,) <$> decode <*> decode <*> decode <*> decode <*> decode 242 | 243 | -- 244 | -- and now just recurse: 245 | -- 246 | 247 | instance (Binary a, Binary b, Binary c, Binary d, Binary e 248 | , Binary f) 249 | => Binary (a,b,c,d,e,f) where 250 | encode (a,b,c,d,e,f) = encode a <> encode b <> encode c <> encode d <> encode e <> encode f 251 | decode = (,,,,,) <$> decode <*> decode <*> decode <*> decode <*> decode <*> decode 252 | 253 | 254 | instance (Binary a, Binary b, Binary c, Binary d, Binary e 255 | , Binary f, Binary g) 256 | => Binary (a,b,c,d,e,f,g) where 257 | encode (a,b,c,d,e,f,g) = encode a <> encode b <> encode c <> encode d <> encode e <> encode f <> encode g 258 | decode = (,,,,,,) <$> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode 259 | 260 | instance (Binary a, Binary b, Binary c, Binary d, Binary e, 261 | Binary f, Binary g, Binary h) 262 | => Binary (a,b,c,d,e,f,g,h) where 263 | encode (a,b,c,d,e,f,g,h) = encode a <> encode b <> encode c <> encode d <> encode e <> encode f <> encode g <> encode h 264 | decode = (,,,,,,,) <$> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode 265 | 266 | instance (Binary a, Binary b, Binary c, Binary d, Binary e, 267 | Binary f, Binary g, Binary h, Binary i) 268 | => Binary (a,b,c,d,e,f,g,h,i) where 269 | encode (a,b,c,d,e,f,g,h,i) = encode a <> encode b <> encode c <> encode d <> encode e <> encode f <> encode g <> encode h <> encode i 270 | decode = (,,,,,,,,) <$> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode 271 | 272 | instance (Binary a, Binary b, Binary c, Binary d, Binary e, 273 | Binary f, Binary g, Binary h, Binary i, Binary j) 274 | => Binary (a,b,c,d,e,f,g,h,i,j) where 275 | encode (a,b,c,d,e,f,g,h,i,j) = encode a <> encode b <> encode c <> encode d <> encode e <> encode f <> encode g <> encode h <> encode i <> encode j 276 | decode = (,,,,,,,,,) <$> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode <*> decode 277 | 278 | ------------------------------------------------------------------------ 279 | -- Container types 280 | 281 | -- | Share list encoding, as it is required for faster tree encoding. 282 | {-# INLINE encodeList #-} 283 | encodeList :: Encoding a -> Encoding [a] 284 | -- encodeList f = (<> word8 0) . foldMap ((word8 1 <>) . f) 285 | -- encodeList = \f xs -> encode (length xs) <> foldMap f xs 286 | encodeList f = 287 | go (0 :: Int) mempty 288 | where 289 | go !len acc [] = encode len <> acc 290 | go !len acc (x:xs) = go (len + 1) (f x <> acc) xs 291 | -- \f xs -> encode (length xs) <> foldMap f xs 292 | 293 | -- Encoding the list in reverse order might be interesting to simplify its 294 | -- parsing. It just depends on which side is easier to get up to speed :-) 295 | -- encodeList f = (<> word8 0) . foldl (\lhs x -> word8 1 <> f x <> lhs) mempty 296 | 297 | instance Binary a => Binary [a] where 298 | {-# INLINE encode #-} 299 | encode = encodeList encode 300 | {-# INLINE decode #-} 301 | decode = D.decodeList decode 302 | 303 | instance (Binary a) => Binary (Maybe a) where 304 | {-# INLINE encode #-} 305 | encode = maybe (word8 0) ((word8 1 <>) . encode) 306 | {-# INLINE decode #-} 307 | decode = D.decodeMaybe decode 308 | 309 | instance (Binary a, Binary b) => Binary (Either a b) where 310 | {-# INLINE encode #-} 311 | encode = either ((word8 0 <>) . encode) ((word8 1 <>) . encode) 312 | {-# INLINE decode #-} 313 | decode = D.decodeEither decode decode 314 | 315 | ------------------------------------------------------------------------ 316 | -- ByteStrings (have specially efficient instances) 317 | 318 | instance Binary S.ByteString where 319 | {-# INLINE encode #-} 320 | encode = byteString 321 | {-# INLINE decode #-} 322 | decode = D.byteString 323 | 324 | instance Binary L.ByteString where 325 | encode = L.foldrChunks (\bs s -> encode bs <> s) (encode S.empty) 326 | decode = do 327 | bs <- decode 328 | if S.null bs 329 | then return L.Empty 330 | else L.Chunk bs <$> decode 331 | 332 | ------------------------------------------------------------------------ 333 | -- Maps and Sets 334 | 335 | instance (Ord a, Binary a) => Binary (Set.Set a) where 336 | {-# INLINE encode #-} 337 | encode = encode . Set.toAscList 338 | {-# INLINE decode #-} 339 | decode = Set.fromAscList <$> decode 340 | 341 | instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where 342 | {-# INLINE encode #-} 343 | encode = encode . Map.toAscList 344 | {-# INLINE decode #-} 345 | decode = Map.fromAscList <$> decode 346 | 347 | instance Binary IntSet.IntSet where 348 | {-# INLINE encode #-} 349 | encode = encode . IntSet.toAscList 350 | {-# INLINE decode #-} 351 | decode = IntSet.fromAscList <$> decode 352 | 353 | instance (Binary e) => Binary (IntMap.IntMap e) where 354 | {-# INLINE encode #-} 355 | encode = encode . IntMap.toAscList 356 | {-# INLINE decode #-} 357 | decode = IntMap.fromAscList <$> decode 358 | 359 | ------------------------------------------------------------------------ 360 | -- Queues and Sequences 361 | 362 | instance (Binary e) => Binary (Seq.Seq e) where 363 | {-# INLINE encode #-} 364 | encode = \s -> int (Seq.length s) <> foldMap encode s 365 | {-# INLINE decode #-} 366 | decode = do 367 | D.int >>= go Seq.empty 368 | where 369 | go !s !len 370 | | len <= 0 = return s 371 | | otherwise = do 372 | x <- decode 373 | go (s Seq.|> x) (len - 1) 374 | 375 | 376 | ------------------------------------------------------------------------ 377 | -- Floating point 378 | 379 | instance Binary Double where 380 | {-# INLINE encode #-} 381 | encode = double 382 | {-# INLINE decode #-} 383 | decode = D.double 384 | 385 | instance Binary Float where 386 | {-# INLINE encode #-} 387 | encode = float 388 | {-# INLINE decode #-} 389 | decode = D.float 390 | 391 | ------------------------------------------------------------------------ 392 | -- Trees 393 | 394 | instance (Binary e) => Binary (T.Tree e) where 395 | {-# INLINE encode #-} 396 | encode = 397 | go 398 | where 399 | go (T.Node x cs) = encode x <> encodeList go cs 400 | 401 | {-# INLINE decode #-} 402 | decode = 403 | go 404 | where 405 | go = T.Node <$> decode <*> D.decodeList go 406 | 407 | 408 | ------------------------------------------------------------------------ 409 | -- Arrays 410 | 411 | instance (Binary i, Ix i, Binary e) => Binary (Array i e) where 412 | {-# INLINE encode #-} 413 | encode = \a -> encode (bounds a) <> encode (elems a) 414 | {-# INLINE decode #-} 415 | decode = listArray <$> decode <*> decode 416 | 417 | -- 418 | -- The IArray UArray e constraint is non portable. Requires flexible instances 419 | -- 420 | instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where 421 | {-# INLINE encode #-} 422 | encode = \a -> encode (bounds a) <> encode (elems a) 423 | {-# INLINE decode #-} 424 | decode = listArray <$> decode <*> decode 425 | 426 | #ifdef GENERICS 427 | ------------------------------------------------------------------------ 428 | -- Generic Binary 429 | 430 | class GBinary f where 431 | gEncode :: Encoding (f a) 432 | gDecode :: D.Decoder (f a) 433 | 434 | instance GBinary a => GBinary (M1 i c a) where 435 | gEncode = gEncode . unM1 436 | gDecode = M1 <$> gDecode 437 | {-# INLINE gEncode #-} 438 | {-# INLINE gDecode #-} 439 | 440 | instance Binary a => GBinary (K1 i a) where 441 | gEncode = encode . unK1 442 | gDecode = K1 <$> decode 443 | {-# INLINE gEncode #-} 444 | {-# INLINE gDecode #-} 445 | 446 | instance GBinary U1 where 447 | gEncode = const mempty 448 | gDecode = pure U1 449 | {-# INLINE gEncode #-} 450 | {-# INLINE gDecode #-} 451 | 452 | instance (GBinary a, GBinary b) => GBinary (a :*: b) where 453 | gEncode (a :*: b) = gEncode a <> gEncode b 454 | gDecode = (:*:) <$> gDecode <*> gDecode 455 | {-# INLINE gEncode #-} 456 | {-# INLINE gDecode #-} 457 | 458 | -- The following GBinary instance for sums has support for serializing types 459 | -- with up to 2^64-1 constructors. It will use the minimal number of bytes 460 | -- needed to encode the constructor. For example when a type has 2^8 461 | -- constructors or less it will use a single byte to encode the constructor. If 462 | -- it has 2^16 constructors or less it will use two bytes, and so on till 2^64-1. 463 | 464 | #define GUARD(WORD) (size - 1) <= fromIntegral (maxBound :: WORD) 465 | #define ENCODESUM(WORD) GUARD(WORD) = encodeSum (0 :: WORD) (fromIntegral size) 466 | #define DECODESUM(WORD) GUARD(WORD) = (decode :: D.Decoder WORD) >>= checkDecodeSum (fromIntegral size) 467 | 468 | instance ( EncodeSum a, EncodeSum b 469 | , DecodeSum a, DecodeSum b 470 | , GBinary a, GBinary b 471 | , SumSize a, SumSize b) => GBinary (a :+: b) where 472 | gEncode | ENCODESUM(Word8) | ENCODESUM(Word16) | ENCODESUM(Word32) | ENCODESUM(Word64) 473 | | otherwise = sizeError "encode" size 474 | where 475 | size = unTagged (sumSize :: Tagged (a :+: b) Word64) 476 | 477 | gDecode | DECODESUM(Word8) | DECODESUM(Word16) | DECODESUM(Word32) | DECODESUM(Word64) 478 | | otherwise = sizeError "decode" size 479 | where 480 | size = unTagged (sumSize :: Tagged (a :+: b) Word64) 481 | {-# INLINE gEncode #-} 482 | {-# INLINE gDecode #-} 483 | 484 | sizeError :: Show size => String -> size -> error 485 | sizeError s size = error $ "Can't " ++ s ++ " a type with " ++ show size ++ " constructors" 486 | 487 | ------------------------------------------------------------------------ 488 | 489 | class EncodeSum f where 490 | encodeSum :: (Num word, Bits word, Binary word) => word -> word -> Encoding (f a) 491 | 492 | instance (EncodeSum a, EncodeSum b, GBinary a, GBinary b) => EncodeSum (a :+: b) where 493 | encodeSum !tag !size s = case s of 494 | L1 x -> encodeSum tag sizeL x 495 | R1 x -> encodeSum (tag + sizeL) sizeR x 496 | where 497 | sizeL = size `shiftR` 1 498 | sizeR = size - sizeL 499 | {-# INLINE encodeSum #-} 500 | 501 | instance GBinary a => EncodeSum (C1 c a) where 502 | encodeSum !tag _ x = encode tag <> gEncode x 503 | {-# INLINE encodeSum #-} 504 | 505 | ------------------------------------------------------------------------ 506 | 507 | checkDecodeSum :: (Ord word, Bits word, DecodeSum f) => word -> word -> D.Decoder (f a) 508 | checkDecodeSum size tag | tag < size = decodeSum tag size 509 | | otherwise = fail "Unknown encoding for constructor" 510 | {-# INLINE checkDecodeSum #-} 511 | 512 | class DecodeSum f where 513 | decodeSum :: (Ord word, Num word, Bits word) => word -> word -> D.Decoder (f a) 514 | 515 | instance (DecodeSum a, DecodeSum b, GBinary a, GBinary b) => DecodeSum (a :+: b) where 516 | decodeSum !tag !size | tag < sizeL = L1 <$> decodeSum tag sizeL 517 | | otherwise = R1 <$> decodeSum (tag - sizeL) sizeR 518 | where 519 | sizeL = size `shiftR` 1 520 | sizeR = size - sizeL 521 | {-# INLINE decodeSum #-} 522 | 523 | instance GBinary a => DecodeSum (C1 c a) where 524 | decodeSum _ _ = gDecode 525 | {-# INLINE decodeSum #-} 526 | 527 | ------------------------------------------------------------------------ 528 | 529 | class SumSize f where 530 | sumSize :: Tagged f Word64 531 | 532 | newtype Tagged (s :: * -> *) b = Tagged {unTagged :: b} 533 | 534 | instance (SumSize a, SumSize b) => SumSize (a :+: b) where 535 | sumSize = Tagged $ unTagged (sumSize :: Tagged a Word64) + 536 | unTagged (sumSize :: Tagged b Word64) 537 | {-# INLINE sumSize #-} 538 | 539 | instance SumSize (C1 c a) where 540 | sumSize = Tagged 1 541 | {-# INLINE sumSize #-} 542 | #endif 543 | -------------------------------------------------------------------------------- /src/Data/Blaze/Binary/ParamDecoding.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE UnboxedTuples, MagicHash, ScopedTypeVariables, BangPatterns, DeriveDataTypeable, OverloadedStrings #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Data.Blaze.Binary.Encoding 5 | -- Copyright : 2012, Simon Meier 6 | -- License : BSD3-style (see LICENSE) 7 | -- 8 | -- Maintainer : Simon Meier 9 | -- Stability : 10 | -- Portability : portable 11 | -- 12 | -- Decoding of binary values parametrized over the primitive parsers. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Data.Blaze.Binary.ParamDecoding where 16 | 17 | import Prelude hiding (catch) 18 | 19 | import qualified Data.Blaze.Binary.Decoding as D 20 | 21 | import Control.Applicative 22 | import Control.Exception 23 | import Control.Concurrent.MVar (MVar, newEmptyMVar, putMVar, takeMVar) 24 | 25 | import Data.Typeable 26 | import qualified Data.ByteString.Internal as S 27 | import GHC.Prim 28 | import GHC.Ptr 29 | import GHC.Word 30 | import GHC.Exts 31 | import GHC.IO (IO(IO)) 32 | import GHC.Conc.Sync (forkIOWithUnmask) 33 | import Foreign 34 | 35 | 36 | ------------------------------------------------------------------------------ 37 | -- Decoding exceptions 38 | ------------------------------------------------------------------------------ 39 | 40 | -- | Extract the 'Addr#' from a 'Ptr'. 41 | {-# INLINE getPtr #-} 42 | getPtr :: Ptr a -> Addr# 43 | getPtr (Ptr p) = p 44 | 45 | -- | Extract an 'IO' operation to its primtive representation. 46 | {-# INLINE runIO #-} 47 | runIO :: IO a -> State# RealWorld -> (# State# RealWorld, a #) 48 | runIO (IO io) = io 49 | 50 | ------------------------------------------------------------------------------ 51 | -- Decoding exceptions 52 | ------------------------------------------------------------------------------ 53 | 54 | -- | Internally, we use 'DecodingException' to report failed parses. This 55 | -- allows us to write the succeeding parsing code, as if there would be no 56 | -- failure. This works well, as we currently focus on parsing consecutive 57 | -- chunks of memory. Note that copying the whole input once to make it 58 | -- consecutive is very likely less effort than all the parameter copying 59 | -- necessitated by having an interruptible parser. 60 | data DecodingException = DecodingException String (Ptr Word8) 61 | deriving( Show, Typeable ) 62 | 63 | instance Exception DecodingException where 64 | 65 | ------------------------------------------------------------------------------ 66 | -- Primitive decoders 67 | ------------------------------------------------------------------------------ 68 | 69 | -- We currently use a boxed pointer because that results in the 'stg_ap_pv' 70 | -- calling pattern, which is precompiled in contrast to the 'stg_ap_nv' 71 | -- calling pattern. 72 | 73 | -- Highly unsafe trick: primitive decoders are only called via an unknown call 74 | -- and never inlined. Pattern mathcing on their result makes sure that their 75 | -- evaluation took place. At least for the primitive ones that return unboxed 76 | -- values. 77 | 78 | type PrimDecoder a = Ptr Word8 -> State# RealWorld -> (# State# RealWorld, Addr#, a #) 79 | type PrimDecoderWord = Addr# -> (# Addr#, Word# #) 80 | type PrimDecoderInt = Ptr Word8 -> State# RealWorld -> (# State# RealWorld, Addr#, Int# #) 81 | type PrimDecoderChar = Addr# -> (# Addr#, Char# #) 82 | type PrimDecoderFloat = Ptr Word8 -> State# RealWorld -> (# State# RealWorld, Addr#, Float# #) 83 | type PrimDecoderDouble = Ptr Word8 -> State# RealWorld -> (# State# RealWorld, Addr#, Double# #) 84 | 85 | -- | These are the decoders for extracting primitive values. They are all 86 | -- given 87 | data PrimDecoders = PrimDecoders { 88 | pdWord8 :: PrimDecoderWord 89 | , pdWord16 :: PrimDecoderWord 90 | , pdWord32 :: PrimDecoderWord 91 | , pdWord64 :: PrimDecoderWord 92 | , pdInt8 :: PrimDecoderInt 93 | , pdInt16 :: PrimDecoderInt 94 | , pdInt32 :: PrimDecoderInt 95 | , pdInt64 :: PrimDecoderInt 96 | , pdFloat :: PrimDecoderFloat 97 | , pdDouble :: PrimDecoderDouble 98 | , pdChar :: PrimDecoderChar 99 | , pdByteString :: PrimDecoder S.ByteString 100 | } 101 | 102 | -- Prededefined primitive decoders 103 | ---------------------------------- 104 | 105 | -- FIXME: Make this code also works on big-endian and 32-bit machines. 106 | 107 | {-# INLINE decodersLE #-} 108 | decodersLE :: ForeignPtr Word8 -- ^ Pointer to the underlying buffer 109 | -> Ptr Word8 -- ^ Pointer to first byte after the buffer 110 | -> PrimDecoders 111 | decodersLE !fpbuf !ipe = 112 | PrimDecoders w8 w16 w32 w64 113 | i8 i16 i32 i64 114 | float double charUtf8 undefined 115 | where 116 | w8 = word (\ip -> fmap fromIntegral (peek (ip :: Ptr Word8))) 117 | w16 = word (\ip -> fmap fromIntegral (peek (ip :: Ptr Word16))) 118 | w32 = word (\ip -> fmap fromIntegral (peek (ip :: Ptr Word32))) 119 | w64 = word (\ip -> fmap fromIntegral (peek (ip :: Ptr Word64))) 120 | -- whost = word (\ip -> fmap truncateWord (peek (ip :: Ptr Word64))) 121 | 122 | i8 = int (\ip -> fmap fromIntegral (peek (ip :: Ptr Int8))) 123 | i16 = int (\ip -> fmap fromIntegral (peek (ip :: Ptr Int16))) 124 | i32 = int (\ip -> fmap fromIntegral (peek (ip :: Ptr Int32))) 125 | i64 = int (\ip -> fmap fromIntegral (peek (ip :: Ptr Int64))) 126 | -- ihost = int (\ip -> fmap truncateInt (peek (ip :: Ptr Int64))) 127 | 128 | -- FIXME: Implement for non-64bit arch 129 | -- truncateWord :: Word64 -> Word 130 | -- truncateWord = fromIntegral 131 | 132 | -- FIXME: Implement for non-64bit arch 133 | -- truncateInt :: Int64 -> Int 134 | -- truncateInt = fromIntegral 135 | 136 | {-# INLINE word #-} 137 | word :: forall a. Storable a => (Ptr a -> IO Word) -> PrimDecoderWord 138 | word peekAt = \ip0a -> 139 | let ip0 = Ptr ip0a in 140 | case nextPtr (castPtr ip0 :: Ptr a) of 141 | ip1 | ip1 <= ipe -> case runIO (peekAt (castPtr ip0)) realWorld# of 142 | (# _, W# x #) -> (# getPtr ip1, x #) 143 | | otherwise -> 144 | case runIO (tooFewBytes ip0) realWorld# of 145 | (# _, W# x #) -> (# getPtr ip0, x #) 146 | 147 | {-# INLINE int #-} 148 | int :: forall a. Storable a => (Ptr a -> IO Int) -> PrimDecoderInt 149 | int peekAt = \ip0 s0 -> 150 | case nextPtr (castPtr ip0 :: Ptr a) of 151 | ip1 | ip1 <= ipe -> case runIO (peekAt (castPtr ip0)) s0 of 152 | (# s1, I# x #) -> (# s1, getPtr ip1, x #) 153 | | otherwise -> 154 | case runIO (tooFewBytes ip0) s0 of 155 | (# s1, I# x #) -> (# s1, getPtr ip0, x #) 156 | 157 | {-# INLINE float #-} 158 | float :: PrimDecoderFloat 159 | float = \ip0 s0 -> 160 | case nextPtr (castPtr ip0 :: Ptr Float) of 161 | ip1 | ip1 <= ipe -> case runIO (peek (castPtr ip0 :: Ptr Float)) s0 of 162 | (# s1, F# x #) -> (# s1, getPtr ip1, x #) 163 | | otherwise -> 164 | case runIO (tooFewBytes ip0) s0 of 165 | (# s1, F# x #) -> (# s1, getPtr ip0, x #) 166 | 167 | {-# INLINE double #-} 168 | double :: PrimDecoderDouble 169 | double = \ip0 s0 -> 170 | case nextPtr (castPtr ip0 :: Ptr Double) of 171 | ip1 | ip1 <= ipe -> case runIO (peek (castPtr ip0 :: Ptr Double)) s0 of 172 | (# s1, D# x #) -> (# s1, getPtr ip1, x #) 173 | | otherwise -> 174 | case runIO (tooFewBytes ip0) s0 of 175 | (# s1, D# x #) -> (# s1, getPtr ip0, x #) 176 | 177 | {-# INLINE nextPtr #-} 178 | nextPtr :: forall a. Storable a => Ptr a -> Ptr Word8 179 | nextPtr = castPtr . (`plusPtr` (sizeOf (undefined :: a))) 180 | 181 | tooFewBytes :: Ptr Word8 -> IO a 182 | tooFewBytes = throw . DecodingException "too few bytes" 183 | {- 184 | {-# INLINE wordN #-} 185 | wordN :: Int -> (Ptr Word8 -> PrimDecoderWord) -> PrimDecoderWord 186 | wordN n d = \ip0 s0 -> 187 | case ip0 `plusPtr` n of 188 | ip1 | ip1 <= ipe -> d ip1 ip0 s0 189 | | otherwise -> 190 | case runIO (throw (DecodingException "too few bytes" ip0)) s0 of 191 | -- unreachable, but makes the type checker happy. 192 | (# s1, W# w #) -> (# s1, getPtr ip0, w #) 193 | -} 194 | 195 | charUtf8 :: PrimDecoderChar 196 | charUtf8 = \ip0a -> let ip0 = Ptr ip0a in 197 | if ip0 < ipe then 198 | case runIO (peek ip0) realWorld# of 199 | (# s1, w0 #) 200 | | w0 < 0x80 -> (# getPtr (ip0 `plusPtr` 1), chr1 w0 #) 201 | 202 | | w0 < 0xe0 && ip0 `plusPtr` 2 <= ipe -> 203 | case runIO (peekByteOff ip0 1) s1 of 204 | (# s2, w1 #) -> 205 | (# getPtr (ip0 `plusPtr` 2), chr2 w0 w1 #) 206 | 207 | | w0 < 0xf0 && ip0 `plusPtr` 3 <= ipe -> 208 | case runIO (peekByteOff ip0 1) s1 of 209 | (# s2, w1 #) -> case runIO (peekByteOff ip0 2) s2 of 210 | (# s3, w2 #) -> 211 | (# getPtr (ip0 `plusPtr` 3), chr3 w0 w1 w2 #) 212 | 213 | | ip0 `plusPtr` 4 <= ipe -> 214 | case runIO (peekByteOff ip0 1) s1 of 215 | (# s2, w1 #) -> case runIO (peekByteOff ip0 2) s2 of 216 | (# s3, w2 #) -> case runIO (peekByteOff ip0 3) s3 of 217 | (# s4, w3 #) -> 218 | let x = chr4 w0 w1 w2 w3 in 219 | if I# x <= 0x10ffff 220 | then (# getPtr (ip0 `plusPtr` 4), chr# x #) 221 | else runIOChr ip0 (throw (DecodingException ("invalid Unicode codepoint: " ++ show (I# x)) ip0)) 222 | 223 | | otherwise -> runIOChr ip0 (tooFewBytes ip0) 224 | else runIOChr ip0 (tooFewBytes ip0) 225 | where 226 | runIOChr ip io = case runIO io realWorld# of 227 | (# s1, C# c #) -> (# getPtr ip, c #) 228 | 229 | 230 | ------------------------------------------------------------------------------ 231 | -- Decoder 232 | ------------------------------------------------------------------------------ 233 | 234 | -- | One decoding step. Note that we use a 'Ptr Word8' because the 235 | -- 'stg_ap_pnv' calling patterns is not precompiled in GHC. 236 | type DecodeStep a = 237 | Ptr Word8 -- ^ Next byte to read 238 | -> (# Addr#, a #) 239 | -- ^ World state, new next byte to read, and decoded value 240 | 241 | -- | A decoder for Haskell values. 242 | newtype Decoder a = Decoder { 243 | unDecoder :: PrimDecoders -> DecodeStep a 244 | } 245 | 246 | -- Utilities 247 | ------------ 248 | 249 | -- | Convert an 'IO' action to a 'Decoder' action. 250 | {-# INLINE ioToDecoder #-} 251 | ioToDecoder :: IO a -> Decoder a 252 | ioToDecoder (IO io) = Decoder $ \_ !(Ptr ip0) -> case io realWorld# of 253 | (# _, x #) -> (# ip0, x #) 254 | 255 | -- | A 'DecodeStep' that fails with the given message. 256 | failStep :: String -> DecodeStep a 257 | failStep msg ip0 = 258 | case runIO (throw (DecodingException msg ip0)) realWorld# of 259 | -- unreachable, but makes the type checker happy. 260 | (# _, x #) -> (# getPtr ip0, x #) 261 | 262 | 263 | -- Instances 264 | ------------ 265 | 266 | instance Functor Decoder where 267 | {-# INLINE fmap #-} 268 | fmap = \f (Decoder io) -> Decoder $ \pd ip0 -> 269 | case io pd ip0 of 270 | (# ip1, x #) -> (# ip1, f x #) 271 | 272 | instance Applicative Decoder where 273 | {-# INLINE pure #-} 274 | pure x = Decoder $ \_ ip0 -> (# getPtr ip0, x #) 275 | 276 | {-# INLINE (<*>) #-} 277 | Decoder fIO <*> Decoder xIO = Decoder $ \pd ip0 -> 278 | case fIO pd ip0 of 279 | (# ip1, f #) -> case xIO pd (Ptr ip1) of 280 | (# ip2, x #) -> (# ip2, f x #) 281 | 282 | instance Monad Decoder where 283 | {-# INLINE return #-} 284 | return = pure 285 | 286 | {-# INLINE (>>=) #-} 287 | Decoder xIO >>= f = Decoder $ \pd ip0 -> 288 | case xIO pd ip0 of 289 | (# ip1, x #) -> unDecoder (f x) pd (Ptr ip1) 290 | 291 | {-# INLINE fail #-} 292 | fail = Decoder . const . failStep 293 | 294 | 295 | -- Decoder execution 296 | -------------------- 297 | 298 | -- | Execute a decoder on a strict 'S.ByteString'. 299 | runDecoder :: Decoder a -> S.ByteString -> Either String a 300 | runDecoder p (S.PS fpbuf off len) = S.inlinePerformIO $ do 301 | withForeignPtr fpbuf $ \pbuf -> do 302 | let !ip0 = pbuf `plusPtr` off 303 | !ipe = ip0 `plusPtr` len 304 | !pd = decodersLE fpbuf ipe 305 | 306 | decodeFast = (handle (decodingException ip0)) $ do 307 | x <- IO $ \s0 -> case unDecoder p pd ip0 of 308 | (# _, x #) -> (# s0, x #) 309 | return (Right x) 310 | 311 | -- For deeply nested messages our decoder might overflow the 312 | -- stack. We report this error politely as a decoding failure 313 | -- instead of killing all the pure code above us. As a 314 | -- stackoverflow is an asynchronous exception, we have to ensure 315 | -- that we are not masked. Note that we are essentially allocating 316 | -- a fresh stack for the decoding using 'forkIOWithUnmask' :-) 317 | decodeLarge = do 318 | mv <- newEmptyMVar 319 | _ <- forkIOWithUnmask $ \unmask -> 320 | handle (allExceptions mv) $ 321 | handle (stackOverflow mv) $ 322 | unmask $ putMVar mv =<< (Right <$> decodeFast) 323 | res <- takeMVar mv 324 | case res of 325 | Left e -> throw e 326 | Right x -> return x 327 | 328 | if len < 1024 then decodeFast else decodeLarge 329 | where 330 | decodingException :: Ptr Word8 -> DecodingException -> IO (Either String a) 331 | decodingException ip0 (DecodingException msg ip) = return $ Left $ 332 | msg ++ 333 | " (at byte " ++ show (ip `minusPtr` ip0) ++ " of " ++ show len ++ ")" 334 | 335 | stackOverflow :: MVar (Either e (Either String a)) -> AsyncException -> IO () 336 | stackOverflow mv StackOverflow = putMVar mv $ Right $ Left $ 337 | "stack overflow: the message of size " ++ show len ++ 338 | " may be nested too deeply." 339 | stackOverflow _ e = throw e 340 | 341 | allExceptions :: MVar (Either SomeException a) -> SomeException -> IO () 342 | allExceptions mv e = putMVar mv (Left e) 343 | 344 | 345 | -- Decoder construction 346 | ----------------------- 347 | 348 | 349 | {- 350 | requires :: Int -> Decoder a -> Decoder a 351 | requires n p = Decoder $ \buf@(Buffer ip ipe) -> 352 | if ipe `minusPtr` ip >= n 353 | then unDecoder p buf 354 | else throw $ DecodingException $ 355 | "required " ++ show n ++ 356 | " bytes, but there are only " ++ show (ipe `minusPtr` ip) ++ 357 | " bytes left." 358 | -} 359 | 360 | {-# INLINE prim #-} 361 | prim :: b -> Decoder a 362 | prim = error "PDecoder: prim - implement" 363 | {- sel = Decoder $ \pd fpbuf ip0 ipe s0 -> 364 | D.unDecoder (sel pd) fpbuf ip0 ipe s0 -} 365 | 366 | 367 | 368 | -- Primitive parsers 369 | -------------------- 370 | 371 | word8 :: Decoder Word8 372 | word8 = Decoder $ \pd !(Ptr ip0) -> case pdWord8 pd ip0 of 373 | (# ip1, w #) -> (# ip1, W8# w #) 374 | 375 | word8s = decodeList word8 376 | 377 | string = decodeList char 378 | 379 | -- {-# NOINLINE decodeList #-} 380 | -- decodeList :: Decoder a -> Decoder [a] 381 | -- decodeList x = go 382 | -- where 383 | -- go = do 384 | -- tag <- word8 385 | -- case tag of 386 | -- 0 -> return [] 387 | -- 1 -> (:) <$> x <*> go 388 | -- _ -> fail $ "decodeList: unexpected tag " ++ show tag 389 | {-# NOINLINE decodeList #-} 390 | decodeList :: Decoder a -> Decoder [a] 391 | decodeList decode = 392 | int >>= go 393 | where 394 | go !n 395 | | n <= 0 = return [] 396 | | otherwise = (:) <$> decode <*> go (n - 1) 397 | 398 | {-# INLINE word16 #-} 399 | word16 :: Decoder Word16 400 | word16 = prim pdWord16 401 | 402 | {-# INLINE word32 #-} 403 | word32 :: Decoder Word32 404 | word32 = prim pdWord32 405 | 406 | {-# INLINE word64 #-} 407 | word64 :: Decoder Word64 408 | word64 = prim pdWord64 409 | 410 | {-# INLINE word #-} 411 | word :: Decoder Word 412 | word = prim pdWord64 413 | 414 | {-# INLINE int8 #-} 415 | int8 :: Decoder Int8 416 | int8 = prim pdInt8 417 | 418 | {-# INLINE int16 #-} 419 | int16 :: Decoder Int16 420 | int16 = prim pdInt16 421 | 422 | {-# INLINE int32 #-} 423 | int32 :: Decoder Int32 424 | int32 = prim pdInt32 425 | 426 | {-# INLINE int64 #-} 427 | int64 :: Decoder Int64 428 | int64 = prim pdInt64 429 | 430 | {-# INLINE int #-} 431 | int :: Decoder Int 432 | int = prim pdInt64 433 | 434 | {-# INLINE float #-} 435 | float :: Decoder Float 436 | float = prim pdFloat 437 | 438 | {-# INLINE double #-} 439 | double :: Decoder Double 440 | double = prim pdDouble 441 | 442 | {- 443 | {-# INLINE byteString #-} 444 | byteString :: Int -> Decoder S.ByteString 445 | byteString = \len -> prim (`pdByteString` len) 446 | -} 447 | 448 | char :: Decoder Char 449 | char = Decoder $ \pd ip0 -> case pdChar pd (getPtr ip0) of 450 | (# ip1, x #) -> (# ip1, C# x #) 451 | 452 | {-# INLINE getAddr #-} 453 | getAddr :: Ptr a -> Addr# 454 | getAddr (Ptr a) = a 455 | 456 | -- Decoder combinators 457 | -------------------- 458 | 459 | {- 460 | {-# INLINE decodeList #-} 461 | decodeList :: Decoder a -> Decoder [a] 462 | decodeList x = 463 | go 464 | where 465 | go = do tag <- word8 466 | case tag of 467 | 0 -> return [] 468 | 1 -> (:) <$> x <*> go 469 | _ -> fail $ "decodeList: unexpected tag " ++ show tag 470 | -} 471 | 472 | {-# INLINE decodeMaybe #-} 473 | decodeMaybe :: Decoder a -> Decoder (Maybe a) 474 | decodeMaybe just = 475 | go 476 | where 477 | go = do tag <- word8 478 | case tag of 479 | 0 -> return Nothing 480 | 1 -> Just <$> just 481 | _ -> fail $ "decodeMaybe: unexpected tag " ++ show tag 482 | 483 | {-# INLINE decodeEither #-} 484 | decodeEither :: Decoder a -> Decoder b -> Decoder (Either a b) 485 | decodeEither left right = 486 | go 487 | where 488 | go = do tag <- word8 489 | case tag of 490 | 0 -> Left <$> left 491 | 1 -> Right <$> right 492 | _ -> fail $ "decodeEither: unexpected tag " ++ show tag 493 | 494 | 495 | ------------------------------------------------------------------------------ 496 | -- UTF-8 decoding helpers 497 | ------------------------------------------------------------------------------ 498 | 499 | chr1 :: Word8 -> Char# 500 | chr1 (W8# x#) = chr# (word2Int# x#) 501 | {-# INLINE chr1 #-} 502 | 503 | chr2 :: Word8 -> Word8 -> Char# 504 | chr2 (W8# x1#) (W8# x2#) = chr# (z1# +# z2#) 505 | where 506 | !y1# = word2Int# x1# 507 | !y2# = word2Int# x2# 508 | !z1# = uncheckedIShiftL# (y1# -# 0xC0#) 6# 509 | !z2# = y2# -# 0x80# 510 | {-# INLINE chr2 #-} 511 | 512 | chr3 :: Word8 -> Word8 -> Word8 -> Char# 513 | chr3 (W8# x1#) (W8# x2#) (W8# x3#) = chr# (z1# +# z2# +# z3#) 514 | where 515 | !y1# = word2Int# x1# 516 | !y2# = word2Int# x2# 517 | !y3# = word2Int# x3# 518 | !z1# = uncheckedIShiftL# (y1# -# 0xE0#) 12# 519 | !z2# = uncheckedIShiftL# (y2# -# 0x80#) 6# 520 | !z3# = y3# -# 0x80# 521 | {-# INLINE chr3 #-} 522 | 523 | chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Int# 524 | chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) = 525 | (z1# +# z2# +# z3# +# z4#) 526 | where 527 | !y1# = word2Int# x1# 528 | !y2# = word2Int# x2# 529 | !y3# = word2Int# x3# 530 | !y4# = word2Int# x4# 531 | !z1# = uncheckedIShiftL# (y1# -# 0xF0#) 18# 532 | !z2# = uncheckedIShiftL# (y2# -# 0x80#) 12# 533 | !z3# = uncheckedIShiftL# (y3# -# 0x80#) 6# 534 | !z4# = y4# -# 0x80# 535 | {-# INLINE chr4 #-} 536 | -------------------------------------------------------------------------------- /bench/GenericBenchmarks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, ScopedTypeVariables #-} 2 | 3 | -- | 4 | -- Copyright : (c) 2012 Simon Meier, Bas van Dijk 5 | -- License : BSD3-style (see LICENSE) 6 | -- 7 | -- Maintainer : Simon Meier 8 | -- Stability : experimental 9 | -- Portability : tested on GHC only 10 | -- 11 | -- Benchmark generic encoding and decoding speed. 12 | module Main (main) where 13 | 14 | import Prelude hiding (words) 15 | import Data.Monoid ((<>)) 16 | import Criterion.Main 17 | import Control.DeepSeq 18 | import Control.Applicative 19 | 20 | import qualified Data.Blaze.Binary as Blaze 21 | import qualified Data.Blaze.Binary.Encoding as E (word8) 22 | import qualified Data.Blaze.Binary.Decoding as D (word8) 23 | import qualified Data.Blaze.Binary.Decoding as Blaze (Decoder, runDecoder) 24 | import qualified Data.ByteString as S 25 | import qualified Data.ByteString.Lazy as L 26 | 27 | import GHC.Generics 28 | 29 | ------------------------------------------------------------------------------ 30 | -- Benchmark 31 | ------------------------------------------------------------------------------ 32 | 33 | main :: IO () 34 | main = Criterion.Main.defaultMain 35 | [ bgroup "decode" 36 | [ bgroup "bigProduct" 37 | [ bgroup "lazy" 38 | [ bench "generic" $ benchDecode bigProductL 39 | , bench "manual" $ benchDecode bigProductL' 40 | ] 41 | , bgroup "strict" 42 | [ bench "generic" $ benchDecode bigProduct 43 | , bench "manual" $ benchDecode bigProduct' 44 | ] 45 | ] 46 | , bgroup "bigSum" 47 | [ bgroup "0" 48 | [ bench "generic" $ benchDecode C00 49 | , bench "manual" $ benchDecode C'00 50 | ] 51 | , bgroup "99" 52 | [ bench "generic" $ benchDecode C99 53 | , bench "manual" $ benchDecode C'99 54 | ] 55 | ] 56 | ] 57 | , bgroup "encode" 58 | [ bgroup "bigProduct" 59 | [ bgroup "lazy" 60 | [ bench "generic" $ nf (L.length . Blaze.toLazyByteString) bigProductL 61 | , bench "manual" $ nf (L.length . Blaze.toLazyByteString) bigProductL' 62 | ] 63 | , bgroup "strict" 64 | [ bench "generic" $ nf (L.length . Blaze.toLazyByteString) bigProduct 65 | , bench "manual" $ nf (L.length . Blaze.toLazyByteString) bigProduct' 66 | ] 67 | ] 68 | , bgroup "bigSum" 69 | [ bgroup "0" 70 | [ bench "generic" $ nf (L.length . Blaze.toLazyByteString) C00 71 | , bench "manual" $ nf (L.length . Blaze.toLazyByteString) C'00 72 | ] 73 | , bgroup "99" 74 | [ bench "generic" $ nf (L.length . Blaze.toLazyByteString) C99 75 | , bench "manual" $ nf (L.length . Blaze.toLazyByteString) C'99 76 | ] 77 | ] 78 | ] 79 | ] 80 | where 81 | benchDecode :: forall a. (NFData a, Blaze.Binary a) => a -> Pure 82 | benchDecode x = nf (benchDecoder (Blaze.decode :: Blaze.Decoder a)) (Blaze.toByteString x) 83 | 84 | benchDecoder :: Blaze.Decoder a -> S.ByteString -> a 85 | benchDecoder d bs = case Blaze.runDecoder d bs of 86 | Left msg -> error msg 87 | Right x -> x 88 | 89 | ------------------------------------------------------------------------------ 90 | -- Big strict products 91 | ------------------------------------------------------------------------------ 92 | 93 | data BigProduct = BigProduct !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 94 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 95 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 96 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 97 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 98 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 99 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 100 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 101 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 102 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 103 | deriving Generic 104 | 105 | instance NFData BigProduct 106 | 107 | instance Blaze.Binary BigProduct 108 | 109 | bigProduct :: BigProduct 110 | bigProduct = BigProduct 00 01 02 03 04 05 06 07 08 09 111 | 10 11 12 13 14 15 16 17 18 19 112 | 20 21 22 23 24 25 26 27 28 29 113 | 30 31 32 33 34 35 36 37 38 39 114 | 40 41 42 43 44 45 46 47 48 49 115 | 50 51 52 53 54 55 56 57 58 59 116 | 60 61 62 63 64 65 66 67 68 69 117 | 70 71 72 73 74 75 76 77 78 79 118 | 80 81 82 83 84 85 86 87 88 89 119 | 90 91 92 93 94 95 96 97 98 99 120 | 121 | data BigProduct' = BigProduct' !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 122 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 123 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 124 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 125 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 126 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 127 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 128 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 129 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 130 | !Int !Int !Int !Int !Int !Int !Int !Int !Int !Int 131 | 132 | instance NFData BigProduct' 133 | 134 | instance Blaze.Binary BigProduct' where 135 | encode (BigProduct' i00 i01 i02 i03 i04 i05 i06 i07 i08 i09 136 | i10 i11 i12 i13 i14 i15 i16 i17 i18 i19 137 | i20 i21 i22 i23 i24 i25 i26 i27 i28 i29 138 | i30 i31 i32 i33 i34 i35 i36 i37 i38 i39 139 | i40 i41 i42 i43 i44 i45 i46 i47 i48 i49 140 | i50 i51 i52 i53 i54 i55 i56 i57 i58 i59 141 | i60 i61 i62 i63 i64 i65 i66 i67 i68 i69 142 | i70 i71 i72 i73 i74 i75 i76 i77 i78 i79 143 | i80 i81 i82 i83 i84 i85 i86 i87 i88 i89 144 | i90 i91 i92 i93 i94 i95 i96 i97 i98 i99 145 | ) = 146 | 147 | Blaze.encode i00 <> Blaze.encode i01 <> Blaze.encode i02 <> Blaze.encode i03 <> Blaze.encode i04 148 | <> Blaze.encode i05 <> Blaze.encode i06 <> Blaze.encode i07 <> Blaze.encode i08 <> Blaze.encode i09 149 | <> Blaze.encode i10 <> Blaze.encode i11 <> Blaze.encode i12 <> Blaze.encode i13 <> Blaze.encode i14 150 | <> Blaze.encode i15 <> Blaze.encode i16 <> Blaze.encode i17 <> Blaze.encode i18 <> Blaze.encode i19 151 | <> Blaze.encode i20 <> Blaze.encode i21 <> Blaze.encode i22 <> Blaze.encode i23 <> Blaze.encode i24 152 | <> Blaze.encode i25 <> Blaze.encode i26 <> Blaze.encode i27 <> Blaze.encode i28 <> Blaze.encode i29 153 | <> Blaze.encode i30 <> Blaze.encode i31 <> Blaze.encode i32 <> Blaze.encode i33 <> Blaze.encode i34 154 | <> Blaze.encode i35 <> Blaze.encode i36 <> Blaze.encode i37 <> Blaze.encode i38 <> Blaze.encode i39 155 | <> Blaze.encode i40 <> Blaze.encode i41 <> Blaze.encode i42 <> Blaze.encode i43 <> Blaze.encode i44 156 | <> Blaze.encode i45 <> Blaze.encode i46 <> Blaze.encode i47 <> Blaze.encode i48 <> Blaze.encode i49 157 | <> Blaze.encode i50 <> Blaze.encode i51 <> Blaze.encode i52 <> Blaze.encode i53 <> Blaze.encode i54 158 | <> Blaze.encode i55 <> Blaze.encode i56 <> Blaze.encode i57 <> Blaze.encode i58 <> Blaze.encode i59 159 | <> Blaze.encode i60 <> Blaze.encode i61 <> Blaze.encode i62 <> Blaze.encode i63 <> Blaze.encode i64 160 | <> Blaze.encode i65 <> Blaze.encode i66 <> Blaze.encode i67 <> Blaze.encode i68 <> Blaze.encode i69 161 | <> Blaze.encode i70 <> Blaze.encode i71 <> Blaze.encode i72 <> Blaze.encode i73 <> Blaze.encode i74 162 | <> Blaze.encode i75 <> Blaze.encode i76 <> Blaze.encode i77 <> Blaze.encode i78 <> Blaze.encode i79 163 | <> Blaze.encode i80 <> Blaze.encode i81 <> Blaze.encode i82 <> Blaze.encode i83 <> Blaze.encode i84 164 | <> Blaze.encode i85 <> Blaze.encode i86 <> Blaze.encode i87 <> Blaze.encode i88 <> Blaze.encode i89 165 | <> Blaze.encode i90 <> Blaze.encode i91 <> Blaze.encode i92 <> Blaze.encode i93 <> Blaze.encode i94 166 | <> Blaze.encode i95 <> Blaze.encode i96 <> Blaze.encode i97 <> Blaze.encode i98 <> Blaze.encode i99 167 | 168 | decode = BigProduct' <$> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 169 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 170 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 171 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 172 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 173 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 174 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 175 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 176 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 177 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 178 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 179 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 180 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 181 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 182 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 183 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 184 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 185 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 186 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 187 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 188 | 189 | bigProduct' :: BigProduct' 190 | bigProduct' = BigProduct' 00 01 02 03 04 05 06 07 08 09 191 | 10 11 12 13 14 15 16 17 18 19 192 | 20 21 22 23 24 25 26 27 28 29 193 | 30 31 32 33 34 35 36 37 38 39 194 | 40 41 42 43 44 45 46 47 48 49 195 | 50 51 52 53 54 55 56 57 58 59 196 | 60 61 62 63 64 65 66 67 68 69 197 | 70 71 72 73 74 75 76 77 78 79 198 | 80 81 82 83 84 85 86 87 88 89 199 | 90 91 92 93 94 95 96 97 98 99 200 | 201 | -------------------------------------------------------------------------------- 202 | -- Big lazy products 203 | -------------------------------------------------------------------------------- 204 | 205 | data BigProductL = BigProductL Int Int Int Int Int Int Int Int Int Int 206 | Int Int Int Int Int Int Int Int Int Int 207 | Int Int Int Int Int Int Int Int Int Int 208 | Int Int Int Int Int Int Int Int Int Int 209 | Int Int Int Int Int Int Int Int Int Int 210 | Int Int Int Int Int Int Int Int Int Int 211 | Int Int Int Int Int Int Int Int Int Int 212 | Int Int Int Int Int Int Int Int Int Int 213 | Int Int Int Int Int Int Int Int Int Int 214 | Int Int Int Int Int Int Int Int Int Int 215 | deriving Generic 216 | 217 | instance NFData BigProductL where 218 | rnf (BigProductL i00 i01 i02 i03 i04 i05 i06 i07 i08 i09 219 | i10 i11 i12 i13 i14 i15 i16 i17 i18 i19 220 | i20 i21 i22 i23 i24 i25 i26 i27 i28 i29 221 | i30 i31 i32 i33 i34 i35 i36 i37 i38 i39 222 | i40 i41 i42 i43 i44 i45 i46 i47 i48 i49 223 | i50 i51 i52 i53 i54 i55 i56 i57 i58 i59 224 | i60 i61 i62 i63 i64 i65 i66 i67 i68 i69 225 | i70 i71 i72 i73 i74 i75 i76 i77 i78 i79 226 | i80 i81 i82 i83 i84 i85 i86 i87 i88 i89 227 | i90 i91 i92 i93 i94 i95 i96 i97 i98 i99 228 | ) = rnf i00 `seq` rnf i01 `seq` rnf i02 `seq` rnf i03 `seq` rnf i04 `seq` rnf i05 `seq` rnf i06 `seq` rnf i07 `seq` rnf i08 `seq` rnf i09 `seq` 229 | rnf i10 `seq` rnf i11 `seq` rnf i12 `seq` rnf i13 `seq` rnf i14 `seq` rnf i15 `seq` rnf i16 `seq` rnf i17 `seq` rnf i18 `seq` rnf i19 `seq` 230 | rnf i20 `seq` rnf i21 `seq` rnf i22 `seq` rnf i23 `seq` rnf i24 `seq` rnf i25 `seq` rnf i26 `seq` rnf i27 `seq` rnf i28 `seq` rnf i29 `seq` 231 | rnf i30 `seq` rnf i31 `seq` rnf i32 `seq` rnf i33 `seq` rnf i34 `seq` rnf i35 `seq` rnf i36 `seq` rnf i37 `seq` rnf i38 `seq` rnf i39 `seq` 232 | rnf i40 `seq` rnf i41 `seq` rnf i42 `seq` rnf i43 `seq` rnf i44 `seq` rnf i45 `seq` rnf i46 `seq` rnf i47 `seq` rnf i48 `seq` rnf i49 `seq` 233 | rnf i50 `seq` rnf i51 `seq` rnf i52 `seq` rnf i53 `seq` rnf i54 `seq` rnf i55 `seq` rnf i56 `seq` rnf i57 `seq` rnf i58 `seq` rnf i59 `seq` 234 | rnf i60 `seq` rnf i61 `seq` rnf i62 `seq` rnf i63 `seq` rnf i64 `seq` rnf i65 `seq` rnf i66 `seq` rnf i67 `seq` rnf i68 `seq` rnf i69 `seq` 235 | rnf i70 `seq` rnf i71 `seq` rnf i72 `seq` rnf i73 `seq` rnf i74 `seq` rnf i75 `seq` rnf i76 `seq` rnf i77 `seq` rnf i78 `seq` rnf i79 `seq` 236 | rnf i80 `seq` rnf i81 `seq` rnf i82 `seq` rnf i83 `seq` rnf i84 `seq` rnf i85 `seq` rnf i86 `seq` rnf i87 `seq` rnf i88 `seq` rnf i89 `seq` 237 | rnf i90 `seq` rnf i91 `seq` rnf i92 `seq` rnf i93 `seq` rnf i94 `seq` rnf i95 `seq` rnf i96 `seq` rnf i97 `seq` rnf i98 `seq` rnf i99 238 | 239 | instance Blaze.Binary BigProductL 240 | 241 | bigProductL :: BigProductL 242 | bigProductL = BigProductL 00 01 02 03 04 05 06 07 08 09 243 | 10 11 12 13 14 15 16 17 18 19 244 | 20 21 22 23 24 25 26 27 28 29 245 | 30 31 32 33 34 35 36 37 38 39 246 | 40 41 42 43 44 45 46 47 48 49 247 | 50 51 52 53 54 55 56 57 58 59 248 | 60 61 62 63 64 65 66 67 68 69 249 | 70 71 72 73 74 75 76 77 78 79 250 | 80 81 82 83 84 85 86 87 88 89 251 | 90 91 92 93 94 95 96 97 98 99 252 | 253 | data BigProductL' = BigProductL' Int Int Int Int Int Int Int Int Int Int 254 | Int Int Int Int Int Int Int Int Int Int 255 | Int Int Int Int Int Int Int Int Int Int 256 | Int Int Int Int Int Int Int Int Int Int 257 | Int Int Int Int Int Int Int Int Int Int 258 | Int Int Int Int Int Int Int Int Int Int 259 | Int Int Int Int Int Int Int Int Int Int 260 | Int Int Int Int Int Int Int Int Int Int 261 | Int Int Int Int Int Int Int Int Int Int 262 | Int Int Int Int Int Int Int Int Int Int 263 | 264 | instance NFData BigProductL' where 265 | rnf (BigProductL' i00 i01 i02 i03 i04 i05 i06 i07 i08 i09 266 | i10 i11 i12 i13 i14 i15 i16 i17 i18 i19 267 | i20 i21 i22 i23 i24 i25 i26 i27 i28 i29 268 | i30 i31 i32 i33 i34 i35 i36 i37 i38 i39 269 | i40 i41 i42 i43 i44 i45 i46 i47 i48 i49 270 | i50 i51 i52 i53 i54 i55 i56 i57 i58 i59 271 | i60 i61 i62 i63 i64 i65 i66 i67 i68 i69 272 | i70 i71 i72 i73 i74 i75 i76 i77 i78 i79 273 | i80 i81 i82 i83 i84 i85 i86 i87 i88 i89 274 | i90 i91 i92 i93 i94 i95 i96 i97 i98 i99 275 | ) = rnf i00 `seq` rnf i01 `seq` rnf i02 `seq` rnf i03 `seq` rnf i04 `seq` rnf i05 `seq` rnf i06 `seq` rnf i07 `seq` rnf i08 `seq` rnf i09 `seq` 276 | rnf i10 `seq` rnf i11 `seq` rnf i12 `seq` rnf i13 `seq` rnf i14 `seq` rnf i15 `seq` rnf i16 `seq` rnf i17 `seq` rnf i18 `seq` rnf i19 `seq` 277 | rnf i20 `seq` rnf i21 `seq` rnf i22 `seq` rnf i23 `seq` rnf i24 `seq` rnf i25 `seq` rnf i26 `seq` rnf i27 `seq` rnf i28 `seq` rnf i29 `seq` 278 | rnf i30 `seq` rnf i31 `seq` rnf i32 `seq` rnf i33 `seq` rnf i34 `seq` rnf i35 `seq` rnf i36 `seq` rnf i37 `seq` rnf i38 `seq` rnf i39 `seq` 279 | rnf i40 `seq` rnf i41 `seq` rnf i42 `seq` rnf i43 `seq` rnf i44 `seq` rnf i45 `seq` rnf i46 `seq` rnf i47 `seq` rnf i48 `seq` rnf i49 `seq` 280 | rnf i50 `seq` rnf i51 `seq` rnf i52 `seq` rnf i53 `seq` rnf i54 `seq` rnf i55 `seq` rnf i56 `seq` rnf i57 `seq` rnf i58 `seq` rnf i59 `seq` 281 | rnf i60 `seq` rnf i61 `seq` rnf i62 `seq` rnf i63 `seq` rnf i64 `seq` rnf i65 `seq` rnf i66 `seq` rnf i67 `seq` rnf i68 `seq` rnf i69 `seq` 282 | rnf i70 `seq` rnf i71 `seq` rnf i72 `seq` rnf i73 `seq` rnf i74 `seq` rnf i75 `seq` rnf i76 `seq` rnf i77 `seq` rnf i78 `seq` rnf i79 `seq` 283 | rnf i80 `seq` rnf i81 `seq` rnf i82 `seq` rnf i83 `seq` rnf i84 `seq` rnf i85 `seq` rnf i86 `seq` rnf i87 `seq` rnf i88 `seq` rnf i89 `seq` 284 | rnf i90 `seq` rnf i91 `seq` rnf i92 `seq` rnf i93 `seq` rnf i94 `seq` rnf i95 `seq` rnf i96 `seq` rnf i97 `seq` rnf i98 `seq` rnf i99 285 | 286 | instance Blaze.Binary BigProductL' where 287 | encode (BigProductL' i00 i01 i02 i03 i04 i05 i06 i07 i08 i09 288 | i10 i11 i12 i13 i14 i15 i16 i17 i18 i19 289 | i20 i21 i22 i23 i24 i25 i26 i27 i28 i29 290 | i30 i31 i32 i33 i34 i35 i36 i37 i38 i39 291 | i40 i41 i42 i43 i44 i45 i46 i47 i48 i49 292 | i50 i51 i52 i53 i54 i55 i56 i57 i58 i59 293 | i60 i61 i62 i63 i64 i65 i66 i67 i68 i69 294 | i70 i71 i72 i73 i74 i75 i76 i77 i78 i79 295 | i80 i81 i82 i83 i84 i85 i86 i87 i88 i89 296 | i90 i91 i92 i93 i94 i95 i96 i97 i98 i99 297 | ) = 298 | 299 | Blaze.encode i00 <> Blaze.encode i01 <> Blaze.encode i02 <> Blaze.encode i03 <> Blaze.encode i04 300 | <> Blaze.encode i05 <> Blaze.encode i06 <> Blaze.encode i07 <> Blaze.encode i08 <> Blaze.encode i09 301 | <> Blaze.encode i10 <> Blaze.encode i11 <> Blaze.encode i12 <> Blaze.encode i13 <> Blaze.encode i14 302 | <> Blaze.encode i15 <> Blaze.encode i16 <> Blaze.encode i17 <> Blaze.encode i18 <> Blaze.encode i19 303 | <> Blaze.encode i20 <> Blaze.encode i21 <> Blaze.encode i22 <> Blaze.encode i23 <> Blaze.encode i24 304 | <> Blaze.encode i25 <> Blaze.encode i26 <> Blaze.encode i27 <> Blaze.encode i28 <> Blaze.encode i29 305 | <> Blaze.encode i30 <> Blaze.encode i31 <> Blaze.encode i32 <> Blaze.encode i33 <> Blaze.encode i34 306 | <> Blaze.encode i35 <> Blaze.encode i36 <> Blaze.encode i37 <> Blaze.encode i38 <> Blaze.encode i39 307 | <> Blaze.encode i40 <> Blaze.encode i41 <> Blaze.encode i42 <> Blaze.encode i43 <> Blaze.encode i44 308 | <> Blaze.encode i45 <> Blaze.encode i46 <> Blaze.encode i47 <> Blaze.encode i48 <> Blaze.encode i49 309 | <> Blaze.encode i50 <> Blaze.encode i51 <> Blaze.encode i52 <> Blaze.encode i53 <> Blaze.encode i54 310 | <> Blaze.encode i55 <> Blaze.encode i56 <> Blaze.encode i57 <> Blaze.encode i58 <> Blaze.encode i59 311 | <> Blaze.encode i60 <> Blaze.encode i61 <> Blaze.encode i62 <> Blaze.encode i63 <> Blaze.encode i64 312 | <> Blaze.encode i65 <> Blaze.encode i66 <> Blaze.encode i67 <> Blaze.encode i68 <> Blaze.encode i69 313 | <> Blaze.encode i70 <> Blaze.encode i71 <> Blaze.encode i72 <> Blaze.encode i73 <> Blaze.encode i74 314 | <> Blaze.encode i75 <> Blaze.encode i76 <> Blaze.encode i77 <> Blaze.encode i78 <> Blaze.encode i79 315 | <> Blaze.encode i80 <> Blaze.encode i81 <> Blaze.encode i82 <> Blaze.encode i83 <> Blaze.encode i84 316 | <> Blaze.encode i85 <> Blaze.encode i86 <> Blaze.encode i87 <> Blaze.encode i88 <> Blaze.encode i89 317 | <> Blaze.encode i90 <> Blaze.encode i91 <> Blaze.encode i92 <> Blaze.encode i93 <> Blaze.encode i94 318 | <> Blaze.encode i95 <> Blaze.encode i96 <> Blaze.encode i97 <> Blaze.encode i98 <> Blaze.encode i99 319 | 320 | decode = BigProductL' <$> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 321 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 322 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 323 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 324 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 325 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 326 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 327 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 328 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 329 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 330 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 331 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 332 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 333 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 334 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 335 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 336 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 337 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 338 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 339 | <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode <*> Blaze.decode 340 | 341 | bigProductL' :: BigProductL' 342 | bigProductL' = BigProductL' 00 01 02 03 04 05 06 07 08 09 343 | 10 11 12 13 14 15 16 17 18 19 344 | 20 21 22 23 24 25 26 27 28 29 345 | 30 31 32 33 34 35 36 37 38 39 346 | 40 41 42 43 44 45 46 47 48 49 347 | 50 51 52 53 54 55 56 57 58 59 348 | 60 61 62 63 64 65 66 67 68 69 349 | 70 71 72 73 74 75 76 77 78 79 350 | 80 81 82 83 84 85 86 87 88 89 351 | 90 91 92 93 94 95 96 97 98 99 352 | 353 | -------------------------------------------------------------------------------- 354 | -- Big sums 355 | -------------------------------------------------------------------------------- 356 | 357 | data BigSum = C00 | C01 | C02 | C03 | C04 | C05 | C06 | C07 | C08 | C09 358 | | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 359 | | C20 | C21 | C22 | C23 | C24 | C25 | C26 | C27 | C28 | C29 360 | | C30 | C31 | C32 | C33 | C34 | C35 | C36 | C37 | C38 | C39 361 | | C40 | C41 | C42 | C43 | C44 | C45 | C46 | C47 | C48 | C49 362 | | C50 | C51 | C52 | C53 | C54 | C55 | C56 | C57 | C58 | C59 363 | | C60 | C61 | C62 | C63 | C64 | C65 | C66 | C67 | C68 | C69 364 | | C70 | C71 | C72 | C73 | C74 | C75 | C76 | C77 | C78 | C79 365 | | C80 | C81 | C82 | C83 | C84 | C85 | C86 | C87 | C88 | C89 366 | | C90 | C91 | C92 | C93 | C94 | C95 | C96 | C97 | C98 | C99 367 | deriving Generic 368 | 369 | instance NFData BigSum 370 | 371 | instance Blaze.Binary BigSum 372 | 373 | data BigSum' = C'00 | C'01 | C'02 | C'03 | C'04 | C'05 | C'06 | C'07 | C'08 | C'09 374 | | C'10 | C'11 | C'12 | C'13 | C'14 | C'15 | C'16 | C'17 | C'18 | C'19 375 | | C'20 | C'21 | C'22 | C'23 | C'24 | C'25 | C'26 | C'27 | C'28 | C'29 376 | | C'30 | C'31 | C'32 | C'33 | C'34 | C'35 | C'36 | C'37 | C'38 | C'39 377 | | C'40 | C'41 | C'42 | C'43 | C'44 | C'45 | C'46 | C'47 | C'48 | C'49 378 | | C'50 | C'51 | C'52 | C'53 | C'54 | C'55 | C'56 | C'57 | C'58 | C'59 379 | | C'60 | C'61 | C'62 | C'63 | C'64 | C'65 | C'66 | C'67 | C'68 | C'69 380 | | C'70 | C'71 | C'72 | C'73 | C'74 | C'75 | C'76 | C'77 | C'78 | C'79 381 | | C'80 | C'81 | C'82 | C'83 | C'84 | C'85 | C'86 | C'87 | C'88 | C'89 382 | | C'90 | C'91 | C'92 | C'93 | C'94 | C'95 | C'96 | C'97 | C'98 | C'99 383 | 384 | instance NFData BigSum' 385 | 386 | instance Blaze.Binary BigSum' where 387 | encode C'00 = E.word8 00; encode C'01 = E.word8 01; encode C'02 = E.word8 02; encode C'03 = E.word8 03; encode C'04 = E.word8 04 388 | encode C'05 = E.word8 05; encode C'06 = E.word8 06; encode C'07 = E.word8 07; encode C'08 = E.word8 08; encode C'09 = E.word8 09 389 | encode C'10 = E.word8 10; encode C'11 = E.word8 11; encode C'12 = E.word8 12; encode C'13 = E.word8 13; encode C'14 = E.word8 14 390 | encode C'15 = E.word8 15; encode C'16 = E.word8 16; encode C'17 = E.word8 17; encode C'18 = E.word8 18; encode C'19 = E.word8 19 391 | encode C'20 = E.word8 20; encode C'21 = E.word8 21; encode C'22 = E.word8 22; encode C'23 = E.word8 23; encode C'24 = E.word8 24 392 | encode C'25 = E.word8 25; encode C'26 = E.word8 26; encode C'27 = E.word8 27; encode C'28 = E.word8 28; encode C'29 = E.word8 29 393 | encode C'30 = E.word8 30; encode C'31 = E.word8 31; encode C'32 = E.word8 32; encode C'33 = E.word8 33; encode C'34 = E.word8 34 394 | encode C'35 = E.word8 35; encode C'36 = E.word8 36; encode C'37 = E.word8 37; encode C'38 = E.word8 38; encode C'39 = E.word8 39 395 | encode C'40 = E.word8 40; encode C'41 = E.word8 41; encode C'42 = E.word8 42; encode C'43 = E.word8 43; encode C'44 = E.word8 44 396 | encode C'45 = E.word8 45; encode C'46 = E.word8 46; encode C'47 = E.word8 47; encode C'48 = E.word8 48; encode C'49 = E.word8 49 397 | encode C'50 = E.word8 50; encode C'51 = E.word8 51; encode C'52 = E.word8 52; encode C'53 = E.word8 53; encode C'54 = E.word8 54 398 | encode C'55 = E.word8 55; encode C'56 = E.word8 56; encode C'57 = E.word8 57; encode C'58 = E.word8 58; encode C'59 = E.word8 59 399 | encode C'60 = E.word8 60; encode C'61 = E.word8 61; encode C'62 = E.word8 62; encode C'63 = E.word8 63; encode C'64 = E.word8 64 400 | encode C'65 = E.word8 65; encode C'66 = E.word8 66; encode C'67 = E.word8 67; encode C'68 = E.word8 68; encode C'69 = E.word8 69 401 | encode C'70 = E.word8 70; encode C'71 = E.word8 71; encode C'72 = E.word8 72; encode C'73 = E.word8 73; encode C'74 = E.word8 74 402 | encode C'75 = E.word8 75; encode C'76 = E.word8 76; encode C'77 = E.word8 77; encode C'78 = E.word8 78; encode C'79 = E.word8 79 403 | encode C'80 = E.word8 80; encode C'81 = E.word8 81; encode C'82 = E.word8 82; encode C'83 = E.word8 83; encode C'84 = E.word8 84 404 | encode C'85 = E.word8 85; encode C'86 = E.word8 86; encode C'87 = E.word8 87; encode C'88 = E.word8 88; encode C'89 = E.word8 89 405 | encode C'90 = E.word8 90; encode C'91 = E.word8 91; encode C'92 = E.word8 92; encode C'93 = E.word8 93; encode C'94 = E.word8 94 406 | encode C'95 = E.word8 95; encode C'96 = E.word8 96; encode C'97 = E.word8 97; encode C'98 = E.word8 98; encode C'99 = E.word8 99 407 | 408 | decode = D.word8 >>= \tag -> 409 | case tag of 410 | 00 -> return C'00; 01 -> return C'01; 02 -> return C'02; 03 -> return C'03; 04 -> return C'04 411 | 05 -> return C'05; 06 -> return C'06; 07 -> return C'07; 08 -> return C'08; 09 -> return C'09 412 | 10 -> return C'10; 11 -> return C'11; 12 -> return C'12; 13 -> return C'13; 14 -> return C'14 413 | 15 -> return C'15; 16 -> return C'16; 17 -> return C'17; 18 -> return C'18; 19 -> return C'19 414 | 20 -> return C'20; 21 -> return C'21; 22 -> return C'22; 23 -> return C'23; 24 -> return C'24 415 | 25 -> return C'25; 26 -> return C'26; 27 -> return C'27; 28 -> return C'28; 29 -> return C'29 416 | 30 -> return C'30; 31 -> return C'31; 32 -> return C'32; 33 -> return C'33; 34 -> return C'34 417 | 35 -> return C'35; 36 -> return C'36; 37 -> return C'37; 38 -> return C'38; 39 -> return C'39 418 | 40 -> return C'40; 41 -> return C'41; 42 -> return C'42; 43 -> return C'43; 44 -> return C'44 419 | 45 -> return C'45; 46 -> return C'46; 47 -> return C'47; 48 -> return C'48; 49 -> return C'49 420 | 50 -> return C'50; 51 -> return C'51; 52 -> return C'52; 53 -> return C'53; 54 -> return C'54 421 | 55 -> return C'55; 56 -> return C'56; 57 -> return C'57; 58 -> return C'58; 59 -> return C'59 422 | 60 -> return C'60; 61 -> return C'61; 62 -> return C'62; 63 -> return C'63; 64 -> return C'64 423 | 65 -> return C'65; 66 -> return C'66; 67 -> return C'67; 68 -> return C'68; 69 -> return C'69 424 | 70 -> return C'70; 71 -> return C'71; 72 -> return C'72; 73 -> return C'73; 74 -> return C'74 425 | 75 -> return C'75; 76 -> return C'76; 77 -> return C'77; 78 -> return C'78; 79 -> return C'79 426 | 80 -> return C'80; 81 -> return C'81; 82 -> return C'82; 83 -> return C'83; 84 -> return C'84 427 | 85 -> return C'85; 86 -> return C'86; 87 -> return C'87; 88 -> return C'88; 89 -> return C'89 428 | 90 -> return C'90; 91 -> return C'91; 92 -> return C'92; 93 -> return C'93; 94 -> return C'94 429 | 95 -> return C'95; 96 -> return C'96; 97 -> return C'97; 98 -> return C'98; 99 -> return C'99 430 | _ -> fail "Unknown tag" 431 | --------------------------------------------------------------------------------