├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── Setup.hs ├── bench ├── bench.hs └── bench.png ├── changelog.md ├── compat ├── Data │ └── Either.hs └── Prelude.hs ├── examples └── Redis │ ├── Atto.hs │ ├── Reply.hs │ ├── Scanner.hs │ └── Zepto.hs ├── lib ├── Scanner.hs └── Scanner │ ├── Internal.hs │ └── OctetPredicates.hs ├── scanner.cabal └── spec └── spec.hs /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.swp 3 | .stack-work 4 | Makefile 5 | stack.yaml 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: haskell 2 | cabal: 3.4 3 | ghc: 4 | - '7.6' 5 | - '7.8' 6 | - '7.10' 7 | - '8.0' 8 | - '8.2' 9 | - '8.4' 10 | - '8.6' 11 | - '8.8' 12 | - '8.10' 13 | - '9.0.1' 14 | cache: 15 | directories: 16 | - $HOME/.ghc 17 | - $HOME/.cabal 18 | install: cabal build --enable-tests --only-dependencies 19 | script: cabal test 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Yuras Shumovich 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Yuras Shumovich nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scanner 2 | Fast non-backtracking incremental combinator parsing for bytestrings 3 | 4 | [![Build Status](https://travis-ci.org/Yuras/scanner.svg?branch=master)](https://travis-ci.org/Yuras/scanner) 5 | 6 | On hackage: http://hackage.haskell.org/package/scanner 7 | 8 | On stackage: https://www.stackage.org/package/scanner 9 | 10 | It is often convinient to use backtracking to parse some sophisticated 11 | input. Unfortunately it kills performance, so usually you should avoid 12 | backtracking. 13 | 14 | Often (actually always, but it could be too hard sometimes) you can 15 | implement your parser without any backtracking. It that case all the 16 | bookkeeping usuall parser combinators do becomes unnecessary. The 17 | scanner library is designed for such cases. It is often 2 times faster 18 | then attoparsec. 19 | 20 | As an example, please checkout redis protocol parser included into the 21 | repo, both using attoparsec and scanner libraries: 22 | https://github.com/Yuras/scanner/tree/master/examples/Redis 23 | 24 | Benchmark results: 25 | 26 | ![Bechmark results](https://raw.githubusercontent.com/Yuras/scanner/master/bench/bench.png) 27 | 28 | But if you really really really need backtracking, then you can just 29 | inject attoparsec parser into a scanner: http://hackage.haskell.org/package/scanner-attoparsec 30 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /bench/bench.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main 4 | ( main 5 | ) 6 | where 7 | 8 | import qualified Scanner 9 | 10 | import qualified Redis.Reply as Redis 11 | import qualified Redis.Atto 12 | import qualified Redis.Zepto 13 | import qualified Redis.Scanner 14 | 15 | import Data.ByteString (ByteString) 16 | import qualified Data.ByteString as ByteString 17 | import qualified Data.Attoparsec.ByteString as Atto 18 | import qualified Data.Attoparsec.Zepto as Zepto 19 | import qualified Data.Serialize.Get as Cereal 20 | 21 | import Criterion 22 | import Criterion.Main 23 | 24 | main :: IO () 25 | main = do 26 | let smallStringInput = "+OK\r\n" 27 | longStringInput = "+11111111111111111111111111122222222222222222222233333333333333333333333444444444444444444445555555555555555555555666666666666666666677777777777777777777888888888888888888888999999999999999999000000000000000000\r\n" 28 | intInput = ":123\r\n" 29 | bulkInput = "$10\r\n0123456789\r\n" 30 | multiInput = "*3\r\n+A\r\n+B\r\n+C\r\n" 31 | binaryInput = ByteString.pack [5, 65, 66, 67, 68, 69] 32 | print (stringAtto smallStringInput) 33 | print (stringScanner smallStringInput) 34 | print (stringWordScanner smallStringInput) 35 | print (redisByteStringReply smallStringInput) 36 | print (redisAttoReply smallStringInput) 37 | print (redisZeptoReply smallStringInput) 38 | print (redisScannerReply smallStringInput) 39 | print (redisAttoReply intInput) 40 | print (redisZeptoReply intInput) 41 | print (redisScannerReply intInput) 42 | print (redisAttoReply bulkInput) 43 | print (redisZeptoReply bulkInput) 44 | print (redisScannerReply bulkInput) 45 | print (redisAttoReply multiInput) 46 | print (redisZeptoReply multiInput) 47 | print (redisScannerReply multiInput) 48 | defaultMain 49 | [ bgroup "scanner" 50 | [ bgroup "string" 51 | [ bench "Atto" $ whnf stringAtto smallStringInput 52 | , bench "Scanner" $ whnf stringScanner smallStringInput 53 | , bench "WordScanner" $ whnf stringWordScanner smallStringInput 54 | ] 55 | ] 56 | 57 | , bgroup "redis" 58 | [ bgroup "small string" 59 | [ bench "Atto" $ whnf redisAttoReply smallStringInput 60 | , bench "Zepto" $ whnf redisZeptoReply smallStringInput 61 | , bench "Scanner" $ whnf redisScannerReply smallStringInput 62 | , bench "ByteString" $ whnf redisByteStringReply smallStringInput 63 | ] 64 | , bgroup "long string" 65 | [ bench "Atto" $ whnf redisAttoReply longStringInput 66 | , bench "Zepto" $ whnf redisZeptoReply longStringInput 67 | , bench "Scanner" $ whnf redisScannerReply longStringInput 68 | , bench "ByteString" $ whnf redisByteStringReply longStringInput 69 | ] 70 | 71 | , bgroup "integer" 72 | [ bench "Atto" $ whnf redisAttoReply intInput 73 | , bench "Zepto" $ whnf redisZeptoReply intInput 74 | , bench "Scanner" $ whnf redisScannerReply intInput 75 | ] 76 | 77 | , bgroup "bulk" 78 | [ bench "Atto" $ whnf redisAttoReply bulkInput 79 | , bench "Zepto" $ whnf redisZeptoReply bulkInput 80 | , bench "Scanner" $ whnf redisScannerReply bulkInput 81 | ] 82 | 83 | , bgroup "multi" 84 | [ bench "Atto" $ whnf redisAttoReply multiInput 85 | , bench "Zepto" $ whnf redisZeptoReply multiInput 86 | , bench "Scanner" $ whnf redisScannerReply multiInput 87 | ] 88 | ] 89 | 90 | , bgroup "cereal" 91 | [ bench "Cereal" $ whnf binaryCereal binaryInput 92 | , bench "Scanner" $ whnf binaryScanner binaryInput 93 | ] 94 | ] 95 | 96 | {-# NOINLINE stringAtto #-} 97 | stringAtto :: ByteString -> Either String () 98 | stringAtto bs = case Atto.parse (Atto.string "+OK\r\n") bs of 99 | Atto.Done _ _ -> Right () 100 | Atto.Fail _ _ err -> Left err 101 | Atto.Partial _ -> Left "Not enough input" 102 | 103 | {-# NOINLINE stringScanner #-} 104 | stringScanner :: ByteString -> Either String () 105 | stringScanner bs = case Scanner.scan (Scanner.string "+OK\r\n") bs of 106 | Scanner.Done _ _ -> Right () 107 | Scanner.Fail _ err -> Left err 108 | Scanner.More _ -> Left "Not enought input" 109 | 110 | {-# NOINLINE stringWordScanner #-} 111 | stringWordScanner :: ByteString -> Either String () 112 | stringWordScanner bs = case Scanner.scan s bs of 113 | Scanner.Done _ _ -> Right () 114 | Scanner.Fail _ err -> Left err 115 | Scanner.More _ -> Left "Not enought input" 116 | where 117 | s = do 118 | Scanner.char8 '+' 119 | Scanner.char8 'O' 120 | Scanner.char8 'K' 121 | Scanner.char8 '\r' 122 | Scanner.char8 '\n' 123 | 124 | {-# NOINLINE redisAttoReply #-} 125 | redisAttoReply :: ByteString -> Either String Redis.Reply 126 | redisAttoReply bs = case Atto.parse Redis.Atto.reply bs of 127 | Atto.Done _ r -> Right r 128 | Atto.Fail _ _ err -> Left err 129 | Atto.Partial _ -> Left "Not enough input" 130 | 131 | {-# NOINLINE redisZeptoReply #-} 132 | redisZeptoReply :: ByteString -> Either String Redis.Reply 133 | redisZeptoReply = Zepto.parse Redis.Zepto.reply 134 | 135 | {-# NOINLINE redisScannerReply #-} 136 | redisScannerReply :: ByteString -> Either String Redis.Reply 137 | redisScannerReply bs = case Scanner.scan Redis.Scanner.reply bs of 138 | Scanner.Done _ r -> Right r 139 | Scanner.Fail _ err -> Left err 140 | Scanner.More _ -> Left "Not enought input" 141 | 142 | {-# NOINLINE redisByteStringReply #-} 143 | redisByteStringReply :: ByteString -> Either String Redis.Reply 144 | redisByteStringReply bs = case ByteString.uncons bs of 145 | Just (c, bs') -> case c of 146 | 43 -> let (l, r) = ByteString.span (/= 13) bs' 147 | in case ByteString.uncons r of 148 | Just (c', bs'') -> case c' of 149 | 13 -> case ByteString.uncons bs'' of 150 | Just (c'', _) -> case c'' of 151 | 10 -> Right (Redis.String l) 152 | _ -> Left "Unexpected input" 153 | Nothing -> Left "Not enough input" 154 | _ -> Left "Unexpected input" 155 | Nothing -> Left "Not enought input" 156 | _ -> Left "Unknown type" 157 | Nothing -> Left "Not enought input" 158 | 159 | binaryScanner :: ByteString -> Either String ByteString 160 | binaryScanner bs = case Scanner.scan p bs of 161 | Scanner.Done _ r -> Right r 162 | Scanner.Fail _ err -> Left err 163 | Scanner.More _ -> Left "Not enought input" 164 | where 165 | p = do 166 | n <- fromIntegral <$> Scanner.anyWord8 167 | Scanner.take n 168 | 169 | binaryCereal :: ByteString -> Either String ByteString 170 | binaryCereal bs = Cereal.runGet g bs 171 | where 172 | g = do 173 | n <- fromIntegral <$> Cereal.getWord8 174 | Cereal.getBytes n 175 | -------------------------------------------------------------------------------- /bench/bench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yuras/scanner/7d655b915f3b7ebec136e38b6fb8ca721d3e9dd1/bench/bench.png -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | 0.3.1 2 | 3 | * support ghc-8.8 4 | 5 | 0.3 6 | 7 | * add foldWhile, foldWhile1, satisfy, satisfyMaybe 8 | 9 | 0.2 10 | 11 | * make Scanner a newtype instead of data, see https://github.com/Yuras/scanner/pull/3 12 | * improve `string` performance 13 | * add `scanWith` 14 | 15 | 0.1 16 | 17 | * initial release 18 | -------------------------------------------------------------------------------- /compat/Data/Either.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE PackageImports #-} 2 | {-# LANGUAGE CPP #-} 3 | 4 | module Data.Either 5 | ( 6 | module Export, 7 | #if MIN_VERSION_base(4,7,0) 8 | #else 9 | isRight, 10 | isLeft, 11 | #endif 12 | ) 13 | where 14 | 15 | import "base" Data.Either as Export 16 | 17 | #if MIN_VERSION_base(4,7,0) 18 | #else 19 | isRight :: Either a b -> Bool 20 | isRight (Right _) = True 21 | isRight _ = False 22 | 23 | isLeft :: Either a b -> Bool 24 | isLeft = not . isRight 25 | #endif 26 | -------------------------------------------------------------------------------- /compat/Prelude.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE PackageImports #-} 2 | {-# LANGUAGE CPP #-} 3 | 4 | module Prelude 5 | ( 6 | module P, 7 | 8 | #if MIN_VERSION_base(4,8,0) 9 | #else 10 | (<$>), 11 | Monoid(..), 12 | Applicative(..), 13 | #endif 14 | ) 15 | where 16 | 17 | #if MIN_VERSION_base(4,6,0) 18 | import "base" Prelude as P 19 | #else 20 | import "base" Prelude as P hiding (catch) 21 | #endif 22 | 23 | #if MIN_VERSION_base(4,8,0) 24 | #else 25 | import Data.Functor((<$>)) 26 | import Data.Monoid(Monoid(..)) 27 | import Control.Applicative(Applicative(..)) 28 | #endif 29 | -------------------------------------------------------------------------------- /examples/Redis/Atto.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Redis.Atto 4 | ( reply 5 | ) 6 | where 7 | 8 | import Redis.Reply 9 | 10 | import Prelude hiding (error) 11 | import Data.ByteString (ByteString) 12 | import Data.Attoparsec.ByteString (Parser) 13 | import qualified Data.Attoparsec.ByteString as Atto (takeTill) 14 | import qualified Data.Attoparsec.ByteString.Char8 as Atto hiding (takeTill) 15 | import Control.Monad 16 | 17 | {-# INLINE reply #-} 18 | reply :: Parser Reply 19 | reply = do 20 | c <- Atto.anyChar 21 | case c of 22 | '+' -> string 23 | '-' -> error 24 | ':' -> integer 25 | '$' -> bulk 26 | '*' -> multi 27 | _ -> fail "Unknown reply type" 28 | 29 | {-# INLINE string #-} 30 | string :: Parser Reply 31 | string = String <$> line 32 | 33 | {-# INLINE error #-} 34 | error :: Parser Reply 35 | error = Error <$> line 36 | 37 | {-# INLINE integer #-} 38 | integer :: Parser Reply 39 | integer = Integer <$> integral 40 | 41 | {-# INLINE bulk #-} 42 | bulk :: Parser Reply 43 | bulk = Bulk <$> do 44 | len <- integral 45 | if len < 0 46 | then return Nothing 47 | else Just <$> Atto.take len <* eol 48 | 49 | -- don't inline it to break the circle between reply and multi 50 | {-# NOINLINE multi #-} 51 | multi :: Parser Reply 52 | multi = Multi <$> do 53 | len <- integral 54 | if len < 0 55 | then return Nothing 56 | else Just <$> Atto.count len reply 57 | 58 | {-# INLINE integral #-} 59 | integral :: Integral i => Parser i 60 | integral = Atto.signed Atto.decimal <* eol 61 | 62 | {-# INLINE line #-} 63 | line :: Parser ByteString 64 | line = Atto.takeTill (== 13) <* eol 65 | 66 | {-# INLINE eol #-} 67 | eol :: Parser () 68 | eol = void $ Atto.string "\r\n" 69 | -------------------------------------------------------------------------------- /examples/Redis/Reply.hs: -------------------------------------------------------------------------------- 1 | 2 | module Redis.Reply 3 | ( Reply (..) 4 | ) 5 | where 6 | 7 | import Data.Int 8 | import Data.ByteString (ByteString) 9 | 10 | data Reply 11 | = String ByteString 12 | | Error ByteString 13 | | Integer Int64 14 | | Bulk (Maybe ByteString) 15 | | Multi (Maybe [Reply]) 16 | deriving (Show, Eq) 17 | -------------------------------------------------------------------------------- /examples/Redis/Scanner.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Redis.Scanner 4 | ( reply 5 | ) 6 | where 7 | 8 | import Scanner (Scanner) 9 | import qualified Scanner 10 | 11 | import Redis.Reply 12 | 13 | import Prelude hiding (error) 14 | import Data.ByteString (ByteString) 15 | import qualified Data.Text.Encoding as Text 16 | import qualified Data.Text.Read as Text 17 | import Control.Monad 18 | 19 | {-# INLINE reply #-} 20 | reply :: Scanner Reply 21 | reply = do 22 | c <- Scanner.anyChar8 23 | case c of 24 | '+' -> string 25 | '-' -> error 26 | ':' -> integer 27 | '$' -> bulk 28 | '*' -> multi 29 | _ -> fail "Unknown reply type" 30 | 31 | {-# INLINE string #-} 32 | string :: Scanner Reply 33 | string = String <$> line 34 | 35 | {-# INLINE error #-} 36 | error :: Scanner Reply 37 | error = Error <$> line 38 | 39 | {-# INLINE integer #-} 40 | integer :: Scanner Reply 41 | integer = Integer <$> integral 42 | 43 | {-# INLINE bulk #-} 44 | bulk :: Scanner Reply 45 | bulk = Bulk <$> do 46 | len <- integral 47 | if len < 0 48 | then return Nothing 49 | else Just <$> Scanner.take len <* eol 50 | 51 | -- don't inline it to break the circle between reply and multi 52 | {-# NOINLINE multi #-} 53 | multi :: Scanner Reply 54 | multi = Multi <$> do 55 | len <- integral 56 | if len < 0 57 | then return Nothing 58 | else Just <$> replicateM len reply 59 | 60 | {-# INLINE integral #-} 61 | integral :: Integral i => Scanner i 62 | integral = do 63 | str <- line 64 | case Text.signed Text.decimal (Text.decodeUtf8 str) of 65 | Left err -> fail (show err) 66 | Right (l, _) -> return l 67 | 68 | {-# INLINE line #-} 69 | line :: Scanner ByteString 70 | line = Scanner.takeWhileChar8 (/= '\r') <* eol 71 | 72 | {-# INLINE eol #-} 73 | eol :: Scanner () 74 | eol = do 75 | Scanner.char8 '\r' 76 | Scanner.char8 '\n' 77 | -------------------------------------------------------------------------------- /examples/Redis/Zepto.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Redis.Zepto 4 | ( reply 5 | ) 6 | where 7 | 8 | import Redis.Reply 9 | 10 | import Prelude hiding (error) 11 | import Data.ByteString (ByteString) 12 | import Data.Attoparsec.Zepto (Parser) 13 | import qualified Data.Attoparsec.Zepto as Zepto 14 | import qualified Data.Text.Encoding as Text 15 | import qualified Data.Text.Read as Text 16 | import Control.Monad 17 | 18 | {-# INLINE reply #-} 19 | reply :: Parser Reply 20 | reply = do 21 | c <- Zepto.take 1 22 | case c of 23 | "+" -> string 24 | "-" -> error 25 | ":" -> integer 26 | "$" -> bulk 27 | "*" -> multi 28 | _ -> fail "Unknown reply type" 29 | 30 | {-# INLINE string #-} 31 | string :: Parser Reply 32 | string = String <$> line 33 | 34 | {-# INLINE error #-} 35 | error :: Parser Reply 36 | error = Error <$> line 37 | 38 | {-# INLINE integer #-} 39 | integer :: Parser Reply 40 | integer = Integer <$> integral 41 | 42 | {-# INLINE integral #-} 43 | integral :: Integral i => Parser i 44 | integral = do 45 | str <- line 46 | case Text.signed Text.decimal (Text.decodeUtf8 str) of 47 | Left err -> fail (show err) 48 | Right (l, _) -> return l 49 | 50 | {-# INLINE bulk #-} 51 | bulk :: Parser Reply 52 | bulk = Bulk <$> do 53 | len <- integral 54 | if len < 0 55 | then return Nothing 56 | else Just <$> Zepto.take len <* eol 57 | 58 | -- don't inline it to break the circle between reply and multi 59 | {-# NOINLINE multi #-} 60 | multi :: Parser Reply 61 | multi = Multi <$> do 62 | len <- integral 63 | if len < 0 64 | then return Nothing 65 | else Just <$> replicateM len reply 66 | 67 | {-# INLINE line #-} 68 | line :: Parser ByteString 69 | line = Zepto.takeWhile (/= 13) <* eol 70 | 71 | {-# INLINE eol #-} 72 | eol :: Parser () 73 | eol = Zepto.string "\r\n" 74 | -------------------------------------------------------------------------------- /lib/Scanner.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Fast not-backtracking incremental scanner for bytestrings 3 | -- 4 | -- Unlike attoparsec or most of other parser combinator libraries, 5 | -- scanner doesn't support backtracking. But you probably don't need it 6 | -- anyway, at least if you need fast parser. 7 | -- 8 | -- Scanner processes input incrementally. When more input is needed, scanner 9 | -- returns `More` continuation. All the already processed input is discarded. 10 | 11 | module Scanner 12 | ( Scanner 13 | , Result (..) 14 | , scan 15 | , scanOnly 16 | , scanLazy 17 | , scanWith 18 | , anyWord8 19 | , anyChar8 20 | , word8 21 | , char8 22 | , take 23 | , takeWhile 24 | , takeWhileChar8 25 | , string 26 | , skipWhile 27 | , skipSpace 28 | , lookAhead 29 | , lookAheadChar8 30 | , foldlWhile 31 | , foldlWhile1 32 | , satisfy 33 | , satisfyMaybe 34 | , decimal 35 | ) 36 | where 37 | 38 | import Scanner.Internal 39 | 40 | import Prelude hiding (take, takeWhile) 41 | import Data.Word 42 | import qualified Data.Char as Char 43 | import Data.ByteString (ByteString) 44 | import qualified Data.ByteString as ByteString 45 | import qualified Data.ByteString.Lazy as Lazy (ByteString) 46 | import qualified Data.ByteString.Lazy as Lazy.ByteString 47 | import Control.Monad 48 | import GHC.Base (unsafeChr) 49 | 50 | -- | Scan the complete input, without resupplying 51 | scanOnly :: Scanner a -> ByteString -> Either String a 52 | scanOnly s bs = go (scan s bs) 53 | where 54 | go res = case res of 55 | Done _ r -> Right r 56 | Fail _ err -> Left err 57 | More more -> go (more ByteString.empty) 58 | 59 | -- | Scan lazy bytestring by resupplying scanner with chunks 60 | scanLazy :: Scanner a -> Lazy.ByteString -> Either String a 61 | scanLazy s lbs = go (scan s) (Lazy.ByteString.toChunks lbs) 62 | where 63 | go more chunks = 64 | let (chunk, chunks') = case chunks of 65 | [] -> (ByteString.empty, []) 66 | (c:cs) -> (c, cs) 67 | in case more chunk of 68 | Done _ r -> Right r 69 | Fail _ err -> Left err 70 | More more' -> go more' chunks' 71 | 72 | -- | Scan with the provided resupply action 73 | scanWith :: Monad m => m ByteString -> Scanner a -> ByteString -> m (Result a) 74 | scanWith more s input = go input (scan s) 75 | where 76 | go bs next = case next bs of 77 | More next' -> do 78 | bs' <- more 79 | go bs' next' 80 | res -> return res 81 | 82 | -- | Consume the next 8-bit char 83 | -- 84 | -- It fails if end of input 85 | {-# INLINE anyChar8 #-} 86 | anyChar8 :: Scanner Char 87 | anyChar8 = w2c <$> anyWord8 88 | 89 | -- | Consume the specified word or fail 90 | {-# INLINE word8 #-} 91 | word8 :: Word8 -> Scanner () 92 | word8 w = do 93 | w' <- anyWord8 94 | unless (w' == w) $ 95 | fail "unexpected word" 96 | 97 | -- | Consume the specified 8-bit char or fail 98 | {-# INLINE char8 #-} 99 | char8 :: Char -> Scanner () 100 | char8 = word8 . c2w 101 | 102 | -- | Take input while the predicate is `True` 103 | {-# INLINE takeWhileChar8 #-} 104 | takeWhileChar8 :: (Char -> Bool) -> Scanner ByteString 105 | takeWhileChar8 p = takeWhile (p . w2c) 106 | 107 | -- | Return the next byte, if any, without consuming it 108 | {-# INLINE lookAheadChar8 #-} 109 | lookAheadChar8 :: Scanner (Maybe Char) 110 | lookAheadChar8 = fmap w2c <$> lookAhead 111 | 112 | -- | Skip any input while the preducate is `True` 113 | {-# INLINE skipWhile #-} 114 | skipWhile :: (Word8 -> Bool) -> Scanner () 115 | skipWhile = void . takeWhile 116 | 117 | -- | Skip space 118 | {-# INLINE skipSpace #-} 119 | skipSpace :: Scanner () 120 | skipSpace = skipWhile isSpaceWord8 121 | 122 | {-# INLINE isSpaceWord8 #-} 123 | isSpaceWord8 :: Word8 -> Bool 124 | isSpaceWord8 w = w == 32 || w <= 13 125 | 126 | {-# INLINE w2c #-} 127 | w2c :: Word8 -> Char 128 | w2c = unsafeChr . fromIntegral 129 | 130 | {-# INLINE c2w #-} 131 | c2w :: Char -> Word8 132 | c2w = fromIntegral . Char.ord 133 | -------------------------------------------------------------------------------- /lib/Scanner/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RankNTypes, BangPatterns, CPP #-} 2 | {-# OPTIONS_HADDOCK not-home #-} 3 | 4 | -- | Scanner implementation 5 | 6 | module Scanner.Internal 7 | where 8 | 9 | import Prelude hiding (take, takeWhile) 10 | import Data.Word 11 | import Data.ByteString (ByteString) 12 | import qualified Data.ByteString as ByteString 13 | import qualified Data.ByteString.Unsafe as ByteString (unsafeDrop) 14 | import qualified Scanner.OctetPredicates as OctetPredicates 15 | import Control.Monad 16 | import Control.Monad.Fail 17 | 18 | -- | CPS scanner without backtracking 19 | newtype Scanner a = Scanner 20 | { run :: forall r. ByteString -> Next a r -> Result r 21 | } 22 | 23 | -- | Scanner continuation 24 | type Next a r = ByteString -> a -> Result r 25 | 26 | -- | Scanner result 27 | data Result r 28 | -- | Successful result with the rest of input 29 | = Done ByteString r 30 | 31 | -- | Scanner failed with rest of input and error message 32 | | Fail ByteString String 33 | 34 | -- | Need more input 35 | | More (ByteString -> Result r) 36 | 37 | -- | Run scanner with the input 38 | scan :: Scanner r -> ByteString -> Result r 39 | scan s bs = run s bs Done 40 | 41 | instance Functor Scanner where 42 | {-# INLINE fmap #-} 43 | fmap f (Scanner s) = Scanner $ \bs next -> 44 | s bs $ \bs' a -> 45 | next bs' (f a) 46 | 47 | instance Applicative Scanner where 48 | {-# INLINE pure #-} 49 | pure = return 50 | {-# INLINE (<*>) #-} 51 | (<*>) = ap 52 | 53 | {-# INLINE (*>) #-} 54 | (*>) = (>>) 55 | 56 | {-# INLINE (<*) #-} 57 | s1 <* s2 = s1 >>= \a -> s2 >> return a 58 | 59 | instance Monad Scanner where 60 | {-# INLINE return #-} 61 | return a = Scanner $ \bs next -> 62 | next bs a 63 | 64 | {-# INLINE (>>=) #-} 65 | s1 >>= s2 = Scanner $ \bs next -> 66 | run s1 bs $ \bs' a -> 67 | run (s2 a) bs' next 68 | 69 | #if !(MIN_VERSION_base(4,13,0)) 70 | {-# INLINE fail #-} 71 | fail err = Scanner $ \bs _ -> 72 | Fail bs err 73 | #endif 74 | 75 | instance MonadFail Scanner where 76 | {-# INLINE fail #-} 77 | fail err = Scanner $ \bs _ -> 78 | Fail bs err 79 | 80 | -- | Consume the next word 81 | -- 82 | -- It fails if end of input 83 | {-# INLINE anyWord8 #-} 84 | anyWord8 :: Scanner Word8 85 | anyWord8 = Scanner $ \bs next -> 86 | case ByteString.uncons bs of 87 | Just (c, bs') -> next bs' c 88 | _ -> More $ \bs' -> slowPath bs' next 89 | where 90 | slowPath bs next = 91 | case ByteString.uncons bs of 92 | Just (c, bs') -> next bs' c 93 | _ -> Fail ByteString.empty "No more input" 94 | 95 | -- | Take input while the predicate is `True` 96 | {-# INLINE takeWhile #-} 97 | takeWhile :: (Word8 -> Bool) -> Scanner ByteString 98 | takeWhile p = Scanner $ \bs next -> 99 | let (l, r) = ByteString.span p bs 100 | in if ByteString.null r 101 | then More $ \bs' -> 102 | if ByteString.null bs' 103 | then next ByteString.empty l 104 | else run (slowPath l) bs' next 105 | else next r l 106 | where 107 | slowPath l = go [l] 108 | go res = do 109 | chunk <- takeChunk 110 | done <- endOfInput 111 | if done || ByteString.null chunk 112 | then return . ByteString.concat . reverse $ (chunk : res) 113 | else go (chunk : res) 114 | takeChunk = Scanner $ \bs next -> 115 | let (l, r) = ByteString.span p bs 116 | in next r l 117 | 118 | -- | Take the specified number of bytes 119 | {-# INLINE take #-} 120 | take :: Int -> Scanner ByteString 121 | take n = Scanner $ \bs next -> 122 | let len = ByteString.length bs 123 | in if len >= n 124 | then let (l, r) = ByteString.splitAt n bs 125 | in next r l 126 | else More $ \bs' -> 127 | if ByteString.null bs' 128 | then Fail ByteString.empty "No more input" 129 | else run (slowPath bs len) bs' next 130 | where 131 | slowPath bs len = go [bs] (n - len) 132 | go res 0 = return . ByteString.concat . reverse $ res 133 | go res i = Scanner $ \bs next -> 134 | let len = ByteString.length bs 135 | in if len >= i 136 | then let (l, r) = ByteString.splitAt i bs 137 | in next r (ByteString.concat . reverse $ (l : res)) 138 | else More $ \bs' -> 139 | if ByteString.null bs' 140 | then Fail ByteString.empty "No more input" 141 | else run (go (bs : res) (i - len)) bs' next 142 | 143 | -- | Returns `True` when there is no more input 144 | {-# INLINE endOfInput #-} 145 | endOfInput :: Scanner Bool 146 | endOfInput = Scanner $ \bs next -> 147 | if ByteString.null bs 148 | then More $ \bs' -> next bs' (ByteString.null bs') 149 | else next bs False 150 | 151 | -- | Consume the specified string 152 | -- 153 | -- Warning: it is not optimized yet, so for for small string it is better 154 | -- to consume it byte-by-byte using `Scanner.word8` 155 | {-# INLINE string #-} 156 | string :: ByteString -> Scanner () 157 | string str = Scanner $ \bs next -> 158 | let strL = ByteString.length str 159 | in if ByteString.isPrefixOf str bs 160 | then next (ByteString.unsafeDrop strL bs) () 161 | else run slowPath bs next 162 | where 163 | slowPath = do 164 | bs <- take (ByteString.length str) 165 | if bs == str 166 | then return () 167 | else Control.Monad.Fail.fail "Unexpected input" 168 | 169 | -- | Return the next byte, if any, without consuming it 170 | {-# INLINE lookAhead #-} 171 | lookAhead :: Scanner (Maybe Word8) 172 | lookAhead = Scanner $ \bs next -> 173 | case ByteString.uncons bs of 174 | Just (c, _) -> next bs (Just c) 175 | _ -> More $ \bs' -> slowPath bs' next 176 | where 177 | slowPath bs next = 178 | case ByteString.uncons bs of 179 | Just (c, _) -> next bs (Just c) 180 | _ -> next ByteString.empty Nothing 181 | 182 | {-| Fold over the octets, which satisfy the predicate -} 183 | {-# INLINE foldlWhile #-} 184 | foldlWhile :: (Word8 -> Bool) -> (a -> Word8 -> a) -> a -> Scanner a 185 | foldlWhile p step init = Scanner $ \ bs next -> let 186 | (l, r) = ByteString.span p bs 187 | state = ByteString.foldl' step init l 188 | in if ByteString.null r 189 | then More $ \ bs -> if ByteString.null bs 190 | then next ByteString.empty state 191 | else run (loop state) bs next 192 | else next r state 193 | where 194 | loop state = do 195 | chunk <- takeChunk state 196 | if ByteString.null chunk 197 | then return state 198 | else do 199 | done <- endOfInput 200 | if done 201 | then return state 202 | else loop (ByteString.foldl' step state chunk) 203 | takeChunk state = Scanner $ \ bs next -> 204 | let (l, r) = ByteString.span p bs 205 | in next r l 206 | 207 | {-| Fold over the octets, which satisfy the predicate, ensuring that there's at least one -} 208 | {-# INLINE foldlWhile1 #-} 209 | foldlWhile1 :: (Word8 -> Bool) -> (a -> Word8 -> a) -> a -> Scanner a 210 | foldlWhile1 predicate step init = do 211 | head <- satisfy predicate 212 | foldlWhile predicate step (step init head) 213 | 214 | {-| Consume a single octet which satisfies the predicate and fail if it does not -} 215 | {-# INLINE satisfy #-} 216 | satisfy :: (Word8 -> Bool) -> Scanner Word8 217 | satisfy predicate = Scanner $ \ chunk next -> case ByteString.uncons chunk of 218 | Just (word8, remainder) -> handleHeadAndTail word8 remainder next chunk 219 | Nothing -> More $ \ chunk -> case ByteString.uncons chunk of 220 | Just (word8, remainder) -> handleHeadAndTail word8 remainder next chunk 221 | Nothing -> Fail chunk "No more input" 222 | where 223 | handleHeadAndTail :: Word8 -> ByteString -> (ByteString -> Word8 -> Result r) -> ByteString -> Result r 224 | handleHeadAndTail word8 remainder next chunk = if predicate word8 225 | then if ByteString.null remainder 226 | then More $ \ chunk -> next chunk word8 227 | else next remainder word8 228 | else Fail chunk "Octet doesn't satisfy the predicate" 229 | 230 | {-| Consume a single octet in case it satisfies the predicate -} 231 | {-# INLINE satisfyMaybe #-} 232 | satisfyMaybe :: (Word8 -> Bool) -> Scanner (Maybe Word8) 233 | satisfyMaybe predicate = Scanner $ \ chunk next -> case ByteString.uncons chunk of 234 | Just (word8, remainder) -> handleHeadAndTail word8 remainder next chunk 235 | Nothing -> More $ \ chunk -> case ByteString.uncons chunk of 236 | Just (word8, remainder) -> handleHeadAndTail word8 remainder next chunk 237 | Nothing -> next ByteString.empty Nothing 238 | where 239 | handleHeadAndTail :: Word8 -> ByteString -> (ByteString -> Maybe Word8 -> Result r) -> ByteString -> Result r 240 | handleHeadAndTail word8 remainder next chunk = if predicate word8 241 | then if ByteString.null remainder 242 | then More $ \ chunk -> next chunk (Just word8) 243 | else next remainder (Just word8) 244 | else next chunk Nothing 245 | 246 | {-| Parse a non-negative decimal number in ASCII -} 247 | {-# INLINE decimal #-} 248 | decimal :: Integral n => Scanner n 249 | decimal = foldlWhile1 OctetPredicates.isDigit step 0 where 250 | step a w = a * 10 + fromIntegral (w - 48) 251 | -------------------------------------------------------------------------------- /lib/Scanner/OctetPredicates.hs: -------------------------------------------------------------------------------- 1 | module Scanner.OctetPredicates 2 | where 3 | 4 | import Prelude 5 | import Data.Word 6 | 7 | 8 | isDigit :: Word8 -> Bool 9 | isDigit w = w - 48 <= 9 10 | -------------------------------------------------------------------------------- /scanner.cabal: -------------------------------------------------------------------------------- 1 | name: scanner 2 | version: 0.3.1 3 | synopsis: Fast non-backtracking incremental combinator parsing for bytestrings 4 | homepage: https://github.com/Yuras/scanner 5 | license: BSD3 6 | license-file: LICENSE 7 | author: Yuras Shumovich 8 | maintainer: shumovichy@gmail.com 9 | copyright: (c) Yuras Shumovich 2016 10 | category: Parsing 11 | build-type: Simple 12 | cabal-version: >=1.10 13 | extra-source-files: README.md changelog.md bench/bench.png 14 | description: Parser combinator library designed to be fast. It doesn't 15 | support backtracking. 16 | 17 | source-repository head 18 | type: git 19 | location: git@github.com:Yuras/scanner.git 20 | 21 | library 22 | exposed-modules: Scanner 23 | Scanner.Internal 24 | other-modules: Prelude 25 | Data.Either 26 | Scanner.OctetPredicates 27 | build-depends: base <5 28 | , fail 29 | , bytestring 30 | hs-source-dirs: lib, compat 31 | ghc-options: -O2 32 | default-language: Haskell2010 33 | 34 | test-suite spec 35 | type: exitcode-stdio-1.0 36 | hs-source-dirs: spec, compat 37 | main-is: spec.hs 38 | build-depends: base 39 | , bytestring 40 | , hspec 41 | , scanner 42 | other-modules: Prelude 43 | Data.Either 44 | default-language: Haskell2010 45 | 46 | benchmark bench 47 | type: exitcode-stdio-1.0 48 | hs-source-dirs: bench, examples, compat 49 | main-is: bench.hs 50 | other-modules: Redis.Reply 51 | Redis.Atto 52 | Redis.Zepto 53 | Redis.Scanner 54 | default-language: Haskell2010 55 | build-depends: base 56 | , bytestring 57 | , text 58 | , attoparsec 59 | , cereal 60 | , criterion 61 | , scanner 62 | -------------------------------------------------------------------------------- /spec/spec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main 4 | ( main 5 | ) 6 | where 7 | 8 | import Scanner 9 | 10 | import Prelude hiding (take, takeWhile) 11 | import Data.Either 12 | import qualified Data.ByteString as ByteString 13 | import qualified Data.ByteString.Lazy as Lazy.ByteString 14 | import Test.Hspec 15 | 16 | main :: IO () 17 | main = hspec $ do 18 | anyWord8Spec 19 | stringSpec 20 | takeSpec 21 | takeWhileSpec 22 | lookAheadSpec 23 | scanWithSpec 24 | 25 | anyWord8Spec :: Spec 26 | anyWord8Spec = describe "anyWord8" $ do 27 | it "should return the current byte" $ do 28 | let bs = ByteString.pack [42, 43] 29 | scanOnly anyWord8 bs `shouldBe` Right 42 30 | 31 | it "should consume the current byte" $ do 32 | let bs = ByteString.pack [42, 43] 33 | scanOnly (anyWord8 *> anyWord8) bs `shouldBe` Right 43 34 | 35 | let bs' = Lazy.ByteString.fromChunks 36 | [ ByteString.pack [42] 37 | , ByteString.pack [43] 38 | , ByteString.pack [44] 39 | ] 40 | scanLazy (anyWord8 *> anyWord8 *> anyWord8) bs' `shouldBe` Right 44 41 | 42 | it "should ask for more input" $ do 43 | let bs = Lazy.ByteString.fromChunks 44 | [ ByteString.pack [42] 45 | , ByteString.pack [43] 46 | ] 47 | scanLazy (anyWord8 *> anyWord8) bs `shouldBe` Right 43 48 | 49 | it "should fail on end of input" $ do 50 | let bs = ByteString.empty 51 | scanOnly anyWord8 bs `shouldSatisfy` isLeft 52 | 53 | stringSpec :: Spec 54 | stringSpec = describe "string" $ do 55 | it "should consume the string" $ do 56 | let bs = "hello world" 57 | scanOnly (string "hello" *> anyWord8) bs `shouldBe` Right 32 58 | 59 | it "should ask for more input" $ do 60 | let bs = Lazy.ByteString.fromChunks 61 | [ "hel" 62 | , "lo" 63 | ] 64 | scanLazy (string "hello") bs `shouldBe` Right () 65 | 66 | it "should fail on wrong input" $ do 67 | let bs = "helo world" 68 | scanOnly (string "hello") bs `shouldSatisfy` isLeft 69 | 70 | takeSpec :: Spec 71 | takeSpec = describe "take" $ do 72 | it "should return the first n bytes" $ do 73 | let bs = "hello world" 74 | scanOnly (take 5) bs `shouldBe` Right "hello" 75 | 76 | it "should ask for more input" $ do 77 | let bs = Lazy.ByteString.fromChunks 78 | [ "he" 79 | , "l" 80 | , "lo world" 81 | ] 82 | scanLazy (take 5) bs `shouldBe` Right "hello" 83 | 84 | it "should fail on end of input" $ do 85 | let bs = "hell" 86 | scanOnly (take 5) bs `shouldSatisfy` isLeft 87 | 88 | let bs' = Lazy.ByteString.fromChunks 89 | [ "he" 90 | , "l" 91 | , "l" 92 | ] 93 | scanLazy (take 5) bs' `shouldSatisfy` isLeft 94 | 95 | takeWhileSpec :: Spec 96 | takeWhileSpec = describe "takeWhile" $ do 97 | it "should return bytes according to the predicate" $ do 98 | let bs = "hello world" 99 | scanOnly (takeWhile (/= 32)) bs `shouldBe` Right "hello" 100 | 101 | it "should ask for more input" $ do 102 | let bs = Lazy.ByteString.fromChunks 103 | [ "he" 104 | , "l" 105 | , "lo world" 106 | ] 107 | scanLazy (takeWhile (/= 32)) bs `shouldBe` Right "hello" 108 | 109 | it "should return everything is predicate where becomes False" $ do 110 | let bs = "hello" 111 | scanOnly (takeWhile (/= 32)) bs `shouldBe` Right "hello" 112 | 113 | lookAheadSpec :: Spec 114 | lookAheadSpec = describe "lookAhead" $ do 115 | it "should return the next byte" $ do 116 | let bs = ByteString.pack [42, 43] 117 | scanOnly lookAhead bs `shouldBe` Right (Just 42) 118 | 119 | it "should return Nothing on end of input" $ do 120 | let bs = ByteString.empty 121 | scanOnly lookAhead bs `shouldBe` Right Nothing 122 | 123 | it "should not consume input" $ do 124 | let bs = ByteString.pack [42, 43] 125 | scanOnly (lookAhead *> anyWord8) bs `shouldBe` Right 42 126 | 127 | it "should ask for more input" $ do 128 | let bs = Lazy.ByteString.fromChunks 129 | [ ByteString.pack [42] 130 | , ByteString.pack [43] 131 | ] 132 | scanLazy (anyWord8 *> lookAhead) bs `shouldBe` Right (Just 43) 133 | 134 | scanWithSpec :: Spec 135 | scanWithSpec = describe "scanWith" $ do 136 | it "should apply the scanner" $ do 137 | let bs = ByteString.pack [42, 43] 138 | let Just (Scanner.Done _ r) = scanWith (Just ByteString.empty) anyWord8 bs 139 | r `shouldBe` 42 140 | 141 | it "should resupply scanner when necessary" $ do 142 | let bs = "a" 143 | p = Scanner.anyChar8 *> Scanner.anyChar8 144 | 145 | let Just (Scanner.Done _ r) = scanWith (Just "b") p bs 146 | r `shouldBe` 'b' 147 | --------------------------------------------------------------------------------