├── .ghci ├── Setup.lhs ├── benchmarks ├── med.txt.bz2 ├── Makefile ├── Warp.hs ├── http-request.txt ├── http-response.txt ├── Alternative.hs ├── Sets.hs ├── json-data │ ├── twitter1.json │ ├── example.json │ ├── integers.json │ └── twitter10.json ├── Tiny.hs ├── warp-3.0.1.1 │ ├── LICENSE │ └── Network │ │ └── Wai │ │ └── Handler │ │ └── Warp │ │ ├── ReadInt.hs │ │ └── RequestHeader.hs ├── Common.hs ├── HeadersByteString.hs ├── HeadersByteString │ └── Atto.hs ├── HeadersText.hs ├── Links.hs ├── Numbers.hs ├── Genome.hs ├── TextFastSet.hs ├── Benchmarks.hs └── IsSpace.hs ├── .gitignore ├── .hgignore ├── examples ├── Makefile ├── attoparsec-examples.cabal ├── Atto_RFC2616.hs ├── RFC2616.hs ├── Parsec_RFC2616.hs └── rfc2616.c ├── tests ├── QC │ ├── Text │ │ ├── FastSet.hs │ │ └── Regressions.hs │ ├── IPv6 │ │ └── Types.hs │ ├── Simple.hs │ ├── Rechunked.hs │ ├── Combinator.hs │ ├── Buffer.hs │ ├── Common.hs │ ├── Text.hs │ └── ByteString.hs └── QC.hs ├── Data ├── Attoparsec │ ├── Types.hs │ ├── Char8.hs │ ├── Lazy.hs │ ├── Number.hs │ ├── Text │ │ └── Lazy.hs │ ├── ByteString │ │ └── Lazy.hs │ ├── Zepto.hs │ ├── Internal.hs │ ├── ByteString.hs │ ├── Internal │ │ └── Types.hs │ └── Combinator.hs └── Attoparsec.hs ├── internal └── Data │ └── Attoparsec │ ├── Internal │ ├── Fhthagn.hs │ └── Compat.hs │ ├── ByteString │ ├── FastSet.hs │ └── Buffer.hs │ └── Text │ ├── FastSet.hs │ └── Buffer.hs ├── README.md ├── cabal.haskell-ci ├── LICENSE ├── .hgtags ├── doc ├── whats-in-a-parser-1.md └── attoparsec-rewired-2.md ├── changelog.md └── attoparsec.cabal /.ghci: -------------------------------------------------------------------------------- 1 | :set -fobject-code -optP-include -optPdist/build/autogen/cabal_macros.h -itests 2 | -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runhaskell 2 | > import Distribution.Simple 3 | > main = defaultMain 4 | -------------------------------------------------------------------------------- /benchmarks/med.txt.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haskell/attoparsec/HEAD/benchmarks/med.txt.bz2 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.hi 2 | *.o 3 | /dist/ 4 | /dist-newstyle/ 5 | .cabal-sandbox 6 | cabal.sandbox.config 7 | *.sw[op] 8 | -------------------------------------------------------------------------------- /benchmarks/Makefile: -------------------------------------------------------------------------------- 1 | all: med.txt tiny 2 | 3 | tiny: Tiny.hs 4 | ghc -O --make -o $@ $< 5 | 6 | %: %.bz2 7 | bunzip2 -k $< 8 | 9 | clean: 10 | -rm -f *.o *.hi tiny 11 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | \.(?:aux|eventlog|h[ip]|hs.html|log|[mt]ix|[oa]|orig|prof|ps|rej|swp)$ 2 | ~$ 3 | ^(?:.+/|)\.cabal-sandbox/ 4 | ^(?:.+/|)dist/ 5 | benchmarks/Arse 6 | benchmarks/dist 7 | benchmarks/med.txt 8 | benchmarks/tiny 9 | benchmarks/warp-[0-9]* 10 | cabal.sandbox.config 11 | examples/c-http 12 | examples/http_parser.[ch] 13 | hpc.*\.html$ 14 | 15 | syntax: glob 16 | .\#* 17 | \#* 18 | -------------------------------------------------------------------------------- /benchmarks/Warp.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Warp (benchmarks) where 4 | 5 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf) 6 | import Data.ByteString (ByteString) 7 | import Network.Wai.Handler.Warp.ReadInt (readInt) 8 | import qualified Data.Attoparsec.ByteString.Char8 as B 9 | 10 | benchmarks :: Benchmark 11 | benchmarks = bgroup "warp" [ 12 | bgroup "decimal" [ 13 | bench "warp" $ nf (readInt :: ByteString -> Int) "31337" 14 | , bench "atto" $ nf (B.parse (B.decimal :: B.Parser Int)) "31337" 15 | ] 16 | ] 17 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | CC := cc 2 | CFLAGS := -O3 -Wall 3 | # To make the code about 6% faster: 4 | # CFLAGS += -fwhole-program --combine 5 | CPPFLAGS := -I$(HOME)/hg/http-parser 6 | 7 | all: c-http 8 | 9 | c-http: rfc2616.c http_parser.c 10 | $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $^ 11 | 12 | clean: 13 | rm -f *.hi *.o c-http 14 | 15 | http_parser.c: http_parser.h 16 | curl -O https://raw.githubusercontent.com/joyent/http-parser/master/http_parser.c 17 | 18 | http_parser.h: 19 | curl -O https://raw.githubusercontent.com/joyent/http-parser/master/http_parser.h 20 | -------------------------------------------------------------------------------- /tests/QC/Text/FastSet.hs: -------------------------------------------------------------------------------- 1 | module QC.Text.FastSet where 2 | 3 | import Test.Tasty (TestTree) 4 | import Test.Tasty.QuickCheck (testProperty) 5 | import Test.QuickCheck 6 | import qualified Data.Attoparsec.Text.FastSet as FastSet 7 | 8 | membershipCorrect :: String -> String -> Property 9 | membershipCorrect members others = 10 | let fs = FastSet.fromList members 11 | correct c = (c `FastSet.member` fs) == (c `elem` members) 12 | in property $ all correct (members ++ others) 13 | 14 | tests :: [TestTree] 15 | tests = [ testProperty "membership is correct" membershipCorrect ] 16 | -------------------------------------------------------------------------------- /Data/Attoparsec/Types.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.Attoparsec.Types 3 | -- Copyright : Bryan O'Sullivan 2007-2015 4 | -- License : BSD3 5 | -- 6 | -- Maintainer : bos@serpentine.com 7 | -- Stability : experimental 8 | -- Portability : unknown 9 | -- 10 | -- Simple, efficient parser combinators for strings, loosely based on 11 | -- the Parsec library. 12 | 13 | module Data.Attoparsec.Types 14 | ( 15 | Parser 16 | , IResult(..) 17 | , Chunk(chunkElemToChar) 18 | ) where 19 | 20 | import Data.Attoparsec.Internal.Types (Parser(..), IResult(..), Chunk(..)) 21 | -------------------------------------------------------------------------------- /benchmarks/http-request.txt: -------------------------------------------------------------------------------- 1 | GET / HTTP/1.1 2 | Host: twitter.com 3 | Accept: text/html, application/xhtml+xml, application/xml; q=0.9, image/webp, */*; q=0.8 4 | Accept-Encoding: gzip,deflate,sdch 5 | Accept-Language: en-GB,en-US;q=0.8,en;q=0.6 6 | Cache-Control: max-age=0 7 | Cookie: guest_id=v1%3A139; _twitter_sess=BAh7CSIKZmxhc2hJQz-e1e1; __utma=43838368.452555194.1399611824.1; __utmb=43838368; __utmc=43838368; __utmz=1399611824.1.1.utmcsr=(direct)|utmcmd=(none) 8 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.86 Safari/537.36 9 | 10 | -------------------------------------------------------------------------------- /tests/QC.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-missing-signatures #-} 2 | module Main (main) where 3 | 4 | import qualified QC.Buffer as Buffer 5 | import qualified QC.ByteString as ByteString 6 | import qualified QC.Combinator as Combinator 7 | import qualified QC.Simple as Simple 8 | import qualified QC.Text as Text 9 | import Test.Tasty (defaultMain, testGroup) 10 | 11 | main = defaultMain tests 12 | 13 | tests = testGroup "tests" [ 14 | testGroup "bs" ByteString.tests 15 | , testGroup "buf" Buffer.tests 16 | , testGroup "combinator" Combinator.tests 17 | , testGroup "simple" Simple.tests 18 | , testGroup "text" Text.tests 19 | ] 20 | -------------------------------------------------------------------------------- /Data/Attoparsec.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.Attoparsec 3 | -- Copyright : Bryan O'Sullivan 2007-2015 4 | -- License : BSD3 5 | -- 6 | -- Maintainer : bos@serpentine.com 7 | -- Stability : experimental 8 | -- Portability : unknown 9 | -- 10 | -- Simple, efficient combinator parsing for 11 | -- 'Data.ByteString.ByteString' strings, loosely based on the Parsec 12 | -- library. 13 | -- 14 | -- This module is deprecated. Use "Data.Attoparsec.ByteString" 15 | -- instead. 16 | 17 | module Data.Attoparsec 18 | {-# DEPRECATED "This module will be removed in the next major release." #-} 19 | ( 20 | module Data.Attoparsec.ByteString 21 | ) where 22 | 23 | import Data.Attoparsec.ByteString 24 | -------------------------------------------------------------------------------- /internal/Data/Attoparsec/Internal/Fhthagn.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings, 2 | RecordWildCards, MagicHash, UnboxedTuples #-} 3 | 4 | module Data.Attoparsec.Internal.Fhthagn 5 | ( 6 | inlinePerformIO 7 | ) where 8 | 9 | import GHC.Exts (realWorld#) 10 | import GHC.IO (IO(IO)) 11 | 12 | -- | Just like unsafePerformIO, but we inline it. Big performance gains as 13 | -- it exposes lots of things to further inlining. /Very unsafe/. In 14 | -- particular, you should do no memory allocation inside an 15 | -- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@. 16 | inlinePerformIO :: IO a -> a 17 | inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r 18 | {-# INLINE inlinePerformIO #-} 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to attoparsec 2 | 3 | attoparsec is a fast Haskell parser combinator library, aimed 4 | particularly at dealing efficiently with network protocols and 5 | complicated text/binary file formats. 6 | 7 | # Join in! 8 | 9 | I'm happy to receive bug reports, fixes, documentation enhancements, 10 | and other improvements. 11 | 12 | Please report bugs via the 13 | [github issue tracker](https://github.com/haskell/attoparsec/issues). 14 | 15 | Master [git repository](https://github.com/haskell/attoparsec): 16 | 17 | * `git clone https://github.com/haskell/attoparsec.git` 18 | 19 | Authors 20 | ------- 21 | 22 | This library was written by Bryan O'Sullivan 23 | and is maintained by Ben Gamari . 24 | -------------------------------------------------------------------------------- /benchmarks/http-response.txt: -------------------------------------------------------------------------------- 1 | HTTP/1.1 200 OK 2 | Date: Fri, 09 May 2014 04:24:57 GMT 3 | Expires: -1 4 | Cache-Control: private, max-age=0 5 | Content-Type: text/html; charset=ISO-8859-1 6 | Set-Cookie: PREF=ID=1a871afc4240db5e:FF:LM=1399609497:S=e5MZ0itEekjVwNcU; expires=Sun, 08-May-2016 04:24:57 GMT; path=/; domain=.google.com 7 | Set-Cookie: NID=67=-WlfaDG6VSEPk7abAjrK98HBSoCD2ID6JKkUR95tEumzDmg7Fc8pSQ8; expires=Sat, 08-Nov-2014 04:24:57 GMT; path=/; domain=.google.com; HttpOnly 8 | P3P: CP="This is not a P3P policy! See http://www.google.com/support/accounts/bin/answer.py?hl=en&answer=151657 for more info." 9 | Server: gws 10 | X-XSS-Protection: 1; mode=block 11 | X-Frame-Options: SAMEORIGIN 12 | Alternate-Protocol: 80:quic 13 | Transfer-Encoding: chunked 14 | 15 | -------------------------------------------------------------------------------- /Data/Attoparsec/Char8.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.Attoparsec.Char8 3 | -- Copyright : Bryan O'Sullivan 2007-2015 4 | -- License : BSD3 5 | -- 6 | -- Maintainer : bos@serpentine.com 7 | -- Stability : experimental 8 | -- Portability : unknown 9 | -- 10 | -- Simple, efficient, character-oriented combinator parsing for 11 | -- 'Data.ByteString.ByteString' strings, loosely based on the Parsec 12 | -- library. 13 | -- 14 | -- This module is deprecated. Use "Data.Attoparsec.ByteString.Char8" 15 | -- instead. 16 | 17 | module Data.Attoparsec.Char8 18 | {-# DEPRECATED "This module will be removed in the next major release." #-} 19 | ( 20 | module Data.Attoparsec.ByteString.Char8 21 | ) where 22 | 23 | import Data.Attoparsec.ByteString.Char8 24 | -------------------------------------------------------------------------------- /examples/attoparsec-examples.cabal: -------------------------------------------------------------------------------- 1 | -- These examples are not intended to be installed. 2 | -- So don't install 'em. 3 | 4 | name: attoparsec-examples 5 | version: 0 6 | cabal-version: >=1.6 7 | build-type: Simple 8 | 9 | executable atto-rfc2616 10 | main-is: Atto_RFC2616.hs 11 | hs-source-dirs: .. . 12 | ghc-options: -O2 -Wall -rtsopts 13 | build-depends: 14 | array, 15 | base == 4.*, 16 | bytestring >= 0.10.4.0, 17 | deepseq >= 1.1, 18 | scientific >= 0.3.1, 19 | text >= 1.1.1.0 20 | 21 | executable parsec-rfc2616 22 | main-is: Parsec_RFC2616.hs 23 | hs-source-dirs: . 24 | ghc-options: -O2 -Wall -rtsopts 25 | build-depends: 26 | array, 27 | base == 4.*, 28 | containers, 29 | bytestring >= 0.10.4.0, 30 | parsec, 31 | text >= 1.1.1.0 32 | -------------------------------------------------------------------------------- /benchmarks/Alternative.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | -- This benchmark reveals a huge performance regression that showed up 4 | -- under GHC 7.8.1 (https://github.com/bos/attoparsec/issues/56). 5 | -- 6 | -- With GHC 7.6.3 and older, this program runs in 0.04 seconds. Under 7 | -- GHC 7.8.1 with (<|>) inlined, time jumps to 12 seconds! 8 | 9 | import Control.Applicative 10 | import Data.Text (Text) 11 | import qualified Data.Attoparsec.Text as A 12 | import qualified Data.Text as T 13 | 14 | testParser :: Text -> Either String Int 15 | testParser f = fmap length -- avoid printing out the entire matched list 16 | . A.parseOnly (many ((() <$ A.string "b") <|> (() <$ A.anyChar))) 17 | $ f 18 | 19 | main :: IO () 20 | main = print . testParser $ T.replicate 50000 "a" 21 | -------------------------------------------------------------------------------- /benchmarks/Sets.hs: -------------------------------------------------------------------------------- 1 | module Sets (benchmarks) where 2 | 3 | import Test.Tasty.Bench 4 | import Data.Char (ord) 5 | import qualified Data.Attoparsec.Text.FastSet as FastSet 6 | import qualified TextFastSet 7 | import qualified Data.HashSet as HashSet 8 | import qualified Data.IntSet as IntSet 9 | 10 | smallSet :: String 11 | smallSet = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] 12 | 13 | benchmarks :: Benchmark 14 | benchmarks = bgroup "sets" [ 15 | bench "Fast" $ whnf (FastSet.member '*') (FastSet.fromList smallSet) 16 | , bench "Hash" $ whnf (HashSet.member '*') (HashSet.fromList smallSet) 17 | , bench "Int" $ whnf (IntSet.member (ord '*')) 18 | (IntSet.fromList (map ord smallSet)) 19 | , bench "TextFast" $ whnf (TextFastSet.member '*') 20 | (TextFastSet.fromList smallSet) 21 | ] 22 | -------------------------------------------------------------------------------- /benchmarks/json-data/twitter1.json: -------------------------------------------------------------------------------- 1 | {"results":[{"from_user_id_str":"80430860","profile_image_url":"http://a2.twimg.com/profile_images/536455139/icon32_normal.png","created_at":"Wed, 26 Jan 2011 07:07:02 +0000","from_user":"kazu_yamamoto","id_str":"30159761706061824","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell Server Pages \u3063\u3066\u3001\u307e\u3060\u7d9a\u3044\u3066\u3044\u305f\u306e\u304b\uff01","id":30159761706061824,"from_user_id":80430860,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"}],"max_id":30159761706061824,"since_id":0,"refresh_url":"?since_id=30159761706061824&q=haskell","next_page":"?page=2&max_id=30159761706061824&rpp=1&q=haskell","results_per_page":1,"page":1,"completed_in":0.012606,"since_id_str":"0","max_id_str":"30159761706061824","query":"haskell"} -------------------------------------------------------------------------------- /internal/Data/Attoparsec/Internal/Compat.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE MagicHash #-} 3 | module Data.Attoparsec.Internal.Compat where 4 | 5 | import Data.ByteString.Internal (ByteString(..)) 6 | import Data.Word (Word8) 7 | import Foreign.ForeignPtr (ForeignPtr) 8 | 9 | #if MIN_VERSION_bytestring(0,11,0) 10 | import Data.ByteString.Internal (plusForeignPtr) 11 | #endif 12 | 13 | withPS :: ByteString -> (ForeignPtr Word8 -> Int -> Int -> r) -> r 14 | #if MIN_VERSION_bytestring(0,11,0) 15 | withPS (BS fp len) kont = kont fp 0 len 16 | #else 17 | withPS (PS fp off len) kont = kont fp off len 18 | #endif 19 | {-# INLINE withPS #-} 20 | 21 | mkPS :: ForeignPtr Word8 -> Int -> Int -> ByteString 22 | #if MIN_VERSION_bytestring(0,11,0) 23 | mkPS fp off len = BS (plusForeignPtr fp off) len 24 | #else 25 | mkPS fp off len = PS fp off len 26 | #endif 27 | {-# INLINE mkPS #-} 28 | -------------------------------------------------------------------------------- /benchmarks/Tiny.hs: -------------------------------------------------------------------------------- 1 | import Control.Applicative ((<|>), many) 2 | import Control.Monad (forM_) 3 | import System.Environment (getArgs) 4 | import qualified Data.Attoparsec.ByteString.Char8 as A 5 | import qualified Data.ByteString.Char8 as B 6 | import qualified Text.Parsec as P 7 | import qualified Text.Parsec.ByteString as P 8 | 9 | attoparsec = do 10 | args <- getArgs 11 | forM_ args $ \arg -> do 12 | input <- B.readFile arg 13 | case A.parse p input `A.feed` B.empty of 14 | A.Done _ xs -> print (length xs) 15 | what -> print what 16 | where 17 | slow = many (A.many1 A.letter_ascii <|> A.many1 A.digit) 18 | fast = many (A.takeWhile1 isLetter <|> A.takeWhile1 isDigit) 19 | isDigit c = c >= '0' && c <= '9' 20 | isLetter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 21 | p = fast 22 | 23 | parsec = do 24 | args <- getArgs 25 | forM_ args $ \arg -> do 26 | input <- readFile arg 27 | case P.parse (P.many (P.many1 P.letter P.<|> P.many1 P.digit)) "" input of 28 | Left err -> print err 29 | Right xs -> print (length xs) 30 | 31 | main = attoparsec 32 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | branches: master 2 | 3 | -- constraint-set text-1 4 | -- constraints: text < 2 5 | -- ghc: < 8.4 6 | -- tests: True 7 | -- run-tests: True 8 | -- 9 | -- constraint-set text-2 10 | -- constraints: text >= 2 11 | -- ghc: >= 8.4 && < 9.4 12 | -- tests: True 13 | -- run-tests: True 14 | -- 15 | -- constraint-set bytestring-0.12 16 | -- -- bytestring-0.12 requires base >=4.9 (GHC 8.0) 17 | -- ghc: >= 8.0 18 | -- constraints: bytestring >= 0.12 19 | -- -- 20 | -- -- The following is silently ignored here: 21 | -- -- 22 | -- -- raw-project 23 | -- -- allow-newer: bytestring 24 | -- -- 25 | -- tests: True 26 | -- run-tests: True 27 | -- 28 | -- -- The following is meant to be for constraint-set bytestring-0.12 only, 29 | -- -- but there is currently no way to enable `allow-newer: bytestring` 30 | -- -- just for the constraint set. 31 | -- -- 32 | -- -- Since core library `bytestring` is constrained to `installed`, 33 | -- -- it is not harmful to allow newer `bytestring` in the default runs 34 | -- -- as well---it will have no effect there. 35 | -- -- 36 | -- raw-project 37 | -- allow-newer: bytestring 38 | -------------------------------------------------------------------------------- /Data/Attoparsec/Lazy.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.Attoparsec.Lazy 3 | -- Copyright : Bryan O'Sullivan 2007-2015 4 | -- License : BSD3 5 | -- 6 | -- Maintainer : bos@serpentine.com 7 | -- Stability : experimental 8 | -- Portability : unknown 9 | -- 10 | -- Simple, efficient combinator parsing for lazy 'ByteString' 11 | -- strings, loosely based on the Parsec library. 12 | -- 13 | -- This is essentially the same code as in the 'Data.Attoparsec' 14 | -- module, only with a 'parse' function that can consume a lazy 15 | -- 'ByteString' incrementally, and a 'Result' type that does not allow 16 | -- more input to be fed in. Think of this as suitable for use with a 17 | -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'. 18 | -- 19 | -- Behind the scenes, strict 'B.ByteString' values are still used 20 | -- internally to store parser input and manipulate it efficiently. 21 | -- High-performance parsers such as 'string' still expect strict 22 | -- 'B.ByteString' parameters. 23 | 24 | module Data.Attoparsec.Lazy 25 | ( 26 | module Data.Attoparsec.ByteString.Lazy 27 | ) where 28 | 29 | import Data.Attoparsec.ByteString.Lazy 30 | -------------------------------------------------------------------------------- /tests/QC/IPv6/Types.hs: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | 3 | -- | 4 | -- Module : Text.IPv6Addr 5 | -- Copyright : Copyright © Michel Boucey 2011-2015 6 | -- License : BSD-Style 7 | -- Maintainer : michel.boucey@gmail.com 8 | -- 9 | -- Dealing with IPv6 address text representations, canonization and manipulations. 10 | -- 11 | 12 | -- ----------------------------------------------------------------------------- 13 | 14 | module QC.IPv6.Types where 15 | 16 | import qualified Data.Text as T 17 | 18 | data IPv6Addr = IPv6Addr T.Text 19 | 20 | instance Show IPv6Addr where 21 | show (IPv6Addr addr) = T.unpack addr 22 | 23 | data IPv6AddrToken 24 | = SixteenBit T.Text -- ^ A four hexadecimal digits group representing a 16-Bit chunk 25 | | AllZeros -- ^ An all zeros 16-Bit chunk 26 | | Colon -- ^ A separator between 16-Bit chunks 27 | | DoubleColon -- ^ A double-colon stands for a unique compression of many consecutive 16-Bit chunks 28 | | IPv4Addr T.Text -- ^ An embedded IPv4 address as representation of the last 32-Bit 29 | deriving (Eq,Show) 30 | -------------------------------------------------------------------------------- /benchmarks/warp-3.0.1.1/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Michael Snoyman, http://www.yesodweb.com/ 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /tests/QC/Simple.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# OPTIONS_GHC -fno-warn-missing-signatures #-} 3 | 4 | module QC.Simple ( 5 | tests 6 | ) where 7 | 8 | import Control.Applicative ((<|>)) 9 | import Data.ByteString (ByteString) 10 | import Data.List (foldl') 11 | import Data.Maybe (fromMaybe) 12 | import QC.Rechunked (rechunkBS) 13 | import Test.Tasty (TestTree) 14 | import Test.Tasty.QuickCheck (testProperty) 15 | import Test.QuickCheck (Property, counterexample, forAll) 16 | import qualified Data.Attoparsec.ByteString.Char8 as A 17 | 18 | t_issue75 = expect issue75 "ab" (A.Done "" "b") 19 | 20 | issue75 :: A.Parser ByteString 21 | issue75 = "a" >> ("b" <|> "") 22 | 23 | expect :: (Show r, Eq r) => A.Parser r -> ByteString -> A.Result r -> Property 24 | expect p input wanted = 25 | forAll (rechunkBS input) $ \in' -> 26 | let result = parse p in' 27 | in counterexample (show result ++ " /= " ++ show wanted) $ 28 | fromMaybe False (A.compareResults result wanted) 29 | 30 | parse :: A.Parser r -> [ByteString] -> A.Result r 31 | parse p (x:xs) = foldl' A.feed (A.parse p x) xs 32 | parse p [] = A.parse p "" 33 | 34 | tests :: [TestTree] 35 | tests = [ 36 | testProperty "issue75" t_issue75 37 | ] 38 | -------------------------------------------------------------------------------- /examples/Atto_RFC2616.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | 3 | module Main (main) where 4 | 5 | import Control.Applicative 6 | import Control.Exception (bracket) 7 | import Control.Monad (forM_) 8 | import Data.Attoparsec.ByteString 9 | import RFC2616 10 | import System.Environment 11 | import System.IO 12 | import qualified Data.ByteString.Char8 as B 13 | 14 | refill :: Handle -> IO B.ByteString 15 | refill h = B.hGet h (80*1024) 16 | 17 | listy :: FilePath -> Handle -> IO () 18 | listy file h = do 19 | r <- parseWith (refill h) (many request) =<< refill h 20 | case r of 21 | Fail _ _ msg -> hPutStrLn stderr $ file ++ ": " ++ msg 22 | Done _ reqs -> print (length reqs) 23 | 24 | incrementy :: FilePath -> Handle -> IO () 25 | incrementy file h = go (0::Int) =<< refill h 26 | where 27 | go !n is = do 28 | r <- parseWith (refill h) request is 29 | case r of 30 | Fail _ _ msg -> hPutStrLn stderr $ file ++ ": " ++ msg 31 | Done bs _req 32 | | B.null bs -> do 33 | s <- refill h 34 | if B.null s 35 | then print (n+1) 36 | else go (n+1) s 37 | | otherwise -> go (n+1) bs 38 | 39 | main :: IO () 40 | main = do 41 | args <- getArgs 42 | forM_ args $ \arg -> 43 | bracket (openFile arg ReadMode) hClose $ 44 | -- listy arg 45 | incrementy arg 46 | -------------------------------------------------------------------------------- /benchmarks/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# OPTIONS_GHC -fno-warn-orphans #-} 3 | 4 | module Common ( 5 | chunksOf 6 | , pathTo 7 | , rechunkBS 8 | , rechunkT 9 | ) where 10 | 11 | import Control.DeepSeq (NFData(rnf)) 12 | import System.Directory (doesDirectoryExist) 13 | import System.FilePath (()) 14 | import Text.Parsec (ParseError) 15 | import qualified Data.ByteString as B 16 | import qualified Data.ByteString.Lazy as BL 17 | import qualified Data.Text as T 18 | import qualified Data.Text.Lazy as TL 19 | 20 | #if !MIN_VERSION_bytestring(0,10,0) 21 | import Data.ByteString.Internal (ByteString(..)) 22 | 23 | instance NFData ByteString where 24 | rnf (PS _ _ _) = () 25 | #endif 26 | 27 | instance NFData ParseError where 28 | rnf = rnf . show 29 | 30 | chunksOf :: Int -> [a] -> [[a]] 31 | chunksOf k = go 32 | where go xs = case splitAt k xs of 33 | ([],_) -> [] 34 | (y, ys) -> y : go ys 35 | 36 | rechunkBS :: Int -> B.ByteString -> BL.ByteString 37 | rechunkBS n = BL.fromChunks . map B.pack . chunksOf n . B.unpack 38 | 39 | rechunkT :: Int -> T.Text -> TL.Text 40 | rechunkT n = TL.fromChunks . map T.pack . chunksOf n . T.unpack 41 | 42 | pathTo :: String -> IO FilePath 43 | pathTo wat = do 44 | exists <- doesDirectoryExist "benchmarks" 45 | return $ if exists 46 | then "benchmarks" wat 47 | else wat 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Lennart Kolmodin 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the author nor the names of his contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS 21 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /benchmarks/HeadersByteString.hs: -------------------------------------------------------------------------------- 1 | module HeadersByteString (headers) where 2 | 3 | import Common (pathTo, rechunkBS) 4 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf, nfIO) 5 | import HeadersByteString.Atto (request, response) 6 | import Network.Wai.Handler.Warp.RequestHeader (parseHeaderLines) 7 | import qualified Data.Attoparsec.ByteString.Char8 as B 8 | import qualified Data.Attoparsec.ByteString.Lazy as BL 9 | import qualified Data.ByteString.Char8 as B 10 | 11 | -- Note: In the benchmarks for parsing an http request 12 | -- from a strict bytestring, we consider warp's implementation, 13 | -- which has highly optimized code for handling the first 14 | -- line in particular. It's treatment of the headers 15 | -- is more relaxed from the treatment they are given by the 16 | -- attoparsec parser benchmarked here. Consequently, it 17 | -- is should not be possible to match its performance since 18 | -- it accepts header names with disallowed characters. 19 | 20 | headers :: IO Benchmark 21 | headers = do 22 | req <- B.readFile =<< pathTo "http-request.txt" 23 | resp <- B.readFile =<< pathTo "http-response.txt" 24 | let reql = rechunkBS 4 req 25 | respl = rechunkBS 4 resp 26 | return $ bgroup "headers" [ 27 | bgroup "B" [ 28 | bench "request" $ nf (B.parseOnly request) req 29 | , bench "warp" $ nfIO (parseHeaderLines (B.split '\n' req)) 30 | , bench "response" $ nf (B.parseOnly response) resp 31 | ] 32 | , bgroup "BL" [ 33 | bench "request" $ nf (BL.parse request) reql 34 | , bench "response" $ nf (BL.parse response) respl 35 | ] 36 | ] 37 | -------------------------------------------------------------------------------- /benchmarks/HeadersByteString/Atto.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, OverloadedStrings #-} 2 | {-# OPTIONS_GHC -fno-warn-missing-signatures -fno-warn-orphans #-} 3 | module HeadersByteString.Atto 4 | ( 5 | request 6 | , response 7 | ) where 8 | 9 | import Control.Applicative 10 | import Control.DeepSeq (NFData(..)) 11 | import Network.HTTP.Types.Version (HttpVersion, http11) 12 | import qualified Data.Attoparsec.ByteString.Char8 as B 13 | import qualified Data.ByteString.Char8 as B 14 | 15 | instance NFData HttpVersion where 16 | rnf !_ = () 17 | 18 | isHeaderChar :: Char -> Bool 19 | isHeaderChar c = 20 | (c >= 'a' && c <= 'z') || 21 | (c >= 'A' && c <= 'Z') || 22 | (c >= '0' && c <= '9') || 23 | (c == '_') || 24 | (c == '-') 25 | 26 | header = do 27 | name <- B.takeWhile1 isHeaderChar <* B.char ':' <* B.skipSpace 28 | body <- bodyLine 29 | return (name, body) 30 | 31 | bodyLine = B.takeTill (\c -> c == '\r' || c == '\n') <* B.endOfLine 32 | 33 | requestLine = do 34 | m <- (B.takeTill B.isSpace <* B.char ' ') 35 | (p,q) <- B.break (=='?') <$> (B.takeTill B.isSpace <* B.char ' ') 36 | v <- httpVersion 37 | return (m,p,q,v) 38 | 39 | httpVersion = http11 <$ "HTTP/1.1" 40 | 41 | responseLine = (,,) <$> 42 | (httpVersion <* B.skipSpace) <*> 43 | (int <* B.skipSpace) <*> 44 | bodyLine 45 | 46 | int :: B.Parser Int 47 | int = B.decimal 48 | 49 | request = (,) <$> (requestLine <* B.endOfLine) <*> manyheader 50 | 51 | response = (,) <$> responseLine <*> many header 52 | 53 | manyheader = do 54 | c <- B.peekChar' 55 | if c == '\r' || c == '\n' 56 | then return [] 57 | else (:) <$> header <*> manyheader 58 | -------------------------------------------------------------------------------- /tests/QC/Text/Regressions.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module QC.Text.Regressions ( 4 | tests 5 | ) where 6 | 7 | import Data.Text (Text) 8 | import qualified Data.Text as T 9 | import Data.List (foldl') 10 | import Data.Maybe (fromMaybe) 11 | import Data.Char (isLower) 12 | import Data.Monoid ((<>)) 13 | import QC.Rechunked (rechunkT) 14 | import Test.Tasty (TestTree) 15 | import Test.Tasty.QuickCheck (testProperty) 16 | import Test.QuickCheck (Property, counterexample, forAll) 17 | import qualified Data.Attoparsec.Text as A 18 | 19 | 20 | -------------------------------------------------------------------------------- 21 | -- 105 was about runScanner not always returning the final state. The result 22 | -- did depend on how the data was fed to the parser. 23 | 24 | t_issue105 :: Property 25 | t_issue105 = expect issue105 "lowER" (A.Done "ER" "low") 26 | 27 | issue105 :: A.Parser Text 28 | issue105 = do 29 | (_, firstFourLowercaseLetters) <- A.runScanner "" f 30 | return $ firstFourLowercaseLetters 31 | 32 | where 33 | f :: Text -> Char -> Maybe Text 34 | f acc c = if T.length acc < 4 && isLower c 35 | then Just $ acc <> T.singleton c 36 | else Nothing 37 | 38 | 39 | expect :: (Show r, Eq r) => A.Parser r -> Text -> A.Result r -> Property 40 | expect p input wanted = 41 | forAll (rechunkT input) $ \in' -> 42 | let result = parse p in' 43 | in counterexample (show result ++ " /= " ++ show wanted) $ 44 | fromMaybe False (A.compareResults result wanted) 45 | 46 | parse :: A.Parser r -> [Text] -> A.Result r 47 | parse p (x:xs) = foldl' A.feed (A.parse p x) xs 48 | parse p [] = A.parse p "" 49 | 50 | 51 | tests :: [TestTree] 52 | tests = [ 53 | testProperty "issue105" t_issue105 54 | ] 55 | -------------------------------------------------------------------------------- /benchmarks/HeadersText.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# OPTIONS_GHC -fno-warn-missing-signatures #-} 3 | module HeadersText (headers) where 4 | 5 | import Common (pathTo, rechunkT) 6 | import Control.Applicative 7 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf) 8 | import Data.Char (isSpace) 9 | import qualified Data.Attoparsec.Text as T 10 | import qualified Data.Attoparsec.Text.Lazy as TL 11 | import qualified Data.Text.IO as T 12 | 13 | header = do 14 | name <- T.takeWhile1 (T.inClass "a-zA-Z0-9_-") <* T.char ':' <* T.skipSpace 15 | body <- (:) <$> bodyLine <*> many (T.takeWhile1 isSpace *> bodyLine) 16 | return (name, body) 17 | 18 | bodyLine = T.takeTill (\c -> c == '\r' || c == '\n') <* T.endOfLine 19 | 20 | requestLine = 21 | (,,) <$> 22 | (method <* T.skipSpace) <*> 23 | (T.takeTill isSpace <* T.skipSpace) <*> 24 | httpVersion 25 | where method = "GET" <|> "POST" 26 | 27 | httpVersion = "HTTP/" *> ((,) <$> (int <* T.char '.') <*> int) 28 | 29 | responseLine = (,,) <$> 30 | (httpVersion <* T.skipSpace) <*> 31 | (int <* T.skipSpace) <*> 32 | bodyLine 33 | 34 | int :: T.Parser Int 35 | int = T.decimal 36 | 37 | request = (,) <$> (requestLine <* T.endOfLine) <*> many header 38 | 39 | response = (,) <$> responseLine <*> many header 40 | 41 | headers :: IO Benchmark 42 | headers = do 43 | req <- T.readFile =<< pathTo "http-request.txt" 44 | resp <- T.readFile =<< pathTo "http-response.txt" 45 | let reql = rechunkT 4 req 46 | respl = rechunkT 4 resp 47 | return $ bgroup "headers" [ 48 | bgroup "T" [ 49 | bench "request" $ nf (T.parseOnly request) req 50 | , bench "response" $ nf (T.parseOnly response) resp 51 | ] 52 | , bgroup "TL" [ 53 | bench "request" $ nf (TL.parse request) reql 54 | , bench "response" $ nf (TL.parse response) respl 55 | ] 56 | ] 57 | -------------------------------------------------------------------------------- /tests/QC/Rechunked.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | 3 | module QC.Rechunked ( 4 | rechunkBS 5 | , rechunkT 6 | ) where 7 | 8 | import Control.Monad (forM, forM_) 9 | import Control.Monad.ST (ST, runST) 10 | import Data.List (unfoldr) 11 | import Test.QuickCheck (Gen, choose) 12 | import qualified Data.ByteString as B 13 | import qualified Data.Text as T 14 | import qualified Data.Vector as V 15 | import qualified Data.Vector.Generic as G 16 | import qualified Data.Vector.Generic.Mutable as M 17 | 18 | rechunkBS :: B.ByteString -> Gen [B.ByteString] 19 | rechunkBS = fmap (map B.copy) . rechunk_ B.splitAt B.length 20 | 21 | rechunkT :: T.Text -> Gen [T.Text] 22 | rechunkT = fmap (map T.copy) . rechunk_ T.splitAt T.length 23 | 24 | rechunk_ :: (Int -> a -> (a,a)) -> (a -> Int) -> a -> Gen [a] 25 | rechunk_ split len xs = (unfoldr go . (,) xs) `fmap` rechunkSizes (len xs) 26 | where go (b,r:rs) = Just (h, (t,rs)) 27 | where (h,t) = split r b 28 | go (_,_) = Nothing 29 | 30 | rechunkSizes :: Int -> Gen [Int] 31 | rechunkSizes n0 = shuffle =<< loop [] (0:repeat 1) n0 32 | where loop _ [] _ = error "it's 2015, where's my Stream type?" 33 | loop acc (lb:lbs) n 34 | | n <= 0 = shuffle (reverse acc) 35 | | otherwise = do 36 | !i <- choose (lb,n) 37 | loop (i:acc) lbs (n-i) 38 | 39 | shuffle :: [Int] -> Gen [Int] 40 | shuffle (0:xs) = (0:) `fmap` shuffle xs 41 | shuffle xs = fisherYates xs 42 | 43 | fisherYates :: [a] -> Gen [a] 44 | fisherYates xs = (V.toList . V.backpermute v) `fmap` swapIndices (G.length v) 45 | where 46 | v = V.fromList xs 47 | swapIndices n0 = do 48 | swaps <- forM [0..n] $ \i -> ((,) i) `fmap` choose (i,n) 49 | return (runST (swapAll swaps)) 50 | where 51 | n = n0 - 1 52 | swapAll :: [(Int,Int)] -> ST s (V.Vector Int) 53 | swapAll ijs = do 54 | mv <- G.unsafeThaw (G.enumFromTo 0 n :: V.Vector Int) 55 | forM_ ijs $ uncurry (M.swap mv) 56 | G.unsafeFreeze mv 57 | -------------------------------------------------------------------------------- /tests/QC/Combinator.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, OverloadedStrings #-} 2 | 3 | module QC.Combinator where 4 | 5 | #if !MIN_VERSION_base(4,8,0) 6 | import Control.Applicative ((<*>), (<$>), (<*), (*>)) 7 | #endif 8 | import Data.Maybe (fromJust, isJust) 9 | import Data.Word (Word8) 10 | import QC.Common (Repack, parseBS, repackBS, toLazyBS) 11 | import Test.Tasty (TestTree) 12 | import Test.Tasty.QuickCheck (testProperty) 13 | import Test.QuickCheck 14 | import qualified Data.Attoparsec.ByteString.Char8 as P 15 | import qualified Data.Attoparsec.Combinator as C 16 | import qualified Data.ByteString as B 17 | import qualified Data.ByteString.Char8 as B8 18 | 19 | choice :: NonEmptyList (NonEmptyList Word8) -> Gen Property 20 | choice (NonEmpty xs) = do 21 | let ys = map (B.pack . getNonEmpty) xs 22 | return . forAll (repackBS <$> arbitrary <*> elements ys) $ 23 | maybe False (`elem` ys) . parseBS (C.choice (map P.string ys)) 24 | 25 | count :: Positive (Small Int) -> Repack -> B.ByteString -> Bool 26 | count (Positive (Small n)) rs s = 27 | (length <$> parseBS (C.count n (P.string s)) input) == Just n 28 | where input = repackBS rs (B.concat (replicate (n+1) s)) 29 | 30 | lookAhead :: NonEmptyList Word8 -> Bool 31 | lookAhead (NonEmpty xs) = 32 | let ys = B.pack xs 33 | withLookAheadThenConsume = (\x y -> (x, y)) <$> C.lookAhead (P.string ys) <*> P.string ys 34 | mr = parseBS withLookAheadThenConsume $ toLazyBS ys 35 | in isJust mr && fst (fromJust mr) == snd (fromJust mr) 36 | 37 | match :: Int -> NonNegative Int -> NonNegative Int -> Repack -> Bool 38 | match n (NonNegative x) (NonNegative y) rs = 39 | parseBS (P.match parser) (repackBS rs input) == Just (input, n) 40 | where parser = P.skipWhile (=='x') *> P.signed P.decimal <* 41 | P.skipWhile (=='y') 42 | input = B.concat [ 43 | B8.replicate x 'x', B8.pack (show n), B8.replicate y 'y' 44 | ] 45 | 46 | tests :: [TestTree] 47 | tests = [ 48 | testProperty "choice" choice 49 | , testProperty "count" count 50 | , testProperty "lookAhead" lookAhead 51 | , testProperty "match" match 52 | ] 53 | -------------------------------------------------------------------------------- /benchmarks/Links.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Links (links) where 4 | 5 | import Control.Applicative 6 | import Control.DeepSeq (NFData(..)) 7 | import Test.Tasty.Bench (Benchmark, bench, nf) 8 | import Data.Attoparsec.ByteString as A 9 | import Data.Attoparsec.ByteString.Char8 as A8 10 | import Data.ByteString.Char8 as B8 11 | 12 | data Link = Link { 13 | linkURL :: ByteString 14 | , linkParams :: [(ByteString, ByteString)] 15 | } deriving (Eq, Show) 16 | 17 | instance NFData Link where 18 | rnf l = rnf (linkURL l) `seq` rnf (linkParams l) 19 | 20 | link :: Parser Link 21 | link = Link <$> url <*> many (char8 ';' *> skipSpace *> param) 22 | where url = char8 '<' *> A8.takeTill (=='>') <* char8 '>' <* skipSpace 23 | 24 | param :: Parser (ByteString, ByteString) 25 | param = do 26 | name <- paramName 27 | skipSpace *> "=" *> skipSpace 28 | c <- peekChar' 29 | let isTokenChar = A.inClass "!#$%&'()*+./0-9:<=>?@a-zA-Z[]^_`{|}~-" 30 | val <- case c of 31 | '"' -> quotedString 32 | _ -> A.takeWhile isTokenChar 33 | skipSpace 34 | return (name, val) 35 | 36 | data Quot = Literal | Backslash 37 | 38 | quotedString :: Parser ByteString 39 | quotedString = char '"' *> (fixup <$> body) <* char '"' 40 | where body = A8.scan Literal $ \s c -> 41 | case (s,c) of 42 | (Literal, '\\') -> backslash 43 | (Literal, '"') -> Nothing 44 | _ -> literal 45 | literal = Just Literal 46 | backslash = Just Backslash 47 | fixup = B8.pack . go . B8.unpack 48 | where go ('\\' : x@'\\' : xs) = x : go xs 49 | go ('\\' : x@'"' : xs) = x : go xs 50 | go (x : xs) = x : go xs 51 | go xs = xs 52 | 53 | paramName :: Parser ByteString 54 | paramName = do 55 | name <- A.takeWhile1 $ A.inClass "a-zA-Z0-9!#$&+-.^_`|~" 56 | c <- peekChar 57 | return $ case c of 58 | Just '*' -> B8.snoc name '*' 59 | _ -> name 60 | 61 | links :: Benchmark 62 | links = bench "links" $ nf (A.parseOnly link) lnk 63 | where lnk = "; rel=\"next\", ; rel=\"last\"" 64 | -------------------------------------------------------------------------------- /examples/RFC2616.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module RFC2616 4 | ( 5 | Header(..) 6 | , Request(..) 7 | , Response(..) 8 | , request 9 | , response 10 | ) where 11 | 12 | import Control.Applicative 13 | import Data.Attoparsec.ByteString as P 14 | import Data.Attoparsec.ByteString.Char8 (char8, endOfLine, isDigit_w8) 15 | import Data.ByteString (ByteString) 16 | import Data.Word (Word8) 17 | import Data.Attoparsec.ByteString.Char8 (isEndOfLine, isHorizontalSpace) 18 | 19 | isToken :: Word8 -> Bool 20 | isToken w = w <= 127 && notInClass "\0-\31()<>@,;:\\\"/[]?={} \t" w 21 | 22 | skipSpaces :: Parser () 23 | skipSpaces = satisfy isHorizontalSpace *> skipWhile isHorizontalSpace 24 | 25 | data Request = Request { 26 | requestMethod :: ByteString 27 | , requestUri :: ByteString 28 | , requestVersion :: ByteString 29 | } deriving (Eq, Ord, Show) 30 | 31 | httpVersion :: Parser ByteString 32 | httpVersion = "HTTP/" *> P.takeWhile (\c -> isDigit_w8 c || c == 46) 33 | 34 | requestLine :: Parser Request 35 | requestLine = Request <$> (takeWhile1 isToken <* char8 ' ') 36 | <*> (takeWhile1 (/=32) <* char8 ' ') 37 | <*> (httpVersion <* endOfLine) 38 | 39 | data Header = Header { 40 | headerName :: ByteString 41 | , headerValue :: [ByteString] 42 | } deriving (Eq, Ord, Show) 43 | 44 | messageHeader :: Parser Header 45 | messageHeader = Header 46 | <$> (P.takeWhile isToken <* char8 ':' <* skipWhile isHorizontalSpace) 47 | <*> ((:) <$> (takeTill isEndOfLine <* endOfLine) 48 | <*> (many $ skipSpaces *> takeTill isEndOfLine <* endOfLine)) 49 | 50 | request :: Parser (Request, [Header]) 51 | request = (,) <$> requestLine <*> many messageHeader <* endOfLine 52 | 53 | data Response = Response { 54 | responseVersion :: ByteString 55 | , responseCode :: ByteString 56 | , responseMsg :: ByteString 57 | } deriving (Eq, Ord, Show) 58 | 59 | responseLine :: Parser Response 60 | responseLine = Response <$> (httpVersion <* char8 ' ') 61 | <*> (P.takeWhile isDigit_w8 <* char8 ' ') 62 | <*> (takeTill isEndOfLine <* endOfLine) 63 | 64 | response :: Parser (Response, [Header]) 65 | response = (,) <$> responseLine <*> many messageHeader <* endOfLine 66 | -------------------------------------------------------------------------------- /benchmarks/warp-3.0.1.1/Network/Wai/Handler/Warp/ReadInt.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE MagicHash #-} 4 | {-# LANGUAGE BangPatterns #-} 5 | 6 | -- Copyright : Erik de Castro Lopo 7 | -- License : BSD3 8 | 9 | module Network.Wai.Handler.Warp.ReadInt ( 10 | readInt 11 | , readInt64 12 | ) where 13 | 14 | -- This function lives in its own file because the MagicHash pragma interacts 15 | -- poorly with the CPP pragma. 16 | 17 | import Data.ByteString (ByteString) 18 | import qualified Data.ByteString as S 19 | import Data.Int (Int64) 20 | import GHC.Prim 21 | import GHC.Types 22 | import GHC.Word 23 | 24 | {-# INLINE readInt #-} 25 | readInt :: Integral a => ByteString -> a 26 | readInt bs = fromIntegral $ readInt64 bs 27 | 28 | -- This function is used to parse the Content-Length field of HTTP headers and 29 | -- is a performance hot spot. It should only be replaced with something 30 | -- significantly and provably faster. 31 | -- 32 | -- It needs to be able work correctly on 32 bit CPUs for file sizes > 2G so we 33 | -- use Int64 here and then make a generic 'readInt' that allows conversion to 34 | -- Int and Integer. 35 | 36 | {-# NOINLINE readInt64 #-} 37 | readInt64 :: ByteString -> Int64 38 | readInt64 bs = S.foldl' (\ !i !c -> i * 10 + fromIntegral (mhDigitToInt c)) 0 39 | $ S.takeWhile isDigit bs 40 | 41 | data Table = Table !Addr# 42 | 43 | {-# NOINLINE mhDigitToInt #-} 44 | mhDigitToInt :: Word8 -> Int 45 | #if MIN_VERSION_base(4,16,0) 46 | mhDigitToInt (W8# i) = I# (word2Int# (word8ToWord# (indexWord8OffAddr# addr (word2Int# (word8ToWord# i))))) 47 | #else 48 | mhDigitToInt (W8# i) = I# (word2Int# (indexWord8OffAddr# addr (word2Int# i))) 49 | #endif 50 | where 51 | !(Table addr) = table 52 | table :: Table 53 | table = Table 54 | "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 55 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 56 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 57 | \\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x00\x00\x00\x00\x00\x00\ 58 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 59 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 60 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 61 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 62 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 63 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 64 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 65 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 66 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 67 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 68 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ 69 | \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"# 70 | 71 | isDigit :: Word8 -> Bool 72 | isDigit w = w >= 48 && w <= 57 73 | -------------------------------------------------------------------------------- /benchmarks/Numbers.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# OPTIONS_GHC -fno-warn-deprecations #-} 3 | 4 | module Numbers (numbers) where 5 | 6 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf) 7 | import Data.Scientific (Scientific(..)) 8 | import Text.Parsec.Text () 9 | import Text.Parsec.Text.Lazy () 10 | import qualified Data.Attoparsec.ByteString.Char8 as AC 11 | import qualified Data.Attoparsec.Text as AT 12 | import qualified Data.ByteString.Char8 as BC 13 | import qualified Data.Text as T 14 | 15 | strN, strNePos, strNeNeg :: String 16 | strN = "1234.56789" 17 | strNePos = "1234.56789e3" 18 | strNeNeg = "1234.56789e-3" 19 | 20 | numbers :: Benchmark 21 | numbers = bgroup "numbers" [ 22 | let !tN = T.pack strN 23 | !tNePos = T.pack strNePos 24 | !tNeNeg = T.pack strNeNeg 25 | in bgroup "Text" 26 | [ 27 | bgroup "no power" 28 | [ bench "double" $ nf (AT.parseOnly AT.double) tN 29 | , bench "number" $ nf (AT.parseOnly AT.number) tN 30 | , bench "rational" $ 31 | nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tN 32 | , bench "scientific" $ 33 | nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tN 34 | ] 35 | , bgroup "positive power" 36 | [ bench "double" $ nf (AT.parseOnly AT.double) tNePos 37 | , bench "number" $ nf (AT.parseOnly AT.number) tNePos 38 | , bench "rational" $ 39 | nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tNePos 40 | , bench "scientific" $ 41 | nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNePos 42 | ] 43 | , bgroup "negative power" 44 | [ bench "double" $ nf (AT.parseOnly AT.double) tNeNeg 45 | , bench "number" $ nf (AT.parseOnly AT.number) tNeNeg 46 | , bench "rational" $ 47 | nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tNeNeg 48 | , bench "scientific" $ 49 | nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNeNeg 50 | ] 51 | ] 52 | , let !bN = BC.pack strN 53 | !bNePos = BC.pack strNePos 54 | !bNeNeg = BC.pack strNeNeg 55 | in bgroup "ByteString" 56 | [ bgroup "no power" 57 | [ bench "double" $ nf (AC.parseOnly AC.double) bN 58 | , bench "number" $ nf (AC.parseOnly AC.number) bN 59 | , bench "rational" $ 60 | nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bN 61 | , bench "scientific" $ 62 | nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bN 63 | ] 64 | , bgroup "positive power" 65 | [ bench "double" $ nf (AC.parseOnly AC.double) bNePos 66 | , bench "number" $ nf (AC.parseOnly AC.number) bNePos 67 | , bench "rational" $ 68 | nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNePos 69 | , bench "scientific" $ 70 | nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNePos 71 | ] 72 | , bgroup "negative power" 73 | [ bench "double" $ nf (AC.parseOnly AC.double) bNeNeg 74 | , bench "number" $ nf (AC.parseOnly AC.number) bNeNeg 75 | , bench "rational" $ 76 | nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNeNeg 77 | , bench "scientific" $ 78 | nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNeNeg 79 | ] 80 | ] 81 | ] 82 | -------------------------------------------------------------------------------- /benchmarks/Genome.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Genome 4 | ( 5 | genome 6 | ) where 7 | 8 | import Control.Applicative 9 | import Test.Tasty.Bench 10 | import Data.ByteString (ByteString) 11 | import qualified Data.ByteString.Char8 as B8 12 | import qualified Data.ByteString.Lazy.Char8 as L8 13 | import Data.Attoparsec.ByteString.Char8 as B 14 | import qualified Data.Attoparsec.ByteString.Lazy as BL 15 | import Data.Text (Text) 16 | import qualified Data.Text as T 17 | import qualified Data.Text.Lazy as TL 18 | import Data.Attoparsec.Text as T 19 | import qualified Data.Attoparsec.Text.Lazy as TL 20 | import Common (rechunkBS, rechunkT) 21 | 22 | genome :: Benchmark 23 | genome = bgroup "genome" [ 24 | bgroup "bytestring" [ 25 | bench "s" $ nf (map (B.parse searchBS)) (B8.tails geneB) 26 | , bench "l" $ nf (map (BL.parse searchBS)) (L8.tails geneBL) 27 | , bgroup "CI" [ 28 | bench "s" $ nf (map (B.parse searchBSCI)) (B8.tails geneB) 29 | , bench "l" $ nf (map (BL.parse searchBSCI)) (L8.tails geneBL) 30 | ] 31 | ] 32 | , bgroup "text" [ 33 | bench "s" $ nf (map (T.parse searchT)) (T.tails geneT) 34 | , bench "l" $ nf (map (TL.parse searchT)) (TL.tails geneTL) 35 | , bgroup "CI" [ 36 | bench "s" $ nf (map (T.parse searchTCI)) (T.tails geneT) 37 | , bench "l" $ nf (map (TL.parse searchTCI)) (TL.tails geneTL) 38 | ] 39 | ] 40 | ] 41 | where geneB = B8.pack gene 42 | geneBL = rechunkBS 4 geneB 43 | geneT = T.pack gene 44 | geneTL = rechunkT 4 geneT 45 | 46 | searchBS :: B.Parser ByteString 47 | searchBS = "caac" *> ("aaca" <|> "aact") 48 | 49 | searchBSCI :: B.Parser ByteString 50 | searchBSCI = B.stringCI "CAAC" *> (B.stringCI "AACA" <|> B.stringCI "AACT") 51 | 52 | searchT :: T.Parser Text 53 | searchT = "caac" *> ("aaca" <|> "aact") 54 | 55 | searchTCI :: T.Parser Text 56 | searchTCI = T.asciiCI "CAAC" *> (T.asciiCI "AACA" <|> T.asciiCI "AACT") 57 | 58 | -- Dictyostelium discoideum developmental protein DG1094 (gacT) gene, 59 | -- partial cds. http://www.ncbi.nlm.nih.gov/nuccore/AF081586.1 60 | 61 | gene :: String 62 | gene = "atcgatttagaaagatacaaagatagaaccatcaataataaacaagagaagagagcaagt\ 63 | \agagatattaataaagagattgaaagagagattgaaaagaagagattatcaccaagagaa\ 64 | \agattaaatttatttggtctttcttcctcatcttcatcagtgaattcaacattaacaaga\ 65 | \tctacagcaaatattatctctacaatagacggtagtggaggtagtaatcgtaatagtaaa\ 66 | \aattatggtaatggctcatcctcctcctcaaatagaagatatagtaatactattaatcaa\ 67 | \caattacaaatgcaattacaacaacttcaaatccaacaacaacaatatcaacaaactcaa\ 68 | \caatctcaaataccattacaatatcaacaacaacaacagcaacaacaacaacaaaccact\ 69 | \acaactacaactacatcaagtggtagtaatagattctcttcaaatagatataaaccagtt\ 70 | \gatcttacacaatcatcttcaaactttcgttattcacgtgaaatttatgatgatgattat\ 71 | \tattcaaataataatttaatgatgtttggtaatgagcaaccaaatcaaacaccaatttct\ 72 | \gtatcatcttcatctgcattcacacgtcaaagatctcaaagttgctttgaaccagagaat\ 73 | \cttgtattgctacaacaacaatatcaacaatatcaacaacaacaacaacaacaacaacaa\ 74 | \attccattccaagcaaatccacaatatagtaatgctgttattgaacaaaaattggatcaa\ 75 | \attagagataccattaataatttacatagagataaccgagtctctaga" 76 | -------------------------------------------------------------------------------- /examples/Parsec_RFC2616.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, FlexibleContexts #-} 2 | 3 | module Main (main) where 4 | 5 | import Control.Applicative 6 | import Control.Exception (bracket) 7 | import System.Environment (getArgs) 8 | import System.IO (hClose, openFile, IOMode(ReadMode)) 9 | import Text.Parsec.Char (anyChar, char, satisfy, string) 10 | import Text.Parsec.Combinator (many1, manyTill, skipMany1) 11 | import Text.Parsec.Prim hiding (many, token, (<|>)) 12 | import qualified Data.IntSet as S 13 | 14 | #if 1 15 | import Text.Parsec.ByteString.Lazy (Parser, parseFromFile) 16 | import qualified Data.ByteString.Lazy as B 17 | #else 18 | import Text.Parsec.ByteString (Parser, parseFromFile) 19 | import qualified Data.ByteString as B 20 | #endif 21 | 22 | token :: Stream s m Char => ParsecT s u m Char 23 | token = satisfy $ \c -> S.notMember (fromEnum c) set 24 | where set = S.fromList . map fromEnum $ ['\0'..'\31'] ++ "()<>@,;:\\\"/[]?={} \t" ++ ['\128'..'\255'] 25 | 26 | isHorizontalSpace :: Char -> Bool 27 | isHorizontalSpace c = c == ' ' || c == '\t' 28 | 29 | skipHSpaces :: Stream s m Char => ParsecT s u m () 30 | skipHSpaces = skipMany1 (satisfy isHorizontalSpace) 31 | 32 | data Request = Request { 33 | _requestMethod :: String 34 | , _requestUri :: String 35 | , _requestProtocol :: String 36 | } deriving (Eq, Ord, Show) 37 | 38 | requestLine :: Stream s m Char => ParsecT s u m Request 39 | requestLine = do 40 | method <- many1 token <* skipHSpaces 41 | uri <- many1 (satisfy (not . isHorizontalSpace)) <* skipHSpaces <* string "HTTP/" 42 | proto <- many httpVersion <* endOfLine 43 | return $! Request method uri proto 44 | where 45 | httpVersion = satisfy $ \c -> c == '1' || c == '0' || c == '.' 46 | 47 | endOfLine :: Stream s m Char => ParsecT s u m () 48 | endOfLine = (string "\r\n" *> pure ()) <|> (char '\n' *> pure ()) 49 | 50 | data Header = Header { 51 | _headerName :: String 52 | , _headerValue :: [String] 53 | } deriving (Eq, Ord, Show) 54 | 55 | messageHeader :: Stream s m Char => ParsecT s u m Header 56 | messageHeader = do 57 | header <- many1 token <* char ':' <* skipHSpaces 58 | body <- manyTill anyChar endOfLine 59 | conts <- many $ skipHSpaces *> manyTill anyChar endOfLine 60 | return $! Header header (body:conts) 61 | 62 | request :: Stream s m Char => ParsecT s u m (Request, [Header]) 63 | request = (,) <$> requestLine <*> many messageHeader <* endOfLine 64 | 65 | listy :: FilePath -> IO () 66 | listy arg = do 67 | r <- parseFromFile (many request) arg 68 | case r of 69 | Left err -> putStrLn $ arg ++ ": " ++ show err 70 | Right rs -> print (length rs) 71 | 72 | chunky :: FilePath -> IO () 73 | chunky arg = bracket (openFile arg ReadMode) hClose $ \h -> 74 | loop (0::Int) =<< B.hGetContents h 75 | where 76 | loop !n bs 77 | | B.null bs = print n 78 | | otherwise = case parse myReq arg bs of 79 | Left err -> putStrLn $ arg ++ ": " ++ show err 80 | Right (r,bs') -> loop (n+1) bs' 81 | myReq :: Parser ((Request, [Header]), B.ByteString) 82 | myReq = liftA2 (,) request getInput 83 | 84 | main :: IO () 85 | main = mapM_ f =<< getArgs 86 | where 87 | --f = listy 88 | f = chunky 89 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 187e68710ea1903fdb2fcc78964574982830adc0 0.2 2 | 9c2929e868fdfe4cb267f1cc9d41bdab31d97640 0.2.1 3 | 1f5944a3920322fa8e2bb2fbc9382223968a77d7 0.2.2 4 | d4934c7fdf544d4e5b2925cada4b91d92a8948c2 0.3 5 | 91a8667f027294bdf9df7bbe3fe1083a8d09f26c 0.4 6 | 7ad5cecf582ce41410159040c6a47002ad61613f 0.5 7 | d5f93303fb5169d856291fa6cde834b89866a1ed 0.5.1 8 | ace3afe105f50a5c90ee135dc72ad306fc3b296c 0.6 9 | 4e6ad4e2712c82714bb055c261adb376dae4c513 0.7 10 | 5504cf45797458399d3f840088bd31f84ef1d56d 0.7.1 11 | 69d4bdcb1e2b35d768ea291886b85cbadd7bf839 0.7.2 12 | f20aef71b438d9ab2db399479ae1836e5d98c31c 0.8.0.0 13 | 31829cff7c795e219d23c582921039c7415cb035 0.8.0.1 14 | 87b4dc291f43a548bf0d5338c9a6a5f5545496f7 0.8.0.2 15 | 4948902625875849d6c9d3ac56ec6670d593b1ee 0.8.1.0 16 | 8b98d031ac4d2dfe322a0e58dcc85d6ca825938a 0.8.1.1 17 | 8b98d031ac4d2dfe322a0e58dcc85d6ca825938a 0.8.1.1 18 | 74092eb3c30ebc0d878d37877526ec73c08bce96 0.8.1.1 19 | 277c0528a1e2751df6e81c0c0747909ddbae1453 0.8.2.0 20 | 277c0528a1e2751df6e81c0c0747909ddbae1453 0.8.2.0 21 | 95181c9226105a695a4fb25f0a76540ec0cc7c58 0.8.2.0 22 | 76c08ffe493594369487fd26f85487f9cc0572f7 0.8.3.0 23 | 39351efe1d73462a717ce004a2d2f75729c67f3b 0.8.4.0 24 | d04f4a8c7afdedddd4828c0844ab2b75bdb90789 0.8.5.0 25 | 034e03c9d2b74c8c52438896a0098a086809adc6 0.8.5.1 26 | 9712028a0311a3d12e213727603b5e84a2b8103c 0.8.5.2 27 | f4a4d9c2ccb365a8408ee20d2514fac99d44ecb1 0.8.5.3 28 | 39eeec699da24257b65e7787aebebbb409771812 0.8.6.0 29 | cbfbab5a33eb7522af4dd16b4306ef848b5b2350 0.8.6.1 30 | 7de1dedc888995de3188c757b872ac364cfd1c49 0.9.0.0 31 | 30d71153204a79666638fcb4765115cbfc8d2d93 0.9.1.0 32 | 61f6daa5cd357349143e57b8530bd9ba7f55fdf6 0.9.1.1 33 | e539c7f6c6ceaa79318177cc3bc149d7b6e7c6c1 0.9.1.2 34 | 3033c0af33fa3b83d002eb9150acf8949e56f19f 0.10.0.0 35 | 3033c0af33fa3b83d002eb9150acf8949e56f19f 0.10.0.0 36 | d39f15e57df591fb785272ba5d3acf0d70fcb0ff 0.10.0.0 37 | 33c33d8467265f9f2226a351a59b616a5ace9fbb 0.10.0.1 38 | 066fc7a19fc07c3b7c50e9b591af3dc5591742a1 0.10.0.2 39 | 80fa1e1f6e122d3de65ce3d6cd9f3aceaa108977 0.10.0.3 40 | 80fa1e1f6e122d3de65ce3d6cd9f3aceaa108977 0.10.0.3 41 | 940330f992c8130d8e17b99f93d570d454c0fa4d 0.10.0.3 42 | 5702ca99c6ac5cfdd8a864c0e4c61a4db1158d7a 0.10.1.0 43 | e8bc38ea66812bf5f24599d3f33480411084affb 0.10.1.1 44 | f0bc0f79b70b97a856042386aa8856ac1cad88f1 0.10.2.0 45 | 030191aa39c97afcad17d365ed114a2eedc04ad4 0.10.3.0 46 | ec23283e88c00af34c3144a8775ca4358daea3f7 0.10.4.0 47 | 6f4b317a12acdd05d67fd9ec55d3defaf37a01eb 0.11.1.0 48 | 48809d7716468fe2bbc47b191b1d64358daaf6d1 0.11.2.1 49 | 856a772f83a6e382bbcfe53e221a5a92623b107c 0.11.3.0 50 | a3c7a521fd58595d6a470ab890cc54a80dd45bd7 0.11.3.1 51 | 38326ce07aca6c8c3a014b0d454d2a5bfdbd6a52 0.11.3.2 52 | 694039f4fc157a8ca5dce37959391615b6c3f535 0.11.3.3 53 | 1fe92d1838fb1b1afcee1ca18d4970cf7e01956e 0.11.3.4 54 | 68203a4cb847da905fa44b5aa1942b99d5867ab4 0.12.0.0 55 | c9c2b05707b2fffcd749d5817e3e0d04e6ad6d0d 0.12.1.0 56 | 9157c2b4c35aa5f683b4ac51e6ff52c696bc22e2 0.12.1.1 57 | 682ba515f7ba62500ad79371b37b90932e0c0e3d 0.12.1.2 58 | 2066a72b79b478f59a9c7f1839c522fc59ba9af7 0.12.1.3 59 | ef875ad3bebef2b6fb1bd7063de6a46a06d0d4b4 0.12.1.4 60 | dc1fcff28d13a508f677c7b06af6420877842493 0.12.1.5 61 | 013e04e4526bfd5e9e4a611b42e6f03c413bea91 0.12.1.6 62 | 0ddbce995b77b8db391b4d6519f13feddf4b5bf3 0.13.0.0 63 | 46c16463ec20ad46c9cd0bfb1050c4268f34b749 0.13.0.1 64 | -------------------------------------------------------------------------------- /benchmarks/json-data/example.json: -------------------------------------------------------------------------------- 1 | {"web-app": { 2 | "servlet": [ 3 | { 4 | "servlet-name": "cofaxCDS", 5 | "servlet-class": "org.cofax.cds.CDSServlet", 6 | "init-param": { 7 | "configGlossary:installationAt": "Philadelphia, PA", 8 | "configGlossary:adminEmail": "ksm@pobox.com", 9 | "configGlossary:poweredBy": "Cofax", 10 | "configGlossary:poweredByIcon": "/images/cofax.gif", 11 | "configGlossary:staticPath": "/content/static", 12 | "templateProcessorClass": "org.cofax.WysiwygTemplate", 13 | "templateLoaderClass": "org.cofax.FilesTemplateLoader", 14 | "templatePath": "templates", 15 | "templateOverridePath": "", 16 | "defaultListTemplate": "listTemplate.htm", 17 | "defaultFileTemplate": "articleTemplate.htm", 18 | "useJSP": false, 19 | "jspListTemplate": "listTemplate.jsp", 20 | "jspFileTemplate": "articleTemplate.jsp", 21 | "cachePackageTagsTrack": 200, 22 | "cachePackageTagsStore": 200, 23 | "cachePackageTagsRefresh": 60, 24 | "cacheTemplatesTrack": 100, 25 | "cacheTemplatesStore": 50, 26 | "cacheTemplatesRefresh": 15, 27 | "cachePagesTrack": 200, 28 | "cachePagesStore": 100, 29 | "cachePagesRefresh": 10, 30 | "cachePagesDirtyRead": 10, 31 | "searchEngineListTemplate": "forSearchEnginesList.htm", 32 | "searchEngineFileTemplate": "forSearchEngines.htm", 33 | "searchEngineRobotsDb": "WEB-INF/robots.db", 34 | "useDataStore": true, 35 | "dataStoreClass": "org.cofax.SqlDataStore", 36 | "redirectionClass": "org.cofax.SqlRedirection", 37 | "dataStoreName": "cofax", 38 | "dataStoreDriver": "com.microsoft.jdbc.sqlserver.SQLServerDriver", 39 | "dataStoreUrl": "jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon", 40 | "dataStoreUser": "sa", 41 | "dataStorePassword": "dataStoreTestQuery", 42 | "dataStoreTestQuery": "SET NOCOUNT ON;select test='test';", 43 | "dataStoreLogFile": "/usr/local/tomcat/logs/datastore.log", 44 | "dataStoreInitConns": 10, 45 | "dataStoreMaxConns": 100, 46 | "dataStoreConnUsageLimit": 100, 47 | "dataStoreLogLevel": "debug", 48 | "maxUrlLength": 500}}, 49 | { 50 | "servlet-name": "cofaxEmail", 51 | "servlet-class": "org.cofax.cds.EmailServlet", 52 | "init-param": { 53 | "mailHost": "mail1", 54 | "mailHostOverride": "mail2"}}, 55 | { 56 | "servlet-name": "cofaxAdmin", 57 | "servlet-class": "org.cofax.cds.AdminServlet"}, 58 | 59 | { 60 | "servlet-name": "fileServlet", 61 | "servlet-class": "org.cofax.cds.FileServlet"}, 62 | { 63 | "servlet-name": "cofaxTools", 64 | "servlet-class": "org.cofax.cms.CofaxToolsServlet", 65 | "init-param": { 66 | "templatePath": "toolstemplates/", 67 | "log": 1, 68 | "logLocation": "/usr/local/tomcat/logs/CofaxTools.log", 69 | "logMaxSize": "", 70 | "dataLog": 1, 71 | "dataLogLocation": "/usr/local/tomcat/logs/dataLog.log", 72 | "dataLogMaxSize": "", 73 | "removePageCache": "/content/admin/remove?cache=pages&id=", 74 | "removeTemplateCache": "/content/admin/remove?cache=templates&id=", 75 | "fileTransferFolder": "/usr/local/tomcat/webapps/content/fileTransferFolder", 76 | "lookInContext": 1, 77 | "adminGroupID": 4, 78 | "betaServer": true}}], 79 | "servlet-mapping": { 80 | "cofaxCDS": "/", 81 | "cofaxEmail": "/cofaxutil/aemail/*", 82 | "cofaxAdmin": "/admin/*", 83 | "fileServlet": "/static/*", 84 | "cofaxTools": "/tools/*"}, 85 | 86 | "taglib": { 87 | "taglib-uri": "cofax.tld", 88 | "taglib-location": "/WEB-INF/tlds/cofax.tld"}}} 89 | -------------------------------------------------------------------------------- /tests/QC/Buffer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings, 2 | TypeSynonymInstances #-} 3 | 4 | module QC.Buffer (tests) where 5 | 6 | #if !MIN_VERSION_base(4,8,0) 7 | import Control.Applicative ((<$>)) 8 | import Data.Monoid (Monoid(mconcat)) 9 | #endif 10 | import QC.Common () 11 | import Test.Tasty (TestTree) 12 | import Test.Tasty.QuickCheck (testProperty) 13 | import Test.QuickCheck 14 | import qualified Data.Attoparsec.ByteString.Buffer as BB 15 | import qualified Data.Attoparsec.Text.Buffer as BT 16 | import qualified Data.ByteString as B 17 | import qualified Data.ByteString.Unsafe as B 18 | import qualified Data.Text as T 19 | import qualified Data.Text.Unsafe as T 20 | 21 | data BP t b = BP [t] !t !b 22 | deriving (Eq, Show) 23 | 24 | type BPB = BP B.ByteString BB.Buffer 25 | type BPT = BP T.Text BT.Buffer 26 | 27 | instance Arbitrary BPB where 28 | arbitrary = do 29 | bss <- arbitrary 30 | return $! toBP BB.buffer bss 31 | 32 | shrink (BP bss _ _) = toBP BB.buffer <$> shrink bss 33 | 34 | instance Arbitrary BPT where 35 | arbitrary = do 36 | bss <- arbitrary 37 | return $! toBP BT.buffer bss 38 | 39 | shrink (BP bss _ _) = toBP BT.buffer <$> shrink bss 40 | 41 | toBP :: (Monoid a, Monoid b) => (a -> b) -> [a] -> BP a b 42 | toBP buf bss = BP bss (mconcat bss) (mconcat (map buf bss)) 43 | 44 | b_unbuffer :: BPB -> Property 45 | b_unbuffer (BP _ts t buf) = t === BB.unbuffer buf 46 | 47 | t_unbuffer :: BPT -> Property 48 | t_unbuffer (BP _ts t buf) = t === BT.unbuffer buf 49 | 50 | -- This test triggers both branches in Data.Attoparsec.Text.Buffer.append 51 | -- and checks that Data.Text.Array.copyI manipulations are correct. 52 | t_unbuffer_three :: Property 53 | t_unbuffer_three = t_unbuffer $ toBP BT.buffer [t, t, t] 54 | where 55 | -- Make it long enough to increase chances of a segmentation fault 56 | t = T.replicate 1000 "\0" 57 | 58 | b_length :: BPB -> Property 59 | b_length (BP _ts t buf) = B.length t === BB.length buf 60 | 61 | t_length :: BPT -> Property 62 | t_length (BP _ts t buf) = BT.lengthCodeUnits t === BT.length buf 63 | 64 | b_unsafeIndex :: BPB -> Gen Property 65 | b_unsafeIndex (BP _ts t buf) = do 66 | let l = B.length t 67 | i <- choose (0,l-1) 68 | return $ l === 0 .||. B.unsafeIndex t i === BB.unsafeIndex buf i 69 | 70 | t_iter :: BPT -> Gen Property 71 | t_iter (BP _ts t buf) = do 72 | let l = BT.lengthCodeUnits t 73 | i <- choose (0,l-1) 74 | let it (T.Iter c q) = (c,q) 75 | return $ l === 0 .||. it (T.iter t i) === it (BT.iter buf i) 76 | 77 | t_iter_ :: BPT -> Gen Property 78 | t_iter_ (BP _ts t buf) = do 79 | let l = BT.lengthCodeUnits t 80 | i <- choose (0,l-1) 81 | return $ l === 0 .||. T.iter_ t i === BT.iter_ buf i 82 | 83 | b_unsafeDrop :: BPB -> Gen Property 84 | b_unsafeDrop (BP _ts t buf) = do 85 | i <- choose (0, B.length t) 86 | return $ B.unsafeDrop i t === BB.unsafeDrop i buf 87 | 88 | t_dropCodeUnits :: BPT -> Gen Property 89 | t_dropCodeUnits (BP _ts t buf) = do 90 | i <- choose (0, BT.lengthCodeUnits t) 91 | return $ dropCodeUnits i t === BT.dropCodeUnits i buf 92 | where 93 | #if MIN_VERSION_text(2,0,0) 94 | dropCodeUnits = T.dropWord8 95 | #else 96 | dropCodeUnits = T.dropWord16 97 | #endif 98 | 99 | tests :: [TestTree] 100 | tests = [ 101 | testProperty "b_unbuffer" b_unbuffer 102 | , testProperty "t_unbuffer" t_unbuffer 103 | , testProperty "t_unbuffer_three" t_unbuffer_three 104 | , testProperty "b_length" b_length 105 | , testProperty "t_length" t_length 106 | , testProperty "b_unsafeIndex" b_unsafeIndex 107 | , testProperty "t_iter" t_iter 108 | , testProperty "t_iter_" t_iter_ 109 | , testProperty "b_unsafeDrop" b_unsafeDrop 110 | , testProperty "t_dropCodeUnits" t_dropCodeUnits 111 | ] 112 | -------------------------------------------------------------------------------- /tests/QC/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, FlexibleInstances #-} 2 | {-# OPTIONS_GHC -fno-warn-orphans #-} 3 | module QC.Common 4 | ( 5 | ASCII(..) 6 | , parseBS 7 | , parseT 8 | , toLazyBS 9 | , toStrictBS 10 | , Repack 11 | , repackBS 12 | , repackBS_ 13 | , repackT 14 | , repackT_ 15 | , liftOp 16 | ) where 17 | 18 | #if !MIN_VERSION_base(4,8,0) 19 | import Control.Applicative ((<*>), (<$>)) 20 | #endif 21 | import Data.Char (isAlpha) 22 | import Test.QuickCheck 23 | import Test.QuickCheck.Unicode (shrinkChar, string) 24 | import qualified Data.ByteString as B 25 | import qualified Data.ByteString.Lazy as BL 26 | import qualified Data.Text as T 27 | import qualified Data.Text.Lazy as TL 28 | import qualified Data.Attoparsec.ByteString.Lazy as BL 29 | import qualified Data.Attoparsec.Text.Lazy as TL 30 | 31 | #if !MIN_VERSION_base(4,4,0) 32 | -- This should really be a dependency on the random package :-( 33 | instance Random Word8 where 34 | randomR = integralRandomR 35 | random = randomR (minBound,maxBound) 36 | 37 | instance Arbitrary Word8 where 38 | arbitrary = choose (minBound, maxBound) 39 | #endif 40 | 41 | parseBS :: BL.Parser r -> BL.ByteString -> Maybe r 42 | parseBS p = BL.maybeResult . BL.parse p 43 | 44 | parseT :: TL.Parser r -> TL.Text -> Maybe r 45 | parseT p = TL.maybeResult . TL.parse p 46 | 47 | toStrictBS :: BL.ByteString -> B.ByteString 48 | toStrictBS = B.concat . BL.toChunks 49 | 50 | toLazyBS :: B.ByteString -> BL.ByteString 51 | toLazyBS = BL.fromChunks . (:[]) 52 | 53 | instance Arbitrary B.ByteString where 54 | arbitrary = B.pack <$> arbitrary 55 | shrink = map B.pack . shrink . B.unpack 56 | 57 | instance Arbitrary BL.ByteString where 58 | arbitrary = repackBS <$> arbitrary <*> arbitrary 59 | shrink = map BL.pack . shrink . BL.unpack 60 | 61 | newtype ASCII a = ASCII { fromASCII :: a } 62 | deriving (Eq, Ord, Show) 63 | 64 | instance Arbitrary (ASCII B.ByteString) where 65 | arbitrary = (ASCII . B.pack) <$> listOf (choose (0,127)) 66 | shrink = map (ASCII . B.pack) . shrink . B.unpack . fromASCII 67 | 68 | instance Arbitrary (ASCII BL.ByteString) where 69 | arbitrary = ASCII <$> (repackBS <$> arbitrary <*> (fromASCII <$> arbitrary)) 70 | shrink = map (ASCII . BL.pack) . shrink . BL.unpack . fromASCII 71 | 72 | type Repack = NonEmptyList (Positive (Small Int)) 73 | 74 | repackBS :: Repack -> B.ByteString -> BL.ByteString 75 | repackBS (NonEmpty bs) = 76 | BL.fromChunks . repackBS_ (map (getSmall . getPositive) bs) 77 | 78 | repackBS_ :: [Int] -> B.ByteString -> [B.ByteString] 79 | repackBS_ = go . cycle 80 | where go (b:bs) s 81 | | B.null s = [] 82 | | otherwise = let (h,t) = B.splitAt b s 83 | in h : go bs t 84 | go _ _ = error "unpossible" 85 | 86 | instance Arbitrary T.Text where 87 | arbitrary = T.pack <$> string 88 | shrink = map T.pack . shrinkList shrinkChar . T.unpack 89 | 90 | instance Arbitrary TL.Text where 91 | arbitrary = TL.pack <$> string 92 | shrink = map TL.pack . shrinkList shrinkChar . TL.unpack 93 | 94 | repackT :: Repack -> T.Text -> TL.Text 95 | repackT (NonEmpty bs) = 96 | TL.fromChunks . repackT_ (map (getSmall . getPositive) bs) 97 | 98 | repackT_ :: [Int] -> T.Text -> [T.Text] 99 | repackT_ = go . cycle 100 | where go (b:bs) s 101 | | T.null s = [] 102 | | otherwise = let (h,t) = T.splitAt b s 103 | in h : go bs t 104 | go _ _ = error "unpossible" 105 | 106 | liftOp :: (Show a, Testable prop) => 107 | String -> (a -> a -> prop) -> a -> a -> Property 108 | liftOp name f x y = counterexample desc (f x y) 109 | where op = case name of 110 | (c:_) | isAlpha c -> " `" ++ name ++ "` " 111 | | otherwise -> " " ++ name ++ " " 112 | _ -> " ??? " 113 | desc = "not (" ++ show x ++ op ++ show y ++ ")" 114 | -------------------------------------------------------------------------------- /internal/Data/Attoparsec/ByteString/FastSet.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, MagicHash #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Data.Attoparsec.ByteString.FastSet 6 | -- Copyright : Bryan O'Sullivan 2007-2015 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : bos@serpentine.com 10 | -- Stability : experimental 11 | -- Portability : unknown 12 | -- 13 | -- Fast set membership tests for 'Word8' and 8-bit 'Char' values. The 14 | -- set representation is unboxed for efficiency. For small sets, we 15 | -- test for membership using a binary search. For larger sets, we use 16 | -- a lookup table. 17 | -- 18 | ----------------------------------------------------------------------------- 19 | module Data.Attoparsec.ByteString.FastSet 20 | ( 21 | -- * Data type 22 | FastSet 23 | -- * Construction 24 | , fromList 25 | , set 26 | -- * Lookup 27 | , memberChar 28 | , memberWord8 29 | -- * Debugging 30 | , fromSet 31 | -- * Handy interface 32 | , charClass 33 | ) where 34 | 35 | import Data.Bits ((.&.), (.|.), unsafeShiftL) 36 | import Foreign.Marshal.Utils (fillBytes) 37 | import Foreign.Storable (peekByteOff, pokeByteOff) 38 | import GHC.Exts (Int(I#), iShiftRA#) 39 | import GHC.Word (Word8) 40 | import qualified Data.ByteString as B 41 | import qualified Data.ByteString.Char8 as B8 42 | import qualified Data.ByteString.Internal as I 43 | import qualified Data.ByteString.Unsafe as U 44 | 45 | data FastSet = Sorted { fromSet :: !B.ByteString } 46 | | Table { fromSet :: !B.ByteString } 47 | deriving (Eq, Ord) 48 | 49 | instance Show FastSet where 50 | show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) 51 | show (Table _) = "FastSet Table" 52 | 53 | -- | The lower bound on the size of a lookup table. We choose this to 54 | -- balance table density against performance. 55 | tableCutoff :: Int 56 | tableCutoff = 8 57 | 58 | -- | Create a set. 59 | set :: B.ByteString -> FastSet 60 | set s | B.length s < tableCutoff = Sorted . B.sort $ s 61 | | otherwise = Table . mkTable $ s 62 | 63 | fromList :: [Word8] -> FastSet 64 | fromList = set . B.pack 65 | 66 | data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 67 | 68 | shiftR :: Int -> Int -> Int 69 | shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) 70 | 71 | index :: Int -> I 72 | index i = I (i `shiftR` 3) (1 `unsafeShiftL` (i .&. 7)) 73 | {-# INLINE index #-} 74 | 75 | -- | Check the set for membership. 76 | memberWord8 :: Word8 -> FastSet -> Bool 77 | memberWord8 w (Table t) = 78 | let I byte bit = index (fromIntegral w) 79 | in U.unsafeIndex t byte .&. bit /= 0 80 | memberWord8 w (Sorted s) = search 0 (B.length s - 1) 81 | where search lo hi 82 | | hi < lo = False 83 | | otherwise = 84 | let mid = (lo + hi) `quot` 2 85 | in case compare w (U.unsafeIndex s mid) of 86 | GT -> search (mid + 1) hi 87 | LT -> search lo (mid - 1) 88 | _ -> True 89 | 90 | -- | Check the set for membership. Only works with 8-bit characters: 91 | -- characters above code point 255 will give wrong answers. 92 | memberChar :: Char -> FastSet -> Bool 93 | memberChar c = memberWord8 (I.c2w c) 94 | {-# INLINE memberChar #-} 95 | 96 | mkTable :: B.ByteString -> B.ByteString 97 | mkTable s = I.unsafeCreate 32 $ \t -> do 98 | fillBytes t 0 32 99 | U.unsafeUseAsCStringLen s $ \(p, l) -> 100 | let loop n | n == l = return () 101 | | otherwise = do 102 | c <- peekByteOff p n :: IO Word8 103 | let I byte bit = index (fromIntegral c) 104 | prev <- peekByteOff t byte :: IO Word8 105 | pokeByteOff t byte (prev .|. bit) 106 | loop (n + 1) 107 | in loop 0 108 | 109 | charClass :: String -> FastSet 110 | charClass = set . B8.pack . go 111 | where go (a:'-':b:xs) = [a..b] ++ go xs 112 | go (x:xs) = x : go xs 113 | go _ = "" 114 | -------------------------------------------------------------------------------- /doc/whats-in-a-parser-1.md: -------------------------------------------------------------------------------- 1 | My goal in working on the 2 | [new GHC I/O manager](http://www.serpentine.com/blog/2010/01/22/new-ghc-io-manager-first-benchmark-numbers/) 3 | has been to get the Haskell network stack into a state where it could 4 | be used to attack high-performance and scalable networking problems, 5 | domains in which it has historically been weak. 6 | 7 | While it's encouraging to have an excellent networking stack (Johan 8 | and I now have this thoroughly in hand), the next thing I'd look for 9 | is libraries to help build networked applications. One of the 10 | fundamental things that such apps need to do well is parse data, be it 11 | received from the network or read from files. 12 | 13 | The Haskell parsing library of first resort has for years been 14 | [Parsec](http://www.haskell.org/haskellwiki/Parsec). While other 15 | capable libraries exist 16 | (e.g. [polyparse](http://hackage.haskell.org/package/polyparse) and 17 | [uu-parsinglib](http://hackage.haskell.org/package/uu-parsinglib)), 18 | they don't appear to see much use. 19 | 20 | As appealing as Parsec's API is, it has a few problems: 21 | 22 | * Parsec 2 is slow, and it has high memory overhead, due to its use of 23 | Haskell's `String` type for tokens. Parsec 3 can use the more 24 | efficient `ByteString` type (which is in any case much more 25 | appropriate for networked applications that deal in octets), but it 26 | achieves this flexibility at the cost of being even slower than 27 | Parsec 2. 28 | 29 | * Parsec's API demands that all of a parser's input be available at 30 | once. People usually work around this by feeding a Parsec parser 31 | with lazily read data, but lazy I/O is at odds with my goal of 32 | writing solid networked code. 33 | 34 | What properties should a parsing library for networked applications 35 | ideally possess? There are a few obvious desiderata that have been 36 | well known for years. For example, it's important to have an appealing 37 | API and programming model. Parsec squarely fits this desire. 38 | 39 | Performance is also a big consideration. Ideally, a parsing library 40 | would be fast enough that you wouldn't feel any real need for either 41 | of the following: 42 | 43 | * A few weeks to write an insane hand-bummed parser. 44 | 45 | * Mechanical parser generators or lexers 46 | (e.g. [happy](http://www.haskell.org/happy/) or 47 | [alex](http://www.haskell.org/alex/)). 48 | 49 | There are some additional important constraints on a realistic 50 | library: it must fit well into a highly concurrent networked world 51 | full of unreliable, hostile and incompetent clients. 52 | 53 | * High concurrency levels demand a low per-connection memory 54 | footprint. 55 | 56 | * The need to cope with poorly behaved clients requires that 57 | applications must be able to throttle connections that are too busy, 58 | or kill connections that are too slow or attempting to consume too 59 | many server resources. A good parsing library will not get in the 60 | way of these needs. 61 | 62 | A few years ago, I made a few half-hearted attempts to write a 63 | specialised version of Parsec, which I eventually named 64 | [Attoparsec](http://hackage.haskell.org/package/attoparsec). 65 | 66 | I began with a stripped-down Parsec that was specialised to accept 67 | `ByteString` input. I then extended the API to allow a parser to 68 | consume small chunks of input at a time. 69 | 70 | Because I wasn't using Attoparsec "in anger" at the time, I made sure 71 | that my library worked (more or less), but I was not measuring its 72 | performance. 73 | 74 | In late January of this year, I began to think about using Attoparsec 75 | as the parser for a simple HTTP server that I could use to benchmark 76 | our new GHC I/O manager code. Clearly, I'd want the parser to perform 77 | well, or it would distort my numbers rather badly. 78 | 79 | By coincidence, [John MacFarlane](http://johnmacfarlane.net/) emailed 80 | me around the same time, with disturbing findings: he'd tried 81 | Attoparsec, and found its performance to be *terrible*! In fact, it 82 | was 4 to 20 times _slower_ than plain Parsec with his experimental 83 | parser and test data. Clearly, I had some hard work to look forward 84 | to. 85 | 86 | Happily, that work is now almost complete, and I am pleased with the 87 | results. In the next post, I'll have some details of what this all 88 | entails. 89 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # 0.14.4 2 | 3 | * Fix a segmentation fault when built against `text-2.0` 4 | * Restructure project to allow more convenient usage of benchmark suite 5 | * Allow benchmarks to build with GHC 9.2 6 | 7 | # 0.14.3 8 | 9 | * Support for GHC 9.2.1 10 | 11 | # 0.14.2 12 | 13 | * Support for GHC 9.2.1 14 | 15 | # 0.14.1 16 | 17 | * Added `Data.Attoparsec.ByteString.getChunk`. 18 | 19 | # 0.14.0 20 | 21 | * Added `Data.Attoparsec.ByteString.takeWhileIncluding`. 22 | * Make `Data.Attoparsec.{Text,ByteString}.Lazy.parseOnly` accept lazy input. 23 | 24 | # 0.13.2.1 25 | 26 | * Improved performance of `Data.Attoparsec.Text.asciiCI` 27 | 28 | # 0.13.2.0 29 | 30 | * `pure` is now strict in `Position` 31 | 32 | # 0.13.1.0 33 | 34 | * `runScanner` now correctly returns the final state 35 | (https://github.com/bos/attoparsec/issues/105). 36 | * `Parser`, `ZeptoT`, `Buffer`, and `More` now expose `Semigroup` instances. 37 | * `Parser`, and `ZeptoT` now expose `MonadFail` instances. 38 | 39 | # 0.13.0.2 40 | 41 | * Restore the fast specialised character set implementation for Text 42 | * Move testsuite from test-framework to tasty 43 | * Performance optimization of takeWhile and takeWhile1 44 | 45 | # 0.13.0.1 46 | 47 | * Fixed a bug in the implementations of inClass and notInClass for 48 | Text (https://github.com/bos/attoparsec/issues/103) 49 | 50 | # 0.13.0.0 51 | 52 | * Made the parser type in the Zepto module a monad transformer 53 | (needed by aeson's string unescaping parser). 54 | 55 | # 0.12.1.6 56 | 57 | * Fixed a case folding bug in the ByteString version of stringCI. 58 | 59 | # 0.12.1.5 60 | 61 | * Fixed an indexing bug in the new Text implementation of string, 62 | reported by Michel Boucey. 63 | 64 | # 0.12.1.4 65 | 66 | * Fixed a case where the string parser would consume an unnecessary 67 | amount of input before failing a match, when it could bail much 68 | earlier (https://github.com/bos/attoparsec/issues/97) 69 | 70 | * Added more context to error messages 71 | (https://github.com/bos/attoparsec/pull/79) 72 | 73 | # 0.12.1.3 74 | 75 | * Fixed incorrect tracking of Text lengths 76 | (https://github.com/bos/attoparsec/issues/80) 77 | 78 | # 0.12.1.2 79 | 80 | * Fixed the incorrect tracking of capacity if the initial buffer was 81 | empty (https://github.com/bos/attoparsec/issues/75) 82 | 83 | # 0.12.1.1 84 | 85 | * Fixed a data corruption bug that occurred under some circumstances 86 | if a buffer grew after prompting for more input 87 | (https://github.com/bos/attoparsec/issues/74) 88 | 89 | # 0.12.1.0 90 | 91 | * Now compatible with GHC 7.9 92 | 93 | * Reintroduced the Chunk class, used by the parsers package 94 | 95 | # 0.12.0.0 96 | 97 | * A new internal representation makes almost all real-world parsers 98 | faster, sometimes by big margins. For example, parsing JSON data 99 | with aeson is now up to 70% faster. These performance improvements 100 | also come with reduced memory consumption and some new capabilities. 101 | 102 | * The new match combinator gives both the result of a parse and the 103 | input that it matched. 104 | 105 | * The test suite has doubled in size. This made it possible to switch 106 | to the new internal representation with a decent degree of 107 | confidence that everything was more or less working. 108 | 109 | * The benchmark suite now contains a small family of benchmarks taken 110 | from real-world uses of attoparsec. 111 | 112 | * A few types that ought to have been private now are. 113 | 114 | * A few obsolete modules and functions have been marked as deprecated. 115 | They will be removed from the next major release. 116 | 117 | # 0.11.3.0 118 | 119 | * New function scientific is compatible with rational, but parses 120 | integers more efficiently (https://github.com/bos/aeson/issues/198) 121 | 122 | # 0.11.2.0 123 | 124 | * The new Chunk typeclass allows for some code sharing with Ed 125 | Kmett's parsers package: http://hackage.haskell.org/package/parsers 126 | 127 | * New function runScanner generalises scan to return the final state 128 | of the scanner as well as the input consumed. 129 | 130 | # 0.11.1.0 131 | 132 | * New dependency: the scientific package. This allows us to parse 133 | numbers much more efficiently. 134 | 135 | * peekWord8', peekChar': new primitive parsers that allow 136 | single-character lookahead. 137 | -------------------------------------------------------------------------------- /benchmarks/TextFastSet.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | 3 | ------------------------------------------------------------------------------ 4 | -- | 5 | -- Module : Data.Attoparsec.FastSet 6 | -- Copyright : Felipe Lessa 2010, Bryan O'Sullivan 2007-2015 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : felipe.lessa@gmail.com 10 | -- Stability : experimental 11 | -- Portability : unknown 12 | -- 13 | -- Fast set membership tests for 'Char' values. We test for 14 | -- membership using a hashtable implemented with Robin Hood 15 | -- collision resolution. The set representation is unboxed, 16 | -- and the characters and hashes interleaved, for efficiency. 17 | -- 18 | -- 19 | ----------------------------------------------------------------------------- 20 | module TextFastSet 21 | ( 22 | -- * Data type 23 | FastSet 24 | -- * Construction 25 | , fromList 26 | , set 27 | -- * Lookup 28 | , member 29 | -- * Handy interface 30 | , charClass 31 | ) where 32 | 33 | import Data.Bits ((.|.), (.&.), shiftR) 34 | import Data.Function (on) 35 | import Data.List (sort, sortBy) 36 | import qualified Data.Array.Base as AB 37 | import qualified Data.Array.Unboxed as A 38 | import qualified Data.Text as T 39 | 40 | data FastSet = FastSet { 41 | table :: {-# UNPACK #-} !(A.UArray Int Int) 42 | , mask :: {-# UNPACK #-} !Int 43 | } 44 | 45 | data Entry = Entry { 46 | key :: {-# UNPACK #-} !Char 47 | , initialIndex :: {-# UNPACK #-} !Int 48 | , index :: {-# UNPACK #-} !Int 49 | } 50 | 51 | offset :: Entry -> Int 52 | offset e = index e - initialIndex e 53 | 54 | resolveCollisions :: [Entry] -> [Entry] 55 | resolveCollisions [] = [] 56 | resolveCollisions [e] = [e] 57 | resolveCollisions (a:b:entries) = a' : resolveCollisions (b' : entries) 58 | where (a', b') 59 | | index a < index b = (a, b) 60 | | offset a < offset b = (b { index=index a }, a { index=index a + 1 }) 61 | | otherwise = (a, b { index=index a + 1 }) 62 | 63 | pad :: Int -> [Entry] -> [Entry] 64 | pad = go 0 65 | where -- ensure that we pad enough so that lookups beyond the 66 | -- last hash in the table fall within the array 67 | go !_ !m [] = replicate (max 1 m + 1) empty 68 | go k m (e:entries) = map (const empty) [k..i - 1] ++ e : 69 | go (i + 1) (m + i - k - 1) entries 70 | where i = index e 71 | empty = Entry '\0' maxBound 0 72 | 73 | nextPowerOf2 :: Int -> Int 74 | nextPowerOf2 0 = 1 75 | nextPowerOf2 x = go (x - 1) 1 76 | where go y 32 = y + 1 77 | go y k = go (y .|. (y `shiftR` k)) $ k * 2 78 | 79 | fastHash :: Char -> Int 80 | fastHash = fromEnum 81 | 82 | fromList :: String -> FastSet 83 | fromList s = FastSet (AB.listArray (0, length interleaved - 1) interleaved) 84 | mask' 85 | where s' = ordNub (sort s) 86 | l = length s' 87 | mask' = nextPowerOf2 ((5 * l) `div` 4) - 1 88 | entries = pad mask' . 89 | resolveCollisions . 90 | sortBy (compare `on` initialIndex) . 91 | zipWith (\c i -> Entry c i i) s' . 92 | map ((.&. mask') . fastHash) $ s' 93 | interleaved = concatMap (\e -> [fromEnum $ key e, initialIndex e]) 94 | entries 95 | 96 | ordNub :: Eq a => [a] -> [a] 97 | ordNub [] = [] 98 | ordNub (y:ys) = go y ys 99 | where go x (z:zs) 100 | | x == z = go x zs 101 | | otherwise = x : go z zs 102 | go x [] = [x] 103 | 104 | set :: T.Text -> FastSet 105 | set = fromList . T.unpack 106 | 107 | -- | Check the set for membership. 108 | member :: Char -> FastSet -> Bool 109 | member c a = go (2 * i) 110 | where i = fastHash c .&. mask a 111 | lookupAt j b = (i' <= i) && (c == c' || b) 112 | where c' = toEnum $ AB.unsafeAt (table a) j 113 | i' = AB.unsafeAt (table a) $ j + 1 114 | go j = lookupAt j . lookupAt (j + 2) . lookupAt (j + 4) . 115 | lookupAt (j + 6) . go $ j + 8 116 | 117 | charClass :: String -> FastSet 118 | charClass = fromList . go 119 | where go (a:'-':b:xs) = [a..b] ++ go xs 120 | go (x:xs) = x : go xs 121 | go _ = "" 122 | -------------------------------------------------------------------------------- /Data/Attoparsec/Number.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | -- | 3 | -- Module : Data.Attoparsec.Number 4 | -- Copyright : Bryan O'Sullivan 2007-2015 5 | -- License : BSD3 6 | -- 7 | -- Maintainer : bos@serpentine.com 8 | -- Stability : experimental 9 | -- Portability : unknown 10 | -- 11 | -- This module is deprecated, and both the module and 'Number' type 12 | -- will be removed in the next major release. Use the 13 | -- package 14 | -- and the 'Data.Scientific.Scientific' type instead. 15 | -- 16 | -- A simple number type, useful for parsing both exact and inexact 17 | -- quantities without losing much precision. 18 | module Data.Attoparsec.Number 19 | {-# DEPRECATED "This module will be removed in the next major release." #-} 20 | ( 21 | Number(..) 22 | ) where 23 | 24 | import Control.DeepSeq (NFData(rnf)) 25 | import Data.Data (Data) 26 | import Data.Function (on) 27 | import Data.Typeable (Typeable) 28 | 29 | -- | A numeric type that can represent integers accurately, and 30 | -- floating point numbers to the precision of a 'Double'. 31 | -- 32 | -- /Note/: this type is deprecated, and will be removed in the next 33 | -- major release. Use the 'Data.Scientific.Scientific' type instead. 34 | data Number = I !Integer 35 | | D {-# UNPACK #-} !Double 36 | deriving (Typeable, Data) 37 | {-# DEPRECATED Number "Use Scientific instead." #-} 38 | 39 | instance Show Number where 40 | show (I a) = show a 41 | show (D a) = show a 42 | 43 | instance NFData Number where 44 | rnf (I _) = () 45 | rnf (D _) = () 46 | {-# INLINE rnf #-} 47 | 48 | binop :: (Integer -> Integer -> a) -> (Double -> Double -> a) 49 | -> Number -> Number -> a 50 | binop _ d (D a) (D b) = d a b 51 | binop i _ (I a) (I b) = i a b 52 | binop _ d (D a) (I b) = d a (fromIntegral b) 53 | binop _ d (I a) (D b) = d (fromIntegral a) b 54 | {-# INLINE binop #-} 55 | 56 | instance Eq Number where 57 | (==) = binop (==) (==) 58 | {-# INLINE (==) #-} 59 | 60 | (/=) = binop (/=) (/=) 61 | {-# INLINE (/=) #-} 62 | 63 | instance Ord Number where 64 | (<) = binop (<) (<) 65 | {-# INLINE (<) #-} 66 | 67 | (<=) = binop (<=) (<=) 68 | {-# INLINE (<=) #-} 69 | 70 | (>) = binop (>) (>) 71 | {-# INLINE (>) #-} 72 | 73 | (>=) = binop (>=) (>=) 74 | {-# INLINE (>=) #-} 75 | 76 | compare = binop compare compare 77 | {-# INLINE compare #-} 78 | 79 | instance Num Number where 80 | (+) = binop (((I$!).) . (+)) (((D$!).) . (+)) 81 | {-# INLINE (+) #-} 82 | 83 | (-) = binop (((I$!).) . (-)) (((D$!).) . (-)) 84 | {-# INLINE (-) #-} 85 | 86 | (*) = binop (((I$!).) . (*)) (((D$!).) . (*)) 87 | {-# INLINE (*) #-} 88 | 89 | abs (I a) = I $! abs a 90 | abs (D a) = D $! abs a 91 | {-# INLINE abs #-} 92 | 93 | negate (I a) = I $! negate a 94 | negate (D a) = D $! negate a 95 | {-# INLINE negate #-} 96 | 97 | signum (I a) = I $! signum a 98 | signum (D a) = D $! signum a 99 | {-# INLINE signum #-} 100 | 101 | fromInteger = (I$!) . fromInteger 102 | {-# INLINE fromInteger #-} 103 | 104 | instance Real Number where 105 | toRational (I a) = fromIntegral a 106 | toRational (D a) = toRational a 107 | {-# INLINE toRational #-} 108 | 109 | instance Fractional Number where 110 | fromRational = (D$!) . fromRational 111 | {-# INLINE fromRational #-} 112 | 113 | (/) = binop (((D$!).) . (/) `on` fromIntegral) 114 | (((D$!).) . (/)) 115 | {-# INLINE (/) #-} 116 | 117 | recip (I a) = D $! recip (fromIntegral a) 118 | recip (D a) = D $! recip a 119 | {-# INLINE recip #-} 120 | 121 | instance RealFrac Number where 122 | properFraction (I a) = (fromIntegral a,0) 123 | properFraction (D a) = case properFraction a of 124 | (i,d) -> (i,D d) 125 | {-# INLINE properFraction #-} 126 | truncate (I a) = fromIntegral a 127 | truncate (D a) = truncate a 128 | {-# INLINE truncate #-} 129 | round (I a) = fromIntegral a 130 | round (D a) = round a 131 | {-# INLINE round #-} 132 | ceiling (I a) = fromIntegral a 133 | ceiling (D a) = ceiling a 134 | {-# INLINE ceiling #-} 135 | floor (I a) = fromIntegral a 136 | floor (D a) = floor a 137 | {-# INLINE floor #-} 138 | -------------------------------------------------------------------------------- /internal/Data/Attoparsec/Text/FastSet.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | 3 | ------------------------------------------------------------------------------ 4 | -- | 5 | -- Module : Data.Attoparsec.FastSet 6 | -- Copyright : Felipe Lessa 2010, Bryan O'Sullivan 2007-2015 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : felipe.lessa@gmail.com 10 | -- Stability : experimental 11 | -- Portability : unknown 12 | -- 13 | -- Fast set membership tests for 'Char' values. We test for 14 | -- membership using a hashtable implemented with Robin Hood 15 | -- collision resolution. The set representation is unboxed, 16 | -- and the characters and hashes interleaved, for efficiency. 17 | -- 18 | -- 19 | ----------------------------------------------------------------------------- 20 | module Data.Attoparsec.Text.FastSet 21 | ( 22 | -- * Data type 23 | FastSet 24 | -- * Construction 25 | , fromList 26 | , set 27 | -- * Lookup 28 | , member 29 | -- * Handy interface 30 | , charClass 31 | ) where 32 | 33 | import Data.Bits ((.|.), (.&.), shiftR) 34 | import Data.Function (on) 35 | import Data.List (sort, sortBy) 36 | import qualified Data.Array.Base as AB 37 | import qualified Data.Array.Unboxed as A 38 | import qualified Data.Text as T 39 | 40 | data FastSet = FastSet { 41 | table :: {-# UNPACK #-} !(A.UArray Int Int) 42 | , mask :: {-# UNPACK #-} !Int 43 | } 44 | 45 | data Entry = Entry { 46 | key :: {-# UNPACK #-} !Char 47 | , initialIndex :: {-# UNPACK #-} !Int 48 | , index :: {-# UNPACK #-} !Int 49 | } 50 | 51 | offset :: Entry -> Int 52 | offset e = index e - initialIndex e 53 | 54 | resolveCollisions :: [Entry] -> [Entry] 55 | resolveCollisions [] = [] 56 | resolveCollisions [e] = [e] 57 | resolveCollisions (a:b:entries) = a' : resolveCollisions (b' : entries) 58 | where (a', b') 59 | | index a < index b = (a, b) 60 | | offset a < offset b = (b { index=index a }, a { index=index a + 1 }) 61 | | otherwise = (a, b { index=index a + 1 }) 62 | 63 | pad :: Int -> [Entry] -> [Entry] 64 | pad = go 0 65 | where -- ensure that we pad enough so that lookups beyond the 66 | -- last hash in the table fall within the array 67 | go !_ !m [] = replicate (max 1 m + 1) empty 68 | go k m (e:entries) = map (const empty) [k..i - 1] ++ e : 69 | go (i + 1) (m + i - k - 1) entries 70 | where i = index e 71 | empty = Entry '\0' maxBound 0 72 | 73 | nextPowerOf2 :: Int -> Int 74 | nextPowerOf2 0 = 1 75 | nextPowerOf2 x = go (x - 1) 1 76 | where go y 32 = y + 1 77 | go y k = go (y .|. (y `shiftR` k)) $ k * 2 78 | 79 | fastHash :: Char -> Int 80 | fastHash = fromEnum 81 | 82 | fromList :: String -> FastSet 83 | fromList s = FastSet (AB.listArray (0, length interleaved - 1) interleaved) 84 | mask' 85 | where s' = ordNub (sort s) 86 | l = length s' 87 | mask' = nextPowerOf2 ((5 * l) `div` 4) - 1 88 | entries = pad mask' . 89 | resolveCollisions . 90 | sortBy (compare `on` initialIndex) . 91 | zipWith (\c i -> Entry c i i) s' . 92 | map ((.&. mask') . fastHash) $ s' 93 | interleaved = concatMap (\e -> [fromEnum $ key e, initialIndex e]) 94 | entries 95 | 96 | ordNub :: Eq a => [a] -> [a] 97 | ordNub [] = [] 98 | ordNub (y:ys) = go y ys 99 | where go x (z:zs) 100 | | x == z = go x zs 101 | | otherwise = x : go z zs 102 | go x [] = [x] 103 | 104 | set :: T.Text -> FastSet 105 | set = fromList . T.unpack 106 | 107 | -- | Check the set for membership. 108 | member :: Char -> FastSet -> Bool 109 | member c a = go (2 * i) 110 | where i = fastHash c .&. mask a 111 | lookupAt j b = (i' <= i) && (c == c' || b) 112 | where c' = toEnum $ AB.unsafeAt (table a) j 113 | i' = AB.unsafeAt (table a) $ j + 1 114 | go j = lookupAt j . lookupAt (j + 2) . lookupAt (j + 4) . 115 | lookupAt (j + 6) . go $ j + 8 116 | 117 | charClass :: String -> FastSet 118 | charClass = fromList . go 119 | where go (a:'-':b:xs) = [a..b] ++ go xs 120 | go (x:xs) = x : go xs 121 | go _ = "" 122 | -------------------------------------------------------------------------------- /Data/Attoparsec/Text/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #if __GLASGOW_HASKELL__ >= 702 3 | {-# LANGUAGE Trustworthy #-} -- Imports internal modules 4 | #endif 5 | 6 | {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} 7 | -- | 8 | -- Module : Data.Attoparsec.Text.Lazy 9 | -- Copyright : Bryan O'Sullivan 2007-2015 10 | -- License : BSD3 11 | -- 12 | -- Maintainer : bos@serpentine.com 13 | -- Stability : experimental 14 | -- Portability : unknown 15 | -- 16 | -- Simple, efficient combinator parsing that can consume lazy 'Text' 17 | -- strings, loosely based on the Parsec library. 18 | -- 19 | -- This is essentially the same code as in the 'Data.Attoparsec.Text' 20 | -- module, only with a 'parse' function that can consume a lazy 21 | -- 'Text' incrementally, and a 'Result' type that does not allow 22 | -- more input to be fed in. Think of this as suitable for use with a 23 | -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'. 24 | -- 25 | -- /Note:/ The various parser functions and combinators such as 26 | -- 'string' still expect /strict/ 'T.Text' parameters, and return 27 | -- strict 'T.Text' results. Behind the scenes, strict 'T.Text' values 28 | -- are still used internally to store parser input and manipulate it 29 | -- efficiently. 30 | 31 | module Data.Attoparsec.Text.Lazy 32 | ( 33 | Result(..) 34 | , module Data.Attoparsec.Text 35 | -- * Running parsers 36 | , parse 37 | , parseOnly 38 | , parseTest 39 | -- ** Result conversion 40 | , maybeResult 41 | , eitherResult 42 | ) where 43 | 44 | import Control.DeepSeq (NFData(rnf)) 45 | import Data.List (intercalate) 46 | import Data.Text.Lazy.Internal (Text(..), chunk) 47 | import qualified Data.Attoparsec.Internal.Types as T 48 | import qualified Data.Attoparsec.Text as A 49 | import qualified Data.Text as T 50 | import Data.Attoparsec.Text hiding (IResult(..), Result, eitherResult, 51 | maybeResult, parse, parseOnly, parseWith, parseTest) 52 | 53 | -- | The result of a parse. 54 | data Result r = Fail Text [String] String 55 | -- ^ The parse failed. The 'Text' is the input 56 | -- that had not yet been consumed when the failure 57 | -- occurred. The @[@'String'@]@ is a list of contexts 58 | -- in which the error occurred. The 'String' is the 59 | -- message describing the error, if any. 60 | | Done Text r 61 | -- ^ The parse succeeded. The 'Text' is the 62 | -- input that had not yet been consumed (if any) when 63 | -- the parse succeeded. 64 | deriving (Show) 65 | 66 | instance NFData r => NFData (Result r) where 67 | rnf (Fail bs ctxs msg) = rnf bs `seq` rnf ctxs `seq` rnf msg 68 | rnf (Done bs r) = rnf bs `seq` rnf r 69 | {-# INLINE rnf #-} 70 | 71 | fmapR :: (a -> b) -> Result a -> Result b 72 | fmapR _ (Fail st stk msg) = Fail st stk msg 73 | fmapR f (Done bs r) = Done bs (f r) 74 | 75 | instance Functor Result where 76 | fmap = fmapR 77 | 78 | -- | Run a parser and return its result. 79 | parse :: A.Parser a -> Text -> Result a 80 | parse p s = case s of 81 | Chunk x xs -> go (A.parse p x) xs 82 | empty -> go (A.parse p T.empty) empty 83 | where 84 | go (T.Fail x stk msg) ys = Fail (chunk x ys) stk msg 85 | go (T.Done x r) ys = Done (chunk x ys) r 86 | go (T.Partial k) (Chunk y ys) = go (k y) ys 87 | go (T.Partial k) empty = go (k T.empty) empty 88 | 89 | -- | Run a parser and print its result to standard output. 90 | parseTest :: (Show a) => A.Parser a -> Text -> IO () 91 | parseTest p s = print (parse p s) 92 | 93 | -- | Convert a 'Result' value to a 'Maybe' value. 94 | maybeResult :: Result r -> Maybe r 95 | maybeResult (Done _ r) = Just r 96 | maybeResult _ = Nothing 97 | 98 | -- | Convert a 'Result' value to an 'Either' value. 99 | eitherResult :: Result r -> Either String r 100 | eitherResult (Done _ r) = Right r 101 | eitherResult (Fail _ [] msg) = Left msg 102 | eitherResult (Fail _ ctxs msg) = Left (intercalate " > " ctxs ++ ": " ++ msg) 103 | 104 | -- | Run a parser that cannot be resupplied via a 'T.Partial' result. 105 | -- 106 | -- This function does not force a parser to consume all of its input. 107 | -- Instead, any residual input will be discarded. To force a parser 108 | -- to consume all of its input, use something like this: 109 | -- 110 | -- @ 111 | --'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput') 112 | -- @ 113 | parseOnly :: A.Parser a -> Text -> Either String a 114 | parseOnly p = eitherResult . parse p 115 | {-# INLINE parseOnly #-} 116 | -------------------------------------------------------------------------------- /doc/attoparsec-rewired-2.md: -------------------------------------------------------------------------------- 1 | In my 2 | [first of this pair of articles](http://www.serpentine.com/blog/2010/03/03/whats-in-a-parsing-library-1/), 3 | I laid out some of the qualities I've been looking for in a parsing 4 | library. 5 | 6 | Before I dive back into detail, I want to show off some numbers. The 7 | new Attoparsec code is _fast_. 8 | 9 | ![Performance](http://chart.apis.google.com/chart?cht=bvs&chs=340x200&chd=t:260,0,0,0,0|0,471,0,0,0|0,0,17037,0,0|0,0,0,23753,0|0,0,0,0,36753&chds=0,40000&chco=4D89F9|894DF9|F94D89|4DB989|1969FD&chdl=260+ms:+http_parser|471+ms:+Attoparsec|17037+ms:+Parsec+3+CPS|23753+ms:+Lazy+Parsec+3+CPS|36753+ms:+Parsec+3&chxt=y&chxl=0:||10|20|30|40&chtt=Time+to+parse+45,668|HTTP+GET+requests) 10 | 11 | What did I benchmark? I captured some real HTTP GET requests from a 12 | live public web server, averaging 431 bytes per request. I chucked 13 | them into a file, and measured the time needed to parse the entire 14 | contents of the file with the following libraries: 15 | 16 | * Ryan Dahl's [http-parser](http://github.com/ry/http-parser) library, 17 | which is 1,672 lines of hand-rolled C craziness. Its heritage seems 18 | to be closely based on the Ragel-generated parser used by Mongrel. 19 | This library is a fair approximation to about as fast as you can 20 | get, since it's been tuned for just one purpose. I wrote a small, 21 | but reasonably realistic, 22 | [driver program](http://bitbucket.org/bos/attoparsec/src/tip/examples/rfc2616.c) 23 | to wire it up to file-based data, adding another 210 lines of code. 24 | 25 | * An Attoparsec-based 26 | [HTTP request parser](http://bitbucket.org/bos/attoparsec/src/tip/examples/RFC2616.hs), 27 | 54 lines long, with about 30 lines of 28 | [driver program](http://bitbucket.org/bos/attoparsec/src/tip/examples/TestRFC2616.hs). 29 | (Attoparsec itself is about 900 lines of code.) 30 | 31 | * Several 32 | [Parsec-3-based parsers](http://bitbucket.org/bos/attoparsec/src/tip/examples/Parsec_RFC2616.hs), 33 | which are almost identical in length to the Attoparsec-based 34 | version. 35 | 36 | The Parsec 3 parsers come in three varieties: 37 | 38 | * The fastest uses a patch that Antoine Latter wrote to switch Parsec 39 | 3's internal machinery over to using continuation passing style 40 | (CPS). This parser uses `ByteString` for input, and reads the 41 | entire 18.8MB file in one chunk. 42 | 43 | * Next is the same parser, using lazy `ByteString` I/O to read the 44 | file in 64KB chunks. This costs about 50% in performance, but is 45 | almost mandatory to maintain a sensible footprint on large inputs. 46 | 47 | * In last place is the official version of Parsec 3, reading the input 48 | in one chunk. (Reading lazily still costs an additional 50%, but I 49 | didn't want to further clutter the chart with more big numbers.) 50 | 51 | What's interesting to me is that the tiny Attoparsec-based parser, 52 | which is more or less a transliteration of the relevant parts of 53 | [RFC 2616](http://www.w3.org/Protocols/rfc2616/rfc2616.html), is so 54 | fast. 55 | 56 | I went back and remeasured performance of the Attoparsec and C parsers 57 | on a larger data set (295,568 URLs), and got these numbers: 58 | 59 | * Attoparsec: 2.889 seconds, or 102,308 requests/second 60 | 61 | * C: 1.614 seconds, or 183,128 requests/second 62 | 63 | That clocks the Attoparsec-based parser at about 56% the speed of the 64 | C parser. Not bad, given that it's about 3.2% the number of lines of 65 | code! 66 | 67 | Of course there are tradeoffs involved here. 68 | 69 | * Parsec 3 emits much more friendly error messages, and can handle 70 | many different input types. Attoparsec, being aimed at 71 | plumbing-oriented network protocols, considers friendly error 72 | messages to not be worth the effort, and is specialised to the 73 | arrays of bytes you get straight off the network. 74 | 75 | * Parsec 3 requires all of its input to be available when the parser 76 | is run (either in one large chunk or via lazy I/O). If Attoparsec 77 | has insufficient data to return a complete result, it hands back a 78 | continuation that you provide extra data to. This eliminates the 79 | need for lazy I/O and any additional buffering, and makes for a 80 | beautiful, pure API that doesn't care what its input source is. 81 | 82 | The memory footprint of the Attoparsec-based parser is small: it will 83 | run in 568KB of heap on my 64-bit laptop. The smallest heap size that 84 | the Parsec 3 parser can survive in isn't all that much larger: with 85 | lazily read input, it will run in a 750KB heap. 86 | 87 | Overall, this is yet another instance where a little careful attention 88 | to performance yields very exciting results. Personally, I'd be quite 89 | happy to trade a 97% reduction in code size for such a small 90 | performance hit, especially given the clarity, ease of use, and 91 | flexibility of the resulting code. (The `http_parser` API is frankly 92 | not so much fun to use, even though I completely understand the 93 | motivation behind it.) 94 | -------------------------------------------------------------------------------- /Data/Attoparsec/ByteString/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #if __GLASGOW_HASKELL__ >= 702 3 | {-# LANGUAGE Trustworthy #-} -- Imports internal modules 4 | #endif 5 | 6 | -- | 7 | -- Module : Data.Attoparsec.ByteString.Lazy 8 | -- Copyright : Bryan O'Sullivan 2007-2015 9 | -- License : BSD3 10 | -- 11 | -- Maintainer : bos@serpentine.com 12 | -- Stability : experimental 13 | -- Portability : unknown 14 | -- 15 | -- Simple, efficient combinator parsing that can consume lazy 16 | -- 'ByteString' strings, loosely based on the Parsec library. 17 | -- 18 | -- This is essentially the same code as in the 'Data.Attoparsec' 19 | -- module, only with a 'parse' function that can consume a lazy 20 | -- 'ByteString' incrementally, and a 'Result' type that does not allow 21 | -- more input to be fed in. Think of this as suitable for use with a 22 | -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'. 23 | -- 24 | -- /Note:/ The various parser functions and combinators such as 25 | -- 'string' still expect /strict/ 'B.ByteString' parameters, and 26 | -- return strict 'B.ByteString' results. Behind the scenes, strict 27 | -- 'B.ByteString' values are still used internally to store parser 28 | -- input and manipulate it efficiently. 29 | 30 | module Data.Attoparsec.ByteString.Lazy 31 | ( 32 | Result(..) 33 | , module Data.Attoparsec.ByteString 34 | -- * Running parsers 35 | , parse 36 | , parseOnly 37 | , parseTest 38 | -- ** Result conversion 39 | , maybeResult 40 | , eitherResult 41 | ) where 42 | 43 | import Control.DeepSeq (NFData(rnf)) 44 | import Data.ByteString.Lazy.Internal (ByteString(..), chunk) 45 | import Data.List (intercalate) 46 | import qualified Data.ByteString as B 47 | import qualified Data.Attoparsec.ByteString as A 48 | import qualified Data.Attoparsec.Internal.Types as T 49 | import Data.Attoparsec.ByteString 50 | hiding (IResult(..), Result, eitherResult, maybeResult, 51 | parse, parseOnly, parseWith, parseTest) 52 | 53 | -- | The result of a parse. 54 | data Result r = Fail ByteString [String] String 55 | -- ^ The parse failed. The 'ByteString' is the input 56 | -- that had not yet been consumed when the failure 57 | -- occurred. The @[@'String'@]@ is a list of contexts 58 | -- in which the error occurred. The 'String' is the 59 | -- message describing the error, if any. 60 | | Done ByteString r 61 | -- ^ The parse succeeded. The 'ByteString' is the 62 | -- input that had not yet been consumed (if any) when 63 | -- the parse succeeded. 64 | 65 | instance NFData r => NFData (Result r) where 66 | rnf (Fail bs ctxs msg) = rnfBS bs `seq` rnf ctxs `seq` rnf msg 67 | rnf (Done bs r) = rnfBS bs `seq` rnf r 68 | {-# INLINE rnf #-} 69 | 70 | rnfBS :: ByteString -> () 71 | rnfBS (Chunk _ xs) = rnfBS xs 72 | rnfBS Empty = () 73 | {-# INLINE rnfBS #-} 74 | 75 | instance Show r => Show (Result r) where 76 | show (Fail bs stk msg) = 77 | "Fail " ++ show bs ++ " " ++ show stk ++ " " ++ show msg 78 | show (Done bs r) = "Done " ++ show bs ++ " " ++ show r 79 | 80 | fmapR :: (a -> b) -> Result a -> Result b 81 | fmapR _ (Fail st stk msg) = Fail st stk msg 82 | fmapR f (Done bs r) = Done bs (f r) 83 | 84 | instance Functor Result where 85 | fmap = fmapR 86 | 87 | -- | Run a parser and return its result. 88 | parse :: A.Parser a -> ByteString -> Result a 89 | parse p s = case s of 90 | Chunk x xs -> go (A.parse p x) xs 91 | empty -> go (A.parse p B.empty) empty 92 | where 93 | go (T.Fail x stk msg) ys = Fail (chunk x ys) stk msg 94 | go (T.Done x r) ys = Done (chunk x ys) r 95 | go (T.Partial k) (Chunk y ys) = go (k y) ys 96 | go (T.Partial k) empty = go (k B.empty) empty 97 | 98 | -- | Run a parser and print its result to standard output. 99 | parseTest :: (Show a) => A.Parser a -> ByteString -> IO () 100 | parseTest p s = print (parse p s) 101 | 102 | -- | Convert a 'Result' value to a 'Maybe' value. 103 | maybeResult :: Result r -> Maybe r 104 | maybeResult (Done _ r) = Just r 105 | maybeResult _ = Nothing 106 | 107 | -- | Convert a 'Result' value to an 'Either' value. 108 | eitherResult :: Result r -> Either String r 109 | eitherResult (Done _ r) = Right r 110 | eitherResult (Fail _ [] msg) = Left msg 111 | eitherResult (Fail _ ctxs msg) = Left (intercalate " > " ctxs ++ ": " ++ msg) 112 | 113 | -- | Run a parser that cannot be resupplied via a 'T.Partial' result. 114 | -- 115 | -- This function does not force a parser to consume all of its input. 116 | -- Instead, any residual input will be discarded. To force a parser 117 | -- to consume all of its input, use something like this: 118 | -- 119 | -- @ 120 | --'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput') 121 | -- @ 122 | parseOnly :: A.Parser a -> ByteString -> Either String a 123 | parseOnly p = eitherResult . parse p 124 | {-# INLINE parseOnly #-} 125 | -------------------------------------------------------------------------------- /benchmarks/Benchmarks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP #-} 2 | 3 | import Common () 4 | import Control.Applicative (many) 5 | import Test.Tasty.Bench (bench, bgroup, defaultMain, nf) 6 | import Data.Bits 7 | import Data.Char (isAlpha) 8 | import Data.Word (Word32) 9 | import Data.Word (Word8) 10 | import Numbers (numbers) 11 | import Common (chunksOf) 12 | import Text.Parsec.Text () 13 | import Text.Parsec.Text.Lazy () 14 | import qualified Warp 15 | import qualified Aeson 16 | import qualified Genome 17 | import qualified Data.Attoparsec.ByteString as AB 18 | import qualified Data.Attoparsec.ByteString.Char8 as AC 19 | import qualified Data.Attoparsec.ByteString.Lazy as ABL 20 | import qualified Data.Attoparsec.Text as AT 21 | import qualified Data.Attoparsec.Text.Lazy as ATL 22 | import qualified Data.ByteString as B 23 | import qualified Data.ByteString.Char8 as BC 24 | import qualified Data.ByteString.Lazy as BL 25 | import qualified Data.Text as T 26 | import qualified Data.Text.Lazy as TL 27 | import qualified HeadersByteString 28 | import qualified HeadersText 29 | import qualified Links 30 | import qualified Text.Parsec as P 31 | import qualified Sets 32 | 33 | main :: IO () 34 | main = do 35 | let s = take 1024 . cycle $ ['a'..'z'] ++ ['A'..'Z'] 36 | !b = BC.pack s 37 | !bl = BL.fromChunks . map BC.pack . chunksOf 4 $ s 38 | !t = T.pack s 39 | !tl = TL.fromChunks . map T.pack . chunksOf 4 $ s 40 | aeson <- Aeson.aeson 41 | headersBS <- HeadersByteString.headers 42 | headersT <- HeadersText.headers 43 | defaultMain [ 44 | bgroup "many" [ 45 | bgroup "attoparsec" [ 46 | bench "B" $ nf (AB.parse (many (AC.satisfy AC.isAlpha_ascii))) b 47 | , bench "BL" $ nf (ABL.parse (many (AC.satisfy AC.isAlpha_ascii))) bl 48 | , bench "T" $ nf (AT.parse (many (AT.satisfy AC.isAlpha_ascii))) t 49 | , bench "TL" $ nf (ATL.parse (many (AT.satisfy AC.isAlpha_ascii))) tl 50 | ] 51 | , bgroup "parsec" [ 52 | bench "S" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") s 53 | , bench "B" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") b 54 | , bench "BL" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") bl 55 | , bench "T" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") t 56 | , bench "TL" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") tl 57 | ] 58 | ] 59 | , bgroup "comparison" [ 60 | bgroup "many-vs-takeWhile" [ 61 | bench "many" $ nf (AB.parse (many (AC.satisfy AC.isAlpha_ascii))) b 62 | , bench "takeWhile" $ nf (AB.parse (AC.takeWhile AC.isAlpha_ascii)) b 63 | ] 64 | , bgroup "letter-vs-isAlpha" [ 65 | bench "letter" $ nf (AB.parse (many AC.letter_ascii)) b 66 | , bench "isAlpha" $ nf (AB.parse (many (AC.satisfy AC.isAlpha_ascii))) b 67 | ] 68 | ] 69 | , bgroup "takeWhile" [ 70 | bench "isAlpha" $ nf (ABL.parse (AC.takeWhile isAlpha)) bl 71 | , bench "isAlpha_ascii" $ nf (ABL.parse (AC.takeWhile AC.isAlpha_ascii)) bl 72 | , bench "isAlpha_iso8859_15" $ 73 | nf (ABL.parse (AC.takeWhile AC.isAlpha_iso8859_15)) bl 74 | , bench "T isAlpha" $ nf (AT.parse (AT.takeWhile isAlpha)) t 75 | , bench "TL isAlpha" $ nf (ATL.parse (AT.takeWhile isAlpha)) tl 76 | ] 77 | , bgroup "takeWhile1" [ 78 | bench "isAlpha" $ nf (ABL.parse (AC.takeWhile1 isAlpha)) bl 79 | , bench "isAlpha_ascii" $ nf (ABL.parse (AC.takeWhile1 AC.isAlpha_ascii)) bl 80 | , bench "T isAlpha" $ nf (AT.parse (AT.takeWhile1 isAlpha)) t 81 | , bench "TL isAlpha" $ nf (ATL.parse (AT.takeWhile1 isAlpha)) tl 82 | ] 83 | , bench "word32LE" $ nf (AB.parse word32LE) b 84 | , bgroup "scan" [ 85 | bench "short" $ nf (AB.parse quotedString) (BC.pack "abcdefghijk\"") 86 | , bench "long" $ nf (AB.parse quotedString) b 87 | ] 88 | , aeson 89 | , Genome.genome 90 | , headersBS 91 | , headersT 92 | , Links.links 93 | , numbers 94 | , Sets.benchmarks 95 | , Warp.benchmarks 96 | ] 97 | 98 | -- Benchmarks bind and (potential) bounds-check merging. 99 | word32LE :: AB.Parser Word32 100 | word32LE = do 101 | w1 <- AB.anyWord8 102 | w2 <- AB.anyWord8 103 | w3 <- AB.anyWord8 104 | w4 <- AB.anyWord8 105 | return $! (fromIntegral w1 :: Word32) + 106 | fromIntegral w2 `unsafeShiftL` 8 + 107 | fromIntegral w3 `unsafeShiftL` 16 + 108 | fromIntegral w4 `unsafeShiftL` 32 109 | 110 | doubleQuote, backslash :: Word8 111 | doubleQuote = 34 112 | backslash = 92 113 | {-# INLINE backslash #-} 114 | {-# INLINE doubleQuote #-} 115 | 116 | -- | Parse a string without a leading quote. 117 | quotedString :: AB.Parser B.ByteString 118 | quotedString = AB.scan False $ \s c -> if s then Just False 119 | else if c == doubleQuote 120 | then Nothing 121 | else Just (c == backslash) 122 | 123 | #if !MIN_VERSION_base(4,5,0) 124 | unsafeShiftL :: Bits a => a -> Int -> a 125 | unsafeShiftL = shiftL 126 | #endif 127 | -------------------------------------------------------------------------------- /benchmarks/IsSpace.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wall -fwarn-tabs #-} 2 | {-# LANGUAGE ForeignFunctionInterface #-} 3 | ---------------------------------------------------------------- 4 | -- 2010.10.09 5 | -- | 6 | -- Module : IsSpace 7 | -- Copyright : Copyright (c) 2010 wren ng thornton 8 | -- License : BSD 9 | -- Maintainer : wren@community.haskell.org 10 | -- Stability : experimental 11 | -- Portability : portable (FFI) 12 | -- 13 | -- A benchmark for comparing different definitions of predicates 14 | -- for detecting whitespace. As of the last run the results are: 15 | -- 16 | -- * Data.Char.isSpace : 14.44786 us +/- 258.0377 ns 17 | -- * isSpace_DataChar : 43.25154 us +/- 655.7037 ns 18 | -- * isSpace_Char : 29.26598 us +/- 454.1445 ns 19 | -- * isPerlSpace : 20 | -- * Data.Attoparsec.Char8.isSpace : 81.87335 us +/- 1.195903 us 21 | -- * isSpace_Char8 : 11.84677 us +/- 178.9795 ns 22 | -- * isSpace_w8 : 11.55470 us +/- 133.7644 ns 23 | ---------------------------------------------------------------- 24 | module IsSpace (main) where 25 | 26 | import qualified Data.Char as C 27 | import Data.Word (Word8) 28 | import qualified Data.ByteString as B 29 | import qualified Data.ByteString.Char8 as B8 30 | import Foreign.C.Types (CInt) 31 | 32 | import Criterion (bench, nf) 33 | import Criterion.Main (defaultMain) 34 | 35 | ---------------------------------------------------------------- 36 | ----- Character predicates 37 | -- N.B. \x9..\xD == "\t\n\v\f\r" 38 | 39 | -- | Recognize the same characters as Perl's @/\s/@ in Unicode mode. 40 | -- In particular, we recognize POSIX 1003.2 @[[:space:]]@ except 41 | -- @\'\v\'@, and recognize the Unicode @\'\x85\'@, @\'\x2028\'@, 42 | -- @\'\x2029\'@. Notably, @\'\x85\'@ belongs to Latin-1 (but not 43 | -- ASCII) and therefore does not belong to POSIX 1003.2 @[[:space:]]@ 44 | -- (nor non-Unicode @/\s/@). 45 | isPerlSpace :: Char -> Bool 46 | isPerlSpace c 47 | = (' ' == c) 48 | || ('\t' <= c && c <= '\r' && c /= '\v') 49 | || ('\x85' == c) 50 | || ('\x2028' == c) 51 | || ('\x2029' == c) 52 | {-# INLINE isPerlSpace #-} 53 | 54 | 55 | -- | 'Data.Attoparsec.Char8.isSpace', duplicated here because it's 56 | -- not exported. This is the definition as of attoparsec-0.8.1.0. 57 | isSpace :: Char -> Bool 58 | isSpace c = c `B8.elem` spaces 59 | where 60 | spaces = B8.pack " \n\r\t\v\f" 61 | {-# NOINLINE spaces #-} 62 | {-# INLINE isSpace #-} 63 | 64 | 65 | -- | An alternate version of 'Data.Attoparsec.Char8.isSpace'. 66 | isSpace_Char8 :: Char -> Bool 67 | isSpace_Char8 c = (' ' == c) || ('\t' <= c && c <= '\r') 68 | {-# INLINE isSpace_Char8 #-} 69 | 70 | 71 | -- | An alternate version of 'Data.Char.isSpace'. This uses the 72 | -- same trick as 'isSpace_Char8' but we include Unicode whitespaces 73 | -- too, in order to have the same results as 'Data.Char.isSpace' 74 | -- (whereas 'isSpace_Char8' doesn't recognize Unicode whitespace). 75 | isSpace_Char :: Char -> Bool 76 | isSpace_Char c 77 | = (' ' == c) 78 | || ('\t' <= c && c <= '\r') 79 | || ('\xA0' == c) 80 | || (iswspace (fromIntegral (C.ord c)) /= 0) 81 | {-# INLINE isSpace_Char #-} 82 | 83 | foreign import ccall unsafe "u_iswspace" 84 | iswspace :: CInt -> CInt 85 | 86 | -- | Verbatim version of 'Data.Char.isSpace' (i.e., 'GHC.Unicode.isSpace' 87 | -- as of base-4.2.0.2) in order to try to figure out why 'isSpace_Char' 88 | -- is slower than 'Data.Char.isSpace'. It appears to be something 89 | -- special in how the base library was compiled. 90 | isSpace_DataChar :: Char -> Bool 91 | isSpace_DataChar c = 92 | c == ' ' || 93 | c == '\t' || 94 | c == '\n' || 95 | c == '\r' || 96 | c == '\f' || 97 | c == '\v' || 98 | c == '\xa0' || 99 | iswspace (fromIntegral (C.ord c)) /= 0 100 | {-# INLINE isSpace_DataChar #-} 101 | 102 | 103 | -- | A 'Word8' version of 'Data.Attoparsec.Char8.isSpace'. 104 | isSpace_w8 :: Word8 -> Bool 105 | isSpace_w8 w = (w == 32) || (9 <= w && w <= 13) 106 | {-# INLINE isSpace_w8 #-} 107 | 108 | ---------------------------------------------------------------- 109 | 110 | main :: IO () 111 | main = defaultMain 112 | [ bench "Data.Char.isSpace" $ nf (map C.isSpace) ['\x0'..'\255'] 113 | , bench "isSpace_DataChar" $ nf (map isSpace_DataChar) ['\x0'..'\255'] 114 | , bench "isSpace_Char" $ nf (map isSpace_Char) ['\x0'..'\255'] 115 | , bench "isPerlSpace" $ nf (map isPerlSpace) ['\x0'..'\255'] 116 | , bench "Data.Attoparsec.Char8.isSpace" 117 | $ nf (map isSpace) ['\x0'..'\255'] 118 | , bench "isSpace_Char8" $ nf (map isSpace_Char8) ['\x0'..'\255'] 119 | , bench "isSpace_w8" $ nf (map isSpace_w8) [0..255] 120 | ] 121 | 122 | ---------------------------------------------------------------- 123 | ----------------------------------------------------------- fin. 124 | -------------------------------------------------------------------------------- /benchmarks/json-data/integers.json: -------------------------------------------------------------------------------- 1 | [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 29, 31, 33, 35, 37, 39, 42, 44, 47, 50, 53, 56, 60, 63, 67, 72, 76, 81, 86, 91, 96, 102, 109, 115, 122, 130, 138, 146, 155, 165, 175, 186, 197, 209, 222, 236, 250, 266, 282, 299, 318, 337, 358, 380, 403, 428, 454, 482, 511, 543, 576, 611, 649, 688, 730, 775, 823, 873, 926, 983, 1043, 1107, 1175, 1247, 1323, 1404, 1490, 1582, 1679, 1781, 1890, 2006, 2129, 2259, 2398, 2544, 2700, 2865, 3041, 3227, 3425, 3634, 3857, 4093, 4343, 4609, 4891, 5191, 5509, 5846, 6204, 6583, 6986, 7414, 7868, 8349, 8860, 9403, 9978, 10589, 11237, 11925, 12655, 13430, 14252, 15124, 16050, 17032, 18075, 19181, 20355, 21601, 22923, 24326, 25815, 27396, 29072, 30852, 32740, 34744, 36871, 39128, 41523, 44064, 46762, 49624, 52661, 55884, 59305, 62935, 66787, 70875, 75213, 79817, 84702, 89887, 95389, 101227, 107423, 113998, 120976, 128381, 136239, 144578, 153427, 162818, 172784, 183360, 194583, 206493, 219132, 232545, 246778, 261883, 277912, 294923, 312975, 332131, 352460, 374034, 396928, 421223, 447005, 474365, 503400, 534212, 566911, 601610, 638433, 677511, 718980, 762987, 809688, 859247, 911840, 967652, 1026880, 1089734, 1156434, 1227217, 1302333, 1382046, 1466638, 1556408, 1651673, 1752769, 1860052, 1973902, 2094721, 2222934, 2358996, 2503385, 2656613, 2819219, 2991777, 3174898, 3369227, 3575451, 3794297, 4026539, 4272995, 4534536, 4812086, 5106625, 5419191, 5750889, 6102889, 6476435, 6872845, 7293518, 7739939, 8213686, 8716429, 9249944, 9816115, 10416939, 11054539, 11731166, 12449207, 13211198, 14019829, 14877955, 15788605, 16754994, 17780533, 18868844, 20023768, 21249383, 22550016, 23930257, 25394980, 26949356, 28598872, 30349352, 32206975, 34178300, 36270285, 38490317, 40846232, 43346349, 45999492, 48815029, 51802899, 54973651, 58338478, 61909260, 65698602, 69719882, 73987297, 78515911, 83321713, 88421668, 93833782, 99577160, 105672079, 112140056, 119003924, 126287916, 134017747, 142220706, 150925751, 160163614, 169966908, 180370243, 191410345, 203126189, 215559137, 228753081, 242754599, 257613123, 273381107, 290114218, 307871529, 326715729, 346713346, 367934976, 390455540, 414354543, 439716356, 466630515, 495192035, 525501749, 557666661, 591800322, 628023236, 666463282, 707256166, 750545902, 796485316, 845236589, 896971830, 951873682, 1010135966, 1071964368, 1137577163, 1207205986, 1281096650, 1359510014, 1442722903, 1531029087, 1624740315, 1724187420, 1829721484, 1941715077, 2060563573, 2186686548, 2320529259, 2462564214, 2613292844, 2773247272, 2942992191, 3123126858, 3314287206, 3517148098, 3732425698, 3960880011, 4203317554, 4460594215, 4733618266, 5023353573, 5330822998, 5657112012, 6003372524, 6370826950, 6760772526, 7174585891, 7613727944, 8079749004, 8574294281, 9099109685, 9656047991, 10247075377, 10874278366, 11539871197, 12246203633, 12995769265, 13791214310, 14635346955, 15531147272, 16481777734, 17490594386, 18561158687, 19697250088, 20902879371, 22182302812, 23540037202, 24980875800, 26509905246, 28132523526, 29854459026, 31681790754, 33620969802, 35678842122, 37862672691, 40180171161, 42639519077, 45249398761, 48019023960, 50958172379, 54077220194, 57387178688, 60899733121, 64627283986, 68582990784, 72780818484, 77235586822, 81963022620, 86979815309, 92303675844, 97953399235, 103948930895, 110311437058, 117063379497, 124228594829, 131832378662, 139901574895, 148464670491, 157551896043, 167195332496, 177429024407, 188289100133, 199813899374, 212044108527, 225022904322, 238796106249, 253412338321, 268923200725, 285383451995, 302851202324, 321388118716, 341059642687, 361935221296, 384088552321, 407597844432, 432546093294, 459021374572, 487117154867, 516932621682, 548573033590, 582150091830, 617782334651, 655595555791, 695723248569, 738307077168, 783497376747, 831453684183, 882345301285, 936351892486, 993664119121, 1054484312524, 1119027188325, 1187520604468, 1260206365627, 1337341076854, 1419197049486, 1506063262491, 1598246382662, 1696071847252, 1799885012878, 1910052374747, 2026962860500, 2151029203266, 2282689398739, 2422408251457, 2570679015712, 2728025136906, 2895002099486, 3072199387991, 3260242568131, 3459795495242, 3671562657914, 3896291665080, 4134775885316, 4387857247705, 4656429214122, 4941439933460, 5243895588908, 5564863950114, 5905478142772, 6266940648935, 6650527552175, 7057593042589, 7489574197539, 7947996055022, 8434476997558, 8950734465625, 9498591020797, 10079980779998, 10696956243580, 11351695541337, 12046510122031, 12783852913581, 13566326982715, 14396694724673, 15277887615381, 16213016560543, 17205382878181, 18258489953389, 19376055606456, 20562025218016, 21820585657560, 23156180064488, 24573523533875, 26077619762337, 27673778712750, 29367635360200, 31165169585327, 33072727285306, 35097042776985, 37245262571278, 39524970602741, 41944215003394, 44511536515321, 47235998642351, 50127219647252, 53195406507421, 56451390948928, 59906667686130, 63573435001862, 67464637811456, 71594013362620, 75976139732519, 80626487293267, 85561473327514, 90798519986944, 96356115798305, 102253880934088, 108512636478302, 115154477931865, 122202853217119, 129682645456833, 137620260819954, 146043721744222, 154982765864743, 164468950997792, 174535766550465, 185218751749486, 196555621106568, 208586397563259, 221353553785311, 234902162105402, 249280053643550, 264537987166964, 280729828285480, 297912739615178, 316147382581544, 335498131574595, 356033301212013, 377825387512598, 400951323831468, 425492752460545, 451536312853150, 479173947490266, 508503226468250, 539627691953919, 572657223723034, 607708427072674, 644905044476938, 684378392439283, 726267825083707, 770721226121430, 817895530929871, 867957280587026, 921083209817197, 977460870923688, 1037289295911185, 1100779699135317, 1168156222959992, 1239656729054927, 1315533638126921, 1396054821049394, 1481504544536185, 1572184474698156, 1668414742025481, 1770535071555377, 1878905982215138, 1993910059574563, 2115953306501003, 2245466576485316, 2382907094698829, 2528760072151156, 2683540418647385, 2847794560591953] 2 | -------------------------------------------------------------------------------- /examples/rfc2616.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This is a simple driver for the http-parser package. It is 3 | * intended to read one HTTP request after another from a file, 4 | * nothing more. 5 | * 6 | * For "feature parity" with the Haskell code in RFC2616.hs, we 7 | * allocate and populate a simple structure describing each request, 8 | * since that's the sort of thing that many real applications would 9 | * themselves do and the library doesn't do this for us. 10 | * 11 | * For the http-parser source, see 12 | * https://github.com/joyent/http-parser/blob/master/http_parser.c 13 | */ 14 | 15 | /* 16 | * Turn off this preprocessor symbol to have the callbacks do nothing 17 | * at all, which "improves performance" by about 50%. 18 | */ 19 | /*#define LOOK_BUSY*/ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "http_parser.h" 31 | 32 | struct http_string { 33 | size_t len; 34 | char value[0]; 35 | }; 36 | 37 | struct http_header { 38 | struct http_string *name; 39 | struct http_string *value; 40 | struct http_header *next; 41 | }; 42 | 43 | struct http_request { 44 | struct http_string *method; 45 | struct http_string *uri; 46 | struct http_header *headers, *last; 47 | }; 48 | 49 | struct data { 50 | size_t count; 51 | struct http_request req; 52 | }; 53 | 54 | static void *xmalloc(size_t size) 55 | { 56 | void *ptr; 57 | 58 | if ((ptr = malloc(size)) == NULL) { 59 | perror("malloc"); 60 | exit(1); 61 | } 62 | 63 | return ptr; 64 | } 65 | 66 | static struct http_string *xstrdup(const char *src, size_t len, size_t extra) 67 | { 68 | struct http_string *dst = xmalloc(sizeof(*dst) + len + extra); 69 | memcpy(dst->value, src, len); 70 | dst->len = len; 71 | return dst; 72 | } 73 | 74 | static void xstrcat(struct http_string **dst, const char *src, size_t len) 75 | { 76 | struct http_string *p; 77 | 78 | if (*dst == NULL) { 79 | *dst = xstrdup(src, len, 0); 80 | return; 81 | } 82 | 83 | p = xstrdup((*dst)->value, (*dst)->len, len); 84 | memcpy(p->value + (*dst)->len, src, len); 85 | p->len += len; 86 | free(*dst); 87 | *dst = p; 88 | } 89 | 90 | static int begin(http_parser *p) 91 | { 92 | struct data *data = p->data; 93 | 94 | data->count++; 95 | 96 | return 0; 97 | } 98 | 99 | static int url(http_parser *p, const char *at, size_t len) 100 | { 101 | #ifdef LOOK_BUSY 102 | struct data *data = p->data; 103 | 104 | xstrcat(&data->req.uri, at, len); 105 | #endif 106 | 107 | return 0; 108 | } 109 | 110 | static int header_field(http_parser *p, const char *at, size_t len) 111 | { 112 | #ifdef LOOK_BUSY 113 | struct data *data = p->data; 114 | 115 | if (data->req.last && data->req.last->value == NULL) { 116 | xstrcat(&data->req.last->name, at, len); 117 | } else { 118 | struct http_header *hdr = xmalloc(sizeof(*hdr)); 119 | 120 | hdr->name = xstrdup(at, len, 0); 121 | hdr->value = NULL; 122 | hdr->next = NULL; 123 | 124 | if (data->req.last != NULL) 125 | data->req.last->next = hdr; 126 | data->req.last = hdr; 127 | if (data->req.headers == NULL) 128 | data->req.headers = hdr; 129 | } 130 | #endif 131 | 132 | return 0; 133 | } 134 | 135 | static int header_value(http_parser *p, const char *at, size_t len) 136 | { 137 | #ifdef LOOK_BUSY 138 | struct data *data = p->data; 139 | 140 | xstrcat(&data->req.last->value, at, len); 141 | #endif 142 | 143 | return 0; 144 | } 145 | 146 | static int complete(http_parser *p) 147 | { 148 | #ifdef LOOK_BUSY 149 | struct data *data = p->data; 150 | struct http_header *hdr, *next; 151 | 152 | free(data->req.method); 153 | free(data->req.uri); 154 | 155 | for (hdr = data->req.headers; hdr != NULL; hdr = next) { 156 | next = hdr->next; 157 | free(hdr->name); 158 | free(hdr->value); 159 | free(hdr); 160 | hdr = next; 161 | } 162 | 163 | data->req.method = NULL; 164 | data->req.uri = NULL; 165 | data->req.headers = NULL; 166 | data->req.last = NULL; 167 | #endif 168 | 169 | /* Bludgeon http_parser into understanding that we really want to 170 | * keep parsing after a request that in principle ought to close 171 | * the "connection". */ 172 | if (!http_should_keep_alive(p)) { 173 | p->http_major = 1; 174 | p->http_minor = 1; 175 | p->flags &= ~6; 176 | } 177 | 178 | return 0; 179 | } 180 | 181 | static void parse(const char *path, int fd) 182 | { 183 | struct data data; 184 | http_parser_settings s; 185 | http_parser p; 186 | ssize_t nread; 187 | 188 | memset(&s, 0, sizeof(s)); 189 | s.on_message_begin = begin; 190 | s.on_url = url; 191 | s.on_header_field = header_field; 192 | s.on_header_value = header_value; 193 | s.on_message_complete = complete; 194 | 195 | p.data = &data; 196 | 197 | http_parser_init(&p, HTTP_REQUEST); 198 | 199 | data.count = 0; 200 | data.req.method = NULL; 201 | data.req.uri = NULL; 202 | data.req.headers = NULL; 203 | data.req.last = NULL; 204 | 205 | do { 206 | char buf[HTTP_MAX_HEADER_SIZE]; 207 | size_t np; 208 | 209 | nread = read(fd, buf, sizeof(buf)); 210 | 211 | np = http_parser_execute(&p, &s, buf, nread); 212 | if (np != nread) { 213 | fprintf(stderr, "%s: parse failed\n", path); 214 | break; 215 | } 216 | } while (nread > 0); 217 | 218 | printf("%ld\n", (unsigned long) data.count); 219 | } 220 | 221 | int main(int argc, char **argv) 222 | { 223 | int i; 224 | 225 | for (i = 1; i < argc; i++) { 226 | int fd; 227 | 228 | fd = open(argv[i], O_RDONLY); 229 | if (fd == -1) { 230 | perror(argv[i]); 231 | continue; 232 | } 233 | parse(argv[i], fd); 234 | close(fd); 235 | } 236 | 237 | return 0; 238 | } 239 | 240 | /* 241 | * Local Variables: 242 | * c-file-style: "stroustrup" 243 | * End: 244 | */ 245 | -------------------------------------------------------------------------------- /Data/Attoparsec/Zepto.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #if __GLASGOW_HASKELL__ >= 702 3 | {-# LANGUAGE Trustworthy #-} -- Data.ByteString.Unsafe 4 | #endif 5 | {-# LANGUAGE BangPatterns #-} 6 | 7 | -- | 8 | -- Module : Data.Attoparsec.Zepto 9 | -- Copyright : Bryan O'Sullivan 2007-2015 10 | -- License : BSD3 11 | -- 12 | -- Maintainer : bos@serpentine.com 13 | -- Stability : experimental 14 | -- Portability : unknown 15 | -- 16 | -- A tiny, highly specialized combinator parser for 'B.ByteString' 17 | -- strings. 18 | -- 19 | -- While the main attoparsec module generally performs well, this 20 | -- module is particularly fast for simple non-recursive loops that 21 | -- should not normally result in failed parses. 22 | -- 23 | -- /Warning/: on more complex inputs involving recursion or failure, 24 | -- parsers based on this module may be as much as /ten times slower/ 25 | -- than regular attoparsec! You should /only/ use this module when you 26 | -- have benchmarks that prove that its use speeds your code up. 27 | module Data.Attoparsec.Zepto 28 | ( 29 | Parser 30 | , ZeptoT 31 | , parse 32 | , parseT 33 | , atEnd 34 | , string 35 | , take 36 | , takeWhile 37 | ) where 38 | 39 | import Control.Applicative 40 | import Control.Monad (MonadPlus(..), ap) 41 | import qualified Control.Monad.Fail as Fail 42 | import Control.Monad.IO.Class (MonadIO(..)) 43 | import Data.ByteString (ByteString) 44 | import Data.Functor.Identity (Identity(runIdentity)) 45 | import Data.Monoid as Mon (Monoid(..)) 46 | import Data.Semigroup (Semigroup(..)) 47 | import Data.Word (Word8) 48 | import Prelude hiding (take, takeWhile) 49 | import qualified Data.ByteString as B 50 | import qualified Data.ByteString.Unsafe as B 51 | 52 | newtype S = S { 53 | input :: ByteString 54 | } 55 | 56 | data Result a = Fail String 57 | | OK !a S 58 | 59 | -- | A simple parser. 60 | -- 61 | -- This monad is strict in its state, and the monadic bind operator 62 | -- ('>>=') evaluates each result to weak head normal form before 63 | -- passing it along. 64 | newtype ZeptoT m a = Parser { 65 | runParser :: S -> m (Result a) 66 | } 67 | 68 | type Parser a = ZeptoT Identity a 69 | 70 | instance Monad m => Functor (ZeptoT m) where 71 | fmap f m = Parser $ \s -> do 72 | result <- runParser m s 73 | case result of 74 | OK a s' -> return (OK (f a) s') 75 | Fail err -> return (Fail err) 76 | {-# INLINE fmap #-} 77 | 78 | instance MonadIO m => MonadIO (ZeptoT m) where 79 | liftIO act = Parser $ \s -> do 80 | result <- liftIO act 81 | return (OK result s) 82 | {-# INLINE liftIO #-} 83 | 84 | instance Monad m => Monad (ZeptoT m) where 85 | return = pure 86 | {-# INLINE return #-} 87 | 88 | m >>= k = Parser $ \s -> do 89 | result <- runParser m s 90 | case result of 91 | OK a s' -> runParser (k a) s' 92 | Fail err -> return (Fail err) 93 | {-# INLINE (>>=) #-} 94 | 95 | #if !(MIN_VERSION_base(4,13,0)) 96 | fail = Fail.fail 97 | {-# INLINE fail #-} 98 | #endif 99 | 100 | instance Monad m => Fail.MonadFail (ZeptoT m) where 101 | fail msg = Parser $ \_ -> return (Fail msg) 102 | {-# INLINE fail #-} 103 | 104 | instance Monad m => MonadPlus (ZeptoT m) where 105 | mzero = fail "mzero" 106 | {-# INLINE mzero #-} 107 | 108 | mplus a b = Parser $ \s -> do 109 | result <- runParser a s 110 | case result of 111 | ok@(OK _ _) -> return ok 112 | _ -> runParser b s 113 | {-# INLINE mplus #-} 114 | 115 | instance (Monad m) => Applicative (ZeptoT m) where 116 | pure a = Parser $ \s -> return (OK a s) 117 | {-# INLINE pure #-} 118 | (<*>) = ap 119 | {-# INLINE (<*>) #-} 120 | 121 | gets :: Monad m => (S -> a) -> ZeptoT m a 122 | gets f = Parser $ \s -> return (OK (f s) s) 123 | {-# INLINE gets #-} 124 | 125 | put :: Monad m => S -> ZeptoT m () 126 | put s = Parser $ \_ -> return (OK () s) 127 | {-# INLINE put #-} 128 | 129 | -- | Run a parser. 130 | parse :: Parser a -> ByteString -> Either String a 131 | parse p bs = case runIdentity (runParser p (S bs)) of 132 | (OK a _) -> Right a 133 | (Fail err) -> Left err 134 | {-# INLINE parse #-} 135 | 136 | -- | Run a parser on top of the given base monad. 137 | parseT :: Monad m => ZeptoT m a -> ByteString -> m (Either String a) 138 | parseT p bs = do 139 | result <- runParser p (S bs) 140 | case result of 141 | OK a _ -> return (Right a) 142 | Fail err -> return (Left err) 143 | {-# INLINE parseT #-} 144 | 145 | instance Monad m => Semigroup (ZeptoT m a) where 146 | (<>) = mplus 147 | {-# INLINE (<>) #-} 148 | 149 | instance Monad m => Mon.Monoid (ZeptoT m a) where 150 | mempty = fail "mempty" 151 | {-# INLINE mempty #-} 152 | mappend = (<>) 153 | {-# INLINE mappend #-} 154 | 155 | instance Monad m => Alternative (ZeptoT m) where 156 | empty = fail "empty" 157 | {-# INLINE empty #-} 158 | (<|>) = mplus 159 | {-# INLINE (<|>) #-} 160 | 161 | -- | Consume input while the predicate returns 'True'. 162 | takeWhile :: Monad m => (Word8 -> Bool) -> ZeptoT m ByteString 163 | takeWhile p = do 164 | (h,t) <- gets (B.span p . input) 165 | put (S t) 166 | return h 167 | {-# INLINE takeWhile #-} 168 | 169 | -- | Consume @n@ bytes of input. 170 | take :: Monad m => Int -> ZeptoT m ByteString 171 | take !n = do 172 | s <- gets input 173 | if B.length s >= n 174 | then put (S (B.unsafeDrop n s)) >> return (B.unsafeTake n s) 175 | else fail "insufficient input" 176 | {-# INLINE take #-} 177 | 178 | -- | Match a string exactly. 179 | string :: Monad m => ByteString -> ZeptoT m () 180 | string s = do 181 | i <- gets input 182 | if s `B.isPrefixOf` i 183 | then put (S (B.unsafeDrop (B.length s) i)) >> return () 184 | else fail "string" 185 | {-# INLINE string #-} 186 | 187 | -- | Indicate whether the end of the input has been reached. 188 | atEnd :: Monad m => ZeptoT m Bool 189 | atEnd = do 190 | i <- gets input 191 | return $! B.null i 192 | {-# INLINE atEnd #-} 193 | -------------------------------------------------------------------------------- /benchmarks/json-data/twitter10.json: -------------------------------------------------------------------------------- 1 | {"results":[{"from_user_id_str":"207858021","profile_image_url":"http://a3.twimg.com/sticky/default_profile_images/default_profile_2_normal.png","created_at":"Wed, 26 Jan 2011 04:30:38 +0000","from_user":"pboudarga","id_str":"30120402839666689","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I'm at Rolla Sushi Grill (27737 Bouquet Canyon Road, #106, Btw Haskell Canyon and Rosedell Drive, Saugus) http://4sq.com/gqqdhs","id":30120402839666689,"from_user_id":207858021,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://foursquare.com" rel="nofollow">foursquare</a>"},{"from_user_id_str":"69988683","profile_image_url":"http://a0.twimg.com/profile_images/1211955817/avatar_7888_normal.gif","created_at":"Wed, 26 Jan 2011 04:25:23 +0000","from_user":"YNK33","id_str":"30119083059978240","metadata":{"result_type":"recent"},"to_user_id":null,"text":"hsndfile 0.5.0: Free and open source Haskell bindings for libsndfile http://bit.ly/gHaBWG Mac Os","id":30119083059978240,"from_user_id":69988683,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"81492","profile_image_url":"http://a1.twimg.com/profile_images/423894208/Picture_7_normal.jpg","created_at":"Wed, 26 Jan 2011 04:24:28 +0000","from_user":"satzz","id_str":"30118851488251904","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Emacs\u306e\u30e2\u30fc\u30c9\u8868\u793a\u304c\u4eca(Ruby Controller Outputz RoR Flymake REl hs)\u3068\u306a\u3063\u3066\u3066\u3088\u304f\u308f\u304b\u3089\u306a\u3044\u3093\u3060\u3051\u3069\u6700\u5f8c\u306eREl\u3068\u304bhs\u3063\u3066\u4f55\u3060\u308d\u3046\u2026haskell\u3068\u304b2\u5e74\u4ee5\u4e0a\u66f8\u3044\u3066\u306a\u3044\u3051\u3069\u2026","id":30118851488251904,"from_user_id":81492,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://www.hootsuite.com" rel="nofollow">HootSuite</a>"},{"from_user_id_str":"9518356","profile_image_url":"http://a2.twimg.com/profile_images/119165723/ocaml-icon_normal.png","created_at":"Wed, 26 Jan 2011 04:19:19 +0000","from_user":"planet_ocaml","id_str":"30117557788741632","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I so miss #haskell type classes in #ocaml - i want to do something like refinement. Also why does ocaml not have... http://bit.ly/geYRwt","id":30117557788741632,"from_user_id":9518356,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"218059","profile_image_url":"http://a1.twimg.com/profile_images/1053837723/twitter-icon9_normal.jpg","created_at":"Wed, 26 Jan 2011 04:16:32 +0000","from_user":"aprikip","id_str":"30116854940835840","metadata":{"result_type":"recent"},"to_user_id":null,"text":"yatex-mode\u3084haskell-mode\u306e\u3053\u3068\u3067\u3059\u306d\u3001\u308f\u304b\u308a\u307e\u3059\u3002","id":30116854940835840,"from_user_id":218059,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://sites.google.com/site/yorufukurou/" rel="nofollow">YoruFukurou</a>"},{"from_user_id_str":"216363","profile_image_url":"http://a1.twimg.com/profile_images/72454310/Tim-Avatar_normal.png","created_at":"Wed, 26 Jan 2011 04:15:30 +0000","from_user":"dysinger","id_str":"30116594684264448","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell in Hawaii tonight for me... #fun","id":30116594684264448,"from_user_id":216363,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.nambu.com/" rel="nofollow">Nambu</a>"},{"from_user_id_str":"1774820","profile_image_url":"http://a2.twimg.com/profile_images/61169291/dan_desert_thumb_normal.jpg","created_at":"Wed, 26 Jan 2011 04:13:36 +0000","from_user":"DanMil","id_str":"30116117682851840","metadata":{"result_type":"recent"},"to_user_id":1594784,"text":"@ojrac @chewedwire @tomheon Haskell isn't a language, it's a belief system. A seductive one...","id":30116117682851840,"from_user_id":1774820,"to_user":"ojrac","geo":null,"iso_language_code":"en","to_user_id_str":"1594784","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"659256","profile_image_url":"http://a0.twimg.com/profile_images/746976711/angular-final_normal.jpg","created_at":"Wed, 26 Jan 2011 04:11:06 +0000","from_user":"djspiewak","id_str":"30115488931520512","metadata":{"result_type":"recent"},"to_user_id":null,"text":"One of the very nice things about Haskell as opposed to SML is the reduced proliferation of identifiers (e.g. andb, orb, etc). #typeclasses","id":30115488931520512,"from_user_id":659256,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://itunes.apple.com/us/app/twitter/id409789998?mt=12" rel="nofollow">Twitter for Mac</a>"},{"from_user_id_str":"144546280","profile_image_url":"http://a1.twimg.com/a/1295051201/images/default_profile_1_normal.png","created_at":"Wed, 26 Jan 2011 04:06:12 +0000","from_user":"listwarenet","id_str":"30114255890026496","metadata":{"result_type":"recent"},"to_user_id":null,"text":"http://www.listware.net/201101/haskell-cafe/84752-re-haskell-cafe-gpl-license-of-h-matrix-and-prelude-numeric.html Re: Haskell-c","id":30114255890026496,"from_user_id":144546280,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://1e10.org/cloud/" rel="nofollow">1e10</a>"},{"from_user_id_str":"1594784","profile_image_url":"http://a2.twimg.com/profile_images/378515773/square-profile_normal.jpg","created_at":"Wed, 26 Jan 2011 04:01:29 +0000","from_user":"ojrac","id_str":"30113067333324800","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @tomheon: @ojrac @chewedwire Don't worry, learning Haskell will not give you any clear idea what monad means.","id":30113067333324800,"from_user_id":1594784,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"}],"max_id":30120402839666689,"since_id":0,"refresh_url":"?since_id=30120402839666689&q=haskell","next_page":"?page=2&max_id=30120402839666689&rpp=10&q=haskell","results_per_page":10,"page":1,"completed_in":0.012714,"since_id_str":"0","max_id_str":"30120402839666689","query":"haskell"} -------------------------------------------------------------------------------- /internal/Data/Attoparsec/ByteString/Buffer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | -- | 3 | -- Module : Data.Attoparsec.ByteString.Buffer 4 | -- Copyright : Bryan O'Sullivan 2007-2015 5 | -- License : BSD3 6 | -- 7 | -- Maintainer : bos@serpentine.com 8 | -- Stability : experimental 9 | -- Portability : GHC 10 | -- 11 | -- An "immutable" buffer that supports cheap appends. 12 | -- 13 | -- A Buffer is divided into an immutable read-only zone, followed by a 14 | -- mutable area that we've preallocated, but not yet written to. 15 | -- 16 | -- We overallocate at the end of a Buffer so that we can cheaply 17 | -- append. Since a user of an existing Buffer cannot see past the end 18 | -- of its immutable zone into the data that will change during an 19 | -- append, this is safe. 20 | -- 21 | -- Once we run out of space at the end of a Buffer, we do the usual 22 | -- doubling of the buffer size. 23 | -- 24 | -- The fact of having a mutable buffer really helps with performance, 25 | -- but it does have a consequence: if someone misuses the Partial API 26 | -- that attoparsec uses by calling the same continuation repeatedly 27 | -- (which never makes sense in practice), they could overwrite data. 28 | -- 29 | -- Since the API *looks* pure, it should *act* pure, too, so we use 30 | -- two generation counters (one mutable, one immutable) to track the 31 | -- number of appends to a mutable buffer. If the counters ever get out 32 | -- of sync, someone is appending twice to a mutable buffer, so we 33 | -- duplicate the entire buffer in order to preserve the immutability 34 | -- of its older self. 35 | -- 36 | -- While we could go a step further and gain protection against API 37 | -- abuse on a multicore system, by use of an atomic increment 38 | -- instruction to bump the mutable generation counter, that would be 39 | -- very expensive, and feels like it would also be in the realm of the 40 | -- ridiculous. Clients should never call a continuation more than 41 | -- once; we lack a linear type system that could enforce this; and 42 | -- there's only so far we should go to accommodate broken uses. 43 | 44 | module Data.Attoparsec.ByteString.Buffer 45 | ( 46 | Buffer 47 | , buffer 48 | , unbuffer 49 | , pappend 50 | , length 51 | , unsafeIndex 52 | , substring 53 | , unsafeDrop 54 | ) where 55 | 56 | import Control.Exception (assert) 57 | import Data.ByteString.Internal (ByteString(..), nullForeignPtr) 58 | import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) 59 | import Data.Attoparsec.Internal.Compat 60 | import Data.List (foldl1') 61 | import Data.Monoid as Mon (Monoid(..)) 62 | import Data.Semigroup (Semigroup(..)) 63 | import Data.Word (Word8) 64 | import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) 65 | import Foreign.Marshal.Utils (copyBytes) 66 | import Foreign.Ptr (castPtr, plusPtr) 67 | import Foreign.Storable (peek, peekByteOff, poke, sizeOf) 68 | import GHC.ForeignPtr (mallocPlainForeignPtrBytes) 69 | import Prelude hiding (length) 70 | 71 | -- If _cap is zero, this buffer is empty. 72 | data Buffer = Buf { 73 | _fp :: {-# UNPACK #-} !(ForeignPtr Word8) 74 | , _off :: {-# UNPACK #-} !Int 75 | , _len :: {-# UNPACK #-} !Int 76 | , _cap :: {-# UNPACK #-} !Int 77 | , _gen :: {-# UNPACK #-} !Int 78 | } 79 | 80 | instance Show Buffer where 81 | showsPrec p = showsPrec p . unbuffer 82 | 83 | -- | The initial 'Buffer' has no mutable zone, so we can avoid all 84 | -- copies in the (hopefully) common case of no further input being fed 85 | -- to us. 86 | buffer :: ByteString -> Buffer 87 | buffer bs = withPS bs $ \fp off len -> Buf fp off len len 0 88 | 89 | unbuffer :: Buffer -> ByteString 90 | unbuffer (Buf fp off len _ _) = mkPS fp off len 91 | 92 | instance Semigroup Buffer where 93 | (Buf _ _ _ 0 _) <> b = b 94 | a <> (Buf _ _ _ 0 _) = a 95 | buf <> (Buf fp off len _ _) = append buf fp off len 96 | 97 | instance Monoid Buffer where 98 | mempty = Buf nullForeignPtr 0 0 0 0 99 | 100 | mappend = (<>) 101 | 102 | mconcat [] = Mon.mempty 103 | mconcat xs = foldl1' mappend xs 104 | 105 | pappend :: Buffer -> ByteString -> Buffer 106 | pappend (Buf _ _ _ 0 _) bs = buffer bs 107 | pappend buf bs = withPS bs $ \fp off len -> append buf fp off len 108 | 109 | append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer 110 | append (Buf fp0 off0 len0 cap0 gen0) !fp1 !off1 !len1 = 111 | inlinePerformIO . withForeignPtr fp0 $ \ptr0 -> 112 | withForeignPtr fp1 $ \ptr1 -> do 113 | let genSize = sizeOf (0::Int) 114 | newlen = len0 + len1 115 | gen <- if gen0 == 0 116 | then return 0 117 | else peek (castPtr ptr0) 118 | if gen == gen0 && newlen <= cap0 119 | then do 120 | let newgen = gen + 1 121 | poke (castPtr ptr0) newgen 122 | copyBytes (ptr0 `plusPtr` (off0+len0)) 123 | (ptr1 `plusPtr` off1) 124 | (fromIntegral len1) 125 | return (Buf fp0 off0 newlen cap0 newgen) 126 | else do 127 | let newcap = newlen * 2 128 | fp <- mallocPlainForeignPtrBytes (newcap + genSize) 129 | withForeignPtr fp $ \ptr_ -> do 130 | let ptr = ptr_ `plusPtr` genSize 131 | newgen = 1 132 | poke (castPtr ptr_) newgen 133 | copyBytes ptr (ptr0 `plusPtr` off0) (fromIntegral len0) 134 | copyBytes (ptr `plusPtr` len0) (ptr1 `plusPtr` off1) 135 | (fromIntegral len1) 136 | return (Buf fp genSize newlen newcap newgen) 137 | 138 | length :: Buffer -> Int 139 | length (Buf _ _ len _ _) = len 140 | {-# INLINE length #-} 141 | 142 | unsafeIndex :: Buffer -> Int -> Word8 143 | unsafeIndex (Buf fp off len _ _) i = assert (i >= 0 && i < len) . 144 | inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i) 145 | {-# INLINE unsafeIndex #-} 146 | 147 | substring :: Int -> Int -> Buffer -> ByteString 148 | substring s l (Buf fp off len _ _) = 149 | assert (s >= 0 && s <= len) . 150 | assert (l >= 0 && l <= len-s) $ 151 | mkPS fp (off+s) l 152 | {-# INLINE substring #-} 153 | 154 | unsafeDrop :: Int -> Buffer -> ByteString 155 | unsafeDrop s (Buf fp off len _ _) = 156 | assert (s >= 0 && s <= len) $ 157 | mkPS fp (off+s) (len-s) 158 | {-# INLINE unsafeDrop #-} 159 | -------------------------------------------------------------------------------- /benchmarks/warp-3.0.1.1/Network/Wai/Handler/Warp/RequestHeader.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE BangPatterns #-} 4 | 5 | module Network.Wai.Handler.Warp.RequestHeader ( 6 | parseHeaderLines 7 | , parseByteRanges 8 | ) where 9 | 10 | import Control.Exception (Exception, throwIO) 11 | import Control.Monad (when) 12 | import Data.Typeable (Typeable) 13 | import qualified Data.ByteString as S 14 | import qualified Data.ByteString.Char8 as B (unpack, readInteger) 15 | import Data.ByteString.Internal (ByteString(..), memchr) 16 | import qualified Data.CaseInsensitive as CI 17 | import Data.Word (Word8) 18 | import Foreign.ForeignPtr (withForeignPtr) 19 | import Foreign.Ptr (Ptr, plusPtr, minusPtr, nullPtr) 20 | import Foreign.Storable (peek) 21 | import qualified Network.HTTP.Types as H 22 | -- import Network.Wai.Handler.Warp.Types 23 | import qualified Network.HTTP.Types.Header as HH 24 | -- $setup 25 | -- >>> :set -XOverloadedStrings 26 | 27 | data InvalidRequest = NotEnoughLines [String] 28 | | BadFirstLine String 29 | | NonHttp 30 | | IncompleteHeaders 31 | | ConnectionClosedByPeer 32 | | OverLargeHeader 33 | deriving (Eq, Typeable, Show) 34 | 35 | instance Exception InvalidRequest 36 | 37 | ---------------------------------------------------------------- 38 | 39 | parseHeaderLines :: [ByteString] 40 | -> IO (H.Method 41 | ,ByteString -- Path 42 | ,ByteString -- Path, parsed 43 | ,ByteString -- Query 44 | ,H.HttpVersion 45 | ,H.RequestHeaders 46 | ) 47 | parseHeaderLines [] = throwIO $ NotEnoughLines [] 48 | parseHeaderLines (firstLine:otherLines) = do 49 | (method, path', query, httpversion) <- parseRequestLine firstLine 50 | let path = H.extractPath path' 51 | hdr = map parseHeader otherLines 52 | return (method, path', path, query, httpversion, hdr) 53 | 54 | ---------------------------------------------------------------- 55 | 56 | -- | 57 | -- 58 | -- >>> parseRequestLine "GET / HTTP/1.1" 59 | -- ("GET","/","",HTTP/1.1) 60 | -- >>> parseRequestLine "POST /cgi/search.cgi?key=foo HTTP/1.0" 61 | -- ("POST","/cgi/search.cgi","?key=foo",HTTP/1.0) 62 | -- >>> parseRequestLine "GET " 63 | -- *** Exception: Warp: Invalid first line of request: "GET " 64 | -- >>> parseRequestLine "GET /NotHTTP UNKNOWN/1.1" 65 | -- *** Exception: Warp: Request line specified a non-HTTP request 66 | parseRequestLine :: ByteString 67 | -> IO (H.Method 68 | ,ByteString -- Path 69 | ,ByteString -- Query 70 | ,H.HttpVersion) 71 | parseRequestLine requestLine@(PS fptr off len) = withForeignPtr fptr $ \ptr -> do 72 | when (len < 14) $ throwIO baderr 73 | let methodptr = ptr `plusPtr` off 74 | limptr = methodptr `plusPtr` len 75 | lim0 = fromIntegral len 76 | 77 | pathptr0 <- memchr methodptr 32 lim0 -- ' ' 78 | when (pathptr0 == nullPtr || (limptr `minusPtr` pathptr0) < 11) $ 79 | throwIO baderr 80 | let pathptr = pathptr0 `plusPtr` 1 81 | lim1 = fromIntegral (limptr `minusPtr` pathptr0) 82 | 83 | httpptr0 <- memchr pathptr 32 lim1 -- ' ' 84 | when (httpptr0 == nullPtr || (limptr `minusPtr` httpptr0) < 9) $ 85 | throwIO baderr 86 | let httpptr = httpptr0 `plusPtr` 1 87 | lim2 = fromIntegral (httpptr0 `minusPtr` pathptr) 88 | 89 | checkHTTP httpptr 90 | !hv <- httpVersion httpptr 91 | queryptr <- memchr pathptr 63 lim2 -- '?' 92 | 93 | let !method = bs ptr methodptr pathptr0 94 | !path 95 | | queryptr == nullPtr = bs ptr pathptr httpptr0 96 | | otherwise = bs ptr pathptr queryptr 97 | !query 98 | | queryptr == nullPtr = S.empty 99 | | otherwise = bs ptr queryptr httpptr0 100 | 101 | return (method,path,query,hv) 102 | where 103 | baderr = BadFirstLine $ B.unpack requestLine 104 | check :: Ptr Word8 -> Int -> Word8 -> IO () 105 | check p n w = do 106 | w0 <- peek $ p `plusPtr` n 107 | when (w0 /= w) $ throwIO NonHttp 108 | checkHTTP httpptr = do 109 | check httpptr 0 72 -- 'H' 110 | check httpptr 1 84 -- 'T' 111 | check httpptr 2 84 -- 'T' 112 | check httpptr 3 80 -- 'P' 113 | check httpptr 4 47 -- '/' 114 | check httpptr 6 46 -- '.' 115 | httpVersion httpptr = do 116 | major <- peek $ httpptr `plusPtr` 5 117 | minor <- peek $ httpptr `plusPtr` 7 118 | return $ if major == (49 :: Word8) && minor == (49 :: Word8) then 119 | H.http11 120 | else 121 | H.http10 122 | bs ptr p0 p1 = PS fptr o l 123 | where 124 | o = p0 `minusPtr` ptr 125 | l = p1 `minusPtr` p0 126 | 127 | ---------------------------------------------------------------- 128 | 129 | -- | 130 | -- 131 | -- >>> parseHeader "Content-Length:47" 132 | -- ("Content-Length","47") 133 | -- >>> parseHeader "Accept-Ranges: bytes" 134 | -- ("Accept-Ranges","bytes") 135 | -- >>> parseHeader "Host: example.com:8080" 136 | -- ("Host","example.com:8080") 137 | -- >>> parseHeader "NoSemiColon" 138 | -- ("NoSemiColon","") 139 | 140 | parseHeader :: ByteString -> H.Header 141 | parseHeader s = 142 | let (k, rest) = S.break (== 58) s -- ':' 143 | rest' = S.dropWhile (\c -> c == 32 || c == 9) $ S.drop 1 rest 144 | in (CI.mk k, rest') 145 | 146 | parseByteRanges :: S.ByteString -> Maybe HH.ByteRanges 147 | parseByteRanges bs1 = do 148 | bs2 <- stripPrefix "bytes=" bs1 149 | (r, bs3) <- range bs2 150 | ranges (r:) bs3 151 | where 152 | range bs2 = 153 | case stripPrefix "-" bs2 of 154 | Just bs3 -> do 155 | (i, bs4) <- B.readInteger bs3 156 | Just (HH.ByteRangeSuffix i, bs4) 157 | Nothing -> do 158 | (i, bs3) <- B.readInteger bs2 159 | bs4 <- stripPrefix "-" bs3 160 | case B.readInteger bs4 of 161 | Nothing -> Just (HH.ByteRangeFrom i, bs4) 162 | Just (j, bs5) -> Just (HH.ByteRangeFromTo i j, bs5) 163 | ranges front bs3 = 164 | case stripPrefix "," bs3 of 165 | Nothing -> Just (front []) 166 | Just bs4 -> do 167 | (r, bs5) <- range bs4 168 | ranges (front . (r:)) bs5 169 | 170 | stripPrefix x y 171 | | x `S.isPrefixOf` y = Just (S.drop (S.length x) y) 172 | | otherwise = Nothing 173 | -------------------------------------------------------------------------------- /attoparsec.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.0 2 | -- 2.0 needed for internal libraries 3 | name: attoparsec 4 | version: 0.14.4 5 | license: BSD3 6 | license-file: LICENSE 7 | category: Text, Parsing 8 | author: Bryan O'Sullivan 9 | maintainer: Ben Gamari 10 | stability: experimental 11 | synopsis: Fast combinator parsing for bytestrings and text 12 | homepage: https://github.com/haskell/attoparsec 13 | bug-reports: https://github.com/haskell/attoparsec/issues 14 | build-type: Simple 15 | description: 16 | A fast parser combinator library, aimed particularly at dealing 17 | efficiently with network protocols and complicated text/binary 18 | file formats. 19 | 20 | tested-with: 21 | GHC == 9.12.1 22 | GHC == 9.10.1 23 | GHC == 9.8.4 24 | GHC == 9.6.6 25 | GHC == 9.4.8 26 | GHC == 9.2.8 27 | GHC == 9.0.2 28 | GHC == 8.10.7 29 | GHC == 8.8.4 30 | GHC == 8.6.5 31 | GHC == 8.4.4 32 | GHC == 8.2.2 33 | GHC == 8.0.2 34 | 35 | extra-source-files: 36 | benchmarks/*.txt 37 | benchmarks/json-data/*.json 38 | benchmarks/Makefile 39 | benchmarks/med.txt.bz2 40 | examples/*.c 41 | examples/*.hs 42 | examples/Makefile 43 | 44 | extra-doc-files: 45 | README.md 46 | changelog.md 47 | 48 | Flag developer 49 | Description: Whether to build the library in development mode 50 | Default: False 51 | Manual: True 52 | 53 | -- We need to test and benchmark these modules, 54 | -- but do not want to expose them to end users 55 | library attoparsec-internal 56 | hs-source-dirs: internal 57 | build-depends: array, 58 | base >= 4.3 && < 5, 59 | bytestring <0.13, 60 | text >= 1.1.1.3 61 | if !impl(ghc >= 8.0) 62 | build-depends: semigroups >=0.16.1 && <0.21 63 | exposed-modules: Data.Attoparsec.ByteString.Buffer 64 | Data.Attoparsec.ByteString.FastSet 65 | Data.Attoparsec.Internal.Compat 66 | Data.Attoparsec.Internal.Fhthagn 67 | Data.Attoparsec.Text.Buffer 68 | Data.Attoparsec.Text.FastSet 69 | ghc-options: -O2 -Wall 70 | default-language: Haskell2010 71 | 72 | library 73 | build-depends: array, 74 | base >= 4.5 && < 5, 75 | bytestring <0.13, 76 | containers, 77 | deepseq, 78 | scientific >= 0.3.1 && < 0.4, 79 | transformers >= 0.2 && (< 0.4 || >= 0.4.1.0) && < 0.7, 80 | text >= 1.1.1.3, 81 | ghc-prim < 0.14, 82 | attoparsec-internal 83 | if impl(ghc < 7.4) 84 | build-depends: 85 | bytestring < 0.10.4.0 86 | 87 | if !impl(ghc >= 8.0) 88 | -- Data.Semigroup && Control.Monad.Fail are available in base-4.9+ 89 | build-depends: fail == 4.9.*, 90 | semigroups >=0.16.1 && <0.21 91 | 92 | exposed-modules: Data.Attoparsec 93 | Data.Attoparsec.ByteString 94 | Data.Attoparsec.ByteString.Char8 95 | Data.Attoparsec.ByteString.Lazy 96 | Data.Attoparsec.Char8 97 | Data.Attoparsec.Combinator 98 | Data.Attoparsec.Internal 99 | Data.Attoparsec.Internal.Types 100 | Data.Attoparsec.Lazy 101 | Data.Attoparsec.Number 102 | Data.Attoparsec.Text 103 | Data.Attoparsec.Text.Lazy 104 | Data.Attoparsec.Types 105 | Data.Attoparsec.Zepto 106 | other-modules: Data.Attoparsec.ByteString.Internal 107 | Data.Attoparsec.Text.Internal 108 | ghc-options: -O2 -Wall 109 | 110 | default-language: Haskell2010 111 | default-extensions: 112 | TypeOperators 113 | 114 | if flag(developer) 115 | ghc-prof-options: -auto-all 116 | ghc-options: -Werror 117 | 118 | test-suite attoparsec-tests 119 | type: exitcode-stdio-1.0 120 | hs-source-dirs: tests 121 | main-is: QC.hs 122 | other-modules: QC.Buffer 123 | QC.ByteString 124 | QC.Combinator 125 | QC.Common 126 | QC.IPv6.Internal 127 | QC.IPv6.Types 128 | QC.Rechunked 129 | QC.Simple 130 | QC.Text 131 | QC.Text.FastSet 132 | QC.Text.Regressions 133 | 134 | ghc-options: 135 | -Wall -threaded -rtsopts 136 | 137 | if flag(developer) 138 | ghc-options: -Werror 139 | 140 | build-depends: 141 | array, 142 | attoparsec, 143 | attoparsec-internal, 144 | base, 145 | bytestring, 146 | deepseq >= 1.1, 147 | QuickCheck >= 2.13.2 && < 2.16, 148 | quickcheck-unicode, 149 | scientific, 150 | tasty >= 0.11, 151 | tasty-quickcheck >= 0.8, 152 | text, 153 | transformers, 154 | vector 155 | 156 | default-language: Haskell2010 157 | 158 | if !impl(ghc >= 8.0) 159 | -- Data.Semigroup && Control.Monad.Fail are available in base-4.9+ 160 | build-depends: fail == 4.9.*, 161 | semigroups >=0.16.1 && <0.19 162 | 163 | benchmark attoparsec-benchmarks 164 | type: exitcode-stdio-1.0 165 | hs-source-dirs: benchmarks benchmarks/warp-3.0.1.1 166 | ghc-options: -O2 -Wall -rtsopts 167 | main-is: Benchmarks.hs 168 | other-modules: 169 | Aeson 170 | Common 171 | Genome 172 | HeadersByteString 173 | HeadersByteString.Atto 174 | HeadersText 175 | Links 176 | Network.Wai.Handler.Warp.ReadInt 177 | Network.Wai.Handler.Warp.RequestHeader 178 | Numbers 179 | Sets 180 | TextFastSet 181 | Warp 182 | ghc-options: -O2 -Wall 183 | 184 | if flag(developer) 185 | ghc-options: -Werror 186 | 187 | build-depends: 188 | array, 189 | attoparsec, 190 | attoparsec-internal, 191 | base == 4.*, 192 | bytestring >= 0.10.4.0, 193 | case-insensitive, 194 | containers, 195 | deepseq >= 1.1, 196 | directory, 197 | filepath, 198 | ghc-prim, 199 | http-types, 200 | parsec >= 3.1.2, 201 | scientific, 202 | tasty-bench >= 0.3, 203 | text >= 1.1.1.0, 204 | transformers, 205 | unordered-containers, 206 | vector 207 | 208 | default-language: Haskell2010 209 | 210 | if !impl(ghc >= 8.0) 211 | -- Data.Semigroup && Control.Monad.Fail are available in base-4.9+ 212 | build-depends: fail == 4.9.*, 213 | semigroups >=0.16.1 && <0.19 214 | 215 | source-repository head 216 | type: git 217 | location: https://github.com/haskell/attoparsec.git 218 | -------------------------------------------------------------------------------- /Data/Attoparsec/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} 2 | -- | 3 | -- Module : Data.Attoparsec.Internal 4 | -- Copyright : Bryan O'Sullivan 2007-2015 5 | -- License : BSD3 6 | -- 7 | -- Maintainer : bos@serpentine.com 8 | -- Stability : experimental 9 | -- Portability : unknown 10 | -- 11 | -- Simple, efficient parser combinators, loosely based on the Parsec 12 | -- library. 13 | 14 | module Data.Attoparsec.Internal 15 | ( compareResults 16 | , prompt 17 | , demandInput 18 | , demandInput_ 19 | , wantInput 20 | , endOfInput 21 | , atEnd 22 | , satisfyElem 23 | , concatReverse 24 | ) where 25 | 26 | #if !MIN_VERSION_base(4,8,0) 27 | import Control.Applicative ((<$>)) 28 | import Data.Monoid (Monoid, mconcat) 29 | #endif 30 | import Data.Attoparsec.Internal.Types 31 | import Data.ByteString (ByteString) 32 | import Data.Text (Text) 33 | import Prelude hiding (succ) 34 | 35 | -- | Compare two 'IResult' values for equality. 36 | -- 37 | -- If both 'IResult's are 'Partial', the result will be 'Nothing', as 38 | -- they are incomplete and hence their equality cannot be known. 39 | -- (This is why there is no 'Eq' instance for 'IResult'.) 40 | compareResults :: (Eq i, Eq r) => IResult i r -> IResult i r -> Maybe Bool 41 | compareResults (Fail t0 ctxs0 msg0) (Fail t1 ctxs1 msg1) = 42 | Just (t0 == t1 && ctxs0 == ctxs1 && msg0 == msg1) 43 | compareResults (Done t0 r0) (Done t1 r1) = 44 | Just (t0 == t1 && r0 == r1) 45 | compareResults (Partial _) (Partial _) = Nothing 46 | compareResults _ _ = Just False 47 | 48 | -- | Ask for input. If we receive any, pass the augmented input to a 49 | -- success continuation, otherwise to a failure continuation. 50 | prompt :: Chunk t 51 | => State t -> Pos -> More 52 | -> (State t -> Pos -> More -> IResult t r) 53 | -> (State t -> Pos -> More -> IResult t r) 54 | -> IResult t r 55 | prompt t pos _more lose succ = Partial $ \s -> 56 | if nullChunk s 57 | then lose t pos Complete 58 | else succ (pappendChunk t s) pos Incomplete 59 | {-# SPECIALIZE prompt :: State ByteString -> Pos -> More 60 | -> (State ByteString -> Pos -> More 61 | -> IResult ByteString r) 62 | -> (State ByteString -> Pos -> More 63 | -> IResult ByteString r) 64 | -> IResult ByteString r #-} 65 | {-# SPECIALIZE prompt :: State Text -> Pos -> More 66 | -> (State Text -> Pos -> More -> IResult Text r) 67 | -> (State Text -> Pos -> More -> IResult Text r) 68 | -> IResult Text r #-} 69 | 70 | -- | Immediately demand more input via a 'Partial' continuation 71 | -- result. 72 | demandInput :: Chunk t => Parser t () 73 | demandInput = Parser $ \t pos more lose succ -> 74 | case more of 75 | Complete -> lose t pos more [] "not enough input" 76 | _ -> let lose' _ pos' more' = lose t pos' more' [] "not enough input" 77 | succ' t' pos' more' = succ t' pos' more' () 78 | in prompt t pos more lose' succ' 79 | {-# SPECIALIZE demandInput :: Parser ByteString () #-} 80 | {-# SPECIALIZE demandInput :: Parser Text () #-} 81 | 82 | -- | Immediately demand more input via a 'Partial' continuation 83 | -- result. Return the new input. 84 | demandInput_ :: Chunk t => Parser t t 85 | demandInput_ = Parser $ \t pos more lose succ -> 86 | case more of 87 | Complete -> lose t pos more [] "not enough input" 88 | _ -> Partial $ \s -> 89 | if nullChunk s 90 | then lose t pos Complete [] "not enough input" 91 | else succ (pappendChunk t s) pos more s 92 | {-# SPECIALIZE demandInput_ :: Parser ByteString ByteString #-} 93 | {-# SPECIALIZE demandInput_ :: Parser Text Text #-} 94 | 95 | -- | This parser always succeeds. It returns 'True' if any input is 96 | -- available either immediately or on demand, and 'False' if the end 97 | -- of all input has been reached. 98 | wantInput :: forall t . Chunk t => Parser t Bool 99 | wantInput = Parser $ \t pos more _lose succ -> 100 | case () of 101 | _ | pos < atBufferEnd (undefined :: t) t -> succ t pos more True 102 | | more == Complete -> succ t pos more False 103 | | otherwise -> let lose' t' pos' more' = succ t' pos' more' False 104 | succ' t' pos' more' = succ t' pos' more' True 105 | in prompt t pos more lose' succ' 106 | {-# INLINE wantInput #-} 107 | 108 | -- | Match only if all input has been consumed. 109 | endOfInput :: forall t . Chunk t => Parser t () 110 | endOfInput = Parser $ \t pos more lose succ -> 111 | case () of 112 | _| pos < atBufferEnd (undefined :: t) t -> lose t pos more [] "endOfInput" 113 | | more == Complete -> succ t pos more () 114 | | otherwise -> 115 | let lose' t' pos' more' _ctx _msg = succ t' pos' more' () 116 | succ' t' pos' more' _a = lose t' pos' more' [] "endOfInput" 117 | in runParser demandInput t pos more lose' succ' 118 | {-# SPECIALIZE endOfInput :: Parser ByteString () #-} 119 | {-# SPECIALIZE endOfInput :: Parser Text () #-} 120 | 121 | -- | Return an indication of whether the end of input has been 122 | -- reached. 123 | atEnd :: Chunk t => Parser t Bool 124 | atEnd = not <$> wantInput 125 | {-# INLINE atEnd #-} 126 | 127 | satisfySuspended :: forall t r . Chunk t 128 | => (ChunkElem t -> Bool) 129 | -> State t -> Pos -> More 130 | -> Failure t (State t) r 131 | -> Success t (State t) (ChunkElem t) r 132 | -> IResult t r 133 | satisfySuspended p t pos more lose succ = 134 | runParser (demandInput >> go) t pos more lose succ 135 | where go = Parser $ \t' pos' more' lose' succ' -> 136 | case bufferElemAt (undefined :: t) pos' t' of 137 | Just (e, l) | p e -> succ' t' (pos' + Pos l) more' e 138 | | otherwise -> lose' t' pos' more' [] "satisfyElem" 139 | Nothing -> runParser (demandInput >> go) t' pos' more' lose' succ' 140 | {-# SPECIALIZE satisfySuspended :: (ChunkElem ByteString -> Bool) 141 | -> State ByteString -> Pos -> More 142 | -> Failure ByteString (State ByteString) r 143 | -> Success ByteString (State ByteString) 144 | (ChunkElem ByteString) r 145 | -> IResult ByteString r #-} 146 | {-# SPECIALIZE satisfySuspended :: (ChunkElem Text -> Bool) 147 | -> State Text -> Pos -> More 148 | -> Failure Text (State Text) r 149 | -> Success Text (State Text) 150 | (ChunkElem Text) r 151 | -> IResult Text r #-} 152 | 153 | -- | The parser @satisfyElem p@ succeeds for any chunk element for which the 154 | -- predicate @p@ returns 'True'. Returns the element that is 155 | -- actually parsed. 156 | satisfyElem :: forall t . Chunk t 157 | => (ChunkElem t -> Bool) -> Parser t (ChunkElem t) 158 | satisfyElem p = Parser $ \t pos more lose succ -> 159 | case bufferElemAt (undefined :: t) pos t of 160 | Just (e, l) | p e -> succ t (pos + Pos l) more e 161 | | otherwise -> lose t pos more [] "satisfyElem" 162 | Nothing -> satisfySuspended p t pos more lose succ 163 | {-# INLINE satisfyElem #-} 164 | 165 | -- | Concatenate a monoid after reversing its elements. Used to 166 | -- glue together a series of textual chunks that have been accumulated 167 | -- \"backwards\". 168 | concatReverse :: Monoid m => [m] -> m 169 | concatReverse [x] = x 170 | concatReverse xs = mconcat (reverse xs) 171 | {-# INLINE concatReverse #-} 172 | -------------------------------------------------------------------------------- /tests/QC/Text.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-} 2 | {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} 3 | module QC.Text (tests) where 4 | 5 | #if !MIN_VERSION_base(4,8,0) 6 | import Control.Applicative ((<*>), (<$>)) 7 | #endif 8 | import Data.Int (Int64) 9 | import Data.Word (Word8) 10 | import Prelude hiding (take, takeWhile) 11 | import QC.Common (liftOp, parseT) 12 | import qualified QC.Text.FastSet as FastSet 13 | import qualified QC.Text.Regressions as Regressions 14 | import Test.Tasty (TestTree, testGroup) 15 | import Test.Tasty.QuickCheck (testProperty) 16 | import Test.QuickCheck 17 | import qualified Data.Attoparsec.Text as P 18 | import qualified Data.Attoparsec.Text.Lazy as PL 19 | import qualified Data.Attoparsec.Text.FastSet as S 20 | import qualified Data.ByteString as BS 21 | import qualified Data.Char as Char 22 | import qualified Data.Text as T 23 | import qualified Data.Text.Encoding as TE 24 | import qualified Data.Text.Lazy as L 25 | 26 | -- Basic byte-level combinators. 27 | 28 | satisfy :: Char -> L.Text -> Property 29 | satisfy w s = parseT (P.satisfy (<=w)) (L.cons w s) === Just w 30 | 31 | satisfyWith :: Char -> L.Text -> Property 32 | satisfyWith c s = parseT (P.satisfyWith id (<=c)) (L.cons c s) === Just c 33 | 34 | char :: Char -> L.Text -> Property 35 | char w s = parseT (P.char w) (L.cons w s) === Just w 36 | 37 | skip :: Char -> L.Text -> Property 38 | skip w s = 39 | case (parseT (P.skip ( maybe (property True) (expectFailure . it) mcs 41 | (Just _, mcs) -> maybe (property False) it mcs 42 | where it cs = liftOp "<" (<) (fst cs) w 43 | 44 | anyChar :: L.Text -> Property 45 | anyChar s 46 | | L.null s = p === Nothing 47 | | otherwise = p === Just (L.head s) 48 | where p = parseT P.anyChar s 49 | 50 | notChar :: Char -> NonEmptyList Char -> Property 51 | notChar w (NonEmpty s) = parseT (P.notChar w) bs === if v == w 52 | then Nothing 53 | else Just v 54 | where v = L.head bs 55 | bs = L.pack s 56 | 57 | peekChar :: L.Text -> Property 58 | peekChar s 59 | | L.null s = p === Just (Nothing, s) 60 | | otherwise = p === Just (Just (L.head s), s) 61 | where p = parseT ((,) <$> P.peekChar <*> P.takeLazyText) s 62 | 63 | peekChar' :: L.Text -> Property 64 | peekChar' s = parseT P.peekChar' s === (fst <$> L.uncons s) 65 | 66 | string :: L.Text -> L.Text -> Property 67 | string s t = parseT (P.string s') (s `L.append` t) === Just s' 68 | where s' = toStrict s 69 | 70 | strings :: L.Text -> L.Text -> L.Text -> Property 71 | strings s t u = 72 | parseT (P.string (toStrict s) >> P.string t') (L.concat [s,t,u]) 73 | === Just t' 74 | where t' = toStrict t 75 | 76 | -- | Note: "simple, and efficient" works for well formed input... 77 | -- i.e. e.g. Latin1 texts 78 | stringCI :: [Word8] -> Property 79 | stringCI ws = P.parseOnly (P.stringCI fs) s === Right s 80 | where fs = T.toCaseFold s 81 | s = TE.decodeLatin1 (BS.pack ws) 82 | 83 | asciiCI :: T.Text -> Gen Bool 84 | asciiCI x = 85 | (\s i -> P.parseOnly (P.asciiCI s) i == Right i) 86 | <$> maybeModifyCase x 87 | <*> maybeModifyCase x 88 | where 89 | maybeModifyCase s = elements [s, toLower s, toUpper s] 90 | toLower = T.map (\c -> if c < Char.chr 127 then Char.toLower c else c) 91 | toUpper = T.map (\c -> if c < Char.chr 127 then Char.toUpper c else c) 92 | 93 | toStrict :: L.Text -> T.Text 94 | toStrict = T.concat . L.toChunks 95 | 96 | skipWhile :: Char -> L.Text -> Property 97 | skipWhile w s = 98 | let t = L.dropWhile (<=w) s 99 | in case PL.parse (P.skipWhile (<=w)) s of 100 | PL.Done t' () -> t === t' 101 | _ -> property False 102 | 103 | take :: Int -> L.Text -> Property 104 | take n s = maybe (liftOp "<" (<) (L.length s) (fromIntegral n)) 105 | (=== T.take n (toStrict s)) $ 106 | parseT (P.take n) s 107 | 108 | takeText :: L.Text -> Property 109 | takeText s = maybe (property False) (=== toStrict s) . parseT P.takeText $ s 110 | 111 | takeLazyText :: L.Text -> Property 112 | takeLazyText s = maybe (property False) (=== s) . parseT P.takeLazyText $ s 113 | 114 | takeCount :: Positive Int -> L.Text -> Property 115 | takeCount (Positive k) s = 116 | case parseT (P.take k) s of 117 | Nothing -> liftOp ">" (>) (fromIntegral k) (L.length s) 118 | Just _s -> liftOp "<=" (<=) (fromIntegral k) (L.length s) 119 | 120 | takeWhile :: Char -> L.Text -> Property 121 | takeWhile w s = 122 | let (h,t) = L.span (==w) s 123 | in case PL.parse (P.takeWhile (==w)) s of 124 | PL.Done t' h' -> t === t' .&&. toStrict h === h' 125 | _ -> property False 126 | 127 | takeWhile1 :: Char -> L.Text -> Property 128 | takeWhile1 w s = 129 | let s' = L.cons w s 130 | (h,t) = L.span (<=w) s' 131 | in case PL.parse (P.takeWhile1 (<=w)) s' of 132 | PL.Done t' h' -> t === t' .&&. toStrict h === h' 133 | _ -> property False 134 | 135 | takeTill :: Char -> L.Text -> Property 136 | takeTill w s = 137 | let (h,t) = L.break (==w) s 138 | in case PL.parse (P.takeTill (==w)) s of 139 | PL.Done t' h' -> t === t' .&&. toStrict h === h' 140 | _ -> property False 141 | 142 | takeWhile1_empty :: Property 143 | takeWhile1_empty = parseT (P.takeWhile1 undefined) L.empty === Nothing 144 | 145 | endOfInput :: L.Text -> Property 146 | endOfInput s = parseT P.endOfInput s === if L.null s 147 | then Just () 148 | else Nothing 149 | 150 | endOfLine :: L.Text -> Property 151 | endOfLine s = 152 | case (parseT P.endOfLine s, L.uncons s) of 153 | (Nothing, mcs) -> maybe (property True) (expectFailure . eol) mcs 154 | (Just _, mcs) -> maybe (property False) eol mcs 155 | where eol (c,s') = c === '\n' .||. 156 | (c, fst <$> L.uncons s') === ('\r', Just '\n') 157 | 158 | scan :: L.Text -> Positive Int64 -> Property 159 | -- for some reason, if counterexample is removed, this test fails? 160 | scan s (Positive k) = counterexample (show s) 161 | $ parseT p s === Just (toStrict $ L.take k s) 162 | where p = P.scan k $ \ n _ -> 163 | if n > 0 then let !n' = n - 1 in Just n' else Nothing 164 | 165 | members :: String -> Property 166 | members s = property $ all (`S.member` set) s 167 | where set = S.fromList s 168 | 169 | nonmembers :: String -> String -> Property 170 | nonmembers s s' = property . not . any (`S.member` set) $ filter (not . (`elem` s)) s' 171 | where set = S.fromList s 172 | 173 | tests :: [TestTree] 174 | tests = [ 175 | testProperty "anyChar" anyChar 176 | , testProperty "asciiCI" asciiCI 177 | , testProperty "char" char 178 | , testProperty "endOfInput" endOfInput 179 | , testProperty "endOfLine" endOfLine 180 | , testProperty "notChar" notChar 181 | , testProperty "peekChar" peekChar 182 | , testProperty "peekChar'" peekChar' 183 | , testProperty "satisfy" satisfy 184 | , testProperty "satisfyWith" satisfyWith 185 | , testProperty "scan" scan 186 | , testProperty "skip" skip 187 | , testProperty "skipWhile" skipWhile 188 | , testProperty "string" string 189 | , testProperty "strings" strings 190 | , testProperty "stringCI" stringCI 191 | , testProperty "take" take 192 | , testProperty "takeText" takeText 193 | , testProperty "takeCount" takeCount 194 | , testProperty "takeLazyText" takeLazyText 195 | , testProperty "takeTill" takeTill 196 | , testProperty "takeWhile" takeWhile 197 | , testProperty "takeWhile1" takeWhile1 198 | , testProperty "takeWhile1_empty" takeWhile1_empty 199 | , testProperty "members" members 200 | , testProperty "nonmembers" nonmembers 201 | , testGroup "FastSet" FastSet.tests 202 | , testGroup "Regressions" Regressions.tests 203 | ] 204 | -------------------------------------------------------------------------------- /internal/Data/Attoparsec/Text/Buffer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards, 2 | UnboxedTuples #-} 3 | 4 | -- | 5 | -- Module : Data.Attoparsec.Text.Buffer 6 | -- Copyright : Bryan O'Sullivan 2007-2015 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : bos@serpentine.com 10 | -- Stability : experimental 11 | -- Portability : GHC 12 | -- 13 | -- An immutable buffer that supports cheap appends. 14 | 15 | -- A Buffer is divided into an immutable read-only zone, followed by a 16 | -- mutable area that we've preallocated, but not yet written to. 17 | -- 18 | -- We overallocate at the end of a Buffer so that we can cheaply 19 | -- append. Since a user of an existing Buffer cannot see past the end 20 | -- of its immutable zone into the data that will change during an 21 | -- append, this is safe. 22 | -- 23 | -- Once we run out of space at the end of a Buffer, we do the usual 24 | -- doubling of the buffer size. 25 | 26 | module Data.Attoparsec.Text.Buffer 27 | ( 28 | Buffer 29 | , buffer 30 | , unbuffer 31 | , unbufferAt 32 | , length 33 | , pappend 34 | , iter 35 | , iter_ 36 | , substring 37 | , lengthCodeUnits 38 | , dropCodeUnits 39 | ) where 40 | 41 | import Control.Exception (assert) 42 | import Data.List (foldl1') 43 | import Data.Monoid as Mon (Monoid(..)) 44 | import Data.Semigroup (Semigroup(..)) 45 | import Data.Text () 46 | import Data.Text.Internal (Text(..)) 47 | #if MIN_VERSION_text(2,0,0) 48 | import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader) 49 | import Data.Text.Unsafe (iterArray, lengthWord8) 50 | #else 51 | import Data.Bits (shiftR) 52 | import Data.Text.Internal.Encoding.Utf16 (chr2) 53 | import Data.Text.Internal.Unsafe.Char (unsafeChr) 54 | import Data.Text.Unsafe (lengthWord16) 55 | #endif 56 | import Data.Text.Unsafe (Iter(..)) 57 | import Foreign.Storable (sizeOf) 58 | import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#) 59 | import GHC.ST (ST(..), runST) 60 | import Prelude hiding (length) 61 | import qualified Data.Text.Array as A 62 | 63 | -- If _cap is zero, this buffer is empty. 64 | data Buffer = Buf { 65 | _arr :: {-# UNPACK #-} !A.Array 66 | , _off :: {-# UNPACK #-} !Int 67 | , _len :: {-# UNPACK #-} !Int 68 | , _cap :: {-# UNPACK #-} !Int 69 | , _gen :: {-# UNPACK #-} !Int 70 | } 71 | 72 | instance Show Buffer where 73 | showsPrec p = showsPrec p . unbuffer 74 | 75 | -- | The initial 'Buffer' has no mutable zone, so we can avoid all 76 | -- copies in the (hopefully) common case of no further input being fed 77 | -- to us. 78 | buffer :: Text -> Buffer 79 | buffer (Text arr off len) = Buf arr off len len 0 80 | 81 | unbuffer :: Buffer -> Text 82 | unbuffer (Buf arr off len _ _) = Text arr off len 83 | 84 | unbufferAt :: Int -> Buffer -> Text 85 | unbufferAt s (Buf arr off len _ _) = 86 | assert (s >= 0 && s <= len) $ 87 | Text arr (off+s) (len-s) 88 | 89 | instance Semigroup Buffer where 90 | (Buf _ _ _ 0 _) <> b = b 91 | a <> (Buf _ _ _ 0 _) = a 92 | buf <> (Buf arr off len _ _) = append buf arr off len 93 | {-# INLINE (<>) #-} 94 | 95 | instance Monoid Buffer where 96 | mempty = Buf A.empty 0 0 0 0 97 | {-# INLINE mempty #-} 98 | 99 | mappend = (<>) 100 | 101 | mconcat [] = Mon.mempty 102 | mconcat xs = foldl1' (<>) xs 103 | 104 | pappend :: Buffer -> Text -> Buffer 105 | pappend (Buf _ _ _ 0 _) t = buffer t 106 | pappend buf (Text arr off len) = append buf arr off len 107 | 108 | append :: Buffer -> A.Array -> Int -> Int -> Buffer 109 | append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do 110 | #if MIN_VERSION_text(2,0,0) 111 | let woff = sizeOf (0::Int) 112 | #else 113 | let woff = sizeOf (0::Int) `shiftR` 1 114 | #endif 115 | newlen = len0 + len1 116 | !gen = if gen0 == 0 then 0 else readGen arr0 117 | if gen == gen0 && newlen <= cap0 118 | then do 119 | let newgen = gen + 1 120 | marr <- unsafeThaw arr0 121 | writeGen marr newgen 122 | #if MIN_VERSION_text(2,0,0) 123 | A.copyI len1 marr (off0+len0) arr1 off1 124 | #else 125 | A.copyI marr (off0+len0) arr1 off1 (off0+newlen) 126 | #endif 127 | arr2 <- A.unsafeFreeze marr 128 | return (Buf arr2 off0 newlen cap0 newgen) 129 | else do 130 | let newcap = newlen * 2 131 | newgen = 1 132 | marr <- A.new (newcap + woff) 133 | writeGen marr newgen 134 | #if MIN_VERSION_text(2,0,0) 135 | A.copyI len0 marr woff arr0 off0 136 | A.copyI len1 marr (woff+len0) arr1 off1 137 | #else 138 | A.copyI marr woff arr0 off0 (woff+len0) 139 | A.copyI marr (woff+len0) arr1 off1 (woff+newlen) 140 | #endif 141 | arr2 <- A.unsafeFreeze marr 142 | return (Buf arr2 woff newlen newcap newgen) 143 | 144 | length :: Buffer -> Int 145 | length (Buf _ _ len _ _) = len 146 | {-# INLINE length #-} 147 | 148 | substring :: Int -> Int -> Buffer -> Text 149 | substring s l (Buf arr off len _ _) = 150 | assert (s >= 0 && s <= len) . 151 | assert (l >= 0 && l <= len-s) $ 152 | Text arr (off+s) l 153 | {-# INLINE substring #-} 154 | 155 | #if MIN_VERSION_text(2,0,0) 156 | 157 | lengthCodeUnits :: Text -> Int 158 | lengthCodeUnits = lengthWord8 159 | 160 | dropCodeUnits :: Int -> Buffer -> Text 161 | dropCodeUnits s (Buf arr off len _ _) = 162 | assert (s >= 0 && s <= len) $ 163 | Text arr (off+s) (len-s) 164 | {-# INLINE dropCodeUnits #-} 165 | 166 | -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-8 167 | -- array, returning the current character and the delta to add to give 168 | -- the next offset to iterate at. 169 | iter :: Buffer -> Int -> Iter 170 | iter (Buf arr off _ _ _) i = iterArray arr (off + i) 171 | {-# INLINE iter #-} 172 | 173 | -- | /O(1)/ Iterate one step through a UTF-8 array, returning the 174 | -- delta to add to give the next offset to iterate at. 175 | iter_ :: Buffer -> Int -> Int 176 | iter_ (Buf arr off _ _ _) i = utf8LengthByLeader $ A.unsafeIndex arr (off+i) 177 | {-# INLINE iter_ #-} 178 | 179 | unsafeThaw :: A.Array -> ST s (A.MArray s) 180 | unsafeThaw (A.ByteArray a) = ST $ \s# -> 181 | (# s#, A.MutableByteArray (unsafeCoerce# a) #) 182 | 183 | readGen :: A.Array -> Int 184 | readGen (A.ByteArray a) = case indexIntArray# a 0# of r# -> I# r# 185 | 186 | writeGen :: A.MArray s -> Int -> ST s () 187 | writeGen (A.MutableByteArray a) (I# gen#) = ST $ \s0# -> 188 | case writeIntArray# a 0# gen# s0# of 189 | s1# -> (# s1#, () #) 190 | 191 | #else 192 | 193 | lengthCodeUnits :: Text -> Int 194 | lengthCodeUnits = lengthWord16 195 | 196 | dropCodeUnits :: Int -> Buffer -> Text 197 | dropCodeUnits s (Buf arr off len _ _) = 198 | assert (s >= 0 && s <= len) $ 199 | Text arr (off+s) (len-s) 200 | {-# INLINE dropCodeUnits #-} 201 | 202 | -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16 203 | -- array, returning the current character and the delta to add to give 204 | -- the next offset to iterate at. 205 | iter :: Buffer -> Int -> Iter 206 | iter (Buf arr off _ _ _) i 207 | | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1 208 | | otherwise = Iter (chr2 m n) 2 209 | where m = A.unsafeIndex arr j 210 | n = A.unsafeIndex arr k 211 | j = off + i 212 | k = j + 1 213 | {-# INLINE iter #-} 214 | 215 | -- | /O(1)/ Iterate one step through a UTF-16 array, returning the 216 | -- delta to add to give the next offset to iterate at. 217 | iter_ :: Buffer -> Int -> Int 218 | iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1 219 | | otherwise = 2 220 | where m = A.unsafeIndex arr (off+i) 221 | {-# INLINE iter_ #-} 222 | 223 | unsafeThaw :: A.Array -> ST s (A.MArray s) 224 | unsafeThaw A.Array{..} = ST $ \s# -> 225 | (# s#, A.MArray (unsafeCoerce# aBA) #) 226 | 227 | readGen :: A.Array -> Int 228 | readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r# 229 | 230 | writeGen :: A.MArray s -> Int -> ST s () 231 | writeGen a (I# gen#) = ST $ \s0# -> 232 | case writeIntArray# (A.maBA a) 0# gen# s0# of 233 | s1# -> (# s1#, () #) 234 | 235 | #endif 236 | -------------------------------------------------------------------------------- /Data/Attoparsec/ByteString.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #if __GLASGOW_HASKELL__ >= 702 3 | {-# LANGUAGE Trustworthy #-} 4 | #endif 5 | -- | 6 | -- Module : Data.Attoparsec.ByteString 7 | -- Copyright : Bryan O'Sullivan 2007-2015 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : bos@serpentine.com 11 | -- Stability : experimental 12 | -- Portability : unknown 13 | -- 14 | -- Simple, efficient combinator parsing for 'B.ByteString' strings, 15 | -- loosely based on the Parsec library. 16 | 17 | module Data.Attoparsec.ByteString 18 | ( 19 | -- * Differences from Parsec 20 | -- $parsec 21 | 22 | -- * Incremental input 23 | -- $incremental 24 | 25 | -- * Performance considerations 26 | -- $performance 27 | 28 | -- * Parser types 29 | I.Parser 30 | , Result 31 | , T.IResult(..) 32 | , I.compareResults 33 | 34 | -- * Running parsers 35 | , parse 36 | , feed 37 | , I.parseOnly 38 | , parseWith 39 | , parseTest 40 | 41 | -- ** Result conversion 42 | , maybeResult 43 | , eitherResult 44 | 45 | -- * Parsing individual bytes 46 | , I.word8 47 | , I.anyWord8 48 | , I.notWord8 49 | , I.satisfy 50 | , I.satisfyWith 51 | , I.skip 52 | 53 | -- ** Lookahead 54 | , I.peekWord8 55 | , I.peekWord8' 56 | 57 | -- ** Byte classes 58 | , I.inClass 59 | , I.notInClass 60 | 61 | -- * Efficient string handling 62 | , I.string 63 | , I.skipWhile 64 | , I.take 65 | , I.scan 66 | , I.runScanner 67 | , I.takeWhile 68 | , I.takeWhile1 69 | , I.takeWhileIncluding 70 | , I.takeTill 71 | , I.getChunk 72 | 73 | -- ** Consume all remaining input 74 | , I.takeByteString 75 | , I.takeLazyByteString 76 | 77 | -- * Combinators 78 | , try 79 | , () 80 | , choice 81 | , count 82 | , option 83 | , many' 84 | , many1 85 | , many1' 86 | , manyTill 87 | , manyTill' 88 | , sepBy 89 | , sepBy' 90 | , sepBy1 91 | , sepBy1' 92 | , skipMany 93 | , skipMany1 94 | , eitherP 95 | , I.match 96 | -- * State observation and manipulation functions 97 | , I.endOfInput 98 | , I.atEnd 99 | ) where 100 | 101 | import Data.Attoparsec.Combinator 102 | import Data.List (intercalate) 103 | import qualified Data.Attoparsec.ByteString.Internal as I 104 | import qualified Data.Attoparsec.Internal as I 105 | import qualified Data.ByteString as B 106 | import Data.Attoparsec.ByteString.Internal (Result, parse) 107 | import qualified Data.Attoparsec.Internal.Types as T 108 | 109 | -- $parsec 110 | -- 111 | -- Compared to Parsec 3, attoparsec makes several tradeoffs. It is 112 | -- not intended for, or ideal for, all possible uses. 113 | -- 114 | -- * While attoparsec can consume input incrementally, Parsec cannot. 115 | -- Incremental input is a huge deal for efficient and secure network 116 | -- and system programming, since it gives much more control to users 117 | -- of the library over matters such as resource usage and the I/O 118 | -- model to use. 119 | -- 120 | -- * Much of the performance advantage of attoparsec is gained via 121 | -- high-performance parsers such as 'I.takeWhile' and 'I.string'. 122 | -- If you use complicated combinators that return lists of bytes or 123 | -- characters, there is less performance difference between the two 124 | -- libraries. 125 | -- 126 | -- * Unlike Parsec 3, attoparsec does not support being used as a 127 | -- monad transformer. 128 | -- 129 | -- * attoparsec is specialised to deal only with strict 'B.ByteString' 130 | -- input. Efficiency concerns rule out both lists and lazy 131 | -- bytestrings. The usual use for lazy bytestrings would be to 132 | -- allow consumption of very large input without a large footprint. 133 | -- For this need, attoparsec's incremental input provides an 134 | -- excellent substitute, with much more control over when input 135 | -- takes place. If you must use lazy bytestrings, see the 136 | -- "Data.Attoparsec.ByteString.Lazy" module, which feeds lazy chunks 137 | -- to a regular parser. 138 | -- 139 | -- * Parsec parsers can produce more helpful error messages than 140 | -- attoparsec parsers. This is a matter of focus: attoparsec avoids 141 | -- the extra book-keeping in favour of higher performance. 142 | 143 | -- $incremental 144 | -- 145 | -- attoparsec supports incremental input, meaning that you can feed it 146 | -- a bytestring that represents only part of the expected total amount 147 | -- of data to parse. If your parser reaches the end of a fragment of 148 | -- input and could consume more input, it will suspend parsing and 149 | -- return a 'T.Partial' continuation. 150 | -- 151 | -- Supplying the 'T.Partial' continuation with a bytestring will 152 | -- resume parsing at the point where it was suspended, with the 153 | -- bytestring you supplied used as new input at the end of the 154 | -- existing input. You must be prepared for the result of the resumed 155 | -- parse to be another 'T.Partial' continuation. 156 | -- 157 | -- To indicate that you have no more input, supply the 'T.Partial' 158 | -- continuation with an empty bytestring. 159 | -- 160 | -- Remember that some parsing combinators will not return a result 161 | -- until they reach the end of input. They may thus cause 'T.Partial' 162 | -- results to be returned. 163 | -- 164 | -- If you do not need support for incremental input, consider using 165 | -- the 'I.parseOnly' function to run your parser. It will never 166 | -- prompt for more input. 167 | -- 168 | -- /Note/: incremental input does /not/ imply that attoparsec will 169 | -- release portions of its internal state for garbage collection as it 170 | -- proceeds. Its internal representation is equivalent to a single 171 | -- 'ByteString': if you feed incremental input to a parser, it will 172 | -- require memory proportional to the amount of input you supply. 173 | -- (This is necessary to support arbitrary backtracking.) 174 | 175 | -- $performance 176 | -- 177 | -- If you write an attoparsec-based parser carefully, it can be 178 | -- realistic to expect it to perform similarly to a hand-rolled C 179 | -- parser (measuring megabytes parsed per second). 180 | -- 181 | -- To actually achieve high performance, there are a few guidelines 182 | -- that it is useful to follow. 183 | -- 184 | -- Use the 'B.ByteString'-oriented parsers whenever possible, 185 | -- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'. There is 186 | -- about a factor of 100 difference in performance between the two 187 | -- kinds of parser. 188 | -- 189 | -- For very simple byte-testing predicates, write them by hand instead 190 | -- of using 'I.inClass' or 'I.notInClass'. For instance, both of 191 | -- these predicates test for an end-of-line byte, but the first is 192 | -- much faster than the second: 193 | -- 194 | -- >endOfLine_fast w = w == 13 || w == 10 195 | -- >endOfLine_slow = inClass "\r\n" 196 | -- 197 | -- Make active use of benchmarking and profiling tools to measure, 198 | -- find the problems with, and improve the performance of your parser. 199 | 200 | -- | Run a parser and print its result to standard output. 201 | parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO () 202 | parseTest p s = print (parse p s) 203 | 204 | -- | Run a parser with an initial input string, and a monadic action 205 | -- that can supply more input if needed. 206 | parseWith :: Monad m => 207 | (m B.ByteString) 208 | -- ^ An action that will be executed to provide the parser 209 | -- with more input, if necessary. The action must return an 210 | -- 'B.empty' string when there is no more input available. 211 | -> I.Parser a 212 | -> B.ByteString 213 | -- ^ Initial input for the parser. 214 | -> m (Result a) 215 | parseWith refill p s = step $ parse p s 216 | where step (T.Partial k) = (step . k) =<< refill 217 | step r = return r 218 | {-# INLINE parseWith #-} 219 | 220 | -- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result 221 | -- is treated as failure. 222 | maybeResult :: Result r -> Maybe r 223 | maybeResult (T.Done _ r) = Just r 224 | maybeResult _ = Nothing 225 | 226 | -- | Convert a 'Result' value to an 'Either' value. A 'T.Partial' 227 | -- result is treated as failure. 228 | eitherResult :: Result r -> Either String r 229 | eitherResult (T.Done _ r) = Right r 230 | eitherResult (T.Fail _ [] msg) = Left msg 231 | eitherResult (T.Fail _ ctxs msg) = Left (intercalate " > " ctxs ++ ": " ++ msg) 232 | eitherResult _ = Left "Result: incomplete input" 233 | -------------------------------------------------------------------------------- /tests/QC/ByteString.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-} 2 | module QC.ByteString (tests) where 3 | 4 | #if !MIN_VERSION_base(4,8,0) 5 | import Control.Applicative ((<*>), (<$>)) 6 | #endif 7 | import Data.Char (chr, ord, toUpper) 8 | import Data.Int (Int64) 9 | import Data.Word (Word8) 10 | import Prelude hiding (take, takeWhile) 11 | import QC.Common (ASCII(..), liftOp, parseBS, toStrictBS) 12 | import Test.Tasty (TestTree) 13 | import Test.Tasty.QuickCheck (testProperty) 14 | import Test.QuickCheck 15 | import qualified Data.Attoparsec.ByteString as P 16 | import qualified Data.Attoparsec.ByteString.Char8 as P8 17 | import qualified Data.Attoparsec.ByteString.FastSet as S 18 | import qualified Data.Attoparsec.ByteString.Lazy as PL 19 | import qualified Data.ByteString as B 20 | import qualified Data.ByteString.Char8 as B8 21 | import qualified Data.ByteString.Lazy as L 22 | import qualified Data.ByteString.Lazy.Char8 as L8 23 | 24 | -- Basic byte-level combinators. 25 | 26 | satisfy :: Word8 -> L.ByteString -> Property 27 | satisfy w s = parseBS (P.satisfy (<=w)) (L.cons w s) === Just w 28 | 29 | satisfyWith :: Word8 -> L.ByteString -> Property 30 | satisfyWith w s = parseBS (P.satisfyWith (chr . fromIntegral) (<=c)) 31 | (L.cons (fromIntegral (ord c)) s) === Just c 32 | where 33 | c = chr (fromIntegral w) 34 | 35 | word8 :: Word8 -> L.ByteString -> Property 36 | word8 w s = parseBS (P.word8 w) (L.cons w s) === Just w 37 | 38 | skip :: Word8 -> L.ByteString -> Property 39 | skip w s = 40 | case (parseBS (P.skip ( maybe (property True) (expectFailure . it) mcs 42 | (Just _, mcs) -> maybe (property False) it mcs 43 | where it cs = liftOp "<" (<) (fst cs) w 44 | 45 | anyWord8 :: L.ByteString -> Property 46 | anyWord8 s 47 | | L.null s = p === Nothing 48 | | otherwise = p === Just (L.head s) 49 | where p = parseBS P.anyWord8 s 50 | 51 | notWord8 :: Word8 -> NonEmptyList Word8 -> Property 52 | notWord8 w (NonEmpty s) = parseBS (P.notWord8 w) bs === if v == w 53 | then Nothing 54 | else Just v 55 | where v = L.head bs 56 | bs = L.pack s 57 | 58 | peekWord8 :: L.ByteString -> Property 59 | peekWord8 s 60 | | L.null s = p === Just (Nothing, s) 61 | | otherwise = p === Just (Just (L.head s), s) 62 | where p = parseBS ((,) <$> P.peekWord8 <*> P.takeLazyByteString) s 63 | 64 | peekWord8' :: L.ByteString -> Property 65 | peekWord8' s = parseBS P.peekWord8' s === (fst <$> L.uncons s) 66 | 67 | string :: L.ByteString -> L.ByteString -> Property 68 | string s t = parseBS (P.string s') (s `L.append` t) === Just s' 69 | where s' = toStrictBS s 70 | 71 | stringCI :: ASCII L.ByteString -> ASCII L.ByteString -> Property 72 | stringCI (ASCII s) (ASCII t) = 73 | parseBS (P8.stringCI up) (s `L.append` t) === Just s' 74 | where s' = toStrictBS s 75 | up = B8.map toUpper s' 76 | 77 | strings :: L.ByteString -> L.ByteString -> L.ByteString -> Property 78 | strings s t u = 79 | parseBS (P.string (toStrictBS s) >> P.string t') (L.concat [s,t,u]) 80 | === Just t' 81 | where t' = toStrictBS t 82 | 83 | skipWhile :: Word8 -> L.ByteString -> Property 84 | skipWhile w s = 85 | let t = L.dropWhile (<=w) s 86 | in case PL.parse (P.skipWhile (<=w)) s of 87 | PL.Done t' () -> t === t' 88 | _ -> property False 89 | 90 | takeCount :: Positive Int -> L.ByteString -> Property 91 | takeCount (Positive k) s = 92 | case parseBS (P.take k) s of 93 | Nothing -> liftOp ">" (>) (fromIntegral k) (L.length s) 94 | Just _s -> liftOp "<=" (<=) (fromIntegral k) (L.length s) 95 | 96 | takeWhile :: Word8 -> L.ByteString -> Property 97 | takeWhile w s = 98 | let (h,t) = L.span (==w) s 99 | in case PL.parse (P.takeWhile (==w)) s of 100 | PL.Done t' h' -> t === t' .&&. toStrictBS h === h' 101 | _ -> property False 102 | 103 | take :: Int -> L.ByteString -> Property 104 | take n s = maybe (property $ L.length s < fromIntegral n) 105 | (=== B.take n (toStrictBS s)) $ 106 | parseBS (P.take n) s 107 | 108 | takeByteString :: L.ByteString -> Property 109 | takeByteString s = maybe (property False) (=== toStrictBS s) . 110 | parseBS P.takeByteString $ s 111 | 112 | takeLazyByteString :: L.ByteString -> Property 113 | takeLazyByteString s = maybe (property False) (=== s) . 114 | parseBS P.takeLazyByteString $ s 115 | 116 | takeWhile1 :: Word8 -> L.ByteString -> Property 117 | takeWhile1 w s = 118 | let s' = L.cons w s 119 | (h,t) = L.span (<=w) s' 120 | in case PL.parse (P.takeWhile1 (<=w)) s' of 121 | PL.Done t' h' -> t === t' .&&. toStrictBS h === h' 122 | _ -> property False 123 | 124 | takeWhileIncluding :: Word8 -> L.ByteString -> Property 125 | takeWhileIncluding w s = 126 | let s' = L.cons w $ L.snoc s (w+1) 127 | (h_,t_) = L.span (<=w) s' 128 | (h,t) = 129 | case L.uncons t_ of 130 | Nothing -> (h_, t_) 131 | Just (n, nt) -> (h_ `L.snoc` n, nt) 132 | in w < 255 ==> case PL.parse (P.takeWhileIncluding (<=w)) s' of 133 | PL.Done t' h' -> t === t' .&&. toStrictBS h === h' 134 | _ -> property False 135 | 136 | takeTill :: Word8 -> L.ByteString -> Property 137 | takeTill w s = 138 | let (h,t) = L.break (==w) s 139 | in case PL.parse (P.takeTill (==w)) s of 140 | PL.Done t' h' -> t === t' .&&. toStrictBS h === h' 141 | _ -> property False 142 | 143 | takeWhile1_empty :: Property 144 | takeWhile1_empty = parseBS (P.takeWhile1 undefined) L.empty === Nothing 145 | 146 | getChunk :: L.ByteString -> Property 147 | getChunk s = 148 | maybe (property False) (=== L.toChunks s) $ 149 | parseBS getChunks s 150 | where getChunks = go [] 151 | go res = do 152 | mchunk <- P.getChunk 153 | case mchunk of 154 | Nothing -> return (reverse res) 155 | Just chunk -> do 156 | _ <- P.take (B.length chunk) 157 | go (chunk:res) 158 | 159 | endOfInput :: L.ByteString -> Property 160 | endOfInput s = parseBS P.endOfInput s === if L.null s 161 | then Just () 162 | else Nothing 163 | 164 | endOfLine :: L.ByteString -> Property 165 | endOfLine s = 166 | case (parseBS P8.endOfLine s, L8.uncons s) of 167 | (Nothing, mcs) -> maybe (property True) (expectFailure . eol) mcs 168 | (Just _, mcs) -> maybe (property False) eol mcs 169 | where eol (c,s') = c === '\n' .||. 170 | (c, fst <$> L8.uncons s') === ('\r', Just '\n') 171 | 172 | scan :: L.ByteString -> Positive Int64 -> Property 173 | scan s (Positive k) = parseBS p s === Just (toStrictBS $ L.take k s) 174 | where p = P.scan k $ \ n _ -> 175 | if n > 0 then let !n' = n - 1 in Just n' else Nothing 176 | 177 | members :: [Word8] -> Property 178 | members s = property $ all (`S.memberWord8` set) s 179 | where set = S.fromList s 180 | 181 | nonmembers :: [Word8] -> [Word8] -> Property 182 | nonmembers s s' = property . not . any (`S.memberWord8` set) $ filter (not . (`elem` s)) s' 183 | where set = S.fromList s 184 | 185 | tests :: [TestTree] 186 | tests = [ 187 | testProperty "anyWord8" anyWord8 188 | , testProperty "endOfInput" endOfInput 189 | , testProperty "endOfLine" endOfLine 190 | , testProperty "notWord8" notWord8 191 | , testProperty "peekWord8" peekWord8 192 | , testProperty "peekWord8'" peekWord8' 193 | , testProperty "satisfy" satisfy 194 | , testProperty "satisfyWith" satisfyWith 195 | , testProperty "scan" scan 196 | , testProperty "skip" skip 197 | , testProperty "skipWhile" skipWhile 198 | , testProperty "string" string 199 | , testProperty "stringCI" stringCI 200 | , testProperty "strings" strings 201 | , testProperty "take" take 202 | , testProperty "takeByteString" takeByteString 203 | , testProperty "takeCount" takeCount 204 | , testProperty "takeLazyByteString" takeLazyByteString 205 | , testProperty "takeTill" takeTill 206 | , testProperty "takeWhile" takeWhile 207 | , testProperty "takeWhile1" takeWhile1 208 | , testProperty "takeWhile1_empty" takeWhile1_empty 209 | , testProperty "takeWhileIncluding" takeWhileIncluding 210 | , testProperty "getChunk" getChunk 211 | , testProperty "word8" word8 212 | , testProperty "members" members 213 | , testProperty "nonmembers" nonmembers 214 | ] 215 | -------------------------------------------------------------------------------- /Data/Attoparsec/Internal/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, BangPatterns, GeneralizedNewtypeDeriving, OverloadedStrings, 2 | Rank2Types, RecordWildCards, TypeFamilies #-} 3 | -- | 4 | -- Module : Data.Attoparsec.Internal.Types 5 | -- Copyright : Bryan O'Sullivan 2007-2015 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : unknown 11 | -- 12 | -- Simple, efficient parser combinators, loosely based on the Parsec 13 | -- library. 14 | 15 | module Data.Attoparsec.Internal.Types 16 | ( 17 | Parser(..) 18 | , State 19 | , Failure 20 | , Success 21 | , Pos(..) 22 | , IResult(..) 23 | , More(..) 24 | , (<>) 25 | , Chunk(..) 26 | ) where 27 | 28 | import Control.Applicative as App (Applicative(..), (<$>)) 29 | import Control.Applicative (Alternative(..)) 30 | import Control.DeepSeq (NFData(rnf)) 31 | import Control.Monad (MonadPlus(..)) 32 | import qualified Control.Monad.Fail as Fail (MonadFail(..)) 33 | import Data.Monoid as Mon (Monoid(..)) 34 | import Data.Semigroup (Semigroup(..)) 35 | import Data.Word (Word8) 36 | import Data.ByteString (ByteString) 37 | import qualified Data.ByteString as BS 38 | import Data.ByteString.Internal (w2c) 39 | import Data.Text (Text) 40 | import qualified Data.Text as Text 41 | import Data.Text.Unsafe (Iter(..)) 42 | import Prelude hiding (succ) 43 | import qualified Data.Attoparsec.ByteString.Buffer as B 44 | import qualified Data.Attoparsec.Text.Buffer as T 45 | 46 | newtype Pos = Pos { fromPos :: Int } 47 | deriving (Eq, Ord, Show, Num) 48 | 49 | -- | The result of a parse. This is parameterised over the type @i@ 50 | -- of string that was processed. 51 | -- 52 | -- This type is an instance of 'Functor', where 'fmap' transforms the 53 | -- value in a 'Done' result. 54 | data IResult i r = 55 | Fail i [String] String 56 | -- ^ The parse failed. The @i@ parameter is the input that had 57 | -- not yet been consumed when the failure occurred. The 58 | -- @[@'String'@]@ is a list of contexts in which the error 59 | -- occurred. The 'String' is the message describing the error, if 60 | -- any. 61 | | Partial (i -> IResult i r) 62 | -- ^ Supply this continuation with more input so that the parser 63 | -- can resume. To indicate that no more input is available, pass 64 | -- an empty string to the continuation. 65 | -- 66 | -- __Note__: if you get a 'Partial' result, do not call its 67 | -- continuation more than once. 68 | | Done i r 69 | -- ^ The parse succeeded. The @i@ parameter is the input that had 70 | -- not yet been consumed (if any) when the parse succeeded. 71 | 72 | instance (Show i, Show r) => Show (IResult i r) where 73 | showsPrec d ir = showParen (d > 10) $ 74 | case ir of 75 | (Fail t stk msg) -> showString "Fail" . f t . f stk . f msg 76 | (Partial _) -> showString "Partial _" 77 | (Done t r) -> showString "Done" . f t . f r 78 | where f :: Show a => a -> ShowS 79 | f x = showChar ' ' . showsPrec 11 x 80 | 81 | instance (NFData i, NFData r) => NFData (IResult i r) where 82 | rnf (Fail t stk msg) = rnf t `seq` rnf stk `seq` rnf msg 83 | rnf (Partial _) = () 84 | rnf (Done t r) = rnf t `seq` rnf r 85 | {-# INLINE rnf #-} 86 | 87 | instance Functor (IResult i) where 88 | fmap _ (Fail t stk msg) = Fail t stk msg 89 | fmap f (Partial k) = Partial (fmap f . k) 90 | fmap f (Done t r) = Done t (f r) 91 | 92 | -- | The core parser type. This is parameterised over the type @i@ 93 | -- of string being processed. 94 | -- 95 | -- This type is an instance of the following classes: 96 | -- 97 | -- * 'Monad', where 'fail' throws an exception (i.e. fails) with an 98 | -- error message. 99 | -- 100 | -- * 'Functor' and 'Applicative', which follow the usual definitions. 101 | -- 102 | -- * 'MonadPlus', where 'mzero' fails (with no error message) and 103 | -- 'mplus' executes the right-hand parser if the left-hand one 104 | -- fails. When the parser on the right executes, the input is reset 105 | -- to the same state as the parser on the left started with. (In 106 | -- other words, attoparsec is a backtracking parser that supports 107 | -- arbitrary lookahead.) 108 | -- 109 | -- * 'Alternative', which follows 'MonadPlus'. 110 | newtype Parser i a = Parser { 111 | runParser :: forall r. 112 | State i -> Pos -> More 113 | -> Failure i (State i) r 114 | -> Success i (State i) a r 115 | -> IResult i r 116 | } 117 | 118 | type family State i 119 | type instance State ByteString = B.Buffer 120 | type instance State Text = T.Buffer 121 | 122 | type Failure i t r = t -> Pos -> More -> [String] -> String 123 | -> IResult i r 124 | type Success i t a r = t -> Pos -> More -> a -> IResult i r 125 | 126 | -- | Have we read all available input? 127 | data More = Complete | Incomplete 128 | deriving (Eq, Show) 129 | 130 | instance Semigroup More where 131 | c@Complete <> _ = c 132 | _ <> m = m 133 | 134 | instance Mon.Monoid More where 135 | mappend = (<>) 136 | mempty = Incomplete 137 | 138 | instance Monad (Parser i) where 139 | #if !(MIN_VERSION_base(4,13,0)) 140 | fail = Fail.fail 141 | {-# INLINE fail #-} 142 | #endif 143 | 144 | return = App.pure 145 | {-# INLINE return #-} 146 | 147 | m >>= k = Parser $ \t !pos more lose succ -> 148 | let succ' t' !pos' more' a = runParser (k a) t' pos' more' lose succ 149 | in runParser m t pos more lose succ' 150 | {-# INLINE (>>=) #-} 151 | 152 | (>>) = (*>) 153 | {-# INLINE (>>) #-} 154 | 155 | 156 | instance Fail.MonadFail (Parser i) where 157 | fail err = Parser $ \t pos more lose _succ -> lose t pos more [] msg 158 | where msg = "Failed reading: " ++ err 159 | {-# INLINE fail #-} 160 | 161 | plus :: Parser i a -> Parser i a -> Parser i a 162 | plus f g = Parser $ \t pos more lose succ -> 163 | let lose' t' _pos' more' _ctx _msg = runParser g t' pos more' lose succ 164 | in runParser f t pos more lose' succ 165 | 166 | instance MonadPlus (Parser i) where 167 | mzero = fail "mzero" 168 | {-# INLINE mzero #-} 169 | mplus = plus 170 | 171 | instance Functor (Parser i) where 172 | fmap f p = Parser $ \t pos more lose succ -> 173 | let succ' t' pos' more' a = succ t' pos' more' (f a) 174 | in runParser p t pos more lose succ' 175 | {-# INLINE fmap #-} 176 | 177 | apP :: Parser i (a -> b) -> Parser i a -> Parser i b 178 | apP d e = do 179 | b <- d 180 | a <- e 181 | return (b a) 182 | {-# INLINE apP #-} 183 | 184 | instance Applicative (Parser i) where 185 | pure v = Parser $ \t !pos more _lose succ -> succ t pos more v 186 | {-# INLINE pure #-} 187 | (<*>) = apP 188 | {-# INLINE (<*>) #-} 189 | m *> k = m >>= \_ -> k 190 | {-# INLINE (*>) #-} 191 | x <* y = x >>= \a -> y >> pure a 192 | {-# INLINE (<*) #-} 193 | 194 | instance Semigroup (Parser i a) where 195 | (<>) = plus 196 | {-# INLINE (<>) #-} 197 | 198 | instance Monoid (Parser i a) where 199 | mempty = fail "mempty" 200 | {-# INLINE mempty #-} 201 | mappend = (<>) 202 | {-# INLINE mappend #-} 203 | 204 | instance Alternative (Parser i) where 205 | empty = fail "empty" 206 | {-# INLINE empty #-} 207 | 208 | (<|>) = plus 209 | {-# INLINE (<|>) #-} 210 | 211 | many v = many_v 212 | where 213 | many_v = some_v <|> pure [] 214 | some_v = (:) <$> v <*> many_v 215 | {-# INLINE many #-} 216 | 217 | some v = some_v 218 | where 219 | many_v = some_v <|> pure [] 220 | some_v = (:) <$> v <*> many_v 221 | {-# INLINE some #-} 222 | 223 | -- | A common interface for input chunks. 224 | class Monoid c => Chunk c where 225 | type ChunkElem c 226 | -- | Test if the chunk is empty. 227 | nullChunk :: c -> Bool 228 | -- | Append chunk to a buffer. 229 | pappendChunk :: State c -> c -> State c 230 | -- | Position at the end of a buffer. The first argument is ignored. 231 | atBufferEnd :: c -> State c -> Pos 232 | -- | Return the buffer element at the given position along with its length. 233 | bufferElemAt :: c -> Pos -> State c -> Maybe (ChunkElem c, Int) 234 | -- | Map an element to the corresponding character. 235 | -- The first argument is ignored. 236 | chunkElemToChar :: c -> ChunkElem c -> Char 237 | 238 | instance Chunk ByteString where 239 | type ChunkElem ByteString = Word8 240 | nullChunk = BS.null 241 | {-# INLINE nullChunk #-} 242 | pappendChunk = B.pappend 243 | {-# INLINE pappendChunk #-} 244 | atBufferEnd _ = Pos . B.length 245 | {-# INLINE atBufferEnd #-} 246 | bufferElemAt _ (Pos i) buf 247 | | i < B.length buf = Just (B.unsafeIndex buf i, 1) 248 | | otherwise = Nothing 249 | {-# INLINE bufferElemAt #-} 250 | chunkElemToChar _ = w2c 251 | {-# INLINE chunkElemToChar #-} 252 | 253 | instance Chunk Text where 254 | type ChunkElem Text = Char 255 | nullChunk = Text.null 256 | {-# INLINE nullChunk #-} 257 | pappendChunk = T.pappend 258 | {-# INLINE pappendChunk #-} 259 | atBufferEnd _ = Pos . T.length 260 | {-# INLINE atBufferEnd #-} 261 | bufferElemAt _ (Pos i) buf 262 | | i < T.length buf = let Iter c l = T.iter buf i in Just (c, l) 263 | | otherwise = Nothing 264 | {-# INLINE bufferElemAt #-} 265 | chunkElemToChar _ = id 266 | {-# INLINE chunkElemToChar #-} 267 | -------------------------------------------------------------------------------- /Data/Attoparsec/Combinator.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP #-} 2 | #if __GLASGOW_HASKELL__ >= 702 3 | {-# LANGUAGE Trustworthy #-} -- Imports internal modules 4 | #endif 5 | -- | 6 | -- Module : Data.Attoparsec.Combinator 7 | -- Copyright : Daan Leijen 1999-2001, Bryan O'Sullivan 2007-2015 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : bos@serpentine.com 11 | -- Stability : experimental 12 | -- Portability : portable 13 | -- 14 | -- Useful parser combinators, similar to those provided by Parsec. 15 | module Data.Attoparsec.Combinator 16 | ( 17 | -- * Combinators 18 | try 19 | , () 20 | , choice 21 | , count 22 | , option 23 | , many' 24 | , many1 25 | , many1' 26 | , manyTill 27 | , manyTill' 28 | , sepBy 29 | , sepBy' 30 | , sepBy1 31 | , sepBy1' 32 | , skipMany 33 | , skipMany1 34 | , eitherP 35 | , feed 36 | , satisfyElem 37 | , endOfInput 38 | , atEnd 39 | , lookAhead 40 | ) where 41 | 42 | #if !MIN_VERSION_base(4,8,0) 43 | import Control.Applicative (Applicative(..), (<$>)) 44 | import Data.Monoid (Monoid(mappend)) 45 | #endif 46 | import Control.Applicative (Alternative(..), liftA2, many, (<|>)) 47 | import Control.Monad (MonadPlus(..)) 48 | import Data.Attoparsec.Internal.Types (Parser(..), IResult(..)) 49 | import Data.Attoparsec.Internal (endOfInput, atEnd, satisfyElem) 50 | import Data.ByteString (ByteString) 51 | import Data.Foldable (asum) 52 | import Data.Text (Text) 53 | import qualified Data.Attoparsec.Zepto as Z 54 | import Prelude hiding (succ) 55 | 56 | -- | Attempt a parse, and if it fails, rewind the input so that no 57 | -- input appears to have been consumed. 58 | -- 59 | -- This combinator is provided for compatibility with Parsec. 60 | -- attoparsec parsers always backtrack on failure. 61 | try :: Parser i a -> Parser i a 62 | try = id 63 | {-# INLINE try #-} 64 | 65 | -- | Name the parser, in case failure occurs. 66 | () :: Parser i a 67 | -> String -- ^ the name to use if parsing fails 68 | -> Parser i a 69 | p msg0 = Parser $ \t pos more lose succ -> 70 | let lose' t' pos' more' strs msg = lose t' pos' more' (msg0:strs) msg 71 | in runParser p t pos more lose' succ 72 | {-# INLINE () #-} 73 | infix 0 74 | 75 | -- | @choice ps@ tries to apply the actions in the list @ps@ in order, 76 | -- until one of them succeeds. Returns the value of the succeeding 77 | -- action. 78 | choice :: Alternative f => [f a] -> f a 79 | choice = asum 80 | {-# SPECIALIZE choice :: [Parser ByteString a] 81 | -> Parser ByteString a #-} 82 | {-# SPECIALIZE choice :: [Parser Text a] -> Parser Text a #-} 83 | {-# SPECIALIZE choice :: [Z.Parser a] -> Z.Parser a #-} 84 | 85 | -- | @option x p@ tries to apply action @p@. If @p@ fails without 86 | -- consuming input, it returns the value @x@, otherwise the value 87 | -- returned by @p@. 88 | -- 89 | -- > priority = option 0 (digitToInt <$> digit) 90 | option :: Alternative f => a -> f a -> f a 91 | option x p = p <|> pure x 92 | {-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-} 93 | {-# SPECIALIZE option :: a -> Parser Text a -> Parser Text a #-} 94 | {-# SPECIALIZE option :: a -> Z.Parser a -> Z.Parser a #-} 95 | 96 | -- | A version of 'liftM2' that is strict in the result of its first 97 | -- action. 98 | liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c 99 | liftM2' f a b = do 100 | !x <- a 101 | y <- b 102 | return (f x y) 103 | {-# INLINE liftM2' #-} 104 | 105 | -- | @many' p@ applies the action @p@ /zero/ or more times. Returns a 106 | -- list of the returned values of @p@. The value returned by @p@ is 107 | -- forced to WHNF. 108 | -- 109 | -- > word = many' letter 110 | many' :: (MonadPlus m) => m a -> m [a] 111 | many' p = many_p 112 | where many_p = some_p `mplus` return [] 113 | some_p = liftM2' (:) p many_p 114 | {-# INLINE many' #-} 115 | 116 | -- | @many1 p@ applies the action @p@ /one/ or more times. Returns a 117 | -- list of the returned values of @p@. 118 | -- 119 | -- > word = many1 letter 120 | many1 :: Alternative f => f a -> f [a] 121 | many1 p = liftA2 (:) p (many p) 122 | {-# INLINE many1 #-} 123 | 124 | -- | @many1' p@ applies the action @p@ /one/ or more times. Returns a 125 | -- list of the returned values of @p@. The value returned by @p@ is 126 | -- forced to WHNF. 127 | -- 128 | -- > word = many1' letter 129 | many1' :: (MonadPlus m) => m a -> m [a] 130 | many1' p = liftM2' (:) p (many' p) 131 | {-# INLINE many1' #-} 132 | 133 | -- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated 134 | -- by @sep@. Returns a list of the values returned by @p@. 135 | -- 136 | -- > commaSep p = p `sepBy` (char ',') 137 | sepBy :: Alternative f => f a -> f s -> f [a] 138 | sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure [] 139 | {-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s 140 | -> Parser ByteString [a] #-} 141 | {-# SPECIALIZE sepBy :: Parser Text a -> Parser Text s -> Parser Text [a] #-} 142 | {-# SPECIALIZE sepBy :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-} 143 | 144 | -- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated 145 | -- by @sep@. Returns a list of the values returned by @p@. The value 146 | -- returned by @p@ is forced to WHNF. 147 | -- 148 | -- > commaSep p = p `sepBy'` (char ',') 149 | sepBy' :: (MonadPlus m) => m a -> m s -> m [a] 150 | sepBy' p s = scan `mplus` return [] 151 | where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return []) 152 | {-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s 153 | -> Parser ByteString [a] #-} 154 | {-# SPECIALIZE sepBy' :: Parser Text a -> Parser Text s -> Parser Text [a] #-} 155 | {-# SPECIALIZE sepBy' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-} 156 | 157 | -- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated 158 | -- by @sep@. Returns a list of the values returned by @p@. 159 | -- 160 | -- > commaSep p = p `sepBy1` (char ',') 161 | sepBy1 :: Alternative f => f a -> f s -> f [a] 162 | sepBy1 p s = scan 163 | where scan = liftA2 (:) p ((s *> scan) <|> pure []) 164 | {-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s 165 | -> Parser ByteString [a] #-} 166 | {-# SPECIALIZE sepBy1 :: Parser Text a -> Parser Text s -> Parser Text [a] #-} 167 | {-# SPECIALIZE sepBy1 :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-} 168 | 169 | -- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated 170 | -- by @sep@. Returns a list of the values returned by @p@. The value 171 | -- returned by @p@ is forced to WHNF. 172 | -- 173 | -- > commaSep p = p `sepBy1'` (char ',') 174 | sepBy1' :: (MonadPlus m) => m a -> m s -> m [a] 175 | sepBy1' p s = scan 176 | where scan = liftM2' (:) p ((s >> scan) `mplus` return []) 177 | {-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s 178 | -> Parser ByteString [a] #-} 179 | {-# SPECIALIZE sepBy1' :: Parser Text a -> Parser Text s -> Parser Text [a] #-} 180 | {-# SPECIALIZE sepBy1' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-} 181 | 182 | -- | @manyTill p end@ applies action @p@ /zero/ or more times until 183 | -- action @end@ succeeds, and returns the list of values returned by 184 | -- @p@. This can be used to scan comments: 185 | -- 186 | -- > simpleComment = string "") 187 | -- 188 | -- (Note the overlapping parsers @anyChar@ and @string \"-->\"@. 189 | -- While this will work, it is not very efficient, as it will cause a 190 | -- lot of backtracking.) 191 | manyTill :: Alternative f => f a -> f b -> f [a] 192 | manyTill p end = scan 193 | where scan = (end *> pure []) <|> liftA2 (:) p scan 194 | {-# SPECIALIZE manyTill :: Parser ByteString a -> Parser ByteString b 195 | -> Parser ByteString [a] #-} 196 | {-# SPECIALIZE manyTill :: Parser Text a -> Parser Text b -> Parser Text [a] #-} 197 | {-# SPECIALIZE manyTill :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-} 198 | 199 | -- | @manyTill' p end@ applies action @p@ /zero/ or more times until 200 | -- action @end@ succeeds, and returns the list of values returned by 201 | -- @p@. This can be used to scan comments: 202 | -- 203 | -- > simpleComment = string "") 204 | -- 205 | -- (Note the overlapping parsers @anyChar@ and @string \"-->\"@. 206 | -- While this will work, it is not very efficient, as it will cause a 207 | -- lot of backtracking.) 208 | -- 209 | -- The value returned by @p@ is forced to WHNF. 210 | manyTill' :: (MonadPlus m) => m a -> m b -> m [a] 211 | manyTill' p end = scan 212 | where scan = (end >> return []) `mplus` liftM2' (:) p scan 213 | {-# SPECIALIZE manyTill' :: Parser ByteString a -> Parser ByteString b 214 | -> Parser ByteString [a] #-} 215 | {-# SPECIALIZE manyTill' :: Parser Text a -> Parser Text b -> Parser Text [a] #-} 216 | {-# SPECIALIZE manyTill' :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-} 217 | 218 | -- | Skip zero or more instances of an action. 219 | skipMany :: Alternative f => f a -> f () 220 | skipMany p = scan 221 | where scan = (p *> scan) <|> pure () 222 | {-# SPECIALIZE skipMany :: Parser ByteString a -> Parser ByteString () #-} 223 | {-# SPECIALIZE skipMany :: Parser Text a -> Parser Text () #-} 224 | {-# SPECIALIZE skipMany :: Z.Parser a -> Z.Parser () #-} 225 | 226 | -- | Skip one or more instances of an action. 227 | skipMany1 :: Alternative f => f a -> f () 228 | skipMany1 p = p *> skipMany p 229 | {-# SPECIALIZE skipMany1 :: Parser ByteString a -> Parser ByteString () #-} 230 | {-# SPECIALIZE skipMany1 :: Parser Text a -> Parser Text () #-} 231 | {-# SPECIALIZE skipMany1 :: Z.Parser a -> Z.Parser () #-} 232 | 233 | -- | Apply the given action repeatedly, returning every result. 234 | count :: Monad m => Int -> m a -> m [a] 235 | count n p = sequence (replicate n p) 236 | {-# INLINE count #-} 237 | 238 | -- | Combine two alternatives. 239 | eitherP :: (Alternative f) => f a -> f b -> f (Either a b) 240 | eitherP a b = (Left <$> a) <|> (Right <$> b) 241 | {-# INLINE eitherP #-} 242 | 243 | -- | If a parser has returned a 'T.Partial' result, supply it with more 244 | -- input. 245 | feed :: Monoid i => IResult i r -> i -> IResult i r 246 | feed (Fail t ctxs msg) d = Fail (mappend t d) ctxs msg 247 | feed (Partial k) d = k d 248 | feed (Done t r) d = Done (mappend t d) r 249 | {-# INLINE feed #-} 250 | 251 | -- | Apply a parser without consuming any input. 252 | lookAhead :: Parser i a -> Parser i a 253 | lookAhead p = Parser $ \t pos more lose succ -> 254 | let succ' t' _pos' more' = succ t' pos more' 255 | in runParser p t pos more lose succ' 256 | {-# INLINE lookAhead #-} 257 | --------------------------------------------------------------------------------