├── .ghci
├── Setup.lhs
├── benchmarks
    ├── med.txt.bz2
    ├── Makefile
    ├── Warp.hs
    ├── http-request.txt
    ├── http-response.txt
    ├── Alternative.hs
    ├── Sets.hs
    ├── json-data
    │   ├── twitter1.json
    │   ├── example.json
    │   ├── integers.json
    │   └── twitter10.json
    ├── Tiny.hs
    ├── warp-3.0.1.1
    │   ├── LICENSE
    │   └── Network
    │   │   └── Wai
    │   │       └── Handler
    │   │           └── Warp
    │   │               ├── ReadInt.hs
    │   │               └── RequestHeader.hs
    ├── Common.hs
    ├── HeadersByteString.hs
    ├── HeadersByteString
    │   └── Atto.hs
    ├── HeadersText.hs
    ├── Links.hs
    ├── Numbers.hs
    ├── Genome.hs
    ├── TextFastSet.hs
    ├── Benchmarks.hs
    └── IsSpace.hs
├── .gitignore
├── .hgignore
├── examples
    ├── Makefile
    ├── attoparsec-examples.cabal
    ├── Atto_RFC2616.hs
    ├── RFC2616.hs
    ├── Parsec_RFC2616.hs
    └── rfc2616.c
├── tests
    ├── QC
    │   ├── Text
    │   │   ├── FastSet.hs
    │   │   └── Regressions.hs
    │   ├── IPv6
    │   │   └── Types.hs
    │   ├── Simple.hs
    │   ├── Rechunked.hs
    │   ├── Combinator.hs
    │   ├── Buffer.hs
    │   ├── Common.hs
    │   ├── Text.hs
    │   └── ByteString.hs
    └── QC.hs
├── Data
    ├── Attoparsec
    │   ├── Types.hs
    │   ├── Char8.hs
    │   ├── Lazy.hs
    │   ├── Number.hs
    │   ├── Text
    │   │   └── Lazy.hs
    │   ├── ByteString
    │   │   └── Lazy.hs
    │   ├── Zepto.hs
    │   ├── Internal.hs
    │   ├── ByteString.hs
    │   ├── Internal
    │   │   └── Types.hs
    │   └── Combinator.hs
    └── Attoparsec.hs
├── internal
    └── Data
    │   └── Attoparsec
    │       ├── Internal
    │           ├── Fhthagn.hs
    │           └── Compat.hs
    │       ├── ByteString
    │           ├── FastSet.hs
    │           └── Buffer.hs
    │       └── Text
    │           ├── FastSet.hs
    │           └── Buffer.hs
├── README.md
├── cabal.haskell-ci
├── LICENSE
├── .hgtags
├── doc
    ├── whats-in-a-parser-1.md
    └── attoparsec-rewired-2.md
├── changelog.md
└── attoparsec.cabal


/.ghci:
--------------------------------------------------------------------------------
1 | :set -fobject-code -optP-include -optPdist/build/autogen/cabal_macros.h -itests
2 | 


--------------------------------------------------------------------------------
/Setup.lhs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env runhaskell
2 | > import Distribution.Simple
3 | > main = defaultMain
4 | 


--------------------------------------------------------------------------------
/benchmarks/med.txt.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haskell/attoparsec/HEAD/benchmarks/med.txt.bz2


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.hi
2 | *.o
3 | /dist/
4 | /dist-newstyle/
5 | .cabal-sandbox
6 | cabal.sandbox.config
7 | *.sw[op]
8 | 


--------------------------------------------------------------------------------
/benchmarks/Makefile:
--------------------------------------------------------------------------------
 1 | all: med.txt tiny
 2 | 
 3 | tiny: Tiny.hs
 4 | 	ghc -O --make -o $@ $<
 5 | 
 6 | %: %.bz2
 7 | 	bunzip2 -k $<
 8 | 
 9 | clean:
10 | 	-rm -f *.o *.hi tiny
11 | 


--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
 1 | \.(?:aux|eventlog|h[ip]|hs.html|log|[mt]ix|[oa]|orig|prof|ps|rej|swp)$
 2 | ~$
 3 | ^(?:.+/|)\.cabal-sandbox/
 4 | ^(?:.+/|)dist/
 5 | benchmarks/Arse
 6 | benchmarks/dist
 7 | benchmarks/med.txt
 8 | benchmarks/tiny
 9 | benchmarks/warp-[0-9]*
10 | cabal.sandbox.config
11 | examples/c-http
12 | examples/http_parser.[ch]
13 | hpc.*\.html$
14 | 
15 | syntax: glob
16 | .\#*
17 | \#*
18 | 


--------------------------------------------------------------------------------
/benchmarks/Warp.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module Warp (benchmarks) where
 4 | 
 5 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf)
 6 | import Data.ByteString (ByteString)
 7 | import Network.Wai.Handler.Warp.ReadInt (readInt)
 8 | import qualified Data.Attoparsec.ByteString.Char8 as B
 9 | 
10 | benchmarks :: Benchmark
11 | benchmarks = bgroup "warp" [
12 |     bgroup "decimal" [
13 |       bench "warp" $ nf (readInt :: ByteString -> Int) "31337"
14 |     , bench "atto" $ nf (B.parse (B.decimal :: B.Parser Int)) "31337"
15 |     ]
16 |   ]
17 | 


--------------------------------------------------------------------------------
/examples/Makefile:
--------------------------------------------------------------------------------
 1 | CC := cc
 2 | CFLAGS := -O3 -Wall
 3 | # To make the code about 6% faster:
 4 | # CFLAGS += -fwhole-program --combine
 5 | CPPFLAGS := -I$(HOME)/hg/http-parser
 6 | 
 7 | all: c-http
 8 | 
 9 | c-http: rfc2616.c http_parser.c
10 | 	$(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $^
11 | 
12 | clean:
13 | 	rm -f *.hi *.o c-http
14 | 
15 | http_parser.c: http_parser.h
16 | 	curl -O https://raw.githubusercontent.com/joyent/http-parser/master/http_parser.c
17 | 
18 | http_parser.h:
19 | 	curl -O https://raw.githubusercontent.com/joyent/http-parser/master/http_parser.h
20 | 


--------------------------------------------------------------------------------
/tests/QC/Text/FastSet.hs:
--------------------------------------------------------------------------------
 1 | module QC.Text.FastSet where
 2 | 
 3 | import Test.Tasty (TestTree)
 4 | import Test.Tasty.QuickCheck (testProperty)
 5 | import Test.QuickCheck
 6 | import qualified Data.Attoparsec.Text.FastSet as FastSet
 7 | 
 8 | membershipCorrect :: String -> String -> Property
 9 | membershipCorrect members others =
10 |     let fs = FastSet.fromList members
11 |         correct c = (c `FastSet.member` fs) == (c `elem` members)
12 |     in property $ all correct (members ++ others)
13 | 
14 | tests :: [TestTree]
15 | tests = [ testProperty "membership is correct" membershipCorrect ]
16 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Types.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.Attoparsec.Types
 3 | -- Copyright   :  Bryan O'Sullivan 2007-2015
 4 | -- License     :  BSD3
 5 | --
 6 | -- Maintainer  :  bos@serpentine.com
 7 | -- Stability   :  experimental
 8 | -- Portability :  unknown
 9 | --
10 | -- Simple, efficient parser combinators for strings, loosely based on
11 | -- the Parsec library.
12 | 
13 | module Data.Attoparsec.Types
14 |     (
15 |       Parser
16 |     , IResult(..)
17 |     , Chunk(chunkElemToChar)
18 |     ) where
19 | 
20 | import Data.Attoparsec.Internal.Types (Parser(..), IResult(..), Chunk(..))
21 | 


--------------------------------------------------------------------------------
/benchmarks/http-request.txt:
--------------------------------------------------------------------------------
 1 | GET / HTTP/1.1
 2 | Host: twitter.com
 3 | Accept: text/html, application/xhtml+xml, application/xml; q=0.9, image/webp, */*; q=0.8
 4 | Accept-Encoding: gzip,deflate,sdch
 5 | Accept-Language: en-GB,en-US;q=0.8,en;q=0.6
 6 | Cache-Control: max-age=0
 7 | Cookie: guest_id=v1%3A139; _twitter_sess=BAh7CSIKZmxhc2hJQz-e1e1; __utma=43838368.452555194.1399611824.1; __utmb=43838368; __utmc=43838368; __utmz=1399611824.1.1.utmcsr=(direct)|utmcmd=(none)
 8 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.86 Safari/537.36
 9 | 
10 | 


--------------------------------------------------------------------------------
/tests/QC.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
 2 | module Main (main) where
 3 | 
 4 | import qualified QC.Buffer as Buffer
 5 | import qualified QC.ByteString as ByteString
 6 | import qualified QC.Combinator as Combinator
 7 | import qualified QC.Simple as Simple
 8 | import qualified QC.Text as Text
 9 | import Test.Tasty (defaultMain, testGroup)
10 | 
11 | main = defaultMain tests
12 | 
13 | tests = testGroup "tests" [
14 |     testGroup "bs" ByteString.tests
15 |   , testGroup "buf" Buffer.tests
16 |   , testGroup "combinator" Combinator.tests
17 |   , testGroup "simple" Simple.tests
18 |   , testGroup "text" Text.tests
19 |   ]
20 | 


--------------------------------------------------------------------------------
/Data/Attoparsec.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.Attoparsec
 3 | -- Copyright   :  Bryan O'Sullivan 2007-2015
 4 | -- License     :  BSD3
 5 | --
 6 | -- Maintainer  :  bos@serpentine.com
 7 | -- Stability   :  experimental
 8 | -- Portability :  unknown
 9 | --
10 | -- Simple, efficient combinator parsing for
11 | -- 'Data.ByteString.ByteString' strings, loosely based on the Parsec
12 | -- library.
13 | --
14 | -- This module is deprecated. Use "Data.Attoparsec.ByteString"
15 | -- instead.
16 | 
17 | module Data.Attoparsec
18 |     {-# DEPRECATED "This module will be removed in the next major release." #-}
19 |     (
20 |       module Data.Attoparsec.ByteString
21 |     ) where
22 | 
23 | import Data.Attoparsec.ByteString
24 | 


--------------------------------------------------------------------------------
/internal/Data/Attoparsec/Internal/Fhthagn.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings,
 2 |     RecordWildCards, MagicHash, UnboxedTuples #-}
 3 | 
 4 | module Data.Attoparsec.Internal.Fhthagn
 5 |     (
 6 |       inlinePerformIO
 7 |     ) where
 8 | 
 9 | import GHC.Exts (realWorld#)
10 | import GHC.IO (IO(IO))
11 | 
12 | -- | Just like unsafePerformIO, but we inline it. Big performance gains as
13 | -- it exposes lots of things to further inlining. /Very unsafe/. In
14 | -- particular, you should do no memory allocation inside an
15 | -- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
16 | inlinePerformIO :: IO a -> a
17 | inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
18 | {-# INLINE inlinePerformIO #-}
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Welcome to attoparsec
 2 | 
 3 | attoparsec is a fast Haskell parser combinator library, aimed
 4 | particularly at dealing efficiently with network protocols and
 5 | complicated text/binary file formats.
 6 | 
 7 | # Join in!
 8 | 
 9 | I'm happy to receive bug reports, fixes, documentation enhancements,
10 | and other improvements.
11 | 
12 | Please report bugs via the
13 | [github issue tracker](https://github.com/haskell/attoparsec/issues).
14 | 
15 | Master [git repository](https://github.com/haskell/attoparsec):
16 | 
17 | * `git clone https://github.com/haskell/attoparsec.git`
18 | 
19 | Authors
20 | -------
21 | 
22 | This library was written by Bryan O'Sullivan <bos@serpentine.com>
23 | and is maintained by Ben Gamari <ben@smart-cactus.org>.
24 | 


--------------------------------------------------------------------------------
/benchmarks/http-response.txt:
--------------------------------------------------------------------------------
 1 | HTTP/1.1 200 OK
 2 | Date: Fri, 09 May 2014 04:24:57 GMT
 3 | Expires: -1
 4 | Cache-Control: private, max-age=0
 5 | Content-Type: text/html; charset=ISO-8859-1
 6 | Set-Cookie: PREF=ID=1a871afc4240db5e:FF:LM=1399609497:S=e5MZ0itEekjVwNcU; expires=Sun, 08-May-2016 04:24:57 GMT; path=/; domain=.google.com
 7 | Set-Cookie: NID=67=-WlfaDG6VSEPk7abAjrK98HBSoCD2ID6JKkUR95tEumzDmg7Fc8pSQ8; expires=Sat, 08-Nov-2014 04:24:57 GMT; path=/; domain=.google.com; HttpOnly
 8 | P3P: CP="This is not a P3P policy! See http://www.google.com/support/accounts/bin/answer.py?hl=en&answer=151657 for more info."
 9 | Server: gws
10 | X-XSS-Protection: 1; mode=block
11 | X-Frame-Options: SAMEORIGIN
12 | Alternate-Protocol: 80:quic
13 | Transfer-Encoding: chunked
14 | 
15 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Char8.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.Attoparsec.Char8
 3 | -- Copyright   :  Bryan O'Sullivan 2007-2015
 4 | -- License     :  BSD3
 5 | --
 6 | -- Maintainer  :  bos@serpentine.com
 7 | -- Stability   :  experimental
 8 | -- Portability :  unknown
 9 | --
10 | -- Simple, efficient, character-oriented combinator parsing for
11 | -- 'Data.ByteString.ByteString' strings, loosely based on the Parsec
12 | -- library.
13 | --
14 | -- This module is deprecated. Use "Data.Attoparsec.ByteString.Char8"
15 | -- instead.
16 | 
17 | module Data.Attoparsec.Char8
18 |     {-# DEPRECATED "This module will be removed in the next major release." #-}
19 |     (
20 |       module Data.Attoparsec.ByteString.Char8
21 |     ) where
22 | 
23 | import Data.Attoparsec.ByteString.Char8
24 | 


--------------------------------------------------------------------------------
/examples/attoparsec-examples.cabal:
--------------------------------------------------------------------------------
 1 | -- These examples are not intended to be installed.
 2 | -- So don't install 'em.
 3 | 
 4 | name: attoparsec-examples
 5 | version: 0
 6 | cabal-version: >=1.6
 7 | build-type: Simple
 8 | 
 9 | executable atto-rfc2616
10 |   main-is: Atto_RFC2616.hs
11 |   hs-source-dirs: .. .
12 |   ghc-options: -O2 -Wall -rtsopts
13 |   build-depends:
14 |     array,
15 |     base == 4.*,
16 |     bytestring >= 0.10.4.0,
17 |     deepseq >= 1.1,
18 |     scientific >= 0.3.1,
19 |     text >= 1.1.1.0
20 | 
21 | executable parsec-rfc2616
22 |   main-is: Parsec_RFC2616.hs
23 |   hs-source-dirs: .
24 |   ghc-options: -O2 -Wall -rtsopts
25 |   build-depends:
26 |     array,
27 |     base == 4.*,
28 |     containers,
29 |     bytestring >= 0.10.4.0,
30 |     parsec,
31 |     text >= 1.1.1.0
32 | 


--------------------------------------------------------------------------------
/benchmarks/Alternative.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | -- This benchmark reveals a huge performance regression that showed up
 4 | -- under GHC 7.8.1 (https://github.com/bos/attoparsec/issues/56).
 5 | --
 6 | -- With GHC 7.6.3 and older, this program runs in 0.04 seconds.  Under
 7 | -- GHC 7.8.1 with (<|>) inlined, time jumps to 12 seconds!
 8 | 
 9 | import Control.Applicative
10 | import Data.Text (Text)
11 | import qualified Data.Attoparsec.Text as A
12 | import qualified Data.Text as T
13 | 
14 | testParser :: Text -> Either String Int
15 | testParser f = fmap length -- avoid printing out the entire matched list
16 |         . A.parseOnly (many ((() <$ A.string "b") <|> (() <$ A.anyChar)))
17 |         $ f
18 | 
19 | main :: IO ()
20 | main = print . testParser $ T.replicate 50000 "a"
21 | 


--------------------------------------------------------------------------------
/benchmarks/Sets.hs:
--------------------------------------------------------------------------------
 1 | module Sets (benchmarks) where
 2 | 
 3 | import Test.Tasty.Bench
 4 | import Data.Char (ord)
 5 | import qualified Data.Attoparsec.Text.FastSet as FastSet
 6 | import qualified TextFastSet
 7 | import qualified Data.HashSet as HashSet
 8 | import qualified Data.IntSet as IntSet
 9 | 
10 | smallSet :: String
11 | smallSet = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']
12 | 
13 | benchmarks :: Benchmark
14 | benchmarks = bgroup "sets" [
15 |     bench "Fast" $ whnf (FastSet.member '*') (FastSet.fromList smallSet)
16 |   , bench "Hash" $ whnf (HashSet.member '*') (HashSet.fromList smallSet)
17 |   , bench "Int" $ whnf (IntSet.member (ord '*'))
18 |                   (IntSet.fromList (map ord smallSet))
19 |   , bench "TextFast" $ whnf (TextFastSet.member '*')
20 |                        (TextFastSet.fromList smallSet)
21 |   ]
22 | 


--------------------------------------------------------------------------------
/benchmarks/json-data/twitter1.json:
--------------------------------------------------------------------------------
1 | {"results":[{"from_user_id_str":"80430860","profile_image_url":"http://a2.twimg.com/profile_images/536455139/icon32_normal.png","created_at":"Wed, 26 Jan 2011 07:07:02 +0000","from_user":"kazu_yamamoto","id_str":"30159761706061824","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell Server Pages \u3063\u3066\u3001\u307e\u3060\u7d9a\u3044\u3066\u3044\u305f\u306e\u304b\uff01","id":30159761706061824,"from_user_id":80430860,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"}],"max_id":30159761706061824,"since_id":0,"refresh_url":"?since_id=30159761706061824&q=haskell","next_page":"?page=2&max_id=30159761706061824&rpp=1&q=haskell","results_per_page":1,"page":1,"completed_in":0.012606,"since_id_str":"0","max_id_str":"30159761706061824","query":"haskell"}


--------------------------------------------------------------------------------
/internal/Data/Attoparsec/Internal/Compat.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | {-# LANGUAGE MagicHash #-}
 3 | module Data.Attoparsec.Internal.Compat where
 4 | 
 5 | import Data.ByteString.Internal (ByteString(..))
 6 | import Data.Word (Word8)
 7 | import Foreign.ForeignPtr (ForeignPtr)
 8 | 
 9 | #if MIN_VERSION_bytestring(0,11,0)
10 | import Data.ByteString.Internal (plusForeignPtr)
11 | #endif
12 | 
13 | withPS :: ByteString -> (ForeignPtr Word8 -> Int -> Int -> r) -> r
14 | #if MIN_VERSION_bytestring(0,11,0)
15 | withPS (BS fp len)     kont = kont fp 0   len
16 | #else
17 | withPS (PS fp off len) kont = kont fp off len
18 | #endif
19 | {-# INLINE withPS #-}
20 | 
21 | mkPS :: ForeignPtr Word8 -> Int -> Int -> ByteString
22 | #if MIN_VERSION_bytestring(0,11,0)
23 | mkPS fp off len = BS (plusForeignPtr fp off) len
24 | #else
25 | mkPS fp off len = PS fp off len
26 | #endif
27 | {-# INLINE mkPS #-}
28 | 


--------------------------------------------------------------------------------
/benchmarks/Tiny.hs:
--------------------------------------------------------------------------------
 1 | import Control.Applicative ((<|>), many)
 2 | import Control.Monad (forM_)
 3 | import System.Environment (getArgs)
 4 | import qualified Data.Attoparsec.ByteString.Char8 as A
 5 | import qualified Data.ByteString.Char8 as B
 6 | import qualified Text.Parsec as P
 7 | import qualified Text.Parsec.ByteString as P
 8 | 
 9 | attoparsec = do
10 |   args <- getArgs
11 |   forM_ args $ \arg -> do
12 |     input <- B.readFile arg
13 |     case A.parse p input `A.feed` B.empty of
14 |       A.Done _ xs -> print (length xs)
15 |       what        -> print what
16 |  where
17 |   slow = many (A.many1 A.letter_ascii <|> A.many1 A.digit)
18 |   fast = many (A.takeWhile1 isLetter <|> A.takeWhile1 isDigit)
19 |   isDigit c  = c >= '0' && c <= '9'
20 |   isLetter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
21 |   p = fast
22 | 
23 | parsec = do
24 |   args <- getArgs
25 |   forM_ args $ \arg -> do
26 |     input <- readFile arg
27 |     case P.parse (P.many (P.many1 P.letter P.<|> P.many1 P.digit)) "" input of
28 |       Left err -> print err
29 |       Right xs -> print (length xs)
30 | 
31 | main = attoparsec
32 | 


--------------------------------------------------------------------------------
/cabal.haskell-ci:
--------------------------------------------------------------------------------
 1 | branches: master
 2 | 
 3 | -- constraint-set text-1
 4 | --   constraints: text < 2
 5 | --   ghc: < 8.4
 6 | --   tests: True
 7 | --   run-tests: True
 8 | --
 9 | -- constraint-set text-2
10 | --   constraints: text >= 2
11 | --   ghc: >= 8.4 && < 9.4
12 | --   tests: True
13 | --   run-tests: True
14 | --
15 | -- constraint-set bytestring-0.12
16 | --   -- bytestring-0.12 requires base >=4.9 (GHC 8.0)
17 | --   ghc: >= 8.0
18 | --   constraints: bytestring >= 0.12
19 | --   --
20 | --   -- The following is silently ignored here:
21 | --   --
22 | --   -- raw-project
23 | --   --   allow-newer: bytestring
24 | --   --
25 | --   tests: True
26 | --   run-tests: True
27 | --
28 | -- -- The following is meant to be for constraint-set bytestring-0.12 only,
29 | -- -- but there is currently no way to enable `allow-newer: bytestring`
30 | -- -- just for the constraint set.
31 | -- --
32 | -- -- Since core library `bytestring` is constrained to `installed`,
33 | -- -- it is not harmful to allow newer `bytestring` in the default runs
34 | -- -- as well---it will have no effect there.
35 | -- --
36 | -- raw-project
37 | --   allow-newer: bytestring
38 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Lazy.hs:
--------------------------------------------------------------------------------
 1 | -- |
 2 | -- Module      :  Data.Attoparsec.Lazy
 3 | -- Copyright   :  Bryan O'Sullivan 2007-2015
 4 | -- License     :  BSD3
 5 | --
 6 | -- Maintainer  :  bos@serpentine.com
 7 | -- Stability   :  experimental
 8 | -- Portability :  unknown
 9 | --
10 | -- Simple, efficient combinator parsing for lazy 'ByteString'
11 | -- strings, loosely based on the Parsec library.
12 | --
13 | -- This is essentially the same code as in the 'Data.Attoparsec'
14 | -- module, only with a 'parse' function that can consume a lazy
15 | -- 'ByteString' incrementally, and a 'Result' type that does not allow
16 | -- more input to be fed in.  Think of this as suitable for use with a
17 | -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'.
18 | --
19 | -- Behind the scenes, strict 'B.ByteString' values are still used
20 | -- internally to store parser input and manipulate it efficiently.
21 | -- High-performance parsers such as 'string' still expect strict
22 | -- 'B.ByteString' parameters.
23 | 
24 | module Data.Attoparsec.Lazy
25 |     (
26 |       module Data.Attoparsec.ByteString.Lazy
27 |     ) where
28 | 
29 | import Data.Attoparsec.ByteString.Lazy
30 | 


--------------------------------------------------------------------------------
/tests/QC/IPv6/Types.hs:
--------------------------------------------------------------------------------
 1 | -- -----------------------------------------------------------------------------
 2 | 
 3 | -- |
 4 | -- Module      :  Text.IPv6Addr
 5 | -- Copyright   :  Copyright © Michel Boucey 2011-2015
 6 | -- License     :  BSD-Style
 7 | -- Maintainer  :  michel.boucey@gmail.com
 8 | --
 9 | -- Dealing with IPv6 address text representations, canonization and manipulations.
10 | --
11 | 
12 | -- -----------------------------------------------------------------------------
13 | 
14 | module QC.IPv6.Types where
15 | 
16 | import qualified Data.Text as T
17 | 
18 | data IPv6Addr = IPv6Addr T.Text
19 | 
20 | instance Show IPv6Addr where
21 |     show (IPv6Addr addr) = T.unpack addr
22 | 
23 | data IPv6AddrToken
24 |     = SixteenBit T.Text  -- ^ A four hexadecimal digits group representing a 16-Bit chunk
25 |     | AllZeros           -- ^ An all zeros 16-Bit chunk
26 |     | Colon              -- ^ A separator between 16-Bit chunks
27 |     | DoubleColon        -- ^ A double-colon stands for a unique compression of many consecutive 16-Bit chunks
28 |     | IPv4Addr T.Text    -- ^ An embedded IPv4 address as representation of the last 32-Bit
29 |     deriving (Eq,Show)
30 | 


--------------------------------------------------------------------------------
/benchmarks/warp-3.0.1.1/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012 Michael Snoyman, http://www.yesodweb.com/
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/tests/QC/Simple.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
 3 | 
 4 | module QC.Simple (
 5 |       tests
 6 |     ) where
 7 | 
 8 | import Control.Applicative ((<|>))
 9 | import Data.ByteString (ByteString)
10 | import Data.List (foldl')
11 | import Data.Maybe (fromMaybe)
12 | import QC.Rechunked (rechunkBS)
13 | import Test.Tasty (TestTree)
14 | import Test.Tasty.QuickCheck (testProperty)
15 | import Test.QuickCheck (Property, counterexample, forAll)
16 | import qualified Data.Attoparsec.ByteString.Char8 as A
17 | 
18 | t_issue75 = expect issue75 "ab" (A.Done "" "b")
19 | 
20 | issue75 :: A.Parser ByteString
21 | issue75 = "a" >> ("b" <|> "")
22 | 
23 | expect :: (Show r, Eq r) => A.Parser r -> ByteString -> A.Result r -> Property
24 | expect p input wanted =
25 |   forAll (rechunkBS input) $ \in' ->
26 |     let result = parse p in'
27 |     in counterexample (show result ++ " /= " ++ show wanted) $
28 |        fromMaybe False (A.compareResults result wanted)
29 | 
30 | parse :: A.Parser r -> [ByteString] -> A.Result r
31 | parse p (x:xs) = foldl' A.feed (A.parse p x) xs
32 | parse p []     = A.parse p ""
33 | 
34 | tests :: [TestTree]
35 | tests = [
36 |       testProperty "issue75" t_issue75
37 |   ]
38 | 


--------------------------------------------------------------------------------
/examples/Atto_RFC2616.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | 
 3 | module Main (main) where
 4 | 
 5 | import Control.Applicative
 6 | import Control.Exception (bracket)
 7 | import Control.Monad (forM_)
 8 | import Data.Attoparsec.ByteString
 9 | import RFC2616
10 | import System.Environment
11 | import System.IO
12 | import qualified Data.ByteString.Char8 as B
13 | 
14 | refill :: Handle -> IO B.ByteString
15 | refill h = B.hGet h (80*1024)
16 | 
17 | listy :: FilePath -> Handle -> IO ()
18 | listy file h = do
19 |   r <- parseWith (refill h) (many request) =<< refill h
20 |   case r of
21 |     Fail _ _ msg -> hPutStrLn stderr $ file ++ ": " ++ msg
22 |     Done _ reqs  -> print (length reqs)
23 | 
24 | incrementy :: FilePath -> Handle -> IO ()
25 | incrementy file h = go (0::Int) =<< refill h
26 |  where
27 |    go !n is = do
28 |      r <- parseWith (refill h) request is
29 |      case r of
30 |        Fail _ _ msg -> hPutStrLn stderr $ file ++ ": " ++ msg
31 |        Done bs _req
32 |            | B.null bs -> do
33 |               s <- refill h
34 |               if B.null s
35 |                 then print (n+1)
36 |                 else go (n+1) s
37 |            | otherwise -> go (n+1) bs
38 | 
39 | main :: IO ()
40 | main = do
41 |   args <- getArgs
42 |   forM_ args $ \arg ->
43 |     bracket (openFile arg ReadMode) hClose $
44 |       -- listy arg
45 |       incrementy arg
46 | 


--------------------------------------------------------------------------------
/benchmarks/Common.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | {-# OPTIONS_GHC -fno-warn-orphans #-}
 3 | 
 4 | module Common (
 5 |       chunksOf
 6 |     , pathTo
 7 |     , rechunkBS
 8 |     , rechunkT
 9 |     ) where
10 | 
11 | import Control.DeepSeq (NFData(rnf))
12 | import System.Directory (doesDirectoryExist)
13 | import System.FilePath ((</>))
14 | import Text.Parsec (ParseError)
15 | import qualified Data.ByteString as B
16 | import qualified Data.ByteString.Lazy as BL
17 | import qualified Data.Text as T
18 | import qualified Data.Text.Lazy as TL
19 | 
20 | #if !MIN_VERSION_bytestring(0,10,0)
21 | import Data.ByteString.Internal (ByteString(..))
22 | 
23 | instance NFData ByteString where
24 |     rnf (PS _ _ _) = ()
25 | #endif
26 | 
27 | instance NFData ParseError where
28 |     rnf = rnf . show
29 | 
30 | chunksOf :: Int -> [a] -> [[a]]
31 | chunksOf k = go
32 |   where go xs = case splitAt k xs of
33 |                   ([],_)  -> []
34 |                   (y, ys) -> y : go ys
35 | 
36 | rechunkBS :: Int -> B.ByteString -> BL.ByteString
37 | rechunkBS n = BL.fromChunks . map B.pack . chunksOf n . B.unpack
38 | 
39 | rechunkT :: Int -> T.Text -> TL.Text
40 | rechunkT n = TL.fromChunks . map T.pack . chunksOf n . T.unpack
41 | 
42 | pathTo :: String -> IO FilePath
43 | pathTo wat = do
44 |   exists <- doesDirectoryExist "benchmarks"
45 |   return $ if exists
46 |            then "benchmarks" </> wat
47 |            else wat
48 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Lennart Kolmodin
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions
 7 | are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright
10 |    notice, this list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright
13 |    notice, this list of conditions and the following disclaimer in the
14 |    documentation and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the author nor the names of his contributors
17 |    may be used to endorse or promote products derived from this software
18 |    without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
21 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 | POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/benchmarks/HeadersByteString.hs:
--------------------------------------------------------------------------------
 1 | module HeadersByteString (headers) where
 2 | 
 3 | import Common (pathTo, rechunkBS)
 4 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf, nfIO)
 5 | import HeadersByteString.Atto (request, response)
 6 | import Network.Wai.Handler.Warp.RequestHeader (parseHeaderLines)
 7 | import qualified Data.Attoparsec.ByteString.Char8 as B
 8 | import qualified Data.Attoparsec.ByteString.Lazy as BL
 9 | import qualified Data.ByteString.Char8 as B
10 | 
11 | -- Note: In the benchmarks for parsing an http request
12 | -- from a strict bytestring, we consider warp's implementation,
13 | -- which has highly optimized code for handling the first
14 | -- line in particular. It's treatment of the headers
15 | -- is more relaxed from the treatment they are given by the
16 | -- attoparsec parser benchmarked here. Consequently, it
17 | -- is should not be possible to match its performance since
18 | -- it accepts header names with disallowed characters.
19 | 
20 | headers :: IO Benchmark
21 | headers = do
22 |   req <- B.readFile =<< pathTo "http-request.txt"
23 |   resp <- B.readFile =<< pathTo "http-response.txt"
24 |   let reql    = rechunkBS 4 req
25 |       respl   = rechunkBS 4 resp
26 |   return $ bgroup "headers" [
27 |       bgroup "B" [
28 |         bench "request" $ nf (B.parseOnly request) req
29 |       , bench "warp" $ nfIO (parseHeaderLines (B.split '\n' req))
30 |       , bench "response" $ nf (B.parseOnly response) resp
31 |       ]
32 |     , bgroup "BL" [
33 |         bench "request" $ nf (BL.parse request) reql
34 |       , bench "response" $ nf (BL.parse response) respl
35 |       ]
36 |     ]
37 | 


--------------------------------------------------------------------------------
/benchmarks/HeadersByteString/Atto.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns, OverloadedStrings #-}
 2 | {-# OPTIONS_GHC -fno-warn-missing-signatures -fno-warn-orphans #-}
 3 | module HeadersByteString.Atto
 4 |     (
 5 |       request
 6 |     , response
 7 |     ) where
 8 | 
 9 | import Control.Applicative
10 | import Control.DeepSeq (NFData(..))
11 | import Network.HTTP.Types.Version (HttpVersion, http11)
12 | import qualified Data.Attoparsec.ByteString.Char8 as B
13 | import qualified Data.ByteString.Char8 as B
14 | 
15 | instance NFData HttpVersion where
16 |     rnf !_ = ()
17 | 
18 | isHeaderChar :: Char -> Bool
19 | isHeaderChar c =
20 |   (c >= 'a' && c <= 'z') ||
21 |   (c >= 'A' && c <= 'Z') ||
22 |   (c >= '0' && c <= '9') ||
23 |   (c == '_') ||
24 |   (c == '-')
25 | 
26 | header = do
27 |   name <- B.takeWhile1 isHeaderChar <* B.char ':' <* B.skipSpace
28 |   body <- bodyLine
29 |   return (name, body)
30 | 
31 | bodyLine = B.takeTill (\c -> c == '\r' || c == '\n') <* B.endOfLine
32 | 
33 | requestLine = do
34 |   m <- (B.takeTill B.isSpace <* B.char ' ')
35 |   (p,q) <- B.break (=='?') <$> (B.takeTill B.isSpace <* B.char ' ')
36 |   v <- httpVersion
37 |   return (m,p,q,v)
38 | 
39 | httpVersion = http11 <$ "HTTP/1.1"
40 | 
41 | responseLine = (,,) <$>
42 |                (httpVersion <* B.skipSpace) <*>
43 |                (int <* B.skipSpace) <*>
44 |                bodyLine
45 | 
46 | int :: B.Parser Int
47 | int = B.decimal
48 | 
49 | request = (,) <$> (requestLine <* B.endOfLine) <*> manyheader
50 | 
51 | response = (,) <$> responseLine <*> many header
52 | 
53 | manyheader = do
54 |   c <- B.peekChar'
55 |   if c == '\r' || c == '\n'
56 |     then return []
57 |     else (:) <$> header <*> manyheader
58 | 


--------------------------------------------------------------------------------
/tests/QC/Text/Regressions.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module QC.Text.Regressions (
 4 |       tests
 5 |     ) where
 6 | 
 7 | import Data.Text (Text)
 8 | import qualified Data.Text as T
 9 | import Data.List (foldl')
10 | import Data.Maybe (fromMaybe)
11 | import Data.Char (isLower)
12 | import Data.Monoid ((<>))
13 | import QC.Rechunked (rechunkT)
14 | import Test.Tasty (TestTree)
15 | import Test.Tasty.QuickCheck (testProperty)
16 | import Test.QuickCheck (Property, counterexample, forAll)
17 | import qualified Data.Attoparsec.Text as A
18 | 
19 | 
20 | --------------------------------------------------------------------------------
21 | -- 105 was about runScanner not always returning the final state. The result
22 | -- did depend on how the data was fed to the parser.
23 | 
24 | t_issue105 :: Property
25 | t_issue105 = expect issue105 "lowER" (A.Done "ER" "low")
26 | 
27 | issue105 :: A.Parser Text
28 | issue105 = do
29 |     (_, firstFourLowercaseLetters) <- A.runScanner "" f
30 |     return $ firstFourLowercaseLetters
31 | 
32 |   where
33 |     f :: Text -> Char -> Maybe Text
34 |     f acc c = if T.length acc < 4 && isLower c
35 |         then Just $ acc <> T.singleton c
36 |         else Nothing
37 | 
38 | 
39 | expect :: (Show r, Eq r) => A.Parser r -> Text -> A.Result r -> Property
40 | expect p input wanted =
41 |   forAll (rechunkT input) $ \in' ->
42 |     let result = parse p in'
43 |     in counterexample (show result ++ " /= " ++ show wanted) $
44 |        fromMaybe False (A.compareResults result wanted)
45 | 
46 | parse :: A.Parser r -> [Text] -> A.Result r
47 | parse p (x:xs) = foldl' A.feed (A.parse p x) xs
48 | parse p []     = A.parse p ""
49 | 
50 | 
51 | tests :: [TestTree]
52 | tests = [
53 |       testProperty "issue105" t_issue105
54 |   ]
55 | 


--------------------------------------------------------------------------------
/benchmarks/HeadersText.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
 3 | module HeadersText (headers) where
 4 | 
 5 | import Common (pathTo, rechunkT)
 6 | import Control.Applicative
 7 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf)
 8 | import Data.Char (isSpace)
 9 | import qualified Data.Attoparsec.Text as T
10 | import qualified Data.Attoparsec.Text.Lazy as TL
11 | import qualified Data.Text.IO as T
12 | 
13 | header = do
14 |   name <- T.takeWhile1 (T.inClass "a-zA-Z0-9_-") <* T.char ':' <* T.skipSpace
15 |   body <- (:) <$> bodyLine <*> many (T.takeWhile1 isSpace *> bodyLine)
16 |   return (name, body)
17 | 
18 | bodyLine = T.takeTill (\c -> c == '\r' || c == '\n') <* T.endOfLine
19 | 
20 | requestLine =
21 |     (,,) <$>
22 |     (method <* T.skipSpace) <*>
23 |     (T.takeTill isSpace <* T.skipSpace) <*>
24 |     httpVersion
25 |   where method = "GET" <|> "POST"
26 | 
27 | httpVersion = "HTTP/" *> ((,) <$> (int <* T.char '.') <*> int)
28 | 
29 | responseLine = (,,) <$>
30 |                (httpVersion <* T.skipSpace) <*>
31 |                (int <* T.skipSpace) <*>
32 |                bodyLine
33 | 
34 | int :: T.Parser Int
35 | int = T.decimal
36 | 
37 | request = (,) <$> (requestLine <* T.endOfLine) <*> many header
38 | 
39 | response = (,) <$> responseLine <*> many header
40 | 
41 | headers :: IO Benchmark
42 | headers = do
43 |   req <- T.readFile =<< pathTo "http-request.txt"
44 |   resp <- T.readFile =<< pathTo "http-response.txt"
45 |   let reql    = rechunkT 4 req
46 |       respl   = rechunkT 4 resp
47 |   return $ bgroup "headers" [
48 |       bgroup "T" [
49 |         bench "request" $ nf (T.parseOnly request) req
50 |       , bench "response" $ nf (T.parseOnly response) resp
51 |       ]
52 |     , bgroup "TL" [
53 |         bench "request" $ nf (TL.parse request) reql
54 |       , bench "response" $ nf (TL.parse response) respl
55 |       ]
56 |     ]
57 | 


--------------------------------------------------------------------------------
/tests/QC/Rechunked.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | 
 3 | module QC.Rechunked (
 4 |       rechunkBS
 5 |     , rechunkT
 6 |     ) where
 7 | 
 8 | import Control.Monad (forM, forM_)
 9 | import Control.Monad.ST (ST, runST)
10 | import Data.List (unfoldr)
11 | import Test.QuickCheck (Gen, choose)
12 | import qualified Data.ByteString as B
13 | import qualified Data.Text as T
14 | import qualified Data.Vector as V
15 | import qualified Data.Vector.Generic as G
16 | import qualified Data.Vector.Generic.Mutable as M
17 | 
18 | rechunkBS :: B.ByteString -> Gen [B.ByteString]
19 | rechunkBS = fmap (map B.copy) . rechunk_ B.splitAt B.length
20 | 
21 | rechunkT :: T.Text -> Gen [T.Text]
22 | rechunkT = fmap (map T.copy) . rechunk_ T.splitAt T.length
23 | 
24 | rechunk_ :: (Int -> a -> (a,a)) -> (a -> Int) -> a -> Gen [a]
25 | rechunk_ split len xs = (unfoldr go . (,) xs) `fmap` rechunkSizes (len xs)
26 |   where go (b,r:rs)   = Just (h, (t,rs))
27 |           where (h,t) = split r b
28 |         go (_,_)      = Nothing
29 | 
30 | rechunkSizes :: Int -> Gen [Int]
31 | rechunkSizes n0 = shuffle =<< loop [] (0:repeat 1) n0
32 |   where loop _ [] _ = error "it's 2015, where's my Stream type?"
33 |         loop acc (lb:lbs) n
34 |           | n <= 0 = shuffle (reverse acc)
35 |           | otherwise = do
36 |             !i <- choose (lb,n)
37 |             loop (i:acc) lbs (n-i)
38 | 
39 | shuffle :: [Int] -> Gen [Int]
40 | shuffle (0:xs) = (0:) `fmap` shuffle xs
41 | shuffle xs = fisherYates xs
42 | 
43 | fisherYates :: [a] -> Gen [a]
44 | fisherYates xs = (V.toList . V.backpermute v) `fmap` swapIndices (G.length v)
45 |   where
46 |     v = V.fromList xs
47 |     swapIndices n0 = do
48 |         swaps <- forM [0..n] $ \i -> ((,) i) `fmap` choose (i,n)
49 |         return (runST (swapAll swaps))
50 |       where
51 |         n = n0 - 1
52 |         swapAll :: [(Int,Int)] -> ST s (V.Vector Int)
53 |         swapAll ijs = do
54 |           mv <- G.unsafeThaw (G.enumFromTo 0 n :: V.Vector Int)
55 |           forM_ ijs $ uncurry (M.swap mv)
56 |           G.unsafeFreeze mv
57 | 


--------------------------------------------------------------------------------
/tests/QC/Combinator.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP, OverloadedStrings #-}
 2 | 
 3 | module QC.Combinator where
 4 | 
 5 | #if !MIN_VERSION_base(4,8,0)
 6 | import Control.Applicative ((<*>), (<$>), (<*), (*>))
 7 | #endif
 8 | import Data.Maybe (fromJust, isJust)
 9 | import Data.Word (Word8)
10 | import QC.Common (Repack, parseBS, repackBS, toLazyBS)
11 | import Test.Tasty (TestTree)
12 | import Test.Tasty.QuickCheck (testProperty)
13 | import Test.QuickCheck
14 | import qualified Data.Attoparsec.ByteString.Char8 as P
15 | import qualified Data.Attoparsec.Combinator as C
16 | import qualified Data.ByteString as B
17 | import qualified Data.ByteString.Char8 as B8
18 | 
19 | choice :: NonEmptyList (NonEmptyList Word8) -> Gen Property
20 | choice (NonEmpty xs) = do
21 |   let ys = map (B.pack . getNonEmpty) xs
22 |   return . forAll (repackBS <$> arbitrary <*> elements ys) $
23 |     maybe False (`elem` ys) . parseBS (C.choice (map P.string ys))
24 | 
25 | count :: Positive (Small Int) -> Repack -> B.ByteString -> Bool
26 | count (Positive (Small n)) rs s =
27 |     (length <$> parseBS (C.count n (P.string s)) input) == Just n
28 |   where input = repackBS rs (B.concat (replicate (n+1) s))
29 | 
30 | lookAhead :: NonEmptyList Word8 -> Bool
31 | lookAhead (NonEmpty xs) =
32 |   let ys = B.pack xs
33 |       withLookAheadThenConsume = (\x y -> (x, y)) <$> C.lookAhead (P.string ys) <*> P.string ys
34 |       mr = parseBS withLookAheadThenConsume $ toLazyBS ys
35 |   in isJust mr && fst (fromJust mr) == snd (fromJust mr)
36 | 
37 | match :: Int -> NonNegative Int -> NonNegative Int -> Repack -> Bool
38 | match n (NonNegative x) (NonNegative y) rs =
39 |     parseBS (P.match parser) (repackBS rs input) == Just (input, n)
40 |   where parser = P.skipWhile (=='x') *> P.signed P.decimal <*
41 |                  P.skipWhile (=='y')
42 |         input = B.concat [
43 |             B8.replicate x 'x', B8.pack (show n), B8.replicate y 'y'
44 |           ]
45 | 
46 | tests :: [TestTree]
47 | tests = [
48 |     testProperty "choice" choice
49 |   , testProperty "count" count
50 |   , testProperty "lookAhead" lookAhead
51 |   , testProperty "match" match
52 |   ]
53 | 


--------------------------------------------------------------------------------
/benchmarks/Links.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module Links (links) where
 4 | 
 5 | import Control.Applicative
 6 | import Control.DeepSeq (NFData(..))
 7 | import Test.Tasty.Bench (Benchmark, bench, nf)
 8 | import Data.Attoparsec.ByteString as A
 9 | import Data.Attoparsec.ByteString.Char8 as A8
10 | import Data.ByteString.Char8 as B8
11 | 
12 | data Link = Link {
13 |       linkURL :: ByteString
14 |     , linkParams :: [(ByteString, ByteString)]
15 |     } deriving (Eq, Show)
16 | 
17 | instance NFData Link where
18 |     rnf l = rnf (linkURL l) `seq` rnf (linkParams l)
19 | 
20 | link :: Parser Link
21 | link = Link <$> url <*> many (char8 ';' *> skipSpace *> param)
22 |   where url = char8 '<' *> A8.takeTill (=='>') <* char8 '>' <* skipSpace
23 | 
24 | param :: Parser (ByteString, ByteString)
25 | param = do
26 |   name <- paramName
27 |   skipSpace *> "=" *> skipSpace
28 |   c <- peekChar'
29 |   let isTokenChar = A.inClass "!#$%&'()*+./0-9:<=>?@a-zA-Z[]^_`{|}~-"
30 |   val <- case c of
31 |            '"' -> quotedString
32 |            _   -> A.takeWhile isTokenChar
33 |   skipSpace
34 |   return (name, val)
35 | 
36 | data Quot = Literal | Backslash
37 | 
38 | quotedString :: Parser ByteString
39 | quotedString = char '"' *> (fixup <$> body) <* char '"'
40 |   where body = A8.scan Literal $ \s c ->
41 |           case (s,c) of
42 |             (Literal,  '\\') -> backslash
43 |             (Literal,  '"')  -> Nothing
44 |             _                -> literal
45 |         literal   = Just Literal
46 |         backslash = Just Backslash
47 |         fixup = B8.pack . go . B8.unpack
48 |           where go ('\\' : x@'\\' : xs) = x : go xs
49 |                 go ('\\' : x@'"' : xs)  = x : go xs
50 |                 go (x : xs)             = x : go xs
51 |                 go xs                   = xs
52 | 
53 | paramName :: Parser ByteString
54 | paramName = do
55 |   name <- A.takeWhile1 $ A.inClass "a-zA-Z0-9!#$&+-.^_`|~"
56 |   c <- peekChar
57 |   return $ case c of
58 |              Just '*' -> B8.snoc name '*'
59 |              _        -> name
60 | 
61 | links :: Benchmark
62 | links = bench "links" $ nf (A.parseOnly link) lnk
63 |   where lnk = "<https://api.github.com/search/code?q=addClass+user%3Amozilla&page=2>; rel=\"next\", <https://api.github.com/search/code?q=addClass+user%3Amozilla&page=34>; rel=\"last\""
64 | 


--------------------------------------------------------------------------------
/examples/RFC2616.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module RFC2616
 4 |     (
 5 |       Header(..)
 6 |     , Request(..)
 7 |     , Response(..)
 8 |     , request
 9 |     , response
10 |     ) where
11 | 
12 | import Control.Applicative
13 | import Data.Attoparsec.ByteString as P
14 | import Data.Attoparsec.ByteString.Char8 (char8, endOfLine, isDigit_w8)
15 | import Data.ByteString (ByteString)
16 | import Data.Word (Word8)
17 | import Data.Attoparsec.ByteString.Char8 (isEndOfLine, isHorizontalSpace)
18 | 
19 | isToken :: Word8 -> Bool
20 | isToken w = w <= 127 && notInClass "\0-\31()<>@,;:\\\"/[]?={} \t" w
21 | 
22 | skipSpaces :: Parser ()
23 | skipSpaces = satisfy isHorizontalSpace *> skipWhile isHorizontalSpace
24 | 
25 | data Request = Request {
26 |       requestMethod  :: ByteString
27 |     , requestUri     :: ByteString
28 |     , requestVersion :: ByteString
29 |     } deriving (Eq, Ord, Show)
30 | 
31 | httpVersion :: Parser ByteString
32 | httpVersion = "HTTP/" *> P.takeWhile (\c -> isDigit_w8 c || c == 46)
33 | 
34 | requestLine :: Parser Request
35 | requestLine = Request <$> (takeWhile1 isToken <* char8 ' ')
36 |                       <*> (takeWhile1 (/=32) <* char8 ' ')
37 |                       <*> (httpVersion <* endOfLine)
38 | 
39 | data Header = Header {
40 |       headerName  :: ByteString
41 |     , headerValue :: [ByteString]
42 |     } deriving (Eq, Ord, Show)
43 | 
44 | messageHeader :: Parser Header
45 | messageHeader = Header
46 |   <$> (P.takeWhile isToken <* char8 ':' <* skipWhile isHorizontalSpace)
47 |   <*> ((:) <$> (takeTill isEndOfLine <* endOfLine)
48 |            <*> (many $ skipSpaces *> takeTill isEndOfLine <* endOfLine))
49 | 
50 | request :: Parser (Request, [Header])
51 | request = (,) <$> requestLine <*> many messageHeader <* endOfLine
52 | 
53 | data Response = Response {
54 |       responseVersion :: ByteString
55 |     , responseCode    :: ByteString
56 |     , responseMsg     :: ByteString
57 |     } deriving (Eq, Ord, Show)
58 | 
59 | responseLine :: Parser Response
60 | responseLine = Response <$> (httpVersion <* char8 ' ')
61 |                         <*> (P.takeWhile isDigit_w8 <* char8 ' ')
62 |                         <*> (takeTill isEndOfLine <* endOfLine)
63 | 
64 | response :: Parser (Response, [Header])
65 | response = (,) <$> responseLine <*> many messageHeader <* endOfLine
66 | 


--------------------------------------------------------------------------------
/benchmarks/warp-3.0.1.1/Network/Wai/Handler/Warp/ReadInt.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | {-# LANGUAGE OverloadedStrings #-}
 3 | {-# LANGUAGE MagicHash #-}
 4 | {-# LANGUAGE BangPatterns #-}
 5 | 
 6 | -- Copyright     : Erik de Castro Lopo <erikd@mega-nerd.com>
 7 | -- License       : BSD3
 8 | 
 9 | module Network.Wai.Handler.Warp.ReadInt (
10 |     readInt
11 |   , readInt64
12 |   ) where
13 | 
14 | -- This function lives in its own file because the MagicHash pragma interacts
15 | -- poorly with the CPP pragma.
16 | 
17 | import Data.ByteString (ByteString)
18 | import qualified Data.ByteString as S
19 | import Data.Int (Int64)
20 | import GHC.Prim
21 | import GHC.Types
22 | import GHC.Word
23 | 
24 | {-# INLINE readInt #-}
25 | readInt :: Integral a => ByteString -> a
26 | readInt bs = fromIntegral $ readInt64 bs
27 | 
28 | -- This function is used to parse the Content-Length field of HTTP headers and
29 | -- is a performance hot spot. It should only be replaced with something
30 | -- significantly and provably faster.
31 | --
32 | -- It needs to be able work correctly on 32 bit CPUs for file sizes > 2G so we
33 | -- use Int64 here and then make a generic 'readInt' that allows conversion to
34 | -- Int and Integer.
35 | 
36 | {-# NOINLINE readInt64 #-}
37 | readInt64 :: ByteString -> Int64
38 | readInt64 bs = S.foldl' (\ !i !c -> i * 10 + fromIntegral (mhDigitToInt c)) 0
39 |              $ S.takeWhile isDigit bs
40 | 
41 | data Table = Table !Addr#
42 | 
43 | {-# NOINLINE mhDigitToInt #-}
44 | mhDigitToInt :: Word8 -> Int
45 | #if MIN_VERSION_base(4,16,0)
46 | mhDigitToInt (W8# i) = I# (word2Int# (word8ToWord# (indexWord8OffAddr# addr (word2Int# (word8ToWord# i)))))
47 | #else
48 | mhDigitToInt (W8# i) = I# (word2Int# (indexWord8OffAddr# addr (word2Int# i)))
49 | #endif
50 |   where
51 |     !(Table addr) = table
52 |     table :: Table
53 |     table = Table
54 |         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
55 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
56 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
57 |         \\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x00\x00\x00\x00\x00\x00\
58 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
59 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
60 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
61 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
62 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
63 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
64 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
65 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
66 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
67 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
68 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
69 |         \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"#
70 | 
71 | isDigit :: Word8 -> Bool
72 | isDigit w = w >= 48 && w <= 57
73 | 


--------------------------------------------------------------------------------
/benchmarks/Numbers.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# OPTIONS_GHC -fno-warn-deprecations #-}
 3 | 
 4 | module Numbers (numbers) where
 5 | 
 6 | import Test.Tasty.Bench (Benchmark, bench, bgroup, nf)
 7 | import Data.Scientific (Scientific(..))
 8 | import Text.Parsec.Text ()
 9 | import Text.Parsec.Text.Lazy ()
10 | import qualified Data.Attoparsec.ByteString.Char8 as AC
11 | import qualified Data.Attoparsec.Text as AT
12 | import qualified Data.ByteString.Char8 as BC
13 | import qualified Data.Text as T
14 | 
15 | strN, strNePos, strNeNeg :: String
16 | strN     = "1234.56789"
17 | strNePos = "1234.56789e3"
18 | strNeNeg = "1234.56789e-3"
19 | 
20 | numbers :: Benchmark
21 | numbers = bgroup "numbers" [
22 |   let !tN     = T.pack strN
23 |       !tNePos = T.pack strNePos
24 |       !tNeNeg = T.pack strNeNeg
25 |   in bgroup "Text"
26 |   [
27 |     bgroup "no power"
28 |     [ bench "double" $ nf (AT.parseOnly AT.double) tN
29 |     , bench "number" $ nf (AT.parseOnly AT.number) tN
30 |     , bench "rational" $
31 |       nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tN
32 |     , bench "scientific" $
33 |       nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tN
34 |     ]
35 |   , bgroup "positive power"
36 |     [ bench "double" $ nf (AT.parseOnly AT.double) tNePos
37 |     , bench "number" $ nf (AT.parseOnly AT.number) tNePos
38 |     , bench "rational" $
39 |       nf (AT.parseOnly (AT.rational :: AT.Parser Rational)) tNePos
40 |     , bench "scientific" $
41 |       nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNePos
42 |     ]
43 |   , bgroup "negative power"
44 |     [ bench "double" $ nf (AT.parseOnly AT.double) tNeNeg
45 |     , bench "number" $ nf (AT.parseOnly AT.number) tNeNeg
46 |     , bench "rational" $
47 |       nf (AT.parseOnly (AT.rational :: AT.Parser Rational))  tNeNeg
48 |     , bench "scientific" $
49 |       nf (AT.parseOnly (AT.rational :: AT.Parser Scientific)) tNeNeg
50 |     ]
51 |   ]
52 |   , let !bN     = BC.pack strN
53 |         !bNePos = BC.pack strNePos
54 |         !bNeNeg = BC.pack strNeNeg
55 |   in bgroup "ByteString"
56 |   [ bgroup "no power"
57 |     [ bench "double" $ nf (AC.parseOnly AC.double) bN
58 |     , bench "number" $ nf (AC.parseOnly AC.number) bN
59 |     , bench "rational" $
60 |       nf (AC.parseOnly (AC.rational :: AC.Parser Rational))   bN
61 |     , bench "scientific" $
62 |       nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bN
63 |     ]
64 |   , bgroup "positive power"
65 |     [ bench "double" $ nf (AC.parseOnly AC.double) bNePos
66 |     , bench "number" $ nf (AC.parseOnly AC.number) bNePos
67 |     , bench "rational" $
68 |       nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNePos
69 |     , bench "scientific" $
70 |       nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNePos
71 |     ]
72 |   , bgroup "negative power"
73 |     [ bench "double" $ nf (AC.parseOnly AC.double) bNeNeg
74 |     , bench "number" $ nf (AC.parseOnly AC.number) bNeNeg
75 |     , bench "rational" $
76 |       nf (AC.parseOnly (AC.rational :: AC.Parser Rational)) bNeNeg
77 |     , bench "scientific" $
78 |       nf (AC.parseOnly (AC.rational :: AC.Parser Scientific)) bNeNeg
79 |     ]
80 |   ]
81 |  ]
82 | 


--------------------------------------------------------------------------------
/benchmarks/Genome.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module Genome
 4 |     (
 5 |       genome
 6 |     ) where
 7 | 
 8 | import Control.Applicative
 9 | import Test.Tasty.Bench
10 | import Data.ByteString (ByteString)
11 | import qualified Data.ByteString.Char8 as B8
12 | import qualified Data.ByteString.Lazy.Char8 as L8
13 | import Data.Attoparsec.ByteString.Char8 as B
14 | import qualified Data.Attoparsec.ByteString.Lazy as BL
15 | import Data.Text (Text)
16 | import qualified Data.Text as T
17 | import qualified Data.Text.Lazy as TL
18 | import Data.Attoparsec.Text as T
19 | import qualified Data.Attoparsec.Text.Lazy as TL
20 | import Common (rechunkBS, rechunkT)
21 | 
22 | genome :: Benchmark
23 | genome = bgroup "genome" [
24 |     bgroup "bytestring" [
25 |         bench "s" $ nf (map (B.parse searchBS)) (B8.tails geneB)
26 |       , bench "l" $ nf (map (BL.parse searchBS)) (L8.tails geneBL)
27 |       , bgroup "CI" [
28 |           bench "s" $ nf (map (B.parse searchBSCI)) (B8.tails geneB)
29 |         , bench "l" $ nf (map (BL.parse searchBSCI)) (L8.tails geneBL)
30 |       ]
31 |     ]
32 |   , bgroup "text" [
33 |         bench "s" $ nf (map (T.parse searchT)) (T.tails geneT)
34 |       , bench "l" $ nf (map (TL.parse searchT)) (TL.tails geneTL)
35 |       , bgroup "CI" [
36 |           bench "s" $ nf (map (T.parse searchTCI)) (T.tails geneT)
37 |         , bench "l" $ nf (map (TL.parse searchTCI)) (TL.tails geneTL)
38 |       ]
39 |     ]
40 |   ]
41 |   where geneB  = B8.pack gene
42 |         geneBL = rechunkBS 4 geneB
43 |         geneT  = T.pack gene
44 |         geneTL = rechunkT 4 geneT
45 | 
46 | searchBS :: B.Parser ByteString
47 | searchBS = "caac" *> ("aaca" <|> "aact")
48 | 
49 | searchBSCI :: B.Parser ByteString
50 | searchBSCI = B.stringCI "CAAC" *> (B.stringCI "AACA" <|> B.stringCI "AACT")
51 | 
52 | searchT :: T.Parser Text
53 | searchT = "caac" *> ("aaca" <|> "aact")
54 | 
55 | searchTCI :: T.Parser Text
56 | searchTCI = T.asciiCI "CAAC" *> (T.asciiCI "AACA" <|> T.asciiCI "AACT")
57 | 
58 | -- Dictyostelium discoideum developmental protein DG1094 (gacT) gene,
59 | -- partial cds. http://www.ncbi.nlm.nih.gov/nuccore/AF081586.1
60 | 
61 | gene :: String
62 | gene = "atcgatttagaaagatacaaagatagaaccatcaataataaacaagagaagagagcaagt\
63 |        \agagatattaataaagagattgaaagagagattgaaaagaagagattatcaccaagagaa\
64 |        \agattaaatttatttggtctttcttcctcatcttcatcagtgaattcaacattaacaaga\
65 |        \tctacagcaaatattatctctacaatagacggtagtggaggtagtaatcgtaatagtaaa\
66 |        \aattatggtaatggctcatcctcctcctcaaatagaagatatagtaatactattaatcaa\
67 |        \caattacaaatgcaattacaacaacttcaaatccaacaacaacaatatcaacaaactcaa\
68 |        \caatctcaaataccattacaatatcaacaacaacaacagcaacaacaacaacaaaccact\
69 |        \acaactacaactacatcaagtggtagtaatagattctcttcaaatagatataaaccagtt\
70 |        \gatcttacacaatcatcttcaaactttcgttattcacgtgaaatttatgatgatgattat\
71 |        \tattcaaataataatttaatgatgtttggtaatgagcaaccaaatcaaacaccaatttct\
72 |        \gtatcatcttcatctgcattcacacgtcaaagatctcaaagttgctttgaaccagagaat\
73 |        \cttgtattgctacaacaacaatatcaacaatatcaacaacaacaacaacaacaacaacaa\
74 |        \attccattccaagcaaatccacaatatagtaatgctgttattgaacaaaaattggatcaa\
75 |        \attagagataccattaataatttacatagagataaccgagtctctaga"
76 | 


--------------------------------------------------------------------------------
/examples/Parsec_RFC2616.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns, CPP, FlexibleContexts #-}
 2 | 
 3 | module Main (main) where
 4 | 
 5 | import Control.Applicative
 6 | import Control.Exception (bracket)
 7 | import System.Environment (getArgs)
 8 | import System.IO (hClose, openFile, IOMode(ReadMode))
 9 | import Text.Parsec.Char (anyChar, char, satisfy, string)
10 | import Text.Parsec.Combinator (many1, manyTill, skipMany1)
11 | import Text.Parsec.Prim hiding (many, token, (<|>))
12 | import qualified Data.IntSet as S
13 | 
14 | #if 1
15 | import Text.Parsec.ByteString.Lazy (Parser, parseFromFile)
16 | import qualified Data.ByteString.Lazy as B
17 | #else
18 | import Text.Parsec.ByteString (Parser, parseFromFile)
19 | import qualified Data.ByteString as B
20 | #endif
21 | 
22 | token :: Stream s m Char => ParsecT s u m Char
23 | token = satisfy $ \c -> S.notMember (fromEnum c) set
24 |   where set = S.fromList . map fromEnum $ ['\0'..'\31'] ++ "()<>@,;:\\\"/[]?={} \t" ++ ['\128'..'\255']
25 | 
26 | isHorizontalSpace :: Char -> Bool
27 | isHorizontalSpace c = c == ' ' || c == '\t'
28 | 
29 | skipHSpaces :: Stream s m Char => ParsecT s u m ()
30 | skipHSpaces = skipMany1 (satisfy isHorizontalSpace)
31 | 
32 | data Request = Request {
33 |       _requestMethod   :: String
34 |     , _requestUri      :: String
35 |     , _requestProtocol :: String
36 |     } deriving (Eq, Ord, Show)
37 | 
38 | requestLine :: Stream s m Char => ParsecT s u m Request
39 | requestLine = do
40 |   method <- many1 token <* skipHSpaces
41 |   uri <- many1 (satisfy (not . isHorizontalSpace)) <* skipHSpaces <* string "HTTP/"
42 |   proto <- many httpVersion <* endOfLine
43 |   return $! Request method uri proto
44 |  where
45 |   httpVersion = satisfy $ \c -> c == '1' || c == '0' || c == '.'
46 | 
47 | endOfLine :: Stream s m Char => ParsecT s u m ()
48 | endOfLine = (string "\r\n" *> pure ()) <|> (char '\n' *> pure ())
49 | 
50 | data Header = Header {
51 |       _headerName  :: String
52 |     , _headerValue :: [String]
53 |     } deriving (Eq, Ord, Show)
54 | 
55 | messageHeader :: Stream s m Char => ParsecT s u m Header
56 | messageHeader = do
57 |   header <- many1 token <* char ':' <* skipHSpaces
58 |   body <- manyTill anyChar endOfLine
59 |   conts <- many $ skipHSpaces *> manyTill anyChar endOfLine
60 |   return $! Header header (body:conts)
61 | 
62 | request :: Stream s m Char => ParsecT s u m (Request, [Header])
63 | request = (,) <$> requestLine <*> many messageHeader <* endOfLine
64 | 
65 | listy :: FilePath -> IO ()
66 | listy arg = do
67 |   r <- parseFromFile (many request) arg
68 |   case r of
69 |     Left err -> putStrLn $ arg ++ ": " ++ show err
70 |     Right rs -> print (length rs)
71 | 
72 | chunky :: FilePath -> IO ()
73 | chunky arg = bracket (openFile arg ReadMode) hClose $ \h ->
74 |                loop (0::Int) =<< B.hGetContents h
75 |  where
76 |   loop !n bs
77 |       | B.null bs = print n
78 |       | otherwise = case parse myReq arg bs of
79 |                       Left err      -> putStrLn $ arg ++ ": " ++ show err
80 |                       Right (r,bs') -> loop (n+1) bs'
81 |   myReq :: Parser ((Request, [Header]), B.ByteString)
82 |   myReq = liftA2 (,) request getInput
83 | 
84 | main :: IO ()
85 | main = mapM_ f =<< getArgs
86 |   where
87 |     --f = listy
88 |     f = chunky
89 | 


--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
 1 | 187e68710ea1903fdb2fcc78964574982830adc0 0.2
 2 | 9c2929e868fdfe4cb267f1cc9d41bdab31d97640 0.2.1
 3 | 1f5944a3920322fa8e2bb2fbc9382223968a77d7 0.2.2
 4 | d4934c7fdf544d4e5b2925cada4b91d92a8948c2 0.3
 5 | 91a8667f027294bdf9df7bbe3fe1083a8d09f26c 0.4
 6 | 7ad5cecf582ce41410159040c6a47002ad61613f 0.5
 7 | d5f93303fb5169d856291fa6cde834b89866a1ed 0.5.1
 8 | ace3afe105f50a5c90ee135dc72ad306fc3b296c 0.6
 9 | 4e6ad4e2712c82714bb055c261adb376dae4c513 0.7
10 | 5504cf45797458399d3f840088bd31f84ef1d56d 0.7.1
11 | 69d4bdcb1e2b35d768ea291886b85cbadd7bf839 0.7.2
12 | f20aef71b438d9ab2db399479ae1836e5d98c31c 0.8.0.0
13 | 31829cff7c795e219d23c582921039c7415cb035 0.8.0.1
14 | 87b4dc291f43a548bf0d5338c9a6a5f5545496f7 0.8.0.2
15 | 4948902625875849d6c9d3ac56ec6670d593b1ee 0.8.1.0
16 | 8b98d031ac4d2dfe322a0e58dcc85d6ca825938a 0.8.1.1
17 | 8b98d031ac4d2dfe322a0e58dcc85d6ca825938a 0.8.1.1
18 | 74092eb3c30ebc0d878d37877526ec73c08bce96 0.8.1.1
19 | 277c0528a1e2751df6e81c0c0747909ddbae1453 0.8.2.0
20 | 277c0528a1e2751df6e81c0c0747909ddbae1453 0.8.2.0
21 | 95181c9226105a695a4fb25f0a76540ec0cc7c58 0.8.2.0
22 | 76c08ffe493594369487fd26f85487f9cc0572f7 0.8.3.0
23 | 39351efe1d73462a717ce004a2d2f75729c67f3b 0.8.4.0
24 | d04f4a8c7afdedddd4828c0844ab2b75bdb90789 0.8.5.0
25 | 034e03c9d2b74c8c52438896a0098a086809adc6 0.8.5.1
26 | 9712028a0311a3d12e213727603b5e84a2b8103c 0.8.5.2
27 | f4a4d9c2ccb365a8408ee20d2514fac99d44ecb1 0.8.5.3
28 | 39eeec699da24257b65e7787aebebbb409771812 0.8.6.0
29 | cbfbab5a33eb7522af4dd16b4306ef848b5b2350 0.8.6.1
30 | 7de1dedc888995de3188c757b872ac364cfd1c49 0.9.0.0
31 | 30d71153204a79666638fcb4765115cbfc8d2d93 0.9.1.0
32 | 61f6daa5cd357349143e57b8530bd9ba7f55fdf6 0.9.1.1
33 | e539c7f6c6ceaa79318177cc3bc149d7b6e7c6c1 0.9.1.2
34 | 3033c0af33fa3b83d002eb9150acf8949e56f19f 0.10.0.0
35 | 3033c0af33fa3b83d002eb9150acf8949e56f19f 0.10.0.0
36 | d39f15e57df591fb785272ba5d3acf0d70fcb0ff 0.10.0.0
37 | 33c33d8467265f9f2226a351a59b616a5ace9fbb 0.10.0.1
38 | 066fc7a19fc07c3b7c50e9b591af3dc5591742a1 0.10.0.2
39 | 80fa1e1f6e122d3de65ce3d6cd9f3aceaa108977 0.10.0.3
40 | 80fa1e1f6e122d3de65ce3d6cd9f3aceaa108977 0.10.0.3
41 | 940330f992c8130d8e17b99f93d570d454c0fa4d 0.10.0.3
42 | 5702ca99c6ac5cfdd8a864c0e4c61a4db1158d7a 0.10.1.0
43 | e8bc38ea66812bf5f24599d3f33480411084affb 0.10.1.1
44 | f0bc0f79b70b97a856042386aa8856ac1cad88f1 0.10.2.0
45 | 030191aa39c97afcad17d365ed114a2eedc04ad4 0.10.3.0
46 | ec23283e88c00af34c3144a8775ca4358daea3f7 0.10.4.0
47 | 6f4b317a12acdd05d67fd9ec55d3defaf37a01eb 0.11.1.0
48 | 48809d7716468fe2bbc47b191b1d64358daaf6d1 0.11.2.1
49 | 856a772f83a6e382bbcfe53e221a5a92623b107c 0.11.3.0
50 | a3c7a521fd58595d6a470ab890cc54a80dd45bd7 0.11.3.1
51 | 38326ce07aca6c8c3a014b0d454d2a5bfdbd6a52 0.11.3.2
52 | 694039f4fc157a8ca5dce37959391615b6c3f535 0.11.3.3
53 | 1fe92d1838fb1b1afcee1ca18d4970cf7e01956e 0.11.3.4
54 | 68203a4cb847da905fa44b5aa1942b99d5867ab4 0.12.0.0
55 | c9c2b05707b2fffcd749d5817e3e0d04e6ad6d0d 0.12.1.0
56 | 9157c2b4c35aa5f683b4ac51e6ff52c696bc22e2 0.12.1.1
57 | 682ba515f7ba62500ad79371b37b90932e0c0e3d 0.12.1.2
58 | 2066a72b79b478f59a9c7f1839c522fc59ba9af7 0.12.1.3
59 | ef875ad3bebef2b6fb1bd7063de6a46a06d0d4b4 0.12.1.4
60 | dc1fcff28d13a508f677c7b06af6420877842493 0.12.1.5
61 | 013e04e4526bfd5e9e4a611b42e6f03c413bea91 0.12.1.6
62 | 0ddbce995b77b8db391b4d6519f13feddf4b5bf3 0.13.0.0
63 | 46c16463ec20ad46c9cd0bfb1050c4268f34b749 0.13.0.1
64 | 


--------------------------------------------------------------------------------
/benchmarks/json-data/example.json:
--------------------------------------------------------------------------------
 1 | {"web-app": {
 2 |   "servlet": [   
 3 |     {
 4 |       "servlet-name": "cofaxCDS",
 5 |       "servlet-class": "org.cofax.cds.CDSServlet",
 6 |       "init-param": {
 7 |         "configGlossary:installationAt": "Philadelphia, PA",
 8 |         "configGlossary:adminEmail": "ksm@pobox.com",
 9 |         "configGlossary:poweredBy": "Cofax",
10 |         "configGlossary:poweredByIcon": "/images/cofax.gif",
11 |         "configGlossary:staticPath": "/content/static",
12 |         "templateProcessorClass": "org.cofax.WysiwygTemplate",
13 |         "templateLoaderClass": "org.cofax.FilesTemplateLoader",
14 |         "templatePath": "templates",
15 |         "templateOverridePath": "",
16 |         "defaultListTemplate": "listTemplate.htm",
17 |         "defaultFileTemplate": "articleTemplate.htm",
18 |         "useJSP": false,
19 |         "jspListTemplate": "listTemplate.jsp",
20 |         "jspFileTemplate": "articleTemplate.jsp",
21 |         "cachePackageTagsTrack": 200,
22 |         "cachePackageTagsStore": 200,
23 |         "cachePackageTagsRefresh": 60,
24 |         "cacheTemplatesTrack": 100,
25 |         "cacheTemplatesStore": 50,
26 |         "cacheTemplatesRefresh": 15,
27 |         "cachePagesTrack": 200,
28 |         "cachePagesStore": 100,
29 |         "cachePagesRefresh": 10,
30 |         "cachePagesDirtyRead": 10,
31 |         "searchEngineListTemplate": "forSearchEnginesList.htm",
32 |         "searchEngineFileTemplate": "forSearchEngines.htm",
33 |         "searchEngineRobotsDb": "WEB-INF/robots.db",
34 |         "useDataStore": true,
35 |         "dataStoreClass": "org.cofax.SqlDataStore",
36 |         "redirectionClass": "org.cofax.SqlRedirection",
37 |         "dataStoreName": "cofax",
38 |         "dataStoreDriver": "com.microsoft.jdbc.sqlserver.SQLServerDriver",
39 |         "dataStoreUrl": "jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon",
40 |         "dataStoreUser": "sa",
41 |         "dataStorePassword": "dataStoreTestQuery",
42 |         "dataStoreTestQuery": "SET NOCOUNT ON;select test='test';",
43 |         "dataStoreLogFile": "/usr/local/tomcat/logs/datastore.log",
44 |         "dataStoreInitConns": 10,
45 |         "dataStoreMaxConns": 100,
46 |         "dataStoreConnUsageLimit": 100,
47 |         "dataStoreLogLevel": "debug",
48 |         "maxUrlLength": 500}},
49 |     {
50 |       "servlet-name": "cofaxEmail",
51 |       "servlet-class": "org.cofax.cds.EmailServlet",
52 |       "init-param": {
53 |       "mailHost": "mail1",
54 |       "mailHostOverride": "mail2"}},
55 |     {
56 |       "servlet-name": "cofaxAdmin",
57 |       "servlet-class": "org.cofax.cds.AdminServlet"},
58 |  
59 |     {
60 |       "servlet-name": "fileServlet",
61 |       "servlet-class": "org.cofax.cds.FileServlet"},
62 |     {
63 |       "servlet-name": "cofaxTools",
64 |       "servlet-class": "org.cofax.cms.CofaxToolsServlet",
65 |       "init-param": {
66 |         "templatePath": "toolstemplates/",
67 |         "log": 1,
68 |         "logLocation": "/usr/local/tomcat/logs/CofaxTools.log",
69 |         "logMaxSize": "",
70 |         "dataLog": 1,
71 |         "dataLogLocation": "/usr/local/tomcat/logs/dataLog.log",
72 |         "dataLogMaxSize": "",
73 |         "removePageCache": "/content/admin/remove?cache=pages&id=",
74 |         "removeTemplateCache": "/content/admin/remove?cache=templates&id=",
75 |         "fileTransferFolder": "/usr/local/tomcat/webapps/content/fileTransferFolder",
76 |         "lookInContext": 1,
77 |         "adminGroupID": 4,
78 |         "betaServer": true}}],
79 |   "servlet-mapping": {
80 |     "cofaxCDS": "/",
81 |     "cofaxEmail": "/cofaxutil/aemail/*",
82 |     "cofaxAdmin": "/admin/*",
83 |     "fileServlet": "/static/*",
84 |     "cofaxTools": "/tools/*"},
85 |  
86 |   "taglib": {
87 |     "taglib-uri": "cofax.tld",
88 |     "taglib-location": "/WEB-INF/tlds/cofax.tld"}}}
89 | 


--------------------------------------------------------------------------------
/tests/QC/Buffer.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings,
  2 |     TypeSynonymInstances #-}
  3 | 
  4 | module QC.Buffer (tests) where
  5 | 
  6 | #if !MIN_VERSION_base(4,8,0)
  7 | import Control.Applicative ((<$>))
  8 | import Data.Monoid (Monoid(mconcat))
  9 | #endif
 10 | import QC.Common ()
 11 | import Test.Tasty (TestTree)
 12 | import Test.Tasty.QuickCheck (testProperty)
 13 | import Test.QuickCheck
 14 | import qualified Data.Attoparsec.ByteString.Buffer as BB
 15 | import qualified Data.Attoparsec.Text.Buffer as BT
 16 | import qualified Data.ByteString as B
 17 | import qualified Data.ByteString.Unsafe as B
 18 | import qualified Data.Text as T
 19 | import qualified Data.Text.Unsafe as T
 20 | 
 21 | data BP t b = BP [t] !t !b
 22 |             deriving (Eq, Show)
 23 | 
 24 | type BPB = BP B.ByteString BB.Buffer
 25 | type BPT = BP T.Text BT.Buffer
 26 | 
 27 | instance Arbitrary BPB where
 28 |   arbitrary = do
 29 |     bss <- arbitrary
 30 |     return $! toBP BB.buffer bss
 31 | 
 32 |   shrink (BP bss _ _) = toBP BB.buffer <$> shrink bss
 33 | 
 34 | instance Arbitrary BPT where
 35 |   arbitrary = do
 36 |     bss <- arbitrary
 37 |     return $! toBP BT.buffer bss
 38 | 
 39 |   shrink (BP bss _ _) = toBP BT.buffer <$> shrink bss
 40 | 
 41 | toBP :: (Monoid a, Monoid b) => (a -> b) -> [a] -> BP a b
 42 | toBP buf bss = BP bss (mconcat bss) (mconcat (map buf bss))
 43 | 
 44 | b_unbuffer :: BPB -> Property
 45 | b_unbuffer (BP _ts t buf) = t === BB.unbuffer buf
 46 | 
 47 | t_unbuffer :: BPT -> Property
 48 | t_unbuffer (BP _ts t buf) = t === BT.unbuffer buf
 49 | 
 50 | -- This test triggers both branches in Data.Attoparsec.Text.Buffer.append
 51 | -- and checks that Data.Text.Array.copyI manipulations are correct.
 52 | t_unbuffer_three :: Property
 53 | t_unbuffer_three = t_unbuffer $ toBP BT.buffer [t, t, t]
 54 |   where
 55 |     -- Make it long enough to increase chances of a segmentation fault
 56 |     t = T.replicate 1000 "\0"
 57 | 
 58 | b_length :: BPB -> Property
 59 | b_length (BP _ts t buf) = B.length t === BB.length buf
 60 | 
 61 | t_length :: BPT -> Property
 62 | t_length (BP _ts t buf) = BT.lengthCodeUnits t === BT.length buf
 63 | 
 64 | b_unsafeIndex :: BPB -> Gen Property
 65 | b_unsafeIndex (BP _ts t buf) = do
 66 |   let l = B.length t
 67 |   i <- choose (0,l-1)
 68 |   return $ l === 0 .||. B.unsafeIndex t i === BB.unsafeIndex buf i
 69 | 
 70 | t_iter :: BPT -> Gen Property
 71 | t_iter (BP _ts t buf) = do
 72 |   let l = BT.lengthCodeUnits t
 73 |   i <- choose (0,l-1)
 74 |   let it (T.Iter c q) = (c,q)
 75 |   return $ l === 0 .||. it (T.iter t i) === it (BT.iter buf i)
 76 | 
 77 | t_iter_ :: BPT -> Gen Property
 78 | t_iter_ (BP _ts t buf) = do
 79 |   let l = BT.lengthCodeUnits t
 80 |   i <- choose (0,l-1)
 81 |   return $ l === 0 .||. T.iter_ t i === BT.iter_ buf i
 82 | 
 83 | b_unsafeDrop :: BPB -> Gen Property
 84 | b_unsafeDrop (BP _ts t buf) = do
 85 |   i <- choose (0, B.length t)
 86 |   return $ B.unsafeDrop i t === BB.unsafeDrop i buf
 87 | 
 88 | t_dropCodeUnits :: BPT -> Gen Property
 89 | t_dropCodeUnits (BP _ts t buf) = do
 90 |   i <- choose (0, BT.lengthCodeUnits t)
 91 |   return $ dropCodeUnits i t === BT.dropCodeUnits i buf
 92 |   where
 93 | #if MIN_VERSION_text(2,0,0)
 94 |     dropCodeUnits = T.dropWord8
 95 | #else
 96 |     dropCodeUnits = T.dropWord16
 97 | #endif
 98 | 
 99 | tests :: [TestTree]
100 | tests = [
101 |     testProperty "b_unbuffer" b_unbuffer
102 |   , testProperty "t_unbuffer" t_unbuffer
103 |   , testProperty "t_unbuffer_three" t_unbuffer_three
104 |   , testProperty "b_length" b_length
105 |   , testProperty "t_length" t_length
106 |   , testProperty "b_unsafeIndex" b_unsafeIndex
107 |   , testProperty "t_iter" t_iter
108 |   , testProperty "t_iter_" t_iter_
109 |   , testProperty "b_unsafeDrop" b_unsafeDrop
110 |   , testProperty "t_dropCodeUnits" t_dropCodeUnits
111 |   ]
112 | 


--------------------------------------------------------------------------------
/tests/QC/Common.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP, FlexibleInstances #-}
  2 | {-# OPTIONS_GHC -fno-warn-orphans #-}
  3 | module QC.Common
  4 |     (
  5 |       ASCII(..)
  6 |     , parseBS
  7 |     , parseT
  8 |     , toLazyBS
  9 |     , toStrictBS
 10 |     , Repack
 11 |     , repackBS
 12 |     , repackBS_
 13 |     , repackT
 14 |     , repackT_
 15 |     , liftOp
 16 |     ) where
 17 | 
 18 | #if !MIN_VERSION_base(4,8,0)
 19 | import Control.Applicative ((<*>), (<$>))
 20 | #endif
 21 | import Data.Char (isAlpha)
 22 | import Test.QuickCheck
 23 | import Test.QuickCheck.Unicode (shrinkChar, string)
 24 | import qualified Data.ByteString as B
 25 | import qualified Data.ByteString.Lazy as BL
 26 | import qualified Data.Text as T
 27 | import qualified Data.Text.Lazy as TL
 28 | import qualified Data.Attoparsec.ByteString.Lazy as BL
 29 | import qualified Data.Attoparsec.Text.Lazy as TL
 30 | 
 31 | #if !MIN_VERSION_base(4,4,0)
 32 | -- This should really be a dependency on the random package :-(
 33 | instance Random Word8 where
 34 |   randomR = integralRandomR
 35 |   random = randomR (minBound,maxBound)
 36 | 
 37 | instance Arbitrary Word8 where
 38 |     arbitrary = choose (minBound, maxBound)
 39 | #endif
 40 | 
 41 | parseBS :: BL.Parser r -> BL.ByteString -> Maybe r
 42 | parseBS p = BL.maybeResult . BL.parse p
 43 | 
 44 | parseT :: TL.Parser r -> TL.Text -> Maybe r
 45 | parseT p = TL.maybeResult . TL.parse p
 46 | 
 47 | toStrictBS :: BL.ByteString -> B.ByteString
 48 | toStrictBS = B.concat . BL.toChunks
 49 | 
 50 | toLazyBS :: B.ByteString -> BL.ByteString
 51 | toLazyBS = BL.fromChunks . (:[])
 52 | 
 53 | instance Arbitrary B.ByteString where
 54 |     arbitrary = B.pack <$> arbitrary
 55 |     shrink = map B.pack . shrink . B.unpack
 56 | 
 57 | instance Arbitrary BL.ByteString where
 58 |     arbitrary = repackBS <$> arbitrary <*> arbitrary
 59 |     shrink = map BL.pack . shrink . BL.unpack
 60 | 
 61 | newtype ASCII a = ASCII { fromASCII :: a }
 62 |                   deriving (Eq, Ord, Show)
 63 | 
 64 | instance Arbitrary (ASCII B.ByteString) where
 65 |     arbitrary = (ASCII . B.pack) <$> listOf (choose (0,127))
 66 |     shrink = map (ASCII . B.pack) . shrink . B.unpack . fromASCII
 67 | 
 68 | instance Arbitrary (ASCII BL.ByteString) where
 69 |     arbitrary = ASCII <$> (repackBS <$> arbitrary <*> (fromASCII <$> arbitrary))
 70 |     shrink = map (ASCII . BL.pack) . shrink . BL.unpack . fromASCII
 71 | 
 72 | type Repack = NonEmptyList (Positive (Small Int))
 73 | 
 74 | repackBS :: Repack -> B.ByteString -> BL.ByteString
 75 | repackBS (NonEmpty bs) =
 76 |     BL.fromChunks . repackBS_ (map (getSmall . getPositive) bs)
 77 | 
 78 | repackBS_ :: [Int] -> B.ByteString -> [B.ByteString]
 79 | repackBS_ = go . cycle
 80 |   where go (b:bs) s
 81 |           | B.null s = []
 82 |           | otherwise = let (h,t) = B.splitAt b s
 83 |                         in h : go bs t
 84 |         go _ _ = error "unpossible"
 85 | 
 86 | instance Arbitrary T.Text where
 87 |     arbitrary = T.pack <$> string
 88 |     shrink    = map T.pack . shrinkList shrinkChar . T.unpack
 89 | 
 90 | instance Arbitrary TL.Text where
 91 |     arbitrary = TL.pack <$> string
 92 |     shrink    = map TL.pack . shrinkList shrinkChar . TL.unpack
 93 | 
 94 | repackT :: Repack -> T.Text -> TL.Text
 95 | repackT (NonEmpty bs) =
 96 |     TL.fromChunks . repackT_ (map (getSmall . getPositive) bs)
 97 | 
 98 | repackT_ :: [Int] -> T.Text -> [T.Text]
 99 | repackT_ = go . cycle
100 |   where go (b:bs) s
101 |           | T.null s = []
102 |           | otherwise = let (h,t) = T.splitAt b s
103 |                         in h : go bs t
104 |         go _ _ = error "unpossible"
105 | 
106 | liftOp :: (Show a, Testable prop) =>
107 |           String -> (a -> a -> prop) -> a -> a -> Property
108 | liftOp name f x y = counterexample desc (f x y)
109 |   where op = case name of
110 |                (c:_) | isAlpha c -> " `" ++ name ++ "` "
111 |                      | otherwise -> " " ++ name ++ " "
112 |                _ -> " ??? "
113 |         desc = "not (" ++ show x ++ op ++ show y ++ ")"
114 | 


--------------------------------------------------------------------------------
/internal/Data/Attoparsec/ByteString/FastSet.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, MagicHash #-}
  2 | 
  3 | -----------------------------------------------------------------------------
  4 | -- |
  5 | -- Module      :  Data.Attoparsec.ByteString.FastSet
  6 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  7 | -- License     :  BSD3
  8 | --
  9 | -- Maintainer  :  bos@serpentine.com
 10 | -- Stability   :  experimental
 11 | -- Portability :  unknown
 12 | --
 13 | -- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The
 14 | -- set representation is unboxed for efficiency.  For small sets, we
 15 | -- test for membership using a binary search.  For larger sets, we use
 16 | -- a lookup table.
 17 | --
 18 | -----------------------------------------------------------------------------
 19 | module Data.Attoparsec.ByteString.FastSet
 20 |     (
 21 |     -- * Data type
 22 |       FastSet
 23 |     -- * Construction
 24 |     , fromList
 25 |     , set
 26 |     -- * Lookup
 27 |     , memberChar
 28 |     , memberWord8
 29 |     -- * Debugging
 30 |     , fromSet
 31 |     -- * Handy interface
 32 |     , charClass
 33 |     ) where
 34 | 
 35 | import Data.Bits ((.&.), (.|.), unsafeShiftL)
 36 | import Foreign.Marshal.Utils (fillBytes)
 37 | import Foreign.Storable (peekByteOff, pokeByteOff)
 38 | import GHC.Exts (Int(I#), iShiftRA#)
 39 | import GHC.Word (Word8)
 40 | import qualified Data.ByteString as B
 41 | import qualified Data.ByteString.Char8 as B8
 42 | import qualified Data.ByteString.Internal as I
 43 | import qualified Data.ByteString.Unsafe as U
 44 | 
 45 | data FastSet = Sorted { fromSet :: !B.ByteString }
 46 |              | Table  { fromSet :: !B.ByteString }
 47 |     deriving (Eq, Ord)
 48 | 
 49 | instance Show FastSet where
 50 |     show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s)
 51 |     show (Table _) = "FastSet Table"
 52 | 
 53 | -- | The lower bound on the size of a lookup table.  We choose this to
 54 | -- balance table density against performance.
 55 | tableCutoff :: Int
 56 | tableCutoff = 8
 57 | 
 58 | -- | Create a set.
 59 | set :: B.ByteString -> FastSet
 60 | set s | B.length s < tableCutoff = Sorted . B.sort $ s
 61 |       | otherwise                = Table . mkTable $ s
 62 | 
 63 | fromList :: [Word8] -> FastSet
 64 | fromList = set . B.pack
 65 | 
 66 | data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8
 67 | 
 68 | shiftR :: Int -> Int -> Int
 69 | shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
 70 | 
 71 | index :: Int -> I
 72 | index i = I (i `shiftR` 3) (1 `unsafeShiftL` (i .&. 7))
 73 | {-# INLINE index #-}
 74 | 
 75 | -- | Check the set for membership.
 76 | memberWord8 :: Word8 -> FastSet -> Bool
 77 | memberWord8 w (Table t)  =
 78 |     let I byte bit = index (fromIntegral w)
 79 |     in  U.unsafeIndex t byte .&. bit /= 0
 80 | memberWord8 w (Sorted s) = search 0 (B.length s - 1)
 81 |     where search lo hi
 82 |               | hi < lo = False
 83 |               | otherwise =
 84 |                   let mid = (lo + hi) `quot` 2
 85 |                   in case compare w (U.unsafeIndex s mid) of
 86 |                        GT -> search (mid + 1) hi
 87 |                        LT -> search lo (mid - 1)
 88 |                        _ -> True
 89 | 
 90 | -- | Check the set for membership.  Only works with 8-bit characters:
 91 | -- characters above code point 255 will give wrong answers.
 92 | memberChar :: Char -> FastSet -> Bool
 93 | memberChar c = memberWord8 (I.c2w c)
 94 | {-# INLINE memberChar #-}
 95 | 
 96 | mkTable :: B.ByteString -> B.ByteString
 97 | mkTable s = I.unsafeCreate 32 $ \t -> do
 98 |             fillBytes t 0 32
 99 |             U.unsafeUseAsCStringLen s $ \(p, l) ->
100 |               let loop n | n == l = return ()
101 |                          | otherwise = do
102 |                     c <- peekByteOff p n :: IO Word8
103 |                     let I byte bit = index (fromIntegral c)
104 |                     prev <- peekByteOff t byte :: IO Word8
105 |                     pokeByteOff t byte (prev .|. bit)
106 |                     loop (n + 1)
107 |               in loop 0
108 | 
109 | charClass :: String -> FastSet
110 | charClass = set . B8.pack . go
111 |     where go (a:'-':b:xs) = [a..b] ++ go xs
112 |           go (x:xs) = x : go xs
113 |           go _ = ""
114 | 


--------------------------------------------------------------------------------
/doc/whats-in-a-parser-1.md:
--------------------------------------------------------------------------------
 1 | My goal in working on the
 2 | [new GHC I/O manager](http://www.serpentine.com/blog/2010/01/22/new-ghc-io-manager-first-benchmark-numbers/)
 3 | has been to get the Haskell network stack into a state where it could
 4 | be used to attack high-performance and scalable networking problems,
 5 | domains in which it has historically been weak.
 6 | 
 7 | While it's encouraging to have an excellent networking stack (Johan
 8 | and I now have this thoroughly in hand), the next thing I'd look for
 9 | is libraries to help build networked applications.  One of the
10 | fundamental things that such apps need to do well is parse data, be it
11 | received from the network or read from files.
12 | 
13 | The Haskell parsing library of first resort has for years been
14 | [Parsec](http://www.haskell.org/haskellwiki/Parsec).  While other
15 | capable libraries exist
16 | (e.g. [polyparse](http://hackage.haskell.org/package/polyparse) and
17 | [uu-parsinglib](http://hackage.haskell.org/package/uu-parsinglib)),
18 | they don't appear to see much use.
19 | 
20 | As appealing as Parsec's API is, it has a few problems:
21 | 
22 | * Parsec 2 is slow, and it has high memory overhead, due to its use of
23 |   Haskell's `String` type for tokens.  Parsec 3 can use the more
24 |   efficient `ByteString` type (which is in any case much more
25 |   appropriate for networked applications that deal in octets), but it
26 |   achieves this flexibility at the cost of being even slower than
27 |   Parsec 2.
28 | 
29 | * Parsec's API demands that all of a parser's input be available at
30 |   once.  People usually work around this by feeding a Parsec parser
31 |   with lazily read data, but lazy I/O is at odds with my goal of
32 |   writing solid networked code.
33 | 
34 | What properties should a parsing library for networked applications
35 | ideally possess?  There are a few obvious desiderata that have been
36 | well known for years. For example, it's important to have an appealing
37 | API and programming model.  Parsec squarely fits this desire.
38 |   
39 | Performance is also a big consideration.  Ideally, a parsing library
40 | would be fast enough that you wouldn't feel any real need for either
41 | of the following:
42 | 
43 | * A few weeks to write an insane hand-bummed parser.
44 |   
45 | * Mechanical parser generators or lexers
46 |   (e.g. [happy](http://www.haskell.org/happy/) or
47 |   [alex](http://www.haskell.org/alex/)).
48 |   
49 | There are some additional important constraints on a realistic
50 | library: it must fit well into a highly concurrent networked world
51 | full of unreliable, hostile and incompetent clients.
52 | 
53 | * High concurrency levels demand a low per-connection memory
54 |   footprint.
55 | 
56 | * The need to cope with poorly behaved clients requires that
57 |   applications must be able to throttle connections that are too busy,
58 |   or kill connections that are too slow or attempting to consume too
59 |   many server resources.  A good parsing library will not get in the
60 |   way of these needs.
61 | 
62 | A few years ago, I made a few half-hearted attempts to write a
63 | specialised version of Parsec, which I eventually named
64 | [Attoparsec](http://hackage.haskell.org/package/attoparsec).
65 | 
66 | I began with a stripped-down Parsec that was specialised to accept
67 | `ByteString` input.  I then extended the API to allow a parser to
68 | consume small chunks of input at a time.
69 | 
70 | Because I wasn't using Attoparsec "in anger" at the time, I made sure
71 | that my library worked (more or less), but I was not measuring its
72 | performance.
73 | 
74 | In late January of this year, I began to think about using Attoparsec
75 | as the parser for a simple HTTP server that I could use to benchmark
76 | our new GHC I/O manager code.  Clearly, I'd want the parser to perform
77 | well, or it would distort my numbers rather badly.
78 | 
79 | By coincidence, [John MacFarlane](http://johnmacfarlane.net/) emailed
80 | me around the same time, with disturbing findings: he'd tried
81 | Attoparsec, and found its performance to be *terrible*!  In fact, it
82 | was 4 to 20 times _slower_ than plain Parsec with his experimental
83 | parser and test data.  Clearly, I had some hard work to look forward
84 | to.
85 | 
86 | Happily, that work is now almost complete, and I am pleased with the
87 | results.  In the next post, I'll have some details of what this all
88 | entails.
89 | 


--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
  1 | # 0.14.4
  2 | 
  3 | * Fix a segmentation fault when built against `text-2.0`
  4 | * Restructure project to allow more convenient usage of benchmark suite
  5 | * Allow benchmarks to build with GHC 9.2
  6 | 
  7 | # 0.14.3
  8 | 
  9 | * Support for GHC 9.2.1
 10 | 
 11 | # 0.14.2
 12 | 
 13 | * Support for GHC 9.2.1
 14 | 
 15 | # 0.14.1
 16 | 
 17 | * Added `Data.Attoparsec.ByteString.getChunk`.
 18 | 
 19 | # 0.14.0
 20 | 
 21 | * Added `Data.Attoparsec.ByteString.takeWhileIncluding`.
 22 | * Make `Data.Attoparsec.{Text,ByteString}.Lazy.parseOnly` accept lazy input.
 23 | 
 24 | # 0.13.2.1
 25 | 
 26 | * Improved performance of `Data.Attoparsec.Text.asciiCI`
 27 | 
 28 | # 0.13.2.0
 29 | 
 30 | * `pure` is now strict in `Position`
 31 | 
 32 | # 0.13.1.0
 33 | 
 34 | * `runScanner` now correctly returns the final state
 35 |   (https://github.com/bos/attoparsec/issues/105).
 36 | * `Parser`, `ZeptoT`, `Buffer`, and `More` now expose `Semigroup` instances.
 37 | * `Parser`, and `ZeptoT` now expose `MonadFail` instances.
 38 | 
 39 | # 0.13.0.2
 40 | 
 41 | * Restore the fast specialised character set implementation for Text
 42 | * Move testsuite from test-framework to tasty
 43 | * Performance optimization of takeWhile and takeWhile1
 44 | 
 45 | # 0.13.0.1
 46 | 
 47 | * Fixed a bug in the implementations of inClass and notInClass for
 48 |   Text (https://github.com/bos/attoparsec/issues/103)
 49 | 
 50 | # 0.13.0.0
 51 | 
 52 | * Made the parser type in the Zepto module a monad transformer
 53 |   (needed by aeson's string unescaping parser).
 54 | 
 55 | # 0.12.1.6
 56 | 
 57 | * Fixed a case folding bug in the ByteString version of stringCI.
 58 | 
 59 | # 0.12.1.5
 60 | 
 61 | * Fixed an indexing bug in the new Text implementation of string,
 62 |   reported by Michel Boucey.
 63 | 
 64 | # 0.12.1.4
 65 | 
 66 | * Fixed a case where the string parser would consume an unnecessary
 67 |   amount of input before failing a match, when it could bail much
 68 |   earlier (https://github.com/bos/attoparsec/issues/97)
 69 | 
 70 | * Added more context to error messages
 71 |   (https://github.com/bos/attoparsec/pull/79)
 72 | 
 73 | # 0.12.1.3
 74 | 
 75 | * Fixed incorrect tracking of Text lengths
 76 |   (https://github.com/bos/attoparsec/issues/80)
 77 | 
 78 | # 0.12.1.2
 79 | 
 80 | * Fixed the incorrect tracking of capacity if the initial buffer was
 81 |   empty (https://github.com/bos/attoparsec/issues/75)
 82 | 
 83 | # 0.12.1.1
 84 | 
 85 | * Fixed a data corruption bug that occurred under some circumstances
 86 |   if a buffer grew after prompting for more input
 87 |   (https://github.com/bos/attoparsec/issues/74)
 88 | 
 89 | # 0.12.1.0
 90 | 
 91 | * Now compatible with GHC 7.9
 92 | 
 93 | * Reintroduced the Chunk class, used by the parsers package
 94 | 
 95 | # 0.12.0.0
 96 | 
 97 | * A new internal representation makes almost all real-world parsers
 98 |   faster, sometimes by big margins.  For example, parsing JSON data
 99 |   with aeson is now up to 70% faster.  These performance improvements
100 |   also come with reduced memory consumption and some new capabilities.
101 | 
102 | * The new match combinator gives both the result of a parse and the
103 |   input that it matched.
104 | 
105 | * The test suite has doubled in size.  This made it possible to switch
106 |   to the new internal representation with a decent degree of
107 |   confidence that everything was more or less working.
108 | 
109 | * The benchmark suite now contains a small family of benchmarks taken
110 |   from real-world uses of attoparsec.
111 | 
112 | * A few types that ought to have been private now are.
113 | 
114 | * A few obsolete modules and functions have been marked as deprecated.
115 |   They will be removed from the next major release.
116 | 
117 | # 0.11.3.0
118 | 
119 | * New function scientific is compatible with rational, but parses
120 |   integers more efficiently (https://github.com/bos/aeson/issues/198)
121 | 
122 | # 0.11.2.0
123 | 
124 | * The new Chunk typeclass allows for some code sharing with Ed
125 |   Kmett's parsers package: http://hackage.haskell.org/package/parsers
126 | 
127 | * New function runScanner generalises scan to return the final state
128 |   of the scanner as well as the input consumed.
129 | 
130 | # 0.11.1.0
131 | 
132 | * New dependency: the scientific package.  This allows us to parse
133 |   numbers much more efficiently.
134 | 
135 | * peekWord8', peekChar': new primitive parsers that allow
136 |   single-character lookahead.
137 | 


--------------------------------------------------------------------------------
/benchmarks/TextFastSet.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | 
  3 | ------------------------------------------------------------------------------
  4 | -- |
  5 | -- Module      :  Data.Attoparsec.FastSet
  6 | -- Copyright   :  Felipe Lessa 2010, Bryan O'Sullivan 2007-2015
  7 | -- License     :  BSD3
  8 | --
  9 | -- Maintainer  :  felipe.lessa@gmail.com
 10 | -- Stability   :  experimental
 11 | -- Portability :  unknown
 12 | --
 13 | -- Fast set membership tests for 'Char' values. We test for
 14 | -- membership using a hashtable implemented with Robin Hood
 15 | -- collision resolution. The set representation is unboxed,
 16 | -- and the characters and hashes interleaved, for efficiency.
 17 | --
 18 | --
 19 | -----------------------------------------------------------------------------
 20 | module TextFastSet
 21 |     (
 22 |     -- * Data type
 23 |       FastSet
 24 |     -- * Construction
 25 |     , fromList
 26 |     , set
 27 |     -- * Lookup
 28 |     , member
 29 |     -- * Handy interface
 30 |     , charClass
 31 |     ) where
 32 | 
 33 | import Data.Bits ((.|.), (.&.), shiftR)
 34 | import Data.Function (on)
 35 | import Data.List (sort, sortBy)
 36 | import qualified Data.Array.Base as AB
 37 | import qualified Data.Array.Unboxed as A
 38 | import qualified Data.Text as T
 39 | 
 40 | data FastSet = FastSet {
 41 |     table :: {-# UNPACK #-} !(A.UArray Int Int)
 42 |   , mask  :: {-# UNPACK #-} !Int
 43 |   }
 44 | 
 45 | data Entry = Entry {
 46 |     key          :: {-# UNPACK #-} !Char
 47 |   , initialIndex :: {-# UNPACK #-} !Int
 48 |   , index        :: {-# UNPACK #-} !Int
 49 |   }
 50 | 
 51 | offset :: Entry -> Int
 52 | offset e = index e - initialIndex e
 53 | 
 54 | resolveCollisions :: [Entry] -> [Entry]
 55 | resolveCollisions [] = []
 56 | resolveCollisions [e] = [e]
 57 | resolveCollisions (a:b:entries) = a' : resolveCollisions (b' : entries)
 58 |   where (a', b')
 59 |           | index a < index b   = (a, b)
 60 |           | offset a < offset b = (b { index=index a }, a { index=index a + 1 })
 61 |           | otherwise           = (a, b { index=index a + 1 })
 62 | 
 63 | pad :: Int -> [Entry] -> [Entry]
 64 | pad = go 0
 65 |   where -- ensure that we pad enough so that lookups beyond the
 66 |         -- last hash in the table fall within the array
 67 |         go !_ !m []          = replicate (max 1 m + 1) empty
 68 |         go  k  m (e:entries) = map (const empty) [k..i - 1] ++ e :
 69 |                                go (i + 1) (m + i - k - 1) entries
 70 |           where i            = index e
 71 |         empty                = Entry '\0' maxBound 0
 72 | 
 73 | nextPowerOf2 :: Int -> Int
 74 | nextPowerOf2 0  = 1
 75 | nextPowerOf2 x  = go (x - 1) 1
 76 |   where go y 32 = y + 1
 77 |         go y k  = go (y .|. (y `shiftR` k)) $ k * 2
 78 | 
 79 | fastHash :: Char -> Int
 80 | fastHash = fromEnum
 81 | 
 82 | fromList :: String -> FastSet
 83 | fromList s = FastSet (AB.listArray (0, length interleaved - 1) interleaved)
 84 |              mask'
 85 |   where s'      = ordNub (sort s)
 86 |         l       = length s'
 87 |         mask'   = nextPowerOf2 ((5 * l) `div` 4) - 1
 88 |         entries = pad mask' .
 89 |                   resolveCollisions .
 90 |                   sortBy (compare `on` initialIndex) .
 91 |                   zipWith (\c i -> Entry c i i) s' .
 92 |                   map ((.&. mask') . fastHash) $ s'
 93 |         interleaved = concatMap (\e -> [fromEnum $ key e, initialIndex e])
 94 |                       entries
 95 | 
 96 | ordNub :: Eq a => [a] -> [a]
 97 | ordNub []     = []
 98 | ordNub (y:ys) = go y ys
 99 |   where go x (z:zs)
100 |           | x == z    = go x zs
101 |           | otherwise = x : go z zs
102 |         go x []       = [x]
103 | 
104 | set :: T.Text -> FastSet
105 | set = fromList . T.unpack
106 | 
107 | -- | Check the set for membership.
108 | member :: Char -> FastSet -> Bool
109 | member c a           = go (2 * i)
110 |   where i            = fastHash c .&. mask a
111 |         lookupAt j b = (i' <= i) && (c == c' || b)
112 |             where c' = toEnum $ AB.unsafeAt (table a) j
113 |                   i' = AB.unsafeAt (table a) $ j + 1
114 |         go j         = lookupAt j . lookupAt (j + 2) . lookupAt (j + 4) .
115 |                        lookupAt (j + 6) . go $ j + 8
116 | 
117 | charClass :: String -> FastSet
118 | charClass = fromList . go
119 |   where go (a:'-':b:xs) = [a..b] ++ go xs
120 |         go (x:xs)       = x : go xs
121 |         go _            = ""
122 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Number.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveDataTypeable #-}
  2 | -- |
  3 | -- Module      :  Data.Attoparsec.Number
  4 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  5 | -- License     :  BSD3
  6 | --
  7 | -- Maintainer  :  bos@serpentine.com
  8 | -- Stability   :  experimental
  9 | -- Portability :  unknown
 10 | --
 11 | -- This module is deprecated, and both the module and 'Number' type
 12 | -- will be removed in the next major release.  Use the
 13 | -- <http://hackage.haskell.org/package/scientific scientific> package
 14 | -- and the 'Data.Scientific.Scientific' type instead.
 15 | --
 16 | -- A simple number type, useful for parsing both exact and inexact
 17 | -- quantities without losing much precision.
 18 | module Data.Attoparsec.Number
 19 |     {-# DEPRECATED "This module will be removed in the next major release." #-}
 20 |     (
 21 |       Number(..)
 22 |     ) where
 23 | 
 24 | import Control.DeepSeq (NFData(rnf))
 25 | import Data.Data (Data)
 26 | import Data.Function (on)
 27 | import Data.Typeable (Typeable)
 28 | 
 29 | -- | A numeric type that can represent integers accurately, and
 30 | -- floating point numbers to the precision of a 'Double'.
 31 | --
 32 | -- /Note/: this type is deprecated, and will be removed in the next
 33 | -- major release.  Use the 'Data.Scientific.Scientific' type instead.
 34 | data Number = I !Integer
 35 |             | D {-# UNPACK #-} !Double
 36 |               deriving (Typeable, Data)
 37 | {-# DEPRECATED Number "Use Scientific instead." #-}
 38 | 
 39 | instance Show Number where
 40 |     show (I a) = show a
 41 |     show (D a) = show a
 42 | 
 43 | instance NFData Number where
 44 |     rnf (I _) = ()
 45 |     rnf (D _) = ()
 46 |     {-# INLINE rnf #-}
 47 | 
 48 | binop :: (Integer -> Integer -> a) -> (Double -> Double -> a)
 49 |       -> Number -> Number -> a
 50 | binop _ d (D a) (D b) = d a b
 51 | binop i _ (I a) (I b) = i a b
 52 | binop _ d (D a) (I b) = d a (fromIntegral b)
 53 | binop _ d (I a) (D b) = d (fromIntegral a) b
 54 | {-# INLINE binop #-}
 55 | 
 56 | instance Eq Number where
 57 |     (==) = binop (==) (==)
 58 |     {-# INLINE (==) #-}
 59 | 
 60 |     (/=) = binop (/=) (/=)
 61 |     {-# INLINE (/=) #-}
 62 | 
 63 | instance Ord Number where
 64 |     (<) = binop (<) (<)
 65 |     {-# INLINE (<) #-}
 66 | 
 67 |     (<=) = binop (<=) (<=)
 68 |     {-# INLINE (<=) #-}
 69 | 
 70 |     (>) = binop (>) (>)
 71 |     {-# INLINE (>) #-}
 72 | 
 73 |     (>=) = binop (>=) (>=)
 74 |     {-# INLINE (>=) #-}
 75 | 
 76 |     compare = binop compare compare
 77 |     {-# INLINE compare #-}
 78 | 
 79 | instance Num Number where
 80 |     (+) = binop (((I$!).) . (+)) (((D$!).) . (+))
 81 |     {-# INLINE (+) #-}
 82 | 
 83 |     (-) = binop (((I$!).) . (-)) (((D$!).) . (-))
 84 |     {-# INLINE (-) #-}
 85 | 
 86 |     (*) = binop (((I$!).) . (*)) (((D$!).) . (*))
 87 |     {-# INLINE (*) #-}
 88 | 
 89 |     abs (I a) = I $! abs a
 90 |     abs (D a) = D $! abs a
 91 |     {-# INLINE abs #-}
 92 | 
 93 |     negate (I a) = I $! negate a
 94 |     negate (D a) = D $! negate a
 95 |     {-# INLINE negate #-}
 96 | 
 97 |     signum (I a) = I $! signum a
 98 |     signum (D a) = D $! signum a
 99 |     {-# INLINE signum #-}
100 | 
101 |     fromInteger = (I$!) . fromInteger
102 |     {-# INLINE fromInteger #-}
103 | 
104 | instance Real Number where
105 |     toRational (I a) = fromIntegral a
106 |     toRational (D a) = toRational a
107 |     {-# INLINE toRational #-}
108 | 
109 | instance Fractional Number where
110 |     fromRational = (D$!) . fromRational
111 |     {-# INLINE fromRational #-}
112 | 
113 |     (/) = binop (((D$!).) . (/) `on` fromIntegral)
114 |                 (((D$!).) . (/))
115 |     {-# INLINE (/) #-}
116 | 
117 |     recip (I a) = D $! recip (fromIntegral a)
118 |     recip (D a) = D $! recip a
119 |     {-# INLINE recip #-}
120 | 
121 | instance RealFrac Number where
122 |     properFraction (I a) = (fromIntegral a,0)
123 |     properFraction (D a) = case properFraction a of
124 |                              (i,d) -> (i,D d)
125 |     {-# INLINE properFraction #-}
126 |     truncate (I a) = fromIntegral a
127 |     truncate (D a) = truncate a
128 |     {-# INLINE truncate #-}
129 |     round (I a) = fromIntegral a
130 |     round (D a) = round a
131 |     {-# INLINE round #-}
132 |     ceiling (I a) = fromIntegral a
133 |     ceiling (D a) = ceiling a
134 |     {-# INLINE ceiling #-}
135 |     floor (I a) = fromIntegral a
136 |     floor (D a) = floor a
137 |     {-# INLINE floor #-}
138 | 


--------------------------------------------------------------------------------
/internal/Data/Attoparsec/Text/FastSet.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | 
  3 | ------------------------------------------------------------------------------
  4 | -- |
  5 | -- Module      :  Data.Attoparsec.FastSet
  6 | -- Copyright   :  Felipe Lessa 2010, Bryan O'Sullivan 2007-2015
  7 | -- License     :  BSD3
  8 | --
  9 | -- Maintainer  :  felipe.lessa@gmail.com
 10 | -- Stability   :  experimental
 11 | -- Portability :  unknown
 12 | --
 13 | -- Fast set membership tests for 'Char' values. We test for
 14 | -- membership using a hashtable implemented with Robin Hood
 15 | -- collision resolution. The set representation is unboxed,
 16 | -- and the characters and hashes interleaved, for efficiency.
 17 | --
 18 | --
 19 | -----------------------------------------------------------------------------
 20 | module Data.Attoparsec.Text.FastSet
 21 |     (
 22 |     -- * Data type
 23 |       FastSet
 24 |     -- * Construction
 25 |     , fromList
 26 |     , set
 27 |     -- * Lookup
 28 |     , member
 29 |     -- * Handy interface
 30 |     , charClass
 31 |     ) where
 32 | 
 33 | import Data.Bits ((.|.), (.&.), shiftR)
 34 | import Data.Function (on)
 35 | import Data.List (sort, sortBy)
 36 | import qualified Data.Array.Base as AB
 37 | import qualified Data.Array.Unboxed as A
 38 | import qualified Data.Text as T
 39 | 
 40 | data FastSet = FastSet {
 41 |     table :: {-# UNPACK #-} !(A.UArray Int Int)
 42 |   , mask  :: {-# UNPACK #-} !Int
 43 |   }
 44 | 
 45 | data Entry = Entry {
 46 |     key          :: {-# UNPACK #-} !Char
 47 |   , initialIndex :: {-# UNPACK #-} !Int
 48 |   , index        :: {-# UNPACK #-} !Int
 49 |   }
 50 | 
 51 | offset :: Entry -> Int
 52 | offset e = index e - initialIndex e
 53 | 
 54 | resolveCollisions :: [Entry] -> [Entry]
 55 | resolveCollisions [] = []
 56 | resolveCollisions [e] = [e]
 57 | resolveCollisions (a:b:entries) = a' : resolveCollisions (b' : entries)
 58 |   where (a', b')
 59 |           | index a < index b   = (a, b)
 60 |           | offset a < offset b = (b { index=index a }, a { index=index a + 1 })
 61 |           | otherwise           = (a, b { index=index a + 1 })
 62 | 
 63 | pad :: Int -> [Entry] -> [Entry]
 64 | pad = go 0
 65 |   where -- ensure that we pad enough so that lookups beyond the
 66 |         -- last hash in the table fall within the array
 67 |         go !_ !m []          = replicate (max 1 m + 1) empty
 68 |         go  k  m (e:entries) = map (const empty) [k..i - 1] ++ e :
 69 |                                go (i + 1) (m + i - k - 1) entries
 70 |           where i            = index e
 71 |         empty                = Entry '\0' maxBound 0
 72 | 
 73 | nextPowerOf2 :: Int -> Int
 74 | nextPowerOf2 0  = 1
 75 | nextPowerOf2 x  = go (x - 1) 1
 76 |   where go y 32 = y + 1
 77 |         go y k  = go (y .|. (y `shiftR` k)) $ k * 2
 78 | 
 79 | fastHash :: Char -> Int
 80 | fastHash = fromEnum
 81 | 
 82 | fromList :: String -> FastSet
 83 | fromList s = FastSet (AB.listArray (0, length interleaved - 1) interleaved)
 84 |              mask'
 85 |   where s'      = ordNub (sort s)
 86 |         l       = length s'
 87 |         mask'   = nextPowerOf2 ((5 * l) `div` 4) - 1
 88 |         entries = pad mask' .
 89 |                   resolveCollisions .
 90 |                   sortBy (compare `on` initialIndex) .
 91 |                   zipWith (\c i -> Entry c i i) s' .
 92 |                   map ((.&. mask') . fastHash) $ s'
 93 |         interleaved = concatMap (\e -> [fromEnum $ key e, initialIndex e])
 94 |                       entries
 95 | 
 96 | ordNub :: Eq a => [a] -> [a]
 97 | ordNub []     = []
 98 | ordNub (y:ys) = go y ys
 99 |   where go x (z:zs)
100 |           | x == z    = go x zs
101 |           | otherwise = x : go z zs
102 |         go x []       = [x]
103 | 
104 | set :: T.Text -> FastSet
105 | set = fromList . T.unpack
106 | 
107 | -- | Check the set for membership.
108 | member :: Char -> FastSet -> Bool
109 | member c a           = go (2 * i)
110 |   where i            = fastHash c .&. mask a
111 |         lookupAt j b = (i' <= i) && (c == c' || b)
112 |             where c' = toEnum $ AB.unsafeAt (table a) j
113 |                   i' = AB.unsafeAt (table a) $ j + 1
114 |         go j         = lookupAt j . lookupAt (j + 2) . lookupAt (j + 4) .
115 |                        lookupAt (j + 6) . go $ j + 8
116 | 
117 | charClass :: String -> FastSet
118 | charClass = fromList . go
119 |   where go (a:'-':b:xs) = [a..b] ++ go xs
120 |         go (x:xs)       = x : go xs
121 |         go _            = ""
122 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Text/Lazy.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | #if __GLASGOW_HASKELL__ >= 702
  3 | {-# LANGUAGE Trustworthy #-} -- Imports internal modules
  4 | #endif
  5 | 
  6 | {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
  7 | -- |
  8 | -- Module      :  Data.Attoparsec.Text.Lazy
  9 | -- Copyright   :  Bryan O'Sullivan 2007-2015
 10 | -- License     :  BSD3
 11 | --
 12 | -- Maintainer  :  bos@serpentine.com
 13 | -- Stability   :  experimental
 14 | -- Portability :  unknown
 15 | --
 16 | -- Simple, efficient combinator parsing that can consume lazy 'Text'
 17 | -- strings, loosely based on the Parsec library.
 18 | --
 19 | -- This is essentially the same code as in the 'Data.Attoparsec.Text'
 20 | -- module, only with a 'parse' function that can consume a lazy
 21 | -- 'Text' incrementally, and a 'Result' type that does not allow
 22 | -- more input to be fed in.  Think of this as suitable for use with a
 23 | -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'.
 24 | --
 25 | -- /Note:/ The various parser functions and combinators such as
 26 | -- 'string' still expect /strict/ 'T.Text' parameters, and return
 27 | -- strict 'T.Text' results.  Behind the scenes, strict 'T.Text' values
 28 | -- are still used internally to store parser input and manipulate it
 29 | -- efficiently.
 30 | 
 31 | module Data.Attoparsec.Text.Lazy
 32 |     (
 33 |       Result(..)
 34 |     , module Data.Attoparsec.Text
 35 |     -- * Running parsers
 36 |     , parse
 37 |     , parseOnly
 38 |     , parseTest
 39 |     -- ** Result conversion
 40 |     , maybeResult
 41 |     , eitherResult
 42 |     ) where
 43 | 
 44 | import Control.DeepSeq (NFData(rnf))
 45 | import Data.List (intercalate)
 46 | import Data.Text.Lazy.Internal (Text(..), chunk)
 47 | import qualified Data.Attoparsec.Internal.Types as T
 48 | import qualified Data.Attoparsec.Text as A
 49 | import qualified Data.Text as T
 50 | import Data.Attoparsec.Text hiding (IResult(..), Result, eitherResult,
 51 |                                     maybeResult, parse, parseOnly, parseWith, parseTest)
 52 | 
 53 | -- | The result of a parse.
 54 | data Result r = Fail Text [String] String
 55 |               -- ^ The parse failed.  The 'Text' is the input
 56 |               -- that had not yet been consumed when the failure
 57 |               -- occurred.  The @[@'String'@]@ is a list of contexts
 58 |               -- in which the error occurred.  The 'String' is the
 59 |               -- message describing the error, if any.
 60 |               | Done Text r
 61 |               -- ^ The parse succeeded.  The 'Text' is the
 62 |               -- input that had not yet been consumed (if any) when
 63 |               -- the parse succeeded.
 64 |     deriving (Show)
 65 | 
 66 | instance NFData r => NFData (Result r) where
 67 |     rnf (Fail bs ctxs msg) = rnf bs `seq` rnf ctxs `seq` rnf msg
 68 |     rnf (Done bs r)        = rnf bs `seq` rnf r
 69 |     {-# INLINE rnf #-}
 70 | 
 71 | fmapR :: (a -> b) -> Result a -> Result b
 72 | fmapR _ (Fail st stk msg) = Fail st stk msg
 73 | fmapR f (Done bs r)       = Done bs (f r)
 74 | 
 75 | instance Functor Result where
 76 |     fmap = fmapR
 77 | 
 78 | -- | Run a parser and return its result.
 79 | parse :: A.Parser a -> Text -> Result a
 80 | parse p s = case s of
 81 |               Chunk x xs -> go (A.parse p x) xs
 82 |               empty      -> go (A.parse p T.empty) empty
 83 |   where
 84 |     go (T.Fail x stk msg) ys      = Fail (chunk x ys) stk msg
 85 |     go (T.Done x r) ys            = Done (chunk x ys) r
 86 |     go (T.Partial k) (Chunk y ys) = go (k y) ys
 87 |     go (T.Partial k) empty        = go (k T.empty) empty
 88 | 
 89 | -- | Run a parser and print its result to standard output.
 90 | parseTest :: (Show a) => A.Parser a -> Text -> IO ()
 91 | parseTest p s = print (parse p s)
 92 | 
 93 | -- | Convert a 'Result' value to a 'Maybe' value.
 94 | maybeResult :: Result r -> Maybe r
 95 | maybeResult (Done _ r) = Just r
 96 | maybeResult _          = Nothing
 97 | 
 98 | -- | Convert a 'Result' value to an 'Either' value.
 99 | eitherResult :: Result r -> Either String r
100 | eitherResult (Done _ r)        = Right r
101 | eitherResult (Fail _ [] msg)   = Left msg
102 | eitherResult (Fail _ ctxs msg) = Left (intercalate " > " ctxs ++ ": " ++ msg)
103 | 
104 | -- | Run a parser that cannot be resupplied via a 'T.Partial' result.
105 | --
106 | -- This function does not force a parser to consume all of its input.
107 | -- Instead, any residual input will be discarded.  To force a parser
108 | -- to consume all of its input, use something like this:
109 | --
110 | -- @
111 | --'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput')
112 | -- @
113 | parseOnly :: A.Parser a -> Text -> Either String a
114 | parseOnly p = eitherResult . parse p
115 | {-# INLINE parseOnly #-}
116 | 


--------------------------------------------------------------------------------
/doc/attoparsec-rewired-2.md:
--------------------------------------------------------------------------------
 1 | In my
 2 | [first of this pair of articles](http://www.serpentine.com/blog/2010/03/03/whats-in-a-parsing-library-1/),
 3 | I laid out some of the qualities I've been looking for in a parsing
 4 | library.
 5 | 
 6 | Before I dive back into detail, I want to show off some numbers.  The
 7 | new Attoparsec code is _fast_.
 8 | 
 9 | ![Performance](http://chart.apis.google.com/chart?cht=bvs&chs=340x200&chd=t:260,0,0,0,0|0,471,0,0,0|0,0,17037,0,0|0,0,0,23753,0|0,0,0,0,36753&chds=0,40000&chco=4D89F9|894DF9|F94D89|4DB989|1969FD&chdl=260+ms:+http_parser|471+ms:+Attoparsec|17037+ms:+Parsec+3+CPS|23753+ms:+Lazy+Parsec+3+CPS|36753+ms:+Parsec+3&chxt=y&chxl=0:||10|20|30|40&chtt=Time+to+parse+45,668|HTTP+GET+requests)
10 | 
11 | What did I benchmark?  I captured some real HTTP GET requests from a
12 | live public web server, averaging 431 bytes per request.  I chucked
13 | them into a file, and measured the time needed to parse the entire
14 | contents of the file with the following libraries:
15 | 
16 | * Ryan Dahl's [http-parser](http://github.com/ry/http-parser) library,
17 |   which is 1,672 lines of hand-rolled C craziness.  Its heritage seems
18 |   to be closely based on the Ragel-generated parser used by Mongrel.
19 |   This library is a fair approximation to about as fast as you can
20 |   get, since it's been tuned for just one purpose.  I wrote a small,
21 |   but reasonably realistic,
22 |   [driver program](http://bitbucket.org/bos/attoparsec/src/tip/examples/rfc2616.c)
23 |   to wire it up to file-based data, adding another 210 lines of code.
24 |   
25 | * An Attoparsec-based
26 |   [HTTP request parser](http://bitbucket.org/bos/attoparsec/src/tip/examples/RFC2616.hs),
27 |   54 lines long, with about 30 lines of
28 |   [driver program](http://bitbucket.org/bos/attoparsec/src/tip/examples/TestRFC2616.hs).
29 |   (Attoparsec itself is about 900 lines of code.)
30 |   
31 | * Several
32 |   [Parsec-3-based parsers](http://bitbucket.org/bos/attoparsec/src/tip/examples/Parsec_RFC2616.hs),
33 |   which are almost identical in length to the Attoparsec-based
34 |   version.
35 |   
36 | The Parsec 3 parsers come in three varieties:
37 | 
38 | * The fastest uses a patch that Antoine Latter wrote to switch Parsec
39 |   3's internal machinery over to using continuation passing style
40 |   (CPS).  This parser uses `ByteString` for input, and reads the
41 |   entire 18.8MB file in one chunk.
42 |   
43 | * Next is the same parser, using lazy `ByteString` I/O to read the
44 |   file in 64KB chunks.  This costs about 50% in performance, but is
45 |   almost mandatory to maintain a sensible footprint on large inputs.
46 |   
47 | * In last place is the official version of Parsec 3, reading the input
48 |   in one chunk.  (Reading lazily still costs an additional 50%, but I
49 |   didn't want to further clutter the chart with more big numbers.)
50 | 
51 | What's interesting to me is that the tiny Attoparsec-based parser,
52 | which is more or less a transliteration of the relevant parts of
53 | [RFC 2616](http://www.w3.org/Protocols/rfc2616/rfc2616.html), is so
54 | fast.
55 | 
56 | I went back and remeasured performance of the Attoparsec and C parsers
57 | on a larger data set (295,568 URLs), and got these numbers:
58 | 
59 | * Attoparsec: 2.889 seconds, or 102,308 requests/second
60 | 
61 | * C: 1.614 seconds, or 183,128 requests/second
62 | 
63 | That clocks the Attoparsec-based parser at about 56% the speed of the
64 | C parser.  Not bad, given that it's about 3.2% the number of lines of
65 | code!
66 | 
67 | Of course there are tradeoffs involved here.
68 | 
69 | * Parsec 3 emits much more friendly error messages, and can handle
70 |   many different input types.  Attoparsec, being aimed at
71 |   plumbing-oriented network protocols, considers friendly error
72 |   messages to not be worth the effort, and is specialised to the
73 |   arrays of bytes you get straight off the network.
74 |   
75 | * Parsec 3 requires all of its input to be available when the parser
76 |   is run (either in one large chunk or via lazy I/O).  If Attoparsec
77 |   has insufficient data to return a complete result, it hands back a
78 |   continuation that you provide extra data to.  This eliminates the
79 |   need for lazy I/O and any additional buffering, and makes for a
80 |   beautiful, pure API that doesn't care what its input source is.
81 | 
82 | The memory footprint of the Attoparsec-based parser is small: it will
83 | run in 568KB of heap on my 64-bit laptop.  The smallest heap size that
84 | the Parsec 3 parser can survive in isn't all that much larger: with
85 | lazily read input, it will run in a 750KB heap.
86 | 
87 | Overall, this is yet another instance where a little careful attention
88 | to performance yields very exciting results.  Personally, I'd be quite
89 | happy to trade a 97% reduction in code size for such a small
90 | performance hit, especially given the clarity, ease of use, and
91 | flexibility of the resulting code.  (The `http_parser` API is frankly
92 | not so much fun to use, even though I completely understand the
93 | motivation behind it.)
94 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/ByteString/Lazy.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | #if __GLASGOW_HASKELL__ >= 702
  3 | {-# LANGUAGE Trustworthy #-} -- Imports internal modules
  4 | #endif
  5 | 
  6 | -- |
  7 | -- Module      :  Data.Attoparsec.ByteString.Lazy
  8 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  9 | -- License     :  BSD3
 10 | --
 11 | -- Maintainer  :  bos@serpentine.com
 12 | -- Stability   :  experimental
 13 | -- Portability :  unknown
 14 | --
 15 | -- Simple, efficient combinator parsing that can consume lazy
 16 | -- 'ByteString' strings, loosely based on the Parsec library.
 17 | --
 18 | -- This is essentially the same code as in the 'Data.Attoparsec'
 19 | -- module, only with a 'parse' function that can consume a lazy
 20 | -- 'ByteString' incrementally, and a 'Result' type that does not allow
 21 | -- more input to be fed in.  Think of this as suitable for use with a
 22 | -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'.
 23 | --
 24 | -- /Note:/ The various parser functions and combinators such as
 25 | -- 'string' still expect /strict/ 'B.ByteString' parameters, and
 26 | -- return strict 'B.ByteString' results.  Behind the scenes, strict
 27 | -- 'B.ByteString' values are still used internally to store parser
 28 | -- input and manipulate it efficiently.
 29 | 
 30 | module Data.Attoparsec.ByteString.Lazy
 31 |     (
 32 |       Result(..)
 33 |     , module Data.Attoparsec.ByteString
 34 |     -- * Running parsers
 35 |     , parse
 36 |     , parseOnly
 37 |     , parseTest
 38 |     -- ** Result conversion
 39 |     , maybeResult
 40 |     , eitherResult
 41 |     ) where
 42 | 
 43 | import Control.DeepSeq (NFData(rnf))
 44 | import Data.ByteString.Lazy.Internal (ByteString(..), chunk)
 45 | import Data.List (intercalate)
 46 | import qualified Data.ByteString as B
 47 | import qualified Data.Attoparsec.ByteString as A
 48 | import qualified Data.Attoparsec.Internal.Types as T
 49 | import Data.Attoparsec.ByteString
 50 |     hiding (IResult(..), Result, eitherResult, maybeResult,
 51 |             parse, parseOnly, parseWith, parseTest)
 52 | 
 53 | -- | The result of a parse.
 54 | data Result r = Fail ByteString [String] String
 55 |               -- ^ The parse failed.  The 'ByteString' is the input
 56 |               -- that had not yet been consumed when the failure
 57 |               -- occurred.  The @[@'String'@]@ is a list of contexts
 58 |               -- in which the error occurred.  The 'String' is the
 59 |               -- message describing the error, if any.
 60 |               | Done ByteString r
 61 |               -- ^ The parse succeeded.  The 'ByteString' is the
 62 |               -- input that had not yet been consumed (if any) when
 63 |               -- the parse succeeded.
 64 | 
 65 | instance NFData r => NFData (Result r) where
 66 |     rnf (Fail bs ctxs msg) = rnfBS bs `seq` rnf ctxs `seq` rnf msg
 67 |     rnf (Done bs r)        = rnfBS bs `seq` rnf r
 68 |     {-# INLINE rnf #-}
 69 | 
 70 | rnfBS :: ByteString -> ()
 71 | rnfBS (Chunk _ xs) = rnfBS xs
 72 | rnfBS Empty        = ()
 73 | {-# INLINE rnfBS #-}
 74 | 
 75 | instance Show r => Show (Result r) where
 76 |     show (Fail bs stk msg) =
 77 |         "Fail " ++ show bs ++ " " ++ show stk ++ " " ++ show msg
 78 |     show (Done bs r)       = "Done " ++ show bs ++ " " ++ show r
 79 | 
 80 | fmapR :: (a -> b) -> Result a -> Result b
 81 | fmapR _ (Fail st stk msg) = Fail st stk msg
 82 | fmapR f (Done bs r)       = Done bs (f r)
 83 | 
 84 | instance Functor Result where
 85 |     fmap = fmapR
 86 | 
 87 | -- | Run a parser and return its result.
 88 | parse :: A.Parser a -> ByteString -> Result a
 89 | parse p s = case s of
 90 |               Chunk x xs -> go (A.parse p x) xs
 91 |               empty      -> go (A.parse p B.empty) empty
 92 |   where
 93 |     go (T.Fail x stk msg) ys      = Fail (chunk x ys) stk msg
 94 |     go (T.Done x r) ys            = Done (chunk x ys) r
 95 |     go (T.Partial k) (Chunk y ys) = go (k y) ys
 96 |     go (T.Partial k) empty        = go (k B.empty) empty
 97 | 
 98 | -- | Run a parser and print its result to standard output.
 99 | parseTest :: (Show a) => A.Parser a -> ByteString -> IO ()
100 | parseTest p s = print (parse p s)
101 | 
102 | -- | Convert a 'Result' value to a 'Maybe' value.
103 | maybeResult :: Result r -> Maybe r
104 | maybeResult (Done _ r) = Just r
105 | maybeResult _          = Nothing
106 | 
107 | -- | Convert a 'Result' value to an 'Either' value.
108 | eitherResult :: Result r -> Either String r
109 | eitherResult (Done _ r)        = Right r
110 | eitherResult (Fail _ [] msg)   = Left msg
111 | eitherResult (Fail _ ctxs msg) = Left (intercalate " > " ctxs ++ ": " ++ msg)
112 | 
113 | -- | Run a parser that cannot be resupplied via a 'T.Partial' result.
114 | --
115 | -- This function does not force a parser to consume all of its input.
116 | -- Instead, any residual input will be discarded.  To force a parser
117 | -- to consume all of its input, use something like this:
118 | --
119 | -- @
120 | --'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput')
121 | -- @
122 | parseOnly :: A.Parser a -> ByteString -> Either String a
123 | parseOnly p = eitherResult . parse p
124 | {-# INLINE parseOnly #-}
125 | 


--------------------------------------------------------------------------------
/benchmarks/Benchmarks.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, CPP #-}
  2 | 
  3 | import Common ()
  4 | import Control.Applicative (many)
  5 | import Test.Tasty.Bench (bench, bgroup, defaultMain, nf)
  6 | import Data.Bits
  7 | import Data.Char (isAlpha)
  8 | import Data.Word (Word32)
  9 | import Data.Word (Word8)
 10 | import Numbers (numbers)
 11 | import Common (chunksOf)
 12 | import Text.Parsec.Text ()
 13 | import Text.Parsec.Text.Lazy ()
 14 | import qualified Warp
 15 | import qualified Aeson
 16 | import qualified Genome
 17 | import qualified Data.Attoparsec.ByteString as AB
 18 | import qualified Data.Attoparsec.ByteString.Char8 as AC
 19 | import qualified Data.Attoparsec.ByteString.Lazy as ABL
 20 | import qualified Data.Attoparsec.Text as AT
 21 | import qualified Data.Attoparsec.Text.Lazy as ATL
 22 | import qualified Data.ByteString as B
 23 | import qualified Data.ByteString.Char8 as BC
 24 | import qualified Data.ByteString.Lazy as BL
 25 | import qualified Data.Text as T
 26 | import qualified Data.Text.Lazy as TL
 27 | import qualified HeadersByteString
 28 | import qualified HeadersText
 29 | import qualified Links
 30 | import qualified Text.Parsec as P
 31 | import qualified Sets
 32 | 
 33 | main :: IO ()
 34 | main = do
 35 |   let s  = take 1024 . cycle $ ['a'..'z'] ++ ['A'..'Z']
 36 |       !b = BC.pack s
 37 |       !bl = BL.fromChunks . map BC.pack . chunksOf 4 $ s
 38 |       !t = T.pack s
 39 |       !tl = TL.fromChunks . map T.pack . chunksOf 4 $ s
 40 |   aeson <- Aeson.aeson
 41 |   headersBS <- HeadersByteString.headers
 42 |   headersT <- HeadersText.headers
 43 |   defaultMain [
 44 |      bgroup "many" [
 45 |        bgroup "attoparsec" [
 46 |          bench "B" $ nf (AB.parse (many (AC.satisfy AC.isAlpha_ascii))) b
 47 |        , bench "BL" $ nf (ABL.parse (many (AC.satisfy AC.isAlpha_ascii))) bl
 48 |        , bench "T" $ nf (AT.parse (many (AT.satisfy AC.isAlpha_ascii))) t
 49 |        , bench "TL" $ nf (ATL.parse (many (AT.satisfy AC.isAlpha_ascii))) tl
 50 |        ]
 51 |      , bgroup "parsec" [
 52 |          bench "S" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") s
 53 |        , bench "B" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") b
 54 |        , bench "BL" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") bl
 55 |        , bench "T" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") t
 56 |        , bench "TL" $ nf (P.parse (many (P.satisfy AC.isAlpha_ascii)) "") tl
 57 |        ]
 58 |      ]
 59 |    , bgroup "comparison" [
 60 |        bgroup "many-vs-takeWhile" [
 61 |          bench "many" $ nf (AB.parse (many (AC.satisfy AC.isAlpha_ascii))) b
 62 |        , bench "takeWhile" $ nf (AB.parse (AC.takeWhile AC.isAlpha_ascii)) b
 63 |        ]
 64 |      , bgroup "letter-vs-isAlpha" [
 65 |          bench "letter" $ nf (AB.parse (many AC.letter_ascii)) b
 66 |        , bench "isAlpha" $ nf (AB.parse (many (AC.satisfy AC.isAlpha_ascii))) b
 67 |        ]
 68 |      ]
 69 |    , bgroup "takeWhile" [
 70 |        bench "isAlpha" $ nf (ABL.parse (AC.takeWhile isAlpha)) bl
 71 |      , bench "isAlpha_ascii" $ nf (ABL.parse (AC.takeWhile AC.isAlpha_ascii)) bl
 72 |      , bench "isAlpha_iso8859_15" $
 73 |        nf (ABL.parse (AC.takeWhile AC.isAlpha_iso8859_15)) bl
 74 |      , bench "T isAlpha" $ nf (AT.parse (AT.takeWhile isAlpha)) t
 75 |      , bench "TL isAlpha" $ nf (ATL.parse (AT.takeWhile isAlpha)) tl
 76 |      ]
 77 |    , bgroup "takeWhile1" [
 78 |        bench "isAlpha" $ nf (ABL.parse (AC.takeWhile1 isAlpha)) bl
 79 |      , bench "isAlpha_ascii" $ nf (ABL.parse (AC.takeWhile1 AC.isAlpha_ascii)) bl
 80 |      , bench "T isAlpha" $ nf (AT.parse (AT.takeWhile1 isAlpha)) t
 81 |      , bench "TL isAlpha" $ nf (ATL.parse (AT.takeWhile1 isAlpha)) tl
 82 |      ]
 83 |    , bench "word32LE" $ nf (AB.parse word32LE) b
 84 |    , bgroup "scan" [
 85 |        bench "short" $ nf (AB.parse quotedString) (BC.pack "abcdefghijk\"")
 86 |      , bench "long" $ nf (AB.parse quotedString) b
 87 |      ]
 88 |    , aeson
 89 |    , Genome.genome
 90 |    , headersBS
 91 |    , headersT
 92 |    , Links.links
 93 |    , numbers
 94 |    , Sets.benchmarks
 95 |    , Warp.benchmarks
 96 |    ]
 97 | 
 98 | -- Benchmarks bind and (potential) bounds-check merging.
 99 | word32LE :: AB.Parser Word32
100 | word32LE = do
101 |     w1 <- AB.anyWord8
102 |     w2 <- AB.anyWord8
103 |     w3 <- AB.anyWord8
104 |     w4 <- AB.anyWord8
105 |     return $! (fromIntegral w1 :: Word32) +
106 |         fromIntegral w2 `unsafeShiftL` 8 +
107 |         fromIntegral w3 `unsafeShiftL` 16 +
108 |         fromIntegral w4 `unsafeShiftL` 32
109 | 
110 | doubleQuote, backslash :: Word8
111 | doubleQuote = 34
112 | backslash = 92
113 | {-# INLINE backslash #-}
114 | {-# INLINE doubleQuote #-}
115 | 
116 | -- | Parse a string without a leading quote.
117 | quotedString :: AB.Parser B.ByteString
118 | quotedString = AB.scan False $ \s c -> if s then Just False
119 |                                        else if c == doubleQuote
120 |                                             then Nothing
121 |                                             else Just (c == backslash)
122 | 
123 | #if !MIN_VERSION_base(4,5,0)
124 | unsafeShiftL :: Bits a => a -> Int -> a
125 | unsafeShiftL = shiftL
126 | #endif
127 | 


--------------------------------------------------------------------------------
/benchmarks/IsSpace.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS_GHC -Wall -fwarn-tabs #-}
  2 | {-# LANGUAGE ForeignFunctionInterface #-}
  3 | ----------------------------------------------------------------
  4 | --                                                    2010.10.09
  5 | -- |
  6 | -- Module      :  IsSpace
  7 | -- Copyright   :  Copyright (c) 2010 wren ng thornton
  8 | -- License     :  BSD
  9 | -- Maintainer  :  wren@community.haskell.org
 10 | -- Stability   :  experimental
 11 | -- Portability :  portable (FFI)
 12 | --
 13 | -- A benchmark for comparing different definitions of predicates
 14 | -- for detecting whitespace. As of the last run the results are:
 15 | -- 
 16 | -- * Data.Char.isSpace             : 14.44786 us +/- 258.0377 ns
 17 | -- * isSpace_DataChar              : 43.25154 us +/- 655.7037 ns
 18 | -- * isSpace_Char                  : 29.26598 us +/- 454.1445 ns
 19 | -- * isPerlSpace                   :
 20 | -- * Data.Attoparsec.Char8.isSpace : 81.87335 us +/- 1.195903 us
 21 | -- * isSpace_Char8                 : 11.84677 us +/- 178.9795 ns
 22 | -- * isSpace_w8                    : 11.55470 us +/- 133.7644 ns
 23 | ----------------------------------------------------------------
 24 | module IsSpace (main) where
 25 | 
 26 | import qualified Data.Char             as C
 27 | import           Data.Word             (Word8)
 28 | import qualified Data.ByteString       as B
 29 | import qualified Data.ByteString.Char8 as B8
 30 | import           Foreign.C.Types       (CInt)
 31 | 
 32 | import           Criterion             (bench, nf)
 33 | import           Criterion.Main        (defaultMain)
 34 | 
 35 | ----------------------------------------------------------------
 36 | ----- Character predicates
 37 | -- N.B. \x9..\xD == "\t\n\v\f\r"
 38 | 
 39 | -- | Recognize the same characters as Perl's @/\s/@ in Unicode mode.
 40 | -- In particular, we recognize POSIX 1003.2 @[[:space:]]@ except
 41 | -- @\'\v\'@, and recognize the Unicode @\'\x85\'@, @\'\x2028\'@,
 42 | -- @\'\x2029\'@. Notably, @\'\x85\'@ belongs to Latin-1 (but not
 43 | -- ASCII) and therefore does not belong to POSIX 1003.2 @[[:space:]]@
 44 | -- (nor non-Unicode @/\s/@).
 45 | isPerlSpace :: Char -> Bool
 46 | isPerlSpace c
 47 |     =  (' '      == c)
 48 |     || ('\t' <= c && c <= '\r' && c /= '\v')
 49 |     || ('\x85'   == c)
 50 |     || ('\x2028' == c)
 51 |     || ('\x2029' == c)
 52 | {-# INLINE isPerlSpace #-}
 53 | 
 54 | 
 55 | -- | 'Data.Attoparsec.Char8.isSpace', duplicated here because it's
 56 | -- not exported. This is the definition as of attoparsec-0.8.1.0.
 57 | isSpace :: Char -> Bool
 58 | isSpace c = c `B8.elem` spaces
 59 |     where
 60 |     spaces = B8.pack " \n\r\t\v\f"
 61 |     {-# NOINLINE spaces #-}
 62 | {-# INLINE isSpace #-}
 63 | 
 64 | 
 65 | -- | An alternate version of 'Data.Attoparsec.Char8.isSpace'.
 66 | isSpace_Char8 :: Char -> Bool
 67 | isSpace_Char8 c =  (' ' == c) || ('\t' <= c && c <= '\r')
 68 | {-# INLINE isSpace_Char8 #-}
 69 | 
 70 | 
 71 | -- | An alternate version of 'Data.Char.isSpace'. This uses the
 72 | -- same trick as 'isSpace_Char8' but we include Unicode whitespaces
 73 | -- too, in order to have the same results as 'Data.Char.isSpace'
 74 | -- (whereas 'isSpace_Char8' doesn't recognize Unicode whitespace).
 75 | isSpace_Char :: Char -> Bool
 76 | isSpace_Char c
 77 |     =  (' '    == c)
 78 |     || ('\t' <= c && c <= '\r')
 79 |     || ('\xA0' == c)
 80 |     || (iswspace (fromIntegral (C.ord c)) /= 0)
 81 | {-# INLINE isSpace_Char #-}
 82 | 
 83 | foreign import ccall unsafe "u_iswspace"
 84 |     iswspace :: CInt -> CInt
 85 | 
 86 | -- | Verbatim version of 'Data.Char.isSpace' (i.e., 'GHC.Unicode.isSpace'
 87 | -- as of base-4.2.0.2) in order to try to figure out why 'isSpace_Char'
 88 | -- is slower than 'Data.Char.isSpace'. It appears to be something
 89 | -- special in how the base library was compiled.
 90 | isSpace_DataChar :: Char -> Bool
 91 | isSpace_DataChar c =
 92 |     c == ' '     ||
 93 |     c == '\t'    ||
 94 |     c == '\n'    ||
 95 |     c == '\r'    ||
 96 |     c == '\f'    ||
 97 |     c == '\v'    ||
 98 |     c == '\xa0'  ||
 99 |     iswspace (fromIntegral (C.ord c)) /= 0
100 | {-# INLINE isSpace_DataChar #-}
101 | 
102 | 
103 | -- | A 'Word8' version of 'Data.Attoparsec.Char8.isSpace'.
104 | isSpace_w8 :: Word8 -> Bool
105 | isSpace_w8 w = (w == 32) || (9 <= w && w <= 13)
106 | {-# INLINE isSpace_w8 #-}
107 | 
108 | ----------------------------------------------------------------
109 | 
110 | main :: IO ()
111 | main = defaultMain
112 |     [ bench "Data.Char.isSpace" $ nf (map C.isSpace)        ['\x0'..'\255']
113 |     , bench "isSpace_DataChar"  $ nf (map isSpace_DataChar) ['\x0'..'\255']
114 |     , bench "isSpace_Char"      $ nf (map isSpace_Char)     ['\x0'..'\255']
115 |     , bench "isPerlSpace"       $ nf (map isPerlSpace)      ['\x0'..'\255']
116 |     , bench "Data.Attoparsec.Char8.isSpace"
117 |                                 $ nf (map isSpace)          ['\x0'..'\255']
118 |     , bench "isSpace_Char8"     $ nf (map isSpace_Char8)    ['\x0'..'\255']
119 |     , bench "isSpace_w8"        $ nf (map isSpace_w8)       [0..255]
120 |     ]
121 | 
122 | ----------------------------------------------------------------
123 | ----------------------------------------------------------- fin.
124 | 


--------------------------------------------------------------------------------
/benchmarks/json-data/integers.json:
--------------------------------------------------------------------------------
1 | [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 29, 31, 33, 35, 37, 39, 42, 44, 47, 50, 53, 56, 60, 63, 67, 72, 76, 81, 86, 91, 96, 102, 109, 115, 122, 130, 138, 146, 155, 165, 175, 186, 197, 209, 222, 236, 250, 266, 282, 299, 318, 337, 358, 380, 403, 428, 454, 482, 511, 543, 576, 611, 649, 688, 730, 775, 823, 873, 926, 983, 1043, 1107, 1175, 1247, 1323, 1404, 1490, 1582, 1679, 1781, 1890, 2006, 2129, 2259, 2398, 2544, 2700, 2865, 3041, 3227, 3425, 3634, 3857, 4093, 4343, 4609, 4891, 5191, 5509, 5846, 6204, 6583, 6986, 7414, 7868, 8349, 8860, 9403, 9978, 10589, 11237, 11925, 12655, 13430, 14252, 15124, 16050, 17032, 18075, 19181, 20355, 21601, 22923, 24326, 25815, 27396, 29072, 30852, 32740, 34744, 36871, 39128, 41523, 44064, 46762, 49624, 52661, 55884, 59305, 62935, 66787, 70875, 75213, 79817, 84702, 89887, 95389, 101227, 107423, 113998, 120976, 128381, 136239, 144578, 153427, 162818, 172784, 183360, 194583, 206493, 219132, 232545, 246778, 261883, 277912, 294923, 312975, 332131, 352460, 374034, 396928, 421223, 447005, 474365, 503400, 534212, 566911, 601610, 638433, 677511, 718980, 762987, 809688, 859247, 911840, 967652, 1026880, 1089734, 1156434, 1227217, 1302333, 1382046, 1466638, 1556408, 1651673, 1752769, 1860052, 1973902, 2094721, 2222934, 2358996, 2503385, 2656613, 2819219, 2991777, 3174898, 3369227, 3575451, 3794297, 4026539, 4272995, 4534536, 4812086, 5106625, 5419191, 5750889, 6102889, 6476435, 6872845, 7293518, 7739939, 8213686, 8716429, 9249944, 9816115, 10416939, 11054539, 11731166, 12449207, 13211198, 14019829, 14877955, 15788605, 16754994, 17780533, 18868844, 20023768, 21249383, 22550016, 23930257, 25394980, 26949356, 28598872, 30349352, 32206975, 34178300, 36270285, 38490317, 40846232, 43346349, 45999492, 48815029, 51802899, 54973651, 58338478, 61909260, 65698602, 69719882, 73987297, 78515911, 83321713, 88421668, 93833782, 99577160, 105672079, 112140056, 119003924, 126287916, 134017747, 142220706, 150925751, 160163614, 169966908, 180370243, 191410345, 203126189, 215559137, 228753081, 242754599, 257613123, 273381107, 290114218, 307871529, 326715729, 346713346, 367934976, 390455540, 414354543, 439716356, 466630515, 495192035, 525501749, 557666661, 591800322, 628023236, 666463282, 707256166, 750545902, 796485316, 845236589, 896971830, 951873682, 1010135966, 1071964368, 1137577163, 1207205986, 1281096650, 1359510014, 1442722903, 1531029087, 1624740315, 1724187420, 1829721484, 1941715077, 2060563573, 2186686548, 2320529259, 2462564214, 2613292844, 2773247272, 2942992191, 3123126858, 3314287206, 3517148098, 3732425698, 3960880011, 4203317554, 4460594215, 4733618266, 5023353573, 5330822998, 5657112012, 6003372524, 6370826950, 6760772526, 7174585891, 7613727944, 8079749004, 8574294281, 9099109685, 9656047991, 10247075377, 10874278366, 11539871197, 12246203633, 12995769265, 13791214310, 14635346955, 15531147272, 16481777734, 17490594386, 18561158687, 19697250088, 20902879371, 22182302812, 23540037202, 24980875800, 26509905246, 28132523526, 29854459026, 31681790754, 33620969802, 35678842122, 37862672691, 40180171161, 42639519077, 45249398761, 48019023960, 50958172379, 54077220194, 57387178688, 60899733121, 64627283986, 68582990784, 72780818484, 77235586822, 81963022620, 86979815309, 92303675844, 97953399235, 103948930895, 110311437058, 117063379497, 124228594829, 131832378662, 139901574895, 148464670491, 157551896043, 167195332496, 177429024407, 188289100133, 199813899374, 212044108527, 225022904322, 238796106249, 253412338321, 268923200725, 285383451995, 302851202324, 321388118716, 341059642687, 361935221296, 384088552321, 407597844432, 432546093294, 459021374572, 487117154867, 516932621682, 548573033590, 582150091830, 617782334651, 655595555791, 695723248569, 738307077168, 783497376747, 831453684183, 882345301285, 936351892486, 993664119121, 1054484312524, 1119027188325, 1187520604468, 1260206365627, 1337341076854, 1419197049486, 1506063262491, 1598246382662, 1696071847252, 1799885012878, 1910052374747, 2026962860500, 2151029203266, 2282689398739, 2422408251457, 2570679015712, 2728025136906, 2895002099486, 3072199387991, 3260242568131, 3459795495242, 3671562657914, 3896291665080, 4134775885316, 4387857247705, 4656429214122, 4941439933460, 5243895588908, 5564863950114, 5905478142772, 6266940648935, 6650527552175, 7057593042589, 7489574197539, 7947996055022, 8434476997558, 8950734465625, 9498591020797, 10079980779998, 10696956243580, 11351695541337, 12046510122031, 12783852913581, 13566326982715, 14396694724673, 15277887615381, 16213016560543, 17205382878181, 18258489953389, 19376055606456, 20562025218016, 21820585657560, 23156180064488, 24573523533875, 26077619762337, 27673778712750, 29367635360200, 31165169585327, 33072727285306, 35097042776985, 37245262571278, 39524970602741, 41944215003394, 44511536515321, 47235998642351, 50127219647252, 53195406507421, 56451390948928, 59906667686130, 63573435001862, 67464637811456, 71594013362620, 75976139732519, 80626487293267, 85561473327514, 90798519986944, 96356115798305, 102253880934088, 108512636478302, 115154477931865, 122202853217119, 129682645456833, 137620260819954, 146043721744222, 154982765864743, 164468950997792, 174535766550465, 185218751749486, 196555621106568, 208586397563259, 221353553785311, 234902162105402, 249280053643550, 264537987166964, 280729828285480, 297912739615178, 316147382581544, 335498131574595, 356033301212013, 377825387512598, 400951323831468, 425492752460545, 451536312853150, 479173947490266, 508503226468250, 539627691953919, 572657223723034, 607708427072674, 644905044476938, 684378392439283, 726267825083707, 770721226121430, 817895530929871, 867957280587026, 921083209817197, 977460870923688, 1037289295911185, 1100779699135317, 1168156222959992, 1239656729054927, 1315533638126921, 1396054821049394, 1481504544536185, 1572184474698156, 1668414742025481, 1770535071555377, 1878905982215138, 1993910059574563, 2115953306501003, 2245466576485316, 2382907094698829, 2528760072151156, 2683540418647385, 2847794560591953]
2 | 


--------------------------------------------------------------------------------
/examples/rfc2616.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This is a simple driver for the http-parser package.  It is
  3 |  * intended to read one HTTP request after another from a file,
  4 |  * nothing more.
  5 |  *
  6 |  * For "feature parity" with the Haskell code in RFC2616.hs, we
  7 |  * allocate and populate a simple structure describing each request,
  8 |  * since that's the sort of thing that many real applications would
  9 |  * themselves do and the library doesn't do this for us.
 10 |  *
 11 |  * For the http-parser source, see
 12 |  * https://github.com/joyent/http-parser/blob/master/http_parser.c
 13 |  */
 14 | 
 15 | /*
 16 |  * Turn off this preprocessor symbol to have the callbacks do nothing
 17 |  * at all, which "improves performance" by about 50%.
 18 |  */
 19 | /*#define LOOK_BUSY*/
 20 | 
 21 | #include <assert.h>
 22 | #include <fcntl.h>
 23 | #include <stdio.h>
 24 | #include <stdlib.h>
 25 | #include <string.h>
 26 | #include <sys/stat.h>
 27 | #include <sys/types.h>
 28 | #include <unistd.h>
 29 | 
 30 | #include "http_parser.h"
 31 | 
 32 | struct http_string {
 33 |     size_t len;
 34 |     char value[0];
 35 | };
 36 | 
 37 | struct http_header {
 38 |     struct http_string *name;
 39 |     struct http_string *value;
 40 |     struct http_header *next;
 41 | };
 42 | 
 43 | struct http_request {
 44 |     struct http_string *method;
 45 |     struct http_string *uri;
 46 |     struct http_header *headers, *last;
 47 | };
 48 | 
 49 | struct data {
 50 |     size_t count;
 51 |     struct http_request req;
 52 | };
 53 | 
 54 | static void *xmalloc(size_t size)
 55 | {
 56 |     void *ptr;
 57 | 
 58 |     if ((ptr = malloc(size)) == NULL) {
 59 | 	perror("malloc");
 60 | 	exit(1);
 61 |     }
 62 | 
 63 |     return ptr;
 64 | }
 65 | 
 66 | static struct http_string *xstrdup(const char *src, size_t len, size_t extra)
 67 | {
 68 |     struct http_string *dst = xmalloc(sizeof(*dst) + len + extra);
 69 |     memcpy(dst->value, src, len);
 70 |     dst->len = len;
 71 |     return dst;
 72 | }
 73 | 
 74 | static void xstrcat(struct http_string **dst, const char *src, size_t len)
 75 | {
 76 |     struct http_string *p;
 77 | 
 78 |     if (*dst == NULL) {
 79 | 	*dst = xstrdup(src, len, 0);
 80 | 	return;
 81 |     }
 82 | 
 83 |     p = xstrdup((*dst)->value, (*dst)->len, len);
 84 |     memcpy(p->value + (*dst)->len, src, len);
 85 |     p->len += len;
 86 |     free(*dst);
 87 |     *dst = p;
 88 | }
 89 | 
 90 | static int begin(http_parser *p)
 91 | {
 92 |     struct data *data = p->data;
 93 | 
 94 |     data->count++;
 95 | 
 96 |     return 0;
 97 | }
 98 | 
 99 | static int url(http_parser *p, const char *at, size_t len)
100 | {
101 | #ifdef LOOK_BUSY
102 |     struct data *data = p->data;
103 | 
104 |     xstrcat(&data->req.uri, at, len);
105 | #endif
106 | 
107 |     return 0;
108 | }
109 | 
110 | static int header_field(http_parser *p, const char *at, size_t len)
111 | {
112 | #ifdef LOOK_BUSY
113 |     struct data *data = p->data;
114 | 
115 |     if (data->req.last && data->req.last->value == NULL) {
116 | 	xstrcat(&data->req.last->name, at, len);
117 |     } else {
118 | 	struct http_header *hdr = xmalloc(sizeof(*hdr));
119 | 
120 | 	hdr->name = xstrdup(at, len, 0);
121 | 	hdr->value = NULL;
122 | 	hdr->next = NULL;
123 | 
124 | 	if (data->req.last != NULL)
125 | 	    data->req.last->next = hdr;
126 | 	data->req.last = hdr;
127 | 	if (data->req.headers == NULL)
128 | 	    data->req.headers = hdr;
129 |     }
130 | #endif
131 | 
132 |     return 0;
133 | }
134 | 
135 | static int header_value(http_parser *p, const char *at, size_t len)
136 | {
137 | #ifdef LOOK_BUSY
138 |     struct data *data = p->data;
139 | 
140 |     xstrcat(&data->req.last->value, at, len);
141 | #endif
142 | 
143 |     return 0;
144 | }
145 | 
146 | static int complete(http_parser *p)
147 | {
148 | #ifdef LOOK_BUSY
149 |     struct data *data = p->data;
150 |     struct http_header *hdr, *next;
151 | 
152 |     free(data->req.method);
153 |     free(data->req.uri);
154 | 
155 |     for (hdr = data->req.headers; hdr != NULL; hdr = next) {
156 | 	next = hdr->next;
157 | 	free(hdr->name);
158 | 	free(hdr->value);
159 | 	free(hdr);
160 | 	hdr = next;
161 |     }
162 | 
163 |     data->req.method = NULL;
164 |     data->req.uri = NULL;
165 |     data->req.headers = NULL;
166 |     data->req.last = NULL;
167 | #endif
168 | 
169 |     /* Bludgeon http_parser into understanding that we really want to
170 |      * keep parsing after a request that in principle ought to close
171 |      * the "connection". */
172 |     if (!http_should_keep_alive(p)) {
173 | 	p->http_major = 1;
174 | 	p->http_minor = 1;
175 | 	p->flags &= ~6;
176 |     }
177 | 
178 |     return 0;
179 | }
180 | 
181 | static void parse(const char *path, int fd)
182 | {
183 |     struct data data;
184 |     http_parser_settings s;
185 |     http_parser p;
186 |     ssize_t nread;
187 | 
188 |     memset(&s, 0, sizeof(s));
189 |     s.on_message_begin = begin;
190 |     s.on_url = url;
191 |     s.on_header_field = header_field;
192 |     s.on_header_value = header_value;
193 |     s.on_message_complete = complete;
194 | 
195 |     p.data = &data;
196 | 
197 |     http_parser_init(&p, HTTP_REQUEST);
198 | 
199 |     data.count = 0;
200 |     data.req.method = NULL;
201 |     data.req.uri = NULL;
202 |     data.req.headers = NULL;
203 |     data.req.last = NULL;
204 | 
205 |     do {
206 | 	char buf[HTTP_MAX_HEADER_SIZE];
207 | 	size_t np;
208 | 
209 | 	nread = read(fd, buf, sizeof(buf));
210 | 
211 | 	np = http_parser_execute(&p, &s, buf, nread);
212 | 	if (np != nread) {
213 | 	    fprintf(stderr, "%s: parse failed\n", path);
214 | 	    break;
215 | 	}
216 |     } while (nread > 0);
217 | 
218 |     printf("%ld\n", (unsigned long) data.count);
219 | }
220 | 
221 | int main(int argc, char **argv)
222 | {
223 |     int i;
224 | 
225 |     for (i = 1; i < argc; i++) {
226 | 	int fd;
227 | 
228 | 	fd = open(argv[i], O_RDONLY);
229 | 	if (fd == -1) {
230 | 	    perror(argv[i]);
231 | 	    continue;
232 | 	}
233 | 	parse(argv[i], fd);
234 | 	close(fd);
235 |     }
236 | 
237 |     return 0;
238 | }
239 | 
240 | /*
241 |  * Local Variables:
242 |  * c-file-style: "stroustrup"
243 |  * End:
244 |  */
245 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Zepto.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | #if __GLASGOW_HASKELL__ >= 702
  3 | {-# LANGUAGE Trustworthy #-} -- Data.ByteString.Unsafe
  4 | #endif
  5 | {-# LANGUAGE BangPatterns #-}
  6 | 
  7 | -- |
  8 | -- Module      :  Data.Attoparsec.Zepto
  9 | -- Copyright   :  Bryan O'Sullivan 2007-2015
 10 | -- License     :  BSD3
 11 | --
 12 | -- Maintainer  :  bos@serpentine.com
 13 | -- Stability   :  experimental
 14 | -- Portability :  unknown
 15 | --
 16 | -- A tiny, highly specialized combinator parser for 'B.ByteString'
 17 | -- strings.
 18 | --
 19 | -- While the main attoparsec module generally performs well, this
 20 | -- module is particularly fast for simple non-recursive loops that
 21 | -- should not normally result in failed parses.
 22 | --
 23 | -- /Warning/: on more complex inputs involving recursion or failure,
 24 | -- parsers based on this module may be as much as /ten times slower/
 25 | -- than regular attoparsec! You should /only/ use this module when you
 26 | -- have benchmarks that prove that its use speeds your code up.
 27 | module Data.Attoparsec.Zepto
 28 |     (
 29 |       Parser
 30 |     , ZeptoT
 31 |     , parse
 32 |     , parseT
 33 |     , atEnd
 34 |     , string
 35 |     , take
 36 |     , takeWhile
 37 |     ) where
 38 | 
 39 | import Control.Applicative
 40 | import Control.Monad (MonadPlus(..), ap)
 41 | import qualified Control.Monad.Fail as Fail
 42 | import Control.Monad.IO.Class (MonadIO(..))
 43 | import Data.ByteString (ByteString)
 44 | import Data.Functor.Identity (Identity(runIdentity))
 45 | import Data.Monoid as Mon (Monoid(..))
 46 | import Data.Semigroup (Semigroup(..))
 47 | import Data.Word (Word8)
 48 | import Prelude hiding (take, takeWhile)
 49 | import qualified Data.ByteString as B
 50 | import qualified Data.ByteString.Unsafe as B
 51 | 
 52 | newtype S = S {
 53 |       input :: ByteString
 54 |     }
 55 | 
 56 | data Result a = Fail String
 57 |               | OK !a S
 58 | 
 59 | -- | A simple parser.
 60 | --
 61 | -- This monad is strict in its state, and the monadic bind operator
 62 | -- ('>>=') evaluates each result to weak head normal form before
 63 | -- passing it along.
 64 | newtype ZeptoT m a = Parser {
 65 |       runParser :: S -> m (Result a)
 66 |     }
 67 | 
 68 | type Parser a = ZeptoT Identity a
 69 | 
 70 | instance Monad m => Functor (ZeptoT m) where
 71 |     fmap f m = Parser $ \s -> do
 72 |       result <- runParser m s
 73 |       case result of
 74 |         OK a s'  -> return (OK (f a) s')
 75 |         Fail err -> return (Fail err)
 76 |     {-# INLINE fmap #-}
 77 | 
 78 | instance MonadIO m => MonadIO (ZeptoT m) where
 79 |   liftIO act = Parser $ \s -> do
 80 |     result <- liftIO act
 81 |     return (OK result s)
 82 |   {-# INLINE liftIO #-}
 83 | 
 84 | instance Monad m => Monad (ZeptoT m) where
 85 |     return = pure
 86 |     {-# INLINE return #-}
 87 | 
 88 |     m >>= k   = Parser $ \s -> do
 89 |       result <- runParser m s
 90 |       case result of
 91 |         OK a s'  -> runParser (k a) s'
 92 |         Fail err -> return (Fail err)
 93 |     {-# INLINE (>>=) #-}
 94 | 
 95 | #if !(MIN_VERSION_base(4,13,0))
 96 |     fail = Fail.fail
 97 |     {-# INLINE fail #-}
 98 | #endif
 99 | 
100 | instance Monad m => Fail.MonadFail (ZeptoT m) where
101 |     fail msg = Parser $ \_ -> return (Fail msg)
102 |     {-# INLINE fail #-}
103 | 
104 | instance Monad m => MonadPlus (ZeptoT m) where
105 |     mzero = fail "mzero"
106 |     {-# INLINE mzero #-}
107 | 
108 |     mplus a b = Parser $ \s -> do
109 |       result <- runParser a s
110 |       case result of
111 |         ok@(OK _ _) -> return ok
112 |         _           -> runParser b s
113 |     {-# INLINE mplus #-}
114 | 
115 | instance (Monad m) => Applicative (ZeptoT m) where
116 |     pure a = Parser $ \s -> return (OK a s)
117 |     {-# INLINE pure #-}
118 |     (<*>)  = ap
119 |     {-# INLINE (<*>) #-}
120 | 
121 | gets :: Monad m => (S -> a) -> ZeptoT m a
122 | gets f = Parser $ \s -> return (OK (f s) s)
123 | {-# INLINE gets #-}
124 | 
125 | put :: Monad m => S -> ZeptoT m ()
126 | put s = Parser $ \_ -> return (OK () s)
127 | {-# INLINE put #-}
128 | 
129 | -- | Run a parser.
130 | parse :: Parser a -> ByteString -> Either String a
131 | parse p bs = case runIdentity (runParser p (S bs)) of
132 |                (OK a _)   -> Right a
133 |                (Fail err) -> Left err
134 | {-# INLINE parse #-}
135 | 
136 | -- | Run a parser on top of the given base monad.
137 | parseT :: Monad m => ZeptoT m a -> ByteString -> m (Either String a)
138 | parseT p bs = do
139 |   result <- runParser p (S bs)
140 |   case result of
141 |     OK a _   -> return (Right a)
142 |     Fail err -> return (Left err)
143 | {-# INLINE parseT #-}
144 | 
145 | instance Monad m => Semigroup (ZeptoT m a) where
146 |     (<>) = mplus
147 |     {-# INLINE (<>) #-}
148 | 
149 | instance Monad m => Mon.Monoid (ZeptoT m a) where
150 |     mempty  = fail "mempty"
151 |     {-# INLINE mempty #-}
152 |     mappend = (<>)
153 |     {-# INLINE mappend #-}
154 | 
155 | instance Monad m => Alternative (ZeptoT m) where
156 |     empty = fail "empty"
157 |     {-# INLINE empty #-}
158 |     (<|>) = mplus
159 |     {-# INLINE (<|>) #-}
160 | 
161 | -- | Consume input while the predicate returns 'True'.
162 | takeWhile :: Monad m => (Word8 -> Bool) -> ZeptoT m ByteString
163 | takeWhile p = do
164 |   (h,t) <- gets (B.span p . input)
165 |   put (S t)
166 |   return h
167 | {-# INLINE takeWhile #-}
168 | 
169 | -- | Consume @n@ bytes of input.
170 | take :: Monad m => Int -> ZeptoT m ByteString
171 | take !n = do
172 |   s <- gets input
173 |   if B.length s >= n
174 |     then put (S (B.unsafeDrop n s)) >> return (B.unsafeTake n s)
175 |     else fail "insufficient input"
176 | {-# INLINE take #-}
177 | 
178 | -- | Match a string exactly.
179 | string :: Monad m => ByteString -> ZeptoT m ()
180 | string s = do
181 |   i <- gets input
182 |   if s `B.isPrefixOf` i
183 |     then put (S (B.unsafeDrop (B.length s) i)) >> return ()
184 |     else fail "string"
185 | {-# INLINE string #-}
186 | 
187 | -- | Indicate whether the end of the input has been reached.
188 | atEnd :: Monad m => ZeptoT m Bool
189 | atEnd = do
190 |   i <- gets input
191 |   return $! B.null i
192 | {-# INLINE atEnd #-}
193 | 


--------------------------------------------------------------------------------
/benchmarks/json-data/twitter10.json:
--------------------------------------------------------------------------------
1 | {"results":[{"from_user_id_str":"207858021","profile_image_url":"http://a3.twimg.com/sticky/default_profile_images/default_profile_2_normal.png","created_at":"Wed, 26 Jan 2011 04:30:38 +0000","from_user":"pboudarga","id_str":"30120402839666689","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I'm at Rolla Sushi Grill (27737 Bouquet Canyon Road, #106, Btw Haskell Canyon and Rosedell Drive, Saugus) http://4sq.com/gqqdhs","id":30120402839666689,"from_user_id":207858021,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://foursquare.com&quot; rel=&quot;nofollow&quot;&gt;foursquare&lt;/a&gt;"},{"from_user_id_str":"69988683","profile_image_url":"http://a0.twimg.com/profile_images/1211955817/avatar_7888_normal.gif","created_at":"Wed, 26 Jan 2011 04:25:23 +0000","from_user":"YNK33","id_str":"30119083059978240","metadata":{"result_type":"recent"},"to_user_id":null,"text":"hsndfile 0.5.0: Free and open source Haskell bindings for libsndfile http://bit.ly/gHaBWG Mac Os","id":30119083059978240,"from_user_id":69988683,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitterfeed.com&quot; rel=&quot;nofollow&quot;&gt;twitterfeed&lt;/a&gt;"},{"from_user_id_str":"81492","profile_image_url":"http://a1.twimg.com/profile_images/423894208/Picture_7_normal.jpg","created_at":"Wed, 26 Jan 2011 04:24:28 +0000","from_user":"satzz","id_str":"30118851488251904","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Emacs\u306e\u30e2\u30fc\u30c9\u8868\u793a\u304c\u4eca(Ruby Controller Outputz RoR Flymake REl hs)\u3068\u306a\u3063\u3066\u3066\u3088\u304f\u308f\u304b\u3089\u306a\u3044\u3093\u3060\u3051\u3069\u6700\u5f8c\u306eREl\u3068\u304bhs\u3063\u3066\u4f55\u3060\u308d\u3046\u2026haskell\u3068\u304b2\u5e74\u4ee5\u4e0a\u66f8\u3044\u3066\u306a\u3044\u3051\u3069\u2026","id":30118851488251904,"from_user_id":81492,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"&lt;a href=&quot;http://www.hootsuite.com&quot; rel=&quot;nofollow&quot;&gt;HootSuite&lt;/a&gt;"},{"from_user_id_str":"9518356","profile_image_url":"http://a2.twimg.com/profile_images/119165723/ocaml-icon_normal.png","created_at":"Wed, 26 Jan 2011 04:19:19 +0000","from_user":"planet_ocaml","id_str":"30117557788741632","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I so miss #haskell type classes in #ocaml - i want to do something like refinement. Also why does ocaml not have... http://bit.ly/geYRwt","id":30117557788741632,"from_user_id":9518356,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitterfeed.com&quot; rel=&quot;nofollow&quot;&gt;twitterfeed&lt;/a&gt;"},{"from_user_id_str":"218059","profile_image_url":"http://a1.twimg.com/profile_images/1053837723/twitter-icon9_normal.jpg","created_at":"Wed, 26 Jan 2011 04:16:32 +0000","from_user":"aprikip","id_str":"30116854940835840","metadata":{"result_type":"recent"},"to_user_id":null,"text":"yatex-mode\u3084haskell-mode\u306e\u3053\u3068\u3067\u3059\u306d\u3001\u308f\u304b\u308a\u307e\u3059\u3002","id":30116854940835840,"from_user_id":218059,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"&lt;a href=&quot;http://sites.google.com/site/yorufukurou/&quot; rel=&quot;nofollow&quot;&gt;YoruFukurou&lt;/a&gt;"},{"from_user_id_str":"216363","profile_image_url":"http://a1.twimg.com/profile_images/72454310/Tim-Avatar_normal.png","created_at":"Wed, 26 Jan 2011 04:15:30 +0000","from_user":"dysinger","id_str":"30116594684264448","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell in Hawaii tonight for me... #fun","id":30116594684264448,"from_user_id":216363,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://www.nambu.com/&quot; rel=&quot;nofollow&quot;&gt;Nambu&lt;/a&gt;"},{"from_user_id_str":"1774820","profile_image_url":"http://a2.twimg.com/profile_images/61169291/dan_desert_thumb_normal.jpg","created_at":"Wed, 26 Jan 2011 04:13:36 +0000","from_user":"DanMil","id_str":"30116117682851840","metadata":{"result_type":"recent"},"to_user_id":1594784,"text":"@ojrac @chewedwire @tomheon Haskell isn't a language, it's a belief system.  A seductive one...","id":30116117682851840,"from_user_id":1774820,"to_user":"ojrac","geo":null,"iso_language_code":"en","to_user_id_str":"1594784","source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"},{"from_user_id_str":"659256","profile_image_url":"http://a0.twimg.com/profile_images/746976711/angular-final_normal.jpg","created_at":"Wed, 26 Jan 2011 04:11:06 +0000","from_user":"djspiewak","id_str":"30115488931520512","metadata":{"result_type":"recent"},"to_user_id":null,"text":"One of the very nice things about Haskell as opposed to SML is the reduced proliferation of identifiers (e.g. andb, orb, etc). #typeclasses","id":30115488931520512,"from_user_id":659256,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://itunes.apple.com/us/app/twitter/id409789998?mt=12&quot; rel=&quot;nofollow&quot;&gt;Twitter for Mac&lt;/a&gt;"},{"from_user_id_str":"144546280","profile_image_url":"http://a1.twimg.com/a/1295051201/images/default_profile_1_normal.png","created_at":"Wed, 26 Jan 2011 04:06:12 +0000","from_user":"listwarenet","id_str":"30114255890026496","metadata":{"result_type":"recent"},"to_user_id":null,"text":"http://www.listware.net/201101/haskell-cafe/84752-re-haskell-cafe-gpl-license-of-h-matrix-and-prelude-numeric.html Re: Haskell-c","id":30114255890026496,"from_user_id":144546280,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"&lt;a href=&quot;http://1e10.org/cloud/&quot; rel=&quot;nofollow&quot;&gt;1e10&lt;/a&gt;"},{"from_user_id_str":"1594784","profile_image_url":"http://a2.twimg.com/profile_images/378515773/square-profile_normal.jpg","created_at":"Wed, 26 Jan 2011 04:01:29 +0000","from_user":"ojrac","id_str":"30113067333324800","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @tomheon: @ojrac @chewedwire Don't worry, learning Haskell will not give you any clear idea what monad means.","id":30113067333324800,"from_user_id":1594784,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"&lt;a href=&quot;http://twitter.com/&quot;&gt;web&lt;/a&gt;"}],"max_id":30120402839666689,"since_id":0,"refresh_url":"?since_id=30120402839666689&q=haskell","next_page":"?page=2&max_id=30120402839666689&rpp=10&q=haskell","results_per_page":10,"page":1,"completed_in":0.012714,"since_id_str":"0","max_id_str":"30120402839666689","query":"haskell"}


--------------------------------------------------------------------------------
/internal/Data/Attoparsec/ByteString/Buffer.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | -- |
  3 | -- Module      :  Data.Attoparsec.ByteString.Buffer
  4 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  5 | -- License     :  BSD3
  6 | --
  7 | -- Maintainer  :  bos@serpentine.com
  8 | -- Stability   :  experimental
  9 | -- Portability :  GHC
 10 | --
 11 | -- An "immutable" buffer that supports cheap appends.
 12 | --
 13 | -- A Buffer is divided into an immutable read-only zone, followed by a
 14 | -- mutable area that we've preallocated, but not yet written to.
 15 | --
 16 | -- We overallocate at the end of a Buffer so that we can cheaply
 17 | -- append.  Since a user of an existing Buffer cannot see past the end
 18 | -- of its immutable zone into the data that will change during an
 19 | -- append, this is safe.
 20 | --
 21 | -- Once we run out of space at the end of a Buffer, we do the usual
 22 | -- doubling of the buffer size.
 23 | --
 24 | -- The fact of having a mutable buffer really helps with performance,
 25 | -- but it does have a consequence: if someone misuses the Partial API
 26 | -- that attoparsec uses by calling the same continuation repeatedly
 27 | -- (which never makes sense in practice), they could overwrite data.
 28 | --
 29 | -- Since the API *looks* pure, it should *act* pure, too, so we use
 30 | -- two generation counters (one mutable, one immutable) to track the
 31 | -- number of appends to a mutable buffer. If the counters ever get out
 32 | -- of sync, someone is appending twice to a mutable buffer, so we
 33 | -- duplicate the entire buffer in order to preserve the immutability
 34 | -- of its older self.
 35 | --
 36 | -- While we could go a step further and gain protection against API
 37 | -- abuse on a multicore system, by use of an atomic increment
 38 | -- instruction to bump the mutable generation counter, that would be
 39 | -- very expensive, and feels like it would also be in the realm of the
 40 | -- ridiculous.  Clients should never call a continuation more than
 41 | -- once; we lack a linear type system that could enforce this; and
 42 | -- there's only so far we should go to accommodate broken uses.
 43 | 
 44 | module Data.Attoparsec.ByteString.Buffer
 45 |     (
 46 |       Buffer
 47 |     , buffer
 48 |     , unbuffer
 49 |     , pappend
 50 |     , length
 51 |     , unsafeIndex
 52 |     , substring
 53 |     , unsafeDrop
 54 |     ) where
 55 | 
 56 | import Control.Exception (assert)
 57 | import Data.ByteString.Internal (ByteString(..), nullForeignPtr)
 58 | import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO)
 59 | import Data.Attoparsec.Internal.Compat
 60 | import Data.List (foldl1')
 61 | import Data.Monoid as Mon (Monoid(..))
 62 | import Data.Semigroup (Semigroup(..))
 63 | import Data.Word (Word8)
 64 | import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
 65 | import Foreign.Marshal.Utils (copyBytes)
 66 | import Foreign.Ptr (castPtr, plusPtr)
 67 | import Foreign.Storable (peek, peekByteOff, poke, sizeOf)
 68 | import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
 69 | import Prelude hiding (length)
 70 | 
 71 | -- If _cap is zero, this buffer is empty.
 72 | data Buffer = Buf {
 73 |       _fp  :: {-# UNPACK #-} !(ForeignPtr Word8)
 74 |     , _off :: {-# UNPACK #-} !Int
 75 |     , _len :: {-# UNPACK #-} !Int
 76 |     , _cap :: {-# UNPACK #-} !Int
 77 |     , _gen :: {-# UNPACK #-} !Int
 78 |     }
 79 | 
 80 | instance Show Buffer where
 81 |     showsPrec p = showsPrec p . unbuffer
 82 | 
 83 | -- | The initial 'Buffer' has no mutable zone, so we can avoid all
 84 | -- copies in the (hopefully) common case of no further input being fed
 85 | -- to us.
 86 | buffer :: ByteString -> Buffer
 87 | buffer bs = withPS bs $ \fp off len -> Buf fp off len len 0
 88 | 
 89 | unbuffer :: Buffer -> ByteString
 90 | unbuffer (Buf fp off len _ _) = mkPS fp off len
 91 | 
 92 | instance Semigroup Buffer where
 93 |     (Buf _ _ _ 0 _) <> b                    = b
 94 |     a               <> (Buf _ _ _ 0 _)      = a
 95 |     buf             <> (Buf fp off len _ _) = append buf fp off len
 96 | 
 97 | instance Monoid Buffer where
 98 |     mempty = Buf nullForeignPtr 0 0 0 0
 99 | 
100 |     mappend = (<>)
101 | 
102 |     mconcat [] = Mon.mempty
103 |     mconcat xs = foldl1' mappend xs
104 | 
105 | pappend :: Buffer -> ByteString -> Buffer
106 | pappend (Buf _ _ _ 0 _) bs  = buffer bs
107 | pappend buf             bs  = withPS bs $ \fp off len -> append buf fp off len
108 | 
109 | append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer
110 | append (Buf fp0 off0 len0 cap0 gen0) !fp1 !off1 !len1 =
111 |   inlinePerformIO . withForeignPtr fp0 $ \ptr0 ->
112 |     withForeignPtr fp1 $ \ptr1 -> do
113 |       let genSize = sizeOf (0::Int)
114 |           newlen  = len0 + len1
115 |       gen <- if gen0 == 0
116 |              then return 0
117 |              else peek (castPtr ptr0)
118 |       if gen == gen0 && newlen <= cap0
119 |         then do
120 |           let newgen = gen + 1
121 |           poke (castPtr ptr0) newgen
122 |           copyBytes (ptr0 `plusPtr` (off0+len0))
123 |                     (ptr1 `plusPtr` off1)
124 |                     (fromIntegral len1)
125 |           return (Buf fp0 off0 newlen cap0 newgen)
126 |         else do
127 |           let newcap = newlen * 2
128 |           fp <- mallocPlainForeignPtrBytes (newcap + genSize)
129 |           withForeignPtr fp $ \ptr_ -> do
130 |             let ptr    = ptr_ `plusPtr` genSize
131 |                 newgen = 1
132 |             poke (castPtr ptr_) newgen
133 |             copyBytes ptr (ptr0 `plusPtr` off0) (fromIntegral len0)
134 |             copyBytes (ptr `plusPtr` len0) (ptr1 `plusPtr` off1)
135 |                       (fromIntegral len1)
136 |             return (Buf fp genSize newlen newcap newgen)
137 | 
138 | length :: Buffer -> Int
139 | length (Buf _ _ len _ _) = len
140 | {-# INLINE length #-}
141 | 
142 | unsafeIndex :: Buffer -> Int -> Word8
143 | unsafeIndex (Buf fp off len _ _) i = assert (i >= 0 && i < len) .
144 |     inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i)
145 | {-# INLINE unsafeIndex #-}
146 | 
147 | substring :: Int -> Int -> Buffer -> ByteString
148 | substring s l (Buf fp off len _ _) =
149 |   assert (s >= 0 && s <= len) .
150 |   assert (l >= 0 && l <= len-s) $
151 |   mkPS fp (off+s) l
152 | {-# INLINE substring #-}
153 | 
154 | unsafeDrop :: Int -> Buffer -> ByteString
155 | unsafeDrop s (Buf fp off len _ _) =
156 |   assert (s >= 0 && s <= len) $
157 |   mkPS fp (off+s) (len-s)
158 | {-# INLINE unsafeDrop #-}
159 | 


--------------------------------------------------------------------------------
/benchmarks/warp-3.0.1.1/Network/Wai/Handler/Warp/RequestHeader.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveDataTypeable #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | {-# LANGUAGE BangPatterns #-}
  4 | 
  5 | module Network.Wai.Handler.Warp.RequestHeader (
  6 |       parseHeaderLines
  7 |     , parseByteRanges
  8 |     ) where
  9 | 
 10 | import Control.Exception (Exception, throwIO)
 11 | import Control.Monad (when)
 12 | import Data.Typeable (Typeable)
 13 | import qualified Data.ByteString as S
 14 | import qualified Data.ByteString.Char8 as B (unpack, readInteger)
 15 | import Data.ByteString.Internal (ByteString(..), memchr)
 16 | import qualified Data.CaseInsensitive as CI
 17 | import Data.Word (Word8)
 18 | import Foreign.ForeignPtr (withForeignPtr)
 19 | import Foreign.Ptr (Ptr, plusPtr, minusPtr, nullPtr)
 20 | import Foreign.Storable (peek)
 21 | import qualified Network.HTTP.Types as H
 22 | -- import Network.Wai.Handler.Warp.Types
 23 | import qualified Network.HTTP.Types.Header as HH
 24 | -- $setup
 25 | -- >>> :set -XOverloadedStrings
 26 | 
 27 | data InvalidRequest = NotEnoughLines [String]
 28 |                     | BadFirstLine String
 29 |                     | NonHttp
 30 |                     | IncompleteHeaders
 31 |                     | ConnectionClosedByPeer
 32 |                     | OverLargeHeader
 33 |                     deriving (Eq, Typeable, Show)
 34 | 
 35 | instance Exception InvalidRequest
 36 | 
 37 | ----------------------------------------------------------------
 38 | 
 39 | parseHeaderLines :: [ByteString]
 40 |                  -> IO (H.Method
 41 |                        ,ByteString  --  Path
 42 |                        ,ByteString  --  Path, parsed
 43 |                        ,ByteString  --  Query
 44 |                        ,H.HttpVersion
 45 |                        ,H.RequestHeaders
 46 |                        )
 47 | parseHeaderLines [] = throwIO $ NotEnoughLines []
 48 | parseHeaderLines (firstLine:otherLines) = do
 49 |     (method, path', query, httpversion) <- parseRequestLine firstLine
 50 |     let path = H.extractPath path'
 51 |         hdr = map parseHeader otherLines
 52 |     return (method, path', path, query, httpversion, hdr)
 53 | 
 54 | ----------------------------------------------------------------
 55 | 
 56 | -- |
 57 | --
 58 | -- >>> parseRequestLine "GET / HTTP/1.1"
 59 | -- ("GET","/","",HTTP/1.1)
 60 | -- >>> parseRequestLine "POST /cgi/search.cgi?key=foo HTTP/1.0"
 61 | -- ("POST","/cgi/search.cgi","?key=foo",HTTP/1.0)
 62 | -- >>> parseRequestLine "GET "
 63 | -- *** Exception: Warp: Invalid first line of request: "GET "
 64 | -- >>> parseRequestLine "GET /NotHTTP UNKNOWN/1.1"
 65 | -- *** Exception: Warp: Request line specified a non-HTTP request
 66 | parseRequestLine :: ByteString
 67 |                  -> IO (H.Method
 68 |                        ,ByteString -- Path
 69 |                        ,ByteString -- Query
 70 |                        ,H.HttpVersion)
 71 | parseRequestLine requestLine@(PS fptr off len) = withForeignPtr fptr $ \ptr -> do
 72 |     when (len < 14) $ throwIO baderr
 73 |     let methodptr = ptr `plusPtr` off
 74 |         limptr = methodptr `plusPtr` len
 75 |         lim0 = fromIntegral len
 76 | 
 77 |     pathptr0 <- memchr methodptr 32 lim0 -- ' '
 78 |     when (pathptr0 == nullPtr || (limptr `minusPtr` pathptr0) < 11) $
 79 |         throwIO baderr
 80 |     let pathptr = pathptr0 `plusPtr` 1
 81 |         lim1 = fromIntegral (limptr `minusPtr` pathptr0)
 82 | 
 83 |     httpptr0 <- memchr pathptr 32 lim1 -- ' '
 84 |     when (httpptr0 == nullPtr || (limptr `minusPtr` httpptr0) < 9) $
 85 |         throwIO baderr
 86 |     let httpptr = httpptr0 `plusPtr` 1
 87 |         lim2 = fromIntegral (httpptr0 `minusPtr` pathptr)
 88 | 
 89 |     checkHTTP httpptr
 90 |     !hv <- httpVersion httpptr
 91 |     queryptr <- memchr pathptr 63 lim2 -- '?'
 92 | 
 93 |     let !method = bs ptr methodptr pathptr0
 94 |         !path
 95 |           | queryptr == nullPtr = bs ptr pathptr httpptr0
 96 |           | otherwise           = bs ptr pathptr queryptr
 97 |         !query
 98 |           | queryptr == nullPtr = S.empty
 99 |           | otherwise           = bs ptr queryptr httpptr0
100 | 
101 |     return (method,path,query,hv)
102 |   where
103 |     baderr = BadFirstLine $ B.unpack requestLine
104 |     check :: Ptr Word8 -> Int -> Word8 -> IO ()
105 |     check p n w = do
106 |         w0 <- peek $ p `plusPtr` n
107 |         when (w0 /= w) $ throwIO NonHttp
108 |     checkHTTP httpptr = do
109 |         check httpptr 0 72 -- 'H'
110 |         check httpptr 1 84 -- 'T'
111 |         check httpptr 2 84 -- 'T'
112 |         check httpptr 3 80 -- 'P'
113 |         check httpptr 4 47 -- '/'
114 |         check httpptr 6 46 -- '.'
115 |     httpVersion httpptr = do
116 |         major <- peek $ httpptr `plusPtr` 5
117 |         minor <- peek $ httpptr `plusPtr` 7
118 |         return $ if major == (49 :: Word8) && minor == (49 :: Word8) then
119 |             H.http11
120 |           else
121 |             H.http10
122 |     bs ptr p0 p1 = PS fptr o l
123 |       where
124 |         o = p0 `minusPtr` ptr
125 |         l = p1 `minusPtr` p0
126 | 
127 | ----------------------------------------------------------------
128 | 
129 | -- |
130 | --
131 | -- >>> parseHeader "Content-Length:47"
132 | -- ("Content-Length","47")
133 | -- >>> parseHeader "Accept-Ranges: bytes"
134 | -- ("Accept-Ranges","bytes")
135 | -- >>> parseHeader "Host:  example.com:8080"
136 | -- ("Host","example.com:8080")
137 | -- >>> parseHeader "NoSemiColon"
138 | -- ("NoSemiColon","")
139 | 
140 | parseHeader :: ByteString -> H.Header
141 | parseHeader s =
142 |     let (k, rest) = S.break (== 58) s -- ':'
143 |         rest' = S.dropWhile (\c -> c == 32 || c == 9) $ S.drop 1 rest
144 |      in (CI.mk k, rest')
145 | 
146 | parseByteRanges :: S.ByteString -> Maybe HH.ByteRanges
147 | parseByteRanges bs1 = do
148 |     bs2 <- stripPrefix "bytes=" bs1
149 |     (r, bs3) <- range bs2
150 |     ranges (r:) bs3
151 |   where
152 |     range bs2 =
153 |         case stripPrefix "-" bs2 of
154 |             Just bs3 -> do
155 |                 (i, bs4) <- B.readInteger bs3
156 |                 Just (HH.ByteRangeSuffix i, bs4)
157 |             Nothing -> do
158 |                 (i, bs3) <- B.readInteger bs2
159 |                 bs4 <- stripPrefix "-" bs3
160 |                 case B.readInteger bs4 of
161 |                     Nothing -> Just (HH.ByteRangeFrom i, bs4)
162 |                     Just (j, bs5) -> Just (HH.ByteRangeFromTo i j, bs5)
163 |     ranges front bs3 =
164 |         case stripPrefix "," bs3 of
165 |             Nothing -> Just (front [])
166 |             Just bs4 -> do
167 |                 (r, bs5) <- range bs4
168 |                 ranges (front . (r:)) bs5
169 | 
170 |     stripPrefix x y
171 |         | x `S.isPrefixOf` y = Just (S.drop (S.length x) y)
172 |         | otherwise = Nothing
173 | 


--------------------------------------------------------------------------------
/attoparsec.cabal:
--------------------------------------------------------------------------------
  1 | cabal-version:   2.0
  2 |   -- 2.0 needed for internal libraries
  3 | name:            attoparsec
  4 | version:         0.14.4
  5 | license:         BSD3
  6 | license-file:    LICENSE
  7 | category:        Text, Parsing
  8 | author:          Bryan O'Sullivan <bos@serpentine.com>
  9 | maintainer:      Ben Gamari <ben@smart-cactus.org>
 10 | stability:       experimental
 11 | synopsis:        Fast combinator parsing for bytestrings and text
 12 | homepage:        https://github.com/haskell/attoparsec
 13 | bug-reports:     https://github.com/haskell/attoparsec/issues
 14 | build-type:      Simple
 15 | description:
 16 |     A fast parser combinator library, aimed particularly at dealing
 17 |     efficiently with network protocols and complicated text/binary
 18 |     file formats.
 19 | 
 20 | tested-with:
 21 |   GHC == 9.12.1
 22 |   GHC == 9.10.1
 23 |   GHC == 9.8.4
 24 |   GHC == 9.6.6
 25 |   GHC == 9.4.8
 26 |   GHC == 9.2.8
 27 |   GHC == 9.0.2
 28 |   GHC == 8.10.7
 29 |   GHC == 8.8.4
 30 |   GHC == 8.6.5
 31 |   GHC == 8.4.4
 32 |   GHC == 8.2.2
 33 |   GHC == 8.0.2
 34 | 
 35 | extra-source-files:
 36 |     benchmarks/*.txt
 37 |     benchmarks/json-data/*.json
 38 |     benchmarks/Makefile
 39 |     benchmarks/med.txt.bz2
 40 |     examples/*.c
 41 |     examples/*.hs
 42 |     examples/Makefile
 43 | 
 44 | extra-doc-files:
 45 |     README.md
 46 |     changelog.md
 47 | 
 48 | Flag developer
 49 |   Description: Whether to build the library in development mode
 50 |   Default: False
 51 |   Manual: True
 52 | 
 53 | -- We need to test and benchmark these modules,
 54 | -- but do not want to expose them to end users
 55 | library attoparsec-internal
 56 |   hs-source-dirs: internal
 57 |   build-depends: array,
 58 |                  base >= 4.3 && < 5,
 59 |                  bytestring <0.13,
 60 |                  text >= 1.1.1.3
 61 |   if !impl(ghc >= 8.0)
 62 |     build-depends: semigroups >=0.16.1 && <0.21
 63 |   exposed-modules: Data.Attoparsec.ByteString.Buffer
 64 |                    Data.Attoparsec.ByteString.FastSet
 65 |                    Data.Attoparsec.Internal.Compat
 66 |                    Data.Attoparsec.Internal.Fhthagn
 67 |                    Data.Attoparsec.Text.Buffer
 68 |                    Data.Attoparsec.Text.FastSet
 69 |   ghc-options: -O2 -Wall
 70 |   default-language: Haskell2010
 71 | 
 72 | library
 73 |   build-depends: array,
 74 |                  base >= 4.5 && < 5,
 75 |                  bytestring <0.13,
 76 |                  containers,
 77 |                  deepseq,
 78 |                  scientific >= 0.3.1 && < 0.4,
 79 |                  transformers >= 0.2 && (< 0.4 || >= 0.4.1.0) && < 0.7,
 80 |                  text >= 1.1.1.3,
 81 |                  ghc-prim < 0.14,
 82 |                  attoparsec-internal
 83 |   if impl(ghc < 7.4)
 84 |     build-depends:
 85 |       bytestring < 0.10.4.0
 86 | 
 87 |   if !impl(ghc >= 8.0)
 88 |     -- Data.Semigroup && Control.Monad.Fail are available in base-4.9+
 89 |     build-depends: fail == 4.9.*,
 90 |                    semigroups >=0.16.1 && <0.21
 91 | 
 92 |   exposed-modules: Data.Attoparsec
 93 |                    Data.Attoparsec.ByteString
 94 |                    Data.Attoparsec.ByteString.Char8
 95 |                    Data.Attoparsec.ByteString.Lazy
 96 |                    Data.Attoparsec.Char8
 97 |                    Data.Attoparsec.Combinator
 98 |                    Data.Attoparsec.Internal
 99 |                    Data.Attoparsec.Internal.Types
100 |                    Data.Attoparsec.Lazy
101 |                    Data.Attoparsec.Number
102 |                    Data.Attoparsec.Text
103 |                    Data.Attoparsec.Text.Lazy
104 |                    Data.Attoparsec.Types
105 |                    Data.Attoparsec.Zepto
106 |   other-modules:   Data.Attoparsec.ByteString.Internal
107 |                    Data.Attoparsec.Text.Internal
108 |   ghc-options: -O2 -Wall
109 | 
110 |   default-language: Haskell2010
111 |   default-extensions:
112 |     TypeOperators
113 | 
114 |   if flag(developer)
115 |     ghc-prof-options: -auto-all
116 |     ghc-options: -Werror
117 | 
118 | test-suite attoparsec-tests
119 |   type:           exitcode-stdio-1.0
120 |   hs-source-dirs: tests
121 |   main-is:        QC.hs
122 |   other-modules:  QC.Buffer
123 |                   QC.ByteString
124 |                   QC.Combinator
125 |                   QC.Common
126 |                   QC.IPv6.Internal
127 |                   QC.IPv6.Types
128 |                   QC.Rechunked
129 |                   QC.Simple
130 |                   QC.Text
131 |                   QC.Text.FastSet
132 |                   QC.Text.Regressions
133 | 
134 |   ghc-options:
135 |     -Wall -threaded -rtsopts
136 | 
137 |   if flag(developer)
138 |     ghc-options: -Werror
139 | 
140 |   build-depends:
141 |     array,
142 |     attoparsec,
143 |     attoparsec-internal,
144 |     base,
145 |     bytestring,
146 |     deepseq >= 1.1,
147 |     QuickCheck >= 2.13.2 && < 2.16,
148 |     quickcheck-unicode,
149 |     scientific,
150 |     tasty >= 0.11,
151 |     tasty-quickcheck >= 0.8,
152 |     text,
153 |     transformers,
154 |     vector
155 | 
156 |   default-language: Haskell2010
157 | 
158 |   if !impl(ghc >= 8.0)
159 |     -- Data.Semigroup && Control.Monad.Fail are available in base-4.9+
160 |     build-depends: fail == 4.9.*,
161 |                    semigroups >=0.16.1 && <0.19
162 | 
163 | benchmark attoparsec-benchmarks
164 |   type: exitcode-stdio-1.0
165 |   hs-source-dirs: benchmarks benchmarks/warp-3.0.1.1
166 |   ghc-options: -O2 -Wall -rtsopts
167 |   main-is: Benchmarks.hs
168 |   other-modules:
169 |     Aeson
170 |     Common
171 |     Genome
172 |     HeadersByteString
173 |     HeadersByteString.Atto
174 |     HeadersText
175 |     Links
176 |     Network.Wai.Handler.Warp.ReadInt
177 |     Network.Wai.Handler.Warp.RequestHeader
178 |     Numbers
179 |     Sets
180 |     TextFastSet
181 |     Warp
182 |   ghc-options: -O2 -Wall
183 | 
184 |   if flag(developer)
185 |     ghc-options: -Werror
186 | 
187 |   build-depends:
188 |     array,
189 |     attoparsec,
190 |     attoparsec-internal,
191 |     base == 4.*,
192 |     bytestring >= 0.10.4.0,
193 |     case-insensitive,
194 |     containers,
195 |     deepseq >= 1.1,
196 |     directory,
197 |     filepath,
198 |     ghc-prim,
199 |     http-types,
200 |     parsec >= 3.1.2,
201 |     scientific,
202 |     tasty-bench >= 0.3,
203 |     text >= 1.1.1.0,
204 |     transformers,
205 |     unordered-containers,
206 |     vector
207 | 
208 |   default-language: Haskell2010
209 | 
210 |   if !impl(ghc >= 8.0)
211 |     -- Data.Semigroup && Control.Monad.Fail are available in base-4.9+
212 |     build-depends: fail == 4.9.*,
213 |                    semigroups >=0.16.1 && <0.19
214 | 
215 | source-repository head
216 |   type:     git
217 |   location: https://github.com/haskell/attoparsec.git
218 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Internal.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
  2 | -- |
  3 | -- Module      :  Data.Attoparsec.Internal
  4 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  5 | -- License     :  BSD3
  6 | --
  7 | -- Maintainer  :  bos@serpentine.com
  8 | -- Stability   :  experimental
  9 | -- Portability :  unknown
 10 | --
 11 | -- Simple, efficient parser combinators, loosely based on the Parsec
 12 | -- library.
 13 | 
 14 | module Data.Attoparsec.Internal
 15 |     ( compareResults
 16 |     , prompt
 17 |     , demandInput
 18 |     , demandInput_
 19 |     , wantInput
 20 |     , endOfInput
 21 |     , atEnd
 22 |     , satisfyElem
 23 |     , concatReverse
 24 |     ) where
 25 | 
 26 | #if !MIN_VERSION_base(4,8,0)
 27 | import Control.Applicative ((<$>))
 28 | import Data.Monoid (Monoid, mconcat)
 29 | #endif
 30 | import Data.Attoparsec.Internal.Types
 31 | import Data.ByteString (ByteString)
 32 | import Data.Text (Text)
 33 | import Prelude hiding (succ)
 34 | 
 35 | -- | Compare two 'IResult' values for equality.
 36 | --
 37 | -- If both 'IResult's are 'Partial', the result will be 'Nothing', as
 38 | -- they are incomplete and hence their equality cannot be known.
 39 | -- (This is why there is no 'Eq' instance for 'IResult'.)
 40 | compareResults :: (Eq i, Eq r) => IResult i r -> IResult i r -> Maybe Bool
 41 | compareResults (Fail t0 ctxs0 msg0) (Fail t1 ctxs1 msg1) =
 42 |     Just (t0 == t1 && ctxs0 == ctxs1 && msg0 == msg1)
 43 | compareResults (Done t0 r0) (Done t1 r1) =
 44 |     Just (t0 == t1 && r0 == r1)
 45 | compareResults (Partial _) (Partial _) = Nothing
 46 | compareResults _ _ = Just False
 47 | 
 48 | -- | Ask for input.  If we receive any, pass the augmented input to a
 49 | -- success continuation, otherwise to a failure continuation.
 50 | prompt :: Chunk t
 51 |        => State t -> Pos -> More
 52 |        -> (State t -> Pos -> More -> IResult t r)
 53 |        -> (State t -> Pos -> More -> IResult t r)
 54 |        -> IResult t r
 55 | prompt t pos _more lose succ = Partial $ \s ->
 56 |   if nullChunk s
 57 |   then lose t pos Complete
 58 |   else succ (pappendChunk t s) pos Incomplete
 59 | {-# SPECIALIZE prompt :: State ByteString -> Pos -> More
 60 |                       -> (State ByteString -> Pos -> More
 61 |                           -> IResult ByteString r)
 62 |                       -> (State ByteString -> Pos -> More
 63 |                           -> IResult ByteString r)
 64 |                       -> IResult ByteString r #-}
 65 | {-# SPECIALIZE prompt :: State Text -> Pos -> More
 66 |                       -> (State Text -> Pos -> More -> IResult Text r)
 67 |                       -> (State Text -> Pos -> More -> IResult Text r)
 68 |                       -> IResult Text r #-}
 69 | 
 70 | -- | Immediately demand more input via a 'Partial' continuation
 71 | -- result.
 72 | demandInput :: Chunk t => Parser t ()
 73 | demandInput = Parser $ \t pos more lose succ ->
 74 |   case more of
 75 |     Complete -> lose t pos more [] "not enough input"
 76 |     _ -> let lose' _ pos' more' = lose t pos' more' [] "not enough input"
 77 |              succ' t' pos' more' = succ t' pos' more' ()
 78 |          in prompt t pos more lose' succ'
 79 | {-# SPECIALIZE demandInput :: Parser ByteString () #-}
 80 | {-# SPECIALIZE demandInput :: Parser Text () #-}
 81 | 
 82 | -- | Immediately demand more input via a 'Partial' continuation
 83 | -- result.  Return the new input.
 84 | demandInput_ :: Chunk t => Parser t t
 85 | demandInput_ = Parser $ \t pos more lose succ ->
 86 |   case more of
 87 |     Complete -> lose t pos more [] "not enough input"
 88 |     _ -> Partial $ \s ->
 89 |          if nullChunk s
 90 |          then lose t pos Complete [] "not enough input"
 91 |          else succ (pappendChunk t s) pos more s
 92 | {-# SPECIALIZE demandInput_ :: Parser ByteString ByteString #-}
 93 | {-# SPECIALIZE demandInput_ :: Parser Text Text #-}
 94 | 
 95 | -- | This parser always succeeds.  It returns 'True' if any input is
 96 | -- available either immediately or on demand, and 'False' if the end
 97 | -- of all input has been reached.
 98 | wantInput :: forall t . Chunk t => Parser t Bool
 99 | wantInput = Parser $ \t pos more _lose succ ->
100 |   case () of
101 |     _ | pos < atBufferEnd (undefined :: t) t -> succ t pos more True
102 |       | more == Complete -> succ t pos more False
103 |       | otherwise       -> let lose' t' pos' more' = succ t' pos' more' False
104 |                                succ' t' pos' more' = succ t' pos' more' True
105 |                            in prompt t pos more lose' succ'
106 | {-# INLINE wantInput #-}
107 | 
108 | -- | Match only if all input has been consumed.
109 | endOfInput :: forall t . Chunk t => Parser t ()
110 | endOfInput = Parser $ \t pos more lose succ ->
111 |   case () of
112 |     _| pos < atBufferEnd (undefined :: t) t -> lose t pos more [] "endOfInput"
113 |      | more == Complete -> succ t pos more ()
114 |      | otherwise ->
115 |        let lose' t' pos' more' _ctx _msg = succ t' pos' more' ()
116 |            succ' t' pos' more' _a = lose t' pos' more' [] "endOfInput"
117 |        in  runParser demandInput t pos more lose' succ'
118 | {-# SPECIALIZE endOfInput :: Parser ByteString () #-}
119 | {-# SPECIALIZE endOfInput :: Parser Text () #-}
120 | 
121 | -- | Return an indication of whether the end of input has been
122 | -- reached.
123 | atEnd :: Chunk t => Parser t Bool
124 | atEnd = not <$> wantInput
125 | {-# INLINE atEnd #-}
126 | 
127 | satisfySuspended :: forall t r . Chunk t
128 |                  => (ChunkElem t -> Bool)
129 |                  -> State t -> Pos -> More
130 |                  -> Failure t (State t) r
131 |                  -> Success t (State t) (ChunkElem t) r
132 |                  -> IResult t r
133 | satisfySuspended p t pos more lose succ =
134 |     runParser (demandInput >> go) t pos more lose succ
135 |   where go = Parser $ \t' pos' more' lose' succ' ->
136 |           case bufferElemAt (undefined :: t) pos' t' of
137 |             Just (e, l) | p e -> succ' t' (pos' + Pos l) more' e
138 |                         | otherwise -> lose' t' pos' more' [] "satisfyElem"
139 |             Nothing -> runParser (demandInput >> go) t' pos' more' lose' succ'
140 | {-# SPECIALIZE satisfySuspended :: (ChunkElem ByteString -> Bool)
141 |                                 -> State ByteString -> Pos -> More
142 |                                 -> Failure ByteString (State ByteString) r
143 |                                 -> Success ByteString (State ByteString)
144 |                                            (ChunkElem ByteString) r
145 |                                 -> IResult ByteString r #-}
146 | {-# SPECIALIZE satisfySuspended :: (ChunkElem Text -> Bool)
147 |                                 -> State Text -> Pos -> More
148 |                                 -> Failure Text (State Text) r
149 |                                 -> Success Text (State Text)
150 |                                            (ChunkElem Text) r
151 |                                 -> IResult Text r #-}
152 | 
153 | -- | The parser @satisfyElem p@ succeeds for any chunk element for which the
154 | -- predicate @p@ returns 'True'. Returns the element that is
155 | -- actually parsed.
156 | satisfyElem :: forall t . Chunk t
157 |             => (ChunkElem t -> Bool) -> Parser t (ChunkElem t)
158 | satisfyElem p = Parser $ \t pos more lose succ ->
159 |     case bufferElemAt (undefined :: t) pos t of
160 |       Just (e, l) | p e -> succ t (pos + Pos l) more e
161 |                   | otherwise -> lose t pos more [] "satisfyElem"
162 |       Nothing -> satisfySuspended p t pos more lose succ
163 | {-# INLINE satisfyElem #-}
164 | 
165 | -- | Concatenate a monoid after reversing its elements.  Used to
166 | -- glue together a series of textual chunks that have been accumulated
167 | -- \"backwards\".
168 | concatReverse :: Monoid m => [m] -> m
169 | concatReverse [x] = x
170 | concatReverse xs  = mconcat (reverse xs)
171 | {-# INLINE concatReverse #-}
172 | 


--------------------------------------------------------------------------------
/tests/QC/Text.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
  2 | {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
  3 | module QC.Text (tests) where
  4 | 
  5 | #if !MIN_VERSION_base(4,8,0)
  6 | import Control.Applicative ((<*>), (<$>))
  7 | #endif
  8 | import Data.Int (Int64)
  9 | import Data.Word (Word8)
 10 | import Prelude hiding (take, takeWhile)
 11 | import QC.Common (liftOp, parseT)
 12 | import qualified QC.Text.FastSet as FastSet
 13 | import qualified QC.Text.Regressions as Regressions
 14 | import Test.Tasty (TestTree, testGroup)
 15 | import Test.Tasty.QuickCheck (testProperty)
 16 | import Test.QuickCheck
 17 | import qualified Data.Attoparsec.Text as P
 18 | import qualified Data.Attoparsec.Text.Lazy as PL
 19 | import qualified Data.Attoparsec.Text.FastSet as S
 20 | import qualified Data.ByteString as BS
 21 | import qualified Data.Char as Char
 22 | import qualified Data.Text as T
 23 | import qualified Data.Text.Encoding as TE
 24 | import qualified Data.Text.Lazy as L
 25 | 
 26 | -- Basic byte-level combinators.
 27 | 
 28 | satisfy :: Char -> L.Text -> Property
 29 | satisfy w s = parseT (P.satisfy (<=w)) (L.cons w s) === Just w
 30 | 
 31 | satisfyWith :: Char -> L.Text -> Property
 32 | satisfyWith c s = parseT (P.satisfyWith id (<=c)) (L.cons c s) === Just c
 33 | 
 34 | char :: Char -> L.Text -> Property
 35 | char w s = parseT (P.char w) (L.cons w s) === Just w
 36 | 
 37 | skip :: Char -> L.Text -> Property
 38 | skip w s =
 39 |   case (parseT (P.skip (<w)) s, L.uncons s) of
 40 |     (Nothing, mcs) -> maybe (property True) (expectFailure . it) mcs
 41 |     (Just _,  mcs) -> maybe (property False) it mcs
 42 |   where it cs = liftOp "<" (<) (fst cs) w
 43 | 
 44 | anyChar :: L.Text -> Property
 45 | anyChar s
 46 |     | L.null s  = p === Nothing
 47 |     | otherwise = p === Just (L.head s)
 48 |   where p = parseT P.anyChar s
 49 | 
 50 | notChar :: Char -> NonEmptyList Char -> Property
 51 | notChar w (NonEmpty s) = parseT (P.notChar w) bs === if v == w
 52 |                                                       then Nothing
 53 |                                                       else Just v
 54 |     where v = L.head bs
 55 |           bs = L.pack s
 56 | 
 57 | peekChar :: L.Text -> Property
 58 | peekChar s
 59 |     | L.null s  = p === Just (Nothing, s)
 60 |     | otherwise = p === Just (Just (L.head s), s)
 61 |   where p = parseT ((,) <$> P.peekChar <*> P.takeLazyText) s
 62 | 
 63 | peekChar' :: L.Text -> Property
 64 | peekChar' s = parseT P.peekChar' s === (fst <$> L.uncons s)
 65 | 
 66 | string :: L.Text -> L.Text -> Property
 67 | string s t = parseT (P.string s') (s `L.append` t) === Just s'
 68 |   where s' = toStrict s
 69 | 
 70 | strings :: L.Text -> L.Text -> L.Text -> Property
 71 | strings s t u =
 72 |     parseT (P.string (toStrict s) >> P.string t') (L.concat [s,t,u])
 73 |     === Just t'
 74 |   where t' = toStrict t
 75 | 
 76 | -- | Note: "simple, and efficient" works for well formed input...
 77 | -- i.e. e.g. Latin1 texts
 78 | stringCI :: [Word8] -> Property
 79 | stringCI ws = P.parseOnly (P.stringCI fs) s === Right s
 80 |   where fs = T.toCaseFold s
 81 |         s  = TE.decodeLatin1 (BS.pack ws)
 82 | 
 83 | asciiCI :: T.Text -> Gen Bool
 84 | asciiCI x =
 85 |   (\s i -> P.parseOnly (P.asciiCI s) i == Right i)
 86 |     <$> maybeModifyCase x
 87 |     <*> maybeModifyCase x
 88 |   where
 89 |     maybeModifyCase s = elements [s, toLower s, toUpper s]
 90 |     toLower = T.map (\c -> if c < Char.chr 127 then Char.toLower c else c)
 91 |     toUpper = T.map (\c -> if c < Char.chr 127 then Char.toUpper c else c)
 92 | 
 93 | toStrict :: L.Text -> T.Text
 94 | toStrict = T.concat . L.toChunks
 95 | 
 96 | skipWhile :: Char -> L.Text -> Property
 97 | skipWhile w s =
 98 |     let t = L.dropWhile (<=w) s
 99 |     in case PL.parse (P.skipWhile (<=w)) s of
100 |          PL.Done t' () -> t === t'
101 |          _             -> property False
102 | 
103 | take :: Int -> L.Text -> Property
104 | take n s = maybe (liftOp "<" (<) (L.length s) (fromIntegral n))
105 |            (=== T.take n (toStrict s)) $
106 |            parseT (P.take n) s
107 | 
108 | takeText :: L.Text -> Property
109 | takeText s = maybe (property False) (=== toStrict s) . parseT P.takeText $ s
110 | 
111 | takeLazyText :: L.Text -> Property
112 | takeLazyText s = maybe (property False) (=== s) . parseT P.takeLazyText $ s
113 | 
114 | takeCount :: Positive Int -> L.Text -> Property
115 | takeCount (Positive k) s =
116 |     case parseT (P.take k) s of
117 |       Nothing -> liftOp ">" (>) (fromIntegral k) (L.length s)
118 |       Just _s -> liftOp "<=" (<=) (fromIntegral k) (L.length s)
119 | 
120 | takeWhile :: Char -> L.Text -> Property
121 | takeWhile w s =
122 |     let (h,t) = L.span (==w) s
123 |     in case PL.parse (P.takeWhile (==w)) s of
124 |          PL.Done t' h' -> t === t' .&&. toStrict h === h'
125 |          _             -> property False
126 | 
127 | takeWhile1 :: Char -> L.Text -> Property
128 | takeWhile1 w s =
129 |   let s'    = L.cons w s
130 |       (h,t) = L.span (<=w) s'
131 |     in case PL.parse (P.takeWhile1 (<=w)) s' of
132 |          PL.Done t' h' -> t === t' .&&. toStrict h === h'
133 |          _             -> property False
134 | 
135 | takeTill :: Char -> L.Text -> Property
136 | takeTill w s =
137 |     let (h,t) = L.break (==w) s
138 |     in case PL.parse (P.takeTill (==w)) s of
139 |          PL.Done t' h' -> t === t' .&&. toStrict h === h'
140 |          _             -> property False
141 | 
142 | takeWhile1_empty :: Property
143 | takeWhile1_empty = parseT (P.takeWhile1 undefined) L.empty === Nothing
144 | 
145 | endOfInput :: L.Text -> Property
146 | endOfInput s = parseT P.endOfInput s === if L.null s
147 |                                          then Just ()
148 |                                          else Nothing
149 | 
150 | endOfLine :: L.Text -> Property
151 | endOfLine s =
152 |   case (parseT P.endOfLine s, L.uncons s) of
153 |     (Nothing, mcs) -> maybe (property True) (expectFailure . eol) mcs
154 |     (Just _,  mcs) -> maybe (property False) eol mcs
155 |   where eol (c,s') = c === '\n' .||.
156 |                      (c, fst <$> L.uncons s') === ('\r', Just '\n')
157 | 
158 | scan :: L.Text -> Positive Int64 -> Property
159 | -- for some reason, if counterexample is removed, this test fails?
160 | scan s (Positive k) = counterexample (show s)
161 |                     $ parseT p s === Just (toStrict $ L.take k s)
162 |   where p = P.scan k $ \ n _ ->
163 |             if n > 0 then let !n' = n - 1 in Just n' else Nothing
164 | 
165 | members :: String -> Property
166 | members s = property $ all (`S.member` set) s
167 |     where set = S.fromList s
168 | 
169 | nonmembers :: String -> String -> Property
170 | nonmembers s s' = property . not . any (`S.member` set) $ filter (not . (`elem` s)) s'
171 |     where set = S.fromList s
172 | 
173 | tests :: [TestTree]
174 | tests = [
175 |       testProperty "anyChar" anyChar
176 |     , testProperty "asciiCI" asciiCI
177 |     , testProperty "char" char
178 |     , testProperty "endOfInput" endOfInput
179 |     , testProperty "endOfLine" endOfLine
180 |     , testProperty "notChar" notChar
181 |     , testProperty "peekChar" peekChar
182 |     , testProperty "peekChar'" peekChar'
183 |     , testProperty "satisfy" satisfy
184 |     , testProperty "satisfyWith" satisfyWith
185 |     , testProperty "scan" scan
186 |     , testProperty "skip" skip
187 |     , testProperty "skipWhile" skipWhile
188 |     , testProperty "string" string
189 |     , testProperty "strings" strings
190 |     , testProperty "stringCI" stringCI
191 |     , testProperty "take" take
192 |     , testProperty "takeText" takeText
193 |     , testProperty "takeCount" takeCount
194 |     , testProperty "takeLazyText" takeLazyText
195 |     , testProperty "takeTill" takeTill
196 |     , testProperty "takeWhile" takeWhile
197 |     , testProperty "takeWhile1" takeWhile1
198 |     , testProperty "takeWhile1_empty" takeWhile1_empty
199 |     , testProperty "members" members
200 |     , testProperty "nonmembers" nonmembers
201 |     , testGroup "FastSet" FastSet.tests
202 |     , testGroup "Regressions" Regressions.tests
203 |   ]
204 | 


--------------------------------------------------------------------------------
/internal/Data/Attoparsec/Text/Buffer.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
  2 |     UnboxedTuples #-}
  3 | 
  4 | -- |
  5 | -- Module      :  Data.Attoparsec.Text.Buffer
  6 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  7 | -- License     :  BSD3
  8 | --
  9 | -- Maintainer  :  bos@serpentine.com
 10 | -- Stability   :  experimental
 11 | -- Portability :  GHC
 12 | --
 13 | -- An immutable buffer that supports cheap appends.
 14 | 
 15 | -- A Buffer is divided into an immutable read-only zone, followed by a
 16 | -- mutable area that we've preallocated, but not yet written to.
 17 | --
 18 | -- We overallocate at the end of a Buffer so that we can cheaply
 19 | -- append.  Since a user of an existing Buffer cannot see past the end
 20 | -- of its immutable zone into the data that will change during an
 21 | -- append, this is safe.
 22 | --
 23 | -- Once we run out of space at the end of a Buffer, we do the usual
 24 | -- doubling of the buffer size.
 25 | 
 26 | module Data.Attoparsec.Text.Buffer
 27 |     (
 28 |       Buffer
 29 |     , buffer
 30 |     , unbuffer
 31 |     , unbufferAt
 32 |     , length
 33 |     , pappend
 34 |     , iter
 35 |     , iter_
 36 |     , substring
 37 |     , lengthCodeUnits
 38 |     , dropCodeUnits
 39 |     ) where
 40 | 
 41 | import Control.Exception (assert)
 42 | import Data.List (foldl1')
 43 | import Data.Monoid as Mon (Monoid(..))
 44 | import Data.Semigroup (Semigroup(..))
 45 | import Data.Text ()
 46 | import Data.Text.Internal (Text(..))
 47 | #if MIN_VERSION_text(2,0,0)
 48 | import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
 49 | import Data.Text.Unsafe (iterArray, lengthWord8)
 50 | #else
 51 | import Data.Bits (shiftR)
 52 | import Data.Text.Internal.Encoding.Utf16 (chr2)
 53 | import Data.Text.Internal.Unsafe.Char (unsafeChr)
 54 | import Data.Text.Unsafe (lengthWord16)
 55 | #endif
 56 | import Data.Text.Unsafe (Iter(..))
 57 | import Foreign.Storable (sizeOf)
 58 | import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
 59 | import GHC.ST (ST(..), runST)
 60 | import Prelude hiding (length)
 61 | import qualified Data.Text.Array as A
 62 | 
 63 | -- If _cap is zero, this buffer is empty.
 64 | data Buffer = Buf {
 65 |       _arr :: {-# UNPACK #-} !A.Array
 66 |     , _off :: {-# UNPACK #-} !Int
 67 |     , _len :: {-# UNPACK #-} !Int
 68 |     , _cap :: {-# UNPACK #-} !Int
 69 |     , _gen :: {-# UNPACK #-} !Int
 70 |     }
 71 | 
 72 | instance Show Buffer where
 73 |     showsPrec p = showsPrec p . unbuffer
 74 | 
 75 | -- | The initial 'Buffer' has no mutable zone, so we can avoid all
 76 | -- copies in the (hopefully) common case of no further input being fed
 77 | -- to us.
 78 | buffer :: Text -> Buffer
 79 | buffer (Text arr off len) = Buf arr off len len 0
 80 | 
 81 | unbuffer :: Buffer -> Text
 82 | unbuffer (Buf arr off len _ _) = Text arr off len
 83 | 
 84 | unbufferAt :: Int -> Buffer -> Text
 85 | unbufferAt s (Buf arr off len _ _) =
 86 |   assert (s >= 0 && s <= len) $
 87 |   Text arr (off+s) (len-s)
 88 | 
 89 | instance Semigroup Buffer where
 90 |     (Buf _ _ _ 0 _) <> b                     = b
 91 |     a               <> (Buf _ _ _ 0 _)       = a
 92 |     buf             <> (Buf arr off len _ _) = append buf arr off len
 93 |     {-# INLINE (<>) #-}
 94 | 
 95 | instance Monoid Buffer where
 96 |     mempty = Buf A.empty 0 0 0 0
 97 |     {-# INLINE mempty #-}
 98 | 
 99 |     mappend = (<>)
100 | 
101 |     mconcat [] = Mon.mempty
102 |     mconcat xs = foldl1' (<>) xs
103 | 
104 | pappend :: Buffer -> Text -> Buffer
105 | pappend (Buf _ _ _ 0 _) t      = buffer t
106 | pappend buf (Text arr off len) = append buf arr off len
107 | 
108 | append :: Buffer -> A.Array -> Int -> Int -> Buffer
109 | append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
110 | #if MIN_VERSION_text(2,0,0)
111 |   let woff    = sizeOf (0::Int)
112 | #else
113 |   let woff    = sizeOf (0::Int) `shiftR` 1
114 | #endif
115 |       newlen  = len0 + len1
116 |       !gen    = if gen0 == 0 then 0 else readGen arr0
117 |   if gen == gen0 && newlen <= cap0
118 |     then do
119 |       let newgen = gen + 1
120 |       marr <- unsafeThaw arr0
121 |       writeGen marr newgen
122 | #if MIN_VERSION_text(2,0,0)
123 |       A.copyI len1 marr (off0+len0) arr1 off1
124 | #else
125 |       A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
126 | #endif
127 |       arr2 <- A.unsafeFreeze marr
128 |       return (Buf arr2 off0 newlen cap0 newgen)
129 |     else do
130 |       let newcap = newlen * 2
131 |           newgen = 1
132 |       marr <- A.new (newcap + woff)
133 |       writeGen marr newgen
134 | #if MIN_VERSION_text(2,0,0)
135 |       A.copyI len0 marr woff arr0 off0
136 |       A.copyI len1 marr (woff+len0) arr1 off1
137 | #else
138 |       A.copyI marr woff arr0 off0 (woff+len0)
139 |       A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
140 | #endif
141 |       arr2 <- A.unsafeFreeze marr
142 |       return (Buf arr2 woff newlen newcap newgen)
143 | 
144 | length :: Buffer -> Int
145 | length (Buf _ _ len _ _) = len
146 | {-# INLINE length #-}
147 | 
148 | substring :: Int -> Int -> Buffer -> Text
149 | substring s l (Buf arr off len _ _) =
150 |   assert (s >= 0 && s <= len) .
151 |   assert (l >= 0 && l <= len-s) $
152 |   Text arr (off+s) l
153 | {-# INLINE substring #-}
154 | 
155 | #if MIN_VERSION_text(2,0,0)
156 | 
157 | lengthCodeUnits :: Text -> Int
158 | lengthCodeUnits = lengthWord8
159 | 
160 | dropCodeUnits :: Int -> Buffer -> Text
161 | dropCodeUnits s (Buf arr off len _ _) =
162 |   assert (s >= 0 && s <= len) $
163 |   Text arr (off+s) (len-s)
164 | {-# INLINE dropCodeUnits #-}
165 | 
166 | -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-8
167 | -- array, returning the current character and the delta to add to give
168 | -- the next offset to iterate at.
169 | iter :: Buffer -> Int -> Iter
170 | iter (Buf arr off _ _ _) i = iterArray arr (off + i)
171 | {-# INLINE iter #-}
172 | 
173 | -- | /O(1)/ Iterate one step through a UTF-8 array, returning the
174 | -- delta to add to give the next offset to iterate at.
175 | iter_ :: Buffer -> Int -> Int
176 | iter_ (Buf arr off _ _ _) i = utf8LengthByLeader $ A.unsafeIndex arr (off+i)
177 | {-# INLINE iter_ #-}
178 | 
179 | unsafeThaw :: A.Array -> ST s (A.MArray s)
180 | unsafeThaw (A.ByteArray a) = ST $ \s# ->
181 |                           (# s#, A.MutableByteArray (unsafeCoerce# a) #)
182 | 
183 | readGen :: A.Array -> Int
184 | readGen (A.ByteArray a) = case indexIntArray# a 0# of r# -> I# r#
185 | 
186 | writeGen :: A.MArray s -> Int -> ST s ()
187 | writeGen (A.MutableByteArray a) (I# gen#) = ST $ \s0# ->
188 |   case writeIntArray# a 0# gen# s0# of
189 |     s1# -> (# s1#, () #)
190 | 
191 | #else
192 | 
193 | lengthCodeUnits :: Text -> Int
194 | lengthCodeUnits = lengthWord16
195 | 
196 | dropCodeUnits :: Int -> Buffer -> Text
197 | dropCodeUnits s (Buf arr off len _ _) =
198 |   assert (s >= 0 && s <= len) $
199 |   Text arr (off+s) (len-s)
200 | {-# INLINE dropCodeUnits #-}
201 | 
202 | -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
203 | -- array, returning the current character and the delta to add to give
204 | -- the next offset to iterate at.
205 | iter :: Buffer -> Int -> Iter
206 | iter (Buf arr off _ _ _) i
207 |     | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
208 |     | otherwise                = Iter (chr2 m n) 2
209 |   where m = A.unsafeIndex arr j
210 |         n = A.unsafeIndex arr k
211 |         j = off + i
212 |         k = j + 1
213 | {-# INLINE iter #-}
214 | 
215 | -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
216 | -- delta to add to give the next offset to iterate at.
217 | iter_ :: Buffer -> Int -> Int
218 | iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
219 |                                 | otherwise                = 2
220 |   where m = A.unsafeIndex arr (off+i)
221 | {-# INLINE iter_ #-}
222 | 
223 | unsafeThaw :: A.Array -> ST s (A.MArray s)
224 | unsafeThaw A.Array{..} = ST $ \s# ->
225 |                           (# s#, A.MArray (unsafeCoerce# aBA) #)
226 | 
227 | readGen :: A.Array -> Int
228 | readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
229 | 
230 | writeGen :: A.MArray s -> Int -> ST s ()
231 | writeGen a (I# gen#) = ST $ \s0# ->
232 |   case writeIntArray# (A.maBA a) 0# gen# s0# of
233 |     s1# -> (# s1#, () #)
234 | 
235 | #endif
236 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/ByteString.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | #if __GLASGOW_HASKELL__ >= 702
  3 | {-# LANGUAGE Trustworthy #-}
  4 | #endif
  5 | -- |
  6 | -- Module      :  Data.Attoparsec.ByteString
  7 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  8 | -- License     :  BSD3
  9 | --
 10 | -- Maintainer  :  bos@serpentine.com
 11 | -- Stability   :  experimental
 12 | -- Portability :  unknown
 13 | --
 14 | -- Simple, efficient combinator parsing for 'B.ByteString' strings,
 15 | -- loosely based on the Parsec library.
 16 | 
 17 | module Data.Attoparsec.ByteString
 18 |     (
 19 |     -- * Differences from Parsec
 20 |     -- $parsec
 21 | 
 22 |     -- * Incremental input
 23 |     -- $incremental
 24 | 
 25 |     -- * Performance considerations
 26 |     -- $performance
 27 | 
 28 |     -- * Parser types
 29 |       I.Parser
 30 |     , Result
 31 |     , T.IResult(..)
 32 |     , I.compareResults
 33 | 
 34 |     -- * Running parsers
 35 |     , parse
 36 |     , feed
 37 |     , I.parseOnly
 38 |     , parseWith
 39 |     , parseTest
 40 | 
 41 |     -- ** Result conversion
 42 |     , maybeResult
 43 |     , eitherResult
 44 | 
 45 |     -- * Parsing individual bytes
 46 |     , I.word8
 47 |     , I.anyWord8
 48 |     , I.notWord8
 49 |     , I.satisfy
 50 |     , I.satisfyWith
 51 |     , I.skip
 52 | 
 53 |     -- ** Lookahead
 54 |     , I.peekWord8
 55 |     , I.peekWord8'
 56 | 
 57 |     -- ** Byte classes
 58 |     , I.inClass
 59 |     , I.notInClass
 60 | 
 61 |     -- * Efficient string handling
 62 |     , I.string
 63 |     , I.skipWhile
 64 |     , I.take
 65 |     , I.scan
 66 |     , I.runScanner
 67 |     , I.takeWhile
 68 |     , I.takeWhile1
 69 |     , I.takeWhileIncluding
 70 |     , I.takeTill
 71 |     , I.getChunk
 72 | 
 73 |     -- ** Consume all remaining input
 74 |     , I.takeByteString
 75 |     , I.takeLazyByteString
 76 | 
 77 |     -- * Combinators
 78 |     , try
 79 |     , (<?>)
 80 |     , choice
 81 |     , count
 82 |     , option
 83 |     , many'
 84 |     , many1
 85 |     , many1'
 86 |     , manyTill
 87 |     , manyTill'
 88 |     , sepBy
 89 |     , sepBy'
 90 |     , sepBy1
 91 |     , sepBy1'
 92 |     , skipMany
 93 |     , skipMany1
 94 |     , eitherP
 95 |     , I.match
 96 |     -- * State observation and manipulation functions
 97 |     , I.endOfInput
 98 |     , I.atEnd
 99 |     ) where
100 | 
101 | import Data.Attoparsec.Combinator
102 | import Data.List (intercalate)
103 | import qualified Data.Attoparsec.ByteString.Internal as I
104 | import qualified Data.Attoparsec.Internal as I
105 | import qualified Data.ByteString as B
106 | import Data.Attoparsec.ByteString.Internal (Result, parse)
107 | import qualified Data.Attoparsec.Internal.Types as T
108 | 
109 | -- $parsec
110 | --
111 | -- Compared to Parsec 3, attoparsec makes several tradeoffs.  It is
112 | -- not intended for, or ideal for, all possible uses.
113 | --
114 | -- * While attoparsec can consume input incrementally, Parsec cannot.
115 | --   Incremental input is a huge deal for efficient and secure network
116 | --   and system programming, since it gives much more control to users
117 | --   of the library over matters such as resource usage and the I/O
118 | --   model to use.
119 | --
120 | -- * Much of the performance advantage of attoparsec is gained via
121 | --   high-performance parsers such as 'I.takeWhile' and 'I.string'.
122 | --   If you use complicated combinators that return lists of bytes or
123 | --   characters, there is less performance difference between the two
124 | --   libraries.
125 | --
126 | -- * Unlike Parsec 3, attoparsec does not support being used as a
127 | --   monad transformer.
128 | --
129 | -- * attoparsec is specialised to deal only with strict 'B.ByteString'
130 | --   input.  Efficiency concerns rule out both lists and lazy
131 | --   bytestrings.  The usual use for lazy bytestrings would be to
132 | --   allow consumption of very large input without a large footprint.
133 | --   For this need, attoparsec's incremental input provides an
134 | --   excellent substitute, with much more control over when input
135 | --   takes place.  If you must use lazy bytestrings, see the
136 | --   "Data.Attoparsec.ByteString.Lazy" module, which feeds lazy chunks
137 | --   to a regular parser.
138 | --
139 | -- * Parsec parsers can produce more helpful error messages than
140 | --   attoparsec parsers.  This is a matter of focus: attoparsec avoids
141 | --   the extra book-keeping in favour of higher performance.
142 | 
143 | -- $incremental
144 | --
145 | -- attoparsec supports incremental input, meaning that you can feed it
146 | -- a bytestring that represents only part of the expected total amount
147 | -- of data to parse. If your parser reaches the end of a fragment of
148 | -- input and could consume more input, it will suspend parsing and
149 | -- return a 'T.Partial' continuation.
150 | --
151 | -- Supplying the 'T.Partial' continuation with a bytestring will
152 | -- resume parsing at the point where it was suspended, with the
153 | -- bytestring you supplied used as new input at the end of the
154 | -- existing input. You must be prepared for the result of the resumed
155 | -- parse to be another 'T.Partial' continuation.
156 | --
157 | -- To indicate that you have no more input, supply the 'T.Partial'
158 | -- continuation with an empty bytestring.
159 | --
160 | -- Remember that some parsing combinators will not return a result
161 | -- until they reach the end of input.  They may thus cause 'T.Partial'
162 | -- results to be returned.
163 | --
164 | -- If you do not need support for incremental input, consider using
165 | -- the 'I.parseOnly' function to run your parser.  It will never
166 | -- prompt for more input.
167 | --
168 | -- /Note/: incremental input does /not/ imply that attoparsec will
169 | -- release portions of its internal state for garbage collection as it
170 | -- proceeds.  Its internal representation is equivalent to a single
171 | -- 'ByteString': if you feed incremental input to a parser, it will
172 | -- require memory proportional to the amount of input you supply.
173 | -- (This is necessary to support arbitrary backtracking.)
174 | 
175 | -- $performance
176 | --
177 | -- If you write an attoparsec-based parser carefully, it can be
178 | -- realistic to expect it to perform similarly to a hand-rolled C
179 | -- parser (measuring megabytes parsed per second).
180 | --
181 | -- To actually achieve high performance, there are a few guidelines
182 | -- that it is useful to follow.
183 | --
184 | -- Use the 'B.ByteString'-oriented parsers whenever possible,
185 | -- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'.  There is
186 | -- about a factor of 100 difference in performance between the two
187 | -- kinds of parser.
188 | --
189 | -- For very simple byte-testing predicates, write them by hand instead
190 | -- of using 'I.inClass' or 'I.notInClass'.  For instance, both of
191 | -- these predicates test for an end-of-line byte, but the first is
192 | -- much faster than the second:
193 | --
194 | -- >endOfLine_fast w = w == 13 || w == 10
195 | -- >endOfLine_slow   = inClass "\r\n"
196 | --
197 | -- Make active use of benchmarking and profiling tools to measure,
198 | -- find the problems with, and improve the performance of your parser.
199 | 
200 | -- | Run a parser and print its result to standard output.
201 | parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO ()
202 | parseTest p s = print (parse p s)
203 | 
204 | -- | Run a parser with an initial input string, and a monadic action
205 | -- that can supply more input if needed.
206 | parseWith :: Monad m =>
207 |              (m B.ByteString)
208 |           -- ^ An action that will be executed to provide the parser
209 |           -- with more input, if necessary.  The action must return an
210 |           -- 'B.empty' string when there is no more input available.
211 |           -> I.Parser a
212 |           -> B.ByteString
213 |           -- ^ Initial input for the parser.
214 |           -> m (Result a)
215 | parseWith refill p s = step $ parse p s
216 |   where step (T.Partial k) = (step . k) =<< refill
217 |         step r             = return r
218 | {-# INLINE parseWith #-}
219 | 
220 | -- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result
221 | -- is treated as failure.
222 | maybeResult :: Result r -> Maybe r
223 | maybeResult (T.Done _ r) = Just r
224 | maybeResult _            = Nothing
225 | 
226 | -- | Convert a 'Result' value to an 'Either' value. A 'T.Partial'
227 | -- result is treated as failure.
228 | eitherResult :: Result r -> Either String r
229 | eitherResult (T.Done _ r)        = Right r
230 | eitherResult (T.Fail _ [] msg)   = Left msg
231 | eitherResult (T.Fail _ ctxs msg) = Left (intercalate " > " ctxs ++ ": " ++ msg)
232 | eitherResult _                   = Left "Result: incomplete input"
233 | 


--------------------------------------------------------------------------------
/tests/QC/ByteString.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
  2 | module QC.ByteString (tests) where
  3 | 
  4 | #if !MIN_VERSION_base(4,8,0)
  5 | import Control.Applicative ((<*>), (<$>))
  6 | #endif
  7 | import Data.Char (chr, ord, toUpper)
  8 | import Data.Int (Int64)
  9 | import Data.Word (Word8)
 10 | import Prelude hiding (take, takeWhile)
 11 | import QC.Common (ASCII(..), liftOp, parseBS, toStrictBS)
 12 | import Test.Tasty (TestTree)
 13 | import Test.Tasty.QuickCheck (testProperty)
 14 | import Test.QuickCheck
 15 | import qualified Data.Attoparsec.ByteString as P
 16 | import qualified Data.Attoparsec.ByteString.Char8 as P8
 17 | import qualified Data.Attoparsec.ByteString.FastSet as S
 18 | import qualified Data.Attoparsec.ByteString.Lazy as PL
 19 | import qualified Data.ByteString as B
 20 | import qualified Data.ByteString.Char8 as B8
 21 | import qualified Data.ByteString.Lazy as L
 22 | import qualified Data.ByteString.Lazy.Char8 as L8
 23 | 
 24 | -- Basic byte-level combinators.
 25 | 
 26 | satisfy :: Word8 -> L.ByteString -> Property
 27 | satisfy w s = parseBS (P.satisfy (<=w)) (L.cons w s) === Just w
 28 | 
 29 | satisfyWith :: Word8 -> L.ByteString -> Property
 30 | satisfyWith w s = parseBS (P.satisfyWith (chr . fromIntegral) (<=c))
 31 |                          (L.cons (fromIntegral (ord c)) s) === Just c
 32 |   where
 33 |     c = chr (fromIntegral w)
 34 | 
 35 | word8 :: Word8 -> L.ByteString -> Property
 36 | word8 w s = parseBS (P.word8 w) (L.cons w s) === Just w
 37 | 
 38 | skip :: Word8 -> L.ByteString -> Property
 39 | skip w s =
 40 |   case (parseBS (P.skip (<w)) s, L.uncons s) of
 41 |     (Nothing, mcs) -> maybe (property True) (expectFailure . it) mcs
 42 |     (Just _,  mcs) -> maybe (property False) it mcs
 43 |   where it cs = liftOp "<" (<) (fst cs) w
 44 | 
 45 | anyWord8 :: L.ByteString -> Property
 46 | anyWord8 s
 47 |     | L.null s  = p === Nothing
 48 |     | otherwise = p === Just (L.head s)
 49 |   where p = parseBS P.anyWord8 s
 50 | 
 51 | notWord8 :: Word8 -> NonEmptyList Word8 -> Property
 52 | notWord8 w (NonEmpty s) = parseBS (P.notWord8 w) bs === if v == w
 53 |                                                         then Nothing
 54 |                                                         else Just v
 55 |     where v = L.head bs
 56 |           bs = L.pack s
 57 | 
 58 | peekWord8 :: L.ByteString -> Property
 59 | peekWord8 s
 60 |     | L.null s  = p === Just (Nothing, s)
 61 |     | otherwise = p === Just (Just (L.head s), s)
 62 |   where p = parseBS ((,) <$> P.peekWord8 <*> P.takeLazyByteString) s
 63 | 
 64 | peekWord8' :: L.ByteString -> Property
 65 | peekWord8' s = parseBS P.peekWord8' s === (fst <$> L.uncons s)
 66 | 
 67 | string :: L.ByteString -> L.ByteString -> Property
 68 | string s t = parseBS (P.string s') (s `L.append` t) === Just s'
 69 |   where s' = toStrictBS s
 70 | 
 71 | stringCI :: ASCII L.ByteString -> ASCII L.ByteString -> Property
 72 | stringCI (ASCII s) (ASCII t) =
 73 |     parseBS (P8.stringCI up) (s `L.append` t) === Just s'
 74 |   where s' = toStrictBS s
 75 |         up = B8.map toUpper s'
 76 | 
 77 | strings :: L.ByteString -> L.ByteString -> L.ByteString -> Property
 78 | strings s t u =
 79 |     parseBS (P.string (toStrictBS s) >> P.string t') (L.concat [s,t,u])
 80 |     === Just t'
 81 |   where t' = toStrictBS t
 82 | 
 83 | skipWhile :: Word8 -> L.ByteString -> Property
 84 | skipWhile w s =
 85 |     let t = L.dropWhile (<=w) s
 86 |     in case PL.parse (P.skipWhile (<=w)) s of
 87 |          PL.Done t' () -> t === t'
 88 |          _             -> property False
 89 | 
 90 | takeCount :: Positive Int -> L.ByteString -> Property
 91 | takeCount (Positive k) s =
 92 |     case parseBS (P.take k) s of
 93 |       Nothing -> liftOp ">" (>) (fromIntegral k) (L.length s)
 94 |       Just _s -> liftOp "<=" (<=) (fromIntegral k) (L.length s)
 95 | 
 96 | takeWhile :: Word8 -> L.ByteString -> Property
 97 | takeWhile w s =
 98 |     let (h,t) = L.span (==w) s
 99 |     in case PL.parse (P.takeWhile (==w)) s of
100 |          PL.Done t' h' -> t === t' .&&. toStrictBS h === h'
101 |          _             -> property False
102 | 
103 | take :: Int -> L.ByteString -> Property
104 | take n s = maybe (property $ L.length s < fromIntegral n)
105 |            (=== B.take n (toStrictBS s)) $
106 |            parseBS (P.take n) s
107 | 
108 | takeByteString :: L.ByteString -> Property
109 | takeByteString s = maybe (property False) (=== toStrictBS s) .
110 |                    parseBS P.takeByteString $ s
111 | 
112 | takeLazyByteString :: L.ByteString -> Property
113 | takeLazyByteString s = maybe (property False) (=== s) .
114 |                        parseBS P.takeLazyByteString $ s
115 | 
116 | takeWhile1 :: Word8 -> L.ByteString -> Property
117 | takeWhile1 w s =
118 |     let s'    = L.cons w s
119 |         (h,t) = L.span (<=w) s'
120 |     in case PL.parse (P.takeWhile1 (<=w)) s' of
121 |          PL.Done t' h' -> t === t' .&&. toStrictBS h === h'
122 |          _             -> property False
123 | 
124 | takeWhileIncluding :: Word8 -> L.ByteString -> Property
125 | takeWhileIncluding w s =
126 |     let s'    = L.cons w $ L.snoc s (w+1)
127 |         (h_,t_) = L.span (<=w) s'
128 |         (h,t) =
129 |           case L.uncons t_ of
130 |             Nothing -> (h_, t_)
131 |             Just (n, nt) -> (h_ `L.snoc` n, nt)
132 |     in w < 255 ==> case PL.parse (P.takeWhileIncluding (<=w)) s' of
133 |          PL.Done t' h' -> t === t' .&&. toStrictBS h === h'
134 |          _             -> property False
135 | 
136 | takeTill :: Word8 -> L.ByteString -> Property
137 | takeTill w s =
138 |     let (h,t) = L.break (==w) s
139 |     in case PL.parse (P.takeTill (==w)) s of
140 |          PL.Done t' h' -> t === t' .&&. toStrictBS h === h'
141 |          _             -> property False
142 | 
143 | takeWhile1_empty :: Property
144 | takeWhile1_empty = parseBS (P.takeWhile1 undefined) L.empty === Nothing
145 | 
146 | getChunk :: L.ByteString -> Property
147 | getChunk s =
148 |   maybe (property False) (=== L.toChunks s) $
149 |     parseBS getChunks s
150 |   where getChunks = go []
151 |         go res = do
152 |           mchunk <- P.getChunk
153 |           case mchunk of
154 |             Nothing -> return (reverse res)
155 |             Just chunk -> do
156 |               _ <- P.take (B.length chunk)
157 |               go (chunk:res)
158 | 
159 | endOfInput :: L.ByteString -> Property
160 | endOfInput s = parseBS P.endOfInput s === if L.null s
161 |                                           then Just ()
162 |                                           else Nothing
163 | 
164 | endOfLine :: L.ByteString -> Property
165 | endOfLine s =
166 |   case (parseBS P8.endOfLine s, L8.uncons s) of
167 |     (Nothing, mcs) -> maybe (property True) (expectFailure . eol) mcs
168 |     (Just _,  mcs) -> maybe (property False) eol mcs
169 |   where eol (c,s') = c === '\n' .||.
170 |                      (c, fst <$> L8.uncons s') === ('\r', Just '\n')
171 | 
172 | scan :: L.ByteString -> Positive Int64 -> Property
173 | scan s (Positive k) = parseBS p s === Just (toStrictBS $ L.take k s)
174 |   where p = P.scan k $ \ n _ ->
175 |             if n > 0 then let !n' = n - 1 in Just n' else Nothing
176 | 
177 | members :: [Word8] -> Property
178 | members s = property $ all (`S.memberWord8` set) s
179 |     where set = S.fromList s
180 | 
181 | nonmembers :: [Word8] -> [Word8] -> Property
182 | nonmembers s s' = property . not . any (`S.memberWord8` set) $ filter (not . (`elem` s)) s'
183 |     where set = S.fromList s
184 | 
185 | tests :: [TestTree]
186 | tests = [
187 |       testProperty "anyWord8" anyWord8
188 |     , testProperty "endOfInput" endOfInput
189 |     , testProperty "endOfLine" endOfLine
190 |     , testProperty "notWord8" notWord8
191 |     , testProperty "peekWord8" peekWord8
192 |     , testProperty "peekWord8'" peekWord8'
193 |     , testProperty "satisfy" satisfy
194 |     , testProperty "satisfyWith" satisfyWith
195 |     , testProperty "scan" scan
196 |     , testProperty "skip" skip
197 |     , testProperty "skipWhile" skipWhile
198 |     , testProperty "string" string
199 |     , testProperty "stringCI" stringCI
200 |     , testProperty "strings" strings
201 |     , testProperty "take" take
202 |     , testProperty "takeByteString" takeByteString
203 |     , testProperty "takeCount" takeCount
204 |     , testProperty "takeLazyByteString" takeLazyByteString
205 |     , testProperty "takeTill" takeTill
206 |     , testProperty "takeWhile" takeWhile
207 |     , testProperty "takeWhile1" takeWhile1
208 |     , testProperty "takeWhile1_empty" takeWhile1_empty
209 |     , testProperty "takeWhileIncluding" takeWhileIncluding
210 |     , testProperty "getChunk" getChunk
211 |     , testProperty "word8" word8
212 |     , testProperty "members" members
213 |     , testProperty "nonmembers" nonmembers
214 |   ]
215 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Internal/Types.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP, BangPatterns, GeneralizedNewtypeDeriving, OverloadedStrings,
  2 |     Rank2Types, RecordWildCards, TypeFamilies #-}
  3 | -- |
  4 | -- Module      :  Data.Attoparsec.Internal.Types
  5 | -- Copyright   :  Bryan O'Sullivan 2007-2015
  6 | -- License     :  BSD3
  7 | --
  8 | -- Maintainer  :  bos@serpentine.com
  9 | -- Stability   :  experimental
 10 | -- Portability :  unknown
 11 | --
 12 | -- Simple, efficient parser combinators, loosely based on the Parsec
 13 | -- library.
 14 | 
 15 | module Data.Attoparsec.Internal.Types
 16 |     (
 17 |       Parser(..)
 18 |     , State
 19 |     , Failure
 20 |     , Success
 21 |     , Pos(..)
 22 |     , IResult(..)
 23 |     , More(..)
 24 |     , (<>)
 25 |     , Chunk(..)
 26 |     ) where
 27 | 
 28 | import Control.Applicative as App (Applicative(..), (<$>))
 29 | import Control.Applicative (Alternative(..))
 30 | import Control.DeepSeq (NFData(rnf))
 31 | import Control.Monad (MonadPlus(..))
 32 | import qualified Control.Monad.Fail as Fail (MonadFail(..))
 33 | import Data.Monoid as Mon (Monoid(..))
 34 | import Data.Semigroup  (Semigroup(..))
 35 | import Data.Word (Word8)
 36 | import Data.ByteString (ByteString)
 37 | import qualified Data.ByteString as BS
 38 | import Data.ByteString.Internal (w2c)
 39 | import Data.Text (Text)
 40 | import qualified Data.Text as Text
 41 | import Data.Text.Unsafe (Iter(..))
 42 | import Prelude hiding (succ)
 43 | import qualified Data.Attoparsec.ByteString.Buffer as B
 44 | import qualified Data.Attoparsec.Text.Buffer as T
 45 | 
 46 | newtype Pos = Pos { fromPos :: Int }
 47 |             deriving (Eq, Ord, Show, Num)
 48 | 
 49 | -- | The result of a parse.  This is parameterised over the type @i@
 50 | -- of string that was processed.
 51 | --
 52 | -- This type is an instance of 'Functor', where 'fmap' transforms the
 53 | -- value in a 'Done' result.
 54 | data IResult i r =
 55 |     Fail i [String] String
 56 |     -- ^ The parse failed.  The @i@ parameter is the input that had
 57 |     -- not yet been consumed when the failure occurred.  The
 58 |     -- @[@'String'@]@ is a list of contexts in which the error
 59 |     -- occurred.  The 'String' is the message describing the error, if
 60 |     -- any.
 61 |   | Partial (i -> IResult i r)
 62 |     -- ^ Supply this continuation with more input so that the parser
 63 |     -- can resume.  To indicate that no more input is available, pass
 64 |     -- an empty string to the continuation.
 65 |     --
 66 |     -- __Note__: if you get a 'Partial' result, do not call its
 67 |     -- continuation more than once.
 68 |   | Done i r
 69 |     -- ^ The parse succeeded.  The @i@ parameter is the input that had
 70 |     -- not yet been consumed (if any) when the parse succeeded.
 71 | 
 72 | instance (Show i, Show r) => Show (IResult i r) where
 73 |     showsPrec d ir = showParen (d > 10) $
 74 |       case ir of
 75 |         (Fail t stk msg) -> showString "Fail" . f t . f stk . f msg
 76 |         (Partial _)      -> showString "Partial _"
 77 |         (Done t r)       -> showString "Done" . f t . f r
 78 |       where f :: Show a => a -> ShowS
 79 |             f x = showChar ' ' . showsPrec 11 x
 80 | 
 81 | instance (NFData i, NFData r) => NFData (IResult i r) where
 82 |     rnf (Fail t stk msg) = rnf t `seq` rnf stk `seq` rnf msg
 83 |     rnf (Partial _)  = ()
 84 |     rnf (Done t r)   = rnf t `seq` rnf r
 85 |     {-# INLINE rnf #-}
 86 | 
 87 | instance Functor (IResult i) where
 88 |     fmap _ (Fail t stk msg) = Fail t stk msg
 89 |     fmap f (Partial k)      = Partial (fmap f . k)
 90 |     fmap f (Done t r)   = Done t (f r)
 91 | 
 92 | -- | The core parser type.  This is parameterised over the type @i@
 93 | -- of string being processed.
 94 | --
 95 | -- This type is an instance of the following classes:
 96 | --
 97 | -- * 'Monad', where 'fail' throws an exception (i.e. fails) with an
 98 | --   error message.
 99 | --
100 | -- * 'Functor' and 'Applicative', which follow the usual definitions.
101 | --
102 | -- * 'MonadPlus', where 'mzero' fails (with no error message) and
103 | --   'mplus' executes the right-hand parser if the left-hand one
104 | --   fails.  When the parser on the right executes, the input is reset
105 | --   to the same state as the parser on the left started with. (In
106 | --   other words, attoparsec is a backtracking parser that supports
107 | --   arbitrary lookahead.)
108 | --
109 | -- * 'Alternative', which follows 'MonadPlus'.
110 | newtype Parser i a = Parser {
111 |       runParser :: forall r.
112 |                    State i -> Pos -> More
113 |                 -> Failure i (State i)   r
114 |                 -> Success i (State i) a r
115 |                 -> IResult i r
116 |     }
117 | 
118 | type family State i
119 | type instance State ByteString = B.Buffer
120 | type instance State Text = T.Buffer
121 | 
122 | type Failure i t   r = t -> Pos -> More -> [String] -> String
123 |                        -> IResult i r
124 | type Success i t a r = t -> Pos -> More -> a -> IResult i r
125 | 
126 | -- | Have we read all available input?
127 | data More = Complete | Incomplete
128 |             deriving (Eq, Show)
129 | 
130 | instance Semigroup More where
131 |     c@Complete <> _ = c
132 |     _          <> m = m
133 | 
134 | instance Mon.Monoid More where
135 |     mappend = (<>)
136 |     mempty  = Incomplete
137 | 
138 | instance Monad (Parser i) where
139 | #if !(MIN_VERSION_base(4,13,0))
140 |     fail = Fail.fail
141 |     {-# INLINE fail #-}
142 | #endif
143 | 
144 |     return = App.pure
145 |     {-# INLINE return #-}
146 | 
147 |     m >>= k = Parser $ \t !pos more lose succ ->
148 |         let succ' t' !pos' more' a = runParser (k a) t' pos' more' lose succ
149 |         in runParser m t pos more lose succ'
150 |     {-# INLINE (>>=) #-}
151 | 
152 |     (>>) = (*>)
153 |     {-# INLINE (>>) #-}
154 | 
155 | 
156 | instance Fail.MonadFail (Parser i) where
157 |     fail err = Parser $ \t pos more lose _succ -> lose t pos more [] msg
158 |       where msg = "Failed reading: " ++ err
159 |     {-# INLINE fail #-}
160 | 
161 | plus :: Parser i a -> Parser i a -> Parser i a
162 | plus f g = Parser $ \t pos more lose succ ->
163 |   let lose' t' _pos' more' _ctx _msg = runParser g t' pos more' lose succ
164 |   in runParser f t pos more lose' succ
165 | 
166 | instance MonadPlus (Parser i) where
167 |     mzero = fail "mzero"
168 |     {-# INLINE mzero #-}
169 |     mplus = plus
170 | 
171 | instance Functor (Parser i) where
172 |     fmap f p = Parser $ \t pos more lose succ ->
173 |       let succ' t' pos' more' a = succ t' pos' more' (f a)
174 |       in runParser p t pos more lose succ'
175 |     {-# INLINE fmap #-}
176 | 
177 | apP :: Parser i (a -> b) -> Parser i a -> Parser i b
178 | apP d e = do
179 |   b <- d
180 |   a <- e
181 |   return (b a)
182 | {-# INLINE apP #-}
183 | 
184 | instance Applicative (Parser i) where
185 |     pure v = Parser $ \t !pos more _lose succ -> succ t pos more v
186 |     {-# INLINE pure #-}
187 |     (<*>)  = apP
188 |     {-# INLINE (<*>) #-}
189 |     m *> k = m >>= \_ -> k
190 |     {-# INLINE (*>) #-}
191 |     x <* y = x >>= \a -> y >> pure a
192 |     {-# INLINE (<*) #-}
193 | 
194 | instance Semigroup (Parser i a) where
195 |     (<>) = plus
196 |     {-# INLINE (<>) #-}
197 | 
198 | instance Monoid (Parser i a) where
199 |     mempty  = fail "mempty"
200 |     {-# INLINE mempty #-}
201 |     mappend = (<>)
202 |     {-# INLINE mappend #-}
203 | 
204 | instance Alternative (Parser i) where
205 |     empty = fail "empty"
206 |     {-# INLINE empty #-}
207 | 
208 |     (<|>) = plus
209 |     {-# INLINE (<|>) #-}
210 | 
211 |     many v = many_v
212 |       where
213 |         many_v = some_v <|> pure []
214 |         some_v = (:) <$> v <*> many_v
215 |     {-# INLINE many #-}
216 | 
217 |     some v = some_v
218 |       where
219 |         many_v = some_v <|> pure []
220 |         some_v = (:) <$> v <*> many_v
221 |     {-# INLINE some #-}
222 | 
223 | -- | A common interface for input chunks.
224 | class Monoid c => Chunk c where
225 |   type ChunkElem c
226 |   -- | Test if the chunk is empty.
227 |   nullChunk :: c -> Bool
228 |   -- | Append chunk to a buffer.
229 |   pappendChunk :: State c -> c -> State c
230 |   -- | Position at the end of a buffer. The first argument is ignored.
231 |   atBufferEnd :: c -> State c -> Pos
232 |   -- | Return the buffer element at the given position along with its length.
233 |   bufferElemAt :: c -> Pos -> State c -> Maybe (ChunkElem c, Int)
234 |   -- | Map an element to the corresponding character.
235 |   --   The first argument is ignored.
236 |   chunkElemToChar :: c -> ChunkElem c -> Char
237 | 
238 | instance Chunk ByteString where
239 |   type ChunkElem ByteString = Word8
240 |   nullChunk = BS.null
241 |   {-# INLINE nullChunk #-}
242 |   pappendChunk = B.pappend
243 |   {-# INLINE pappendChunk #-}
244 |   atBufferEnd _ = Pos . B.length
245 |   {-# INLINE atBufferEnd #-}
246 |   bufferElemAt _ (Pos i) buf
247 |     | i < B.length buf = Just (B.unsafeIndex buf i, 1)
248 |     | otherwise = Nothing
249 |   {-# INLINE bufferElemAt #-}
250 |   chunkElemToChar _ = w2c
251 |   {-# INLINE chunkElemToChar #-}
252 | 
253 | instance Chunk Text where
254 |   type ChunkElem Text = Char
255 |   nullChunk = Text.null
256 |   {-# INLINE nullChunk #-}
257 |   pappendChunk = T.pappend
258 |   {-# INLINE pappendChunk #-}
259 |   atBufferEnd _ = Pos . T.length
260 |   {-# INLINE atBufferEnd #-}
261 |   bufferElemAt _ (Pos i) buf
262 |     | i < T.length buf = let Iter c l = T.iter buf i in Just (c, l)
263 |     | otherwise = Nothing
264 |   {-# INLINE bufferElemAt #-}
265 |   chunkElemToChar _ = id
266 |   {-# INLINE chunkElemToChar #-}
267 | 


--------------------------------------------------------------------------------
/Data/Attoparsec/Combinator.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, CPP #-}
  2 | #if __GLASGOW_HASKELL__ >= 702
  3 | {-# LANGUAGE Trustworthy #-} -- Imports internal modules
  4 | #endif
  5 | -- |
  6 | -- Module      :  Data.Attoparsec.Combinator
  7 | -- Copyright   :  Daan Leijen 1999-2001, Bryan O'Sullivan 2007-2015
  8 | -- License     :  BSD3
  9 | --
 10 | -- Maintainer  :  bos@serpentine.com
 11 | -- Stability   :  experimental
 12 | -- Portability :  portable
 13 | --
 14 | -- Useful parser combinators, similar to those provided by Parsec.
 15 | module Data.Attoparsec.Combinator
 16 |     (
 17 |     -- * Combinators
 18 |       try
 19 |     , (<?>)
 20 |     , choice
 21 |     , count
 22 |     , option
 23 |     , many'
 24 |     , many1
 25 |     , many1'
 26 |     , manyTill
 27 |     , manyTill'
 28 |     , sepBy
 29 |     , sepBy'
 30 |     , sepBy1
 31 |     , sepBy1'
 32 |     , skipMany
 33 |     , skipMany1
 34 |     , eitherP
 35 |     , feed
 36 |     , satisfyElem
 37 |     , endOfInput
 38 |     , atEnd
 39 |     , lookAhead
 40 |     ) where
 41 | 
 42 | #if !MIN_VERSION_base(4,8,0)
 43 | import Control.Applicative (Applicative(..), (<$>))
 44 | import Data.Monoid (Monoid(mappend))
 45 | #endif
 46 | import Control.Applicative (Alternative(..), liftA2, many, (<|>))
 47 | import Control.Monad (MonadPlus(..))
 48 | import Data.Attoparsec.Internal.Types (Parser(..), IResult(..))
 49 | import Data.Attoparsec.Internal (endOfInput, atEnd, satisfyElem)
 50 | import Data.ByteString (ByteString)
 51 | import Data.Foldable (asum)
 52 | import Data.Text (Text)
 53 | import qualified Data.Attoparsec.Zepto as Z
 54 | import Prelude hiding (succ)
 55 | 
 56 | -- | Attempt a parse, and if it fails, rewind the input so that no
 57 | -- input appears to have been consumed.
 58 | --
 59 | -- This combinator is provided for compatibility with Parsec.
 60 | -- attoparsec parsers always backtrack on failure.
 61 | try :: Parser i a -> Parser i a
 62 | try = id
 63 | {-# INLINE try #-}
 64 | 
 65 | -- | Name the parser, in case failure occurs.
 66 | (<?>) :: Parser i a
 67 |       -> String                 -- ^ the name to use if parsing fails
 68 |       -> Parser i a
 69 | p <?> msg0 = Parser $ \t pos more lose succ ->
 70 |              let lose' t' pos' more' strs msg = lose t' pos' more' (msg0:strs) msg
 71 |              in runParser p t pos more lose' succ
 72 | {-# INLINE (<?>) #-}
 73 | infix 0 <?>
 74 | 
 75 | -- | @choice ps@ tries to apply the actions in the list @ps@ in order,
 76 | -- until one of them succeeds. Returns the value of the succeeding
 77 | -- action.
 78 | choice :: Alternative f => [f a] -> f a
 79 | choice = asum
 80 | {-# SPECIALIZE choice :: [Parser ByteString a]
 81 |                       -> Parser ByteString a #-}
 82 | {-# SPECIALIZE choice :: [Parser Text a] -> Parser Text a #-}
 83 | {-# SPECIALIZE choice :: [Z.Parser a] -> Z.Parser a #-}
 84 | 
 85 | -- | @option x p@ tries to apply action @p@. If @p@ fails without
 86 | -- consuming input, it returns the value @x@, otherwise the value
 87 | -- returned by @p@.
 88 | --
 89 | -- > priority  = option 0 (digitToInt <$> digit)
 90 | option :: Alternative f => a -> f a -> f a
 91 | option x p = p <|> pure x
 92 | {-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-}
 93 | {-# SPECIALIZE option :: a -> Parser Text a -> Parser Text a #-}
 94 | {-# SPECIALIZE option :: a -> Z.Parser a -> Z.Parser a #-}
 95 | 
 96 | -- | A version of 'liftM2' that is strict in the result of its first
 97 | -- action.
 98 | liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c
 99 | liftM2' f a b = do
100 |   !x <- a
101 |   y <- b
102 |   return (f x y)
103 | {-# INLINE liftM2' #-}
104 | 
105 | -- | @many' p@ applies the action @p@ /zero/ or more times. Returns a
106 | -- list of the returned values of @p@. The value returned by @p@ is
107 | -- forced to WHNF.
108 | --
109 | -- >  word  = many' letter
110 | many' :: (MonadPlus m) => m a -> m [a]
111 | many' p = many_p
112 |   where many_p = some_p `mplus` return []
113 |         some_p = liftM2' (:) p many_p
114 | {-# INLINE many' #-}
115 | 
116 | -- | @many1 p@ applies the action @p@ /one/ or more times. Returns a
117 | -- list of the returned values of @p@.
118 | --
119 | -- >  word  = many1 letter
120 | many1 :: Alternative f => f a -> f [a]
121 | many1 p = liftA2 (:) p (many p)
122 | {-# INLINE many1 #-}
123 | 
124 | -- | @many1' p@ applies the action @p@ /one/ or more times. Returns a
125 | -- list of the returned values of @p@. The value returned by @p@ is
126 | -- forced to WHNF.
127 | --
128 | -- >  word  = many1' letter
129 | many1' :: (MonadPlus m) => m a -> m [a]
130 | many1' p = liftM2' (:) p (many' p)
131 | {-# INLINE many1' #-}
132 | 
133 | -- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated
134 | -- by @sep@. Returns a list of the values returned by @p@.
135 | --
136 | -- > commaSep p  = p `sepBy` (char ',')
137 | sepBy :: Alternative f => f a -> f s -> f [a]
138 | sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []
139 | {-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s
140 |                      -> Parser ByteString [a] #-}
141 | {-# SPECIALIZE sepBy :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
142 | {-# SPECIALIZE sepBy :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
143 | 
144 | -- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated
145 | -- by @sep@. Returns a list of the values returned by @p@. The value
146 | -- returned by @p@ is forced to WHNF.
147 | --
148 | -- > commaSep p  = p `sepBy'` (char ',')
149 | sepBy' :: (MonadPlus m) => m a -> m s -> m [a]
150 | sepBy' p s = scan `mplus` return []
151 |   where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return [])
152 | {-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s
153 |                       -> Parser ByteString [a] #-}
154 | {-# SPECIALIZE sepBy' :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
155 | {-# SPECIALIZE sepBy' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
156 | 
157 | -- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated
158 | -- by @sep@. Returns a list of the values returned by @p@.
159 | --
160 | -- > commaSep p  = p `sepBy1` (char ',')
161 | sepBy1 :: Alternative f => f a -> f s -> f [a]
162 | sepBy1 p s = scan
163 |     where scan = liftA2 (:) p ((s *> scan) <|> pure [])
164 | {-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s
165 |                       -> Parser ByteString [a] #-}
166 | {-# SPECIALIZE sepBy1 :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
167 | {-# SPECIALIZE sepBy1 :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
168 | 
169 | -- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated
170 | -- by @sep@. Returns a list of the values returned by @p@. The value
171 | -- returned by @p@ is forced to WHNF.
172 | --
173 | -- > commaSep p  = p `sepBy1'` (char ',')
174 | sepBy1' :: (MonadPlus m) => m a -> m s -> m [a]
175 | sepBy1' p s = scan
176 |     where scan = liftM2' (:) p ((s >> scan) `mplus` return [])
177 | {-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s
178 |                        -> Parser ByteString [a] #-}
179 | {-# SPECIALIZE sepBy1' :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
180 | {-# SPECIALIZE sepBy1' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
181 | 
182 | -- | @manyTill p end@ applies action @p@ /zero/ or more times until
183 | -- action @end@ succeeds, and returns the list of values returned by
184 | -- @p@.  This can be used to scan comments:
185 | --
186 | -- >  simpleComment   = string "<!--" *> manyTill anyChar (string "-->")
187 | --
188 | -- (Note the overlapping parsers @anyChar@ and @string \"-->\"@.
189 | -- While this will work, it is not very efficient, as it will cause a
190 | -- lot of backtracking.)
191 | manyTill :: Alternative f => f a -> f b -> f [a]
192 | manyTill p end = scan
193 |     where scan = (end *> pure []) <|> liftA2 (:) p scan
194 | {-# SPECIALIZE manyTill :: Parser ByteString a -> Parser ByteString b
195 |                         -> Parser ByteString [a] #-}
196 | {-# SPECIALIZE manyTill :: Parser Text a -> Parser Text b -> Parser Text [a] #-}
197 | {-# SPECIALIZE manyTill :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-}
198 | 
199 | -- | @manyTill' p end@ applies action @p@ /zero/ or more times until
200 | -- action @end@ succeeds, and returns the list of values returned by
201 | -- @p@.  This can be used to scan comments:
202 | --
203 | -- >  simpleComment   = string "<!--" *> manyTill' anyChar (string "-->")
204 | --
205 | -- (Note the overlapping parsers @anyChar@ and @string \"-->\"@.
206 | -- While this will work, it is not very efficient, as it will cause a
207 | -- lot of backtracking.)
208 | --
209 | -- The value returned by @p@ is forced to WHNF.
210 | manyTill' :: (MonadPlus m) => m a -> m b -> m [a]
211 | manyTill' p end = scan
212 |     where scan = (end >> return []) `mplus` liftM2' (:) p scan
213 | {-# SPECIALIZE manyTill' :: Parser ByteString a -> Parser ByteString b
214 |                          -> Parser ByteString [a] #-}
215 | {-# SPECIALIZE manyTill' :: Parser Text a -> Parser Text b -> Parser Text [a] #-}
216 | {-# SPECIALIZE manyTill' :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-}
217 | 
218 | -- | Skip zero or more instances of an action.
219 | skipMany :: Alternative f => f a -> f ()
220 | skipMany p = scan
221 |     where scan = (p *> scan) <|> pure ()
222 | {-# SPECIALIZE skipMany :: Parser ByteString a -> Parser ByteString () #-}
223 | {-# SPECIALIZE skipMany :: Parser Text a -> Parser Text () #-}
224 | {-# SPECIALIZE skipMany :: Z.Parser a -> Z.Parser () #-}
225 | 
226 | -- | Skip one or more instances of an action.
227 | skipMany1 :: Alternative f => f a -> f ()
228 | skipMany1 p = p *> skipMany p
229 | {-# SPECIALIZE skipMany1 :: Parser ByteString a -> Parser ByteString () #-}
230 | {-# SPECIALIZE skipMany1 :: Parser Text a -> Parser Text () #-}
231 | {-# SPECIALIZE skipMany1 :: Z.Parser a -> Z.Parser () #-}
232 | 
233 | -- | Apply the given action repeatedly, returning every result.
234 | count :: Monad m => Int -> m a -> m [a]
235 | count n p = sequence (replicate n p)
236 | {-# INLINE count #-}
237 | 
238 | -- | Combine two alternatives.
239 | eitherP :: (Alternative f) => f a -> f b -> f (Either a b)
240 | eitherP a b = (Left <$> a) <|> (Right <$> b)
241 | {-# INLINE eitherP #-}
242 | 
243 | -- | If a parser has returned a 'T.Partial' result, supply it with more
244 | -- input.
245 | feed :: Monoid i => IResult i r -> i -> IResult i r
246 | feed (Fail t ctxs msg) d = Fail (mappend t d) ctxs msg
247 | feed (Partial k) d    = k d
248 | feed (Done t r) d     = Done (mappend t d) r
249 | {-# INLINE feed #-}
250 | 
251 | -- | Apply a parser without consuming any input.
252 | lookAhead :: Parser i a -> Parser i a
253 | lookAhead p = Parser $ \t pos more lose succ ->
254 |   let succ' t' _pos' more' = succ t' pos more'
255 |   in runParser p t pos more lose succ'
256 | {-# INLINE lookAhead #-}
257 | 


--------------------------------------------------------------------------------