├── .gitignore
├── .travis.yml
├── LICENSE
├── Makefile
├── README.md
├── example-data
    ├── bad.json
    └── good.json
├── example.hs
├── paripari.cabal
├── specialise-all.hs
├── src
    └── Text
    │   ├── PariPari.hs
    │   └── PariPari
    │       ├── Internal
    │           ├── Acceptor.hs
    │           ├── Chunk.hs
    │           ├── Class.hs
    │           ├── Combinators.hs
    │           ├── Reporter.hs
    │           └── Run.hs
    │       └── Lens.hs
├── stack.yaml
└── test
    └── test.hs


/.gitignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | docs
 3 | wiki
 4 | TAGS
 5 | tags
 6 | wip
 7 | .stack-work
 8 | .DS_Store
 9 | .*.swp
10 | .*.swo
11 | *.o
12 | *.hi
13 | *~
14 | *#
15 | stack.yaml.lock
16 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
  1 | # This Travis job script has been generated by a script via
  2 | #
  3 | #   haskell-ci 'paripari.cabal'
  4 | #
  5 | # To regenerate the script (for example after adjusting tested-with) run
  6 | #
  7 | #   haskell-ci regenerate
  8 | #
  9 | # For more information, see https://github.com/haskell-CI/haskell-ci
 10 | #
 11 | # version: 0.10.1
 12 | #
 13 | version: ~> 1.0
 14 | language: c
 15 | os: linux
 16 | dist: xenial
 17 | git:
 18 |   # whether to recursively clone submodules
 19 |   submodules: false
 20 | cache:
 21 |   directories:
 22 |     - $HOME/.cabal/packages
 23 |     - $HOME/.cabal/store
 24 |     - $HOME/.hlint
 25 | before_cache:
 26 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/build-reports.log
 27 |   # remove files that are regenerated by 'cabal update'
 28 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/00-index.*
 29 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/*.json
 30 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.cache
 31 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar
 32 |   - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar.idx
 33 |   - rm -rfv $CABALHOME/packages/head.hackage
 34 | jobs:
 35 |   include:
 36 |     - compiler: ghc-8.10.1
 37 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.10.1","cabal-install-3.2"]}}
 38 |       os: linux
 39 |     - compiler: ghc-8.8.3
 40 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.8.3","cabal-install-3.2"]}}
 41 |       os: linux
 42 |     - compiler: ghc-8.6.5
 43 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.6.5","cabal-install-3.2"]}}
 44 |       os: linux
 45 |     - compiler: ghc-8.4.4
 46 |       addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.4.4","cabal-install-3.2"]}}
 47 |       os: linux
 48 | before_install:
 49 |   - HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//')
 50 |   - WITHCOMPILER="-w $HC"
 51 |   - HADDOCK=$(echo "/opt/$CC/bin/haddock" | sed 's/-/\//')
 52 |   - HCPKG="$HC-pkg"
 53 |   - unset CC
 54 |   - CABAL=/opt/ghc/bin/cabal
 55 |   - CABALHOME=$HOME/.cabal
 56 |   - export PATH="$CABALHOME/bin:$PATH"
 57 |   - TOP=$(pwd)
 58 |   - "HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\\d+)\\.(\\d+)\\.(\\d+)(\\.(\\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))')"
 59 |   - echo $HCNUMVER
 60 |   - CABAL="$CABAL -vnormal+nowrap"
 61 |   - set -o pipefail
 62 |   - TEST=--enable-tests
 63 |   - BENCH=--enable-benchmarks
 64 |   - HEADHACKAGE=false
 65 |   - rm -f $CABALHOME/config
 66 |   - |
 67 |     echo "verbose: normal +nowrap +markoutput"          >> $CABALHOME/config
 68 |     echo "remote-build-reporting: anonymous"            >> $CABALHOME/config
 69 |     echo "write-ghc-environment-files: always"          >> $CABALHOME/config
 70 |     echo "remote-repo-cache: $CABALHOME/packages"       >> $CABALHOME/config
 71 |     echo "logs-dir:          $CABALHOME/logs"           >> $CABALHOME/config
 72 |     echo "world-file:        $CABALHOME/world"          >> $CABALHOME/config
 73 |     echo "extra-prog-path:   $CABALHOME/bin"            >> $CABALHOME/config
 74 |     echo "symlink-bindir:    $CABALHOME/bin"            >> $CABALHOME/config
 75 |     echo "installdir:        $CABALHOME/bin"            >> $CABALHOME/config
 76 |     echo "build-summary:     $CABALHOME/logs/build.log" >> $CABALHOME/config
 77 |     echo "store-dir:         $CABALHOME/store"          >> $CABALHOME/config
 78 |     echo "install-dirs user"                            >> $CABALHOME/config
 79 |     echo "  prefix: $CABALHOME"                         >> $CABALHOME/config
 80 |     echo "repository hackage.haskell.org"               >> $CABALHOME/config
 81 |     echo "  url: http://hackage.haskell.org/"           >> $CABALHOME/config
 82 | install:
 83 |   - ${CABAL} --version
 84 |   - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]"
 85 |   - |
 86 |     echo "program-default-options"                >> $CABALHOME/config
 87 |     echo "  ghc-options: $GHCJOBS +RTS -M6G -RTS" >> $CABALHOME/config
 88 |   - cat $CABALHOME/config
 89 |   - rm -fv cabal.project cabal.project.local cabal.project.freeze
 90 |   - travis_retry ${CABAL} v2-update -v
 91 |   # Generate cabal.project
 92 |   - rm -rf cabal.project cabal.project.local cabal.project.freeze
 93 |   - touch cabal.project
 94 |   - |
 95 |     echo "packages: ." >> cabal.project
 96 |   - echo 'package paripari' >> cabal.project
 97 |   - "echo '  ghc-options: -Werror=missing-methods' >> cabal.project"
 98 |   - |
 99 |   - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(paripari)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done"
100 |   - cat cabal.project || true
101 |   - cat cabal.project.local || true
102 |   - if [ -f "./configure.ac" ]; then (cd "." && autoreconf -i); fi
103 |   - ${CABAL} v2-freeze $WITHCOMPILER ${TEST} ${BENCH}
104 |   - "cat cabal.project.freeze | sed -E 's/^(constraints: *| *)//' | sed 's/any.//'"
105 |   - rm  cabal.project.freeze
106 |   - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} --dep -j2 all
107 |   - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks --dep -j2 all
108 | script:
109 |   - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX)
110 |   # Packaging...
111 |   - ${CABAL} v2-sdist all
112 |   # Unpacking...
113 |   - mv dist-newstyle/sdist/*.tar.gz ${DISTDIR}/
114 |   - cd ${DISTDIR} || false
115 |   - find . -maxdepth 1 -type f -name '*.tar.gz' -exec tar -xvf '{}' \;
116 |   - find . -maxdepth 1 -type f -name '*.tar.gz' -exec rm       '{}' \;
117 |   - PKGDIR_paripari="$(find . -maxdepth 1 -type d -regex '.*/paripari-[0-9.]*')"
118 |   # Generate cabal.project
119 |   - rm -rf cabal.project cabal.project.local cabal.project.freeze
120 |   - touch cabal.project
121 |   - |
122 |     echo "packages: ${PKGDIR_paripari}" >> cabal.project
123 |   - echo 'package paripari' >> cabal.project
124 |   - "echo '  ghc-options: -Werror=missing-methods' >> cabal.project"
125 |   - |
126 |   - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(paripari)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done"
127 |   - cat cabal.project || true
128 |   - cat cabal.project.local || true
129 |   # Building...
130 |   # this builds all libraries and executables (without tests/benchmarks)
131 |   - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all
132 |   # Building with tests and benchmarks...
133 |   # build & run tests, build benchmarks
134 |   - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} all
135 |   # Testing...
136 |   - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all
137 |   # cabal check...
138 |   - (cd ${PKGDIR_paripari} && ${CABAL} -vnormal check)
139 |   # haddock...
140 |   - ${CABAL} v2-haddock $WITHCOMPILER --with-haddock $HADDOCK ${TEST} ${BENCH} all
141 |   # Building without installed constraints for packages in global-db...
142 |   - rm -f cabal.project.local
143 |   - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all
144 | 
145 | # REGENDATA ("0.10.1",["paripari.cabal"])
146 | # EOF
147 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2018 Daniel Mendler
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | example.hs : README.md Makefile
2 | 	unlit --language haskell -f markdown -i README.md -o example.hs
3 | 	sed -i '/OPTIONS_GHC/d' example.hs
4 | 	sed -i 's/{-# SPECIALISE_ALL/-- {-# SPECIALISE_ALL/g' example.hs
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # PariPari: Fast parser combinator library for Haskell
  2 | 
  3 | [![Hackage](https://img.shields.io/hackage/v/paripari.svg)](https://hackage.haskell.org/package/paripari)
  4 | [![Build Status](https://secure.travis-ci.org/minad/paripari.png?branch=master)](http://travis-ci.org/minad/paripari)
  5 | 
  6 | PariPari is a parser combinator library for Haskell. PariPari can be used as
  7 | a drop in replacement for the Parsec class of libraries. However be aware that the library
  8 | is new and unstable.
  9 | 
 10 | PariPari offers two parsing strategies. There is a fast Acceptor and a slower Reporter.
 11 | If the Acceptor fails, the Reporter returns a report about the parsing errors.
 12 | This allows for fast parsing in the good case without compromising on the quality of the error messages.
 13 | 
 14 | Like Attoparsec, the parser combinators backtrack by default. To avoid exponential parsing time if errors are found and
 15 | in order to avoid bad error messages, the `<!>` parser combinator is provided, which does not backtrack.
 16 | 
 17 | PariPari operates on strict `ByteString` and `Text`.
 18 | As a consequence, PariPari is only a good fit for data which is available at once (no streaming).
 19 | If characters are parsed using the `char` and `satisfy` combinators, bytestrings are interpreted as UTF-8 and decoded on the fly.
 20 | The interface of PariPari matches mostly the one of Attoparsec/Megaparsec/etc.
 21 | 
 22 | ## Features
 23 | 
 24 | * Fast-path parser without error reporting (Acceptor)
 25 |   and fallback to slower error reporting parser (Reporter)
 26 |   to optimize the common case
 27 | * Backtracking by default for ease of use
 28 | * `<!>` can be used to avoid backtracking
 29 | * Support for strict UTF-8 `ByteString` and strict `Text`
 30 | * Combinators for indentation-sensitive parsing
 31 | * Error recovery support via `recover`
 32 | * Provides flexible parsers for integers and fractional numbers
 33 |   of base 2 to 36 with support for separators between digits
 34 | * Most Parsec/Megaparsec combinators provided, relying on the `parser-combinators` library
 35 | 
 36 | ## Example
 37 | 
 38 | In this example we use PariPari to parse JSON. The following is literate haskell.
 39 | 
 40 | ### Prologue
 41 | 
 42 | We specify a preprocessor, language pragmas and the library imports.
 43 | Performance of PariPari depends crucially on the specialisation of `Parser k a` to
 44 | `Acceptor ByteString a` and `Reporter ByteString a`. In larger parsers it seems that the
 45 | GHC specialiser does not kick in. As a workaround we use `paripari-specialise-all` as a
 46 | preprocessor, which generates `SPECIALISE` pragmas from our custom `SPECIALISE_ALL` pragma.
 47 | Using the preprocessor is not necessary, however without it I observed 2x-4x slowdowns in parsing speed.
 48 | 
 49 | ``` haskell
 50 | {-# OPTIONS_GHC -F -pgmF paripari-specialise-all #-}
 51 | {-# LANGUAGE ConstraintKinds #-}
 52 | {-# LANGUAGE FlexibleContexts #-}
 53 | {-# LANGUAGE OverloadedStrings #-}
 54 | {-# LANGUAGE Rank2Types #-}
 55 | 
 56 | import Data.Foldable (for_)
 57 | import System.Environment (getArgs)
 58 | import Text.PariPari
 59 | import qualified Data.ByteString as B
 60 | ```
 61 | 
 62 | ### Basic parser types
 63 | 
 64 | We parametrize the parser with the string type.
 65 | Both `ByteString` and `Text` are supported.
 66 | Note that even in the case of `ByteStrings` a `Parser` instance
 67 | is provided, which interprets the bytes as UTF-8.
 68 | 
 69 | ``` haskell
 70 | type StringType = B.ByteString
 71 | type PMonad p = Parser StringType p
 72 | type P a = (forall p. PMonad p => p a)
 73 | ```
 74 | 
 75 | The `P` shortcut can be used for simple combinators.
 76 | For functions returning parsers, the `PMonad` constraint must
 77 | be used for specialization to work, e.g., `char :: PMonad p => Char -> p Char`.
 78 | 
 79 | Now we ensure that the GHC specialiser kicks in
 80 | and specialises all parsers. These pragmas are processed
 81 | by the preprocessor `paripari-specialise-all`.
 82 | 
 83 | ``` haskell
 84 | {-# SPECIALISE_ALL PMonad p = p ~ Acceptor StringType #-}
 85 | {-# SPECIALISE_ALL PMonad p = p ~ Reporter StringType #-}
 86 | {-# SPECIALISE_ALL P = Acceptor StringType #-}
 87 | {-# SPECIALISE_ALL P = Reporter StringType #-}
 88 | ```
 89 | 
 90 | ### JSON datatype
 91 | 
 92 | We define a datatype of JSON values.
 93 | 
 94 | ``` haskell
 95 | data Value
 96 |   = Object ![(StringType, Value)]
 97 |   | Array  ![Value]
 98 |   | String !StringType
 99 |   | Number !Integer !Integer
100 |   | Bool   !Bool
101 |   | Null
102 |   deriving (Eq, Show)
103 | ```
104 | 
105 | ### Parsers
106 | 
107 | A JSON toplevel value is either an object or an array.
108 | 
109 | ``` haskell
110 | json :: P Value
111 | json = space *> (object <|> array) <?> "json"
112 | ```
113 | 
114 | Objects consist of pairs of a text string and a value.
115 | 
116 | ``` haskell
117 | object :: P Value
118 | object = Object <$> (char '{' *> space *> sepBy pair (space *> char ',' *> space) <* space <* char '}') <?> "object"
119 | 
120 | pair :: P (StringType, Value)
121 | pair = (,) <$> (text <* space) <*> (char ':' *> space *> value)
122 | ```
123 | 
124 | Arrays are a list of values.
125 | 
126 | ``` haskell
127 | array :: P Value
128 | array = Array <$> (char '[' *> sepBy value (space *> char ',' *> space) <* space <* char ']') <?> "array"
129 | ```
130 | 
131 | Furthermore, JSON supports text strings, boolean values, null and floating point numbers.
132 | 
133 | ``` haskell
134 | value :: P Value
135 | value =
136 |   (String <$> text)
137 |     <|> object
138 |     <|> array
139 |     <|> (Bool False <$ string "false")
140 |     <|> (Bool True  <$ string "true")
141 |     <|> (Null       <$ string "null")
142 |     <|> number
143 | 
144 | text :: P StringType
145 | text = char '"' *> takeCharsWhile (/= '"') <* char '"' <?> "text"
146 | ```
147 | 
148 | Floating point numbers are parsed by `fractionDec` which returns a coefficient,
149 | the base of the exponent and the exponent. The conversion to `Double` can be done
150 | for example by the `scientific` library.
151 | 
152 | ``` haskell
153 | number :: P Value
154 | number = label "number" $ do
155 |   neg <- sign
156 |   frac <- fractionDec (pure ())
157 |   pure $ case frac of
158 |            Left n -> Number (neg n) 0
159 |            Right (c, _, e) -> Number (neg c) e
160 | ```
161 | 
162 | For spaces we need another helper function.
163 | 
164 | ``` haskell
165 | space :: P ()
166 | space = skipCharsWhile (\c -> c == ' ' || c == '\n' || c == '\t')
167 | ```
168 | 
169 | ### Main function
170 | 
171 | The main function of the example program reads a file, runs the parser
172 | and prints the value if the parsing succeeded.
173 | In the case of an error a report is printed.
174 | 
175 | ``` haskell
176 | main :: IO ()
177 | main = do
178 |   args <- getArgs
179 |   case args of
180 |     [file] -> do
181 |       src <- B.readFile file
182 |       let (result, reports) = runParser json file src
183 |       for_ reports $ putStrLn . showReport
184 |       print result
185 |     _ -> error "Usage: paripari-example test.json"
186 | ```
187 | 
188 | ## Benchmark
189 | 
190 | See the repository [parsers-bench](https://github.com/minad/parsers-bench/).
191 | 
192 | ```
193 | parsers-bench-0.1.0: benchmarks
194 | Running 1 benchmarks...
195 | Benchmark bench-speed: RUNNING...
196 | benchmarking CSV (PariPari)/csv-40.csv
197 | time                 16.34 μs   (16.18 μs .. 16.53 μs)
198 |                      0.998 R²   (0.996 R² .. 0.999 R²)
199 | mean                 16.87 μs   (16.41 μs .. 18.64 μs)
200 | std dev              2.703 μs   (569.0 ns .. 5.580 μs)
201 | variance introduced by outliers: 94% (severely inflated)
202 | 
203 | benchmarking CSV (PariPari, Reporter)/csv-40.csv
204 | time                 104.9 μs   (103.2 μs .. 106.5 μs)
205 |                      0.985 R²   (0.972 R² .. 0.992 R²)
206 | mean                 128.0 μs   (118.4 μs .. 140.0 μs)
207 | std dev              40.73 μs   (30.44 μs .. 51.00 μs)
208 | variance introduced by outliers: 98% (severely inflated)
209 | 
210 | benchmarking CSV (Attoparsec)/csv-40.csv
211 | time                 57.69 μs   (56.52 μs .. 60.22 μs)
212 |                      0.917 R²   (0.796 R² .. 0.988 R²)
213 | mean                 68.62 μs   (62.95 μs .. 81.74 μs)
214 | std dev              27.44 μs   (15.54 μs .. 49.65 μs)
215 | variance introduced by outliers: 99% (severely inflated)
216 | 
217 | benchmarking CSV (Megaparsec)/csv-40.csv
218 | time                 52.12 μs   (51.54 μs .. 52.98 μs)
219 |                      0.996 R²   (0.993 R² .. 0.998 R²)
220 | mean                 54.34 μs   (53.25 μs .. 56.03 μs)
221 | std dev              4.705 μs   (3.416 μs .. 6.766 μs)
222 | variance introduced by outliers: 79% (severely inflated)
223 | 
224 | benchmarking Log (PariPari)/log-40.log
225 | time                 364.4 μs   (359.8 μs .. 369.6 μs)
226 |                      0.998 R²   (0.997 R² .. 0.999 R²)
227 | mean                 362.3 μs   (359.7 μs .. 365.9 μs)
228 | std dev              10.50 μs   (7.760 μs .. 14.60 μs)
229 | variance introduced by outliers: 22% (moderately inflated)
230 | 
231 | benchmarking Log (PariPari, Reporter)/log-40.log
232 | time                 411.5 μs   (404.1 μs .. 421.8 μs)
233 |                      0.984 R²   (0.964 R² .. 0.996 R²)
234 | mean                 425.2 μs   (411.3 μs .. 453.2 μs)
235 | std dev              60.17 μs   (37.11 μs .. 88.14 μs)
236 | variance introduced by outliers: 87% (severely inflated)
237 | 
238 | benchmarking Log (Attoparsec)/log-40.log
239 | time                 368.6 μs   (364.2 μs .. 372.5 μs)
240 |                      0.998 R²   (0.997 R² .. 0.999 R²)
241 | mean                 364.6 μs   (360.9 μs .. 371.3 μs)
242 | std dev              16.87 μs   (10.96 μs .. 27.01 μs)
243 | variance introduced by outliers: 42% (moderately inflated)
244 | 
245 | benchmarking Log (Megaparsec)/log-40.log
246 | time                 412.4 μs   (405.9 μs .. 423.8 μs)
247 |                      0.992 R²   (0.982 R² .. 0.998 R²)
248 | mean                 419.7 μs   (410.9 μs .. 433.7 μs)
249 | std dev              34.86 μs   (21.22 μs .. 51.70 μs)
250 | variance introduced by outliers: 69% (severely inflated)
251 | 
252 | benchmarking JSON (PariPari)/json-40.json
253 | time                 20.33 μs   (20.08 μs .. 20.68 μs)
254 |                      0.998 R²   (0.997 R² .. 1.000 R²)
255 | mean                 20.48 μs   (20.27 μs .. 21.02 μs)
256 | std dev              1.179 μs   (578.2 ns .. 2.199 μs)
257 | variance introduced by outliers: 65% (severely inflated)
258 | 
259 | benchmarking JSON (PariPari, Reporter)/json-40.json
260 | time                 76.16 μs   (74.89 μs .. 78.25 μs)
261 |                      0.994 R²   (0.987 R² .. 0.999 R²)
262 | mean                 76.02 μs   (74.68 μs .. 78.57 μs)
263 | std dev              5.598 μs   (3.438 μs .. 8.326 μs)
264 | variance introduced by outliers: 72% (severely inflated)
265 | 
266 | benchmarking JSON (Attoparsec)/json-40.json
267 | time                 21.67 μs   (21.43 μs .. 21.97 μs)
268 |                      0.999 R²   (0.998 R² .. 0.999 R²)
269 | mean                 21.69 μs   (21.48 μs .. 21.99 μs)
270 | std dev              836.0 ns   (554.7 ns .. 1.371 μs)
271 | variance introduced by outliers: 45% (moderately inflated)
272 | 
273 | benchmarking JSON (Megaparsec)/json-40.json
274 | time                 31.50 μs   (30.84 μs .. 32.42 μs)
275 |                      0.994 R²   (0.989 R² .. 0.998 R²)
276 | mean                 31.45 μs   (30.88 μs .. 32.42 μs)
277 | std dev              2.377 μs   (1.554 μs .. 3.610 μs)
278 | variance introduced by outliers: 75% (severely inflated)
279 | 
280 | benchmarking JSON (PariPari, highlevel)/json-40.json
281 | time                 30.63 μs   (30.08 μs .. 31.24 μs)
282 |                      0.998 R²   (0.996 R² .. 0.999 R²)
283 | mean                 30.69 μs   (30.35 μs .. 31.27 μs)
284 | std dev              1.459 μs   (1.056 μs .. 2.053 μs)
285 | variance introduced by outliers: 54% (severely inflated)
286 | 
287 | benchmarking JSON (PariPari, Reporter, highlevel)/json-40.json
288 | time                 109.8 μs   (107.2 μs .. 113.2 μs)
289 |                      0.995 R²   (0.992 R² .. 0.998 R²)
290 | mean                 110.0 μs   (108.6 μs .. 112.2 μs)
291 | std dev              6.079 μs   (4.499 μs .. 7.572 μs)
292 | variance introduced by outliers: 57% (severely inflated)
293 | 
294 | benchmarking JSON (Attoparsec, highlevel)/json-40.json
295 | time                 33.94 μs   (33.56 μs .. 34.39 μs)
296 |                      0.998 R²   (0.997 R² .. 0.999 R²)
297 | mean                 34.31 μs   (33.87 μs .. 35.74 μs)
298 | std dev              2.511 μs   (896.2 ns .. 5.032 μs)
299 | variance introduced by outliers: 74% (severely inflated)
300 | 
301 | benchmarking JSON (Megaparsec, highlevel)/json-40.json
302 | time                 58.52 μs   (56.70 μs .. 60.51 μs)
303 |                      0.993 R²   (0.990 R² .. 0.998 R²)
304 | mean                 57.58 μs   (56.73 μs .. 58.92 μs)
305 | std dev              3.511 μs   (2.575 μs .. 4.710 μs)
306 | variance introduced by outliers: 64% (severely inflated)
307 | 
308 | Benchmark bench-speed: FINISH
309 | ```
310 | 
311 | ## Thanks
312 | 
313 | * Mark Karpov @mrkkrp - For the `parser-combinators` library,
314 |   which was extracted from Megaparsec and parsers-bench.
315 |   The json example above is adapted from parsers-bench.
316 | 
317 | ## Related projects
318 | 
319 | * [parser-combinators](http://hackage.haskell.org/package/parser-combinators)
320 | * [parsec](http://hackage.haskell.org/package/parsec)
321 | * [megaparsec](http://hackage.haskell.org/package/megaparsec)
322 | * [attoparsec](http://hackage.haskell.org/package/attoparsec)
323 | * [Earley](http://hackage.haskell.org/package/Earley)
324 | * [trifecta](http://hackage.haskell.org/package/trifecta)
325 | * [parsix](http://hackage.haskell.org/package/parsix)
326 | * [uu-parsinglib](http://hackage.haskell.org/package/uu-parsinglib)
327 | 
328 | ## License
329 | 
330 | Released under the MIT License.
331 | 
332 | Copyright (c) 2018 Daniel Mendler
333 | 


--------------------------------------------------------------------------------
/example-data/bad.json:
--------------------------------------------------------------------------------
 1 | {"widget": {
 2 |     "debug": "on",
 3 |     "window": {
 4 |         "title": "Sample Konfabulator Widget",
 5 |         "name": "main_window",
 6 |         "width": 500,
 7 |         "height": 500
 8 |     "image": {
 9 |         "src": "Images/Sun.png",
10 |         "name": "sun1",
11 |         "hOffset": 250,
12 |         "vOffset": 250,
13 |         "alignment": "center"
14 |     },
15 |     "text": {
16 |         "data": "Click Here",
17 |         "size": 36,
18 |         "style": "bold",
19 |         "name": "text1",
20 |         "hOffset": 250,
21 |         "vOffset": 100,
22 |         "alignment": "center",
23 |         "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;"
24 |     }
25 | }}
26 | 


--------------------------------------------------------------------------------
/example-data/good.json:
--------------------------------------------------------------------------------
 1 | {"widget": {
 2 |     "debug": "on",
 3 |     "window": {
 4 |         "title": "Sample Konfabulator Widget",
 5 |         "name": "main_window",
 6 |         "width": 500,
 7 |         "height": 500
 8 |     },
 9 |     "image": {
10 |         "src": "Images/Sun.png",
11 |         "name": "sun1",
12 |         "hOffset": 250,
13 |         "vOffset": 250,
14 |         "alignment": "center"
15 |     },
16 |     "text": {
17 |         "data": "Click Here",
18 |         "size": 36,
19 |         "style": "bold",
20 |         "name": "text1",
21 |         "hOffset": 250,
22 |         "vOffset": 100,
23 |         "alignment": "center",
24 |         "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;"
25 |     }
26 | }}
27 | 


--------------------------------------------------------------------------------
/example.hs:
--------------------------------------------------------------------------------
 1 | 
 2 | {-# LANGUAGE ConstraintKinds #-}
 3 | {-# LANGUAGE FlexibleContexts #-}
 4 | {-# LANGUAGE OverloadedStrings #-}
 5 | {-# LANGUAGE Rank2Types #-}
 6 | 
 7 | import Data.Foldable (for_)
 8 | import System.Environment (getArgs)
 9 | import Text.PariPari
10 | import qualified Data.ByteString as B
11 | 
12 | type StringType = B.ByteString
13 | type PMonad p = Parser StringType p
14 | type P a = (forall p. PMonad p => p a)
15 | 
16 | -- {-# SPECIALISE_ALL PMonad p = p ~ Acceptor StringType #-}
17 | -- {-# SPECIALISE_ALL PMonad p = p ~ Reporter StringType #-}
18 | -- {-# SPECIALISE_ALL P = Acceptor StringType #-}
19 | -- {-# SPECIALISE_ALL P = Reporter StringType #-}
20 | 
21 | data Value
22 |   = Object ![(StringType, Value)]
23 |   | Array  ![Value]
24 |   | String !StringType
25 |   | Number !Integer !Integer
26 |   | Bool   !Bool
27 |   | Null
28 |   deriving (Eq, Show)
29 | 
30 | json :: P Value
31 | json = space *> (object <|> array) <?> "json"
32 | 
33 | object :: P Value
34 | object = Object <$> (char '{' *> space *> sepBy pair (space *> char ',' *> space) <* space <* char '}') <?> "object"
35 | 
36 | pair :: P (StringType, Value)
37 | pair = (,) <$> (text <* space) <*> (char ':' *> space *> value)
38 | 
39 | array :: P Value
40 | array = Array <$> (char '[' *> sepBy value (space *> char ',' *> space) <* space <* char ']') <?> "array"
41 | 
42 | value :: P Value
43 | value =
44 |   (String <$> text)
45 |     <|> object
46 |     <|> array
47 |     <|> (Bool False <$ string "false")
48 |     <|> (Bool True  <$ string "true")
49 |     <|> (Null       <$ string "null")
50 |     <|> number
51 | 
52 | text :: P StringType
53 | text = char '"' *> takeCharsWhile (/= '"') <* char '"' <?> "text"
54 | 
55 | number :: P Value
56 | number = label "number" $ do
57 |   neg <- sign
58 |   frac <- fractionDec (pure ())
59 |   pure $ case frac of
60 |            Left n -> Number (neg n) 0
61 |            Right (c, _, e) -> Number (neg c) e
62 | 
63 | space :: P ()
64 | space = skipCharsWhile (\c -> c == ' ' || c == '\n' || c == '\t')
65 | 
66 | main :: IO ()
67 | main = do
68 |   args <- getArgs
69 |   case args of
70 |     [file] -> do
71 |       src <- B.readFile file
72 |       let (result, reports) = runParser json file src
73 |       for_ reports $ putStrLn . showReport
74 |       print result
75 |     _ -> error "Usage: paripari-example test.json"
76 | 


--------------------------------------------------------------------------------
/paripari.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version: 1.12
 2 | name:           paripari
 3 | version:        0.7.0.0
 4 | synopsis:       Parser combinators with fast-path and slower fallback for error reporting
 5 | description:    PariPari offers two parsing strategies. There is a fast Acceptor and a slower Reporter. If the Acceptor fails, the Reporter returns a report about the parsing errors.
 6 | category:       Text
 7 | stability:      experimental
 8 | homepage:       https://github.com/minad/paripari#readme
 9 | bug-reports:    https://github.com/minad/paripari/issues
10 | author:         Daniel Mendler <mail@daniel-mendler.de>
11 | maintainer:     Daniel Mendler <mail@daniel-mendler.de>
12 | copyright:      2018 Daniel Mendler
13 | license:        MIT
14 | license-file:   LICENSE
15 | tested-with:    GHC == 8.4.4, GHC == 8.6.5, GHC == 8.8.3, GHC == 8.10.1
16 | build-type:     Simple
17 | 
18 | source-repository head
19 |   type: git
20 |   location: https://github.com/minad/paripari
21 | 
22 | library
23 |   exposed-modules:
24 |       Text.PariPari
25 |       Text.PariPari.Internal.Acceptor
26 |       Text.PariPari.Internal.Chunk
27 |       Text.PariPari.Internal.Class
28 |       Text.PariPari.Internal.Combinators
29 |       Text.PariPari.Internal.Reporter
30 |       Text.PariPari.Internal.Run
31 |       Text.PariPari.Lens
32 |   other-modules:
33 |       Paths_paripari
34 |   hs-source-dirs:
35 |       src
36 |   ghc-options: -O2 -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances
37 |   build-depends:
38 |       base >=4.8 && <5
39 |     , bytestring >=0.10 && <0.11
40 |     , parser-combinators >=1.0 && <1.3
41 |     , text >=0.11 && <1.3
42 |   default-language: Haskell2010
43 | 
44 | executable paripari-example
45 |   main-is: example.hs
46 |   other-modules:
47 |       Paths_paripari
48 |   ghc-options: -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances
49 |   build-depends:
50 |       base >=4.8 && <5
51 |     , bytestring >=0.10 && <0.11
52 |     , paripari
53 |     , parser-combinators >=1.0 && <1.3
54 |     , text >=0.11 && <1.3
55 |   default-language: Haskell2010
56 | 
57 | executable paripari-specialise-all
58 |   main-is: specialise-all.hs
59 |   other-modules:
60 |       Paths_paripari
61 |   ghc-options: -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances
62 |   build-depends:
63 |       base >=4.8 && <5
64 |     , bytestring >=0.10 && <0.11
65 |     , paripari
66 |     , parser-combinators >=1.0 && <1.3
67 |     , text >=0.11 && <1.3
68 |   default-language: Haskell2010
69 | 
70 | test-suite test
71 |   type: exitcode-stdio-1.0
72 |   main-is: test.hs
73 |   other-modules:
74 |       Paths_paripari
75 |   hs-source-dirs:
76 |       test
77 |   ghc-options: -O0 -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances
78 |   build-depends:
79 |       base >=4.8 && <5
80 |     , bytestring >=0.10 && <0.11
81 |     , paripari
82 |     , parser-combinators >=1.0 && <1.3
83 |     , random
84 |     , tasty
85 |     , tasty-hunit
86 |     , text >=0.11 && <1.3
87 |   default-language: Haskell2010
88 | 


--------------------------------------------------------------------------------
/specialise-all.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE ConstraintKinds #-}
  2 | {-# LANGUAGE FlexibleContexts #-}
  3 | {-# LANGUAGE OverloadedStrings #-}
  4 | {-# LANGUAGE Rank2Types #-}
  5 | 
  6 | import Data.Foldable (for_)
  7 | import System.Environment (getArgs)
  8 | import Text.PariPari
  9 | import qualified Data.Char as C
 10 | import qualified Data.List.NonEmpty as NE
 11 | import qualified Data.Text as T
 12 | import qualified Data.Text.IO as T
 13 | 
 14 | type StringType = T.Text
 15 | type PMonad p = Parser StringType p
 16 | type P a = (forall p. PMonad p => p a)
 17 | 
 18 | data Type
 19 |   = TypeName   !StringType
 20 |   | TypeVar    !StringType
 21 |   | TypeApp    !Type !(NE.NonEmpty Type)
 22 |   | TypeEq     !Type !Type
 23 |   | TypeConstr !Type !Type
 24 |   | TypeLam    !Type !Type
 25 |   | TypeTuple  ![Type]
 26 |   | TypeList   !Type
 27 |   deriving (Show, Eq)
 28 | 
 29 | data SourceLine
 30 |   = SpecialiseAll !Type !Type
 31 |   | TypeDecl      !(NE.NonEmpty StringType) !Type
 32 |   | OtherLine     !StringType
 33 |   deriving (Show)
 34 | 
 35 | source :: P [SourceLine]
 36 | source = sepBy sourceLine (char '\n') <* eof
 37 | 
 38 | sourceLine :: P SourceLine
 39 | sourceLine = specialiseAll <|> typeDecl <|> otherLine
 40 | 
 41 | otherLine :: P SourceLine
 42 | otherLine = OtherLine <$> takeCharsWhile (/= '\n')
 43 | 
 44 | specialiseAll :: P SourceLine
 45 | specialiseAll = SpecialiseAll
 46 |   <$> (symbol "{-#" *> symbol "SPECIALISE_ALL" *> type_)
 47 |   <*> (symbol "=" *> type_ <* symbol "#-}")
 48 | 
 49 | identifierAtom :: PMonad p => (Char -> Bool) -> p ()
 50 | identifierAtom f = satisfy f *> skipCharsWhile (\c -> C.isAlphaNum c || c == '_' || c == '\'')
 51 | 
 52 | name :: P StringType
 53 | name = asChunk (sepEndBy (identifierAtom C.isUpper) (char '.') *>
 54 |                 identifierAtom C.isLower) <* space
 55 | 
 56 | typeName :: P StringType
 57 | typeName = asChunk (void $ sepBy1 (identifierAtom C.isUpper) (char '.')) <* space
 58 | 
 59 | symbol :: PMonad p => String -> p StringType
 60 | symbol s = string s <* space
 61 | 
 62 | typeTuple :: P Type
 63 | typeTuple = do
 64 |   ts <- between (symbol "(") (symbol ")") (sepBy type_ (symbol ","))
 65 |   pure $ case ts of
 66 |            []  -> TypeName "()"
 67 |            [t] -> t
 68 |            _   -> TypeTuple ts
 69 | 
 70 | typeAtom :: P Type
 71 | typeAtom =
 72 |   TypeName <$> typeName
 73 |   <|> TypeVar <$> name
 74 |   <|> TypeList <$> between (symbol "[") (symbol "]") type_
 75 |   <|> typeTuple
 76 | 
 77 | typeApp :: P Type
 78 | typeApp = do
 79 |   t <- typeAtom
 80 |   option t $ TypeApp t <$> some typeAtom
 81 | 
 82 | type_ :: P Type
 83 | type_ = do
 84 |   t <- typeApp
 85 |   TypeEq t <$> (symbol "~" *> type_)
 86 |     <|> TypeLam t <$> (symbol "->" *> type_)
 87 |     <|> TypeConstr t <$> (symbol "=>" *> type_)
 88 |     <|> pure t
 89 | 
 90 | typeDecl :: P SourceLine
 91 | typeDecl = TypeDecl <$> sepBy1 name (symbol ",") <*> (symbol "::" *> type_)
 92 | 
 93 | space :: P ()
 94 | space = skipCharsWhile (== ' ')
 95 | 
 96 | showType :: Type -> StringType
 97 | showType (TypeName t)  = t
 98 | showType (TypeVar t)   = t
 99 | showType (TypeApp t a) = "(" <> showType t <> " " <> T.intercalate " " (NE.toList $ fmap showType a) <> ")"
100 | showType (TypeEq a b)  = "(" <> showType a <> " ~ " <> showType b <> ")"
101 | showType (TypeLam a b)  = "(" <> showType a <> " -> " <> showType b <> ")"
102 | showType (TypeConstr a b)  = "(" <> showType a <> " => " <> showType b <> ")"
103 | showType (TypeList t) = "[" <> showType t <> "]"
104 | showType (TypeTuple t) = "(" <> T.intercalate ", " (fmap showType t) <> ")"
105 | 
106 | showSource :: SourceLine -> StringType
107 | showSource (SpecialiseAll from to) = "-- SPECIALISE_ALL " <> showType from <> " = " <> showType to
108 | showSource (TypeDecl      names t) = T.intercalate "," (NE.toList names) <> " :: " <> showType t
109 | showSource (OtherLine     str)     = str
110 | 
111 | substitute :: [(StringType, Type)] -> Type -> Type
112 | substitute vars t@(TypeVar v)
113 |   | Just t' <- lookup v vars = t'
114 |   | otherwise = t
115 | substitute _    t@TypeName{} = t
116 | substitute vars (TypeApp t a) = TypeApp (substitute vars t) (fmap (substitute vars) a)
117 | substitute vars (TypeEq a b) = TypeEq (substitute vars a) (substitute vars b)
118 | substitute vars (TypeConstr a b) = TypeConstr (substitute vars a) (substitute vars b)
119 | substitute vars (TypeLam a b) = TypeLam (substitute vars a) (substitute vars b)
120 | substitute vars (TypeTuple t) = TypeTuple (fmap (substitute vars) t)
121 | substitute vars (TypeList t) = TypeList (substitute vars t)
122 | 
123 | unify :: Type -> Type -> Maybe [(StringType, Type)]
124 | unify (TypeVar v) t =
125 |   Just [(v, t)]
126 | unify (TypeName t1) (TypeName t2)
127 |   | t1 == t2 = Just []
128 | unify (TypeApp t1 a1) (TypeApp t2 a2) | length a1 == length a2 = do
129 |   t <- unify t1 t2
130 |   a <- foldMap (uncurry unify) $ NE.zip a1 a2
131 |   pure $ t <> a
132 | unify (TypeEq a1 b1) (TypeEq a2 b2) = do
133 |   a <- unify a1 a2
134 |   b <- unify b1 b2
135 |   pure $ a <> b
136 | unify (TypeConstr a1 b1) (TypeConstr a2 b2) = do
137 |   a <- unify a1 a2
138 |   b <- unify b1 b2
139 |   pure $ a <> b
140 | unify (TypeLam a1 b1) (TypeLam a2 b2) = do
141 |   a <- unify a1 a2
142 |   b <- unify b1 b2
143 |   pure $ a <> b
144 | unify (TypeTuple t1) (TypeTuple t2) | length t1 == length t2 =
145 |   foldMap (uncurry unify) $ zip t1 t2
146 | unify (TypeList t1) (TypeList t2) =
147 |   unify t1 t2
148 | unify _ _ = Nothing
149 | 
150 | simplify :: Type -> Type
151 | simplify t@TypeVar{} = t
152 | simplify t@TypeName{} = t
153 | simplify (TypeApp t a) = TypeApp (simplify t) (fmap simplify a)
154 | simplify (TypeEq a b) = TypeEq (simplify a) (simplify b)
155 | simplify (TypeLam a b) = TypeLam (simplify a) (simplify b)
156 | simplify (TypeTuple t) = TypeTuple (fmap simplify t)
157 | simplify (TypeList t) = TypeList (simplify t)
158 | simplify (TypeConstr (TypeEq (TypeVar v) t) t') = substitute [(v, t)] t'
159 | simplify (TypeConstr c t) = TypeConstr (TypeTuple otherConstraints) (substitute constraintVars t)
160 |   where eqConstraints = [e | e@(TypeEq TypeVar{} _) <- constraints]
161 |         constraintVars = [(v,x) | TypeEq (TypeVar v) x <- eqConstraints]
162 |         otherConstraints = filter (\x -> all (/= x) eqConstraints) constraints
163 |         constraints = case c of
164 |           TypeTuple xs -> xs
165 |           x -> [x]
166 | 
167 | specialiseType :: Type -> Type -> Type -> Type
168 | specialiseType from to typ
169 |   | Just vars <- unify from typ = substitute vars to
170 |   | otherwise =
171 |       case typ of
172 |         TypeName{}     -> typ
173 |         TypeVar{}      -> typ
174 |         TypeApp t a    -> TypeApp (specialiseType from to t) (fmap (specialiseType from to) a)
175 |         TypeEq a b     -> TypeEq (specialiseType from to a) (specialiseType from to b)
176 |         TypeConstr a b -> TypeConstr (specialiseType from to a) (specialiseType from to b)
177 |         TypeLam a b    -> TypeLam (specialiseType from to a) (specialiseType from to b)
178 |         TypeTuple t    -> TypeTuple (fmap (specialiseType from to) t)
179 |         TypeList t     -> TypeList (specialiseType from to t)
180 | 
181 | specialise :: [(Type, Type)] -> (NE.NonEmpty StringType, Type) -> [StringType]
182 | specialise specs (names, typ) = concatMap go specs
183 |   where go (from, to)
184 |           | typ' <- specialiseType from to typ, typ /= typ' =
185 |               ["{-# SPECIALISE " <> n <> " :: " <> showType (simplify typ') <> " #-}" | n <- NE.toList names]
186 |           | otherwise = []
187 | 
188 | main :: IO ()
189 | main = do
190 |   args <- getArgs
191 |   case args of
192 |     [src, _, dst] -> do
193 |       code <- T.readFile src
194 |       let (result, reports) = runParser source src code
195 |       for_ reports $ putStrLn . showReport
196 |       case result of
197 |         Nothing -> pure ()
198 |         Just ls -> do
199 |           let specialisers = [(from, to) | SpecialiseAll from to <- ls]
200 |               specialisedTypeDecls = concatMap (specialise specialisers) [(n, t) | TypeDecl n t <- ls]
201 |           T.writeFile dst $ T.intercalate "\n" $ map showSource ls <> specialisedTypeDecls
202 |     _ -> error "Usage: paripari-specialise-all src _ dst"
203 | 


--------------------------------------------------------------------------------
/src/Text/PariPari.hs:
--------------------------------------------------------------------------------
 1 | module Text.PariPari (
 2 |   C.Parser(..)
 3 |   , C.Pos(..)
 4 |   , C.Error(..)
 5 |   , C.showError
 6 | 
 7 |   , K.Chunk(showChunk)
 8 | 
 9 |   , U.runParser
10 |   , U.runParserWithOptions
11 | 
12 |   , A.Acceptor
13 |   , A.runAcceptor
14 | 
15 |   , R.Reporter
16 |   , R.Report(..)
17 |   , R.ErrorContext(..)
18 |   , R.ReportOptions(..)
19 |   , R.runReporter
20 |   , R.showReport
21 |   , R.showErrors
22 |   , R.runReporterWithOptions
23 |   , R.defaultReportOptions
24 | 
25 |   , module Text.PariPari.Internal.Combinators
26 | ) where
27 | 
28 | import Text.PariPari.Internal.Combinators
29 | import qualified Text.PariPari.Internal.Acceptor as A
30 | import qualified Text.PariPari.Internal.Chunk as K
31 | import qualified Text.PariPari.Internal.Class as C
32 | import qualified Text.PariPari.Internal.Reporter as R
33 | import qualified Text.PariPari.Internal.Run as U
34 | 


--------------------------------------------------------------------------------
/src/Text/PariPari/Internal/Acceptor.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE CPP #-}
  3 | {-# LANGUAGE FlexibleInstances #-}
  4 | {-# LANGUAGE MagicHash #-}
  5 | {-# LANGUAGE MultiParamTypeClasses #-}
  6 | {-# LANGUAGE MultiWayIf #-}
  7 | {-# LANGUAGE NamedFieldPuns #-}
  8 | {-# LANGUAGE PatternSynonyms #-}
  9 | {-# LANGUAGE Rank2Types #-}
 10 | {-# LANGUAGE ScopedTypeVariables #-}
 11 | {-# LANGUAGE TypeApplications #-}
 12 | {-# LANGUAGE UnboxedTuples #-}
 13 | module Text.PariPari.Internal.Acceptor (
 14 |   Acceptor(..)
 15 |   , Env(..)
 16 |   , get
 17 |   , local
 18 |   , runAcceptor
 19 | ) where
 20 | 
 21 | import Control.Monad (void)
 22 | import Data.Semigroup as Sem
 23 | import Data.String (IsString(..))
 24 | import GHC.Base hiding (State#)
 25 | import GHC.Word
 26 | import Text.PariPari.Internal.Chunk
 27 | import Text.PariPari.Internal.Class
 28 | import qualified Control.Monad.Fail as Fail
 29 | 
 30 | data Env k = Env
 31 |   { _envBuf     :: !(Buffer k)
 32 |   , _envFile    :: !FilePath
 33 |   , _envRefLine :: Int#
 34 |   , _envRefCol  :: Int#
 35 |   }
 36 | 
 37 | type State# = (# Int#, Int#, Int# #)
 38 | 
 39 | type Result# a = (# Int# | (# State#, a #) #)
 40 | 
 41 | pattern Ok# :: State# -> a -> Result# a
 42 | pattern Ok# s a = (# | (# s, a #) #)
 43 | pattern Err# :: Int# -> Result# a
 44 | pattern Err# o = (# o | #)
 45 | {-# COMPLETE Ok#, Err# #-}
 46 | 
 47 | _stLine :: State# -> Int#
 48 | _stLine (# _, x, _ #) = x
 49 | {-# INLINE _stLine #-}
 50 | 
 51 | _stCol :: State# -> Int#
 52 | _stCol (# _, _, x #) = x
 53 | {-# INLINE _stCol #-}
 54 | 
 55 | _stOff :: State# -> Int#
 56 | _stOff (# x, _, _ #) = x
 57 | {-# INLINE _stOff #-}
 58 | 
 59 | -- | Parser which is optimised for fast parsing. Error reporting
 60 | -- is minimal.
 61 | newtype Acceptor k a = Acceptor
 62 |   { unAcceptor :: Env k -> State# -> Result# a }
 63 | 
 64 | instance (Chunk k, Semigroup a) => Sem.Semigroup (Acceptor k a) where
 65 |   p1 <> p2 = (<>) <$> p1 <*> p2
 66 |   {-# INLINE (<>) #-}
 67 | 
 68 | instance (Chunk k, Semigroup a, Monoid a) => Monoid (Acceptor k a) where
 69 |   mempty = pure mempty
 70 |   {-# INLINE mempty #-}
 71 | 
 72 |   mappend = (<>)
 73 |   {-# INLINE mappend #-}
 74 | 
 75 | instance Functor (Acceptor k) where
 76 |   fmap f p = Acceptor $ \env st -> case unAcceptor p env st of
 77 |     Err# o -> Err# o
 78 |     Ok# st' x -> Ok# st' (f x)
 79 |   {-# INLINE fmap #-}
 80 | 
 81 | instance Chunk k => Applicative (Acceptor k) where
 82 |   pure x = Acceptor $ \_ st -> Ok# st x
 83 |   {-# INLINE pure #-}
 84 | 
 85 |   f <*> a = Acceptor $ \env st -> case unAcceptor f env st of
 86 |     Err# o -> Err# o
 87 |     Ok# s f' -> case unAcceptor a env s of
 88 |       Err# o -> Err# o
 89 |       Ok# s' a' -> Ok# s' (f' a')
 90 |   {-# INLINE (<*>) #-}
 91 | 
 92 |   p1 *> p2 = do
 93 |     void p1
 94 |     p2
 95 |   {-# INLINE (*>) #-}
 96 | 
 97 |   p1 <* p2 = do
 98 |     x <- p1
 99 |     void p2
100 |     pure x
101 |   {-# INLINE (<*) #-}
102 | 
103 | instance Chunk k => Alternative (Acceptor k) where
104 |   empty = Acceptor $ \_ st -> Err# (_stOff st)
105 |   {-# INLINE empty #-}
106 | 
107 |   p1 <|> p2 = Acceptor $ \env st ->
108 |     case unAcceptor p1 env st of
109 |       Ok# st' x -> Ok# st' x
110 |       Err# _ -> unAcceptor p2 env st
111 |   {-# INLINE (<|>) #-}
112 | 
113 | instance Chunk k => MonadPlus (Acceptor k)
114 | 
115 | instance Chunk k => Monad (Acceptor k) where
116 |   p >>= f = Acceptor $ \env st ->
117 |     case unAcceptor p env st of
118 |       Err# o -> Err# o
119 |       Ok# s x -> unAcceptor (f x) env s
120 |   {-# INLINE (>>=) #-}
121 | 
122 | #if !MIN_VERSION_base(4,11,0)
123 |   fail = Fail.fail
124 |   {-# INLINE fail #-}
125 | #endif
126 | 
127 | instance Chunk k => Fail.MonadFail (Acceptor k) where
128 |   fail msg = failWith $ EFail msg
129 |   {-# INLINE fail #-}
130 | 
131 | instance Chunk k => Parser k (Acceptor k) where
132 |   getPos = get $ \_ st -> Pos (I# (_stLine st)) (I# (_stCol st))
133 |   {-# INLINE getPos #-}
134 | 
135 |   getFile = get $ \env _ -> _envFile env
136 |   {-# INLINE getFile #-}
137 | 
138 |   getRefPos = get $ \env _ -> Pos (I# (_envRefLine env)) (I# (_envRefCol env))
139 |   {-# INLINE getRefPos #-}
140 | 
141 |   withRefPos p = local (\st env -> env { _envRefLine = _stLine st, _envRefCol = _stCol st }) p
142 |   {-# INLINE withRefPos #-}
143 | 
144 |   notFollowedBy p = Acceptor $ \env st ->
145 |     case unAcceptor p env st of
146 |       Err# _ -> Ok# st ()
147 |       Ok# _ _ -> Err# (_stOff st)
148 |   {-# INLINE notFollowedBy #-}
149 | 
150 |   lookAhead p = Acceptor $ \env st -> do
151 |     case unAcceptor p env st of
152 |       Err# _ -> Err# (_stOff st)
153 |       Ok# _ x -> Ok# st x
154 |   {-# INLINE lookAhead #-}
155 | 
156 |   failWith _ = Acceptor $ \_ st -> Err# (_stOff st)
157 |   {-# INLINE failWith #-}
158 | 
159 |   eof = Acceptor $ \env st ->
160 |     case indexByte @k (_envBuf env) (_stOff st) `eqWord#` int2Word# 0# of
161 |       1# -> Ok# st ()
162 |       _ -> Err# (_stOff st)
163 |   {-# INLINE eof #-}
164 | 
165 |   label _ p = p
166 |   {-# INLINE label #-}
167 | 
168 |   hidden p = p
169 |   {-# INLINE hidden #-}
170 | 
171 |   recover p _ = p
172 |   {-# INLINE recover #-}
173 | 
174 |   try p = Acceptor $ \env st ->
175 |     case unAcceptor p env st of
176 |       Ok# st' x -> Ok# st' x
177 |       Err# _ -> Err# (_stOff st)
178 |   {-# INLINE try #-}
179 | 
180 |   p1 <!> p2 = Acceptor $ \env st ->
181 |     case unAcceptor p1 env st of
182 |       Ok# st' x -> Ok# st' x
183 |       Err# o | 1# <- o ==# _stOff st -> unAcceptor p2 env st
184 |              | otherwise -> Err# o
185 |   {-# INLINE (<!>) #-}
186 | 
187 |   chunk k = Acceptor $ \env (# stOff, stLine, stCol #) ->
188 |     case matchChunk @k (_envBuf env) stOff k of
189 |       -1# -> Err# stOff
190 |       n -> Ok# (# stOff +# n, stLine, stCol +# n #) k
191 |   {-# INLINE chunk #-}
192 | 
193 |   asChunk p = do
194 |     I# begin' <- get (const (\s -> I# (_stOff s)))
195 |     p
196 |     I# end' <- get (const (\s -> I# (_stOff s)))
197 |     src <- get (\env _ -> _envBuf env)
198 |     pure $ packChunk src begin' (end' -# begin')
199 |   {-# INLINE asChunk #-}
200 | 
201 |   scan f = Acceptor $ \env (# stOff, stLine, stCol #) ->
202 |     case indexChar @k (_envBuf env) stOff of
203 |       (# c, w #)
204 |         | 1# <- c `neChar#` '\0'#, Just r <- f (C# c) ->
205 |           Ok# (# stOff +# w,
206 |                 case c `eqChar#` '\n'# of 1# -> stLine +# 1#; _ -> stLine,
207 |                 case c `eqChar#` '\n'# of 1# -> 1#; _ -> stCol +# 1# #) r
208 |       _ -> Err# stOff
209 |   {-# INLINE scan #-}
210 | 
211 |   -- By inling this combinator, GHC should figure out the `charWidth`
212 |   -- of the character resulting in an optimised decoder.
213 |   char '\0' = error "Character '\\0' cannot be parsed because it is used as sentinel"
214 |   char c@(C# c') =
215 |     Acceptor $ \env (# stOff, stLine, stCol #) ->
216 |     case matchChar @k (_envBuf env) stOff c' of
217 |       -1# -> Err# stOff
218 |       w -> Ok# (# stOff +# w,
219 |                 if c == '\n' then stLine +# 1# else stLine,
220 |                 if c == '\n' then 1# else stCol +# 1# #) c
221 |   {-# INLINE char #-}
222 | 
223 |   asciiScan f = Acceptor $ \env (# stOff, stLine, stCol #) ->
224 |     if | b <- W8# (indexByte @k (_envBuf env) stOff),
225 |          b /= 0,
226 |          b < 128,
227 |          Just x <- f b ->
228 |            Ok# (# stOff +# 1#
229 |                , if b == asc_newline then stLine +# 1# else stLine
230 |                , if b == asc_newline then 1# else stCol +# 1# #) x
231 |        | otherwise ->
232 |            Err# stOff
233 |   {-# INLINE asciiScan #-}
234 | 
235 |   asciiByte 0 = error "Character '\\0' cannot be parsed because it is used as sentinel"
236 |   asciiByte b
237 |     | b >= 128 = error "Not an ASCII character"
238 |     | otherwise = Acceptor $ \env (# stOff, stLine, stCol #) ->
239 |         if W8# (indexByte @k (_envBuf env) stOff) == b then
240 |           Ok# (# stOff +# 1#
241 |               , if b == asc_newline then stLine +# 1# else stLine
242 |               , if b == asc_newline then 1# else stCol +# 1# #) b
243 |         else
244 |           Err# stOff
245 |   {-# INLINE asciiByte #-}
246 | 
247 | instance Chunk k => IsString (Acceptor k k) where
248 |   fromString = string
249 |   {-# INLINE fromString #-}
250 | 
251 | -- | Reader monad, get something from the environment
252 | get :: (Env k -> State# -> a) -> Acceptor k a
253 | get f = Acceptor $ \env st -> Ok# st (f env st)
254 | {-# INLINE get #-}
255 | 
256 | -- | Reader monad, modify environment locally
257 | local :: (State# -> Env k -> Env k) -> Acceptor k a -> Acceptor k a
258 | local f p = Acceptor $ \env st ->
259 |   unAcceptor p (f st env) st
260 | {-# INLINE local #-}
261 | 
262 | -- | Run 'Acceptor' on the given chunk, returning either
263 | -- a simple 'Error' or, if successful, the result.
264 | runAcceptor :: Chunk k => Acceptor k a -> FilePath -> k -> Maybe a
265 | runAcceptor p f k =
266 |   let !(# b, off #) = unpackChunk k
267 |   in case unAcceptor p (initialEnv f b) (# off, 1#, 1# #) of
268 |        Err# _ -> Nothing
269 |        Ok# _ x -> Just x
270 | 
271 | initialEnv :: FilePath -> Buffer k -> Env k
272 | initialEnv _envFile _envBuf = Env
273 |   { _envBuf
274 |   , _envFile
275 |   , _envRefLine = 1#
276 |   , _envRefCol = 1#
277 |   }
278 | 


--------------------------------------------------------------------------------
/src/Text/PariPari/Internal/Chunk.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE AllowAmbiguousTypes #-}
  2 | {-# LANGUAGE BangPatterns #-}
  3 | {-# LANGUAGE DeriveGeneric #-}
  4 | {-# LANGUAGE FlexibleContexts #-}
  5 | {-# LANGUAGE MagicHash #-}
  6 | {-# LANGUAGE MultiWayIf #-}
  7 | {-# LANGUAGE PatternSynonyms #-}
  8 | {-# LANGUAGE TypeApplications #-}
  9 | {-# LANGUAGE TypeFamilies #-}
 10 | {-# LANGUAGE UnboxedTuples #-}
 11 | module Text.PariPari.Internal.Chunk (
 12 |   Chunk(..)
 13 |   , showByte
 14 |   , showByteString
 15 |   , unsafeAsciiToChar
 16 |   , asc_0, asc_9, asc_A, asc_E, asc_P, asc_a, asc_e, asc_p,
 17 |     asc_minus, asc_plus, asc_point, asc_newline
 18 | ) where
 19 | 
 20 | import Data.Bits (unsafeShiftL, (.|.), (.&.))
 21 | import Data.ByteString (ByteString)
 22 | import Data.Foldable (foldl')
 23 | import Data.String (fromString)
 24 | import Data.Text (Text)
 25 | import GHC.Base
 26 | import GHC.Word
 27 | import GHC.ForeignPtr
 28 | import GHC.Show (showLitChar)
 29 | import Numeric (showHex)
 30 | import qualified Data.ByteString as B
 31 | import qualified Data.ByteString.Internal as B
 32 | import qualified Data.Text.Array as T
 33 | import qualified Data.Text.Encoding as T
 34 | import qualified Data.Text.Internal as T
 35 | 
 36 | class Ord k => Chunk k where
 37 |   type Buffer k
 38 |   matchChunk :: Buffer k -> Int# -> k -> Int# -- Returns -1# if not matched
 39 |   packChunk :: Buffer k -> Int# -> Int# -> k
 40 |   unpackChunk :: k -> (# Buffer k, Int# #)
 41 |   showChunk :: k -> String
 42 |   matchChar :: Buffer k -> Int# -> Char# -> Int# -- Returns -1# if not matched
 43 |   indexChar :: Buffer k -> Int# -> (# Char#, Int# #)
 44 |   indexByte :: Buffer k -> Int# -> Word#
 45 |   stringToChunk :: String -> k
 46 | 
 47 | instance Chunk ByteString where
 48 |   type Buffer ByteString = ForeignPtr Word8
 49 | 
 50 |   matchChunk (ForeignPtr p _) i (B.PS (ForeignPtr p' _) (I# j) (I# n)) = go 0#
 51 |     where go :: Int# -> Int#
 52 |           go k | 1# <- k >=# n = n
 53 |                | w <- indexWord8OffAddr# p (i +# k),
 54 |                  w' <- indexWord8OffAddr# p' (j +# k),
 55 |                  1# <- w `neWord#` int2Word# 0#,
 56 |                  1# <- w `eqWord#` w' = go (k +# 1#)
 57 |                | otherwise = -1#
 58 |   {-# INLINE matchChunk #-}
 59 | 
 60 |   packChunk b i n = B.PS b (I# i) (I# n)
 61 |   {-# INLINE packChunk #-}
 62 | 
 63 |   unpackChunk k =
 64 |     let !(B.PS b (I# i) _) = k <> fromString "\0\0\0" -- sentinel
 65 |     in (# b, i #)
 66 |   {-# INLINE unpackChunk #-}
 67 | 
 68 |   showChunk = showByteString
 69 | 
 70 |   indexByte (ForeignPtr p _) i = indexWord8OffAddr# p i
 71 |   {-# INLINE indexByte #-}
 72 | 
 73 |   matchChar p i m
 74 |     | C# m <= '\x7F' =
 75 |         if | unsafeChr (at p i) == C# m -> 1#
 76 |            | otherwise -> -1#
 77 |     | C# m <= '\x7FF' =
 78 |         if | a1 <- at p i, a2 <- at p (i +# 1#),
 79 |              unsafeChr (((a1 .&. 31) `unsafeShiftL` 6)
 80 |                          .|. (a2 .&. 0x3F)) == C# m -> 2#
 81 |            | otherwise -> -1#
 82 |     | C# m <= '\xFFFF' =
 83 |         if | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#),
 84 |              unsafeChr (((a1 .&. 15) `unsafeShiftL` 12)
 85 |                          .|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
 86 |                          .|. (a3 .&. 0x3F)) == C# m -> 3#
 87 |            | otherwise -> -1#
 88 |     | otherwise =
 89 |         if | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), a4 <- at p (i +# 3#),
 90 |              unsafeChr (((a1 .&. 7) `unsafeShiftL` 18)
 91 |                          .|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
 92 |                          .|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
 93 |                          .|. (a4 .&. 0x3F)) == C# m -> 4#
 94 |            | otherwise -> -1#
 95 |   {-# INLINE matchChar #-}
 96 | 
 97 |   -- TODO detect invalid utf-8?
 98 |   indexChar p i
 99 |     | a1 <- at p i,
100 |       a1 <= 0x7F =
101 |       (# unsafeChr# a1, 1# #)
102 |     | a1 <- at p i, a2 <- at p (i +# 1#),
103 |       (a1 .&. 0xE0) == 0xC0,
104 |       (a2 .&. 0xC0) == 0x80 =
105 |       (# unsafeChr# (((a1 .&. 31) `unsafeShiftL` 6)
106 |                      .|. (a2 .&. 0x3F)), 2# #)
107 |     | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#),
108 |       (a1 .&. 0xF0) == 0xE0,
109 |       (a2 .&. 0xC0) == 0x80,
110 |       (a3 .&. 0xC0) == 0x80 =
111 |       (# unsafeChr# (((a1 .&. 15) `unsafeShiftL` 12)
112 |                      .|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
113 |                      .|. (a3 .&. 0x3F)), 3# #)
114 |     | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), a4 <- at p (i +# 3#),
115 |       (a1 .&. 0xF8) == 0xF0,
116 |       (a2 .&. 0xC0) == 0x80,
117 |       (a3 .&. 0xC0) == 0x80,
118 |       (a4 .&. 0xC0) == 0x80 =
119 |       (# unsafeChr# (((a1 .&. 7) `unsafeShiftL` 18)
120 |                      .|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
121 |                      .|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
122 |                      .|. (a4 .&. 0x3F)), 4# #)
123 |     | otherwise = (# '\0'#, 0# #)
124 |   {-# INLINE indexChar #-}
125 | 
126 |   stringToChunk t = T.encodeUtf8 $ fromString t
127 |   {-# INLINE stringToChunk #-}
128 | 
129 | instance Chunk Text where
130 |   type Buffer Text = T.Array
131 | 
132 |   matchChunk a i (T.Text a' (I# j) (I# n)) = go 0#
133 |     where go :: Int# -> Int#
134 |           go k | 1# <- k >=# n = n
135 |                | w <- T.unsafeIndex a (I# (i +# k)),
136 |                  w' <- T.unsafeIndex a' (I# (j +# k)),
137 |                  w /= 0, w == w' = go (k +# 1#)
138 |                | otherwise = -1#
139 |   {-# INLINE matchChunk #-}
140 | 
141 |   packChunk b i n = T.Text b (I# i) (I# n)
142 |   {-# INLINE packChunk #-}
143 | 
144 |   unpackChunk k =
145 |     let !(T.Text b (I# i) _) = k <> fromString "\0" -- sentinel
146 |     in (# b, i #)
147 |   {-# INLINE unpackChunk #-}
148 | 
149 |   showChunk = show
150 | 
151 |   indexByte a i
152 |     | W16# c <- T.unsafeIndex a (I# i), 1# <- c `leWord#` (int2Word# 0xFF#) = c
153 |     | otherwise = int2Word# 0#
154 |   {-# INLINE indexByte #-}
155 | 
156 |   indexChar a i
157 |     | hi <- T.unsafeIndex a (I# i), lo <- T.unsafeIndex a (I# (i +# 1#)) =
158 |         if hi < 0xD800 || hi > 0xDFFF then
159 |           (# unsafeChr# (fromIntegral hi), 1# #)
160 |         else
161 |           (# unsafeChr# (0x10000 + ((fromIntegral $ hi - 0xD800) `unsafeShiftL` 10) + (fromIntegral lo - 0xDC00)), 2# #)
162 |   {-# INLINE indexChar #-}
163 | 
164 |   matchChar a i m
165 |     | C# m <= '\xFFFF' =
166 |         if | unsafeChr (fromIntegral (T.unsafeIndex a (I# i))) == C# m -> 1#
167 |            | otherwise -> -1#
168 |     | otherwise =
169 |         if | hi <- T.unsafeIndex a (I# i), lo <- T.unsafeIndex a (I# (i +# 1#)),
170 |              hi >= 0xD800, hi <= 0xDFFF,
171 |              C# m == unsafeChr (0x10000 + ((fromIntegral $ hi - 0xD800) `unsafeShiftL` 10) +
172 |                                 (fromIntegral lo - 0xDC00)) -> 2#
173 |            | otherwise -> -1#
174 | 
175 |   stringToChunk t = fromString t
176 |   {-# INLINE stringToChunk #-}
177 | 
178 | at :: ForeignPtr Word8 -> Int# -> Int
179 | at p i = fromIntegral $ W8# (indexByte @ByteString p i)
180 | {-# INLINE at #-}
181 | 
182 | asc_0, asc_9, asc_A, asc_E, asc_P, asc_a, asc_e, asc_p,
183 |   asc_minus, asc_plus, asc_point, asc_newline :: Word8
184 | asc_0 = 48
185 | asc_9 = 57
186 | asc_A = 65
187 | asc_E = 69
188 | asc_P = 80
189 | asc_a = 97
190 | asc_e = 101
191 | asc_p = 112
192 | asc_minus = 45
193 | asc_plus = 43
194 | asc_point = 46
195 | asc_newline = 10
196 | 
197 | unsafeAsciiToChar :: Word8 -> Char
198 | unsafeAsciiToChar x = unsafeChr (fromIntegral x)
199 | {-# INLINE unsafeAsciiToChar #-}
200 | 
201 | unsafeChr# :: Int -> Char#
202 | unsafeChr# (I# i) = chr# i
203 | {-# INLINE unsafeChr# #-}
204 | 
205 | byteS :: Word8 -> ShowS
206 | byteS b
207 |   | b < 128 = showLitChar $ unsafeAsciiToChar b
208 |   | otherwise = ("\\x" <>) . showHex b
209 | 
210 | bytesS :: ByteString -> ShowS
211 | bytesS b | B.length b == 1 = byteS $ B.head b
212 |          | otherwise = foldl' ((. byteS) . (.)) id $ B.unpack b
213 | 
214 | showByte :: Word8 -> String
215 | showByte b = ('\'':) . byteS b . ('\'':) $ ""
216 | 
217 | showByteString :: ByteString -> String
218 | showByteString b = ('"':) . bytesS b . ('"':) $ ""
219 | 


--------------------------------------------------------------------------------
/src/Text/PariPari/Internal/Class.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveGeneric #-}
  2 | {-# LANGUAGE FunctionalDependencies #-}
  3 | module Text.PariPari.Internal.Class (
  4 |   Parser(..)
  5 |   , Alternative(..)
  6 |   , MonadPlus
  7 |   , Pos(..)
  8 |   , Error(..)
  9 |   , showError
 10 |   , expectedEnd
 11 |   , unexpectedEnd
 12 |   , string
 13 | ) where
 14 | 
 15 | import Control.Applicative (Alternative(empty, (<|>)))
 16 | import Control.Monad (MonadPlus(..))
 17 | import Data.List (intercalate)
 18 | import Data.String (IsString)
 19 | import Data.Word (Word8)
 20 | import GHC.Generics (Generic)
 21 | import Text.PariPari.Internal.Chunk
 22 | import qualified Control.Monad.Fail as Fail
 23 | 
 24 | -- | Parsing errors
 25 | data Error
 26 |   = EInvalidUtf8
 27 |   | EExpected         [String]
 28 |   | EUnexpected       String
 29 |   | EFail             String
 30 |   | EIndentNotAligned {-#UNPACK#-}!Int {-#UNPACK#-}!Int
 31 |   | EIndentOverLine   {-#UNPACK#-}!Int {-#UNPACK#-}!Int
 32 |   | ENotEnoughIndent  {-#UNPACK#-}!Int {-#UNPACK#-}!Int
 33 |   deriving (Eq, Ord, Show, Generic)
 34 | 
 35 | -- | Line and column position starting at (1,1)
 36 | data Pos = Pos
 37 |   { _posLine   :: {-#UNPACK#-}!Int
 38 |   , _posCol :: {-#UNPACK#-}!Int
 39 |   } deriving (Eq, Show, Generic)
 40 | 
 41 | infixl 3 <!>
 42 | 
 43 | -- | Parser class, which specifies the necessary
 44 | -- primitives for parsing. All other parser combinators
 45 | -- rely on these primitives.
 46 | class (Fail.MonadFail p, MonadPlus p, Chunk k, IsString (p k)) => Parser k p | p -> k where
 47 |   -- | Get file name associated with current parser
 48 |   getFile :: p FilePath
 49 | 
 50 |   -- | Get current position of the parser
 51 |   getPos :: p Pos
 52 | 
 53 |   -- | Get reference position used for indentation-sensitive parsing
 54 |   getRefPos :: p Pos
 55 | 
 56 |   -- | Update reference position with current position
 57 |   withRefPos :: p a -> p a
 58 | 
 59 |   -- | Parser which succeeds when the given parser fails
 60 |   notFollowedBy :: Show a => p a -> p ()
 61 | 
 62 |   -- | Look ahead and return result of the given parser
 63 |   -- The current position stays the same.
 64 |   lookAhead :: p a -> p a
 65 | 
 66 |   -- | Parser failure with detailled 'Error'
 67 |   failWith :: Error -> p a
 68 | 
 69 |   -- | Parser which succeeds at the end of file
 70 |   eof :: p ()
 71 | 
 72 |   -- | Annotate the given parser with a label used for error reporting.
 73 |   --
 74 |   -- __Note__: This function has zero cost in the 'Acceptor'. You can
 75 |   -- use it to improve the error reports without slowing
 76 |   -- down the fast path of your parser.
 77 |   label :: String -> p a -> p a
 78 | 
 79 |   -- | Hide errors occurring within the given parser
 80 |   -- from the error report. Based on the given
 81 |   -- labels an 'Error' is constructed instead.
 82 |   --
 83 |   -- __Note__: This function has zero cost in the 'Acceptor'. You can
 84 |   -- use it to improve the error reports without slowing
 85 |   -- down the fast path of your parser.
 86 |   hidden :: p a -> p a
 87 | 
 88 |   -- | Reset position if parser fails
 89 |   try :: p a -> p a
 90 | 
 91 |   -- | Alternative which does not backtrack.
 92 |   (<!>) :: p a -> p a -> p a
 93 | 
 94 |   -- | Parse with error recovery.
 95 |   -- If the parser p fails in `recover p r`
 96 |   -- the parser r continues at the position where p failed.
 97 |   -- If the recovering parser r fails too, the whole
 98 |   -- parser fails. The errors reported by the recovering
 99 |   -- parser are ignored in any case.
100 |   -- Error recovery support is only available
101 |   -- in the 'Reporter' instance.
102 |   --
103 |   -- __Note__: This function has zero cost in the 'Acceptor'. You can
104 |   -- use it to improve the error reports without slowing
105 |   -- down the fast path of your parser.
106 |   recover :: p a -> p a -> p a
107 | 
108 |   -- | Parse a chunk of elements. The chunk must not
109 |   -- contain multiple lines, otherwise the position information
110 |   -- will be invalid.
111 |   chunk :: k -> p k
112 | 
113 |   -- | Run the given parser and return the
114 |   -- result as buffer
115 |   asChunk :: p () -> p k
116 | 
117 |   -- | Parse a single character
118 |   --
119 |   -- __Note__: The character '\0' cannot be parsed using this combinator
120 |   -- since it is used as decoding sentinel. Use 'element' instead.
121 |   char :: Char -> p Char
122 | 
123 |   -- | Scan a single character
124 |   --
125 |   -- __Note__: The character '\0' cannot be parsed using this combinator
126 |   -- since it is used as decoding sentinel. Use 'elementScan' instead.
127 |   scan :: (Char -> Maybe a) -> p a
128 | 
129 |   -- | Parse a single character within the ASCII charset
130 |   --
131 |   -- __Note__: The character '\0' cannot be parsed using this combinator
132 |   -- since it is used as decoding sentinel. Use 'element' instead.
133 |   asciiByte :: Word8 -> p Word8
134 | 
135 |   -- | Scan a single character within the ASCII charset
136 |   --
137 |   -- __Note__: The character '\0' cannot be parsed using this combinator
138 |   -- since it is used as decoding sentinel. Use 'elementScan' instead.
139 |   asciiScan :: (Word8 -> Maybe a) -> p a
140 | 
141 | -- | Pretty string representation of 'Error'
142 | showError :: Error -> String
143 | showError EInvalidUtf8             = "Invalid UTF-8 character found"
144 | showError (EExpected tokens)       = "Expected " <> intercalate ", " tokens
145 | showError (EUnexpected token)      = "Unexpected " <> token
146 | showError (EFail msg)              = msg
147 | showError (EIndentNotAligned rc c) = "Invalid alignment, expected column " <> show rc <> " expected, got " <> show c
148 | showError (EIndentOverLine   rl l) = "Indentation over line, expected line " <> show rl <> ", got " <> show l
149 | showError (ENotEnoughIndent  rc c) = "Must be indented deeper than column " <> show rc <> ", got column " <> show c
150 | 
151 | expectedEnd :: Error
152 | expectedEnd = EExpected ["end of file"]
153 | 
154 | unexpectedEnd :: Error
155 | unexpectedEnd = EUnexpected "end of file"
156 | 
157 | -- | Parse a string
158 | string :: Parser k p => String -> p k
159 | string t = chunk (stringToChunk t)
160 | {-# INLINE string #-}
161 | 


--------------------------------------------------------------------------------
/src/Text/PariPari/Internal/Combinators.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE Rank2Types #-}
  3 | module Text.PariPari.Internal.Combinators (
  4 |   -- * Basic combinators
  5 |   void
  6 |   , (<|>)
  7 |   , empty
  8 | 
  9 |   -- * Control.Monad.Combinators.NonEmpty
 10 |   , ON.some
 11 |   , ON.endBy1
 12 |   , ON.someTill
 13 |   , ON.sepBy1
 14 |   , ON.sepEndBy1
 15 | 
 16 |   -- * Control.Monad.Combinators
 17 |   , O.optional -- dont use Applicative version for efficiency
 18 |   , O.many -- dont use Applicative version for efficiency
 19 |   , O.between
 20 |   , O.choice
 21 |   , O.count
 22 |   , O.count'
 23 |   , O.eitherP
 24 |   , O.endBy
 25 |   , O.manyTill
 26 |   , O.option
 27 |   , O.sepBy
 28 |   , O.sepEndBy
 29 |   , O.skipMany
 30 |   , O.skipSome
 31 |   , O.skipCount
 32 |   , O.skipManyTill
 33 |   , O.skipSomeTill
 34 | 
 35 |   -- * Labels
 36 |   , (<?>)
 37 | 
 38 |   -- * Position
 39 |   , getLine
 40 |   , getCol
 41 |   , withPos
 42 |   , withSpan
 43 | 
 44 |   -- * Indentation
 45 |   , getRefCol
 46 |   , getRefLine
 47 |   , withRefPos
 48 |   , align
 49 |   , indented
 50 |   , line
 51 |   , linefold
 52 | 
 53 |   -- * Char combinators
 54 |   , digitByte
 55 |   , integer
 56 |   , integer'
 57 |   , decimal
 58 |   , octal
 59 |   , hexadecimal
 60 |   , digit
 61 |   , sign
 62 |   , signed
 63 |   , fractionHex
 64 |   , fractionDec
 65 |   , char'
 66 |   , notChar
 67 |   , anyChar
 68 |   , anyAsciiByte
 69 |   , alphaNumChar
 70 |   , digitChar
 71 |   , letterChar
 72 |   , lowerChar
 73 |   , upperChar
 74 |   , symbolChar
 75 |   , categoryChar
 76 |   , punctuationChar
 77 |   , spaceChar
 78 |   , asciiChar
 79 |   , satisfy
 80 |   , asciiSatisfy
 81 |   , skipChars
 82 |   , takeChars
 83 |   , skipCharsWhile
 84 |   , takeCharsWhile
 85 |   , skipCharsWhile1
 86 |   , takeCharsWhile1
 87 |   , scanChars
 88 |   , scanChars1
 89 |   , string
 90 | ) where
 91 | 
 92 | import Control.Applicative (optional)
 93 | import Control.Monad (when)
 94 | import Control.Monad.Combinators (option, skipCount, skipMany)
 95 | import Data.Functor (void)
 96 | import Data.Maybe (fromMaybe)
 97 | import Data.Word (Word8)
 98 | import Prelude hiding (getLine)
 99 | import Text.PariPari.Internal.Chunk
100 | import Text.PariPari.Internal.Class
101 | import qualified Control.Monad.Combinators as O
102 | import qualified Control.Monad.Combinators.NonEmpty as ON
103 | import qualified Data.Char as C
104 | 
105 | type P k a  = (forall p. Parser k p => p a)
106 | 
107 | -- | Infix alias for 'label'
108 | (<?>) :: Parser k p => p a -> String -> p a
109 | (<?>) = flip label
110 | {-# INLINE (<?>) #-}
111 | infix 0 <?>
112 | 
113 | -- | Get line number of the reference position
114 | getRefLine :: P k Int
115 | getRefLine = _posLine <$> getRefPos
116 | {-# INLINE getRefLine #-}
117 | 
118 | -- | Get column number of the reference position
119 | getRefCol :: P k Int
120 | getRefCol = _posCol <$> getRefPos
121 | {-# INLINE getRefCol #-}
122 | 
123 | -- | Get current line number
124 | getLine :: P k Int
125 | getLine = _posLine <$> getPos
126 | {-# INLINE getLine #-}
127 | 
128 | -- | Get current column
129 | getCol :: P k Int
130 | getCol = _posCol <$> getPos
131 | {-# INLINE getCol #-}
132 | 
133 | -- | Decorate the parser result with the current position
134 | withPos :: Parser k p => p a -> p (Pos, a)
135 | withPos p = do
136 |   pos <- getPos
137 |   ret <- p
138 |   pure (pos, ret)
139 | {-# INLINE withPos #-}
140 | 
141 | -- | Decorate the parser result with the position span
142 | withSpan :: Parser k p => p a -> p (Pos, Pos, a)
143 | withSpan p = do
144 |   begin <- getPos
145 |   ret <- p
146 |   end <- getPos
147 |   pure (begin, end, ret)
148 | {-# INLINE withSpan #-}
149 | 
150 | -- | Parser succeeds on the same line as the reference line
151 | line :: P k ()
152 | line = do
153 |   l <- getLine
154 |   rl <- getRefLine
155 |   when (l /= rl) $ failWith $ EIndentOverLine rl l
156 | {-# INLINE line #-}
157 | 
158 | -- | Parser succeeds on the same column as the reference column
159 | align :: P k ()
160 | align = do
161 |   c <- getCol
162 |   rc <- getRefCol
163 |   when (c /= rc) $ failWith $ EIndentNotAligned rc c
164 | {-# INLINE align #-}
165 | 
166 | -- | Parser succeeds for columns greater than the current reference column
167 | indented :: P k ()
168 | indented = do
169 |   c <- getCol
170 |   rc <- getRefCol
171 |   when (c <= rc) $ failWith $ ENotEnoughIndent rc c
172 | {-# INLINE indented #-}
173 | 
174 | -- | Parser succeeds either on the reference line or
175 | -- for columns greater than the current reference column
176 | linefold :: P k ()
177 | linefold = line <|> indented
178 | {-# INLINE linefold #-}
179 | 
180 | -- | Parse a digit byte for the given base.
181 | -- Bases 2 to 36 are supported.
182 | digitByte :: Parser k p => Int -> p Word8
183 | digitByte base = asciiSatisfy (isDigit base)
184 | {-# INLINE digitByte #-}
185 | 
186 | isDigit :: Int -> Word8 -> Bool
187 | isDigit base b
188 |   | base >= 2 && base <= 10 = b >= asc_0 && b <= asc_0 + fromIntegral base - 1
189 |   | base <= 36 = (b >= asc_0 && b <= asc_9)
190 |                  || ((fromIntegral b :: Word) - fromIntegral asc_A) < fromIntegral (base - 10)
191 |                  || ((fromIntegral b :: Word) - fromIntegral asc_a) < fromIntegral (base - 10)
192 |   |otherwise = error "Text.PariPari.Internal.Combinators.isDigit: Bases 2 to 36 are supported"
193 | {-# INLINE isDigit #-}
194 | 
195 | digitToInt :: Int -> Word8 -> Word
196 | digitToInt base b
197 |   | n <- (fromIntegral b :: Word) - fromIntegral asc_0, base <= 10 || n <= 9  = n
198 |   | n <- (fromIntegral b :: Word) - fromIntegral asc_A, n               <= 26 = n + 10
199 |   | n <- (fromIntegral b :: Word) - fromIntegral asc_a                        = n + 10
200 | {-# INLINE digitToInt #-}
201 | 
202 | -- | Parse a single digit of the given base and return its value.
203 | -- Bases 2 to 36 are supported.
204 | digit :: Parser k p => Int -> p Word
205 | digit base = digitToInt base <$> asciiSatisfy (isDigit base)
206 | {-# INLINE digit #-}
207 | 
208 | -- | Parse an integer of the given base.
209 | -- Returns the integer and the number of digits.
210 | -- Bases 2 to 36 are supported.
211 | -- Digits can be separated by separator, e.g. `optional (char '_')`.
212 | -- Signs are not parsed by this combinator.
213 | integer' :: (Num a, Parser k p) => p sep -> Int -> p (a, Int)
214 | integer' sep base = label (integerLabel base) $ do
215 |   d <- digit base
216 |   accum 1 $ fromIntegral d
217 |   where accum !i !n = next i n <|> pure (n, i)
218 |         next !i !n = do
219 |           void $ sep
220 |           d <- digit base
221 |           accum (i + 1) $ n * fromIntegral base + fromIntegral d
222 | {-# INLINE integer' #-}
223 | 
224 | -- | Parse an integer of the given base.
225 | -- Bases 2 to 36 are supported.
226 | -- Digits can be separated by separator, e.g. `optional (char '_')`.
227 | -- Signs are not parsed by this combinator.
228 | integer :: (Num a, Parser k p) => p sep -> Int -> p a
229 | integer sep base = label (integerLabel base) $ do
230 |   d <- digit base
231 |   accum $ fromIntegral d
232 |   where accum !n = next n <|> pure n
233 |         next !n = do
234 |           void $ sep
235 |           d <- digit base
236 |           accum $ n * fromIntegral base + fromIntegral d
237 | {-# INLINE integer #-}
238 | 
239 | integerLabel :: Int -> String
240 | integerLabel 2  = "binary integer"
241 | integerLabel 8  = "octal integer"
242 | integerLabel 10 = "decimal integer"
243 | integerLabel 16 = "hexadecimal integer"
244 | integerLabel b  = "integer of base " <> show b
245 | 
246 | -- | Parses a decimal integer.
247 | -- Signs are not parsed by this combinator.
248 | decimal :: Num a => P k a
249 | decimal = integer (pure ()) 10
250 | {-# INLINE decimal #-}
251 | 
252 | -- | Parses an octal integer.
253 | -- Signs are not parsed by this combinator.
254 | octal :: Num a => P k a
255 | octal = integer (pure ()) 8
256 | {-# INLINE octal #-}
257 | 
258 | -- | Parses a hexadecimal integer.
259 | -- Signs are not parsed by this combinator.
260 | hexadecimal :: Num a => P k a
261 | hexadecimal = integer (pure ()) 16
262 | {-# INLINE hexadecimal #-}
263 | 
264 | -- | Parse plus or minus sign
265 | sign :: (Parser k f, Num a) => f (a -> a)
266 | sign = (negate <$ asciiByte asc_minus) <|> (id <$ optional (asciiByte asc_plus))
267 | {-# INLINE sign #-}
268 | 
269 | -- | Parse a number with a plus or minus sign.
270 | signed :: (Num a, Parser k p) => p a -> p a
271 | signed p = ($) <$> sign <*> p
272 | {-# INLINE signed #-}
273 | 
274 | fractionExp :: (Num a, Parser k p) => p expSep -> p digitSep -> p (Maybe a)
275 | fractionExp expSep digitSep = do
276 |   e <- optional expSep
277 |   case e of
278 |     Nothing{} -> pure Nothing
279 |     Just{} -> Just <$> signed (integer digitSep 10)
280 | {-# INLINE fractionExp #-}
281 | 
282 | -- | Parse a fraction of arbitrary exponent base and mantissa base.
283 | -- 'fractionDec' and 'fractionHex' should be used instead probably.
284 | -- Returns either an integer in 'Left' or a fraction in 'Right'.
285 | -- Signs are not parsed by this combinator.
286 | fraction :: (Num a, Parser k p) => p expSep -> Int -> Int -> p digitSep -> p (Either a (a, Int, a))
287 | fraction expSep expBase mantBasePow digitSep = do
288 |   let mantBase = expBase ^ mantBasePow
289 |   mant <- integer digitSep mantBase
290 |   frac <- optional $ asciiByte asc_point *> option (0, 0) (integer' digitSep mantBase)
291 |   expn <- fractionExp expSep digitSep
292 |   let (fracVal, fracLen) = fromMaybe (0, 0) frac
293 |       expVal = fromMaybe 0 expn
294 |   pure $ case (frac, expn) of
295 |            (Nothing, Nothing) -> Left mant
296 |            _ -> Right ( mant * fromIntegral mantBase ^ fracLen + fracVal
297 |                       , expBase
298 |                       , expVal - fromIntegral (fracLen * mantBasePow))
299 | {-# INLINE fraction #-}
300 | 
301 | -- | Parse a decimal fraction, e.g., 123.456e-78, returning (mantissa, 10, exponent),
302 | -- corresponding to mantissa * 10^exponent.
303 | -- Digits can be separated by separator, e.g. `optional (char '_')`.
304 | -- Signs are not parsed by this combinator.
305 | fractionDec :: (Num a, Parser k p) => p digitSep -> p (Either a (a, Int, a))
306 | fractionDec sep = fraction (asciiSatisfy (\b -> b == asc_E || b == asc_e)) 10 1 sep <?> "fraction"
307 | {-# INLINE fractionDec #-}
308 | 
309 | -- | Parse a hexadecimal fraction, e.g., co.ffeep123, returning (mantissa, 2, exponent),
310 | -- corresponding to mantissa * 2^exponent.
311 | -- Digits can be separated by separator, e.g. `optional (char '_')`.
312 | -- Signs are not parsed by this combinator.
313 | fractionHex :: (Num a, Parser k p) => p digitSep -> p (Either a (a, Int, a))
314 | fractionHex sep = fraction (asciiSatisfy (\b -> b == asc_P || b == asc_p)) 2 4 sep <?> "hexadecimal fraction"
315 | {-# INLINE fractionHex #-}
316 | 
317 | -- | Parse a case-insensitive character
318 | char' :: Parser k p => Char -> p Char
319 | char' x =
320 |   let l = C.toLower x
321 |       u = C.toUpper x
322 |   in satisfy (\c -> c == l || c == u)
323 | {-# INLINE char' #-}
324 | 
325 | -- | Parse a character different from the given one.
326 | notChar :: Parser k p => Char -> p Char
327 | notChar c = satisfy (/= c)
328 | {-# INLINE notChar #-}
329 | 
330 | -- | Parse an arbitrary character.
331 | anyChar :: P k Char
332 | anyChar = satisfy (const True)
333 | {-# INLINE anyChar #-}
334 | 
335 | -- | Parse an arbitrary ASCII byte.
336 | anyAsciiByte :: P k Word8
337 | anyAsciiByte = asciiSatisfy (const True)
338 | {-# INLINE anyAsciiByte #-}
339 | 
340 | -- | Parse an alphanumeric character, including Unicode.
341 | alphaNumChar :: P k Char
342 | alphaNumChar = satisfy C.isAlphaNum <?> "alphanumeric character"
343 | {-# INLINE alphaNumChar #-}
344 | 
345 | -- | Parse a letter character, including Unicode.
346 | letterChar :: P k Char
347 | letterChar = satisfy C.isLetter <?> "letter"
348 | {-# INLINE letterChar #-}
349 | 
350 | -- | Parse a lowercase letter, including Unicode.
351 | lowerChar :: P k Char
352 | lowerChar = satisfy C.isLower <?> "lowercase letter"
353 | {-# INLINE lowerChar #-}
354 | 
355 | -- | Parse a uppercase letter, including Unicode.
356 | upperChar :: P k Char
357 | upperChar = satisfy C.isUpper <?> "uppercase letter"
358 | {-# INLINE upperChar #-}
359 | 
360 | -- | Parse a space character, including Unicode.
361 | spaceChar :: P k Char
362 | spaceChar = satisfy C.isSpace <?> "space"
363 | {-# INLINE spaceChar #-}
364 | 
365 | -- | Parse a symbol character, including Unicode.
366 | symbolChar :: P k Char
367 | symbolChar = satisfy C.isSymbol <?> "symbol"
368 | {-# INLINE symbolChar #-}
369 | 
370 | -- | Parse a punctuation character, including Unicode.
371 | punctuationChar :: P k Char
372 | punctuationChar = satisfy C.isPunctuation <?> "punctuation"
373 | {-# INLINE punctuationChar #-}
374 | 
375 | -- | Parse a digit character of the given base.
376 | -- Bases 2 to 36 are supported.
377 | digitChar :: Parser k p => Int -> p Char
378 | digitChar base = unsafeAsciiToChar <$> digitByte base
379 | {-# INLINE digitChar #-}
380 | 
381 | -- | Parse a character beloning to the ASCII charset (< 128)
382 | asciiChar :: P k Char
383 | asciiChar = unsafeAsciiToChar <$> anyAsciiByte
384 | {-# INLINE asciiChar #-}
385 | 
386 | -- | Parse a character belonging to the given Unicode category
387 | categoryChar :: Parser k p => C.GeneralCategory -> p Char
388 | categoryChar cat = satisfy ((== cat) . C.generalCategory) <?> untitle (show cat)
389 | {-# INLINE categoryChar #-}
390 | 
391 | untitle :: String -> String
392 | untitle []     = []
393 | untitle (x:xs) = C.toLower x : go xs
394 |   where go [] = ""
395 |         go (y:ys) | C.isUpper y = ' ' : C.toLower y : untitle ys
396 |                   | otherwise   = y : ys
397 | 
398 | -- | Skip the next n characters
399 | skipChars :: Parser k p => Int -> p ()
400 | skipChars n = skipCount n anyChar
401 | {-# INLINE skipChars #-}
402 | 
403 | -- | Skip char while predicate is true
404 | skipCharsWhile :: Parser k p => (Char -> Bool) -> p ()
405 | skipCharsWhile f = skipMany (satisfy f)
406 | {-# INLINE skipCharsWhile #-}
407 | 
408 | -- | Skip at least one char while predicate is true
409 | skipCharsWhile1 :: Parser k p => (Char -> Bool) -> p ()
410 | skipCharsWhile1 f = satisfy f *> skipCharsWhile f
411 | {-# INLINE skipCharsWhile1 #-}
412 | 
413 | -- | Take the next n characters and advance the position by n characters
414 | takeChars :: Parser k p => Int -> p k
415 | takeChars n = asChunk (skipChars n) <?> "string of length " <> show n
416 | {-# INLINE takeChars #-}
417 | 
418 | -- | Take chars while predicate is true
419 | takeCharsWhile :: Parser k p => (Char -> Bool) -> p k
420 | takeCharsWhile f = asChunk (skipCharsWhile f)
421 | {-# INLINE takeCharsWhile #-}
422 | 
423 | -- | Take at least one byte while predicate is true
424 | takeCharsWhile1 :: Parser k p => (Char -> Bool) -> p k
425 | takeCharsWhile1 f = asChunk (skipCharsWhile1 f)
426 | {-# INLINE takeCharsWhile1 #-}
427 | 
428 | -- | Parse a single character with the given predicate
429 | satisfy :: Parser k p => (Char -> Bool) -> p Char
430 | satisfy f = scan $ \c -> if f c then Just c else Nothing
431 | {-# INLINE satisfy #-}
432 | 
433 | -- | Parse a single character within the ASCII charset with the given predicate
434 | asciiSatisfy :: Parser k p => (Word8 -> Bool) -> p Word8
435 | asciiSatisfy f = asciiScan $ \b -> if f b then Just b else Nothing
436 | {-# INLINE asciiSatisfy #-}
437 | 
438 | scanChars :: Parser k p => (s -> Char -> Maybe s) -> s -> p s
439 | scanChars f = go
440 |   where go s = (scan (f s) >>= go) <|> pure s
441 | {-# INLINE scanChars #-}
442 | 
443 | scanChars1 :: Parser k p => (s -> Char -> Maybe s) -> s -> p s
444 | scanChars1 f s = scan (f s) >>= scanChars f
445 | {-# INLINE scanChars1 #-}
446 | 


--------------------------------------------------------------------------------
/src/Text/PariPari/Internal/Reporter.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns #-}
  2 | {-# LANGUAGE CPP #-}
  3 | {-# LANGUAGE DeriveGeneric #-}
  4 | {-# LANGUAGE FlexibleInstances #-}
  5 | {-# LANGUAGE MultiParamTypeClasses #-}
  6 | {-# LANGUAGE MultiWayIf #-}
  7 | {-# LANGUAGE NamedFieldPuns #-}
  8 | {-# LANGUAGE Rank2Types #-}
  9 | {-# LANGUAGE ScopedTypeVariables #-}
 10 | {-# LANGUAGE TypeApplications #-}
 11 | {-# LANGUAGE UnboxedTuples #-}
 12 | {-# LANGUAGE MagicHash #-}
 13 | module Text.PariPari.Internal.Reporter (
 14 |   Reporter(..)
 15 |   , Env(..)
 16 |   , State(..)
 17 |   , local
 18 |   , get
 19 |   , raiseError
 20 |   , mergeErrorState
 21 |   , Report(..)
 22 |   , runReporter
 23 |   , runReporterWithOptions
 24 |   , ErrorContext(..)
 25 |   , showReport
 26 |   , showErrors
 27 |   , ReportOptions(..)
 28 |   , defaultReportOptions
 29 | ) where
 30 | 
 31 | import Control.Monad (void)
 32 | import Data.Function (on)
 33 | import Data.List (intercalate, sort, group, sortOn)
 34 | import Data.Semigroup as Sem
 35 | import Data.String (IsString(..))
 36 | import GHC.Base
 37 | import GHC.Word
 38 | import GHC.Generics (Generic)
 39 | import Text.PariPari.Internal.Chunk
 40 | import Text.PariPari.Internal.Class
 41 | import qualified Control.Monad.Fail as Fail
 42 | import qualified Data.List.NonEmpty as NE
 43 | 
 44 | data ErrorContext = ErrorContext
 45 |   { _ecErrors  :: ![Error]
 46 |   , _ecContext :: ![String]
 47 |   } deriving (Eq, Show, Generic)
 48 | 
 49 | data ReportOptions = ReportOptions
 50 |   { _optMaxContexts         :: {-#UNPACK#-}!Int
 51 |   , _optMaxErrorsPerContext :: {-#UNPACK#-}!Int
 52 |   , _optMaxLabelsPerContext :: {-#UNPACK#-}!Int
 53 |   } deriving (Eq, Show, Generic)
 54 | 
 55 | data Report = Report
 56 |   { _reportFile   :: !FilePath
 57 |   , _reportErrors :: ![ErrorContext]
 58 |   , _reportLine   :: {-#UNPACK#-}!Int
 59 |   , _reportCol    :: {-#UNPACK#-}!Int
 60 |   } deriving (Eq, Show, Generic)
 61 | 
 62 | data Env k = Env
 63 |   { _envBuf       :: !(Buffer k)
 64 |   , _envFile      :: !FilePath
 65 |   , _envOptions   :: !ReportOptions
 66 |   , _envHidden    :: !Bool
 67 |   , _envContext   :: ![String]
 68 |   , _envRefLine   :: Int#
 69 |   , _envRefCol    :: Int#
 70 |   }
 71 | 
 72 | data State = State
 73 |   { _stOff       :: Int#
 74 |   , _stLine      :: Int#
 75 |   , _stCol       :: Int#
 76 |   , _stErrOff    :: Int#
 77 |   , _stErrLine   :: Int#
 78 |   , _stErrCol    :: Int#
 79 |   , _stErrors    :: ![ErrorContext]
 80 |   , _stReports   :: ![Report]
 81 |   }
 82 | 
 83 | -- | Parser which is optimised for good error reports.
 84 | -- Performance is secondary, since the 'Reporter' is used
 85 | -- as a fallback to the 'Acceptor'.
 86 | newtype Reporter k a = Reporter
 87 |   { unReporter :: forall b. Env k -> State
 88 |                -> (a     -> State -> b)
 89 |                -> (State -> b)
 90 |                -> b
 91 |   }
 92 | 
 93 | instance (Chunk k, Semigroup a) => Sem.Semigroup (Reporter k a) where
 94 |   p1 <> p2 = (<>) <$> p1 <*> p2
 95 |   {-# INLINE (<>) #-}
 96 | 
 97 | instance (Chunk k, Semigroup a, Monoid a) => Monoid (Reporter k a) where
 98 |   mempty = pure mempty
 99 |   {-# INLINE mempty #-}
100 | 
101 |   mappend = (<>)
102 |   {-# INLINE mappend #-}
103 | 
104 | instance Chunk k => Functor (Reporter k) where
105 |   fmap f p = Reporter $ \env st ok err ->
106 |     unReporter p env st (ok . f) err
107 |   {-# INLINE fmap #-}
108 | 
109 | instance Chunk k => Applicative (Reporter k) where
110 |   pure x = Reporter $ \_ st ok _ -> ok x st
111 |   {-# INLINE pure #-}
112 | 
113 |   f <*> a = Reporter $ \env st ok err ->
114 |     let ok1 f' s =
115 |           let ok2 a' s' = ok (f' a') s'
116 |           in unReporter a env s ok2 err
117 |     in unReporter f env st ok1 err
118 |   {-# INLINE (<*>) #-}
119 | 
120 |   p1 *> p2 = do
121 |     void p1
122 |     p2
123 |   {-# INLINE (*>) #-}
124 | 
125 |   p1 <* p2 = do
126 |     x <- p1
127 |     void p2
128 |     pure x
129 |   {-# INLINE (<*) #-}
130 | 
131 | instance Chunk k => Alternative (Reporter k) where
132 |   empty = Reporter $ \_ st _ err -> err st
133 |   {-# INLINE empty #-}
134 | 
135 |   p1 <|> p2 = Reporter $ \env st ok err ->
136 |     let err' s = unReporter p2 env (mergeErrorState env st s) ok err
137 |     in unReporter p1 env st ok err'
138 |   {-# INLINE (<|>) #-}
139 | 
140 | instance Chunk k => MonadPlus (Reporter k)
141 | 
142 | instance Chunk k => Monad (Reporter k) where
143 |   p >>= f = Reporter $ \env st ok err ->
144 |     let ok' x s = unReporter (f x) env s ok err
145 |     in unReporter p env st ok' err
146 |   {-# INLINE (>>=) #-}
147 | 
148 | #if !MIN_VERSION_base(4,11,0)
149 |   fail = Fail.fail
150 |   {-# INLINE fail #-}
151 | #endif
152 | 
153 | instance Chunk k => Fail.MonadFail (Reporter k) where
154 |   fail msg = failWith $ EFail msg
155 |   {-# INLINE fail #-}
156 | 
157 | instance Chunk k => Parser k (Reporter k) where
158 |   getPos = get $ \_ st -> Pos (I# (_stLine st)) (I# (_stCol st))
159 |   {-# INLINE getPos #-}
160 | 
161 |   getFile = get $ \env _ -> _envFile env
162 |   {-# INLINE getFile #-}
163 | 
164 |   getRefPos = get $ \env _ -> Pos (I# (_envRefLine env)) (I# (_envRefCol env))
165 |   {-# INLINE getRefPos #-}
166 | 
167 |   withRefPos p = local (\st env -> env { _envRefLine = _stLine st, _envRefCol = _stCol st }) p
168 |   {-# INLINE withRefPos #-}
169 | 
170 |   label l p = local (const $ addLabel l) p
171 |   {-# INLINE label #-}
172 | 
173 |   hidden p = local (const $ \env -> env { _envHidden = True }) p
174 |   {-# INLINE hidden #-}
175 | 
176 |   try p = Reporter $ \env st ok err ->
177 |     let err' _ = err st
178 |     in unReporter p env st ok err'
179 |   {-# INLINE try #-}
180 | 
181 |   p1 <!> p2 = Reporter $ \env st ok err ->
182 |     let err' s
183 |           | 1# <- _stOff s ==# _stOff st = unReporter p2 env (mergeErrorState env st s) ok err
184 |           | otherwise = err s
185 |     in unReporter p1 env st ok err'
186 |   {-# INLINE (<!>) #-}
187 | 
188 |   notFollowedBy p = Reporter $ \env st ok err ->
189 |     let ok' x _ = raiseError env st err $ EUnexpected $ show x
190 |         err' _ = ok () st
191 |     in unReporter p env st ok' err'
192 |   {-# INLINE notFollowedBy #-}
193 | 
194 |   lookAhead p = Reporter $ \env st ok err ->
195 |     let ok' x _ = ok x st
196 |     in unReporter p env st ok' err
197 |   {-# INLINE lookAhead #-}
198 | 
199 |   failWith e = Reporter $ \env st _ err -> raiseError env st err e
200 |   {-# INLINE failWith #-}
201 | 
202 |   eof = Reporter $ \env st ok err ->
203 |     case indexByte @k (_envBuf env) (_stOff st) `eqWord#` int2Word# 0# of
204 |       1# -> ok () st
205 |       _ -> raiseError env st err expectedEnd
206 |   {-# INLINE eof #-}
207 | 
208 |   recover p r = Reporter $ \env st ok err ->
209 |     let err1 s =
210 |           let err2 _ = err s
211 |           in unReporter r env (addReport env s) ok err2
212 |     in unReporter p env st ok err1
213 |   {-# INLINE recover #-}
214 | 
215 |   chunk k = Reporter $ \env st@State{_stOff,_stCol} ok err ->
216 |     case matchChunk @k (_envBuf env) _stOff k of
217 |       -1# -> raiseError env st err $ EExpected [showChunk @k k]
218 |       n -> ok k st { _stOff = _stOff +# n, _stCol = _stCol +# n }
219 |   {-# INLINE chunk #-}
220 | 
221 |   asChunk p = do
222 |     I# begin' <- get (const (\s -> I# (_stOff s)))
223 |     p
224 |     I# end' <- get (const (\s -> I# (_stOff s)))
225 |     src <- get (\env _ -> _envBuf env)
226 |     pure $ packChunk src begin' (end' -# begin')
227 |   {-# INLINE asChunk #-}
228 | 
229 |   scan f = Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err ->
230 |     case indexChar @k (_envBuf env) _stOff of
231 |       (# '\0'#, _ #) -> raiseError env st err unexpectedEnd
232 |       (# c, w #) ->
233 |         case f (C# c) of
234 |           Just r ->
235 |             ok r st { _stOff = _stOff +# w
236 |                     , _stLine = case c `eqChar#` '\n'# of 1# -> _stLine +# 1#; _ -> _stLine
237 |                     , _stCol = case c `eqChar#` '\n'# of 1# -> 1#; _ -> _stCol +# 1#
238 |                     }
239 |           Nothing -> raiseError env st err $ EUnexpected $ show (C# c)
240 |   {-# INLINE scan #-}
241 | 
242 |   -- By inling this combinator, GHC should figure out the `charWidth`
243 |   -- of the character resulting in an optimised decoder.
244 |   char '\0' = error "Character '\\0' cannot be parsed because it is used as sentinel"
245 |   char c@(C# c') =
246 |     Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err ->
247 |         case matchChar @k (_envBuf env) _stOff c' of
248 |           -1# -> raiseError env st err $ EExpected [show c]
249 |           w -> ok c st
250 |             { _stOff = _stOff +# w
251 |             , _stLine = if c == '\n' then _stLine +# 1# else _stLine
252 |             , _stCol = if c == '\n' then 1# else _stCol +# 1#
253 |             }
254 |   {-# INLINE char #-}
255 | 
256 |   asciiScan f = Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err ->
257 |     let b = W8# (indexByte @k (_envBuf env) _stOff)
258 |     in if | b /= 0,
259 |             b < 128,
260 |             Just x <- f b ->
261 |               ok x st
262 |               { _stOff = _stOff +# 1#
263 |               , _stLine = if b == asc_newline then _stLine +# 1# else _stLine
264 |               , _stCol = if b == asc_newline then 1# else _stCol +# 1#
265 |               }
266 |           | otherwise ->
267 |               raiseError env st err $ EUnexpected $ showByte b
268 |   {-# INLINE asciiScan #-}
269 | 
270 |   asciiByte 0 = error "Character '\\0' cannot be parsed because it is used as sentinel"
271 |   asciiByte b
272 |     | b >= 128 = error "Not an ASCII character"
273 |     | otherwise = Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err ->
274 |         if W8# (indexByte @k (_envBuf env) _stOff) == b then
275 |           ok b st
276 |           { _stOff = _stOff +# 1#
277 |           , _stLine = if b == asc_newline then _stLine +# 1# else _stLine
278 |           , _stCol = if b == asc_newline then 1# else _stCol +# 1#
279 |           }
280 |         else
281 |           raiseError env st err $ EExpected [showByte b]
282 |   {-# INLINE asciiByte #-}
283 | 
284 | instance Chunk k => IsString (Reporter k k) where
285 |   fromString = string
286 |   {-# INLINE fromString #-}
287 | 
288 | raiseError :: Env k -> State -> (State -> b) -> Error -> b
289 | raiseError env st err e = err $ addError env st e
290 | {-# INLINE raiseError #-}
291 | 
292 | -- | Reader monad, modify environment locally
293 | local :: (State -> Env k -> Env k) -> Reporter k a -> Reporter k a
294 | local f p = Reporter $ \env st ok err ->
295 |   unReporter p (f st env) st ok err
296 | {-# INLINE local #-}
297 | 
298 | -- | Reader monad, get something from the environment
299 | get :: (Env k -> State -> a) -> Reporter k a
300 | get f = Reporter $ \env st ok _ -> ok (f env st) st
301 | {-# INLINE get #-}
302 | 
303 | addLabel :: String -> Env k -> Env k
304 | addLabel l env = case _envContext env of
305 |   (l':_) | l == l' -> env
306 |   ls               -> env { _envContext = take (_optMaxLabelsPerContext._envOptions $ env) $ l : ls }
307 | {-# INLINE addLabel #-}
308 | 
309 | -- | Add parser error to the list of errors
310 | -- which are kept in the parser state.
311 | -- Errors of lower priority and at an earlier position.
312 | -- Furthermore the error is merged with existing errors if possible.
313 | addError :: Env k -> State -> Error -> State
314 | addError env st e
315 |   | 1# <- _stOff st ># _stErrOff st,
316 |     Just e' <- mkError env e =
317 |       st { _stErrors    = [e']
318 |          , _stErrOff    = _stOff st
319 |          , _stErrLine   = _stLine st
320 |          , _stErrCol = _stCol st
321 |          }
322 |   | otherwise = st
323 | {-# INLINE addError #-}
324 | 
325 | mkError :: Env k -> Error -> Maybe ErrorContext
326 | mkError env e
327 |   | _envHidden env, (l:ls) <- _envContext env = Just $ ErrorContext [EExpected [l]] ls
328 |   | _envHidden env = Nothing
329 |   | otherwise = Just $ ErrorContext [e] $ _envContext env
330 | {-# INLINE mkError #-}
331 | 
332 | -- | Merge errors of two states, used when backtracking
333 | mergeErrorState :: Env k -> State -> State -> State
334 | mergeErrorState env s s'
335 |   | 1# <- _stErrOff s' ># _stErrOff s =
336 |       s { _stErrors    = _stErrors s'
337 |         , _stErrOff    = _stErrOff s'
338 |         , _stErrLine   = _stErrLine s'
339 |         , _stErrCol    = _stErrCol s'
340 |         }
341 |   | 1# <- _stErrOff s' ==# _stErrOff s =
342 |       s { _stErrors = shrinkErrors env $ _stErrors s' <> _stErrors s }
343 |   | otherwise = s
344 | {-# INLINE mergeErrorState #-}
345 | 
346 | shrinkErrors :: Env k -> [ErrorContext] -> [ErrorContext]
347 | shrinkErrors env = take (_optMaxContexts._envOptions $ env) . map (mergeErrorContexts env) . NE.groupBy ((==) `on` _ecContext) . sortOn _ecContext
348 | 
349 | -- | Shrink error context by deleting duplicates
350 | -- and merging errors if possible.
351 | mergeErrorContexts :: Env k -> NonEmpty ErrorContext -> ErrorContext
352 | mergeErrorContexts env es@(ErrorContext{_ecContext}:| _) = ErrorContext
353 |   { _ecErrors = take (_optMaxErrorsPerContext._envOptions $ env) $ nubSort $ mergeEExpected $ concatMap _ecErrors $ NE.toList es
354 |   , _ecContext = _ecContext
355 |   }
356 | 
357 | mergeEExpected :: [Error] -> [Error]
358 | mergeEExpected es = [EExpected $ nubSort expects | not (null expects)] <> filter (null . asEExpected) es
359 |   where expects = concatMap asEExpected es
360 | 
361 | nubSort :: Ord a => [a] -> [a]
362 | nubSort = map head . group . sort
363 | 
364 | asEExpected :: Error -> [String]
365 | asEExpected (EExpected s) = s
366 | asEExpected _ = []
367 | 
368 | defaultReportOptions :: ReportOptions
369 | defaultReportOptions = ReportOptions
370 |   { _optMaxContexts         = 20
371 |   , _optMaxErrorsPerContext = 20
372 |   , _optMaxLabelsPerContext = 5
373 |   }
374 | 
375 | -- | Run 'Reporter' with additional 'ReportOptions'.
376 | runReporterWithOptions :: Chunk k => ReportOptions -> Reporter k a -> FilePath -> k -> (Maybe a, [Report])
377 | runReporterWithOptions o p f k =
378 |   let !(# b, off #) = unpackChunk k
379 |       env = initialEnv o f b
380 |       ok x s = (Just x, reverse $ _stReports s)
381 |       err s = (Nothing, reverse $ _stReports $ addReport env s)
382 |   in unReporter p env (initialState off) ok err
383 | 
384 | -- | Run 'Reporter' on the given chunk, returning the result
385 | -- if successful and reports from error recoveries.
386 | -- In the case of an error, 'Nothing' is returned and the 'Report' list
387 | -- is non-empty.
388 | runReporter :: Chunk k => Reporter k a -> FilePath -> k -> (Maybe a, [Report])
389 | runReporter = runReporterWithOptions defaultReportOptions
390 | 
391 | addReport :: Env k -> State -> State
392 | addReport e s = s { _stReports = Report { _reportFile = _envFile e
393 |                                         , _reportErrors = _stErrors s
394 |                                         , _reportLine = I# (_stErrLine s)
395 |                                         , _reportCol = I# (_stErrCol s) } : _stReports s }
396 | 
397 | initialEnv :: ReportOptions -> FilePath -> Buffer k -> Env k
398 | initialEnv _envOptions _envFile _envBuf = Env
399 |   { _envFile
400 |   , _envBuf
401 |   , _envOptions
402 |   , _envContext   = []
403 |   , _envHidden    = False
404 |   , _envRefLine   = 1#
405 |   , _envRefCol    = 1#
406 |   }
407 | 
408 | initialState :: Int# -> State
409 | initialState _stOff = State
410 |   { _stOff
411 |   , _stLine      = 1#
412 |   , _stCol       = 1#
413 |   , _stErrOff    = 0#
414 |   , _stErrLine   = 0#
415 |   , _stErrCol    = 0#
416 |   , _stErrors    = []
417 |   , _stReports   = []
418 |   }
419 | 
420 | -- | Pretty string representation of 'Report'.
421 | showReport :: Report -> String
422 | showReport r =
423 |   "Parser errors at " <> _reportFile r
424 |   <> ", line " <> show (_reportLine r)
425 |   <> ", column " <> show (_reportCol r)
426 |   <> "\n\n" <> showErrors (_reportErrors r)
427 | 
428 | -- | Pretty string representation of '[ErrorContext]'.
429 | showErrors :: [ErrorContext] -> String
430 | showErrors [] = "No errors"
431 | showErrors es = intercalate "\n" $ map showErrorContext es
432 | 
433 | showErrorContext :: ErrorContext -> String
434 | showErrorContext ec =
435 |   intercalate ", " (map showError $ _ecErrors ec)
436 |   <> showContext (_ecContext ec) <> "."
437 | 
438 | showContext :: [String] -> String
439 | showContext [] = ""
440 | showContext xs = " in context of " <> intercalate ", " xs
441 | 


--------------------------------------------------------------------------------
/src/Text/PariPari/Internal/Run.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE Rank2Types #-}
 2 | module Text.PariPari.Internal.Run (
 3 |   runParser
 4 |   , runParserWithOptions
 5 | ) where
 6 | 
 7 | import Text.PariPari.Internal.Acceptor
 8 | import Text.PariPari.Internal.Class
 9 | import Text.PariPari.Internal.Chunk
10 | import Text.PariPari.Internal.Reporter
11 | 
12 | -- | Rsun fast 'Acceptor' and slower 'Reporter' on the given  sequentially.
13 | -- The 'FilePath' is used for error reporting.
14 | -- When the acceptor does not return successfully, the result from the reporter
15 | -- is awaited.
16 | runParser :: Chunk k => (forall p. Parser k p => p a) -> FilePath -> k -> (Maybe a, [Report])
17 | runParser = runParserWithOptions defaultReportOptions
18 | {-# INLINE runParser #-}
19 | 
20 | -- | Run parsers **sequentially** with additional 'ReportOptions'.
21 | runParserWithOptions :: Chunk k => ReportOptions -> (forall p. Parser k p => p a) -> FilePath -> k -> (Maybe a, [Report])
22 | runParserWithOptions o p f b =
23 |   let a = runAcceptor p f b
24 |       r = runReporterWithOptions o p f b
25 |   in case a of
26 |        Nothing  -> r
27 |        Just x -> (Just x, [])
28 | {-# INLINE runParserWithOptions #-}
29 | 


--------------------------------------------------------------------------------
/src/Text/PariPari/Lens.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE Rank2Types #-}
 2 | module Text.PariPari.Lens (
 3 |   posLine
 4 |   , posCol
 5 |   , reportLine
 6 |   , reportCol
 7 |   , reportFile
 8 |   , reportErrors
 9 |   , ecErrors
10 |   , ecContext
11 |   , optMaxContexts
12 |   , optMaxErrorsPerContext
13 |   , optMaxLabelsPerContext
14 | ) where
15 | 
16 | import Text.PariPari.Internal.Class
17 | import Text.PariPari.Internal.Reporter
18 | 
19 | type Lens a b = forall f . Functor f => (b -> f b) -> (a -> f a)
20 | 
21 | posLine :: Lens Pos Int
22 | posLine k p = fmap (\x -> p { _posLine = x }) (k (_posLine p))
23 | {-# INLINE posLine #-}
24 | 
25 | posCol :: Lens Pos Int
26 | posCol k p = fmap (\x -> p { _posCol = x }) (k (_posCol p))
27 | {-# INLINE posCol #-}
28 | 
29 | reportLine :: Lens Report Int
30 | reportLine k r = fmap (\x -> r { _reportLine = x }) (k (_reportLine r))
31 | {-# INLINE reportLine #-}
32 | 
33 | reportCol :: Lens Report Int
34 | reportCol k r = fmap (\x -> r { _reportCol = x }) (k (_reportCol r))
35 | {-# INLINE reportCol #-}
36 | 
37 | reportFile :: Lens Report FilePath
38 | reportFile k r = fmap (\x -> r { _reportFile = x }) (k (_reportFile r))
39 | {-# INLINE reportFile #-}
40 | 
41 | reportErrors :: Lens Report [ErrorContext]
42 | reportErrors k r = fmap (\x -> r { _reportErrors = x }) (k (_reportErrors r))
43 | {-# INLINE reportErrors #-}
44 | 
45 | ecErrors :: Lens ErrorContext [Error]
46 | ecErrors k e = fmap (\x -> e { _ecErrors = x }) (k (_ecErrors e))
47 | {-# INLINE ecErrors #-}
48 | 
49 | ecContext :: Lens ErrorContext [String]
50 | ecContext k e = fmap (\x -> e { _ecContext = x }) (k (_ecContext e))
51 | {-# INLINE ecContext #-}
52 | 
53 | optMaxContexts :: Lens ReportOptions Int
54 | optMaxContexts k o = fmap (\x -> o { _optMaxContexts = x }) (k (_optMaxContexts o))
55 | {-# INLINE optMaxContexts #-}
56 | 
57 | optMaxErrorsPerContext :: Lens ReportOptions Int
58 | optMaxErrorsPerContext k o = fmap (\x -> o { _optMaxErrorsPerContext = x }) (k (_optMaxErrorsPerContext o))
59 | {-# INLINE optMaxErrorsPerContext #-}
60 | 
61 | optMaxLabelsPerContext :: Lens ReportOptions Int
62 | optMaxLabelsPerContext k o = fmap (\x -> o { _optMaxLabelsPerContext = x }) (k (_optMaxLabelsPerContext o))
63 | {-# INLINE optMaxLabelsPerContext #-}
64 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | packages:
2 | - .
3 | resolver: nightly-2020-05-26
4 | 


--------------------------------------------------------------------------------
/test/test.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE RankNTypes #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | {-# LANGUAGE ScopedTypeVariables #-}
  4 | {-# LANGUAGE TypeApplications #-}
  5 | {-# LANGUAGE FlexibleContexts #-}
  6 | module Main (main) where
  7 | 
  8 | import Control.Monad (replicateM, replicateM_)
  9 | import Data.ByteString (ByteString)
 10 | import Data.Either (isLeft)
 11 | import Data.Text (Text)
 12 | import Prelude hiding (getLine)
 13 | import System.Random
 14 | import Test.Tasty
 15 | import Test.Tasty.HUnit
 16 | import Text.PariPari
 17 | import Text.PariPari.Internal.Chunk (stringToChunk, asc_a, asc_0, asc_9)
 18 | import qualified Data.Char as C
 19 | import qualified Data.List.NonEmpty as NE
 20 | import Data.String (IsString(..))
 21 | 
 22 | runAcceptor' :: Chunk k => Acceptor k a -> FilePath -> k -> Either Error a
 23 | runAcceptor' a f k = maybe (Left $ EFail "Nothing") Right $ runAcceptor a f k
 24 | 
 25 | main :: IO ()
 26 | main = defaultMain tests
 27 | 
 28 | randomTries :: Int
 29 | randomTries = 1000
 30 | 
 31 | randomStringLen :: Int
 32 | randomStringLen = 1000
 33 | 
 34 | runReporterEither :: Chunk k => Reporter k a -> FilePath -> k -> Either [Report] a
 35 | runReporterEither p f k = case runReporter p f k of
 36 |   (Just a, []) -> Right a
 37 |   (_,      r)  -> Left r
 38 | 
 39 | -- Only generate valid Unicode characters
 40 | randomChar :: IO Char
 41 | randomChar = do
 42 |   c <- randomIO
 43 |   let n = C.ord c
 44 |   if n == 0 || (n >= 0xD800 && n <= 0xDFFF) || n > 0x10FFFF then
 45 |     randomChar
 46 |   else
 47 |     pure c
 48 | 
 49 | randomString :: IO String
 50 | randomString = do
 51 |   n <- randomRIO (1, randomStringLen)
 52 |   replicateM n randomChar
 53 | 
 54 | randomAsciiString :: IO String
 55 | randomAsciiString = do
 56 |   n <- randomRIO (1, randomStringLen)
 57 |   replicateM n (randomRIO (C.chr 1, C.chr 127))
 58 | 
 59 | tests :: TestTree
 60 | tests = testGroup "Tests"
 61 |   [ testGroup "Acceptor"
 62 |     [ testGroup "Text"       $ parserTests @Text       runAcceptor'
 63 |     , testGroup "ByteString" $ parserTests @ByteString runAcceptor'
 64 |     ]
 65 | 
 66 |   , testGroup "Reporter"
 67 |     [ testGroup "Text"       $ parserTests @Text       runReporterEither
 68 |     , testGroup "ByteString" $ parserTests @ByteString runReporterEither
 69 |     ]
 70 | 
 71 |   , testGroup "Reporter specific" reporterTests
 72 |   ]
 73 | 
 74 | parserTests :: forall k p e. (Parser k p, Chunk k, IsString k, Eq e, Show e, Show k)
 75 |           => (forall a. p a -> FilePath -> k -> Either e a) -> [TestTree]
 76 | parserTests run =
 77 |   [ testGroup "ChunkParser"
 78 |     [ testCase "getFile" $ do
 79 |         ok getFile "" "filename"
 80 | 
 81 |     , testCase "getPos" $ do
 82 |         ok getPos "" (Pos 1 1)
 83 |         ok (char 'a' *> getPos) "abc" (Pos 1 2)
 84 |         ok (char 'a' *> char '\n' *> getPos) "a\nb" (Pos 2 1)
 85 |         ok (chunk "a\n" *> getPos) "a\nb" (Pos 1 3) -- chunk must not contain newlines!
 86 |         ok (char 'a' *> char '\n' *> char 'b' *> getPos) "a\nb" (Pos 2 2)
 87 | 
 88 |     , testCase "getRefPos" $ do
 89 |         ok getRefPos "" (Pos 1 1)
 90 |         ok (char 'a' *> getRefPos) "abc" (Pos 1 1)
 91 |         ok (char 'a' *> char '\n' *> withRefPos getRefPos) "a\nb" (Pos 2 1)
 92 |         ok (char 'a' *> char '\n' *> char 'b' *> withRefPos getRefPos) "a\nb" (Pos 2 2)
 93 |         ok (char 'a' *> char '\n' *> withRefPos (char 'b' *> getRefPos)) "a\nb" (Pos 2 1)
 94 | 
 95 |     , testCase "notFollowedBy" $ do
 96 |         ok (char 'a' <* notFollowedBy (char 'c')) "abc" 'a'
 97 |         err (char 'a' <* notFollowedBy (char 'b')) "abc"
 98 |         ok (char 'a' *> notFollowedBy (chunk "bd") *> char 'b') "abc" 'b'
 99 |         err (char 'a' *> notFollowedBy (chunk "bc")) "abc"
100 |         ok (char 'a' *> notFollowedBy (char 'c') *> getPos) "abc" (Pos 1 2)
101 | 
102 |     , testCase "lookAhead" $ do
103 |         ok (lookAhead (char 'a')) "abc" 'a'
104 |         ok (lookAhead (char 'a') *> getPos) "abc" (Pos 1 1)
105 |         ok (lookAhead (chunk "ab") *> getPos) "abc" (Pos 1 1)
106 |         err (lookAhead (char 'b')) "abc"
107 |         err (lookAhead (chunk "bd")) "abc"
108 |         err (lookAhead (char 'a')) ""
109 | 
110 |     , testCase "failWith" $
111 |         err (failWith (EFail "empty") :: p ()) "abc"
112 | 
113 |     , testCase "eof" $ do
114 |         ok eof "" ()
115 |         ok eof "\0" ()
116 |         ok eof "\0\0" ()
117 |         ok (chunk "abc" *> eof) "abc" ()
118 |         ok (chunk "abc" *> eof) "abc\0" ()
119 |         err eof "abc"
120 |         err (chunk "ab" *> eof) "abc"
121 | 
122 |     , testCase "label" $ do
123 |         ok (label "blub" $ char 'a') "abc" 'a'
124 |         err (label "blub" $ char 'b') "abc"
125 |         ok (char 'a' <?> "blub") "abc" 'a'
126 | 
127 |     , testCase "hidden" $ do
128 |         ok (hidden $ char 'a') "abc" 'a'
129 |         err (hidden $ char 'b') "abc"
130 | 
131 |     , testCase "try" $ do
132 |         ok (try $ char 'a') "abc" 'a'
133 |         err (try $ char 'b') "abc"
134 | 
135 |     , testCase "(<!>)" $ do
136 |         ok (char 'b' <!> char 'a' <!> char 'c') "abc" 'a'
137 |         ok (chunk "abd" <!> chunk "abc" <!> chunk "abe") "abcdef" "abc"
138 | 
139 |     , testCase "recover" $ do
140 |         ok (recover (char 'a' <* eof) (char 'b')) "a" 'a'
141 |         err (recover (char 'a') (char 'b')) "c"
142 |         err (recover (char 'a' <* eof) (char 'b')) "c"
143 | 
144 |     , testCase "chunk" $ do
145 |         ok (chunk "ab") "abc" "ab"
146 |         err (chunk "bc") "abc"
147 |         err (chunk "ab") ""
148 | 
149 |     , testCase "asChunk" $ do
150 |         ok (asChunk (void $ chunk "ab")) "abc" "ab"
151 |         ok (asChunk (void $ anyChar *> anyChar)) "abc" "ab"
152 |         ok (asChunk (skipCount 2 anyChar)) "abc" "ab"
153 |         err (asChunk (void $ chunk "bc")) "abc"
154 |         err (asChunk (void $ chunk "ab")) ""
155 | 
156 |     , testCase "char" $ do
157 |         ok (char 'a') "abc" 'a'
158 |         ok (char 'a' <* eof) "a" 'a'
159 |         err (char 'b') "abc"
160 |         err (char 'a') ""
161 | 
162 |     , testCase "char-random" $ replicateM_ randomTries $ do
163 |         s <- randomString
164 |         ok (traverse char s *> eof) s ()
165 | 
166 |     , testCase "scan" $ do
167 |         ok (scan (\c -> if c == 'a' then Just c else Nothing)) "abc" 'a'
168 |         ok (scan (\c -> if c == 'a' then Just c else Nothing) <* eof) "a" 'a'
169 |         err (scan (\c -> if c == 'b' then Just c else Nothing)) "abc"
170 |         err (scan (\c -> if c == 'a' then Just c else Nothing)) ""
171 | 
172 |         -- because of sentinel
173 |         err (scan (\c -> if c == '\0' then Just c else Nothing)) "\0"
174 |         err (scan (\c -> if c == '\0' then Just c else Nothing)) ""
175 | 
176 |     , testCase "asciiScan" $ do
177 |         ok (asciiScan (\c -> if c == asc_a then Just c else Nothing)) "abc" asc_a
178 |         ok (asciiScan (\c -> if c == asc_a then Just c else Nothing) <* eof) "a" asc_a
179 |         err (asciiScan (\c -> if c == asc_0 then Just c else Nothing)) "abc"
180 |         err (asciiScan (\c -> if c == asc_0 then Just c else Nothing)) ""
181 | 
182 |         -- because of sentinel
183 |         err (asciiScan (\c -> if c == 0 then Just c else Nothing)) "\0"
184 |         err (asciiScan (\c -> if c == 0 then Just c else Nothing)) ""
185 | 
186 |     , testCase "asciiByte" $ do
187 |         ok (asciiByte asc_a) "abc" asc_a
188 |         ok (asciiByte asc_a <* eof) "a" asc_a
189 |         ok (asciiByte 127 <* eof) "\x7F" 127
190 |         err (asciiByte asc_0) "abc"
191 |         err (asciiByte asc_0) ""
192 | 
193 |     , testCase "asciiByte-random" $ replicateM_ randomTries $ do
194 |         s <- randomAsciiString
195 |         ok (traverse (asciiByte . fromIntegral . C.ord) s *> eof) s ()
196 |     ]
197 | 
198 |   , testGroup "Alternative"
199 |     [ testCase "(<|>)" $ do
200 |         ok (char 'b' <|> char 'a' <|> char 'c') "abc" 'a'
201 |         ok (chunk "abd" <|> chunk "abc" <|> chunk "abe") "abcdef" "abc"
202 | 
203 |     , testCase "empty" $ do
204 |         err (empty :: p ()) "abc"
205 |         err (empty :: p ()) ""
206 |     ]
207 | 
208 |   , testGroup "MonadFail"
209 |     [ testCase "fail" $ do
210 |         err (failWith (EFail "empty") :: p ()) "abc"
211 |     ]
212 | 
213 |   , testGroup "Basic Combinators"
214 |     [ testCase "optional" $ do
215 |         ok (optional (char 'a')) "abc" (Just 'a')
216 |         ok (optional (char 'b')) "abc" Nothing
217 |         ok (optional (char 'a')) "" Nothing
218 | 
219 |     , testCase "some" $ do
220 |         ok (some (char 'a')) "abc" (NE.fromList "a")
221 |         ok (some (char 'a')) "aabc" (NE.fromList "aa")
222 |         err (some (char 'b')) "abc"
223 |         err (some (char 'a')) ""
224 | 
225 |     , testCase "many" $ do
226 |         ok (many (char 'a')) "abc" "a"
227 |         ok (many (char 'a')) "aabc" "aa"
228 |         ok (many (char 'b')) "abc" ""
229 |         ok (many (char 'a')) "" ""
230 |     ]
231 | 
232 |   , testGroup "Position Combinators"
233 |     [ testCase "getLine" $ do
234 |         ok getLine "" 1
235 |         ok (char 'a' *> getLine) "abc" 1
236 |         ok (char 'a' *> char '\n' *> getLine) "a\nb" 2
237 |         ok (char 'a' *> char '\n' *> char 'b' *> getLine) "a\nb" 2
238 | 
239 |     , testCase "getRefLine" $ do
240 |         ok getRefLine "" 1
241 |         ok (char 'a' *> getRefLine) "abc" 1
242 |         ok (char 'a' *> char '\n' *> withRefPos getRefLine) "a\nb" 2
243 |         ok (char 'a' *> char '\n' *> char 'b' *> withRefPos getRefLine) "a\nb" 2
244 |         ok (char 'a' *> char '\n' *> withRefPos (char 'b' *> getRefLine)) "a\nb" 2
245 | 
246 |     , testCase "getCol" $ do
247 |         ok getCol "" 1
248 |         ok (char 'a' *> getCol) "abc" 2
249 |         ok (char 'a' *> char '\n' *> getCol) "a\nb" 1
250 |         ok (char 'a' *> char '\n' *> char 'b' *> getCol) "a\nb" 2
251 | 
252 |     , testCase "getRefCol" $ do
253 |         ok getRefCol "" 1
254 |         ok (char 'a' *> getRefCol) "abc" 1
255 |         ok (char 'a' *> char '\n' *> withRefPos getRefCol) "a\nb" 1
256 |         ok (char 'a' *> char '\n' *> char 'b' *> withRefPos getRefCol) "a\nb" 2
257 |         ok (char 'a' *> char '\n' *> withRefPos (char 'b' *> getRefCol)) "a\nb" 1
258 | 
259 |     , testCase "withPos" $ do
260 |         ok (withPos $ char 'a') "abc" (Pos 1 1, 'a')
261 |         ok (char 'a' *> withPos (char 'b')) "abc" (Pos 1 2, 'b')
262 |         ok (char 'a' *> char '\n' *> withPos (char 'b')) "a\nb" (Pos 2 1, 'b')
263 | 
264 |     , testCase "withSpan" $ do
265 |         ok (withSpan $ chunk "ab") "abc" (Pos 1 1, Pos 1 3, "ab")
266 |         ok (char 'a' *> withSpan (chunk "bcd")) "abcde" (Pos 1 2, Pos 1 5, "bcd")
267 |         ok (char 'a' *> char '\n' *> withSpan (chunk "bcd")) "a\nbcde" (Pos 2 1, Pos 2 4, "bcd")
268 |     ]
269 | 
270 |   , testGroup "Char Combinators"
271 |     [ testCase "satisfy" $ do
272 |         ok (satisfy (== 'a')) "abc" 'a'
273 |         ok (satisfy (== 'a') <* eof) "a" 'a'
274 |         err (satisfy (== 'b')) "abc"
275 |         err (satisfy (== 'a')) ""
276 | 
277 |         -- because of sentinel
278 |         err (satisfy (== '\0')) "\0"
279 |         err (satisfy (== '\0')) ""
280 | 
281 |     , testCase "satisfy-random" $ replicateM_ randomTries $ do
282 |         s <- randomString
283 |         ok (traverse (satisfy . (==)) s *> eof) s ()
284 | 
285 |     , testCase "asciiSatisfy" $ do
286 |         ok (asciiSatisfy (== asc_a)) "abc" asc_a
287 |         ok (asciiSatisfy (== asc_a) <* eof) "a" asc_a
288 |         err (asciiSatisfy (== asc_0)) "abc"
289 |         err (asciiSatisfy (== asc_0)) ""
290 | 
291 |         -- because of sentinel
292 |         err (asciiSatisfy (== 0)) "\0"
293 |         err (asciiSatisfy (== 0)) ""
294 | 
295 |     , testCase "asciiSatisfy-random" $ replicateM_ randomTries $ do
296 |         s <- randomAsciiString
297 |         ok (traverse (asciiSatisfy . (==) . fromIntegral . C.ord) s *> eof) s ()
298 | 
299 |     , testCase "string" $ do
300 |         ok (string "ab") "abc" (stringToChunk "ab")
301 |         err (string "bc") "abc"
302 |         err (string "ab") ""
303 | 
304 |     , testCase "string-random" $ replicateM_ randomTries $ do
305 |         s <- randomString
306 |         ok (string s <* eof) s (stringToChunk s)
307 | 
308 |     , testCase "anyChar" $ do
309 |         ok anyChar "abc" 'a'
310 |         ok (anyChar <* eof) "a" 'a'
311 |         err anyChar ""
312 | 
313 |     , testCase "anyChar-random" $ replicateM_ randomTries $ do
314 |         s <- randomString
315 |         ok (traverse (const anyChar) s *> eof) s ()
316 | 
317 |     , testCase "anyAsciiByte" $ do
318 |         ok anyAsciiByte "abc" asc_a
319 |         ok (anyAsciiByte <* eof) "a" asc_a
320 |         err anyAsciiByte ""
321 |         err anyAsciiByte "\x80"
322 | 
323 |     , testCase "anyAsciiByte-random" $ replicateM_ randomTries $ do
324 |         s <- randomAsciiString
325 |         ok (traverse (const anyAsciiByte) s *> eof) s ()
326 | 
327 |     , testCase "notChar" $ do
328 |         ok (notChar 'b') "abc" 'a'
329 |         ok (notChar 'b' <* eof) "a" 'a'
330 |         err (notChar 'a') "a"
331 |         err (notChar 'a') ""
332 | 
333 |     , testCase "char'" $ do
334 |         ok (char' 'a') "a" 'a'
335 |         ok (char' 'a' <* eof) "a" 'a'
336 |         ok (char' 'a') "A" 'A'
337 |         ok (char' 'A') "a" 'a'
338 |         ok (char' 'A') "A" 'A'
339 |         ok (char' '9') "9" '9'
340 |         err (char' 'a') "b"
341 |         err (char' 'a') ""
342 | 
343 |     , testCase "alphaNumChar" $ do
344 |         ok (alphaNumChar <* eof) "a" 'a'
345 |         ok alphaNumChar "9" '9'
346 |         err alphaNumChar "_"
347 |         err alphaNumChar ""
348 | 
349 |     , testCase "digitChar" $ do
350 |         ok (digitChar 10 <* eof) "9" '9'
351 |         ok (digitChar 36) "z" 'z'
352 |         err (digitChar 2) "2"
353 |         err (digitChar 2) ""
354 | 
355 |     , testCase "letterChar" $ do
356 |         ok (letterChar <* eof) "a" 'a'
357 |         err letterChar "9"
358 |         err letterChar "_"
359 |         err letterChar ""
360 | 
361 |     , testCase "lowerChar" $ do
362 |         ok (lowerChar <* eof) "a" 'a'
363 |         err lowerChar "A"
364 |         err lowerChar "9"
365 |         err lowerChar "_"
366 |         err lowerChar ""
367 | 
368 |     , testCase "upperChar" $ do
369 |         ok (upperChar <* eof) "A" 'A'
370 |         err upperChar "a"
371 |         err upperChar "9"
372 |         err upperChar "_"
373 |         err upperChar ""
374 | 
375 |     , testCase "symbolChar" $ do
376 |         ok (symbolChar <* eof) "+" '+'
377 |         err symbolChar "a"
378 |         err symbolChar "."
379 |         err symbolChar ""
380 | 
381 |     , testCase "punctuationChar" $ do
382 |         ok (punctuationChar <* eof) "." '.'
383 |         err punctuationChar "+"
384 |         err punctuationChar "a"
385 |         err punctuationChar ""
386 | 
387 |     , testCase "spaceChar" $ do
388 |         ok (spaceChar <* eof) " " ' '
389 |         ok (spaceChar <* eof) "\n" '\n'
390 |         ok (spaceChar <* eof) "\r" '\r'
391 |         ok (spaceChar <* eof) "\t" '\t'
392 |         err spaceChar "a"
393 |         err spaceChar ""
394 | 
395 |     , testCase "asciiChar" $ do
396 |         ok (asciiChar <* eof) "a" 'a'
397 |         ok (asciiChar <* eof) "\x7F" '\x7F'
398 |         err asciiChar "\x80"
399 |         err asciiChar ""
400 | 
401 |     , testCase "categoryChar" $ do
402 |         ok (categoryChar C.UppercaseLetter <* eof) "A" 'A'
403 |         err (categoryChar C.UppercaseLetter) "a"
404 |         err (categoryChar C.UppercaseLetter) ""
405 | 
406 |     , testCase "digitByte" $ do
407 |         ok (digitByte 2 <* eof) "0" asc_0
408 |         ok (digitByte 10 <* eof) "9" asc_9
409 |         err (digitByte 10) "\0"
410 |         err (digitByte 10) ""
411 | 
412 |     , testCase "skipChars" $ do
413 |         ok (skipChars 0) "" ()
414 |         ok (skipChars 3 <* eof) "abc" ()
415 |         ok (skipChars 3 <* char 'b') "aaab" ()
416 |         err (skipChars 1) ""
417 |         err (skipChars 2) "a"
418 | 
419 |     , testCase "takeChars" $ do
420 |         ok (takeChars 0) "" (stringToChunk "")
421 |         ok (takeChars 3 <* eof) "abc" (stringToChunk "abc")
422 |         ok (takeChars 3 <* char 'b') "aaab" (stringToChunk "aaa")
423 |         err (takeChars 1) ""
424 |         err (takeChars 2) "a"
425 | 
426 |     , testCase "skipCharsWhile" $ do
427 |         ok (skipCharsWhile (== 'a')) "" ()
428 |         ok (skipCharsWhile (== 'a')) "b" ()
429 |         ok (skipCharsWhile (== 'a') *> eof) "aaa" ()
430 |         ok (skipCharsWhile (== 'a') *> char 'b') "aaab" 'b'
431 | 
432 |     , testCase "takeCharsWhile" $ do
433 |         ok (takeCharsWhile (== 'a')) "" (stringToChunk "")
434 |         ok (takeCharsWhile (== 'a')) "b" (stringToChunk "")
435 |         ok (takeCharsWhile (== 'a') <* eof) "aaa" (stringToChunk "aaa")
436 |         ok (takeCharsWhile (== 'a') <* char 'b') "aaab" (stringToChunk "aaa")
437 | 
438 |     , testCase "skipCharsWhile1" $ do
439 |         err (skipCharsWhile1 (== 'a')) ""
440 |         err (skipCharsWhile1 (== 'a')) "b"
441 |         ok (skipCharsWhile1 (== 'a') *> eof) "aaa" ()
442 |         ok (skipCharsWhile1 (== 'a') *> char 'b') "aaab" 'b'
443 | 
444 |     , testCase "takeCharsWhile1" $ do
445 |         err (takeCharsWhile1 (== 'a')) ""
446 |         err (takeCharsWhile1 (== 'a')) "b"
447 |         ok (takeCharsWhile1 (== 'a') <* eof) "aaa" (stringToChunk "aaa")
448 |         ok (takeCharsWhile1 (== 'a') <* char 'b') "aaab" (stringToChunk "aaa")
449 |     ]
450 | 
451 |   , testGroup "Fraction Combinators"
452 |     [ testCase "fractionDec" $ do
453 |         okFraction (fractionDec (pure ()) <* eof) "1.23" (Right (123, 10, -2))
454 |         okFraction (fractionDec (pure ()) <* eof) "99e0" (Right (99, 10, 0))
455 |         okFraction (fractionDec (pure ()) <* eof) "123.45" (Right (12345, 10, -2))
456 |         okFraction (fractionDec (pure ()) <* eof) "00123." (Right (123, 10, 0))
457 |         okFraction (fractionDec (pure ()) <* eof) "456.000" (Right (456000, 10, -3))
458 | 
459 |         okFraction (fractionDec (pure ()) <* eof) "987e-5" (Right (987, 10, -5))
460 |         okFraction (fractionDec (pure ()) <* eof) "987.e-123" (Right (987, 10, -123))
461 |         okFraction (fractionDec (pure ()) <* eof) "987.654e-67" (Right (987654, 10, -70))
462 |         okFraction (fractionDec (pure ()) <* eof) "987.654000e-7" (Right (987654000, 10, -13))
463 |         okFraction (fractionDec (pure ()) <* eof) "000987.654000e-7" (Right (987654000, 10, -13))
464 | 
465 |         okFraction (fractionDec (pure ()) <* eof) "987e+5" (Right (987, 10, 5))
466 |         okFraction (fractionDec (pure ()) <* eof) "987.e+123" (Right (987, 10, 123))
467 |         okFraction (fractionDec (pure ()) <* eof) "987.654e+67" (Right (987654, 10, 64))
468 |         okFraction (fractionDec (pure ()) <* eof) "987.654000e+7" (Right (987654000, 10, 1))
469 |         okFraction (fractionDec (pure ()) <* eof) "000987.654000e+7" (Right (987654000, 10, 1))
470 | 
471 |         okFraction (fractionDec (pure ()) <* eof) "987e5" (Right (987, 10, 5))
472 |         okFraction (fractionDec (pure ()) <* eof) "987.e123" (Right (987, 10, 123))
473 |         okFraction (fractionDec (pure ()) <* eof) "987.654e67" (Right (987654, 10, 64))
474 |         okFraction (fractionDec (pure ()) <* eof) "987.654000e7" (Right (987654000, 10, 1))
475 |         okFraction (fractionDec (pure ()) <* eof) "000987.654000e7" (Right (987654000, 10, 1))
476 | 
477 |         okFraction (fractionDec (pure ())) "123" (Left 123)
478 | 
479 |         errFraction (fractionDec (pure ())) "123e"
480 |         errFraction (fractionDec (pure ())) ""
481 |         errFraction (fractionDec (pure ())) "abc"
482 |     ]
483 | 
484 |   , testGroup "Integer Combinators"
485 |     [ testCase "decimal" $ do
486 |         ok @Integer (decimal <* eof) "0123" 123
487 |         ok @Integer (decimal <* eof) "1234567890" 1234567890
488 |         ok @Integer (decimal <* string "abc" <* eof) "123abc" 123
489 |         err @Integer decimal "abc"
490 |         err @Integer decimal "-1"
491 |         err @Integer decimal ""
492 | 
493 |     , testCase "octal" $ do
494 |         ok @Integer (octal <* eof) "0123" 0o123
495 |         ok @Integer (octal <* eof) "12345670" 0o12345670
496 |         ok @Integer (octal <* string "abc" <* eof) "123abc" 0o123
497 |         err @Integer octal "8abc"
498 |         err @Integer octal "-1"
499 |         err @Integer octal ""
500 | 
501 |     , testCase "hexadecimal" $ do
502 |         ok @Integer (hexadecimal <* eof) "0123" 0x123
503 |         ok @Integer (hexadecimal <* eof) "123456789aBcDeF0" 0x123456789ABCDEF0
504 |         ok @Integer (hexadecimal <* string "xyz" <* eof) "123xyz" 0x123
505 |         err @Integer hexadecimal "gabc"
506 |         err @Integer hexadecimal "-1"
507 |         err @Integer hexadecimal ""
508 | 
509 |     , testCase "integer" $ do
510 |         ok @Integer (integer (char '_') 10) "1_2_3" 123
511 |         ok @Integer (integer (char '_') 10 <* char '_') "1_2_3_" 123
512 |         ok @Integer (integer (optional $ char '_') 10) "123_456_789" 123456789
513 |         err @Integer (integer (pure ()) 10) "abc"
514 |         err @Integer (integer (char '_') 10) "_1_2_3"
515 |         err @Integer (integer (pure ()) 10) "-1"
516 |         err @Integer (integer (pure ()) 10) ""
517 | 
518 |         ok @Integer (integer (pure ()) 2) "101" 5
519 |         ok @Integer (integer (pure ()) 7) "321" 162
520 |         ok @Integer (integer (pure ()) 36) "XyZ" 44027
521 | 
522 |     , testCase "integer'" $ do
523 |         ok @(Integer, Int) (integer' (char '_') 10) "1_2_3" (123, 3)
524 |         ok @(Integer, Int) (integer' (char '_') 10 <* char '_') "1_2_3_" (123, 3)
525 |         ok @(Integer, Int) (integer' (optional $ char '_') 10) "123_456_789" (123456789, 9)
526 |         err @(Integer, Int) (integer' (pure ()) 10) "abc"
527 |         err @(Integer, Int) (integer' (char '_') 10) "_1_2_3"
528 |         err @(Integer, Int) (integer' (pure ()) 10) "-1"
529 |         err @(Integer, Int) (integer' (pure ()) 10) ""
530 | 
531 |         ok @(Integer, Int) (integer' (pure ()) 10) "0123" (123, 4)
532 |         ok @(Integer, Int) (integer' (pure ()) 10) "01230" (1230, 5)
533 |         ok @(Integer, Int) (integer' (pure ()) 10) "000" (0, 3)
534 | 
535 |         ok @(Integer, Int) (integer' (pure ()) 2) "101" (5, 3)
536 |         ok @(Integer, Int) (integer' (pure ()) 7) "321" (162, 3)
537 |         ok @(Integer, Int) (integer' (pure ()) 36) "XyZ" (44027, 3)
538 | 
539 |     , testCase "signed" $ do
540 |         ok @Integer (signed decimal) "-123" (-123)
541 |         ok @Integer (signed decimal) "+123" 123
542 |         err @Integer (signed decimal) "- 123"
543 |         err @Integer (signed decimal) "+ 123"
544 |         err @Integer (signed decimal) ""
545 | 
546 |     , testCase "digit" $ do
547 |         ok (digit 2) "1" 1
548 |         ok (digit 10) "7" 7
549 |         ok (digit 36) "Z" 35
550 |         err (digit 10) "a"
551 |         err (digit 10) ""
552 |     ]
553 |   ]
554 |   where
555 |   ok :: (Eq a, Show a, HasCallStack) => p a -> String -> a -> Assertion
556 |   ok p i o = run p "filename" (stringToChunk @k i) @?= Right o
557 |   err :: (Eq a, Show a, HasCallStack) => p a -> String -> Assertion
558 |   err p i = assertBool "err" $ isLeft $ run p "filename" (stringToChunk @k i)
559 |   errFraction :: HasCallStack => p (Either Integer (Integer, Int, Integer)) -> String -> Assertion
560 |   errFraction = err
561 |   okFraction :: HasCallStack => p (Either Integer (Integer, Int, Integer)) -> String -> Either Integer (Integer, Int, Integer) -> Assertion
562 |   okFraction = ok
563 | 
564 | reporterTests :: [TestTree]
565 | reporterTests =
566 |   [ testCase "recover" $ do
567 |       run (char 'a' <* eof) "a" (Just 'a', 0)
568 |       run (char 'a') "b" (Nothing, 1)
569 |       run (recover (char 'a' <* eof) (char 'b')) "a" (Just 'a', 0)
570 |       run (recover (char 'a') (char 'b')) "b" (Just 'b', 1)
571 |       run ((,)
572 |             <$> recover (char 'a') (char 'b')
573 |             <*> recover (char 'a') (char 'c')) "bc" (Just ('b', 'c'), 2)
574 |       run (recover (char 'a') (char 'b') *>
575 |            recover (char 'a') (char 'c') *>
576 |            char 'a') "bcd" (Nothing, 3)
577 |       run (recover (char 'a' <* eof) (char 'b')) "c" (Nothing, 1)
578 |   ]
579 |   where
580 |   run :: (Eq a, Show a, HasCallStack) => Reporter Text a -> Text -> (Maybe a, Int) -> Assertion
581 |   run p i (o, r) = do
582 |     let (out, rep) = runReporter p "filename" i
583 |     out @?= o
584 |     length rep @?= r
585 | 
586 | {-
587 | TODO:
588 | 
589 | Instances:
590 | Semigroup
591 | Monoid
592 | Functor
593 | Applicative
594 | Monad
595 | MonadPlus
596 | Alternative
597 | 
598 | Additional combinators provided by parser-combinators.
599 | They are also tested by megaparsec:
600 | endBy1
601 | someTill
602 | sepBy1
603 | sepEndBy1
604 | between
605 | choice
606 | count
607 | count'
608 | eitherP
609 | endBy
610 | manyTill
611 | option
612 | sepBy
613 | sepEndBy
614 | skipMany
615 | skipSome
616 | skipCount
617 | skipManyTill
618 | skipSomeTill
619 | 
620 | Identation:
621 | align
622 | indented
623 | line
624 | linefold
625 | 
626 | Fraction:
627 | fraction
628 | fractionHex
629 | 
630 | Char Combinators:
631 | scanChars
632 | scanChars1
633 | -}
634 | 


--------------------------------------------------------------------------------