├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── example-data ├── bad.json └── good.json ├── example.hs ├── paripari.cabal ├── specialise-all.hs ├── src └── Text │ ├── PariPari.hs │ └── PariPari │ ├── Internal │ ├── Acceptor.hs │ ├── Chunk.hs │ ├── Class.hs │ ├── Combinators.hs │ ├── Reporter.hs │ └── Run.hs │ └── Lens.hs ├── stack.yaml └── test └── test.hs /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | docs 3 | wiki 4 | TAGS 5 | tags 6 | wip 7 | .stack-work 8 | .DS_Store 9 | .*.swp 10 | .*.swo 11 | *.o 12 | *.hi 13 | *~ 14 | *# 15 | stack.yaml.lock 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This Travis job script has been generated by a script via 2 | # 3 | # haskell-ci 'paripari.cabal' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.10.1 12 | # 13 | version: ~> 1.0 14 | language: c 15 | os: linux 16 | dist: xenial 17 | git: 18 | # whether to recursively clone submodules 19 | submodules: false 20 | cache: 21 | directories: 22 | - $HOME/.cabal/packages 23 | - $HOME/.cabal/store 24 | - $HOME/.hlint 25 | before_cache: 26 | - rm -fv $CABALHOME/packages/hackage.haskell.org/build-reports.log 27 | # remove files that are regenerated by 'cabal update' 28 | - rm -fv $CABALHOME/packages/hackage.haskell.org/00-index.* 29 | - rm -fv $CABALHOME/packages/hackage.haskell.org/*.json 30 | - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.cache 31 | - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar 32 | - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar.idx 33 | - rm -rfv $CABALHOME/packages/head.hackage 34 | jobs: 35 | include: 36 | - compiler: ghc-8.10.1 37 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.10.1","cabal-install-3.2"]}} 38 | os: linux 39 | - compiler: ghc-8.8.3 40 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.8.3","cabal-install-3.2"]}} 41 | os: linux 42 | - compiler: ghc-8.6.5 43 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.6.5","cabal-install-3.2"]}} 44 | os: linux 45 | - compiler: ghc-8.4.4 46 | addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.4.4","cabal-install-3.2"]}} 47 | os: linux 48 | before_install: 49 | - HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//') 50 | - WITHCOMPILER="-w $HC" 51 | - HADDOCK=$(echo "/opt/$CC/bin/haddock" | sed 's/-/\//') 52 | - HCPKG="$HC-pkg" 53 | - unset CC 54 | - CABAL=/opt/ghc/bin/cabal 55 | - CABALHOME=$HOME/.cabal 56 | - export PATH="$CABALHOME/bin:$PATH" 57 | - TOP=$(pwd) 58 | - "HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\\d+)\\.(\\d+)\\.(\\d+)(\\.(\\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))')" 59 | - echo $HCNUMVER 60 | - CABAL="$CABAL -vnormal+nowrap" 61 | - set -o pipefail 62 | - TEST=--enable-tests 63 | - BENCH=--enable-benchmarks 64 | - HEADHACKAGE=false 65 | - rm -f $CABALHOME/config 66 | - | 67 | echo "verbose: normal +nowrap +markoutput" >> $CABALHOME/config 68 | echo "remote-build-reporting: anonymous" >> $CABALHOME/config 69 | echo "write-ghc-environment-files: always" >> $CABALHOME/config 70 | echo "remote-repo-cache: $CABALHOME/packages" >> $CABALHOME/config 71 | echo "logs-dir: $CABALHOME/logs" >> $CABALHOME/config 72 | echo "world-file: $CABALHOME/world" >> $CABALHOME/config 73 | echo "extra-prog-path: $CABALHOME/bin" >> $CABALHOME/config 74 | echo "symlink-bindir: $CABALHOME/bin" >> $CABALHOME/config 75 | echo "installdir: $CABALHOME/bin" >> $CABALHOME/config 76 | echo "build-summary: $CABALHOME/logs/build.log" >> $CABALHOME/config 77 | echo "store-dir: $CABALHOME/store" >> $CABALHOME/config 78 | echo "install-dirs user" >> $CABALHOME/config 79 | echo " prefix: $CABALHOME" >> $CABALHOME/config 80 | echo "repository hackage.haskell.org" >> $CABALHOME/config 81 | echo " url: http://hackage.haskell.org/" >> $CABALHOME/config 82 | install: 83 | - ${CABAL} --version 84 | - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]" 85 | - | 86 | echo "program-default-options" >> $CABALHOME/config 87 | echo " ghc-options: $GHCJOBS +RTS -M6G -RTS" >> $CABALHOME/config 88 | - cat $CABALHOME/config 89 | - rm -fv cabal.project cabal.project.local cabal.project.freeze 90 | - travis_retry ${CABAL} v2-update -v 91 | # Generate cabal.project 92 | - rm -rf cabal.project cabal.project.local cabal.project.freeze 93 | - touch cabal.project 94 | - | 95 | echo "packages: ." >> cabal.project 96 | - echo 'package paripari' >> cabal.project 97 | - "echo ' ghc-options: -Werror=missing-methods' >> cabal.project" 98 | - | 99 | - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(paripari)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" 100 | - cat cabal.project || true 101 | - cat cabal.project.local || true 102 | - if [ -f "./configure.ac" ]; then (cd "." && autoreconf -i); fi 103 | - ${CABAL} v2-freeze $WITHCOMPILER ${TEST} ${BENCH} 104 | - "cat cabal.project.freeze | sed -E 's/^(constraints: *| *)//' | sed 's/any.//'" 105 | - rm cabal.project.freeze 106 | - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} --dep -j2 all 107 | - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks --dep -j2 all 108 | script: 109 | - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX) 110 | # Packaging... 111 | - ${CABAL} v2-sdist all 112 | # Unpacking... 113 | - mv dist-newstyle/sdist/*.tar.gz ${DISTDIR}/ 114 | - cd ${DISTDIR} || false 115 | - find . -maxdepth 1 -type f -name '*.tar.gz' -exec tar -xvf '{}' \; 116 | - find . -maxdepth 1 -type f -name '*.tar.gz' -exec rm '{}' \; 117 | - PKGDIR_paripari="$(find . -maxdepth 1 -type d -regex '.*/paripari-[0-9.]*')" 118 | # Generate cabal.project 119 | - rm -rf cabal.project cabal.project.local cabal.project.freeze 120 | - touch cabal.project 121 | - | 122 | echo "packages: ${PKGDIR_paripari}" >> cabal.project 123 | - echo 'package paripari' >> cabal.project 124 | - "echo ' ghc-options: -Werror=missing-methods' >> cabal.project" 125 | - | 126 | - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(paripari)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" 127 | - cat cabal.project || true 128 | - cat cabal.project.local || true 129 | # Building... 130 | # this builds all libraries and executables (without tests/benchmarks) 131 | - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all 132 | # Building with tests and benchmarks... 133 | # build & run tests, build benchmarks 134 | - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} all 135 | # Testing... 136 | - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all 137 | # cabal check... 138 | - (cd ${PKGDIR_paripari} && ${CABAL} -vnormal check) 139 | # haddock... 140 | - ${CABAL} v2-haddock $WITHCOMPILER --with-haddock $HADDOCK ${TEST} ${BENCH} all 141 | # Building without installed constraints for packages in global-db... 142 | - rm -f cabal.project.local 143 | - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all 144 | 145 | # REGENDATA ("0.10.1",["paripari.cabal"]) 146 | # EOF 147 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Daniel Mendler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | example.hs : README.md Makefile 2 | unlit --language haskell -f markdown -i README.md -o example.hs 3 | sed -i '/OPTIONS_GHC/d' example.hs 4 | sed -i 's/{-# SPECIALISE_ALL/-- {-# SPECIALISE_ALL/g' example.hs 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PariPari: Fast parser combinator library for Haskell 2 | 3 | [![Hackage](https://img.shields.io/hackage/v/paripari.svg)](https://hackage.haskell.org/package/paripari) 4 | [![Build Status](https://secure.travis-ci.org/minad/paripari.png?branch=master)](http://travis-ci.org/minad/paripari) 5 | 6 | PariPari is a parser combinator library for Haskell. PariPari can be used as 7 | a drop in replacement for the Parsec class of libraries. However be aware that the library 8 | is new and unstable. 9 | 10 | PariPari offers two parsing strategies. There is a fast Acceptor and a slower Reporter. 11 | If the Acceptor fails, the Reporter returns a report about the parsing errors. 12 | This allows for fast parsing in the good case without compromising on the quality of the error messages. 13 | 14 | Like Attoparsec, the parser combinators backtrack by default. To avoid exponential parsing time if errors are found and 15 | in order to avoid bad error messages, the `` parser combinator is provided, which does not backtrack. 16 | 17 | PariPari operates on strict `ByteString` and `Text`. 18 | As a consequence, PariPari is only a good fit for data which is available at once (no streaming). 19 | If characters are parsed using the `char` and `satisfy` combinators, bytestrings are interpreted as UTF-8 and decoded on the fly. 20 | The interface of PariPari matches mostly the one of Attoparsec/Megaparsec/etc. 21 | 22 | ## Features 23 | 24 | * Fast-path parser without error reporting (Acceptor) 25 | and fallback to slower error reporting parser (Reporter) 26 | to optimize the common case 27 | * Backtracking by default for ease of use 28 | * `` can be used to avoid backtracking 29 | * Support for strict UTF-8 `ByteString` and strict `Text` 30 | * Combinators for indentation-sensitive parsing 31 | * Error recovery support via `recover` 32 | * Provides flexible parsers for integers and fractional numbers 33 | of base 2 to 36 with support for separators between digits 34 | * Most Parsec/Megaparsec combinators provided, relying on the `parser-combinators` library 35 | 36 | ## Example 37 | 38 | In this example we use PariPari to parse JSON. The following is literate haskell. 39 | 40 | ### Prologue 41 | 42 | We specify a preprocessor, language pragmas and the library imports. 43 | Performance of PariPari depends crucially on the specialisation of `Parser k a` to 44 | `Acceptor ByteString a` and `Reporter ByteString a`. In larger parsers it seems that the 45 | GHC specialiser does not kick in. As a workaround we use `paripari-specialise-all` as a 46 | preprocessor, which generates `SPECIALISE` pragmas from our custom `SPECIALISE_ALL` pragma. 47 | Using the preprocessor is not necessary, however without it I observed 2x-4x slowdowns in parsing speed. 48 | 49 | ``` haskell 50 | {-# OPTIONS_GHC -F -pgmF paripari-specialise-all #-} 51 | {-# LANGUAGE ConstraintKinds #-} 52 | {-# LANGUAGE FlexibleContexts #-} 53 | {-# LANGUAGE OverloadedStrings #-} 54 | {-# LANGUAGE Rank2Types #-} 55 | 56 | import Data.Foldable (for_) 57 | import System.Environment (getArgs) 58 | import Text.PariPari 59 | import qualified Data.ByteString as B 60 | ``` 61 | 62 | ### Basic parser types 63 | 64 | We parametrize the parser with the string type. 65 | Both `ByteString` and `Text` are supported. 66 | Note that even in the case of `ByteStrings` a `Parser` instance 67 | is provided, which interprets the bytes as UTF-8. 68 | 69 | ``` haskell 70 | type StringType = B.ByteString 71 | type PMonad p = Parser StringType p 72 | type P a = (forall p. PMonad p => p a) 73 | ``` 74 | 75 | The `P` shortcut can be used for simple combinators. 76 | For functions returning parsers, the `PMonad` constraint must 77 | be used for specialization to work, e.g., `char :: PMonad p => Char -> p Char`. 78 | 79 | Now we ensure that the GHC specialiser kicks in 80 | and specialises all parsers. These pragmas are processed 81 | by the preprocessor `paripari-specialise-all`. 82 | 83 | ``` haskell 84 | {-# SPECIALISE_ALL PMonad p = p ~ Acceptor StringType #-} 85 | {-# SPECIALISE_ALL PMonad p = p ~ Reporter StringType #-} 86 | {-# SPECIALISE_ALL P = Acceptor StringType #-} 87 | {-# SPECIALISE_ALL P = Reporter StringType #-} 88 | ``` 89 | 90 | ### JSON datatype 91 | 92 | We define a datatype of JSON values. 93 | 94 | ``` haskell 95 | data Value 96 | = Object ![(StringType, Value)] 97 | | Array ![Value] 98 | | String !StringType 99 | | Number !Integer !Integer 100 | | Bool !Bool 101 | | Null 102 | deriving (Eq, Show) 103 | ``` 104 | 105 | ### Parsers 106 | 107 | A JSON toplevel value is either an object or an array. 108 | 109 | ``` haskell 110 | json :: P Value 111 | json = space *> (object <|> array) "json" 112 | ``` 113 | 114 | Objects consist of pairs of a text string and a value. 115 | 116 | ``` haskell 117 | object :: P Value 118 | object = Object <$> (char '{' *> space *> sepBy pair (space *> char ',' *> space) <* space <* char '}') "object" 119 | 120 | pair :: P (StringType, Value) 121 | pair = (,) <$> (text <* space) <*> (char ':' *> space *> value) 122 | ``` 123 | 124 | Arrays are a list of values. 125 | 126 | ``` haskell 127 | array :: P Value 128 | array = Array <$> (char '[' *> sepBy value (space *> char ',' *> space) <* space <* char ']') "array" 129 | ``` 130 | 131 | Furthermore, JSON supports text strings, boolean values, null and floating point numbers. 132 | 133 | ``` haskell 134 | value :: P Value 135 | value = 136 | (String <$> text) 137 | <|> object 138 | <|> array 139 | <|> (Bool False <$ string "false") 140 | <|> (Bool True <$ string "true") 141 | <|> (Null <$ string "null") 142 | <|> number 143 | 144 | text :: P StringType 145 | text = char '"' *> takeCharsWhile (/= '"') <* char '"' "text" 146 | ``` 147 | 148 | Floating point numbers are parsed by `fractionDec` which returns a coefficient, 149 | the base of the exponent and the exponent. The conversion to `Double` can be done 150 | for example by the `scientific` library. 151 | 152 | ``` haskell 153 | number :: P Value 154 | number = label "number" $ do 155 | neg <- sign 156 | frac <- fractionDec (pure ()) 157 | pure $ case frac of 158 | Left n -> Number (neg n) 0 159 | Right (c, _, e) -> Number (neg c) e 160 | ``` 161 | 162 | For spaces we need another helper function. 163 | 164 | ``` haskell 165 | space :: P () 166 | space = skipCharsWhile (\c -> c == ' ' || c == '\n' || c == '\t') 167 | ``` 168 | 169 | ### Main function 170 | 171 | The main function of the example program reads a file, runs the parser 172 | and prints the value if the parsing succeeded. 173 | In the case of an error a report is printed. 174 | 175 | ``` haskell 176 | main :: IO () 177 | main = do 178 | args <- getArgs 179 | case args of 180 | [file] -> do 181 | src <- B.readFile file 182 | let (result, reports) = runParser json file src 183 | for_ reports $ putStrLn . showReport 184 | print result 185 | _ -> error "Usage: paripari-example test.json" 186 | ``` 187 | 188 | ## Benchmark 189 | 190 | See the repository [parsers-bench](https://github.com/minad/parsers-bench/). 191 | 192 | ``` 193 | parsers-bench-0.1.0: benchmarks 194 | Running 1 benchmarks... 195 | Benchmark bench-speed: RUNNING... 196 | benchmarking CSV (PariPari)/csv-40.csv 197 | time 16.34 μs (16.18 μs .. 16.53 μs) 198 | 0.998 R² (0.996 R² .. 0.999 R²) 199 | mean 16.87 μs (16.41 μs .. 18.64 μs) 200 | std dev 2.703 μs (569.0 ns .. 5.580 μs) 201 | variance introduced by outliers: 94% (severely inflated) 202 | 203 | benchmarking CSV (PariPari, Reporter)/csv-40.csv 204 | time 104.9 μs (103.2 μs .. 106.5 μs) 205 | 0.985 R² (0.972 R² .. 0.992 R²) 206 | mean 128.0 μs (118.4 μs .. 140.0 μs) 207 | std dev 40.73 μs (30.44 μs .. 51.00 μs) 208 | variance introduced by outliers: 98% (severely inflated) 209 | 210 | benchmarking CSV (Attoparsec)/csv-40.csv 211 | time 57.69 μs (56.52 μs .. 60.22 μs) 212 | 0.917 R² (0.796 R² .. 0.988 R²) 213 | mean 68.62 μs (62.95 μs .. 81.74 μs) 214 | std dev 27.44 μs (15.54 μs .. 49.65 μs) 215 | variance introduced by outliers: 99% (severely inflated) 216 | 217 | benchmarking CSV (Megaparsec)/csv-40.csv 218 | time 52.12 μs (51.54 μs .. 52.98 μs) 219 | 0.996 R² (0.993 R² .. 0.998 R²) 220 | mean 54.34 μs (53.25 μs .. 56.03 μs) 221 | std dev 4.705 μs (3.416 μs .. 6.766 μs) 222 | variance introduced by outliers: 79% (severely inflated) 223 | 224 | benchmarking Log (PariPari)/log-40.log 225 | time 364.4 μs (359.8 μs .. 369.6 μs) 226 | 0.998 R² (0.997 R² .. 0.999 R²) 227 | mean 362.3 μs (359.7 μs .. 365.9 μs) 228 | std dev 10.50 μs (7.760 μs .. 14.60 μs) 229 | variance introduced by outliers: 22% (moderately inflated) 230 | 231 | benchmarking Log (PariPari, Reporter)/log-40.log 232 | time 411.5 μs (404.1 μs .. 421.8 μs) 233 | 0.984 R² (0.964 R² .. 0.996 R²) 234 | mean 425.2 μs (411.3 μs .. 453.2 μs) 235 | std dev 60.17 μs (37.11 μs .. 88.14 μs) 236 | variance introduced by outliers: 87% (severely inflated) 237 | 238 | benchmarking Log (Attoparsec)/log-40.log 239 | time 368.6 μs (364.2 μs .. 372.5 μs) 240 | 0.998 R² (0.997 R² .. 0.999 R²) 241 | mean 364.6 μs (360.9 μs .. 371.3 μs) 242 | std dev 16.87 μs (10.96 μs .. 27.01 μs) 243 | variance introduced by outliers: 42% (moderately inflated) 244 | 245 | benchmarking Log (Megaparsec)/log-40.log 246 | time 412.4 μs (405.9 μs .. 423.8 μs) 247 | 0.992 R² (0.982 R² .. 0.998 R²) 248 | mean 419.7 μs (410.9 μs .. 433.7 μs) 249 | std dev 34.86 μs (21.22 μs .. 51.70 μs) 250 | variance introduced by outliers: 69% (severely inflated) 251 | 252 | benchmarking JSON (PariPari)/json-40.json 253 | time 20.33 μs (20.08 μs .. 20.68 μs) 254 | 0.998 R² (0.997 R² .. 1.000 R²) 255 | mean 20.48 μs (20.27 μs .. 21.02 μs) 256 | std dev 1.179 μs (578.2 ns .. 2.199 μs) 257 | variance introduced by outliers: 65% (severely inflated) 258 | 259 | benchmarking JSON (PariPari, Reporter)/json-40.json 260 | time 76.16 μs (74.89 μs .. 78.25 μs) 261 | 0.994 R² (0.987 R² .. 0.999 R²) 262 | mean 76.02 μs (74.68 μs .. 78.57 μs) 263 | std dev 5.598 μs (3.438 μs .. 8.326 μs) 264 | variance introduced by outliers: 72% (severely inflated) 265 | 266 | benchmarking JSON (Attoparsec)/json-40.json 267 | time 21.67 μs (21.43 μs .. 21.97 μs) 268 | 0.999 R² (0.998 R² .. 0.999 R²) 269 | mean 21.69 μs (21.48 μs .. 21.99 μs) 270 | std dev 836.0 ns (554.7 ns .. 1.371 μs) 271 | variance introduced by outliers: 45% (moderately inflated) 272 | 273 | benchmarking JSON (Megaparsec)/json-40.json 274 | time 31.50 μs (30.84 μs .. 32.42 μs) 275 | 0.994 R² (0.989 R² .. 0.998 R²) 276 | mean 31.45 μs (30.88 μs .. 32.42 μs) 277 | std dev 2.377 μs (1.554 μs .. 3.610 μs) 278 | variance introduced by outliers: 75% (severely inflated) 279 | 280 | benchmarking JSON (PariPari, highlevel)/json-40.json 281 | time 30.63 μs (30.08 μs .. 31.24 μs) 282 | 0.998 R² (0.996 R² .. 0.999 R²) 283 | mean 30.69 μs (30.35 μs .. 31.27 μs) 284 | std dev 1.459 μs (1.056 μs .. 2.053 μs) 285 | variance introduced by outliers: 54% (severely inflated) 286 | 287 | benchmarking JSON (PariPari, Reporter, highlevel)/json-40.json 288 | time 109.8 μs (107.2 μs .. 113.2 μs) 289 | 0.995 R² (0.992 R² .. 0.998 R²) 290 | mean 110.0 μs (108.6 μs .. 112.2 μs) 291 | std dev 6.079 μs (4.499 μs .. 7.572 μs) 292 | variance introduced by outliers: 57% (severely inflated) 293 | 294 | benchmarking JSON (Attoparsec, highlevel)/json-40.json 295 | time 33.94 μs (33.56 μs .. 34.39 μs) 296 | 0.998 R² (0.997 R² .. 0.999 R²) 297 | mean 34.31 μs (33.87 μs .. 35.74 μs) 298 | std dev 2.511 μs (896.2 ns .. 5.032 μs) 299 | variance introduced by outliers: 74% (severely inflated) 300 | 301 | benchmarking JSON (Megaparsec, highlevel)/json-40.json 302 | time 58.52 μs (56.70 μs .. 60.51 μs) 303 | 0.993 R² (0.990 R² .. 0.998 R²) 304 | mean 57.58 μs (56.73 μs .. 58.92 μs) 305 | std dev 3.511 μs (2.575 μs .. 4.710 μs) 306 | variance introduced by outliers: 64% (severely inflated) 307 | 308 | Benchmark bench-speed: FINISH 309 | ``` 310 | 311 | ## Thanks 312 | 313 | * Mark Karpov @mrkkrp - For the `parser-combinators` library, 314 | which was extracted from Megaparsec and parsers-bench. 315 | The json example above is adapted from parsers-bench. 316 | 317 | ## Related projects 318 | 319 | * [parser-combinators](http://hackage.haskell.org/package/parser-combinators) 320 | * [parsec](http://hackage.haskell.org/package/parsec) 321 | * [megaparsec](http://hackage.haskell.org/package/megaparsec) 322 | * [attoparsec](http://hackage.haskell.org/package/attoparsec) 323 | * [Earley](http://hackage.haskell.org/package/Earley) 324 | * [trifecta](http://hackage.haskell.org/package/trifecta) 325 | * [parsix](http://hackage.haskell.org/package/parsix) 326 | * [uu-parsinglib](http://hackage.haskell.org/package/uu-parsinglib) 327 | 328 | ## License 329 | 330 | Released under the MIT License. 331 | 332 | Copyright (c) 2018 Daniel Mendler 333 | -------------------------------------------------------------------------------- /example-data/bad.json: -------------------------------------------------------------------------------- 1 | {"widget": { 2 | "debug": "on", 3 | "window": { 4 | "title": "Sample Konfabulator Widget", 5 | "name": "main_window", 6 | "width": 500, 7 | "height": 500 8 | "image": { 9 | "src": "Images/Sun.png", 10 | "name": "sun1", 11 | "hOffset": 250, 12 | "vOffset": 250, 13 | "alignment": "center" 14 | }, 15 | "text": { 16 | "data": "Click Here", 17 | "size": 36, 18 | "style": "bold", 19 | "name": "text1", 20 | "hOffset": 250, 21 | "vOffset": 100, 22 | "alignment": "center", 23 | "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;" 24 | } 25 | }} 26 | -------------------------------------------------------------------------------- /example-data/good.json: -------------------------------------------------------------------------------- 1 | {"widget": { 2 | "debug": "on", 3 | "window": { 4 | "title": "Sample Konfabulator Widget", 5 | "name": "main_window", 6 | "width": 500, 7 | "height": 500 8 | }, 9 | "image": { 10 | "src": "Images/Sun.png", 11 | "name": "sun1", 12 | "hOffset": 250, 13 | "vOffset": 250, 14 | "alignment": "center" 15 | }, 16 | "text": { 17 | "data": "Click Here", 18 | "size": 36, 19 | "style": "bold", 20 | "name": "text1", 21 | "hOffset": 250, 22 | "vOffset": 100, 23 | "alignment": "center", 24 | "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;" 25 | } 26 | }} 27 | -------------------------------------------------------------------------------- /example.hs: -------------------------------------------------------------------------------- 1 | 2 | {-# LANGUAGE ConstraintKinds #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | {-# LANGUAGE OverloadedStrings #-} 5 | {-# LANGUAGE Rank2Types #-} 6 | 7 | import Data.Foldable (for_) 8 | import System.Environment (getArgs) 9 | import Text.PariPari 10 | import qualified Data.ByteString as B 11 | 12 | type StringType = B.ByteString 13 | type PMonad p = Parser StringType p 14 | type P a = (forall p. PMonad p => p a) 15 | 16 | -- {-# SPECIALISE_ALL PMonad p = p ~ Acceptor StringType #-} 17 | -- {-# SPECIALISE_ALL PMonad p = p ~ Reporter StringType #-} 18 | -- {-# SPECIALISE_ALL P = Acceptor StringType #-} 19 | -- {-# SPECIALISE_ALL P = Reporter StringType #-} 20 | 21 | data Value 22 | = Object ![(StringType, Value)] 23 | | Array ![Value] 24 | | String !StringType 25 | | Number !Integer !Integer 26 | | Bool !Bool 27 | | Null 28 | deriving (Eq, Show) 29 | 30 | json :: P Value 31 | json = space *> (object <|> array) "json" 32 | 33 | object :: P Value 34 | object = Object <$> (char '{' *> space *> sepBy pair (space *> char ',' *> space) <* space <* char '}') "object" 35 | 36 | pair :: P (StringType, Value) 37 | pair = (,) <$> (text <* space) <*> (char ':' *> space *> value) 38 | 39 | array :: P Value 40 | array = Array <$> (char '[' *> sepBy value (space *> char ',' *> space) <* space <* char ']') "array" 41 | 42 | value :: P Value 43 | value = 44 | (String <$> text) 45 | <|> object 46 | <|> array 47 | <|> (Bool False <$ string "false") 48 | <|> (Bool True <$ string "true") 49 | <|> (Null <$ string "null") 50 | <|> number 51 | 52 | text :: P StringType 53 | text = char '"' *> takeCharsWhile (/= '"') <* char '"' "text" 54 | 55 | number :: P Value 56 | number = label "number" $ do 57 | neg <- sign 58 | frac <- fractionDec (pure ()) 59 | pure $ case frac of 60 | Left n -> Number (neg n) 0 61 | Right (c, _, e) -> Number (neg c) e 62 | 63 | space :: P () 64 | space = skipCharsWhile (\c -> c == ' ' || c == '\n' || c == '\t') 65 | 66 | main :: IO () 67 | main = do 68 | args <- getArgs 69 | case args of 70 | [file] -> do 71 | src <- B.readFile file 72 | let (result, reports) = runParser json file src 73 | for_ reports $ putStrLn . showReport 74 | print result 75 | _ -> error "Usage: paripari-example test.json" 76 | -------------------------------------------------------------------------------- /paripari.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.12 2 | name: paripari 3 | version: 0.7.0.0 4 | synopsis: Parser combinators with fast-path and slower fallback for error reporting 5 | description: PariPari offers two parsing strategies. There is a fast Acceptor and a slower Reporter. If the Acceptor fails, the Reporter returns a report about the parsing errors. 6 | category: Text 7 | stability: experimental 8 | homepage: https://github.com/minad/paripari#readme 9 | bug-reports: https://github.com/minad/paripari/issues 10 | author: Daniel Mendler 11 | maintainer: Daniel Mendler 12 | copyright: 2018 Daniel Mendler 13 | license: MIT 14 | license-file: LICENSE 15 | tested-with: GHC == 8.4.4, GHC == 8.6.5, GHC == 8.8.3, GHC == 8.10.1 16 | build-type: Simple 17 | 18 | source-repository head 19 | type: git 20 | location: https://github.com/minad/paripari 21 | 22 | library 23 | exposed-modules: 24 | Text.PariPari 25 | Text.PariPari.Internal.Acceptor 26 | Text.PariPari.Internal.Chunk 27 | Text.PariPari.Internal.Class 28 | Text.PariPari.Internal.Combinators 29 | Text.PariPari.Internal.Reporter 30 | Text.PariPari.Internal.Run 31 | Text.PariPari.Lens 32 | other-modules: 33 | Paths_paripari 34 | hs-source-dirs: 35 | src 36 | ghc-options: -O2 -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances 37 | build-depends: 38 | base >=4.8 && <5 39 | , bytestring >=0.10 && <0.11 40 | , parser-combinators >=1.0 && <1.3 41 | , text >=0.11 && <1.3 42 | default-language: Haskell2010 43 | 44 | executable paripari-example 45 | main-is: example.hs 46 | other-modules: 47 | Paths_paripari 48 | ghc-options: -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances 49 | build-depends: 50 | base >=4.8 && <5 51 | , bytestring >=0.10 && <0.11 52 | , paripari 53 | , parser-combinators >=1.0 && <1.3 54 | , text >=0.11 && <1.3 55 | default-language: Haskell2010 56 | 57 | executable paripari-specialise-all 58 | main-is: specialise-all.hs 59 | other-modules: 60 | Paths_paripari 61 | ghc-options: -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances 62 | build-depends: 63 | base >=4.8 && <5 64 | , bytestring >=0.10 && <0.11 65 | , paripari 66 | , parser-combinators >=1.0 && <1.3 67 | , text >=0.11 && <1.3 68 | default-language: Haskell2010 69 | 70 | test-suite test 71 | type: exitcode-stdio-1.0 72 | main-is: test.hs 73 | other-modules: 74 | Paths_paripari 75 | hs-source-dirs: 76 | test 77 | ghc-options: -O0 -Wall -Wcompat -Widentities -Wmonomorphism-restriction -Wincomplete-uni-patterns -Wincomplete-record-updates -Wtabs -fprint-potential-instances 78 | build-depends: 79 | base >=4.8 && <5 80 | , bytestring >=0.10 && <0.11 81 | , paripari 82 | , parser-combinators >=1.0 && <1.3 83 | , random 84 | , tasty 85 | , tasty-hunit 86 | , text >=0.11 && <1.3 87 | default-language: Haskell2010 88 | -------------------------------------------------------------------------------- /specialise-all.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ConstraintKinds #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | {-# LANGUAGE OverloadedStrings #-} 4 | {-# LANGUAGE Rank2Types #-} 5 | 6 | import Data.Foldable (for_) 7 | import System.Environment (getArgs) 8 | import Text.PariPari 9 | import qualified Data.Char as C 10 | import qualified Data.List.NonEmpty as NE 11 | import qualified Data.Text as T 12 | import qualified Data.Text.IO as T 13 | 14 | type StringType = T.Text 15 | type PMonad p = Parser StringType p 16 | type P a = (forall p. PMonad p => p a) 17 | 18 | data Type 19 | = TypeName !StringType 20 | | TypeVar !StringType 21 | | TypeApp !Type !(NE.NonEmpty Type) 22 | | TypeEq !Type !Type 23 | | TypeConstr !Type !Type 24 | | TypeLam !Type !Type 25 | | TypeTuple ![Type] 26 | | TypeList !Type 27 | deriving (Show, Eq) 28 | 29 | data SourceLine 30 | = SpecialiseAll !Type !Type 31 | | TypeDecl !(NE.NonEmpty StringType) !Type 32 | | OtherLine !StringType 33 | deriving (Show) 34 | 35 | source :: P [SourceLine] 36 | source = sepBy sourceLine (char '\n') <* eof 37 | 38 | sourceLine :: P SourceLine 39 | sourceLine = specialiseAll <|> typeDecl <|> otherLine 40 | 41 | otherLine :: P SourceLine 42 | otherLine = OtherLine <$> takeCharsWhile (/= '\n') 43 | 44 | specialiseAll :: P SourceLine 45 | specialiseAll = SpecialiseAll 46 | <$> (symbol "{-#" *> symbol "SPECIALISE_ALL" *> type_) 47 | <*> (symbol "=" *> type_ <* symbol "#-}") 48 | 49 | identifierAtom :: PMonad p => (Char -> Bool) -> p () 50 | identifierAtom f = satisfy f *> skipCharsWhile (\c -> C.isAlphaNum c || c == '_' || c == '\'') 51 | 52 | name :: P StringType 53 | name = asChunk (sepEndBy (identifierAtom C.isUpper) (char '.') *> 54 | identifierAtom C.isLower) <* space 55 | 56 | typeName :: P StringType 57 | typeName = asChunk (void $ sepBy1 (identifierAtom C.isUpper) (char '.')) <* space 58 | 59 | symbol :: PMonad p => String -> p StringType 60 | symbol s = string s <* space 61 | 62 | typeTuple :: P Type 63 | typeTuple = do 64 | ts <- between (symbol "(") (symbol ")") (sepBy type_ (symbol ",")) 65 | pure $ case ts of 66 | [] -> TypeName "()" 67 | [t] -> t 68 | _ -> TypeTuple ts 69 | 70 | typeAtom :: P Type 71 | typeAtom = 72 | TypeName <$> typeName 73 | <|> TypeVar <$> name 74 | <|> TypeList <$> between (symbol "[") (symbol "]") type_ 75 | <|> typeTuple 76 | 77 | typeApp :: P Type 78 | typeApp = do 79 | t <- typeAtom 80 | option t $ TypeApp t <$> some typeAtom 81 | 82 | type_ :: P Type 83 | type_ = do 84 | t <- typeApp 85 | TypeEq t <$> (symbol "~" *> type_) 86 | <|> TypeLam t <$> (symbol "->" *> type_) 87 | <|> TypeConstr t <$> (symbol "=>" *> type_) 88 | <|> pure t 89 | 90 | typeDecl :: P SourceLine 91 | typeDecl = TypeDecl <$> sepBy1 name (symbol ",") <*> (symbol "::" *> type_) 92 | 93 | space :: P () 94 | space = skipCharsWhile (== ' ') 95 | 96 | showType :: Type -> StringType 97 | showType (TypeName t) = t 98 | showType (TypeVar t) = t 99 | showType (TypeApp t a) = "(" <> showType t <> " " <> T.intercalate " " (NE.toList $ fmap showType a) <> ")" 100 | showType (TypeEq a b) = "(" <> showType a <> " ~ " <> showType b <> ")" 101 | showType (TypeLam a b) = "(" <> showType a <> " -> " <> showType b <> ")" 102 | showType (TypeConstr a b) = "(" <> showType a <> " => " <> showType b <> ")" 103 | showType (TypeList t) = "[" <> showType t <> "]" 104 | showType (TypeTuple t) = "(" <> T.intercalate ", " (fmap showType t) <> ")" 105 | 106 | showSource :: SourceLine -> StringType 107 | showSource (SpecialiseAll from to) = "-- SPECIALISE_ALL " <> showType from <> " = " <> showType to 108 | showSource (TypeDecl names t) = T.intercalate "," (NE.toList names) <> " :: " <> showType t 109 | showSource (OtherLine str) = str 110 | 111 | substitute :: [(StringType, Type)] -> Type -> Type 112 | substitute vars t@(TypeVar v) 113 | | Just t' <- lookup v vars = t' 114 | | otherwise = t 115 | substitute _ t@TypeName{} = t 116 | substitute vars (TypeApp t a) = TypeApp (substitute vars t) (fmap (substitute vars) a) 117 | substitute vars (TypeEq a b) = TypeEq (substitute vars a) (substitute vars b) 118 | substitute vars (TypeConstr a b) = TypeConstr (substitute vars a) (substitute vars b) 119 | substitute vars (TypeLam a b) = TypeLam (substitute vars a) (substitute vars b) 120 | substitute vars (TypeTuple t) = TypeTuple (fmap (substitute vars) t) 121 | substitute vars (TypeList t) = TypeList (substitute vars t) 122 | 123 | unify :: Type -> Type -> Maybe [(StringType, Type)] 124 | unify (TypeVar v) t = 125 | Just [(v, t)] 126 | unify (TypeName t1) (TypeName t2) 127 | | t1 == t2 = Just [] 128 | unify (TypeApp t1 a1) (TypeApp t2 a2) | length a1 == length a2 = do 129 | t <- unify t1 t2 130 | a <- foldMap (uncurry unify) $ NE.zip a1 a2 131 | pure $ t <> a 132 | unify (TypeEq a1 b1) (TypeEq a2 b2) = do 133 | a <- unify a1 a2 134 | b <- unify b1 b2 135 | pure $ a <> b 136 | unify (TypeConstr a1 b1) (TypeConstr a2 b2) = do 137 | a <- unify a1 a2 138 | b <- unify b1 b2 139 | pure $ a <> b 140 | unify (TypeLam a1 b1) (TypeLam a2 b2) = do 141 | a <- unify a1 a2 142 | b <- unify b1 b2 143 | pure $ a <> b 144 | unify (TypeTuple t1) (TypeTuple t2) | length t1 == length t2 = 145 | foldMap (uncurry unify) $ zip t1 t2 146 | unify (TypeList t1) (TypeList t2) = 147 | unify t1 t2 148 | unify _ _ = Nothing 149 | 150 | simplify :: Type -> Type 151 | simplify t@TypeVar{} = t 152 | simplify t@TypeName{} = t 153 | simplify (TypeApp t a) = TypeApp (simplify t) (fmap simplify a) 154 | simplify (TypeEq a b) = TypeEq (simplify a) (simplify b) 155 | simplify (TypeLam a b) = TypeLam (simplify a) (simplify b) 156 | simplify (TypeTuple t) = TypeTuple (fmap simplify t) 157 | simplify (TypeList t) = TypeList (simplify t) 158 | simplify (TypeConstr (TypeEq (TypeVar v) t) t') = substitute [(v, t)] t' 159 | simplify (TypeConstr c t) = TypeConstr (TypeTuple otherConstraints) (substitute constraintVars t) 160 | where eqConstraints = [e | e@(TypeEq TypeVar{} _) <- constraints] 161 | constraintVars = [(v,x) | TypeEq (TypeVar v) x <- eqConstraints] 162 | otherConstraints = filter (\x -> all (/= x) eqConstraints) constraints 163 | constraints = case c of 164 | TypeTuple xs -> xs 165 | x -> [x] 166 | 167 | specialiseType :: Type -> Type -> Type -> Type 168 | specialiseType from to typ 169 | | Just vars <- unify from typ = substitute vars to 170 | | otherwise = 171 | case typ of 172 | TypeName{} -> typ 173 | TypeVar{} -> typ 174 | TypeApp t a -> TypeApp (specialiseType from to t) (fmap (specialiseType from to) a) 175 | TypeEq a b -> TypeEq (specialiseType from to a) (specialiseType from to b) 176 | TypeConstr a b -> TypeConstr (specialiseType from to a) (specialiseType from to b) 177 | TypeLam a b -> TypeLam (specialiseType from to a) (specialiseType from to b) 178 | TypeTuple t -> TypeTuple (fmap (specialiseType from to) t) 179 | TypeList t -> TypeList (specialiseType from to t) 180 | 181 | specialise :: [(Type, Type)] -> (NE.NonEmpty StringType, Type) -> [StringType] 182 | specialise specs (names, typ) = concatMap go specs 183 | where go (from, to) 184 | | typ' <- specialiseType from to typ, typ /= typ' = 185 | ["{-# SPECIALISE " <> n <> " :: " <> showType (simplify typ') <> " #-}" | n <- NE.toList names] 186 | | otherwise = [] 187 | 188 | main :: IO () 189 | main = do 190 | args <- getArgs 191 | case args of 192 | [src, _, dst] -> do 193 | code <- T.readFile src 194 | let (result, reports) = runParser source src code 195 | for_ reports $ putStrLn . showReport 196 | case result of 197 | Nothing -> pure () 198 | Just ls -> do 199 | let specialisers = [(from, to) | SpecialiseAll from to <- ls] 200 | specialisedTypeDecls = concatMap (specialise specialisers) [(n, t) | TypeDecl n t <- ls] 201 | T.writeFile dst $ T.intercalate "\n" $ map showSource ls <> specialisedTypeDecls 202 | _ -> error "Usage: paripari-specialise-all src _ dst" 203 | -------------------------------------------------------------------------------- /src/Text/PariPari.hs: -------------------------------------------------------------------------------- 1 | module Text.PariPari ( 2 | C.Parser(..) 3 | , C.Pos(..) 4 | , C.Error(..) 5 | , C.showError 6 | 7 | , K.Chunk(showChunk) 8 | 9 | , U.runParser 10 | , U.runParserWithOptions 11 | 12 | , A.Acceptor 13 | , A.runAcceptor 14 | 15 | , R.Reporter 16 | , R.Report(..) 17 | , R.ErrorContext(..) 18 | , R.ReportOptions(..) 19 | , R.runReporter 20 | , R.showReport 21 | , R.showErrors 22 | , R.runReporterWithOptions 23 | , R.defaultReportOptions 24 | 25 | , module Text.PariPari.Internal.Combinators 26 | ) where 27 | 28 | import Text.PariPari.Internal.Combinators 29 | import qualified Text.PariPari.Internal.Acceptor as A 30 | import qualified Text.PariPari.Internal.Chunk as K 31 | import qualified Text.PariPari.Internal.Class as C 32 | import qualified Text.PariPari.Internal.Reporter as R 33 | import qualified Text.PariPari.Internal.Run as U 34 | -------------------------------------------------------------------------------- /src/Text/PariPari/Internal/Acceptor.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE FlexibleInstances #-} 4 | {-# LANGUAGE MagicHash #-} 5 | {-# LANGUAGE MultiParamTypeClasses #-} 6 | {-# LANGUAGE MultiWayIf #-} 7 | {-# LANGUAGE NamedFieldPuns #-} 8 | {-# LANGUAGE PatternSynonyms #-} 9 | {-# LANGUAGE Rank2Types #-} 10 | {-# LANGUAGE ScopedTypeVariables #-} 11 | {-# LANGUAGE TypeApplications #-} 12 | {-# LANGUAGE UnboxedTuples #-} 13 | module Text.PariPari.Internal.Acceptor ( 14 | Acceptor(..) 15 | , Env(..) 16 | , get 17 | , local 18 | , runAcceptor 19 | ) where 20 | 21 | import Control.Monad (void) 22 | import Data.Semigroup as Sem 23 | import Data.String (IsString(..)) 24 | import GHC.Base hiding (State#) 25 | import GHC.Word 26 | import Text.PariPari.Internal.Chunk 27 | import Text.PariPari.Internal.Class 28 | import qualified Control.Monad.Fail as Fail 29 | 30 | data Env k = Env 31 | { _envBuf :: !(Buffer k) 32 | , _envFile :: !FilePath 33 | , _envRefLine :: Int# 34 | , _envRefCol :: Int# 35 | } 36 | 37 | type State# = (# Int#, Int#, Int# #) 38 | 39 | type Result# a = (# Int# | (# State#, a #) #) 40 | 41 | pattern Ok# :: State# -> a -> Result# a 42 | pattern Ok# s a = (# | (# s, a #) #) 43 | pattern Err# :: Int# -> Result# a 44 | pattern Err# o = (# o | #) 45 | {-# COMPLETE Ok#, Err# #-} 46 | 47 | _stLine :: State# -> Int# 48 | _stLine (# _, x, _ #) = x 49 | {-# INLINE _stLine #-} 50 | 51 | _stCol :: State# -> Int# 52 | _stCol (# _, _, x #) = x 53 | {-# INLINE _stCol #-} 54 | 55 | _stOff :: State# -> Int# 56 | _stOff (# x, _, _ #) = x 57 | {-# INLINE _stOff #-} 58 | 59 | -- | Parser which is optimised for fast parsing. Error reporting 60 | -- is minimal. 61 | newtype Acceptor k a = Acceptor 62 | { unAcceptor :: Env k -> State# -> Result# a } 63 | 64 | instance (Chunk k, Semigroup a) => Sem.Semigroup (Acceptor k a) where 65 | p1 <> p2 = (<>) <$> p1 <*> p2 66 | {-# INLINE (<>) #-} 67 | 68 | instance (Chunk k, Semigroup a, Monoid a) => Monoid (Acceptor k a) where 69 | mempty = pure mempty 70 | {-# INLINE mempty #-} 71 | 72 | mappend = (<>) 73 | {-# INLINE mappend #-} 74 | 75 | instance Functor (Acceptor k) where 76 | fmap f p = Acceptor $ \env st -> case unAcceptor p env st of 77 | Err# o -> Err# o 78 | Ok# st' x -> Ok# st' (f x) 79 | {-# INLINE fmap #-} 80 | 81 | instance Chunk k => Applicative (Acceptor k) where 82 | pure x = Acceptor $ \_ st -> Ok# st x 83 | {-# INLINE pure #-} 84 | 85 | f <*> a = Acceptor $ \env st -> case unAcceptor f env st of 86 | Err# o -> Err# o 87 | Ok# s f' -> case unAcceptor a env s of 88 | Err# o -> Err# o 89 | Ok# s' a' -> Ok# s' (f' a') 90 | {-# INLINE (<*>) #-} 91 | 92 | p1 *> p2 = do 93 | void p1 94 | p2 95 | {-# INLINE (*>) #-} 96 | 97 | p1 <* p2 = do 98 | x <- p1 99 | void p2 100 | pure x 101 | {-# INLINE (<*) #-} 102 | 103 | instance Chunk k => Alternative (Acceptor k) where 104 | empty = Acceptor $ \_ st -> Err# (_stOff st) 105 | {-# INLINE empty #-} 106 | 107 | p1 <|> p2 = Acceptor $ \env st -> 108 | case unAcceptor p1 env st of 109 | Ok# st' x -> Ok# st' x 110 | Err# _ -> unAcceptor p2 env st 111 | {-# INLINE (<|>) #-} 112 | 113 | instance Chunk k => MonadPlus (Acceptor k) 114 | 115 | instance Chunk k => Monad (Acceptor k) where 116 | p >>= f = Acceptor $ \env st -> 117 | case unAcceptor p env st of 118 | Err# o -> Err# o 119 | Ok# s x -> unAcceptor (f x) env s 120 | {-# INLINE (>>=) #-} 121 | 122 | #if !MIN_VERSION_base(4,11,0) 123 | fail = Fail.fail 124 | {-# INLINE fail #-} 125 | #endif 126 | 127 | instance Chunk k => Fail.MonadFail (Acceptor k) where 128 | fail msg = failWith $ EFail msg 129 | {-# INLINE fail #-} 130 | 131 | instance Chunk k => Parser k (Acceptor k) where 132 | getPos = get $ \_ st -> Pos (I# (_stLine st)) (I# (_stCol st)) 133 | {-# INLINE getPos #-} 134 | 135 | getFile = get $ \env _ -> _envFile env 136 | {-# INLINE getFile #-} 137 | 138 | getRefPos = get $ \env _ -> Pos (I# (_envRefLine env)) (I# (_envRefCol env)) 139 | {-# INLINE getRefPos #-} 140 | 141 | withRefPos p = local (\st env -> env { _envRefLine = _stLine st, _envRefCol = _stCol st }) p 142 | {-# INLINE withRefPos #-} 143 | 144 | notFollowedBy p = Acceptor $ \env st -> 145 | case unAcceptor p env st of 146 | Err# _ -> Ok# st () 147 | Ok# _ _ -> Err# (_stOff st) 148 | {-# INLINE notFollowedBy #-} 149 | 150 | lookAhead p = Acceptor $ \env st -> do 151 | case unAcceptor p env st of 152 | Err# _ -> Err# (_stOff st) 153 | Ok# _ x -> Ok# st x 154 | {-# INLINE lookAhead #-} 155 | 156 | failWith _ = Acceptor $ \_ st -> Err# (_stOff st) 157 | {-# INLINE failWith #-} 158 | 159 | eof = Acceptor $ \env st -> 160 | case indexByte @k (_envBuf env) (_stOff st) `eqWord#` int2Word# 0# of 161 | 1# -> Ok# st () 162 | _ -> Err# (_stOff st) 163 | {-# INLINE eof #-} 164 | 165 | label _ p = p 166 | {-# INLINE label #-} 167 | 168 | hidden p = p 169 | {-# INLINE hidden #-} 170 | 171 | recover p _ = p 172 | {-# INLINE recover #-} 173 | 174 | try p = Acceptor $ \env st -> 175 | case unAcceptor p env st of 176 | Ok# st' x -> Ok# st' x 177 | Err# _ -> Err# (_stOff st) 178 | {-# INLINE try #-} 179 | 180 | p1 p2 = Acceptor $ \env st -> 181 | case unAcceptor p1 env st of 182 | Ok# st' x -> Ok# st' x 183 | Err# o | 1# <- o ==# _stOff st -> unAcceptor p2 env st 184 | | otherwise -> Err# o 185 | {-# INLINE () #-} 186 | 187 | chunk k = Acceptor $ \env (# stOff, stLine, stCol #) -> 188 | case matchChunk @k (_envBuf env) stOff k of 189 | -1# -> Err# stOff 190 | n -> Ok# (# stOff +# n, stLine, stCol +# n #) k 191 | {-# INLINE chunk #-} 192 | 193 | asChunk p = do 194 | I# begin' <- get (const (\s -> I# (_stOff s))) 195 | p 196 | I# end' <- get (const (\s -> I# (_stOff s))) 197 | src <- get (\env _ -> _envBuf env) 198 | pure $ packChunk src begin' (end' -# begin') 199 | {-# INLINE asChunk #-} 200 | 201 | scan f = Acceptor $ \env (# stOff, stLine, stCol #) -> 202 | case indexChar @k (_envBuf env) stOff of 203 | (# c, w #) 204 | | 1# <- c `neChar#` '\0'#, Just r <- f (C# c) -> 205 | Ok# (# stOff +# w, 206 | case c `eqChar#` '\n'# of 1# -> stLine +# 1#; _ -> stLine, 207 | case c `eqChar#` '\n'# of 1# -> 1#; _ -> stCol +# 1# #) r 208 | _ -> Err# stOff 209 | {-# INLINE scan #-} 210 | 211 | -- By inling this combinator, GHC should figure out the `charWidth` 212 | -- of the character resulting in an optimised decoder. 213 | char '\0' = error "Character '\\0' cannot be parsed because it is used as sentinel" 214 | char c@(C# c') = 215 | Acceptor $ \env (# stOff, stLine, stCol #) -> 216 | case matchChar @k (_envBuf env) stOff c' of 217 | -1# -> Err# stOff 218 | w -> Ok# (# stOff +# w, 219 | if c == '\n' then stLine +# 1# else stLine, 220 | if c == '\n' then 1# else stCol +# 1# #) c 221 | {-# INLINE char #-} 222 | 223 | asciiScan f = Acceptor $ \env (# stOff, stLine, stCol #) -> 224 | if | b <- W8# (indexByte @k (_envBuf env) stOff), 225 | b /= 0, 226 | b < 128, 227 | Just x <- f b -> 228 | Ok# (# stOff +# 1# 229 | , if b == asc_newline then stLine +# 1# else stLine 230 | , if b == asc_newline then 1# else stCol +# 1# #) x 231 | | otherwise -> 232 | Err# stOff 233 | {-# INLINE asciiScan #-} 234 | 235 | asciiByte 0 = error "Character '\\0' cannot be parsed because it is used as sentinel" 236 | asciiByte b 237 | | b >= 128 = error "Not an ASCII character" 238 | | otherwise = Acceptor $ \env (# stOff, stLine, stCol #) -> 239 | if W8# (indexByte @k (_envBuf env) stOff) == b then 240 | Ok# (# stOff +# 1# 241 | , if b == asc_newline then stLine +# 1# else stLine 242 | , if b == asc_newline then 1# else stCol +# 1# #) b 243 | else 244 | Err# stOff 245 | {-# INLINE asciiByte #-} 246 | 247 | instance Chunk k => IsString (Acceptor k k) where 248 | fromString = string 249 | {-# INLINE fromString #-} 250 | 251 | -- | Reader monad, get something from the environment 252 | get :: (Env k -> State# -> a) -> Acceptor k a 253 | get f = Acceptor $ \env st -> Ok# st (f env st) 254 | {-# INLINE get #-} 255 | 256 | -- | Reader monad, modify environment locally 257 | local :: (State# -> Env k -> Env k) -> Acceptor k a -> Acceptor k a 258 | local f p = Acceptor $ \env st -> 259 | unAcceptor p (f st env) st 260 | {-# INLINE local #-} 261 | 262 | -- | Run 'Acceptor' on the given chunk, returning either 263 | -- a simple 'Error' or, if successful, the result. 264 | runAcceptor :: Chunk k => Acceptor k a -> FilePath -> k -> Maybe a 265 | runAcceptor p f k = 266 | let !(# b, off #) = unpackChunk k 267 | in case unAcceptor p (initialEnv f b) (# off, 1#, 1# #) of 268 | Err# _ -> Nothing 269 | Ok# _ x -> Just x 270 | 271 | initialEnv :: FilePath -> Buffer k -> Env k 272 | initialEnv _envFile _envBuf = Env 273 | { _envBuf 274 | , _envFile 275 | , _envRefLine = 1# 276 | , _envRefCol = 1# 277 | } 278 | -------------------------------------------------------------------------------- /src/Text/PariPari/Internal/Chunk.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE AllowAmbiguousTypes #-} 2 | {-# LANGUAGE BangPatterns #-} 3 | {-# LANGUAGE DeriveGeneric #-} 4 | {-# LANGUAGE FlexibleContexts #-} 5 | {-# LANGUAGE MagicHash #-} 6 | {-# LANGUAGE MultiWayIf #-} 7 | {-# LANGUAGE PatternSynonyms #-} 8 | {-# LANGUAGE TypeApplications #-} 9 | {-# LANGUAGE TypeFamilies #-} 10 | {-# LANGUAGE UnboxedTuples #-} 11 | module Text.PariPari.Internal.Chunk ( 12 | Chunk(..) 13 | , showByte 14 | , showByteString 15 | , unsafeAsciiToChar 16 | , asc_0, asc_9, asc_A, asc_E, asc_P, asc_a, asc_e, asc_p, 17 | asc_minus, asc_plus, asc_point, asc_newline 18 | ) where 19 | 20 | import Data.Bits (unsafeShiftL, (.|.), (.&.)) 21 | import Data.ByteString (ByteString) 22 | import Data.Foldable (foldl') 23 | import Data.String (fromString) 24 | import Data.Text (Text) 25 | import GHC.Base 26 | import GHC.Word 27 | import GHC.ForeignPtr 28 | import GHC.Show (showLitChar) 29 | import Numeric (showHex) 30 | import qualified Data.ByteString as B 31 | import qualified Data.ByteString.Internal as B 32 | import qualified Data.Text.Array as T 33 | import qualified Data.Text.Encoding as T 34 | import qualified Data.Text.Internal as T 35 | 36 | class Ord k => Chunk k where 37 | type Buffer k 38 | matchChunk :: Buffer k -> Int# -> k -> Int# -- Returns -1# if not matched 39 | packChunk :: Buffer k -> Int# -> Int# -> k 40 | unpackChunk :: k -> (# Buffer k, Int# #) 41 | showChunk :: k -> String 42 | matchChar :: Buffer k -> Int# -> Char# -> Int# -- Returns -1# if not matched 43 | indexChar :: Buffer k -> Int# -> (# Char#, Int# #) 44 | indexByte :: Buffer k -> Int# -> Word# 45 | stringToChunk :: String -> k 46 | 47 | instance Chunk ByteString where 48 | type Buffer ByteString = ForeignPtr Word8 49 | 50 | matchChunk (ForeignPtr p _) i (B.PS (ForeignPtr p' _) (I# j) (I# n)) = go 0# 51 | where go :: Int# -> Int# 52 | go k | 1# <- k >=# n = n 53 | | w <- indexWord8OffAddr# p (i +# k), 54 | w' <- indexWord8OffAddr# p' (j +# k), 55 | 1# <- w `neWord#` int2Word# 0#, 56 | 1# <- w `eqWord#` w' = go (k +# 1#) 57 | | otherwise = -1# 58 | {-# INLINE matchChunk #-} 59 | 60 | packChunk b i n = B.PS b (I# i) (I# n) 61 | {-# INLINE packChunk #-} 62 | 63 | unpackChunk k = 64 | let !(B.PS b (I# i) _) = k <> fromString "\0\0\0" -- sentinel 65 | in (# b, i #) 66 | {-# INLINE unpackChunk #-} 67 | 68 | showChunk = showByteString 69 | 70 | indexByte (ForeignPtr p _) i = indexWord8OffAddr# p i 71 | {-# INLINE indexByte #-} 72 | 73 | matchChar p i m 74 | | C# m <= '\x7F' = 75 | if | unsafeChr (at p i) == C# m -> 1# 76 | | otherwise -> -1# 77 | | C# m <= '\x7FF' = 78 | if | a1 <- at p i, a2 <- at p (i +# 1#), 79 | unsafeChr (((a1 .&. 31) `unsafeShiftL` 6) 80 | .|. (a2 .&. 0x3F)) == C# m -> 2# 81 | | otherwise -> -1# 82 | | C# m <= '\xFFFF' = 83 | if | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), 84 | unsafeChr (((a1 .&. 15) `unsafeShiftL` 12) 85 | .|. ((a2 .&. 0x3F) `unsafeShiftL` 6) 86 | .|. (a3 .&. 0x3F)) == C# m -> 3# 87 | | otherwise -> -1# 88 | | otherwise = 89 | if | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), a4 <- at p (i +# 3#), 90 | unsafeChr (((a1 .&. 7) `unsafeShiftL` 18) 91 | .|. ((a2 .&. 0x3F) `unsafeShiftL` 12) 92 | .|. ((a3 .&. 0x3F) `unsafeShiftL` 6) 93 | .|. (a4 .&. 0x3F)) == C# m -> 4# 94 | | otherwise -> -1# 95 | {-# INLINE matchChar #-} 96 | 97 | -- TODO detect invalid utf-8? 98 | indexChar p i 99 | | a1 <- at p i, 100 | a1 <= 0x7F = 101 | (# unsafeChr# a1, 1# #) 102 | | a1 <- at p i, a2 <- at p (i +# 1#), 103 | (a1 .&. 0xE0) == 0xC0, 104 | (a2 .&. 0xC0) == 0x80 = 105 | (# unsafeChr# (((a1 .&. 31) `unsafeShiftL` 6) 106 | .|. (a2 .&. 0x3F)), 2# #) 107 | | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), 108 | (a1 .&. 0xF0) == 0xE0, 109 | (a2 .&. 0xC0) == 0x80, 110 | (a3 .&. 0xC0) == 0x80 = 111 | (# unsafeChr# (((a1 .&. 15) `unsafeShiftL` 12) 112 | .|. ((a2 .&. 0x3F) `unsafeShiftL` 6) 113 | .|. (a3 .&. 0x3F)), 3# #) 114 | | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), a4 <- at p (i +# 3#), 115 | (a1 .&. 0xF8) == 0xF0, 116 | (a2 .&. 0xC0) == 0x80, 117 | (a3 .&. 0xC0) == 0x80, 118 | (a4 .&. 0xC0) == 0x80 = 119 | (# unsafeChr# (((a1 .&. 7) `unsafeShiftL` 18) 120 | .|. ((a2 .&. 0x3F) `unsafeShiftL` 12) 121 | .|. ((a3 .&. 0x3F) `unsafeShiftL` 6) 122 | .|. (a4 .&. 0x3F)), 4# #) 123 | | otherwise = (# '\0'#, 0# #) 124 | {-# INLINE indexChar #-} 125 | 126 | stringToChunk t = T.encodeUtf8 $ fromString t 127 | {-# INLINE stringToChunk #-} 128 | 129 | instance Chunk Text where 130 | type Buffer Text = T.Array 131 | 132 | matchChunk a i (T.Text a' (I# j) (I# n)) = go 0# 133 | where go :: Int# -> Int# 134 | go k | 1# <- k >=# n = n 135 | | w <- T.unsafeIndex a (I# (i +# k)), 136 | w' <- T.unsafeIndex a' (I# (j +# k)), 137 | w /= 0, w == w' = go (k +# 1#) 138 | | otherwise = -1# 139 | {-# INLINE matchChunk #-} 140 | 141 | packChunk b i n = T.Text b (I# i) (I# n) 142 | {-# INLINE packChunk #-} 143 | 144 | unpackChunk k = 145 | let !(T.Text b (I# i) _) = k <> fromString "\0" -- sentinel 146 | in (# b, i #) 147 | {-# INLINE unpackChunk #-} 148 | 149 | showChunk = show 150 | 151 | indexByte a i 152 | | W16# c <- T.unsafeIndex a (I# i), 1# <- c `leWord#` (int2Word# 0xFF#) = c 153 | | otherwise = int2Word# 0# 154 | {-# INLINE indexByte #-} 155 | 156 | indexChar a i 157 | | hi <- T.unsafeIndex a (I# i), lo <- T.unsafeIndex a (I# (i +# 1#)) = 158 | if hi < 0xD800 || hi > 0xDFFF then 159 | (# unsafeChr# (fromIntegral hi), 1# #) 160 | else 161 | (# unsafeChr# (0x10000 + ((fromIntegral $ hi - 0xD800) `unsafeShiftL` 10) + (fromIntegral lo - 0xDC00)), 2# #) 162 | {-# INLINE indexChar #-} 163 | 164 | matchChar a i m 165 | | C# m <= '\xFFFF' = 166 | if | unsafeChr (fromIntegral (T.unsafeIndex a (I# i))) == C# m -> 1# 167 | | otherwise -> -1# 168 | | otherwise = 169 | if | hi <- T.unsafeIndex a (I# i), lo <- T.unsafeIndex a (I# (i +# 1#)), 170 | hi >= 0xD800, hi <= 0xDFFF, 171 | C# m == unsafeChr (0x10000 + ((fromIntegral $ hi - 0xD800) `unsafeShiftL` 10) + 172 | (fromIntegral lo - 0xDC00)) -> 2# 173 | | otherwise -> -1# 174 | 175 | stringToChunk t = fromString t 176 | {-# INLINE stringToChunk #-} 177 | 178 | at :: ForeignPtr Word8 -> Int# -> Int 179 | at p i = fromIntegral $ W8# (indexByte @ByteString p i) 180 | {-# INLINE at #-} 181 | 182 | asc_0, asc_9, asc_A, asc_E, asc_P, asc_a, asc_e, asc_p, 183 | asc_minus, asc_plus, asc_point, asc_newline :: Word8 184 | asc_0 = 48 185 | asc_9 = 57 186 | asc_A = 65 187 | asc_E = 69 188 | asc_P = 80 189 | asc_a = 97 190 | asc_e = 101 191 | asc_p = 112 192 | asc_minus = 45 193 | asc_plus = 43 194 | asc_point = 46 195 | asc_newline = 10 196 | 197 | unsafeAsciiToChar :: Word8 -> Char 198 | unsafeAsciiToChar x = unsafeChr (fromIntegral x) 199 | {-# INLINE unsafeAsciiToChar #-} 200 | 201 | unsafeChr# :: Int -> Char# 202 | unsafeChr# (I# i) = chr# i 203 | {-# INLINE unsafeChr# #-} 204 | 205 | byteS :: Word8 -> ShowS 206 | byteS b 207 | | b < 128 = showLitChar $ unsafeAsciiToChar b 208 | | otherwise = ("\\x" <>) . showHex b 209 | 210 | bytesS :: ByteString -> ShowS 211 | bytesS b | B.length b == 1 = byteS $ B.head b 212 | | otherwise = foldl' ((. byteS) . (.)) id $ B.unpack b 213 | 214 | showByte :: Word8 -> String 215 | showByte b = ('\'':) . byteS b . ('\'':) $ "" 216 | 217 | showByteString :: ByteString -> String 218 | showByteString b = ('"':) . bytesS b . ('"':) $ "" 219 | -------------------------------------------------------------------------------- /src/Text/PariPari/Internal/Class.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | {-# LANGUAGE FunctionalDependencies #-} 3 | module Text.PariPari.Internal.Class ( 4 | Parser(..) 5 | , Alternative(..) 6 | , MonadPlus 7 | , Pos(..) 8 | , Error(..) 9 | , showError 10 | , expectedEnd 11 | , unexpectedEnd 12 | , string 13 | ) where 14 | 15 | import Control.Applicative (Alternative(empty, (<|>))) 16 | import Control.Monad (MonadPlus(..)) 17 | import Data.List (intercalate) 18 | import Data.String (IsString) 19 | import Data.Word (Word8) 20 | import GHC.Generics (Generic) 21 | import Text.PariPari.Internal.Chunk 22 | import qualified Control.Monad.Fail as Fail 23 | 24 | -- | Parsing errors 25 | data Error 26 | = EInvalidUtf8 27 | | EExpected [String] 28 | | EUnexpected String 29 | | EFail String 30 | | EIndentNotAligned {-#UNPACK#-}!Int {-#UNPACK#-}!Int 31 | | EIndentOverLine {-#UNPACK#-}!Int {-#UNPACK#-}!Int 32 | | ENotEnoughIndent {-#UNPACK#-}!Int {-#UNPACK#-}!Int 33 | deriving (Eq, Ord, Show, Generic) 34 | 35 | -- | Line and column position starting at (1,1) 36 | data Pos = Pos 37 | { _posLine :: {-#UNPACK#-}!Int 38 | , _posCol :: {-#UNPACK#-}!Int 39 | } deriving (Eq, Show, Generic) 40 | 41 | infixl 3 42 | 43 | -- | Parser class, which specifies the necessary 44 | -- primitives for parsing. All other parser combinators 45 | -- rely on these primitives. 46 | class (Fail.MonadFail p, MonadPlus p, Chunk k, IsString (p k)) => Parser k p | p -> k where 47 | -- | Get file name associated with current parser 48 | getFile :: p FilePath 49 | 50 | -- | Get current position of the parser 51 | getPos :: p Pos 52 | 53 | -- | Get reference position used for indentation-sensitive parsing 54 | getRefPos :: p Pos 55 | 56 | -- | Update reference position with current position 57 | withRefPos :: p a -> p a 58 | 59 | -- | Parser which succeeds when the given parser fails 60 | notFollowedBy :: Show a => p a -> p () 61 | 62 | -- | Look ahead and return result of the given parser 63 | -- The current position stays the same. 64 | lookAhead :: p a -> p a 65 | 66 | -- | Parser failure with detailled 'Error' 67 | failWith :: Error -> p a 68 | 69 | -- | Parser which succeeds at the end of file 70 | eof :: p () 71 | 72 | -- | Annotate the given parser with a label used for error reporting. 73 | -- 74 | -- __Note__: This function has zero cost in the 'Acceptor'. You can 75 | -- use it to improve the error reports without slowing 76 | -- down the fast path of your parser. 77 | label :: String -> p a -> p a 78 | 79 | -- | Hide errors occurring within the given parser 80 | -- from the error report. Based on the given 81 | -- labels an 'Error' is constructed instead. 82 | -- 83 | -- __Note__: This function has zero cost in the 'Acceptor'. You can 84 | -- use it to improve the error reports without slowing 85 | -- down the fast path of your parser. 86 | hidden :: p a -> p a 87 | 88 | -- | Reset position if parser fails 89 | try :: p a -> p a 90 | 91 | -- | Alternative which does not backtrack. 92 | () :: p a -> p a -> p a 93 | 94 | -- | Parse with error recovery. 95 | -- If the parser p fails in `recover p r` 96 | -- the parser r continues at the position where p failed. 97 | -- If the recovering parser r fails too, the whole 98 | -- parser fails. The errors reported by the recovering 99 | -- parser are ignored in any case. 100 | -- Error recovery support is only available 101 | -- in the 'Reporter' instance. 102 | -- 103 | -- __Note__: This function has zero cost in the 'Acceptor'. You can 104 | -- use it to improve the error reports without slowing 105 | -- down the fast path of your parser. 106 | recover :: p a -> p a -> p a 107 | 108 | -- | Parse a chunk of elements. The chunk must not 109 | -- contain multiple lines, otherwise the position information 110 | -- will be invalid. 111 | chunk :: k -> p k 112 | 113 | -- | Run the given parser and return the 114 | -- result as buffer 115 | asChunk :: p () -> p k 116 | 117 | -- | Parse a single character 118 | -- 119 | -- __Note__: The character '\0' cannot be parsed using this combinator 120 | -- since it is used as decoding sentinel. Use 'element' instead. 121 | char :: Char -> p Char 122 | 123 | -- | Scan a single character 124 | -- 125 | -- __Note__: The character '\0' cannot be parsed using this combinator 126 | -- since it is used as decoding sentinel. Use 'elementScan' instead. 127 | scan :: (Char -> Maybe a) -> p a 128 | 129 | -- | Parse a single character within the ASCII charset 130 | -- 131 | -- __Note__: The character '\0' cannot be parsed using this combinator 132 | -- since it is used as decoding sentinel. Use 'element' instead. 133 | asciiByte :: Word8 -> p Word8 134 | 135 | -- | Scan a single character within the ASCII charset 136 | -- 137 | -- __Note__: The character '\0' cannot be parsed using this combinator 138 | -- since it is used as decoding sentinel. Use 'elementScan' instead. 139 | asciiScan :: (Word8 -> Maybe a) -> p a 140 | 141 | -- | Pretty string representation of 'Error' 142 | showError :: Error -> String 143 | showError EInvalidUtf8 = "Invalid UTF-8 character found" 144 | showError (EExpected tokens) = "Expected " <> intercalate ", " tokens 145 | showError (EUnexpected token) = "Unexpected " <> token 146 | showError (EFail msg) = msg 147 | showError (EIndentNotAligned rc c) = "Invalid alignment, expected column " <> show rc <> " expected, got " <> show c 148 | showError (EIndentOverLine rl l) = "Indentation over line, expected line " <> show rl <> ", got " <> show l 149 | showError (ENotEnoughIndent rc c) = "Must be indented deeper than column " <> show rc <> ", got column " <> show c 150 | 151 | expectedEnd :: Error 152 | expectedEnd = EExpected ["end of file"] 153 | 154 | unexpectedEnd :: Error 155 | unexpectedEnd = EUnexpected "end of file" 156 | 157 | -- | Parse a string 158 | string :: Parser k p => String -> p k 159 | string t = chunk (stringToChunk t) 160 | {-# INLINE string #-} 161 | -------------------------------------------------------------------------------- /src/Text/PariPari/Internal/Combinators.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE Rank2Types #-} 3 | module Text.PariPari.Internal.Combinators ( 4 | -- * Basic combinators 5 | void 6 | , (<|>) 7 | , empty 8 | 9 | -- * Control.Monad.Combinators.NonEmpty 10 | , ON.some 11 | , ON.endBy1 12 | , ON.someTill 13 | , ON.sepBy1 14 | , ON.sepEndBy1 15 | 16 | -- * Control.Monad.Combinators 17 | , O.optional -- dont use Applicative version for efficiency 18 | , O.many -- dont use Applicative version for efficiency 19 | , O.between 20 | , O.choice 21 | , O.count 22 | , O.count' 23 | , O.eitherP 24 | , O.endBy 25 | , O.manyTill 26 | , O.option 27 | , O.sepBy 28 | , O.sepEndBy 29 | , O.skipMany 30 | , O.skipSome 31 | , O.skipCount 32 | , O.skipManyTill 33 | , O.skipSomeTill 34 | 35 | -- * Labels 36 | , () 37 | 38 | -- * Position 39 | , getLine 40 | , getCol 41 | , withPos 42 | , withSpan 43 | 44 | -- * Indentation 45 | , getRefCol 46 | , getRefLine 47 | , withRefPos 48 | , align 49 | , indented 50 | , line 51 | , linefold 52 | 53 | -- * Char combinators 54 | , digitByte 55 | , integer 56 | , integer' 57 | , decimal 58 | , octal 59 | , hexadecimal 60 | , digit 61 | , sign 62 | , signed 63 | , fractionHex 64 | , fractionDec 65 | , char' 66 | , notChar 67 | , anyChar 68 | , anyAsciiByte 69 | , alphaNumChar 70 | , digitChar 71 | , letterChar 72 | , lowerChar 73 | , upperChar 74 | , symbolChar 75 | , categoryChar 76 | , punctuationChar 77 | , spaceChar 78 | , asciiChar 79 | , satisfy 80 | , asciiSatisfy 81 | , skipChars 82 | , takeChars 83 | , skipCharsWhile 84 | , takeCharsWhile 85 | , skipCharsWhile1 86 | , takeCharsWhile1 87 | , scanChars 88 | , scanChars1 89 | , string 90 | ) where 91 | 92 | import Control.Applicative (optional) 93 | import Control.Monad (when) 94 | import Control.Monad.Combinators (option, skipCount, skipMany) 95 | import Data.Functor (void) 96 | import Data.Maybe (fromMaybe) 97 | import Data.Word (Word8) 98 | import Prelude hiding (getLine) 99 | import Text.PariPari.Internal.Chunk 100 | import Text.PariPari.Internal.Class 101 | import qualified Control.Monad.Combinators as O 102 | import qualified Control.Monad.Combinators.NonEmpty as ON 103 | import qualified Data.Char as C 104 | 105 | type P k a = (forall p. Parser k p => p a) 106 | 107 | -- | Infix alias for 'label' 108 | () :: Parser k p => p a -> String -> p a 109 | () = flip label 110 | {-# INLINE () #-} 111 | infix 0 112 | 113 | -- | Get line number of the reference position 114 | getRefLine :: P k Int 115 | getRefLine = _posLine <$> getRefPos 116 | {-# INLINE getRefLine #-} 117 | 118 | -- | Get column number of the reference position 119 | getRefCol :: P k Int 120 | getRefCol = _posCol <$> getRefPos 121 | {-# INLINE getRefCol #-} 122 | 123 | -- | Get current line number 124 | getLine :: P k Int 125 | getLine = _posLine <$> getPos 126 | {-# INLINE getLine #-} 127 | 128 | -- | Get current column 129 | getCol :: P k Int 130 | getCol = _posCol <$> getPos 131 | {-# INLINE getCol #-} 132 | 133 | -- | Decorate the parser result with the current position 134 | withPos :: Parser k p => p a -> p (Pos, a) 135 | withPos p = do 136 | pos <- getPos 137 | ret <- p 138 | pure (pos, ret) 139 | {-# INLINE withPos #-} 140 | 141 | -- | Decorate the parser result with the position span 142 | withSpan :: Parser k p => p a -> p (Pos, Pos, a) 143 | withSpan p = do 144 | begin <- getPos 145 | ret <- p 146 | end <- getPos 147 | pure (begin, end, ret) 148 | {-# INLINE withSpan #-} 149 | 150 | -- | Parser succeeds on the same line as the reference line 151 | line :: P k () 152 | line = do 153 | l <- getLine 154 | rl <- getRefLine 155 | when (l /= rl) $ failWith $ EIndentOverLine rl l 156 | {-# INLINE line #-} 157 | 158 | -- | Parser succeeds on the same column as the reference column 159 | align :: P k () 160 | align = do 161 | c <- getCol 162 | rc <- getRefCol 163 | when (c /= rc) $ failWith $ EIndentNotAligned rc c 164 | {-# INLINE align #-} 165 | 166 | -- | Parser succeeds for columns greater than the current reference column 167 | indented :: P k () 168 | indented = do 169 | c <- getCol 170 | rc <- getRefCol 171 | when (c <= rc) $ failWith $ ENotEnoughIndent rc c 172 | {-# INLINE indented #-} 173 | 174 | -- | Parser succeeds either on the reference line or 175 | -- for columns greater than the current reference column 176 | linefold :: P k () 177 | linefold = line <|> indented 178 | {-# INLINE linefold #-} 179 | 180 | -- | Parse a digit byte for the given base. 181 | -- Bases 2 to 36 are supported. 182 | digitByte :: Parser k p => Int -> p Word8 183 | digitByte base = asciiSatisfy (isDigit base) 184 | {-# INLINE digitByte #-} 185 | 186 | isDigit :: Int -> Word8 -> Bool 187 | isDigit base b 188 | | base >= 2 && base <= 10 = b >= asc_0 && b <= asc_0 + fromIntegral base - 1 189 | | base <= 36 = (b >= asc_0 && b <= asc_9) 190 | || ((fromIntegral b :: Word) - fromIntegral asc_A) < fromIntegral (base - 10) 191 | || ((fromIntegral b :: Word) - fromIntegral asc_a) < fromIntegral (base - 10) 192 | |otherwise = error "Text.PariPari.Internal.Combinators.isDigit: Bases 2 to 36 are supported" 193 | {-# INLINE isDigit #-} 194 | 195 | digitToInt :: Int -> Word8 -> Word 196 | digitToInt base b 197 | | n <- (fromIntegral b :: Word) - fromIntegral asc_0, base <= 10 || n <= 9 = n 198 | | n <- (fromIntegral b :: Word) - fromIntegral asc_A, n <= 26 = n + 10 199 | | n <- (fromIntegral b :: Word) - fromIntegral asc_a = n + 10 200 | {-# INLINE digitToInt #-} 201 | 202 | -- | Parse a single digit of the given base and return its value. 203 | -- Bases 2 to 36 are supported. 204 | digit :: Parser k p => Int -> p Word 205 | digit base = digitToInt base <$> asciiSatisfy (isDigit base) 206 | {-# INLINE digit #-} 207 | 208 | -- | Parse an integer of the given base. 209 | -- Returns the integer and the number of digits. 210 | -- Bases 2 to 36 are supported. 211 | -- Digits can be separated by separator, e.g. `optional (char '_')`. 212 | -- Signs are not parsed by this combinator. 213 | integer' :: (Num a, Parser k p) => p sep -> Int -> p (a, Int) 214 | integer' sep base = label (integerLabel base) $ do 215 | d <- digit base 216 | accum 1 $ fromIntegral d 217 | where accum !i !n = next i n <|> pure (n, i) 218 | next !i !n = do 219 | void $ sep 220 | d <- digit base 221 | accum (i + 1) $ n * fromIntegral base + fromIntegral d 222 | {-# INLINE integer' #-} 223 | 224 | -- | Parse an integer of the given base. 225 | -- Bases 2 to 36 are supported. 226 | -- Digits can be separated by separator, e.g. `optional (char '_')`. 227 | -- Signs are not parsed by this combinator. 228 | integer :: (Num a, Parser k p) => p sep -> Int -> p a 229 | integer sep base = label (integerLabel base) $ do 230 | d <- digit base 231 | accum $ fromIntegral d 232 | where accum !n = next n <|> pure n 233 | next !n = do 234 | void $ sep 235 | d <- digit base 236 | accum $ n * fromIntegral base + fromIntegral d 237 | {-# INLINE integer #-} 238 | 239 | integerLabel :: Int -> String 240 | integerLabel 2 = "binary integer" 241 | integerLabel 8 = "octal integer" 242 | integerLabel 10 = "decimal integer" 243 | integerLabel 16 = "hexadecimal integer" 244 | integerLabel b = "integer of base " <> show b 245 | 246 | -- | Parses a decimal integer. 247 | -- Signs are not parsed by this combinator. 248 | decimal :: Num a => P k a 249 | decimal = integer (pure ()) 10 250 | {-# INLINE decimal #-} 251 | 252 | -- | Parses an octal integer. 253 | -- Signs are not parsed by this combinator. 254 | octal :: Num a => P k a 255 | octal = integer (pure ()) 8 256 | {-# INLINE octal #-} 257 | 258 | -- | Parses a hexadecimal integer. 259 | -- Signs are not parsed by this combinator. 260 | hexadecimal :: Num a => P k a 261 | hexadecimal = integer (pure ()) 16 262 | {-# INLINE hexadecimal #-} 263 | 264 | -- | Parse plus or minus sign 265 | sign :: (Parser k f, Num a) => f (a -> a) 266 | sign = (negate <$ asciiByte asc_minus) <|> (id <$ optional (asciiByte asc_plus)) 267 | {-# INLINE sign #-} 268 | 269 | -- | Parse a number with a plus or minus sign. 270 | signed :: (Num a, Parser k p) => p a -> p a 271 | signed p = ($) <$> sign <*> p 272 | {-# INLINE signed #-} 273 | 274 | fractionExp :: (Num a, Parser k p) => p expSep -> p digitSep -> p (Maybe a) 275 | fractionExp expSep digitSep = do 276 | e <- optional expSep 277 | case e of 278 | Nothing{} -> pure Nothing 279 | Just{} -> Just <$> signed (integer digitSep 10) 280 | {-# INLINE fractionExp #-} 281 | 282 | -- | Parse a fraction of arbitrary exponent base and mantissa base. 283 | -- 'fractionDec' and 'fractionHex' should be used instead probably. 284 | -- Returns either an integer in 'Left' or a fraction in 'Right'. 285 | -- Signs are not parsed by this combinator. 286 | fraction :: (Num a, Parser k p) => p expSep -> Int -> Int -> p digitSep -> p (Either a (a, Int, a)) 287 | fraction expSep expBase mantBasePow digitSep = do 288 | let mantBase = expBase ^ mantBasePow 289 | mant <- integer digitSep mantBase 290 | frac <- optional $ asciiByte asc_point *> option (0, 0) (integer' digitSep mantBase) 291 | expn <- fractionExp expSep digitSep 292 | let (fracVal, fracLen) = fromMaybe (0, 0) frac 293 | expVal = fromMaybe 0 expn 294 | pure $ case (frac, expn) of 295 | (Nothing, Nothing) -> Left mant 296 | _ -> Right ( mant * fromIntegral mantBase ^ fracLen + fracVal 297 | , expBase 298 | , expVal - fromIntegral (fracLen * mantBasePow)) 299 | {-# INLINE fraction #-} 300 | 301 | -- | Parse a decimal fraction, e.g., 123.456e-78, returning (mantissa, 10, exponent), 302 | -- corresponding to mantissa * 10^exponent. 303 | -- Digits can be separated by separator, e.g. `optional (char '_')`. 304 | -- Signs are not parsed by this combinator. 305 | fractionDec :: (Num a, Parser k p) => p digitSep -> p (Either a (a, Int, a)) 306 | fractionDec sep = fraction (asciiSatisfy (\b -> b == asc_E || b == asc_e)) 10 1 sep "fraction" 307 | {-# INLINE fractionDec #-} 308 | 309 | -- | Parse a hexadecimal fraction, e.g., co.ffeep123, returning (mantissa, 2, exponent), 310 | -- corresponding to mantissa * 2^exponent. 311 | -- Digits can be separated by separator, e.g. `optional (char '_')`. 312 | -- Signs are not parsed by this combinator. 313 | fractionHex :: (Num a, Parser k p) => p digitSep -> p (Either a (a, Int, a)) 314 | fractionHex sep = fraction (asciiSatisfy (\b -> b == asc_P || b == asc_p)) 2 4 sep "hexadecimal fraction" 315 | {-# INLINE fractionHex #-} 316 | 317 | -- | Parse a case-insensitive character 318 | char' :: Parser k p => Char -> p Char 319 | char' x = 320 | let l = C.toLower x 321 | u = C.toUpper x 322 | in satisfy (\c -> c == l || c == u) 323 | {-# INLINE char' #-} 324 | 325 | -- | Parse a character different from the given one. 326 | notChar :: Parser k p => Char -> p Char 327 | notChar c = satisfy (/= c) 328 | {-# INLINE notChar #-} 329 | 330 | -- | Parse an arbitrary character. 331 | anyChar :: P k Char 332 | anyChar = satisfy (const True) 333 | {-# INLINE anyChar #-} 334 | 335 | -- | Parse an arbitrary ASCII byte. 336 | anyAsciiByte :: P k Word8 337 | anyAsciiByte = asciiSatisfy (const True) 338 | {-# INLINE anyAsciiByte #-} 339 | 340 | -- | Parse an alphanumeric character, including Unicode. 341 | alphaNumChar :: P k Char 342 | alphaNumChar = satisfy C.isAlphaNum "alphanumeric character" 343 | {-# INLINE alphaNumChar #-} 344 | 345 | -- | Parse a letter character, including Unicode. 346 | letterChar :: P k Char 347 | letterChar = satisfy C.isLetter "letter" 348 | {-# INLINE letterChar #-} 349 | 350 | -- | Parse a lowercase letter, including Unicode. 351 | lowerChar :: P k Char 352 | lowerChar = satisfy C.isLower "lowercase letter" 353 | {-# INLINE lowerChar #-} 354 | 355 | -- | Parse a uppercase letter, including Unicode. 356 | upperChar :: P k Char 357 | upperChar = satisfy C.isUpper "uppercase letter" 358 | {-# INLINE upperChar #-} 359 | 360 | -- | Parse a space character, including Unicode. 361 | spaceChar :: P k Char 362 | spaceChar = satisfy C.isSpace "space" 363 | {-# INLINE spaceChar #-} 364 | 365 | -- | Parse a symbol character, including Unicode. 366 | symbolChar :: P k Char 367 | symbolChar = satisfy C.isSymbol "symbol" 368 | {-# INLINE symbolChar #-} 369 | 370 | -- | Parse a punctuation character, including Unicode. 371 | punctuationChar :: P k Char 372 | punctuationChar = satisfy C.isPunctuation "punctuation" 373 | {-# INLINE punctuationChar #-} 374 | 375 | -- | Parse a digit character of the given base. 376 | -- Bases 2 to 36 are supported. 377 | digitChar :: Parser k p => Int -> p Char 378 | digitChar base = unsafeAsciiToChar <$> digitByte base 379 | {-# INLINE digitChar #-} 380 | 381 | -- | Parse a character beloning to the ASCII charset (< 128) 382 | asciiChar :: P k Char 383 | asciiChar = unsafeAsciiToChar <$> anyAsciiByte 384 | {-# INLINE asciiChar #-} 385 | 386 | -- | Parse a character belonging to the given Unicode category 387 | categoryChar :: Parser k p => C.GeneralCategory -> p Char 388 | categoryChar cat = satisfy ((== cat) . C.generalCategory) untitle (show cat) 389 | {-# INLINE categoryChar #-} 390 | 391 | untitle :: String -> String 392 | untitle [] = [] 393 | untitle (x:xs) = C.toLower x : go xs 394 | where go [] = "" 395 | go (y:ys) | C.isUpper y = ' ' : C.toLower y : untitle ys 396 | | otherwise = y : ys 397 | 398 | -- | Skip the next n characters 399 | skipChars :: Parser k p => Int -> p () 400 | skipChars n = skipCount n anyChar 401 | {-# INLINE skipChars #-} 402 | 403 | -- | Skip char while predicate is true 404 | skipCharsWhile :: Parser k p => (Char -> Bool) -> p () 405 | skipCharsWhile f = skipMany (satisfy f) 406 | {-# INLINE skipCharsWhile #-} 407 | 408 | -- | Skip at least one char while predicate is true 409 | skipCharsWhile1 :: Parser k p => (Char -> Bool) -> p () 410 | skipCharsWhile1 f = satisfy f *> skipCharsWhile f 411 | {-# INLINE skipCharsWhile1 #-} 412 | 413 | -- | Take the next n characters and advance the position by n characters 414 | takeChars :: Parser k p => Int -> p k 415 | takeChars n = asChunk (skipChars n) "string of length " <> show n 416 | {-# INLINE takeChars #-} 417 | 418 | -- | Take chars while predicate is true 419 | takeCharsWhile :: Parser k p => (Char -> Bool) -> p k 420 | takeCharsWhile f = asChunk (skipCharsWhile f) 421 | {-# INLINE takeCharsWhile #-} 422 | 423 | -- | Take at least one byte while predicate is true 424 | takeCharsWhile1 :: Parser k p => (Char -> Bool) -> p k 425 | takeCharsWhile1 f = asChunk (skipCharsWhile1 f) 426 | {-# INLINE takeCharsWhile1 #-} 427 | 428 | -- | Parse a single character with the given predicate 429 | satisfy :: Parser k p => (Char -> Bool) -> p Char 430 | satisfy f = scan $ \c -> if f c then Just c else Nothing 431 | {-# INLINE satisfy #-} 432 | 433 | -- | Parse a single character within the ASCII charset with the given predicate 434 | asciiSatisfy :: Parser k p => (Word8 -> Bool) -> p Word8 435 | asciiSatisfy f = asciiScan $ \b -> if f b then Just b else Nothing 436 | {-# INLINE asciiSatisfy #-} 437 | 438 | scanChars :: Parser k p => (s -> Char -> Maybe s) -> s -> p s 439 | scanChars f = go 440 | where go s = (scan (f s) >>= go) <|> pure s 441 | {-# INLINE scanChars #-} 442 | 443 | scanChars1 :: Parser k p => (s -> Char -> Maybe s) -> s -> p s 444 | scanChars1 f s = scan (f s) >>= scanChars f 445 | {-# INLINE scanChars1 #-} 446 | -------------------------------------------------------------------------------- /src/Text/PariPari/Internal/Reporter.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE DeriveGeneric #-} 4 | {-# LANGUAGE FlexibleInstances #-} 5 | {-# LANGUAGE MultiParamTypeClasses #-} 6 | {-# LANGUAGE MultiWayIf #-} 7 | {-# LANGUAGE NamedFieldPuns #-} 8 | {-# LANGUAGE Rank2Types #-} 9 | {-# LANGUAGE ScopedTypeVariables #-} 10 | {-# LANGUAGE TypeApplications #-} 11 | {-# LANGUAGE UnboxedTuples #-} 12 | {-# LANGUAGE MagicHash #-} 13 | module Text.PariPari.Internal.Reporter ( 14 | Reporter(..) 15 | , Env(..) 16 | , State(..) 17 | , local 18 | , get 19 | , raiseError 20 | , mergeErrorState 21 | , Report(..) 22 | , runReporter 23 | , runReporterWithOptions 24 | , ErrorContext(..) 25 | , showReport 26 | , showErrors 27 | , ReportOptions(..) 28 | , defaultReportOptions 29 | ) where 30 | 31 | import Control.Monad (void) 32 | import Data.Function (on) 33 | import Data.List (intercalate, sort, group, sortOn) 34 | import Data.Semigroup as Sem 35 | import Data.String (IsString(..)) 36 | import GHC.Base 37 | import GHC.Word 38 | import GHC.Generics (Generic) 39 | import Text.PariPari.Internal.Chunk 40 | import Text.PariPari.Internal.Class 41 | import qualified Control.Monad.Fail as Fail 42 | import qualified Data.List.NonEmpty as NE 43 | 44 | data ErrorContext = ErrorContext 45 | { _ecErrors :: ![Error] 46 | , _ecContext :: ![String] 47 | } deriving (Eq, Show, Generic) 48 | 49 | data ReportOptions = ReportOptions 50 | { _optMaxContexts :: {-#UNPACK#-}!Int 51 | , _optMaxErrorsPerContext :: {-#UNPACK#-}!Int 52 | , _optMaxLabelsPerContext :: {-#UNPACK#-}!Int 53 | } deriving (Eq, Show, Generic) 54 | 55 | data Report = Report 56 | { _reportFile :: !FilePath 57 | , _reportErrors :: ![ErrorContext] 58 | , _reportLine :: {-#UNPACK#-}!Int 59 | , _reportCol :: {-#UNPACK#-}!Int 60 | } deriving (Eq, Show, Generic) 61 | 62 | data Env k = Env 63 | { _envBuf :: !(Buffer k) 64 | , _envFile :: !FilePath 65 | , _envOptions :: !ReportOptions 66 | , _envHidden :: !Bool 67 | , _envContext :: ![String] 68 | , _envRefLine :: Int# 69 | , _envRefCol :: Int# 70 | } 71 | 72 | data State = State 73 | { _stOff :: Int# 74 | , _stLine :: Int# 75 | , _stCol :: Int# 76 | , _stErrOff :: Int# 77 | , _stErrLine :: Int# 78 | , _stErrCol :: Int# 79 | , _stErrors :: ![ErrorContext] 80 | , _stReports :: ![Report] 81 | } 82 | 83 | -- | Parser which is optimised for good error reports. 84 | -- Performance is secondary, since the 'Reporter' is used 85 | -- as a fallback to the 'Acceptor'. 86 | newtype Reporter k a = Reporter 87 | { unReporter :: forall b. Env k -> State 88 | -> (a -> State -> b) 89 | -> (State -> b) 90 | -> b 91 | } 92 | 93 | instance (Chunk k, Semigroup a) => Sem.Semigroup (Reporter k a) where 94 | p1 <> p2 = (<>) <$> p1 <*> p2 95 | {-# INLINE (<>) #-} 96 | 97 | instance (Chunk k, Semigroup a, Monoid a) => Monoid (Reporter k a) where 98 | mempty = pure mempty 99 | {-# INLINE mempty #-} 100 | 101 | mappend = (<>) 102 | {-# INLINE mappend #-} 103 | 104 | instance Chunk k => Functor (Reporter k) where 105 | fmap f p = Reporter $ \env st ok err -> 106 | unReporter p env st (ok . f) err 107 | {-# INLINE fmap #-} 108 | 109 | instance Chunk k => Applicative (Reporter k) where 110 | pure x = Reporter $ \_ st ok _ -> ok x st 111 | {-# INLINE pure #-} 112 | 113 | f <*> a = Reporter $ \env st ok err -> 114 | let ok1 f' s = 115 | let ok2 a' s' = ok (f' a') s' 116 | in unReporter a env s ok2 err 117 | in unReporter f env st ok1 err 118 | {-# INLINE (<*>) #-} 119 | 120 | p1 *> p2 = do 121 | void p1 122 | p2 123 | {-# INLINE (*>) #-} 124 | 125 | p1 <* p2 = do 126 | x <- p1 127 | void p2 128 | pure x 129 | {-# INLINE (<*) #-} 130 | 131 | instance Chunk k => Alternative (Reporter k) where 132 | empty = Reporter $ \_ st _ err -> err st 133 | {-# INLINE empty #-} 134 | 135 | p1 <|> p2 = Reporter $ \env st ok err -> 136 | let err' s = unReporter p2 env (mergeErrorState env st s) ok err 137 | in unReporter p1 env st ok err' 138 | {-# INLINE (<|>) #-} 139 | 140 | instance Chunk k => MonadPlus (Reporter k) 141 | 142 | instance Chunk k => Monad (Reporter k) where 143 | p >>= f = Reporter $ \env st ok err -> 144 | let ok' x s = unReporter (f x) env s ok err 145 | in unReporter p env st ok' err 146 | {-# INLINE (>>=) #-} 147 | 148 | #if !MIN_VERSION_base(4,11,0) 149 | fail = Fail.fail 150 | {-# INLINE fail #-} 151 | #endif 152 | 153 | instance Chunk k => Fail.MonadFail (Reporter k) where 154 | fail msg = failWith $ EFail msg 155 | {-# INLINE fail #-} 156 | 157 | instance Chunk k => Parser k (Reporter k) where 158 | getPos = get $ \_ st -> Pos (I# (_stLine st)) (I# (_stCol st)) 159 | {-# INLINE getPos #-} 160 | 161 | getFile = get $ \env _ -> _envFile env 162 | {-# INLINE getFile #-} 163 | 164 | getRefPos = get $ \env _ -> Pos (I# (_envRefLine env)) (I# (_envRefCol env)) 165 | {-# INLINE getRefPos #-} 166 | 167 | withRefPos p = local (\st env -> env { _envRefLine = _stLine st, _envRefCol = _stCol st }) p 168 | {-# INLINE withRefPos #-} 169 | 170 | label l p = local (const $ addLabel l) p 171 | {-# INLINE label #-} 172 | 173 | hidden p = local (const $ \env -> env { _envHidden = True }) p 174 | {-# INLINE hidden #-} 175 | 176 | try p = Reporter $ \env st ok err -> 177 | let err' _ = err st 178 | in unReporter p env st ok err' 179 | {-# INLINE try #-} 180 | 181 | p1 p2 = Reporter $ \env st ok err -> 182 | let err' s 183 | | 1# <- _stOff s ==# _stOff st = unReporter p2 env (mergeErrorState env st s) ok err 184 | | otherwise = err s 185 | in unReporter p1 env st ok err' 186 | {-# INLINE () #-} 187 | 188 | notFollowedBy p = Reporter $ \env st ok err -> 189 | let ok' x _ = raiseError env st err $ EUnexpected $ show x 190 | err' _ = ok () st 191 | in unReporter p env st ok' err' 192 | {-# INLINE notFollowedBy #-} 193 | 194 | lookAhead p = Reporter $ \env st ok err -> 195 | let ok' x _ = ok x st 196 | in unReporter p env st ok' err 197 | {-# INLINE lookAhead #-} 198 | 199 | failWith e = Reporter $ \env st _ err -> raiseError env st err e 200 | {-# INLINE failWith #-} 201 | 202 | eof = Reporter $ \env st ok err -> 203 | case indexByte @k (_envBuf env) (_stOff st) `eqWord#` int2Word# 0# of 204 | 1# -> ok () st 205 | _ -> raiseError env st err expectedEnd 206 | {-# INLINE eof #-} 207 | 208 | recover p r = Reporter $ \env st ok err -> 209 | let err1 s = 210 | let err2 _ = err s 211 | in unReporter r env (addReport env s) ok err2 212 | in unReporter p env st ok err1 213 | {-# INLINE recover #-} 214 | 215 | chunk k = Reporter $ \env st@State{_stOff,_stCol} ok err -> 216 | case matchChunk @k (_envBuf env) _stOff k of 217 | -1# -> raiseError env st err $ EExpected [showChunk @k k] 218 | n -> ok k st { _stOff = _stOff +# n, _stCol = _stCol +# n } 219 | {-# INLINE chunk #-} 220 | 221 | asChunk p = do 222 | I# begin' <- get (const (\s -> I# (_stOff s))) 223 | p 224 | I# end' <- get (const (\s -> I# (_stOff s))) 225 | src <- get (\env _ -> _envBuf env) 226 | pure $ packChunk src begin' (end' -# begin') 227 | {-# INLINE asChunk #-} 228 | 229 | scan f = Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err -> 230 | case indexChar @k (_envBuf env) _stOff of 231 | (# '\0'#, _ #) -> raiseError env st err unexpectedEnd 232 | (# c, w #) -> 233 | case f (C# c) of 234 | Just r -> 235 | ok r st { _stOff = _stOff +# w 236 | , _stLine = case c `eqChar#` '\n'# of 1# -> _stLine +# 1#; _ -> _stLine 237 | , _stCol = case c `eqChar#` '\n'# of 1# -> 1#; _ -> _stCol +# 1# 238 | } 239 | Nothing -> raiseError env st err $ EUnexpected $ show (C# c) 240 | {-# INLINE scan #-} 241 | 242 | -- By inling this combinator, GHC should figure out the `charWidth` 243 | -- of the character resulting in an optimised decoder. 244 | char '\0' = error "Character '\\0' cannot be parsed because it is used as sentinel" 245 | char c@(C# c') = 246 | Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err -> 247 | case matchChar @k (_envBuf env) _stOff c' of 248 | -1# -> raiseError env st err $ EExpected [show c] 249 | w -> ok c st 250 | { _stOff = _stOff +# w 251 | , _stLine = if c == '\n' then _stLine +# 1# else _stLine 252 | , _stCol = if c == '\n' then 1# else _stCol +# 1# 253 | } 254 | {-# INLINE char #-} 255 | 256 | asciiScan f = Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err -> 257 | let b = W8# (indexByte @k (_envBuf env) _stOff) 258 | in if | b /= 0, 259 | b < 128, 260 | Just x <- f b -> 261 | ok x st 262 | { _stOff = _stOff +# 1# 263 | , _stLine = if b == asc_newline then _stLine +# 1# else _stLine 264 | , _stCol = if b == asc_newline then 1# else _stCol +# 1# 265 | } 266 | | otherwise -> 267 | raiseError env st err $ EUnexpected $ showByte b 268 | {-# INLINE asciiScan #-} 269 | 270 | asciiByte 0 = error "Character '\\0' cannot be parsed because it is used as sentinel" 271 | asciiByte b 272 | | b >= 128 = error "Not an ASCII character" 273 | | otherwise = Reporter $ \env st@State{_stOff, _stLine, _stCol} ok err -> 274 | if W8# (indexByte @k (_envBuf env) _stOff) == b then 275 | ok b st 276 | { _stOff = _stOff +# 1# 277 | , _stLine = if b == asc_newline then _stLine +# 1# else _stLine 278 | , _stCol = if b == asc_newline then 1# else _stCol +# 1# 279 | } 280 | else 281 | raiseError env st err $ EExpected [showByte b] 282 | {-# INLINE asciiByte #-} 283 | 284 | instance Chunk k => IsString (Reporter k k) where 285 | fromString = string 286 | {-# INLINE fromString #-} 287 | 288 | raiseError :: Env k -> State -> (State -> b) -> Error -> b 289 | raiseError env st err e = err $ addError env st e 290 | {-# INLINE raiseError #-} 291 | 292 | -- | Reader monad, modify environment locally 293 | local :: (State -> Env k -> Env k) -> Reporter k a -> Reporter k a 294 | local f p = Reporter $ \env st ok err -> 295 | unReporter p (f st env) st ok err 296 | {-# INLINE local #-} 297 | 298 | -- | Reader monad, get something from the environment 299 | get :: (Env k -> State -> a) -> Reporter k a 300 | get f = Reporter $ \env st ok _ -> ok (f env st) st 301 | {-# INLINE get #-} 302 | 303 | addLabel :: String -> Env k -> Env k 304 | addLabel l env = case _envContext env of 305 | (l':_) | l == l' -> env 306 | ls -> env { _envContext = take (_optMaxLabelsPerContext._envOptions $ env) $ l : ls } 307 | {-# INLINE addLabel #-} 308 | 309 | -- | Add parser error to the list of errors 310 | -- which are kept in the parser state. 311 | -- Errors of lower priority and at an earlier position. 312 | -- Furthermore the error is merged with existing errors if possible. 313 | addError :: Env k -> State -> Error -> State 314 | addError env st e 315 | | 1# <- _stOff st ># _stErrOff st, 316 | Just e' <- mkError env e = 317 | st { _stErrors = [e'] 318 | , _stErrOff = _stOff st 319 | , _stErrLine = _stLine st 320 | , _stErrCol = _stCol st 321 | } 322 | | otherwise = st 323 | {-# INLINE addError #-} 324 | 325 | mkError :: Env k -> Error -> Maybe ErrorContext 326 | mkError env e 327 | | _envHidden env, (l:ls) <- _envContext env = Just $ ErrorContext [EExpected [l]] ls 328 | | _envHidden env = Nothing 329 | | otherwise = Just $ ErrorContext [e] $ _envContext env 330 | {-# INLINE mkError #-} 331 | 332 | -- | Merge errors of two states, used when backtracking 333 | mergeErrorState :: Env k -> State -> State -> State 334 | mergeErrorState env s s' 335 | | 1# <- _stErrOff s' ># _stErrOff s = 336 | s { _stErrors = _stErrors s' 337 | , _stErrOff = _stErrOff s' 338 | , _stErrLine = _stErrLine s' 339 | , _stErrCol = _stErrCol s' 340 | } 341 | | 1# <- _stErrOff s' ==# _stErrOff s = 342 | s { _stErrors = shrinkErrors env $ _stErrors s' <> _stErrors s } 343 | | otherwise = s 344 | {-# INLINE mergeErrorState #-} 345 | 346 | shrinkErrors :: Env k -> [ErrorContext] -> [ErrorContext] 347 | shrinkErrors env = take (_optMaxContexts._envOptions $ env) . map (mergeErrorContexts env) . NE.groupBy ((==) `on` _ecContext) . sortOn _ecContext 348 | 349 | -- | Shrink error context by deleting duplicates 350 | -- and merging errors if possible. 351 | mergeErrorContexts :: Env k -> NonEmpty ErrorContext -> ErrorContext 352 | mergeErrorContexts env es@(ErrorContext{_ecContext}:| _) = ErrorContext 353 | { _ecErrors = take (_optMaxErrorsPerContext._envOptions $ env) $ nubSort $ mergeEExpected $ concatMap _ecErrors $ NE.toList es 354 | , _ecContext = _ecContext 355 | } 356 | 357 | mergeEExpected :: [Error] -> [Error] 358 | mergeEExpected es = [EExpected $ nubSort expects | not (null expects)] <> filter (null . asEExpected) es 359 | where expects = concatMap asEExpected es 360 | 361 | nubSort :: Ord a => [a] -> [a] 362 | nubSort = map head . group . sort 363 | 364 | asEExpected :: Error -> [String] 365 | asEExpected (EExpected s) = s 366 | asEExpected _ = [] 367 | 368 | defaultReportOptions :: ReportOptions 369 | defaultReportOptions = ReportOptions 370 | { _optMaxContexts = 20 371 | , _optMaxErrorsPerContext = 20 372 | , _optMaxLabelsPerContext = 5 373 | } 374 | 375 | -- | Run 'Reporter' with additional 'ReportOptions'. 376 | runReporterWithOptions :: Chunk k => ReportOptions -> Reporter k a -> FilePath -> k -> (Maybe a, [Report]) 377 | runReporterWithOptions o p f k = 378 | let !(# b, off #) = unpackChunk k 379 | env = initialEnv o f b 380 | ok x s = (Just x, reverse $ _stReports s) 381 | err s = (Nothing, reverse $ _stReports $ addReport env s) 382 | in unReporter p env (initialState off) ok err 383 | 384 | -- | Run 'Reporter' on the given chunk, returning the result 385 | -- if successful and reports from error recoveries. 386 | -- In the case of an error, 'Nothing' is returned and the 'Report' list 387 | -- is non-empty. 388 | runReporter :: Chunk k => Reporter k a -> FilePath -> k -> (Maybe a, [Report]) 389 | runReporter = runReporterWithOptions defaultReportOptions 390 | 391 | addReport :: Env k -> State -> State 392 | addReport e s = s { _stReports = Report { _reportFile = _envFile e 393 | , _reportErrors = _stErrors s 394 | , _reportLine = I# (_stErrLine s) 395 | , _reportCol = I# (_stErrCol s) } : _stReports s } 396 | 397 | initialEnv :: ReportOptions -> FilePath -> Buffer k -> Env k 398 | initialEnv _envOptions _envFile _envBuf = Env 399 | { _envFile 400 | , _envBuf 401 | , _envOptions 402 | , _envContext = [] 403 | , _envHidden = False 404 | , _envRefLine = 1# 405 | , _envRefCol = 1# 406 | } 407 | 408 | initialState :: Int# -> State 409 | initialState _stOff = State 410 | { _stOff 411 | , _stLine = 1# 412 | , _stCol = 1# 413 | , _stErrOff = 0# 414 | , _stErrLine = 0# 415 | , _stErrCol = 0# 416 | , _stErrors = [] 417 | , _stReports = [] 418 | } 419 | 420 | -- | Pretty string representation of 'Report'. 421 | showReport :: Report -> String 422 | showReport r = 423 | "Parser errors at " <> _reportFile r 424 | <> ", line " <> show (_reportLine r) 425 | <> ", column " <> show (_reportCol r) 426 | <> "\n\n" <> showErrors (_reportErrors r) 427 | 428 | -- | Pretty string representation of '[ErrorContext]'. 429 | showErrors :: [ErrorContext] -> String 430 | showErrors [] = "No errors" 431 | showErrors es = intercalate "\n" $ map showErrorContext es 432 | 433 | showErrorContext :: ErrorContext -> String 434 | showErrorContext ec = 435 | intercalate ", " (map showError $ _ecErrors ec) 436 | <> showContext (_ecContext ec) <> "." 437 | 438 | showContext :: [String] -> String 439 | showContext [] = "" 440 | showContext xs = " in context of " <> intercalate ", " xs 441 | -------------------------------------------------------------------------------- /src/Text/PariPari/Internal/Run.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Rank2Types #-} 2 | module Text.PariPari.Internal.Run ( 3 | runParser 4 | , runParserWithOptions 5 | ) where 6 | 7 | import Text.PariPari.Internal.Acceptor 8 | import Text.PariPari.Internal.Class 9 | import Text.PariPari.Internal.Chunk 10 | import Text.PariPari.Internal.Reporter 11 | 12 | -- | Rsun fast 'Acceptor' and slower 'Reporter' on the given sequentially. 13 | -- The 'FilePath' is used for error reporting. 14 | -- When the acceptor does not return successfully, the result from the reporter 15 | -- is awaited. 16 | runParser :: Chunk k => (forall p. Parser k p => p a) -> FilePath -> k -> (Maybe a, [Report]) 17 | runParser = runParserWithOptions defaultReportOptions 18 | {-# INLINE runParser #-} 19 | 20 | -- | Run parsers **sequentially** with additional 'ReportOptions'. 21 | runParserWithOptions :: Chunk k => ReportOptions -> (forall p. Parser k p => p a) -> FilePath -> k -> (Maybe a, [Report]) 22 | runParserWithOptions o p f b = 23 | let a = runAcceptor p f b 24 | r = runReporterWithOptions o p f b 25 | in case a of 26 | Nothing -> r 27 | Just x -> (Just x, []) 28 | {-# INLINE runParserWithOptions #-} 29 | -------------------------------------------------------------------------------- /src/Text/PariPari/Lens.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Rank2Types #-} 2 | module Text.PariPari.Lens ( 3 | posLine 4 | , posCol 5 | , reportLine 6 | , reportCol 7 | , reportFile 8 | , reportErrors 9 | , ecErrors 10 | , ecContext 11 | , optMaxContexts 12 | , optMaxErrorsPerContext 13 | , optMaxLabelsPerContext 14 | ) where 15 | 16 | import Text.PariPari.Internal.Class 17 | import Text.PariPari.Internal.Reporter 18 | 19 | type Lens a b = forall f . Functor f => (b -> f b) -> (a -> f a) 20 | 21 | posLine :: Lens Pos Int 22 | posLine k p = fmap (\x -> p { _posLine = x }) (k (_posLine p)) 23 | {-# INLINE posLine #-} 24 | 25 | posCol :: Lens Pos Int 26 | posCol k p = fmap (\x -> p { _posCol = x }) (k (_posCol p)) 27 | {-# INLINE posCol #-} 28 | 29 | reportLine :: Lens Report Int 30 | reportLine k r = fmap (\x -> r { _reportLine = x }) (k (_reportLine r)) 31 | {-# INLINE reportLine #-} 32 | 33 | reportCol :: Lens Report Int 34 | reportCol k r = fmap (\x -> r { _reportCol = x }) (k (_reportCol r)) 35 | {-# INLINE reportCol #-} 36 | 37 | reportFile :: Lens Report FilePath 38 | reportFile k r = fmap (\x -> r { _reportFile = x }) (k (_reportFile r)) 39 | {-# INLINE reportFile #-} 40 | 41 | reportErrors :: Lens Report [ErrorContext] 42 | reportErrors k r = fmap (\x -> r { _reportErrors = x }) (k (_reportErrors r)) 43 | {-# INLINE reportErrors #-} 44 | 45 | ecErrors :: Lens ErrorContext [Error] 46 | ecErrors k e = fmap (\x -> e { _ecErrors = x }) (k (_ecErrors e)) 47 | {-# INLINE ecErrors #-} 48 | 49 | ecContext :: Lens ErrorContext [String] 50 | ecContext k e = fmap (\x -> e { _ecContext = x }) (k (_ecContext e)) 51 | {-# INLINE ecContext #-} 52 | 53 | optMaxContexts :: Lens ReportOptions Int 54 | optMaxContexts k o = fmap (\x -> o { _optMaxContexts = x }) (k (_optMaxContexts o)) 55 | {-# INLINE optMaxContexts #-} 56 | 57 | optMaxErrorsPerContext :: Lens ReportOptions Int 58 | optMaxErrorsPerContext k o = fmap (\x -> o { _optMaxErrorsPerContext = x }) (k (_optMaxErrorsPerContext o)) 59 | {-# INLINE optMaxErrorsPerContext #-} 60 | 61 | optMaxLabelsPerContext :: Lens ReportOptions Int 62 | optMaxLabelsPerContext k o = fmap (\x -> o { _optMaxLabelsPerContext = x }) (k (_optMaxLabelsPerContext o)) 63 | {-# INLINE optMaxLabelsPerContext #-} 64 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - . 3 | resolver: nightly-2020-05-26 4 | -------------------------------------------------------------------------------- /test/test.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RankNTypes #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | {-# LANGUAGE TypeApplications #-} 5 | {-# LANGUAGE FlexibleContexts #-} 6 | module Main (main) where 7 | 8 | import Control.Monad (replicateM, replicateM_) 9 | import Data.ByteString (ByteString) 10 | import Data.Either (isLeft) 11 | import Data.Text (Text) 12 | import Prelude hiding (getLine) 13 | import System.Random 14 | import Test.Tasty 15 | import Test.Tasty.HUnit 16 | import Text.PariPari 17 | import Text.PariPari.Internal.Chunk (stringToChunk, asc_a, asc_0, asc_9) 18 | import qualified Data.Char as C 19 | import qualified Data.List.NonEmpty as NE 20 | import Data.String (IsString(..)) 21 | 22 | runAcceptor' :: Chunk k => Acceptor k a -> FilePath -> k -> Either Error a 23 | runAcceptor' a f k = maybe (Left $ EFail "Nothing") Right $ runAcceptor a f k 24 | 25 | main :: IO () 26 | main = defaultMain tests 27 | 28 | randomTries :: Int 29 | randomTries = 1000 30 | 31 | randomStringLen :: Int 32 | randomStringLen = 1000 33 | 34 | runReporterEither :: Chunk k => Reporter k a -> FilePath -> k -> Either [Report] a 35 | runReporterEither p f k = case runReporter p f k of 36 | (Just a, []) -> Right a 37 | (_, r) -> Left r 38 | 39 | -- Only generate valid Unicode characters 40 | randomChar :: IO Char 41 | randomChar = do 42 | c <- randomIO 43 | let n = C.ord c 44 | if n == 0 || (n >= 0xD800 && n <= 0xDFFF) || n > 0x10FFFF then 45 | randomChar 46 | else 47 | pure c 48 | 49 | randomString :: IO String 50 | randomString = do 51 | n <- randomRIO (1, randomStringLen) 52 | replicateM n randomChar 53 | 54 | randomAsciiString :: IO String 55 | randomAsciiString = do 56 | n <- randomRIO (1, randomStringLen) 57 | replicateM n (randomRIO (C.chr 1, C.chr 127)) 58 | 59 | tests :: TestTree 60 | tests = testGroup "Tests" 61 | [ testGroup "Acceptor" 62 | [ testGroup "Text" $ parserTests @Text runAcceptor' 63 | , testGroup "ByteString" $ parserTests @ByteString runAcceptor' 64 | ] 65 | 66 | , testGroup "Reporter" 67 | [ testGroup "Text" $ parserTests @Text runReporterEither 68 | , testGroup "ByteString" $ parserTests @ByteString runReporterEither 69 | ] 70 | 71 | , testGroup "Reporter specific" reporterTests 72 | ] 73 | 74 | parserTests :: forall k p e. (Parser k p, Chunk k, IsString k, Eq e, Show e, Show k) 75 | => (forall a. p a -> FilePath -> k -> Either e a) -> [TestTree] 76 | parserTests run = 77 | [ testGroup "ChunkParser" 78 | [ testCase "getFile" $ do 79 | ok getFile "" "filename" 80 | 81 | , testCase "getPos" $ do 82 | ok getPos "" (Pos 1 1) 83 | ok (char 'a' *> getPos) "abc" (Pos 1 2) 84 | ok (char 'a' *> char '\n' *> getPos) "a\nb" (Pos 2 1) 85 | ok (chunk "a\n" *> getPos) "a\nb" (Pos 1 3) -- chunk must not contain newlines! 86 | ok (char 'a' *> char '\n' *> char 'b' *> getPos) "a\nb" (Pos 2 2) 87 | 88 | , testCase "getRefPos" $ do 89 | ok getRefPos "" (Pos 1 1) 90 | ok (char 'a' *> getRefPos) "abc" (Pos 1 1) 91 | ok (char 'a' *> char '\n' *> withRefPos getRefPos) "a\nb" (Pos 2 1) 92 | ok (char 'a' *> char '\n' *> char 'b' *> withRefPos getRefPos) "a\nb" (Pos 2 2) 93 | ok (char 'a' *> char '\n' *> withRefPos (char 'b' *> getRefPos)) "a\nb" (Pos 2 1) 94 | 95 | , testCase "notFollowedBy" $ do 96 | ok (char 'a' <* notFollowedBy (char 'c')) "abc" 'a' 97 | err (char 'a' <* notFollowedBy (char 'b')) "abc" 98 | ok (char 'a' *> notFollowedBy (chunk "bd") *> char 'b') "abc" 'b' 99 | err (char 'a' *> notFollowedBy (chunk "bc")) "abc" 100 | ok (char 'a' *> notFollowedBy (char 'c') *> getPos) "abc" (Pos 1 2) 101 | 102 | , testCase "lookAhead" $ do 103 | ok (lookAhead (char 'a')) "abc" 'a' 104 | ok (lookAhead (char 'a') *> getPos) "abc" (Pos 1 1) 105 | ok (lookAhead (chunk "ab") *> getPos) "abc" (Pos 1 1) 106 | err (lookAhead (char 'b')) "abc" 107 | err (lookAhead (chunk "bd")) "abc" 108 | err (lookAhead (char 'a')) "" 109 | 110 | , testCase "failWith" $ 111 | err (failWith (EFail "empty") :: p ()) "abc" 112 | 113 | , testCase "eof" $ do 114 | ok eof "" () 115 | ok eof "\0" () 116 | ok eof "\0\0" () 117 | ok (chunk "abc" *> eof) "abc" () 118 | ok (chunk "abc" *> eof) "abc\0" () 119 | err eof "abc" 120 | err (chunk "ab" *> eof) "abc" 121 | 122 | , testCase "label" $ do 123 | ok (label "blub" $ char 'a') "abc" 'a' 124 | err (label "blub" $ char 'b') "abc" 125 | ok (char 'a' "blub") "abc" 'a' 126 | 127 | , testCase "hidden" $ do 128 | ok (hidden $ char 'a') "abc" 'a' 129 | err (hidden $ char 'b') "abc" 130 | 131 | , testCase "try" $ do 132 | ok (try $ char 'a') "abc" 'a' 133 | err (try $ char 'b') "abc" 134 | 135 | , testCase "()" $ do 136 | ok (char 'b' char 'a' char 'c') "abc" 'a' 137 | ok (chunk "abd" chunk "abc" chunk "abe") "abcdef" "abc" 138 | 139 | , testCase "recover" $ do 140 | ok (recover (char 'a' <* eof) (char 'b')) "a" 'a' 141 | err (recover (char 'a') (char 'b')) "c" 142 | err (recover (char 'a' <* eof) (char 'b')) "c" 143 | 144 | , testCase "chunk" $ do 145 | ok (chunk "ab") "abc" "ab" 146 | err (chunk "bc") "abc" 147 | err (chunk "ab") "" 148 | 149 | , testCase "asChunk" $ do 150 | ok (asChunk (void $ chunk "ab")) "abc" "ab" 151 | ok (asChunk (void $ anyChar *> anyChar)) "abc" "ab" 152 | ok (asChunk (skipCount 2 anyChar)) "abc" "ab" 153 | err (asChunk (void $ chunk "bc")) "abc" 154 | err (asChunk (void $ chunk "ab")) "" 155 | 156 | , testCase "char" $ do 157 | ok (char 'a') "abc" 'a' 158 | ok (char 'a' <* eof) "a" 'a' 159 | err (char 'b') "abc" 160 | err (char 'a') "" 161 | 162 | , testCase "char-random" $ replicateM_ randomTries $ do 163 | s <- randomString 164 | ok (traverse char s *> eof) s () 165 | 166 | , testCase "scan" $ do 167 | ok (scan (\c -> if c == 'a' then Just c else Nothing)) "abc" 'a' 168 | ok (scan (\c -> if c == 'a' then Just c else Nothing) <* eof) "a" 'a' 169 | err (scan (\c -> if c == 'b' then Just c else Nothing)) "abc" 170 | err (scan (\c -> if c == 'a' then Just c else Nothing)) "" 171 | 172 | -- because of sentinel 173 | err (scan (\c -> if c == '\0' then Just c else Nothing)) "\0" 174 | err (scan (\c -> if c == '\0' then Just c else Nothing)) "" 175 | 176 | , testCase "asciiScan" $ do 177 | ok (asciiScan (\c -> if c == asc_a then Just c else Nothing)) "abc" asc_a 178 | ok (asciiScan (\c -> if c == asc_a then Just c else Nothing) <* eof) "a" asc_a 179 | err (asciiScan (\c -> if c == asc_0 then Just c else Nothing)) "abc" 180 | err (asciiScan (\c -> if c == asc_0 then Just c else Nothing)) "" 181 | 182 | -- because of sentinel 183 | err (asciiScan (\c -> if c == 0 then Just c else Nothing)) "\0" 184 | err (asciiScan (\c -> if c == 0 then Just c else Nothing)) "" 185 | 186 | , testCase "asciiByte" $ do 187 | ok (asciiByte asc_a) "abc" asc_a 188 | ok (asciiByte asc_a <* eof) "a" asc_a 189 | ok (asciiByte 127 <* eof) "\x7F" 127 190 | err (asciiByte asc_0) "abc" 191 | err (asciiByte asc_0) "" 192 | 193 | , testCase "asciiByte-random" $ replicateM_ randomTries $ do 194 | s <- randomAsciiString 195 | ok (traverse (asciiByte . fromIntegral . C.ord) s *> eof) s () 196 | ] 197 | 198 | , testGroup "Alternative" 199 | [ testCase "(<|>)" $ do 200 | ok (char 'b' <|> char 'a' <|> char 'c') "abc" 'a' 201 | ok (chunk "abd" <|> chunk "abc" <|> chunk "abe") "abcdef" "abc" 202 | 203 | , testCase "empty" $ do 204 | err (empty :: p ()) "abc" 205 | err (empty :: p ()) "" 206 | ] 207 | 208 | , testGroup "MonadFail" 209 | [ testCase "fail" $ do 210 | err (failWith (EFail "empty") :: p ()) "abc" 211 | ] 212 | 213 | , testGroup "Basic Combinators" 214 | [ testCase "optional" $ do 215 | ok (optional (char 'a')) "abc" (Just 'a') 216 | ok (optional (char 'b')) "abc" Nothing 217 | ok (optional (char 'a')) "" Nothing 218 | 219 | , testCase "some" $ do 220 | ok (some (char 'a')) "abc" (NE.fromList "a") 221 | ok (some (char 'a')) "aabc" (NE.fromList "aa") 222 | err (some (char 'b')) "abc" 223 | err (some (char 'a')) "" 224 | 225 | , testCase "many" $ do 226 | ok (many (char 'a')) "abc" "a" 227 | ok (many (char 'a')) "aabc" "aa" 228 | ok (many (char 'b')) "abc" "" 229 | ok (many (char 'a')) "" "" 230 | ] 231 | 232 | , testGroup "Position Combinators" 233 | [ testCase "getLine" $ do 234 | ok getLine "" 1 235 | ok (char 'a' *> getLine) "abc" 1 236 | ok (char 'a' *> char '\n' *> getLine) "a\nb" 2 237 | ok (char 'a' *> char '\n' *> char 'b' *> getLine) "a\nb" 2 238 | 239 | , testCase "getRefLine" $ do 240 | ok getRefLine "" 1 241 | ok (char 'a' *> getRefLine) "abc" 1 242 | ok (char 'a' *> char '\n' *> withRefPos getRefLine) "a\nb" 2 243 | ok (char 'a' *> char '\n' *> char 'b' *> withRefPos getRefLine) "a\nb" 2 244 | ok (char 'a' *> char '\n' *> withRefPos (char 'b' *> getRefLine)) "a\nb" 2 245 | 246 | , testCase "getCol" $ do 247 | ok getCol "" 1 248 | ok (char 'a' *> getCol) "abc" 2 249 | ok (char 'a' *> char '\n' *> getCol) "a\nb" 1 250 | ok (char 'a' *> char '\n' *> char 'b' *> getCol) "a\nb" 2 251 | 252 | , testCase "getRefCol" $ do 253 | ok getRefCol "" 1 254 | ok (char 'a' *> getRefCol) "abc" 1 255 | ok (char 'a' *> char '\n' *> withRefPos getRefCol) "a\nb" 1 256 | ok (char 'a' *> char '\n' *> char 'b' *> withRefPos getRefCol) "a\nb" 2 257 | ok (char 'a' *> char '\n' *> withRefPos (char 'b' *> getRefCol)) "a\nb" 1 258 | 259 | , testCase "withPos" $ do 260 | ok (withPos $ char 'a') "abc" (Pos 1 1, 'a') 261 | ok (char 'a' *> withPos (char 'b')) "abc" (Pos 1 2, 'b') 262 | ok (char 'a' *> char '\n' *> withPos (char 'b')) "a\nb" (Pos 2 1, 'b') 263 | 264 | , testCase "withSpan" $ do 265 | ok (withSpan $ chunk "ab") "abc" (Pos 1 1, Pos 1 3, "ab") 266 | ok (char 'a' *> withSpan (chunk "bcd")) "abcde" (Pos 1 2, Pos 1 5, "bcd") 267 | ok (char 'a' *> char '\n' *> withSpan (chunk "bcd")) "a\nbcde" (Pos 2 1, Pos 2 4, "bcd") 268 | ] 269 | 270 | , testGroup "Char Combinators" 271 | [ testCase "satisfy" $ do 272 | ok (satisfy (== 'a')) "abc" 'a' 273 | ok (satisfy (== 'a') <* eof) "a" 'a' 274 | err (satisfy (== 'b')) "abc" 275 | err (satisfy (== 'a')) "" 276 | 277 | -- because of sentinel 278 | err (satisfy (== '\0')) "\0" 279 | err (satisfy (== '\0')) "" 280 | 281 | , testCase "satisfy-random" $ replicateM_ randomTries $ do 282 | s <- randomString 283 | ok (traverse (satisfy . (==)) s *> eof) s () 284 | 285 | , testCase "asciiSatisfy" $ do 286 | ok (asciiSatisfy (== asc_a)) "abc" asc_a 287 | ok (asciiSatisfy (== asc_a) <* eof) "a" asc_a 288 | err (asciiSatisfy (== asc_0)) "abc" 289 | err (asciiSatisfy (== asc_0)) "" 290 | 291 | -- because of sentinel 292 | err (asciiSatisfy (== 0)) "\0" 293 | err (asciiSatisfy (== 0)) "" 294 | 295 | , testCase "asciiSatisfy-random" $ replicateM_ randomTries $ do 296 | s <- randomAsciiString 297 | ok (traverse (asciiSatisfy . (==) . fromIntegral . C.ord) s *> eof) s () 298 | 299 | , testCase "string" $ do 300 | ok (string "ab") "abc" (stringToChunk "ab") 301 | err (string "bc") "abc" 302 | err (string "ab") "" 303 | 304 | , testCase "string-random" $ replicateM_ randomTries $ do 305 | s <- randomString 306 | ok (string s <* eof) s (stringToChunk s) 307 | 308 | , testCase "anyChar" $ do 309 | ok anyChar "abc" 'a' 310 | ok (anyChar <* eof) "a" 'a' 311 | err anyChar "" 312 | 313 | , testCase "anyChar-random" $ replicateM_ randomTries $ do 314 | s <- randomString 315 | ok (traverse (const anyChar) s *> eof) s () 316 | 317 | , testCase "anyAsciiByte" $ do 318 | ok anyAsciiByte "abc" asc_a 319 | ok (anyAsciiByte <* eof) "a" asc_a 320 | err anyAsciiByte "" 321 | err anyAsciiByte "\x80" 322 | 323 | , testCase "anyAsciiByte-random" $ replicateM_ randomTries $ do 324 | s <- randomAsciiString 325 | ok (traverse (const anyAsciiByte) s *> eof) s () 326 | 327 | , testCase "notChar" $ do 328 | ok (notChar 'b') "abc" 'a' 329 | ok (notChar 'b' <* eof) "a" 'a' 330 | err (notChar 'a') "a" 331 | err (notChar 'a') "" 332 | 333 | , testCase "char'" $ do 334 | ok (char' 'a') "a" 'a' 335 | ok (char' 'a' <* eof) "a" 'a' 336 | ok (char' 'a') "A" 'A' 337 | ok (char' 'A') "a" 'a' 338 | ok (char' 'A') "A" 'A' 339 | ok (char' '9') "9" '9' 340 | err (char' 'a') "b" 341 | err (char' 'a') "" 342 | 343 | , testCase "alphaNumChar" $ do 344 | ok (alphaNumChar <* eof) "a" 'a' 345 | ok alphaNumChar "9" '9' 346 | err alphaNumChar "_" 347 | err alphaNumChar "" 348 | 349 | , testCase "digitChar" $ do 350 | ok (digitChar 10 <* eof) "9" '9' 351 | ok (digitChar 36) "z" 'z' 352 | err (digitChar 2) "2" 353 | err (digitChar 2) "" 354 | 355 | , testCase "letterChar" $ do 356 | ok (letterChar <* eof) "a" 'a' 357 | err letterChar "9" 358 | err letterChar "_" 359 | err letterChar "" 360 | 361 | , testCase "lowerChar" $ do 362 | ok (lowerChar <* eof) "a" 'a' 363 | err lowerChar "A" 364 | err lowerChar "9" 365 | err lowerChar "_" 366 | err lowerChar "" 367 | 368 | , testCase "upperChar" $ do 369 | ok (upperChar <* eof) "A" 'A' 370 | err upperChar "a" 371 | err upperChar "9" 372 | err upperChar "_" 373 | err upperChar "" 374 | 375 | , testCase "symbolChar" $ do 376 | ok (symbolChar <* eof) "+" '+' 377 | err symbolChar "a" 378 | err symbolChar "." 379 | err symbolChar "" 380 | 381 | , testCase "punctuationChar" $ do 382 | ok (punctuationChar <* eof) "." '.' 383 | err punctuationChar "+" 384 | err punctuationChar "a" 385 | err punctuationChar "" 386 | 387 | , testCase "spaceChar" $ do 388 | ok (spaceChar <* eof) " " ' ' 389 | ok (spaceChar <* eof) "\n" '\n' 390 | ok (spaceChar <* eof) "\r" '\r' 391 | ok (spaceChar <* eof) "\t" '\t' 392 | err spaceChar "a" 393 | err spaceChar "" 394 | 395 | , testCase "asciiChar" $ do 396 | ok (asciiChar <* eof) "a" 'a' 397 | ok (asciiChar <* eof) "\x7F" '\x7F' 398 | err asciiChar "\x80" 399 | err asciiChar "" 400 | 401 | , testCase "categoryChar" $ do 402 | ok (categoryChar C.UppercaseLetter <* eof) "A" 'A' 403 | err (categoryChar C.UppercaseLetter) "a" 404 | err (categoryChar C.UppercaseLetter) "" 405 | 406 | , testCase "digitByte" $ do 407 | ok (digitByte 2 <* eof) "0" asc_0 408 | ok (digitByte 10 <* eof) "9" asc_9 409 | err (digitByte 10) "\0" 410 | err (digitByte 10) "" 411 | 412 | , testCase "skipChars" $ do 413 | ok (skipChars 0) "" () 414 | ok (skipChars 3 <* eof) "abc" () 415 | ok (skipChars 3 <* char 'b') "aaab" () 416 | err (skipChars 1) "" 417 | err (skipChars 2) "a" 418 | 419 | , testCase "takeChars" $ do 420 | ok (takeChars 0) "" (stringToChunk "") 421 | ok (takeChars 3 <* eof) "abc" (stringToChunk "abc") 422 | ok (takeChars 3 <* char 'b') "aaab" (stringToChunk "aaa") 423 | err (takeChars 1) "" 424 | err (takeChars 2) "a" 425 | 426 | , testCase "skipCharsWhile" $ do 427 | ok (skipCharsWhile (== 'a')) "" () 428 | ok (skipCharsWhile (== 'a')) "b" () 429 | ok (skipCharsWhile (== 'a') *> eof) "aaa" () 430 | ok (skipCharsWhile (== 'a') *> char 'b') "aaab" 'b' 431 | 432 | , testCase "takeCharsWhile" $ do 433 | ok (takeCharsWhile (== 'a')) "" (stringToChunk "") 434 | ok (takeCharsWhile (== 'a')) "b" (stringToChunk "") 435 | ok (takeCharsWhile (== 'a') <* eof) "aaa" (stringToChunk "aaa") 436 | ok (takeCharsWhile (== 'a') <* char 'b') "aaab" (stringToChunk "aaa") 437 | 438 | , testCase "skipCharsWhile1" $ do 439 | err (skipCharsWhile1 (== 'a')) "" 440 | err (skipCharsWhile1 (== 'a')) "b" 441 | ok (skipCharsWhile1 (== 'a') *> eof) "aaa" () 442 | ok (skipCharsWhile1 (== 'a') *> char 'b') "aaab" 'b' 443 | 444 | , testCase "takeCharsWhile1" $ do 445 | err (takeCharsWhile1 (== 'a')) "" 446 | err (takeCharsWhile1 (== 'a')) "b" 447 | ok (takeCharsWhile1 (== 'a') <* eof) "aaa" (stringToChunk "aaa") 448 | ok (takeCharsWhile1 (== 'a') <* char 'b') "aaab" (stringToChunk "aaa") 449 | ] 450 | 451 | , testGroup "Fraction Combinators" 452 | [ testCase "fractionDec" $ do 453 | okFraction (fractionDec (pure ()) <* eof) "1.23" (Right (123, 10, -2)) 454 | okFraction (fractionDec (pure ()) <* eof) "99e0" (Right (99, 10, 0)) 455 | okFraction (fractionDec (pure ()) <* eof) "123.45" (Right (12345, 10, -2)) 456 | okFraction (fractionDec (pure ()) <* eof) "00123." (Right (123, 10, 0)) 457 | okFraction (fractionDec (pure ()) <* eof) "456.000" (Right (456000, 10, -3)) 458 | 459 | okFraction (fractionDec (pure ()) <* eof) "987e-5" (Right (987, 10, -5)) 460 | okFraction (fractionDec (pure ()) <* eof) "987.e-123" (Right (987, 10, -123)) 461 | okFraction (fractionDec (pure ()) <* eof) "987.654e-67" (Right (987654, 10, -70)) 462 | okFraction (fractionDec (pure ()) <* eof) "987.654000e-7" (Right (987654000, 10, -13)) 463 | okFraction (fractionDec (pure ()) <* eof) "000987.654000e-7" (Right (987654000, 10, -13)) 464 | 465 | okFraction (fractionDec (pure ()) <* eof) "987e+5" (Right (987, 10, 5)) 466 | okFraction (fractionDec (pure ()) <* eof) "987.e+123" (Right (987, 10, 123)) 467 | okFraction (fractionDec (pure ()) <* eof) "987.654e+67" (Right (987654, 10, 64)) 468 | okFraction (fractionDec (pure ()) <* eof) "987.654000e+7" (Right (987654000, 10, 1)) 469 | okFraction (fractionDec (pure ()) <* eof) "000987.654000e+7" (Right (987654000, 10, 1)) 470 | 471 | okFraction (fractionDec (pure ()) <* eof) "987e5" (Right (987, 10, 5)) 472 | okFraction (fractionDec (pure ()) <* eof) "987.e123" (Right (987, 10, 123)) 473 | okFraction (fractionDec (pure ()) <* eof) "987.654e67" (Right (987654, 10, 64)) 474 | okFraction (fractionDec (pure ()) <* eof) "987.654000e7" (Right (987654000, 10, 1)) 475 | okFraction (fractionDec (pure ()) <* eof) "000987.654000e7" (Right (987654000, 10, 1)) 476 | 477 | okFraction (fractionDec (pure ())) "123" (Left 123) 478 | 479 | errFraction (fractionDec (pure ())) "123e" 480 | errFraction (fractionDec (pure ())) "" 481 | errFraction (fractionDec (pure ())) "abc" 482 | ] 483 | 484 | , testGroup "Integer Combinators" 485 | [ testCase "decimal" $ do 486 | ok @Integer (decimal <* eof) "0123" 123 487 | ok @Integer (decimal <* eof) "1234567890" 1234567890 488 | ok @Integer (decimal <* string "abc" <* eof) "123abc" 123 489 | err @Integer decimal "abc" 490 | err @Integer decimal "-1" 491 | err @Integer decimal "" 492 | 493 | , testCase "octal" $ do 494 | ok @Integer (octal <* eof) "0123" 0o123 495 | ok @Integer (octal <* eof) "12345670" 0o12345670 496 | ok @Integer (octal <* string "abc" <* eof) "123abc" 0o123 497 | err @Integer octal "8abc" 498 | err @Integer octal "-1" 499 | err @Integer octal "" 500 | 501 | , testCase "hexadecimal" $ do 502 | ok @Integer (hexadecimal <* eof) "0123" 0x123 503 | ok @Integer (hexadecimal <* eof) "123456789aBcDeF0" 0x123456789ABCDEF0 504 | ok @Integer (hexadecimal <* string "xyz" <* eof) "123xyz" 0x123 505 | err @Integer hexadecimal "gabc" 506 | err @Integer hexadecimal "-1" 507 | err @Integer hexadecimal "" 508 | 509 | , testCase "integer" $ do 510 | ok @Integer (integer (char '_') 10) "1_2_3" 123 511 | ok @Integer (integer (char '_') 10 <* char '_') "1_2_3_" 123 512 | ok @Integer (integer (optional $ char '_') 10) "123_456_789" 123456789 513 | err @Integer (integer (pure ()) 10) "abc" 514 | err @Integer (integer (char '_') 10) "_1_2_3" 515 | err @Integer (integer (pure ()) 10) "-1" 516 | err @Integer (integer (pure ()) 10) "" 517 | 518 | ok @Integer (integer (pure ()) 2) "101" 5 519 | ok @Integer (integer (pure ()) 7) "321" 162 520 | ok @Integer (integer (pure ()) 36) "XyZ" 44027 521 | 522 | , testCase "integer'" $ do 523 | ok @(Integer, Int) (integer' (char '_') 10) "1_2_3" (123, 3) 524 | ok @(Integer, Int) (integer' (char '_') 10 <* char '_') "1_2_3_" (123, 3) 525 | ok @(Integer, Int) (integer' (optional $ char '_') 10) "123_456_789" (123456789, 9) 526 | err @(Integer, Int) (integer' (pure ()) 10) "abc" 527 | err @(Integer, Int) (integer' (char '_') 10) "_1_2_3" 528 | err @(Integer, Int) (integer' (pure ()) 10) "-1" 529 | err @(Integer, Int) (integer' (pure ()) 10) "" 530 | 531 | ok @(Integer, Int) (integer' (pure ()) 10) "0123" (123, 4) 532 | ok @(Integer, Int) (integer' (pure ()) 10) "01230" (1230, 5) 533 | ok @(Integer, Int) (integer' (pure ()) 10) "000" (0, 3) 534 | 535 | ok @(Integer, Int) (integer' (pure ()) 2) "101" (5, 3) 536 | ok @(Integer, Int) (integer' (pure ()) 7) "321" (162, 3) 537 | ok @(Integer, Int) (integer' (pure ()) 36) "XyZ" (44027, 3) 538 | 539 | , testCase "signed" $ do 540 | ok @Integer (signed decimal) "-123" (-123) 541 | ok @Integer (signed decimal) "+123" 123 542 | err @Integer (signed decimal) "- 123" 543 | err @Integer (signed decimal) "+ 123" 544 | err @Integer (signed decimal) "" 545 | 546 | , testCase "digit" $ do 547 | ok (digit 2) "1" 1 548 | ok (digit 10) "7" 7 549 | ok (digit 36) "Z" 35 550 | err (digit 10) "a" 551 | err (digit 10) "" 552 | ] 553 | ] 554 | where 555 | ok :: (Eq a, Show a, HasCallStack) => p a -> String -> a -> Assertion 556 | ok p i o = run p "filename" (stringToChunk @k i) @?= Right o 557 | err :: (Eq a, Show a, HasCallStack) => p a -> String -> Assertion 558 | err p i = assertBool "err" $ isLeft $ run p "filename" (stringToChunk @k i) 559 | errFraction :: HasCallStack => p (Either Integer (Integer, Int, Integer)) -> String -> Assertion 560 | errFraction = err 561 | okFraction :: HasCallStack => p (Either Integer (Integer, Int, Integer)) -> String -> Either Integer (Integer, Int, Integer) -> Assertion 562 | okFraction = ok 563 | 564 | reporterTests :: [TestTree] 565 | reporterTests = 566 | [ testCase "recover" $ do 567 | run (char 'a' <* eof) "a" (Just 'a', 0) 568 | run (char 'a') "b" (Nothing, 1) 569 | run (recover (char 'a' <* eof) (char 'b')) "a" (Just 'a', 0) 570 | run (recover (char 'a') (char 'b')) "b" (Just 'b', 1) 571 | run ((,) 572 | <$> recover (char 'a') (char 'b') 573 | <*> recover (char 'a') (char 'c')) "bc" (Just ('b', 'c'), 2) 574 | run (recover (char 'a') (char 'b') *> 575 | recover (char 'a') (char 'c') *> 576 | char 'a') "bcd" (Nothing, 3) 577 | run (recover (char 'a' <* eof) (char 'b')) "c" (Nothing, 1) 578 | ] 579 | where 580 | run :: (Eq a, Show a, HasCallStack) => Reporter Text a -> Text -> (Maybe a, Int) -> Assertion 581 | run p i (o, r) = do 582 | let (out, rep) = runReporter p "filename" i 583 | out @?= o 584 | length rep @?= r 585 | 586 | {- 587 | TODO: 588 | 589 | Instances: 590 | Semigroup 591 | Monoid 592 | Functor 593 | Applicative 594 | Monad 595 | MonadPlus 596 | Alternative 597 | 598 | Additional combinators provided by parser-combinators. 599 | They are also tested by megaparsec: 600 | endBy1 601 | someTill 602 | sepBy1 603 | sepEndBy1 604 | between 605 | choice 606 | count 607 | count' 608 | eitherP 609 | endBy 610 | manyTill 611 | option 612 | sepBy 613 | sepEndBy 614 | skipMany 615 | skipSome 616 | skipCount 617 | skipManyTill 618 | skipSomeTill 619 | 620 | Identation: 621 | align 622 | indented 623 | line 624 | linefold 625 | 626 | Fraction: 627 | fraction 628 | fractionHex 629 | 630 | Char Combinators: 631 | scanChars 632 | scanChars1 633 | -} 634 | --------------------------------------------------------------------------------