├── .github └── workflows │ └── haskell-ci.yml ├── .gitignore ├── ChangeLog.md ├── LICENSE ├── README.md ├── Setup.hs ├── cabal.haskell-ci ├── cabal.project ├── parsec.cabal ├── src └── Text │ ├── Parsec.hs │ ├── Parsec │ ├── ByteString.hs │ ├── ByteString │ │ └── Lazy.hs │ ├── Char.hs │ ├── Combinator.hs │ ├── Error.hs │ ├── Expr.hs │ ├── Language.hs │ ├── Perm.hs │ ├── Pos.hs │ ├── Prim.hs │ ├── String.hs │ ├── Text.hs │ ├── Text │ │ └── Lazy.hs │ └── Token.hs │ └── ParserCombinators │ ├── Parsec.hs │ └── Parsec │ ├── Char.hs │ ├── Combinator.hs │ ├── Error.hs │ ├── Expr.hs │ ├── Language.hs │ ├── Perm.hs │ ├── Pos.hs │ ├── Prim.hs │ └── Token.hs └── test ├── Bugs.hs ├── Bugs ├── Bug2.hs ├── Bug35.hs ├── Bug6.hs └── Bug9.hs ├── Features.hs ├── Features ├── Feature150.hs └── Feature80.hs ├── Main.hs ├── Util.hs ├── issue127.hs ├── issue171.hs └── issue175.hs /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'cabal.project' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20250104 12 | # 13 | # REGENDATA ("0.19.20250104",["github","cabal.project"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | push: 18 | branches: 19 | - master 20 | pull_request: 21 | branches: 22 | - master 23 | jobs: 24 | linux: 25 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 26 | runs-on: ubuntu-20.04 27 | timeout-minutes: 28 | 60 29 | container: 30 | image: buildpack-deps:jammy 31 | continue-on-error: ${{ matrix.allow-failure }} 32 | strategy: 33 | matrix: 34 | include: 35 | - compiler: ghc-9.10.1 36 | compilerKind: ghc 37 | compilerVersion: 9.10.1 38 | setup-method: ghcup 39 | allow-failure: false 40 | - compiler: ghc-9.8.2 41 | compilerKind: ghc 42 | compilerVersion: 9.8.2 43 | setup-method: ghcup 44 | allow-failure: false 45 | - compiler: ghc-9.6.4 46 | compilerKind: ghc 47 | compilerVersion: 9.6.4 48 | setup-method: ghcup 49 | allow-failure: false 50 | - compiler: ghc-9.4.8 51 | compilerKind: ghc 52 | compilerVersion: 9.4.8 53 | setup-method: ghcup 54 | allow-failure: false 55 | - compiler: ghc-9.2.8 56 | compilerKind: ghc 57 | compilerVersion: 9.2.8 58 | setup-method: ghcup 59 | allow-failure: false 60 | - compiler: ghc-9.0.2 61 | compilerKind: ghc 62 | compilerVersion: 9.0.2 63 | setup-method: ghcup 64 | allow-failure: false 65 | - compiler: ghc-8.10.7 66 | compilerKind: ghc 67 | compilerVersion: 8.10.7 68 | setup-method: ghcup 69 | allow-failure: false 70 | - compiler: ghc-8.8.4 71 | compilerKind: ghc 72 | compilerVersion: 8.8.4 73 | setup-method: ghcup 74 | allow-failure: false 75 | - compiler: ghc-8.6.5 76 | compilerKind: ghc 77 | compilerVersion: 8.6.5 78 | setup-method: ghcup 79 | allow-failure: false 80 | fail-fast: false 81 | steps: 82 | - name: apt-get install 83 | run: | 84 | apt-get update 85 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 86 | - name: Install GHCup 87 | run: | 88 | mkdir -p "$HOME/.ghcup/bin" 89 | curl -sL https://downloads.haskell.org/ghcup/0.1.30.0/x86_64-linux-ghcup-0.1.30.0 > "$HOME/.ghcup/bin/ghcup" 90 | chmod a+x "$HOME/.ghcup/bin/ghcup" 91 | - name: Install cabal-install 92 | run: | 93 | "$HOME/.ghcup/bin/ghcup" install cabal 3.14.1.1 || (cat "$HOME"/.ghcup/logs/*.* && false) 94 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.14.1.1 -vnormal+nowrap" >> "$GITHUB_ENV" 95 | - name: Install GHC (GHCup) 96 | if: matrix.setup-method == 'ghcup' 97 | run: | 98 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 99 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 100 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 101 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 102 | echo "HC=$HC" >> "$GITHUB_ENV" 103 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 104 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 105 | env: 106 | HCKIND: ${{ matrix.compilerKind }} 107 | HCNAME: ${{ matrix.compiler }} 108 | HCVER: ${{ matrix.compilerVersion }} 109 | - name: Set PATH and environment variables 110 | run: | 111 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 112 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 113 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 114 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 115 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') 116 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 117 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 118 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 119 | echo "HEADHACKAGE=false" >> "$GITHUB_ENV" 120 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 121 | env: 122 | HCKIND: ${{ matrix.compilerKind }} 123 | HCNAME: ${{ matrix.compiler }} 124 | HCVER: ${{ matrix.compilerVersion }} 125 | - name: env 126 | run: | 127 | env 128 | - name: write cabal config 129 | run: | 130 | mkdir -p $CABAL_DIR 131 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 164 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 165 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 166 | rm -f cabal-plan.xz 167 | chmod a+x $HOME/.cabal/bin/cabal-plan 168 | cabal-plan --version 169 | - name: checkout 170 | uses: actions/checkout@v4 171 | with: 172 | path: source 173 | - name: initial cabal.project for sdist 174 | run: | 175 | touch cabal.project 176 | echo "packages: $GITHUB_WORKSPACE/source/." >> cabal.project 177 | cat cabal.project 178 | - name: sdist 179 | run: | 180 | mkdir -p sdist 181 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 182 | - name: unpack 183 | run: | 184 | mkdir -p unpacked 185 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 186 | - name: generate cabal.project 187 | run: | 188 | PKGDIR_parsec="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/parsec-[0-9.]*')" 189 | echo "PKGDIR_parsec=${PKGDIR_parsec}" >> "$GITHUB_ENV" 190 | rm -f cabal.project cabal.project.local 191 | touch cabal.project 192 | touch cabal.project.local 193 | echo "packages: ${PKGDIR_parsec}" >> cabal.project 194 | echo "package parsec" >> cabal.project 195 | echo " ghc-options: -Werror=missing-methods" >> cabal.project 196 | cat >> cabal.project <> cabal.project.local 199 | cat cabal.project 200 | cat cabal.project.local 201 | - name: dump install plan 202 | run: | 203 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 204 | cabal-plan 205 | - name: restore cache 206 | uses: actions/cache/restore@v4 207 | with: 208 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 209 | path: ~/.cabal/store 210 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 211 | - name: install dependencies 212 | run: | 213 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 214 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 215 | - name: build w/o tests 216 | run: | 217 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 218 | - name: build 219 | run: | 220 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 221 | - name: tests 222 | run: | 223 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 224 | - name: cabal check 225 | run: | 226 | cd ${PKGDIR_parsec} || false 227 | ${CABAL} -vnormal check 228 | - name: haddock 229 | run: | 230 | $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all 231 | - name: unconstrained build 232 | run: | 233 | rm -f cabal.project.local 234 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 235 | - name: prepare for constraint sets 236 | run: | 237 | rm -f cabal.project.local 238 | - name: constraint set bytestring-0.12 239 | run: | 240 | $CABAL v2-build $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='bytestring ^>=0.12' all --dry-run 241 | cabal-plan topo | sort 242 | $CABAL v2-build $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='bytestring ^>=0.12' --dependencies-only -j2 all 243 | $CABAL v2-build $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='bytestring ^>=0.12' all 244 | $CABAL v2-test $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='bytestring ^>=0.12' all 245 | - name: constraint set mtl-2.3 246 | run: | 247 | $CABAL v2-build $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='mtl ^>=2.3' all --dry-run 248 | cabal-plan topo | sort 249 | $CABAL v2-build $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='mtl ^>=2.3' --dependencies-only -j2 all 250 | $CABAL v2-build $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='mtl ^>=2.3' all 251 | $CABAL v2-test $ARG_COMPILER --enable-tests --disable-benchmarks --constraint='mtl ^>=2.3' all 252 | - name: save cache 253 | if: always() 254 | uses: actions/cache/save@v4 255 | with: 256 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 257 | path: ~/.cabal/store 258 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/ 2 | /dist-boot/ 3 | /dist-install/ 4 | /dist-newstyle/ 5 | /cabal.project.local 6 | /.cabal-sandbox/ 7 | /cabal.sandbox.config 8 | /.ghc.environment.* 9 | *~ 10 | ghc.mk 11 | GNUmakefile 12 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | ### 3.1.18.0 2 | 3 | - Drop support for GHCs prior 8.6.5 4 | 5 | ### 3.1.17.0 6 | 7 | - Move `many1 :: ParsecT s u m a -> ParsecT s u m [a]` to `Text.Parsec.Prim`. 8 | Drop `Stream` constraint requirement. 9 | - Change the position comparison in `mergeError` to not compare source names. 10 | This doesn't alter reported error positions when only a single source is parsed. 11 | This fixes performance issue caused by long source names. 12 | - Add `Exception ParseError` instance 13 | 14 | ### 3.1.16.0 15 | 16 | - Add `tokens'` and `string'` combinators which don't consume the prefix. 17 | 18 | ### 3.1.15.0 19 | 20 | - Add `INLINABLE` pragmas to most overloaded combinators 21 | - Support recent versions of dependencies 22 | - Fix memory leak in `>>=` https://github.com/haskell/parsec/issues/127 23 | 24 | ### 3.1.14.0 25 | 26 | - Add `parseFromFile` to `Text.Parsec.Text.Lazy` and `Text.Parsec.Text` (#103, #104). 27 | 28 | - Clarify Haddock documentation in various places (#105,#101,#102). 29 | 30 | - Add support for `base-4.13`. 31 | 32 | ### 3.1.13.0 33 | 34 | - Add official support for [`SafeHaskell`](http://downloads.haskell.org/~ghc/latest/docs/html/users_guide/safe_haskell.html) 35 | 36 | **NOTE**: This is the first version whose `SafeHaskell` properties 37 | have become an intentional part of the API contract; previous 38 | versions were merely accidentally safe-inferred (or not depending 39 | on various factors; in other words, this was a fragile 40 | property). If you rely on `SafeHaskell` to consider module imports 41 | from `parsec` *safe*, this is the first version of `parsec` which 42 | actually guarantees a well-defined state; you can declare this 43 | requirement by either specifying 44 | 45 | build-depends: parsec >= 3.1.13.0 && < 3.2 46 | 47 | or, starting with `cabal-version:2.0`, via 48 | 49 | build-depends: parsec ^>= 3.1.13.0 50 | 51 | - Drop support for GHC 7.0, GHC 7.2, and GHC 7.4.1; support window 52 | starts with GHC 7.4.2. 53 | 54 | ### 3.1.12.0 55 | 56 | - Support limited to GHC 7.0 & GHC 7.2 only 57 | 58 | - Add `MonadFail` instance for `ParsecT` 59 | - Add `Semigroup`/`Monoid` instances for `ParsecT` (#80,#82) 60 | - Fix space leak in Applicative/Monad interface (#37) 61 | - Add `parserTrace` and `parserTraced` combinators for debugging. 62 | 63 | ### 3.1.11 64 | 65 | - Include `README.md` in package. 66 | 67 | ### 3.1.10 68 | 69 | - Most types now have a `Typeable` instance. Some instances are dropped from 70 | older versions of GHC (sorry about that!). 71 | - The token-parser now rejects Unicode numeric escape sequences for characters 72 | outside the Unicode range. 73 | - The token-parser now loses less precision when parsing literal doubles. 74 | - Documentation fixes and corrections. 75 | - We no longer test parsec builds on GHC 7.4. 76 | 77 | ### 3.1.9 78 | 79 | - Many and various updates to documentation and package description (including 80 | the homepage links). 81 | - Add an `Eq` instance for `ParseError` 82 | - Fixed a regression from 3.1.6: `runP` is again exported from module 83 | Text.Parsec. 84 | 85 | ### 3.1.8 86 | 87 | - Fix a regression from 3.1.6 related to exports from the main module. 88 | 89 | ### 3.1.7 90 | 91 | - Fix a regression from 3.1.6 related to the reported position of error messages. 92 | See bug #9 for details. 93 | - Reset the current error position on success of `lookAhead`. 94 | 95 | ### 3.1.6 96 | 97 | - Export `Text` instances from Text.Parsec 98 | - Make Text.Parsec exports more visible 99 | - Re-arrange Text.Parsec exports 100 | - Add functions `crlf` and `endOfLine` to Text.Parsec.Char for handling 101 | input streams that do not have normalized line terminators. 102 | - Fix off-by-one error in Token.charControl 103 | 104 | ### 3.1.4 105 | 106 | - Relax dependency on `text` 107 | 108 | ### 3.1.5 109 | 110 | - Relax dependency on `text` 111 | 112 | ### 3.1.3 113 | 114 | - Fix a regression introduced in 3.1.2 related to positions reported by error messages. 115 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 1999-2000, Daan Leijen; 2007, Paolo Martini. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | 12 | This software is provided by the copyright holders "as is" and any express or 13 | implied warranties, including, but not limited to, the implied warranties of 14 | merchantability and fitness for a particular purpose are disclaimed. In no 15 | event shall the copyright holders be liable for any direct, indirect, 16 | incidental, special, exemplary, or consequential damages (including, but not 17 | limited to, procurement of substitute goods or services; loss of use, data, 18 | or profits; or business interruption) however caused and on any theory of 19 | liability, whether in contract, strict liability, or tort (including 20 | negligence or otherwise) arising in any way out of the use of this software, 21 | even if advised of the possibility of such damage. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Parsec [![Build Status](https://travis-ci.org/haskell/parsec.svg?branch=master)](https://travis-ci.org/haskell/parsec) 2 | ====== 3 | 4 | **Please refer to the [package description on Hackage](https://hackage.haskell.org/package/parsec#description) for more information.** 5 | 6 | A monadic parser combinator library, written by Daan Leijen. Parsec is designed 7 | from scratch as an industrial-strength parser library. It is simple, safe, well 8 | documented, has extensive libraries, good error messages, and is fast. 9 | 10 | Some links: 11 | 12 | * [Parsec on Hackage](https://hackage.haskell.org/package/parsec), 13 | contains the generated documentation. 14 | * The 2001 paper written by Daan Leijen, some what outdated 15 | ([PDF](https://web.archive.org/web/20140528151730/http://legacy.cs.uu.nl/daan/download/parsec/parsec.pdf), 16 | [HTML](https://web.archive.org/web/20140528151730/http://legacy.cs.uu.nl/daan/download/parsec/parsec.html), 17 | thanks to [archive.org](http://web.archive.org); 18 | and [PDF](https://research.microsoft.com/en-us/um/people/daan/download/parsec/parsec.pdf), 19 | thanks to Microsoft Research). 20 | * [Using Parsec](http://book.realworldhaskell.org/read/using-parsec.html), 21 | chapter 16 of [Real World Haskell](http://book.realworldhaskell.org/). 22 | * [An introduction to the Parsec library](https://www.kuniga.me/blog/2014/01/21/an-introduction-to-the-parsec-library.html) 23 | on Kunigami's blog. 24 | * [An introduction to parsing text in Haskell with Parsec](https://jsdw.me/posts/haskell-parsec-basics/) on Wilson's blog. 25 | * Differences between Parsec and 26 | [Attoparsec](http://hackage.haskell.org/package/attoparsec) 27 | (Haskell's other prominent parser library) as explained in 28 | [an answer on StackExchange](http://stackoverflow.com/a/19213247). 29 | * Differences between Parsec and [Happy](http://www.haskell.org/happy) 30 | (Haskell's parser generator) as explained in two 31 | answers on separate StackExchange questions 32 | ([1](http://stackoverflow.com/a/7270904), 33 | [2](http://stackoverflow.com/a/14775331)). 34 | * Differences between Parsec and 35 | [Megaparsec](http://hackage.haskell.org/package/megaparsec) 36 | (an advanced fork of Parsec) as explained in 37 | [Megaparsec's README](https://github.com/mrkkrp/megaparsec#megaparsec-vs-parsec). 38 | 39 | 40 | By analyzing [Parsec's reverse dependencies on Hackage](http://packdeps.haskellers.com/reverse/parsec) 41 | we can find open source project that make use of Parsec. For example 42 | [bibtex](http://hackage.haskell.org/package/bibtex), 43 | [ConfigFile](http://hackage.haskell.org/package/ConfigFile), 44 | [csv](http://hackage.haskell.org/package/csv) and 45 | [hjson](http://hackage.haskell.org/package/hjson). 46 | 47 | 48 | ## Getting started 49 | 50 | This requires a working version of `cabal` and `ghci`, which are part of 51 | any modern installation of Haskell, such as 52 | [Haskell Platform](https://www.haskell.org/platform). 53 | 54 | First install Parsec. 55 | 56 | cabal install parsec 57 | 58 | Below we show how a very simple parser that tests matching parentheses 59 | was made from GHCI (the interactive GHC environment), which we started 60 | with the `ghci` command). 61 | 62 | ``` 63 | Prelude> :m +Text.Parsec 64 | Prelude Text.Parsec> let parenSet = char '(' >> many parenSet >> char ')' :: Parsec String () Char 65 | Loading package transformers-0.3.0.0 ... linking ... done. 66 | Loading package array-0.5.0.0 ... linking ... done. 67 | Loading package deepseq-1.3.0.2 ... linking ... done. 68 | Loading package bytestring-0.10.4.0 ... linking ... done. 69 | Loading package mtl-2.1.3.1 ... linking ... done. 70 | Loading package text-1.1.1.3 ... linking ... done. 71 | Loading package parsec-3.1.5 ... linking ... done. 72 | Prelude Text.Parsec> let parens = (many parenSet >> eof) <|> eof 73 | Prelude Text.Parsec> parse parens "" "()" 74 | Right () 75 | Prelude Text.Parsec> parse parens "" "()(())" 76 | Right () 77 | Prelude Text.Parsec> parse parens "" "(" 78 | Left (line 1, column 2): 79 | unexpected end of input 80 | expecting "(" or ")" 81 | ``` 82 | 83 | The `Right ()` results indicate successes: the parentheses matched. 84 | The `Left [...]` result indicates a parse failure, and is detailed 85 | with an error message. 86 | 87 | For a more thorough introduction to Parsec we recommend the links at 88 | the top of this README file. 89 | 90 | 91 | ## Contributing 92 | 93 | Issues (bugs, feature requests or otherwise feedback) may be reported in 94 | [the Github issue tracker for this project](https://github.com/haskell/parsec/issues). 95 | 96 | Pull-requests are also welcome. 97 | 98 | 99 | ## License 100 | 101 | See the [LICENSE](https://github.com/haskell/parsec/blob/master/LICENSE) 102 | file in the repository. 103 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Distribution.Simple 4 | 5 | main :: IO () 6 | main = defaultMain 7 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | branches: master 2 | 3 | constraint-set mtl-2.3 4 | ghc: >=8.6 5 | constraints: mtl ^>=2.3 6 | tests: True 7 | run-tests: True 8 | 9 | constraint-set bytestring-0.12 10 | ghc: >=8.2.2 11 | constraints: bytestring ^>=0.12 12 | tests: True 13 | run-tests: True 14 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . 2 | -------------------------------------------------------------------------------- /parsec.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.12 2 | name: parsec 3 | version: 3.1.18.0 4 | synopsis: Monadic parser combinators 5 | description: 6 | Parsec is designed from scratch as an industrial-strength parser 7 | library. It is simple, safe, well documented (on the package 8 | homepage), has extensive libraries, good error messages, 9 | and is fast. It is defined as a monad transformer that can be 10 | stacked on arbitrary monads, and it is also parametric in the 11 | input stream type. 12 | . 13 | The main entry point is the "Text.Parsec" module which provides 14 | defaults for parsing 'Char'acter data. 15 | . 16 | The "Text.ParserCombinators.Parsec" module hierarchy contains 17 | the legacy @parsec-2@ API and may be removed at some point in 18 | the future. 19 | 20 | license: BSD2 21 | license-file: LICENSE 22 | author: 23 | Daan Leijen , Paolo Martini , Antoine Latter 24 | 25 | maintainer: 26 | Oleg Grenrus , Herbert Valerio Riedel 27 | 28 | homepage: https://github.com/haskell/parsec 29 | bug-reports: https://github.com/haskell/parsec/issues 30 | category: Parsing 31 | build-type: Simple 32 | tested-with: 33 | GHC ==8.6.5 34 | || ==8.8.4 35 | || ==8.10.7 36 | || ==9.0.2 37 | || ==9.2.8 38 | || ==9.4.8 39 | || ==9.6.4 40 | || ==9.8.2 41 | || ==9.10.1 42 | || ==9.12.1 43 | 44 | extra-source-files: 45 | ChangeLog.md 46 | README.md 47 | 48 | source-repository head 49 | type: git 50 | location: https://github.com/haskell/parsec 51 | 52 | library 53 | hs-source-dirs: src 54 | exposed-modules: 55 | Text.Parsec 56 | Text.Parsec.ByteString 57 | Text.Parsec.ByteString.Lazy 58 | Text.Parsec.Char 59 | Text.Parsec.Combinator 60 | Text.Parsec.Error 61 | Text.Parsec.Expr 62 | Text.Parsec.Language 63 | Text.Parsec.Perm 64 | Text.Parsec.Pos 65 | Text.Parsec.Prim 66 | Text.Parsec.String 67 | Text.Parsec.Text 68 | Text.Parsec.Text.Lazy 69 | Text.Parsec.Token 70 | Text.ParserCombinators.Parsec 71 | Text.ParserCombinators.Parsec.Char 72 | Text.ParserCombinators.Parsec.Combinator 73 | Text.ParserCombinators.Parsec.Error 74 | Text.ParserCombinators.Parsec.Expr 75 | Text.ParserCombinators.Parsec.Language 76 | Text.ParserCombinators.Parsec.Perm 77 | Text.ParserCombinators.Parsec.Pos 78 | Text.ParserCombinators.Parsec.Prim 79 | Text.ParserCombinators.Parsec.Token 80 | 81 | build-depends: 82 | base >=4.12.0.0 && <4.22 83 | , bytestring >=0.10.8.2 && <0.13 84 | , mtl >=2.2.2 && <2.4 85 | , text >=1.2.3.0 && <1.3 || >=2.0 && <2.2 86 | 87 | default-language: Haskell2010 88 | other-extensions: 89 | CPP 90 | DeriveDataTypeable 91 | ExistentialQuantification 92 | FlexibleContexts 93 | FlexibleInstances 94 | FunctionalDependencies 95 | MultiParamTypeClasses 96 | PolymorphicComponents 97 | Safe 98 | StandaloneDeriving 99 | Trustworthy 100 | UndecidableInstances 101 | 102 | ghc-options: -Wall 103 | ghc-options: 104 | -Wcompat -Wnoncanonical-monad-instances -Wno-trustworthy-safe 105 | 106 | if impl(ghc <8.8) 107 | ghc-options: -Wnoncanonical-monadfail-instances 108 | 109 | -- these flags may abort compilation with GHC-8.10 110 | -- https://gitlab.haskell.org/ghc/ghc/-/merge_requests/3295 111 | -- https://gitlab.haskell.org/ghc/ghc/-/issues/22728 112 | -- if impl(ghc >= 9.0) 113 | -- -- ghc-options: -Winferred-safe-imports -Wmissing-safe-haskell-mode 114 | 115 | test-suite parsec-tests 116 | type: exitcode-stdio-1.0 117 | hs-source-dirs: test 118 | main-is: Main.hs 119 | other-modules: 120 | Bugs 121 | Bugs.Bug2 122 | Bugs.Bug35 123 | Bugs.Bug6 124 | Bugs.Bug9 125 | Features 126 | Features.Feature150 127 | Features.Feature80 128 | Util 129 | 130 | build-depends: 131 | base 132 | , mtl 133 | , parsec 134 | , tasty >=1.4 && <1.6 135 | , tasty-hunit >=0.10 && <0.11 136 | 137 | -- dependencies whose version bounds are not inherited via lib:parsec 138 | default-language: Haskell2010 139 | ghc-options: -Wall 140 | 141 | if impl(ghc >=8.0) 142 | ghc-options: 143 | -Wcompat -Wnoncanonical-monad-instances 144 | -Wnoncanonical-monadfail-instances 145 | 146 | else 147 | build-depends: semigroups 148 | 149 | test-suite parsec-issue127 150 | default-language: Haskell2010 151 | type: exitcode-stdio-1.0 152 | main-is: issue127.hs 153 | hs-source-dirs: test 154 | build-depends: 155 | base 156 | , parsec 157 | 158 | test-suite parsec-issue171 159 | default-language: Haskell2010 160 | type: exitcode-stdio-1.0 161 | main-is: issue171.hs 162 | hs-source-dirs: test 163 | build-depends: 164 | base 165 | , deepseq 166 | , parsec 167 | , tasty 168 | , tasty-hunit 169 | 170 | test-suite parsec-issue175 171 | default-language: Haskell2010 172 | type: exitcode-stdio-1.0 173 | main-is: issue175.hs 174 | hs-source-dirs: test 175 | build-depends: 176 | base 177 | , parsec 178 | , tasty 179 | , tasty-hunit 180 | -------------------------------------------------------------------------------- /src/Text/Parsec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | {-| 4 | Module : Text.Parsec 5 | Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 6 | License : BSD-style (see the LICENSE file) 7 | 8 | Maintainer : aslatter@gmail.com 9 | Stability : provisional 10 | Portability : portable 11 | 12 | This module includes everything you need to get started writing a 13 | parser. 14 | 15 | By default this module is set up to parse character data. If you'd like 16 | to parse the result of your own tokenizer you should start with the following 17 | imports: 18 | 19 | @ 20 | import Text.Parsec.Prim 21 | import Text.Parsec.Combinator 22 | @ 23 | 24 | Then you can implement your own version of 'satisfy' on top of the 'tokenPrim' 25 | primitive. 26 | 27 | -} 28 | 29 | module Text.Parsec 30 | ( -- * Parsers 31 | ParsecT 32 | , Parsec 33 | , token 34 | , tokens 35 | , runParserT 36 | , runParser 37 | , parse 38 | , parseTest 39 | , getPosition 40 | , getInput 41 | , getState 42 | , putState 43 | , modifyState 44 | -- * Combinators 45 | , (<|>) 46 | , () 47 | , label 48 | , labels 49 | , try 50 | , unexpected 51 | , choice 52 | , many 53 | , many1 54 | , skipMany 55 | , skipMany1 56 | , count 57 | , between 58 | , option 59 | , optionMaybe 60 | , optional 61 | , sepBy 62 | , sepBy1 63 | , endBy 64 | , endBy1 65 | , sepEndBy 66 | , sepEndBy1 67 | , chainl 68 | , chainl1 69 | , chainr 70 | , chainr1 71 | , eof 72 | , notFollowedBy 73 | , manyTill 74 | , lookAhead 75 | , anyToken 76 | -- * Character Parsing 77 | , module Text.Parsec.Char 78 | -- * Error messages 79 | , ParseError 80 | , errorPos 81 | -- * Position 82 | , SourcePos 83 | , SourceName, Line, Column 84 | , sourceName, sourceLine, sourceColumn 85 | , incSourceLine, incSourceColumn 86 | , setSourceLine, setSourceColumn, setSourceName 87 | -- * Debugging 88 | -- 89 | -- | As a more comprehensive alternative for debugging Parsec parsers, 90 | -- there's also the [parsec-free](http://hackage.haskell.org/package/parsec-free) 91 | -- package. 92 | -- 93 | , parserTrace, parserTraced 94 | -- * Low-level operations 95 | , manyAccum 96 | , tokenPrim 97 | , tokenPrimEx 98 | , runPT 99 | , unknownError 100 | , sysUnExpectError 101 | , mergeErrorReply 102 | , getParserState 103 | , setParserState 104 | , updateParserState 105 | , Stream (..) 106 | , runParsecT 107 | , mkPT 108 | , runP 109 | , Consumed (..) 110 | , Reply (..) 111 | , State (..) 112 | , setPosition 113 | , setInput 114 | -- * Other stuff 115 | , setState 116 | , updateState 117 | , parsecMap 118 | , parserReturn 119 | , parserBind 120 | , parserFail 121 | , parserZero 122 | , parserPlus 123 | ) where 124 | 125 | import Text.Parsec.Pos 126 | import Text.Parsec.Error 127 | import Text.Parsec.Prim 128 | import Text.Parsec.Char 129 | import Text.Parsec.Combinator 130 | -------------------------------------------------------------------------------- /src/Text/Parsec/ByteString.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parsec.ByteString 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Convenience definitions for working with 'C.ByteString's. 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.Parsec.ByteString 18 | ( Parser, GenParser, parseFromFile 19 | ) where 20 | 21 | import qualified Data.ByteString.Char8 as C 22 | 23 | import Text.Parsec.Error 24 | import Text.Parsec.Prim 25 | 26 | type Parser = Parsec C.ByteString () 27 | type GenParser t st = Parsec C.ByteString st 28 | 29 | -- | @parseFromFile p filePath@ runs a strict bytestring parser @p@ on the 30 | -- input read from @filePath@ using 'ByteString.Char8.readFile'. Returns either a 'ParseError' 31 | -- ('Left') or a value of type @a@ ('Right'). 32 | -- 33 | -- > main = do{ result <- parseFromFile numbers "digits.txt" 34 | -- > ; case result of 35 | -- > Left err -> print err 36 | -- > Right xs -> print (sum xs) 37 | -- > } 38 | 39 | parseFromFile :: Parser a -> FilePath -> IO (Either ParseError a) 40 | parseFromFile p fname 41 | = do input <- C.readFile fname 42 | return (runP p () fname input) 43 | -------------------------------------------------------------------------------- /src/Text/Parsec/ByteString/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parsec.ByteString.Lazy 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Convenience definitions for working with lazy 'C.ByteString's. 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.Parsec.ByteString.Lazy 18 | ( Parser, GenParser, parseFromFile 19 | ) where 20 | 21 | import qualified Data.ByteString.Lazy.Char8 as C 22 | 23 | import Text.Parsec.Error 24 | import Text.Parsec.Prim 25 | 26 | type Parser = Parsec C.ByteString () 27 | type GenParser t st = Parsec C.ByteString st 28 | 29 | -- | @parseFromFile p filePath@ runs a lazy bytestring parser @p@ on the 30 | -- input read from @filePath@ using 'ByteString.Lazy.Char8.readFile'. Returns either a 'ParseError' 31 | -- ('Left') or a value of type @a@ ('Right'). 32 | -- 33 | -- > main = do{ result <- parseFromFile numbers "digits.txt" 34 | -- > ; case result of 35 | -- > Left err -> print err 36 | -- > Right xs -> print (sum xs) 37 | -- > } 38 | 39 | parseFromFile :: Parser a -> FilePath -> IO (Either ParseError a) 40 | parseFromFile p fname 41 | = do input <- C.readFile fname 42 | return (runP p () fname input) 43 | -------------------------------------------------------------------------------- /src/Text/Parsec/Char.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parsec.Char 6 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Commonly used character parsers. 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.Parsec.Char where 18 | 19 | import Data.Char (isSpace, isUpper, isLower, isAlphaNum, isAlpha, isDigit, isHexDigit, isOctDigit) 20 | 21 | import Text.Parsec.Pos 22 | import Text.Parsec.Prim 23 | 24 | -- | @oneOf cs@ succeeds if the current character is in the supplied 25 | -- list of characters @cs@. Returns the parsed character. See also 26 | -- 'satisfy'. 27 | -- 28 | -- > vowel = oneOf "aeiou" 29 | 30 | oneOf :: (Stream s m Char) => [Char] -> ParsecT s u m Char 31 | {-# INLINABLE oneOf #-} 32 | oneOf cs = satisfy (\c -> elem c cs) 33 | 34 | -- | As the dual of 'oneOf', @noneOf cs@ succeeds if the current 35 | -- character /not/ in the supplied list of characters @cs@. Returns the 36 | -- parsed character. 37 | -- 38 | -- > consonant = noneOf "aeiou" 39 | 40 | noneOf :: (Stream s m Char) => [Char] -> ParsecT s u m Char 41 | {-# INLINABLE noneOf #-} 42 | noneOf cs = satisfy (\c -> not (elem c cs)) 43 | 44 | -- | Skips /zero/ or more white space characters. See also 'skipMany'. 45 | 46 | spaces :: (Stream s m Char) => ParsecT s u m () 47 | {-# INLINABLE spaces #-} 48 | spaces = skipMany space "white space" 49 | 50 | -- | Parses a white space character (any character which satisfies 'isSpace') 51 | -- Returns the parsed character. 52 | 53 | space :: (Stream s m Char) => ParsecT s u m Char 54 | {-# INLINABLE space #-} 55 | space = satisfy isSpace "space" 56 | 57 | -- | Parses a newline character (\'\\n\'). Returns a newline character. 58 | 59 | newline :: (Stream s m Char) => ParsecT s u m Char 60 | {-# INLINABLE newline #-} 61 | newline = char '\n' "lf new-line" 62 | 63 | -- | Parses a carriage return character (\'\\r\') followed by a newline character (\'\\n\'). 64 | -- Returns a newline character. 65 | 66 | crlf :: (Stream s m Char) => ParsecT s u m Char 67 | {-# INLINABLE crlf #-} 68 | crlf = char '\r' *> char '\n' "crlf new-line" 69 | 70 | -- | Parses a CRLF (see 'crlf') or LF (see 'newline') end-of-line. 71 | -- Returns a newline character (\'\\n\'). 72 | -- 73 | -- > endOfLine = newline <|> crlf 74 | -- 75 | 76 | endOfLine :: (Stream s m Char) => ParsecT s u m Char 77 | {-# INLINABLE endOfLine #-} 78 | endOfLine = newline <|> crlf "new-line" 79 | 80 | -- | Parses a tab character (\'\\t\'). Returns a tab character. 81 | 82 | tab :: (Stream s m Char) => ParsecT s u m Char 83 | {-# INLINABLE tab #-} 84 | tab = char '\t' "tab" 85 | 86 | -- | Parses an upper case letter (according to 'isUpper'). 87 | -- Returns the parsed character. 88 | 89 | upper :: (Stream s m Char) => ParsecT s u m Char 90 | {-# INLINABLE upper #-} 91 | upper = satisfy isUpper "uppercase letter" 92 | 93 | -- | Parses a lower case character (according to 'isLower'). 94 | -- Returns the parsed character. 95 | 96 | lower :: (Stream s m Char) => ParsecT s u m Char 97 | {-# INLINABLE lower #-} 98 | lower = satisfy isLower "lowercase letter" 99 | 100 | -- | Parses a alphabetic or numeric Unicode characters 101 | -- according to 'isAlphaNum'. Returns the parsed character. 102 | -- 103 | -- Note that numeric digits outside the ASCII range (such as arabic-indic digits like e.g. \"٤\" or @U+0664@), 104 | -- as well as numeric characters which aren't digits, are parsed by this function 105 | -- but not by 'digit'. 106 | 107 | alphaNum :: (Stream s m Char => ParsecT s u m Char) 108 | {-# INLINABLE alphaNum #-} 109 | alphaNum = satisfy isAlphaNum "letter or digit" 110 | 111 | -- | Parses an alphabetic Unicode characters (lower-case, upper-case and title-case letters, 112 | -- plus letters of caseless scripts and modifiers letters according to 'isAlpha'). 113 | -- Returns the parsed character. 114 | 115 | letter :: (Stream s m Char) => ParsecT s u m Char 116 | {-# INLINABLE letter #-} 117 | letter = satisfy isAlpha "letter" 118 | 119 | -- | Parses an ASCII digit. Returns the parsed character. 120 | 121 | digit :: (Stream s m Char) => ParsecT s u m Char 122 | {-# INLINABLE digit #-} 123 | digit = satisfy isDigit "digit" 124 | 125 | -- | Parses a hexadecimal digit (a digit or a letter between \'a\' and 126 | -- \'f\' or \'A\' and \'F\'). Returns the parsed character. 127 | 128 | hexDigit :: (Stream s m Char) => ParsecT s u m Char 129 | {-# INLINABLE hexDigit #-} 130 | hexDigit = satisfy isHexDigit "hexadecimal digit" 131 | 132 | -- | Parses an octal digit (a character between \'0\' and \'7\'). Returns 133 | -- the parsed character. 134 | 135 | octDigit :: (Stream s m Char) => ParsecT s u m Char 136 | {-# INLINABLE octDigit #-} 137 | octDigit = satisfy isOctDigit "octal digit" 138 | 139 | -- | @char c@ parses a single character @c@. Returns the parsed 140 | -- character (i.e. @c@). 141 | -- 142 | -- > semiColon = char ';' 143 | 144 | char :: (Stream s m Char) => Char -> ParsecT s u m Char 145 | {-# INLINABLE char #-} 146 | char c = satisfy (==c) show [c] 147 | 148 | -- | This parser succeeds for any character. Returns the parsed character. 149 | 150 | anyChar :: (Stream s m Char) => ParsecT s u m Char 151 | {-# INLINABLE anyChar #-} 152 | anyChar = satisfy (const True) 153 | 154 | -- | The parser @satisfy f@ succeeds for any character for which the 155 | -- supplied function @f@ returns 'True'. Returns the character that is 156 | -- actually parsed. 157 | 158 | -- > digit = satisfy isDigit 159 | -- > oneOf cs = satisfy (\c -> c `elem` cs) 160 | 161 | satisfy :: (Stream s m Char) => (Char -> Bool) -> ParsecT s u m Char 162 | {-# INLINABLE satisfy #-} 163 | satisfy f = tokenPrim (\c -> show [c]) 164 | (\pos c _cs -> updatePosChar pos c) 165 | (\c -> if f c then Just c else Nothing) 166 | 167 | -- | @'string' s@ parses a sequence of characters given by @s@. Returns 168 | -- the parsed string (i.e. @s@). 169 | -- 170 | -- > divOrMod = string "div" 171 | -- > <|> string "mod" 172 | -- 173 | -- Consider using 'string''. 174 | 175 | string :: (Stream s m Char) => String -> ParsecT s u m String 176 | {-# INLINABLE string #-} 177 | string s = tokens show updatePosString s 178 | 179 | -- | @'string'' s@ parses a sequence of characters given by @s@. 180 | -- Doesn't consume matching prefix. 181 | -- 182 | -- > carOrCdr = string' "car" 183 | -- > <|> string' "cdr" 184 | -- 185 | -- @since 3.1.16.0 186 | 187 | string' :: (Stream s m Char) => String -> ParsecT s u m String 188 | {-# INLINABLE string' #-} 189 | string' s = tokens' show updatePosString s 190 | -------------------------------------------------------------------------------- /src/Text/Parsec/Combinator.hs: -------------------------------------------------------------------------------- 1 | -- due to Debug.Trace 2 | {-# LANGUAGE Trustworthy #-} 3 | 4 | ----------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Text.Parsec.Combinator 7 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 8 | -- License : BSD-style (see the LICENSE file) 9 | -- 10 | -- Maintainer : derek.a.elkins@gmail.com 11 | -- Stability : provisional 12 | -- Portability : portable 13 | -- 14 | -- Commonly used generic combinators. 15 | -- 16 | -- See also the [parser-combinators](http://hackage.haskell.org/package/parser-combinators) 17 | -- package for additional (and generalised) combinators. 18 | -- 19 | ----------------------------------------------------------------------------- 20 | 21 | module Text.Parsec.Combinator 22 | ( choice 23 | , count 24 | , between 25 | , option, optionMaybe, optional 26 | , skipMany1 27 | , many1 28 | , sepBy, sepBy1 29 | , endBy, endBy1 30 | , sepEndBy, sepEndBy1 31 | , chainl, chainl1 32 | , chainr, chainr1 33 | , eof, notFollowedBy 34 | -- tricky combinators 35 | , manyTill, lookAhead, anyToken 36 | -- * Debugging 37 | -- 38 | -- | As a more comprehensive alternative for debugging Parsec parsers, 39 | -- there's also the [parsec-free](http://hackage.haskell.org/package/parsec-free) 40 | -- package. 41 | -- 42 | , parserTrace, parserTraced 43 | ) where 44 | 45 | import Control.Monad (mzero, liftM) 46 | import Debug.Trace (trace) 47 | 48 | import Text.Parsec.Prim 49 | 50 | -- | @choice ps@ tries to apply the parsers in the list @ps@ in order, 51 | -- until one of them succeeds. Returns the value of the succeeding 52 | -- parser. 53 | 54 | choice :: (Stream s m t) => [ParsecT s u m a] -> ParsecT s u m a 55 | {-# INLINABLE choice #-} 56 | choice ps = foldr (<|>) mzero ps 57 | 58 | -- | @option x p@ tries to apply parser @p@. If @p@ fails without 59 | -- consuming input, it returns the value @x@, otherwise the value 60 | -- returned by @p@. 61 | -- 62 | -- > priority = option 0 (do{ d <- digit 63 | -- > ; return (digitToInt d) 64 | -- > }) 65 | 66 | option :: (Stream s m t) => a -> ParsecT s u m a -> ParsecT s u m a 67 | {-# INLINABLE option #-} 68 | option x p = p <|> return x 69 | 70 | -- | @optionMaybe p@ tries to apply parser @p@. If @p@ fails without 71 | -- consuming input, it return 'Nothing', otherwise it returns 72 | -- 'Just' the value returned by @p@. 73 | 74 | optionMaybe :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m (Maybe a) 75 | {-# INLINABLE optionMaybe #-} 76 | optionMaybe p = option Nothing (liftM Just p) 77 | 78 | -- | @optional p@ tries to apply parser @p@. It will parse @p@ or nothing. 79 | -- It only fails if @p@ fails after consuming input. It discards the result 80 | -- of @p@. 81 | 82 | optional :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m () 83 | {-# INLINABLE optional #-} 84 | optional p = do{ _ <- p; return ()} <|> return () 85 | 86 | -- | @between open close p@ parses @open@, followed by @p@ and @close@. 87 | -- Returns the value returned by @p@. 88 | -- 89 | -- > braces = between (symbol "{") (symbol "}") 90 | 91 | between :: (Stream s m t) => ParsecT s u m open -> ParsecT s u m close 92 | -> ParsecT s u m a -> ParsecT s u m a 93 | {-# INLINABLE between #-} 94 | between open close p 95 | = do{ _ <- open; x <- p; _ <- close; return x } 96 | 97 | -- | @skipMany1 p@ applies the parser @p@ /one/ or more times, skipping 98 | -- its result. 99 | 100 | skipMany1 :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m () 101 | {-# INLINABLE skipMany1 #-} 102 | skipMany1 p = do{ _ <- p; skipMany p } 103 | {- 104 | skipMany p = scan 105 | where 106 | scan = do{ p; scan } <|> return () 107 | -} 108 | 109 | -- | @sepBy p sep@ parses /zero/ or more occurrences of @p@, separated 110 | -- by @sep@. Returns a list of values returned by @p@. 111 | -- 112 | -- > commaSep p = p `sepBy` (symbol ",") 113 | 114 | sepBy :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a] 115 | {-# INLINABLE sepBy #-} 116 | sepBy p sep = sepBy1 p sep <|> return [] 117 | 118 | -- | @sepBy1 p sep@ parses /one/ or more occurrences of @p@, separated 119 | -- by @sep@. Returns a list of values returned by @p@. 120 | 121 | sepBy1 :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a] 122 | {-# INLINABLE sepBy1 #-} 123 | sepBy1 p sep = do{ x <- p 124 | ; xs <- many (sep >> p) 125 | ; return (x:xs) 126 | } 127 | 128 | 129 | -- | @sepEndBy1 p sep@ parses /one/ or more occurrences of @p@, 130 | -- separated and optionally ended by @sep@. Returns a list of values 131 | -- returned by @p@. 132 | 133 | sepEndBy1 :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a] 134 | {-# INLINABLE sepEndBy1 #-} 135 | sepEndBy1 p sep = do{ x <- p 136 | ; do{ _ <- sep 137 | ; xs <- sepEndBy p sep 138 | ; return (x:xs) 139 | } 140 | <|> return [x] 141 | } 142 | 143 | -- | @sepEndBy p sep@ parses /zero/ or more occurrences of @p@, 144 | -- separated and optionally ended by @sep@, ie. haskell style 145 | -- statements. Returns a list of values returned by @p@. 146 | -- 147 | -- > haskellStatements = haskellStatement `sepEndBy` semi 148 | 149 | sepEndBy :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a] 150 | {-# INLINABLE sepEndBy #-} 151 | sepEndBy p sep = sepEndBy1 p sep <|> return [] 152 | 153 | 154 | -- | @endBy1 p sep@ parses /one/ or more occurrences of @p@, separated 155 | -- and ended by @sep@. Returns a list of values returned by @p@. 156 | 157 | endBy1 :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a] 158 | {-# INLINABLE endBy1 #-} 159 | endBy1 p sep = many1 (do{ x <- p; _ <- sep; return x }) 160 | 161 | -- | @endBy p sep@ parses /zero/ or more occurrences of @p@, separated 162 | -- and ended by @sep@. Returns a list of values returned by @p@. 163 | -- 164 | -- > cStatements = cStatement `endBy` semi 165 | 166 | endBy :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a] 167 | {-# INLINABLE endBy #-} 168 | endBy p sep = many (do{ x <- p; _ <- sep; return x }) 169 | 170 | -- | @count n p@ parses @n@ occurrences of @p@. If @n@ is smaller or 171 | -- equal to zero, the parser equals to @return []@. Returns a list of 172 | -- @n@ values returned by @p@. 173 | 174 | count :: (Stream s m t) => Int -> ParsecT s u m a -> ParsecT s u m [a] 175 | {-# INLINABLE count #-} 176 | count n p | n <= 0 = return [] 177 | | otherwise = sequence (replicate n p) 178 | 179 | -- | @chainr p op x@ parses /zero/ or more occurrences of @p@, 180 | -- separated by @op@ Returns a value obtained by a /right/ associative 181 | -- application of all functions returned by @op@ to the values returned 182 | -- by @p@. If there are no occurrences of @p@, the value @x@ is 183 | -- returned. 184 | 185 | chainr :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> a -> ParsecT s u m a 186 | {-# INLINABLE chainr #-} 187 | chainr p op x = chainr1 p op <|> return x 188 | 189 | -- | @chainl p op x@ parses /zero/ or more occurrences of @p@, 190 | -- separated by @op@. Returns a value obtained by a /left/ associative 191 | -- application of all functions returned by @op@ to the values returned 192 | -- by @p@. If there are zero occurrences of @p@, the value @x@ is 193 | -- returned. 194 | 195 | chainl :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> a -> ParsecT s u m a 196 | {-# INLINABLE chainl #-} 197 | chainl p op x = chainl1 p op <|> return x 198 | 199 | -- | @chainl1 p op@ parses /one/ or more occurrences of @p@, 200 | -- separated by @op@ Returns a value obtained by a /left/ associative 201 | -- application of all functions returned by @op@ to the values returned 202 | -- by @p@. This parser can for example be used to eliminate left 203 | -- recursion which typically occurs in expression grammars. 204 | -- 205 | -- > expr = term `chainl1` addop 206 | -- > term = factor `chainl1` mulop 207 | -- > factor = parens expr <|> integer 208 | -- > 209 | -- > mulop = do{ symbol "*"; return (*) } 210 | -- > <|> do{ symbol "/"; return (div) } 211 | -- > 212 | -- > addop = do{ symbol "+"; return (+) } 213 | -- > <|> do{ symbol "-"; return (-) } 214 | 215 | chainl1 :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> ParsecT s u m a 216 | {-# INLINABLE chainl1 #-} 217 | chainl1 p op = do{ x <- p; rest x } 218 | where 219 | rest x = do{ f <- op 220 | ; y <- p 221 | ; rest (f x y) 222 | } 223 | <|> return x 224 | 225 | -- | @chainr1 p op x@ parses /one/ or more occurrences of |p|, 226 | -- separated by @op@ Returns a value obtained by a /right/ associative 227 | -- application of all functions returned by @op@ to the values returned 228 | -- by @p@. 229 | 230 | chainr1 :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> ParsecT s u m a 231 | {-# INLINABLE chainr1 #-} 232 | chainr1 p op = scan 233 | where 234 | scan = do{ x <- p; rest x } 235 | 236 | rest x = do{ f <- op 237 | ; y <- scan 238 | ; return (f x y) 239 | } 240 | <|> return x 241 | 242 | ----------------------------------------------------------- 243 | -- Tricky combinators 244 | ----------------------------------------------------------- 245 | -- | The parser @anyToken@ accepts any kind of token. It is for example 246 | -- used to implement 'eof'. Returns the accepted token. 247 | 248 | anyToken :: (Stream s m t, Show t) => ParsecT s u m t 249 | {-# INLINABLE anyToken #-} 250 | anyToken = tokenPrim show (\pos _tok _toks -> pos) Just 251 | 252 | -- | This parser only succeeds at the end of the input. This is not a 253 | -- primitive parser but it is defined using 'notFollowedBy'. 254 | -- 255 | -- > eof = notFollowedBy anyToken "end of input" 256 | 257 | eof :: (Stream s m t, Show t) => ParsecT s u m () 258 | {-# INLINABLE eof #-} 259 | eof = notFollowedBy anyToken "end of input" 260 | 261 | -- | @notFollowedBy p@ only succeeds when parser @p@ fails. This parser 262 | -- does not consume any input. This parser can be used to implement the 263 | -- \'longest match\' rule. For example, when recognizing keywords (for 264 | -- example @let@), we want to make sure that a keyword is not followed 265 | -- by a legal identifier character, in which case the keyword is 266 | -- actually an identifier (for example @lets@). We can program this 267 | -- behaviour as follows: 268 | -- 269 | -- > keywordLet = try (do{ string "let" 270 | -- > ; notFollowedBy alphaNum 271 | -- > }) 272 | -- 273 | -- __NOTE__: Currently, 'notFollowedBy' exhibits surprising behaviour 274 | -- when applied to a parser @p@ that doesn't consume any input; 275 | -- specifically 276 | -- 277 | -- - @'notFollowedBy' . 'notFollowedBy'@ is /not/ equivalent to 'lookAhead', and 278 | -- 279 | -- - @'notFollowedBy' 'eof'@ /never/ fails. 280 | -- 281 | -- See [haskell/parsec#8](https://github.com/haskell/parsec/issues/8) 282 | -- for more details. 283 | 284 | notFollowedBy :: (Stream s m t, Show a) => ParsecT s u m a -> ParsecT s u m () 285 | {-# INLINABLE notFollowedBy #-} 286 | notFollowedBy p = try (do{ c <- try p; unexpected (show c) } 287 | <|> return () 288 | ) 289 | 290 | -- | @manyTill p end@ applies parser @p@ /zero/ or more times until 291 | -- parser @end@ succeeds. Returns the list of values returned by @p@. 292 | -- This parser can be used to scan comments: 293 | -- 294 | -- > simpleComment = do{ string "")) 296 | -- > } 297 | -- 298 | -- Note the overlapping parsers @anyChar@ and @string \"-->\"@, and 299 | -- therefore the use of the 'try' combinator. 300 | 301 | manyTill :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a] 302 | {-# INLINABLE manyTill #-} 303 | manyTill p end = scan 304 | where 305 | scan = do{ _ <- end; return [] } 306 | <|> 307 | do{ x <- p; xs <- scan; return (x:xs) } 308 | 309 | -- | @parserTrace label@ is an impure function, implemented with "Debug.Trace" that 310 | -- prints to the console the remaining parser state at the time it is invoked. 311 | -- It is intended to be used for debugging parsers by inspecting their intermediate states. 312 | -- 313 | -- > *> parseTest (oneOf "aeiou" >> parserTrace "label") "atest" 314 | -- > label: "test" 315 | -- > ... 316 | -- 317 | -- @since 3.1.12.0 318 | parserTrace :: (Show t, Stream s m t) => String -> ParsecT s u m () 319 | {-# INLINABLE parserTrace #-} 320 | parserTrace s = pt <|> return () 321 | where 322 | pt = try $ do 323 | x <- try $ many1 anyToken 324 | trace (s++": " ++ show x) $ try $ eof 325 | fail (show x) 326 | 327 | -- | @parserTraced label p@ is an impure function, implemented with "Debug.Trace" that 328 | -- prints to the console the remaining parser state at the time it is invoked. 329 | -- It then continues to apply parser @p@, and if @p@ fails will indicate that 330 | -- the label has been backtracked. 331 | -- It is intended to be used for debugging parsers by inspecting their intermediate states. 332 | -- 333 | -- > *> parseTest (oneOf "aeiou" >> parserTraced "label" (oneOf "nope")) "atest" 334 | -- > label: "test" 335 | -- > label backtracked 336 | -- > parse error at (line 1, column 2): 337 | -- > ... 338 | -- 339 | -- @since 3.1.12.0 340 | parserTraced :: (Stream s m t, Show t) => String -> ParsecT s u m b -> ParsecT s u m b 341 | {-# INLINABLE parserTraced #-} 342 | parserTraced s p = do 343 | parserTrace s 344 | p <|> trace (s ++ " backtracked") (fail s) 345 | -------------------------------------------------------------------------------- /src/Text/Parsec/Error.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE Safe #-} 3 | 4 | ----------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Text.Parsec.Error 7 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 8 | -- License : BSD-style (see the LICENSE file) 9 | -- 10 | -- Maintainer : derek.a.elkins@gmail.com 11 | -- Stability : provisional 12 | -- Portability : portable 13 | -- 14 | -- Parse errors 15 | -- 16 | ----------------------------------------------------------------------------- 17 | 18 | module Text.Parsec.Error 19 | ( Message ( SysUnExpect, UnExpect, Expect, Message ) 20 | , messageString 21 | , ParseError, errorPos, errorMessages, errorIsUnknown 22 | , showErrorMessages 23 | , newErrorMessage, newErrorUnknown 24 | , addErrorMessage, setErrorPos, setErrorMessage 25 | , mergeError 26 | ) where 27 | 28 | import Control.Exception ( Exception ) 29 | import Data.List ( nub, sort ) 30 | import Data.Typeable ( Typeable ) 31 | import qualified Data.Monoid as Mon 32 | 33 | import Text.Parsec.Pos 34 | 35 | -- | This abstract data type represents parse error messages. There are 36 | -- four kinds of messages: 37 | -- 38 | -- > data Message = SysUnExpect String 39 | -- > | UnExpect String 40 | -- > | Expect String 41 | -- > | Message String 42 | -- 43 | -- The fine distinction between different kinds of parse errors allows 44 | -- the system to generate quite good error messages for the user. It 45 | -- also allows error messages that are formatted in different 46 | -- languages. Each kind of message is generated by different combinators: 47 | -- 48 | -- * A 'SysUnExpect' message is automatically generated by the 49 | -- 'Text.Parsec.Combinator.satisfy' combinator. The argument is the 50 | -- unexpected input. 51 | -- 52 | -- * A 'UnExpect' message is generated by the 'Text.Parsec.Prim.unexpected' 53 | -- combinator. The argument describes the 54 | -- unexpected item. 55 | -- 56 | -- * A 'Expect' message is generated by the 'Text.Parsec.Prim.' 57 | -- combinator. The argument describes the expected item. 58 | -- 59 | -- * A 'Message' message is generated by the 'fail' 60 | -- combinator. The argument is some general parser message. 61 | 62 | data Message = SysUnExpect !String -- @ library generated unexpect 63 | | UnExpect !String -- @ unexpected something 64 | | Expect !String -- @ expecting something 65 | | Message !String -- @ raw message 66 | deriving ( Typeable ) 67 | 68 | instance Enum Message where 69 | fromEnum (SysUnExpect _) = 0 70 | fromEnum (UnExpect _) = 1 71 | fromEnum (Expect _) = 2 72 | fromEnum (Message _) = 3 73 | toEnum _ = error "toEnum is undefined for Message" 74 | 75 | -- < Return 'True' only when 'compare' would return 'EQ'. 76 | 77 | instance Eq Message where 78 | 79 | m1 == m2 = fromEnum m1 == fromEnum m2 80 | 81 | -- < Compares two error messages without looking at their content. Only 82 | -- the constructors are compared where: 83 | -- 84 | -- > 'SysUnExpect' < 'UnExpect' < 'Expect' < 'Message' 85 | 86 | instance Ord Message where 87 | compare msg1 msg2 = compare (fromEnum msg1) (fromEnum msg2) 88 | 89 | -- | Extract the message string from an error message 90 | 91 | messageString :: Message -> String 92 | messageString (SysUnExpect s) = s 93 | messageString (UnExpect s) = s 94 | messageString (Expect s) = s 95 | messageString (Message s) = s 96 | 97 | -- | The abstract data type @ParseError@ represents parse errors. It 98 | -- provides the source position ('SourcePos') of the error 99 | -- and a list of error messages ('Message'). A @ParseError@ 100 | -- can be returned by the function 'Text.Parsec.Prim.parse'. @ParseError@ is an 101 | -- instance of the 'Show' and 'Eq' classes. 102 | 103 | data ParseError = ParseError !SourcePos [Message] 104 | deriving ( Typeable ) 105 | 106 | -- | Extracts the source position from the parse error 107 | 108 | errorPos :: ParseError -> SourcePos 109 | errorPos (ParseError pos _msgs) 110 | = pos 111 | 112 | -- | Extracts the list of error messages from the parse error 113 | 114 | errorMessages :: ParseError -> [Message] 115 | errorMessages (ParseError _pos msgs) 116 | = sort msgs 117 | 118 | errorIsUnknown :: ParseError -> Bool 119 | errorIsUnknown (ParseError _pos msgs) 120 | = null msgs 121 | 122 | -- < Create parse errors 123 | 124 | newErrorUnknown :: SourcePos -> ParseError 125 | newErrorUnknown pos 126 | = ParseError pos [] 127 | 128 | newErrorMessage :: Message -> SourcePos -> ParseError 129 | newErrorMessage msg pos 130 | = ParseError pos [msg] 131 | 132 | addErrorMessage :: Message -> ParseError -> ParseError 133 | addErrorMessage msg (ParseError pos msgs) 134 | = ParseError pos (msg:msgs) 135 | 136 | setErrorPos :: SourcePos -> ParseError -> ParseError 137 | setErrorPos pos (ParseError _ msgs) 138 | = ParseError pos msgs 139 | 140 | setErrorMessage :: Message -> ParseError -> ParseError 141 | setErrorMessage msg (ParseError pos msgs) 142 | = ParseError pos (msg : filter (msg /=) msgs) 143 | 144 | mergeError :: ParseError -> ParseError -> ParseError 145 | mergeError e1@(ParseError pos1 msgs1) e2@(ParseError pos2 msgs2) 146 | -- prefer meaningful errors 147 | | null msgs2 && not (null msgs1) = e1 148 | | null msgs1 && not (null msgs2) = e2 149 | | otherwise 150 | -- perfectly we'd compare the consumed token count 151 | -- https://github.com/haskell/parsec/issues/175 152 | = case compareErrorPos pos1 pos2 of 153 | -- select the longest match 154 | EQ -> ParseError pos1 (msgs1 ++ msgs2) 155 | GT -> e1 156 | LT -> e2 157 | 158 | compareErrorPos :: SourcePos -> SourcePos -> Ordering 159 | compareErrorPos x y = Mon.mappend (compare (sourceLine x) (sourceLine y)) (compare (sourceColumn x) (sourceColumn y)) 160 | 161 | instance Show ParseError where 162 | show err 163 | = show (errorPos err) ++ ":" ++ 164 | showErrorMessages "or" "unknown parse error" 165 | "expecting" "unexpected" "end of input" 166 | (errorMessages err) 167 | 168 | instance Eq ParseError where 169 | l == r 170 | = errorPos l == errorPos r && messageStrs l == messageStrs r 171 | where 172 | messageStrs = map messageString . errorMessages 173 | 174 | -- | @since 3.1.17.0 175 | instance Exception ParseError 176 | 177 | -- Language independent show function 178 | 179 | -- TODO 180 | -- < The standard function for showing error messages. Formats a list of 181 | -- error messages in English. This function is used in the |Show| 182 | -- instance of |ParseError <#ParseError>|. The resulting string will be 183 | -- formatted like: 184 | -- 185 | -- |unexpected /{The first UnExpect or a SysUnExpect message}/; 186 | -- expecting /{comma separated list of Expect messages}/; 187 | -- /{comma separated list of Message messages}/ 188 | 189 | showErrorMessages :: 190 | String -> String -> String -> String -> String -> [Message] -> String 191 | showErrorMessages msgOr msgUnknown msgExpecting msgUnExpected msgEndOfInput msgs 192 | | null msgs = msgUnknown 193 | | otherwise = concat $ map ("\n"++) $ clean $ 194 | [showSysUnExpect,showUnExpect,showExpect,showMessages] 195 | where 196 | (sysUnExpect,msgs1) = span ((SysUnExpect "") ==) msgs 197 | (unExpect,msgs2) = span ((UnExpect "") ==) msgs1 198 | (expect,messages) = span ((Expect "") ==) msgs2 199 | 200 | showExpect = showMany msgExpecting expect 201 | showUnExpect = showMany msgUnExpected unExpect 202 | showSysUnExpect 203 | | not (null unExpect) = "" 204 | | [] <- sysUnExpect = "" 205 | | msg : _ <- sysUnExpect 206 | , null (messageString msg) = msgUnExpected ++ " " ++ msgEndOfInput 207 | | msg : _ <- sysUnExpect = msgUnExpected ++ " " ++ messageString msg 208 | 209 | showMessages = showMany "" messages 210 | 211 | -- helpers 212 | showMany pre msgs3 = case clean (map messageString msgs3) of 213 | [] -> "" 214 | ms | null pre -> commasOr ms 215 | | otherwise -> pre ++ " " ++ commasOr ms 216 | 217 | commasOr [] = "" 218 | commasOr [m] = m 219 | commasOr ms = commaSep (init ms) ++ " " ++ msgOr ++ " " ++ last ms 220 | 221 | commaSep = separate ", " . clean 222 | 223 | separate _ [] = "" 224 | separate _ [m] = m 225 | separate sep (m:ms) = m ++ sep ++ separate sep ms 226 | 227 | clean = nub . filter (not . null) 228 | -------------------------------------------------------------------------------- /src/Text/Parsec/Expr.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE Safe #-} 3 | 4 | ----------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Text.Parsec.Expr 7 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 8 | -- License : BSD-style (see the LICENSE file) 9 | -- 10 | -- Maintainer : derek.a.elkins@gmail.com 11 | -- Stability : provisional 12 | -- Portability : non-portable 13 | -- 14 | -- A helper module to parse \"expressions\". 15 | -- Builds a parser given a table of operators and associativities. 16 | -- 17 | ----------------------------------------------------------------------------- 18 | 19 | module Text.Parsec.Expr 20 | ( Assoc(..), Operator(..), OperatorTable 21 | , buildExpressionParser 22 | ) where 23 | 24 | import Data.Typeable ( Typeable ) 25 | 26 | import Text.Parsec.Prim 27 | import Text.Parsec.Combinator 28 | 29 | ----------------------------------------------------------- 30 | -- Assoc and OperatorTable 31 | ----------------------------------------------------------- 32 | 33 | -- | This data type specifies the associativity of operators: left, right 34 | -- or none. 35 | 36 | data Assoc = AssocNone 37 | | AssocLeft 38 | | AssocRight 39 | deriving ( Typeable ) 40 | 41 | -- | This data type specifies operators that work on values of type @a@. 42 | -- An operator is either binary infix or unary prefix or postfix. A 43 | -- binary operator has also an associated associativity. 44 | 45 | data Operator s u m a = Infix (ParsecT s u m (a -> a -> a)) Assoc 46 | | Prefix (ParsecT s u m (a -> a)) 47 | | Postfix (ParsecT s u m (a -> a)) 48 | deriving ( Typeable ) 49 | 50 | -- | An @OperatorTable s u m a@ is a list of @Operator s u m a@ 51 | -- lists. The list is ordered in descending 52 | -- precedence. All operators in one list have the same precedence (but 53 | -- may have a different associativity). 54 | 55 | type OperatorTable s u m a = [[Operator s u m a]] 56 | 57 | ----------------------------------------------------------- 58 | -- Convert an OperatorTable and basic term parser into 59 | -- a full fledged expression parser 60 | ----------------------------------------------------------- 61 | 62 | -- | @buildExpressionParser table term@ builds an expression parser for 63 | -- terms @term@ with operators from @table@, taking the associativity 64 | -- and precedence specified in @table@ into account. Prefix and postfix 65 | -- operators of the same precedence can only occur once (i.e. @--2@ is 66 | -- not allowed if @-@ is prefix negate). Prefix and postfix operators 67 | -- of the same precedence associate to the left (i.e. if @++@ is 68 | -- postfix increment, than @-2++@ equals @-1@, not @-3@). 69 | -- 70 | -- The @buildExpressionParser@ takes care of all the complexity 71 | -- involved in building expression parser. Here is an example of an 72 | -- expression parser that handles prefix signs, postfix increment and 73 | -- basic arithmetic. 74 | -- 75 | -- > expr = buildExpressionParser table term 76 | -- > "expression" 77 | -- > 78 | -- > term = parens expr 79 | -- > <|> natural 80 | -- > "simple expression" 81 | -- > 82 | -- > table = [ [prefix "-" negate, prefix "+" id ] 83 | -- > , [postfix "++" (+1)] 84 | -- > , [binary "*" (*) AssocLeft, binary "/" (div) AssocLeft ] 85 | -- > , [binary "+" (+) AssocLeft, binary "-" (-) AssocLeft ] 86 | -- > ] 87 | -- > 88 | -- > binary name fun assoc = Infix (do{ reservedOp name; return fun }) assoc 89 | -- > prefix name fun = Prefix (do{ reservedOp name; return fun }) 90 | -- > postfix name fun = Postfix (do{ reservedOp name; return fun }) 91 | 92 | buildExpressionParser :: (Stream s m t) 93 | => OperatorTable s u m a 94 | -> ParsecT s u m a 95 | -> ParsecT s u m a 96 | {-# INLINABLE buildExpressionParser #-} 97 | buildExpressionParser operators simpleExpr 98 | = foldl (makeParser) simpleExpr operators 99 | where 100 | makeParser term ops 101 | = let (rassoc,lassoc,nassoc 102 | ,prefix,postfix) = foldr splitOp ([],[],[],[],[]) ops 103 | 104 | rassocOp = choice rassoc 105 | lassocOp = choice lassoc 106 | nassocOp = choice nassoc 107 | prefixOp = choice prefix "" 108 | postfixOp = choice postfix "" 109 | 110 | ambiguous assoc op= try $ 111 | do{ _ <- op; fail ("ambiguous use of a " ++ assoc 112 | ++ " associative operator") 113 | } 114 | 115 | ambiguousRight = ambiguous "right" rassocOp 116 | ambiguousLeft = ambiguous "left" lassocOp 117 | ambiguousNon = ambiguous "non" nassocOp 118 | 119 | termP = do{ pre <- prefixP 120 | ; x <- term 121 | ; post <- postfixP 122 | ; return (post (pre x)) 123 | } 124 | 125 | postfixP = postfixOp <|> return id 126 | 127 | prefixP = prefixOp <|> return id 128 | 129 | rassocP x = do{ f <- rassocOp 130 | ; y <- do{ z <- termP; rassocP1 z } 131 | ; return (f x y) 132 | } 133 | <|> ambiguousLeft 134 | <|> ambiguousNon 135 | -- <|> return x 136 | 137 | rassocP1 x = rassocP x <|> return x 138 | 139 | lassocP x = do{ f <- lassocOp 140 | ; y <- termP 141 | ; lassocP1 (f x y) 142 | } 143 | <|> ambiguousRight 144 | <|> ambiguousNon 145 | -- <|> return x 146 | 147 | lassocP1 x = lassocP x <|> return x 148 | 149 | nassocP x = do{ f <- nassocOp 150 | ; y <- termP 151 | ; ambiguousRight 152 | <|> ambiguousLeft 153 | <|> ambiguousNon 154 | <|> return (f x y) 155 | } 156 | -- <|> return x 157 | 158 | in do{ x <- termP 159 | ; rassocP x <|> lassocP x <|> nassocP x <|> return x 160 | "operator" 161 | } 162 | 163 | 164 | splitOp (Infix op assoc) (rassoc,lassoc,nassoc,prefix,postfix) 165 | = case assoc of 166 | AssocNone -> (rassoc,lassoc,op:nassoc,prefix,postfix) 167 | AssocLeft -> (rassoc,op:lassoc,nassoc,prefix,postfix) 168 | AssocRight -> (op:rassoc,lassoc,nassoc,prefix,postfix) 169 | 170 | splitOp (Prefix op) (rassoc,lassoc,nassoc,prefix,postfix) 171 | = (rassoc,lassoc,nassoc,op:prefix,postfix) 172 | 173 | splitOp (Postfix op) (rassoc,lassoc,nassoc,prefix,postfix) 174 | = (rassoc,lassoc,nassoc,prefix,op:postfix) 175 | -------------------------------------------------------------------------------- /src/Text/Parsec/Language.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parsec.Language 6 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : non-portable (uses non-portable module Text.Parsec.Token) 12 | -- 13 | -- A helper module that defines some language definitions that can be used 14 | -- to instantiate a token parser (see "Text.Parsec.Token"). 15 | -- 16 | ----------------------------------------------------------------------------- 17 | 18 | module Text.Parsec.Language 19 | ( haskellDef, haskell 20 | , mondrianDef, mondrian 21 | , emptyDef 22 | , haskellStyle 23 | , javaStyle 24 | , LanguageDef 25 | , GenLanguageDef 26 | ) where 27 | 28 | import Text.Parsec 29 | import Text.Parsec.Token 30 | 31 | ----------------------------------------------------------- 32 | -- Styles: haskellStyle, javaStyle 33 | ----------------------------------------------------------- 34 | 35 | -- | This is a minimal token definition for Haskell style languages. It 36 | -- defines the style of comments, valid identifiers and case 37 | -- sensitivity. It does not define any reserved words or operators. 38 | 39 | haskellStyle :: LanguageDef st 40 | haskellStyle = emptyDef 41 | { commentStart = "{-" 42 | , commentEnd = "-}" 43 | , commentLine = "--" 44 | , nestedComments = True 45 | , identStart = letter 46 | , identLetter = alphaNum <|> oneOf "_'" 47 | , opStart = opLetter haskellStyle 48 | , opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" 49 | , reservedOpNames= [] 50 | , reservedNames = [] 51 | , caseSensitive = True 52 | } 53 | 54 | -- | This is a minimal token definition for Java style languages. It 55 | -- defines the style of comments, valid identifiers and case 56 | -- sensitivity. It does not define any reserved words or operators. 57 | 58 | javaStyle :: LanguageDef st 59 | javaStyle = emptyDef 60 | { commentStart = "/*" 61 | , commentEnd = "*/" 62 | , commentLine = "//" 63 | , nestedComments = True 64 | , identStart = letter 65 | , identLetter = alphaNum <|> oneOf "_'" 66 | , reservedNames = [] 67 | , reservedOpNames= [] 68 | , caseSensitive = False 69 | } 70 | 71 | ----------------------------------------------------------- 72 | -- minimal language definition 73 | -------------------------------------------------------- 74 | 75 | -- | This is the most minimal token definition. It is recommended to use 76 | -- this definition as the basis for other definitions. @emptyDef@ has 77 | -- no reserved names or operators, is case sensitive and doesn't accept 78 | -- comments, identifiers or operators. 79 | 80 | emptyDef :: LanguageDef st 81 | emptyDef = LanguageDef 82 | { commentStart = "" 83 | , commentEnd = "" 84 | , commentLine = "" 85 | , nestedComments = True 86 | , identStart = letter <|> char '_' 87 | , identLetter = alphaNum <|> oneOf "_'" 88 | , opStart = opLetter emptyDef 89 | , opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" 90 | , reservedOpNames= [] 91 | , reservedNames = [] 92 | , caseSensitive = True 93 | } 94 | 95 | 96 | 97 | ----------------------------------------------------------- 98 | -- Haskell 99 | ----------------------------------------------------------- 100 | 101 | -- | A lexer for the Haskell language. 102 | 103 | haskell :: TokenParser st 104 | haskell = makeTokenParser haskellDef 105 | 106 | -- | The language definition for the Haskell language. 107 | 108 | haskellDef :: LanguageDef st 109 | haskellDef = haskell98Def 110 | { identLetter = identLetter haskell98Def <|> char '#' 111 | , reservedNames = reservedNames haskell98Def ++ 112 | ["foreign","import","export","primitive" 113 | ,"_ccall_","_casm_" 114 | ,"forall" 115 | ] 116 | } 117 | 118 | -- | The language definition for the language Haskell98. 119 | 120 | haskell98Def :: LanguageDef st 121 | haskell98Def = haskellStyle 122 | { reservedOpNames= ["::","..","=","\\","|","<-","->","@","~","=>"] 123 | , reservedNames = ["let","in","case","of","if","then","else", 124 | "data","type", 125 | "class","default","deriving","do","import", 126 | "infix","infixl","infixr","instance","module", 127 | "newtype","where", 128 | "primitive" 129 | -- "as","qualified","hiding" 130 | ] 131 | } 132 | 133 | 134 | ----------------------------------------------------------- 135 | -- Mondrian 136 | ----------------------------------------------------------- 137 | 138 | -- | A lexer for the Mondrian language. 139 | 140 | mondrian :: TokenParser st 141 | mondrian = makeTokenParser mondrianDef 142 | 143 | -- | The language definition for the language Mondrian. 144 | 145 | mondrianDef :: LanguageDef st 146 | mondrianDef = javaStyle 147 | { reservedNames = [ "case", "class", "default", "extends" 148 | , "import", "in", "let", "new", "of", "package" 149 | ] 150 | , caseSensitive = True 151 | } 152 | -------------------------------------------------------------------------------- /src/Text/Parsec/Perm.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE ExistentialQuantification #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | {-# LANGUAGE StandaloneDeriving #-} 5 | {-# LANGUAGE Safe #-} 6 | 7 | ----------------------------------------------------------------------------- 8 | -- | 9 | -- Module : Text.Parsec.Perm 10 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 11 | -- License : BSD-style (see the file libraries/parsec/LICENSE) 12 | -- 13 | -- Maintainer : derek.a.elkins@gmail.com 14 | -- Stability : provisional 15 | -- Portability : non-portable (uses existentially quantified data constructors) 16 | -- 17 | -- This module implements permutation parsers. The algorithm used 18 | -- is fairly complex since we push the type system to its limits :-) 19 | -- The algorithm is described in: 20 | -- 21 | -- /Parsing Permutation Phrases,/ 22 | -- by Arthur Baars, Andres Loh and Doaitse Swierstra. 23 | -- Published as a functional pearl at the Haskell Workshop 2001. 24 | -- 25 | -- From the abstract: 26 | -- 27 | -- A permutation phrase is a sequence of elements (possibly of different types) 28 | -- in which each element occurs exactly once and the order is irrelevant. 29 | -- Some of the permutable elements may be optional. 30 | -- 31 | ----------------------------------------------------------------------------- 32 | 33 | 34 | module Text.Parsec.Perm 35 | ( PermParser 36 | , StreamPermParser -- abstract 37 | 38 | , permute 39 | , (<||>), (<$$>) 40 | , (<|?>), (<$?>) 41 | ) where 42 | 43 | import Control.Monad.Identity ( Identity ) 44 | import Data.Typeable ( Typeable ) 45 | import Text.Parsec 46 | 47 | infixl 1 <||>, <|?> 48 | infixl 2 <$$>, <$?> 49 | 50 | 51 | {--------------------------------------------------------------- 52 | test -- parse a permutation of 53 | * an optional string of 'a's 54 | * a required 'b' 55 | * an optional 'c' 56 | ---------------------------------------------------------------} 57 | {- 58 | test input 59 | = parse (do{ x <- ptest; eof; return x }) "" input 60 | 61 | ptest :: Parser (String,Char,Char) 62 | ptest 63 | = permute $ 64 | (,,) <$?> ("",many1 (char 'a')) 65 | <||> char 'b' 66 | <|?> ('_',char 'c') 67 | -} 68 | 69 | {--------------------------------------------------------------- 70 | Building a permutation parser 71 | ---------------------------------------------------------------} 72 | 73 | -- | The expression @perm \<||> p@ adds parser @p@ to the permutation 74 | -- parser @perm@. The parser @p@ is not allowed to accept empty input - 75 | -- use the optional combinator ('<|?>') instead. Returns a 76 | -- new permutation parser that includes @p@. 77 | 78 | (<||>) :: (Stream s Identity tok) => StreamPermParser s st (a -> b) -> Parsec s st a -> StreamPermParser s st b 79 | (<||>) perm p = add perm p 80 | 81 | -- | The expression @f \<$$> p@ creates a fresh permutation parser 82 | -- consisting of parser @p@. The the final result of the permutation 83 | -- parser is the function @f@ applied to the return value of @p@. The 84 | -- parser @p@ is not allowed to accept empty input - use the optional 85 | -- combinator ('<$?>') instead. 86 | -- 87 | -- If the function @f@ takes more than one parameter, the type variable 88 | -- @b@ is instantiated to a functional type which combines nicely with 89 | -- the adds parser @p@ to the ('<||>') combinator. This 90 | -- results in stylized code where a permutation parser starts with a 91 | -- combining function @f@ followed by the parsers. The function @f@ 92 | -- gets its parameters in the order in which the parsers are specified, 93 | -- but actual input can be in any order. 94 | 95 | (<$$>) :: (Stream s Identity tok) => (a -> b) -> Parsec s st a -> StreamPermParser s st b 96 | (<$$>) f p = newperm f <||> p 97 | 98 | -- | The expression @perm \<||> (x,p)@ adds parser @p@ to the 99 | -- permutation parser @perm@. The parser @p@ is optional - if it can 100 | -- not be applied, the default value @x@ will be used instead. Returns 101 | -- a new permutation parser that includes the optional parser @p@. 102 | 103 | (<|?>) :: (Stream s Identity tok) => StreamPermParser s st (a -> b) -> (a, Parsec s st a) -> StreamPermParser s st b 104 | (<|?>) perm (x,p) = addopt perm x p 105 | 106 | -- | The expression @f \<$?> (x,p)@ creates a fresh permutation parser 107 | -- consisting of parser @p@. The the final result of the permutation 108 | -- parser is the function @f@ applied to the return value of @p@. The 109 | -- parser @p@ is optional - if it can not be applied, the default value 110 | -- @x@ will be used instead. 111 | 112 | (<$?>) :: (Stream s Identity tok) => (a -> b) -> (a, Parsec s st a) -> StreamPermParser s st b 113 | (<$?>) f (x,p) = newperm f <|?> (x,p) 114 | 115 | {--------------------------------------------------------------- 116 | The permutation tree 117 | ---------------------------------------------------------------} 118 | 119 | -- | Provided for backwards compatibility. The tok type is ignored. 120 | 121 | type PermParser tok st a = StreamPermParser String st a 122 | 123 | -- | The type @StreamPermParser s st a@ denotes a permutation parser that, 124 | -- when converted by the 'permute' function, parses 125 | -- @s@ streams with user state @st@ and returns a value of 126 | -- type @a@ on success. 127 | -- 128 | -- Normally, a permutation parser is first build with special operators 129 | -- like ('<||>') and than transformed into a normal parser 130 | -- using 'permute'. 131 | 132 | data StreamPermParser s st a = Perm (Maybe a) [StreamBranch s st a] 133 | deriving ( Typeable ) 134 | 135 | -- type Branch st a = StreamBranch String st a 136 | 137 | data StreamBranch s st a = forall b. Branch (StreamPermParser s st (b -> a)) (Parsec s st b) 138 | deriving ( Typeable ) 139 | 140 | -- | The parser @permute perm@ parses a permutation of parser described 141 | -- by @perm@. For example, suppose we want to parse a permutation of: 142 | -- an optional string of @a@'s, the character @b@ and an optional @c@. 143 | -- This can be described by: 144 | -- 145 | -- > test = permute (tuple <$?> ("",many1 (char 'a')) 146 | -- > <||> char 'b' 147 | -- > <|?> ('_',char 'c')) 148 | -- > where 149 | -- > tuple a b c = (a,b,c) 150 | 151 | -- transform a permutation tree into a normal parser 152 | permute :: (Stream s Identity tok) => StreamPermParser s st a -> Parsec s st a 153 | permute (Perm def xs) 154 | = choice (map branch xs ++ empty) 155 | where 156 | empty 157 | = case def of 158 | Nothing -> [] 159 | Just x -> [return x] 160 | 161 | branch (Branch perm p) 162 | = do{ x <- p 163 | ; f <- permute perm 164 | ; return (f x) 165 | } 166 | 167 | -- build permutation trees 168 | newperm :: (Stream s Identity tok) => (a -> b) -> StreamPermParser s st (a -> b) 169 | newperm f 170 | = Perm (Just f) [] 171 | 172 | add :: (Stream s Identity tok) => StreamPermParser s st (a -> b) -> Parsec s st a -> StreamPermParser s st b 173 | add perm@(Perm _mf fs) p 174 | = Perm Nothing (first:map insert fs) 175 | where 176 | first = Branch perm p 177 | insert (Branch perm' p') 178 | = Branch (add (mapPerms flip perm') p) p' 179 | 180 | addopt :: (Stream s Identity tok) => StreamPermParser s st (a -> b) -> a -> Parsec s st a -> StreamPermParser s st b 181 | addopt perm@(Perm mf fs) x p 182 | = Perm (fmap ($ x) mf) (first:map insert fs) 183 | where 184 | first = Branch perm p 185 | insert (Branch perm' p') 186 | = Branch (addopt (mapPerms flip perm') x p) p' 187 | 188 | 189 | mapPerms :: (Stream s Identity tok) => (a -> b) -> StreamPermParser s st a -> StreamPermParser s st b 190 | mapPerms f (Perm x xs) 191 | = Perm (fmap f x) (map mapBranch xs) 192 | where 193 | mapBranch (Branch perm p) 194 | = Branch (mapPerms (f.) perm) p 195 | -------------------------------------------------------------------------------- /src/Text/Parsec/Pos.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE Safe #-} 3 | 4 | ----------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Text.Parsec.Pos 7 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 8 | -- License : BSD-style (see the LICENSE file) 9 | -- 10 | -- Maintainer : derek.a.elkins@gmail.com 11 | -- Stability : provisional 12 | -- Portability : portable 13 | -- 14 | -- Textual source positions. 15 | -- 16 | ----------------------------------------------------------------------------- 17 | 18 | module Text.Parsec.Pos 19 | ( SourceName, Line, Column 20 | , SourcePos 21 | , sourceLine, sourceColumn, sourceName 22 | , incSourceLine, incSourceColumn 23 | , setSourceLine, setSourceColumn, setSourceName 24 | , newPos, initialPos 25 | , updatePosChar, updatePosString 26 | ) where 27 | 28 | import Data.Data (Data) 29 | import Data.Typeable (Typeable) 30 | 31 | -- < Source positions: a file name, a line and a column 32 | -- upper left is (1,1) 33 | 34 | type SourceName = String 35 | type Line = Int 36 | type Column = Int 37 | 38 | -- | The abstract data type @SourcePos@ represents source positions. It 39 | -- contains the name of the source (i.e. file name), a line number and 40 | -- a column number. @SourcePos@ is an instance of the 'Show', 'Eq' and 41 | -- 'Ord' class. 42 | 43 | data SourcePos = SourcePos SourceName !Line !Column 44 | deriving ( Eq, Ord, Data, Typeable) 45 | 46 | -- | Create a new 'SourcePos' with the given source name, 47 | -- line number and column number. 48 | 49 | newPos :: SourceName -> Line -> Column -> SourcePos 50 | newPos name line column 51 | = SourcePos name line column 52 | 53 | -- | Create a new 'SourcePos' with the given source name, 54 | -- and line number and column number set to 1, the upper left. 55 | 56 | initialPos :: SourceName -> SourcePos 57 | initialPos name 58 | = newPos name 1 1 59 | 60 | -- | Extracts the name of the source from a source position. 61 | 62 | sourceName :: SourcePos -> SourceName 63 | sourceName (SourcePos name _line _column) = name 64 | 65 | -- | Extracts the line number from a source position. 66 | 67 | sourceLine :: SourcePos -> Line 68 | sourceLine (SourcePos _name line _column) = line 69 | 70 | -- | Extracts the column number from a source position. 71 | 72 | sourceColumn :: SourcePos -> Column 73 | sourceColumn (SourcePos _name _line column) = column 74 | 75 | -- | Increments the line number of a source position. 76 | 77 | incSourceLine :: SourcePos -> Line -> SourcePos 78 | incSourceLine (SourcePos name line column) n = SourcePos name (line+n) column 79 | 80 | -- | Increments the column number of a source position. 81 | 82 | incSourceColumn :: SourcePos -> Column -> SourcePos 83 | incSourceColumn (SourcePos name line column) n = SourcePos name line (column+n) 84 | 85 | -- | Set the name of the source. 86 | 87 | setSourceName :: SourcePos -> SourceName -> SourcePos 88 | setSourceName (SourcePos _name line column) n = SourcePos n line column 89 | 90 | -- | Set the line number of a source position. 91 | 92 | setSourceLine :: SourcePos -> Line -> SourcePos 93 | setSourceLine (SourcePos name _line column) n = SourcePos name n column 94 | 95 | -- | Set the column number of a source position. 96 | 97 | setSourceColumn :: SourcePos -> Column -> SourcePos 98 | setSourceColumn (SourcePos name line _column) n = SourcePos name line n 99 | 100 | -- | The expression @updatePosString pos s@ updates the source position 101 | -- @pos@ by calling 'updatePosChar' on every character in @s@, ie. 102 | -- @foldl updatePosChar pos string@. 103 | 104 | updatePosString :: SourcePos -> String -> SourcePos 105 | updatePosString pos string 106 | = foldl updatePosChar pos string 107 | 108 | -- | Update a source position given a character. If the character is a 109 | -- newline (\'\\n\') or carriage return (\'\\r\') the line number is 110 | -- incremented by 1. If the character is a tab (\'\t\') the column 111 | -- number is incremented to the nearest 8'th column, ie. @column + 8 - 112 | -- ((column-1) \`mod\` 8)@. In all other cases, the column is 113 | -- incremented by 1. 114 | 115 | updatePosChar :: SourcePos -> Char -> SourcePos 116 | updatePosChar (SourcePos name line column) c 117 | = case c of 118 | '\n' -> SourcePos name (line+1) 1 119 | '\t' -> SourcePos name line (column + 8 - ((column-1) `mod` 8)) 120 | _ -> SourcePos name line (column + 1) 121 | 122 | instance Show SourcePos where 123 | show (SourcePos name line column) 124 | | null name = showLineColumn 125 | | otherwise = "\"" ++ name ++ "\" " ++ showLineColumn 126 | where 127 | showLineColumn = "(line " ++ show line ++ 128 | ", column " ++ show column ++ 129 | ")" 130 | -------------------------------------------------------------------------------- /src/Text/Parsec/Prim.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | {-# LANGUAGE ExistentialQuantification #-} 4 | {-# LANGUAGE FlexibleContexts #-} 5 | {-# LANGUAGE FlexibleInstances #-} 6 | {-# LANGUAGE FunctionalDependencies #-} 7 | {-# LANGUAGE MultiParamTypeClasses #-} 8 | {-# LANGUAGE PolymorphicComponents #-} 9 | {-# LANGUAGE Safe #-} 10 | {-# LANGUAGE StandaloneDeriving #-} 11 | {-# LANGUAGE UndecidableInstances #-} 12 | 13 | {-# OPTIONS_GHC -fno-warn-name-shadowing #-} 14 | 15 | ----------------------------------------------------------------------------- 16 | -- | 17 | -- Module : Text.Parsec.Prim 18 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 19 | -- License : BSD-style (see the LICENSE file) 20 | -- 21 | -- Maintainer : derek.a.elkins@gmail.com 22 | -- Stability : provisional 23 | -- Portability : portable 24 | -- 25 | -- The primitive parser combinators. 26 | -- 27 | ----------------------------------------------------------------------------- 28 | 29 | {-# OPTIONS_HADDOCK not-home #-} 30 | 31 | module Text.Parsec.Prim 32 | ( unknownError 33 | , sysUnExpectError 34 | , unexpected 35 | , ParsecT 36 | , runParsecT 37 | , mkPT 38 | , Parsec 39 | , Consumed(..) 40 | , Reply(..) 41 | , State(..) 42 | , parsecMap 43 | , parserReturn 44 | , parserBind 45 | , mergeErrorReply 46 | , parserFail 47 | , parserZero 48 | , parserPlus 49 | , () 50 | , (<|>) 51 | , label 52 | , labels 53 | , lookAhead 54 | , Stream(..) 55 | , tokens 56 | , tokens' 57 | , try 58 | , token 59 | , tokenPrim 60 | , tokenPrimEx 61 | , many 62 | , skipMany 63 | , manyAccum 64 | , many1 65 | , runPT 66 | , runP 67 | , runParserT 68 | , runParser 69 | , parse 70 | , parseTest 71 | , getPosition 72 | , getInput 73 | , setPosition 74 | , setInput 75 | , getParserState 76 | , setParserState 77 | , updateParserState 78 | , getState 79 | , putState 80 | , modifyState 81 | , setState 82 | , updateState 83 | ) where 84 | 85 | 86 | import Prelude hiding (sequence) 87 | import qualified Data.ByteString.Lazy.Char8 as CL 88 | import qualified Data.ByteString.Char8 as C 89 | 90 | import Data.Typeable ( Typeable ) 91 | 92 | import qualified Data.Text as Text 93 | import qualified Data.Text.Lazy as TextL 94 | 95 | -- To define Monoid instance 96 | import qualified Data.List.NonEmpty as NE 97 | import Data.List ( genericReplicate ) 98 | import Data.Traversable (sequence) 99 | import qualified Data.Functor as Functor ( Functor(..) ) 100 | import qualified Data.Semigroup as Semigroup ( Semigroup(..) ) 101 | import qualified Data.Monoid as Monoid ( Monoid(..) ) 102 | 103 | import qualified Control.Applicative as Applicative ( Applicative(..), Alternative(..), liftA2 ) 104 | import Control.Monad (MonadPlus (..), ap, void, liftM) 105 | import Control.Monad.Trans (MonadTrans (lift), MonadIO (liftIO)) 106 | import Control.Monad.Identity (Identity, runIdentity) 107 | import qualified Control.Monad.Fail as Fail 108 | 109 | import Control.Monad.Reader.Class (MonadReader (..)) 110 | import Control.Monad.State.Class (MonadState (..)) 111 | import Control.Monad.Cont.Class (MonadCont (..)) 112 | import Control.Monad.Error.Class (MonadError (..)) 113 | 114 | import Text.Parsec.Pos 115 | import Text.Parsec.Error 116 | 117 | unknownError :: State s u -> ParseError 118 | unknownError state = newErrorUnknown (statePos state) 119 | 120 | sysUnExpectError :: String -> SourcePos -> Reply s u a 121 | sysUnExpectError msg pos = Error (newErrorMessage (SysUnExpect msg) pos) 122 | 123 | -- | The parser @unexpected msg@ always fails with an unexpected error 124 | -- message @msg@ without consuming any input. 125 | -- 126 | -- The parsers 'fail', ('') and @unexpected@ are the three parsers 127 | -- used to generate error messages. Of these, only ('') is commonly 128 | -- used. For an example of the use of @unexpected@, see the definition 129 | -- of 'Text.Parsec.Combinator.notFollowedBy'. 130 | 131 | unexpected :: (Stream s m t) => String -> ParsecT s u m a 132 | unexpected msg 133 | = ParsecT $ \s _ _ _ eerr -> 134 | eerr $ newErrorMessage (UnExpect msg) (statePos s) 135 | 136 | -- | ParserT monad transformer and Parser type 137 | 138 | -- | @ParsecT s u m a@ is a parser with stream type @s@, user state type @u@, 139 | -- underlying monad @m@ and return type @a@. Parsec is strict in the user state. 140 | -- If this is undesirable, simply use a data type like @data Box a = Box a@ and 141 | -- the state type @Box YourStateType@ to add a level of indirection. 142 | 143 | newtype ParsecT s u m a 144 | = ParsecT {unParser :: forall b . 145 | State s u 146 | -> (a -> State s u -> ParseError -> m b) -- consumed ok 147 | -> (ParseError -> m b) -- consumed err 148 | -> (a -> State s u -> ParseError -> m b) -- empty ok 149 | -> (ParseError -> m b) -- empty err 150 | -> m b 151 | } 152 | deriving ( Typeable ) 153 | 154 | -- | Low-level unpacking of the ParsecT type. To run your parser, please look to 155 | -- runPT, runP, runParserT, runParser and other such functions. 156 | runParsecT :: Monad m => ParsecT s u m a -> State s u -> m (Consumed (m (Reply s u a))) 157 | {-# INLINABLE runParsecT #-} 158 | runParsecT p s = unParser p s cok cerr eok eerr 159 | where cok a s' err = return . Consumed . return $ Ok a s' err 160 | cerr err = return . Consumed . return $ Error err 161 | eok a s' err = return . Empty . return $ Ok a s' err 162 | eerr err = return . Empty . return $ Error err 163 | 164 | -- | Low-level creation of the ParsecT type. You really shouldn't have to do this. 165 | mkPT :: Monad m => (State s u -> m (Consumed (m (Reply s u a)))) -> ParsecT s u m a 166 | {-# INLINABLE mkPT #-} 167 | mkPT k = ParsecT $ \s cok cerr eok eerr -> do 168 | cons <- k s 169 | case cons of 170 | Consumed mrep -> do 171 | rep <- mrep 172 | case rep of 173 | Ok x s' err -> cok x s' err 174 | Error err -> cerr err 175 | Empty mrep -> do 176 | rep <- mrep 177 | case rep of 178 | Ok x s' err -> eok x s' err 179 | Error err -> eerr err 180 | 181 | type Parsec s u = ParsecT s u Identity 182 | 183 | data Consumed a = Consumed a 184 | | Empty !a 185 | deriving ( Typeable ) 186 | 187 | data Reply s u a = Ok a !(State s u) ParseError 188 | | Error ParseError 189 | deriving ( Typeable ) 190 | 191 | data State s u = State { 192 | stateInput :: s, 193 | statePos :: !SourcePos, 194 | stateUser :: !u 195 | } 196 | deriving ( Typeable ) 197 | 198 | -- | The 'Semigroup' instance for 'ParsecT' is used to append the result 199 | -- of several parsers, for example: 200 | -- 201 | -- @ 202 | -- (many $ char 'a') <> (many $ char 'b') 203 | -- @ 204 | -- 205 | -- The above will parse a string like @"aabbb"@ and return a successful 206 | -- parse result @"aabbb"@. Compare against the below which will 207 | -- produce a result of @"bbb"@ for the same input: 208 | -- 209 | -- @ 210 | -- (many $ char 'a') >> (many $ char 'b') 211 | -- (many $ char 'a') *> (many $ char 'b') 212 | -- @ 213 | -- 214 | -- @since 3.1.12 215 | instance Semigroup.Semigroup a => Semigroup.Semigroup (ParsecT s u m a) where 216 | -- | Combines two parsers like '*>', '>>' and @do {...;...}@ 217 | -- /but/ also combines their results with (<>) instead of 218 | -- discarding the first. 219 | (<>) = Applicative.liftA2 (Semigroup.<>) 220 | 221 | sconcat = fmap Semigroup.sconcat . sequence 222 | stimes b = Semigroup.sconcat . NE.fromList . genericReplicate b 223 | 224 | -- | The 'Monoid' instance for 'ParsecT' is used for the same purposes as 225 | -- the 'Semigroup' instance. 226 | -- 227 | -- @since 3.1.12 228 | instance ( Monoid.Monoid a 229 | , Semigroup.Semigroup (ParsecT s u m a) 230 | ) => Monoid.Monoid (ParsecT s u m a) where 231 | -- | A parser that always succeeds, consumes no input, and 232 | -- returns the underlying 'Monoid''s 'mempty' value 233 | mempty = Applicative.pure Monoid.mempty 234 | 235 | -- | See 'ParsecT''s 'Semigroup.<>' implementation 236 | mappend = (Semigroup.<>) 237 | 238 | mconcat = Functor.fmap Monoid.mconcat . sequence 239 | 240 | instance Functor Consumed where 241 | fmap f (Consumed x) = Consumed (f x) 242 | fmap f (Empty x) = Empty (f x) 243 | 244 | instance Functor (Reply s u) where 245 | fmap f (Ok x s e) = Ok (f x) s e 246 | fmap _ (Error e) = Error e -- XXX 247 | 248 | instance Functor (ParsecT s u m) where 249 | fmap f p = parsecMap f p 250 | 251 | parsecMap :: (a -> b) -> ParsecT s u m a -> ParsecT s u m b 252 | parsecMap f p 253 | = ParsecT $ \s cok cerr eok eerr -> 254 | unParser p s (cok . f) cerr (eok . f) eerr 255 | 256 | instance Applicative.Applicative (ParsecT s u m) where 257 | pure = parserReturn 258 | (<*>) = ap -- TODO: Can this be optimized? 259 | p1 *> p2 = p1 `parserBind` const p2 260 | p1 <* p2 = do { x1 <- p1 ; void p2 ; return x1 } 261 | 262 | instance Applicative.Alternative (ParsecT s u m) where 263 | empty = mzero 264 | (<|>) = mplus 265 | 266 | -- TODO: https://github.com/haskell/parsec/issues/179 267 | -- investigate what's wrong with haddock 268 | -- 269 | -- many = many 270 | -- some = many1 271 | 272 | instance Monad (ParsecT s u m) where 273 | return = Applicative.pure 274 | p >>= f = parserBind p f 275 | (>>) = (Applicative.*>) 276 | #if !MIN_VERSION_base(4,13,0) 277 | fail = Fail.fail 278 | #endif 279 | 280 | -- | @since 3.1.12.0 281 | instance Fail.MonadFail (ParsecT s u m) where 282 | fail = parserFail 283 | 284 | instance (MonadIO m) => MonadIO (ParsecT s u m) where 285 | liftIO = lift . liftIO 286 | 287 | instance (MonadReader r m) => MonadReader r (ParsecT s u m) where 288 | ask = lift ask 289 | local f p = mkPT $ \s -> local f (runParsecT p s) 290 | 291 | -- I'm presuming the user might want a separate, non-backtracking 292 | -- state aside from the Parsec user state. 293 | instance (MonadState s m) => MonadState s (ParsecT s' u m) where 294 | get = lift get 295 | put = lift . put 296 | 297 | instance (MonadCont m) => MonadCont (ParsecT s u m) where 298 | callCC f = mkPT $ \s -> 299 | callCC $ \c -> 300 | runParsecT (f (\a -> mkPT $ \s' -> c (pack s' a))) s 301 | 302 | where pack s a= Empty $ return (Ok a s (unknownError s)) 303 | 304 | instance (MonadError e m) => MonadError e (ParsecT s u m) where 305 | throwError = lift . throwError 306 | p `catchError` h = mkPT $ \s -> 307 | runParsecT p s `catchError` \e -> 308 | runParsecT (h e) s 309 | 310 | parserReturn :: a -> ParsecT s u m a 311 | parserReturn x 312 | = ParsecT $ \s _ _ eok _ -> 313 | eok x s (unknownError s) 314 | 315 | parserBind :: ParsecT s u m a -> (a -> ParsecT s u m b) -> ParsecT s u m b 316 | {-# INLINE parserBind #-} 317 | parserBind m k 318 | = ParsecT $ \s cok cerr eok eerr -> 319 | let 320 | -- consumed-okay case for m 321 | mcok x s err 322 | | errorIsUnknown err = unParser (k x) s cok cerr cok cerr 323 | | otherwise = 324 | let 325 | -- if (k x) consumes, those go straight up 326 | pcok = cok 327 | pcerr = cerr 328 | 329 | -- if (k x) doesn't consume input, but is okay, 330 | -- we still return in the consumed continuation 331 | peok x s err' = cok x s (mergeError err err') 332 | 333 | -- if (k x) doesn't consume input, but errors, 334 | -- we return the error in the 'consumed-error' 335 | -- continuation 336 | peerr err' = cerr (mergeError err err') 337 | in unParser (k x) s pcok pcerr peok peerr 338 | 339 | -- empty-ok case for m 340 | meok x s err 341 | | errorIsUnknown err = unParser (k x) s cok cerr eok eerr 342 | | otherwise = 343 | let 344 | -- in these cases, (k x) can return as empty 345 | pcok = cok 346 | peok x s err' = eok x s (mergeError err err') 347 | pcerr = cerr 348 | peerr err' = eerr (mergeError err err') 349 | in unParser (k x) s pcok pcerr peok peerr 350 | -- consumed-error case for m 351 | mcerr = cerr 352 | 353 | -- empty-error case for m 354 | meerr = eerr 355 | 356 | in unParser m s mcok mcerr meok meerr 357 | 358 | 359 | mergeErrorReply :: ParseError -> Reply s u a -> Reply s u a 360 | mergeErrorReply err1 reply -- XXX where to put it? 361 | = case reply of 362 | Ok x state err2 -> Ok x state (mergeError err1 err2) 363 | Error err2 -> Error (mergeError err1 err2) 364 | 365 | parserFail :: String -> ParsecT s u m a 366 | parserFail msg 367 | = ParsecT $ \s _ _ _ eerr -> 368 | eerr $ newErrorMessage (Message msg) (statePos s) 369 | 370 | instance MonadPlus (ParsecT s u m) where 371 | mzero = parserZero 372 | mplus p1 p2 = parserPlus p1 p2 373 | 374 | -- | @parserZero@ always fails without consuming any input. @parserZero@ is defined 375 | -- equal to the 'mzero' member of the 'MonadPlus' class and to the 'Control.Applicative.empty' member 376 | -- of the 'Control.Applicative.Alternative' class. 377 | 378 | parserZero :: ParsecT s u m a 379 | parserZero 380 | = ParsecT $ \s _ _ _ eerr -> 381 | eerr $ unknownError s 382 | 383 | parserPlus :: ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a 384 | {-# INLINE parserPlus #-} 385 | parserPlus m n 386 | = ParsecT $ \s cok cerr eok eerr -> 387 | let 388 | meerr err = 389 | let 390 | neok y s' err' = eok y s' (mergeError err err') 391 | neerr err' = eerr $ mergeError err err' 392 | in unParser n s cok cerr neok neerr 393 | in unParser m s cok cerr eok meerr 394 | 395 | instance MonadTrans (ParsecT s u) where 396 | lift amb = ParsecT $ \s _ _ eok _ -> do 397 | a <- amb 398 | eok a s $ unknownError s 399 | 400 | infix 0 401 | infixr 1 <|> 402 | 403 | -- | The parser @p \ msg@ behaves as parser @p@, but whenever the 404 | -- parser @p@ fails /without consuming any input/, it replaces expect 405 | -- error messages with the expect error message @msg@. 406 | -- 407 | -- This is normally used at the end of a set alternatives where we want 408 | -- to return an error message in terms of a higher level construct 409 | -- rather than returning all possible characters. For example, if the 410 | -- @expr@ parser from the 'try' example would fail, the error 411 | -- message is: '...: expecting expression'. Without the @(\)@ 412 | -- combinator, the message would be like '...: expecting \"let\" or 413 | -- letter', which is less friendly. 414 | 415 | () :: (ParsecT s u m a) -> String -> (ParsecT s u m a) 416 | p msg = label p msg 417 | 418 | -- | This combinator implements choice. The parser @p \<|> q@ first 419 | -- applies @p@. If it succeeds, the value of @p@ is returned. If @p@ 420 | -- fails /without consuming any input/, parser @q@ is tried. This 421 | -- combinator is defined equal to the 'mplus' member of the 'MonadPlus' 422 | -- class and the ('Control.Applicative.<|>') member of 'Control.Applicative.Alternative'. 423 | -- 424 | -- The parser is called /predictive/ since @q@ is only tried when 425 | -- parser @p@ didn't consume any input (i.e.. the look ahead is 1). 426 | -- This non-backtracking behaviour allows for both an efficient 427 | -- implementation of the parser combinators and the generation of good 428 | -- error messages. 429 | 430 | (<|>) :: (ParsecT s u m a) -> (ParsecT s u m a) -> (ParsecT s u m a) 431 | p1 <|> p2 = mplus p1 p2 432 | 433 | -- | A synonym for @\@, but as a function instead of an operator. 434 | label :: ParsecT s u m a -> String -> ParsecT s u m a 435 | label p msg 436 | = labels p [msg] 437 | 438 | labels :: ParsecT s u m a -> [String] -> ParsecT s u m a 439 | labels p msgs = 440 | ParsecT $ \s cok cerr eok eerr -> 441 | let eok' x s' error = eok x s' $ if errorIsUnknown error 442 | then error 443 | else setExpectErrors error msgs 444 | eerr' err = eerr $ setExpectErrors err msgs 445 | 446 | in unParser p s cok cerr eok' eerr' 447 | 448 | where 449 | setExpectErrors err [] = setErrorMessage (Expect "") err 450 | setExpectErrors err [msg] = setErrorMessage (Expect msg) err 451 | setExpectErrors err (msg:msgs) 452 | = foldr (\msg' err' -> addErrorMessage (Expect msg') err') 453 | (setErrorMessage (Expect msg) err) msgs 454 | 455 | -- TODO: There should be a stronger statement that can be made about this 456 | 457 | -- | An instance of @Stream@ has stream type @s@, underlying monad @m@ and token type @t@ determined by the stream 458 | -- 459 | -- Some rough guidelines for a \"correct\" instance of Stream: 460 | -- 461 | -- * unfoldM uncons gives the [t] corresponding to the stream 462 | -- 463 | -- * A @Stream@ instance is responsible for maintaining the \"position within the stream\" in the stream state @s@. This is trivial unless you are using the monad in a non-trivial way. 464 | 465 | class (Monad m) => Stream s m t | s -> t where 466 | uncons :: s -> m (Maybe (t,s)) 467 | 468 | instance (Monad m) => Stream [tok] m tok where 469 | uncons [] = return $ Nothing 470 | uncons (t:ts) = return $ Just (t,ts) 471 | {-# INLINE uncons #-} 472 | 473 | 474 | instance (Monad m) => Stream CL.ByteString m Char where 475 | uncons = return . CL.uncons 476 | 477 | instance (Monad m) => Stream C.ByteString m Char where 478 | uncons = return . C.uncons 479 | 480 | instance (Monad m) => Stream Text.Text m Char where 481 | uncons = return . Text.uncons 482 | {-# INLINE uncons #-} 483 | 484 | instance (Monad m) => Stream TextL.Text m Char where 485 | uncons = return . TextL.uncons 486 | {-# INLINE uncons #-} 487 | 488 | 489 | tokens :: (Stream s m t, Eq t) 490 | => ([t] -> String) -- Pretty print a list of tokens 491 | -> (SourcePos -> [t] -> SourcePos) 492 | -> [t] -- List of tokens to parse 493 | -> ParsecT s u m [t] 494 | {-# INLINE tokens #-} 495 | tokens _ _ [] 496 | = ParsecT $ \s _ _ eok _ -> 497 | eok [] s $ unknownError s 498 | tokens showTokens nextposs tts@(tok:toks) 499 | = ParsecT $ \(State input pos u) cok cerr _eok eerr -> 500 | let 501 | errEof = (setErrorMessage (Expect (showTokens tts)) 502 | (newErrorMessage (SysUnExpect "") pos)) 503 | 504 | errExpect x = (setErrorMessage (Expect (showTokens tts)) 505 | (newErrorMessage (SysUnExpect (showTokens [x])) pos)) 506 | 507 | walk [] rs = ok rs 508 | walk (t:ts) rs = do 509 | sr <- uncons rs 510 | case sr of 511 | Nothing -> cerr $ errEof 512 | Just (x,xs) | t == x -> walk ts xs 513 | | otherwise -> cerr $ errExpect x 514 | 515 | ok rs = let pos' = nextposs pos tts 516 | s' = State rs pos' u 517 | in cok tts s' (newErrorUnknown pos') 518 | in do 519 | sr <- uncons input 520 | case sr of 521 | Nothing -> eerr $ errEof 522 | Just (x,xs) 523 | | tok == x -> walk toks xs 524 | | otherwise -> eerr $ errExpect x 525 | 526 | -- | Like 'tokens', but doesn't consume matching prefix. 527 | -- 528 | -- @since 3.1.16.0 529 | tokens' :: (Stream s m t, Eq t) 530 | => ([t] -> String) -- Pretty print a list of tokens 531 | -> (SourcePos -> [t] -> SourcePos) 532 | -> [t] -- List of tokens to parse 533 | -> ParsecT s u m [t] 534 | {-# INLINE tokens' #-} 535 | tokens' _ _ [] 536 | = ParsecT $ \s _ _ eok _ -> 537 | eok [] s $ unknownError s 538 | tokens' showTokens nextposs tts@(tok:toks) 539 | = ParsecT $ \(State input pos u) cok _cerr _eok eerr -> 540 | let 541 | errEof = (setErrorMessage (Expect (showTokens tts)) 542 | (newErrorMessage (SysUnExpect "") pos)) 543 | 544 | errExpect x = (setErrorMessage (Expect (showTokens tts)) 545 | (newErrorMessage (SysUnExpect (showTokens [x])) pos)) 546 | 547 | walk [] rs = ok rs 548 | walk (t:ts) rs = do 549 | sr <- uncons rs 550 | case sr of 551 | Nothing -> eerr $ errEof 552 | Just (x,xs) | t == x -> walk ts xs 553 | | otherwise -> eerr $ errExpect x 554 | 555 | ok rs = let pos' = nextposs pos tts 556 | s' = State rs pos' u 557 | in cok tts s' (newErrorUnknown pos') 558 | in do 559 | sr <- uncons input 560 | case sr of 561 | Nothing -> eerr $ errEof 562 | Just (x,xs) 563 | | tok == x -> walk toks xs 564 | | otherwise -> eerr $ errExpect x 565 | 566 | -- | The parser @try p@ behaves like parser @p@, except that it 567 | -- pretends that it hasn't consumed any input when an error occurs. 568 | -- 569 | -- This combinator is used whenever arbitrary look ahead is needed. 570 | -- Since it pretends that it hasn't consumed any input when @p@ fails, 571 | -- the ('<|>') combinator will try its second alternative even when the 572 | -- first parser failed while consuming input. 573 | -- 574 | -- The @try@ combinator can for example be used to distinguish 575 | -- identifiers and reserved words. Both reserved words and identifiers 576 | -- are a sequence of letters. Whenever we expect a certain reserved 577 | -- word where we can also expect an identifier we have to use the @try@ 578 | -- combinator. Suppose we write: 579 | -- 580 | -- > expr = letExpr <|> identifier "expression" 581 | -- > 582 | -- > letExpr = do{ string "let"; ... } 583 | -- > identifier = many1 letter 584 | -- 585 | -- If the user writes \"lexical\", the parser fails with: @unexpected 586 | -- \'x\', expecting \'t\' in \"let\"@. Indeed, since the ('<|>') combinator 587 | -- only tries alternatives when the first alternative hasn't consumed 588 | -- input, the @identifier@ parser is never tried (because the prefix 589 | -- \"le\" of the @string \"let\"@ parser is already consumed). The 590 | -- right behaviour can be obtained by adding the @try@ combinator: 591 | -- 592 | -- > expr = letExpr <|> identifier "expression" 593 | -- > 594 | -- > letExpr = do{ try (string "let"); ... } 595 | -- > identifier = many1 letter 596 | 597 | try :: ParsecT s u m a -> ParsecT s u m a 598 | try p = 599 | ParsecT $ \s cok _ eok eerr -> 600 | unParser p s cok eerr eok eerr 601 | 602 | -- | @lookAhead p@ parses @p@ without consuming any input. 603 | -- 604 | -- If @p@ fails and consumes some input, so does @lookAhead@. Combine with 'try' 605 | -- if this is undesirable. 606 | 607 | lookAhead :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m a 608 | lookAhead p = 609 | ParsecT $ \s _ cerr eok eerr -> do 610 | let eok' a _ _ = eok a s (newErrorUnknown (statePos s)) 611 | unParser p s eok' cerr eok' eerr 612 | 613 | -- | The parser @token showTok posFromTok testTok@ accepts a token @t@ 614 | -- with result @x@ when the function @testTok t@ returns @'Just' x@. The 615 | -- source position of the @t@ should be returned by @posFromTok t@ and 616 | -- the token can be shown using @showTok t@. 617 | -- 618 | -- This combinator is expressed in terms of 'tokenPrim'. 619 | -- It is used to accept user defined token streams. For example, 620 | -- suppose that we have a stream of basic tokens tupled with source 621 | -- positions. We can then define a parser that accepts single tokens as: 622 | -- 623 | -- > mytoken x 624 | -- > = token showTok posFromTok testTok 625 | -- > where 626 | -- > showTok (pos,t) = show t 627 | -- > posFromTok (pos,t) = pos 628 | -- > testTok (pos,t) = if x == t then Just t else Nothing 629 | 630 | token :: (Stream s Identity t) 631 | => (t -> String) -- ^ Token pretty-printing function. 632 | -> (t -> SourcePos) -- ^ Computes the position of a token. 633 | -> (t -> Maybe a) -- ^ Matching function for the token to parse. 634 | -> Parsec s u a 635 | {-# INLINABLE token #-} 636 | token showToken tokpos test = tokenPrim showToken nextpos test 637 | where 638 | nextpos _ tok ts = case runIdentity (uncons ts) of 639 | Nothing -> tokpos tok 640 | Just (tok',_) -> tokpos tok' 641 | 642 | -- | The parser @tokenPrim showTok nextPos testTok@ accepts a token @t@ 643 | -- with result @x@ when the function @testTok t@ returns @'Just' x@. The 644 | -- token can be shown using @showTok t@. The position of the /next/ 645 | -- token should be returned when @nextPos@ is called with the current 646 | -- source position @pos@, the current token @t@ and the rest of the 647 | -- tokens @toks@, @nextPos pos t toks@. 648 | -- 649 | -- This is the most primitive combinator for accepting tokens. For 650 | -- example, the 'Text.Parsec.Char.char' parser could be implemented as: 651 | -- 652 | -- > char c 653 | -- > = tokenPrim showChar nextPos testChar 654 | -- > where 655 | -- > showChar x = "'" ++ x ++ "'" 656 | -- > testChar x = if x == c then Just x else Nothing 657 | -- > nextPos pos x xs = updatePosChar pos x 658 | 659 | tokenPrim :: (Stream s m t) 660 | => (t -> String) -- ^ Token pretty-printing function. 661 | -> (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function. 662 | -> (t -> Maybe a) -- ^ Matching function for the token to parse. 663 | -> ParsecT s u m a 664 | {-# INLINE tokenPrim #-} 665 | tokenPrim showToken nextpos test = tokenPrimEx showToken nextpos Nothing test 666 | 667 | tokenPrimEx :: (Stream s m t) 668 | => (t -> String) 669 | -> (SourcePos -> t -> s -> SourcePos) 670 | -> Maybe (SourcePos -> t -> s -> u -> u) 671 | -> (t -> Maybe a) 672 | -> ParsecT s u m a 673 | {-# INLINE tokenPrimEx #-} 674 | tokenPrimEx showToken nextpos Nothing test 675 | = ParsecT $ \(State input pos user) cok _cerr _eok eerr -> do 676 | r <- uncons input 677 | case r of 678 | Nothing -> eerr $ unexpectError "" pos 679 | Just (c,cs) 680 | -> case test c of 681 | Just x -> let newpos = nextpos pos c cs 682 | newstate = State cs newpos user 683 | in seq newpos $ seq newstate $ 684 | cok x newstate (newErrorUnknown newpos) 685 | Nothing -> eerr $ unexpectError (showToken c) pos 686 | tokenPrimEx showToken nextpos (Just nextState) test 687 | = ParsecT $ \(State input pos user) cok _cerr _eok eerr -> do 688 | r <- uncons input 689 | case r of 690 | Nothing -> eerr $ unexpectError "" pos 691 | Just (c,cs) 692 | -> case test c of 693 | Just x -> let newpos = nextpos pos c cs 694 | newUser = nextState pos c cs user 695 | newstate = State cs newpos newUser 696 | in seq newpos $ seq newstate $ 697 | cok x newstate $ newErrorUnknown newpos 698 | Nothing -> eerr $ unexpectError (showToken c) pos 699 | 700 | unexpectError :: String -> SourcePos -> ParseError 701 | unexpectError msg pos = newErrorMessage (SysUnExpect msg) pos 702 | 703 | 704 | -- | @many p@ applies the parser @p@ /zero/ or more times. Returns a 705 | -- list of the returned values of @p@. 706 | -- 707 | -- > identifier = do{ c <- letter 708 | -- > ; cs <- many (alphaNum <|> char '_') 709 | -- > ; return (c:cs) 710 | -- > } 711 | 712 | many :: ParsecT s u m a -> ParsecT s u m [a] 713 | many p 714 | = do xs <- manyAccum (:) p 715 | return (reverse xs) 716 | 717 | -- | @many1 p@ applies the parser @p@ /one/ or more times. Returns a 718 | -- list of the returned values of @p@. 719 | -- 720 | -- > word = many1 letter 721 | 722 | many1 :: ParsecT s u m a -> ParsecT s u m [a] 723 | {-# INLINABLE many1 #-} 724 | many1 p = do{ x <- p; xs <- many p; return (x:xs) } 725 | 726 | -- | @skipMany p@ applies the parser @p@ /zero/ or more times, skipping 727 | -- its result. 728 | -- 729 | -- > spaces = skipMany space 730 | 731 | skipMany :: ParsecT s u m a -> ParsecT s u m () 732 | skipMany p 733 | = do _ <- manyAccum (\_ _ -> []) p 734 | return () 735 | 736 | manyAccum :: (a -> [a] -> [a]) 737 | -> ParsecT s u m a 738 | -> ParsecT s u m [a] 739 | manyAccum acc p = 740 | ParsecT $ \s cok cerr eok _eerr -> 741 | let walk xs x s' _err = 742 | unParser p s' 743 | (seq xs $ walk $ acc x xs) -- consumed-ok 744 | cerr -- consumed-err 745 | manyErr -- empty-ok 746 | (\e -> cok (acc x xs) s' e) -- empty-err 747 | in unParser p s (walk []) cerr manyErr (\e -> eok [] s e) 748 | 749 | manyErr :: a 750 | manyErr = error "Text.ParserCombinators.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string." 751 | 752 | 753 | -- < Running a parser: monadic (runPT) and pure (runP) 754 | 755 | runPT :: (Stream s m t) 756 | => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a) 757 | {-# INLINABLE runPT #-} 758 | runPT p u name s 759 | = do res <- runParsecT p (State s (initialPos name) u) 760 | r <- parserReply res 761 | case r of 762 | Ok x _ _ -> return (Right x) 763 | Error err -> return (Left err) 764 | where 765 | parserReply res 766 | = case res of 767 | Consumed r -> r 768 | Empty r -> r 769 | 770 | runP :: (Stream s Identity t) 771 | => Parsec s u a -> u -> SourceName -> s -> Either ParseError a 772 | runP p u name s = runIdentity $ runPT p u name s 773 | 774 | -- | The most general way to run a parser. @runParserT p state filePath 775 | -- input@ runs parser @p@ on the input list of tokens @input@, 776 | -- obtained from source @filePath@ with the initial user state @st@. 777 | -- The @filePath@ is only used in error messages and may be the empty 778 | -- string. Returns a computation in the underlying monad @m@ that return either a 'ParseError' ('Left') or a 779 | -- value of type @a@ ('Right'). 780 | 781 | runParserT :: (Stream s m t) 782 | => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a) 783 | runParserT = runPT 784 | 785 | -- | The most general way to run a parser over the Identity monad. @runParser p state filePath 786 | -- input@ runs parser @p@ on the input list of tokens @input@, 787 | -- obtained from source @filePath@ with the initial user state @st@. 788 | -- The @filePath@ is only used in error messages and may be the empty 789 | -- string. Returns either a 'ParseError' ('Left') or a 790 | -- value of type @a@ ('Right'). 791 | -- 792 | -- > parseFromFile p fname 793 | -- > = do{ input <- readFile fname 794 | -- > ; return (runParser p () fname input) 795 | -- > } 796 | 797 | runParser :: (Stream s Identity t) 798 | => Parsec s u a -> u -> SourceName -> s -> Either ParseError a 799 | runParser = runP 800 | 801 | -- | @parse p filePath input@ runs a parser @p@ over Identity without user 802 | -- state. The @filePath@ is only used in error messages and may be the 803 | -- empty string. Returns either a 'ParseError' ('Left') 804 | -- or a value of type @a@ ('Right'). 805 | -- 806 | -- > main = case (parse numbers "" "11, 2, 43") of 807 | -- > Left err -> print err 808 | -- > Right xs -> print (sum xs) 809 | -- > 810 | -- > numbers = commaSep integer 811 | 812 | parse :: (Stream s Identity t) 813 | => Parsec s () a -> SourceName -> s -> Either ParseError a 814 | parse p = runP p () 815 | 816 | -- | The expression @parseTest p input@ applies a parser @p@ against 817 | -- input @input@ and prints the result to stdout. Used for testing 818 | -- parsers. 819 | 820 | parseTest :: (Stream s Identity t, Show a) 821 | => Parsec s () a -> s -> IO () 822 | parseTest p input 823 | = case parse p "" input of 824 | Left err -> do putStr "parse error at " 825 | print err 826 | Right x -> print x 827 | 828 | -- < Parser state combinators 829 | 830 | -- | Returns the current source position. See also 'SourcePos'. 831 | 832 | getPosition :: (Monad m) => ParsecT s u m SourcePos 833 | getPosition = do state <- getParserState 834 | return (statePos state) 835 | 836 | -- | Returns the current input 837 | 838 | getInput :: (Monad m) => ParsecT s u m s 839 | getInput = do state <- getParserState 840 | return (stateInput state) 841 | 842 | -- | @setPosition pos@ sets the current source position to @pos@. 843 | 844 | setPosition :: (Monad m) => SourcePos -> ParsecT s u m () 845 | setPosition pos 846 | = do _ <- updateParserState (\(State input _ user) -> State input pos user) 847 | return () 848 | 849 | -- | @setInput input@ continues parsing with @input@. The 'getInput' and 850 | -- @setInput@ functions can for example be used to deal with #include 851 | -- files. 852 | 853 | setInput :: (Monad m) => s -> ParsecT s u m () 854 | setInput input 855 | = do _ <- updateParserState (\(State _ pos user) -> State input pos user) 856 | return () 857 | 858 | -- | Returns the full parser state as a 'State' record. 859 | 860 | getParserState :: (Monad m) => ParsecT s u m (State s u) 861 | getParserState = updateParserState id 862 | 863 | -- | @setParserState st@ set the full parser state to @st@. 864 | 865 | setParserState :: (Monad m) => State s u -> ParsecT s u m (State s u) 866 | setParserState st = updateParserState (const st) 867 | 868 | -- | @updateParserState f@ applies function @f@ to the parser state. 869 | 870 | updateParserState :: (State s u -> State s u) -> ParsecT s u m (State s u) 871 | updateParserState f = 872 | ParsecT $ \s _ _ eok _ -> 873 | let s' = f s 874 | in eok s' s' $ unknownError s' 875 | 876 | -- < User state combinators 877 | 878 | -- | Returns the current user state. 879 | 880 | getState :: (Monad m) => ParsecT s u m u 881 | getState = stateUser `liftM` getParserState 882 | 883 | -- | @putState st@ set the user state to @st@. 884 | 885 | putState :: (Monad m) => u -> ParsecT s u m () 886 | putState u = do _ <- updateParserState $ \s -> s { stateUser = u } 887 | return () 888 | 889 | -- | @modifyState f@ applies function @f@ to the user state. Suppose 890 | -- that we want to count identifiers in a source, we could use the user 891 | -- state as: 892 | -- 893 | -- > expr = do{ x <- identifier 894 | -- > ; modifyState (+1) 895 | -- > ; return (Id x) 896 | -- > } 897 | 898 | modifyState :: (Monad m) => (u -> u) -> ParsecT s u m () 899 | modifyState f = do _ <- updateParserState $ \s -> s { stateUser = f (stateUser s) } 900 | return () 901 | 902 | -- XXX Compat 903 | 904 | -- | An alias for putState for backwards compatibility. 905 | 906 | setState :: (Monad m) => u -> ParsecT s u m () 907 | setState = putState 908 | 909 | -- | An alias for modifyState for backwards compatibility. 910 | 911 | updateState :: (Monad m) => (u -> u) -> ParsecT s u m () 912 | updateState = modifyState 913 | -------------------------------------------------------------------------------- /src/Text/Parsec/String.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parsec.String 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the file libraries/parsec/LICENSE) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Make Strings an instance of 'Stream' with 'Char' token type. 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.Parsec.String 18 | ( Parser, GenParser, parseFromFile 19 | ) where 20 | 21 | import Text.Parsec.Error 22 | import Text.Parsec.Prim 23 | 24 | type Parser = Parsec String () 25 | type GenParser tok st = Parsec [tok] st 26 | 27 | -- | @parseFromFile p filePath@ runs a string parser @p@ on the 28 | -- input read from @filePath@ using 'Prelude.readFile'. Returns either a 'ParseError' 29 | -- ('Left') or a value of type @a@ ('Right'). 30 | -- 31 | -- > main = do{ result <- parseFromFile numbers "digits.txt" 32 | -- > ; case result of 33 | -- > Left err -> print err 34 | -- > Right xs -> print (sum xs) 35 | -- > } 36 | parseFromFile :: Parser a -> FilePath -> IO (Either ParseError a) 37 | parseFromFile p fname 38 | = do input <- readFile fname 39 | return (runP p () fname input) 40 | -------------------------------------------------------------------------------- /src/Text/Parsec/Text.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parsec.String 6 | -- Copyright : (c) Antoine Latter 2011 7 | -- License : BSD-style (see the file libraries/parsec/LICENSE) 8 | -- 9 | -- Maintainer : aslatter@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Convenience definitions for working with 'Text.Text'. 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.Parsec.Text 18 | ( Parser, GenParser, parseFromFile 19 | ) where 20 | 21 | import qualified Data.Text as Text 22 | import qualified Data.Text.IO as T 23 | 24 | import Text.Parsec.Prim 25 | import Text.Parsec.Error 26 | 27 | type Parser = Parsec Text.Text () 28 | type GenParser st = Parsec Text.Text st 29 | 30 | -- | @parseFromFile p filePath@ runs a strict text parser @p@ on the 31 | -- input read from @filePath@ using 'Data.Text.IO.readFile'. Returns either a 'ParseError' 32 | -- ('Left') or a value of type @a@ ('Right'). 33 | -- 34 | -- > main = do{ result <- parseFromFile numbers "digits.txt" 35 | -- > ; case result of 36 | -- > Left err -> print err 37 | -- > Right xs -> print (sum xs) 38 | -- > } 39 | -- 40 | -- @since 3.1.14.0 41 | 42 | parseFromFile :: Parser a -> FilePath -> IO (Either ParseError a) 43 | parseFromFile p fname 44 | = do input <- T.readFile fname 45 | return (runP p () fname input) 46 | -------------------------------------------------------------------------------- /src/Text/Parsec/Text/Lazy.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parsec.String 6 | -- Copyright : (c) Antoine Latter 2011 7 | -- License : BSD-style (see the file libraries/parsec/LICENSE) 8 | -- 9 | -- Maintainer : aslatter@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Convenience definitions for working with lazy 'Text.Text'. 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.Parsec.Text.Lazy 18 | ( Parser, GenParser, parseFromFile 19 | ) where 20 | 21 | import qualified Data.Text.Lazy as Text 22 | import qualified Data.Text.Lazy.IO as TL 23 | 24 | import Text.Parsec.Prim 25 | import Text.Parsec.Error 26 | 27 | type Parser = Parsec Text.Text () 28 | type GenParser st = Parsec Text.Text st 29 | 30 | -- | @parseFromFile p filePath@ runs a strict text parser @p@ on the 31 | -- input read from @filePath@ using 'Data.Text.Lazy.IO.readFile'. Returns either a 'ParseError' 32 | -- ('Left') or a value of type @a@ ('Right'). 33 | -- 34 | -- > main = do{ result <- parseFromFile numbers "digits.txt" 35 | -- > ; case result of 36 | -- > Left err -> print err 37 | -- > Right xs -> print (sum xs) 38 | -- > } 39 | -- 40 | -- @since 3.1.14.0 41 | 42 | parseFromFile :: Parser a -> FilePath -> IO (Either ParseError a) 43 | parseFromFile p fname 44 | = do input <- TL.readFile fname 45 | return (runP p () fname input) 46 | -------------------------------------------------------------------------------- /src/Text/Parsec/Token.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE PolymorphicComponents #-} 3 | {-# LANGUAGE Safe #-} 4 | 5 | ----------------------------------------------------------------------------- 6 | -- | 7 | -- Module : Text.Parsec.Token 8 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 9 | -- License : BSD-style (see the LICENSE file) 10 | -- 11 | -- Maintainer : derek.a.elkins@gmail.com 12 | -- Stability : provisional 13 | -- Portability : non-portable (uses local universal quantification: PolymorphicComponents) 14 | -- 15 | -- A helper module to parse lexical elements (tokens). See 'makeTokenParser' 16 | -- for a description of how to use it. 17 | -- 18 | ----------------------------------------------------------------------------- 19 | 20 | {-# OPTIONS_GHC -fno-warn-name-shadowing #-} 21 | 22 | module Text.Parsec.Token 23 | ( LanguageDef 24 | , GenLanguageDef (..) 25 | , TokenParser 26 | , GenTokenParser (..) 27 | , makeTokenParser 28 | ) where 29 | 30 | import Data.Char ( isAlpha, toLower, toUpper, isSpace, digitToInt ) 31 | import Data.Typeable ( Typeable ) 32 | import Data.List ( nub, sort ) 33 | import Control.Monad.Identity (Identity) 34 | 35 | import Text.Parsec.Prim 36 | import Text.Parsec.Char 37 | import Text.Parsec.Combinator 38 | 39 | ----------------------------------------------------------- 40 | -- Language Definition 41 | ----------------------------------------------------------- 42 | 43 | type LanguageDef st = GenLanguageDef String st Identity 44 | 45 | -- | The @GenLanguageDef@ type is a record that contains all parameterizable 46 | -- features of the "Text.Parsec.Token" module. The module "Text.Parsec.Language" 47 | -- contains some default definitions. 48 | 49 | data GenLanguageDef s u m 50 | = LanguageDef { 51 | 52 | -- | Describes the start of a block comment. Use the empty string if the 53 | -- language doesn't support block comments. For example \"\/*\". 54 | 55 | commentStart :: String, 56 | 57 | -- | Describes the end of a block comment. Use the empty string if the 58 | -- language doesn't support block comments. For example \"*\/\". 59 | 60 | commentEnd :: String, 61 | 62 | -- | Describes the start of a line comment. Use the empty string if the 63 | -- language doesn't support line comments. For example \"\/\/\". 64 | 65 | commentLine :: String, 66 | 67 | -- | Set to 'True' if the language supports nested block comments. 68 | 69 | nestedComments :: Bool, 70 | 71 | -- | This parser should accept any start characters of identifiers. For 72 | -- example @letter \<|> char \'_\'@. 73 | 74 | identStart :: ParsecT s u m Char, 75 | 76 | -- | This parser should accept any legal tail characters of identifiers. 77 | -- For example @alphaNum \<|> char \'_\'@. 78 | 79 | identLetter :: ParsecT s u m Char, 80 | 81 | -- | This parser should accept any start characters of operators. For 82 | -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@ 83 | 84 | opStart :: ParsecT s u m Char, 85 | 86 | -- | This parser should accept any legal tail characters of operators. 87 | -- Note that this parser should even be defined if the language doesn't 88 | -- support user-defined operators, or otherwise the 'reservedOp' 89 | -- parser won't work correctly. 90 | 91 | opLetter :: ParsecT s u m Char, 92 | 93 | -- | The list of reserved identifiers. 94 | 95 | reservedNames :: [String], 96 | 97 | -- | The list of reserved operators. 98 | 99 | reservedOpNames:: [String], 100 | 101 | -- | Set to 'True' if the language is case sensitive. 102 | 103 | caseSensitive :: Bool 104 | 105 | } 106 | deriving ( Typeable ) 107 | 108 | ----------------------------------------------------------- 109 | -- A first class module: TokenParser 110 | ----------------------------------------------------------- 111 | 112 | type TokenParser st = GenTokenParser String st Identity 113 | 114 | -- | The type of the record that holds lexical parsers that work on 115 | -- @s@ streams with state @u@ over a monad @m@. 116 | 117 | data GenTokenParser s u m 118 | = TokenParser { 119 | 120 | -- | This lexeme parser parses a legal identifier. Returns the identifier 121 | -- string. This parser will fail on identifiers that are reserved 122 | -- words. Legal identifier (start) characters and reserved words are 123 | -- defined in the 'LanguageDef' that is passed to 124 | -- 'makeTokenParser'. An @identifier@ is treated as 125 | -- a single token using 'try'. 126 | 127 | identifier :: ParsecT s u m String, 128 | 129 | -- | The lexeme parser @reserved name@ parses @symbol 130 | -- name@, but it also checks that the @name@ is not a prefix of a 131 | -- valid identifier. A @reserved@ word is treated as a single token 132 | -- using 'try'. 133 | 134 | reserved :: String -> ParsecT s u m (), 135 | 136 | -- | This lexeme parser parses a legal operator. Returns the name of the 137 | -- operator. This parser will fail on any operators that are reserved 138 | -- operators. Legal operator (start) characters and reserved operators 139 | -- are defined in the 'LanguageDef' that is passed to 140 | -- 'makeTokenParser'. An @operator@ is treated as a 141 | -- single token using 'try'. 142 | 143 | operator :: ParsecT s u m String, 144 | 145 | -- |The lexeme parser @reservedOp name@ parses @symbol 146 | -- name@, but it also checks that the @name@ is not a prefix of a 147 | -- valid operator. A @reservedOp@ is treated as a single token using 148 | -- 'try'. 149 | 150 | reservedOp :: String -> ParsecT s u m (), 151 | 152 | 153 | -- | This lexeme parser parses a single literal character. Returns the 154 | -- literal character value. This parsers deals correctly with escape 155 | -- sequences. The literal character is parsed according to the grammar 156 | -- rules defined in the Haskell report (which matches most programming 157 | -- languages quite closely). 158 | 159 | charLiteral :: ParsecT s u m Char, 160 | 161 | -- | This lexeme parser parses a literal string. Returns the literal 162 | -- string value. This parsers deals correctly with escape sequences and 163 | -- gaps. The literal string is parsed according to the grammar rules 164 | -- defined in the Haskell report (which matches most programming 165 | -- languages quite closely). 166 | 167 | stringLiteral :: ParsecT s u m String, 168 | 169 | -- | This lexeme parser parses a natural number (a non-negative whole 170 | -- number). Returns the value of the number. The number can be 171 | -- specified in 'decimal', 'hexadecimal' or 172 | -- 'octal'. The number is parsed according to the grammar 173 | -- rules in the Haskell report. 174 | 175 | natural :: ParsecT s u m Integer, 176 | 177 | -- | This lexeme parser parses an integer (a whole number). This parser 178 | -- is like 'natural' except that it can be prefixed with 179 | -- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The 180 | -- number can be specified in 'decimal', 'hexadecimal' 181 | -- or 'octal'. The number is parsed according 182 | -- to the grammar rules in the Haskell report. 183 | 184 | integer :: ParsecT s u m Integer, 185 | 186 | -- | This lexeme parser parses a floating point value. Returns the value 187 | -- of the number. The number is parsed according to the grammar rules 188 | -- defined in the Haskell report. 189 | 190 | float :: ParsecT s u m Double, 191 | 192 | -- | This lexeme parser parses either 'natural' or a 'float'. 193 | -- Returns the value of the number. This parsers deals with 194 | -- any overlap in the grammar rules for naturals and floats. The number 195 | -- is parsed according to the grammar rules defined in the Haskell report. 196 | 197 | naturalOrFloat :: ParsecT s u m (Either Integer Double), 198 | 199 | -- | Parses a non-negative whole number in the decimal system. Returns the 200 | -- value of the number. 201 | 202 | decimal :: ParsecT s u m Integer, 203 | 204 | -- | Parses a non-negative whole number in the hexadecimal system. The 205 | -- number should be prefixed with \"x\" or \"X\". Returns the value of the 206 | -- number. 207 | 208 | hexadecimal :: ParsecT s u m Integer, 209 | 210 | -- | Parses a non-negative whole number in the octal system. The number 211 | -- should be prefixed with \"o\" or \"O\". Returns the value of the 212 | -- number. 213 | 214 | octal :: ParsecT s u m Integer, 215 | 216 | -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips 217 | -- trailing white space. 218 | 219 | symbol :: String -> ParsecT s u m String, 220 | 221 | -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace' 222 | -- parser, returning the value of @p@. Every lexical 223 | -- token (lexeme) is defined using @lexeme@, this way every parse 224 | -- starts at a point without white space. Parsers that use @lexeme@ are 225 | -- called /lexeme/ parsers in this document. 226 | -- 227 | -- The only point where the 'whiteSpace' parser should be 228 | -- called explicitly is the start of the main parser in order to skip 229 | -- any leading white space. 230 | -- 231 | -- > mainParser = do{ whiteSpace 232 | -- > ; ds <- many (lexeme digit) 233 | -- > ; eof 234 | -- > ; return (sum ds) 235 | -- > } 236 | 237 | lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a, 238 | 239 | -- | Parses any white space. White space consists of /zero/ or more 240 | -- occurrences of a 'space', a line comment or a block (multi 241 | -- line) comment. Block comments may be nested. How comments are 242 | -- started and ended is defined in the 'LanguageDef' 243 | -- that is passed to 'makeTokenParser'. 244 | 245 | whiteSpace :: ParsecT s u m (), 246 | 247 | -- | Lexeme parser @parens p@ parses @p@ enclosed in parenthesis, 248 | -- returning the value of @p@. 249 | 250 | parens :: forall a. ParsecT s u m a -> ParsecT s u m a, 251 | 252 | -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and 253 | -- \'}\'), returning the value of @p@. 254 | 255 | braces :: forall a. ParsecT s u m a -> ParsecT s u m a, 256 | 257 | -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\' 258 | -- and \'>\'), returning the value of @p@. 259 | 260 | angles :: forall a. ParsecT s u m a -> ParsecT s u m a, 261 | 262 | -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\' 263 | -- and \']\'), returning the value of @p@. 264 | 265 | brackets :: forall a. ParsecT s u m a -> ParsecT s u m a, 266 | 267 | -- | DEPRECATED: Use 'brackets'. 268 | 269 | squares :: forall a. ParsecT s u m a -> ParsecT s u m a, 270 | 271 | -- | Lexeme parser |semi| parses the character \';\' and skips any 272 | -- trailing white space. Returns the string \";\". 273 | 274 | semi :: ParsecT s u m String, 275 | 276 | -- | Lexeme parser @comma@ parses the character \',\' and skips any 277 | -- trailing white space. Returns the string \",\". 278 | 279 | comma :: ParsecT s u m String, 280 | 281 | -- | Lexeme parser @colon@ parses the character \':\' and skips any 282 | -- trailing white space. Returns the string \":\". 283 | 284 | colon :: ParsecT s u m String, 285 | 286 | -- | Lexeme parser @dot@ parses the character \'.\' and skips any 287 | -- trailing white space. Returns the string \".\". 288 | 289 | dot :: ParsecT s u m String, 290 | 291 | -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@ 292 | -- separated by 'semi'. Returns a list of values returned by 293 | -- @p@. 294 | 295 | semiSep :: forall a . ParsecT s u m a -> ParsecT s u m [a], 296 | 297 | -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@ 298 | -- separated by 'semi'. Returns a list of values returned by @p@. 299 | 300 | semiSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a], 301 | 302 | -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of 303 | -- @p@ separated by 'comma'. Returns a list of values returned 304 | -- by @p@. 305 | 306 | commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a], 307 | 308 | -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of 309 | -- @p@ separated by 'comma'. Returns a list of values returned 310 | -- by @p@. 311 | 312 | commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] 313 | } 314 | deriving ( Typeable ) 315 | 316 | ----------------------------------------------------------- 317 | -- Given a LanguageDef, create a token parser. 318 | ----------------------------------------------------------- 319 | 320 | -- | The expression @makeTokenParser language@ creates a 'GenTokenParser' 321 | -- record that contains lexical parsers that are 322 | -- defined using the definitions in the @language@ record. 323 | -- 324 | -- The use of this function is quite stylized - one imports the 325 | -- appropriate language definition and selects the lexical parsers that 326 | -- are needed from the resulting 'GenTokenParser'. 327 | -- 328 | -- > module Main where 329 | -- > 330 | -- > import Text.Parsec 331 | -- > import qualified Text.Parsec.Token as P 332 | -- > import Text.Parsec.Language (haskellDef) 333 | -- > 334 | -- > -- The parser 335 | -- > ... 336 | -- > 337 | -- > expr = parens expr 338 | -- > <|> identifier 339 | -- > <|> ... 340 | -- > 341 | -- > 342 | -- > -- The lexer 343 | -- > lexer = P.makeTokenParser haskellDef 344 | -- > 345 | -- > parens = P.parens lexer 346 | -- > braces = P.braces lexer 347 | -- > identifier = P.identifier lexer 348 | -- > reserved = P.reserved lexer 349 | -- > ... 350 | 351 | makeTokenParser :: (Stream s m Char) 352 | => GenLanguageDef s u m -> GenTokenParser s u m 353 | {-# INLINABLE makeTokenParser #-} 354 | makeTokenParser languageDef 355 | = TokenParser{ identifier = identifier 356 | , reserved = reserved 357 | , operator = operator 358 | , reservedOp = reservedOp 359 | 360 | , charLiteral = charLiteral 361 | , stringLiteral = stringLiteral 362 | , natural = natural 363 | , integer = integer 364 | , float = float 365 | , naturalOrFloat = naturalOrFloat 366 | , decimal = decimal 367 | , hexadecimal = hexadecimal 368 | , octal = octal 369 | 370 | , symbol = symbol 371 | , lexeme = lexeme 372 | , whiteSpace = whiteSpace 373 | 374 | , parens = parens 375 | , braces = braces 376 | , angles = angles 377 | , brackets = brackets 378 | , squares = brackets 379 | , semi = semi 380 | , comma = comma 381 | , colon = colon 382 | , dot = dot 383 | , semiSep = semiSep 384 | , semiSep1 = semiSep1 385 | , commaSep = commaSep 386 | , commaSep1 = commaSep1 387 | } 388 | where 389 | 390 | ----------------------------------------------------------- 391 | -- Bracketing 392 | ----------------------------------------------------------- 393 | parens p = between (symbol "(") (symbol ")") p 394 | braces p = between (symbol "{") (symbol "}") p 395 | angles p = between (symbol "<") (symbol ">") p 396 | brackets p = between (symbol "[") (symbol "]") p 397 | 398 | semi = symbol ";" 399 | comma = symbol "," 400 | dot = symbol "." 401 | colon = symbol ":" 402 | 403 | commaSep p = sepBy p comma 404 | semiSep p = sepBy p semi 405 | 406 | commaSep1 p = sepBy1 p comma 407 | semiSep1 p = sepBy1 p semi 408 | 409 | 410 | ----------------------------------------------------------- 411 | -- Chars & Strings 412 | ----------------------------------------------------------- 413 | charLiteral = lexeme (between (char '\'') 414 | (char '\'' "end of character") 415 | characterChar ) 416 | "character" 417 | 418 | characterChar = charLetter <|> charEscape 419 | "literal character" 420 | 421 | charEscape = do{ _ <- char '\\'; escapeCode } 422 | charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026')) 423 | 424 | 425 | 426 | stringLiteral = lexeme ( 427 | do{ str <- between (char '"') 428 | (char '"' "end of string") 429 | (many stringChar) 430 | ; return (foldr (maybe id (:)) "" str) 431 | } 432 | "literal string") 433 | 434 | stringChar = do{ c <- stringLetter; return (Just c) } 435 | <|> stringEscape 436 | "string character" 437 | 438 | stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026')) 439 | 440 | stringEscape = do{ _ <- char '\\' 441 | ; do{ _ <- escapeGap ; return Nothing } 442 | <|> do{ _ <- escapeEmpty; return Nothing } 443 | <|> do{ esc <- escapeCode; return (Just esc) } 444 | } 445 | 446 | escapeEmpty = char '&' 447 | escapeGap = do{ _ <- many1 space 448 | ; char '\\' "end of string gap" 449 | } 450 | 451 | 452 | 453 | -- escape codes 454 | escapeCode = charEsc <|> charNum <|> charAscii <|> charControl 455 | "escape code" 456 | 457 | charControl = do{ _ <- char '^' 458 | ; code <- upper 459 | ; return (toEnum (fromEnum code - fromEnum 'A' + 1)) 460 | } 461 | 462 | charNum = do{ code <- decimal 463 | <|> do{ _ <- char 'o'; number 8 octDigit } 464 | <|> do{ _ <- char 'x'; number 16 hexDigit } 465 | ; if code > 0x10FFFF 466 | then fail "invalid escape sequence" 467 | else return (toEnum (fromInteger code)) 468 | } 469 | 470 | charEsc = choice (map parseEsc escMap) 471 | where 472 | parseEsc (c,code) = do{ _ <- char c; return code } 473 | 474 | charAscii = choice (map parseAscii asciiMap) 475 | where 476 | parseAscii (asc,code) = try (do{ _ <- string asc; return code }) 477 | 478 | 479 | -- escape code tables 480 | escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'") 481 | asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2) 482 | 483 | ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM", 484 | "FS","GS","RS","US","SP"] 485 | ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL", 486 | "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB", 487 | "CAN","SUB","ESC","DEL"] 488 | 489 | ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI', 490 | '\EM','\FS','\GS','\RS','\US','\SP'] 491 | ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK', 492 | '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK', 493 | '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL'] 494 | 495 | 496 | ----------------------------------------------------------- 497 | -- Numbers 498 | ----------------------------------------------------------- 499 | naturalOrFloat = lexeme (natFloat) "number" 500 | 501 | float = lexeme floating "float" 502 | integer = lexeme int "integer" 503 | natural = lexeme nat "natural" 504 | 505 | 506 | -- floats 507 | floating = do{ n <- decimal 508 | ; fractExponent n 509 | } 510 | 511 | 512 | natFloat = do{ _ <- char '0' 513 | ; zeroNumFloat 514 | } 515 | <|> decimalFloat 516 | 517 | zeroNumFloat = do{ n <- hexadecimal <|> octal 518 | ; return (Left n) 519 | } 520 | <|> decimalFloat 521 | <|> fractFloat (0 :: Integer) 522 | <|> return (Left 0) 523 | 524 | decimalFloat = do{ n <- decimal 525 | ; option (Left n) 526 | (fractFloat n) 527 | } 528 | 529 | fractFloat n = do{ f <- fractExponent n 530 | ; return (Right f) 531 | } 532 | 533 | fractExponent n = do{ fract <- fraction 534 | ; expo <- option "" exponent' 535 | ; readDouble (show n ++ fract ++ expo) 536 | } 537 | <|> 538 | do{ expo <- exponent' 539 | ; readDouble (show n ++ expo) 540 | } 541 | where 542 | readDouble s = 543 | case reads s of 544 | [(x, "")] -> return x 545 | _ -> parserZero 546 | 547 | fraction = do{ _ <- char '.' 548 | ; digits <- many1 digit "fraction" 549 | ; return ('.' : digits) 550 | } 551 | "fraction" 552 | 553 | exponent' = do{ _ <- oneOf "eE" 554 | ; sign' <- fmap (:[]) (oneOf "+-") <|> return "" 555 | ; e <- decimal "exponent" 556 | ; return ('e' : sign' ++ show e) 557 | } 558 | "exponent" 559 | 560 | 561 | -- integers and naturals 562 | int = do{ f <- lexeme sign 563 | ; n <- nat 564 | ; return (f n) 565 | } 566 | 567 | sign = (char '-' >> return negate) 568 | <|> (char '+' >> return id) 569 | <|> return id 570 | 571 | nat = zeroNumber <|> decimal 572 | 573 | zeroNumber = do{ _ <- char '0' 574 | ; hexadecimal <|> octal <|> decimal <|> return 0 575 | } 576 | "" 577 | 578 | decimal = number 10 digit 579 | hexadecimal = do{ _ <- oneOf "xX"; number 16 hexDigit } 580 | octal = do{ _ <- oneOf "oO"; number 8 octDigit } 581 | 582 | number base baseDigit 583 | = do{ digits <- many1 baseDigit 584 | ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits 585 | ; seq n (return n) 586 | } 587 | 588 | ----------------------------------------------------------- 589 | -- Operators & reserved ops 590 | ----------------------------------------------------------- 591 | reservedOp name = 592 | lexeme $ try $ 593 | do{ _ <- string name 594 | ; notFollowedBy (opLetter languageDef) ("end of " ++ show name) 595 | } 596 | 597 | operator = 598 | lexeme $ try $ 599 | do{ name <- oper 600 | ; if (isReservedOp name) 601 | then unexpected ("reserved operator " ++ show name) 602 | else return name 603 | } 604 | 605 | oper = 606 | do{ c <- (opStart languageDef) 607 | ; cs <- many (opLetter languageDef) 608 | ; return (c:cs) 609 | } 610 | "operator" 611 | 612 | isReservedOp name = 613 | isReserved (sort (reservedOpNames languageDef)) name 614 | 615 | 616 | ----------------------------------------------------------- 617 | -- Identifiers & Reserved words 618 | ----------------------------------------------------------- 619 | reserved name = 620 | lexeme $ try $ 621 | do{ _ <- caseString name 622 | ; notFollowedBy (identLetter languageDef) ("end of " ++ show name) 623 | } 624 | 625 | caseString name 626 | | caseSensitive languageDef = string name 627 | | otherwise = do{ walk name; return name } 628 | where 629 | walk [] = return () 630 | walk (c:cs) = do{ _ <- caseChar c msg; walk cs } 631 | 632 | caseChar c | isAlpha c = char (toLower c) <|> char (toUpper c) 633 | | otherwise = char c 634 | 635 | msg = show name 636 | 637 | 638 | identifier = 639 | lexeme $ try $ 640 | do{ name <- ident 641 | ; if (isReservedName name) 642 | then unexpected ("reserved word " ++ show name) 643 | else return name 644 | } 645 | 646 | 647 | ident 648 | = do{ c <- identStart languageDef 649 | ; cs <- many (identLetter languageDef) 650 | ; return (c:cs) 651 | } 652 | "identifier" 653 | 654 | isReservedName name 655 | = isReserved theReservedNames caseName 656 | where 657 | caseName | caseSensitive languageDef = name 658 | | otherwise = map toLower name 659 | 660 | 661 | isReserved names name 662 | = scan names 663 | where 664 | scan [] = False 665 | scan (r:rs) = case (compare r name) of 666 | LT -> scan rs 667 | EQ -> True 668 | GT -> False 669 | 670 | theReservedNames 671 | | caseSensitive languageDef = sort reserved 672 | | otherwise = sort . map (map toLower) $ reserved 673 | where 674 | reserved = reservedNames languageDef 675 | 676 | 677 | 678 | ----------------------------------------------------------- 679 | -- White space & symbols 680 | ----------------------------------------------------------- 681 | symbol name 682 | = lexeme (string name) 683 | 684 | lexeme p 685 | = do{ x <- p; whiteSpace; return x } 686 | 687 | 688 | --whiteSpace 689 | whiteSpace 690 | | noLine && noMulti = skipMany (simpleSpace "") 691 | | noLine = skipMany (simpleSpace <|> multiLineComment "") 692 | | noMulti = skipMany (simpleSpace <|> oneLineComment "") 693 | | otherwise = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment "") 694 | where 695 | noLine = null (commentLine languageDef) 696 | noMulti = null (commentStart languageDef) 697 | 698 | 699 | simpleSpace = 700 | skipMany1 (satisfy isSpace) 701 | 702 | oneLineComment = 703 | do{ _ <- try (string (commentLine languageDef)) 704 | ; skipMany (satisfy (/= '\n')) 705 | ; return () 706 | } 707 | 708 | multiLineComment = 709 | do { _ <- try (string (commentStart languageDef)) 710 | ; inComment 711 | } 712 | 713 | inComment 714 | | nestedComments languageDef = inCommentMulti 715 | | otherwise = inCommentSingle 716 | 717 | inCommentMulti 718 | = do{ _ <- try (string (commentEnd languageDef)) ; return () } 719 | <|> do{ multiLineComment ; inCommentMulti } 720 | <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti } 721 | <|> do{ _ <- oneOf startEnd ; inCommentMulti } 722 | "end of comment" 723 | where 724 | startEnd = nub (commentEnd languageDef ++ commentStart languageDef) 725 | 726 | inCommentSingle 727 | = do{ _ <- try (string (commentEnd languageDef)); return () } 728 | <|> do{ skipMany1 (noneOf startEnd) ; inCommentSingle } 729 | <|> do{ _ <- oneOf startEnd ; inCommentSingle } 730 | "end of comment" 731 | where 732 | startEnd = nub (commentEnd languageDef ++ commentStart languageDef) 733 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec 18 | ( -- complete modules 19 | module Text.ParserCombinators.Parsec.Prim 20 | , module Text.ParserCombinators.Parsec.Combinator 21 | , module Text.ParserCombinators.Parsec.Char 22 | 23 | -- module Text.ParserCombinators.Parsec.Error 24 | , ParseError 25 | , errorPos 26 | 27 | -- module Text.ParserCombinators.Parsec.Pos 28 | , SourcePos 29 | , SourceName, Line, Column 30 | , sourceName, sourceLine, sourceColumn 31 | , incSourceLine, incSourceColumn 32 | , setSourceLine, setSourceColumn, setSourceName 33 | 34 | ) where 35 | 36 | import Text.Parsec.String() 37 | 38 | import Text.ParserCombinators.Parsec.Prim 39 | import Text.ParserCombinators.Parsec.Combinator 40 | import Text.ParserCombinators.Parsec.Char 41 | 42 | import Text.ParserCombinators.Parsec.Error 43 | import Text.ParserCombinators.Parsec.Pos 44 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Char.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Char 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Char 18 | ( CharParser, 19 | spaces, 20 | space, 21 | newline, 22 | tab, 23 | upper, 24 | lower, 25 | alphaNum, 26 | letter, 27 | digit, 28 | hexDigit, 29 | octDigit, 30 | char, 31 | string, 32 | anyChar, 33 | oneOf, 34 | noneOf, 35 | satisfy 36 | ) where 37 | 38 | 39 | import Text.Parsec.Char 40 | import Text.Parsec.String 41 | 42 | type CharParser st = GenParser Char st 43 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Combinator.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Combinator 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Combinator 18 | ( choice, 19 | count, 20 | between, 21 | option, 22 | optionMaybe, 23 | optional, 24 | skipMany1, 25 | many1, 26 | sepBy, 27 | sepBy1, 28 | endBy, 29 | endBy1, 30 | sepEndBy, 31 | sepEndBy1, 32 | chainl, 33 | chainl1, 34 | chainr, 35 | chainr1, 36 | eof, 37 | notFollowedBy, 38 | manyTill, 39 | lookAhead, 40 | anyToken 41 | ) where 42 | 43 | 44 | import Text.Parsec.Combinator 45 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Error.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Error 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Error 18 | ( Message (SysUnExpect,UnExpect,Expect,Message), 19 | messageString, 20 | messageCompare, 21 | messageEq, 22 | ParseError, 23 | errorPos, 24 | errorMessages, 25 | errorIsUnknown, 26 | showErrorMessages, 27 | newErrorMessage, 28 | newErrorUnknown, 29 | addErrorMessage, 30 | setErrorPos, 31 | setErrorMessage, 32 | mergeError 33 | ) where 34 | 35 | import Text.Parsec.Error 36 | 37 | 38 | messageCompare :: Message -> Message -> Ordering 39 | messageCompare = compare 40 | 41 | messageEq :: Message -> Message -> Bool 42 | messageEq = (==) 43 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Expr.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Expr 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Expr 18 | ( Assoc (AssocNone,AssocLeft,AssocRight), 19 | Operator(..), 20 | OperatorTable, 21 | buildExpressionParser 22 | ) where 23 | 24 | import Text.Parsec.Expr(Assoc(..)) 25 | import qualified Text.Parsec.Expr as N 26 | import Text.ParserCombinators.Parsec(GenParser) 27 | 28 | import Control.Monad.Identity (Identity) 29 | 30 | data Operator tok st a = Infix (GenParser tok st (a -> a -> a)) Assoc 31 | | Prefix (GenParser tok st (a -> a)) 32 | | Postfix (GenParser tok st (a -> a)) 33 | 34 | type OperatorTable tok st a = [[Operator tok st a]] 35 | 36 | convert :: Operator tok st a -> N.Operator [tok] st Identity a 37 | convert (Infix p a) = N.Infix p a 38 | convert (Prefix p) = N.Prefix p 39 | convert (Postfix p) = N.Postfix p 40 | 41 | buildExpressionParser :: OperatorTable tok st a 42 | -> GenParser tok st a 43 | -> GenParser tok st a 44 | buildExpressionParser = N.buildExpressionParser . map (map convert) 45 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Language.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Language 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Language 18 | ( haskellDef, 19 | haskell, 20 | mondrianDef, 21 | mondrian, 22 | emptyDef, 23 | haskellStyle, 24 | javaStyle, 25 | LanguageDef, 26 | GenLanguageDef(..), 27 | ) where 28 | 29 | import Text.Parsec.Token 30 | import Text.Parsec.Language 31 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Perm.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Perm 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Perm 18 | ( PermParser, 19 | permute, 20 | (<||>), 21 | (<$$>), 22 | (<|?>), 23 | (<$?>) 24 | ) where 25 | 26 | import Text.Parsec.Perm 27 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Pos.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Pos 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Pos 18 | ( SourceName, 19 | Line, 20 | Column, 21 | SourcePos, 22 | sourceLine, 23 | sourceColumn, 24 | sourceName, 25 | incSourceLine, 26 | incSourceColumn, 27 | setSourceLine, 28 | setSourceColumn, 29 | setSourceName, 30 | newPos, 31 | initialPos, 32 | updatePosChar, 33 | updatePosString 34 | ) where 35 | 36 | 37 | import Text.Parsec.Pos 38 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Prim.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Prim 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Prim 18 | ( (), 19 | (<|>), 20 | Parser, 21 | GenParser, 22 | runParser, 23 | parse, 24 | parseFromFile, 25 | parseTest, 26 | token, 27 | tokens, 28 | tokenPrim, 29 | tokenPrimEx, 30 | try, 31 | label, 32 | labels, 33 | unexpected, 34 | pzero, 35 | many, 36 | skipMany, 37 | getState, 38 | setState, 39 | updateState, 40 | getPosition, 41 | setPosition, 42 | getInput, 43 | setInput, 44 | State(..), 45 | getParserState, 46 | setParserState 47 | ) where 48 | 49 | import Text.Parsec.Prim hiding (runParser, try) 50 | import qualified Text.Parsec.Prim as N -- 'N' for 'New' 51 | import Text.Parsec.String 52 | 53 | import Text.Parsec.Error 54 | import Text.Parsec.Pos 55 | 56 | pzero :: GenParser tok st a 57 | pzero = parserZero 58 | 59 | runParser :: GenParser tok st a 60 | -> st 61 | -> SourceName 62 | -> [tok] 63 | -> Either ParseError a 64 | runParser = N.runParser 65 | 66 | try :: GenParser tok st a -> GenParser tok st a 67 | try = N.try 68 | -------------------------------------------------------------------------------- /src/Text/ParserCombinators/Parsec/Token.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.ParserCombinators.Parsec.Token 6 | -- Copyright : (c) Paolo Martini 2007 7 | -- License : BSD-style (see the LICENSE file) 8 | -- 9 | -- Maintainer : derek.a.elkins@gmail.com 10 | -- Stability : provisional 11 | -- Portability : portable 12 | -- 13 | -- Parsec compatibility module 14 | -- 15 | ----------------------------------------------------------------------------- 16 | 17 | module Text.ParserCombinators.Parsec.Token 18 | ( LanguageDef, 19 | GenLanguageDef(..), 20 | TokenParser, 21 | GenTokenParser(..), 22 | makeTokenParser 23 | ) where 24 | 25 | import Text.Parsec.Token 26 | -------------------------------------------------------------------------------- /test/Bugs.hs: -------------------------------------------------------------------------------- 1 | 2 | module Bugs 3 | ( bugs 4 | ) where 5 | 6 | import Test.Tasty 7 | 8 | import qualified Bugs.Bug2 9 | import qualified Bugs.Bug6 10 | import qualified Bugs.Bug9 11 | import qualified Bugs.Bug35 12 | 13 | bugs :: [TestTree] 14 | bugs = [ Bugs.Bug2.main 15 | , Bugs.Bug6.main 16 | , Bugs.Bug9.main 17 | , Bugs.Bug35.main 18 | ] 19 | -------------------------------------------------------------------------------- /test/Bugs/Bug2.hs: -------------------------------------------------------------------------------- 1 | 2 | module Bugs.Bug2 3 | ( main 4 | ) where 5 | 6 | import Test.Tasty 7 | import Test.Tasty.HUnit 8 | 9 | import Text.Parsec 10 | import Text.Parsec.String 11 | import qualified Text.Parsec.Token as P 12 | import Text.Parsec.Language (haskellDef) 13 | 14 | main :: TestTree 15 | main = 16 | testCase "Control Char Parsing (#2)" $ 17 | parseString "\"test\\^Bstring\"" @?= "test\^Bstring" 18 | 19 | where 20 | parseString :: String -> String 21 | parseString input = 22 | case parse parser "Example" input of 23 | Left{} -> error "Parse failure" 24 | Right str -> str 25 | 26 | parser :: Parser String 27 | parser = P.stringLiteral $ P.makeTokenParser haskellDef 28 | -------------------------------------------------------------------------------- /test/Bugs/Bug35.hs: -------------------------------------------------------------------------------- 1 | 2 | module Bugs.Bug35 (main) where 3 | 4 | import Text.Parsec 5 | import Text.Parsec.Language 6 | import Text.Parsec.String 7 | import qualified Text.Parsec.Token as Token 8 | 9 | import Test.Tasty 10 | import Test.Tasty.HUnit 11 | 12 | trickyFloats :: [String] 13 | trickyFloats = 14 | [ "1.5339794352098402e-118" 15 | , "2.108934760892056e-59" 16 | , "2.250634744599241e-19" 17 | , "5.0e-324" 18 | , "5.960464477539063e-8" 19 | , "0.25996181067141905" 20 | , "0.3572019862807257" 21 | , "0.46817723004874223" 22 | , "0.9640035681058178" 23 | , "4.23808622486133" 24 | , "4.540362294799751" 25 | , "5.212384849884261" 26 | , "13.958257048123212" 27 | , "32.96176575630599" 28 | , "38.47735512322269" 29 | ] 30 | 31 | float :: Parser Double 32 | float = Token.float (Token.makeTokenParser emptyDef) 33 | 34 | testBatch :: Assertion 35 | testBatch = mapM_ testFloat trickyFloats 36 | where testFloat x = parse float "" x @?= Right (read x :: Double) 37 | 38 | main :: TestTree 39 | main = testCase "Quality of output of Text.Parsec.Token.float (#35)" testBatch 40 | -------------------------------------------------------------------------------- /test/Bugs/Bug6.hs: -------------------------------------------------------------------------------- 1 | 2 | module Bugs.Bug6 3 | ( main 4 | ) where 5 | 6 | import Test.Tasty 7 | import Test.Tasty.HUnit 8 | 9 | import Text.Parsec 10 | import Text.Parsec.String 11 | 12 | import Util 13 | 14 | main :: TestTree 15 | main = 16 | testCase "Look-ahead preserving error location (#6)" $ 17 | parseErrors variable "return" @?= ["'return' is a reserved keyword"] 18 | 19 | variable :: Parser String 20 | variable = do 21 | x <- lookAhead (many1 letter) 22 | if x == "return" 23 | then fail "'return' is a reserved keyword" 24 | else string x 25 | -------------------------------------------------------------------------------- /test/Bugs/Bug9.hs: -------------------------------------------------------------------------------- 1 | module Bugs.Bug9 ( main ) where 2 | 3 | import Control.Applicative ((<$), (<$>), (<*)) 4 | import Text.Parsec 5 | import Text.Parsec.Expr 6 | import Text.Parsec.Language (haskellStyle) 7 | import Text.Parsec.String (Parser) 8 | import qualified Text.Parsec.Token as P 9 | 10 | import Test.Tasty 11 | import Test.Tasty.HUnit 12 | 13 | import Util 14 | 15 | data Expr = Const Integer | Op Expr Expr 16 | deriving Show 17 | 18 | main :: TestTree 19 | main = 20 | testCase "Tracing of current position in error message (#9)" 21 | $ result @?= ["unexpected '>'","expecting operator or end of input"] 22 | 23 | where 24 | result :: [String] 25 | result = parseErrors parseTopLevel "4 >> 5" 26 | 27 | -- Syntax analysis 28 | 29 | parseTopLevel :: Parser Expr 30 | parseTopLevel = parseExpr <* eof 31 | 32 | parseExpr :: Parser Expr 33 | parseExpr = buildExpressionParser table (Const <$> integer) 34 | where 35 | table = [[ Infix (Op <$ reserved ">>>") AssocLeft ]] 36 | 37 | -- Lexical analysis 38 | 39 | lexer = P.makeTokenParser haskellStyle { P.reservedOpNames = [">>>"] } 40 | 41 | integer = P.integer lexer 42 | reserved = P.reserved lexer 43 | _reservedOp = P.reservedOp lexer 44 | 45 | -------------------------------------------------------------------------------- /test/Features.hs: -------------------------------------------------------------------------------- 1 | module Features 2 | ( features 3 | ) where 4 | 5 | import Test.Tasty 6 | 7 | import qualified Features.Feature80 8 | import qualified Features.Feature150 9 | 10 | features :: [TestTree] 11 | features = 12 | [ Features.Feature80.main 13 | , Features.Feature150.main 14 | ] 15 | -------------------------------------------------------------------------------- /test/Features/Feature150.hs: -------------------------------------------------------------------------------- 1 | module Features.Feature150 ( main ) where 2 | 3 | import Control.Applicative ((*>)) 4 | import Control.Monad.Identity 5 | import Test.Tasty 6 | import Test.Tasty.HUnit 7 | 8 | import Text.Parsec 9 | 10 | main :: TestTree 11 | main = 12 | testCase "string' (#150)" $ do 13 | parseString (boot <|> bool) "boot" @?= "boot" 14 | parseFail (boot <|> bool) "bool" @?= "no parse" 15 | parseFail (boot <|> bool) "booz" @?= "no parse" 16 | 17 | parseString (boot' <|> bool') "boot" @?= "boot" 18 | parseString (boot' <|> bool') "bool" @?= "bool" 19 | parseFail (boot' <|> bool') "booz" @?= "no parse" 20 | 21 | parseString (boot' <|> bool' <|> char 'b' *> many anyChar) "boomerang" @?= "oomerang" 22 | 23 | where 24 | boot :: ParsecT String () Identity String 25 | boot = string "boot" 26 | 27 | bool :: ParsecT String () Identity String 28 | bool = string "bool" 29 | 30 | boot' :: ParsecT String () Identity String 31 | boot' = string' "boot" 32 | 33 | bool' :: ParsecT String () Identity String 34 | bool' = string' "bool" 35 | 36 | parseString :: ParsecT String () Identity String -> String -> String 37 | parseString p input = 38 | case parse p "Example" input of 39 | Left{} -> error "Parse failure" 40 | Right str -> str 41 | 42 | parseFail :: ParsecT String () Identity String -> String -> String 43 | parseFail p input = 44 | case parse p "Example" input of 45 | Left{} -> "no parse" 46 | Right _ -> error "Parsed but shouldn't" 47 | -------------------------------------------------------------------------------- /test/Features/Feature80.hs: -------------------------------------------------------------------------------- 1 | module Features.Feature80 ( main ) where 2 | 3 | import Control.Applicative (pure) 4 | import Control.Monad.Identity 5 | import Data.List.NonEmpty 6 | import Data.Semigroup 7 | import Test.Tasty 8 | import Test.Tasty.HUnit 9 | 10 | import Text.Parsec 11 | 12 | main :: TestTree 13 | main = 14 | testCase "Monoid instance (#80)" $ do 15 | parseString (as <> bs) "aabbb" @?= "aabbb" 16 | parseString (mempty <> as) "aabbb" @?= "aa" 17 | parseString (as <> mempty) "aabbb" @?= "aa" 18 | parseString (sconcat $ fromList [as, mempty, bs]) "aabbb" @?= "aabbb" 19 | parseString (mconcat [as, mempty, bs]) "aabbb" @?= "aabbb" 20 | parseString (mempty :: ParsecT String () Identity String) "aabbb" @?= "" 21 | parseString (stimes (2::Int) str_a) "aabbb" @?= "aa" 22 | parseFail (stimes (3::Int) str_a) "aabbb" @?= "no parse" 23 | parseString ((one ch_a) <> (one ch_a) <> bs) "aabbb" @?= "aabbb" 24 | 25 | where 26 | one = fmap pure 27 | 28 | as :: ParsecT String () Identity String 29 | as = many $ char 'a' 30 | bs :: ParsecT String () Identity String 31 | bs = many $ char 'b' 32 | ch_a :: ParsecT String () Identity Char 33 | ch_a = char 'a' 34 | str_a :: ParsecT String () Identity String 35 | str_a = string "a" 36 | 37 | parseString :: ParsecT String () Identity String -> String -> String 38 | parseString p input = 39 | case parse p "Example" input of 40 | Left{} -> error "Parse failure" 41 | Right str -> str 42 | 43 | parseFail :: ParsecT String () Identity String -> String -> String 44 | parseFail p input = 45 | case parse p "Example" input of 46 | Left{} -> "no parse" 47 | Right _ -> error "Parsed but shouldn't" 48 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | 2 | import Test.Tasty 3 | 4 | import Bugs ( bugs ) 5 | import Features ( features ) 6 | 7 | main :: IO () 8 | main = do 9 | defaultMain $ testGroup "All" 10 | [ testGroup "Bugs" bugs 11 | , testGroup "Features" features 12 | ] 13 | -------------------------------------------------------------------------------- /test/Util.hs: -------------------------------------------------------------------------------- 1 | 2 | module Util where 3 | 4 | import Text.Parsec 5 | import Text.Parsec.String ( Parser ) 6 | 7 | -- | Returns the error messages associated 8 | -- with a failed parse. 9 | parseErrors :: Parser a -> String -> [String] 10 | parseErrors p input = 11 | case parse p "" input of 12 | Left err -> 13 | drop 1 $ lines $ show err 14 | Right{} -> [] 15 | -------------------------------------------------------------------------------- /test/issue127.hs: -------------------------------------------------------------------------------- 1 | -- this should run in constant memory 2 | module Main (main) where 3 | 4 | import Text.Parsec 5 | import System.Environment (getArgs) 6 | import Control.Monad (replicateM_) 7 | 8 | main :: IO () 9 | main = do 10 | n <- getArgs >>= \args -> return $ case args of 11 | arg : _ -> read arg 12 | _ -> 1000000 13 | 14 | print $ runParser (replicateM_ n $ return ()) () "test" "" 15 | -------------------------------------------------------------------------------- /test/issue171.hs: -------------------------------------------------------------------------------- 1 | -- this should be fast 2 | module Main (main) where 3 | 4 | import Control.DeepSeq (NFData (..)) 5 | import System.CPUTime (getCPUTime) 6 | import Text.Printf (printf) 7 | import Test.Tasty (defaultMain) 8 | import Test.Tasty.HUnit (testCaseSteps, assertBool) 9 | 10 | import Text.Parsec 11 | import Text.Parsec.String (Parser) 12 | 13 | main :: IO () 14 | main = defaultMain $ testCaseSteps "issue-171" $ \info -> do 15 | time0 <- getCPUTime 16 | check $ concat $ replicate 100000 "a " 17 | time1 <- getCPUTime 18 | let diff = (time1 - time0) `div` 1000000000 19 | info $ printf "%d milliseconds\n" diff 20 | assertBool "" (diff < 200) 21 | 22 | parser :: Parser [String] 23 | parser = many (char 'a' <|> char 'b') `sepBy` char ' ' 24 | 25 | check :: String -> IO () 26 | check s = putStrLn $ either onError (const "") $ parse parser {- important: pass input as SourceName -} s s 27 | 28 | onError :: ParseError -> String 29 | onError err = rnf (show err) `seq` "error" 30 | -------------------------------------------------------------------------------- /test/issue175.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Text.Parsec 4 | import Text.Parsec.Error 5 | import Text.Parsec.String (Parser) 6 | import Text.Parsec.Pos (newPos) 7 | 8 | import Test.Tasty (defaultMain) 9 | import Test.Tasty.HUnit (assertFailure, testCaseSteps, (@?=)) 10 | 11 | main :: IO () 12 | main = defaultMain $ testCaseSteps "issue175" $ \info -> do 13 | case parse p "" "x" of 14 | Right _ -> assertFailure "Unexpected success" 15 | -- with setPosition the "longest match" is arbitrary 16 | -- megaparsec tracks consumed tokens separately, but we don't. 17 | -- so our position is arbitrary. 18 | Left err -> do 19 | info $ show err 20 | errorPos err @?= newPos "aaa" 9 1 -- can be arbitrary 21 | length (errorMessages err) @?= 2 22 | 23 | p :: Parser Char 24 | p = p1 <|> p2 where 25 | p1 = setPosition (newPos "aaa" 9 1) >> char 'a' 26 | p2 = setPosition (newPos "zzz" 1 1) >> char 'b' 27 | --------------------------------------------------------------------------------