├── .github ├── dependabot.yml └── workflows │ └── ci.yaml ├── .gitignore ├── CHANGELOG.md ├── HACKING.md ├── LICENSE.md ├── README.md ├── Text ├── Megaparsec.hs └── Megaparsec │ ├── Byte.hs │ ├── Byte │ ├── Binary.hs │ └── Lexer.hs │ ├── Char.hs │ ├── Char │ └── Lexer.hs │ ├── Class.hs │ ├── Common.hs │ ├── Debug.hs │ ├── Error.hs │ ├── Error.hs-boot │ ├── Error │ └── Builder.hs │ ├── Internal.hs │ ├── Internal.hs-boot │ ├── Lexer.hs │ ├── Pos.hs │ ├── State.hs │ ├── Stream.hs │ └── Unicode.hs ├── bench ├── memory │ └── Main.hs └── speed │ └── Main.hs ├── cabal.project ├── flake.lock ├── flake.nix ├── megaparsec-tests ├── LICENSE.md ├── README.md ├── megaparsec-tests.cabal ├── src │ └── Test │ │ └── Hspec │ │ └── Megaparsec │ │ └── AdHoc.hs └── tests │ ├── Spec.hs │ └── Text │ ├── Megaparsec │ ├── Byte │ │ ├── BinarySpec.hs │ │ └── LexerSpec.hs │ ├── ByteSpec.hs │ ├── Char │ │ └── LexerSpec.hs │ ├── CharSpec.hs │ ├── DebugSpec.hs │ ├── ErrorSpec.hs │ ├── PosSpec.hs │ ├── StreamSpec.hs │ └── UnicodeSpec.hs │ └── MegaparsecSpec.hs ├── megaparsec.cabal ├── parsers-bench ├── ParsersBench │ ├── CSV │ │ ├── Attoparsec.hs │ │ └── Megaparsec.hs │ ├── Json │ │ ├── Attoparsec.hs │ │ ├── Common.hs │ │ └── Megaparsec.hs │ └── Log │ │ ├── Attoparsec.hs │ │ ├── Common.hs │ │ └── Megaparsec.hs ├── README.md ├── bench │ ├── memory │ │ └── Main.hs │ └── speed │ │ └── Main.hs ├── data │ ├── csv-10.csv │ ├── csv-20.csv │ ├── csv-40.csv │ ├── csv-5.csv │ ├── json-10.json │ ├── json-20.json │ ├── json-40.json │ ├── json-5.json │ ├── log-10.log │ ├── log-20.log │ ├── log-40.log │ └── log-5.log └── parsers-bench.cabal └── shell.nix /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | types: 8 | - opened 9 | - synchronize 10 | jobs: 11 | ormolu: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: haskell-actions/run-ormolu@v15 16 | build: 17 | runs-on: ubuntu-latest 18 | needs: ormolu 19 | strategy: 20 | matrix: 21 | cabal: ["3.12"] 22 | ghc: ["9.8.4", "9.10.1", "9.12.1"] 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: haskell-actions/setup@v2 26 | id: setup-haskell-cabal 27 | with: 28 | ghc-version: ${{ matrix.ghc }} 29 | cabal-version: ${{ matrix.cabal }} 30 | - run: cabal update 31 | - run: cabal freeze 32 | - uses: actions/cache@v4.0.0 33 | with: 34 | path: | 35 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 36 | dist-newstyle 37 | key: ${{ runner.os }}-${{ matrix.ghc }}-0-${{ hashFiles('cabal.project.freeze') }} 38 | restore-keys: | 39 | ${{ runner.os }}-${{ matrix.ghc }}-0- 40 | - run: cabal format 41 | - run: pushd megaparsec-tests && cabal format && popd 42 | - run: git diff --exit-code --color=always 43 | - run: cabal build all 44 | - run: cabal test all 45 | - run: cabal haddock megaparsec 46 | - run: cabal haddock megaparsec-tests 47 | - run: cabal sdist 48 | - run: pushd megaparsec-tests && cabal sdist && popd 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *# 2 | *.aux 3 | *.chi 4 | *.chs.h 5 | *.dyn_hi 6 | *.dyn_o 7 | *.eventlog 8 | *.hi 9 | *.hp 10 | *.o 11 | *.prof 12 | *~ 13 | .HTF/ 14 | .cabal-sandbox/ 15 | .ghc.environment.* 16 | .hpc 17 | .hsenv 18 | .stack-work/ 19 | .virtualenv 20 | .vscode 21 | TAGS 22 | benchmarks.tix 23 | cabal-dev 24 | cabal.config 25 | cabal.project.local 26 | cabal.sandbox.config 27 | dist-*/ 28 | dist/ 29 | hie.yaml 30 | stack.yaml 31 | stack.yaml.lock 32 | -------------------------------------------------------------------------------- /HACKING.md: -------------------------------------------------------------------------------- 1 | # Hacking 2 | 3 | * [Development with `ghcid`](#development-with-ghcid) 4 | * [Running unit tests](#running-unit-tests) 5 | * [Checking dependent packages](#checking-dependent-packages) 6 | * [Benchmarks](#benchmarks) 7 | * [Releasing a new version](#releasing-a-new-version) 8 | 9 | This document tries to describe everything you need to know to 10 | develop/maintain Megaparsec. 11 | 12 | ## Development with `ghcid` 13 | 14 | We use `nix` for development. First enter the Nix shell: 15 | 16 | ```console 17 | $ nix develop 18 | ``` 19 | 20 | Inside the shell you can: 21 | 22 | * Build the `megaparsec` and `megaparsec-tests` packages with `cabal build 23 | all`. 24 | 25 | * Run tests from the `megaparsec-tests` package with `cabal test all`. 26 | 27 | * Run `ghcid` for interactive feedback as you edit with `ghcid 28 | --command="cabal repl megaparsec"` or `ghcid --command="cabal repl 29 | megaparsec-tests --enable-tests"` depending on the package you're editing. 30 | 31 | ## Running unit tests 32 | 33 | The tests in `megaparsec-tests` are usually not enough to gain confidence in 34 | non-trivial changes. It is wise to use tests from other packages: 35 | 36 | ```console 37 | $ nix build .#all_base --no-link 38 | ``` 39 | 40 | The `base` group includes building and testing the following packages: 41 | 42 | * `megaparsec` 43 | * `hspec-megaparsec` 44 | * `megaparsec-tests` 45 | * `parser-combinators-tests` 46 | 47 | It is worth noting that individual derivations from `base` can be built like 48 | this: 49 | 50 | ```console 51 | $ nix build .#base/parser-combinators-tests --no-link 52 | ``` 53 | 54 | ## Checking dependent packages 55 | 56 | To find out how your changes affect a selected set of dependent packages do: 57 | 58 | ```console 59 | $ nix build .#all_deps --no-link 60 | ``` 61 | 62 | The “selected set” includes packages that are known to be high-quality, 63 | well-tested, and non-trivial, so they are good targets for this sort of 64 | testing. You can also try to build and test a particular package like this: 65 | 66 | ```console 67 | $ nix build .#deps/mmark --no-link 68 | ``` 69 | 70 | When you introduce a breaking change, some packages may stop compiling. 71 | Usually it's easy enough to create a patch and make it compile with the 72 | current dev version of Megaparsec. To do so, follow these steps: 73 | 74 | * Start by cloning the repo of the failing package. 75 | 76 | * Checkout commit that corresponds to the version our current `nixpkgs` 77 | packages use. 78 | 79 | * Attempt to compile the package with current dev version of Megaparsec to 80 | reproduce the build errors. Often, if the broken package uses stack you 81 | can just add the path to the updated Megaparsec directory to the 82 | `extra-deps` section. 83 | 84 | * Perform whatever changes that are necessary to make the package work. 85 | 86 | * Use `git diff > my-package.patch` or `git diff --cached > 87 | my-package.patch` to create the patch file. The first command will output 88 | unstaged changes, while the second command will write only staged changes. 89 | 90 | * Adjust `default.nix` to apply the patch. You want to edit the `deps` 91 | attribute set. For example: 92 | 93 | ```nix 94 | # Dependent packages of interest: 95 | deps = { 96 | # ... 97 | idris = patch haskellPackages.idris ./nix/patches/idris.patch; 98 | }; 99 | ``` 100 | 101 | ## Benchmarks 102 | 103 | To build all benchmarks run: 104 | 105 | ```console 106 | $ nix build .#all_benches 107 | ``` 108 | 109 | This will create several symlinks in `result`. It is also possible to build 110 | benchmarks for just a specific package: 111 | 112 | ```console 113 | $ nix build .#benches/megaparsec # builds megaparsec's microbenchmarks 114 | ``` 115 | 116 | `cd` to the `bench` sub-directory and run benchmarks from there because some 117 | benchmarks need data to run on and the paths are relative, so it'll fail if 118 | run from the root of Megaparsec's repo. 119 | 120 | ## Releasing a new version 121 | 122 | To release a new version of Megaparsec, follow these steps: 123 | 124 | * Bump version for both `megaparsec` and `megaparsec-tests`. In the 125 | `megaparsec.cabal` file update the version of the package. In the 126 | `megaparsec-tests.cabal` file update the version of the package (we keep 127 | it in sync with version of `megaparsec`) as well as the versions of 128 | `megaparsec` it depends on. 129 | 130 | * Create git tag and push it to the repo. 131 | 132 | * Generate distribution tarballs by running: 133 | 134 | ```console 135 | $ nix build .#all_dist 136 | ``` 137 | 138 | This will create `result` with two symlinks to directories containing the 139 | tarballs. Typically they are called `result/megaparsec-source-*` and 140 | `result/megaparsec-tests-source-*`. 141 | 142 | * To upload the tarballs to Hackage, execute the following: 143 | 144 | ```console 145 | $ cabal upload --publish result/megaparsec-source-*/megaparsec-*.tar.gz 146 | $ cabal upload --publish result/megaparsec-tests-source-*/megaparsec-tests-*.tar.gz 147 | ``` 148 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright © 2015–present Megaparsec contributors\ 2 | Copyright © 2007 Paolo Martini\ 3 | Copyright © 1999–2000 Daan Leijen 4 | 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS 18 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 20 | NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 23 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /Text/Megaparsec/Byte.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | {-# LANGUAGE TypeFamilies #-} 4 | {-# LANGUAGE TypeOperators #-} 5 | 6 | -- | 7 | -- Module : Text.Megaparsec.Byte 8 | -- Copyright : © 2015–present Megaparsec contributors 9 | -- License : FreeBSD 10 | -- 11 | -- Maintainer : Mark Karpov 12 | -- Stability : experimental 13 | -- Portability : portable 14 | -- 15 | -- Commonly used binary parsers. 16 | -- 17 | -- @since 6.0.0 18 | module Text.Megaparsec.Byte 19 | ( -- * Simple parsers 20 | newline, 21 | crlf, 22 | eol, 23 | tab, 24 | space, 25 | hspace, 26 | space1, 27 | hspace1, 28 | 29 | -- * Categories of characters 30 | controlChar, 31 | spaceChar, 32 | upperChar, 33 | lowerChar, 34 | letterChar, 35 | alphaNumChar, 36 | printChar, 37 | digitChar, 38 | binDigitChar, 39 | octDigitChar, 40 | hexDigitChar, 41 | asciiChar, 42 | 43 | -- * Single byte 44 | char, 45 | char', 46 | 47 | -- * Sequence of bytes 48 | string, 49 | string', 50 | ) 51 | where 52 | 53 | import Control.Applicative 54 | import Data.Char hiding (isSpace, toLower, toUpper) 55 | import Data.Functor (void) 56 | import Data.Proxy 57 | import Data.Word (Word8) 58 | import Text.Megaparsec 59 | import Text.Megaparsec.Common 60 | 61 | ---------------------------------------------------------------------------- 62 | -- Simple parsers 63 | 64 | -- | Parse a newline byte. 65 | newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 66 | newline = char 10 67 | {-# INLINE newline #-} 68 | 69 | -- | Parse a carriage return character followed by a newline character. 70 | -- Return the sequence of characters parsed. 71 | crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s) 72 | crlf = string (tokensToChunk (Proxy :: Proxy s) [13, 10]) 73 | {-# INLINE crlf #-} 74 | 75 | -- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the 76 | -- sequence of characters parsed. 77 | eol :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s) 78 | eol = 79 | (tokenToChunk (Proxy :: Proxy s) <$> newline) 80 | <|> crlf 81 | "end of line" 82 | {-# INLINE eol #-} 83 | 84 | -- | Parse a tab character. 85 | tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 86 | tab = char 9 87 | {-# INLINE tab #-} 88 | 89 | -- | Skip /zero/ or more white space characters. 90 | -- 91 | -- See also: 'skipMany' and 'spaceChar'. 92 | space :: (MonadParsec e s m, Token s ~ Word8) => m () 93 | space = void $ takeWhileP (Just "white space") isSpace 94 | {-# INLINE space #-} 95 | 96 | -- | Like 'space', but does not accept newlines and carriage returns. 97 | -- 98 | -- @since 9.0.0 99 | hspace :: (MonadParsec e s m, Token s ~ Word8) => m () 100 | hspace = void $ takeWhileP (Just "white space") isHSpace 101 | {-# INLINE hspace #-} 102 | 103 | -- | Skip /one/ or more white space characters. 104 | -- 105 | -- See also: 'skipSome' and 'spaceChar'. 106 | space1 :: (MonadParsec e s m, Token s ~ Word8) => m () 107 | space1 = void $ takeWhile1P (Just "white space") isSpace 108 | {-# INLINE space1 #-} 109 | 110 | -- | Like 'space1', but does not accept newlines and carriage returns. 111 | -- 112 | -- @since 9.0.0 113 | hspace1 :: (MonadParsec e s m, Token s ~ Word8) => m () 114 | hspace1 = void $ takeWhile1P (Just "white space") isHSpace 115 | {-# INLINE hspace1 #-} 116 | 117 | ---------------------------------------------------------------------------- 118 | -- Categories of characters 119 | 120 | -- | Parse a control character. 121 | controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 122 | controlChar = satisfy (isControl . toChar) "control character" 123 | {-# INLINE controlChar #-} 124 | 125 | -- | Parse a space character, and the control characters: tab, newline, 126 | -- carriage return, form feed, and vertical tab. 127 | spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 128 | spaceChar = satisfy isSpace "white space" 129 | {-# INLINE spaceChar #-} 130 | 131 | -- | Parse an upper-case character. 132 | upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 133 | upperChar = satisfy (isUpper . toChar) "uppercase letter" 134 | {-# INLINE upperChar #-} 135 | 136 | -- | Parse a lower-case alphabetic character. 137 | lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 138 | lowerChar = satisfy (isLower . toChar) "lowercase letter" 139 | {-# INLINE lowerChar #-} 140 | 141 | -- | Parse an alphabetic character: lower-case or upper-case. 142 | letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 143 | letterChar = satisfy (isLetter . toChar) "letter" 144 | {-# INLINE letterChar #-} 145 | 146 | -- | Parse an alphabetic or digit characters. 147 | alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 148 | alphaNumChar = satisfy (isAlphaNum . toChar) "alphanumeric character" 149 | {-# INLINE alphaNumChar #-} 150 | 151 | -- | Parse a printable character: letter, number, mark, punctuation, symbol 152 | -- or space. 153 | printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 154 | printChar = satisfy (isPrint . toChar) "printable character" 155 | {-# INLINE printChar #-} 156 | 157 | -- | Parse an ASCII digit, i.e between “0” and “9”. 158 | digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 159 | digitChar = satisfy isDigit' "digit" 160 | where 161 | isDigit' x = x >= 48 && x <= 57 162 | {-# INLINE digitChar #-} 163 | 164 | -- | Parse a binary digit, i.e. “0” or “1”. 165 | -- 166 | -- @since 7.0.0 167 | binDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 168 | binDigitChar = satisfy isBinDigit "binary digit" 169 | where 170 | isBinDigit x = x == 48 || x == 49 171 | {-# INLINE binDigitChar #-} 172 | 173 | -- | Parse an octal digit, i.e. between “0” and “7”. 174 | octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 175 | octDigitChar = satisfy isOctDigit' "octal digit" 176 | where 177 | isOctDigit' x = x >= 48 && x <= 55 178 | {-# INLINE octDigitChar #-} 179 | 180 | -- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or 181 | -- “A” and “F”. 182 | hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 183 | hexDigitChar = satisfy (isHexDigit . toChar) "hexadecimal digit" 184 | {-# INLINE hexDigitChar #-} 185 | 186 | -- | Parse a character from the first 128 characters of the Unicode 187 | -- character set, corresponding to the ASCII character set. 188 | asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) 189 | asciiChar = satisfy (< 128) "ASCII character" 190 | {-# INLINE asciiChar #-} 191 | 192 | ---------------------------------------------------------------------------- 193 | -- Single byte 194 | 195 | -- | A type-constrained version of 'single'. 196 | -- 197 | -- > newline = char 10 198 | char :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s) 199 | char = single 200 | {-# INLINE char #-} 201 | 202 | -- | The same as 'char' but case-insensitive. This parser returns the 203 | -- actually parsed character preserving its case. 204 | -- 205 | -- >>> parseTest (char' 101) "E" 206 | -- 69 -- 'E' 207 | -- >>> parseTest (char' 101) "G" 208 | -- 1:1: 209 | -- unexpected 'G' 210 | -- expecting 'E' or 'e' 211 | char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s) 212 | char' c = 213 | choice 214 | [ char (toLower c), 215 | char (toUpper c) 216 | ] 217 | {-# INLINE char' #-} 218 | 219 | ---------------------------------------------------------------------------- 220 | -- Helpers 221 | 222 | -- | 'Word8'-specialized version of 'Data.Char.isSpace'. 223 | isSpace :: Word8 -> Bool 224 | isSpace x 225 | | x >= 9 && x <= 13 = True 226 | | x == 32 = True 227 | | x == 160 = True 228 | | otherwise = False 229 | {-# INLINE isSpace #-} 230 | 231 | -- | Like 'isSpace', but does not accept newlines and carriage returns. 232 | isHSpace :: Word8 -> Bool 233 | isHSpace x 234 | | x == 9 = True 235 | | x == 11 = True 236 | | x == 12 = True 237 | | x == 32 = True 238 | | x == 160 = True 239 | | otherwise = False 240 | {-# INLINE isHSpace #-} 241 | 242 | -- | Convert a byte to char. 243 | toChar :: Word8 -> Char 244 | toChar = chr . fromIntegral 245 | {-# INLINE toChar #-} 246 | 247 | -- | Convert a byte to its upper-case version. 248 | toUpper :: Word8 -> Word8 249 | toUpper x 250 | | x >= 97 && x <= 122 = x - 32 251 | | x == 247 = x -- division sign 252 | | x == 255 = x -- latin small letter y with diaeresis 253 | | x >= 224 = x - 32 254 | | otherwise = x 255 | {-# INLINE toUpper #-} 256 | 257 | -- | Convert a byte to its lower-case version. 258 | toLower :: Word8 -> Word8 259 | toLower x 260 | | x >= 65 && x <= 90 = x + 32 261 | | x == 215 = x -- multiplication sign 262 | | x >= 192 && x <= 222 = x + 32 263 | | otherwise = x 264 | {-# INLINE toLower #-} 265 | -------------------------------------------------------------------------------- /Text/Megaparsec/Byte/Binary.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE AllowAmbiguousTypes #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | {-# LANGUAGE Safe #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | {-# LANGUAGE TypeApplications #-} 6 | {-# LANGUAGE TypeFamilies #-} 7 | 8 | -- | 9 | -- Module : Text.Megaparsec.Byte.Binary 10 | -- Copyright : © 2021–present Megaparsec contributors 11 | -- License : FreeBSD 12 | -- 13 | -- Maintainer : Mark Karpov 14 | -- Stability : experimental 15 | -- Portability : portable 16 | -- 17 | -- Binary-format number parsers. 18 | -- 19 | -- @since 9.2.0 20 | module Text.Megaparsec.Byte.Binary 21 | ( -- * Generic parsers 22 | BinaryChunk (..), 23 | anyLE, 24 | anyBE, 25 | 26 | -- * Parsing unsigned values 27 | word8, 28 | word16le, 29 | word16be, 30 | word32le, 31 | word32be, 32 | word64le, 33 | word64be, 34 | 35 | -- * Parsing signed values 36 | int8, 37 | int16le, 38 | int16be, 39 | int32le, 40 | int32be, 41 | int64le, 42 | int64be, 43 | ) 44 | where 45 | 46 | import Data.Bits 47 | import qualified Data.ByteString as B 48 | import qualified Data.ByteString.Lazy as BL 49 | import Data.Int 50 | import Data.Word 51 | import Text.Megaparsec 52 | 53 | -- | Data types that can be converted to little- or big- endian numbers. 54 | class BinaryChunk chunk where 55 | convertChunkBE :: (Bits a, Num a) => chunk -> a 56 | convertChunkLE :: (Bits a, Num a) => chunk -> a 57 | 58 | instance BinaryChunk B.ByteString where 59 | convertChunkBE = B.foldl' go 0 60 | where 61 | go acc byte = (acc `unsafeShiftL` 8) .|. fromIntegral byte 62 | convertChunkLE = B.foldl' go 0 63 | where 64 | go acc byte = (acc .|. fromIntegral byte) `rotateR` 8 65 | 66 | instance BinaryChunk BL.ByteString where 67 | convertChunkBE = BL.foldl' go 0 68 | where 69 | go acc byte = (acc `unsafeShiftL` 8) .|. fromIntegral byte 70 | convertChunkLE = BL.foldl' go 0 71 | where 72 | go acc byte = (acc .|. fromIntegral byte) `rotateR` 8 73 | 74 | ---------------------------------------------------------------------------- 75 | -- Generic parsers 76 | 77 | -- | Parse a little-endian number. 78 | -- 79 | -- You may wish to call this with a visible type application: 80 | -- 81 | -- > number <- anyLE (Just "little-endian 32 bit word") @Word32 82 | anyLE :: 83 | forall a e s m. 84 | (MonadParsec e s m, FiniteBits a, Num a, BinaryChunk (Tokens s)) => 85 | -- | Label, if any 86 | Maybe String -> 87 | m a 88 | anyLE mlabel = convertChunkLE <$> takeP mlabel (finiteByteSize @a) 89 | {-# INLINE anyLE #-} 90 | 91 | -- | Parse a big-endian number. 92 | -- 93 | -- You may wish to call this with a visible type application: 94 | -- 95 | -- > number <- anyBE (Just "big-endian 32 bit word") @Word32 96 | anyBE :: 97 | forall a e s m. 98 | (MonadParsec e s m, FiniteBits a, Num a, BinaryChunk (Tokens s)) => 99 | -- | Label, if any 100 | Maybe String -> 101 | m a 102 | anyBE mlabel = convertChunkBE <$> takeP mlabel (finiteByteSize @a) 103 | {-# INLINE anyBE #-} 104 | 105 | -------------------------------------------------------------------------------- 106 | -- Parsing unsigned values 107 | 108 | -- | Parse a 'Word8'. 109 | word8 :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word8 110 | word8 = anyBE (Just "8 bit word") 111 | {-# INLINE word8 #-} 112 | 113 | -- | Parse a little-endian 'Word16'. 114 | word16le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word16 115 | word16le = anyLE (Just "little-endian 16 bit word") 116 | {-# INLINE word16le #-} 117 | 118 | -- | Parse a big-endian 'Word16'. 119 | word16be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word16 120 | word16be = anyBE (Just "big-endian 16 bit word") 121 | {-# INLINE word16be #-} 122 | 123 | -- | Parse a little-endian 'Word32'. 124 | word32le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word32 125 | word32le = anyLE (Just "little-endian 32 bit word") 126 | {-# INLINE word32le #-} 127 | 128 | -- | Parse a big-endian 'Word32'. 129 | word32be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word32 130 | word32be = anyBE (Just "big-endian 32 bit word") 131 | {-# INLINE word32be #-} 132 | 133 | -- | Parse a little-endian 'Word64'. 134 | word64le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word64 135 | word64le = anyLE (Just "little-endian 64 word") 136 | {-# INLINE word64le #-} 137 | 138 | -- | Parse a big-endian 'Word64'. 139 | word64be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word64 140 | word64be = anyBE (Just "big-endian 64 word") 141 | {-# INLINE word64be #-} 142 | 143 | ---------------------------------------------------------------------------- 144 | -- Parsing signed values 145 | 146 | -- | Parse a 'Int8'. 147 | int8 :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int8 148 | int8 = anyBE (Just "8 bit int") 149 | {-# INLINE int8 #-} 150 | 151 | -- | Parse a little-endian 'Int16'. 152 | int16le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int16 153 | int16le = anyLE (Just "little-endian 16 bit int") 154 | {-# INLINE int16le #-} 155 | 156 | -- | Parse a big-endian 'Int16'. 157 | int16be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int16 158 | int16be = anyBE (Just "big-endian 16 bit int") 159 | {-# INLINE int16be #-} 160 | 161 | -- | Parse a little-endian 'Int32'. 162 | int32le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int32 163 | int32le = anyLE (Just "little-endian 32 bit int") 164 | {-# INLINE int32le #-} 165 | 166 | -- | Parse a big-endian 'Int32'. 167 | int32be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int32 168 | int32be = anyBE (Just "big-endian 32 bit int") 169 | {-# INLINE int32be #-} 170 | 171 | -- | Parse a little-endian 'Int64'. 172 | int64le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int64 173 | int64le = anyLE (Just "little-endian 64 int") 174 | {-# INLINE int64le #-} 175 | 176 | -- | Parse a big-endian 'Int64'. 177 | int64be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int64 178 | int64be = anyBE (Just "big-endian 64 int") 179 | {-# INLINE int64be #-} 180 | 181 | -------------------------------------------------------------------------------- 182 | -- Helpers 183 | 184 | -- | Return the number of bytes in the argument. 185 | -- 186 | -- Performs ceiling division, so byte-unaligned types (bitsize not a 187 | -- multiple of 8) should work, but further usage is not tested. 188 | finiteByteSize :: forall a. (FiniteBits a) => Int 189 | finiteByteSize = finiteBitSize @a undefined `ceilDiv` 8 190 | where 191 | ceilDiv x y = (x + y - 1) `div` y 192 | {-# INLINE finiteByteSize #-} 193 | -------------------------------------------------------------------------------- /Text/Megaparsec/Byte/Lexer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | {-# LANGUAGE TypeFamilies #-} 4 | {-# LANGUAGE TypeOperators #-} 5 | 6 | -- | 7 | -- Module : Text.Megaparsec.Byte.Lexer 8 | -- Copyright : © 2015–present Megaparsec contributors 9 | -- License : FreeBSD 10 | -- 11 | -- Maintainer : Mark Karpov 12 | -- Stability : experimental 13 | -- Portability : portable 14 | -- 15 | -- Stripped-down version of "Text.Megaparsec.Char.Lexer" for streams of 16 | -- bytes. 17 | -- 18 | -- This module is intended to be imported qualified: 19 | -- 20 | -- > import qualified Text.Megaparsec.Byte.Lexer as L 21 | module Text.Megaparsec.Byte.Lexer 22 | ( -- * White space 23 | space, 24 | lexeme, 25 | symbol, 26 | symbol', 27 | skipLineComment, 28 | skipBlockComment, 29 | skipBlockCommentNested, 30 | 31 | -- * Numbers 32 | decimal, 33 | binary, 34 | octal, 35 | hexadecimal, 36 | scientific, 37 | float, 38 | signed, 39 | ) 40 | where 41 | 42 | import Control.Applicative 43 | import Data.Functor (void) 44 | import Data.List (foldl') 45 | import Data.Proxy 46 | import Data.Scientific (Scientific) 47 | import qualified Data.Scientific as Sci 48 | import Data.Word (Word8) 49 | import Text.Megaparsec 50 | import qualified Text.Megaparsec.Byte as B 51 | import Text.Megaparsec.Lexer 52 | 53 | ---------------------------------------------------------------------------- 54 | -- White space 55 | 56 | -- | Given a comment prefix this function returns a parser that skips line 57 | -- comments. Note that it stops just before the newline character but 58 | -- doesn't consume the newline. Newline is either supposed to be consumed by 59 | -- 'space' parser or picked up manually. 60 | skipLineComment :: 61 | (MonadParsec e s m, Token s ~ Word8) => 62 | -- | Line comment prefix 63 | Tokens s -> 64 | m () 65 | skipLineComment prefix = 66 | B.string prefix *> void (takeWhileP (Just "character") (/= 10)) 67 | {-# INLINEABLE skipLineComment #-} 68 | 69 | -- | @'skipBlockComment' start end@ skips non-nested block comment starting 70 | -- with @start@ and ending with @end@. 71 | skipBlockComment :: 72 | (MonadParsec e s m) => 73 | -- | Start of block comment 74 | Tokens s -> 75 | -- | End of block comment 76 | Tokens s -> 77 | m () 78 | skipBlockComment start end = p >> void (manyTill anySingle n) 79 | where 80 | p = B.string start 81 | n = B.string end 82 | {-# INLINEABLE skipBlockComment #-} 83 | 84 | -- | @'skipBlockCommentNested' start end@ skips possibly nested block 85 | -- comment starting with @start@ and ending with @end@. 86 | -- 87 | -- @since 5.0.0 88 | skipBlockCommentNested :: 89 | (MonadParsec e s m, Token s ~ Word8) => 90 | -- | Start of block comment 91 | Tokens s -> 92 | -- | End of block comment 93 | Tokens s -> 94 | m () 95 | skipBlockCommentNested start end = p >> void (manyTill e n) 96 | where 97 | e = skipBlockCommentNested start end <|> void anySingle 98 | p = B.string start 99 | n = B.string end 100 | {-# INLINEABLE skipBlockCommentNested #-} 101 | 102 | ---------------------------------------------------------------------------- 103 | -- Numbers 104 | 105 | -- | Parse an integer in the decimal representation according to the format 106 | -- of integer literals described in the Haskell report. 107 | -- 108 | -- If you need to parse signed integers, see the 'signed' combinator. 109 | -- 110 | -- __Warning__: this function does not perform range checks. 111 | decimal :: 112 | forall e s m a. 113 | (MonadParsec e s m, Token s ~ Word8, Num a) => 114 | m a 115 | decimal = decimal_ "integer" 116 | {-# INLINEABLE decimal #-} 117 | 118 | -- | A non-public helper to parse decimal integers. 119 | decimal_ :: 120 | forall e s m a. 121 | (MonadParsec e s m, Token s ~ Word8, Num a) => 122 | m a 123 | decimal_ = mkNum <$> takeWhile1P (Just "digit") isDigit 124 | where 125 | mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s) 126 | step a w = a * 10 + fromIntegral (w - 48) 127 | {-# INLINE decimal_ #-} 128 | 129 | -- | Parse an integer in the binary representation. The binary number is 130 | -- expected to be a non-empty sequence of zeroes “0” and ones “1”. 131 | -- 132 | -- You could of course parse some prefix before the actual number: 133 | -- 134 | -- > binary = char 48 >> char' 98 >> L.binary 135 | -- 136 | -- __Warning__: this function does not perform range checks. 137 | -- 138 | -- @since 7.0.0 139 | binary :: 140 | forall e s m a. 141 | (MonadParsec e s m, Token s ~ Word8, Num a) => 142 | m a 143 | binary = 144 | mkNum 145 | <$> takeWhile1P Nothing isBinDigit 146 | "binary integer" 147 | where 148 | mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s) 149 | step a w = a * 2 + fromIntegral (w - 48) 150 | isBinDigit w = w == 48 || w == 49 151 | {-# INLINEABLE binary #-} 152 | 153 | -- | Parse an integer in the octal representation. The format of the octal 154 | -- number is expected to be according to the Haskell report except for the 155 | -- fact that this parser doesn't parse “0o” or “0O” prefix. It is a 156 | -- responsibility of the programmer to parse correct prefix before parsing 157 | -- the number itself. 158 | -- 159 | -- For example you can make it conform to the Haskell report like this: 160 | -- 161 | -- > octal = char 48 >> char' 111 >> L.octal 162 | -- 163 | -- __Warning__: this function does not perform range checks. 164 | octal :: 165 | forall e s m a. 166 | (MonadParsec e s m, Token s ~ Word8, Num a) => 167 | m a 168 | octal = 169 | mkNum 170 | <$> takeWhile1P Nothing isOctDigit 171 | "octal integer" 172 | where 173 | mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s) 174 | step a w = a * 8 + fromIntegral (w - 48) 175 | isOctDigit w = w - 48 < 8 176 | {-# INLINEABLE octal #-} 177 | 178 | -- | Parse an integer in the hexadecimal representation. The format of the 179 | -- hexadecimal number is expected to be according to the Haskell report 180 | -- except for the fact that this parser doesn't parse “0x” or “0X” prefix. 181 | -- It is a responsibility of the programmer to parse correct prefix before 182 | -- parsing the number itself. 183 | -- 184 | -- For example you can make it conform to the Haskell report like this: 185 | -- 186 | -- > hexadecimal = char 48 >> char' 120 >> L.hexadecimal 187 | -- 188 | -- __Warning__: this function does not perform range checks. 189 | hexadecimal :: 190 | forall e s m a. 191 | (MonadParsec e s m, Token s ~ Word8, Num a) => 192 | m a 193 | hexadecimal = 194 | mkNum 195 | <$> takeWhile1P Nothing isHexDigit 196 | "hexadecimal integer" 197 | where 198 | mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s) 199 | step a w 200 | | w >= 48 && w <= 57 = a * 16 + fromIntegral (w - 48) 201 | | w >= 97 = a * 16 + fromIntegral (w - 87) 202 | | otherwise = a * 16 + fromIntegral (w - 55) 203 | isHexDigit w = 204 | (w >= 48 && w <= 57) 205 | || (w >= 97 && w <= 102) 206 | || (w >= 65 && w <= 70) 207 | {-# INLINEABLE hexadecimal #-} 208 | 209 | -- | Parse a floating point value as a 'Scientific' number. 'Scientific' is 210 | -- great for parsing of arbitrary precision numbers coming from an untrusted 211 | -- source. See documentation in "Data.Scientific" for more information. 212 | -- 213 | -- The parser can be used to parse integers or floating point values. Use 214 | -- functions like 'Data.Scientific.floatingOrInteger' from "Data.Scientific" 215 | -- to test and extract integer or real values. 216 | -- 217 | -- This function does not parse sign, if you need to parse signed numbers, 218 | -- see 'signed'. 219 | scientific :: 220 | forall e s m. 221 | (MonadParsec e s m, Token s ~ Word8) => 222 | m Scientific 223 | scientific = do 224 | c' <- decimal_ 225 | SP c e' <- option (SP c' 0) (try $ dotDecimal_ (Proxy :: Proxy s) c') 226 | e <- option e' (try $ exponent_ e') 227 | return (Sci.scientific c e) 228 | {-# INLINEABLE scientific #-} 229 | 230 | data SP = SP !Integer {-# UNPACK #-} !Int 231 | 232 | -- | Parse a floating point number according to the syntax for floating 233 | -- point literals described in the Haskell report. 234 | -- 235 | -- This function does not parse sign, if you need to parse signed numbers, 236 | -- see 'signed'. 237 | -- 238 | -- __Note__: in versions /6.0.0/–/6.1.1/ this function accepted plain integers. 239 | float :: (MonadParsec e s m, Token s ~ Word8, RealFloat a) => m a 240 | float = do 241 | c' <- decimal_ 242 | Sci.toRealFloat 243 | <$> ( ( do 244 | SP c e' <- dotDecimal_ (Proxy :: Proxy s) c' 245 | e <- option e' (try $ exponent_ e') 246 | return (Sci.scientific c e) 247 | ) 248 | <|> (Sci.scientific c' <$> exponent_ 0) 249 | ) 250 | {-# INLINEABLE float #-} 251 | 252 | dotDecimal_ :: 253 | (MonadParsec e s m, Token s ~ Word8) => 254 | Proxy s -> 255 | Integer -> 256 | m SP 257 | dotDecimal_ pxy c' = do 258 | void (B.char 46) 259 | let mkNum = foldl' step (SP c' 0) . chunkToTokens pxy 260 | step (SP a e') w = 261 | SP 262 | (a * 10 + fromIntegral (w - 48)) 263 | (e' - 1) 264 | mkNum <$> takeWhile1P (Just "digit") isDigit 265 | {-# INLINE dotDecimal_ #-} 266 | 267 | exponent_ :: 268 | (MonadParsec e s m, Token s ~ Word8) => 269 | Int -> 270 | m Int 271 | exponent_ e' = do 272 | void (B.char' 101) 273 | (+ e') <$> signed (return ()) decimal_ 274 | {-# INLINE exponent_ #-} 275 | 276 | -- | @'signed' space p@ parser parses an optional sign character (“+” or 277 | -- “-”), then if there is a sign it consumes optional white space (using 278 | -- @space@ parser), then it runs parser @p@ which should return a number. 279 | -- Sign of the number is changed according to the previously parsed sign 280 | -- character. 281 | -- 282 | -- For example, to parse signed integer you can write: 283 | -- 284 | -- > lexeme = L.lexeme spaceConsumer 285 | -- > integer = lexeme L.decimal 286 | -- > signedInteger = L.signed spaceConsumer integer 287 | signed :: 288 | (MonadParsec e s m, Token s ~ Word8, Num a) => 289 | -- | How to consume white space after the sign 290 | m () -> 291 | -- | How to parse the number itself 292 | m a -> 293 | -- | Parser for signed numbers 294 | m a 295 | signed spc p = option id (lexeme spc sign) <*> p 296 | where 297 | sign = (id <$ B.char 43) <|> (negate <$ B.char 45) 298 | {-# INLINEABLE signed #-} 299 | 300 | ---------------------------------------------------------------------------- 301 | -- Helpers 302 | 303 | -- | A fast predicate to check if the given 'Word8' is a digit in ASCII. 304 | isDigit :: Word8 -> Bool 305 | isDigit w = w - 48 < 10 306 | {-# INLINE isDigit #-} 307 | -------------------------------------------------------------------------------- /Text/Megaparsec/Char.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE LambdaCase #-} 3 | {-# LANGUAGE Safe #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | {-# LANGUAGE TypeFamilies #-} 6 | {-# LANGUAGE TypeOperators #-} 7 | 8 | -- | 9 | -- Module : Text.Megaparsec.Char 10 | -- Copyright : © 2015–present Megaparsec contributors 11 | -- © 2007 Paolo Martini 12 | -- © 1999–2001 Daan Leijen 13 | -- License : FreeBSD 14 | -- 15 | -- Maintainer : Mark Karpov 16 | -- Stability : experimental 17 | -- Portability : non-portable 18 | -- 19 | -- Commonly used character parsers. 20 | module Text.Megaparsec.Char 21 | ( -- * Simple parsers 22 | newline, 23 | crlf, 24 | eol, 25 | tab, 26 | space, 27 | hspace, 28 | space1, 29 | hspace1, 30 | 31 | -- * Categories of characters 32 | controlChar, 33 | spaceChar, 34 | upperChar, 35 | lowerChar, 36 | letterChar, 37 | alphaNumChar, 38 | printChar, 39 | digitChar, 40 | binDigitChar, 41 | octDigitChar, 42 | hexDigitChar, 43 | markChar, 44 | numberChar, 45 | punctuationChar, 46 | symbolChar, 47 | separatorChar, 48 | asciiChar, 49 | latin1Char, 50 | charCategory, 51 | categoryName, 52 | 53 | -- * Single character 54 | char, 55 | char', 56 | 57 | -- * Sequence of characters 58 | string, 59 | string', 60 | ) 61 | where 62 | 63 | import Control.Applicative 64 | import Data.Char 65 | import Data.Functor (void) 66 | import Data.Proxy 67 | import Text.Megaparsec 68 | import Text.Megaparsec.Common 69 | 70 | ---------------------------------------------------------------------------- 71 | -- Simple parsers 72 | 73 | -- | Parse a newline character. 74 | newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 75 | newline = char '\n' 76 | {-# INLINE newline #-} 77 | 78 | -- | Parse a carriage return character followed by a newline character. 79 | -- Return the sequence of characters parsed. 80 | crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s) 81 | crlf = string (tokensToChunk (Proxy :: Proxy s) "\r\n") 82 | {-# INLINE crlf #-} 83 | 84 | -- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the 85 | -- sequence of characters parsed. 86 | eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s) 87 | eol = 88 | (tokenToChunk (Proxy :: Proxy s) <$> newline) 89 | <|> crlf 90 | "end of line" 91 | {-# INLINE eol #-} 92 | 93 | -- | Parse a tab character. 94 | tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 95 | tab = char '\t' 96 | {-# INLINE tab #-} 97 | 98 | -- | Skip /zero/ or more white space characters. 99 | -- 100 | -- See also: 'skipMany' and 'spaceChar'. 101 | space :: (MonadParsec e s m, Token s ~ Char) => m () 102 | space = void $ takeWhileP (Just "white space") isSpace 103 | {-# INLINE space #-} 104 | 105 | -- | Like 'space', but does not accept newlines and carriage returns. 106 | -- 107 | -- @since 9.0.0 108 | hspace :: (MonadParsec e s m, Token s ~ Char) => m () 109 | hspace = void $ takeWhileP (Just "white space") isHSpace 110 | {-# INLINE hspace #-} 111 | 112 | -- | Skip /one/ or more white space characters. 113 | -- 114 | -- See also: 'skipSome' and 'spaceChar'. 115 | -- 116 | -- @since 6.0.0 117 | space1 :: (MonadParsec e s m, Token s ~ Char) => m () 118 | space1 = void $ takeWhile1P (Just "white space") isSpace 119 | {-# INLINE space1 #-} 120 | 121 | -- | Like 'space1', but does not accept newlines and carriage returns. 122 | -- 123 | -- @since 9.0.0 124 | hspace1 :: (MonadParsec e s m, Token s ~ Char) => m () 125 | hspace1 = void $ takeWhile1P (Just "white space") isHSpace 126 | {-# INLINE hspace1 #-} 127 | 128 | ---------------------------------------------------------------------------- 129 | -- Categories of characters 130 | 131 | -- | Parse a control character (a non-printing character of the Latin-1 132 | -- subset of Unicode). 133 | controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 134 | controlChar = satisfy isControl "control character" 135 | {-# INLINE controlChar #-} 136 | 137 | -- | Parse a Unicode space character, and the control characters: tab, 138 | -- newline, carriage return, form feed, and vertical tab. 139 | spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 140 | spaceChar = satisfy isSpace "white space" 141 | {-# INLINE spaceChar #-} 142 | 143 | -- | Parse an upper-case or title-case alphabetic Unicode character. Title 144 | -- case is used by a small number of letter ligatures like the 145 | -- single-character form of Lj. 146 | upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 147 | upperChar = satisfy isUpper "uppercase letter" 148 | {-# INLINE upperChar #-} 149 | 150 | -- | Parse a lower-case alphabetic Unicode character. 151 | lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 152 | lowerChar = satisfy isLower "lowercase letter" 153 | {-# INLINE lowerChar #-} 154 | 155 | -- | Parse an alphabetic Unicode character: lower-case, upper-case, or 156 | -- title-case letter, or a letter of case-less scripts\/modifier letter. 157 | letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 158 | letterChar = satisfy isLetter "letter" 159 | {-# INLINE letterChar #-} 160 | 161 | -- | Parse an alphabetic or numeric digit Unicode characters. 162 | -- 163 | -- Note that the numeric digits outside the ASCII range are parsed by this 164 | -- parser but not by 'digitChar'. Such digits may be part of identifiers but 165 | -- are not used by the printer and reader to represent numbers. 166 | alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 167 | alphaNumChar = satisfy isAlphaNum "alphanumeric character" 168 | {-# INLINE alphaNumChar #-} 169 | 170 | -- | Parse a printable Unicode character: letter, number, mark, punctuation, 171 | -- symbol or space. 172 | printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 173 | printChar = satisfy isPrint "printable character" 174 | {-# INLINE printChar #-} 175 | 176 | -- | Parse an ASCII digit, i.e between “0” and “9”. 177 | digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 178 | digitChar = satisfy isDigit "digit" 179 | {-# INLINE digitChar #-} 180 | 181 | -- | Parse a binary digit, i.e. "0" or "1". 182 | -- 183 | -- @since 7.0.0 184 | binDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 185 | binDigitChar = satisfy isBinDigit "binary digit" 186 | where 187 | isBinDigit x = x == '0' || x == '1' 188 | {-# INLINE binDigitChar #-} 189 | 190 | -- | Parse an octal digit, i.e. between “0” and “7”. 191 | octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 192 | octDigitChar = satisfy isOctDigit "octal digit" 193 | {-# INLINE octDigitChar #-} 194 | 195 | -- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or 196 | -- “A” and “F”. 197 | hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 198 | hexDigitChar = satisfy isHexDigit "hexadecimal digit" 199 | {-# INLINE hexDigitChar #-} 200 | 201 | -- | Parse a Unicode mark character (accents and the like), which combines 202 | -- with preceding characters. 203 | markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 204 | markChar = satisfy isMark "mark character" 205 | {-# INLINE markChar #-} 206 | 207 | -- | Parse a Unicode numeric character, including digits from various 208 | -- scripts, Roman numerals, etc. 209 | numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 210 | numberChar = satisfy isNumber "numeric character" 211 | {-# INLINE numberChar #-} 212 | 213 | -- | Parse a Unicode punctuation character, including various kinds of 214 | -- connectors, brackets and quotes. 215 | punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 216 | punctuationChar = satisfy isPunctuation "punctuation" 217 | {-# INLINE punctuationChar #-} 218 | 219 | -- | Parse a Unicode symbol characters, including mathematical and currency 220 | -- symbols. 221 | symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 222 | symbolChar = satisfy isSymbol "symbol" 223 | {-# INLINE symbolChar #-} 224 | 225 | -- | Parse a Unicode space and separator characters. 226 | separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 227 | separatorChar = satisfy isSeparator "separator" 228 | {-# INLINE separatorChar #-} 229 | 230 | -- | Parse a character from the first 128 characters of the Unicode 231 | -- character set, corresponding to the ASCII character set. 232 | asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 233 | asciiChar = satisfy isAscii "ASCII character" 234 | {-# INLINE asciiChar #-} 235 | 236 | -- | Parse a character from the first 256 characters of the Unicode 237 | -- character set, corresponding to the ISO 8859-1 (Latin-1) character set. 238 | latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s) 239 | latin1Char = satisfy isLatin1 "Latin-1 character" 240 | {-# INLINE latin1Char #-} 241 | 242 | -- | @'charCategory' cat@ parses character in Unicode General Category 243 | -- @cat@, see 'Data.Char.GeneralCategory'. 244 | charCategory :: 245 | (MonadParsec e s m, Token s ~ Char) => 246 | GeneralCategory -> 247 | m (Token s) 248 | charCategory cat = satisfy ((== cat) . generalCategory) categoryName cat 249 | {-# INLINE charCategory #-} 250 | 251 | -- | Return the human-readable name of Unicode General Category. 252 | categoryName :: GeneralCategory -> String 253 | categoryName = \case 254 | UppercaseLetter -> "uppercase letter" 255 | LowercaseLetter -> "lowercase letter" 256 | TitlecaseLetter -> "titlecase letter" 257 | ModifierLetter -> "modifier letter" 258 | OtherLetter -> "other letter" 259 | NonSpacingMark -> "non-spacing mark" 260 | SpacingCombiningMark -> "spacing combining mark" 261 | EnclosingMark -> "enclosing mark" 262 | DecimalNumber -> "decimal number character" 263 | LetterNumber -> "letter number character" 264 | OtherNumber -> "other number character" 265 | ConnectorPunctuation -> "connector punctuation" 266 | DashPunctuation -> "dash punctuation" 267 | OpenPunctuation -> "open punctuation" 268 | ClosePunctuation -> "close punctuation" 269 | InitialQuote -> "initial quote" 270 | FinalQuote -> "final quote" 271 | OtherPunctuation -> "other punctuation" 272 | MathSymbol -> "math symbol" 273 | CurrencySymbol -> "currency symbol" 274 | ModifierSymbol -> "modifier symbol" 275 | OtherSymbol -> "other symbol" 276 | Space -> "white space" 277 | LineSeparator -> "line separator" 278 | ParagraphSeparator -> "paragraph separator" 279 | Control -> "control character" 280 | Format -> "format character" 281 | Surrogate -> "surrogate character" 282 | PrivateUse -> "private-use Unicode character" 283 | NotAssigned -> "non-assigned Unicode character" 284 | 285 | ---------------------------------------------------------------------------- 286 | -- Single character 287 | 288 | -- | A type-constrained version of 'single'. 289 | -- 290 | -- > semicolon = char ';' 291 | char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s) 292 | char = single 293 | {-# INLINE char #-} 294 | 295 | -- | The same as 'char' but case-insensitive. This parser returns the 296 | -- actually parsed character preserving its case. 297 | -- 298 | -- >>> parseTest (char' 'e') "E" 299 | -- 'E' 300 | -- >>> parseTest (char' 'e') "G" 301 | -- 1:1: 302 | -- unexpected 'G' 303 | -- expecting 'E' or 'e' 304 | char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s) 305 | char' c = 306 | choice 307 | [ char (toLower c), 308 | char (toUpper c), 309 | char (toTitle c) 310 | ] 311 | {-# INLINE char' #-} 312 | 313 | ---------------------------------------------------------------------------- 314 | -- Helpers 315 | 316 | -- | Is it a horizontal space character? 317 | isHSpace :: Char -> Bool 318 | isHSpace x = isSpace x && x /= '\n' && x /= '\r' 319 | -------------------------------------------------------------------------------- /Text/Megaparsec/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE Safe #-} 3 | 4 | -- | 5 | -- Module : Text.Megaparsec.Common 6 | -- Copyright : © 2018–present Megaparsec contributors 7 | -- License : FreeBSD 8 | -- 9 | -- Maintainer : Mark Karpov 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- Common token combinators. This module is not public, the functions from 14 | -- it are re-exported in "Text.Megaparsec.Byte" and "Text.Megaparsec.Char". 15 | -- 16 | -- @since 7.0.0 17 | module Text.Megaparsec.Common 18 | ( string, 19 | string', 20 | ) 21 | where 22 | 23 | import qualified Data.CaseInsensitive as CI 24 | import Data.Function (on) 25 | import Text.Megaparsec 26 | 27 | -- | A synonym for 'chunk'. 28 | string :: (MonadParsec e s m) => Tokens s -> m (Tokens s) 29 | string = chunk 30 | {-# INLINE string #-} 31 | 32 | -- | The same as 'string', but case-insensitive. On success returns string 33 | -- cased as the parsed input. 34 | -- 35 | -- >>> parseTest (string' "foobar") "foObAr" 36 | -- "foObAr" 37 | string' :: 38 | (MonadParsec e s m, CI.FoldCase (Tokens s)) => 39 | Tokens s -> 40 | m (Tokens s) 41 | string' = tokens ((==) `on` CI.mk) 42 | {-# INLINE string' #-} 43 | -------------------------------------------------------------------------------- /Text/Megaparsec/Debug.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE MultiParamTypeClasses #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | {-# LANGUAGE Unsafe #-} 6 | 7 | -- | 8 | -- Module : Text.Megaparsec.Debug 9 | -- Copyright : © 2015–present Megaparsec contributors 10 | -- License : FreeBSD 11 | -- 12 | -- Maintainer : Mark Karpov 13 | -- Stability : experimental 14 | -- Portability : portable 15 | -- 16 | -- Debugging helpers. 17 | -- 18 | -- @since 7.0.0 19 | module Text.Megaparsec.Debug 20 | ( MonadParsecDbg (..), 21 | dbg', 22 | ) 23 | where 24 | 25 | import Control.Monad.Identity (IdentityT, mapIdentityT) 26 | import qualified Control.Monad.Trans.RWS.Lazy as L 27 | import qualified Control.Monad.Trans.RWS.Strict as S 28 | import qualified Control.Monad.Trans.Reader as L 29 | import qualified Control.Monad.Trans.State.Lazy as L 30 | import qualified Control.Monad.Trans.State.Strict as S 31 | import qualified Control.Monad.Trans.Writer.Lazy as L 32 | import qualified Control.Monad.Trans.Writer.Strict as S 33 | import Data.Bifunctor (Bifunctor (first)) 34 | import qualified Data.List as List 35 | import qualified Data.List.NonEmpty as NE 36 | import Data.Proxy 37 | import qualified Data.Set as E 38 | import Debug.Trace 39 | import Text.Megaparsec.Class (MonadParsec) 40 | import Text.Megaparsec.Error 41 | import Text.Megaparsec.Internal 42 | import Text.Megaparsec.State 43 | import Text.Megaparsec.Stream 44 | 45 | -- | Type class describing parser monads that can trace during evaluation. 46 | -- 47 | -- @since 9.3.0 48 | class (MonadParsec e s m) => MonadParsecDbg e s m where 49 | -- | @'dbg' label p@ parser works exactly like @p@, but when it's evaluated 50 | -- it prints information useful for debugging. The @label@ is only used to 51 | -- refer to this parser in the debugging output. This combinator uses the 52 | -- 'trace' function from "Debug.Trace" under the hood. 53 | -- 54 | -- Typical usage is to wrap every sub-parser in misbehaving parser with 55 | -- 'dbg' assigning meaningful labels. Then give it a shot and go through the 56 | -- print-out. As of current version, this combinator prints all available 57 | -- information except for /hints/, which are probably only interesting to 58 | -- the maintainer of Megaparsec itself and may be quite verbose to output in 59 | -- general. Let me know if you would like to be able to see hints in the 60 | -- debugging output. 61 | -- 62 | -- The output itself is pretty self-explanatory, although the following 63 | -- abbreviations should be clarified (they are derived from the low-level 64 | -- source code): 65 | -- 66 | -- * @COK@—“consumed OK”. The parser consumed input and succeeded. 67 | -- * @CERR@—“consumed error”. The parser consumed input and failed. 68 | -- * @EOK@—“empty OK”. The parser succeeded without consuming input. 69 | -- * @EERR@—“empty error”. The parser failed without consuming input. 70 | -- 71 | -- __Note__: up until the version /9.3.0/ this was a non-polymorphic 72 | -- function that worked only in 'ParsecT'. It was first introduced in the 73 | -- version /7.0.0/. 74 | dbg :: 75 | (Show a) => 76 | -- | Debugging label 77 | String -> 78 | -- | Parser to debug 79 | m a -> 80 | -- | Parser that prints debugging messages 81 | m a 82 | 83 | -- | @dbg (p :: StateT st m)@ prints state __after__ running @p@: 84 | -- 85 | -- >>> p = modify succ >> dbg "a" (single 'a' >> modify succ) 86 | -- >>> parseTest (runStateT p 0) "a" 87 | -- a> IN: 'a' 88 | -- a> MATCH (COK): 'a' 89 | -- a> VALUE: () (STATE: 2) 90 | -- ((),2) 91 | instance 92 | (Show st, MonadParsecDbg e s m) => 93 | MonadParsecDbg e s (L.StateT st m) 94 | where 95 | dbg str sma = L.StateT $ \s -> 96 | dbgWithComment "STATE" str $ L.runStateT sma s 97 | 98 | -- | @dbg (p :: StateT st m)@ prints state __after__ running @p@: 99 | -- 100 | -- >>> p = modify succ >> dbg "a" (single 'a' >> modify succ) 101 | -- >>> parseTest (runStateT p 0) "a" 102 | -- a> IN: 'a' 103 | -- a> MATCH (COK): 'a' 104 | -- a> VALUE: () (STATE: 2) 105 | -- ((),2) 106 | instance 107 | (Show st, MonadParsecDbg e s m) => 108 | MonadParsecDbg e s (S.StateT st m) 109 | where 110 | dbg str sma = S.StateT $ \s -> 111 | dbgWithComment "STATE" str $ S.runStateT sma s 112 | 113 | instance 114 | (MonadParsecDbg e s m) => 115 | MonadParsecDbg e s (L.ReaderT r m) 116 | where 117 | dbg = L.mapReaderT . dbg 118 | 119 | -- | @dbg (p :: WriterT st m)@ prints __only__ log produced by @p@: 120 | -- 121 | -- >>> p = tell [0] >> dbg "a" (single 'a' >> tell [1]) 122 | -- >>> parseTest (runWriterT p) "a" 123 | -- a> IN: 'a' 124 | -- a> MATCH (COK): 'a' 125 | -- a> VALUE: () (LOG: [1]) 126 | -- ((),[0,1]) 127 | instance 128 | (Monoid w, Show w, MonadParsecDbg e s m) => 129 | MonadParsecDbg e s (L.WriterT w m) 130 | where 131 | dbg str wma = L.WriterT $ dbgWithComment "LOG" str $ L.runWriterT wma 132 | 133 | -- | @dbg (p :: WriterT st m)@ prints __only__ log produced by @p@: 134 | -- 135 | -- >>> p = tell [0] >> dbg "a" (single 'a' >> tell [1]) 136 | -- >>> parseTest (runWriterT p) "a" 137 | -- a> IN: 'a' 138 | -- a> MATCH (COK): 'a' 139 | -- a> VALUE: () (LOG: [1]) 140 | -- ((),[0,1]) 141 | instance 142 | (Monoid w, Show w, MonadParsecDbg e s m) => 143 | MonadParsecDbg e s (S.WriterT w m) 144 | where 145 | dbg str wma = S.WriterT $ dbgWithComment "LOG" str $ S.runWriterT wma 146 | 147 | -- | @RWST@ works like @StateT@ inside a @WriterT@: subparser's log and its 148 | -- final state is printed: 149 | -- 150 | -- >>> p = tell [0] >> modify succ >> dbg "a" (single 'a' >> tell [1] >> modify succ) 151 | -- >>> parseTest (runRWST p () 0) "a" 152 | -- a> IN: 'a' 153 | -- a> MATCH (COK): 'a' 154 | -- a> VALUE: () (STATE: 2) (LOG: [1]) 155 | -- ((),2,[0,1]) 156 | instance 157 | (Monoid w, Show w, Show st, MonadParsecDbg e s m) => 158 | MonadParsecDbg e s (L.RWST r w st m) 159 | where 160 | dbg str sma = L.RWST $ \r s -> do 161 | let smth = 162 | (\(a, st, w) -> ShowComment "LOG" (ShowComment "STATE" (a, st), w)) 163 | <$> L.runRWST sma r s 164 | ((a, st), w) <- first unComment . unComment <$> dbg str smth 165 | pure (a, st, w) 166 | 167 | -- | @RWST@ works like @StateT@ inside a @WriterT@: subparser's log and its 168 | -- final state is printed: 169 | -- 170 | -- >>> p = tell [0] >> modify succ >> dbg "a" (single 'a' >> tell [1] >> modify succ) 171 | -- >>> parseTest (runRWST p () 0) "a" 172 | -- a> IN: 'a' 173 | -- a> MATCH (COK): 'a' 174 | -- a> VALUE: () (STATE: 2) (LOG: [1]) 175 | -- ((),2,[0,1]) 176 | instance 177 | (Monoid w, Show w, Show st, MonadParsecDbg e s m) => 178 | MonadParsecDbg e s (S.RWST r w st m) 179 | where 180 | dbg str sma = S.RWST $ \r s -> do 181 | let smth = 182 | (\(a, st, w) -> ShowComment "LOG" (ShowComment "STATE" (a, st), w)) 183 | <$> S.runRWST sma r s 184 | ((a, st), w) <- first unComment . unComment <$> dbg str smth 185 | pure (a, st, w) 186 | 187 | instance (MonadParsecDbg e s m) => MonadParsecDbg e s (IdentityT m) where 188 | dbg = mapIdentityT . dbg 189 | 190 | -- | @'dbgWithComment' label_a label_c m@ traces the first component of the 191 | -- result produced by @m@ with @label_a@ and the second component with 192 | -- @label_b@. 193 | dbgWithComment :: 194 | (MonadParsecDbg e s m, Show a, Show c) => 195 | -- | Debugging label (for @a@) 196 | String -> 197 | -- | Extra component label (for @c@) 198 | String -> 199 | -- | Parser to debug 200 | m (a, c) -> 201 | -- | Parser that prints debugging messages 202 | m (a, c) 203 | dbgWithComment lbl str ma = 204 | unComment <$> dbg str (ShowComment lbl <$> ma) 205 | 206 | -- | A wrapper with a special show instance: 207 | -- 208 | -- >>> show (ShowComment "STATE" ("Hello, world!", 42)) 209 | -- Hello, world! (STATE: 42) 210 | data ShowComment c a = ShowComment String (a, c) 211 | 212 | unComment :: ShowComment c a -> (a, c) 213 | unComment (ShowComment _ val) = val 214 | 215 | instance (Show c, Show a) => Show (ShowComment c a) where 216 | show (ShowComment lbl (a, c)) = show a ++ " (" ++ lbl ++ ": " ++ show c ++ ")" 217 | 218 | instance 219 | (VisualStream s, ShowErrorComponent e) => 220 | MonadParsecDbg e s (ParsecT e s m) 221 | where 222 | dbg lbl p = ParsecT $ \s cok cerr eok eerr -> 223 | let l = dbgLog lbl 224 | unfold = streamTake 40 225 | cok' x s' hs = 226 | flip trace (cok x s' hs) $ 227 | l (DbgIn (unfold (stateInput s))) 228 | ++ l (DbgCOK (streamTake (streamDelta s s') (stateInput s)) x hs) 229 | cerr' err s' = 230 | flip trace (cerr err s') $ 231 | l (DbgIn (unfold (stateInput s))) 232 | ++ l (DbgCERR (streamTake (streamDelta s s') (stateInput s)) err) 233 | eok' x s' hs = 234 | flip trace (eok x s' hs) $ 235 | l (DbgIn (unfold (stateInput s))) 236 | ++ l (DbgEOK (streamTake (streamDelta s s') (stateInput s)) x hs) 237 | eerr' err s' = 238 | flip trace (eerr err s') $ 239 | l (DbgIn (unfold (stateInput s))) 240 | ++ l (DbgEERR (streamTake (streamDelta s s') (stateInput s)) err) 241 | in unParser p s cok' cerr' eok' eerr' 242 | 243 | -- | A single piece of info to be rendered with 'dbgLog'. 244 | data DbgItem s e a 245 | = DbgIn [Token s] 246 | | DbgCOK [Token s] a (Hints (Token s)) 247 | | DbgCERR [Token s] (ParseError s e) 248 | | DbgEOK [Token s] a (Hints (Token s)) 249 | | DbgEERR [Token s] (ParseError s e) 250 | 251 | -- | Render a single piece of debugging info. 252 | dbgLog :: 253 | forall s e a. 254 | (VisualStream s, ShowErrorComponent e, Show a) => 255 | -- | Debugging label 256 | String -> 257 | -- | Information to render 258 | DbgItem s e a -> 259 | -- | Rendered result 260 | String 261 | dbgLog lbl item = prefix msg 262 | where 263 | prefix = unlines . fmap ((lbl ++ "> ") ++) . lines 264 | pxy = Proxy :: Proxy s 265 | showHints hs = "[" ++ List.intercalate "," (showErrorItem pxy <$> E.toAscList hs) ++ "]" 266 | msg = case item of 267 | DbgIn ts -> 268 | "IN: " ++ showStream pxy ts 269 | DbgCOK ts a (Hints hs) -> 270 | "MATCH (COK): " 271 | ++ showStream pxy ts 272 | ++ "\nVALUE: " 273 | ++ show a 274 | ++ "\nHINTS: " 275 | ++ showHints hs 276 | DbgCERR ts e -> 277 | "MATCH (CERR): " ++ showStream pxy ts ++ "\nERROR:\n" ++ parseErrorPretty e 278 | DbgEOK ts a (Hints hs) -> 279 | "MATCH (EOK): " 280 | ++ showStream pxy ts 281 | ++ "\nVALUE: " 282 | ++ show a 283 | ++ "\nHINTS: " 284 | ++ showHints hs 285 | DbgEERR ts e -> 286 | "MATCH (EERR): " ++ showStream pxy ts ++ "\nERROR:\n" ++ parseErrorPretty e 287 | 288 | -- | Pretty-print a list of tokens. 289 | showStream :: (VisualStream s) => Proxy s -> [Token s] -> String 290 | showStream pxy ts = 291 | case NE.nonEmpty ts of 292 | Nothing -> "" 293 | Just ne -> 294 | let (h, r) = splitAt 40 (showTokens pxy ne) 295 | in if null r then h else h ++ " <…>" 296 | 297 | -- | Calculate number of consumed tokens given 'State' of parser before and 298 | -- after parsing. 299 | streamDelta :: 300 | -- | State of parser before consumption 301 | State s e -> 302 | -- | State of parser after consumption 303 | State s e -> 304 | -- | Number of consumed tokens 305 | Int 306 | streamDelta s0 s1 = stateOffset s1 - stateOffset s0 307 | 308 | -- | Extract a given number of tokens from the stream. 309 | streamTake :: forall s. (Stream s) => Int -> s -> [Token s] 310 | streamTake n s = 311 | case fst <$> takeN_ n s of 312 | Nothing -> [] 313 | Just chk -> chunkToTokens (Proxy :: Proxy s) chk 314 | 315 | -- | Just like 'dbg', but doesn't require the return value of the parser to 316 | -- be 'Show'-able. 317 | -- 318 | -- @since 9.1.0 319 | dbg' :: 320 | (MonadParsecDbg e s m) => 321 | -- | Debugging label 322 | String -> 323 | -- | Parser to debug 324 | m a -> 325 | -- | Parser that prints debugging messages 326 | m a 327 | dbg' lbl p = unBlind <$> dbg lbl (Blind <$> p) 328 | 329 | -- | A wrapper type with a dummy 'Show' instance. 330 | newtype Blind x = Blind {unBlind :: x} 331 | 332 | instance Show (Blind x) where 333 | show _ = "NOT SHOWN" 334 | -------------------------------------------------------------------------------- /Text/Megaparsec/Error.hs-boot: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RoleAnnotations #-} 2 | {-# LANGUAGE Safe #-} 3 | 4 | module Text.Megaparsec.Error 5 | ( ParseError, 6 | ) 7 | where 8 | 9 | type role ParseError nominal nominal 10 | 11 | data ParseError s e 12 | -------------------------------------------------------------------------------- /Text/Megaparsec/Error/Builder.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE DeriveGeneric #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | {-# LANGUAGE Safe #-} 5 | {-# LANGUAGE ScopedTypeVariables #-} 6 | {-# LANGUAGE StandaloneDeriving #-} 7 | {-# LANGUAGE UndecidableInstances #-} 8 | 9 | -- | 10 | -- Module : Text.Megaparsec.Error.Builder 11 | -- Copyright : © 2015–present Megaparsec contributors 12 | -- License : FreeBSD 13 | -- 14 | -- Maintainer : Mark Karpov 15 | -- Stability : experimental 16 | -- Portability : portable 17 | -- 18 | -- A set of helpers that should make construction of 'ParseError's more 19 | -- concise. This is primarily useful in test suites and for debugging. 20 | -- 21 | -- @since 6.0.0 22 | module Text.Megaparsec.Error.Builder 23 | ( -- * Top-level helpers 24 | err, 25 | errFancy, 26 | 27 | -- * Error components 28 | utok, 29 | utoks, 30 | ulabel, 31 | ueof, 32 | etok, 33 | etoks, 34 | elabel, 35 | eeof, 36 | fancy, 37 | 38 | -- * Data types 39 | ET, 40 | EF, 41 | ) 42 | where 43 | 44 | import Data.Data (Data) 45 | import Data.List.NonEmpty (NonEmpty (..)) 46 | import qualified Data.List.NonEmpty as NE 47 | import Data.Proxy 48 | import Data.Set (Set) 49 | import qualified Data.Set as E 50 | import GHC.Generics 51 | import Text.Megaparsec.Error 52 | import Text.Megaparsec.Stream 53 | 54 | ---------------------------------------------------------------------------- 55 | -- Data types 56 | 57 | -- | Auxiliary type for construction of trivial parse errors. 58 | data ET s = ET (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s))) 59 | deriving (Generic) 60 | 61 | deriving instance (Eq (Token s)) => Eq (ET s) 62 | 63 | deriving instance (Ord (Token s)) => Ord (ET s) 64 | 65 | deriving instance 66 | ( Data s, 67 | Data (Token s), 68 | Ord (Token s) 69 | ) => 70 | Data (ET s) 71 | 72 | instance (Stream s) => Semigroup (ET s) where 73 | ET us0 ps0 <> ET us1 ps1 = ET (n us0 us1) (E.union ps0 ps1) 74 | where 75 | n Nothing Nothing = Nothing 76 | n (Just x) Nothing = Just x 77 | n Nothing (Just y) = Just y 78 | n (Just x) (Just y) = Just (max x y) 79 | 80 | instance (Stream s) => Monoid (ET s) where 81 | mempty = ET Nothing E.empty 82 | mappend = (<>) 83 | 84 | -- | Auxiliary type for construction of fancy parse errors. 85 | newtype EF e = EF (Set (ErrorFancy e)) 86 | deriving (Eq, Ord, Data, Generic) 87 | 88 | instance (Ord e) => Semigroup (EF e) where 89 | EF xs0 <> EF xs1 = EF (E.union xs0 xs1) 90 | 91 | instance (Ord e) => Monoid (EF e) where 92 | mempty = EF E.empty 93 | mappend = (<>) 94 | 95 | ---------------------------------------------------------------------------- 96 | -- Top-level helpers 97 | 98 | -- | Assemble a 'ParseError' from the offset and the @'ET' t@ value. @'ET' 99 | -- t@ is a monoid and can be assembled by combining primitives provided by 100 | -- this module, see below. 101 | err :: 102 | -- | 'ParseError' offset 103 | Int -> 104 | -- | Error components 105 | ET s -> 106 | -- | Resulting 'ParseError' 107 | ParseError s e 108 | err p (ET us ps) = TrivialError p us ps 109 | 110 | -- | Like 'err', but constructs a “fancy” 'ParseError'. 111 | errFancy :: 112 | -- | 'ParseError' offset 113 | Int -> 114 | -- | Error components 115 | EF e -> 116 | -- | Resulting 'ParseError' 117 | ParseError s e 118 | errFancy p (EF xs) = FancyError p xs 119 | 120 | ---------------------------------------------------------------------------- 121 | -- Error components 122 | 123 | -- | Construct an “unexpected token” error component. 124 | utok :: Token s -> ET s 125 | utok = unexp . Tokens . nes 126 | 127 | -- | Construct an “unexpected tokens” error component. Empty chunk produces 128 | -- 'EndOfInput'. 129 | utoks :: forall s. (Stream s) => Tokens s -> ET s 130 | utoks = unexp . canonicalizeTokens (Proxy :: Proxy s) 131 | 132 | -- | Construct an “unexpected label” error component. Do not use with empty 133 | -- strings (for empty strings it's bottom). 134 | ulabel :: String -> ET s 135 | ulabel label 136 | | label == "" = error "Text.Megaparsec.Error.Builder.ulabel: empty label" 137 | | otherwise = unexp . Label . NE.fromList $ label 138 | 139 | -- | Construct an “unexpected end of input” error component. 140 | ueof :: ET s 141 | ueof = unexp EndOfInput 142 | 143 | -- | Construct an “expected token” error component. 144 | etok :: Token s -> ET s 145 | etok = expe . Tokens . nes 146 | 147 | -- | Construct an “expected tokens” error component. Empty chunk produces 148 | -- 'EndOfInput'. 149 | etoks :: forall s. (Stream s) => Tokens s -> ET s 150 | etoks = expe . canonicalizeTokens (Proxy :: Proxy s) 151 | 152 | -- | Construct an “expected label” error component. Do not use with empty 153 | -- strings. 154 | elabel :: String -> ET s 155 | elabel label 156 | | label == "" = error "Text.Megaparsec.Error.Builder.elabel: empty label" 157 | | otherwise = expe . Label . NE.fromList $ label 158 | 159 | -- | Construct an “expected end of input” error component. 160 | eeof :: ET s 161 | eeof = expe EndOfInput 162 | 163 | -- | Construct a custom error component. 164 | fancy :: ErrorFancy e -> EF e 165 | fancy = EF . E.singleton 166 | 167 | ---------------------------------------------------------------------------- 168 | -- Helpers 169 | 170 | -- | Construct the appropriate 'ErrorItem' representation for the given 171 | -- token stream. The empty string produces 'EndOfInput'. 172 | canonicalizeTokens :: 173 | (Stream s) => 174 | Proxy s -> 175 | Tokens s -> 176 | ErrorItem (Token s) 177 | canonicalizeTokens pxy ts = 178 | case NE.nonEmpty (chunkToTokens pxy ts) of 179 | Nothing -> EndOfInput 180 | Just xs -> Tokens xs 181 | 182 | -- | Lift an unexpected item into 'ET'. 183 | unexp :: ErrorItem (Token s) -> ET s 184 | unexp u = ET (pure u) E.empty 185 | 186 | -- | Lift an expected item into 'ET'. 187 | expe :: ErrorItem (Token s) -> ET s 188 | expe p = ET Nothing (E.singleton p) 189 | 190 | -- | Make a singleton non-empty list from a value. 191 | nes :: a -> NonEmpty a 192 | nes x = x :| [] 193 | -------------------------------------------------------------------------------- /Text/Megaparsec/Internal.hs-boot: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RoleAnnotations #-} 2 | 3 | module Text.Megaparsec.Internal 4 | ( Reply, 5 | ) 6 | where 7 | 8 | type role Reply nominal nominal representational 9 | 10 | data Reply e s a 11 | -------------------------------------------------------------------------------- /Text/Megaparsec/Lexer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE Safe #-} 3 | 4 | -- | 5 | -- Module : Text.Megaparsec.Common 6 | -- Copyright : © 2018–present Megaparsec contributors 7 | -- License : FreeBSD 8 | -- 9 | -- Maintainer : Mark Karpov 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- Common token combinators. This module is not public, the functions from 14 | -- it are re-exported in "Text.Megaparsec.Byte" and "Text.Megaparsec.Char". 15 | -- 16 | -- @since 7.0.0 17 | module Text.Megaparsec.Lexer 18 | ( -- * White space 19 | space, 20 | lexeme, 21 | symbol, 22 | symbol', 23 | ) 24 | where 25 | 26 | import qualified Data.CaseInsensitive as CI 27 | import Text.Megaparsec 28 | import Text.Megaparsec.Common 29 | 30 | ---------------------------------------------------------------------------- 31 | -- White space 32 | 33 | -- | @'space' sc lineComment blockComment@ produces a parser that can parse 34 | -- white space in general. It's expected that you create such a parser once 35 | -- and pass it to other functions in this module as needed (when you see 36 | -- @spaceConsumer@ in documentation, usually it means that something like 37 | -- 'space' is expected there). 38 | -- 39 | -- @sc@ is used to parse blocks of space characters. You can use 40 | -- 'Text.Megaparsec.Char.space1' from "Text.Megaparsec.Char" for this 41 | -- purpose as well as your own parser (if you don't want to automatically 42 | -- consume newlines, for example). Make sure that the parser does not 43 | -- succeed on the empty input though. In an earlier version of the library 44 | -- 'Text.Megaparsec.Char.spaceChar' was recommended, but now parsers based 45 | -- on 'takeWhile1P' are preferred because of their speed. 46 | -- 47 | -- @lineComment@ is used to parse line comments. You can use 48 | -- @skipLineComment@ if you don't need anything special. 49 | -- 50 | -- @blockComment@ is used to parse block (multi-line) comments. You can use 51 | -- @skipBlockComment@ or @skipBlockCommentNested@ if you don't need anything 52 | -- special. 53 | -- 54 | -- If you don't want to allow a kind of comment, simply pass 'empty' which 55 | -- will fail instantly when parsing of that sort of comment is attempted and 56 | -- 'space' will just move on or finish depending on whether there is more 57 | -- white space for it to consume. 58 | space :: 59 | (MonadParsec e s m) => 60 | -- | A parser for space characters which does not accept empty 61 | -- input (e.g. 'Text.Megaparsec.Char.space1') 62 | m () -> 63 | -- | A parser for a line comment (e.g. 'skipLineComment') 64 | m () -> 65 | -- | A parser for a block comment (e.g. 'skipBlockComment') 66 | m () -> 67 | m () 68 | space sp line block = 69 | skipMany $ 70 | choice 71 | [hidden sp, hidden line, hidden block] 72 | {-# INLINEABLE space #-} 73 | 74 | -- | This is a wrapper for lexemes. The typical usage is to supply the first 75 | -- argument (parser that consumes white space, probably defined via 'space') 76 | -- and use the resulting function to wrap parsers for every lexeme. 77 | -- 78 | -- > lexeme = L.lexeme spaceConsumer 79 | -- > integer = lexeme L.decimal 80 | lexeme :: 81 | (MonadParsec e s m) => 82 | -- | How to consume white space after lexeme 83 | m () -> 84 | -- | How to parse actual lexeme 85 | m a -> 86 | m a 87 | lexeme spc p = p <* spc 88 | {-# INLINEABLE lexeme #-} 89 | 90 | -- | This is a helper to parse symbols, i.e. verbatim strings. You pass the 91 | -- first argument (parser that consumes white space, probably defined via 92 | -- 'space') and then you can use the resulting function to parse strings: 93 | -- 94 | -- > symbol = L.symbol spaceConsumer 95 | -- > 96 | -- > parens = between (symbol "(") (symbol ")") 97 | -- > braces = between (symbol "{") (symbol "}") 98 | -- > angles = between (symbol "<") (symbol ">") 99 | -- > brackets = between (symbol "[") (symbol "]") 100 | -- > semicolon = symbol ";" 101 | -- > comma = symbol "," 102 | -- > colon = symbol ":" 103 | -- > dot = symbol "." 104 | symbol :: 105 | (MonadParsec e s m) => 106 | -- | How to consume white space after lexeme 107 | m () -> 108 | -- | Symbol to parse 109 | Tokens s -> 110 | m (Tokens s) 111 | symbol spc = lexeme spc . string 112 | {-# INLINEABLE symbol #-} 113 | 114 | -- | A case-insensitive version of 'symbol'. This may be helpful if you're 115 | -- working with case-insensitive languages. 116 | symbol' :: 117 | (MonadParsec e s m, CI.FoldCase (Tokens s)) => 118 | -- | How to consume white space after lexeme 119 | m () -> 120 | -- | Symbol to parse (case-insensitive) 121 | Tokens s -> 122 | m (Tokens s) 123 | symbol' spc = lexeme spc . string' 124 | {-# INLINEABLE symbol' #-} 125 | -------------------------------------------------------------------------------- /Text/Megaparsec/Pos.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | {-# LANGUAGE DeriveGeneric #-} 4 | {-# LANGUAGE Safe #-} 5 | 6 | -- | 7 | -- Module : Text.Megaparsec.Pos 8 | -- Copyright : © 2015–present Megaparsec contributors 9 | -- License : FreeBSD 10 | -- 11 | -- Maintainer : Mark Karpov 12 | -- Stability : experimental 13 | -- Portability : portable 14 | -- 15 | -- Textual source position. The position includes name of file, line number, 16 | -- and column number. 17 | -- 18 | -- You probably do not want to import this module directly because 19 | -- "Text.Megaparsec" re-exports it anyway. 20 | module Text.Megaparsec.Pos 21 | ( -- * Abstract position 22 | Pos, 23 | mkPos, 24 | unPos, 25 | pos1, 26 | defaultTabWidth, 27 | InvalidPosException (..), 28 | 29 | -- * Source position 30 | SourcePos (..), 31 | initialPos, 32 | sourcePosPretty, 33 | ) 34 | where 35 | 36 | import Control.DeepSeq 37 | import Control.Exception 38 | import Data.Data (Data) 39 | import GHC.Generics 40 | 41 | ---------------------------------------------------------------------------- 42 | -- Abstract position 43 | 44 | -- | 'Pos' is the type for positive integers. This is used to represent line 45 | -- number, column number, and similar things like indentation level. 46 | -- 'Semigroup' instance can be used to safely and efficiently add 'Pos'es 47 | -- together. 48 | -- 49 | -- @since 5.0.0 50 | newtype Pos = Pos Int 51 | deriving (Show, Eq, Ord, Data, Generic, NFData) 52 | 53 | -- | Construction of 'Pos' from 'Int'. The function throws 54 | -- 'InvalidPosException' when given a non-positive argument. 55 | -- 56 | -- @since 6.0.0 57 | mkPos :: Int -> Pos 58 | mkPos a = 59 | if a <= 0 60 | then throw (InvalidPosException a) 61 | else Pos a 62 | {-# INLINE mkPos #-} 63 | 64 | -- | Extract 'Int' from 'Pos'. 65 | -- 66 | -- @since 6.0.0 67 | unPos :: Pos -> Int 68 | unPos (Pos w) = w 69 | {-# INLINE unPos #-} 70 | 71 | -- | Position with value 1. 72 | -- 73 | -- @since 6.0.0 74 | pos1 :: Pos 75 | pos1 = mkPos 1 76 | 77 | -- | Value of tab width used by default. Always prefer this constant when 78 | -- you want to refer to the default tab width because actual value /may/ 79 | -- change in future. 80 | -- 81 | -- Currently: 82 | -- 83 | -- > defaultTabWidth = mkPos 8 84 | -- 85 | -- @since 5.0.0 86 | defaultTabWidth :: Pos 87 | defaultTabWidth = mkPos 8 88 | 89 | instance Semigroup Pos where 90 | (Pos x) <> (Pos y) = Pos (x + y) 91 | {-# INLINE (<>) #-} 92 | 93 | instance Read Pos where 94 | readsPrec d = 95 | readParen (d > 10) $ \r1 -> do 96 | ("Pos", r2) <- lex r1 97 | (x, r3) <- readsPrec 11 r2 98 | return (mkPos x, r3) 99 | 100 | -- | The exception is thrown by 'mkPos' when its argument is not a positive 101 | -- number. 102 | -- 103 | -- @since 5.0.0 104 | newtype InvalidPosException 105 | = -- | Contains the actual value that was passed to 'mkPos' 106 | InvalidPosException Int 107 | deriving (Eq, Show, Data, Generic) 108 | 109 | instance Exception InvalidPosException 110 | 111 | instance NFData InvalidPosException 112 | 113 | ---------------------------------------------------------------------------- 114 | -- Source position 115 | 116 | -- | The data type 'SourcePos' represents source positions. It contains the 117 | -- name of the source file, a line number, and a column number. Source line 118 | -- and column positions change intensively during parsing, so we need to 119 | -- make them strict to avoid memory leaks. 120 | data SourcePos = SourcePos 121 | { -- | Name of source file 122 | sourceName :: FilePath, 123 | -- | Line number 124 | sourceLine :: !Pos, 125 | -- | Column number 126 | sourceColumn :: !Pos 127 | } 128 | deriving (Show, Read, Eq, Ord, Data, Generic) 129 | 130 | instance NFData SourcePos 131 | 132 | -- | Construct initial position (line 1, column 1) given name of source 133 | -- file. 134 | initialPos :: FilePath -> SourcePos 135 | initialPos n = SourcePos n pos1 pos1 136 | 137 | -- | Pretty-print a 'SourcePos'. 138 | -- 139 | -- @since 5.0.0 140 | sourcePosPretty :: SourcePos -> String 141 | sourcePosPretty (SourcePos n l c) 142 | | null n = showLC 143 | | otherwise = n <> ":" <> showLC 144 | where 145 | showLC = show (unPos l) <> ":" <> show (unPos c) 146 | -------------------------------------------------------------------------------- /Text/Megaparsec/State.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE DeriveGeneric #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | {-# LANGUAGE Safe #-} 5 | {-# LANGUAGE StandaloneDeriving #-} 6 | {-# LANGUAGE UndecidableInstances #-} 7 | 8 | -- | 9 | -- Module : Text.Megaparsec.State 10 | -- Copyright : © 2015–present Megaparsec contributors 11 | -- © 2007 Paolo Martini 12 | -- © 1999–2001 Daan Leijen 13 | -- License : FreeBSD 14 | -- 15 | -- Maintainer : Mark Karpov 16 | -- Stability : experimental 17 | -- Portability : portable 18 | -- 19 | -- Definition of Megaparsec's 'State'. 20 | -- 21 | -- @since 6.5.0 22 | module Text.Megaparsec.State 23 | ( State (..), 24 | initialState, 25 | PosState (..), 26 | initialPosState, 27 | ) 28 | where 29 | 30 | import Control.DeepSeq (NFData) 31 | import Data.Data (Data) 32 | import GHC.Generics 33 | import {-# SOURCE #-} Text.Megaparsec.Error (ParseError) 34 | import Text.Megaparsec.Pos 35 | 36 | -- | This is the Megaparsec's state parametrized over stream type @s@ and 37 | -- custom error component type @e@. 38 | data State s e = State 39 | { -- | The rest of input to process 40 | stateInput :: s, 41 | -- | Number of processed tokens so far 42 | -- 43 | -- @since 7.0.0 44 | stateOffset :: {-# UNPACK #-} !Int, 45 | -- | State that is used for line\/column calculation 46 | -- 47 | -- @since 7.0.0 48 | statePosState :: PosState s, 49 | -- | Collection of “delayed” 'ParseError's in reverse order. This means 50 | -- that the last registered error is the first element of the list. 51 | -- 52 | -- @since 8.0.0 53 | stateParseErrors :: [ParseError s e] 54 | } 55 | deriving (Generic) 56 | 57 | deriving instance 58 | ( Show (ParseError s e), 59 | Show s 60 | ) => 61 | Show (State s e) 62 | 63 | deriving instance 64 | ( Eq (ParseError s e), 65 | Eq s 66 | ) => 67 | Eq (State s e) 68 | 69 | deriving instance 70 | ( Data e, 71 | Data (ParseError s e), 72 | Data s 73 | ) => 74 | Data (State s e) 75 | 76 | instance (NFData s, NFData (ParseError s e)) => NFData (State s e) 77 | 78 | -- | Given the name of the source file and the input construct the initial 79 | -- state for a parser. 80 | -- 81 | -- @since 9.6.0 82 | initialState :: 83 | -- | Name of the file the input is coming from 84 | FilePath -> 85 | -- | Input 86 | s -> 87 | State s e 88 | initialState name s = 89 | State 90 | { stateInput = s, 91 | stateOffset = 0, 92 | statePosState = initialPosState name s, 93 | stateParseErrors = [] 94 | } 95 | 96 | -- | A special kind of state that is used to calculate line\/column 97 | -- positions on demand. 98 | -- 99 | -- @since 7.0.0 100 | data PosState s = PosState 101 | { -- | The rest of input to process 102 | pstateInput :: s, 103 | -- | Offset corresponding to beginning of 'pstateInput' 104 | pstateOffset :: !Int, 105 | -- | Source position corresponding to beginning of 'pstateInput' 106 | pstateSourcePos :: !SourcePos, 107 | -- | Tab width to use for column calculation 108 | pstateTabWidth :: Pos, 109 | -- | Prefix to prepend to offending line 110 | pstateLinePrefix :: String 111 | } 112 | deriving (Show, Eq, Data, Generic) 113 | 114 | instance (NFData s) => NFData (PosState s) 115 | 116 | -- | Given the name of source file and the input construct the initial 117 | -- positional state. 118 | -- 119 | -- @since 9.6.0 120 | initialPosState :: 121 | -- | Name of the file the input is coming from 122 | FilePath -> 123 | -- | Input 124 | s -> 125 | PosState s 126 | initialPosState name s = 127 | PosState 128 | { pstateInput = s, 129 | pstateOffset = 0, 130 | pstateSourcePos = initialPos name, 131 | pstateTabWidth = defaultTabWidth, 132 | pstateLinePrefix = "" 133 | } 134 | -------------------------------------------------------------------------------- /Text/Megaparsec/Unicode.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Safe #-} 2 | 3 | -- | 4 | -- Module : Text.Megaparsec.Unicode 5 | -- Copyright : © 2024–present Megaparsec contributors 6 | -- License : FreeBSD 7 | -- 8 | -- Maintainer : Mark Karpov 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Utility functions for working with Unicode. 13 | -- 14 | -- @since 9.7.0 15 | module Text.Megaparsec.Unicode 16 | ( stringLength, 17 | charLength, 18 | isWideChar, 19 | ) 20 | where 21 | 22 | import Data.Array (Array, bounds, listArray, (!)) 23 | import Data.Char (ord) 24 | 25 | -- | Calculate length of a string taking into account the fact that certain 26 | -- 'Char's may span more than 1 column. 27 | -- 28 | -- @since 9.7.0 29 | stringLength :: (Traversable t) => t Char -> Int 30 | stringLength = sum . fmap charLength 31 | 32 | -- | Return length of an individual 'Char'. 33 | -- 34 | -- @since 9.7.0 35 | charLength :: Char -> Int 36 | charLength ch = if isWideChar ch then 2 else 1 37 | 38 | -- | Determine whether the given 'Char' is “wide”, that is, whether it spans 39 | -- 2 columns instead of one. 40 | -- 41 | -- @since 9.7.0 42 | isWideChar :: Char -> Bool 43 | isWideChar c = go (bounds wideCharRanges) 44 | where 45 | go (lo, hi) 46 | | hi < lo = False 47 | | a <= n && n <= b = True 48 | | n < a = go (lo, pred mid) 49 | | otherwise = go (succ mid, hi) 50 | where 51 | mid = (lo + hi) `div` 2 52 | (a, b) = wideCharRanges ! mid 53 | n = ord c 54 | 55 | -- | Wide character ranges. 56 | wideCharRanges :: Array Int (Int, Int) 57 | wideCharRanges = 58 | listArray 59 | (0, 118) 60 | [ (0x001100, 0x00115f), 61 | (0x00231a, 0x00231b), 62 | (0x002329, 0x00232a), 63 | (0x0023e9, 0x0023ec), 64 | (0x0023f0, 0x0023f0), 65 | (0x0023f3, 0x0023f3), 66 | (0x0025fd, 0x0025fe), 67 | (0x002614, 0x002615), 68 | (0x002648, 0x002653), 69 | (0x00267f, 0x00267f), 70 | (0x002693, 0x002693), 71 | (0x0026a1, 0x0026a1), 72 | (0x0026aa, 0x0026ab), 73 | (0x0026bd, 0x0026be), 74 | (0x0026c4, 0x0026c5), 75 | (0x0026ce, 0x0026ce), 76 | (0x0026d4, 0x0026d4), 77 | (0x0026ea, 0x0026ea), 78 | (0x0026f2, 0x0026f3), 79 | (0x0026f5, 0x0026f5), 80 | (0x0026fa, 0x0026fa), 81 | (0x0026fd, 0x0026fd), 82 | (0x002705, 0x002705), 83 | (0x00270a, 0x00270b), 84 | (0x002728, 0x002728), 85 | (0x00274c, 0x00274c), 86 | (0x00274e, 0x00274e), 87 | (0x002753, 0x002755), 88 | (0x002757, 0x002757), 89 | (0x002795, 0x002797), 90 | (0x0027b0, 0x0027b0), 91 | (0x0027bf, 0x0027bf), 92 | (0x002b1b, 0x002b1c), 93 | (0x002b50, 0x002b50), 94 | (0x002b55, 0x002b55), 95 | (0x002e80, 0x002e99), 96 | (0x002e9b, 0x002ef3), 97 | (0x002f00, 0x002fd5), 98 | (0x002ff0, 0x002ffb), 99 | (0x003000, 0x00303e), 100 | (0x003041, 0x003096), 101 | (0x003099, 0x0030ff), 102 | (0x003105, 0x00312f), 103 | (0x003131, 0x00318e), 104 | (0x003190, 0x0031ba), 105 | (0x0031c0, 0x0031e3), 106 | (0x0031f0, 0x00321e), 107 | (0x003220, 0x003247), 108 | (0x003250, 0x004db5), 109 | (0x004e00, 0x009fef), 110 | (0x00a000, 0x00a48c), 111 | (0x00a490, 0x00a4c6), 112 | (0x00a960, 0x00a97c), 113 | (0x00ac00, 0x00d7a3), 114 | (0x00f900, 0x00fa6d), 115 | (0x00fa70, 0x00fad9), 116 | (0x00fe10, 0x00fe19), 117 | (0x00fe30, 0x00fe52), 118 | (0x00fe54, 0x00fe66), 119 | (0x00fe68, 0x00fe6b), 120 | (0x00ff01, 0x00ff60), 121 | (0x00ffe0, 0x00ffe6), 122 | (0x016fe0, 0x016fe3), 123 | (0x017000, 0x0187f7), 124 | (0x018800, 0x018af2), 125 | (0x01b000, 0x01b11e), 126 | (0x01b150, 0x01b152), 127 | (0x01b164, 0x01b167), 128 | (0x01b170, 0x01b2fb), 129 | (0x01f004, 0x01f004), 130 | (0x01f0cf, 0x01f0cf), 131 | (0x01f18e, 0x01f18e), 132 | (0x01f191, 0x01f19a), 133 | (0x01f200, 0x01f202), 134 | (0x01f210, 0x01f23b), 135 | (0x01f240, 0x01f248), 136 | (0x01f250, 0x01f251), 137 | (0x01f260, 0x01f265), 138 | (0x01f300, 0x01f320), 139 | (0x01f32d, 0x01f335), 140 | (0x01f337, 0x01f37c), 141 | (0x01f37e, 0x01f393), 142 | (0x01f3a0, 0x01f3ca), 143 | (0x01f3cf, 0x01f3d3), 144 | (0x01f3e0, 0x01f3f0), 145 | (0x01f3f4, 0x01f3f4), 146 | (0x01f3f8, 0x01f43e), 147 | (0x01f440, 0x01f440), 148 | (0x01f442, 0x01f4fc), 149 | (0x01f4ff, 0x01f53d), 150 | (0x01f54b, 0x01f54e), 151 | (0x01f550, 0x01f567), 152 | (0x01f57a, 0x01f57a), 153 | (0x01f595, 0x01f596), 154 | (0x01f5a4, 0x01f5a4), 155 | (0x01f5fb, 0x01f64f), 156 | (0x01f680, 0x01f6c5), 157 | (0x01f6cc, 0x01f6cc), 158 | (0x01f6d0, 0x01f6d2), 159 | (0x01f6d5, 0x01f6d5), 160 | (0x01f6eb, 0x01f6ec), 161 | (0x01f6f4, 0x01f6fa), 162 | (0x01f7e0, 0x01f7eb), 163 | (0x01f90d, 0x01f971), 164 | (0x01f973, 0x01f976), 165 | (0x01f97a, 0x01f9a2), 166 | (0x01f9a5, 0x01f9aa), 167 | (0x01f9ae, 0x01f9ca), 168 | (0x01f9cd, 0x01f9ff), 169 | (0x01fa70, 0x01fa73), 170 | (0x01fa78, 0x01fa7a), 171 | (0x01fa80, 0x01fa82), 172 | (0x01fa90, 0x01fa95), 173 | (0x020000, 0x02a6d6), 174 | (0x02a700, 0x02b734), 175 | (0x02b740, 0x02b81d), 176 | (0x02b820, 0x02cea1), 177 | (0x02ceb0, 0x02ebe0), 178 | (0x02f800, 0x02fa1d) 179 | ] 180 | {-# NOINLINE wideCharRanges #-} 181 | -------------------------------------------------------------------------------- /bench/memory/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | 4 | module Main (main) where 5 | 6 | import Control.DeepSeq 7 | import Control.Monad 8 | import Data.ByteString (ByteString) 9 | import qualified Data.ByteString as B 10 | import Data.List.NonEmpty (NonEmpty (..)) 11 | import qualified Data.List.NonEmpty as NE 12 | import qualified Data.Set as E 13 | import Data.Text (Text) 14 | import qualified Data.Text as T 15 | import Data.Void 16 | import Text.Megaparsec 17 | import qualified Text.Megaparsec.Byte.Binary as Binary 18 | import Text.Megaparsec.Char 19 | import qualified Text.Megaparsec.Char.Lexer as L 20 | import Weigh 21 | 22 | -- | The type of parser that consumes 'Text'. 23 | type Parser = Parsec Void Text 24 | 25 | -- | The type of parser that consumes 'ByteString'. 26 | type ParserBs = Parsec Void ByteString 27 | 28 | main :: IO () 29 | main = mainWith $ do 30 | setColumns [Case, Allocated, GCs, Max] 31 | bparser "string" manyAs (string . fst) 32 | bparser "string'" manyAs (string' . fst) 33 | bparser "many" manyAs (const $ many (char 'a')) 34 | bparser "some" manyAs (const $ some (char 'a')) 35 | bparser "choice" (const "b") (choice . fmap char . manyAsB' . snd) 36 | bparser "count" manyAs (\(_, n) -> count n (char 'a')) 37 | bparser "count'" manyAs (\(_, n) -> count' 1 n (char 'a')) 38 | bparser "endBy" manyAbs' (const $ endBy (char 'a') (char 'b')) 39 | bparser "endBy1" manyAbs' (const $ endBy1 (char 'a') (char 'b')) 40 | bparser "manyTill" manyAsB (const $ manyTill (char 'a') (char 'b')) 41 | bparser "someTill" manyAsB (const $ someTill (char 'a') (char 'b')) 42 | bparser "sepBy" manyAbs (const $ sepBy (char 'a') (char 'b')) 43 | bparser "sepBy1" manyAbs (const $ sepBy1 (char 'a') (char 'b')) 44 | bparser "sepEndBy" manyAbs' (const $ sepEndBy (char 'a') (char 'b')) 45 | bparser "sepEndBy1" manyAbs' (const $ sepEndBy1 (char 'a') (char 'b')) 46 | bparser "skipMany" manyAs (const $ skipMany (char 'a')) 47 | bparser "skipSome" manyAs (const $ skipSome (char 'a')) 48 | bparser "skipCount" manyAs (\(_, n) -> skipCount n (char 'a')) 49 | bparser "skipManyTill" manyAsB (const $ skipManyTill (char 'a') (char 'b')) 50 | bparser "skipSomeTill" manyAsB (const $ skipSomeTill (char 'a') (char 'b')) 51 | bparser "takeWhileP" manyAs (const $ takeWhileP Nothing (== 'a')) 52 | bparser "takeWhile1P" manyAs (const $ takeWhile1P Nothing (== 'a')) 53 | bparser "decimal" mkInt (const (L.decimal :: Parser Integer)) 54 | bparser "octal" mkInt (const (L.octal :: Parser Integer)) 55 | bparser "hexadecimal" mkInt (const (L.hexadecimal :: Parser Integer)) 56 | bparser "scientific" mkInt (const L.scientific) 57 | bparserBs "word32be" many0x33 (const $ many Binary.word32be) 58 | bparserBs "word32le" many0x33 (const $ many Binary.word32le) 59 | 60 | forM_ stdSeries $ \n -> 61 | bbundle "single error" n [n] 62 | 63 | bbundle "2 errors" 1000 [1, 1000] 64 | bbundle "4 errors" 1000 [1, 500, 1000] 65 | bbundle "100 errors" 1000 [10, 20 .. 1000] 66 | 67 | breachOffset 0 1000 68 | breachOffset 0 2000 69 | breachOffset 0 4000 70 | breachOffset 1000 1000 71 | 72 | breachOffsetNoLine 0 1000 73 | breachOffsetNoLine 0 2000 74 | breachOffsetNoLine 0 4000 75 | breachOffsetNoLine 1000 1000 76 | 77 | -- | Perform a series of measurements with the same parser. 78 | bparser :: 79 | (NFData a) => 80 | -- | Name of the benchmark group 81 | String -> 82 | -- | How to construct input 83 | (Int -> Text) -> 84 | -- | The parser receiving its future input 85 | ((Text, Int) -> Parser a) -> 86 | Weigh () 87 | bparser name f p = forM_ stdSeries $ \i -> do 88 | let arg = (f i, i) 89 | p' (s, n) = parse (p (s, n)) "" s 90 | func (name ++ "-" ++ show i) p' arg 91 | 92 | -- | Perform a series of measurements with the same parser. 93 | bparserBs :: 94 | (NFData a) => 95 | -- | Name of the benchmark group 96 | String -> 97 | -- | How to construct input 98 | (Int -> ByteString) -> 99 | -- | The parser receiving its future input 100 | ((ByteString, Int) -> ParserBs a) -> 101 | Weigh () 102 | bparserBs name f p = forM_ stdSeries $ \i -> do 103 | let arg = (f i, i) 104 | p' (s, n) = parse (p (s, n)) "" s 105 | func (name ++ "-" ++ show i) p' arg 106 | 107 | -- | Benchmark the 'errorBundlePretty' function. 108 | bbundle :: 109 | -- | Name of the benchmark 110 | String -> 111 | -- | Number of lines in input stream 112 | Int -> 113 | -- | Lines with parse errors 114 | [Int] -> 115 | Weigh () 116 | bbundle name totalLines sps = do 117 | let s = take (totalLines * 80) (cycle as) 118 | as = replicate 79 'a' ++ "\n" 119 | f l = 120 | TrivialError 121 | (20 + l * 80) 122 | (Just $ Tokens ('a' :| "")) 123 | (E.singleton $ Tokens ('b' :| "")) 124 | bundle :: ParseErrorBundle String Void 125 | bundle = 126 | ParseErrorBundle 127 | { bundleErrors = f <$> NE.fromList sps, 128 | bundlePosState = 129 | PosState 130 | { pstateInput = s, 131 | pstateOffset = 0, 132 | pstateSourcePos = initialPos "", 133 | pstateTabWidth = defaultTabWidth, 134 | pstateLinePrefix = "" 135 | } 136 | } 137 | func 138 | ("errorBundlePretty-" ++ show totalLines ++ "-" ++ name) 139 | errorBundlePretty 140 | bundle 141 | 142 | -- | Benchmark the 'reachOffset' function. 143 | breachOffset :: 144 | -- | Starting offset in 'PosState' 145 | Int -> 146 | -- | Offset to reach 147 | Int -> 148 | Weigh () 149 | breachOffset o0 o1 = 150 | func 151 | ("reachOffset-" ++ show o0 ++ "-" ++ show o1) 152 | f 153 | (o0 * 80, o1 * 80) 154 | where 155 | f :: (Int, Int) -> PosState Text 156 | f (startOffset, targetOffset) = 157 | snd $ 158 | reachOffset 159 | targetOffset 160 | PosState 161 | { pstateInput = manyAs (targetOffset - startOffset), 162 | pstateOffset = startOffset, 163 | pstateSourcePos = initialPos "", 164 | pstateTabWidth = defaultTabWidth, 165 | pstateLinePrefix = "" 166 | } 167 | 168 | -- | Benchmark the 'reachOffsetNoLine' function. 169 | breachOffsetNoLine :: 170 | -- | Starting offset in 'PosState' 171 | Int -> 172 | -- | Offset to reach 173 | Int -> 174 | Weigh () 175 | breachOffsetNoLine o0 o1 = 176 | func 177 | ("reachOffsetNoLine-" ++ show o0 ++ "-" ++ show o1) 178 | f 179 | (o0 * 80, o1 * 80) 180 | where 181 | f :: (Int, Int) -> PosState Text 182 | f (startOffset, targetOffset) = 183 | reachOffsetNoLine 184 | targetOffset 185 | PosState 186 | { pstateInput = manyAs (targetOffset - startOffset), 187 | pstateOffset = startOffset, 188 | pstateSourcePos = initialPos "", 189 | pstateTabWidth = defaultTabWidth, 190 | pstateLinePrefix = "" 191 | } 192 | 193 | -- | The series of sizes to try as part of 'bparser'. 194 | stdSeries :: [Int] 195 | stdSeries = [500, 1000, 2000, 4000] 196 | 197 | ---------------------------------------------------------------------------- 198 | -- Helpers 199 | 200 | -- | Generate that many \'a\' characters. 201 | manyAs :: Int -> Text 202 | manyAs n = T.replicate n "a" 203 | 204 | -- | Like 'manyAs' but the result is a 'ByteString'. 205 | many0x33 :: Int -> ByteString 206 | many0x33 n = B.replicate n 0x33 207 | 208 | -- | Like 'manyAs', but interspersed with \'b\'s. 209 | manyAbs :: Int -> Text 210 | manyAbs n = T.take (if even n then n + 1 else n) (T.replicate n "ab") 211 | 212 | -- | Like 'manyAs', but with a \'b\' added to the end. 213 | manyAsB :: Int -> Text 214 | manyAsB n = manyAs n <> "b" 215 | 216 | -- | Like 'manyAsB', but returns a 'String'. 217 | manyAsB' :: Int -> String 218 | manyAsB' n = replicate n 'a' ++ "b" 219 | 220 | -- | Like 'manyAbs', but ends in a \'b\'. 221 | manyAbs' :: Int -> Text 222 | manyAbs' n = T.take (if even n then n else n + 1) (T.replicate n "ab") 223 | 224 | -- | Render an 'Integer' with the number of digits linearly dependent on the 225 | -- argument. 226 | mkInt :: Int -> Text 227 | mkInt n = (T.pack . show) ((10 :: Integer) ^ (n `quot` 100)) 228 | -------------------------------------------------------------------------------- /bench/speed/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | 4 | module Main (main) where 5 | 6 | import Control.DeepSeq 7 | import Criterion.Main 8 | import Data.ByteString (ByteString) 9 | import qualified Data.ByteString as B 10 | import Data.List.NonEmpty (NonEmpty (..)) 11 | import qualified Data.List.NonEmpty as NE 12 | import qualified Data.Set as E 13 | import Data.Text (Text) 14 | import qualified Data.Text as T 15 | import Data.Void 16 | import Text.Megaparsec 17 | import qualified Text.Megaparsec.Byte.Binary as Binary 18 | import Text.Megaparsec.Char 19 | import qualified Text.Megaparsec.Char.Lexer as L 20 | 21 | -- | The type of parser that consumes 'Text'. 22 | type Parser = Parsec Void Text 23 | 24 | -- | The type of parser that consumes 'ByteString'. 25 | type ParserBs = Parsec Void ByteString 26 | 27 | main :: IO () 28 | main = 29 | defaultMain 30 | [ bparser "string" manyAs (string . fst), 31 | bparser "string'" manyAs (string' . fst), 32 | bparser "many" manyAs (const $ many (char 'a')), 33 | bparser "some" manyAs (const $ some (char 'a')), 34 | bparser "choice" (const "b") (choice . fmap char . manyAsB' . snd), 35 | bparser "count" manyAs (\(_, n) -> count n (char 'a')), 36 | bparser "count'" manyAs (\(_, n) -> count' 1 n (char 'a')), 37 | bparser "endBy" manyAbs' (const $ endBy (char 'a') (char 'b')), 38 | bparser "endBy1" manyAbs' (const $ endBy1 (char 'a') (char 'b')), 39 | bparser "manyTill" manyAsB (const $ manyTill (char 'a') (char 'b')), 40 | bparser "someTill" manyAsB (const $ someTill (char 'a') (char 'b')), 41 | bparser "sepBy" manyAbs (const $ sepBy (char 'a') (char 'b')), 42 | bparser "sepBy1" manyAbs (const $ sepBy1 (char 'a') (char 'b')), 43 | bparser "sepEndBy" manyAbs' (const $ sepEndBy (char 'a') (char 'b')), 44 | bparser "sepEndBy1" manyAbs' (const $ sepEndBy1 (char 'a') (char 'b')), 45 | bparser "skipMany" manyAs (const $ skipMany (char 'a')), 46 | bparser "skipSome" manyAs (const $ skipSome (char 'a')), 47 | bparser "skipCount" manyAs (\(_, n) -> skipCount n (char 'a')), 48 | bparser "skipManyTill" manyAsB (const $ skipManyTill (char 'a') (char 'b')), 49 | bparser "skipSomeTill" manyAsB (const $ skipSomeTill (char 'a') (char 'b')), 50 | bparser "takeWhileP" manyAs (const $ takeWhileP Nothing (== 'a')), 51 | bparser "takeWhile1P" manyAs (const $ takeWhile1P Nothing (== 'a')), 52 | bparser "decimal" mkInt (const (L.decimal :: Parser Integer)), 53 | bparser "octal" mkInt (const (L.octal :: Parser Integer)), 54 | bparser "hexadecimal" mkInt (const (L.hexadecimal :: Parser Integer)), 55 | bparser "scientific" mkInt (const L.scientific), 56 | bparserBs "word32be" many0x33 (const $ many Binary.word32be), 57 | bparserBs "word32le" many0x33 (const $ many Binary.word32le), 58 | bgroup "" [bbundle "single error" n [n] | n <- stdSeries], 59 | bbundle "2 errors" 1000 [1, 1000], 60 | bbundle "4 errors" 1000 [1, 500, 1000], 61 | bbundle "100 errors" 1000 [10, 20 .. 1000], 62 | breachOffset 0 1000, 63 | breachOffset 0 2000, 64 | breachOffset 0 4000, 65 | breachOffset 1000 1000, 66 | breachOffsetNoLine 0 1000, 67 | breachOffsetNoLine 0 2000, 68 | breachOffsetNoLine 0 4000, 69 | breachOffsetNoLine 1000 1000 70 | ] 71 | 72 | -- | Perform a series to measurements with the same parser. 73 | bparser :: 74 | (NFData a) => 75 | -- | Name of the benchmark group 76 | String -> 77 | -- | How to construct input 78 | (Int -> Text) -> 79 | -- | The parser receiving its future input 80 | ((Text, Int) -> Parser a) -> 81 | -- | The benchmark 82 | Benchmark 83 | bparser name f p = bgroup name (bs <$> stdSeries) 84 | where 85 | bs n = env (return (f n, n)) (bench (show n) . nf p') 86 | p' (s, n) = parse (p (s, n)) "" s 87 | 88 | -- | Perform a series to measurements with the same parser. 89 | bparserBs :: 90 | (NFData a) => 91 | -- | Name of the benchmark group 92 | String -> 93 | -- | How to construct input 94 | (Int -> ByteString) -> 95 | -- | The parser receiving its future input 96 | ((ByteString, Int) -> ParserBs a) -> 97 | -- | The benchmark 98 | Benchmark 99 | bparserBs name f p = bgroup name (bs <$> stdSeries) 100 | where 101 | bs n = env (return (f n, n)) (bench (show n) . nf p') 102 | p' (s, n) = parse (p (s, n)) "" s 103 | 104 | -- | Benchmark the 'errorBundlePretty' function. 105 | bbundle :: 106 | -- | Name of the benchmark 107 | String -> 108 | -- | Number of lines in input stream 109 | Int -> 110 | -- | Lines with parse errors 111 | [Int] -> 112 | Benchmark 113 | bbundle name totalLines sps = 114 | let s = take (totalLines * 80) (cycle as) 115 | as = replicate 79 'a' ++ "\n" 116 | f l = 117 | TrivialError 118 | (20 + l * 80) 119 | (Just $ Tokens ('a' :| "")) 120 | (E.singleton $ Tokens ('b' :| "")) 121 | bundle :: ParseErrorBundle String Void 122 | bundle = 123 | ParseErrorBundle 124 | { bundleErrors = f <$> NE.fromList sps, 125 | bundlePosState = 126 | PosState 127 | { pstateInput = s, 128 | pstateOffset = 0, 129 | pstateSourcePos = initialPos "", 130 | pstateTabWidth = defaultTabWidth, 131 | pstateLinePrefix = "" 132 | } 133 | } 134 | in bench 135 | ("errorBundlePretty-" ++ show totalLines ++ "-" ++ name) 136 | (nf errorBundlePretty bundle) 137 | 138 | -- | Benchmark the 'reachOffset' function. 139 | breachOffset :: 140 | -- | Starting offset in 'PosState' 141 | Int -> 142 | -- | Offset to reach 143 | Int -> 144 | Benchmark 145 | breachOffset o0 o1 = 146 | bench 147 | ("reachOffset-" ++ show o0 ++ "-" ++ show o1) 148 | (nf f (o0 * 80, o1 * 80)) 149 | where 150 | f :: (Int, Int) -> PosState Text 151 | f (startOffset, targetOffset) = 152 | snd $ 153 | reachOffset 154 | targetOffset 155 | PosState 156 | { pstateInput = manyAs (targetOffset - startOffset), 157 | pstateOffset = startOffset, 158 | pstateSourcePos = initialPos "", 159 | pstateTabWidth = defaultTabWidth, 160 | pstateLinePrefix = "" 161 | } 162 | 163 | -- | Benchmark the 'reachOffsetNoLine' function. 164 | breachOffsetNoLine :: 165 | -- | Starting offset in 'PosState' 166 | Int -> 167 | -- | Offset to reach 168 | Int -> 169 | Benchmark 170 | breachOffsetNoLine o0 o1 = 171 | bench 172 | ("reachOffsetNoLine-" ++ show o0 ++ "-" ++ show o1) 173 | (nf f (o0 * 80, o1 * 80)) 174 | where 175 | f :: (Int, Int) -> PosState Text 176 | f (startOffset, targetOffset) = 177 | reachOffsetNoLine 178 | targetOffset 179 | PosState 180 | { pstateInput = manyAs (targetOffset - startOffset), 181 | pstateOffset = startOffset, 182 | pstateSourcePos = initialPos "", 183 | pstateTabWidth = defaultTabWidth, 184 | pstateLinePrefix = "" 185 | } 186 | 187 | -- | The series of sizes to try as part of 'bparser'. 188 | stdSeries :: [Int] 189 | stdSeries = [500, 1000, 2000, 4000] 190 | 191 | ---------------------------------------------------------------------------- 192 | -- Helpers 193 | 194 | -- | Generate that many \'a\' characters. 195 | manyAs :: Int -> Text 196 | manyAs n = T.replicate n "a" 197 | 198 | -- | Like 'manyAs' but the result is a 'ByteString'. 199 | many0x33 :: Int -> ByteString 200 | many0x33 n = B.replicate n 0x33 201 | 202 | -- | Like 'manyAs', but interspersed with \'b\'s. 203 | manyAbs :: Int -> Text 204 | manyAbs n = T.take (if even n then n + 1 else n) (T.replicate n "ab") 205 | 206 | -- | Like 'manyAs', but with a \'b\' added to the end. 207 | manyAsB :: Int -> Text 208 | manyAsB n = manyAs n <> "b" 209 | 210 | -- | Like 'manyAsB', but returns a 'String'. 211 | manyAsB' :: Int -> String 212 | manyAsB' n = replicate n 'a' ++ "b" 213 | 214 | -- | Like 'manyAbs', but ends in a \'b\'. 215 | manyAbs' :: Int -> Text 216 | manyAbs' n = T.take (if even n then n else n + 1) (T.replicate n "ab") 217 | 218 | -- | Render an 'Integer' with the number of digits linearly dependent on the 219 | -- argument. 220 | mkInt :: Int -> Text 221 | mkInt n = (T.pack . show) ((10 :: Integer) ^ (n `quot` 100)) 222 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . megaparsec-tests 2 | tests: True 3 | benchmarks: True 4 | constraints: megaparsec +dev, megaparsec-tests +dev 5 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "locked": { 5 | "lastModified": 1667395993, 6 | "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", 7 | "owner": "numtide", 8 | "repo": "flake-utils", 9 | "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "owner": "numtide", 14 | "repo": "flake-utils", 15 | "type": "github" 16 | } 17 | }, 18 | "nixpkgs": { 19 | "locked": { 20 | "lastModified": 1668443372, 21 | "narHash": "sha256-lXNlVyNWwO22/JUdBtUWz68jZB3DM+Jq/irlsbwncI0=", 22 | "owner": "NixOS", 23 | "repo": "nixpkgs", 24 | "rev": "dad4de1694cd92d9a0e123bfdf134d0047b836a5", 25 | "type": "github" 26 | }, 27 | "original": { 28 | "owner": "NixOS", 29 | "ref": "nixpkgs-unstable", 30 | "repo": "nixpkgs", 31 | "type": "github" 32 | } 33 | }, 34 | "root": { 35 | "inputs": { 36 | "flake-utils": "flake-utils", 37 | "nixpkgs": "nixpkgs" 38 | } 39 | } 40 | }, 41 | "root": "root", 42 | "version": 7 43 | } 44 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Megaparsec Nix helpers"; 3 | inputs = { 4 | nixpkgs = { 5 | type = "github"; 6 | owner = "NixOS"; 7 | repo = "nixpkgs"; 8 | ref = "nixpkgs-unstable"; 9 | }; 10 | flake-utils = { 11 | type = "github"; 12 | owner = "numtide"; 13 | repo = "flake-utils"; 14 | }; 15 | }; 16 | outputs = { self, nixpkgs, flake-utils }: 17 | let 18 | pkgs = import nixpkgs { 19 | system = "x86_64-linux"; 20 | config.allowBroken = true; 21 | }; 22 | ghc = "ghc924"; 23 | 24 | megaparsecSource = pkgs.lib.sourceByRegex ./. [ 25 | "^CHANGELOG\.md$" 26 | "^LICENSE\.md$" 27 | "^README\.md$" 28 | "^Text.*$" 29 | "^bench.*$" 30 | "^megaparsec\.cabal$" 31 | ]; 32 | 33 | megaparsecTestsSource = pkgs.lib.sourceByRegex ./megaparsec-tests [ 34 | "^LICENSE\.md$" 35 | "^README\.md$" 36 | "^megaparsec-tests\.cabal$" 37 | "^src.*$" 38 | "^tests.*$" 39 | ]; 40 | 41 | parsersBenchSource = pkgs.lib.sourceByRegex ./parsers-bench [ 42 | "^README\.md$" 43 | "^parsers-bench\.cabal$" 44 | "^ParsersBench.*$" 45 | "^bench.*$" 46 | "^data.*$" 47 | ]; 48 | 49 | doBenchmark = p: 50 | let 51 | targets = [ "bench-speed" "bench-memory" ]; 52 | copying = pkgs.lib.concatMapStrings 53 | (t: "cp dist/build/${t}/${t} $out/bench/\n") 54 | targets; 55 | in 56 | pkgs.haskell.lib.doBenchmark 57 | (p.overrideAttrs (drv: { 58 | postInstall = '' 59 | mkdir -p $out/bench 60 | if test -d data/ 61 | then 62 | mkdir -p $out/bench/data 63 | cp data/* $out/bench/data/ 64 | fi 65 | ${copying} 66 | ''; 67 | })); 68 | 69 | doJailbreak = pkgs.haskell.lib.doJailbreak; 70 | 71 | patch = p: patch: 72 | pkgs.haskell.lib.appendPatch p patch; 73 | 74 | megaparsecOverlay = self: super: { 75 | "megaparsec" = doBenchmark 76 | (super.callCabal2nix "megaparsec" megaparsecSource { }); 77 | "megaparsec-tests" = 78 | super.callCabal2nix "megaparsec-tests" megaparsecTestsSource { }; 79 | # The ‘parser-combinators-tests’ package is a bit special because it 80 | # does not contain an executable nor a library, so its install phase 81 | # normally fails. We want to build it and run the tests anyway, so we 82 | # have to do these manipulations. 83 | "parser-combinators-tests" = pkgs.haskell.lib.dontHaddock 84 | (super.parser-combinators-tests.overrideAttrs (drv: { 85 | installPhase = "mkdir $out"; 86 | })); 87 | "modern-uri" = doBenchmark super.modern-uri; 88 | "parsers-bench" = doBenchmark 89 | (super.callCabal2nix "parsers-bench" parsersBenchSource { }); 90 | "mmark" = doBenchmark super.mmark; 91 | }; 92 | 93 | updatedPkgs = pkgs // { 94 | haskell = pkgs.haskell // { 95 | packages = pkgs.haskell.packages // { 96 | "${ghc}" = pkgs.haskell.packages.${ghc}.override { 97 | overrides = megaparsecOverlay; 98 | }; 99 | }; 100 | }; 101 | }; 102 | 103 | haskellPackages = updatedPkgs.haskell.packages.${ghc}; 104 | 105 | # Base: Megaparsec and its unit tests: 106 | base = { 107 | inherit (haskellPackages) 108 | hspec-megaparsec 109 | megaparsec 110 | megaparsec-tests 111 | parser-combinators-tests; 112 | }; 113 | 114 | # Dependent packages of interest: 115 | deps = { 116 | inherit (haskellPackages) 117 | cachix 118 | cassava-megaparsec 119 | cue-sheet 120 | dhall 121 | hledger 122 | idris 123 | mmark 124 | modern-uri 125 | replace-megaparsec 126 | stache 127 | tomland; 128 | }; 129 | 130 | # Benchmarks: 131 | benches = { 132 | inherit (haskellPackages) 133 | megaparsec 134 | mmark 135 | modern-uri 136 | parsers-bench; 137 | }; 138 | 139 | # Source distributions: 140 | dist = with pkgs.haskell.lib; { 141 | megaparsec = sdistTarball haskellPackages.megaparsec; 142 | megaparsec-tests = sdistTarball haskellPackages.megaparsec-tests; 143 | }; 144 | 145 | in 146 | flake-utils.lib.eachDefaultSystem (system: 147 | { 148 | packages = flake-utils.lib.flattenTree { 149 | base = pkgs.recurseIntoAttrs base; 150 | all_base = pkgs.linkFarmFromDrvs "base" (builtins.attrValues base); 151 | deps = pkgs.recurseIntoAttrs deps; 152 | all_deps = pkgs.linkFarmFromDrvs "deps" (builtins.attrValues deps); 153 | benches = pkgs.recurseIntoAttrs benches; 154 | all_benches = pkgs.linkFarmFromDrvs "benches" (builtins.attrValues benches); 155 | dist = pkgs.recurseIntoAttrs dist; 156 | all_dist = pkgs.linkFarmFromDrvs "dist" (builtins.attrValues dist); 157 | }; 158 | defaultPackage = base.megaparsec; 159 | devShells.default = haskellPackages.shellFor { 160 | packages = ps: [ 161 | ps.megaparsec 162 | ps.megaparsec-tests 163 | ]; 164 | buildInputs = with haskellPackages; [ 165 | cabal-install 166 | ghcid 167 | ]; 168 | }; 169 | }); 170 | } 171 | -------------------------------------------------------------------------------- /megaparsec-tests/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright © 2015–present Megaparsec contributors 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS 16 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 18 | NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /megaparsec-tests/README.md: -------------------------------------------------------------------------------- 1 | # Megaparsec tests 2 | 3 | Megaparsec's test suite as a standalone package. The reason for the 4 | separation is that we can avoid circular dependency on `hspec-megaparsec` 5 | and thus avoid keeping copies of its source files in our test suite, as we 6 | had to do before. Another benefit is that we can export some auxiliary 7 | functions in `megaparsec-tests` which can be used by other test suites, for 8 | example in the `parser-combinators-tests` package. 9 | 10 | The version number of `megaparsec-tests` will be kept in sync with versions 11 | of `megaparsec` from now on. 12 | 13 | ## License 14 | 15 | Copyright © 2015–present Megaparsec contributors 16 | 17 | Distributed under FreeBSD license. 18 | -------------------------------------------------------------------------------- /megaparsec-tests/megaparsec-tests.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.4 2 | name: megaparsec-tests 3 | version: 9.7.0 4 | license: BSD-2-Clause 5 | license-file: LICENSE.md 6 | maintainer: Mark Karpov 7 | author: Megaparsec contributors 8 | tested-with: ghc ==9.8.4 ghc ==9.10.1 ghc ==9.12.1 9 | homepage: https://github.com/mrkkrp/megaparsec 10 | bug-reports: https://github.com/mrkkrp/megaparsec/issues 11 | synopsis: Test utilities and the test suite of Megaparsec 12 | description: Test utilities and the test suite of Megaparsec. 13 | category: Parsing 14 | build-type: Simple 15 | extra-doc-files: README.md 16 | 17 | flag dev 18 | description: Turn on development settings. 19 | default: False 20 | manual: True 21 | 22 | library 23 | exposed-modules: Test.Hspec.Megaparsec.AdHoc 24 | hs-source-dirs: src 25 | default-language: Haskell2010 26 | build-depends: 27 | QuickCheck >=2.10 && <2.16, 28 | base >=4.15 && <5, 29 | bytestring >=0.2 && <0.13, 30 | containers >=0.5 && <0.8, 31 | hspec >=2 && <3, 32 | hspec-megaparsec >=2 && <3, 33 | megaparsec ==9.7.0, 34 | mtl >=2.2.2 && <3, 35 | text >=0.2 && <2.2, 36 | transformers >=0.4 && <0.7 37 | 38 | if flag(dev) 39 | ghc-options: 40 | -Wall -Werror -Wredundant-constraints -Wpartial-fields 41 | -Wunused-packages 42 | 43 | else 44 | ghc-options: -O2 -Wall 45 | 46 | test-suite tests 47 | type: exitcode-stdio-1.0 48 | main-is: Spec.hs 49 | build-tool-depends: hspec-discover:hspec-discover >=2.0 && <3.0 50 | hs-source-dirs: tests 51 | other-modules: 52 | Text.Megaparsec.Byte.BinarySpec 53 | Text.Megaparsec.Byte.LexerSpec 54 | Text.Megaparsec.ByteSpec 55 | Text.Megaparsec.Char.LexerSpec 56 | Text.Megaparsec.CharSpec 57 | Text.Megaparsec.DebugSpec 58 | Text.Megaparsec.ErrorSpec 59 | Text.Megaparsec.PosSpec 60 | Text.Megaparsec.StreamSpec 61 | Text.Megaparsec.UnicodeSpec 62 | Text.MegaparsecSpec 63 | 64 | default-language: Haskell2010 65 | build-depends: 66 | QuickCheck >=2.10 && <2.16, 67 | base >=4.15 && <5, 68 | bytestring >=0.2 && <0.13, 69 | case-insensitive >=1.2 && <1.3, 70 | containers >=0.5 && <0.8, 71 | hspec >=2 && <3, 72 | hspec-megaparsec >=2 && <3, 73 | megaparsec ==9.7.0, 74 | megaparsec-tests, 75 | mtl >=2.2.2 && <3, 76 | scientific >=0.3.1 && <0.4, 77 | temporary >=1.1 && <1.4, 78 | text >=0.2 && <2.2, 79 | transformers >=0.4 && <0.7 80 | 81 | if flag(dev) 82 | ghc-options: 83 | -Wall -Werror -Wredundant-constraints -Wpartial-fields 84 | -Wunused-packages -Wno-unused-imports 85 | 86 | else 87 | ghc-options: -O2 -Wall 88 | 89 | if impl(ghc >=9.8) 90 | ghc-options: -Wno-x-partial 91 | -------------------------------------------------------------------------------- /megaparsec-tests/src/Test/Hspec/Megaparsec/AdHoc.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE RankNTypes #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | {-# LANGUAGE TypeFamilies #-} 5 | {-# LANGUAGE UndecidableInstances #-} 6 | {-# OPTIONS_GHC -fno-warn-orphans #-} 7 | 8 | -- | 9 | -- Module : Test.Hspec.Megaparsec.AdHoc 10 | -- Copyright : © 2019–present Megaparsec contributors 11 | -- License : FreeBSD 12 | -- 13 | -- Maintainer : Mark Karpov 14 | -- Stability : experimental 15 | -- Portability : portable 16 | -- 17 | -- Ad-hoc helpers used in Megaparsec's test suite. 18 | module Test.Hspec.Megaparsec.AdHoc 19 | ( -- * Types 20 | Parser, 21 | 22 | -- * Helpers to run parsers 23 | prs, 24 | prs', 25 | prs_, 26 | grs, 27 | grs', 28 | 29 | -- * Other 30 | nes, 31 | abcRow, 32 | rightOrder, 33 | scaleDown, 34 | getTabWidth, 35 | setTabWidth, 36 | strSourcePos, 37 | 38 | -- * Char and byte conversion 39 | toChar, 40 | fromChar, 41 | 42 | -- * Proxies 43 | sproxy, 44 | bproxy, 45 | blproxy, 46 | tproxy, 47 | tlproxy, 48 | ) 49 | where 50 | 51 | import qualified Control.Monad.RWS.Lazy as L 52 | import qualified Control.Monad.RWS.Strict as S 53 | import Control.Monad.Reader 54 | import qualified Control.Monad.State.Lazy as L 55 | import qualified Control.Monad.State.Strict as S 56 | import Control.Monad.Trans.Identity 57 | import qualified Control.Monad.Writer.Lazy as L 58 | import qualified Control.Monad.Writer.Strict as S 59 | import qualified Data.ByteString as B 60 | import qualified Data.ByteString.Lazy as BL 61 | import Data.Char (chr, ord) 62 | import Data.List.NonEmpty (NonEmpty (..)) 63 | import qualified Data.List.NonEmpty as NE 64 | import Data.Proxy 65 | import qualified Data.Set as E 66 | import qualified Data.Text as T 67 | import qualified Data.Text.Lazy as TL 68 | import Data.Void 69 | import Data.Word (Word8) 70 | import Test.Hspec 71 | import Test.Hspec.Megaparsec 72 | import Test.QuickCheck 73 | import Text.Megaparsec 74 | import Text.Megaparsec.Debug (MonadParsecDbg) 75 | 76 | ---------------------------------------------------------------------------- 77 | -- Types 78 | 79 | -- | The type of parser that consumes a 'String'. 80 | type Parser = Parsec Void String 81 | 82 | ---------------------------------------------------------------------------- 83 | -- Helpers to run parsers 84 | 85 | -- | Apply the parser to the given input. This is a specialized version of 86 | -- 'parse' that assumes empty file name. 87 | prs :: 88 | -- | Parser to run 89 | Parser a -> 90 | -- | Input for the parser 91 | String -> 92 | -- | Result of parsing 93 | Either (ParseErrorBundle String Void) a 94 | prs p = parse p "" 95 | 96 | -- | Just like 'prs', but allows us to inspect the final state of the 97 | -- parser. 98 | prs' :: 99 | -- | Parser to run 100 | Parser a -> 101 | -- | Input for the parser 102 | String -> 103 | -- | Result of parsing 104 | (State String Void, Either (ParseErrorBundle String Void) a) 105 | prs' p s = runParser' p (initialState s) 106 | 107 | -- | Just like 'prs', but forces the parser to consume all input by adding 108 | -- 'eof': 109 | -- 110 | -- > prs_ p = parse (p <* eof) "" 111 | prs_ :: 112 | -- | Parser to run 113 | Parser a -> 114 | -- | Input for the parser 115 | String -> 116 | -- | Result of parsing 117 | Either (ParseErrorBundle String Void) a 118 | prs_ p = parse (p <* eof) "" 119 | 120 | -- | Just like 'prs', but interprets given parser as various monads (tries 121 | -- all supported monads transformers in turn). 122 | grs :: 123 | -- | Parser to run 124 | (forall m. (MonadParsecDbg Void String m) => m a) -> 125 | -- | Input for the parser 126 | String -> 127 | -- | How to check result of parsing 128 | (Either (ParseErrorBundle String Void) a -> Expectation) -> 129 | Expectation 130 | grs p s r = do 131 | r (prs p s) 132 | r (prs (runIdentityT p) s) 133 | r (prs (runReaderT p ()) s) 134 | r (prs (L.evalStateT p ()) s) 135 | r (prs (S.evalStateT p ()) s) 136 | r (prs (evalWriterTL p) s) 137 | r (prs (evalWriterTS p) s) 138 | r (prs (evalRWSTL p) s) 139 | r (prs (evalRWSTS p) s) 140 | 141 | -- | 'grs'' to 'grs' is as 'prs'' to 'prs'. 142 | grs' :: 143 | -- | Parser to run 144 | (forall m. (MonadParsecDbg Void String m) => m a) -> 145 | -- | Input for the parser 146 | String -> 147 | -- | How to check result of parsing 148 | ((State String Void, Either (ParseErrorBundle String Void) a) -> Expectation) -> 149 | Expectation 150 | grs' p s r = do 151 | r (prs' p s) 152 | r (prs' (runIdentityT p) s) 153 | r (prs' (runReaderT p ()) s) 154 | r (prs' (L.evalStateT p ()) s) 155 | r (prs' (S.evalStateT p ()) s) 156 | r (prs' (evalWriterTL p) s) 157 | r (prs' (evalWriterTS p) s) 158 | r (prs' (evalRWSTL p) s) 159 | r (prs' (evalRWSTS p) s) 160 | 161 | evalWriterTL :: (Monad m) => L.WriterT [Int] m a -> m a 162 | evalWriterTL = fmap fst . L.runWriterT 163 | 164 | evalWriterTS :: (Monad m) => S.WriterT [Int] m a -> m a 165 | evalWriterTS = fmap fst . S.runWriterT 166 | 167 | evalRWSTL :: (Monad m) => L.RWST () [Int] () m a -> m a 168 | evalRWSTL m = do 169 | (a, _, _) <- L.runRWST m () () 170 | return a 171 | 172 | evalRWSTS :: (Monad m) => S.RWST () [Int] () m a -> m a 173 | evalRWSTS m = do 174 | (a, _, _) <- S.runRWST m () () 175 | return a 176 | 177 | ---------------------------------------------------------------------------- 178 | -- Other 179 | 180 | -- | Make a singleton non-empty list from a value. 181 | nes :: a -> NonEmpty a 182 | nes x = x :| [] 183 | 184 | -- | @abcRow a b c@ generates string consisting of character “a” repeated 185 | -- @a@ times, character “b” repeated @b@ times, and character “c” repeated 186 | -- @c@ times. 187 | abcRow :: Int -> Int -> Int -> String 188 | abcRow a b c = replicate a 'a' ++ replicate b 'b' ++ replicate c 'c' 189 | 190 | -- | Check that the given parser returns the list in the right order. 191 | rightOrder :: 192 | -- | The parser to test 193 | Parser String -> 194 | -- | Input for the parser 195 | String -> 196 | -- | Expected result 197 | String -> 198 | Spec 199 | rightOrder p s s' = 200 | it "produces the list in the right order" $ 201 | prs_ p s `shouldParse` s' 202 | 203 | -- | Get tab width from 'PosState'. Use with care only for testing. 204 | getTabWidth :: (MonadParsec e s m) => m Pos 205 | getTabWidth = pstateTabWidth . statePosState <$> getParserState 206 | 207 | -- | Set tab width in 'PosState'. Use with care only for testing. 208 | setTabWidth :: (MonadParsec e s m) => Pos -> m () 209 | setTabWidth w = updateParserState $ \st -> 210 | let pst = statePosState st 211 | in st {statePosState = pst {pstateTabWidth = w}} 212 | 213 | -- | Scale down. 214 | scaleDown :: Gen a -> Gen a 215 | scaleDown = scale (`div` 4) 216 | 217 | -- | A helper function that is used to advance 'SourcePos' given a 'String'. 218 | strSourcePos :: Pos -> SourcePos -> String -> SourcePos 219 | strSourcePos tabWidth ipos input = 220 | let (_, pst') = reachOffset maxBound pstate in pstateSourcePos pst' 221 | where 222 | pstate = 223 | PosState 224 | { pstateInput = input, 225 | pstateOffset = 0, 226 | pstateSourcePos = ipos, 227 | pstateTabWidth = tabWidth, 228 | pstateLinePrefix = "" 229 | } 230 | 231 | ---------------------------------------------------------------------------- 232 | -- Char and byte conversion 233 | 234 | -- | Convert a byte to char. 235 | toChar :: Word8 -> Char 236 | toChar = chr . fromIntegral 237 | 238 | -- | Covert a char to byte. 239 | fromChar :: Char -> Maybe Word8 240 | fromChar x = 241 | let p = ord x 242 | in if p > 0xff 243 | then Nothing 244 | else Just (fromIntegral p) 245 | 246 | ---------------------------------------------------------------------------- 247 | -- Proxies 248 | 249 | sproxy :: Proxy String 250 | sproxy = Proxy 251 | 252 | bproxy :: Proxy B.ByteString 253 | bproxy = Proxy 254 | 255 | blproxy :: Proxy BL.ByteString 256 | blproxy = Proxy 257 | 258 | tproxy :: Proxy T.Text 259 | tproxy = Proxy 260 | 261 | tlproxy :: Proxy TL.Text 262 | tlproxy = Proxy 263 | 264 | ---------------------------------------------------------------------------- 265 | -- Arbitrary instances 266 | 267 | instance Arbitrary Void where 268 | arbitrary = error "Arbitrary Void" 269 | 270 | instance Arbitrary Pos where 271 | arbitrary = mkPos <$> (getSmall . getPositive <$> arbitrary) 272 | 273 | instance Arbitrary SourcePos where 274 | arbitrary = 275 | SourcePos 276 | <$> scaleDown arbitrary 277 | <*> arbitrary 278 | <*> arbitrary 279 | 280 | instance (Arbitrary t) => Arbitrary (ErrorItem t) where 281 | arbitrary = 282 | oneof 283 | [ Tokens <$> (NE.fromList . getNonEmpty <$> arbitrary), 284 | Label <$> (NE.fromList . getNonEmpty <$> arbitrary), 285 | return EndOfInput 286 | ] 287 | 288 | instance Arbitrary (ErrorFancy a) where 289 | arbitrary = 290 | oneof 291 | [ ErrorFail <$> scaleDown arbitrary, 292 | ErrorIndentation <$> arbitrary <*> arbitrary <*> arbitrary 293 | ] 294 | 295 | instance 296 | (Arbitrary (Token s), Ord (Token s), Arbitrary e, Ord e) => 297 | Arbitrary (ParseError s e) 298 | where 299 | arbitrary = 300 | oneof 301 | [ TrivialError 302 | <$> (getNonNegative <$> arbitrary) 303 | <*> arbitrary 304 | <*> (E.fromList <$> scaleDown arbitrary), 305 | FancyError 306 | <$> (getNonNegative <$> arbitrary) 307 | <*> (E.fromList <$> scaleDown arbitrary) 308 | ] 309 | 310 | instance (Arbitrary s) => Arbitrary (State s e) where 311 | arbitrary = do 312 | input <- scaleDown arbitrary 313 | offset <- choose (1, 10000) 314 | pstate :: PosState s <- arbitrary 315 | return 316 | State 317 | { stateInput = input, 318 | stateOffset = offset, 319 | statePosState = 320 | pstate 321 | { pstateInput = input, 322 | pstateOffset = offset 323 | }, 324 | stateParseErrors = [] 325 | } 326 | 327 | instance (Arbitrary s) => Arbitrary (PosState s) where 328 | arbitrary = 329 | PosState 330 | <$> arbitrary 331 | <*> choose (1, 10000) 332 | <*> arbitrary 333 | <*> (mkPos <$> choose (1, 20)) 334 | <*> scaleDown arbitrary 335 | 336 | instance Arbitrary T.Text where 337 | arbitrary = T.pack <$> arbitrary 338 | 339 | instance Arbitrary TL.Text where 340 | arbitrary = TL.pack <$> arbitrary 341 | 342 | instance Arbitrary B.ByteString where 343 | arbitrary = B.pack <$> arbitrary 344 | 345 | instance Arbitrary BL.ByteString where 346 | arbitrary = BL.pack <$> arbitrary 347 | 348 | instance (Arbitrary a) => Arbitrary (NonEmpty a) where 349 | arbitrary = NE.fromList <$> (arbitrary `suchThat` (not . null)) 350 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-} 2 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Text/Megaparsec/Byte/BinarySpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE RankNTypes #-} 4 | {-# LANGUAGE TypeApplications #-} 5 | 6 | module Text.Megaparsec.Byte.BinarySpec (spec) where 7 | 8 | import qualified Data.ByteString.Builder as BB 9 | import qualified Data.ByteString.Lazy as BL 10 | import Data.Void 11 | import Test.Hspec 12 | import Test.Hspec.Megaparsec 13 | import Test.QuickCheck 14 | import Text.Megaparsec 15 | import Text.Megaparsec.Byte.Binary 16 | 17 | spec :: Spec 18 | spec = do 19 | describe "word8" $ 20 | testParser word8 BB.word8 21 | describe "word16le" $ 22 | testParser word16le BB.word16LE 23 | describe "word16be" $ 24 | testParser word16be BB.word16BE 25 | describe "word32le" $ 26 | testParser word32le BB.word32LE 27 | describe "word32be" $ 28 | testParser word32be BB.word32BE 29 | describe "word64le" $ 30 | testParser word64le BB.word64LE 31 | describe "word64be" $ 32 | testParser word64be BB.word64BE 33 | describe "int8" $ 34 | testParser int8 BB.int8 35 | describe "int16le" $ 36 | testParser int16le BB.int16LE 37 | describe "int16be" $ 38 | testParser int16be BB.int16BE 39 | describe "int32le" $ do 40 | testParser int32le BB.int32LE 41 | describe "int32be" $ do 42 | testParser int32be BB.int32BE 43 | describe "int64le" $ do 44 | testParser int64le BB.int64LE 45 | describe "int64be" $ do 46 | testParser int64be BB.int64BE 47 | 48 | ---------------------------------------------------------------------------- 49 | -- Helpers 50 | 51 | -- | Test a binary parser. 52 | testParser :: 53 | (Arbitrary a, Show a, Eq a) => 54 | -- | The parser to test 55 | (forall s. (Stream s, BinaryChunk (Tokens s)) => Parsec Void s a) -> 56 | -- | Builder for the values that the parer consumes 57 | (a -> BB.Builder) -> 58 | SpecWith () 59 | testParser parser serializer = do 60 | it "works with strict ByteString" $ 61 | property $ \x -> do 62 | let rendered = (BL.toStrict . BB.toLazyByteString . serializer) x 63 | parse (parser <* eof) "" rendered `shouldParse` x 64 | it "works with lazy ByteString" $ 65 | property $ \x -> do 66 | let rendered = (BB.toLazyByteString . serializer) x 67 | parse (parser <* eof) "" rendered `shouldParse` x 68 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Text/Megaparsec/Byte/LexerSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Text.Megaparsec.Byte.LexerSpec (spec) where 4 | 5 | import Control.Applicative 6 | import Data.ByteString (ByteString) 7 | import qualified Data.ByteString as B 8 | import qualified Data.ByteString.Char8 as B8 9 | import Data.Char (intToDigit, toUpper) 10 | import Data.Scientific (Scientific, fromFloatDigits) 11 | import Data.Void 12 | import Data.Word (Word8) 13 | import Numeric (showFFloatAlt, showHex, showInt, showIntAtBase, showOct) 14 | import Test.Hspec 15 | import Test.Hspec.Megaparsec 16 | import Test.QuickCheck 17 | import Text.Megaparsec 18 | import qualified Text.Megaparsec.Byte as B 19 | import Text.Megaparsec.Byte.Lexer 20 | 21 | type Parser = Parsec Void ByteString 22 | 23 | spec :: Spec 24 | spec = do 25 | describe "skipLineComment" $ do 26 | context "when there is no newline at the end of line" $ 27 | it "is picked up successfully" $ do 28 | let p = space B.space1 (skipLineComment "//") empty <* eof 29 | s = " // this line comment doesn't have a newline at the end " 30 | prs p s `shouldParse` () 31 | prs' p s `succeedsLeaving` "" 32 | it "inner characters are labelled properly" $ do 33 | let p = skipLineComment "//" <* empty 34 | s = "// here we go" 35 | prs p s `shouldFailWith` err (B.length s) (elabel "character") 36 | prs' p s `failsLeaving` "" 37 | 38 | describe "skipBlockComment" $ 39 | it "skips a simple block comment" $ do 40 | let p = skipBlockComment "/*" "*/" 41 | s = "/* here we go */foo!" 42 | prs p s `shouldParse` () 43 | prs' p s `succeedsLeaving` "foo!" 44 | 45 | describe "skipBlockCommentNested" $ 46 | context "when it runs into nested block comments" $ 47 | it "parses them all right" $ do 48 | let p = 49 | space 50 | B.space1 51 | empty 52 | (skipBlockCommentNested "/*" "*/") 53 | <* eof 54 | s = " /* foo bar /* baz */ quux */ " 55 | prs p s `shouldParse` () 56 | prs' p s `succeedsLeaving` "" 57 | 58 | describe "decimal" $ do 59 | context "when stream begins with decimal digits" $ 60 | it "they are parsed as an integer" $ 61 | property $ \n' -> do 62 | let p = decimal :: Parser Integer 63 | n = getNonNegative n' 64 | s = B8.pack (showInt n "") 65 | prs p s `shouldParse` n 66 | prs' p s `succeedsLeaving` "" 67 | context "when stream does not begin with decimal digits" $ 68 | it "signals correct parse error" $ 69 | property $ \a as -> 70 | not (isDigit a) ==> do 71 | let p = decimal :: Parser Integer 72 | s = B.pack (a : as) 73 | prs p s `shouldFailWith` err 0 (utok a <> elabel "integer") 74 | context "when stream is empty" $ 75 | it "signals correct parse error" $ 76 | prs (decimal :: Parser Integer) "" 77 | `shouldFailWith` err 0 (ueof <> elabel "integer") 78 | 79 | describe "binary" $ do 80 | context "when stream begins with binary digits" $ 81 | it "they are parsed as an integer" $ 82 | property $ \n' -> do 83 | let p = binary :: Parser Integer 84 | n = getNonNegative n' 85 | s = B8.pack (showIntAtBase 2 intToDigit n "") 86 | prs p s `shouldParse` n 87 | prs' p s `succeedsLeaving` "" 88 | context "when stream does not begin with binary digits" $ 89 | it "signals correct parse error" $ 90 | property $ \a as -> 91 | a /= 48 && a /= 49 ==> do 92 | let p = binary :: Parser Integer 93 | s = B.pack (a : as) 94 | prs p s 95 | `shouldFailWith` err 0 (utok a <> elabel "binary integer") 96 | context "when stream is empty" $ 97 | it "signals correct parse error" $ 98 | prs (binary :: Parser Integer) "" 99 | `shouldFailWith` err 0 (ueof <> elabel "binary integer") 100 | 101 | describe "octal" $ do 102 | context "when stream begins with octal digits" $ 103 | it "they are parsed as an integer" $ 104 | property $ \n' -> do 105 | let p = octal :: Parser Integer 106 | n = getNonNegative n' 107 | s = B8.pack (showOct n "") 108 | prs p s `shouldParse` n 109 | prs' p s `succeedsLeaving` "" 110 | context "when stream does not begin with octal digits" $ 111 | it "signals correct parse error" $ 112 | property $ \a as -> 113 | not (isOctDigit a) ==> do 114 | let p = octal :: Parser Integer 115 | s = B.pack (a : as) 116 | prs p s 117 | `shouldFailWith` err 0 (utok a <> elabel "octal integer") 118 | context "when stream is empty" $ 119 | it "signals correct parse error" $ 120 | prs (octal :: Parser Integer) "" 121 | `shouldFailWith` err 0 (ueof <> elabel "octal integer") 122 | 123 | describe "hexadecimal" $ do 124 | context "when stream begins with hexadecimal digits" $ 125 | it "they are parsed as an integer" $ 126 | property $ \n' -> do 127 | let p = hexadecimal :: Parser Integer 128 | n = getNonNegative n' 129 | s = B8.pack (showHex n "") 130 | prs p s `shouldParse` n 131 | prs' p s `succeedsLeaving` "" 132 | context "when stream begins with hexadecimal digits (uppercase)" $ 133 | it "they are parsed as an integer" $ 134 | property $ \n' -> do 135 | let p = hexadecimal :: Parser Integer 136 | n = getNonNegative n' 137 | s = B8.pack (toUpper <$> showHex n "") 138 | prs p s `shouldParse` n 139 | prs' p s `succeedsLeaving` "" 140 | context "when stream does not begin with hexadecimal digits" $ 141 | it "signals correct parse error" $ 142 | property $ \a as -> 143 | not (isHexDigit a) ==> do 144 | let p = hexadecimal :: Parser Integer 145 | s = B.pack (a : as) 146 | prs p s 147 | `shouldFailWith` err 0 (utok a <> elabel "hexadecimal integer") 148 | context "when stream is empty" $ 149 | it "signals correct parse error" $ 150 | prs (hexadecimal :: Parser Integer) "" 151 | `shouldFailWith` err 0 (ueof <> elabel "hexadecimal integer") 152 | 153 | describe "scientific" $ do 154 | context "when stream begins with a number" $ 155 | it "parses it" $ 156 | property $ \n' -> do 157 | let p = scientific :: Parser Scientific 158 | s = 159 | B8.pack $ 160 | either 161 | (show . getNonNegative) 162 | (show . getNonNegative) 163 | (n' :: Either (NonNegative Integer) (NonNegative Double)) 164 | prs p s `shouldParse` case n' of 165 | Left x -> fromIntegral (getNonNegative x) 166 | Right x -> fromFloatDigits (getNonNegative x) 167 | prs' p s `succeedsLeaving` "" 168 | context "when fractional part is interrupted" $ 169 | it "signals correct parse error" $ 170 | property $ \(NonNegative n) -> do 171 | let p = scientific <* empty :: Parser Scientific 172 | s = B8.pack (showFFloatAlt Nothing (n :: Double) "") 173 | prs p s 174 | `shouldFailWith` err 175 | (B.length s) 176 | (etok 69 <> etok 101 <> elabel "digit") 177 | prs' p s `failsLeaving` "" 178 | context "when whole part is followed by a dot without valid fractional part" $ 179 | it "parsing of fractional part is backtracked correctly" $ 180 | property $ \(NonNegative n) -> do 181 | let p = scientific :: Parser Scientific 182 | s = B8.pack $ showInt (n :: Integer) ".err" 183 | prs p s `shouldParse` fromIntegral n 184 | prs' p s `succeedsLeaving` ".err" 185 | context "when number is followed by something starting with 'e'" $ 186 | it "parsing of exponent part is backtracked correctly" $ 187 | property $ \(NonNegative n) -> do 188 | let p = scientific :: Parser Scientific 189 | s = B8.pack $ showFFloatAlt Nothing (n :: Double) "err!" 190 | prs p s `shouldParse` fromFloatDigits n 191 | prs' p s `succeedsLeaving` "err!" 192 | context "when stream is empty" $ 193 | it "signals correct parse error" $ 194 | prs (scientific :: Parser Scientific) "" 195 | `shouldFailWith` err 0 (ueof <> elabel "digit") 196 | 197 | describe "float" $ do 198 | context "when stream begins with a float" $ 199 | it "parses it" $ 200 | property $ \n' -> do 201 | let p = float :: Parser Double 202 | n = getNonNegative n' 203 | s = B8.pack (show n) 204 | prs p s `shouldParse` n 205 | prs' p s `succeedsLeaving` "" 206 | context "when stream does not begin with a float" $ 207 | it "signals correct parse error" $ 208 | property $ \a as -> 209 | not (isDigit a) ==> do 210 | let p = float :: Parser Double 211 | s = B.pack (a : as) 212 | prs p s 213 | `shouldFailWith` err 0 (utok a <> elabel "digit") 214 | prs' p s `failsLeaving` s 215 | context "when stream begins with an integer (decimal)" $ 216 | it "signals correct parse error" $ 217 | property $ \n' -> do 218 | let p = float :: Parser Double 219 | n = getNonNegative n' 220 | s = B8.pack $ show (n :: Integer) 221 | prs p s 222 | `shouldFailWith` err 223 | (B.length s) 224 | (ueof <> etok 46 <> etok 69 <> etok 101 <> elabel "digit") 225 | prs' p s `failsLeaving` "" 226 | context "when number is followed by something starting with 'e'" $ 227 | it "parsing of exponent part is backtracked correctly" $ 228 | property $ \(NonNegative n) -> do 229 | let p = float :: Parser Double 230 | s = B8.pack $ showFFloatAlt Nothing (n :: Double) "err!" 231 | prs p s `shouldParse` n 232 | prs' p s `succeedsLeaving` "err!" 233 | context "when stream is empty" $ 234 | it "signals correct parse error" $ 235 | prs (float :: Parser Double) "" 236 | `shouldFailWith` err 0 (ueof <> elabel "digit") 237 | context "when there is float with just exponent" $ 238 | it "parses it all right" $ 239 | do 240 | let p = float :: Parser Double 241 | prs p "123e3" `shouldParse` 123e3 242 | prs' p "123e3" `succeedsLeaving` "" 243 | prs p "123e+3" `shouldParse` 123e+3 244 | prs' p "123e+3" `succeedsLeaving` "" 245 | prs p "123e-3" `shouldParse` 123e-3 246 | prs' p "123e-3" `succeedsLeaving` "" 247 | 248 | describe "signed" $ do 249 | context "with integer" $ 250 | it "parses signed integers" $ 251 | property $ \n -> do 252 | let p :: Parser Integer 253 | p = signed (hidden B.space) decimal 254 | s = B8.pack (show n) 255 | prs p s `shouldParse` n 256 | prs' p s `succeedsLeaving` "" 257 | context "with float" $ 258 | it "parses signed floats" $ 259 | property $ \n -> do 260 | let p :: Parser Double 261 | p = signed (hidden B.space) float 262 | s = B8.pack (show n) 263 | prs p s `shouldParse` n 264 | prs' p s `succeedsLeaving` "" 265 | context "with scientific" $ 266 | it "parses singed scientific numbers" $ 267 | property $ \n -> do 268 | let p = signed (hidden B.space) scientific 269 | s = B8.pack $ either show show (n :: Either Integer Double) 270 | prs p s `shouldParse` case n of 271 | Left x -> fromIntegral x 272 | Right x -> fromFloatDigits x 273 | context "when number is prefixed with plus sign" $ 274 | it "parses the number" $ 275 | property $ \n' -> do 276 | let p :: Parser Integer 277 | p = signed (hidden B.space) decimal 278 | n = getNonNegative n' 279 | s = B8.pack ('+' : show n) 280 | prs p s `shouldParse` n 281 | prs' p s `succeedsLeaving` "" 282 | context "when number is prefixed with white space" $ 283 | it "signals correct parse error" $ 284 | property $ \n -> do 285 | let p :: Parser Integer 286 | p = signed (hidden B.space) decimal 287 | s = B8.pack (' ' : show (n :: Integer)) 288 | prs p s 289 | `shouldFailWith` err 290 | 0 291 | (utok 32 <> etok 43 <> etok 45 <> elabel "integer") 292 | prs' p s `failsLeaving` s 293 | context "when there is white space between sign and digits" $ 294 | it "parses it all right" $ do 295 | let p :: Parser Integer 296 | p = signed (hidden B.space) decimal 297 | s = "- 123" 298 | prs p s `shouldParse` (-123) 299 | prs' p s `succeedsLeaving` "" 300 | 301 | ---------------------------------------------------------------------------- 302 | -- Helpers 303 | 304 | prs :: 305 | -- | Parser to run 306 | Parser a -> 307 | -- | Input for the parser 308 | ByteString -> 309 | -- | Result of parsing 310 | Either (ParseErrorBundle ByteString Void) a 311 | prs p = parse p "" 312 | 313 | prs' :: 314 | -- | Parser to run 315 | Parser a -> 316 | -- | Input for the parser 317 | ByteString -> 318 | -- | Result of parsing 319 | (State ByteString Void, Either (ParseErrorBundle ByteString Void) a) 320 | prs' p s = runParser' p (initialState s) 321 | 322 | isDigit :: Word8 -> Bool 323 | isDigit w = w - 48 < 10 324 | 325 | isOctDigit :: Word8 -> Bool 326 | isOctDigit w = w - 48 < 8 327 | 328 | isHexDigit :: Word8 -> Bool 329 | isHexDigit w = 330 | (w >= 48 && w <= 57) 331 | || (w >= 97 && w <= 102) 332 | || (w >= 65 && w <= 70) 333 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Text/Megaparsec/ByteSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Text.Megaparsec.ByteSpec (spec) where 4 | 5 | import Control.Monad 6 | import Data.ByteString (ByteString) 7 | import qualified Data.ByteString as B 8 | import Data.Char hiding (isSpace) 9 | import Data.Maybe (fromMaybe) 10 | import Data.Void 11 | import Data.Word (Word8) 12 | import Test.Hspec 13 | import Test.Hspec.Megaparsec 14 | import Test.Hspec.Megaparsec.AdHoc hiding (Parser, prs, prs') 15 | import Test.QuickCheck 16 | import Text.Megaparsec 17 | import Text.Megaparsec.Byte 18 | 19 | type Parser = Parsec Void ByteString 20 | 21 | spec :: Spec 22 | spec = do 23 | describe "newline" $ 24 | checkStrLit "newline" "\n" (tokenToChunk bproxy <$> newline) 25 | 26 | describe "csrf" $ 27 | checkStrLit "crlf newline" "\r\n" crlf 28 | 29 | describe "eol" $ do 30 | context "when stream begins with a newline" $ 31 | it "succeeds returning the newline" $ 32 | property $ \s -> do 33 | let s' = "\n" <> s 34 | prs eol s' `shouldParse` "\n" 35 | prs' eol s' `succeedsLeaving` s 36 | context "when stream begins with CRLF sequence" $ 37 | it "parses the CRLF sequence" $ 38 | property $ \s -> do 39 | let s' = "\r\n" <> s 40 | prs eol s' `shouldParse` "\r\n" 41 | prs' eol s' `succeedsLeaving` s 42 | context "when stream begins with '\\r', but it's not followed by '\\n'" $ 43 | it "signals correct parse error" $ 44 | property $ \ch -> 45 | ch /= 10 ==> do 46 | let s = "\r" <> B.singleton ch 47 | prs eol s 48 | `shouldFailWith` err 0 (utoks s <> elabel "end of line") 49 | context "when input stream is '\\r'" $ 50 | it "signals correct parse error" $ 51 | prs eol "\r" 52 | `shouldFailWith` err 53 | 0 54 | (utok 13 <> elabel "end of line") 55 | context "when stream does not begin with newline or CRLF sequence" $ 56 | it "signals correct parse error" $ 57 | property $ \ch s -> 58 | (ch /= 13 && ch /= 10) ==> do 59 | let s' = B.singleton ch <> s 60 | prs eol s' 61 | `shouldFailWith` err 62 | 0 63 | (utoks (B.take 2 s') <> elabel "end of line") 64 | context "when stream is empty" $ 65 | it "signals correct parse error" $ 66 | prs eol "" 67 | `shouldFailWith` err 68 | 0 69 | (ueof <> elabel "end of line") 70 | 71 | describe "tab" $ 72 | checkStrLit "tab" "\t" (tokenToChunk bproxy <$> tab) 73 | 74 | describe "space" $ 75 | it "consumes space up to first non-space character" $ 76 | property $ \s' -> do 77 | let (s0, s1) = B.partition isSpace s' 78 | s = s0 <> s1 79 | prs space s `shouldParse` () 80 | prs' space s `succeedsLeaving` s1 81 | 82 | describe "hspace" $ 83 | it "consumes space up to first non-space character" $ 84 | property $ \s' -> do 85 | let (s0, s1) = B.partition isHSpace s' 86 | s = s0 <> s1 87 | prs hspace s `shouldParse` () 88 | prs' hspace s `succeedsLeaving` s1 89 | 90 | describe "space1" $ do 91 | context "when stream does not start with a space character" $ 92 | it "signals correct parse error" $ 93 | property $ \ch s' -> 94 | not (isSpace ch) ==> do 95 | let (s0, s1) = B.partition isSpace s' 96 | s = B.singleton ch <> s0 <> s1 97 | prs space1 s `shouldFailWith` err 0 (utok ch <> elabel "white space") 98 | prs' space1 s `failsLeaving` s 99 | context "when stream starts with a space character" $ 100 | it "consumes space up to first non-space character" $ 101 | property $ \s' -> do 102 | let (s0, s1) = B.partition isSpace s' 103 | s = " " <> s0 <> s1 104 | prs space1 s `shouldParse` () 105 | prs' space1 s `succeedsLeaving` s1 106 | context "when stream is empty" $ 107 | it "signals correct parse error" $ 108 | prs space1 "" `shouldFailWith` err 0 (ueof <> elabel "white space") 109 | 110 | describe "hspace1" $ do 111 | context "when stream does not start with a space character" $ 112 | it "signals correct parse error" $ 113 | property $ \ch s' -> 114 | not (isSpace ch) ==> do 115 | let (s0, s1) = B.partition isHSpace s' 116 | s = B.singleton ch <> s0 <> s1 117 | prs hspace1 s `shouldFailWith` err 0 (utok ch <> elabel "white space") 118 | prs' hspace1 s `failsLeaving` s 119 | context "when stream starts with a space character" $ 120 | it "consumes space up to first non-space character" $ 121 | property $ \s' -> do 122 | let (s0, s1) = B.partition isHSpace s' 123 | s = " " <> s0 <> s1 124 | prs hspace1 s `shouldParse` () 125 | prs' hspace1 s `succeedsLeaving` s1 126 | context "when stream is empty" $ 127 | it "signals correct parse error" $ 128 | prs hspace1 "" `shouldFailWith` err 0 (ueof <> elabel "white space") 129 | 130 | describe "controlChar" $ 131 | checkCharPred "control character" (isControl . toChar) controlChar 132 | 133 | describe "spaceChar" $ 134 | checkCharRange "white space" [9, 10, 11, 12, 13, 32, 160] spaceChar 135 | 136 | describe "printChar" $ 137 | checkCharPred "printable character" (isPrint . toChar) printChar 138 | 139 | describe "digitChar" $ 140 | checkCharRange "digit" [48 .. 57] digitChar 141 | 142 | describe "binDigitChar" $ 143 | checkCharRange "binary digit" [48 .. 49] binDigitChar 144 | 145 | describe "octDigitChar" $ 146 | checkCharRange "octal digit" [48 .. 55] octDigitChar 147 | 148 | describe "hexDigitChar" $ 149 | checkCharRange "hexadecimal digit" ([48 .. 57] ++ [97 .. 102] ++ [65 .. 70]) hexDigitChar 150 | 151 | describe "char'" $ do 152 | context "when stream begins with the character specified as argument" $ 153 | it "parses the character" $ 154 | property $ \ch s -> do 155 | let sl = B.cons (liftChar toLower ch) s 156 | su = B.cons (liftChar toUpper ch) s 157 | st = B.cons (liftChar toTitle ch) s 158 | prs (char' ch) sl `shouldParse` liftChar toLower ch 159 | prs (char' ch) su `shouldParse` liftChar toUpper ch 160 | prs (char' ch) st `shouldParse` liftChar toTitle ch 161 | prs' (char' ch) sl `succeedsLeaving` s 162 | prs' (char' ch) su `succeedsLeaving` s 163 | prs' (char' ch) st `succeedsLeaving` s 164 | context "when stream does not begin with the character specified as argument" $ 165 | it "signals correct parse error" $ 166 | property $ \ch ch' s -> 167 | not (casei ch ch') ==> do 168 | let s' = B.cons ch' s 169 | ms = utok ch' <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch) 170 | prs (char' ch) s' `shouldFailWith` err 0 ms 171 | prs' (char' ch) s' `failsLeaving` s' 172 | context "when stream is empty" $ 173 | it "signals correct parse error" $ 174 | property $ \ch -> do 175 | let ms = ueof <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch) 176 | prs (char' ch) "" `shouldFailWith` err 0 ms 177 | 178 | ---------------------------------------------------------------------------- 179 | -- Helpers 180 | 181 | checkStrLit :: String -> ByteString -> Parser ByteString -> SpecWith () 182 | checkStrLit name ts p = do 183 | context ("when stream begins with " ++ name) $ 184 | it ("parses the " ++ name) $ 185 | property $ \s -> do 186 | let s' = ts <> s 187 | prs p s' `shouldParse` ts 188 | prs' p s' `succeedsLeaving` s 189 | context ("when stream does not begin with " ++ name) $ 190 | it "signals correct parse error" $ 191 | property $ \ch s -> 192 | ch /= B.head ts ==> do 193 | let s' = B.cons ch s 194 | us = B.take (B.length ts) s' 195 | prs p s' `shouldFailWith` err 0 (utoks us <> etoks ts) 196 | prs' p s' `failsLeaving` s' 197 | context "when stream is empty" $ 198 | it "signals correct parse error" $ 199 | prs p "" `shouldFailWith` err 0 (ueof <> etoks ts) 200 | 201 | checkCharPred :: String -> (Word8 -> Bool) -> Parser Word8 -> SpecWith () 202 | checkCharPred name f p = do 203 | context ("when stream begins with " ++ name) $ 204 | it ("parses the " ++ name) $ 205 | property $ \ch s -> 206 | f ch ==> do 207 | let s' = B.singleton ch <> s 208 | prs p s' `shouldParse` ch 209 | prs' p s' `succeedsLeaving` s 210 | context ("when stream does not begin with " ++ name) $ 211 | it "signals correct parse error" $ 212 | property $ \ch s -> 213 | not (f ch) ==> do 214 | let s' = B.singleton ch <> s 215 | prs p s' `shouldFailWith` err 0 (utok ch <> elabel name) 216 | prs' p s' `failsLeaving` s' 217 | context "when stream is empty" $ 218 | it "signals correct parse error" $ 219 | prs p "" `shouldFailWith` err 0 (ueof <> elabel name) 220 | 221 | checkCharRange :: String -> [Word8] -> Parser Word8 -> SpecWith () 222 | checkCharRange name tchs p = do 223 | forM_ tchs $ \tch -> 224 | context ("when stream begins with " ++ showTokens bproxy (nes tch)) $ 225 | it ("parses the " ++ showTokens bproxy (nes tch)) $ 226 | property $ \s -> do 227 | let s' = B.singleton tch <> s 228 | prs p s' `shouldParse` tch 229 | prs' p s' `succeedsLeaving` s 230 | context "when stream is empty" $ 231 | it "signals correct parse error" $ 232 | prs p "" `shouldFailWith` err 0 (ueof <> elabel name) 233 | 234 | prs :: 235 | -- | Parser to run 236 | Parser a -> 237 | -- | Input for the parser 238 | ByteString -> 239 | -- | Result of parsing 240 | Either (ParseErrorBundle ByteString Void) a 241 | prs p = parse p "" 242 | 243 | prs' :: 244 | -- | Parser to run 245 | Parser a -> 246 | -- | Input for the parser 247 | ByteString -> 248 | -- | Result of parsing 249 | (State ByteString Void, Either (ParseErrorBundle ByteString Void) a) 250 | prs' p s = runParser' p (initialState s) 251 | 252 | -- | 'Word8'-specialized version of 'Data.Char.isSpace'. 253 | isSpace :: Word8 -> Bool 254 | isSpace x 255 | | x >= 9 && x <= 13 = True 256 | | x == 32 = True 257 | | x == 160 = True 258 | | otherwise = False 259 | 260 | -- | Like 'isSpace', but does not accept newlines and carriage returns. 261 | isHSpace :: Word8 -> Bool 262 | isHSpace x 263 | | x == 9 = True 264 | | x == 11 = True 265 | | x == 12 = True 266 | | x == 32 = True 267 | | x == 160 = True 268 | | otherwise = False 269 | 270 | -- | Lift char transformation to byte transformation. 271 | liftChar :: (Char -> Char) -> Word8 -> Word8 272 | liftChar f x = (fromMaybe x . fromChar . f . toChar) x 273 | 274 | -- | Compare two characters case-insensitively. 275 | casei :: Word8 -> Word8 -> Bool 276 | casei x y = 277 | x == liftChar toLower y 278 | || x == liftChar toUpper y 279 | || x == liftChar toTitle y 280 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Text/Megaparsec/DebugSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | 4 | module Text.Megaparsec.DebugSpec (spec) where 5 | 6 | import Control.Exception (evaluate) 7 | import Control.Monad 8 | import Control.Monad.RWS (MonadRWS) 9 | import qualified Control.Monad.RWS.Lazy as L 10 | import qualified Control.Monad.RWS.Strict as S 11 | import Control.Monad.State (MonadState (..), modify) 12 | import qualified Control.Monad.State.Lazy as L 13 | import qualified Control.Monad.State.Strict as S 14 | import Control.Monad.Writer (MonadWriter (..)) 15 | import qualified Control.Monad.Writer.Lazy as L 16 | import qualified Control.Monad.Writer.Strict as S 17 | import Data.Void 18 | import GHC.IO.Handle 19 | import System.IO (stderr) 20 | import System.IO.Temp 21 | import Test.Hspec 22 | import Test.Hspec.Megaparsec 23 | import Test.Hspec.Megaparsec.AdHoc 24 | import Text.Megaparsec 25 | import Text.Megaparsec.Char 26 | import Text.Megaparsec.Debug 27 | 28 | spec :: Spec 29 | spec = do 30 | describe "dbg" $ do 31 | context "when inner parser succeeds consuming input" $ do 32 | it "has no effect on how parser works" $ do 33 | let p :: (MonadParsecDbg Void String m) => m Char 34 | p = dbg "char" (char 'a') 35 | s = "ab" 36 | shouldStderr p s "char> IN: \"ab\"\nchar> MATCH (COK): 'a'\nchar> VALUE: 'a'\nchar> HINTS: []\n\n" 37 | grs p s (`shouldParse` 'a') 38 | grs' p s (`succeedsLeaving` "b") 39 | it "its hints are preserved" $ do 40 | let p :: (MonadParsecDbg Void String m) => m String 41 | p = dbg "many chars" (many (char 'a')) <* empty 42 | s = "abcd" 43 | shouldStderr p s "many chars> IN: \"abcd\"\nmany chars> MATCH (COK): 'a'\nmany chars> VALUE: \"a\"\nmany chars> HINTS: ['a']\n\n" 44 | grs p s (`shouldFailWith` err 1 (etok 'a')) 45 | grs' p s (`failsLeaving` "bcd") 46 | it "prints several hints correctly" $ do 47 | let p :: (MonadParsecDbg Void String m) => m (Maybe Char) 48 | p = dbg "a or b" (optional (char 'a' <|> char 'b')) <* empty 49 | s = "" 50 | shouldStderr p s "a or b> IN: \na or b> MATCH (EOK): \na or b> VALUE: Nothing\na or b> HINTS: ['a','b']\n\n" 51 | grs p s (`shouldFailWith` err 0 (etok 'a' <> etok 'b')) 52 | grs' p s (`failsLeaving` "") 53 | context "when inner parser fails consuming input" $ 54 | it "has no effect on how parser works" $ do 55 | let p :: (MonadParsecDbg Void String m) => m Char 56 | p = dbg "chars" (char 'a' *> char 'c') 57 | s = "abc" 58 | shouldStderr p s "chars> IN: \"abc\"\nchars> MATCH (CERR): 'a'\nchars> ERROR:\nchars> offset=1:\nchars> unexpected 'b'\nchars> expecting 'c'\n\n" 59 | grs p s (`shouldFailWith` err 1 (utok 'b' <> etok 'c')) 60 | grs' p s (`failsLeaving` "bc") 61 | context "when inner parser succeeds without consuming" $ do 62 | it "has no effect on how parser works" $ do 63 | let p :: (MonadParsecDbg Void String m) => m Char 64 | p = dbg "return" (return 'a') 65 | s = "abc" 66 | shouldStderr p s "return> IN: \"abc\"\nreturn> MATCH (EOK): \nreturn> VALUE: 'a'\nreturn> HINTS: []\n\n" 67 | grs p s (`shouldParse` 'a') 68 | grs' p s (`succeedsLeaving` s) 69 | it "its hints are preserved" $ do 70 | let p :: (MonadParsecDbg Void String m) => m String 71 | p = dbg "many chars" (many (char 'a')) <* empty 72 | s = "bcd" 73 | shouldStderr p s "many chars> IN: \"bcd\"\nmany chars> MATCH (EOK): \nmany chars> VALUE: \"\"\nmany chars> HINTS: ['a']\n\n" 74 | grs p s (`shouldFailWith` err 0 (etok 'a')) 75 | grs' p s (`failsLeaving` "bcd") 76 | context "when inner parser fails without consuming" $ 77 | it "has no effect on how parser works" $ do 78 | let p :: (MonadParsecDbg Void String m) => m () 79 | p = dbg "empty" (void empty) 80 | s = "abc" 81 | shouldStderr p s "empty> IN: \"abc\"\nempty> MATCH (EERR): \nempty> ERROR:\nempty> offset=0:\nempty> unknown parse error\n\n" 82 | grs p s (`shouldFailWith` err 0 mempty) 83 | grs' p s (`failsLeaving` s) 84 | let p1, p2 :: (MonadParsecDbg Void String m, MonadWriter [Int] m) => m () 85 | p1 = tell [0] >> dbg "a" (single 'a' >> tell [1]) 86 | p2 = do 87 | void $ dbg "a" (single 'a') 88 | tell [0] 89 | void $ dbg "b" (single 'b') 90 | dbg "c" $ do 91 | void (single 'c') 92 | tell [1] 93 | void (single 'd') 94 | tell [2] 95 | s1 = "a" 96 | s2 = "abcd" 97 | stderr1 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (LOG: [1])\na> HINTS: []\n\n" 98 | stderr2 = "a> IN: \"abcd\"\na> MATCH (COK): 'a'\na> VALUE: 'a' (LOG: [])\na> HINTS: []\n\nb> IN: \"bcd\"\nb> MATCH (COK): 'b'\nb> VALUE: 'b' (LOG: [])\nb> HINTS: []\n\nc> IN: \"cd\"\nc> MATCH (COK): \"cd\"\nc> VALUE: () (LOG: [1,2])\nc> HINTS: []\n\n" 99 | r1 = ((), [0, 1]) 100 | r2 = ((), [0, 1, 2]) 101 | context "Lazy WriterT instance of MonadParsecDbg" $ do 102 | it "example 1" $ do 103 | shouldStderr (L.runWriterT p1) s1 stderr1 104 | prs (L.runWriterT p1) s1 `shouldParse` r1 105 | it "example 2" $ do 106 | shouldStderr (L.runWriterT p2) s2 stderr2 107 | prs (L.runWriterT p2) s2 `shouldParse` r2 108 | context "Strict WriterT instance of MonadParsecDbg" $ do 109 | it "example 1" $ do 110 | shouldStderr (S.runWriterT p1) s1 stderr1 111 | prs (S.runWriterT p1) s1 `shouldParse` r1 112 | it "example 2" $ do 113 | shouldStderr (S.runWriterT p2) s2 stderr2 114 | prs (S.runWriterT p2) s2 `shouldParse` r2 115 | let p3, p4 :: (MonadParsecDbg Void String m, MonadState Int m) => m () 116 | p3 = modify succ >> dbg "a" (single 'a' >> modify succ) 117 | p4 = do 118 | void $ dbg "a" (single 'a') 119 | modify succ 120 | void $ dbg "b" (single 'b') 121 | dbg "c" $ do 122 | void (single 'c') 123 | modify succ 124 | void (single 'd') 125 | modify succ 126 | s3 = "a" 127 | s4 = "abcd" 128 | stderr3 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (STATE: 2)\na> HINTS: []\n\n" 129 | stderr4 = "a> IN: \"abcd\"\na> MATCH (COK): 'a'\na> VALUE: 'a' (STATE: 0)\na> HINTS: []\n\nb> IN: \"bcd\"\nb> MATCH (COK): 'b'\nb> VALUE: 'b' (STATE: 1)\nb> HINTS: []\n\nc> IN: \"cd\"\nc> MATCH (COK): \"cd\"\nc> VALUE: () (STATE: 3)\nc> HINTS: []\n\n" 130 | r3 = ((), 2) 131 | r4 = ((), 3) 132 | context "Lazy StateT instance of MonadParsecDbg" $ do 133 | it "example 3" $ do 134 | shouldStderr (L.runStateT p3 0) s3 stderr3 135 | prs (L.runStateT p3 0) s3 `shouldParse` r3 136 | it "example 4" $ do 137 | shouldStderr (L.runStateT p4 0) s4 stderr4 138 | prs (L.runStateT p4 0) s4 `shouldParse` r4 139 | context "Strict StateT instance of MonadParsecDbg" $ do 140 | it "example 3" $ do 141 | shouldStderr (S.runStateT p3 0) s3 stderr3 142 | prs (S.runStateT p3 0) s3 `shouldParse` r3 143 | it "example 4" $ do 144 | shouldStderr (S.runStateT p4 0) s4 stderr4 145 | prs (S.runStateT p4 0) s4 `shouldParse` r4 146 | let p5 :: (MonadParsecDbg Void String m, MonadRWS () [Int] Int m) => m () 147 | p5 = do 148 | tell [0] 149 | modify succ 150 | dbg "a" (single 'a' >> tell [1] >> modify succ) 151 | s5 = "a" 152 | stderr5 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (STATE: 2) (LOG: [1])\na> HINTS: []\n\n" 153 | stderr7 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (LOG: [1]) (STATE: 2)\na> HINTS: []\n\n" 154 | r5 = ((), 2, [0, 1]) 155 | p6 :: (MonadParsecDbg Void String m, MonadWriter [Int] m, MonadState Int m) => m () 156 | p6 = do 157 | tell [0] 158 | modify succ 159 | dbg "a" (single 'a' >> tell [1] >> modify succ) 160 | r6 = (((), 2), [0, 1]) 161 | r7 = (((), [0, 1]), 2) 162 | context "Lazy RWST instance of MonadParsecDbg" $ do 163 | it "example 5" $ do 164 | shouldStderr (L.runRWST p5 () 0) s5 stderr5 165 | prs (L.runRWST p5 () 0) s5 `shouldParse` r5 166 | it "example 6" $ do 167 | shouldStderr (L.runWriterT (L.runStateT p6 0)) s5 stderr5 168 | prs (L.runWriterT (L.runStateT p6 0)) s5 `shouldParse` r6 169 | it "example 7" $ do 170 | shouldStderr (L.runStateT (L.runWriterT p6) 0) s5 stderr7 171 | prs (L.runStateT (L.runWriterT p6) 0) s5 `shouldParse` r7 172 | context "Strict RWST instance of MonadParsecDbg" $ do 173 | it "example 5" $ do 174 | shouldStderr (S.runRWST p5 () 0) s5 stderr5 175 | prs (S.runRWST p5 () 0) s5 `shouldParse` r5 176 | it "example 6" $ do 177 | shouldStderr (S.runWriterT (S.runStateT p6 0)) s5 stderr5 178 | prs (S.runWriterT (S.runStateT p6 0)) s5 `shouldParse` r6 179 | it "example 7" $ do 180 | shouldStderr (S.runStateT (S.runWriterT p6) 0) s5 stderr7 181 | prs (S.runStateT (S.runWriterT p6) 0) s5 `shouldParse` r7 182 | 183 | ---------------------------------------------------------------------------- 184 | -- Helpers 185 | 186 | -- | Check that running the given parser on the input prints the expected 187 | -- string to the 'stderr'. 188 | shouldStderr :: 189 | (HasCallStack) => 190 | -- | The parser to test 191 | Parser a -> 192 | -- | Input for the parser 193 | String -> 194 | -- | The expected 'stderr' output 195 | String -> 196 | Expectation 197 | shouldStderr p s expectedStderr = do 198 | hFlush stderr 199 | withSystemTempFile "megaparsec-dbg-tests" $ \tempPath tempHandle -> do 200 | hDuplicateTo tempHandle stderr 201 | void (evaluate (parse p "" s)) 202 | hFlush stderr 203 | hClose tempHandle 204 | capturedStderr <- readFile tempPath 205 | capturedStderr `shouldBe` expectedStderr 206 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Text/Megaparsec/ErrorSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Text.Megaparsec.ErrorSpec (spec) where 4 | 5 | import Control.Exception (Exception (..)) 6 | import Data.Functor.Identity 7 | import Data.List (isInfixOf, isSuffixOf, sort) 8 | import Data.List.NonEmpty (NonEmpty (..)) 9 | import qualified Data.Set as E 10 | import Data.Void 11 | import Test.Hspec 12 | import Test.Hspec.Megaparsec 13 | import Test.Hspec.Megaparsec.AdHoc () 14 | import Test.QuickCheck 15 | import Text.Megaparsec 16 | 17 | spec :: Spec 18 | spec = do 19 | describe "Semigroup instance of ParseError" $ 20 | it "associativity" $ 21 | property $ \x y z -> 22 | (x <> y) <> z === (x <> (y <> z) :: PE) 23 | 24 | describe "Monoid instance of ParseError" $ do 25 | it "left identity" $ 26 | property $ \x -> 27 | mempty <> x === (x :: PE) 28 | it "right identity" $ 29 | property $ \x -> 30 | x <> mempty === (x :: PE) 31 | it "associativity" $ 32 | property $ \x y z -> 33 | (x <> y) <> z === (x <> (y <> z) :: PE) 34 | 35 | describe "error merging with (<>)" $ do 36 | it "selects greater offset" $ 37 | property $ \x y -> 38 | errorOffset (x <> y :: PE) === max (errorOffset x) (errorOffset y) 39 | context "when combining two trivial parse errors at the same position" $ 40 | it "merges their unexpected and expected items" $ 41 | do 42 | let n Nothing Nothing = Nothing 43 | n (Just x) Nothing = Just x 44 | n Nothing (Just y) = Just y 45 | n (Just x) (Just y) = Just (max x y) 46 | property $ \pos us0 ps0 us1 ps1 -> 47 | TrivialError pos us0 ps0 <> TrivialError pos us1 ps1 48 | `shouldBe` (TrivialError pos (n us0 us1) (E.union ps0 ps1) :: PE) 49 | context "when combining two fancy parse errors at the same position" $ 50 | it "merges their custom items" $ 51 | property $ \pos xs0 xs1 -> 52 | FancyError pos xs0 <> FancyError pos xs1 53 | `shouldBe` (FancyError pos (E.union xs0 xs1) :: PE) 54 | context "when combining trivial error with fancy error" $ do 55 | it "fancy has precedence (left)" $ 56 | property $ \pos us ps xs -> 57 | FancyError pos xs <> TrivialError pos us ps 58 | `shouldBe` (FancyError pos xs :: PE) 59 | it "fancy has precedence (right)" $ 60 | property $ \pos us ps xs -> 61 | TrivialError pos us ps <> FancyError pos xs 62 | `shouldBe` (FancyError pos xs :: PE) 63 | 64 | -- NOTE 'errorOffset' and 'setErrorOffset' are trivial. 65 | 66 | describe "attachSourcePos" $ 67 | it "attaches the positions correctly" $ 68 | property $ \xs' s -> do 69 | let xs = sort $ getSmall . getPositive <$> xs' 70 | pst = initialPosState (s :: String) 71 | pst' = 72 | if null xs 73 | then pst 74 | else reachOffsetNoLine (last xs) pst 75 | rs = f <$> xs 76 | f x = (x, pstateSourcePos (reachOffsetNoLine x pst)) 77 | attachSourcePos id (xs :: [Int]) pst `shouldBe` (rs, pst') 78 | 79 | describe "errorBundlePretty" $ do 80 | it "shows empty line correctly" $ do 81 | let s = "" :: String 82 | mkBundlePE s (mempty :: PE) 83 | `shouldBe` "1:1:\n |\n1 | \n | ^\nunknown parse error\n" 84 | it "shows position on first line correctly" $ do 85 | let s = "abc" :: String 86 | pe = err 1 (utok 'b' <> etok 'd') :: PE 87 | mkBundlePE s pe 88 | `shouldBe` "1:2:\n |\n1 | abc\n | ^\nunexpected 'b'\nexpecting 'd'\n" 89 | it "skips to second line correctly" $ do 90 | let s = "one\ntwo\n" :: String 91 | pe = err 4 (utok 't' <> etok 'x') :: PE 92 | mkBundlePE s pe 93 | `shouldBe` "2:1:\n |\n2 | two\n | ^\nunexpected 't'\nexpecting 'x'\n" 94 | it "shows position on 1000 line correctly" $ do 95 | let s = replicate 999 '\n' ++ "abc" 96 | pe = err 999 (utok 'a' <> etok 'd') :: PE 97 | mkBundlePE s pe 98 | `shouldBe` "1000:1:\n |\n1000 | abc\n | ^\nunexpected 'a'\nexpecting 'd'\n" 99 | it "shows offending line in the presence of tabs correctly" $ do 100 | let s = "\tsomething" :: String 101 | pe = err 1 (utok 's' <> etok 'x') :: PE 102 | mkBundlePE s pe 103 | `shouldBe` "1:9:\n |\n1 | something\n | ^\nunexpected 's'\nexpecting 'x'\n" 104 | it "uses continuous highlighting properly (trivial)" $ do 105 | let s = "\tfoobar" :: String 106 | pe = err 1 (utoks "foo" <> utoks "rar") :: PE 107 | mkBundlePE s pe 108 | `shouldBe` "1:9:\n |\n1 | foobar\n | ^^^\nunexpected \"rar\"\n" 109 | it "uses continuous highlighting properly (fancy)" $ do 110 | let s = "\tfoobar" :: String 111 | pe = 112 | errFancy 113 | 1 114 | (fancy $ ErrorCustom (CustomErr 5)) :: 115 | ParseError String CustomErr 116 | mkBundlePE s pe 117 | `shouldBe` "1:9:\n |\n1 | foobar\n | ^^^^^\ncustom thing\n" 118 | it "adjusts continuous highlighting so it doesn't get too long" $ do 119 | let s = "foobar\n" :: String 120 | pe = err 4 (utoks "foobar" <> etoks "foobar") :: PE 121 | mkBundlePE s pe 122 | `shouldBe` "1:5:\n |\n1 | foobar\n | ^^^\nunexpected \"foobar\"\nexpecting \"foobar\"\n" 123 | context "stream of insufficient size is provided in the bundle" $ 124 | it "handles the situation reasonably" $ 125 | do 126 | let s = "" :: String 127 | pe = err 3 (ueof <> etok 'x') :: PE 128 | mkBundlePE s pe 129 | `shouldBe` "1:1:\n |\n1 | \n | ^\nunexpected end of input\nexpecting 'x'\n" 130 | context "starting column in bundle is greater than 1" $ do 131 | context "and less than parse error column" $ 132 | it "is rendered correctly" $ 133 | do 134 | let s = "foo" :: String 135 | pe = err 5 (utok 'o' <> etok 'x') :: PE 136 | bundle = 137 | ParseErrorBundle 138 | { bundleErrors = pe :| [], 139 | bundlePosState = 140 | PosState 141 | { pstateInput = s, 142 | pstateOffset = 4, 143 | pstateSourcePos = SourcePos "" pos1 (mkPos 5), 144 | pstateTabWidth = defaultTabWidth, 145 | pstateLinePrefix = "" 146 | } 147 | } 148 | errorBundlePretty bundle 149 | `shouldBe` "1:6:\n |\n1 | foo\n | \nunexpected 'o'\nexpecting 'x'\n" 150 | context "and greater than parse error column" $ 151 | it "is rendered correctly" $ 152 | do 153 | let s = "foo" :: String 154 | pe = err 5 (utok 'o' <> etok 'x') :: PE 155 | bundle = 156 | ParseErrorBundle 157 | { bundleErrors = pe :| [], 158 | bundlePosState = 159 | PosState 160 | { pstateInput = s, 161 | pstateOffset = 9, 162 | pstateSourcePos = SourcePos "" pos1 (mkPos 10), 163 | pstateTabWidth = defaultTabWidth, 164 | pstateLinePrefix = "" 165 | } 166 | } 167 | errorBundlePretty bundle 168 | `shouldBe` "1:10:\n |\n1 | foo\n | \nunexpected 'o'\nexpecting 'x'\n" 169 | it "takes tab width into account correctly" $ 170 | property $ \w' i' -> do 171 | let w = unPos w' 172 | i = unPos i' `rem` (w * 2) 173 | as = replicate i 'a' 174 | s = "\t" ++ as ++ "\tb" :: String 175 | pe = err (2 + i) (utok 'b' <> etok 'x') :: PE 176 | bundle = 177 | ParseErrorBundle 178 | { bundleErrors = pe :| [], 179 | bundlePosState = 180 | PosState 181 | { pstateInput = s, 182 | pstateOffset = 0, 183 | pstateSourcePos = initialPos "", 184 | pstateTabWidth = w', 185 | pstateLinePrefix = "" 186 | } 187 | } 188 | secondTabApparentWidth = w - (i `rem` w) 189 | errColumn = w + i + secondTabApparentWidth 190 | errorBundlePretty bundle 191 | `shouldBe` ( "1:" 192 | ++ show (errColumn + 1) 193 | ++ ":\n |\n1 | " 194 | ++ replicate w ' ' 195 | ++ as 196 | ++ replicate secondTabApparentWidth ' ' 197 | ++ "b" 198 | ++ "\n | " 199 | ++ replicate errColumn ' ' 200 | ++ "^\nunexpected 'b'\nexpecting 'x'\n" 201 | ) 202 | context "in the presence of wide characters" $ do 203 | it "calculates column positions correctly" $ do 204 | let s = "구구 이면" :: String 205 | pe = err 2 (ulabel "space" <> etok '이') :: PE 206 | bundle = 207 | ParseErrorBundle 208 | { bundleErrors = pe :| [], 209 | bundlePosState = 210 | PosState 211 | { pstateInput = s, 212 | pstateOffset = 0, 213 | pstateSourcePos = initialPos "", 214 | pstateTabWidth = defaultTabWidth, 215 | pstateLinePrefix = "" 216 | } 217 | } 218 | errorBundlePretty bundle 219 | `shouldBe` "1:5:\n |\n1 | 구구 이면\n | ^\nunexpected space\nexpecting '이'\n" 220 | it "uses continuous highlighting" $ do 221 | let s = "구구 이면" :: String 222 | pe = err 3 (utok '이' <> etok '구') :: PE 223 | bundle = 224 | ParseErrorBundle 225 | { bundleErrors = pe :| [], 226 | bundlePosState = 227 | PosState 228 | { pstateInput = s, 229 | pstateOffset = 0, 230 | pstateSourcePos = initialPos "", 231 | pstateTabWidth = defaultTabWidth, 232 | pstateLinePrefix = "" 233 | } 234 | } 235 | errorBundlePretty bundle 236 | `shouldBe` "1:6:\n |\n1 | 구구 이면\n | ^^\nunexpected '이'\nexpecting '구'\n" 237 | it "displays multi-error bundle correctly" $ do 238 | let s = "something\ngood\n" :: String 239 | pe0 = err 2 (utok 'm' <> etok 'x') :: PE 240 | pe1 = err 10 (utok 'g' <> etok 'y') :: PE 241 | bundle = 242 | ParseErrorBundle 243 | { bundleErrors = pe0 :| [pe1], 244 | bundlePosState = 245 | PosState 246 | { pstateInput = s, 247 | pstateOffset = 0, 248 | pstateSourcePos = initialPos "", 249 | pstateTabWidth = defaultTabWidth, 250 | pstateLinePrefix = "" 251 | } 252 | } 253 | errorBundlePretty bundle 254 | `shouldBe` "1:3:\n |\n1 | something\n | ^\nunexpected 'm'\nexpecting 'x'\n\n2:1:\n |\n2 | good\n | ^\nunexpected 'g'\nexpecting 'y'\n" 255 | 256 | describe "parseErrorPretty" $ do 257 | it "shows unknown ParseError correctly" $ 258 | parseErrorPretty (mempty :: PE) `shouldBe` "offset=0:\nunknown parse error\n" 259 | it "result always ends with a newline" $ 260 | property $ \x -> 261 | parseErrorPretty (x :: PE) `shouldSatisfy` ("\n" `isSuffixOf`) 262 | it "result contains representation of offset" $ 263 | property (contains (Identity . errorOffset) show) 264 | it "result contains unexpected/expected items" $ do 265 | let e = err 0 (utoks "foo" <> etoks "bar" <> etoks "baz") :: PE 266 | parseErrorPretty e `shouldBe` "offset=0:\nunexpected \"foo\"\nexpecting \"bar\" or \"baz\"\n" 267 | it "result contains representation of custom items" $ do 268 | let e = errFancy 0 (fancy (ErrorFail "Ooops!")) :: PE 269 | parseErrorPretty e `shouldBe` "offset=0:\nOoops!\n" 270 | it "several fancy errors look not so bad" $ do 271 | let pe :: PE 272 | pe = 273 | errFancy 0 $ 274 | mempty <> fancy (ErrorFail "foo") <> fancy (ErrorFail "bar") 275 | parseErrorPretty pe `shouldBe` "offset=0:\nbar\nfoo\n" 276 | 277 | describe "parseErrorTextPretty" $ do 278 | it "shows trivial unknown ParseError correctly" $ 279 | parseErrorTextPretty (mempty :: PE) 280 | `shouldBe` "unknown parse error\n" 281 | it "shows fancy unknown ParseError correctly" $ 282 | parseErrorTextPretty (FancyError 0 E.empty :: PE) 283 | `shouldBe` "unknown fancy parse error\n" 284 | it "result always ends with a newline" $ 285 | property $ \x -> 286 | parseErrorTextPretty (x :: PE) 287 | `shouldSatisfy` ("\n" `isSuffixOf`) 288 | 289 | describe "displayException" $ 290 | it "produces the same result as parseErrorPretty" $ 291 | property $ \x -> 292 | displayException x `shouldBe` parseErrorPretty (x :: PE) 293 | 294 | ---------------------------------------------------------------------------- 295 | -- Helpers 296 | 297 | -- | A custom error component to test continuous highlighting for custom 298 | -- components. 299 | newtype CustomErr = CustomErr Int 300 | deriving (Eq, Ord, Show) 301 | 302 | instance ShowErrorComponent CustomErr where 303 | showErrorComponent _ = "custom thing" 304 | errorComponentLen (CustomErr n) = n 305 | 306 | type PE = ParseError String Void 307 | 308 | contains :: (Foldable t) => (PE -> t a) -> (a -> String) -> PE -> Property 309 | contains g r e = property (all f (g e)) 310 | where 311 | rendered = parseErrorPretty e 312 | f x = r x `isInfixOf` rendered 313 | 314 | mkBundlePE :: 315 | ( VisualStream s, 316 | TraversableStream s, 317 | ShowErrorComponent e 318 | ) => 319 | s -> 320 | ParseError s e -> 321 | String 322 | mkBundlePE s e = 323 | errorBundlePretty $ 324 | ParseErrorBundle 325 | { bundleErrors = e :| [], 326 | bundlePosState = 327 | PosState 328 | { pstateInput = s, 329 | pstateOffset = 0, 330 | pstateSourcePos = initialPos "", 331 | pstateTabWidth = defaultTabWidth, 332 | pstateLinePrefix = "" 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Text/Megaparsec/PosSpec.hs: -------------------------------------------------------------------------------- 1 | module Text.Megaparsec.PosSpec (spec) where 2 | 3 | import Control.Exception (evaluate) 4 | import Data.Function (on) 5 | import Data.List (isInfixOf) 6 | import Test.Hspec 7 | import Test.Hspec.Megaparsec.AdHoc () 8 | import Test.QuickCheck 9 | import Text.Megaparsec.Pos 10 | 11 | spec :: Spec 12 | spec = do 13 | describe "mkPos" $ do 14 | context "when the argument is a non-positive number" $ 15 | it "throws InvalidPosException" $ 16 | property $ \n -> 17 | n <= 0 ==> 18 | evaluate (mkPos n) `shouldThrow` (== InvalidPosException n) 19 | context "when the argument is not 0" $ 20 | it "returns Pos with the given value" $ 21 | property $ \n -> 22 | (n > 0) ==> (unPos (mkPos n) `shouldBe` n) 23 | 24 | describe "Read and Show instances of Pos" $ 25 | it "printed representation of Pos is isomorphic to its value" $ 26 | property $ \x -> 27 | read (show x) === (x :: Pos) 28 | 29 | describe "Ord instance of Pos" $ 30 | it "works just like Ord instance of underlying Word" $ 31 | property $ \x y -> 32 | compare x y === (compare `on` unPos) x y 33 | 34 | describe "Semigroup instance of Pos" $ 35 | it "works like addition" $ 36 | property $ \x y -> 37 | x <> y === mkPos (unPos x + unPos y) 38 | .&&. unPos (x <> y) === unPos x + unPos y 39 | 40 | describe "initialPos" $ 41 | it "constructs initial position correctly" $ 42 | property $ \path -> 43 | let x = initialPos path 44 | in sourceName x === path 45 | .&&. sourceLine x === mkPos 1 46 | .&&. sourceColumn x === mkPos 1 47 | 48 | describe "Read and Show instances of SourcePos" $ 49 | it "printed representation of SourcePos in isomorphic to its value" $ 50 | property $ \x -> 51 | read (show x) === (x :: SourcePos) 52 | 53 | describe "sourcePosPretty" $ do 54 | it "displays file name" $ 55 | property $ \x -> 56 | sourceName x `isInfixOf` sourcePosPretty x 57 | it "displays line number" $ 58 | property $ \x -> 59 | (show . unPos . sourceLine) x `isInfixOf` sourcePosPretty x 60 | it "displays column number" $ 61 | property $ \x -> 62 | (show . unPos . sourceColumn) x `isInfixOf` sourcePosPretty x 63 | -------------------------------------------------------------------------------- /megaparsec-tests/tests/Text/Megaparsec/UnicodeSpec.hs: -------------------------------------------------------------------------------- 1 | module Text.Megaparsec.UnicodeSpec (spec) where 2 | 3 | import Test.Hspec 4 | import qualified Text.Megaparsec.Unicode as Unicode 5 | 6 | spec :: Spec 7 | spec = do 8 | describe "stringLength" $ 9 | it "computes correct length in the presense of wide chars" $ 10 | Unicode.stringLength "123 구구 이면" `shouldBe` 13 11 | describe "charLength" $ do 12 | it "returns 1 for non-wide chars" $ 13 | Unicode.charLength 'a' `shouldBe` 1 14 | it "returns 2 for wide chars" $ 15 | Unicode.charLength '구' `shouldBe` 2 16 | describe "isWideChar" $ do 17 | it "returns False for non-wide chars" $ 18 | Unicode.isWideChar 'a' `shouldBe` False 19 | it "returns True for wide chars" $ 20 | Unicode.isWideChar '구' `shouldBe` True 21 | -------------------------------------------------------------------------------- /megaparsec.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.4 2 | name: megaparsec 3 | version: 9.7.0 4 | license: BSD-2-Clause 5 | license-file: LICENSE.md 6 | maintainer: Mark Karpov 7 | author: 8 | Megaparsec contributors, 9 | Paolo Martini , 10 | Daan Leijen 11 | 12 | tested-with: ghc ==9.8.4 ghc ==9.10.1 ghc ==9.12.1 13 | homepage: https://github.com/mrkkrp/megaparsec 14 | bug-reports: https://github.com/mrkkrp/megaparsec/issues 15 | synopsis: Monadic parser combinators 16 | description: 17 | This is an industrial-strength monadic parser combinator library. 18 | Megaparsec is a feature-rich package that tries to find a nice balance 19 | between speed, flexibility, and quality of parse errors. 20 | 21 | category: Parsing 22 | build-type: Simple 23 | extra-doc-files: 24 | CHANGELOG.md 25 | README.md 26 | 27 | source-repository head 28 | type: git 29 | location: https://github.com/mrkkrp/megaparsec.git 30 | 31 | flag dev 32 | description: Turn on development settings. 33 | default: False 34 | manual: True 35 | 36 | library 37 | exposed-modules: 38 | Text.Megaparsec 39 | Text.Megaparsec.Byte 40 | Text.Megaparsec.Byte.Binary 41 | Text.Megaparsec.Byte.Lexer 42 | Text.Megaparsec.Char 43 | Text.Megaparsec.Char.Lexer 44 | Text.Megaparsec.Debug 45 | Text.Megaparsec.Error 46 | Text.Megaparsec.Error.Builder 47 | Text.Megaparsec.Internal 48 | Text.Megaparsec.Pos 49 | Text.Megaparsec.State 50 | Text.Megaparsec.Stream 51 | Text.Megaparsec.Unicode 52 | 53 | other-modules: 54 | Text.Megaparsec.Class 55 | Text.Megaparsec.Common 56 | Text.Megaparsec.Lexer 57 | 58 | default-language: Haskell2010 59 | build-depends: 60 | array >=0.5.3 && <0.6, 61 | base >=4.15 && <5, 62 | bytestring >=0.2 && <0.13, 63 | case-insensitive >=1.2 && <1.3, 64 | containers >=0.5 && <0.8, 65 | deepseq >=1.3 && <1.6, 66 | mtl >=2.2.2 && <3, 67 | parser-combinators >=1.0 && <2, 68 | scientific >=0.3.7 && <0.4, 69 | text >=0.2 && <2.2, 70 | transformers >=0.4 && <0.7 71 | 72 | if flag(dev) 73 | ghc-options: 74 | -Wall -Werror -Wredundant-constraints -Wpartial-fields 75 | -Wunused-packages -Wno-unused-imports 76 | 77 | else 78 | ghc-options: -O2 -Wall 79 | 80 | if impl(ghc >=9.8) 81 | ghc-options: -Wno-x-partial 82 | 83 | benchmark bench-speed 84 | type: exitcode-stdio-1.0 85 | main-is: Main.hs 86 | hs-source-dirs: bench/speed 87 | default-language: Haskell2010 88 | build-depends: 89 | base >=4.15 && <5, 90 | bytestring >=0.2 && <0.13, 91 | containers >=0.5 && <0.8, 92 | criterion >=0.6.2.1 && <1.7, 93 | deepseq >=1.3 && <1.6, 94 | megaparsec, 95 | text >=0.2 && <2.2 96 | 97 | if flag(dev) 98 | ghc-options: 99 | -Wall -Werror -Wredundant-constraints -Wpartial-fields 100 | -Wunused-packages 101 | 102 | else 103 | ghc-options: -O2 -Wall 104 | 105 | benchmark bench-memory 106 | type: exitcode-stdio-1.0 107 | main-is: Main.hs 108 | hs-source-dirs: bench/memory 109 | default-language: Haskell2010 110 | build-depends: 111 | base >=4.15 && <5, 112 | bytestring >=0.2 && <0.13, 113 | containers >=0.5 && <0.8, 114 | deepseq >=1.3 && <1.6, 115 | megaparsec, 116 | text >=0.2 && <2.2, 117 | weigh >=0.0.4 118 | 119 | if flag(dev) 120 | ghc-options: 121 | -Wall -Werror -Wredundant-constraints -Wpartial-fields 122 | -Wunused-packages 123 | 124 | else 125 | ghc-options: -O2 -Wall 126 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/CSV/Attoparsec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module ParsersBench.CSV.Attoparsec 4 | ( parseCSV, 5 | ) 6 | where 7 | 8 | import Control.Applicative 9 | import Control.Applicative.Combinators 10 | import Control.Monad 11 | import Data.Attoparsec.ByteString.Char8 hiding (sepBy1) 12 | import qualified Data.Attoparsec.ByteString.Char8 as A 13 | import Data.ByteString (ByteString) 14 | import qualified Data.ByteString.Char8 as BC8 15 | import Data.Vector (Vector) 16 | import qualified Data.Vector as V 17 | 18 | -- NOTE We use the combinators from this module to make both implementations 19 | -- as close as possible. The combinators from the ‘parser-combinators’ 20 | -- packages are not slower than Attoparsec's. 21 | 22 | type Record = Vector Field 23 | 24 | type Field = ByteString 25 | 26 | parseCSV :: ByteString -> [Record] 27 | parseCSV bs = 28 | case parseOnly csv bs of 29 | Left err -> error err 30 | Right x -> x 31 | 32 | csv :: Parser [Record] 33 | csv = do 34 | xs <- sepEndBy1 record endOfLine 35 | endOfInput 36 | return xs 37 | 38 | record :: Parser Record 39 | record = do 40 | endAlready <- atEnd 41 | when endAlready empty -- to prevent reading empty line at the end of file 42 | V.fromList <$!> (sepBy1 field (char ',') "record") 43 | 44 | field :: Parser Field 45 | field = (escapedField <|> unescapedField) "field" 46 | 47 | escapedField :: Parser ByteString 48 | escapedField = 49 | BC8.pack <$!> between (char '"') (char '"') (many $ normalChar <|> escapedDq) 50 | where 51 | normalChar = notChar '"' "unescaped character" 52 | escapedDq = '"' <$ string "\"\"" 53 | 54 | unescapedField :: Parser ByteString 55 | unescapedField = 56 | A.takeWhile (\x -> notElem x [',', '\"', '\n', '\r']) 57 | {-# INLINE unescapedField #-} 58 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/CSV/Megaparsec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module ParsersBench.CSV.Megaparsec 4 | ( parseCSV, 5 | ) 6 | where 7 | 8 | import Control.Monad 9 | import Data.ByteString (ByteString) 10 | import qualified Data.ByteString as B 11 | import Data.Vector (Vector) 12 | import qualified Data.Vector as V 13 | import Data.Void 14 | import Text.Megaparsec 15 | import Text.Megaparsec.Byte 16 | 17 | type Parser = Parsec Void ByteString 18 | 19 | type Record = Vector Field 20 | 21 | type Field = ByteString 22 | 23 | -- | Parse a CSV file without conversion of individual records. 24 | parseCSV :: ByteString -> [Record] 25 | parseCSV bs = 26 | case parse csv "" bs of 27 | Left err -> error (errorBundlePretty err) 28 | Right x -> x 29 | 30 | csv :: Parser [Record] 31 | csv = do 32 | xs <- sepEndBy1 record eol 33 | eof 34 | return xs 35 | 36 | record :: Parser Record 37 | record = do 38 | notFollowedBy eof -- to prevent reading empty line at the end of file 39 | V.fromList <$!> (sepBy1 field (char 44) "record") 40 | 41 | field :: Parser Field 42 | field = label "field" (escapedField <|> unescapedField) 43 | 44 | escapedField :: Parser ByteString 45 | escapedField = 46 | B.pack <$!> between (char 34) (char 34) (many $ normalChar <|> escapedDq) 47 | where 48 | normalChar = anySingleBut 34 "unescaped character" 49 | escapedDq = label "escaped double-quote" (34 <$ string "\"\"") 50 | 51 | unescapedField :: Parser ByteString 52 | unescapedField = 53 | takeWhileP 54 | (Just "unescaped char") 55 | (\x -> notElem x [44, 34, 10, 13]) 56 | {-# INLINE unescapedField #-} 57 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/Json/Attoparsec.hs: -------------------------------------------------------------------------------- 1 | -- Mostly stolen from (with simplifications): 2 | -- https://github.com/bos/attoparsec/blob/master/benchmarks/Aeson.hs 3 | {-# LANGUAGE CPP #-} 4 | {-# LANGUAGE OverloadedStrings #-} 5 | 6 | module ParsersBench.Json.Attoparsec 7 | ( parseJson, 8 | ) 9 | where 10 | 11 | import Control.Applicative 12 | import qualified Data.Attoparsec.ByteString as A 13 | import Data.Attoparsec.ByteString.Char8 14 | import Data.ByteString (ByteString) 15 | import qualified Data.HashMap.Strict as H 16 | import Data.Text (Text) 17 | import qualified Data.Text.Encoding as TE 18 | import Data.Vector (Vector) 19 | import qualified Data.Vector as V 20 | import Data.Word (Word8) 21 | import ParsersBench.Json.Common 22 | 23 | #define CLOSE_CURLY 125 24 | #define CLOSE_SQUARE 93 25 | #define COMMA 44 26 | #define C_0 48 27 | #define C_9 57 28 | #define C_MINUS 45 29 | #define C_f 102 30 | #define C_n 110 31 | #define C_t 116 32 | #define DOUBLE_QUOTE 34 33 | #define OPEN_CURLY 123 34 | #define OPEN_SQUARE 91 35 | 36 | parseJson :: ByteString -> Value 37 | parseJson bs = 38 | case parseOnly json bs of 39 | Left err -> error err 40 | Right x -> x 41 | 42 | json :: Parser Value 43 | json = json_ object_ array_ 44 | 45 | json_ :: Parser Value -> Parser Value -> Parser Value 46 | json_ obj ary = do 47 | w <- skipSpace *> A.satisfy (\w -> w == OPEN_CURLY || w == OPEN_SQUARE) 48 | if w == OPEN_CURLY 49 | then obj 50 | else ary 51 | {-# INLINE json_ #-} 52 | 53 | object_ :: Parser Value 54 | object_ = Object <$> objectValues jstring value 55 | 56 | objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value) 57 | objectValues str val = do 58 | skipSpace 59 | let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val) 60 | H.fromList <$> commaSeparated pair CLOSE_CURLY 61 | {-# INLINE objectValues #-} 62 | 63 | array_ :: Parser Value 64 | array_ = Array <$> arrayValues value 65 | 66 | arrayValues :: Parser Value -> Parser (Vector Value) 67 | arrayValues val = do 68 | skipSpace 69 | V.fromList <$> commaSeparated val CLOSE_SQUARE 70 | {-# INLINE arrayValues #-} 71 | 72 | commaSeparated :: Parser a -> Word8 -> Parser [a] 73 | commaSeparated item endByte = do 74 | w <- A.peekWord8' 75 | if w == endByte 76 | then A.anyWord8 >> return [] 77 | else loop 78 | where 79 | loop = do 80 | v <- item <* skipSpace 81 | ch <- A.satisfy $ \w -> w == COMMA || w == endByte 82 | if ch == COMMA 83 | then skipSpace >> (v :) <$> loop 84 | else return [v] 85 | {-# INLINE commaSeparated #-} 86 | 87 | value :: Parser Value 88 | value = do 89 | w <- A.peekWord8' 90 | case w of 91 | DOUBLE_QUOTE -> A.anyWord8 *> (String <$> jstring_) 92 | OPEN_CURLY -> A.anyWord8 *> object_ 93 | OPEN_SQUARE -> A.anyWord8 *> array_ 94 | C_f -> string "false" *> pure (Bool False) 95 | C_t -> string "true" *> pure (Bool True) 96 | C_n -> string "null" *> pure Null 97 | _ 98 | | w >= C_0 && w <= C_9 || w == C_MINUS -> Number <$> scientific 99 | | otherwise -> fail "not a valid json value" 100 | 101 | jstring :: Parser Text 102 | jstring = A.word8 DOUBLE_QUOTE *> jstring_ 103 | 104 | jstring_ :: Parser Text 105 | jstring_ = 106 | TE.decodeUtf8 107 | <$> A.takeWhile (/= DOUBLE_QUOTE) 108 | <* A.word8 DOUBLE_QUOTE 109 | {-# INLINE jstring_ #-} 110 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/Json/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | 3 | module ParsersBench.Json.Common 4 | ( Result (..), 5 | Object, 6 | Array, 7 | Value (..), 8 | ) 9 | where 10 | 11 | import Control.DeepSeq 12 | import qualified Data.HashMap.Strict as H 13 | import Data.Scientific (Scientific) 14 | import Data.Text (Text) 15 | import Data.Vector (Vector) 16 | import GHC.Generics 17 | 18 | data Result a 19 | = Error String 20 | | Success a 21 | deriving (Eq, Show) 22 | 23 | type Object = H.HashMap Text Value 24 | 25 | type Array = Vector Value 26 | 27 | data Value 28 | = Object !Object 29 | | Array !Array 30 | | String !Text 31 | | Number !Scientific 32 | | Bool !Bool 33 | | Null 34 | deriving (Eq, Show, Generic) 35 | 36 | instance NFData Value 37 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/Json/Megaparsec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | 4 | module ParsersBench.Json.Megaparsec 5 | ( parseJson, 6 | ) 7 | where 8 | 9 | import Control.Applicative 10 | import Data.ByteString (ByteString) 11 | import qualified Data.HashMap.Strict as H 12 | import Data.Text (Text) 13 | import qualified Data.Text.Encoding as TE 14 | import Data.Vector (Vector) 15 | import qualified Data.Vector as V 16 | import Data.Void 17 | import Data.Word (Word8) 18 | import ParsersBench.Json.Common 19 | import Text.Megaparsec 20 | import Text.Megaparsec.Byte 21 | import qualified Text.Megaparsec.Byte.Lexer as L 22 | 23 | type Parser = Parsec Void ByteString 24 | 25 | #define CLOSE_CURLY 125 26 | #define CLOSE_SQUARE 93 27 | #define COMMA 44 28 | #define C_0 48 29 | #define C_9 57 30 | #define C_MINUS 45 31 | #define C_f 102 32 | #define C_n 110 33 | #define C_t 116 34 | #define DOUBLE_QUOTE 34 35 | #define OPEN_CURLY 123 36 | #define OPEN_SQUARE 91 37 | #define COLON 58 38 | 39 | parseJson :: ByteString -> Value 40 | parseJson bs = 41 | case parse json "" bs of 42 | Left err -> error (errorBundlePretty err) 43 | Right x -> x 44 | 45 | json :: Parser Value 46 | json = json_ object_ array_ 47 | 48 | json_ :: Parser Value -> Parser Value -> Parser Value 49 | json_ obj ary = do 50 | w <- space *> (char OPEN_CURLY <|> char OPEN_SQUARE) 51 | if w == OPEN_CURLY 52 | then obj 53 | else ary 54 | {-# INLINE json_ #-} 55 | 56 | object_ :: Parser Value 57 | object_ = Object <$> objectValues jstring value 58 | 59 | objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value) 60 | objectValues str val = do 61 | space 62 | let pair = liftA2 (,) (str <* space) (char COLON *> space *> val) 63 | H.fromList <$> commaSeparated pair CLOSE_CURLY 64 | {-# INLINE objectValues #-} 65 | 66 | array_ :: Parser Value 67 | array_ = Array <$> arrayValues value 68 | 69 | arrayValues :: Parser Value -> Parser (Vector Value) 70 | arrayValues val = do 71 | space 72 | V.fromList <$> commaSeparated val CLOSE_SQUARE 73 | {-# INLINE arrayValues #-} 74 | 75 | commaSeparated :: Parser a -> Word8 -> Parser [a] 76 | commaSeparated item endByte = do 77 | w <- lookAhead anySingle 78 | if w == endByte 79 | then [] <$ anySingle 80 | else loop 81 | where 82 | loop = do 83 | v <- item <* space 84 | ch <- char COMMA <|> char endByte 85 | if ch == COMMA 86 | then space >> (v :) <$> loop 87 | else return [v] 88 | {-# INLINE commaSeparated #-} 89 | 90 | value :: Parser Value 91 | value = do 92 | w <- lookAhead anySingle 93 | case w of 94 | DOUBLE_QUOTE -> anySingle *> (String <$> jstring_) 95 | OPEN_CURLY -> anySingle *> object_ 96 | OPEN_SQUARE -> anySingle *> array_ 97 | C_f -> Bool False <$ string "false" 98 | C_t -> Bool True <$ string "true" 99 | C_n -> string "null" *> pure Null 100 | _ 101 | | w >= C_0 && w <= C_9 || w == C_MINUS -> Number <$> L.scientific 102 | | otherwise -> fail "not a valid json value" 103 | 104 | jstring :: Parser Text 105 | jstring = char DOUBLE_QUOTE *> jstring_ 106 | 107 | jstring_ :: Parser Text 108 | jstring_ = 109 | TE.decodeUtf8 110 | <$> takeWhileP (Just "string char") (/= DOUBLE_QUOTE) 111 | <* char DOUBLE_QUOTE 112 | {-# INLINE jstring_ #-} 113 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/Log/Attoparsec.hs: -------------------------------------------------------------------------------- 1 | -- Mostly stolen from: 2 | -- https://www.schoolofhaskell.com/school/starting-with-haskell/libraries-and-frameworks/text-manipulation/attoparsec 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | module ParsersBench.Log.Attoparsec 6 | ( parseLog, 7 | ) 8 | where 9 | 10 | import Control.Applicative 11 | import Control.Monad 12 | import Data.Attoparsec.ByteString.Char8 13 | import Data.ByteString (ByteString) 14 | import Data.Time 15 | import ParsersBench.Log.Common 16 | 17 | parseLog :: ByteString -> Log 18 | parseLog bs = 19 | case parseOnly logParser bs of 20 | Left err -> error err 21 | Right x -> x 22 | 23 | parseIP :: Parser IP 24 | parseIP = do 25 | d1 <- decimal 26 | void (char '.') 27 | d2 <- decimal 28 | void (char '.') 29 | d3 <- decimal 30 | void (char '.') 31 | d4 <- decimal 32 | return $ IP d1 d2 d3 d4 33 | 34 | timeParser :: Parser LocalTime 35 | timeParser = do 36 | y <- count 4 digit 37 | void (char '-') 38 | mm <- count 2 digit 39 | void (char '-') 40 | d <- count 2 digit 41 | void (char ' ') 42 | h <- count 2 digit 43 | void (char ':') 44 | m <- count 2 digit 45 | void (char ':') 46 | s <- count 2 digit 47 | return 48 | LocalTime 49 | { localDay = fromGregorian (read y) (read mm) (read d), 50 | localTimeOfDay = TimeOfDay (read h) (read m) (read s) 51 | } 52 | 53 | productParser :: Parser Product 54 | productParser = 55 | (Mouse <$ string "mouse") 56 | <|> (Keyboard <$ string "keyboard") 57 | <|> (Monitor <$ string "monitor") 58 | <|> (Speakers <$ string "speakers") 59 | 60 | logEntryParser :: Parser LogEntry 61 | logEntryParser = do 62 | t <- timeParser 63 | void (char ' ') 64 | ip <- parseIP 65 | void (char ' ') 66 | p <- productParser 67 | return (LogEntry t ip p) 68 | 69 | logParser :: Parser Log 70 | logParser = many (logEntryParser <* endOfLine) 71 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/Log/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | 3 | module ParsersBench.Log.Common 4 | ( IP (..), 5 | Product (..), 6 | LogEntry (..), 7 | Log, 8 | ) 9 | where 10 | 11 | import Control.DeepSeq 12 | import Data.Time 13 | import Data.Word 14 | import GHC.Generics 15 | 16 | data IP = IP Word8 Word8 Word8 Word8 17 | deriving (Show, Eq, Generic) 18 | 19 | instance NFData IP 20 | 21 | data Product 22 | = Mouse 23 | | Keyboard 24 | | Monitor 25 | | Speakers 26 | deriving (Show, Eq, Generic) 27 | 28 | instance NFData Product 29 | 30 | data LogEntry = LogEntry 31 | { entryTime :: LocalTime, 32 | entryIP :: IP, 33 | entryProduct :: Product 34 | } 35 | deriving (Show, Eq, Generic) 36 | 37 | instance NFData LogEntry 38 | 39 | type Log = [LogEntry] 40 | -------------------------------------------------------------------------------- /parsers-bench/ParsersBench/Log/Megaparsec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module ParsersBench.Log.Megaparsec 4 | ( parseLog, 5 | ) 6 | where 7 | 8 | import Control.Monad 9 | import Data.ByteString (ByteString) 10 | import Data.Char (chr) 11 | import Data.Time 12 | import Data.Void 13 | import Data.Word (Word8) 14 | import ParsersBench.Log.Common 15 | import Text.Megaparsec 16 | import Text.Megaparsec.Byte 17 | import qualified Text.Megaparsec.Byte.Lexer as L 18 | 19 | type Parser = Parsec Void ByteString 20 | 21 | parseLog :: ByteString -> Log 22 | parseLog bs = 23 | case parse logParser "" bs of 24 | Left err -> error (errorBundlePretty err) 25 | Right x -> x 26 | 27 | parseIP :: Parser IP 28 | parseIP = do 29 | d1 <- L.decimal 30 | void (char 46) 31 | d2 <- L.decimal 32 | void (char 46) 33 | d3 <- L.decimal 34 | void (char 46) 35 | d4 <- L.decimal 36 | return (IP d1 d2 d3 d4) 37 | 38 | timeParser :: Parser LocalTime 39 | timeParser = do 40 | y <- fmap byteToChar <$> count 4 digitChar 41 | void (char 45) 42 | mm <- fmap byteToChar <$> count 2 digitChar 43 | void (char 45) 44 | d <- fmap byteToChar <$> count 2 digitChar 45 | void (char 32) 46 | h <- fmap byteToChar <$> count 2 digitChar 47 | void (char 58) 48 | m <- fmap byteToChar <$> count 2 digitChar 49 | void (char 58) 50 | s <- fmap byteToChar <$> count 2 digitChar 51 | return 52 | LocalTime 53 | { localDay = fromGregorian (read y) (read mm) (read d), 54 | localTimeOfDay = TimeOfDay (read h) (read m) (read s) 55 | } 56 | 57 | productParser :: Parser Product 58 | productParser = 59 | (Mouse <$ string "mouse") 60 | <|> (Keyboard <$ string "keyboard") 61 | <|> (Monitor <$ string "monitor") 62 | <|> (Speakers <$ string "speakers") 63 | 64 | logEntryParser :: Parser LogEntry 65 | logEntryParser = do 66 | t <- timeParser 67 | void (char 32) 68 | ip <- parseIP 69 | void (char 32) 70 | p <- productParser 71 | return (LogEntry t ip p) 72 | 73 | logParser :: Parser Log 74 | logParser = many (logEntryParser <* eol) 75 | 76 | byteToChar :: Word8 -> Char 77 | byteToChar = chr . fromIntegral 78 | {-# INLINE byteToChar #-} 79 | -------------------------------------------------------------------------------- /parsers-bench/README.md: -------------------------------------------------------------------------------- 1 | # Parsers bench 2 | 3 | This directory contains a collection of identical parsers implemented in 4 | Attoparsec and Megaparsec. The goal here is to learn how much Attoparsec is 5 | actually faster than Megaparsec for common parsing tasks. 6 | 7 | ## License 8 | 9 | Copyright © 2017–present Mark Karpov 10 | 11 | Distributed under BSD 3 clause license. 12 | -------------------------------------------------------------------------------- /parsers-bench/bench/memory/Main.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Control.DeepSeq 4 | import Control.Monad 5 | import Data.ByteString (ByteString) 6 | import qualified Data.ByteString as B 7 | import qualified ParsersBench.CSV.Attoparsec as A 8 | import qualified ParsersBench.CSV.Megaparsec as M 9 | import qualified ParsersBench.Json.Attoparsec as A 10 | import qualified ParsersBench.Json.Megaparsec as M 11 | import qualified ParsersBench.Log.Attoparsec as A 12 | import qualified ParsersBench.Log.Megaparsec as M 13 | import Weigh 14 | 15 | main :: IO () 16 | main = mainWith $ do 17 | setColumns [Case, Allocated, GCs, Max] 18 | forM_ csvFiles $ \file -> 19 | bparser "CSV (Attoparsec)" file A.parseCSV 20 | forM_ csvFiles $ \file -> 21 | bparser "CSV (Megaparsec)" file M.parseCSV 22 | forM_ logFiles $ \file -> 23 | bparser "Log (Attoparsec)" file A.parseLog 24 | forM_ logFiles $ \file -> 25 | bparser "Log (Megaparsec)" file M.parseLog 26 | forM_ jsonFiles $ \file -> 27 | bparser "JSON (Attoparsec)" file A.parseJson 28 | forM_ jsonFiles $ \file -> 29 | bparser "JSON (Megaparsec)" file M.parseJson 30 | 31 | bparser :: (NFData a) => String -> FilePath -> (ByteString -> a) -> Weigh () 32 | bparser pre desc f = io (pre ++ "-" ++ desc) m path 33 | where 34 | path = "data/" ++ desc 35 | m pth = f <$> B.readFile pth 36 | 37 | csvFiles :: [FilePath] 38 | csvFiles = 39 | [ "csv-5.csv", 40 | "csv-10.csv", 41 | "csv-20.csv", 42 | "csv-40.csv" 43 | ] 44 | 45 | logFiles :: [FilePath] 46 | logFiles = 47 | [ "log-5.log", 48 | "log-10.log", 49 | "log-20.log", 50 | "log-40.log" 51 | ] 52 | 53 | jsonFiles :: [FilePath] 54 | jsonFiles = 55 | [ "json-5.json", 56 | "json-10.json", 57 | "json-20.json", 58 | "json-40.json" 59 | ] 60 | -------------------------------------------------------------------------------- /parsers-bench/bench/speed/Main.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Control.DeepSeq 4 | import Criterion.Main 5 | import Data.ByteString (ByteString) 6 | import qualified Data.ByteString as B 7 | import qualified ParsersBench.CSV.Attoparsec as A 8 | import qualified ParsersBench.CSV.Megaparsec as M 9 | import qualified ParsersBench.Json.Attoparsec as A 10 | import qualified ParsersBench.Json.Megaparsec as M 11 | import qualified ParsersBench.Log.Attoparsec as A 12 | import qualified ParsersBench.Log.Megaparsec as M 13 | 14 | main :: IO () 15 | main = 16 | defaultMain 17 | [ bgroup 18 | "CSV (Attoparsec)" 19 | [bparser file A.parseCSV | file <- csvFiles], 20 | bgroup 21 | "CSV (Megaparsec)" 22 | [bparser file M.parseCSV | file <- csvFiles], 23 | bgroup 24 | "Log (Attoparsec)" 25 | [bparser file A.parseLog | file <- logFiles], 26 | bgroup 27 | "Log (Megaparsec)" 28 | [bparser file M.parseLog | file <- logFiles], 29 | bgroup 30 | "JSON (Attoparsec)" 31 | [bparser file A.parseJson | file <- jsonFiles], 32 | bgroup 33 | "JSON (Megapasec)" 34 | [bparser file M.parseJson | file <- jsonFiles] 35 | ] 36 | 37 | bparser :: (NFData a) => FilePath -> (ByteString -> a) -> Benchmark 38 | bparser desc f = env (B.readFile path) (bench desc . nf f) 39 | where 40 | path = "data/" ++ desc 41 | 42 | csvFiles :: [FilePath] 43 | csvFiles = 44 | [ "csv-5.csv", 45 | "csv-10.csv", 46 | "csv-20.csv", 47 | "csv-40.csv" 48 | ] 49 | 50 | logFiles :: [FilePath] 51 | logFiles = 52 | [ "log-5.log", 53 | "log-10.log", 54 | "log-20.log", 55 | "log-40.log" 56 | ] 57 | 58 | jsonFiles :: [FilePath] 59 | jsonFiles = 60 | [ "json-5.json", 61 | "json-10.json", 62 | "json-20.json", 63 | "json-40.json" 64 | ] 65 | -------------------------------------------------------------------------------- /parsers-bench/data/csv-10.csv: -------------------------------------------------------------------------------- 1 | something,10,yes,"here we go","Meet me in Montauk." 2 | fifty two,11,yes,"here we go","Meet me in Montauk." 3 | something,12,yes,"here we go","Meet me in Montauk." 4 | "foo"" bar",13,yes,,"Meet me in Montauk." 5 | something,14,yes,"here we go","Meet me in Montauk." 6 | something,10,yes,"here we go","Meet me in Montauk." 7 | fifty two,11,yes,"here we go","Meet me in Montauk." 8 | something,12,yes,"here we go","Meet me in Montauk." 9 | "foo"" bar",13,yes,,"Meet me in Montauk." 10 | something,14,yes,"here we go","Meet me in Montauk." 11 | -------------------------------------------------------------------------------- /parsers-bench/data/csv-20.csv: -------------------------------------------------------------------------------- 1 | something,10,yes,"here we go","Meet me in Montauk." 2 | fifty two,11,yes,"here we go","Meet me in Montauk." 3 | something,12,yes,"here we go","Meet me in Montauk." 4 | "foo"" bar",13,yes,,"Meet me in Montauk." 5 | something,14,yes,"here we go","Meet me in Montauk." 6 | something,10,yes,"here we go","Meet me in Montauk." 7 | fifty two,11,yes,"here we go","Meet me in Montauk." 8 | something,12,yes,"here we go","Meet me in Montauk." 9 | "foo"" bar",13,yes,,"Meet me in Montauk." 10 | something,14,yes,"here we go","Meet me in Montauk." 11 | something,10,yes,"here we go","Meet me in Montauk." 12 | fifty two,11,yes,"here we go","Meet me in Montauk." 13 | something,12,yes,"here we go","Meet me in Montauk." 14 | "foo"" bar",13,yes,,"Meet me in Montauk." 15 | something,14,yes,"here we go","Meet me in Montauk." 16 | something,10,yes,"here we go","Meet me in Montauk." 17 | fifty two,11,yes,"here we go","Meet me in Montauk." 18 | something,12,yes,"here we go","Meet me in Montauk." 19 | "foo"" bar",13,yes,,"Meet me in Montauk." 20 | something,14,yes,"here we go","Meet me in Montauk." 21 | -------------------------------------------------------------------------------- /parsers-bench/data/csv-40.csv: -------------------------------------------------------------------------------- 1 | something,10,yes,"here we go","Meet me in Montauk." 2 | fifty two,11,yes,"here we go","Meet me in Montauk." 3 | something,12,yes,"here we go","Meet me in Montauk." 4 | "foo"" bar",13,yes,,"Meet me in Montauk." 5 | something,14,yes,"here we go","Meet me in Montauk." 6 | something,10,yes,"here we go","Meet me in Montauk." 7 | fifty two,11,yes,"here we go","Meet me in Montauk." 8 | something,12,yes,"here we go","Meet me in Montauk." 9 | "foo"" bar",13,yes,,"Meet me in Montauk." 10 | something,14,yes,"here we go","Meet me in Montauk." 11 | something,10,yes,"here we go","Meet me in Montauk." 12 | fifty two,11,yes,"here we go","Meet me in Montauk." 13 | something,12,yes,"here we go","Meet me in Montauk." 14 | "foo"" bar",13,yes,,"Meet me in Montauk." 15 | something,14,yes,"here we go","Meet me in Montauk." 16 | something,10,yes,"here we go","Meet me in Montauk." 17 | fifty two,11,yes,"here we go","Meet me in Montauk." 18 | something,12,yes,"here we go","Meet me in Montauk." 19 | "foo"" bar",13,yes,,"Meet me in Montauk." 20 | something,14,yes,"here we go","Meet me in Montauk." 21 | something,10,yes,"here we go","Meet me in Montauk." 22 | fifty two,11,yes,"here we go","Meet me in Montauk." 23 | something,12,yes,"here we go","Meet me in Montauk." 24 | "foo"" bar",13,yes,,"Meet me in Montauk." 25 | something,14,yes,"here we go","Meet me in Montauk." 26 | something,10,yes,"here we go","Meet me in Montauk." 27 | fifty two,11,yes,"here we go","Meet me in Montauk." 28 | something,12,yes,"here we go","Meet me in Montauk." 29 | "foo"" bar",13,yes,,"Meet me in Montauk." 30 | something,14,yes,"here we go","Meet me in Montauk." 31 | something,10,yes,"here we go","Meet me in Montauk." 32 | fifty two,11,yes,"here we go","Meet me in Montauk." 33 | something,12,yes,"here we go","Meet me in Montauk." 34 | "foo"" bar",13,yes,,"Meet me in Montauk." 35 | something,14,yes,"here we go","Meet me in Montauk." 36 | something,10,yes,"here we go","Meet me in Montauk." 37 | fifty two,11,yes,"here we go","Meet me in Montauk." 38 | something,12,yes,"here we go","Meet me in Montauk." 39 | "foo"" bar",13,yes,,"Meet me in Montauk." 40 | something,14,yes,"here we go","Meet me in Montauk." 41 | -------------------------------------------------------------------------------- /parsers-bench/data/csv-5.csv: -------------------------------------------------------------------------------- 1 | something,10,yes,"here we go","Meet me in Montauk." 2 | fifty two,11,yes,"here we go","Meet me in Montauk." 3 | something,12,yes,"here we go","Meet me in Montauk." 4 | "foo"" bar",13,yes,,"Meet me in Montauk." 5 | something,14,yes,"here we go","Meet me in Montauk." 6 | -------------------------------------------------------------------------------- /parsers-bench/data/json-10.json: -------------------------------------------------------------------------------- 1 | { 2 | "something": 5, 3 | "another_thing": false, 4 | "price": { "min": 200, "max": 400 }, 5 | "something": 5, 6 | "another_thing": false, 7 | "price": { "min": 200, "max": 400 }, 8 | "something": 5, 9 | "another_thing": false 10 | } 11 | -------------------------------------------------------------------------------- /parsers-bench/data/json-20.json: -------------------------------------------------------------------------------- 1 | { 2 | "something": 5, 3 | "another_thing": false, 4 | "price": { "min": 200, "max": 400 }, 5 | "something": 5, 6 | "another_thing": false, 7 | "price": { "min": 200, "max": 400 }, 8 | "something": 5, 9 | "another_thing": false, 10 | "price": { "min": 200, "max": 400 }, 11 | "something": 5.0, 12 | "another_thing": true, 13 | "price": { "min": 200, "max": 400 }, 14 | "something": 533, 15 | "another_thing": [1,2,3], 16 | "price": { "min": 200, "max": 400 }, 17 | "something": 5, 18 | "another_thing": false, 19 | "direction": "north-east" 20 | } 21 | -------------------------------------------------------------------------------- /parsers-bench/data/json-40.json: -------------------------------------------------------------------------------- 1 | { 2 | "something": 5, 3 | "another_thing": false, 4 | "price": { "min": 200, "max": 400 }, 5 | "something": 5, 6 | "another_thing": false, 7 | "price": { "min": 200, "max": 400 }, 8 | "something": 5, 9 | "another_thing": false, 10 | "price": { "min": 200, "max": 400 }, 11 | "something": 5.0, 12 | "another_thing": true, 13 | "price": { "min": 200, "max": 400 }, 14 | "something": 533, 15 | "another_thing": [1,2,3], 16 | "price": { "min": 200, "max": 400 }, 17 | "something": 5, 18 | "another_thing": false, 19 | "direction": "north-east", 20 | "something": 5, 21 | "another_thing": false, 22 | "price": { "min": 200, "max": 400 }, 23 | "something": 5, 24 | "another_thing": false, 25 | "price": { "min": 200, "max": 400 }, 26 | "something": 5, 27 | "another_thing": false, 28 | "price": { "min": 200, "max": 400 }, 29 | "something": 5.0, 30 | "another_thing": true, 31 | "price": { "min": 200, "max": 400 }, 32 | "something": 533, 33 | "another_thing": [1,2,3], 34 | "price": { "min": 200, "max": 400 }, 35 | "something": 5, 36 | "another_thing": false, 37 | "direction": "north-east", 38 | "something": 5, 39 | "direction": "north-east" 40 | } 41 | -------------------------------------------------------------------------------- /parsers-bench/data/json-5.json: -------------------------------------------------------------------------------- 1 | { 2 | "something": 5, 3 | "another_thing": false, 4 | "price": { "min": 200, "max": 400 } 5 | } 6 | -------------------------------------------------------------------------------- /parsers-bench/data/log-10.log: -------------------------------------------------------------------------------- 1 | 2013-06-29 11:16:23 124.67.34.60 keyboard 2 | 2013-06-29 11:32:12 212.141.23.67 mouse 3 | 2013-06-29 11:33:08 212.141.23.67 monitor 4 | 2013-06-29 12:12:34 125.80.32.31 speakers 5 | 2013-06-29 12:51:50 101.40.50.62 keyboard 6 | 2013-06-29 11:16:23 124.67.34.60 keyboard 7 | 2013-06-29 11:32:12 212.141.23.67 mouse 8 | 2013-06-29 11:33:08 212.141.23.67 monitor 9 | 2013-06-29 12:12:34 125.80.32.31 speakers 10 | 2013-06-29 12:51:50 101.40.50.62 keyboard 11 | -------------------------------------------------------------------------------- /parsers-bench/data/log-20.log: -------------------------------------------------------------------------------- 1 | 2013-06-29 11:16:23 124.67.34.60 keyboard 2 | 2013-06-29 11:32:12 212.141.23.67 mouse 3 | 2013-06-29 11:33:08 212.141.23.67 monitor 4 | 2013-06-29 12:12:34 125.80.32.31 speakers 5 | 2013-06-29 12:51:50 101.40.50.62 keyboard 6 | 2013-06-29 11:16:23 124.67.34.60 keyboard 7 | 2013-06-29 11:32:12 212.141.23.67 mouse 8 | 2013-06-29 11:33:08 212.141.23.67 monitor 9 | 2013-06-29 12:12:34 125.80.32.31 speakers 10 | 2013-06-29 12:51:50 101.40.50.62 keyboard 11 | 2013-06-29 11:16:23 124.67.34.60 keyboard 12 | 2013-06-29 11:32:12 212.141.23.67 mouse 13 | 2013-06-29 11:33:08 212.141.23.67 monitor 14 | 2013-06-29 12:12:34 125.80.32.31 speakers 15 | 2013-06-29 12:51:50 101.40.50.62 keyboard 16 | 2013-06-29 11:16:23 124.67.34.60 keyboard 17 | 2013-06-29 11:32:12 212.141.23.67 mouse 18 | 2013-06-29 11:33:08 212.141.23.67 monitor 19 | 2013-06-29 12:12:34 125.80.32.31 speakers 20 | 2013-06-29 12:51:50 101.40.50.62 keyboard 21 | -------------------------------------------------------------------------------- /parsers-bench/data/log-40.log: -------------------------------------------------------------------------------- 1 | 2013-06-29 11:16:23 124.67.34.60 keyboard 2 | 2013-06-29 11:32:12 212.141.23.67 mouse 3 | 2013-06-29 11:33:08 212.141.23.67 monitor 4 | 2013-06-29 12:12:34 125.80.32.31 speakers 5 | 2013-06-29 12:51:50 101.40.50.62 keyboard 6 | 2013-06-29 11:16:23 124.67.34.60 keyboard 7 | 2013-06-29 11:32:12 212.141.23.67 mouse 8 | 2013-06-29 11:33:08 212.141.23.67 monitor 9 | 2013-06-29 12:12:34 125.80.32.31 speakers 10 | 2013-06-29 12:51:50 101.40.50.62 keyboard 11 | 2013-06-29 11:16:23 124.67.34.60 keyboard 12 | 2013-06-29 11:32:12 212.141.23.67 mouse 13 | 2013-06-29 11:33:08 212.141.23.67 monitor 14 | 2013-06-29 12:12:34 125.80.32.31 speakers 15 | 2013-06-29 12:51:50 101.40.50.62 keyboard 16 | 2013-06-29 11:16:23 124.67.34.60 keyboard 17 | 2013-06-29 11:32:12 212.141.23.67 mouse 18 | 2013-06-29 11:33:08 212.141.23.67 monitor 19 | 2013-06-29 12:12:34 125.80.32.31 speakers 20 | 2013-06-29 12:51:50 101.40.50.62 keyboard 21 | 2013-06-29 11:16:23 124.67.34.60 keyboard 22 | 2013-06-29 11:32:12 212.141.23.67 mouse 23 | 2013-06-29 11:33:08 212.141.23.67 monitor 24 | 2013-06-29 12:12:34 125.80.32.31 speakers 25 | 2013-06-29 12:51:50 101.40.50.62 keyboard 26 | 2013-06-29 11:16:23 124.67.34.60 keyboard 27 | 2013-06-29 11:32:12 212.141.23.67 mouse 28 | 2013-06-29 11:33:08 212.141.23.67 monitor 29 | 2013-06-29 12:12:34 125.80.32.31 speakers 30 | 2013-06-29 12:51:50 101.40.50.62 keyboard 31 | 2013-06-29 11:16:23 124.67.34.60 keyboard 32 | 2013-06-29 11:32:12 212.141.23.67 mouse 33 | 2013-06-29 11:33:08 212.141.23.67 monitor 34 | 2013-06-29 12:12:34 125.80.32.31 speakers 35 | 2013-06-29 12:51:50 101.40.50.62 keyboard 36 | 2013-06-29 11:16:23 124.67.34.60 keyboard 37 | 2013-06-29 11:32:12 212.141.23.67 mouse 38 | 2013-06-29 11:33:08 212.141.23.67 monitor 39 | 2013-06-29 12:12:34 125.80.32.31 speakers 40 | 2013-06-29 12:51:50 101.40.50.62 keyboard 41 | -------------------------------------------------------------------------------- /parsers-bench/data/log-5.log: -------------------------------------------------------------------------------- 1 | 2013-06-29 11:16:23 124.67.34.60 keyboard 2 | 2013-06-29 11:32:12 212.141.23.67 mouse 3 | 2013-06-29 11:33:08 212.141.23.67 monitor 4 | 2013-06-29 12:12:34 125.80.32.31 speakers 5 | 2013-06-29 12:51:50 101.40.50.62 keyboard 6 | -------------------------------------------------------------------------------- /parsers-bench/parsers-bench.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: >=1.18 2 | name: parsers-bench 3 | version: 0.1.0 4 | license: BSD3 5 | maintainer: Mark Karpov 6 | author: Mark Karpov 7 | tested-with: ghc ==9.8.4 ghc ==9.10.1 ghc ==9.12.1 8 | homepage: https://github.com/mrkkrp/megaparsec 9 | bug-reports: https://github.com/mrkkrp/megaparsec/issues 10 | synopsis: Performance benchmarks: Megaparsec vs Attoparsec 11 | description: Performance benchmarks: Megaparsec vs Attoparsec. 12 | category: Parsing, Benchmark 13 | build-type: Simple 14 | extra-doc-files: README.md 15 | 16 | source-repository head 17 | type: git 18 | location: https://github.com/mrkkrp/parsers-bench.git 19 | 20 | flag dev 21 | description: Turn on development settings. 22 | default: False 23 | manual: True 24 | 25 | library 26 | exposed-modules: 27 | ParsersBench.CSV.Attoparsec 28 | ParsersBench.CSV.Megaparsec 29 | ParsersBench.Json.Attoparsec 30 | ParsersBench.Json.Common 31 | ParsersBench.Json.Megaparsec 32 | ParsersBench.Log.Attoparsec 33 | ParsersBench.Log.Common 34 | ParsersBench.Log.Megaparsec 35 | 36 | default-language: Haskell2010 37 | build-depends: 38 | base -any, 39 | attoparsec -any, 40 | bytestring -any, 41 | deepseq -any, 42 | megaparsec -any, 43 | parser-combinators -any, 44 | scientific -any, 45 | text -any, 46 | time -any, 47 | unordered-containers -any, 48 | vector -any 49 | 50 | if flag(dev) 51 | ghc-options: -Wall -Werror 52 | 53 | else 54 | ghc-options: -O2 -Wall 55 | 56 | benchmark bench-speed 57 | type: exitcode-stdio-1.0 58 | main-is: Main.hs 59 | hs-source-dirs: bench/speed 60 | default-language: Haskell2010 61 | build-depends: 62 | base -any, 63 | bytestring -any, 64 | criterion -any, 65 | deepseq -any, 66 | parsers-bench -any 67 | 68 | if flag(dev) 69 | ghc-options: -O2 -Wall -Werror 70 | 71 | else 72 | ghc-options: -O2 -Wall 73 | 74 | benchmark bench-memory 75 | type: exitcode-stdio-1.0 76 | main-is: Main.hs 77 | hs-source-dirs: bench/memory 78 | default-language: Haskell2010 79 | build-depends: 80 | base -any, 81 | bytestring -any, 82 | deepseq -any, 83 | parsers-bench -any, 84 | weigh -any 85 | 86 | if flag(dev) 87 | ghc-options: -O2 -Wall -Werror 88 | 89 | else 90 | ghc-options: -O2 -Wall 91 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? (import ./nix/nixpkgs) }: 2 | 3 | (import ./default.nix { inherit pkgs; }).shell 4 | --------------------------------------------------------------------------------