├── .github
    ├── dependabot.yml
    └── workflows
    │   └── ci.yaml
├── .gitignore
├── CHANGELOG.md
├── HACKING.md
├── LICENSE.md
├── README.md
├── Text
    ├── Megaparsec.hs
    └── Megaparsec
    │   ├── Byte.hs
    │   ├── Byte
    │       ├── Binary.hs
    │       └── Lexer.hs
    │   ├── Char.hs
    │   ├── Char
    │       └── Lexer.hs
    │   ├── Class.hs
    │   ├── Common.hs
    │   ├── Debug.hs
    │   ├── Error.hs
    │   ├── Error.hs-boot
    │   ├── Error
    │       └── Builder.hs
    │   ├── Internal.hs
    │   ├── Internal.hs-boot
    │   ├── Lexer.hs
    │   ├── Pos.hs
    │   ├── State.hs
    │   ├── Stream.hs
    │   └── Unicode.hs
├── bench
    ├── memory
    │   └── Main.hs
    └── speed
    │   └── Main.hs
├── cabal.project
├── flake.lock
├── flake.nix
├── megaparsec-tests
    ├── LICENSE.md
    ├── README.md
    ├── megaparsec-tests.cabal
    ├── src
    │   └── Test
    │   │   └── Hspec
    │   │       └── Megaparsec
    │   │           └── AdHoc.hs
    └── tests
    │   ├── Spec.hs
    │   └── Text
    │       ├── Megaparsec
    │           ├── Byte
    │           │   ├── BinarySpec.hs
    │           │   └── LexerSpec.hs
    │           ├── ByteSpec.hs
    │           ├── Char
    │           │   └── LexerSpec.hs
    │           ├── CharSpec.hs
    │           ├── DebugSpec.hs
    │           ├── ErrorSpec.hs
    │           ├── PosSpec.hs
    │           ├── StreamSpec.hs
    │           └── UnicodeSpec.hs
    │       └── MegaparsecSpec.hs
├── megaparsec.cabal
├── parsers-bench
    ├── ParsersBench
    │   ├── CSV
    │   │   ├── Attoparsec.hs
    │   │   └── Megaparsec.hs
    │   ├── Json
    │   │   ├── Attoparsec.hs
    │   │   ├── Common.hs
    │   │   └── Megaparsec.hs
    │   └── Log
    │   │   ├── Attoparsec.hs
    │   │   ├── Common.hs
    │   │   └── Megaparsec.hs
    ├── README.md
    ├── bench
    │   ├── memory
    │   │   └── Main.hs
    │   └── speed
    │   │   └── Main.hs
    ├── data
    │   ├── csv-10.csv
    │   ├── csv-20.csv
    │   ├── csv-40.csv
    │   ├── csv-5.csv
    │   ├── json-10.json
    │   ├── json-20.json
    │   ├── json-40.json
    │   ├── json-5.json
    │   ├── log-10.log
    │   ├── log-20.log
    │   ├── log-40.log
    │   └── log-5.log
    └── parsers-bench.cabal
└── shell.nix


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "github-actions"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "weekly"
7 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 |   pull_request:
 7 |     types:
 8 |       - opened
 9 |       - synchronize
10 | jobs:
11 |   ormolu:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - uses: haskell-actions/run-ormolu@v15
16 |   build:
17 |     runs-on: ubuntu-latest
18 |     needs: ormolu
19 |     strategy:
20 |       matrix:
21 |         cabal: ["3.12"]
22 |         ghc:   ["9.8.4", "9.10.1", "9.12.1"]
23 |     steps:
24 |       - uses: actions/checkout@v4
25 |       - uses: haskell-actions/setup@v2
26 |         id: setup-haskell-cabal
27 |         with:
28 |           ghc-version: ${{ matrix.ghc }}
29 |           cabal-version: ${{ matrix.cabal }}
30 |       - run: cabal update
31 |       - run: cabal freeze
32 |       - uses: actions/cache@v4.0.0
33 |         with:
34 |           path: |
35 |             ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
36 |             dist-newstyle
37 |           key: ${{ runner.os }}-${{ matrix.ghc }}-0-${{ hashFiles('cabal.project.freeze') }}
38 |           restore-keys: |
39 |             ${{ runner.os }}-${{ matrix.ghc }}-0-
40 |       - run: cabal format
41 |       - run: pushd megaparsec-tests && cabal format && popd
42 |       - run: git diff --exit-code --color=always
43 |       - run: cabal build all
44 |       - run: cabal test all
45 |       - run: cabal haddock megaparsec
46 |       - run: cabal haddock megaparsec-tests
47 |       - run: cabal sdist
48 |       - run: pushd megaparsec-tests && cabal sdist && popd
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *#
 2 | *.aux
 3 | *.chi
 4 | *.chs.h
 5 | *.dyn_hi
 6 | *.dyn_o
 7 | *.eventlog
 8 | *.hi
 9 | *.hp
10 | *.o
11 | *.prof
12 | *~
13 | .HTF/
14 | .cabal-sandbox/
15 | .ghc.environment.*
16 | .hpc
17 | .hsenv
18 | .stack-work/
19 | .virtualenv
20 | .vscode
21 | TAGS
22 | benchmarks.tix
23 | cabal-dev
24 | cabal.config
25 | cabal.project.local
26 | cabal.sandbox.config
27 | dist-*/
28 | dist/
29 | hie.yaml
30 | stack.yaml
31 | stack.yaml.lock
32 | 


--------------------------------------------------------------------------------
/HACKING.md:
--------------------------------------------------------------------------------
  1 | # Hacking
  2 | 
  3 | * [Development with `ghcid`](#development-with-ghcid)
  4 | * [Running unit tests](#running-unit-tests)
  5 | * [Checking dependent packages](#checking-dependent-packages)
  6 | * [Benchmarks](#benchmarks)
  7 | * [Releasing a new version](#releasing-a-new-version)
  8 | 
  9 | This document tries to describe everything you need to know to
 10 | develop/maintain Megaparsec.
 11 | 
 12 | ## Development with `ghcid`
 13 | 
 14 | We use `nix` for development. First enter the Nix shell:
 15 | 
 16 | ```console
 17 | $ nix develop
 18 | ```
 19 | 
 20 | Inside the shell you can:
 21 | 
 22 | * Build the `megaparsec` and `megaparsec-tests` packages with `cabal build
 23 |   all`.
 24 | 
 25 | * Run tests from the `megaparsec-tests` package with `cabal test all`.
 26 | 
 27 | * Run `ghcid` for interactive feedback as you edit with `ghcid
 28 |   --command="cabal repl megaparsec"` or `ghcid --command="cabal repl
 29 |   megaparsec-tests --enable-tests"` depending on the package you're editing.
 30 | 
 31 | ## Running unit tests
 32 | 
 33 | The tests in `megaparsec-tests` are usually not enough to gain confidence in
 34 | non-trivial changes. It is wise to use tests from other packages:
 35 | 
 36 | ```console
 37 | $ nix build .#all_base --no-link
 38 | ```
 39 | 
 40 | The `base` group includes building and testing the following packages:
 41 | 
 42 | * `megaparsec`
 43 | * `hspec-megaparsec`
 44 | * `megaparsec-tests`
 45 | * `parser-combinators-tests`
 46 | 
 47 | It is worth noting that individual derivations from `base` can be built like
 48 | this:
 49 | 
 50 | ```console
 51 | $ nix build .#base/parser-combinators-tests --no-link
 52 | ```
 53 | 
 54 | ## Checking dependent packages
 55 | 
 56 | To find out how your changes affect a selected set of dependent packages do:
 57 | 
 58 | ```console
 59 | $ nix build .#all_deps --no-link
 60 | ```
 61 | 
 62 | The “selected set” includes packages that are known to be high-quality,
 63 | well-tested, and non-trivial, so they are good targets for this sort of
 64 | testing. You can also try to build and test a particular package like this:
 65 | 
 66 | ```console
 67 | $ nix build .#deps/mmark --no-link
 68 | ```
 69 | 
 70 | When you introduce a breaking change, some packages may stop compiling.
 71 | Usually it's easy enough to create a patch and make it compile with the
 72 | current dev version of Megaparsec. To do so, follow these steps:
 73 | 
 74 | * Start by cloning the repo of the failing package.
 75 | 
 76 | * Checkout commit that corresponds to the version our current `nixpkgs`
 77 |   packages use.
 78 | 
 79 | * Attempt to compile the package with current dev version of Megaparsec to
 80 |   reproduce the build errors. Often, if the broken package uses stack you
 81 |   can just add the path to the updated Megaparsec directory to the
 82 |   `extra-deps` section.
 83 | 
 84 | * Perform whatever changes that are necessary to make the package work.
 85 | 
 86 | * Use `git diff > my-package.patch` or `git diff --cached >
 87 |   my-package.patch` to create the patch file. The first command will output
 88 |   unstaged changes, while the second command will write only staged changes.
 89 | 
 90 | * Adjust `default.nix` to apply the patch. You want to edit the `deps`
 91 |   attribute set. For example:
 92 | 
 93 |   ```nix
 94 |   # Dependent packages of interest:
 95 |   deps = {
 96 |     # ...
 97 |     idris = patch haskellPackages.idris ./nix/patches/idris.patch;
 98 |   };
 99 |   ```
100 | 
101 | ## Benchmarks
102 | 
103 | To build all benchmarks run:
104 | 
105 | ```console
106 | $ nix build .#all_benches
107 | ```
108 | 
109 | This will create several symlinks in `result`. It is also possible to build
110 | benchmarks for just a specific package:
111 | 
112 | ```console
113 | $ nix build .#benches/megaparsec # builds megaparsec's microbenchmarks
114 | ```
115 | 
116 | `cd` to the `bench` sub-directory and run benchmarks from there because some
117 | benchmarks need data to run on and the paths are relative, so it'll fail if
118 | run from the root of Megaparsec's repo.
119 | 
120 | ## Releasing a new version
121 | 
122 | To release a new version of Megaparsec, follow these steps:
123 | 
124 | * Bump version for both `megaparsec` and `megaparsec-tests`. In the
125 |   `megaparsec.cabal` file update the version of the package. In the
126 |   `megaparsec-tests.cabal` file update the version of the package (we keep
127 |   it in sync with version of `megaparsec`) as well as the versions of
128 |   `megaparsec` it depends on.
129 | 
130 | * Create git tag and push it to the repo.
131 | 
132 | * Generate distribution tarballs by running:
133 | 
134 |   ```console
135 |   $ nix build .#all_dist
136 |   ```
137 | 
138 |   This will create `result` with two symlinks to directories containing the
139 |   tarballs. Typically they are called `result/megaparsec-source-*` and
140 |   `result/megaparsec-tests-source-*`.
141 | 
142 | * To upload the tarballs to Hackage, execute the following:
143 | 
144 |   ```console
145 |   $ cabal upload --publish result/megaparsec-source-*/megaparsec-*.tar.gz
146 |   $ cabal upload --publish result/megaparsec-tests-source-*/megaparsec-tests-*.tar.gz
147 |   ```
148 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright © 2015–present Megaparsec contributors\
 2 | Copyright © 2007 Paolo Martini\
 3 | Copyright © 1999–2000 Daan Leijen
 4 | 
 5 | All rights reserved.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without
 8 | modification, are permitted provided that the following conditions are met:
 9 | 
10 | * Redistributions of source code must retain the above copyright notice,
11 |   this list of conditions and the following disclaimer.
12 | 
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 |   this list of conditions and the following disclaimer in the documentation
15 |   and/or other materials provided with the distribution.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
18 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
20 | NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
23 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Byte.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Safe #-}
  2 | {-# LANGUAGE ScopedTypeVariables #-}
  3 | {-# LANGUAGE TypeFamilies #-}
  4 | {-# LANGUAGE TypeOperators #-}
  5 | 
  6 | -- |
  7 | -- Module      :  Text.Megaparsec.Byte
  8 | -- Copyright   :  © 2015–present Megaparsec contributors
  9 | -- License     :  FreeBSD
 10 | --
 11 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 12 | -- Stability   :  experimental
 13 | -- Portability :  portable
 14 | --
 15 | -- Commonly used binary parsers.
 16 | --
 17 | -- @since 6.0.0
 18 | module Text.Megaparsec.Byte
 19 |   ( -- * Simple parsers
 20 |     newline,
 21 |     crlf,
 22 |     eol,
 23 |     tab,
 24 |     space,
 25 |     hspace,
 26 |     space1,
 27 |     hspace1,
 28 | 
 29 |     -- * Categories of characters
 30 |     controlChar,
 31 |     spaceChar,
 32 |     upperChar,
 33 |     lowerChar,
 34 |     letterChar,
 35 |     alphaNumChar,
 36 |     printChar,
 37 |     digitChar,
 38 |     binDigitChar,
 39 |     octDigitChar,
 40 |     hexDigitChar,
 41 |     asciiChar,
 42 | 
 43 |     -- * Single byte
 44 |     char,
 45 |     char',
 46 | 
 47 |     -- * Sequence of bytes
 48 |     string,
 49 |     string',
 50 |   )
 51 | where
 52 | 
 53 | import Control.Applicative
 54 | import Data.Char hiding (isSpace, toLower, toUpper)
 55 | import Data.Functor (void)
 56 | import Data.Proxy
 57 | import Data.Word (Word8)
 58 | import Text.Megaparsec
 59 | import Text.Megaparsec.Common
 60 | 
 61 | ----------------------------------------------------------------------------
 62 | -- Simple parsers
 63 | 
 64 | -- | Parse a newline byte.
 65 | newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
 66 | newline = char 10
 67 | {-# INLINE newline #-}
 68 | 
 69 | -- | Parse a carriage return character followed by a newline character.
 70 | -- Return the sequence of characters parsed.
 71 | crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
 72 | crlf = string (tokensToChunk (Proxy :: Proxy s) [13, 10])
 73 | {-# INLINE crlf #-}
 74 | 
 75 | -- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
 76 | -- sequence of characters parsed.
 77 | eol :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
 78 | eol =
 79 |   (tokenToChunk (Proxy :: Proxy s) <$> newline)
 80 |     <|> crlf
 81 |     <?> "end of line"
 82 | {-# INLINE eol #-}
 83 | 
 84 | -- | Parse a tab character.
 85 | tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
 86 | tab = char 9
 87 | {-# INLINE tab #-}
 88 | 
 89 | -- | Skip /zero/ or more white space characters.
 90 | --
 91 | -- See also: 'skipMany' and 'spaceChar'.
 92 | space :: (MonadParsec e s m, Token s ~ Word8) => m ()
 93 | space = void $ takeWhileP (Just "white space") isSpace
 94 | {-# INLINE space #-}
 95 | 
 96 | -- | Like 'space', but does not accept newlines and carriage returns.
 97 | --
 98 | -- @since 9.0.0
 99 | hspace :: (MonadParsec e s m, Token s ~ Word8) => m ()
100 | hspace = void $ takeWhileP (Just "white space") isHSpace
101 | {-# INLINE hspace #-}
102 | 
103 | -- | Skip /one/ or more white space characters.
104 | --
105 | -- See also: 'skipSome' and 'spaceChar'.
106 | space1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
107 | space1 = void $ takeWhile1P (Just "white space") isSpace
108 | {-# INLINE space1 #-}
109 | 
110 | -- | Like 'space1', but does not accept newlines and carriage returns.
111 | --
112 | -- @since 9.0.0
113 | hspace1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
114 | hspace1 = void $ takeWhile1P (Just "white space") isHSpace
115 | {-# INLINE hspace1 #-}
116 | 
117 | ----------------------------------------------------------------------------
118 | -- Categories of characters
119 | 
120 | -- | Parse a control character.
121 | controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
122 | controlChar = satisfy (isControl . toChar) <?> "control character"
123 | {-# INLINE controlChar #-}
124 | 
125 | -- | Parse a space character, and the control characters: tab, newline,
126 | -- carriage return, form feed, and vertical tab.
127 | spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
128 | spaceChar = satisfy isSpace <?> "white space"
129 | {-# INLINE spaceChar #-}
130 | 
131 | -- | Parse an upper-case character.
132 | upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
133 | upperChar = satisfy (isUpper . toChar) <?> "uppercase letter"
134 | {-# INLINE upperChar #-}
135 | 
136 | -- | Parse a lower-case alphabetic character.
137 | lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
138 | lowerChar = satisfy (isLower . toChar) <?> "lowercase letter"
139 | {-# INLINE lowerChar #-}
140 | 
141 | -- | Parse an alphabetic character: lower-case or upper-case.
142 | letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
143 | letterChar = satisfy (isLetter . toChar) <?> "letter"
144 | {-# INLINE letterChar #-}
145 | 
146 | -- | Parse an alphabetic or digit characters.
147 | alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
148 | alphaNumChar = satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
149 | {-# INLINE alphaNumChar #-}
150 | 
151 | -- | Parse a printable character: letter, number, mark, punctuation, symbol
152 | -- or space.
153 | printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
154 | printChar = satisfy (isPrint . toChar) <?> "printable character"
155 | {-# INLINE printChar #-}
156 | 
157 | -- | Parse an ASCII digit, i.e between “0” and “9”.
158 | digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
159 | digitChar = satisfy isDigit' <?> "digit"
160 |   where
161 |     isDigit' x = x >= 48 && x <= 57
162 | {-# INLINE digitChar #-}
163 | 
164 | -- | Parse a binary digit, i.e. “0” or “1”.
165 | --
166 | -- @since 7.0.0
167 | binDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
168 | binDigitChar = satisfy isBinDigit <?> "binary digit"
169 |   where
170 |     isBinDigit x = x == 48 || x == 49
171 | {-# INLINE binDigitChar #-}
172 | 
173 | -- | Parse an octal digit, i.e. between “0” and “7”.
174 | octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
175 | octDigitChar = satisfy isOctDigit' <?> "octal digit"
176 |   where
177 |     isOctDigit' x = x >= 48 && x <= 55
178 | {-# INLINE octDigitChar #-}
179 | 
180 | -- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or
181 | -- “A” and “F”.
182 | hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
183 | hexDigitChar = satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
184 | {-# INLINE hexDigitChar #-}
185 | 
186 | -- | Parse a character from the first 128 characters of the Unicode
187 | -- character set, corresponding to the ASCII character set.
188 | asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
189 | asciiChar = satisfy (< 128) <?> "ASCII character"
190 | {-# INLINE asciiChar #-}
191 | 
192 | ----------------------------------------------------------------------------
193 | -- Single byte
194 | 
195 | -- | A type-constrained version of 'single'.
196 | --
197 | -- > newline = char 10
198 | char :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
199 | char = single
200 | {-# INLINE char #-}
201 | 
202 | -- | The same as 'char' but case-insensitive. This parser returns the
203 | -- actually parsed character preserving its case.
204 | --
205 | -- >>> parseTest (char' 101) "E"
206 | -- 69 -- 'E'
207 | -- >>> parseTest (char' 101) "G"
208 | -- 1:1:
209 | -- unexpected 'G'
210 | -- expecting 'E' or 'e'
211 | char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
212 | char' c =
213 |   choice
214 |     [ char (toLower c),
215 |       char (toUpper c)
216 |     ]
217 | {-# INLINE char' #-}
218 | 
219 | ----------------------------------------------------------------------------
220 | -- Helpers
221 | 
222 | -- | 'Word8'-specialized version of 'Data.Char.isSpace'.
223 | isSpace :: Word8 -> Bool
224 | isSpace x
225 |   | x >= 9 && x <= 13 = True
226 |   | x == 32 = True
227 |   | x == 160 = True
228 |   | otherwise = False
229 | {-# INLINE isSpace #-}
230 | 
231 | -- | Like 'isSpace', but does not accept newlines and carriage returns.
232 | isHSpace :: Word8 -> Bool
233 | isHSpace x
234 |   | x == 9 = True
235 |   | x == 11 = True
236 |   | x == 12 = True
237 |   | x == 32 = True
238 |   | x == 160 = True
239 |   | otherwise = False
240 | {-# INLINE isHSpace #-}
241 | 
242 | -- | Convert a byte to char.
243 | toChar :: Word8 -> Char
244 | toChar = chr . fromIntegral
245 | {-# INLINE toChar #-}
246 | 
247 | -- | Convert a byte to its upper-case version.
248 | toUpper :: Word8 -> Word8
249 | toUpper x
250 |   | x >= 97 && x <= 122 = x - 32
251 |   | x == 247 = x -- division sign
252 |   | x == 255 = x -- latin small letter y with diaeresis
253 |   | x >= 224 = x - 32
254 |   | otherwise = x
255 | {-# INLINE toUpper #-}
256 | 
257 | -- | Convert a byte to its lower-case version.
258 | toLower :: Word8 -> Word8
259 | toLower x
260 |   | x >= 65 && x <= 90 = x + 32
261 |   | x == 215 = x -- multiplication sign
262 |   | x >= 192 && x <= 222 = x + 32
263 |   | otherwise = x
264 | {-# INLINE toLower #-}
265 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Byte/Binary.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE AllowAmbiguousTypes #-}
  2 | {-# LANGUAGE FlexibleContexts #-}
  3 | {-# LANGUAGE Safe #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | {-# LANGUAGE TypeApplications #-}
  6 | {-# LANGUAGE TypeFamilies #-}
  7 | 
  8 | -- |
  9 | -- Module      :  Text.Megaparsec.Byte.Binary
 10 | -- Copyright   :  © 2021–present Megaparsec contributors
 11 | -- License     :  FreeBSD
 12 | --
 13 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 14 | -- Stability   :  experimental
 15 | -- Portability :  portable
 16 | --
 17 | -- Binary-format number parsers.
 18 | --
 19 | -- @since 9.2.0
 20 | module Text.Megaparsec.Byte.Binary
 21 |   ( -- * Generic parsers
 22 |     BinaryChunk (..),
 23 |     anyLE,
 24 |     anyBE,
 25 | 
 26 |     -- * Parsing unsigned values
 27 |     word8,
 28 |     word16le,
 29 |     word16be,
 30 |     word32le,
 31 |     word32be,
 32 |     word64le,
 33 |     word64be,
 34 | 
 35 |     -- * Parsing signed values
 36 |     int8,
 37 |     int16le,
 38 |     int16be,
 39 |     int32le,
 40 |     int32be,
 41 |     int64le,
 42 |     int64be,
 43 |   )
 44 | where
 45 | 
 46 | import Data.Bits
 47 | import qualified Data.ByteString as B
 48 | import qualified Data.ByteString.Lazy as BL
 49 | import Data.Int
 50 | import Data.Word
 51 | import Text.Megaparsec
 52 | 
 53 | -- | Data types that can be converted to little- or big- endian numbers.
 54 | class BinaryChunk chunk where
 55 |   convertChunkBE :: (Bits a, Num a) => chunk -> a
 56 |   convertChunkLE :: (Bits a, Num a) => chunk -> a
 57 | 
 58 | instance BinaryChunk B.ByteString where
 59 |   convertChunkBE = B.foldl' go 0
 60 |     where
 61 |       go acc byte = (acc `unsafeShiftL` 8) .|. fromIntegral byte
 62 |   convertChunkLE = B.foldl' go 0
 63 |     where
 64 |       go acc byte = (acc .|. fromIntegral byte) `rotateR` 8
 65 | 
 66 | instance BinaryChunk BL.ByteString where
 67 |   convertChunkBE = BL.foldl' go 0
 68 |     where
 69 |       go acc byte = (acc `unsafeShiftL` 8) .|. fromIntegral byte
 70 |   convertChunkLE = BL.foldl' go 0
 71 |     where
 72 |       go acc byte = (acc .|. fromIntegral byte) `rotateR` 8
 73 | 
 74 | ----------------------------------------------------------------------------
 75 | -- Generic parsers
 76 | 
 77 | -- | Parse a little-endian number.
 78 | --
 79 | -- You may wish to call this with a visible type application:
 80 | --
 81 | -- > number <- anyLE (Just "little-endian 32 bit word") @Word32
 82 | anyLE ::
 83 |   forall a e s m.
 84 |   (MonadParsec e s m, FiniteBits a, Num a, BinaryChunk (Tokens s)) =>
 85 |   -- | Label, if any
 86 |   Maybe String ->
 87 |   m a
 88 | anyLE mlabel = convertChunkLE <$> takeP mlabel (finiteByteSize @a)
 89 | {-# INLINE anyLE #-}
 90 | 
 91 | -- | Parse a big-endian number.
 92 | --
 93 | -- You may wish to call this with a visible type application:
 94 | --
 95 | -- > number <- anyBE (Just "big-endian 32 bit word") @Word32
 96 | anyBE ::
 97 |   forall a e s m.
 98 |   (MonadParsec e s m, FiniteBits a, Num a, BinaryChunk (Tokens s)) =>
 99 |   -- | Label, if any
100 |   Maybe String ->
101 |   m a
102 | anyBE mlabel = convertChunkBE <$> takeP mlabel (finiteByteSize @a)
103 | {-# INLINE anyBE #-}
104 | 
105 | --------------------------------------------------------------------------------
106 | -- Parsing unsigned values
107 | 
108 | -- | Parse a 'Word8'.
109 | word8 :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word8
110 | word8 = anyBE (Just "8 bit word")
111 | {-# INLINE word8 #-}
112 | 
113 | -- | Parse a little-endian 'Word16'.
114 | word16le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word16
115 | word16le = anyLE (Just "little-endian 16 bit word")
116 | {-# INLINE word16le #-}
117 | 
118 | -- | Parse a big-endian 'Word16'.
119 | word16be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word16
120 | word16be = anyBE (Just "big-endian 16 bit word")
121 | {-# INLINE word16be #-}
122 | 
123 | -- | Parse a little-endian 'Word32'.
124 | word32le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word32
125 | word32le = anyLE (Just "little-endian 32 bit word")
126 | {-# INLINE word32le #-}
127 | 
128 | -- | Parse a big-endian 'Word32'.
129 | word32be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word32
130 | word32be = anyBE (Just "big-endian 32 bit word")
131 | {-# INLINE word32be #-}
132 | 
133 | -- | Parse a little-endian 'Word64'.
134 | word64le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word64
135 | word64le = anyLE (Just "little-endian 64 word")
136 | {-# INLINE word64le #-}
137 | 
138 | -- | Parse a big-endian 'Word64'.
139 | word64be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Word64
140 | word64be = anyBE (Just "big-endian 64 word")
141 | {-# INLINE word64be #-}
142 | 
143 | ----------------------------------------------------------------------------
144 | -- Parsing signed values
145 | 
146 | -- | Parse a 'Int8'.
147 | int8 :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int8
148 | int8 = anyBE (Just "8 bit int")
149 | {-# INLINE int8 #-}
150 | 
151 | -- | Parse a little-endian 'Int16'.
152 | int16le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int16
153 | int16le = anyLE (Just "little-endian 16 bit int")
154 | {-# INLINE int16le #-}
155 | 
156 | -- | Parse a big-endian 'Int16'.
157 | int16be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int16
158 | int16be = anyBE (Just "big-endian 16 bit int")
159 | {-# INLINE int16be #-}
160 | 
161 | -- | Parse a little-endian 'Int32'.
162 | int32le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int32
163 | int32le = anyLE (Just "little-endian 32 bit int")
164 | {-# INLINE int32le #-}
165 | 
166 | -- | Parse a big-endian 'Int32'.
167 | int32be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int32
168 | int32be = anyBE (Just "big-endian 32 bit int")
169 | {-# INLINE int32be #-}
170 | 
171 | -- | Parse a little-endian 'Int64'.
172 | int64le :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int64
173 | int64le = anyLE (Just "little-endian 64 int")
174 | {-# INLINE int64le #-}
175 | 
176 | -- | Parse a big-endian 'Int64'.
177 | int64be :: (MonadParsec e s m, BinaryChunk (Tokens s)) => m Int64
178 | int64be = anyBE (Just "big-endian 64 int")
179 | {-# INLINE int64be #-}
180 | 
181 | --------------------------------------------------------------------------------
182 | -- Helpers
183 | 
184 | -- | Return the number of bytes in the argument.
185 | --
186 | -- Performs ceiling division, so byte-unaligned types (bitsize not a
187 | -- multiple of 8) should work, but further usage is not tested.
188 | finiteByteSize :: forall a. (FiniteBits a) => Int
189 | finiteByteSize = finiteBitSize @a undefined `ceilDiv` 8
190 |   where
191 |     ceilDiv x y = (x + y - 1) `div` y
192 | {-# INLINE finiteByteSize #-}
193 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Byte/Lexer.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Safe #-}
  2 | {-# LANGUAGE ScopedTypeVariables #-}
  3 | {-# LANGUAGE TypeFamilies #-}
  4 | {-# LANGUAGE TypeOperators #-}
  5 | 
  6 | -- |
  7 | -- Module      :  Text.Megaparsec.Byte.Lexer
  8 | -- Copyright   :  © 2015–present Megaparsec contributors
  9 | -- License     :  FreeBSD
 10 | --
 11 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 12 | -- Stability   :  experimental
 13 | -- Portability :  portable
 14 | --
 15 | -- Stripped-down version of "Text.Megaparsec.Char.Lexer" for streams of
 16 | -- bytes.
 17 | --
 18 | -- This module is intended to be imported qualified:
 19 | --
 20 | -- > import qualified Text.Megaparsec.Byte.Lexer as L
 21 | module Text.Megaparsec.Byte.Lexer
 22 |   ( -- * White space
 23 |     space,
 24 |     lexeme,
 25 |     symbol,
 26 |     symbol',
 27 |     skipLineComment,
 28 |     skipBlockComment,
 29 |     skipBlockCommentNested,
 30 | 
 31 |     -- * Numbers
 32 |     decimal,
 33 |     binary,
 34 |     octal,
 35 |     hexadecimal,
 36 |     scientific,
 37 |     float,
 38 |     signed,
 39 |   )
 40 | where
 41 | 
 42 | import Control.Applicative
 43 | import Data.Functor (void)
 44 | import Data.List (foldl')
 45 | import Data.Proxy
 46 | import Data.Scientific (Scientific)
 47 | import qualified Data.Scientific as Sci
 48 | import Data.Word (Word8)
 49 | import Text.Megaparsec
 50 | import qualified Text.Megaparsec.Byte as B
 51 | import Text.Megaparsec.Lexer
 52 | 
 53 | ----------------------------------------------------------------------------
 54 | -- White space
 55 | 
 56 | -- | Given a comment prefix this function returns a parser that skips line
 57 | -- comments. Note that it stops just before the newline character but
 58 | -- doesn't consume the newline. Newline is either supposed to be consumed by
 59 | -- 'space' parser or picked up manually.
 60 | skipLineComment ::
 61 |   (MonadParsec e s m, Token s ~ Word8) =>
 62 |   -- | Line comment prefix
 63 |   Tokens s ->
 64 |   m ()
 65 | skipLineComment prefix =
 66 |   B.string prefix *> void (takeWhileP (Just "character") (/= 10))
 67 | {-# INLINEABLE skipLineComment #-}
 68 | 
 69 | -- | @'skipBlockComment' start end@ skips non-nested block comment starting
 70 | -- with @start@ and ending with @end@.
 71 | skipBlockComment ::
 72 |   (MonadParsec e s m) =>
 73 |   -- | Start of block comment
 74 |   Tokens s ->
 75 |   -- | End of block comment
 76 |   Tokens s ->
 77 |   m ()
 78 | skipBlockComment start end = p >> void (manyTill anySingle n)
 79 |   where
 80 |     p = B.string start
 81 |     n = B.string end
 82 | {-# INLINEABLE skipBlockComment #-}
 83 | 
 84 | -- | @'skipBlockCommentNested' start end@ skips possibly nested block
 85 | -- comment starting with @start@ and ending with @end@.
 86 | --
 87 | -- @since 5.0.0
 88 | skipBlockCommentNested ::
 89 |   (MonadParsec e s m, Token s ~ Word8) =>
 90 |   -- | Start of block comment
 91 |   Tokens s ->
 92 |   -- | End of block comment
 93 |   Tokens s ->
 94 |   m ()
 95 | skipBlockCommentNested start end = p >> void (manyTill e n)
 96 |   where
 97 |     e = skipBlockCommentNested start end <|> void anySingle
 98 |     p = B.string start
 99 |     n = B.string end
100 | {-# INLINEABLE skipBlockCommentNested #-}
101 | 
102 | ----------------------------------------------------------------------------
103 | -- Numbers
104 | 
105 | -- | Parse an integer in the decimal representation according to the format
106 | -- of integer literals described in the Haskell report.
107 | --
108 | -- If you need to parse signed integers, see the 'signed' combinator.
109 | --
110 | -- __Warning__: this function does not perform range checks.
111 | decimal ::
112 |   forall e s m a.
113 |   (MonadParsec e s m, Token s ~ Word8, Num a) =>
114 |   m a
115 | decimal = decimal_ <?> "integer"
116 | {-# INLINEABLE decimal #-}
117 | 
118 | -- | A non-public helper to parse decimal integers.
119 | decimal_ ::
120 |   forall e s m a.
121 |   (MonadParsec e s m, Token s ~ Word8, Num a) =>
122 |   m a
123 | decimal_ = mkNum <$> takeWhile1P (Just "digit") isDigit
124 |   where
125 |     mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
126 |     step a w = a * 10 + fromIntegral (w - 48)
127 | {-# INLINE decimal_ #-}
128 | 
129 | -- | Parse an integer in the binary representation. The binary number is
130 | -- expected to be a non-empty sequence of zeroes “0” and ones “1”.
131 | --
132 | -- You could of course parse some prefix before the actual number:
133 | --
134 | -- > binary = char 48 >> char' 98 >> L.binary
135 | --
136 | -- __Warning__: this function does not perform range checks.
137 | --
138 | -- @since 7.0.0
139 | binary ::
140 |   forall e s m a.
141 |   (MonadParsec e s m, Token s ~ Word8, Num a) =>
142 |   m a
143 | binary =
144 |   mkNum
145 |     <$> takeWhile1P Nothing isBinDigit
146 |     <?> "binary integer"
147 |   where
148 |     mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
149 |     step a w = a * 2 + fromIntegral (w - 48)
150 |     isBinDigit w = w == 48 || w == 49
151 | {-# INLINEABLE binary #-}
152 | 
153 | -- | Parse an integer in the octal representation. The format of the octal
154 | -- number is expected to be according to the Haskell report except for the
155 | -- fact that this parser doesn't parse “0o” or “0O” prefix. It is a
156 | -- responsibility of the programmer to parse correct prefix before parsing
157 | -- the number itself.
158 | --
159 | -- For example you can make it conform to the Haskell report like this:
160 | --
161 | -- > octal = char 48 >> char' 111 >> L.octal
162 | --
163 | -- __Warning__: this function does not perform range checks.
164 | octal ::
165 |   forall e s m a.
166 |   (MonadParsec e s m, Token s ~ Word8, Num a) =>
167 |   m a
168 | octal =
169 |   mkNum
170 |     <$> takeWhile1P Nothing isOctDigit
171 |     <?> "octal integer"
172 |   where
173 |     mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
174 |     step a w = a * 8 + fromIntegral (w - 48)
175 |     isOctDigit w = w - 48 < 8
176 | {-# INLINEABLE octal #-}
177 | 
178 | -- | Parse an integer in the hexadecimal representation. The format of the
179 | -- hexadecimal number is expected to be according to the Haskell report
180 | -- except for the fact that this parser doesn't parse “0x” or “0X” prefix.
181 | -- It is a responsibility of the programmer to parse correct prefix before
182 | -- parsing the number itself.
183 | --
184 | -- For example you can make it conform to the Haskell report like this:
185 | --
186 | -- > hexadecimal = char 48 >> char' 120 >> L.hexadecimal
187 | --
188 | -- __Warning__: this function does not perform range checks.
189 | hexadecimal ::
190 |   forall e s m a.
191 |   (MonadParsec e s m, Token s ~ Word8, Num a) =>
192 |   m a
193 | hexadecimal =
194 |   mkNum
195 |     <$> takeWhile1P Nothing isHexDigit
196 |     <?> "hexadecimal integer"
197 |   where
198 |     mkNum = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
199 |     step a w
200 |       | w >= 48 && w <= 57 = a * 16 + fromIntegral (w - 48)
201 |       | w >= 97 = a * 16 + fromIntegral (w - 87)
202 |       | otherwise = a * 16 + fromIntegral (w - 55)
203 |     isHexDigit w =
204 |       (w >= 48 && w <= 57)
205 |         || (w >= 97 && w <= 102)
206 |         || (w >= 65 && w <= 70)
207 | {-# INLINEABLE hexadecimal #-}
208 | 
209 | -- | Parse a floating point value as a 'Scientific' number. 'Scientific' is
210 | -- great for parsing of arbitrary precision numbers coming from an untrusted
211 | -- source. See documentation in "Data.Scientific" for more information.
212 | --
213 | -- The parser can be used to parse integers or floating point values. Use
214 | -- functions like 'Data.Scientific.floatingOrInteger' from "Data.Scientific"
215 | -- to test and extract integer or real values.
216 | --
217 | -- This function does not parse sign, if you need to parse signed numbers,
218 | -- see 'signed'.
219 | scientific ::
220 |   forall e s m.
221 |   (MonadParsec e s m, Token s ~ Word8) =>
222 |   m Scientific
223 | scientific = do
224 |   c' <- decimal_
225 |   SP c e' <- option (SP c' 0) (try $ dotDecimal_ (Proxy :: Proxy s) c')
226 |   e <- option e' (try $ exponent_ e')
227 |   return (Sci.scientific c e)
228 | {-# INLINEABLE scientific #-}
229 | 
230 | data SP = SP !Integer {-# UNPACK #-} !Int
231 | 
232 | -- | Parse a floating point number according to the syntax for floating
233 | -- point literals described in the Haskell report.
234 | --
235 | -- This function does not parse sign, if you need to parse signed numbers,
236 | -- see 'signed'.
237 | --
238 | -- __Note__: in versions /6.0.0/–/6.1.1/ this function accepted plain integers.
239 | float :: (MonadParsec e s m, Token s ~ Word8, RealFloat a) => m a
240 | float = do
241 |   c' <- decimal_
242 |   Sci.toRealFloat
243 |     <$> ( ( do
244 |               SP c e' <- dotDecimal_ (Proxy :: Proxy s) c'
245 |               e <- option e' (try $ exponent_ e')
246 |               return (Sci.scientific c e)
247 |           )
248 |             <|> (Sci.scientific c' <$> exponent_ 0)
249 |         )
250 | {-# INLINEABLE float #-}
251 | 
252 | dotDecimal_ ::
253 |   (MonadParsec e s m, Token s ~ Word8) =>
254 |   Proxy s ->
255 |   Integer ->
256 |   m SP
257 | dotDecimal_ pxy c' = do
258 |   void (B.char 46)
259 |   let mkNum = foldl' step (SP c' 0) . chunkToTokens pxy
260 |       step (SP a e') w =
261 |         SP
262 |           (a * 10 + fromIntegral (w - 48))
263 |           (e' - 1)
264 |   mkNum <$> takeWhile1P (Just "digit") isDigit
265 | {-# INLINE dotDecimal_ #-}
266 | 
267 | exponent_ ::
268 |   (MonadParsec e s m, Token s ~ Word8) =>
269 |   Int ->
270 |   m Int
271 | exponent_ e' = do
272 |   void (B.char' 101)
273 |   (+ e') <$> signed (return ()) decimal_
274 | {-# INLINE exponent_ #-}
275 | 
276 | -- | @'signed' space p@ parser parses an optional sign character (“+” or
277 | -- “-”), then if there is a sign it consumes optional white space (using
278 | -- @space@ parser), then it runs parser @p@ which should return a number.
279 | -- Sign of the number is changed according to the previously parsed sign
280 | -- character.
281 | --
282 | -- For example, to parse signed integer you can write:
283 | --
284 | -- > lexeme        = L.lexeme spaceConsumer
285 | -- > integer       = lexeme L.decimal
286 | -- > signedInteger = L.signed spaceConsumer integer
287 | signed ::
288 |   (MonadParsec e s m, Token s ~ Word8, Num a) =>
289 |   -- | How to consume white space after the sign
290 |   m () ->
291 |   -- | How to parse the number itself
292 |   m a ->
293 |   -- | Parser for signed numbers
294 |   m a
295 | signed spc p = option id (lexeme spc sign) <*> p
296 |   where
297 |     sign = (id <$ B.char 43) <|> (negate <$ B.char 45)
298 | {-# INLINEABLE signed #-}
299 | 
300 | ----------------------------------------------------------------------------
301 | -- Helpers
302 | 
303 | -- | A fast predicate to check if the given 'Word8' is a digit in ASCII.
304 | isDigit :: Word8 -> Bool
305 | isDigit w = w - 48 < 10
306 | {-# INLINE isDigit #-}
307 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Char.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE FlexibleContexts #-}
  2 | {-# LANGUAGE LambdaCase #-}
  3 | {-# LANGUAGE Safe #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | {-# LANGUAGE TypeFamilies #-}
  6 | {-# LANGUAGE TypeOperators #-}
  7 | 
  8 | -- |
  9 | -- Module      :  Text.Megaparsec.Char
 10 | -- Copyright   :  © 2015–present Megaparsec contributors
 11 | --                © 2007 Paolo Martini
 12 | --                © 1999–2001 Daan Leijen
 13 | -- License     :  FreeBSD
 14 | --
 15 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 16 | -- Stability   :  experimental
 17 | -- Portability :  non-portable
 18 | --
 19 | -- Commonly used character parsers.
 20 | module Text.Megaparsec.Char
 21 |   ( -- * Simple parsers
 22 |     newline,
 23 |     crlf,
 24 |     eol,
 25 |     tab,
 26 |     space,
 27 |     hspace,
 28 |     space1,
 29 |     hspace1,
 30 | 
 31 |     -- * Categories of characters
 32 |     controlChar,
 33 |     spaceChar,
 34 |     upperChar,
 35 |     lowerChar,
 36 |     letterChar,
 37 |     alphaNumChar,
 38 |     printChar,
 39 |     digitChar,
 40 |     binDigitChar,
 41 |     octDigitChar,
 42 |     hexDigitChar,
 43 |     markChar,
 44 |     numberChar,
 45 |     punctuationChar,
 46 |     symbolChar,
 47 |     separatorChar,
 48 |     asciiChar,
 49 |     latin1Char,
 50 |     charCategory,
 51 |     categoryName,
 52 | 
 53 |     -- * Single character
 54 |     char,
 55 |     char',
 56 | 
 57 |     -- * Sequence of characters
 58 |     string,
 59 |     string',
 60 |   )
 61 | where
 62 | 
 63 | import Control.Applicative
 64 | import Data.Char
 65 | import Data.Functor (void)
 66 | import Data.Proxy
 67 | import Text.Megaparsec
 68 | import Text.Megaparsec.Common
 69 | 
 70 | ----------------------------------------------------------------------------
 71 | -- Simple parsers
 72 | 
 73 | -- | Parse a newline character.
 74 | newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 75 | newline = char '\n'
 76 | {-# INLINE newline #-}
 77 | 
 78 | -- | Parse a carriage return character followed by a newline character.
 79 | -- Return the sequence of characters parsed.
 80 | crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
 81 | crlf = string (tokensToChunk (Proxy :: Proxy s) "\r\n")
 82 | {-# INLINE crlf #-}
 83 | 
 84 | -- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
 85 | -- sequence of characters parsed.
 86 | eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
 87 | eol =
 88 |   (tokenToChunk (Proxy :: Proxy s) <$> newline)
 89 |     <|> crlf
 90 |     <?> "end of line"
 91 | {-# INLINE eol #-}
 92 | 
 93 | -- | Parse a tab character.
 94 | tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 95 | tab = char '\t'
 96 | {-# INLINE tab #-}
 97 | 
 98 | -- | Skip /zero/ or more white space characters.
 99 | --
100 | -- See also: 'skipMany' and 'spaceChar'.
101 | space :: (MonadParsec e s m, Token s ~ Char) => m ()
102 | space = void $ takeWhileP (Just "white space") isSpace
103 | {-# INLINE space #-}
104 | 
105 | -- | Like 'space', but does not accept newlines and carriage returns.
106 | --
107 | -- @since 9.0.0
108 | hspace :: (MonadParsec e s m, Token s ~ Char) => m ()
109 | hspace = void $ takeWhileP (Just "white space") isHSpace
110 | {-# INLINE hspace #-}
111 | 
112 | -- | Skip /one/ or more white space characters.
113 | --
114 | -- See also: 'skipSome' and 'spaceChar'.
115 | --
116 | -- @since 6.0.0
117 | space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
118 | space1 = void $ takeWhile1P (Just "white space") isSpace
119 | {-# INLINE space1 #-}
120 | 
121 | -- | Like 'space1', but does not accept newlines and carriage returns.
122 | --
123 | -- @since 9.0.0
124 | hspace1 :: (MonadParsec e s m, Token s ~ Char) => m ()
125 | hspace1 = void $ takeWhile1P (Just "white space") isHSpace
126 | {-# INLINE hspace1 #-}
127 | 
128 | ----------------------------------------------------------------------------
129 | -- Categories of characters
130 | 
131 | -- | Parse a control character (a non-printing character of the Latin-1
132 | -- subset of Unicode).
133 | controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
134 | controlChar = satisfy isControl <?> "control character"
135 | {-# INLINE controlChar #-}
136 | 
137 | -- | Parse a Unicode space character, and the control characters: tab,
138 | -- newline, carriage return, form feed, and vertical tab.
139 | spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
140 | spaceChar = satisfy isSpace <?> "white space"
141 | {-# INLINE spaceChar #-}
142 | 
143 | -- | Parse an upper-case or title-case alphabetic Unicode character. Title
144 | -- case is used by a small number of letter ligatures like the
145 | -- single-character form of Lj.
146 | upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
147 | upperChar = satisfy isUpper <?> "uppercase letter"
148 | {-# INLINE upperChar #-}
149 | 
150 | -- | Parse a lower-case alphabetic Unicode character.
151 | lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
152 | lowerChar = satisfy isLower <?> "lowercase letter"
153 | {-# INLINE lowerChar #-}
154 | 
155 | -- | Parse an alphabetic Unicode character: lower-case, upper-case, or
156 | -- title-case letter, or a letter of case-less scripts\/modifier letter.
157 | letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
158 | letterChar = satisfy isLetter <?> "letter"
159 | {-# INLINE letterChar #-}
160 | 
161 | -- | Parse an alphabetic or numeric digit Unicode characters.
162 | --
163 | -- Note that the numeric digits outside the ASCII range are parsed by this
164 | -- parser but not by 'digitChar'. Such digits may be part of identifiers but
165 | -- are not used by the printer and reader to represent numbers.
166 | alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
167 | alphaNumChar = satisfy isAlphaNum <?> "alphanumeric character"
168 | {-# INLINE alphaNumChar #-}
169 | 
170 | -- | Parse a printable Unicode character: letter, number, mark, punctuation,
171 | -- symbol or space.
172 | printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
173 | printChar = satisfy isPrint <?> "printable character"
174 | {-# INLINE printChar #-}
175 | 
176 | -- | Parse an ASCII digit, i.e between “0” and “9”.
177 | digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
178 | digitChar = satisfy isDigit <?> "digit"
179 | {-# INLINE digitChar #-}
180 | 
181 | -- | Parse a binary digit, i.e. "0" or "1".
182 | --
183 | -- @since 7.0.0
184 | binDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
185 | binDigitChar = satisfy isBinDigit <?> "binary digit"
186 |   where
187 |     isBinDigit x = x == '0' || x == '1'
188 | {-# INLINE binDigitChar #-}
189 | 
190 | -- | Parse an octal digit, i.e. between “0” and “7”.
191 | octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
192 | octDigitChar = satisfy isOctDigit <?> "octal digit"
193 | {-# INLINE octDigitChar #-}
194 | 
195 | -- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or
196 | -- “A” and “F”.
197 | hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
198 | hexDigitChar = satisfy isHexDigit <?> "hexadecimal digit"
199 | {-# INLINE hexDigitChar #-}
200 | 
201 | -- | Parse a Unicode mark character (accents and the like), which combines
202 | -- with preceding characters.
203 | markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
204 | markChar = satisfy isMark <?> "mark character"
205 | {-# INLINE markChar #-}
206 | 
207 | -- | Parse a Unicode numeric character, including digits from various
208 | -- scripts, Roman numerals, etc.
209 | numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
210 | numberChar = satisfy isNumber <?> "numeric character"
211 | {-# INLINE numberChar #-}
212 | 
213 | -- | Parse a Unicode punctuation character, including various kinds of
214 | -- connectors, brackets and quotes.
215 | punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
216 | punctuationChar = satisfy isPunctuation <?> "punctuation"
217 | {-# INLINE punctuationChar #-}
218 | 
219 | -- | Parse a Unicode symbol characters, including mathematical and currency
220 | -- symbols.
221 | symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
222 | symbolChar = satisfy isSymbol <?> "symbol"
223 | {-# INLINE symbolChar #-}
224 | 
225 | -- | Parse a Unicode space and separator characters.
226 | separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
227 | separatorChar = satisfy isSeparator <?> "separator"
228 | {-# INLINE separatorChar #-}
229 | 
230 | -- | Parse a character from the first 128 characters of the Unicode
231 | -- character set, corresponding to the ASCII character set.
232 | asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
233 | asciiChar = satisfy isAscii <?> "ASCII character"
234 | {-# INLINE asciiChar #-}
235 | 
236 | -- | Parse a character from the first 256 characters of the Unicode
237 | -- character set, corresponding to the ISO 8859-1 (Latin-1) character set.
238 | latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
239 | latin1Char = satisfy isLatin1 <?> "Latin-1 character"
240 | {-# INLINE latin1Char #-}
241 | 
242 | -- | @'charCategory' cat@ parses character in Unicode General Category
243 | -- @cat@, see 'Data.Char.GeneralCategory'.
244 | charCategory ::
245 |   (MonadParsec e s m, Token s ~ Char) =>
246 |   GeneralCategory ->
247 |   m (Token s)
248 | charCategory cat = satisfy ((== cat) . generalCategory) <?> categoryName cat
249 | {-# INLINE charCategory #-}
250 | 
251 | -- | Return the human-readable name of Unicode General Category.
252 | categoryName :: GeneralCategory -> String
253 | categoryName = \case
254 |   UppercaseLetter -> "uppercase letter"
255 |   LowercaseLetter -> "lowercase letter"
256 |   TitlecaseLetter -> "titlecase letter"
257 |   ModifierLetter -> "modifier letter"
258 |   OtherLetter -> "other letter"
259 |   NonSpacingMark -> "non-spacing mark"
260 |   SpacingCombiningMark -> "spacing combining mark"
261 |   EnclosingMark -> "enclosing mark"
262 |   DecimalNumber -> "decimal number character"
263 |   LetterNumber -> "letter number character"
264 |   OtherNumber -> "other number character"
265 |   ConnectorPunctuation -> "connector punctuation"
266 |   DashPunctuation -> "dash punctuation"
267 |   OpenPunctuation -> "open punctuation"
268 |   ClosePunctuation -> "close punctuation"
269 |   InitialQuote -> "initial quote"
270 |   FinalQuote -> "final quote"
271 |   OtherPunctuation -> "other punctuation"
272 |   MathSymbol -> "math symbol"
273 |   CurrencySymbol -> "currency symbol"
274 |   ModifierSymbol -> "modifier symbol"
275 |   OtherSymbol -> "other symbol"
276 |   Space -> "white space"
277 |   LineSeparator -> "line separator"
278 |   ParagraphSeparator -> "paragraph separator"
279 |   Control -> "control character"
280 |   Format -> "format character"
281 |   Surrogate -> "surrogate character"
282 |   PrivateUse -> "private-use Unicode character"
283 |   NotAssigned -> "non-assigned Unicode character"
284 | 
285 | ----------------------------------------------------------------------------
286 | -- Single character
287 | 
288 | -- | A type-constrained version of 'single'.
289 | --
290 | -- > semicolon = char ';'
291 | char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
292 | char = single
293 | {-# INLINE char #-}
294 | 
295 | -- | The same as 'char' but case-insensitive. This parser returns the
296 | -- actually parsed character preserving its case.
297 | --
298 | -- >>> parseTest (char' 'e') "E"
299 | -- 'E'
300 | -- >>> parseTest (char' 'e') "G"
301 | -- 1:1:
302 | -- unexpected 'G'
303 | -- expecting 'E' or 'e'
304 | char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
305 | char' c =
306 |   choice
307 |     [ char (toLower c),
308 |       char (toUpper c),
309 |       char (toTitle c)
310 |     ]
311 | {-# INLINE char' #-}
312 | 
313 | ----------------------------------------------------------------------------
314 | -- Helpers
315 | 
316 | -- | Is it a horizontal space character?
317 | isHSpace :: Char -> Bool
318 | isHSpace x = isSpace x && x /= '\n' && x /= '\r'
319 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Common.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleContexts #-}
 2 | {-# LANGUAGE Safe #-}
 3 | 
 4 | -- |
 5 | -- Module      :  Text.Megaparsec.Common
 6 | -- Copyright   :  © 2018–present Megaparsec contributors
 7 | -- License     :  FreeBSD
 8 | --
 9 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
10 | -- Stability   :  experimental
11 | -- Portability :  portable
12 | --
13 | -- Common token combinators. This module is not public, the functions from
14 | -- it are re-exported in "Text.Megaparsec.Byte" and "Text.Megaparsec.Char".
15 | --
16 | -- @since 7.0.0
17 | module Text.Megaparsec.Common
18 |   ( string,
19 |     string',
20 |   )
21 | where
22 | 
23 | import qualified Data.CaseInsensitive as CI
24 | import Data.Function (on)
25 | import Text.Megaparsec
26 | 
27 | -- | A synonym for 'chunk'.
28 | string :: (MonadParsec e s m) => Tokens s -> m (Tokens s)
29 | string = chunk
30 | {-# INLINE string #-}
31 | 
32 | -- | The same as 'string', but case-insensitive. On success returns string
33 | -- cased as the parsed input.
34 | --
35 | -- >>> parseTest (string' "foobar") "foObAr"
36 | -- "foObAr"
37 | string' ::
38 |   (MonadParsec e s m, CI.FoldCase (Tokens s)) =>
39 |   Tokens s ->
40 |   m (Tokens s)
41 | string' = tokens ((==) `on` CI.mk)
42 | {-# INLINE string' #-}
43 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Debug.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE FlexibleContexts #-}
  2 | {-# LANGUAGE FlexibleInstances #-}
  3 | {-# LANGUAGE MultiParamTypeClasses #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | {-# LANGUAGE Unsafe #-}
  6 | 
  7 | -- |
  8 | -- Module      :  Text.Megaparsec.Debug
  9 | -- Copyright   :  © 2015–present Megaparsec contributors
 10 | -- License     :  FreeBSD
 11 | --
 12 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 13 | -- Stability   :  experimental
 14 | -- Portability :  portable
 15 | --
 16 | -- Debugging helpers.
 17 | --
 18 | -- @since 7.0.0
 19 | module Text.Megaparsec.Debug
 20 |   ( MonadParsecDbg (..),
 21 |     dbg',
 22 |   )
 23 | where
 24 | 
 25 | import Control.Monad.Identity (IdentityT, mapIdentityT)
 26 | import qualified Control.Monad.Trans.RWS.Lazy as L
 27 | import qualified Control.Monad.Trans.RWS.Strict as S
 28 | import qualified Control.Monad.Trans.Reader as L
 29 | import qualified Control.Monad.Trans.State.Lazy as L
 30 | import qualified Control.Monad.Trans.State.Strict as S
 31 | import qualified Control.Monad.Trans.Writer.Lazy as L
 32 | import qualified Control.Monad.Trans.Writer.Strict as S
 33 | import Data.Bifunctor (Bifunctor (first))
 34 | import qualified Data.List as List
 35 | import qualified Data.List.NonEmpty as NE
 36 | import Data.Proxy
 37 | import qualified Data.Set as E
 38 | import Debug.Trace
 39 | import Text.Megaparsec.Class (MonadParsec)
 40 | import Text.Megaparsec.Error
 41 | import Text.Megaparsec.Internal
 42 | import Text.Megaparsec.State
 43 | import Text.Megaparsec.Stream
 44 | 
 45 | -- | Type class describing parser monads that can trace during evaluation.
 46 | --
 47 | -- @since 9.3.0
 48 | class (MonadParsec e s m) => MonadParsecDbg e s m where
 49 |   -- | @'dbg' label p@ parser works exactly like @p@, but when it's evaluated
 50 |   -- it prints information useful for debugging. The @label@ is only used to
 51 |   -- refer to this parser in the debugging output. This combinator uses the
 52 |   -- 'trace' function from "Debug.Trace" under the hood.
 53 |   --
 54 |   -- Typical usage is to wrap every sub-parser in misbehaving parser with
 55 |   -- 'dbg' assigning meaningful labels. Then give it a shot and go through the
 56 |   -- print-out. As of current version, this combinator prints all available
 57 |   -- information except for /hints/, which are probably only interesting to
 58 |   -- the maintainer of Megaparsec itself and may be quite verbose to output in
 59 |   -- general. Let me know if you would like to be able to see hints in the
 60 |   -- debugging output.
 61 |   --
 62 |   -- The output itself is pretty self-explanatory, although the following
 63 |   -- abbreviations should be clarified (they are derived from the low-level
 64 |   -- source code):
 65 |   --
 66 |   --     * @COK@—“consumed OK”. The parser consumed input and succeeded.
 67 |   --     * @CERR@—“consumed error”. The parser consumed input and failed.
 68 |   --     * @EOK@—“empty OK”. The parser succeeded without consuming input.
 69 |   --     * @EERR@—“empty error”. The parser failed without consuming input.
 70 |   --
 71 |   -- __Note__: up until the version /9.3.0/ this was a non-polymorphic
 72 |   -- function that worked only in 'ParsecT'. It was first introduced in the
 73 |   -- version /7.0.0/.
 74 |   dbg ::
 75 |     (Show a) =>
 76 |     -- | Debugging label
 77 |     String ->
 78 |     -- | Parser to debug
 79 |     m a ->
 80 |     -- | Parser that prints debugging messages
 81 |     m a
 82 | 
 83 | -- | @dbg (p :: StateT st m)@ prints state __after__ running @p@:
 84 | --
 85 | -- >>> p = modify succ >> dbg "a" (single 'a' >> modify succ)
 86 | -- >>> parseTest (runStateT p 0) "a"
 87 | -- a> IN: 'a'
 88 | -- a> MATCH (COK): 'a'
 89 | -- a> VALUE: () (STATE: 2)
 90 | -- ((),2)
 91 | instance
 92 |   (Show st, MonadParsecDbg e s m) =>
 93 |   MonadParsecDbg e s (L.StateT st m)
 94 |   where
 95 |   dbg str sma = L.StateT $ \s ->
 96 |     dbgWithComment "STATE" str $ L.runStateT sma s
 97 | 
 98 | -- | @dbg (p :: StateT st m)@ prints state __after__ running @p@:
 99 | --
100 | -- >>> p = modify succ >> dbg "a" (single 'a' >> modify succ)
101 | -- >>> parseTest (runStateT p 0) "a"
102 | -- a> IN: 'a'
103 | -- a> MATCH (COK): 'a'
104 | -- a> VALUE: () (STATE: 2)
105 | -- ((),2)
106 | instance
107 |   (Show st, MonadParsecDbg e s m) =>
108 |   MonadParsecDbg e s (S.StateT st m)
109 |   where
110 |   dbg str sma = S.StateT $ \s ->
111 |     dbgWithComment "STATE" str $ S.runStateT sma s
112 | 
113 | instance
114 |   (MonadParsecDbg e s m) =>
115 |   MonadParsecDbg e s (L.ReaderT r m)
116 |   where
117 |   dbg = L.mapReaderT . dbg
118 | 
119 | -- | @dbg (p :: WriterT st m)@ prints __only__ log produced by @p@:
120 | --
121 | -- >>> p = tell [0] >> dbg "a" (single 'a' >> tell [1])
122 | -- >>> parseTest (runWriterT p) "a"
123 | -- a> IN: 'a'
124 | -- a> MATCH (COK): 'a'
125 | -- a> VALUE: () (LOG: [1])
126 | -- ((),[0,1])
127 | instance
128 |   (Monoid w, Show w, MonadParsecDbg e s m) =>
129 |   MonadParsecDbg e s (L.WriterT w m)
130 |   where
131 |   dbg str wma = L.WriterT $ dbgWithComment "LOG" str $ L.runWriterT wma
132 | 
133 | -- | @dbg (p :: WriterT st m)@ prints __only__ log produced by @p@:
134 | --
135 | -- >>> p = tell [0] >> dbg "a" (single 'a' >> tell [1])
136 | -- >>> parseTest (runWriterT p) "a"
137 | -- a> IN: 'a'
138 | -- a> MATCH (COK): 'a'
139 | -- a> VALUE: () (LOG: [1])
140 | -- ((),[0,1])
141 | instance
142 |   (Monoid w, Show w, MonadParsecDbg e s m) =>
143 |   MonadParsecDbg e s (S.WriterT w m)
144 |   where
145 |   dbg str wma = S.WriterT $ dbgWithComment "LOG" str $ S.runWriterT wma
146 | 
147 | -- | @RWST@ works like @StateT@ inside a @WriterT@: subparser's log and its
148 | -- final state is printed:
149 | --
150 | -- >>> p = tell [0] >> modify succ >> dbg "a" (single 'a' >> tell [1] >> modify succ)
151 | -- >>> parseTest (runRWST p () 0) "a"
152 | -- a> IN: 'a'
153 | -- a> MATCH (COK): 'a'
154 | -- a> VALUE: () (STATE: 2) (LOG: [1])
155 | -- ((),2,[0,1])
156 | instance
157 |   (Monoid w, Show w, Show st, MonadParsecDbg e s m) =>
158 |   MonadParsecDbg e s (L.RWST r w st m)
159 |   where
160 |   dbg str sma = L.RWST $ \r s -> do
161 |     let smth =
162 |           (\(a, st, w) -> ShowComment "LOG" (ShowComment "STATE" (a, st), w))
163 |             <$> L.runRWST sma r s
164 |     ((a, st), w) <- first unComment . unComment <$> dbg str smth
165 |     pure (a, st, w)
166 | 
167 | -- | @RWST@ works like @StateT@ inside a @WriterT@: subparser's log and its
168 | -- final state is printed:
169 | --
170 | -- >>> p = tell [0] >> modify succ >> dbg "a" (single 'a' >> tell [1] >> modify succ)
171 | -- >>> parseTest (runRWST p () 0) "a"
172 | -- a> IN: 'a'
173 | -- a> MATCH (COK): 'a'
174 | -- a> VALUE: () (STATE: 2) (LOG: [1])
175 | -- ((),2,[0,1])
176 | instance
177 |   (Monoid w, Show w, Show st, MonadParsecDbg e s m) =>
178 |   MonadParsecDbg e s (S.RWST r w st m)
179 |   where
180 |   dbg str sma = S.RWST $ \r s -> do
181 |     let smth =
182 |           (\(a, st, w) -> ShowComment "LOG" (ShowComment "STATE" (a, st), w))
183 |             <$> S.runRWST sma r s
184 |     ((a, st), w) <- first unComment . unComment <$> dbg str smth
185 |     pure (a, st, w)
186 | 
187 | instance (MonadParsecDbg e s m) => MonadParsecDbg e s (IdentityT m) where
188 |   dbg = mapIdentityT . dbg
189 | 
190 | -- | @'dbgWithComment' label_a label_c m@ traces the first component of the
191 | -- result produced by @m@ with @label_a@ and the second component with
192 | -- @label_b@.
193 | dbgWithComment ::
194 |   (MonadParsecDbg e s m, Show a, Show c) =>
195 |   -- | Debugging label (for @a@)
196 |   String ->
197 |   -- | Extra component label (for @c@)
198 |   String ->
199 |   -- | Parser to debug
200 |   m (a, c) ->
201 |   -- | Parser that prints debugging messages
202 |   m (a, c)
203 | dbgWithComment lbl str ma =
204 |   unComment <$> dbg str (ShowComment lbl <$> ma)
205 | 
206 | -- | A wrapper with a special show instance:
207 | --
208 | -- >>> show (ShowComment "STATE" ("Hello, world!", 42))
209 | -- Hello, world! (STATE: 42)
210 | data ShowComment c a = ShowComment String (a, c)
211 | 
212 | unComment :: ShowComment c a -> (a, c)
213 | unComment (ShowComment _ val) = val
214 | 
215 | instance (Show c, Show a) => Show (ShowComment c a) where
216 |   show (ShowComment lbl (a, c)) = show a ++ " (" ++ lbl ++ ": " ++ show c ++ ")"
217 | 
218 | instance
219 |   (VisualStream s, ShowErrorComponent e) =>
220 |   MonadParsecDbg e s (ParsecT e s m)
221 |   where
222 |   dbg lbl p = ParsecT $ \s cok cerr eok eerr ->
223 |     let l = dbgLog lbl
224 |         unfold = streamTake 40
225 |         cok' x s' hs =
226 |           flip trace (cok x s' hs) $
227 |             l (DbgIn (unfold (stateInput s)))
228 |               ++ l (DbgCOK (streamTake (streamDelta s s') (stateInput s)) x hs)
229 |         cerr' err s' =
230 |           flip trace (cerr err s') $
231 |             l (DbgIn (unfold (stateInput s)))
232 |               ++ l (DbgCERR (streamTake (streamDelta s s') (stateInput s)) err)
233 |         eok' x s' hs =
234 |           flip trace (eok x s' hs) $
235 |             l (DbgIn (unfold (stateInput s)))
236 |               ++ l (DbgEOK (streamTake (streamDelta s s') (stateInput s)) x hs)
237 |         eerr' err s' =
238 |           flip trace (eerr err s') $
239 |             l (DbgIn (unfold (stateInput s)))
240 |               ++ l (DbgEERR (streamTake (streamDelta s s') (stateInput s)) err)
241 |      in unParser p s cok' cerr' eok' eerr'
242 | 
243 | -- | A single piece of info to be rendered with 'dbgLog'.
244 | data DbgItem s e a
245 |   = DbgIn [Token s]
246 |   | DbgCOK [Token s] a (Hints (Token s))
247 |   | DbgCERR [Token s] (ParseError s e)
248 |   | DbgEOK [Token s] a (Hints (Token s))
249 |   | DbgEERR [Token s] (ParseError s e)
250 | 
251 | -- | Render a single piece of debugging info.
252 | dbgLog ::
253 |   forall s e a.
254 |   (VisualStream s, ShowErrorComponent e, Show a) =>
255 |   -- | Debugging label
256 |   String ->
257 |   -- | Information to render
258 |   DbgItem s e a ->
259 |   -- | Rendered result
260 |   String
261 | dbgLog lbl item = prefix msg
262 |   where
263 |     prefix = unlines . fmap ((lbl ++ "> ") ++) . lines
264 |     pxy = Proxy :: Proxy s
265 |     showHints hs = "[" ++ List.intercalate "," (showErrorItem pxy <$> E.toAscList hs) ++ "]"
266 |     msg = case item of
267 |       DbgIn ts ->
268 |         "IN: " ++ showStream pxy ts
269 |       DbgCOK ts a (Hints hs) ->
270 |         "MATCH (COK): "
271 |           ++ showStream pxy ts
272 |           ++ "\nVALUE: "
273 |           ++ show a
274 |           ++ "\nHINTS: "
275 |           ++ showHints hs
276 |       DbgCERR ts e ->
277 |         "MATCH (CERR): " ++ showStream pxy ts ++ "\nERROR:\n" ++ parseErrorPretty e
278 |       DbgEOK ts a (Hints hs) ->
279 |         "MATCH (EOK): "
280 |           ++ showStream pxy ts
281 |           ++ "\nVALUE: "
282 |           ++ show a
283 |           ++ "\nHINTS: "
284 |           ++ showHints hs
285 |       DbgEERR ts e ->
286 |         "MATCH (EERR): " ++ showStream pxy ts ++ "\nERROR:\n" ++ parseErrorPretty e
287 | 
288 | -- | Pretty-print a list of tokens.
289 | showStream :: (VisualStream s) => Proxy s -> [Token s] -> String
290 | showStream pxy ts =
291 |   case NE.nonEmpty ts of
292 |     Nothing -> "<EMPTY>"
293 |     Just ne ->
294 |       let (h, r) = splitAt 40 (showTokens pxy ne)
295 |        in if null r then h else h ++ " <…>"
296 | 
297 | -- | Calculate number of consumed tokens given 'State' of parser before and
298 | -- after parsing.
299 | streamDelta ::
300 |   -- | State of parser before consumption
301 |   State s e ->
302 |   -- | State of parser after consumption
303 |   State s e ->
304 |   -- | Number of consumed tokens
305 |   Int
306 | streamDelta s0 s1 = stateOffset s1 - stateOffset s0
307 | 
308 | -- | Extract a given number of tokens from the stream.
309 | streamTake :: forall s. (Stream s) => Int -> s -> [Token s]
310 | streamTake n s =
311 |   case fst <$> takeN_ n s of
312 |     Nothing -> []
313 |     Just chk -> chunkToTokens (Proxy :: Proxy s) chk
314 | 
315 | -- | Just like 'dbg', but doesn't require the return value of the parser to
316 | -- be 'Show'-able.
317 | --
318 | -- @since 9.1.0
319 | dbg' ::
320 |   (MonadParsecDbg e s m) =>
321 |   -- | Debugging label
322 |   String ->
323 |   -- | Parser to debug
324 |   m a ->
325 |   -- | Parser that prints debugging messages
326 |   m a
327 | dbg' lbl p = unBlind <$> dbg lbl (Blind <$> p)
328 | 
329 | -- | A wrapper type with a dummy 'Show' instance.
330 | newtype Blind x = Blind {unBlind :: x}
331 | 
332 | instance Show (Blind x) where
333 |   show _ = "NOT SHOWN"
334 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Error.hs-boot:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE RoleAnnotations #-}
 2 | {-# LANGUAGE Safe #-}
 3 | 
 4 | module Text.Megaparsec.Error
 5 |   ( ParseError,
 6 |   )
 7 | where
 8 | 
 9 | type role ParseError nominal nominal
10 | 
11 | data ParseError s e
12 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Error/Builder.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveDataTypeable #-}
  2 | {-# LANGUAGE DeriveGeneric #-}
  3 | {-# LANGUAGE FlexibleContexts #-}
  4 | {-# LANGUAGE Safe #-}
  5 | {-# LANGUAGE ScopedTypeVariables #-}
  6 | {-# LANGUAGE StandaloneDeriving #-}
  7 | {-# LANGUAGE UndecidableInstances #-}
  8 | 
  9 | -- |
 10 | -- Module      :  Text.Megaparsec.Error.Builder
 11 | -- Copyright   :  © 2015–present Megaparsec contributors
 12 | -- License     :  FreeBSD
 13 | --
 14 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 15 | -- Stability   :  experimental
 16 | -- Portability :  portable
 17 | --
 18 | -- A set of helpers that should make construction of 'ParseError's more
 19 | -- concise. This is primarily useful in test suites and for debugging.
 20 | --
 21 | -- @since 6.0.0
 22 | module Text.Megaparsec.Error.Builder
 23 |   ( -- * Top-level helpers
 24 |     err,
 25 |     errFancy,
 26 | 
 27 |     -- * Error components
 28 |     utok,
 29 |     utoks,
 30 |     ulabel,
 31 |     ueof,
 32 |     etok,
 33 |     etoks,
 34 |     elabel,
 35 |     eeof,
 36 |     fancy,
 37 | 
 38 |     -- * Data types
 39 |     ET,
 40 |     EF,
 41 |   )
 42 | where
 43 | 
 44 | import Data.Data (Data)
 45 | import Data.List.NonEmpty (NonEmpty (..))
 46 | import qualified Data.List.NonEmpty as NE
 47 | import Data.Proxy
 48 | import Data.Set (Set)
 49 | import qualified Data.Set as E
 50 | import GHC.Generics
 51 | import Text.Megaparsec.Error
 52 | import Text.Megaparsec.Stream
 53 | 
 54 | ----------------------------------------------------------------------------
 55 | -- Data types
 56 | 
 57 | -- | Auxiliary type for construction of trivial parse errors.
 58 | data ET s = ET (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s)))
 59 |   deriving (Generic)
 60 | 
 61 | deriving instance (Eq (Token s)) => Eq (ET s)
 62 | 
 63 | deriving instance (Ord (Token s)) => Ord (ET s)
 64 | 
 65 | deriving instance
 66 |   ( Data s,
 67 |     Data (Token s),
 68 |     Ord (Token s)
 69 |   ) =>
 70 |   Data (ET s)
 71 | 
 72 | instance (Stream s) => Semigroup (ET s) where
 73 |   ET us0 ps0 <> ET us1 ps1 = ET (n us0 us1) (E.union ps0 ps1)
 74 |     where
 75 |       n Nothing Nothing = Nothing
 76 |       n (Just x) Nothing = Just x
 77 |       n Nothing (Just y) = Just y
 78 |       n (Just x) (Just y) = Just (max x y)
 79 | 
 80 | instance (Stream s) => Monoid (ET s) where
 81 |   mempty = ET Nothing E.empty
 82 |   mappend = (<>)
 83 | 
 84 | -- | Auxiliary type for construction of fancy parse errors.
 85 | newtype EF e = EF (Set (ErrorFancy e))
 86 |   deriving (Eq, Ord, Data, Generic)
 87 | 
 88 | instance (Ord e) => Semigroup (EF e) where
 89 |   EF xs0 <> EF xs1 = EF (E.union xs0 xs1)
 90 | 
 91 | instance (Ord e) => Monoid (EF e) where
 92 |   mempty = EF E.empty
 93 |   mappend = (<>)
 94 | 
 95 | ----------------------------------------------------------------------------
 96 | -- Top-level helpers
 97 | 
 98 | -- | Assemble a 'ParseError' from the offset and the @'ET' t@ value. @'ET'
 99 | -- t@ is a monoid and can be assembled by combining primitives provided by
100 | -- this module, see below.
101 | err ::
102 |   -- | 'ParseError' offset
103 |   Int ->
104 |   -- | Error components
105 |   ET s ->
106 |   -- | Resulting 'ParseError'
107 |   ParseError s e
108 | err p (ET us ps) = TrivialError p us ps
109 | 
110 | -- | Like 'err', but constructs a “fancy” 'ParseError'.
111 | errFancy ::
112 |   -- | 'ParseError' offset
113 |   Int ->
114 |   -- | Error components
115 |   EF e ->
116 |   -- | Resulting 'ParseError'
117 |   ParseError s e
118 | errFancy p (EF xs) = FancyError p xs
119 | 
120 | ----------------------------------------------------------------------------
121 | -- Error components
122 | 
123 | -- | Construct an “unexpected token” error component.
124 | utok :: Token s -> ET s
125 | utok = unexp . Tokens . nes
126 | 
127 | -- | Construct an “unexpected tokens” error component. Empty chunk produces
128 | -- 'EndOfInput'.
129 | utoks :: forall s. (Stream s) => Tokens s -> ET s
130 | utoks = unexp . canonicalizeTokens (Proxy :: Proxy s)
131 | 
132 | -- | Construct an “unexpected label” error component. Do not use with empty
133 | -- strings (for empty strings it's bottom).
134 | ulabel :: String -> ET s
135 | ulabel label
136 |   | label == "" = error "Text.Megaparsec.Error.Builder.ulabel: empty label"
137 |   | otherwise = unexp . Label . NE.fromList $ label
138 | 
139 | -- | Construct an “unexpected end of input” error component.
140 | ueof :: ET s
141 | ueof = unexp EndOfInput
142 | 
143 | -- | Construct an “expected token” error component.
144 | etok :: Token s -> ET s
145 | etok = expe . Tokens . nes
146 | 
147 | -- | Construct an “expected tokens” error component. Empty chunk produces
148 | -- 'EndOfInput'.
149 | etoks :: forall s. (Stream s) => Tokens s -> ET s
150 | etoks = expe . canonicalizeTokens (Proxy :: Proxy s)
151 | 
152 | -- | Construct an “expected label” error component. Do not use with empty
153 | -- strings.
154 | elabel :: String -> ET s
155 | elabel label
156 |   | label == "" = error "Text.Megaparsec.Error.Builder.elabel: empty label"
157 |   | otherwise = expe . Label . NE.fromList $ label
158 | 
159 | -- | Construct an “expected end of input” error component.
160 | eeof :: ET s
161 | eeof = expe EndOfInput
162 | 
163 | -- | Construct a custom error component.
164 | fancy :: ErrorFancy e -> EF e
165 | fancy = EF . E.singleton
166 | 
167 | ----------------------------------------------------------------------------
168 | -- Helpers
169 | 
170 | -- | Construct the appropriate 'ErrorItem' representation for the given
171 | -- token stream. The empty string produces 'EndOfInput'.
172 | canonicalizeTokens ::
173 |   (Stream s) =>
174 |   Proxy s ->
175 |   Tokens s ->
176 |   ErrorItem (Token s)
177 | canonicalizeTokens pxy ts =
178 |   case NE.nonEmpty (chunkToTokens pxy ts) of
179 |     Nothing -> EndOfInput
180 |     Just xs -> Tokens xs
181 | 
182 | -- | Lift an unexpected item into 'ET'.
183 | unexp :: ErrorItem (Token s) -> ET s
184 | unexp u = ET (pure u) E.empty
185 | 
186 | -- | Lift an expected item into 'ET'.
187 | expe :: ErrorItem (Token s) -> ET s
188 | expe p = ET Nothing (E.singleton p)
189 | 
190 | -- | Make a singleton non-empty list from a value.
191 | nes :: a -> NonEmpty a
192 | nes x = x :| []
193 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Internal.hs-boot:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE RoleAnnotations #-}
 2 | 
 3 | module Text.Megaparsec.Internal
 4 |   ( Reply,
 5 |   )
 6 | where
 7 | 
 8 | type role Reply nominal nominal representational
 9 | 
10 | data Reply e s a
11 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Lexer.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE FlexibleContexts #-}
  2 | {-# LANGUAGE Safe #-}
  3 | 
  4 | -- |
  5 | -- Module      :  Text.Megaparsec.Common
  6 | -- Copyright   :  © 2018–present Megaparsec contributors
  7 | -- License     :  FreeBSD
  8 | --
  9 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 10 | -- Stability   :  experimental
 11 | -- Portability :  portable
 12 | --
 13 | -- Common token combinators. This module is not public, the functions from
 14 | -- it are re-exported in "Text.Megaparsec.Byte" and "Text.Megaparsec.Char".
 15 | --
 16 | -- @since 7.0.0
 17 | module Text.Megaparsec.Lexer
 18 |   ( -- * White space
 19 |     space,
 20 |     lexeme,
 21 |     symbol,
 22 |     symbol',
 23 |   )
 24 | where
 25 | 
 26 | import qualified Data.CaseInsensitive as CI
 27 | import Text.Megaparsec
 28 | import Text.Megaparsec.Common
 29 | 
 30 | ----------------------------------------------------------------------------
 31 | -- White space
 32 | 
 33 | -- | @'space' sc lineComment blockComment@ produces a parser that can parse
 34 | -- white space in general. It's expected that you create such a parser once
 35 | -- and pass it to other functions in this module as needed (when you see
 36 | -- @spaceConsumer@ in documentation, usually it means that something like
 37 | -- 'space' is expected there).
 38 | --
 39 | -- @sc@ is used to parse blocks of space characters. You can use
 40 | -- 'Text.Megaparsec.Char.space1' from "Text.Megaparsec.Char" for this
 41 | -- purpose as well as your own parser (if you don't want to automatically
 42 | -- consume newlines, for example). Make sure that the parser does not
 43 | -- succeed on the empty input though. In an earlier version of the library
 44 | -- 'Text.Megaparsec.Char.spaceChar' was recommended, but now parsers based
 45 | -- on 'takeWhile1P' are preferred because of their speed.
 46 | --
 47 | -- @lineComment@ is used to parse line comments. You can use
 48 | -- @skipLineComment@ if you don't need anything special.
 49 | --
 50 | -- @blockComment@ is used to parse block (multi-line) comments. You can use
 51 | -- @skipBlockComment@ or @skipBlockCommentNested@ if you don't need anything
 52 | -- special.
 53 | --
 54 | -- If you don't want to allow a kind of comment, simply pass 'empty' which
 55 | -- will fail instantly when parsing of that sort of comment is attempted and
 56 | -- 'space' will just move on or finish depending on whether there is more
 57 | -- white space for it to consume.
 58 | space ::
 59 |   (MonadParsec e s m) =>
 60 |   -- | A parser for space characters which does not accept empty
 61 |   -- input (e.g. 'Text.Megaparsec.Char.space1')
 62 |   m () ->
 63 |   -- | A parser for a line comment (e.g. 'skipLineComment')
 64 |   m () ->
 65 |   -- | A parser for a block comment (e.g. 'skipBlockComment')
 66 |   m () ->
 67 |   m ()
 68 | space sp line block =
 69 |   skipMany $
 70 |     choice
 71 |       [hidden sp, hidden line, hidden block]
 72 | {-# INLINEABLE space #-}
 73 | 
 74 | -- | This is a wrapper for lexemes. The typical usage is to supply the first
 75 | -- argument (parser that consumes white space, probably defined via 'space')
 76 | -- and use the resulting function to wrap parsers for every lexeme.
 77 | --
 78 | -- > lexeme  = L.lexeme spaceConsumer
 79 | -- > integer = lexeme L.decimal
 80 | lexeme ::
 81 |   (MonadParsec e s m) =>
 82 |   -- | How to consume white space after lexeme
 83 |   m () ->
 84 |   -- | How to parse actual lexeme
 85 |   m a ->
 86 |   m a
 87 | lexeme spc p = p <* spc
 88 | {-# INLINEABLE lexeme #-}
 89 | 
 90 | -- | This is a helper to parse symbols, i.e. verbatim strings. You pass the
 91 | -- first argument (parser that consumes white space, probably defined via
 92 | -- 'space') and then you can use the resulting function to parse strings:
 93 | --
 94 | -- > symbol    = L.symbol spaceConsumer
 95 | -- >
 96 | -- > parens    = between (symbol "(") (symbol ")")
 97 | -- > braces    = between (symbol "{") (symbol "}")
 98 | -- > angles    = between (symbol "<") (symbol ">")
 99 | -- > brackets  = between (symbol "[") (symbol "]")
100 | -- > semicolon = symbol ";"
101 | -- > comma     = symbol ","
102 | -- > colon     = symbol ":"
103 | -- > dot       = symbol "."
104 | symbol ::
105 |   (MonadParsec e s m) =>
106 |   -- | How to consume white space after lexeme
107 |   m () ->
108 |   -- | Symbol to parse
109 |   Tokens s ->
110 |   m (Tokens s)
111 | symbol spc = lexeme spc . string
112 | {-# INLINEABLE symbol #-}
113 | 
114 | -- | A case-insensitive version of 'symbol'. This may be helpful if you're
115 | -- working with case-insensitive languages.
116 | symbol' ::
117 |   (MonadParsec e s m, CI.FoldCase (Tokens s)) =>
118 |   -- | How to consume white space after lexeme
119 |   m () ->
120 |   -- | Symbol to parse (case-insensitive)
121 |   Tokens s ->
122 |   m (Tokens s)
123 | symbol' spc = lexeme spc . string'
124 | {-# INLINEABLE symbol' #-}
125 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Pos.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveAnyClass #-}
  2 | {-# LANGUAGE DeriveDataTypeable #-}
  3 | {-# LANGUAGE DeriveGeneric #-}
  4 | {-# LANGUAGE Safe #-}
  5 | 
  6 | -- |
  7 | -- Module      :  Text.Megaparsec.Pos
  8 | -- Copyright   :  © 2015–present Megaparsec contributors
  9 | -- License     :  FreeBSD
 10 | --
 11 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 12 | -- Stability   :  experimental
 13 | -- Portability :  portable
 14 | --
 15 | -- Textual source position. The position includes name of file, line number,
 16 | -- and column number.
 17 | --
 18 | -- You probably do not want to import this module directly because
 19 | -- "Text.Megaparsec" re-exports it anyway.
 20 | module Text.Megaparsec.Pos
 21 |   ( -- * Abstract position
 22 |     Pos,
 23 |     mkPos,
 24 |     unPos,
 25 |     pos1,
 26 |     defaultTabWidth,
 27 |     InvalidPosException (..),
 28 | 
 29 |     -- * Source position
 30 |     SourcePos (..),
 31 |     initialPos,
 32 |     sourcePosPretty,
 33 |   )
 34 | where
 35 | 
 36 | import Control.DeepSeq
 37 | import Control.Exception
 38 | import Data.Data (Data)
 39 | import GHC.Generics
 40 | 
 41 | ----------------------------------------------------------------------------
 42 | -- Abstract position
 43 | 
 44 | -- | 'Pos' is the type for positive integers. This is used to represent line
 45 | -- number, column number, and similar things like indentation level.
 46 | -- 'Semigroup' instance can be used to safely and efficiently add 'Pos'es
 47 | -- together.
 48 | --
 49 | -- @since 5.0.0
 50 | newtype Pos = Pos Int
 51 |   deriving (Show, Eq, Ord, Data, Generic, NFData)
 52 | 
 53 | -- | Construction of 'Pos' from 'Int'. The function throws
 54 | -- 'InvalidPosException' when given a non-positive argument.
 55 | --
 56 | -- @since 6.0.0
 57 | mkPos :: Int -> Pos
 58 | mkPos a =
 59 |   if a <= 0
 60 |     then throw (InvalidPosException a)
 61 |     else Pos a
 62 | {-# INLINE mkPos #-}
 63 | 
 64 | -- | Extract 'Int' from 'Pos'.
 65 | --
 66 | -- @since 6.0.0
 67 | unPos :: Pos -> Int
 68 | unPos (Pos w) = w
 69 | {-# INLINE unPos #-}
 70 | 
 71 | -- | Position with value 1.
 72 | --
 73 | -- @since 6.0.0
 74 | pos1 :: Pos
 75 | pos1 = mkPos 1
 76 | 
 77 | -- | Value of tab width used by default. Always prefer this constant when
 78 | -- you want to refer to the default tab width because actual value /may/
 79 | -- change in future.
 80 | --
 81 | -- Currently:
 82 | --
 83 | -- > defaultTabWidth = mkPos 8
 84 | --
 85 | -- @since 5.0.0
 86 | defaultTabWidth :: Pos
 87 | defaultTabWidth = mkPos 8
 88 | 
 89 | instance Semigroup Pos where
 90 |   (Pos x) <> (Pos y) = Pos (x + y)
 91 |   {-# INLINE (<>) #-}
 92 | 
 93 | instance Read Pos where
 94 |   readsPrec d =
 95 |     readParen (d > 10) $ \r1 -> do
 96 |       ("Pos", r2) <- lex r1
 97 |       (x, r3) <- readsPrec 11 r2
 98 |       return (mkPos x, r3)
 99 | 
100 | -- | The exception is thrown by 'mkPos' when its argument is not a positive
101 | -- number.
102 | --
103 | -- @since 5.0.0
104 | newtype InvalidPosException
105 |   = -- | Contains the actual value that was passed to 'mkPos'
106 |     InvalidPosException Int
107 |   deriving (Eq, Show, Data, Generic)
108 | 
109 | instance Exception InvalidPosException
110 | 
111 | instance NFData InvalidPosException
112 | 
113 | ----------------------------------------------------------------------------
114 | -- Source position
115 | 
116 | -- | The data type 'SourcePos' represents source positions. It contains the
117 | -- name of the source file, a line number, and a column number. Source line
118 | -- and column positions change intensively during parsing, so we need to
119 | -- make them strict to avoid memory leaks.
120 | data SourcePos = SourcePos
121 |   { -- | Name of source file
122 |     sourceName :: FilePath,
123 |     -- | Line number
124 |     sourceLine :: !Pos,
125 |     -- | Column number
126 |     sourceColumn :: !Pos
127 |   }
128 |   deriving (Show, Read, Eq, Ord, Data, Generic)
129 | 
130 | instance NFData SourcePos
131 | 
132 | -- | Construct initial position (line 1, column 1) given name of source
133 | -- file.
134 | initialPos :: FilePath -> SourcePos
135 | initialPos n = SourcePos n pos1 pos1
136 | 
137 | -- | Pretty-print a 'SourcePos'.
138 | --
139 | -- @since 5.0.0
140 | sourcePosPretty :: SourcePos -> String
141 | sourcePosPretty (SourcePos n l c)
142 |   | null n = showLC
143 |   | otherwise = n <> ":" <> showLC
144 |   where
145 |     showLC = show (unPos l) <> ":" <> show (unPos c)
146 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/State.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveDataTypeable #-}
  2 | {-# LANGUAGE DeriveGeneric #-}
  3 | {-# LANGUAGE FlexibleContexts #-}
  4 | {-# LANGUAGE Safe #-}
  5 | {-# LANGUAGE StandaloneDeriving #-}
  6 | {-# LANGUAGE UndecidableInstances #-}
  7 | 
  8 | -- |
  9 | -- Module      :  Text.Megaparsec.State
 10 | -- Copyright   :  © 2015–present Megaparsec contributors
 11 | --                © 2007 Paolo Martini
 12 | --                © 1999–2001 Daan Leijen
 13 | -- License     :  FreeBSD
 14 | --
 15 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 16 | -- Stability   :  experimental
 17 | -- Portability :  portable
 18 | --
 19 | -- Definition of Megaparsec's 'State'.
 20 | --
 21 | -- @since 6.5.0
 22 | module Text.Megaparsec.State
 23 |   ( State (..),
 24 |     initialState,
 25 |     PosState (..),
 26 |     initialPosState,
 27 |   )
 28 | where
 29 | 
 30 | import Control.DeepSeq (NFData)
 31 | import Data.Data (Data)
 32 | import GHC.Generics
 33 | import {-# SOURCE #-} Text.Megaparsec.Error (ParseError)
 34 | import Text.Megaparsec.Pos
 35 | 
 36 | -- | This is the Megaparsec's state parametrized over stream type @s@ and
 37 | -- custom error component type @e@.
 38 | data State s e = State
 39 |   { -- | The rest of input to process
 40 |     stateInput :: s,
 41 |     -- | Number of processed tokens so far
 42 |     --
 43 |     -- @since 7.0.0
 44 |     stateOffset :: {-# UNPACK #-} !Int,
 45 |     -- | State that is used for line\/column calculation
 46 |     --
 47 |     -- @since 7.0.0
 48 |     statePosState :: PosState s,
 49 |     -- | Collection of “delayed” 'ParseError's in reverse order. This means
 50 |     -- that the last registered error is the first element of the list.
 51 |     --
 52 |     -- @since 8.0.0
 53 |     stateParseErrors :: [ParseError s e]
 54 |   }
 55 |   deriving (Generic)
 56 | 
 57 | deriving instance
 58 |   ( Show (ParseError s e),
 59 |     Show s
 60 |   ) =>
 61 |   Show (State s e)
 62 | 
 63 | deriving instance
 64 |   ( Eq (ParseError s e),
 65 |     Eq s
 66 |   ) =>
 67 |   Eq (State s e)
 68 | 
 69 | deriving instance
 70 |   ( Data e,
 71 |     Data (ParseError s e),
 72 |     Data s
 73 |   ) =>
 74 |   Data (State s e)
 75 | 
 76 | instance (NFData s, NFData (ParseError s e)) => NFData (State s e)
 77 | 
 78 | -- | Given the name of the source file and the input construct the initial
 79 | -- state for a parser.
 80 | --
 81 | -- @since 9.6.0
 82 | initialState ::
 83 |   -- | Name of the file the input is coming from
 84 |   FilePath ->
 85 |   -- | Input
 86 |   s ->
 87 |   State s e
 88 | initialState name s =
 89 |   State
 90 |     { stateInput = s,
 91 |       stateOffset = 0,
 92 |       statePosState = initialPosState name s,
 93 |       stateParseErrors = []
 94 |     }
 95 | 
 96 | -- | A special kind of state that is used to calculate line\/column
 97 | -- positions on demand.
 98 | --
 99 | -- @since 7.0.0
100 | data PosState s = PosState
101 |   { -- | The rest of input to process
102 |     pstateInput :: s,
103 |     -- | Offset corresponding to beginning of 'pstateInput'
104 |     pstateOffset :: !Int,
105 |     -- | Source position corresponding to beginning of 'pstateInput'
106 |     pstateSourcePos :: !SourcePos,
107 |     -- | Tab width to use for column calculation
108 |     pstateTabWidth :: Pos,
109 |     -- | Prefix to prepend to offending line
110 |     pstateLinePrefix :: String
111 |   }
112 |   deriving (Show, Eq, Data, Generic)
113 | 
114 | instance (NFData s) => NFData (PosState s)
115 | 
116 | -- | Given the name of source file and the input construct the initial
117 | -- positional state.
118 | --
119 | -- @since 9.6.0
120 | initialPosState ::
121 |   -- | Name of the file the input is coming from
122 |   FilePath ->
123 |   -- | Input
124 |   s ->
125 |   PosState s
126 | initialPosState name s =
127 |   PosState
128 |     { pstateInput = s,
129 |       pstateOffset = 0,
130 |       pstateSourcePos = initialPos name,
131 |       pstateTabWidth = defaultTabWidth,
132 |       pstateLinePrefix = ""
133 |     }
134 | 


--------------------------------------------------------------------------------
/Text/Megaparsec/Unicode.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Safe #-}
  2 | 
  3 | -- |
  4 | -- Module      :  Text.Megaparsec.Unicode
  5 | -- Copyright   :  © 2024–present Megaparsec contributors
  6 | -- License     :  FreeBSD
  7 | --
  8 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
  9 | -- Stability   :  experimental
 10 | -- Portability :  portable
 11 | --
 12 | -- Utility functions for working with Unicode.
 13 | --
 14 | -- @since 9.7.0
 15 | module Text.Megaparsec.Unicode
 16 |   ( stringLength,
 17 |     charLength,
 18 |     isWideChar,
 19 |   )
 20 | where
 21 | 
 22 | import Data.Array (Array, bounds, listArray, (!))
 23 | import Data.Char (ord)
 24 | 
 25 | -- | Calculate length of a string taking into account the fact that certain
 26 | -- 'Char's may span more than 1 column.
 27 | --
 28 | -- @since 9.7.0
 29 | stringLength :: (Traversable t) => t Char -> Int
 30 | stringLength = sum . fmap charLength
 31 | 
 32 | -- | Return length of an individual 'Char'.
 33 | --
 34 | -- @since 9.7.0
 35 | charLength :: Char -> Int
 36 | charLength ch = if isWideChar ch then 2 else 1
 37 | 
 38 | -- | Determine whether the given 'Char' is “wide”, that is, whether it spans
 39 | -- 2 columns instead of one.
 40 | --
 41 | -- @since 9.7.0
 42 | isWideChar :: Char -> Bool
 43 | isWideChar c = go (bounds wideCharRanges)
 44 |   where
 45 |     go (lo, hi)
 46 |       | hi < lo = False
 47 |       | a <= n && n <= b = True
 48 |       | n < a = go (lo, pred mid)
 49 |       | otherwise = go (succ mid, hi)
 50 |       where
 51 |         mid = (lo + hi) `div` 2
 52 |         (a, b) = wideCharRanges ! mid
 53 |     n = ord c
 54 | 
 55 | -- | Wide character ranges.
 56 | wideCharRanges :: Array Int (Int, Int)
 57 | wideCharRanges =
 58 |   listArray
 59 |     (0, 118)
 60 |     [ (0x001100, 0x00115f),
 61 |       (0x00231a, 0x00231b),
 62 |       (0x002329, 0x00232a),
 63 |       (0x0023e9, 0x0023ec),
 64 |       (0x0023f0, 0x0023f0),
 65 |       (0x0023f3, 0x0023f3),
 66 |       (0x0025fd, 0x0025fe),
 67 |       (0x002614, 0x002615),
 68 |       (0x002648, 0x002653),
 69 |       (0x00267f, 0x00267f),
 70 |       (0x002693, 0x002693),
 71 |       (0x0026a1, 0x0026a1),
 72 |       (0x0026aa, 0x0026ab),
 73 |       (0x0026bd, 0x0026be),
 74 |       (0x0026c4, 0x0026c5),
 75 |       (0x0026ce, 0x0026ce),
 76 |       (0x0026d4, 0x0026d4),
 77 |       (0x0026ea, 0x0026ea),
 78 |       (0x0026f2, 0x0026f3),
 79 |       (0x0026f5, 0x0026f5),
 80 |       (0x0026fa, 0x0026fa),
 81 |       (0x0026fd, 0x0026fd),
 82 |       (0x002705, 0x002705),
 83 |       (0x00270a, 0x00270b),
 84 |       (0x002728, 0x002728),
 85 |       (0x00274c, 0x00274c),
 86 |       (0x00274e, 0x00274e),
 87 |       (0x002753, 0x002755),
 88 |       (0x002757, 0x002757),
 89 |       (0x002795, 0x002797),
 90 |       (0x0027b0, 0x0027b0),
 91 |       (0x0027bf, 0x0027bf),
 92 |       (0x002b1b, 0x002b1c),
 93 |       (0x002b50, 0x002b50),
 94 |       (0x002b55, 0x002b55),
 95 |       (0x002e80, 0x002e99),
 96 |       (0x002e9b, 0x002ef3),
 97 |       (0x002f00, 0x002fd5),
 98 |       (0x002ff0, 0x002ffb),
 99 |       (0x003000, 0x00303e),
100 |       (0x003041, 0x003096),
101 |       (0x003099, 0x0030ff),
102 |       (0x003105, 0x00312f),
103 |       (0x003131, 0x00318e),
104 |       (0x003190, 0x0031ba),
105 |       (0x0031c0, 0x0031e3),
106 |       (0x0031f0, 0x00321e),
107 |       (0x003220, 0x003247),
108 |       (0x003250, 0x004db5),
109 |       (0x004e00, 0x009fef),
110 |       (0x00a000, 0x00a48c),
111 |       (0x00a490, 0x00a4c6),
112 |       (0x00a960, 0x00a97c),
113 |       (0x00ac00, 0x00d7a3),
114 |       (0x00f900, 0x00fa6d),
115 |       (0x00fa70, 0x00fad9),
116 |       (0x00fe10, 0x00fe19),
117 |       (0x00fe30, 0x00fe52),
118 |       (0x00fe54, 0x00fe66),
119 |       (0x00fe68, 0x00fe6b),
120 |       (0x00ff01, 0x00ff60),
121 |       (0x00ffe0, 0x00ffe6),
122 |       (0x016fe0, 0x016fe3),
123 |       (0x017000, 0x0187f7),
124 |       (0x018800, 0x018af2),
125 |       (0x01b000, 0x01b11e),
126 |       (0x01b150, 0x01b152),
127 |       (0x01b164, 0x01b167),
128 |       (0x01b170, 0x01b2fb),
129 |       (0x01f004, 0x01f004),
130 |       (0x01f0cf, 0x01f0cf),
131 |       (0x01f18e, 0x01f18e),
132 |       (0x01f191, 0x01f19a),
133 |       (0x01f200, 0x01f202),
134 |       (0x01f210, 0x01f23b),
135 |       (0x01f240, 0x01f248),
136 |       (0x01f250, 0x01f251),
137 |       (0x01f260, 0x01f265),
138 |       (0x01f300, 0x01f320),
139 |       (0x01f32d, 0x01f335),
140 |       (0x01f337, 0x01f37c),
141 |       (0x01f37e, 0x01f393),
142 |       (0x01f3a0, 0x01f3ca),
143 |       (0x01f3cf, 0x01f3d3),
144 |       (0x01f3e0, 0x01f3f0),
145 |       (0x01f3f4, 0x01f3f4),
146 |       (0x01f3f8, 0x01f43e),
147 |       (0x01f440, 0x01f440),
148 |       (0x01f442, 0x01f4fc),
149 |       (0x01f4ff, 0x01f53d),
150 |       (0x01f54b, 0x01f54e),
151 |       (0x01f550, 0x01f567),
152 |       (0x01f57a, 0x01f57a),
153 |       (0x01f595, 0x01f596),
154 |       (0x01f5a4, 0x01f5a4),
155 |       (0x01f5fb, 0x01f64f),
156 |       (0x01f680, 0x01f6c5),
157 |       (0x01f6cc, 0x01f6cc),
158 |       (0x01f6d0, 0x01f6d2),
159 |       (0x01f6d5, 0x01f6d5),
160 |       (0x01f6eb, 0x01f6ec),
161 |       (0x01f6f4, 0x01f6fa),
162 |       (0x01f7e0, 0x01f7eb),
163 |       (0x01f90d, 0x01f971),
164 |       (0x01f973, 0x01f976),
165 |       (0x01f97a, 0x01f9a2),
166 |       (0x01f9a5, 0x01f9aa),
167 |       (0x01f9ae, 0x01f9ca),
168 |       (0x01f9cd, 0x01f9ff),
169 |       (0x01fa70, 0x01fa73),
170 |       (0x01fa78, 0x01fa7a),
171 |       (0x01fa80, 0x01fa82),
172 |       (0x01fa90, 0x01fa95),
173 |       (0x020000, 0x02a6d6),
174 |       (0x02a700, 0x02b734),
175 |       (0x02b740, 0x02b81d),
176 |       (0x02b820, 0x02cea1),
177 |       (0x02ceb0, 0x02ebe0),
178 |       (0x02f800, 0x02fa1d)
179 |     ]
180 | {-# NOINLINE wideCharRanges #-}
181 | 


--------------------------------------------------------------------------------
/bench/memory/Main.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE TypeFamilies #-}
  3 | 
  4 | module Main (main) where
  5 | 
  6 | import Control.DeepSeq
  7 | import Control.Monad
  8 | import Data.ByteString (ByteString)
  9 | import qualified Data.ByteString as B
 10 | import Data.List.NonEmpty (NonEmpty (..))
 11 | import qualified Data.List.NonEmpty as NE
 12 | import qualified Data.Set as E
 13 | import Data.Text (Text)
 14 | import qualified Data.Text as T
 15 | import Data.Void
 16 | import Text.Megaparsec
 17 | import qualified Text.Megaparsec.Byte.Binary as Binary
 18 | import Text.Megaparsec.Char
 19 | import qualified Text.Megaparsec.Char.Lexer as L
 20 | import Weigh
 21 | 
 22 | -- | The type of parser that consumes 'Text'.
 23 | type Parser = Parsec Void Text
 24 | 
 25 | -- | The type of parser that consumes 'ByteString'.
 26 | type ParserBs = Parsec Void ByteString
 27 | 
 28 | main :: IO ()
 29 | main = mainWith $ do
 30 |   setColumns [Case, Allocated, GCs, Max]
 31 |   bparser "string" manyAs (string . fst)
 32 |   bparser "string'" manyAs (string' . fst)
 33 |   bparser "many" manyAs (const $ many (char 'a'))
 34 |   bparser "some" manyAs (const $ some (char 'a'))
 35 |   bparser "choice" (const "b") (choice . fmap char . manyAsB' . snd)
 36 |   bparser "count" manyAs (\(_, n) -> count n (char 'a'))
 37 |   bparser "count'" manyAs (\(_, n) -> count' 1 n (char 'a'))
 38 |   bparser "endBy" manyAbs' (const $ endBy (char 'a') (char 'b'))
 39 |   bparser "endBy1" manyAbs' (const $ endBy1 (char 'a') (char 'b'))
 40 |   bparser "manyTill" manyAsB (const $ manyTill (char 'a') (char 'b'))
 41 |   bparser "someTill" manyAsB (const $ someTill (char 'a') (char 'b'))
 42 |   bparser "sepBy" manyAbs (const $ sepBy (char 'a') (char 'b'))
 43 |   bparser "sepBy1" manyAbs (const $ sepBy1 (char 'a') (char 'b'))
 44 |   bparser "sepEndBy" manyAbs' (const $ sepEndBy (char 'a') (char 'b'))
 45 |   bparser "sepEndBy1" manyAbs' (const $ sepEndBy1 (char 'a') (char 'b'))
 46 |   bparser "skipMany" manyAs (const $ skipMany (char 'a'))
 47 |   bparser "skipSome" manyAs (const $ skipSome (char 'a'))
 48 |   bparser "skipCount" manyAs (\(_, n) -> skipCount n (char 'a'))
 49 |   bparser "skipManyTill" manyAsB (const $ skipManyTill (char 'a') (char 'b'))
 50 |   bparser "skipSomeTill" manyAsB (const $ skipSomeTill (char 'a') (char 'b'))
 51 |   bparser "takeWhileP" manyAs (const $ takeWhileP Nothing (== 'a'))
 52 |   bparser "takeWhile1P" manyAs (const $ takeWhile1P Nothing (== 'a'))
 53 |   bparser "decimal" mkInt (const (L.decimal :: Parser Integer))
 54 |   bparser "octal" mkInt (const (L.octal :: Parser Integer))
 55 |   bparser "hexadecimal" mkInt (const (L.hexadecimal :: Parser Integer))
 56 |   bparser "scientific" mkInt (const L.scientific)
 57 |   bparserBs "word32be" many0x33 (const $ many Binary.word32be)
 58 |   bparserBs "word32le" many0x33 (const $ many Binary.word32le)
 59 | 
 60 |   forM_ stdSeries $ \n ->
 61 |     bbundle "single error" n [n]
 62 | 
 63 |   bbundle "2 errors" 1000 [1, 1000]
 64 |   bbundle "4 errors" 1000 [1, 500, 1000]
 65 |   bbundle "100 errors" 1000 [10, 20 .. 1000]
 66 | 
 67 |   breachOffset 0 1000
 68 |   breachOffset 0 2000
 69 |   breachOffset 0 4000
 70 |   breachOffset 1000 1000
 71 | 
 72 |   breachOffsetNoLine 0 1000
 73 |   breachOffsetNoLine 0 2000
 74 |   breachOffsetNoLine 0 4000
 75 |   breachOffsetNoLine 1000 1000
 76 | 
 77 | -- | Perform a series of measurements with the same parser.
 78 | bparser ::
 79 |   (NFData a) =>
 80 |   -- | Name of the benchmark group
 81 |   String ->
 82 |   -- | How to construct input
 83 |   (Int -> Text) ->
 84 |   -- | The parser receiving its future input
 85 |   ((Text, Int) -> Parser a) ->
 86 |   Weigh ()
 87 | bparser name f p = forM_ stdSeries $ \i -> do
 88 |   let arg = (f i, i)
 89 |       p' (s, n) = parse (p (s, n)) "" s
 90 |   func (name ++ "-" ++ show i) p' arg
 91 | 
 92 | -- | Perform a series of measurements with the same parser.
 93 | bparserBs ::
 94 |   (NFData a) =>
 95 |   -- | Name of the benchmark group
 96 |   String ->
 97 |   -- | How to construct input
 98 |   (Int -> ByteString) ->
 99 |   -- | The parser receiving its future input
100 |   ((ByteString, Int) -> ParserBs a) ->
101 |   Weigh ()
102 | bparserBs name f p = forM_ stdSeries $ \i -> do
103 |   let arg = (f i, i)
104 |       p' (s, n) = parse (p (s, n)) "" s
105 |   func (name ++ "-" ++ show i) p' arg
106 | 
107 | -- | Benchmark the 'errorBundlePretty' function.
108 | bbundle ::
109 |   -- | Name of the benchmark
110 |   String ->
111 |   -- | Number of lines in input stream
112 |   Int ->
113 |   -- | Lines with parse errors
114 |   [Int] ->
115 |   Weigh ()
116 | bbundle name totalLines sps = do
117 |   let s = take (totalLines * 80) (cycle as)
118 |       as = replicate 79 'a' ++ "\n"
119 |       f l =
120 |         TrivialError
121 |           (20 + l * 80)
122 |           (Just $ Tokens ('a' :| ""))
123 |           (E.singleton $ Tokens ('b' :| ""))
124 |       bundle :: ParseErrorBundle String Void
125 |       bundle =
126 |         ParseErrorBundle
127 |           { bundleErrors = f <$> NE.fromList sps,
128 |             bundlePosState =
129 |               PosState
130 |                 { pstateInput = s,
131 |                   pstateOffset = 0,
132 |                   pstateSourcePos = initialPos "",
133 |                   pstateTabWidth = defaultTabWidth,
134 |                   pstateLinePrefix = ""
135 |                 }
136 |           }
137 |   func
138 |     ("errorBundlePretty-" ++ show totalLines ++ "-" ++ name)
139 |     errorBundlePretty
140 |     bundle
141 | 
142 | -- | Benchmark the 'reachOffset' function.
143 | breachOffset ::
144 |   -- | Starting offset in 'PosState'
145 |   Int ->
146 |   -- | Offset to reach
147 |   Int ->
148 |   Weigh ()
149 | breachOffset o0 o1 =
150 |   func
151 |     ("reachOffset-" ++ show o0 ++ "-" ++ show o1)
152 |     f
153 |     (o0 * 80, o1 * 80)
154 |   where
155 |     f :: (Int, Int) -> PosState Text
156 |     f (startOffset, targetOffset) =
157 |       snd $
158 |         reachOffset
159 |           targetOffset
160 |           PosState
161 |             { pstateInput = manyAs (targetOffset - startOffset),
162 |               pstateOffset = startOffset,
163 |               pstateSourcePos = initialPos "",
164 |               pstateTabWidth = defaultTabWidth,
165 |               pstateLinePrefix = ""
166 |             }
167 | 
168 | -- | Benchmark the 'reachOffsetNoLine' function.
169 | breachOffsetNoLine ::
170 |   -- | Starting offset in 'PosState'
171 |   Int ->
172 |   -- | Offset to reach
173 |   Int ->
174 |   Weigh ()
175 | breachOffsetNoLine o0 o1 =
176 |   func
177 |     ("reachOffsetNoLine-" ++ show o0 ++ "-" ++ show o1)
178 |     f
179 |     (o0 * 80, o1 * 80)
180 |   where
181 |     f :: (Int, Int) -> PosState Text
182 |     f (startOffset, targetOffset) =
183 |       reachOffsetNoLine
184 |         targetOffset
185 |         PosState
186 |           { pstateInput = manyAs (targetOffset - startOffset),
187 |             pstateOffset = startOffset,
188 |             pstateSourcePos = initialPos "",
189 |             pstateTabWidth = defaultTabWidth,
190 |             pstateLinePrefix = ""
191 |           }
192 | 
193 | -- | The series of sizes to try as part of 'bparser'.
194 | stdSeries :: [Int]
195 | stdSeries = [500, 1000, 2000, 4000]
196 | 
197 | ----------------------------------------------------------------------------
198 | -- Helpers
199 | 
200 | -- | Generate that many \'a\' characters.
201 | manyAs :: Int -> Text
202 | manyAs n = T.replicate n "a"
203 | 
204 | -- | Like 'manyAs' but the result is a 'ByteString'.
205 | many0x33 :: Int -> ByteString
206 | many0x33 n = B.replicate n 0x33
207 | 
208 | -- | Like 'manyAs', but interspersed with \'b\'s.
209 | manyAbs :: Int -> Text
210 | manyAbs n = T.take (if even n then n + 1 else n) (T.replicate n "ab")
211 | 
212 | -- | Like 'manyAs', but with a \'b\' added to the end.
213 | manyAsB :: Int -> Text
214 | manyAsB n = manyAs n <> "b"
215 | 
216 | -- | Like 'manyAsB', but returns a 'String'.
217 | manyAsB' :: Int -> String
218 | manyAsB' n = replicate n 'a' ++ "b"
219 | 
220 | -- | Like 'manyAbs', but ends in a \'b\'.
221 | manyAbs' :: Int -> Text
222 | manyAbs' n = T.take (if even n then n else n + 1) (T.replicate n "ab")
223 | 
224 | -- | Render an 'Integer' with the number of digits linearly dependent on the
225 | -- argument.
226 | mkInt :: Int -> Text
227 | mkInt n = (T.pack . show) ((10 :: Integer) ^ (n `quot` 100))
228 | 


--------------------------------------------------------------------------------
/bench/speed/Main.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE TypeFamilies #-}
  3 | 
  4 | module Main (main) where
  5 | 
  6 | import Control.DeepSeq
  7 | import Criterion.Main
  8 | import Data.ByteString (ByteString)
  9 | import qualified Data.ByteString as B
 10 | import Data.List.NonEmpty (NonEmpty (..))
 11 | import qualified Data.List.NonEmpty as NE
 12 | import qualified Data.Set as E
 13 | import Data.Text (Text)
 14 | import qualified Data.Text as T
 15 | import Data.Void
 16 | import Text.Megaparsec
 17 | import qualified Text.Megaparsec.Byte.Binary as Binary
 18 | import Text.Megaparsec.Char
 19 | import qualified Text.Megaparsec.Char.Lexer as L
 20 | 
 21 | -- | The type of parser that consumes 'Text'.
 22 | type Parser = Parsec Void Text
 23 | 
 24 | -- | The type of parser that consumes 'ByteString'.
 25 | type ParserBs = Parsec Void ByteString
 26 | 
 27 | main :: IO ()
 28 | main =
 29 |   defaultMain
 30 |     [ bparser "string" manyAs (string . fst),
 31 |       bparser "string'" manyAs (string' . fst),
 32 |       bparser "many" manyAs (const $ many (char 'a')),
 33 |       bparser "some" manyAs (const $ some (char 'a')),
 34 |       bparser "choice" (const "b") (choice . fmap char . manyAsB' . snd),
 35 |       bparser "count" manyAs (\(_, n) -> count n (char 'a')),
 36 |       bparser "count'" manyAs (\(_, n) -> count' 1 n (char 'a')),
 37 |       bparser "endBy" manyAbs' (const $ endBy (char 'a') (char 'b')),
 38 |       bparser "endBy1" manyAbs' (const $ endBy1 (char 'a') (char 'b')),
 39 |       bparser "manyTill" manyAsB (const $ manyTill (char 'a') (char 'b')),
 40 |       bparser "someTill" manyAsB (const $ someTill (char 'a') (char 'b')),
 41 |       bparser "sepBy" manyAbs (const $ sepBy (char 'a') (char 'b')),
 42 |       bparser "sepBy1" manyAbs (const $ sepBy1 (char 'a') (char 'b')),
 43 |       bparser "sepEndBy" manyAbs' (const $ sepEndBy (char 'a') (char 'b')),
 44 |       bparser "sepEndBy1" manyAbs' (const $ sepEndBy1 (char 'a') (char 'b')),
 45 |       bparser "skipMany" manyAs (const $ skipMany (char 'a')),
 46 |       bparser "skipSome" manyAs (const $ skipSome (char 'a')),
 47 |       bparser "skipCount" manyAs (\(_, n) -> skipCount n (char 'a')),
 48 |       bparser "skipManyTill" manyAsB (const $ skipManyTill (char 'a') (char 'b')),
 49 |       bparser "skipSomeTill" manyAsB (const $ skipSomeTill (char 'a') (char 'b')),
 50 |       bparser "takeWhileP" manyAs (const $ takeWhileP Nothing (== 'a')),
 51 |       bparser "takeWhile1P" manyAs (const $ takeWhile1P Nothing (== 'a')),
 52 |       bparser "decimal" mkInt (const (L.decimal :: Parser Integer)),
 53 |       bparser "octal" mkInt (const (L.octal :: Parser Integer)),
 54 |       bparser "hexadecimal" mkInt (const (L.hexadecimal :: Parser Integer)),
 55 |       bparser "scientific" mkInt (const L.scientific),
 56 |       bparserBs "word32be" many0x33 (const $ many Binary.word32be),
 57 |       bparserBs "word32le" many0x33 (const $ many Binary.word32le),
 58 |       bgroup "" [bbundle "single error" n [n] | n <- stdSeries],
 59 |       bbundle "2 errors" 1000 [1, 1000],
 60 |       bbundle "4 errors" 1000 [1, 500, 1000],
 61 |       bbundle "100 errors" 1000 [10, 20 .. 1000],
 62 |       breachOffset 0 1000,
 63 |       breachOffset 0 2000,
 64 |       breachOffset 0 4000,
 65 |       breachOffset 1000 1000,
 66 |       breachOffsetNoLine 0 1000,
 67 |       breachOffsetNoLine 0 2000,
 68 |       breachOffsetNoLine 0 4000,
 69 |       breachOffsetNoLine 1000 1000
 70 |     ]
 71 | 
 72 | -- | Perform a series to measurements with the same parser.
 73 | bparser ::
 74 |   (NFData a) =>
 75 |   -- | Name of the benchmark group
 76 |   String ->
 77 |   -- | How to construct input
 78 |   (Int -> Text) ->
 79 |   -- | The parser receiving its future input
 80 |   ((Text, Int) -> Parser a) ->
 81 |   -- | The benchmark
 82 |   Benchmark
 83 | bparser name f p = bgroup name (bs <$> stdSeries)
 84 |   where
 85 |     bs n = env (return (f n, n)) (bench (show n) . nf p')
 86 |     p' (s, n) = parse (p (s, n)) "" s
 87 | 
 88 | -- | Perform a series to measurements with the same parser.
 89 | bparserBs ::
 90 |   (NFData a) =>
 91 |   -- | Name of the benchmark group
 92 |   String ->
 93 |   -- | How to construct input
 94 |   (Int -> ByteString) ->
 95 |   -- | The parser receiving its future input
 96 |   ((ByteString, Int) -> ParserBs a) ->
 97 |   -- | The benchmark
 98 |   Benchmark
 99 | bparserBs name f p = bgroup name (bs <$> stdSeries)
100 |   where
101 |     bs n = env (return (f n, n)) (bench (show n) . nf p')
102 |     p' (s, n) = parse (p (s, n)) "" s
103 | 
104 | -- | Benchmark the 'errorBundlePretty' function.
105 | bbundle ::
106 |   -- | Name of the benchmark
107 |   String ->
108 |   -- | Number of lines in input stream
109 |   Int ->
110 |   -- | Lines with parse errors
111 |   [Int] ->
112 |   Benchmark
113 | bbundle name totalLines sps =
114 |   let s = take (totalLines * 80) (cycle as)
115 |       as = replicate 79 'a' ++ "\n"
116 |       f l =
117 |         TrivialError
118 |           (20 + l * 80)
119 |           (Just $ Tokens ('a' :| ""))
120 |           (E.singleton $ Tokens ('b' :| ""))
121 |       bundle :: ParseErrorBundle String Void
122 |       bundle =
123 |         ParseErrorBundle
124 |           { bundleErrors = f <$> NE.fromList sps,
125 |             bundlePosState =
126 |               PosState
127 |                 { pstateInput = s,
128 |                   pstateOffset = 0,
129 |                   pstateSourcePos = initialPos "",
130 |                   pstateTabWidth = defaultTabWidth,
131 |                   pstateLinePrefix = ""
132 |                 }
133 |           }
134 |    in bench
135 |         ("errorBundlePretty-" ++ show totalLines ++ "-" ++ name)
136 |         (nf errorBundlePretty bundle)
137 | 
138 | -- | Benchmark the 'reachOffset' function.
139 | breachOffset ::
140 |   -- | Starting offset in 'PosState'
141 |   Int ->
142 |   -- | Offset to reach
143 |   Int ->
144 |   Benchmark
145 | breachOffset o0 o1 =
146 |   bench
147 |     ("reachOffset-" ++ show o0 ++ "-" ++ show o1)
148 |     (nf f (o0 * 80, o1 * 80))
149 |   where
150 |     f :: (Int, Int) -> PosState Text
151 |     f (startOffset, targetOffset) =
152 |       snd $
153 |         reachOffset
154 |           targetOffset
155 |           PosState
156 |             { pstateInput = manyAs (targetOffset - startOffset),
157 |               pstateOffset = startOffset,
158 |               pstateSourcePos = initialPos "",
159 |               pstateTabWidth = defaultTabWidth,
160 |               pstateLinePrefix = ""
161 |             }
162 | 
163 | -- | Benchmark the 'reachOffsetNoLine' function.
164 | breachOffsetNoLine ::
165 |   -- | Starting offset in 'PosState'
166 |   Int ->
167 |   -- | Offset to reach
168 |   Int ->
169 |   Benchmark
170 | breachOffsetNoLine o0 o1 =
171 |   bench
172 |     ("reachOffsetNoLine-" ++ show o0 ++ "-" ++ show o1)
173 |     (nf f (o0 * 80, o1 * 80))
174 |   where
175 |     f :: (Int, Int) -> PosState Text
176 |     f (startOffset, targetOffset) =
177 |       reachOffsetNoLine
178 |         targetOffset
179 |         PosState
180 |           { pstateInput = manyAs (targetOffset - startOffset),
181 |             pstateOffset = startOffset,
182 |             pstateSourcePos = initialPos "",
183 |             pstateTabWidth = defaultTabWidth,
184 |             pstateLinePrefix = ""
185 |           }
186 | 
187 | -- | The series of sizes to try as part of 'bparser'.
188 | stdSeries :: [Int]
189 | stdSeries = [500, 1000, 2000, 4000]
190 | 
191 | ----------------------------------------------------------------------------
192 | -- Helpers
193 | 
194 | -- | Generate that many \'a\' characters.
195 | manyAs :: Int -> Text
196 | manyAs n = T.replicate n "a"
197 | 
198 | -- | Like 'manyAs' but the result is a 'ByteString'.
199 | many0x33 :: Int -> ByteString
200 | many0x33 n = B.replicate n 0x33
201 | 
202 | -- | Like 'manyAs', but interspersed with \'b\'s.
203 | manyAbs :: Int -> Text
204 | manyAbs n = T.take (if even n then n + 1 else n) (T.replicate n "ab")
205 | 
206 | -- | Like 'manyAs', but with a \'b\' added to the end.
207 | manyAsB :: Int -> Text
208 | manyAsB n = manyAs n <> "b"
209 | 
210 | -- | Like 'manyAsB', but returns a 'String'.
211 | manyAsB' :: Int -> String
212 | manyAsB' n = replicate n 'a' ++ "b"
213 | 
214 | -- | Like 'manyAbs', but ends in a \'b\'.
215 | manyAbs' :: Int -> Text
216 | manyAbs' n = T.take (if even n then n else n + 1) (T.replicate n "ab")
217 | 
218 | -- | Render an 'Integer' with the number of digits linearly dependent on the
219 | -- argument.
220 | mkInt :: Int -> Text
221 | mkInt n = (T.pack . show) ((10 :: Integer) ^ (n `quot` 100))
222 | 


--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
1 | packages: . megaparsec-tests
2 | tests: True
3 | benchmarks: True
4 | constraints: megaparsec +dev, megaparsec-tests +dev
5 | 


--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
 1 | {
 2 |   "nodes": {
 3 |     "flake-utils": {
 4 |       "locked": {
 5 |         "lastModified": 1667395993,
 6 |         "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
 7 |         "owner": "numtide",
 8 |         "repo": "flake-utils",
 9 |         "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f",
10 |         "type": "github"
11 |       },
12 |       "original": {
13 |         "owner": "numtide",
14 |         "repo": "flake-utils",
15 |         "type": "github"
16 |       }
17 |     },
18 |     "nixpkgs": {
19 |       "locked": {
20 |         "lastModified": 1668443372,
21 |         "narHash": "sha256-lXNlVyNWwO22/JUdBtUWz68jZB3DM+Jq/irlsbwncI0=",
22 |         "owner": "NixOS",
23 |         "repo": "nixpkgs",
24 |         "rev": "dad4de1694cd92d9a0e123bfdf134d0047b836a5",
25 |         "type": "github"
26 |       },
27 |       "original": {
28 |         "owner": "NixOS",
29 |         "ref": "nixpkgs-unstable",
30 |         "repo": "nixpkgs",
31 |         "type": "github"
32 |       }
33 |     },
34 |     "root": {
35 |       "inputs": {
36 |         "flake-utils": "flake-utils",
37 |         "nixpkgs": "nixpkgs"
38 |       }
39 |     }
40 |   },
41 |   "root": "root",
42 |   "version": 7
43 | }
44 | 


--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
  1 | {
  2 |   description = "Megaparsec Nix helpers";
  3 |   inputs = {
  4 |     nixpkgs = {
  5 |       type = "github";
  6 |       owner = "NixOS";
  7 |       repo = "nixpkgs";
  8 |       ref = "nixpkgs-unstable";
  9 |     };
 10 |     flake-utils = {
 11 |       type = "github";
 12 |       owner = "numtide";
 13 |       repo = "flake-utils";
 14 |     };
 15 |   };
 16 |   outputs = { self, nixpkgs, flake-utils }:
 17 |     let
 18 |       pkgs = import nixpkgs {
 19 |         system = "x86_64-linux";
 20 |         config.allowBroken = true;
 21 |       };
 22 |       ghc = "ghc924";
 23 | 
 24 |       megaparsecSource = pkgs.lib.sourceByRegex ./. [
 25 |         "^CHANGELOG\.md$"
 26 |         "^LICENSE\.md$"
 27 |         "^README\.md$"
 28 |         "^Text.*$"
 29 |         "^bench.*$"
 30 |         "^megaparsec\.cabal$"
 31 |       ];
 32 | 
 33 |       megaparsecTestsSource = pkgs.lib.sourceByRegex ./megaparsec-tests [
 34 |         "^LICENSE\.md$"
 35 |         "^README\.md$"
 36 |         "^megaparsec-tests\.cabal$"
 37 |         "^src.*$"
 38 |         "^tests.*$"
 39 |       ];
 40 | 
 41 |       parsersBenchSource = pkgs.lib.sourceByRegex ./parsers-bench [
 42 |         "^README\.md$"
 43 |         "^parsers-bench\.cabal$"
 44 |         "^ParsersBench.*$"
 45 |         "^bench.*$"
 46 |         "^data.*$"
 47 |       ];
 48 | 
 49 |       doBenchmark = p:
 50 |         let
 51 |           targets = [ "bench-speed" "bench-memory" ];
 52 |           copying = pkgs.lib.concatMapStrings
 53 |             (t: "cp dist/build/${t}/${t} $out/bench/\n")
 54 |             targets;
 55 |         in
 56 |         pkgs.haskell.lib.doBenchmark
 57 |           (p.overrideAttrs (drv: {
 58 |             postInstall = ''
 59 |               mkdir -p $out/bench
 60 |               if test -d data/
 61 |               then
 62 |                 mkdir -p $out/bench/data
 63 |                 cp data/* $out/bench/data/
 64 |               fi
 65 |               ${copying}
 66 |             '';
 67 |           }));
 68 | 
 69 |       doJailbreak = pkgs.haskell.lib.doJailbreak;
 70 | 
 71 |       patch = p: patch:
 72 |         pkgs.haskell.lib.appendPatch p patch;
 73 | 
 74 |       megaparsecOverlay = self: super: {
 75 |         "megaparsec" = doBenchmark
 76 |           (super.callCabal2nix "megaparsec" megaparsecSource { });
 77 |         "megaparsec-tests" =
 78 |           super.callCabal2nix "megaparsec-tests" megaparsecTestsSource { };
 79 |         # The ‘parser-combinators-tests’ package is a bit special because it
 80 |         # does not contain an executable nor a library, so its install phase
 81 |         # normally fails. We want to build it and run the tests anyway, so we
 82 |         # have to do these manipulations.
 83 |         "parser-combinators-tests" = pkgs.haskell.lib.dontHaddock
 84 |           (super.parser-combinators-tests.overrideAttrs (drv: {
 85 |             installPhase = "mkdir $out";
 86 |           }));
 87 |         "modern-uri" = doBenchmark super.modern-uri;
 88 |         "parsers-bench" = doBenchmark
 89 |           (super.callCabal2nix "parsers-bench" parsersBenchSource { });
 90 |         "mmark" = doBenchmark super.mmark;
 91 |       };
 92 | 
 93 |       updatedPkgs = pkgs // {
 94 |         haskell = pkgs.haskell // {
 95 |           packages = pkgs.haskell.packages // {
 96 |             "${ghc}" = pkgs.haskell.packages.${ghc}.override {
 97 |               overrides = megaparsecOverlay;
 98 |             };
 99 |           };
100 |         };
101 |       };
102 | 
103 |       haskellPackages = updatedPkgs.haskell.packages.${ghc};
104 | 
105 |       # Base: Megaparsec and its unit tests:
106 |       base = {
107 |         inherit (haskellPackages)
108 |           hspec-megaparsec
109 |           megaparsec
110 |           megaparsec-tests
111 |           parser-combinators-tests;
112 |       };
113 | 
114 |       # Dependent packages of interest:
115 |       deps = {
116 |         inherit (haskellPackages)
117 |           cachix
118 |           cassava-megaparsec
119 |           cue-sheet
120 |           dhall
121 |           hledger
122 |           idris
123 |           mmark
124 |           modern-uri
125 |           replace-megaparsec
126 |           stache
127 |           tomland;
128 |       };
129 | 
130 |       # Benchmarks:
131 |       benches = {
132 |         inherit (haskellPackages)
133 |           megaparsec
134 |           mmark
135 |           modern-uri
136 |           parsers-bench;
137 |       };
138 | 
139 |       # Source distributions:
140 |       dist = with pkgs.haskell.lib; {
141 |         megaparsec = sdistTarball haskellPackages.megaparsec;
142 |         megaparsec-tests = sdistTarball haskellPackages.megaparsec-tests;
143 |       };
144 | 
145 |     in
146 |     flake-utils.lib.eachDefaultSystem (system:
147 |       {
148 |         packages = flake-utils.lib.flattenTree {
149 |           base = pkgs.recurseIntoAttrs base;
150 |           all_base = pkgs.linkFarmFromDrvs "base" (builtins.attrValues base);
151 |           deps = pkgs.recurseIntoAttrs deps;
152 |           all_deps = pkgs.linkFarmFromDrvs "deps" (builtins.attrValues deps);
153 |           benches = pkgs.recurseIntoAttrs benches;
154 |           all_benches = pkgs.linkFarmFromDrvs "benches" (builtins.attrValues benches);
155 |           dist = pkgs.recurseIntoAttrs dist;
156 |           all_dist = pkgs.linkFarmFromDrvs "dist" (builtins.attrValues dist);
157 |         };
158 |         defaultPackage = base.megaparsec;
159 |         devShells.default = haskellPackages.shellFor {
160 |           packages = ps: [
161 |             ps.megaparsec
162 |             ps.megaparsec-tests
163 |           ];
164 |           buildInputs = with haskellPackages; [
165 |             cabal-install
166 |             ghcid
167 |           ];
168 |         };
169 |       });
170 | }
171 | 


--------------------------------------------------------------------------------
/megaparsec-tests/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright © 2015–present Megaparsec contributors
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | * Redistributions of source code must retain the above copyright notice,
 9 |   this list of conditions and the following disclaimer.
10 | 
11 | * Redistributions in binary form must reproduce the above copyright notice,
12 |   this list of conditions and the following disclaimer in the documentation
13 |   and/or other materials provided with the distribution.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
16 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
18 | NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/megaparsec-tests/README.md:
--------------------------------------------------------------------------------
 1 | # Megaparsec tests
 2 | 
 3 | Megaparsec's test suite as a standalone package. The reason for the
 4 | separation is that we can avoid circular dependency on `hspec-megaparsec`
 5 | and thus avoid keeping copies of its source files in our test suite, as we
 6 | had to do before. Another benefit is that we can export some auxiliary
 7 | functions in `megaparsec-tests` which can be used by other test suites, for
 8 | example in the `parser-combinators-tests` package.
 9 | 
10 | The version number of `megaparsec-tests` will be kept in sync with versions
11 | of `megaparsec` from now on.
12 | 
13 | ## License
14 | 
15 | Copyright © 2015–present Megaparsec contributors
16 | 
17 | Distributed under FreeBSD license.
18 | 


--------------------------------------------------------------------------------
/megaparsec-tests/megaparsec-tests.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version:   2.4
 2 | name:            megaparsec-tests
 3 | version:         9.7.0
 4 | license:         BSD-2-Clause
 5 | license-file:    LICENSE.md
 6 | maintainer:      Mark Karpov <markkarpov92@gmail.com>
 7 | author:          Megaparsec contributors
 8 | tested-with:     ghc ==9.8.4 ghc ==9.10.1 ghc ==9.12.1
 9 | homepage:        https://github.com/mrkkrp/megaparsec
10 | bug-reports:     https://github.com/mrkkrp/megaparsec/issues
11 | synopsis:        Test utilities and the test suite of Megaparsec
12 | description:     Test utilities and the test suite of Megaparsec.
13 | category:        Parsing
14 | build-type:      Simple
15 | extra-doc-files: README.md
16 | 
17 | flag dev
18 |     description: Turn on development settings.
19 |     default:     False
20 |     manual:      True
21 | 
22 | library
23 |     exposed-modules:  Test.Hspec.Megaparsec.AdHoc
24 |     hs-source-dirs:   src
25 |     default-language: Haskell2010
26 |     build-depends:
27 |         QuickCheck >=2.10 && <2.16,
28 |         base >=4.15 && <5,
29 |         bytestring >=0.2 && <0.13,
30 |         containers >=0.5 && <0.8,
31 |         hspec >=2 && <3,
32 |         hspec-megaparsec >=2 && <3,
33 |         megaparsec ==9.7.0,
34 |         mtl >=2.2.2 && <3,
35 |         text >=0.2 && <2.2,
36 |         transformers >=0.4 && <0.7
37 | 
38 |     if flag(dev)
39 |         ghc-options:
40 |             -Wall -Werror -Wredundant-constraints -Wpartial-fields
41 |             -Wunused-packages
42 | 
43 |     else
44 |         ghc-options: -O2 -Wall
45 | 
46 | test-suite tests
47 |     type:               exitcode-stdio-1.0
48 |     main-is:            Spec.hs
49 |     build-tool-depends: hspec-discover:hspec-discover >=2.0 && <3.0
50 |     hs-source-dirs:     tests
51 |     other-modules:
52 |         Text.Megaparsec.Byte.BinarySpec
53 |         Text.Megaparsec.Byte.LexerSpec
54 |         Text.Megaparsec.ByteSpec
55 |         Text.Megaparsec.Char.LexerSpec
56 |         Text.Megaparsec.CharSpec
57 |         Text.Megaparsec.DebugSpec
58 |         Text.Megaparsec.ErrorSpec
59 |         Text.Megaparsec.PosSpec
60 |         Text.Megaparsec.StreamSpec
61 |         Text.Megaparsec.UnicodeSpec
62 |         Text.MegaparsecSpec
63 | 
64 |     default-language:   Haskell2010
65 |     build-depends:
66 |         QuickCheck >=2.10 && <2.16,
67 |         base >=4.15 && <5,
68 |         bytestring >=0.2 && <0.13,
69 |         case-insensitive >=1.2 && <1.3,
70 |         containers >=0.5 && <0.8,
71 |         hspec >=2 && <3,
72 |         hspec-megaparsec >=2 && <3,
73 |         megaparsec ==9.7.0,
74 |         megaparsec-tests,
75 |         mtl >=2.2.2 && <3,
76 |         scientific >=0.3.1 && <0.4,
77 |         temporary >=1.1 && <1.4,
78 |         text >=0.2 && <2.2,
79 |         transformers >=0.4 && <0.7
80 | 
81 |     if flag(dev)
82 |         ghc-options:
83 |             -Wall -Werror -Wredundant-constraints -Wpartial-fields
84 |             -Wunused-packages -Wno-unused-imports
85 | 
86 |     else
87 |         ghc-options: -O2 -Wall
88 | 
89 |     if impl(ghc >=9.8)
90 |         ghc-options: -Wno-x-partial
91 | 


--------------------------------------------------------------------------------
/megaparsec-tests/src/Test/Hspec/Megaparsec/AdHoc.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE FlexibleContexts #-}
  2 | {-# LANGUAGE RankNTypes #-}
  3 | {-# LANGUAGE ScopedTypeVariables #-}
  4 | {-# LANGUAGE TypeFamilies #-}
  5 | {-# LANGUAGE UndecidableInstances #-}
  6 | {-# OPTIONS_GHC -fno-warn-orphans #-}
  7 | 
  8 | -- |
  9 | -- Module      :  Test.Hspec.Megaparsec.AdHoc
 10 | -- Copyright   :  © 2019–present Megaparsec contributors
 11 | -- License     :  FreeBSD
 12 | --
 13 | -- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
 14 | -- Stability   :  experimental
 15 | -- Portability :  portable
 16 | --
 17 | -- Ad-hoc helpers used in Megaparsec's test suite.
 18 | module Test.Hspec.Megaparsec.AdHoc
 19 |   ( -- * Types
 20 |     Parser,
 21 | 
 22 |     -- * Helpers to run parsers
 23 |     prs,
 24 |     prs',
 25 |     prs_,
 26 |     grs,
 27 |     grs',
 28 | 
 29 |     -- * Other
 30 |     nes,
 31 |     abcRow,
 32 |     rightOrder,
 33 |     scaleDown,
 34 |     getTabWidth,
 35 |     setTabWidth,
 36 |     strSourcePos,
 37 | 
 38 |     -- * Char and byte conversion
 39 |     toChar,
 40 |     fromChar,
 41 | 
 42 |     -- * Proxies
 43 |     sproxy,
 44 |     bproxy,
 45 |     blproxy,
 46 |     tproxy,
 47 |     tlproxy,
 48 |   )
 49 | where
 50 | 
 51 | import qualified Control.Monad.RWS.Lazy as L
 52 | import qualified Control.Monad.RWS.Strict as S
 53 | import Control.Monad.Reader
 54 | import qualified Control.Monad.State.Lazy as L
 55 | import qualified Control.Monad.State.Strict as S
 56 | import Control.Monad.Trans.Identity
 57 | import qualified Control.Monad.Writer.Lazy as L
 58 | import qualified Control.Monad.Writer.Strict as S
 59 | import qualified Data.ByteString as B
 60 | import qualified Data.ByteString.Lazy as BL
 61 | import Data.Char (chr, ord)
 62 | import Data.List.NonEmpty (NonEmpty (..))
 63 | import qualified Data.List.NonEmpty as NE
 64 | import Data.Proxy
 65 | import qualified Data.Set as E
 66 | import qualified Data.Text as T
 67 | import qualified Data.Text.Lazy as TL
 68 | import Data.Void
 69 | import Data.Word (Word8)
 70 | import Test.Hspec
 71 | import Test.Hspec.Megaparsec
 72 | import Test.QuickCheck
 73 | import Text.Megaparsec
 74 | import Text.Megaparsec.Debug (MonadParsecDbg)
 75 | 
 76 | ----------------------------------------------------------------------------
 77 | -- Types
 78 | 
 79 | -- | The type of parser that consumes a 'String'.
 80 | type Parser = Parsec Void String
 81 | 
 82 | ----------------------------------------------------------------------------
 83 | -- Helpers to run parsers
 84 | 
 85 | -- | Apply the parser to the given input. This is a specialized version of
 86 | -- 'parse' that assumes empty file name.
 87 | prs ::
 88 |   -- | Parser to run
 89 |   Parser a ->
 90 |   -- | Input for the parser
 91 |   String ->
 92 |   -- | Result of parsing
 93 |   Either (ParseErrorBundle String Void) a
 94 | prs p = parse p ""
 95 | 
 96 | -- | Just like 'prs', but allows us to inspect the final state of the
 97 | -- parser.
 98 | prs' ::
 99 |   -- | Parser to run
100 |   Parser a ->
101 |   -- | Input for the parser
102 |   String ->
103 |   -- | Result of parsing
104 |   (State String Void, Either (ParseErrorBundle String Void) a)
105 | prs' p s = runParser' p (initialState s)
106 | 
107 | -- | Just like 'prs', but forces the parser to consume all input by adding
108 | -- 'eof':
109 | --
110 | -- > prs_ p = parse (p <* eof) ""
111 | prs_ ::
112 |   -- | Parser to run
113 |   Parser a ->
114 |   -- | Input for the parser
115 |   String ->
116 |   -- | Result of parsing
117 |   Either (ParseErrorBundle String Void) a
118 | prs_ p = parse (p <* eof) ""
119 | 
120 | -- | Just like 'prs', but interprets given parser as various monads (tries
121 | -- all supported monads transformers in turn).
122 | grs ::
123 |   -- | Parser to run
124 |   (forall m. (MonadParsecDbg Void String m) => m a) ->
125 |   -- | Input for the parser
126 |   String ->
127 |   -- | How to check result of parsing
128 |   (Either (ParseErrorBundle String Void) a -> Expectation) ->
129 |   Expectation
130 | grs p s r = do
131 |   r (prs p s)
132 |   r (prs (runIdentityT p) s)
133 |   r (prs (runReaderT p ()) s)
134 |   r (prs (L.evalStateT p ()) s)
135 |   r (prs (S.evalStateT p ()) s)
136 |   r (prs (evalWriterTL p) s)
137 |   r (prs (evalWriterTS p) s)
138 |   r (prs (evalRWSTL p) s)
139 |   r (prs (evalRWSTS p) s)
140 | 
141 | -- | 'grs'' to 'grs' is as 'prs'' to 'prs'.
142 | grs' ::
143 |   -- | Parser to run
144 |   (forall m. (MonadParsecDbg Void String m) => m a) ->
145 |   -- | Input for the parser
146 |   String ->
147 |   -- | How to check result of parsing
148 |   ((State String Void, Either (ParseErrorBundle String Void) a) -> Expectation) ->
149 |   Expectation
150 | grs' p s r = do
151 |   r (prs' p s)
152 |   r (prs' (runIdentityT p) s)
153 |   r (prs' (runReaderT p ()) s)
154 |   r (prs' (L.evalStateT p ()) s)
155 |   r (prs' (S.evalStateT p ()) s)
156 |   r (prs' (evalWriterTL p) s)
157 |   r (prs' (evalWriterTS p) s)
158 |   r (prs' (evalRWSTL p) s)
159 |   r (prs' (evalRWSTS p) s)
160 | 
161 | evalWriterTL :: (Monad m) => L.WriterT [Int] m a -> m a
162 | evalWriterTL = fmap fst . L.runWriterT
163 | 
164 | evalWriterTS :: (Monad m) => S.WriterT [Int] m a -> m a
165 | evalWriterTS = fmap fst . S.runWriterT
166 | 
167 | evalRWSTL :: (Monad m) => L.RWST () [Int] () m a -> m a
168 | evalRWSTL m = do
169 |   (a, _, _) <- L.runRWST m () ()
170 |   return a
171 | 
172 | evalRWSTS :: (Monad m) => S.RWST () [Int] () m a -> m a
173 | evalRWSTS m = do
174 |   (a, _, _) <- S.runRWST m () ()
175 |   return a
176 | 
177 | ----------------------------------------------------------------------------
178 | -- Other
179 | 
180 | -- | Make a singleton non-empty list from a value.
181 | nes :: a -> NonEmpty a
182 | nes x = x :| []
183 | 
184 | -- | @abcRow a b c@ generates string consisting of character “a” repeated
185 | -- @a@ times, character “b” repeated @b@ times, and character “c” repeated
186 | -- @c@ times.
187 | abcRow :: Int -> Int -> Int -> String
188 | abcRow a b c = replicate a 'a' ++ replicate b 'b' ++ replicate c 'c'
189 | 
190 | -- | Check that the given parser returns the list in the right order.
191 | rightOrder ::
192 |   -- | The parser to test
193 |   Parser String ->
194 |   -- | Input for the parser
195 |   String ->
196 |   -- | Expected result
197 |   String ->
198 |   Spec
199 | rightOrder p s s' =
200 |   it "produces the list in the right order" $
201 |     prs_ p s `shouldParse` s'
202 | 
203 | -- | Get tab width from 'PosState'. Use with care only for testing.
204 | getTabWidth :: (MonadParsec e s m) => m Pos
205 | getTabWidth = pstateTabWidth . statePosState <$> getParserState
206 | 
207 | -- | Set tab width in 'PosState'. Use with care only for testing.
208 | setTabWidth :: (MonadParsec e s m) => Pos -> m ()
209 | setTabWidth w = updateParserState $ \st ->
210 |   let pst = statePosState st
211 |    in st {statePosState = pst {pstateTabWidth = w}}
212 | 
213 | -- | Scale down.
214 | scaleDown :: Gen a -> Gen a
215 | scaleDown = scale (`div` 4)
216 | 
217 | -- | A helper function that is used to advance 'SourcePos' given a 'String'.
218 | strSourcePos :: Pos -> SourcePos -> String -> SourcePos
219 | strSourcePos tabWidth ipos input =
220 |   let (_, pst') = reachOffset maxBound pstate in pstateSourcePos pst'
221 |   where
222 |     pstate =
223 |       PosState
224 |         { pstateInput = input,
225 |           pstateOffset = 0,
226 |           pstateSourcePos = ipos,
227 |           pstateTabWidth = tabWidth,
228 |           pstateLinePrefix = ""
229 |         }
230 | 
231 | ----------------------------------------------------------------------------
232 | -- Char and byte conversion
233 | 
234 | -- | Convert a byte to char.
235 | toChar :: Word8 -> Char
236 | toChar = chr . fromIntegral
237 | 
238 | -- | Covert a char to byte.
239 | fromChar :: Char -> Maybe Word8
240 | fromChar x =
241 |   let p = ord x
242 |    in if p > 0xff
243 |         then Nothing
244 |         else Just (fromIntegral p)
245 | 
246 | ----------------------------------------------------------------------------
247 | -- Proxies
248 | 
249 | sproxy :: Proxy String
250 | sproxy = Proxy
251 | 
252 | bproxy :: Proxy B.ByteString
253 | bproxy = Proxy
254 | 
255 | blproxy :: Proxy BL.ByteString
256 | blproxy = Proxy
257 | 
258 | tproxy :: Proxy T.Text
259 | tproxy = Proxy
260 | 
261 | tlproxy :: Proxy TL.Text
262 | tlproxy = Proxy
263 | 
264 | ----------------------------------------------------------------------------
265 | -- Arbitrary instances
266 | 
267 | instance Arbitrary Void where
268 |   arbitrary = error "Arbitrary Void"
269 | 
270 | instance Arbitrary Pos where
271 |   arbitrary = mkPos <$> (getSmall . getPositive <$> arbitrary)
272 | 
273 | instance Arbitrary SourcePos where
274 |   arbitrary =
275 |     SourcePos
276 |       <$> scaleDown arbitrary
277 |       <*> arbitrary
278 |       <*> arbitrary
279 | 
280 | instance (Arbitrary t) => Arbitrary (ErrorItem t) where
281 |   arbitrary =
282 |     oneof
283 |       [ Tokens <$> (NE.fromList . getNonEmpty <$> arbitrary),
284 |         Label <$> (NE.fromList . getNonEmpty <$> arbitrary),
285 |         return EndOfInput
286 |       ]
287 | 
288 | instance Arbitrary (ErrorFancy a) where
289 |   arbitrary =
290 |     oneof
291 |       [ ErrorFail <$> scaleDown arbitrary,
292 |         ErrorIndentation <$> arbitrary <*> arbitrary <*> arbitrary
293 |       ]
294 | 
295 | instance
296 |   (Arbitrary (Token s), Ord (Token s), Arbitrary e, Ord e) =>
297 |   Arbitrary (ParseError s e)
298 |   where
299 |   arbitrary =
300 |     oneof
301 |       [ TrivialError
302 |           <$> (getNonNegative <$> arbitrary)
303 |           <*> arbitrary
304 |           <*> (E.fromList <$> scaleDown arbitrary),
305 |         FancyError
306 |           <$> (getNonNegative <$> arbitrary)
307 |           <*> (E.fromList <$> scaleDown arbitrary)
308 |       ]
309 | 
310 | instance (Arbitrary s) => Arbitrary (State s e) where
311 |   arbitrary = do
312 |     input <- scaleDown arbitrary
313 |     offset <- choose (1, 10000)
314 |     pstate :: PosState s <- arbitrary
315 |     return
316 |       State
317 |         { stateInput = input,
318 |           stateOffset = offset,
319 |           statePosState =
320 |             pstate
321 |               { pstateInput = input,
322 |                 pstateOffset = offset
323 |               },
324 |           stateParseErrors = []
325 |         }
326 | 
327 | instance (Arbitrary s) => Arbitrary (PosState s) where
328 |   arbitrary =
329 |     PosState
330 |       <$> arbitrary
331 |       <*> choose (1, 10000)
332 |       <*> arbitrary
333 |       <*> (mkPos <$> choose (1, 20))
334 |       <*> scaleDown arbitrary
335 | 
336 | instance Arbitrary T.Text where
337 |   arbitrary = T.pack <$> arbitrary
338 | 
339 | instance Arbitrary TL.Text where
340 |   arbitrary = TL.pack <$> arbitrary
341 | 
342 | instance Arbitrary B.ByteString where
343 |   arbitrary = B.pack <$> arbitrary
344 | 
345 | instance Arbitrary BL.ByteString where
346 |   arbitrary = BL.pack <$> arbitrary
347 | 
348 | instance (Arbitrary a) => Arbitrary (NonEmpty a) where
349 |   arbitrary = NE.fromList <$> (arbitrary `suchThat` (not . null))
350 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Spec.hs:
--------------------------------------------------------------------------------
1 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-}
2 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Text/Megaparsec/Byte/BinarySpec.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleContexts #-}
 2 | {-# LANGUAGE OverloadedStrings #-}
 3 | {-# LANGUAGE RankNTypes #-}
 4 | {-# LANGUAGE TypeApplications #-}
 5 | 
 6 | module Text.Megaparsec.Byte.BinarySpec (spec) where
 7 | 
 8 | import qualified Data.ByteString.Builder as BB
 9 | import qualified Data.ByteString.Lazy as BL
10 | import Data.Void
11 | import Test.Hspec
12 | import Test.Hspec.Megaparsec
13 | import Test.QuickCheck
14 | import Text.Megaparsec
15 | import Text.Megaparsec.Byte.Binary
16 | 
17 | spec :: Spec
18 | spec = do
19 |   describe "word8" $
20 |     testParser word8 BB.word8
21 |   describe "word16le" $
22 |     testParser word16le BB.word16LE
23 |   describe "word16be" $
24 |     testParser word16be BB.word16BE
25 |   describe "word32le" $
26 |     testParser word32le BB.word32LE
27 |   describe "word32be" $
28 |     testParser word32be BB.word32BE
29 |   describe "word64le" $
30 |     testParser word64le BB.word64LE
31 |   describe "word64be" $
32 |     testParser word64be BB.word64BE
33 |   describe "int8" $
34 |     testParser int8 BB.int8
35 |   describe "int16le" $
36 |     testParser int16le BB.int16LE
37 |   describe "int16be" $
38 |     testParser int16be BB.int16BE
39 |   describe "int32le" $ do
40 |     testParser int32le BB.int32LE
41 |   describe "int32be" $ do
42 |     testParser int32be BB.int32BE
43 |   describe "int64le" $ do
44 |     testParser int64le BB.int64LE
45 |   describe "int64be" $ do
46 |     testParser int64be BB.int64BE
47 | 
48 | ----------------------------------------------------------------------------
49 | -- Helpers
50 | 
51 | -- | Test a binary parser.
52 | testParser ::
53 |   (Arbitrary a, Show a, Eq a) =>
54 |   -- | The parser to test
55 |   (forall s. (Stream s, BinaryChunk (Tokens s)) => Parsec Void s a) ->
56 |   -- | Builder for the values that the parer consumes
57 |   (a -> BB.Builder) ->
58 |   SpecWith ()
59 | testParser parser serializer = do
60 |   it "works with strict ByteString" $
61 |     property $ \x -> do
62 |       let rendered = (BL.toStrict . BB.toLazyByteString . serializer) x
63 |       parse (parser <* eof) "" rendered `shouldParse` x
64 |   it "works with lazy ByteString" $
65 |     property $ \x -> do
66 |       let rendered = (BB.toLazyByteString . serializer) x
67 |       parse (parser <* eof) "" rendered `shouldParse` x
68 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Text/Megaparsec/Byte/LexerSpec.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | module Text.Megaparsec.Byte.LexerSpec (spec) where
  4 | 
  5 | import Control.Applicative
  6 | import Data.ByteString (ByteString)
  7 | import qualified Data.ByteString as B
  8 | import qualified Data.ByteString.Char8 as B8
  9 | import Data.Char (intToDigit, toUpper)
 10 | import Data.Scientific (Scientific, fromFloatDigits)
 11 | import Data.Void
 12 | import Data.Word (Word8)
 13 | import Numeric (showFFloatAlt, showHex, showInt, showIntAtBase, showOct)
 14 | import Test.Hspec
 15 | import Test.Hspec.Megaparsec
 16 | import Test.QuickCheck
 17 | import Text.Megaparsec
 18 | import qualified Text.Megaparsec.Byte as B
 19 | import Text.Megaparsec.Byte.Lexer
 20 | 
 21 | type Parser = Parsec Void ByteString
 22 | 
 23 | spec :: Spec
 24 | spec = do
 25 |   describe "skipLineComment" $ do
 26 |     context "when there is no newline at the end of line" $
 27 |       it "is picked up successfully" $ do
 28 |         let p = space B.space1 (skipLineComment "//") empty <* eof
 29 |             s = "  // this line comment doesn't have a newline at the end "
 30 |         prs p s `shouldParse` ()
 31 |         prs' p s `succeedsLeaving` ""
 32 |     it "inner characters are labelled properly" $ do
 33 |       let p = skipLineComment "//" <* empty
 34 |           s = "// here we go"
 35 |       prs p s `shouldFailWith` err (B.length s) (elabel "character")
 36 |       prs' p s `failsLeaving` ""
 37 | 
 38 |   describe "skipBlockComment" $
 39 |     it "skips a simple block comment" $ do
 40 |       let p = skipBlockComment "/*" "*/"
 41 |           s = "/* here we go */foo!"
 42 |       prs p s `shouldParse` ()
 43 |       prs' p s `succeedsLeaving` "foo!"
 44 | 
 45 |   describe "skipBlockCommentNested" $
 46 |     context "when it runs into nested block comments" $
 47 |       it "parses them all right" $ do
 48 |         let p =
 49 |               space
 50 |                 B.space1
 51 |                 empty
 52 |                 (skipBlockCommentNested "/*" "*/")
 53 |                 <* eof
 54 |             s = " /* foo bar /* baz */ quux */ "
 55 |         prs p s `shouldParse` ()
 56 |         prs' p s `succeedsLeaving` ""
 57 | 
 58 |   describe "decimal" $ do
 59 |     context "when stream begins with decimal digits" $
 60 |       it "they are parsed as an integer" $
 61 |         property $ \n' -> do
 62 |           let p = decimal :: Parser Integer
 63 |               n = getNonNegative n'
 64 |               s = B8.pack (showInt n "")
 65 |           prs p s `shouldParse` n
 66 |           prs' p s `succeedsLeaving` ""
 67 |     context "when stream does not begin with decimal digits" $
 68 |       it "signals correct parse error" $
 69 |         property $ \a as ->
 70 |           not (isDigit a) ==> do
 71 |             let p = decimal :: Parser Integer
 72 |                 s = B.pack (a : as)
 73 |             prs p s `shouldFailWith` err 0 (utok a <> elabel "integer")
 74 |     context "when stream is empty" $
 75 |       it "signals correct parse error" $
 76 |         prs (decimal :: Parser Integer) ""
 77 |           `shouldFailWith` err 0 (ueof <> elabel "integer")
 78 | 
 79 |   describe "binary" $ do
 80 |     context "when stream begins with binary digits" $
 81 |       it "they are parsed as an integer" $
 82 |         property $ \n' -> do
 83 |           let p = binary :: Parser Integer
 84 |               n = getNonNegative n'
 85 |               s = B8.pack (showIntAtBase 2 intToDigit n "")
 86 |           prs p s `shouldParse` n
 87 |           prs' p s `succeedsLeaving` ""
 88 |     context "when stream does not begin with binary digits" $
 89 |       it "signals correct parse error" $
 90 |         property $ \a as ->
 91 |           a /= 48 && a /= 49 ==> do
 92 |             let p = binary :: Parser Integer
 93 |                 s = B.pack (a : as)
 94 |             prs p s
 95 |               `shouldFailWith` err 0 (utok a <> elabel "binary integer")
 96 |     context "when stream is empty" $
 97 |       it "signals correct parse error" $
 98 |         prs (binary :: Parser Integer) ""
 99 |           `shouldFailWith` err 0 (ueof <> elabel "binary integer")
100 | 
101 |   describe "octal" $ do
102 |     context "when stream begins with octal digits" $
103 |       it "they are parsed as an integer" $
104 |         property $ \n' -> do
105 |           let p = octal :: Parser Integer
106 |               n = getNonNegative n'
107 |               s = B8.pack (showOct n "")
108 |           prs p s `shouldParse` n
109 |           prs' p s `succeedsLeaving` ""
110 |     context "when stream does not begin with octal digits" $
111 |       it "signals correct parse error" $
112 |         property $ \a as ->
113 |           not (isOctDigit a) ==> do
114 |             let p = octal :: Parser Integer
115 |                 s = B.pack (a : as)
116 |             prs p s
117 |               `shouldFailWith` err 0 (utok a <> elabel "octal integer")
118 |     context "when stream is empty" $
119 |       it "signals correct parse error" $
120 |         prs (octal :: Parser Integer) ""
121 |           `shouldFailWith` err 0 (ueof <> elabel "octal integer")
122 | 
123 |   describe "hexadecimal" $ do
124 |     context "when stream begins with hexadecimal digits" $
125 |       it "they are parsed as an integer" $
126 |         property $ \n' -> do
127 |           let p = hexadecimal :: Parser Integer
128 |               n = getNonNegative n'
129 |               s = B8.pack (showHex n "")
130 |           prs p s `shouldParse` n
131 |           prs' p s `succeedsLeaving` ""
132 |     context "when stream begins with hexadecimal digits (uppercase)" $
133 |       it "they are parsed as an integer" $
134 |         property $ \n' -> do
135 |           let p = hexadecimal :: Parser Integer
136 |               n = getNonNegative n'
137 |               s = B8.pack (toUpper <$> showHex n "")
138 |           prs p s `shouldParse` n
139 |           prs' p s `succeedsLeaving` ""
140 |     context "when stream does not begin with hexadecimal digits" $
141 |       it "signals correct parse error" $
142 |         property $ \a as ->
143 |           not (isHexDigit a) ==> do
144 |             let p = hexadecimal :: Parser Integer
145 |                 s = B.pack (a : as)
146 |             prs p s
147 |               `shouldFailWith` err 0 (utok a <> elabel "hexadecimal integer")
148 |     context "when stream is empty" $
149 |       it "signals correct parse error" $
150 |         prs (hexadecimal :: Parser Integer) ""
151 |           `shouldFailWith` err 0 (ueof <> elabel "hexadecimal integer")
152 | 
153 |   describe "scientific" $ do
154 |     context "when stream begins with a number" $
155 |       it "parses it" $
156 |         property $ \n' -> do
157 |           let p = scientific :: Parser Scientific
158 |               s =
159 |                 B8.pack $
160 |                   either
161 |                     (show . getNonNegative)
162 |                     (show . getNonNegative)
163 |                     (n' :: Either (NonNegative Integer) (NonNegative Double))
164 |           prs p s `shouldParse` case n' of
165 |             Left x -> fromIntegral (getNonNegative x)
166 |             Right x -> fromFloatDigits (getNonNegative x)
167 |           prs' p s `succeedsLeaving` ""
168 |     context "when fractional part is interrupted" $
169 |       it "signals correct parse error" $
170 |         property $ \(NonNegative n) -> do
171 |           let p = scientific <* empty :: Parser Scientific
172 |               s = B8.pack (showFFloatAlt Nothing (n :: Double) "")
173 |           prs p s
174 |             `shouldFailWith` err
175 |               (B.length s)
176 |               (etok 69 <> etok 101 <> elabel "digit")
177 |           prs' p s `failsLeaving` ""
178 |     context "when whole part is followed by a dot without valid fractional part" $
179 |       it "parsing of fractional part is backtracked correctly" $
180 |         property $ \(NonNegative n) -> do
181 |           let p = scientific :: Parser Scientific
182 |               s = B8.pack $ showInt (n :: Integer) ".err"
183 |           prs p s `shouldParse` fromIntegral n
184 |           prs' p s `succeedsLeaving` ".err"
185 |     context "when number is followed by something starting with 'e'" $
186 |       it "parsing of exponent part is backtracked correctly" $
187 |         property $ \(NonNegative n) -> do
188 |           let p = scientific :: Parser Scientific
189 |               s = B8.pack $ showFFloatAlt Nothing (n :: Double) "err!"
190 |           prs p s `shouldParse` fromFloatDigits n
191 |           prs' p s `succeedsLeaving` "err!"
192 |     context "when stream is empty" $
193 |       it "signals correct parse error" $
194 |         prs (scientific :: Parser Scientific) ""
195 |           `shouldFailWith` err 0 (ueof <> elabel "digit")
196 | 
197 |   describe "float" $ do
198 |     context "when stream begins with a float" $
199 |       it "parses it" $
200 |         property $ \n' -> do
201 |           let p = float :: Parser Double
202 |               n = getNonNegative n'
203 |               s = B8.pack (show n)
204 |           prs p s `shouldParse` n
205 |           prs' p s `succeedsLeaving` ""
206 |     context "when stream does not begin with a float" $
207 |       it "signals correct parse error" $
208 |         property $ \a as ->
209 |           not (isDigit a) ==> do
210 |             let p = float :: Parser Double
211 |                 s = B.pack (a : as)
212 |             prs p s
213 |               `shouldFailWith` err 0 (utok a <> elabel "digit")
214 |             prs' p s `failsLeaving` s
215 |     context "when stream begins with an integer (decimal)" $
216 |       it "signals correct parse error" $
217 |         property $ \n' -> do
218 |           let p = float :: Parser Double
219 |               n = getNonNegative n'
220 |               s = B8.pack $ show (n :: Integer)
221 |           prs p s
222 |             `shouldFailWith` err
223 |               (B.length s)
224 |               (ueof <> etok 46 <> etok 69 <> etok 101 <> elabel "digit")
225 |           prs' p s `failsLeaving` ""
226 |     context "when number is followed by something starting with 'e'" $
227 |       it "parsing of exponent part is backtracked correctly" $
228 |         property $ \(NonNegative n) -> do
229 |           let p = float :: Parser Double
230 |               s = B8.pack $ showFFloatAlt Nothing (n :: Double) "err!"
231 |           prs p s `shouldParse` n
232 |           prs' p s `succeedsLeaving` "err!"
233 |     context "when stream is empty" $
234 |       it "signals correct parse error" $
235 |         prs (float :: Parser Double) ""
236 |           `shouldFailWith` err 0 (ueof <> elabel "digit")
237 |     context "when there is float with just exponent" $
238 |       it "parses it all right" $
239 |         do
240 |           let p = float :: Parser Double
241 |           prs p "123e3" `shouldParse` 123e3
242 |           prs' p "123e3" `succeedsLeaving` ""
243 |           prs p "123e+3" `shouldParse` 123e+3
244 |           prs' p "123e+3" `succeedsLeaving` ""
245 |           prs p "123e-3" `shouldParse` 123e-3
246 |           prs' p "123e-3" `succeedsLeaving` ""
247 | 
248 |   describe "signed" $ do
249 |     context "with integer" $
250 |       it "parses signed integers" $
251 |         property $ \n -> do
252 |           let p :: Parser Integer
253 |               p = signed (hidden B.space) decimal
254 |               s = B8.pack (show n)
255 |           prs p s `shouldParse` n
256 |           prs' p s `succeedsLeaving` ""
257 |     context "with float" $
258 |       it "parses signed floats" $
259 |         property $ \n -> do
260 |           let p :: Parser Double
261 |               p = signed (hidden B.space) float
262 |               s = B8.pack (show n)
263 |           prs p s `shouldParse` n
264 |           prs' p s `succeedsLeaving` ""
265 |     context "with scientific" $
266 |       it "parses singed scientific numbers" $
267 |         property $ \n -> do
268 |           let p = signed (hidden B.space) scientific
269 |               s = B8.pack $ either show show (n :: Either Integer Double)
270 |           prs p s `shouldParse` case n of
271 |             Left x -> fromIntegral x
272 |             Right x -> fromFloatDigits x
273 |     context "when number is prefixed with plus sign" $
274 |       it "parses the number" $
275 |         property $ \n' -> do
276 |           let p :: Parser Integer
277 |               p = signed (hidden B.space) decimal
278 |               n = getNonNegative n'
279 |               s = B8.pack ('+' : show n)
280 |           prs p s `shouldParse` n
281 |           prs' p s `succeedsLeaving` ""
282 |     context "when number is prefixed with white space" $
283 |       it "signals correct parse error" $
284 |         property $ \n -> do
285 |           let p :: Parser Integer
286 |               p = signed (hidden B.space) decimal
287 |               s = B8.pack (' ' : show (n :: Integer))
288 |           prs p s
289 |             `shouldFailWith` err
290 |               0
291 |               (utok 32 <> etok 43 <> etok 45 <> elabel "integer")
292 |           prs' p s `failsLeaving` s
293 |     context "when there is white space between sign and digits" $
294 |       it "parses it all right" $ do
295 |         let p :: Parser Integer
296 |             p = signed (hidden B.space) decimal
297 |             s = "- 123"
298 |         prs p s `shouldParse` (-123)
299 |         prs' p s `succeedsLeaving` ""
300 | 
301 | ----------------------------------------------------------------------------
302 | -- Helpers
303 | 
304 | prs ::
305 |   -- | Parser to run
306 |   Parser a ->
307 |   -- | Input for the parser
308 |   ByteString ->
309 |   -- | Result of parsing
310 |   Either (ParseErrorBundle ByteString Void) a
311 | prs p = parse p ""
312 | 
313 | prs' ::
314 |   -- | Parser to run
315 |   Parser a ->
316 |   -- | Input for the parser
317 |   ByteString ->
318 |   -- | Result of parsing
319 |   (State ByteString Void, Either (ParseErrorBundle ByteString Void) a)
320 | prs' p s = runParser' p (initialState s)
321 | 
322 | isDigit :: Word8 -> Bool
323 | isDigit w = w - 48 < 10
324 | 
325 | isOctDigit :: Word8 -> Bool
326 | isOctDigit w = w - 48 < 8
327 | 
328 | isHexDigit :: Word8 -> Bool
329 | isHexDigit w =
330 |   (w >= 48 && w <= 57)
331 |     || (w >= 97 && w <= 102)
332 |     || (w >= 65 && w <= 70)
333 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Text/Megaparsec/ByteSpec.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | module Text.Megaparsec.ByteSpec (spec) where
  4 | 
  5 | import Control.Monad
  6 | import Data.ByteString (ByteString)
  7 | import qualified Data.ByteString as B
  8 | import Data.Char hiding (isSpace)
  9 | import Data.Maybe (fromMaybe)
 10 | import Data.Void
 11 | import Data.Word (Word8)
 12 | import Test.Hspec
 13 | import Test.Hspec.Megaparsec
 14 | import Test.Hspec.Megaparsec.AdHoc hiding (Parser, prs, prs')
 15 | import Test.QuickCheck
 16 | import Text.Megaparsec
 17 | import Text.Megaparsec.Byte
 18 | 
 19 | type Parser = Parsec Void ByteString
 20 | 
 21 | spec :: Spec
 22 | spec = do
 23 |   describe "newline" $
 24 |     checkStrLit "newline" "\n" (tokenToChunk bproxy <$> newline)
 25 | 
 26 |   describe "csrf" $
 27 |     checkStrLit "crlf newline" "\r\n" crlf
 28 | 
 29 |   describe "eol" $ do
 30 |     context "when stream begins with a newline" $
 31 |       it "succeeds returning the newline" $
 32 |         property $ \s -> do
 33 |           let s' = "\n" <> s
 34 |           prs eol s' `shouldParse` "\n"
 35 |           prs' eol s' `succeedsLeaving` s
 36 |     context "when stream begins with CRLF sequence" $
 37 |       it "parses the CRLF sequence" $
 38 |         property $ \s -> do
 39 |           let s' = "\r\n" <> s
 40 |           prs eol s' `shouldParse` "\r\n"
 41 |           prs' eol s' `succeedsLeaving` s
 42 |     context "when stream begins with '\\r', but it's not followed by '\\n'" $
 43 |       it "signals correct parse error" $
 44 |         property $ \ch ->
 45 |           ch /= 10 ==> do
 46 |             let s = "\r" <> B.singleton ch
 47 |             prs eol s
 48 |               `shouldFailWith` err 0 (utoks s <> elabel "end of line")
 49 |     context "when input stream is '\\r'" $
 50 |       it "signals correct parse error" $
 51 |         prs eol "\r"
 52 |           `shouldFailWith` err
 53 |             0
 54 |             (utok 13 <> elabel "end of line")
 55 |     context "when stream does not begin with newline or CRLF sequence" $
 56 |       it "signals correct parse error" $
 57 |         property $ \ch s ->
 58 |           (ch /= 13 && ch /= 10) ==> do
 59 |             let s' = B.singleton ch <> s
 60 |             prs eol s'
 61 |               `shouldFailWith` err
 62 |                 0
 63 |                 (utoks (B.take 2 s') <> elabel "end of line")
 64 |     context "when stream is empty" $
 65 |       it "signals correct parse error" $
 66 |         prs eol ""
 67 |           `shouldFailWith` err
 68 |             0
 69 |             (ueof <> elabel "end of line")
 70 | 
 71 |   describe "tab" $
 72 |     checkStrLit "tab" "\t" (tokenToChunk bproxy <$> tab)
 73 | 
 74 |   describe "space" $
 75 |     it "consumes space up to first non-space character" $
 76 |       property $ \s' -> do
 77 |         let (s0, s1) = B.partition isSpace s'
 78 |             s = s0 <> s1
 79 |         prs space s `shouldParse` ()
 80 |         prs' space s `succeedsLeaving` s1
 81 | 
 82 |   describe "hspace" $
 83 |     it "consumes space up to first non-space character" $
 84 |       property $ \s' -> do
 85 |         let (s0, s1) = B.partition isHSpace s'
 86 |             s = s0 <> s1
 87 |         prs hspace s `shouldParse` ()
 88 |         prs' hspace s `succeedsLeaving` s1
 89 | 
 90 |   describe "space1" $ do
 91 |     context "when stream does not start with a space character" $
 92 |       it "signals correct parse error" $
 93 |         property $ \ch s' ->
 94 |           not (isSpace ch) ==> do
 95 |             let (s0, s1) = B.partition isSpace s'
 96 |                 s = B.singleton ch <> s0 <> s1
 97 |             prs space1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
 98 |             prs' space1 s `failsLeaving` s
 99 |     context "when stream starts with a space character" $
100 |       it "consumes space up to first non-space character" $
101 |         property $ \s' -> do
102 |           let (s0, s1) = B.partition isSpace s'
103 |               s = " " <> s0 <> s1
104 |           prs space1 s `shouldParse` ()
105 |           prs' space1 s `succeedsLeaving` s1
106 |     context "when stream is empty" $
107 |       it "signals correct parse error" $
108 |         prs space1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
109 | 
110 |   describe "hspace1" $ do
111 |     context "when stream does not start with a space character" $
112 |       it "signals correct parse error" $
113 |         property $ \ch s' ->
114 |           not (isSpace ch) ==> do
115 |             let (s0, s1) = B.partition isHSpace s'
116 |                 s = B.singleton ch <> s0 <> s1
117 |             prs hspace1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
118 |             prs' hspace1 s `failsLeaving` s
119 |     context "when stream starts with a space character" $
120 |       it "consumes space up to first non-space character" $
121 |         property $ \s' -> do
122 |           let (s0, s1) = B.partition isHSpace s'
123 |               s = " " <> s0 <> s1
124 |           prs hspace1 s `shouldParse` ()
125 |           prs' hspace1 s `succeedsLeaving` s1
126 |     context "when stream is empty" $
127 |       it "signals correct parse error" $
128 |         prs hspace1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
129 | 
130 |   describe "controlChar" $
131 |     checkCharPred "control character" (isControl . toChar) controlChar
132 | 
133 |   describe "spaceChar" $
134 |     checkCharRange "white space" [9, 10, 11, 12, 13, 32, 160] spaceChar
135 | 
136 |   describe "printChar" $
137 |     checkCharPred "printable character" (isPrint . toChar) printChar
138 | 
139 |   describe "digitChar" $
140 |     checkCharRange "digit" [48 .. 57] digitChar
141 | 
142 |   describe "binDigitChar" $
143 |     checkCharRange "binary digit" [48 .. 49] binDigitChar
144 | 
145 |   describe "octDigitChar" $
146 |     checkCharRange "octal digit" [48 .. 55] octDigitChar
147 | 
148 |   describe "hexDigitChar" $
149 |     checkCharRange "hexadecimal digit" ([48 .. 57] ++ [97 .. 102] ++ [65 .. 70]) hexDigitChar
150 | 
151 |   describe "char'" $ do
152 |     context "when stream begins with the character specified as argument" $
153 |       it "parses the character" $
154 |         property $ \ch s -> do
155 |           let sl = B.cons (liftChar toLower ch) s
156 |               su = B.cons (liftChar toUpper ch) s
157 |               st = B.cons (liftChar toTitle ch) s
158 |           prs (char' ch) sl `shouldParse` liftChar toLower ch
159 |           prs (char' ch) su `shouldParse` liftChar toUpper ch
160 |           prs (char' ch) st `shouldParse` liftChar toTitle ch
161 |           prs' (char' ch) sl `succeedsLeaving` s
162 |           prs' (char' ch) su `succeedsLeaving` s
163 |           prs' (char' ch) st `succeedsLeaving` s
164 |     context "when stream does not begin with the character specified as argument" $
165 |       it "signals correct parse error" $
166 |         property $ \ch ch' s ->
167 |           not (casei ch ch') ==> do
168 |             let s' = B.cons ch' s
169 |                 ms = utok ch' <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch)
170 |             prs (char' ch) s' `shouldFailWith` err 0 ms
171 |             prs' (char' ch) s' `failsLeaving` s'
172 |     context "when stream is empty" $
173 |       it "signals correct parse error" $
174 |         property $ \ch -> do
175 |           let ms = ueof <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch)
176 |           prs (char' ch) "" `shouldFailWith` err 0 ms
177 | 
178 | ----------------------------------------------------------------------------
179 | -- Helpers
180 | 
181 | checkStrLit :: String -> ByteString -> Parser ByteString -> SpecWith ()
182 | checkStrLit name ts p = do
183 |   context ("when stream begins with " ++ name) $
184 |     it ("parses the " ++ name) $
185 |       property $ \s -> do
186 |         let s' = ts <> s
187 |         prs p s' `shouldParse` ts
188 |         prs' p s' `succeedsLeaving` s
189 |   context ("when stream does not begin with " ++ name) $
190 |     it "signals correct parse error" $
191 |       property $ \ch s ->
192 |         ch /= B.head ts ==> do
193 |           let s' = B.cons ch s
194 |               us = B.take (B.length ts) s'
195 |           prs p s' `shouldFailWith` err 0 (utoks us <> etoks ts)
196 |           prs' p s' `failsLeaving` s'
197 |   context "when stream is empty" $
198 |     it "signals correct parse error" $
199 |       prs p "" `shouldFailWith` err 0 (ueof <> etoks ts)
200 | 
201 | checkCharPred :: String -> (Word8 -> Bool) -> Parser Word8 -> SpecWith ()
202 | checkCharPred name f p = do
203 |   context ("when stream begins with " ++ name) $
204 |     it ("parses the " ++ name) $
205 |       property $ \ch s ->
206 |         f ch ==> do
207 |           let s' = B.singleton ch <> s
208 |           prs p s' `shouldParse` ch
209 |           prs' p s' `succeedsLeaving` s
210 |   context ("when stream does not begin with " ++ name) $
211 |     it "signals correct parse error" $
212 |       property $ \ch s ->
213 |         not (f ch) ==> do
214 |           let s' = B.singleton ch <> s
215 |           prs p s' `shouldFailWith` err 0 (utok ch <> elabel name)
216 |           prs' p s' `failsLeaving` s'
217 |   context "when stream is empty" $
218 |     it "signals correct parse error" $
219 |       prs p "" `shouldFailWith` err 0 (ueof <> elabel name)
220 | 
221 | checkCharRange :: String -> [Word8] -> Parser Word8 -> SpecWith ()
222 | checkCharRange name tchs p = do
223 |   forM_ tchs $ \tch ->
224 |     context ("when stream begins with " ++ showTokens bproxy (nes tch)) $
225 |       it ("parses the " ++ showTokens bproxy (nes tch)) $
226 |         property $ \s -> do
227 |           let s' = B.singleton tch <> s
228 |           prs p s' `shouldParse` tch
229 |           prs' p s' `succeedsLeaving` s
230 |   context "when stream is empty" $
231 |     it "signals correct parse error" $
232 |       prs p "" `shouldFailWith` err 0 (ueof <> elabel name)
233 | 
234 | prs ::
235 |   -- | Parser to run
236 |   Parser a ->
237 |   -- | Input for the parser
238 |   ByteString ->
239 |   -- | Result of parsing
240 |   Either (ParseErrorBundle ByteString Void) a
241 | prs p = parse p ""
242 | 
243 | prs' ::
244 |   -- | Parser to run
245 |   Parser a ->
246 |   -- | Input for the parser
247 |   ByteString ->
248 |   -- | Result of parsing
249 |   (State ByteString Void, Either (ParseErrorBundle ByteString Void) a)
250 | prs' p s = runParser' p (initialState s)
251 | 
252 | -- | 'Word8'-specialized version of 'Data.Char.isSpace'.
253 | isSpace :: Word8 -> Bool
254 | isSpace x
255 |   | x >= 9 && x <= 13 = True
256 |   | x == 32 = True
257 |   | x == 160 = True
258 |   | otherwise = False
259 | 
260 | -- | Like 'isSpace', but does not accept newlines and carriage returns.
261 | isHSpace :: Word8 -> Bool
262 | isHSpace x
263 |   | x == 9 = True
264 |   | x == 11 = True
265 |   | x == 12 = True
266 |   | x == 32 = True
267 |   | x == 160 = True
268 |   | otherwise = False
269 | 
270 | -- | Lift char transformation to byte transformation.
271 | liftChar :: (Char -> Char) -> Word8 -> Word8
272 | liftChar f x = (fromMaybe x . fromChar . f . toChar) x
273 | 
274 | -- | Compare two characters case-insensitively.
275 | casei :: Word8 -> Word8 -> Bool
276 | casei x y =
277 |   x == liftChar toLower y
278 |     || x == liftChar toUpper y
279 |     || x == liftChar toTitle y
280 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Text/Megaparsec/DebugSpec.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE FlexibleContexts #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | 
  4 | module Text.Megaparsec.DebugSpec (spec) where
  5 | 
  6 | import Control.Exception (evaluate)
  7 | import Control.Monad
  8 | import Control.Monad.RWS (MonadRWS)
  9 | import qualified Control.Monad.RWS.Lazy as L
 10 | import qualified Control.Monad.RWS.Strict as S
 11 | import Control.Monad.State (MonadState (..), modify)
 12 | import qualified Control.Monad.State.Lazy as L
 13 | import qualified Control.Monad.State.Strict as S
 14 | import Control.Monad.Writer (MonadWriter (..))
 15 | import qualified Control.Monad.Writer.Lazy as L
 16 | import qualified Control.Monad.Writer.Strict as S
 17 | import Data.Void
 18 | import GHC.IO.Handle
 19 | import System.IO (stderr)
 20 | import System.IO.Temp
 21 | import Test.Hspec
 22 | import Test.Hspec.Megaparsec
 23 | import Test.Hspec.Megaparsec.AdHoc
 24 | import Text.Megaparsec
 25 | import Text.Megaparsec.Char
 26 | import Text.Megaparsec.Debug
 27 | 
 28 | spec :: Spec
 29 | spec = do
 30 |   describe "dbg" $ do
 31 |     context "when inner parser succeeds consuming input" $ do
 32 |       it "has no effect on how parser works" $ do
 33 |         let p :: (MonadParsecDbg Void String m) => m Char
 34 |             p = dbg "char" (char 'a')
 35 |             s = "ab"
 36 |         shouldStderr p s "char> IN: \"ab\"\nchar> MATCH (COK): 'a'\nchar> VALUE: 'a'\nchar> HINTS: []\n\n"
 37 |         grs p s (`shouldParse` 'a')
 38 |         grs' p s (`succeedsLeaving` "b")
 39 |       it "its hints are preserved" $ do
 40 |         let p :: (MonadParsecDbg Void String m) => m String
 41 |             p = dbg "many chars" (many (char 'a')) <* empty
 42 |             s = "abcd"
 43 |         shouldStderr p s "many chars> IN: \"abcd\"\nmany chars> MATCH (COK): 'a'\nmany chars> VALUE: \"a\"\nmany chars> HINTS: ['a']\n\n"
 44 |         grs p s (`shouldFailWith` err 1 (etok 'a'))
 45 |         grs' p s (`failsLeaving` "bcd")
 46 |       it "prints several hints correctly" $ do
 47 |         let p :: (MonadParsecDbg Void String m) => m (Maybe Char)
 48 |             p = dbg "a or b" (optional (char 'a' <|> char 'b')) <* empty
 49 |             s = ""
 50 |         shouldStderr p s "a or b> IN: <EMPTY>\na or b> MATCH (EOK): <EMPTY>\na or b> VALUE: Nothing\na or b> HINTS: ['a','b']\n\n"
 51 |         grs p s (`shouldFailWith` err 0 (etok 'a' <> etok 'b'))
 52 |         grs' p s (`failsLeaving` "")
 53 |     context "when inner parser fails consuming input" $
 54 |       it "has no effect on how parser works" $ do
 55 |         let p :: (MonadParsecDbg Void String m) => m Char
 56 |             p = dbg "chars" (char 'a' *> char 'c')
 57 |             s = "abc"
 58 |         shouldStderr p s "chars> IN: \"abc\"\nchars> MATCH (CERR): 'a'\nchars> ERROR:\nchars> offset=1:\nchars> unexpected 'b'\nchars> expecting 'c'\n\n"
 59 |         grs p s (`shouldFailWith` err 1 (utok 'b' <> etok 'c'))
 60 |         grs' p s (`failsLeaving` "bc")
 61 |     context "when inner parser succeeds without consuming" $ do
 62 |       it "has no effect on how parser works" $ do
 63 |         let p :: (MonadParsecDbg Void String m) => m Char
 64 |             p = dbg "return" (return 'a')
 65 |             s = "abc"
 66 |         shouldStderr p s "return> IN: \"abc\"\nreturn> MATCH (EOK): <EMPTY>\nreturn> VALUE: 'a'\nreturn> HINTS: []\n\n"
 67 |         grs p s (`shouldParse` 'a')
 68 |         grs' p s (`succeedsLeaving` s)
 69 |       it "its hints are preserved" $ do
 70 |         let p :: (MonadParsecDbg Void String m) => m String
 71 |             p = dbg "many chars" (many (char 'a')) <* empty
 72 |             s = "bcd"
 73 |         shouldStderr p s "many chars> IN: \"bcd\"\nmany chars> MATCH (EOK): <EMPTY>\nmany chars> VALUE: \"\"\nmany chars> HINTS: ['a']\n\n"
 74 |         grs p s (`shouldFailWith` err 0 (etok 'a'))
 75 |         grs' p s (`failsLeaving` "bcd")
 76 |     context "when inner parser fails without consuming" $
 77 |       it "has no effect on how parser works" $ do
 78 |         let p :: (MonadParsecDbg Void String m) => m ()
 79 |             p = dbg "empty" (void empty)
 80 |             s = "abc"
 81 |         shouldStderr p s "empty> IN: \"abc\"\nempty> MATCH (EERR): <EMPTY>\nempty> ERROR:\nempty> offset=0:\nempty> unknown parse error\n\n"
 82 |         grs p s (`shouldFailWith` err 0 mempty)
 83 |         grs' p s (`failsLeaving` s)
 84 |     let p1, p2 :: (MonadParsecDbg Void String m, MonadWriter [Int] m) => m ()
 85 |         p1 = tell [0] >> dbg "a" (single 'a' >> tell [1])
 86 |         p2 = do
 87 |           void $ dbg "a" (single 'a')
 88 |           tell [0]
 89 |           void $ dbg "b" (single 'b')
 90 |           dbg "c" $ do
 91 |             void (single 'c')
 92 |             tell [1]
 93 |             void (single 'd')
 94 |             tell [2]
 95 |         s1 = "a"
 96 |         s2 = "abcd"
 97 |         stderr1 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (LOG: [1])\na> HINTS: []\n\n"
 98 |         stderr2 = "a> IN: \"abcd\"\na> MATCH (COK): 'a'\na> VALUE: 'a' (LOG: [])\na> HINTS: []\n\nb> IN: \"bcd\"\nb> MATCH (COK): 'b'\nb> VALUE: 'b' (LOG: [])\nb> HINTS: []\n\nc> IN: \"cd\"\nc> MATCH (COK): \"cd\"\nc> VALUE: () (LOG: [1,2])\nc> HINTS: []\n\n"
 99 |         r1 = ((), [0, 1])
100 |         r2 = ((), [0, 1, 2])
101 |     context "Lazy WriterT instance of MonadParsecDbg" $ do
102 |       it "example 1" $ do
103 |         shouldStderr (L.runWriterT p1) s1 stderr1
104 |         prs (L.runWriterT p1) s1 `shouldParse` r1
105 |       it "example 2" $ do
106 |         shouldStderr (L.runWriterT p2) s2 stderr2
107 |         prs (L.runWriterT p2) s2 `shouldParse` r2
108 |     context "Strict WriterT instance of MonadParsecDbg" $ do
109 |       it "example 1" $ do
110 |         shouldStderr (S.runWriterT p1) s1 stderr1
111 |         prs (S.runWriterT p1) s1 `shouldParse` r1
112 |       it "example 2" $ do
113 |         shouldStderr (S.runWriterT p2) s2 stderr2
114 |         prs (S.runWriterT p2) s2 `shouldParse` r2
115 |     let p3, p4 :: (MonadParsecDbg Void String m, MonadState Int m) => m ()
116 |         p3 = modify succ >> dbg "a" (single 'a' >> modify succ)
117 |         p4 = do
118 |           void $ dbg "a" (single 'a')
119 |           modify succ
120 |           void $ dbg "b" (single 'b')
121 |           dbg "c" $ do
122 |             void (single 'c')
123 |             modify succ
124 |             void (single 'd')
125 |             modify succ
126 |         s3 = "a"
127 |         s4 = "abcd"
128 |         stderr3 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (STATE: 2)\na> HINTS: []\n\n"
129 |         stderr4 = "a> IN: \"abcd\"\na> MATCH (COK): 'a'\na> VALUE: 'a' (STATE: 0)\na> HINTS: []\n\nb> IN: \"bcd\"\nb> MATCH (COK): 'b'\nb> VALUE: 'b' (STATE: 1)\nb> HINTS: []\n\nc> IN: \"cd\"\nc> MATCH (COK): \"cd\"\nc> VALUE: () (STATE: 3)\nc> HINTS: []\n\n"
130 |         r3 = ((), 2)
131 |         r4 = ((), 3)
132 |     context "Lazy StateT instance of MonadParsecDbg" $ do
133 |       it "example 3" $ do
134 |         shouldStderr (L.runStateT p3 0) s3 stderr3
135 |         prs (L.runStateT p3 0) s3 `shouldParse` r3
136 |       it "example 4" $ do
137 |         shouldStderr (L.runStateT p4 0) s4 stderr4
138 |         prs (L.runStateT p4 0) s4 `shouldParse` r4
139 |     context "Strict StateT instance of MonadParsecDbg" $ do
140 |       it "example 3" $ do
141 |         shouldStderr (S.runStateT p3 0) s3 stderr3
142 |         prs (S.runStateT p3 0) s3 `shouldParse` r3
143 |       it "example 4" $ do
144 |         shouldStderr (S.runStateT p4 0) s4 stderr4
145 |         prs (S.runStateT p4 0) s4 `shouldParse` r4
146 |     let p5 :: (MonadParsecDbg Void String m, MonadRWS () [Int] Int m) => m ()
147 |         p5 = do
148 |           tell [0]
149 |           modify succ
150 |           dbg "a" (single 'a' >> tell [1] >> modify succ)
151 |         s5 = "a"
152 |         stderr5 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (STATE: 2) (LOG: [1])\na> HINTS: []\n\n"
153 |         stderr7 = "a> IN: 'a'\na> MATCH (COK): 'a'\na> VALUE: () (LOG: [1]) (STATE: 2)\na> HINTS: []\n\n"
154 |         r5 = ((), 2, [0, 1])
155 |         p6 :: (MonadParsecDbg Void String m, MonadWriter [Int] m, MonadState Int m) => m ()
156 |         p6 = do
157 |           tell [0]
158 |           modify succ
159 |           dbg "a" (single 'a' >> tell [1] >> modify succ)
160 |         r6 = (((), 2), [0, 1])
161 |         r7 = (((), [0, 1]), 2)
162 |     context "Lazy RWST instance of MonadParsecDbg" $ do
163 |       it "example 5" $ do
164 |         shouldStderr (L.runRWST p5 () 0) s5 stderr5
165 |         prs (L.runRWST p5 () 0) s5 `shouldParse` r5
166 |       it "example 6" $ do
167 |         shouldStderr (L.runWriterT (L.runStateT p6 0)) s5 stderr5
168 |         prs (L.runWriterT (L.runStateT p6 0)) s5 `shouldParse` r6
169 |       it "example 7" $ do
170 |         shouldStderr (L.runStateT (L.runWriterT p6) 0) s5 stderr7
171 |         prs (L.runStateT (L.runWriterT p6) 0) s5 `shouldParse` r7
172 |     context "Strict RWST instance of MonadParsecDbg" $ do
173 |       it "example 5" $ do
174 |         shouldStderr (S.runRWST p5 () 0) s5 stderr5
175 |         prs (S.runRWST p5 () 0) s5 `shouldParse` r5
176 |       it "example 6" $ do
177 |         shouldStderr (S.runWriterT (S.runStateT p6 0)) s5 stderr5
178 |         prs (S.runWriterT (S.runStateT p6 0)) s5 `shouldParse` r6
179 |       it "example 7" $ do
180 |         shouldStderr (S.runStateT (S.runWriterT p6) 0) s5 stderr7
181 |         prs (S.runStateT (S.runWriterT p6) 0) s5 `shouldParse` r7
182 | 
183 | ----------------------------------------------------------------------------
184 | -- Helpers
185 | 
186 | -- | Check that running the given parser on the input prints the expected
187 | -- string to the 'stderr'.
188 | shouldStderr ::
189 |   (HasCallStack) =>
190 |   -- | The parser to test
191 |   Parser a ->
192 |   -- | Input for the parser
193 |   String ->
194 |   -- | The expected 'stderr' output
195 |   String ->
196 |   Expectation
197 | shouldStderr p s expectedStderr = do
198 |   hFlush stderr
199 |   withSystemTempFile "megaparsec-dbg-tests" $ \tempPath tempHandle -> do
200 |     hDuplicateTo tempHandle stderr
201 |     void (evaluate (parse p "" s))
202 |     hFlush stderr
203 |     hClose tempHandle
204 |     capturedStderr <- readFile tempPath
205 |     capturedStderr `shouldBe` expectedStderr
206 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Text/Megaparsec/ErrorSpec.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | module Text.Megaparsec.ErrorSpec (spec) where
  4 | 
  5 | import Control.Exception (Exception (..))
  6 | import Data.Functor.Identity
  7 | import Data.List (isInfixOf, isSuffixOf, sort)
  8 | import Data.List.NonEmpty (NonEmpty (..))
  9 | import qualified Data.Set as E
 10 | import Data.Void
 11 | import Test.Hspec
 12 | import Test.Hspec.Megaparsec
 13 | import Test.Hspec.Megaparsec.AdHoc ()
 14 | import Test.QuickCheck
 15 | import Text.Megaparsec
 16 | 
 17 | spec :: Spec
 18 | spec = do
 19 |   describe "Semigroup instance of ParseError" $
 20 |     it "associativity" $
 21 |       property $ \x y z ->
 22 |         (x <> y) <> z === (x <> (y <> z) :: PE)
 23 | 
 24 |   describe "Monoid instance of ParseError" $ do
 25 |     it "left identity" $
 26 |       property $ \x ->
 27 |         mempty <> x === (x :: PE)
 28 |     it "right identity" $
 29 |       property $ \x ->
 30 |         x <> mempty === (x :: PE)
 31 |     it "associativity" $
 32 |       property $ \x y z ->
 33 |         (x <> y) <> z === (x <> (y <> z) :: PE)
 34 | 
 35 |   describe "error merging with (<>)" $ do
 36 |     it "selects greater offset" $
 37 |       property $ \x y ->
 38 |         errorOffset (x <> y :: PE) === max (errorOffset x) (errorOffset y)
 39 |     context "when combining two trivial parse errors at the same position" $
 40 |       it "merges their unexpected and expected items" $
 41 |         do
 42 |           let n Nothing Nothing = Nothing
 43 |               n (Just x) Nothing = Just x
 44 |               n Nothing (Just y) = Just y
 45 |               n (Just x) (Just y) = Just (max x y)
 46 |           property $ \pos us0 ps0 us1 ps1 ->
 47 |             TrivialError pos us0 ps0 <> TrivialError pos us1 ps1
 48 |               `shouldBe` (TrivialError pos (n us0 us1) (E.union ps0 ps1) :: PE)
 49 |     context "when combining two fancy parse errors at the same position" $
 50 |       it "merges their custom items" $
 51 |         property $ \pos xs0 xs1 ->
 52 |           FancyError pos xs0 <> FancyError pos xs1
 53 |             `shouldBe` (FancyError pos (E.union xs0 xs1) :: PE)
 54 |     context "when combining trivial error with fancy error" $ do
 55 |       it "fancy has precedence (left)" $
 56 |         property $ \pos us ps xs ->
 57 |           FancyError pos xs <> TrivialError pos us ps
 58 |             `shouldBe` (FancyError pos xs :: PE)
 59 |       it "fancy has precedence (right)" $
 60 |         property $ \pos us ps xs ->
 61 |           TrivialError pos us ps <> FancyError pos xs
 62 |             `shouldBe` (FancyError pos xs :: PE)
 63 | 
 64 |   -- NOTE 'errorOffset' and 'setErrorOffset' are trivial.
 65 | 
 66 |   describe "attachSourcePos" $
 67 |     it "attaches the positions correctly" $
 68 |       property $ \xs' s -> do
 69 |         let xs = sort $ getSmall . getPositive <$> xs'
 70 |             pst = initialPosState (s :: String)
 71 |             pst' =
 72 |               if null xs
 73 |                 then pst
 74 |                 else reachOffsetNoLine (last xs) pst
 75 |             rs = f <$> xs
 76 |             f x = (x, pstateSourcePos (reachOffsetNoLine x pst))
 77 |         attachSourcePos id (xs :: [Int]) pst `shouldBe` (rs, pst')
 78 | 
 79 |   describe "errorBundlePretty" $ do
 80 |     it "shows empty line correctly" $ do
 81 |       let s = "" :: String
 82 |       mkBundlePE s (mempty :: PE)
 83 |         `shouldBe` "1:1:\n  |\n1 | <empty line>\n  | ^\nunknown parse error\n"
 84 |     it "shows position on first line correctly" $ do
 85 |       let s = "abc" :: String
 86 |           pe = err 1 (utok 'b' <> etok 'd') :: PE
 87 |       mkBundlePE s pe
 88 |         `shouldBe` "1:2:\n  |\n1 | abc\n  |  ^\nunexpected 'b'\nexpecting 'd'\n"
 89 |     it "skips to second line correctly" $ do
 90 |       let s = "one\ntwo\n" :: String
 91 |           pe = err 4 (utok 't' <> etok 'x') :: PE
 92 |       mkBundlePE s pe
 93 |         `shouldBe` "2:1:\n  |\n2 | two\n  | ^\nunexpected 't'\nexpecting 'x'\n"
 94 |     it "shows position on 1000 line correctly" $ do
 95 |       let s = replicate 999 '\n' ++ "abc"
 96 |           pe = err 999 (utok 'a' <> etok 'd') :: PE
 97 |       mkBundlePE s pe
 98 |         `shouldBe` "1000:1:\n     |\n1000 | abc\n     | ^\nunexpected 'a'\nexpecting 'd'\n"
 99 |     it "shows offending line in the presence of tabs correctly" $ do
100 |       let s = "\tsomething" :: String
101 |           pe = err 1 (utok 's' <> etok 'x') :: PE
102 |       mkBundlePE s pe
103 |         `shouldBe` "1:9:\n  |\n1 |         something\n  |         ^\nunexpected 's'\nexpecting 'x'\n"
104 |     it "uses continuous highlighting properly (trivial)" $ do
105 |       let s = "\tfoobar" :: String
106 |           pe = err 1 (utoks "foo" <> utoks "rar") :: PE
107 |       mkBundlePE s pe
108 |         `shouldBe` "1:9:\n  |\n1 |         foobar\n  |         ^^^\nunexpected \"rar\"\n"
109 |     it "uses continuous highlighting properly (fancy)" $ do
110 |       let s = "\tfoobar" :: String
111 |           pe =
112 |             errFancy
113 |               1
114 |               (fancy $ ErrorCustom (CustomErr 5)) ::
115 |               ParseError String CustomErr
116 |       mkBundlePE s pe
117 |         `shouldBe` "1:9:\n  |\n1 |         foobar\n  |         ^^^^^\ncustom thing\n"
118 |     it "adjusts continuous highlighting so it doesn't get too long" $ do
119 |       let s = "foobar\n" :: String
120 |           pe = err 4 (utoks "foobar" <> etoks "foobar") :: PE
121 |       mkBundlePE s pe
122 |         `shouldBe` "1:5:\n  |\n1 | foobar\n  |     ^^^\nunexpected \"foobar\"\nexpecting \"foobar\"\n"
123 |     context "stream of insufficient size is provided in the bundle" $
124 |       it "handles the situation reasonably" $
125 |         do
126 |           let s = "" :: String
127 |               pe = err 3 (ueof <> etok 'x') :: PE
128 |           mkBundlePE s pe
129 |             `shouldBe` "1:1:\n  |\n1 | <empty line>\n  | ^\nunexpected end of input\nexpecting 'x'\n"
130 |     context "starting column in bundle is greater than 1" $ do
131 |       context "and less than parse error column" $
132 |         it "is rendered correctly" $
133 |           do
134 |             let s = "foo" :: String
135 |                 pe = err 5 (utok 'o' <> etok 'x') :: PE
136 |                 bundle =
137 |                   ParseErrorBundle
138 |                     { bundleErrors = pe :| [],
139 |                       bundlePosState =
140 |                         PosState
141 |                           { pstateInput = s,
142 |                             pstateOffset = 4,
143 |                             pstateSourcePos = SourcePos "" pos1 (mkPos 5),
144 |                             pstateTabWidth = defaultTabWidth,
145 |                             pstateLinePrefix = ""
146 |                           }
147 |                     }
148 |             errorBundlePretty bundle
149 |               `shouldBe` "1:6:\n  |\n1 | foo\n  | \nunexpected 'o'\nexpecting 'x'\n"
150 |       context "and greater than parse error column" $
151 |         it "is rendered correctly" $
152 |           do
153 |             let s = "foo" :: String
154 |                 pe = err 5 (utok 'o' <> etok 'x') :: PE
155 |                 bundle =
156 |                   ParseErrorBundle
157 |                     { bundleErrors = pe :| [],
158 |                       bundlePosState =
159 |                         PosState
160 |                           { pstateInput = s,
161 |                             pstateOffset = 9,
162 |                             pstateSourcePos = SourcePos "" pos1 (mkPos 10),
163 |                             pstateTabWidth = defaultTabWidth,
164 |                             pstateLinePrefix = ""
165 |                           }
166 |                     }
167 |             errorBundlePretty bundle
168 |               `shouldBe` "1:10:\n  |\n1 | foo\n  | \nunexpected 'o'\nexpecting 'x'\n"
169 |     it "takes tab width into account correctly" $
170 |       property $ \w' i' -> do
171 |         let w = unPos w'
172 |             i = unPos i' `rem` (w * 2)
173 |             as = replicate i 'a'
174 |             s = "\t" ++ as ++ "\tb" :: String
175 |             pe = err (2 + i) (utok 'b' <> etok 'x') :: PE
176 |             bundle =
177 |               ParseErrorBundle
178 |                 { bundleErrors = pe :| [],
179 |                   bundlePosState =
180 |                     PosState
181 |                       { pstateInput = s,
182 |                         pstateOffset = 0,
183 |                         pstateSourcePos = initialPos "",
184 |                         pstateTabWidth = w',
185 |                         pstateLinePrefix = ""
186 |                       }
187 |                 }
188 |             secondTabApparentWidth = w - (i `rem` w)
189 |             errColumn = w + i + secondTabApparentWidth
190 |         errorBundlePretty bundle
191 |           `shouldBe` ( "1:"
192 |                          ++ show (errColumn + 1)
193 |                          ++ ":\n  |\n1 | "
194 |                          ++ replicate w ' '
195 |                          ++ as
196 |                          ++ replicate secondTabApparentWidth ' '
197 |                          ++ "b"
198 |                          ++ "\n  | "
199 |                          ++ replicate errColumn ' '
200 |                          ++ "^\nunexpected 'b'\nexpecting 'x'\n"
201 |                      )
202 |     context "in the presence of wide characters" $ do
203 |       it "calculates column positions correctly" $ do
204 |         let s = "구구 이면" :: String
205 |             pe = err 2 (ulabel "space" <> etok '이') :: PE
206 |             bundle =
207 |               ParseErrorBundle
208 |                 { bundleErrors = pe :| [],
209 |                   bundlePosState =
210 |                     PosState
211 |                       { pstateInput = s,
212 |                         pstateOffset = 0,
213 |                         pstateSourcePos = initialPos "",
214 |                         pstateTabWidth = defaultTabWidth,
215 |                         pstateLinePrefix = ""
216 |                       }
217 |                 }
218 |         errorBundlePretty bundle
219 |           `shouldBe` "1:5:\n  |\n1 | 구구 이면\n  |     ^\nunexpected space\nexpecting '이'\n"
220 |       it "uses continuous highlighting" $ do
221 |         let s = "구구 이면" :: String
222 |             pe = err 3 (utok '이' <> etok '구') :: PE
223 |             bundle =
224 |               ParseErrorBundle
225 |                 { bundleErrors = pe :| [],
226 |                   bundlePosState =
227 |                     PosState
228 |                       { pstateInput = s,
229 |                         pstateOffset = 0,
230 |                         pstateSourcePos = initialPos "",
231 |                         pstateTabWidth = defaultTabWidth,
232 |                         pstateLinePrefix = ""
233 |                       }
234 |                 }
235 |         errorBundlePretty bundle
236 |           `shouldBe` "1:6:\n  |\n1 | 구구 이면\n  |      ^^\nunexpected '이'\nexpecting '구'\n"
237 |     it "displays multi-error bundle correctly" $ do
238 |       let s = "something\ngood\n" :: String
239 |           pe0 = err 2 (utok 'm' <> etok 'x') :: PE
240 |           pe1 = err 10 (utok 'g' <> etok 'y') :: PE
241 |           bundle =
242 |             ParseErrorBundle
243 |               { bundleErrors = pe0 :| [pe1],
244 |                 bundlePosState =
245 |                   PosState
246 |                     { pstateInput = s,
247 |                       pstateOffset = 0,
248 |                       pstateSourcePos = initialPos "",
249 |                       pstateTabWidth = defaultTabWidth,
250 |                       pstateLinePrefix = ""
251 |                     }
252 |               }
253 |       errorBundlePretty bundle
254 |         `shouldBe` "1:3:\n  |\n1 | something\n  |   ^\nunexpected 'm'\nexpecting 'x'\n\n2:1:\n  |\n2 | good\n  | ^\nunexpected 'g'\nexpecting 'y'\n"
255 | 
256 |   describe "parseErrorPretty" $ do
257 |     it "shows unknown ParseError correctly" $
258 |       parseErrorPretty (mempty :: PE) `shouldBe` "offset=0:\nunknown parse error\n"
259 |     it "result always ends with a newline" $
260 |       property $ \x ->
261 |         parseErrorPretty (x :: PE) `shouldSatisfy` ("\n" `isSuffixOf`)
262 |     it "result contains representation of offset" $
263 |       property (contains (Identity . errorOffset) show)
264 |     it "result contains unexpected/expected items" $ do
265 |       let e = err 0 (utoks "foo" <> etoks "bar" <> etoks "baz") :: PE
266 |       parseErrorPretty e `shouldBe` "offset=0:\nunexpected \"foo\"\nexpecting \"bar\" or \"baz\"\n"
267 |     it "result contains representation of custom items" $ do
268 |       let e = errFancy 0 (fancy (ErrorFail "Ooops!")) :: PE
269 |       parseErrorPretty e `shouldBe` "offset=0:\nOoops!\n"
270 |     it "several fancy errors look not so bad" $ do
271 |       let pe :: PE
272 |           pe =
273 |             errFancy 0 $
274 |               mempty <> fancy (ErrorFail "foo") <> fancy (ErrorFail "bar")
275 |       parseErrorPretty pe `shouldBe` "offset=0:\nbar\nfoo\n"
276 | 
277 |   describe "parseErrorTextPretty" $ do
278 |     it "shows trivial unknown ParseError correctly" $
279 |       parseErrorTextPretty (mempty :: PE)
280 |         `shouldBe` "unknown parse error\n"
281 |     it "shows fancy unknown ParseError correctly" $
282 |       parseErrorTextPretty (FancyError 0 E.empty :: PE)
283 |         `shouldBe` "unknown fancy parse error\n"
284 |     it "result always ends with a newline" $
285 |       property $ \x ->
286 |         parseErrorTextPretty (x :: PE)
287 |           `shouldSatisfy` ("\n" `isSuffixOf`)
288 | 
289 |   describe "displayException" $
290 |     it "produces the same result as parseErrorPretty" $
291 |       property $ \x ->
292 |         displayException x `shouldBe` parseErrorPretty (x :: PE)
293 | 
294 | ----------------------------------------------------------------------------
295 | -- Helpers
296 | 
297 | -- | A custom error component to test continuous highlighting for custom
298 | -- components.
299 | newtype CustomErr = CustomErr Int
300 |   deriving (Eq, Ord, Show)
301 | 
302 | instance ShowErrorComponent CustomErr where
303 |   showErrorComponent _ = "custom thing"
304 |   errorComponentLen (CustomErr n) = n
305 | 
306 | type PE = ParseError String Void
307 | 
308 | contains :: (Foldable t) => (PE -> t a) -> (a -> String) -> PE -> Property
309 | contains g r e = property (all f (g e))
310 |   where
311 |     rendered = parseErrorPretty e
312 |     f x = r x `isInfixOf` rendered
313 | 
314 | mkBundlePE ::
315 |   ( VisualStream s,
316 |     TraversableStream s,
317 |     ShowErrorComponent e
318 |   ) =>
319 |   s ->
320 |   ParseError s e ->
321 |   String
322 | mkBundlePE s e =
323 |   errorBundlePretty $
324 |     ParseErrorBundle
325 |       { bundleErrors = e :| [],
326 |         bundlePosState =
327 |           PosState
328 |             { pstateInput = s,
329 |               pstateOffset = 0,
330 |               pstateSourcePos = initialPos "",
331 |               pstateTabWidth = defaultTabWidth,
332 |               pstateLinePrefix = ""
333 |             }
334 |       }
335 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Text/Megaparsec/PosSpec.hs:
--------------------------------------------------------------------------------
 1 | module Text.Megaparsec.PosSpec (spec) where
 2 | 
 3 | import Control.Exception (evaluate)
 4 | import Data.Function (on)
 5 | import Data.List (isInfixOf)
 6 | import Test.Hspec
 7 | import Test.Hspec.Megaparsec.AdHoc ()
 8 | import Test.QuickCheck
 9 | import Text.Megaparsec.Pos
10 | 
11 | spec :: Spec
12 | spec = do
13 |   describe "mkPos" $ do
14 |     context "when the argument is a non-positive number" $
15 |       it "throws InvalidPosException" $
16 |         property $ \n ->
17 |           n <= 0 ==>
18 |             evaluate (mkPos n) `shouldThrow` (== InvalidPosException n)
19 |     context "when the argument is not 0" $
20 |       it "returns Pos with the given value" $
21 |         property $ \n ->
22 |           (n > 0) ==> (unPos (mkPos n) `shouldBe` n)
23 | 
24 |   describe "Read and Show instances of Pos" $
25 |     it "printed representation of Pos is isomorphic to its value" $
26 |       property $ \x ->
27 |         read (show x) === (x :: Pos)
28 | 
29 |   describe "Ord instance of Pos" $
30 |     it "works just like Ord instance of underlying Word" $
31 |       property $ \x y ->
32 |         compare x y === (compare `on` unPos) x y
33 | 
34 |   describe "Semigroup instance of Pos" $
35 |     it "works like addition" $
36 |       property $ \x y ->
37 |         x <> y === mkPos (unPos x + unPos y)
38 |           .&&. unPos (x <> y) === unPos x + unPos y
39 | 
40 |   describe "initialPos" $
41 |     it "constructs initial position correctly" $
42 |       property $ \path ->
43 |         let x = initialPos path
44 |          in sourceName x === path
45 |               .&&. sourceLine x === mkPos 1
46 |               .&&. sourceColumn x === mkPos 1
47 | 
48 |   describe "Read and Show instances of SourcePos" $
49 |     it "printed representation of SourcePos in isomorphic to its value" $
50 |       property $ \x ->
51 |         read (show x) === (x :: SourcePos)
52 | 
53 |   describe "sourcePosPretty" $ do
54 |     it "displays file name" $
55 |       property $ \x ->
56 |         sourceName x `isInfixOf` sourcePosPretty x
57 |     it "displays line number" $
58 |       property $ \x ->
59 |         (show . unPos . sourceLine) x `isInfixOf` sourcePosPretty x
60 |     it "displays column number" $
61 |       property $ \x ->
62 |         (show . unPos . sourceColumn) x `isInfixOf` sourcePosPretty x
63 | 


--------------------------------------------------------------------------------
/megaparsec-tests/tests/Text/Megaparsec/UnicodeSpec.hs:
--------------------------------------------------------------------------------
 1 | module Text.Megaparsec.UnicodeSpec (spec) where
 2 | 
 3 | import Test.Hspec
 4 | import qualified Text.Megaparsec.Unicode as Unicode
 5 | 
 6 | spec :: Spec
 7 | spec = do
 8 |   describe "stringLength" $
 9 |     it "computes correct length in the presense of wide chars" $
10 |       Unicode.stringLength "123 구구 이면" `shouldBe` 13
11 |   describe "charLength" $ do
12 |     it "returns 1 for non-wide chars" $
13 |       Unicode.charLength 'a' `shouldBe` 1
14 |     it "returns 2 for wide chars" $
15 |       Unicode.charLength '구' `shouldBe` 2
16 |   describe "isWideChar" $ do
17 |     it "returns False for non-wide chars" $
18 |       Unicode.isWideChar 'a' `shouldBe` False
19 |     it "returns True for wide chars" $
20 |       Unicode.isWideChar '구' `shouldBe` True
21 | 


--------------------------------------------------------------------------------
/megaparsec.cabal:
--------------------------------------------------------------------------------
  1 | cabal-version:   2.4
  2 | name:            megaparsec
  3 | version:         9.7.0
  4 | license:         BSD-2-Clause
  5 | license-file:    LICENSE.md
  6 | maintainer:      Mark Karpov <markkarpov92@gmail.com>
  7 | author:
  8 |     Megaparsec contributors,
  9 |     Paolo Martini <paolo@nemail.it>,
 10 |     Daan Leijen <daan@microsoft.com>
 11 | 
 12 | tested-with:     ghc ==9.8.4 ghc ==9.10.1 ghc ==9.12.1
 13 | homepage:        https://github.com/mrkkrp/megaparsec
 14 | bug-reports:     https://github.com/mrkkrp/megaparsec/issues
 15 | synopsis:        Monadic parser combinators
 16 | description:
 17 |     This is an industrial-strength monadic parser combinator library.
 18 |     Megaparsec is a feature-rich package that tries to find a nice balance
 19 |     between speed, flexibility, and quality of parse errors.
 20 | 
 21 | category:        Parsing
 22 | build-type:      Simple
 23 | extra-doc-files:
 24 |     CHANGELOG.md
 25 |     README.md
 26 | 
 27 | source-repository head
 28 |     type:     git
 29 |     location: https://github.com/mrkkrp/megaparsec.git
 30 | 
 31 | flag dev
 32 |     description: Turn on development settings.
 33 |     default:     False
 34 |     manual:      True
 35 | 
 36 | library
 37 |     exposed-modules:
 38 |         Text.Megaparsec
 39 |         Text.Megaparsec.Byte
 40 |         Text.Megaparsec.Byte.Binary
 41 |         Text.Megaparsec.Byte.Lexer
 42 |         Text.Megaparsec.Char
 43 |         Text.Megaparsec.Char.Lexer
 44 |         Text.Megaparsec.Debug
 45 |         Text.Megaparsec.Error
 46 |         Text.Megaparsec.Error.Builder
 47 |         Text.Megaparsec.Internal
 48 |         Text.Megaparsec.Pos
 49 |         Text.Megaparsec.State
 50 |         Text.Megaparsec.Stream
 51 |         Text.Megaparsec.Unicode
 52 | 
 53 |     other-modules:
 54 |         Text.Megaparsec.Class
 55 |         Text.Megaparsec.Common
 56 |         Text.Megaparsec.Lexer
 57 | 
 58 |     default-language: Haskell2010
 59 |     build-depends:
 60 |         array >=0.5.3 && <0.6,
 61 |         base >=4.15 && <5,
 62 |         bytestring >=0.2 && <0.13,
 63 |         case-insensitive >=1.2 && <1.3,
 64 |         containers >=0.5 && <0.8,
 65 |         deepseq >=1.3 && <1.6,
 66 |         mtl >=2.2.2 && <3,
 67 |         parser-combinators >=1.0 && <2,
 68 |         scientific >=0.3.7 && <0.4,
 69 |         text >=0.2 && <2.2,
 70 |         transformers >=0.4 && <0.7
 71 | 
 72 |     if flag(dev)
 73 |         ghc-options:
 74 |             -Wall -Werror -Wredundant-constraints -Wpartial-fields
 75 |             -Wunused-packages -Wno-unused-imports
 76 | 
 77 |     else
 78 |         ghc-options: -O2 -Wall
 79 | 
 80 |     if impl(ghc >=9.8)
 81 |         ghc-options: -Wno-x-partial
 82 | 
 83 | benchmark bench-speed
 84 |     type:             exitcode-stdio-1.0
 85 |     main-is:          Main.hs
 86 |     hs-source-dirs:   bench/speed
 87 |     default-language: Haskell2010
 88 |     build-depends:
 89 |         base >=4.15 && <5,
 90 |         bytestring >=0.2 && <0.13,
 91 |         containers >=0.5 && <0.8,
 92 |         criterion >=0.6.2.1 && <1.7,
 93 |         deepseq >=1.3 && <1.6,
 94 |         megaparsec,
 95 |         text >=0.2 && <2.2
 96 | 
 97 |     if flag(dev)
 98 |         ghc-options:
 99 |             -Wall -Werror -Wredundant-constraints -Wpartial-fields
100 |             -Wunused-packages
101 | 
102 |     else
103 |         ghc-options: -O2 -Wall
104 | 
105 | benchmark bench-memory
106 |     type:             exitcode-stdio-1.0
107 |     main-is:          Main.hs
108 |     hs-source-dirs:   bench/memory
109 |     default-language: Haskell2010
110 |     build-depends:
111 |         base >=4.15 && <5,
112 |         bytestring >=0.2 && <0.13,
113 |         containers >=0.5 && <0.8,
114 |         deepseq >=1.3 && <1.6,
115 |         megaparsec,
116 |         text >=0.2 && <2.2,
117 |         weigh >=0.0.4
118 | 
119 |     if flag(dev)
120 |         ghc-options:
121 |             -Wall -Werror -Wredundant-constraints -Wpartial-fields
122 |             -Wunused-packages
123 | 
124 |     else
125 |         ghc-options: -O2 -Wall
126 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/CSV/Attoparsec.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module ParsersBench.CSV.Attoparsec
 4 |   ( parseCSV,
 5 |   )
 6 | where
 7 | 
 8 | import Control.Applicative
 9 | import Control.Applicative.Combinators
10 | import Control.Monad
11 | import Data.Attoparsec.ByteString.Char8 hiding (sepBy1)
12 | import qualified Data.Attoparsec.ByteString.Char8 as A
13 | import Data.ByteString (ByteString)
14 | import qualified Data.ByteString.Char8 as BC8
15 | import Data.Vector (Vector)
16 | import qualified Data.Vector as V
17 | 
18 | -- NOTE We use the combinators from this module to make both implementations
19 | -- as close as possible. The combinators from the ‘parser-combinators’
20 | -- packages are not slower than Attoparsec's.
21 | 
22 | type Record = Vector Field
23 | 
24 | type Field = ByteString
25 | 
26 | parseCSV :: ByteString -> [Record]
27 | parseCSV bs =
28 |   case parseOnly csv bs of
29 |     Left err -> error err
30 |     Right x -> x
31 | 
32 | csv :: Parser [Record]
33 | csv = do
34 |   xs <- sepEndBy1 record endOfLine
35 |   endOfInput
36 |   return xs
37 | 
38 | record :: Parser Record
39 | record = do
40 |   endAlready <- atEnd
41 |   when endAlready empty -- to prevent reading empty line at the end of file
42 |   V.fromList <$!> (sepBy1 field (char ',') <?> "record")
43 | 
44 | field :: Parser Field
45 | field = (escapedField <|> unescapedField) <?> "field"
46 | 
47 | escapedField :: Parser ByteString
48 | escapedField =
49 |   BC8.pack <$!> between (char '"') (char '"') (many $ normalChar <|> escapedDq)
50 |   where
51 |     normalChar = notChar '"' <?> "unescaped character"
52 |     escapedDq = '"' <$ string "\"\""
53 | 
54 | unescapedField :: Parser ByteString
55 | unescapedField =
56 |   A.takeWhile (\x -> notElem x [',', '\"', '\n', '\r'])
57 | {-# INLINE unescapedField #-}
58 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/CSV/Megaparsec.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module ParsersBench.CSV.Megaparsec
 4 |   ( parseCSV,
 5 |   )
 6 | where
 7 | 
 8 | import Control.Monad
 9 | import Data.ByteString (ByteString)
10 | import qualified Data.ByteString as B
11 | import Data.Vector (Vector)
12 | import qualified Data.Vector as V
13 | import Data.Void
14 | import Text.Megaparsec
15 | import Text.Megaparsec.Byte
16 | 
17 | type Parser = Parsec Void ByteString
18 | 
19 | type Record = Vector Field
20 | 
21 | type Field = ByteString
22 | 
23 | -- | Parse a CSV file without conversion of individual records.
24 | parseCSV :: ByteString -> [Record]
25 | parseCSV bs =
26 |   case parse csv "" bs of
27 |     Left err -> error (errorBundlePretty err)
28 |     Right x -> x
29 | 
30 | csv :: Parser [Record]
31 | csv = do
32 |   xs <- sepEndBy1 record eol
33 |   eof
34 |   return xs
35 | 
36 | record :: Parser Record
37 | record = do
38 |   notFollowedBy eof -- to prevent reading empty line at the end of file
39 |   V.fromList <$!> (sepBy1 field (char 44) <?> "record")
40 | 
41 | field :: Parser Field
42 | field = label "field" (escapedField <|> unescapedField)
43 | 
44 | escapedField :: Parser ByteString
45 | escapedField =
46 |   B.pack <$!> between (char 34) (char 34) (many $ normalChar <|> escapedDq)
47 |   where
48 |     normalChar = anySingleBut 34 <?> "unescaped character"
49 |     escapedDq = label "escaped double-quote" (34 <$ string "\"\"")
50 | 
51 | unescapedField :: Parser ByteString
52 | unescapedField =
53 |   takeWhileP
54 |     (Just "unescaped char")
55 |     (\x -> notElem x [44, 34, 10, 13])
56 | {-# INLINE unescapedField #-}
57 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/Json/Attoparsec.hs:
--------------------------------------------------------------------------------
  1 | -- Mostly stolen from (with simplifications):
  2 | -- https://github.com/bos/attoparsec/blob/master/benchmarks/Aeson.hs
  3 | {-# LANGUAGE CPP #-}
  4 | {-# LANGUAGE OverloadedStrings #-}
  5 | 
  6 | module ParsersBench.Json.Attoparsec
  7 |   ( parseJson,
  8 |   )
  9 | where
 10 | 
 11 | import Control.Applicative
 12 | import qualified Data.Attoparsec.ByteString as A
 13 | import Data.Attoparsec.ByteString.Char8
 14 | import Data.ByteString (ByteString)
 15 | import qualified Data.HashMap.Strict as H
 16 | import Data.Text (Text)
 17 | import qualified Data.Text.Encoding as TE
 18 | import Data.Vector (Vector)
 19 | import qualified Data.Vector as V
 20 | import Data.Word (Word8)
 21 | import ParsersBench.Json.Common
 22 | 
 23 | #define CLOSE_CURLY 125
 24 | #define CLOSE_SQUARE 93
 25 | #define COMMA 44
 26 | #define C_0 48
 27 | #define C_9 57
 28 | #define C_MINUS 45
 29 | #define C_f 102
 30 | #define C_n 110
 31 | #define C_t 116
 32 | #define DOUBLE_QUOTE 34
 33 | #define OPEN_CURLY 123
 34 | #define OPEN_SQUARE 91
 35 | 
 36 | parseJson :: ByteString -> Value
 37 | parseJson bs =
 38 |   case parseOnly json bs of
 39 |     Left err -> error err
 40 |     Right x -> x
 41 | 
 42 | json :: Parser Value
 43 | json = json_ object_ array_
 44 | 
 45 | json_ :: Parser Value -> Parser Value -> Parser Value
 46 | json_ obj ary = do
 47 |   w <- skipSpace *> A.satisfy (\w -> w == OPEN_CURLY || w == OPEN_SQUARE)
 48 |   if w == OPEN_CURLY
 49 |     then obj
 50 |     else ary
 51 | {-# INLINE json_ #-}
 52 | 
 53 | object_ :: Parser Value
 54 | object_ = Object <$> objectValues jstring value
 55 | 
 56 | objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value)
 57 | objectValues str val = do
 58 |   skipSpace
 59 |   let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val)
 60 |   H.fromList <$> commaSeparated pair CLOSE_CURLY
 61 | {-# INLINE objectValues #-}
 62 | 
 63 | array_ :: Parser Value
 64 | array_ = Array <$> arrayValues value
 65 | 
 66 | arrayValues :: Parser Value -> Parser (Vector Value)
 67 | arrayValues val = do
 68 |   skipSpace
 69 |   V.fromList <$> commaSeparated val CLOSE_SQUARE
 70 | {-# INLINE arrayValues #-}
 71 | 
 72 | commaSeparated :: Parser a -> Word8 -> Parser [a]
 73 | commaSeparated item endByte = do
 74 |   w <- A.peekWord8'
 75 |   if w == endByte
 76 |     then A.anyWord8 >> return []
 77 |     else loop
 78 |   where
 79 |     loop = do
 80 |       v <- item <* skipSpace
 81 |       ch <- A.satisfy $ \w -> w == COMMA || w == endByte
 82 |       if ch == COMMA
 83 |         then skipSpace >> (v :) <$> loop
 84 |         else return [v]
 85 | {-# INLINE commaSeparated #-}
 86 | 
 87 | value :: Parser Value
 88 | value = do
 89 |   w <- A.peekWord8'
 90 |   case w of
 91 |     DOUBLE_QUOTE -> A.anyWord8 *> (String <$> jstring_)
 92 |     OPEN_CURLY -> A.anyWord8 *> object_
 93 |     OPEN_SQUARE -> A.anyWord8 *> array_
 94 |     C_f -> string "false" *> pure (Bool False)
 95 |     C_t -> string "true" *> pure (Bool True)
 96 |     C_n -> string "null" *> pure Null
 97 |     _
 98 |       | w >= C_0 && w <= C_9 || w == C_MINUS -> Number <$> scientific
 99 |       | otherwise -> fail "not a valid json value"
100 | 
101 | jstring :: Parser Text
102 | jstring = A.word8 DOUBLE_QUOTE *> jstring_
103 | 
104 | jstring_ :: Parser Text
105 | jstring_ =
106 |   TE.decodeUtf8
107 |     <$> A.takeWhile (/= DOUBLE_QUOTE)
108 |     <* A.word8 DOUBLE_QUOTE
109 | {-# INLINE jstring_ #-}
110 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/Json/Common.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveGeneric #-}
 2 | 
 3 | module ParsersBench.Json.Common
 4 |   ( Result (..),
 5 |     Object,
 6 |     Array,
 7 |     Value (..),
 8 |   )
 9 | where
10 | 
11 | import Control.DeepSeq
12 | import qualified Data.HashMap.Strict as H
13 | import Data.Scientific (Scientific)
14 | import Data.Text (Text)
15 | import Data.Vector (Vector)
16 | import GHC.Generics
17 | 
18 | data Result a
19 |   = Error String
20 |   | Success a
21 |   deriving (Eq, Show)
22 | 
23 | type Object = H.HashMap Text Value
24 | 
25 | type Array = Vector Value
26 | 
27 | data Value
28 |   = Object !Object
29 |   | Array !Array
30 |   | String !Text
31 |   | Number !Scientific
32 |   | Bool !Bool
33 |   | Null
34 |   deriving (Eq, Show, Generic)
35 | 
36 | instance NFData Value
37 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/Json/Megaparsec.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | 
  4 | module ParsersBench.Json.Megaparsec
  5 |   ( parseJson,
  6 |   )
  7 | where
  8 | 
  9 | import Control.Applicative
 10 | import Data.ByteString (ByteString)
 11 | import qualified Data.HashMap.Strict as H
 12 | import Data.Text (Text)
 13 | import qualified Data.Text.Encoding as TE
 14 | import Data.Vector (Vector)
 15 | import qualified Data.Vector as V
 16 | import Data.Void
 17 | import Data.Word (Word8)
 18 | import ParsersBench.Json.Common
 19 | import Text.Megaparsec
 20 | import Text.Megaparsec.Byte
 21 | import qualified Text.Megaparsec.Byte.Lexer as L
 22 | 
 23 | type Parser = Parsec Void ByteString
 24 | 
 25 | #define CLOSE_CURLY 125
 26 | #define CLOSE_SQUARE 93
 27 | #define COMMA 44
 28 | #define C_0 48
 29 | #define C_9 57
 30 | #define C_MINUS 45
 31 | #define C_f 102
 32 | #define C_n 110
 33 | #define C_t 116
 34 | #define DOUBLE_QUOTE 34
 35 | #define OPEN_CURLY 123
 36 | #define OPEN_SQUARE 91
 37 | #define COLON 58
 38 | 
 39 | parseJson :: ByteString -> Value
 40 | parseJson bs =
 41 |   case parse json "" bs of
 42 |     Left err -> error (errorBundlePretty err)
 43 |     Right x -> x
 44 | 
 45 | json :: Parser Value
 46 | json = json_ object_ array_
 47 | 
 48 | json_ :: Parser Value -> Parser Value -> Parser Value
 49 | json_ obj ary = do
 50 |   w <- space *> (char OPEN_CURLY <|> char OPEN_SQUARE)
 51 |   if w == OPEN_CURLY
 52 |     then obj
 53 |     else ary
 54 | {-# INLINE json_ #-}
 55 | 
 56 | object_ :: Parser Value
 57 | object_ = Object <$> objectValues jstring value
 58 | 
 59 | objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value)
 60 | objectValues str val = do
 61 |   space
 62 |   let pair = liftA2 (,) (str <* space) (char COLON *> space *> val)
 63 |   H.fromList <$> commaSeparated pair CLOSE_CURLY
 64 | {-# INLINE objectValues #-}
 65 | 
 66 | array_ :: Parser Value
 67 | array_ = Array <$> arrayValues value
 68 | 
 69 | arrayValues :: Parser Value -> Parser (Vector Value)
 70 | arrayValues val = do
 71 |   space
 72 |   V.fromList <$> commaSeparated val CLOSE_SQUARE
 73 | {-# INLINE arrayValues #-}
 74 | 
 75 | commaSeparated :: Parser a -> Word8 -> Parser [a]
 76 | commaSeparated item endByte = do
 77 |   w <- lookAhead anySingle
 78 |   if w == endByte
 79 |     then [] <$ anySingle
 80 |     else loop
 81 |   where
 82 |     loop = do
 83 |       v <- item <* space
 84 |       ch <- char COMMA <|> char endByte
 85 |       if ch == COMMA
 86 |         then space >> (v :) <$> loop
 87 |         else return [v]
 88 | {-# INLINE commaSeparated #-}
 89 | 
 90 | value :: Parser Value
 91 | value = do
 92 |   w <- lookAhead anySingle
 93 |   case w of
 94 |     DOUBLE_QUOTE -> anySingle *> (String <$> jstring_)
 95 |     OPEN_CURLY -> anySingle *> object_
 96 |     OPEN_SQUARE -> anySingle *> array_
 97 |     C_f -> Bool False <$ string "false"
 98 |     C_t -> Bool True <$ string "true"
 99 |     C_n -> string "null" *> pure Null
100 |     _
101 |       | w >= C_0 && w <= C_9 || w == C_MINUS -> Number <$> L.scientific
102 |       | otherwise -> fail "not a valid json value"
103 | 
104 | jstring :: Parser Text
105 | jstring = char DOUBLE_QUOTE *> jstring_
106 | 
107 | jstring_ :: Parser Text
108 | jstring_ =
109 |   TE.decodeUtf8
110 |     <$> takeWhileP (Just "string char") (/= DOUBLE_QUOTE)
111 |     <* char DOUBLE_QUOTE
112 | {-# INLINE jstring_ #-}
113 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/Log/Attoparsec.hs:
--------------------------------------------------------------------------------
 1 | -- Mostly stolen from:
 2 | -- https://www.schoolofhaskell.com/school/starting-with-haskell/libraries-and-frameworks/text-manipulation/attoparsec
 3 | {-# LANGUAGE OverloadedStrings #-}
 4 | 
 5 | module ParsersBench.Log.Attoparsec
 6 |   ( parseLog,
 7 |   )
 8 | where
 9 | 
10 | import Control.Applicative
11 | import Control.Monad
12 | import Data.Attoparsec.ByteString.Char8
13 | import Data.ByteString (ByteString)
14 | import Data.Time
15 | import ParsersBench.Log.Common
16 | 
17 | parseLog :: ByteString -> Log
18 | parseLog bs =
19 |   case parseOnly logParser bs of
20 |     Left err -> error err
21 |     Right x -> x
22 | 
23 | parseIP :: Parser IP
24 | parseIP = do
25 |   d1 <- decimal
26 |   void (char '.')
27 |   d2 <- decimal
28 |   void (char '.')
29 |   d3 <- decimal
30 |   void (char '.')
31 |   d4 <- decimal
32 |   return $ IP d1 d2 d3 d4
33 | 
34 | timeParser :: Parser LocalTime
35 | timeParser = do
36 |   y <- count 4 digit
37 |   void (char '-')
38 |   mm <- count 2 digit
39 |   void (char '-')
40 |   d <- count 2 digit
41 |   void (char ' ')
42 |   h <- count 2 digit
43 |   void (char ':')
44 |   m <- count 2 digit
45 |   void (char ':')
46 |   s <- count 2 digit
47 |   return
48 |     LocalTime
49 |       { localDay = fromGregorian (read y) (read mm) (read d),
50 |         localTimeOfDay = TimeOfDay (read h) (read m) (read s)
51 |       }
52 | 
53 | productParser :: Parser Product
54 | productParser =
55 |   (Mouse <$ string "mouse")
56 |     <|> (Keyboard <$ string "keyboard")
57 |     <|> (Monitor <$ string "monitor")
58 |     <|> (Speakers <$ string "speakers")
59 | 
60 | logEntryParser :: Parser LogEntry
61 | logEntryParser = do
62 |   t <- timeParser
63 |   void (char ' ')
64 |   ip <- parseIP
65 |   void (char ' ')
66 |   p <- productParser
67 |   return (LogEntry t ip p)
68 | 
69 | logParser :: Parser Log
70 | logParser = many (logEntryParser <* endOfLine)
71 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/Log/Common.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveGeneric #-}
 2 | 
 3 | module ParsersBench.Log.Common
 4 |   ( IP (..),
 5 |     Product (..),
 6 |     LogEntry (..),
 7 |     Log,
 8 |   )
 9 | where
10 | 
11 | import Control.DeepSeq
12 | import Data.Time
13 | import Data.Word
14 | import GHC.Generics
15 | 
16 | data IP = IP Word8 Word8 Word8 Word8
17 |   deriving (Show, Eq, Generic)
18 | 
19 | instance NFData IP
20 | 
21 | data Product
22 |   = Mouse
23 |   | Keyboard
24 |   | Monitor
25 |   | Speakers
26 |   deriving (Show, Eq, Generic)
27 | 
28 | instance NFData Product
29 | 
30 | data LogEntry = LogEntry
31 |   { entryTime :: LocalTime,
32 |     entryIP :: IP,
33 |     entryProduct :: Product
34 |   }
35 |   deriving (Show, Eq, Generic)
36 | 
37 | instance NFData LogEntry
38 | 
39 | type Log = [LogEntry]
40 | 


--------------------------------------------------------------------------------
/parsers-bench/ParsersBench/Log/Megaparsec.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module ParsersBench.Log.Megaparsec
 4 |   ( parseLog,
 5 |   )
 6 | where
 7 | 
 8 | import Control.Monad
 9 | import Data.ByteString (ByteString)
10 | import Data.Char (chr)
11 | import Data.Time
12 | import Data.Void
13 | import Data.Word (Word8)
14 | import ParsersBench.Log.Common
15 | import Text.Megaparsec
16 | import Text.Megaparsec.Byte
17 | import qualified Text.Megaparsec.Byte.Lexer as L
18 | 
19 | type Parser = Parsec Void ByteString
20 | 
21 | parseLog :: ByteString -> Log
22 | parseLog bs =
23 |   case parse logParser "" bs of
24 |     Left err -> error (errorBundlePretty err)
25 |     Right x -> x
26 | 
27 | parseIP :: Parser IP
28 | parseIP = do
29 |   d1 <- L.decimal
30 |   void (char 46)
31 |   d2 <- L.decimal
32 |   void (char 46)
33 |   d3 <- L.decimal
34 |   void (char 46)
35 |   d4 <- L.decimal
36 |   return (IP d1 d2 d3 d4)
37 | 
38 | timeParser :: Parser LocalTime
39 | timeParser = do
40 |   y <- fmap byteToChar <$> count 4 digitChar
41 |   void (char 45)
42 |   mm <- fmap byteToChar <$> count 2 digitChar
43 |   void (char 45)
44 |   d <- fmap byteToChar <$> count 2 digitChar
45 |   void (char 32)
46 |   h <- fmap byteToChar <$> count 2 digitChar
47 |   void (char 58)
48 |   m <- fmap byteToChar <$> count 2 digitChar
49 |   void (char 58)
50 |   s <- fmap byteToChar <$> count 2 digitChar
51 |   return
52 |     LocalTime
53 |       { localDay = fromGregorian (read y) (read mm) (read d),
54 |         localTimeOfDay = TimeOfDay (read h) (read m) (read s)
55 |       }
56 | 
57 | productParser :: Parser Product
58 | productParser =
59 |   (Mouse <$ string "mouse")
60 |     <|> (Keyboard <$ string "keyboard")
61 |     <|> (Monitor <$ string "monitor")
62 |     <|> (Speakers <$ string "speakers")
63 | 
64 | logEntryParser :: Parser LogEntry
65 | logEntryParser = do
66 |   t <- timeParser
67 |   void (char 32)
68 |   ip <- parseIP
69 |   void (char 32)
70 |   p <- productParser
71 |   return (LogEntry t ip p)
72 | 
73 | logParser :: Parser Log
74 | logParser = many (logEntryParser <* eol)
75 | 
76 | byteToChar :: Word8 -> Char
77 | byteToChar = chr . fromIntegral
78 | {-# INLINE byteToChar #-}
79 | 


--------------------------------------------------------------------------------
/parsers-bench/README.md:
--------------------------------------------------------------------------------
 1 | # Parsers bench
 2 | 
 3 | This directory contains a collection of identical parsers implemented in
 4 | Attoparsec and Megaparsec. The goal here is to learn how much Attoparsec is
 5 | actually faster than Megaparsec for common parsing tasks.
 6 | 
 7 | ## License
 8 | 
 9 | Copyright © 2017–present Mark Karpov
10 | 
11 | Distributed under BSD 3 clause license.
12 | 


--------------------------------------------------------------------------------
/parsers-bench/bench/memory/Main.hs:
--------------------------------------------------------------------------------
 1 | module Main (main) where
 2 | 
 3 | import Control.DeepSeq
 4 | import Control.Monad
 5 | import Data.ByteString (ByteString)
 6 | import qualified Data.ByteString as B
 7 | import qualified ParsersBench.CSV.Attoparsec as A
 8 | import qualified ParsersBench.CSV.Megaparsec as M
 9 | import qualified ParsersBench.Json.Attoparsec as A
10 | import qualified ParsersBench.Json.Megaparsec as M
11 | import qualified ParsersBench.Log.Attoparsec as A
12 | import qualified ParsersBench.Log.Megaparsec as M
13 | import Weigh
14 | 
15 | main :: IO ()
16 | main = mainWith $ do
17 |   setColumns [Case, Allocated, GCs, Max]
18 |   forM_ csvFiles $ \file ->
19 |     bparser "CSV (Attoparsec)" file A.parseCSV
20 |   forM_ csvFiles $ \file ->
21 |     bparser "CSV (Megaparsec)" file M.parseCSV
22 |   forM_ logFiles $ \file ->
23 |     bparser "Log (Attoparsec)" file A.parseLog
24 |   forM_ logFiles $ \file ->
25 |     bparser "Log (Megaparsec)" file M.parseLog
26 |   forM_ jsonFiles $ \file ->
27 |     bparser "JSON (Attoparsec)" file A.parseJson
28 |   forM_ jsonFiles $ \file ->
29 |     bparser "JSON (Megaparsec)" file M.parseJson
30 | 
31 | bparser :: (NFData a) => String -> FilePath -> (ByteString -> a) -> Weigh ()
32 | bparser pre desc f = io (pre ++ "-" ++ desc) m path
33 |   where
34 |     path = "data/" ++ desc
35 |     m pth = f <$> B.readFile pth
36 | 
37 | csvFiles :: [FilePath]
38 | csvFiles =
39 |   [ "csv-5.csv",
40 |     "csv-10.csv",
41 |     "csv-20.csv",
42 |     "csv-40.csv"
43 |   ]
44 | 
45 | logFiles :: [FilePath]
46 | logFiles =
47 |   [ "log-5.log",
48 |     "log-10.log",
49 |     "log-20.log",
50 |     "log-40.log"
51 |   ]
52 | 
53 | jsonFiles :: [FilePath]
54 | jsonFiles =
55 |   [ "json-5.json",
56 |     "json-10.json",
57 |     "json-20.json",
58 |     "json-40.json"
59 |   ]
60 | 


--------------------------------------------------------------------------------
/parsers-bench/bench/speed/Main.hs:
--------------------------------------------------------------------------------
 1 | module Main (main) where
 2 | 
 3 | import Control.DeepSeq
 4 | import Criterion.Main
 5 | import Data.ByteString (ByteString)
 6 | import qualified Data.ByteString as B
 7 | import qualified ParsersBench.CSV.Attoparsec as A
 8 | import qualified ParsersBench.CSV.Megaparsec as M
 9 | import qualified ParsersBench.Json.Attoparsec as A
10 | import qualified ParsersBench.Json.Megaparsec as M
11 | import qualified ParsersBench.Log.Attoparsec as A
12 | import qualified ParsersBench.Log.Megaparsec as M
13 | 
14 | main :: IO ()
15 | main =
16 |   defaultMain
17 |     [ bgroup
18 |         "CSV (Attoparsec)"
19 |         [bparser file A.parseCSV | file <- csvFiles],
20 |       bgroup
21 |         "CSV (Megaparsec)"
22 |         [bparser file M.parseCSV | file <- csvFiles],
23 |       bgroup
24 |         "Log (Attoparsec)"
25 |         [bparser file A.parseLog | file <- logFiles],
26 |       bgroup
27 |         "Log (Megaparsec)"
28 |         [bparser file M.parseLog | file <- logFiles],
29 |       bgroup
30 |         "JSON (Attoparsec)"
31 |         [bparser file A.parseJson | file <- jsonFiles],
32 |       bgroup
33 |         "JSON (Megapasec)"
34 |         [bparser file M.parseJson | file <- jsonFiles]
35 |     ]
36 | 
37 | bparser :: (NFData a) => FilePath -> (ByteString -> a) -> Benchmark
38 | bparser desc f = env (B.readFile path) (bench desc . nf f)
39 |   where
40 |     path = "data/" ++ desc
41 | 
42 | csvFiles :: [FilePath]
43 | csvFiles =
44 |   [ "csv-5.csv",
45 |     "csv-10.csv",
46 |     "csv-20.csv",
47 |     "csv-40.csv"
48 |   ]
49 | 
50 | logFiles :: [FilePath]
51 | logFiles =
52 |   [ "log-5.log",
53 |     "log-10.log",
54 |     "log-20.log",
55 |     "log-40.log"
56 |   ]
57 | 
58 | jsonFiles :: [FilePath]
59 | jsonFiles =
60 |   [ "json-5.json",
61 |     "json-10.json",
62 |     "json-20.json",
63 |     "json-40.json"
64 |   ]
65 | 


--------------------------------------------------------------------------------
/parsers-bench/data/csv-10.csv:
--------------------------------------------------------------------------------
 1 | something,10,yes,"here we go","Meet me in Montauk."
 2 | fifty two,11,yes,"here we go","Meet me in Montauk."
 3 | something,12,yes,"here we go","Meet me in Montauk."
 4 | "foo"" bar",13,yes,,"Meet me in Montauk."
 5 | something,14,yes,"here we go","Meet me in Montauk."
 6 | something,10,yes,"here we go","Meet me in Montauk."
 7 | fifty two,11,yes,"here we go","Meet me in Montauk."
 8 | something,12,yes,"here we go","Meet me in Montauk."
 9 | "foo"" bar",13,yes,,"Meet me in Montauk."
10 | something,14,yes,"here we go","Meet me in Montauk."
11 | 


--------------------------------------------------------------------------------
/parsers-bench/data/csv-20.csv:
--------------------------------------------------------------------------------
 1 | something,10,yes,"here we go","Meet me in Montauk."
 2 | fifty two,11,yes,"here we go","Meet me in Montauk."
 3 | something,12,yes,"here we go","Meet me in Montauk."
 4 | "foo"" bar",13,yes,,"Meet me in Montauk."
 5 | something,14,yes,"here we go","Meet me in Montauk."
 6 | something,10,yes,"here we go","Meet me in Montauk."
 7 | fifty two,11,yes,"here we go","Meet me in Montauk."
 8 | something,12,yes,"here we go","Meet me in Montauk."
 9 | "foo"" bar",13,yes,,"Meet me in Montauk."
10 | something,14,yes,"here we go","Meet me in Montauk."
11 | something,10,yes,"here we go","Meet me in Montauk."
12 | fifty two,11,yes,"here we go","Meet me in Montauk."
13 | something,12,yes,"here we go","Meet me in Montauk."
14 | "foo"" bar",13,yes,,"Meet me in Montauk."
15 | something,14,yes,"here we go","Meet me in Montauk."
16 | something,10,yes,"here we go","Meet me in Montauk."
17 | fifty two,11,yes,"here we go","Meet me in Montauk."
18 | something,12,yes,"here we go","Meet me in Montauk."
19 | "foo"" bar",13,yes,,"Meet me in Montauk."
20 | something,14,yes,"here we go","Meet me in Montauk."
21 | 


--------------------------------------------------------------------------------
/parsers-bench/data/csv-40.csv:
--------------------------------------------------------------------------------
 1 | something,10,yes,"here we go","Meet me in Montauk."
 2 | fifty two,11,yes,"here we go","Meet me in Montauk."
 3 | something,12,yes,"here we go","Meet me in Montauk."
 4 | "foo"" bar",13,yes,,"Meet me in Montauk."
 5 | something,14,yes,"here we go","Meet me in Montauk."
 6 | something,10,yes,"here we go","Meet me in Montauk."
 7 | fifty two,11,yes,"here we go","Meet me in Montauk."
 8 | something,12,yes,"here we go","Meet me in Montauk."
 9 | "foo"" bar",13,yes,,"Meet me in Montauk."
10 | something,14,yes,"here we go","Meet me in Montauk."
11 | something,10,yes,"here we go","Meet me in Montauk."
12 | fifty two,11,yes,"here we go","Meet me in Montauk."
13 | something,12,yes,"here we go","Meet me in Montauk."
14 | "foo"" bar",13,yes,,"Meet me in Montauk."
15 | something,14,yes,"here we go","Meet me in Montauk."
16 | something,10,yes,"here we go","Meet me in Montauk."
17 | fifty two,11,yes,"here we go","Meet me in Montauk."
18 | something,12,yes,"here we go","Meet me in Montauk."
19 | "foo"" bar",13,yes,,"Meet me in Montauk."
20 | something,14,yes,"here we go","Meet me in Montauk."
21 | something,10,yes,"here we go","Meet me in Montauk."
22 | fifty two,11,yes,"here we go","Meet me in Montauk."
23 | something,12,yes,"here we go","Meet me in Montauk."
24 | "foo"" bar",13,yes,,"Meet me in Montauk."
25 | something,14,yes,"here we go","Meet me in Montauk."
26 | something,10,yes,"here we go","Meet me in Montauk."
27 | fifty two,11,yes,"here we go","Meet me in Montauk."
28 | something,12,yes,"here we go","Meet me in Montauk."
29 | "foo"" bar",13,yes,,"Meet me in Montauk."
30 | something,14,yes,"here we go","Meet me in Montauk."
31 | something,10,yes,"here we go","Meet me in Montauk."
32 | fifty two,11,yes,"here we go","Meet me in Montauk."
33 | something,12,yes,"here we go","Meet me in Montauk."
34 | "foo"" bar",13,yes,,"Meet me in Montauk."
35 | something,14,yes,"here we go","Meet me in Montauk."
36 | something,10,yes,"here we go","Meet me in Montauk."
37 | fifty two,11,yes,"here we go","Meet me in Montauk."
38 | something,12,yes,"here we go","Meet me in Montauk."
39 | "foo"" bar",13,yes,,"Meet me in Montauk."
40 | something,14,yes,"here we go","Meet me in Montauk."
41 | 


--------------------------------------------------------------------------------
/parsers-bench/data/csv-5.csv:
--------------------------------------------------------------------------------
1 | something,10,yes,"here we go","Meet me in Montauk."
2 | fifty two,11,yes,"here we go","Meet me in Montauk."
3 | something,12,yes,"here we go","Meet me in Montauk."
4 | "foo"" bar",13,yes,,"Meet me in Montauk."
5 | something,14,yes,"here we go","Meet me in Montauk."
6 | 


--------------------------------------------------------------------------------
/parsers-bench/data/json-10.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "something": 5,
 3 |     "another_thing": false,
 4 |     "price": { "min": 200, "max": 400 },
 5 |     "something": 5,
 6 |     "another_thing": false,
 7 |     "price": { "min": 200, "max": 400 },
 8 |     "something": 5,
 9 |     "another_thing": false
10 | }
11 | 


--------------------------------------------------------------------------------
/parsers-bench/data/json-20.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "something": 5,
 3 |     "another_thing": false,
 4 |     "price": { "min": 200, "max": 400 },
 5 |     "something": 5,
 6 |     "another_thing": false,
 7 |     "price": { "min": 200, "max": 400 },
 8 |     "something": 5,
 9 |     "another_thing": false,
10 |     "price": { "min": 200, "max": 400 },
11 |     "something": 5.0,
12 |     "another_thing": true,
13 |     "price": { "min": 200, "max": 400 },
14 |     "something": 533,
15 |     "another_thing": [1,2,3],
16 |     "price": { "min": 200, "max": 400 },
17 |     "something": 5,
18 |     "another_thing": false,
19 |     "direction": "north-east"
20 | }
21 | 


--------------------------------------------------------------------------------
/parsers-bench/data/json-40.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "something": 5,
 3 |     "another_thing": false,
 4 |     "price": { "min": 200, "max": 400 },
 5 |     "something": 5,
 6 |     "another_thing": false,
 7 |     "price": { "min": 200, "max": 400 },
 8 |     "something": 5,
 9 |     "another_thing": false,
10 |     "price": { "min": 200, "max": 400 },
11 |     "something": 5.0,
12 |     "another_thing": true,
13 |     "price": { "min": 200, "max": 400 },
14 |     "something": 533,
15 |     "another_thing": [1,2,3],
16 |     "price": { "min": 200, "max": 400 },
17 |     "something": 5,
18 |     "another_thing": false,
19 |     "direction": "north-east",
20 |     "something": 5,
21 |     "another_thing": false,
22 |     "price": { "min": 200, "max": 400 },
23 |     "something": 5,
24 |     "another_thing": false,
25 |     "price": { "min": 200, "max": 400 },
26 |     "something": 5,
27 |     "another_thing": false,
28 |     "price": { "min": 200, "max": 400 },
29 |     "something": 5.0,
30 |     "another_thing": true,
31 |     "price": { "min": 200, "max": 400 },
32 |     "something": 533,
33 |     "another_thing": [1,2,3],
34 |     "price": { "min": 200, "max": 400 },
35 |     "something": 5,
36 |     "another_thing": false,
37 |     "direction": "north-east",
38 |     "something": 5,
39 |     "direction": "north-east"
40 | }
41 | 


--------------------------------------------------------------------------------
/parsers-bench/data/json-5.json:
--------------------------------------------------------------------------------
1 | {
2 |     "something": 5,
3 |     "another_thing": false,
4 |     "price": { "min": 200, "max": 400 }
5 | }
6 | 


--------------------------------------------------------------------------------
/parsers-bench/data/log-10.log:
--------------------------------------------------------------------------------
 1 | 2013-06-29 11:16:23 124.67.34.60 keyboard
 2 | 2013-06-29 11:32:12 212.141.23.67 mouse
 3 | 2013-06-29 11:33:08 212.141.23.67 monitor
 4 | 2013-06-29 12:12:34 125.80.32.31 speakers
 5 | 2013-06-29 12:51:50 101.40.50.62 keyboard
 6 | 2013-06-29 11:16:23 124.67.34.60 keyboard
 7 | 2013-06-29 11:32:12 212.141.23.67 mouse
 8 | 2013-06-29 11:33:08 212.141.23.67 monitor
 9 | 2013-06-29 12:12:34 125.80.32.31 speakers
10 | 2013-06-29 12:51:50 101.40.50.62 keyboard
11 | 


--------------------------------------------------------------------------------
/parsers-bench/data/log-20.log:
--------------------------------------------------------------------------------
 1 | 2013-06-29 11:16:23 124.67.34.60 keyboard
 2 | 2013-06-29 11:32:12 212.141.23.67 mouse
 3 | 2013-06-29 11:33:08 212.141.23.67 monitor
 4 | 2013-06-29 12:12:34 125.80.32.31 speakers
 5 | 2013-06-29 12:51:50 101.40.50.62 keyboard
 6 | 2013-06-29 11:16:23 124.67.34.60 keyboard
 7 | 2013-06-29 11:32:12 212.141.23.67 mouse
 8 | 2013-06-29 11:33:08 212.141.23.67 monitor
 9 | 2013-06-29 12:12:34 125.80.32.31 speakers
10 | 2013-06-29 12:51:50 101.40.50.62 keyboard
11 | 2013-06-29 11:16:23 124.67.34.60 keyboard
12 | 2013-06-29 11:32:12 212.141.23.67 mouse
13 | 2013-06-29 11:33:08 212.141.23.67 monitor
14 | 2013-06-29 12:12:34 125.80.32.31 speakers
15 | 2013-06-29 12:51:50 101.40.50.62 keyboard
16 | 2013-06-29 11:16:23 124.67.34.60 keyboard
17 | 2013-06-29 11:32:12 212.141.23.67 mouse
18 | 2013-06-29 11:33:08 212.141.23.67 monitor
19 | 2013-06-29 12:12:34 125.80.32.31 speakers
20 | 2013-06-29 12:51:50 101.40.50.62 keyboard
21 | 


--------------------------------------------------------------------------------
/parsers-bench/data/log-40.log:
--------------------------------------------------------------------------------
 1 | 2013-06-29 11:16:23 124.67.34.60 keyboard
 2 | 2013-06-29 11:32:12 212.141.23.67 mouse
 3 | 2013-06-29 11:33:08 212.141.23.67 monitor
 4 | 2013-06-29 12:12:34 125.80.32.31 speakers
 5 | 2013-06-29 12:51:50 101.40.50.62 keyboard
 6 | 2013-06-29 11:16:23 124.67.34.60 keyboard
 7 | 2013-06-29 11:32:12 212.141.23.67 mouse
 8 | 2013-06-29 11:33:08 212.141.23.67 monitor
 9 | 2013-06-29 12:12:34 125.80.32.31 speakers
10 | 2013-06-29 12:51:50 101.40.50.62 keyboard
11 | 2013-06-29 11:16:23 124.67.34.60 keyboard
12 | 2013-06-29 11:32:12 212.141.23.67 mouse
13 | 2013-06-29 11:33:08 212.141.23.67 monitor
14 | 2013-06-29 12:12:34 125.80.32.31 speakers
15 | 2013-06-29 12:51:50 101.40.50.62 keyboard
16 | 2013-06-29 11:16:23 124.67.34.60 keyboard
17 | 2013-06-29 11:32:12 212.141.23.67 mouse
18 | 2013-06-29 11:33:08 212.141.23.67 monitor
19 | 2013-06-29 12:12:34 125.80.32.31 speakers
20 | 2013-06-29 12:51:50 101.40.50.62 keyboard
21 | 2013-06-29 11:16:23 124.67.34.60 keyboard
22 | 2013-06-29 11:32:12 212.141.23.67 mouse
23 | 2013-06-29 11:33:08 212.141.23.67 monitor
24 | 2013-06-29 12:12:34 125.80.32.31 speakers
25 | 2013-06-29 12:51:50 101.40.50.62 keyboard
26 | 2013-06-29 11:16:23 124.67.34.60 keyboard
27 | 2013-06-29 11:32:12 212.141.23.67 mouse
28 | 2013-06-29 11:33:08 212.141.23.67 monitor
29 | 2013-06-29 12:12:34 125.80.32.31 speakers
30 | 2013-06-29 12:51:50 101.40.50.62 keyboard
31 | 2013-06-29 11:16:23 124.67.34.60 keyboard
32 | 2013-06-29 11:32:12 212.141.23.67 mouse
33 | 2013-06-29 11:33:08 212.141.23.67 monitor
34 | 2013-06-29 12:12:34 125.80.32.31 speakers
35 | 2013-06-29 12:51:50 101.40.50.62 keyboard
36 | 2013-06-29 11:16:23 124.67.34.60 keyboard
37 | 2013-06-29 11:32:12 212.141.23.67 mouse
38 | 2013-06-29 11:33:08 212.141.23.67 monitor
39 | 2013-06-29 12:12:34 125.80.32.31 speakers
40 | 2013-06-29 12:51:50 101.40.50.62 keyboard
41 | 


--------------------------------------------------------------------------------
/parsers-bench/data/log-5.log:
--------------------------------------------------------------------------------
1 | 2013-06-29 11:16:23 124.67.34.60 keyboard
2 | 2013-06-29 11:32:12 212.141.23.67 mouse
3 | 2013-06-29 11:33:08 212.141.23.67 monitor
4 | 2013-06-29 12:12:34 125.80.32.31 speakers
5 | 2013-06-29 12:51:50 101.40.50.62 keyboard
6 | 


--------------------------------------------------------------------------------
/parsers-bench/parsers-bench.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version:   >=1.18
 2 | name:            parsers-bench
 3 | version:         0.1.0
 4 | license:         BSD3
 5 | maintainer:      Mark Karpov <markkarpov92@gmail.com>
 6 | author:          Mark Karpov <markkarpov92@gmail.com>
 7 | tested-with:     ghc ==9.8.4 ghc ==9.10.1 ghc ==9.12.1
 8 | homepage:        https://github.com/mrkkrp/megaparsec
 9 | bug-reports:     https://github.com/mrkkrp/megaparsec/issues
10 | synopsis:        Performance benchmarks: Megaparsec vs Attoparsec
11 | description:     Performance benchmarks: Megaparsec vs Attoparsec.
12 | category:        Parsing, Benchmark
13 | build-type:      Simple
14 | extra-doc-files: README.md
15 | 
16 | source-repository head
17 |     type:     git
18 |     location: https://github.com/mrkkrp/parsers-bench.git
19 | 
20 | flag dev
21 |     description: Turn on development settings.
22 |     default:     False
23 |     manual:      True
24 | 
25 | library
26 |     exposed-modules:
27 |         ParsersBench.CSV.Attoparsec
28 |         ParsersBench.CSV.Megaparsec
29 |         ParsersBench.Json.Attoparsec
30 |         ParsersBench.Json.Common
31 |         ParsersBench.Json.Megaparsec
32 |         ParsersBench.Log.Attoparsec
33 |         ParsersBench.Log.Common
34 |         ParsersBench.Log.Megaparsec
35 | 
36 |     default-language: Haskell2010
37 |     build-depends:
38 |         base -any,
39 |         attoparsec -any,
40 |         bytestring -any,
41 |         deepseq -any,
42 |         megaparsec -any,
43 |         parser-combinators -any,
44 |         scientific -any,
45 |         text -any,
46 |         time -any,
47 |         unordered-containers -any,
48 |         vector -any
49 | 
50 |     if flag(dev)
51 |         ghc-options: -Wall -Werror
52 | 
53 |     else
54 |         ghc-options: -O2 -Wall
55 | 
56 | benchmark bench-speed
57 |     type:             exitcode-stdio-1.0
58 |     main-is:          Main.hs
59 |     hs-source-dirs:   bench/speed
60 |     default-language: Haskell2010
61 |     build-depends:
62 |         base -any,
63 |         bytestring -any,
64 |         criterion -any,
65 |         deepseq -any,
66 |         parsers-bench -any
67 | 
68 |     if flag(dev)
69 |         ghc-options: -O2 -Wall -Werror
70 | 
71 |     else
72 |         ghc-options: -O2 -Wall
73 | 
74 | benchmark bench-memory
75 |     type:             exitcode-stdio-1.0
76 |     main-is:          Main.hs
77 |     hs-source-dirs:   bench/memory
78 |     default-language: Haskell2010
79 |     build-depends:
80 |         base -any,
81 |         bytestring -any,
82 |         deepseq -any,
83 |         parsers-bench -any,
84 |         weigh -any
85 | 
86 |     if flag(dev)
87 |         ghc-options: -O2 -Wall -Werror
88 | 
89 |     else
90 |         ghc-options: -O2 -Wall
91 | 


--------------------------------------------------------------------------------
/shell.nix:
--------------------------------------------------------------------------------
1 | { pkgs ? (import ./nix/nixpkgs) }:
2 | 
3 | (import ./default.nix { inherit pkgs; }).shell
4 | 


--------------------------------------------------------------------------------