├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── LICENSE
├── README.md
├── megaparsack-doc
    ├── info.rkt
    └── scribblings
    │   ├── info.rkt
    │   ├── megaparsack.scrbl
    │   └── megaparsack
    │       ├── basics.scrbl
    │       ├── choice.scrbl
    │       ├── differences-from-parsack.scrbl
    │       ├── reference.scrbl
    │       ├── state.scrbl
    │       ├── syntax.scrbl
    │       └── util.rkt
├── megaparsack-lib
    ├── info.rkt
    └── megaparsack
    │   ├── base.rkt
    │   ├── combinator.rkt
    │   ├── main.rkt
    │   └── text.rkt
├── megaparsack-parser-tools
    ├── info.rkt
    └── megaparsack
    │   └── parser-tools
    │       └── lex.rkt
├── megaparsack-parser
    ├── info.rkt
    └── megaparsack
    │   └── parser
    │       └── json.rkt
├── megaparsack-test
    ├── info.rkt
    └── tests
    │   └── megaparsack
    │       ├── base.rkt
    │       ├── contract.rkt
    │       ├── parser
    │           └── json.rkt
    │       └── text.rkt
└── megaparsack
    └── info.rkt


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | on: [push]
 3 | jobs:
 4 |   test:
 5 |     runs-on: ubuntu-latest
 6 |     strategy:
 7 |       fail-fast: false
 8 |       matrix:
 9 |         racket-version: [ '6.12', '7.0', '7.9', '8.0', stable ]
10 |     steps:
11 |     - uses: actions/checkout@v2
12 |       with: { path: repo }
13 |     - uses: Bogdanp/setup-racket@v1.5
14 |       with:
15 |         version: ${{ matrix.racket-version }}
16 |         dest: '$GITHUB_WORKSPACE/racket'
17 |         sudo: never
18 |     - name: install
19 |       run: raco pkg install --installation --auto --link
20 |              repo/megaparsack-{lib,parser,parser-tools,doc,test}
21 |     - name: test
22 |       run: raco test -ep megaparsack-{lib,parser,parser-tools,test}
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | compiled/
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2016, Alexis King
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software
 6 | for any purpose with or without fee is hereby granted, provided
 7 | that the above copyright notice and this permission notice appear
 8 | in all copies.
 9 | 
10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 | WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 | WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 | AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
15 | OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 | PERFORMANCE OF THIS SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # megaparsack [![Build Status](https://github.com/lexi-lambda/megaparsack/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/lexi-lambda/megaparsack/actions/workflows/build.yml)
 2 | 
 3 | This library implements a set of **practical parser combinators** inspired by libraries like [parsec][parsec], [megaparsec][megaparsec], and [parsack][parsack]. It can be used to build parsers for arbitrary input data, and it includes built-in facilities for parsing textual data and tokens produced by [`parser-tools/lex`][parser-tools/lex].
 4 | 
 5 | To install it, simply install the `megaparsack` package:
 6 | 
 7 | ```
 8 | $ raco pkg install megaparsack
 9 | ```
10 | 
11 | [For more information, see the full documentation.][megaparsack-doc]
12 | 
13 | [megaparsack-doc]: http://docs.racket-lang.org/megaparsack/
14 | [megaparsec]: https://hackage.haskell.org/package/megaparsec
15 | [parsack]: http://docs.racket-lang.org/parsack/
16 | [parsec]: https://hackage.haskell.org/package/parsec
17 | [parser-tools/lex]: http://docs.racket-lang.org/parser-tools/Lexers.html
18 | 


--------------------------------------------------------------------------------
/megaparsack-doc/info.rkt:
--------------------------------------------------------------------------------
 1 | #lang info
 2 | 
 3 | (define version "1.8")
 4 | 
 5 | (define collection 'multi)
 6 | 
 7 | (define deps
 8 |   '())
 9 | (define build-deps
10 |   '("base"
11 |     "functional-doc"
12 |     "functional-lib"
13 |     ["megaparsack-lib" #:version "1.8"]
14 |     "megaparsack-parser-tools"
15 |     "parser-tools-doc"
16 |     "parser-tools-lib"
17 |     "racket-doc"
18 |     "scribble-lib"))
19 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/info.rkt:
--------------------------------------------------------------------------------
1 | #lang info
2 | 
3 | (define scribblings
4 |   '(["megaparsack.scrbl" (multi-page) (parsing-library)]))
5 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack.scrbl:
--------------------------------------------------------------------------------
 1 | #lang scribble/manual
 2 | 
 3 | @(require (for-label data/monad))
 4 | 
 5 | @title{Megaparsack: Practical Parser Combinators}
 6 | 
 7 | @author{@author+email["Alexis King" "lexi.lambda@gmail.com"]}
 8 | 
 9 | @defmodule[megaparsack]
10 | 
11 | Megaparsack is a @emph{parser combinator} library: a composable set of simple parsers that can be used
12 | to create larger parsing systems that can parse arbitrary grammars, including context-sensitive ones.
13 | Megaparsack uses the @racket[gen:monad] generic interface to provide a uniform interface to sequence
14 | and compose different parsers using a base set of primitives.
15 | 
16 | @table-of-contents[]
17 | 
18 | @include-section["megaparsack/basics.scrbl"]
19 | @include-section["megaparsack/choice.scrbl"]
20 | @include-section["megaparsack/syntax.scrbl"]
21 | @include-section["megaparsack/state.scrbl"]
22 | @include-section["megaparsack/reference.scrbl"]
23 | @include-section["megaparsack/differences-from-parsack.scrbl"]
24 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack/basics.scrbl:
--------------------------------------------------------------------------------
  1 | #lang scribble/manual
  2 | 
  3 | @(require "util.rkt")
  4 | 
  5 | @title[#:tag "parsing-basics"]{Parsing Basics}
  6 | 
  7 | Megaparsack is a library for manipulating @deftech{parsers}, which are, very simply, functions that
  8 | operate on streams of tokens. This is very broad: the tokens in question can simply be characters in a
  9 | string, they can be tokens produced as the result of a lexer, they can be syntax objects, or they can
 10 | even be completely arbitrary data.
 11 | 
 12 | What’s special about parsers is that they can be @emph{sequenced}—that is, multiple parsers can be
 13 | chained together to make a larger parser. For example, to make a parser that parses the string
 14 | @racket["ab"], you might compose two parsers that parse the characters @racket[#\a] and @racket[#\b]
 15 | individually.
 16 | 
 17 | @section[#:tag "getting-started"]{Getting started with parsers}
 18 | 
 19 | To get started, require the @racketmodname[megaparsack] and @racketmodname[megaparsack/text]
 20 | libraries.
 21 | 
 22 | @(racketinput
 23 |   (require #,(racketmodname megaparsack) #,(racketmodname megaparsack/text)))
 24 | 
 25 | This will import the basic parser functions, as well as some built-in parsers for parsing textual
 26 | data. Now, you can use the @racket[parse-string] function along with basic parsers to parse values
 27 | from strings. Let’s start by parsing an integer:
 28 | 
 29 | @(parser-interaction
 30 |   (eval:check (parse-string integer/p "42") (success 42)))
 31 | 
 32 | Since the parser was successful, it returns a @racket[success] value. The @racket[parse-string]
 33 | function returns an @functech{either} value that represents success and failure. For example, take a
 34 | look at what would happen when a parse fails:
 35 | 
 36 | @(parser-interaction
 37 |   (parse-string integer/p "not an integer"))
 38 | 
 39 | When a parse fails, it returns a @racket[failure] value that encodes some information about what
 40 | caused the parser to error. You can convert that information to a human-readable error message using
 41 | the @racket[parse-error->string] function:
 42 | 
 43 | @(parser-interaction
 44 |   (map-failure parse-error->string (parse-string integer/p "not an integer")))
 45 | 
 46 | You can also assert that a parse will succeed and just get the result out by using the
 47 | @racket[parse-result!] function, which will throw an @racket[exn:fail:read:megaparsack] exception
 48 | when the parser fails.
 49 | 
 50 | @(parser-interaction
 51 |   (eval:check (parse-result! (parse-string integer/p "42")) 42)
 52 |   (eval:error (parse-result! (parse-string integer/p "not an integer"))))
 53 | 
 54 | You may notice that the error message includes some useful information. Specifically, megaparsack will
 55 | attempt to provide the following information to the user whenever a parse fails:
 56 | 
 57 |   @itemlist[
 58 |     @item{the source location of the error that caused the parse to fail}
 59 |     @item{the token that was “unexpected”, which caused the parse to fail}
 60 |     @item{a set of values that were “expected”, which would have been accepted as valid values for the
 61 |           parse}]
 62 | 
 63 | In the above case, the parser reports that it expected an integer, but it encountered the character
 64 | @tt{n}, which obviously isn’t a valid piece of an integer.
 65 | 
 66 | @section[#:tag "parsing-textual-data"]{Parsing textual data}
 67 | 
 68 | The @racket[integer/p] parser, as would be expected, parses a single integer. However, this isn’t very
 69 | useful on its own—most of the time, you will want to parse something much more complicated than that.
 70 | However, it is a useful building block for creating larger parsers. Let’s look at some other “building
 71 | block” parsers that work with strings.
 72 | 
 73 | The @racket[letter/p], @racket[digit/p], and @racket[space/p] parsers parse a single letter, digit, or
 74 | whitespace character, respectively:
 75 | 
 76 | @margin-note{
 77 |   Note that these parsers succeed even when only part of the input is consumed. This is important when
 78 |   combining parsers together, but if you want to ensure a parser parses the entire input, you can use
 79 |   @racket[eof/p].}
 80 | 
 81 | @(parser-interaction
 82 |   (eval:check (parse-string letter/p "hello") (success #\h))
 83 |   (eval:check (parse-string digit/p "123") (success #\1))
 84 |   (eval:check (parse-string space/p " ") (success #\space)))
 85 | 
 86 | The @racket[char/p] function creates a parser that parses a single character:
 87 | 
 88 | @(parser-interaction
 89 |   (eval:check (parse-string (char/p #\a) "abc") (success #\a))
 90 |   (eval:error (parse-result! (parse-string (char/p #\a) "xyz"))))
 91 | 
 92 | It may not make very much sense why the @racket[char/p] parser is useful—after all, it just seems to
 93 | return itself. Indeed, in these contrived examples, it’s not very useful at all! However, it becomes
 94 | @emph{extremely} important when combining multiple parsers together.
 95 | 
 96 | @section[#:tag "sequencing-parsers"]{Sequencing parsers}
 97 | 
 98 | @(define-parser-interaction sequencing-interaction close-sequencing!)
 99 | 
100 | All @tech{parsers} are @functech{monads}, so it’s possible to use @racket[chain] and @racket[do] from
101 | @racketmodname[data/monad] to combine multiple parsers together to create a bigger parser. For
102 | example, let’s create a parser that parses the letters @tt{a} and @tt{b} in sequence:
103 | 
104 | @(sequencing-interaction
105 |   (eval:alts (require @#,racketmodname[data/monad])
106 |              (void))
107 |   (define ab/p
108 |     (do (char/p #\a)
109 |         (char/p #\b))))
110 | 
111 | Now we can use our new parser just like any other:
112 | 
113 | @(sequencing-interaction
114 |   (eval:check (parse-string ab/p "ab") (success #\b))
115 |   (eval:error (parse-result! (parse-string ab/p "ac"))))
116 | 
117 | The parser succeeds when we supply it with the string @racket["ab"], but it fails when it doesn’t
118 | match, and we automatically get a pretty good error message.
119 | 
120 | One thing to note is that the result of the parser is not particularly meaningful—it’s just
121 | @racket[#\b]. That’s because the last parser in the @racket[do] block was @racket[(char/p #\b)], so
122 | the result of @racket[ab/p] is just the result of its final parser. If we wanted to, we could change
123 | the result to be whatever we wanted (but only on a successful parse) by returning our own value at the
124 | end of the @racket[do] block:
125 | 
126 | @(sequencing-interaction
127 |   (define ab*/p
128 |     (do (char/p #\a)
129 |         (char/p #\b)
130 |         (pure "success!"))))
131 | 
132 | We need the @racket[pure] wrapper in order to properly “lift” our arbitrary value into the context of
133 | a parser. Now we can run our new parser and get back our custom value when it succeeds:
134 | 
135 | @(sequencing-interaction
136 |   (eval:check (parse-string ab*/p "ab") (success "success!")))
137 | 
138 | This parser is a little silly, but we can use these concepts to implement parsers that might actually
139 | be useful. For example, you might need to parser two integers, separated by a comma, then add them
140 | together. Using the monadic parser interface, this is extremely simple:
141 | 
142 | @(sequencing-interaction
143 |   (define add-two-ints/p
144 |     (do [x <- integer/p]
145 |         (char/p #\,)
146 |         [y <- integer/p]
147 |         (pure (+ x y)))))
148 | 
149 | This definition is a little bit more complicated because we are using the results of the two integer
150 | parsers in our sequence, so we use the @racket[[_a <- _b]] syntax to “pull out” the result of each
151 | parser and bind it to a variable. Then we can add the two results together at the end. Actually using
152 | this parser works just as intended:
153 | 
154 | @(sequencing-interaction
155 |   (eval:check (parse-string add-two-ints/p "7,12") (success 19)))
156 | 
157 | Using this technique, it’s possible to build up fairly complex parsers from small, self-contained
158 | units.
159 | 
160 | @(close-sequencing!)
161 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack/choice.scrbl:
--------------------------------------------------------------------------------
  1 | #lang scribble/manual
  2 | 
  3 | @(require "util.rkt")
  4 | 
  5 | @(define-parser-interaction choice-interaction close-choice!)
  6 | 
  7 | @title[#:tag "choice" #:style 'toc]{Parsers with Choice}
  8 | 
  9 | Most grammars are not completely static—usually, there is an element of @emph{choice}. For example,
 10 | when parsing a boolean, a valid value is @tt{true} or @tt{false}. Even more complicated, when parsing
 11 | a list of elements, a valid input might be @emph{any number of booleans}, all next to one another. To
 12 | handle these kinds of grammars, a parser can be provided with multiple paths, each of which are valid.
 13 | 
 14 | @local-table-of-contents[]
 15 | 
 16 | @section[#:tag "parsing-branching"]{Providing multiple paths}
 17 | 
 18 | To create a parser with multiple possibilities, use the @racket[or/p] combinator. It accepts any
 19 | number of parsers, and it tries them one at a time until one of them matches. For example, let’s
 20 | consider a parser that parses either the string @racket["true"] or @racket["false"], then returns the
 21 | value as a Racket boolean:
 22 | 
 23 | @(choice-interaction
 24 |   (define true/p
 25 |     (do (string/p "true")
 26 |         (pure #t)))
 27 |   (define false/p
 28 |     (do (string/p "false")
 29 |         (pure #f)))
 30 |   (define boolean/p
 31 |     (or/p true/p
 32 |           false/p)))
 33 | 
 34 | By using @racket[or/p], we’ve created a choice point, where the parser will try each path before
 35 | giving up. If none of the paths match, the parser will still fail, but if any of the paths match, it
 36 | will consider the parse successful and return. To demonstrate that this works, we can use our new
 37 | parser on some strings:
 38 | 
 39 | @(choice-interaction
 40 |   (eval:check (parse-string boolean/p "true") (success #t))
 41 |   (eval:check (parse-string boolean/p "false") (success #f)))
 42 | 
 43 | The @racket[or/p] combinator also automatically cooperates with error handling to provide helpful
 44 | error messages when parsing fails:
 45 | 
 46 | @(choice-interaction
 47 |   (eval:error (parse-result! (parse-string boolean/p "not a boolean"))))
 48 | 
 49 | Note that the error includes all the possible values that would have been considered valid at the
 50 | point that the parser failed.
 51 | 
 52 | Remember that the @racket[or/p] combinator is not magic: it does not attempt to predict which parse
 53 | will be valid, and it does not even try to look ahead to see which parse will be the longest. This can
 54 | cause problems when two different parses could @emph{both} succeed—@racket[or/p] will just pick the
 55 | first one:
 56 | 
 57 | @(parser-interaction
 58 |   (define overlapping/p
 59 |     (or/p (string/p "hello")
 60 |           (string/p "hello, world!")))
 61 |   (eval:check (parse-string overlapping/p "hello, world!") (success "hello")))
 62 | 
 63 | Just like ordinary boolean @racket[or], keep in mind that order does matter with @racket[or/p].
 64 | 
 65 | @subsection{Parsing ambiguous grammars}
 66 | 
 67 | So, if @racket[or/p] does not perform any lookahead, how exactly does it choose between parsers? It
 68 | might @emph{seem} like it tries each parser completely, then backtracks when any of them fail, but
 69 | this is not entirely true—consider the parser above, fixed so the longest match is first:
 70 | 
 71 | @(choice-interaction
 72 |   (define overlapping/p
 73 |     (or/p (string/p "hello, world!")
 74 |           (string/p "hello"))))
 75 | 
 76 | You might expect that, if the first match fails, it will try the second one, but in practice, this
 77 | doesn’t actually work:
 78 | 
 79 | @(choice-interaction
 80 |   (eval:check (parse-string overlapping/p "hello, world!") (success "hello, world!"))
 81 |   (eval:error (parse-result! (parse-string overlapping/p "hello"))))
 82 | 
 83 | What gives? Take a close look at the error message: it is expecting the rest of @tt{hello, world!},
 84 | but obviously we only gave it @tt{hello}. Why isn’t the parser backtracking? Well, megaparsack
 85 | actually does not backtrack by default. Instead, it implements a single-character lookahead: it tries
 86 | to parse the first token from each branch, and if it succeeds, it @emph{commits} to that path.
 87 | 
 88 | This means that, since part of the @tt{hello, world} parse was successful, the parser has already
 89 | committed to that branch and will not try any of the other options. This turns out to provide far
 90 | superior error reporting because it reports to the user precisely where the error occurred, not
 91 | somewhere much earlier in the parse. However, this obviously causes problems in this case where the
 92 | parse is @emph{ambiguous}, or more generally, the choice cannot be determined by a single character
 93 | of lookahead.
 94 | 
 95 | To solve this by allowing the parser to backtrack, use the @racket[try/p] combinator, which converts
 96 | a parser into one that backtracks upon failure. We can use this to solve our issue with our parser:
 97 | 
 98 | @(choice-interaction
 99 |   (define backtracking-overlapping/p
100 |     (or/p (try/p (string/p "hello, world!"))
101 |           (string/p "hello")))
102 |   (eval:check (parse-string backtracking-overlapping/p "hello, world!") (success "hello, world!"))
103 |   (eval:check (parse-string backtracking-overlapping/p "hello") (success "hello")))
104 | 
105 | All that @racket[try/p] does is disable the “committing” behavior mentioned earlier: instead of
106 | committing to a particular path once any of the parse succeeds, any error that occurs within the
107 | parser provided to @racket[try/p] is non-fatal, and the parser will backtrack and try the next
108 | alternative.
109 | 
110 | @subsection{Backtracking with caution}
111 | 
112 | In this case, since the parse is truly ambiguous based on the first character, @racket[try/p] is the
113 | correct approach. Note that the error messages are still helpful upon failure:
114 | 
115 | @(choice-interaction
116 |   (eval:error (parse-result! (parse-string backtracking-overlapping/p "not hello"))))
117 | 
118 | However, be deliberate about where you put @racket[try/p] because it is very easy to end up with a
119 | parser that provides completely useless error messages because all errors simply backtrack instead of
120 | failing fast and reporting the real problem. For an example of this, consider a parser that parses an
121 | integer or a boolean, depending on a label provided first:
122 | 
123 | @(choice-interaction
124 |   (define the-labeled-integer/p
125 |     (do (string/p "the integer: ")
126 |         integer/p))
127 |   (define the-labeled-boolean/p
128 |     (do (string/p "the boolean: ")
129 |         boolean/p)))
130 | 
131 | It might be tempting to use @racket[try/p] here because we know that the integer case might fail.
132 | Therefore, you might write the parser like this:
133 | 
134 | @(choice-interaction
135 |   (define try-labeled/p
136 |     (or/p (try/p the-labeled-integer/p)
137 |           the-labeled-boolean/p)))
138 | 
139 | This parser seems innocuous enough, right? It even works successfully:
140 | 
141 | @(choice-interaction
142 |   (eval:check (parse-string try-labeled/p "the integer: 42") (success 42))
143 |   (eval:check (parse-string try-labeled/p "the boolean: false") (success #f)))
144 | 
145 | But there is a lurking problem with this parser, and that’s its error messages. Consider a mismatch,
146 | when we provide the @tt{the integer:} label but do not actually provide an integer:
147 | 
148 | @(choice-interaction
149 |   (eval:error (parse-result! (parse-string try-labeled/p "the integer: false"))))
150 | 
151 | Oops. What happened? Well, the parser tried to parse an integer, but it failed, so it backtracked. It
152 | then tried to parse a boolean, and it parsed the @tt{the}, but then it failed, too, so it reported an
153 | error message. To a user, though, that error message is totally useless. The @emph{actual} issue is
154 | that they should have provided an integer, but instead provided a boolean. Unfortunately, the
155 | overzealous backtracking has eliminated that information.
156 | 
157 | This is tricky, because we can’t just drop the @racket[try/p]—since both cases share @tt{the}, the
158 | parse is ambiguous without a little bit of lookahead. In order to fix this, what we really want to
159 | do is factor out the common @tt{the}, which will allow us to eliminate the @racket[try/p] altogether:
160 | 
161 | @(choice-interaction
162 |   (define labeled-integer/p
163 |     (do (string/p "integer: ")
164 |         integer/p))
165 |   (define labeled-boolean/p
166 |     (do (string/p "boolean: ")
167 |         boolean/p))
168 |   (define labeled/p
169 |     (do (string/p "the ")
170 |         (or/p labeled-integer/p
171 |               labeled-boolean/p))))
172 | 
173 | Since we’ve removed all of the uses of @racket[try/p], now the parser can provide much more precise
174 | error messages when we provide invalid input.
175 | 
176 | @(choice-interaction
177 |   (eval:error (parse-result! (parse-string labeled/p "the integer: false"))))
178 | 
179 | @section{Parsing sequences}
180 | 
181 | Using @racket[or/p], it is possible to choose between alternatives when parsing, but what if a
182 | particular grammar permits @emph{any number of} elements in sequence? For that, you can use the
183 | @racket[many/p] combinator. It accepts a parser and attempts to parse it over and over again until
184 | it fails. For example, here is a parser that parses any number of occurrences of the letter @tt{a}:
185 | 
186 | @(parser-interaction
187 |   (eval:check (parse-string (many/p (char/p #\a)) "") (success '()))
188 |   (eval:check (parse-string (many/p (char/p #\a)) "a") (success '(#\a)))
189 |   (eval:check (parse-string (many/p (char/p #\a)) "aaaa") (success '(#\a #\a #\a #\a))))
190 | 
191 | This allows creating grammars that parse arbitrary numbers of values. The @racket[many/p] combinator
192 | accepts an optional keyword argument @racket[#:min] to specify a minimum number of values to
193 | parse. This can be used to parse two integers separated by some amount of whitespace, for example.
194 | 
195 | @(parser-interaction
196 |   (define two-integers/p
197 |     (do [x <- integer/p]
198 |         (many/p space/p #:min 1)
199 |         [y <- integer/p]
200 |         (pure (list x y))))
201 |   (eval:check (parse-string two-integers/p "13     102") (success '(13 102))))
202 | 
203 | Perhaps even more frequently, though, you may want to parse some number of values separated by some
204 | delimiter. For example, perhaps you want to parse a whole list of integers separated by commas. That
205 | can be accomplished by passing a parser for the @racket[#:sep] argument to @racket[many/p].
206 | 
207 | @(parser-interaction
208 |   (define many-integers/p
209 |     (many/p integer/p #:sep (char/p #\,)))
210 |   (eval:check (parse-string many-integers/p "1,2,3,5,8,13") (success '(1 2 3 5 8 13))))
211 | 
212 | Often an unbounded number of values is undesirable: some limit is desired. The @racket[#:max]
213 | argument to @racket[many/p] allows specifying a max number of values to parse. For example, we may
214 | not wish to allow more than five comma-separated integers.
215 | 
216 | @(parser-interaction
217 |   (define at-most-five-integers/p
218 |     (many/p integer/p #:sep (char/p #\,) #:max 5))
219 |   (eval:check (parse-string at-most-five-integers/p "1,2,3") (success '(1 2 3)))
220 |   (eval:check (parse-string at-most-five-integers/p "1,2,3,5,8,13") (success '(1 2 3 5 8))))
221 | 
222 | @(close-choice!)
223 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack/differences-from-parsack.scrbl:
--------------------------------------------------------------------------------
 1 | #lang scribble/manual
 2 | 
 3 | @(require "util.rkt")
 4 | 
 5 | @title[#:tag "differences-from-parsack"]{Appendix: Parsack vs Megaparsack}
 6 | 
 7 | Megaparsack is not the only port of Parsec to Racket, and in fact, it isn’t even the first. The
 8 | original Parsec port is @hyperlink["http://docs.racket-lang.org/parsack/index.html"]{Parsack}. When
 9 | looking for a parser combinator library, you might be unsure which one to pick, so this attempts to
10 | provide an unbiased comparison between the two libraries.
11 | 
12 | Without getting into the nitty gritty details, here’s a quick overview of the differences:
13 | 
14 | @itemlist[
15 |  @item{Megaparsack is about two and a half years newer, initially released in the spring of 2016,
16 |        while Parsack was released in the fall of 2013.}
17 | 
18 |  @item{Probably the most significant difference in the two libraries’ APIs is that Megaparsack can
19 |        parse arbitrary tokens as input, while Parsack is specialized to exclusively operate on text or
20 |        bytes. This allows Megaparsack to operate after an initial lexing phase (such as using
21 |        @racketmodname[parser-tools/lex] with @racketmodname[megaparsack/parser-tools/lex]), while
22 |        Parsack is designed to exclusively parse input directly.}
23 | 
24 |  @item{Megaparsack supports the production of syntax objects from parsers automatically, whereas
25 |        Parsack does not.}
26 | 
27 |  @item{A less impactful difference but still significant design difference is that Megaparsack
28 |        implements the @racket[gen:functor], @racket[gen:applicative], and @racket[gen:monad]
29 |        interfaces from the @seclink["top" #:doc '(lib "scribblings/data/functional.scrbl")]{
30 |        @tt{functional}} library, while Parsack is entirely monomorphic and provides its own
31 |        sequencing and composition operators.}
32 | 
33 |  @item{Megaparsack provides contracts on parsers, while Parsack only includes a simple predicate. This
34 |        is more important for Megaparsack because of the different token types that parsers can accept,
35 |        but it’s also useful in general for denoting what parsers can produce.}
36 | 
37 |  @item{As a consequence of the above four differences, Parsack is currently @italic{considerably}
38 |        faster than Megaparsack, @bold{by more than an order of magnitude}.}
39 | 
40 |  @item{Megaparsack’s documentation is better than Parsack’s, and it includes a tutorial-style guide.}
41 | 
42 |  @item{Megaparsack’s naming conventions are somewhat closer to idiomatic Racket, whereas Parsack’s
43 |        names are more directly ported from Haskell.}
44 | 
45 |  @item{Megaparsack provides @tech{parser parameters} for maintaining arbitrarily many distinct cells
46 |        of user-defined parser state, whereas Parsack only provides a single cell of state.}
47 | 
48 |  @item{Both Megaparsack and Parsack use the same general model for parsing, backtracking, and error
49 |        reporting, which is adapted from the common parent, Parsec.}
50 | 
51 |  @item{Both Megaparsack and Parsack expose a monadic interface for composing and sequencing parsers
52 |        together, and they both provide a minimal set of combinators for producing new parsers from
53 |        primitives.}]
54 | 
55 | My general recommendation is to use Megaparsack unless performance is an issue, at which point it may
56 | be worth it to use Parsack, instead. However, while some of Megaparsack’s design decisions do make it
57 | inherently somewhat slower than Parsack, it’s likely that a lot of Megaparsack can be optimized much
58 | more than it currently is. If you run into performance problems with Megaparsack, feel free to open
59 | a bug report, and it might be possible to make Megaparsack palatably fast.
60 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack/reference.scrbl:
--------------------------------------------------------------------------------
  1 | #lang scribble/manual
  2 | 
  3 | @(require "util.rkt")
  4 | 
  5 | @title[#:tag "reference"]{API Reference}
  6 | 
  7 | A @tech{parser} is a value that represents a method of turning a syntax object or sequence of
  8 | syntax objects an arbitrary Racket value. Parsers can be created using various primitives, then
  9 | sequenced together using parser combinators to create larger parsers.
 10 | 
 11 | Parsers are @functech{functors}, @functech{applicative functors}, and @functech{monads}, which allows
 12 | them to be mapped over and sequenced together using the corresponding generic interfaces.
 13 | 
 14 | @section[#:tag "primitives"]{Primitives}
 15 | 
 16 | @defproc[(parser? [v any/c]) boolean?]{
 17 | Returns @racket[#t] if @racket[v] is a @tech{parser}, otherwise returns @racket[#f].}
 18 | 
 19 | @defproc[(parser/c [in-ctc contract?] [out-ctc contract?]) contract?]{
 20 | Produces a @reftech{contract} that recognizes @tech{parsers}. Tokens consumed by the parser must match
 21 | @racket[in-ctc], and values returned by the parser must match @racket[out-ctc].
 22 | 
 23 | If both @racket[in-ctc] and @racket[out-ctc] are @reftech{chaperone contracts}, then the result will
 24 | also be a @reftech{chaperone contract}.}
 25 | 
 26 | @defproc[(parse [parser parser?] [boxes (listof syntax-box?)]) (either/c message? any/c)]{
 27 | Runs @racket[parser] on @racket[boxes] and returns either the result of a successful parse or a value
 28 | that includes information about the parse failure.}
 29 | 
 30 | @defproc[(parse-error->string [message message?]) string?]{
 31 | Converts a parse error to a human-readable string. This is used by @racket[parse-result!] to format
 32 | the message used in the exception, but it can also be used if you want to display the error in other
 33 | ways.}
 34 | 
 35 | @defproc[(parse-result! [result (either/c message? any/c)]) any/c]{
 36 | Extracts a successful parse value from @racket[result]. If @racket[result] is a failure, raises
 37 | @racket[exn:fail:read:megaparsack] with the failure message converted to a string using
 38 | @racket[parse-error->string].}
 39 | 
 40 | @defstruct*[(exn:fail:read:megaparsack exn:fail:read) ([unexpected any/c]
 41 |                                                        [expected (listof string?)])
 42 |             #:transparent]{
 43 | Raised by @racket[parse-result!] when given a parse failure.}
 44 | 
 45 | @defstruct*[syntax-box ([datum any/c] [srcloc srcloc?]) #:transparent]{
 46 | Represents a single parsable entity. Just like @reftech{syntax objects}, a @deftech{syntax box}
 47 | associates some source location information with an arbitrary datum. However, unlike ordinary syntax
 48 | objects, values like lists and vectors can be stored in a syntax box without being recursively wrapped
 49 | in additional layers of syntax objects.
 50 | 
 51 | The @racket[datum] can be anything at all, but usually it is either a character or some token produced
 52 | as the result of lexing. It is unlikely that you will need to create @racket[syntax-box] values
 53 | yourself; rather, use higher-level functions like @racket[parse-string] that create these values for
 54 | you.}
 55 | 
 56 | @defstruct*[message ([srcloc srcloc?] [unexpected any/c] [expected (listof string?)]) #:transparent]{
 57 | Represents a parse error. Generally you will not need to construct or use these yourself, since they
 58 | will be automatically constructed when parsers fail, and you can convert them to a human-readable
 59 | error message using @racket[parse-error->string]. For more complicated use cases, though, you may want
 60 | to raise custom parse errors using @racket[fail/p] or format your own error messages, so you can use
 61 | this structure directly.}
 62 | 
 63 | @defthing[void/p (parser/c any/c void?)]{
 64 | A parser that always succeeds and always returns @|void-const|.}
 65 | 
 66 | @defproc[(or/p [parser parser?] ...+) parser?]{
 67 | Tries each @racket[parser] in succession until one either succeeds or consumes input, at which point
 68 | its result will be returned as the result of the overall parse. Parsers that consume input but fail
 69 | will halt further parsers from being tried and will simply return an error; if backtracking is
 70 | desired, the parser should be wrapped with @racket[try/p].
 71 | 
 72 | @history[#:changed "1.5" @elem{Changed to always return the first successful result, rather than
 73 |            continuing to try parsers until one consumes input. The new behavior is more predictable
 74 |            and more consistent with existing Parsec implementations, though the old behavior was
 75 |            more consistent with the presentation in the original paper.}]}
 76 | 
 77 | @defproc[(try/p [parser parser?]) parser?]{
 78 | Creates a new parser like @racket[parser], except that it does not consume input if @racket[parser]
 79 | fails. This allows the parser to backtrack and try other alternatives when used inside a
 80 | @racket[or/p] combinator.}
 81 | 
 82 | @defproc[(noncommittal/p [parser parser?]) parser?]{
 83 | @margin-note{
 84 |   Note that unlike @racket[lookahead/p], @racket[noncommittal/p] @emph{only} affects backtracking;
 85 |   the consumed input is still removed from the input stream. If true lookahead is desired, use
 86 |   @racket[lookahead/p], instead.}
 87 | 
 88 | Creates a new parser like @racket[parser], except that it is not considered to have consumed
 89 | input for the purposes of backtracking if @racket[parser] succeeds. This allows a future failure to
 90 | backtrack to an earlier choice point (assuming the failure does not itself consume input).
 91 | 
 92 | @(parser-examples
 93 |   (eval:error (parse-result!
 94 |                (parse-string (or/p (do (char/p #\a) (char/p #\b))
 95 |                                    (do (char/p #\a) (char/p #\c)))
 96 |                              "ac")))
 97 |   (eval:check (parse-result!
 98 |                (parse-string (or/p (do (noncommittal/p (char/p #\a)) (char/p #\b))
 99 |                                    (do (char/p #\a) (char/p #\c)))
100 |                              "ac"))
101 |               #\c))
102 | 
103 | This can be useful to allow backtracking within a parse sequence in a more limited fashion than
104 | @racket[try/p] would allow. In particular, contrast the meaning of
105 | @nested[#:style 'inset]{@racket[(try/p (do _parser-a _parser-b))]}
106 | with that of
107 | @nested[#:style 'inset]{@racket[(do (noncommittal/p _parser-a) _parser-b)].}
108 | The version using @racket[try/p] will always backtrack if either @racket[_parser-a] or
109 | @racket[_parser-b] fails in any way. However, the version using @racket[noncommittal/p] will not
110 | backtrack if either parser fails in a way that would not ordinarily backtrack; it just allows a
111 | recoverable failure in @racket[_parser-b] to backtrack to a choice point that occurred prior to
112 | @racket[_parser-a].
113 | 
114 | Note that @racket[noncommittal/p] does @emph{not} affect whether @racket[parser] is considered to have
115 | consumed input if it fails, which is to say that @racket[try/p] and @racket[noncommittal/p] are
116 | orthogonal and should be combined if both behaviors are desired.
117 | 
118 | @history[#:added "1.7"]}
119 | 
120 | @(define-parser-interaction lookahead-interaction close-lookahead!)
121 | 
122 | @defproc[(lookahead/p [parser parser?]) parser?]{
123 | Creates a new parser like @racket[parser], except that it does not consume input if @racket[parser]
124 | succeeds, so subsequent parsers will continue from the same location in the input stream. This
125 | allows a parser to ensure something will appear in future input without immediately consuming it.
126 | 
127 | For example, @racket[lookahead/p] can be used to implement a parser that only succeeds at the end of a
128 | line, but does not consume the newline character itself:
129 | 
130 | @(lookahead-interaction
131 |   (define end-of-line/p (lookahead/p (char/p #\newline))))
132 | 
133 | This can be used to parse, for example, line comments that span to the end of the current line, while
134 | still allowing a later parser to consume the newline character:
135 | 
136 | @(lookahead-interaction
137 |   (define rest-of-line/p
138 |     (or/p (do end-of-line/p (pure ""))
139 |           (do [c <- any-char/p]
140 |               [cs <- rest-of-line/p]
141 |               (pure (string-append (string c) cs)))))
142 |   (define line-comment/p
143 |     (do (try/p (string/p "# "))
144 |         rest-of-line/p))
145 |   (eval:check (parse-string (many/p (do [line <- line-comment/p]
146 |                                         (char/p #\newline)
147 |                                         (pure line)))
148 |                             (string-append "# hello\n"
149 |                                            "# world\n"))
150 |               (success (list "hello" "world"))))
151 | 
152 | Note that if @racket[parser] @emph{fails}, @racket[lookahead/p] has no effect; if it consumed input
153 | before failing, it will not try other alternatives in an enclosing @racket[or/p]. Wrap @racket[parser]
154 | with @racket[try/p] if this behavior is undesirable.
155 | 
156 | @history[#:added "1.5"]}
157 | 
158 | @(close-lookahead!)
159 | 
160 | @defproc[(satisfy/p [proc (any/c . -> . any/c)]) parser?]{
161 | Creates a parser that checks if @racket[proc] produces a non-@racket[#f] value when applied to a
162 | single datum. If so, it consumes the datum and returns successfully; otherwise, it fails without
163 | consuming input.}
164 | 
165 | @defthing[eof/p (parser/c any/c void?)]{
166 | A parser that only succeeds when there is no more input left to consume. It always returns
167 | @|void-const|.}
168 | 
169 | @defproc[(label/p [label string?] [parser parser?]) parser?]{
170 | Creates a parser like @racket[parser], except that failures are reported in terms of @racket[label]
171 | instead of whatever names would have been used instead.}
172 | 
173 | @defproc[(hidden/p [parser parser?]) parser?]{
174 | Like @racket[label/p], adjusts how failures are reported for @racket[parser], but @racket[hidden/p]
175 | completely hides any failure information produced by @racket[parser] when reporting errors. (This is
176 | useful when parsing things like whitespace which are usually not useful to include in error
177 | messages.)}
178 | 
179 | @defproc[(syntax/p [parser parser?]) (parser/c any/c syntax?)]{
180 | Produces a parser like @racket[parser], except that its result is wrapped in a @reftech{syntax object}
181 | that automatically incorporates source location information from the input stream. This allows parsers
182 | to add a sort of automated source location tracking to their output.
183 | 
184 | The @racket[syntax/p] combinator makes source location wrapping opt-in, which is desirable since it is
185 | often useful to return values from combinators that are intermediate values not intended to be wrapped
186 | in syntax (for example, @racket[many/p] returns a list of results, not a syntax list).}
187 | 
188 | @defproc[(syntax-box/p [parser parser?]) (parser/c any/c syntax-box?)]{
189 | Like @racket[syntax/p], but wraps the result in a @racket[syntax-box] instead of a @reftech{syntax
190 | object}. This is useful if you want to get the source location information from a parse result, but
191 | you want to ensure the underlying datum remains untouched.}
192 | 
193 | @defproc[(fail/p [msg message?]) (parser/c any/c none/c)]{
194 | Produces a parser that always fails and produces @racket[msg] as the error message. This is the
195 | lowest-level way to report errors, but many cases in which you would want to raise a custom failure
196 | message can be replaced with @racket[guard/p] instead, which is slightly higher level.}
197 | 
198 | @defform[(delay/p parser-expr)
199 |          #:contracts ([parser-expr parser?])]{
200 | Creates a parser that delays evaluation of @racket[parser-expr] until the first time it is applied to
201 | input. Otherwise, the parser’s behavior is identical to that of @racket[parser-expr]. The parser
202 | returned by @racket[delay/p] never evaluates @racket[parser-expr] more than once, even if it’s applied
203 | to input multiple times.
204 | 
205 | @(parser-examples
206 |   (define delayed/p (delay/p (begin (println 'evaluated)
207 |                                     (char/p #\a))))
208 |   (eval:check (parse-result! (parse-string delayed/p "a")) #\a)
209 |   (eval:check (parse-result! (parse-string delayed/p "a")) #\a))
210 | 
211 | @racket[delay/p] can be used to delay evaluation in situations where a (possibly mutually) recursive
212 | parser would otherwise require evaluating a parser before its definition. For example:
213 | 
214 | @(parser-interaction
215 |   (define one/p (or/p (char/p #\.)
216 |                       (delay/p two/p)))
217 |   (define two/p (list/p (char/p #\a) one/p))
218 |   (eval:check (parse-result! (parse-string one/p "aa.")) '(#\a (#\a #\.))))
219 | 
220 | Without the use of @racket[delay/p], the reference to @racket[two/p] would be evaluated too soon
221 | (since @racket[or/p] is an ordinary function, unlike @racket[or]).
222 | 
223 | Note that the @racket[delay/p] expression itself may be evaluated multiple times, in which case the
224 | @racket[parser-expr] may be as well (since each evaluation of @racket[delay/p] creates a separate
225 | parser). This can easily arise from uses of @racket[do], since @racket[do] is syntactic sugar for
226 | nested uses of @racket[lambda], though it might not be syntactically obvious that the @racket[delay/p]
227 | expression appears under one such @racket[lambda]. For example:
228 | 
229 | @(parser-interaction
230 |   (define sneaky-evaluation/p
231 |     (do (char/p #\a)
232 |         (delay/p (begin (println 'evaluated)
233 |                         (char/p #\b)))))
234 |   (eval:check (parse-result! (parse-string sneaky-evaluation/p "ab")) #\b)
235 |   (eval:check (parse-result! (parse-string sneaky-evaluation/p "ab")) #\b))
236 | 
237 | In other words, @racket[delay/p] doesn’t perform any magical memoization or caching, so it can’t be
238 | used to prevent a parser from being evaluated multiple times, only to delay its evaluation to a later
239 | point in time.
240 | 
241 | @history[#:added "1.6"]}
242 | 
243 | @defproc[(==/p [v any/c] [=? (any/c any/c . -> . any/c) equal?]) parser?]{
244 | Produces a parser that succeeds when a single datum is equal to @racket[v], as determined by
245 | @racket[=?]. Like @racket[satisfy/p], it consumes a single datum upon success but does not consume
246 | anything upon failure.}
247 | 
248 | @defproc[(one-of/p [vs list?] [=? (any/c any/c . -> . any/c) equal?]) parser?]{
249 | Like @racket[(or/p (==/p _v =?) ...)]. Produces a parser that succeeds when a single datum is
250 | equal to any of the elements of @racket[vs], as determined by @racket[=?]. Like @racket[satisfy/p],
251 | it consumes a single datum upon success but does not consume anything upon failure.
252 | 
253 | @(parser-examples
254 |   (eval:check (parse-result! (parse-string (one-of/p '(#\a #\b)) "a")) #\a)
255 |   (eval:check (parse-result! (parse-string (one-of/p '(#\a #\b)) "b")) #\b)
256 |   (eval:error (parse-result! (parse-string (one-of/p '(#\a #\b)) "c"))))
257 | 
258 | @history[#:added "1.2"]}
259 | 
260 | @defproc[(guard/p [parser parser?] [pred? (any/c . -> . any/c)]
261 |                   [expected (or/c string? #f) #f] [make-unexpected (any/c . -> . any/c) identity])
262 |          parser?]{
263 | Produces a parser that runs @racket[parser], then applies a guard predicate @racket[pred?] to the
264 | result. If the result of @racket[pred?] is @racket[#f], then the parser fails, otherwise the parser
265 | succeeds and produces the same result as @racket[parser].
266 | 
267 | If the parser fails and @racket[expected] is a string, then @racket[expected] is used to add
268 | expected information to the parser error. Additionally, the @racket[make-unexpected] function is
269 | applied to the result of @racket[parser] to produce the @racket[unexpected] field of the parse error.
270 | 
271 | @(parser-examples
272 |   (define small-integer/p
273 |     (guard/p integer/p (λ (x) (<= x 100))
274 |              "integer in range [0,100]"))
275 |   (eval:check (parse-result! (parse-string small-integer/p "42")) 42)
276 |   (eval:error (parse-result! (parse-string small-integer/p "300"))))}
277 | 
278 | @defproc[(list/p [parser parser?] ... [#:sep sep parser? void/p]) (parser/c any? list?)]{
279 |  Returns a @tech{parser} that runs each @racket[parser] in sequence separated by @racket[sep] and
280 |  produces a list containing the results of each @racket[parser]. The results of @racket[sep] are
281 |  ignored.
282 | 
283 |  @(parser-examples
284 |    (define dotted-let-digit-let/p
285 |      (list/p letter/p digit/p letter/p #:sep (char/p #\.)))
286 |    (eval:check (parse-result! (parse-string dotted-let-digit-let/p "a.1.b")) (list #\a #\1 #\b))
287 |    (eval:error (parse-result! (parse-string dotted-let-digit-let/p "a1c")))
288 |    (eval:error (parse-result! (parse-string dotted-let-digit-let/p "a.1"))))
289 | 
290 |  Using a separator parser that consumes no input (such as the default separator, @racket[void/p]) is
291 |  equivalent to not using a separator at all.
292 | 
293 |  @(parser-examples
294 |    (define let-digit-let/p (list/p letter/p digit/p letter/p))
295 |    (eval:check (parse-result! (parse-string let-digit-let/p "a1b")) (list #\a #\1 #\b)))}
296 | 
297 | @subsection[#:tag "repetition"]{Repetition}
298 | 
299 | @defproc[(many/p [parser parser?]
300 |                  [#:sep sep parser? void/p]
301 |                  [#:min min-count exact-nonnegative-integer? 0]
302 |                  [#:max max-count (or/c exact-nonnegative-integer? +inf.0) +inf.0])
303 |          (parser/c any/c list?)]{
304 | Produces a parser that attempts @racket[parser] at least @racket[min-count] times and at most
305 | @racket[max-count] times, with attempts separated by @racket[sep]. The returned parser produces a
306 | list of results of successful attempts of @racket[parser]. Results of @racket[sep] are ignored.
307 | 
308 | @(parser-examples
309 |   (define letters/p (many/p letter/p))
310 |   (eval:check (parse-result! (parse-string letters/p "abc")) (list #\a #\b #\c))
311 |   (define dotted-letters/p
312 |     (many/p letter/p #:sep (char/p #\.) #:min 2 #:max 4))
313 |   (eval:check (parse-result! (parse-string dotted-letters/p "a.b.c")) (list #\a #\b #\c))
314 |   (eval:error (parse-result! (parse-string dotted-letters/p "abc")))
315 |   (eval:error (parse-result! (parse-string dotted-letters/p "a")))
316 |   (eval:check (parse-result! (parse-string dotted-letters/p "a.b.c.d.e")) (list #\a #\b #\c #\d)))
317 | 
318 | @history[#:added "1.1"]}
319 | 
320 | @defproc[(many+/p [parser parser?]
321 |                   [#:sep sep parser? void/p]
322 |                   [#:max max-count (or/c exact-nonnegative-integer? +inf.0) +inf.0])
323 |          (parser/c any/c list?)]{
324 | Like @racket[many/p], but @racket[parser] must succeed at least once. Equivalent to
325 | @racket[(many/p parser #:sep sep #:min 1 #:max max-count)].
326 | 
327 | @history[#:changed "1.1" @elem{Added support for @racket[#:sep] and @racket[#:max] keyword arguments
328 |                                for consistency with @racket[many/p].}]}
329 | 
330 | @defproc[(repeat/p [n exact-nonnegative-integer?] [parser parser?]) (parser/c any/c list?)]{
331 | Produces a parser that attempts @racket[parser] @emph{exactly} @racket[n] times and returns a list
332 | of the results. Equivalent to @racket[(many/p parser #:min n #:max n)].}
333 | 
334 | @defproc[(many-until/p [parser parser?]
335 |                        [#:end end parser?]
336 |                        [#:sep sep parser? void/p]
337 |                        [#:min min-count exact-nonnegative-integer? 0])
338 |          (parser/c (list/c list? any/c))]{
339 | Like @racket[many/p], but repeats until @racket[end] succeeds. The result is a list of two values:
340 | a list of results produced by @racket[parser] and the result produced by @racket[end].
341 | 
342 | @(parser-examples
343 |   (define letters-then-punctuation
344 |     (many-until/p letter/p #:end (char-in/p ".!?,;")))
345 |   (eval:check (parse-result! (parse-string letters-then-punctuation "abc!"))
346 |               (list '(#\a #\b #\c) #\!))
347 |   (eval:check (parse-result! (parse-string letters-then-punctuation "abc,efg"))
348 |               (list '(#\a #\b #\c) #\,))
349 |   (eval:error (parse-result! (parse-string letters-then-punctuation "a1c;")))
350 |   (eval:check (parse-result! (parse-string letters-then-punctuation "?"))
351 |               (list '() #\?)))
352 | 
353 | To determine if the repetition should stop, @racket[end] is attempted before each optional attempt of
354 | @racket[parser], i.e. each attempt after the ones required by @racket[min-count] (if any). If
355 | @racket[end] succeeds, the repetition is terminated immediately, regardless of whether or not
356 | further attempts of @racket[parser] might succeed:
357 | 
358 | @(parser-examples
359 |   #:label #f
360 |   (define digits-then-zero (many-until/p digit/p #:end (char/p #\0)))
361 |   (eval:check (parse-result! (parse-string digits-then-zero "1230"))
362 |               (list '(#\1 #\2 #\3) #\0))
363 |   (eval:check (parse-result! (parse-string digits-then-zero "12305670"))
364 |               (list '(#\1 #\2 #\3) #\0)))
365 | 
366 | If @racket[end] fails without consuming input, the repetition continues. However, note that if an
367 | attempt of @racket[end] fails @emph{after} consuming input, the failure is propagated to the
368 | entire repetition, as with @racket[or/p]. This allows a partial success of @racket[end] to “commit”
369 | to ending the repetition, even if further attempts @racket[parser] would succeed:
370 | 
371 | @(parser-examples
372 |   #:label #f
373 |   (define telegram/p (many-until/p any-char/p #:end (string/p "STOP")))
374 |   (eval:check (parse-result! (parse-string telegram/p "HELLO STOP"))
375 |               (list '(#\H #\E #\L #\L #\O #\space) "STOP"))
376 |   (eval:error (parse-result! (parse-string telegram/p "MESSAGE STOP"))))
377 | 
378 | This behavior can be useful in situations where @racket[end] is complex (so it’s helpful to report
379 | a parse error that occurs after some prefix of it has succeeded), but in cases like the above, it is
380 | usually not desired. As with @racket[or/p], this committing behavior can be suppressed by wrapping
381 | @racket[end] with @racket[try/p]:
382 | 
383 | @(parser-examples
384 |   #:label #f
385 |   (define fixed-telegram/p
386 |     (many-until/p any-char/p #:end (try/p (string/p "STOP"))))
387 |   (eval:check (parse-result! (parse-string fixed-telegram/p "MESSAGE STOP"))
388 |               (list '(#\M #\E #\S #\S #\A #\G #\E #\space) "STOP")))
389 | 
390 | @history[#:added "1.7"]}
391 | 
392 | @defproc[(many+-until/p [parser parser?]
393 |                         [#:end end parser?]
394 |                         [#:sep sep parser? void/p])
395 |          (parser/c (list/c list? any/c))]{
396 | Like @racket[many-until/p], but @racket[parser] must succeed at least once. Equivalent to
397 | @racket[(many-until/p parser #:end end #:sep sep #:min 1)].
398 | 
399 | @history[#:added "1.7"]}
400 | 
401 | @subsection[#:tag "parser-parameters"]{Parser Parameters}
402 | 
403 | @defproc[(make-parser-parameter [v any/c]) parser-parameter?]{
404 | Returns a new @deftech{parser parameter}. A parser parameter is like an ordinary @reftech{parameter},
405 | but instead of its state being scoped to a particular thread, a parser parameter’s state is scoped to
406 | a particular call to @racket[parse]. Furthermore, modifications to parser parameters are discarded if
407 | the parser backtracks past the point of modification, which ensures that only modifications from
408 | @emph{successful} parse branches are retained.
409 | 
410 | Like ordinary parameters, parser parameters are procedures that accept zero or one argument. Unlike
411 | ordinary parameters, the result of applying a parser parameter procedure is a @tech{parser}, which
412 | must be monadically sequenced with other parsers to have any effect.
413 | 
414 | @(parser-examples
415 |   (define param (make-parser-parameter #f))
416 |   (eval:check (parse-result! (parse-string (param) "")) #f)
417 |   (eval:check (parse-result! (parse-string (do (param #t) (param)) "")) #t))
418 | 
419 | Each call to @racket[parse] is executed with a distinct @deftech{parser parameterization}, which means
420 | modifications to parser parameters are only visible during that particular parser execution. The
421 | @racket[v] argument passed to @racket[make-parser-parameter] is used as the created parser parameter’s
422 | initial value in each distinct parser parameterization.
423 | 
424 | Parser parameters are useful for tracking state needed by context-sensitive parsers, but they can also
425 | be used to provide values with dynamic extent using @racket[parameterize/p], just as ordinary
426 | parameters can be locally modified via @racket[parameterize].
427 | 
428 | @history[#:added "1.4"]}
429 | 
430 | @defproc[(parser-parameter? [v any/c]) boolean?]{
431 | Returns @racket[#t] if @racket[v] is a @tech{parser parameter}, otherwise returns @racket[#f].
432 | 
433 | @history[#:added "1.4"]}
434 | 
435 | @defform[(parameterize/p ([param-expr val-expr] ...) parser-expr)
436 |          #:contracts ([param-expr parser-parameter?]
437 |                       [parser-expr parser?])]{
438 | Returns a new @tech{parser} that behaves just like @racket[parser-expr], except that the value of
439 | each @tech{parser parameter} @racket[param-expr] is given by the corresponding @racket[val-expr]
440 | during the dynamic extent of the parser’s execution.
441 | 
442 | @(parser-examples
443 |   (define param (make-parser-parameter #f))
444 |   (eval:check (parse-result! (parse-string (do [a <- (param)]
445 |                                                [b <- (parameterize/p ([param #t])
446 |                                                        (param))]
447 |                                                [c <- (param)]
448 |                                                (pure (list a b c)))
449 |                                            ""))
450 |               (list #f #t #f)))
451 | 
452 | If any of the @racket[param-expr]’s values are modified by @racket[parser-expr] via a direct call to
453 | the parser parameter procedure, the value remains modified until control leaves the enclosing
454 | @racket[parameterize/p] parser, after which the value is restored. (This behavior is precisely
455 | analogous to modifying an ordinary @reftech{parameter} within the body of a @racket[parameterize]
456 | expression.)
457 | 
458 | @(parser-examples
459 |   (define param (make-parser-parameter #f))
460 |   (eval:check (parse-result! (parse-string (do (param 1)
461 |                                                [a <- (parameterize/p ([param 2])
462 |                                                        (do (param 3)
463 |                                                            (param)))]
464 |                                                [b <- (param)]
465 |                                                (pure (list a b)))
466 |                                            ""))
467 |               (list 3 1)))
468 | 
469 | @history[#:added "1.4"]}
470 | 
471 | @section[#:tag "parsing-text"]{Parsing Text}
472 | 
473 | @defmodule[megaparsack/text]
474 | 
475 | @defproc[(parse-string [parser (parser/c char? any/c)] [str string?] [src-name any/c 'string])
476 |          (either/c message? any/c)]{
477 | Parses @racket[str] using @racket[parser], which must consume character datums. The value provided for
478 | @racket[src-name] is used in error reporting when displaying source location information.}
479 | 
480 | @defproc[(parse-syntax-string [parser (parser/c char? any/c)] [stx-str (syntax/c string?)])
481 |          (either/c message? any/c)]{
482 | Like @racket[parse-string], but uses the source location information from @racket[stx-str] to
483 | initialize the source location tracking. The result of @racket[(syntax-source stx-str)] is used in
484 | place of the @racket[_src-name] argument.}
485 | 
486 | @defproc[(char/p [c char?]) (parser/c char? char?)]{
487 | Parses a single datum that is equal to @racket[c].}
488 | 
489 | @defproc[(char-not/p [c char?]) (parser/c char? char?)]{
490 | Parses a single datum that is different from @racket[c].
491 | @history[#:added "1.3"]}
492 | 
493 | @defproc[(char-ci/p [c char?]) (parser/c char? char?)]{
494 | Parses a single datum that is case-insensitively equal to @racket[c], as determined by
495 | @racket[char-ci=?].}
496 | 
497 | @defproc[(char-between/p [low char?] [high char?]) (parser/c char? char?)]{
498 | Parses a single character that is between @racket[low] and @racket[high] according to
499 | @racket[char<=?].
500 | 
501 | @(parser-examples
502 |   (eval:check (parse-result! (parse-string (char-between/p #\a #\z) "d")) #\d)
503 |   (eval:error (parse-result! (parse-string (char-between/p #\a #\z) "D"))))
504 | 
505 | @history[#:added "1.2"]}
506 | 
507 | @defproc[(char-in/p [alphabet string?]) (parser/c char? char?)]{
508 | Returns a parser that parses a single character that is in @racket[alphabet].
509 |                                                             
510 | @(parser-examples
511 |   (eval:check (parse-result! (parse-string (char-in/p "aeiou") "i")) #\i)
512 |   (eval:error (parse-result! (parse-string (char-in/p "aeiou") "z"))))
513 | 
514 | @history[#:added "1.2"]}
515 | 
516 | @defproc[(char-not-in/p [alphabet string?]) (parser/c char? char?)]{
517 | Returns a parser that parses a single character that is not in @racket[alphabet].
518 | @history[#:added "1.3"]}
519 | 
520 | @defthing[any-char/p (parser/c char? char?)]{
521 | Returns a parser that parses a single character.
522 | @history[#:added "1.3"]}
523 | 
524 | @defthing[letter/p (parser/c char? char?)]{
525 | Parses an alphabetic letter, as determined by @racket[char-alphabetic?].}
526 | 
527 | @defthing[digit/p (parser/c char? char?)]{
528 | Parses a single digit, as determined by @racket[char-numeric?].}
529 | 
530 | @defthing[symbolic/p (parser/c char? char?)]{
531 | Parses a symbolic character, as determined by @racket[char-symbolic?].}
532 | 
533 | @defthing[space/p (parser/c char? char?)]{
534 | Parses a single whitespace character, as determined by @racket[char-whitespace?] or
535 | @racket[char-blank?].}
536 | 
537 | @defthing[integer/p (parser/c char? integer?)]{
538 | Parses a sequence of digits as an integer. Does not handle negative numbers or numeric separators.}
539 | 
540 | @defproc[(string/p [str string?]) (parser/c char? string?)]{
541 | Parses a sequence of characters equal to @racket[str] and returns @racket[str] as its result.}
542 | 
543 | @defproc[(string-ci/p [str string?]) (parser/c char? string?)]{
544 | Parses a sequence of characters case-insensitively equal to @racket[str] (as determined by
545 | @racket[char-ci=?]) and returns the matched input string as its result.
546 | 
547 | @(parser-examples
548 |   (eval:check (parse-result! (parse-string (string-ci/p "hello") "HeLlO")) "HeLlO"))
549 | 
550 | @history[#:added "1.3"
551 |          #:changed "1.8" @elem{Changed to return the parsed input string rather
552 |            than always returning @racket[str].}]}
553 | 
554 | @section[#:tag "parsing-with-parser-tools"]{Parsing with @racketmodname[parser-tools/lex]}
555 | 
556 | @defmodule[megaparsack/parser-tools/lex]
557 | 
558 | Sometimes it is useful to run a lexing pass over an input stream before parsing, in which case
559 | @racketmodname[megaparsack/text] is not appropriate. The @tt{parser-tools} package provides the
560 | @racketmodname[parser-tools/lex] library, which implements a lexer that produces tokens.
561 | 
562 | When using @racketmodname[parser-tools/lex], use @racket[lexer-src-pos] instead of @racket[lexer] to
563 | enable the built-in source location tracking. This will produce a sequence of @racket[position-token]
564 | elements, which can then be passed to @racket[parse-tokens] and detected with @racket[token/p].
565 | 
566 | @defproc[(parse-tokens [parser parser?] [tokens (listof position-token?)] [source-name any/c 'tokens])
567 |          syntax?]{
568 | Parses a stream of tokens, @racket[tokens], produced from @racket[lexer-src-pos] from
569 | @racketmodname[parser-tools/lex].}
570 | 
571 | @defproc[(token/p [name symbol?]) (parser/c (or/c symbol? token?) any/c)]{
572 | Produces a parser that expects a single token with @racket[name], as produced by @racket[token-name].}
573 | 
574 | @section[#:tag "deprecated-forms-and-functions"]{Deprecated Forms and Functions}
575 | 
576 | @defproc[(many*/p [parser parser?]) (parser/c list?)]{
577 | @deprecated[#:what "function" @racket[many/p]]}
578 | 
579 | @defproc[(many/sep*/p [parser parser?] [sep parser?]) parser?]{
580 | @deprecated[#:what "function" @racket[(many/p parser #:sep sep)]]}
581 | 
582 | @defproc[(many/sep+/p [parser parser?] [sep parser?]) parser?]{
583 | @deprecated[#:what "function" @racket[(many+/p parser #:sep sep)]]}
584 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack/state.scrbl:
--------------------------------------------------------------------------------
  1 | #lang scribble/manual
  2 | 
  3 | @(require (for-label racket/file)
  4 |           scribble/bnf
  5 |           "util.rkt")
  6 | 
  7 | @(define-parser-interaction state-interaction close-state! #:eval state-eval)
  8 | 
  9 | @title[#:tag "state"]{Parsers with State}
 10 | 
 11 | So far, all of the languages we have attempted to parse have been
 12 | @hyperlink["https://en.wikipedia.org/wiki/Context-free_grammar"]{context free}, but in practice, many
 13 | languages have varying amounts of context sensitivity. Parsers for such languages are often made much
 14 | simpler by the addition of state that tracks the necessary context. However, megaparsack parsers can
 15 | backtrack, which makes maintaining mutable state somewhat subtle: if a parser abandons a parse branch,
 16 | all its state modifications must be rolled back.
 17 | 
 18 | To make this simpler, megaparsack provides built-in support for arbitrary, user-defined state in the
 19 | form of @tech{parser parameters}. Parser parameters are similar to ordinary @reftech{parameters}, but
 20 | their values are associated with a parse context rather than with a thread. This means their values
 21 | are automatically rolled back whenever the parser backtracks, and they behave predictably regardless
 22 | of parser evaluation order.
 23 | 
 24 | @section[#:tag "state-context"]{Parsing with context}
 25 | 
 26 | Suppose we have a simple language that consists of a sequence of variable declarations of the form
 27 | 
 28 | @BNF[(list @nonterm{decl} @BNF-seq[@litchar{let} @nonterm{var} @litchar{=} @nonterm{integer}])]
 29 | 
 30 | where each declaration appears on a separate line. We might write a parser for such a language like
 31 | this:
 32 | 
 33 | @(state-interaction
 34 |   (define name/p
 35 |     (map (compose1 string->symbol list->string) (many+/p letter/p)))
 36 |   (define declaration/p
 37 |     (do (string/p "let ")
 38 |         [name <- name/p]
 39 |         (string/p " = ")
 40 |         [value <- integer/p]
 41 |         (pure (cons name value))))
 42 |   (define declarations/p
 43 |     (many/p (do [decl <- declaration/p]
 44 |                 (char/p #\newline)
 45 |                 (pure decl)))))
 46 | 
 47 | This definition works alright:
 48 | 
 49 | @(state-interaction
 50 |   (eval:check (parse-string declarations/p
 51 |                             (string-append "let x = 1\n"
 52 |                                            "let y = 2\n"))
 53 |               (success '((x . 1) (y . 2)))))
 54 | 
 55 | However, note that it also accepts multiple declarations with the same name, which may not be desired:
 56 | 
 57 | @(state-interaction
 58 |   (eval:check (parse-string declarations/p
 59 |                             (string-append "let x = 1\n"
 60 |                                            "let x = 2\n"))
 61 |               (success '((x . 1) (x . 2)))))
 62 | 
 63 | One way to prevent this is to keep track of all the declarations that we’ve parsed so far using a
 64 | @tech{parser parameter}:
 65 | 
 66 | @(state-interaction
 67 |   (define declared-names (make-parser-parameter '())))
 68 | 
 69 | Just like an ordinary @reftech{parameter}, we can read the current value of a parser parameter simply
 70 | by calling it as a procedure, like @racket[(declared-names)], and we can update its value by applying
 71 | it to a single argument, like @racket[(declared-names _new-value)]. However, unlike an ordinary
 72 | parameter, the applying a parser parameter procedure does not directly return or update the parser
 73 | parameter’s value. Instead, it returns a @tech{parser} that, when executed, parses no input, but
 74 | returns or updates the parser parameter’s value.
 75 | 
 76 | This means we can sequence reads and writes to @racket[declared-names] the same way we sequence any
 77 | other parser, using @racket[do]:
 78 | 
 79 | @(state-interaction
 80 |   (define declaration/p
 81 |     (do (string/p "let ")
 82 |         [names <- (declared-names)]
 83 |         [name <- (guard/p name/p
 84 |                           (λ (name) (not (memq name names)))
 85 |                           "an unused variable name")]
 86 |         (declared-names (cons name names))
 87 |         (string/p " = ")
 88 |         [value <- integer/p]
 89 |         (pure (cons name value)))))
 90 | 
 91 | Now duplicate definitions are rejected with a helpful error:
 92 | 
 93 | @(state-interaction
 94 |   #:hidden
 95 |   (define declarations/p
 96 |     (many/p (do [decl <- declaration/p]
 97 |                 (char/p #\newline)
 98 |                 (pure decl)))))
 99 | @(state-interaction
100 |   (eval:error (parse-result! (parse-string declarations/p
101 |                                            (string-append "let x = 1\n"
102 |                                                           "let x = 2\n")))))
103 | 
104 | @section[#:tag "state-indentation"]{Indentation sensitivity}
105 | 
106 | Another useful application of parser parameters is parsing languages that are sensitive to
107 | indentation. For example, we might wish to parse a bulleted list of items, like this:
108 | 
109 | @(define groceries.txt @list{
110 | * produce
111 |   * apples
112 |   * spinach
113 | * dairy
114 |   * milk
115 |     * whole milk
116 |     * buttermilk
117 |   * cheese
118 |     * cheddar
119 |     * feta})
120 | 
121 | @filebox["groceries.txt" (apply verbatim groceries.txt)]
122 | 
123 | To track the current indentation level, we can use a parser parameter:
124 | 
125 | @(state-interaction
126 |   (define current-indent (make-parser-parameter 0))
127 |   (define indentation/p
128 |     (do [indent <- (current-indent)]
129 |         (repeat/p indent (char/p #\space)))))
130 | 
131 | This makes defining a parser for an indentation-sensitive bulleted list remarkably straightforward:
132 | 
133 | @(state-interaction
134 |   (define tree-list/p
135 |     (do (try/p indentation/p)
136 |         (string/p "* ")
137 |         [entry <- (many+/p (char-not/p #\newline))]
138 |         (char/p #\newline)
139 |         [indent <- (current-indent)]
140 |         [children <- (parameterize/p ([current-indent (+ indent 2)])
141 |                        (many/p tree-list/p))]
142 |         (pure (list (list->string entry) children)))))
143 | 
144 | The @racket[parameterize/p] form works just like @racket[parameterize], but with parser parameters
145 | instead of ordinary ones. This definition of @racket[tree-list/p] is enough to parse the
146 | @filepath{groceries.txt} file above:
147 | 
148 | @(state-eval `(define groceries.txt ,(string-append (apply string-append groceries.txt) "\n")))
149 | @(state-interaction
150 |   (eval:alts (define grocery-list (file->string "groceries.txt"))
151 |              (define grocery-list groceries.txt))
152 |   (eval:check (parse-string (many/p tree-list/p) grocery-list)
153 |               (success
154 |                '(("produce" (("apples" ()) ("spinach" ())))
155 |                  ("dairy"
156 |                   (("milk" (("whole milk" ()) ("buttermilk" ())))
157 |                    ("cheese" (("cheddar" ()) ("feta" ())))))))))
158 | 
159 | Admittedly, in such a simple example, using a parser parameter is not strictly necessary. An
160 | alternative definition of @racket[tree-list/p] could simply accept the indentation level as an
161 | argument:
162 | 
163 | @(state-interaction
164 |   (define (tree-list/p indent)
165 |     (do (try/p (repeat/p indent (char/p #\space)))
166 |         (string/p "* ")
167 |         [entry <- (many+/p (char-not/p #\newline))]
168 |         (char/p #\newline)
169 |         [children <- (many/p (tree-list/p (+ indent 2)))]
170 |         (pure (list (list->string entry) children)))))
171 | 
172 | However, in more complex parsers, this approach can require threading additional arguments through
173 | several layers of nested parsers, which is difficult to read and even more difficult to maintain.
174 | Just as ordinary parameters can help avoid threading values through many layers of nested functions,
175 | parser parameters can help avoid threading them through nested parsers.
176 | 
177 | @(close-state!)
178 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack/syntax.scrbl:
--------------------------------------------------------------------------------
  1 | #lang scribble/manual
  2 | 
  3 | @(require "util.rkt")
  4 | 
  5 | @(define-parser-interaction syntax-interaction close-syntax!)
  6 | 
  7 | @title[#:tag "producing-syntax"]{Producing Syntax}
  8 | 
  9 | One of the properties of megaparsack is that it always tracks source locations, which is how it is
 10 | able to include source location information in error messages. This can be leveraged for an entirely
 11 | separate purpose, which is creating parsers that produce @reftech{syntax objects} as output. This
 12 | functionality is extremely useful when creating custom @hash-lang[] languages.
 13 | 
 14 | @section{Annotating parsers to produce syntax}
 15 | 
 16 | Megaparsack does not opt to produce syntax objects as the result of every parse because it would make
 17 | composing parsers extremely tedious. For example, if @racket[integer/p] produced syntax objects
 18 | containing integers instead of integers themselves, they would need to be unwrapped before they could
 19 | be added together or otherwise used as numbers. Instead, megaparsack requires that you opt-in to
 20 | syntax object production by using the @racket[syntax/p] combinator.
 21 | 
 22 | The @racket[syntax/p] combinator is “magic”—it takes @emph{any} parser and turns it into a parser that
 23 | produces a value containing accurate source location information. This is because @racket[syntax/p]
 24 | takes advantage of the internal parser state to track information that is otherwise not accessible
 25 | to parsers. Fortunately, this makes the interface extremely simple to use—just wrap an ordinary parser
 26 | with @racket[syntax/p] and use it as usual:
 27 | 
 28 | @(parser-interaction
 29 |   (parse-string (syntax/p integer/p) "42"))
 30 | 
 31 | The produced syntax objects automatically keep track of all the relevant syntax properties, including
 32 | line, column, position, and span:
 33 | 
 34 | @(parser-interaction
 35 |   (define stx (parse-result! (parse-string (syntax/p integer/p) "42")))
 36 |   (syntax-line stx)
 37 |   (syntax-column stx)
 38 |   (syntax-span stx))
 39 | 
 40 | This syntax tracking is not specific to the built-in parsers, and you do not need to do anything
 41 | special to use it with your custom parsers. For example, consider a relatively complex parser that
 42 | parses a list of comma-delimited numbers surrounded by brackets:
 43 | 
 44 | @(syntax-interaction
 45 |   (define integer-list/p
 46 |     (do (char/p #\[)
 47 |         [ints <- (many/p (syntax/p integer/p) #:sep (char/p #\,))]
 48 |         (char/p #\])
 49 |         (pure ints))))
 50 | 
 51 | We’ve annotated the @racket[integer/p] parser with @racket[syntax/p] once again so we can get location
 52 | tracking for each individual list element, but we’ll also annotate the whole thing with
 53 | @racket[syntax/p] so we can track information about the entire list as well:
 54 | 
 55 | @(syntax-interaction
 56 |   (define integer-list-stx
 57 |     (parse-result! (parse-string (syntax/p integer-list/p) "[1,2,3,5,8,13]")))
 58 |   integer-list-stx
 59 |   (syntax-span integer-list-stx))
 60 | 
 61 | As expected, the top-level syntax object spans the entire input, including the brackets. We can also
 62 | get information about the individual elements, since they are syntax objects as well:
 63 | 
 64 | @(syntax-interaction
 65 |   (syntax->list integer-list-stx))
 66 | 
 67 | This makes writing a reader for a @hash-lang[] relatively straightforward because source location
 68 | information is already encoded into a set of syntax objects which can be used as the source of a
 69 | Racket module.
 70 | 
 71 | @section{Parsing tokens from @racketmodname[parser-tools/lex]}
 72 | 
 73 | While @racket[syntax/p] can be used with any megaparsack parser, it is sometimes useful to be able to
 74 | perform a lexing phase before parsing to handle things like ignoring whitespace and tokenization in a
 75 | separate pass. Currently, megaparsack does not include tools of its own specifically for lexing
 76 | (though it would be perfectly possible to use the output of a separate simple parser as the input to
 77 | another parser), but it does provide a function to interoperate with @racketmodname[parser-tools/lex],
 78 | another Racket library that provides utilities designed specifically for lexing.
 79 | 
 80 | When using @racketmodname[parser-tools/lex], make sure to use the @racket[lexer-src-pos] form, which
 81 | enables the lexer’s own source location tracking. This configures the lexer to produce
 82 | @racket[position-token] values as output, which can be fed to @racket[parse-tokens] from
 83 | @racketmodname[megaparsack/parser-tools/lex] to parse with any megaparsack parser.
 84 | 
 85 | Parsers that operate on strings, like @racket[char/p] and @racket[integer/p], will not work with
 86 | tokens from @racketmodname[parser-tools/lex] because tokens can contain arbitrary data. Instead,
 87 | use the @racket[token/p] function to create parsers that handle particular tokens.
 88 | 
 89 | Here is a very simple lexer that produces lexemes for identifiers, numbers, parentheses, and commas:
 90 | 
 91 | @(syntax-interaction
 92 |   (define-tokens simple [IDENTIFIER NUMBER])
 93 |   (define-empty-tokens simple* [OPEN-PAREN CLOSE-PAREN COMMA])
 94 |   (define simple-lexer
 95 |     (lexer-src-pos
 96 |      [#\( (token-OPEN-PAREN)]
 97 |      [#\) (token-CLOSE-PAREN)]
 98 |      [#\, (token-COMMA)]
 99 |      [(:+ (:or (:/ #\a #\z) (:/ #\A #\Z)))
100 |       (token-IDENTIFIER (string->symbol lexeme))]
101 |      [(:+ (:/ #\0 #\9))
102 |       (token-NUMBER (string->number lexeme))]
103 |      [(:or whitespace blank iso-control) (void)]
104 |      [(eof) eof])))
105 | 
106 | We can write a simple helper function to lex a string into a list of tokens, making sure to call
107 | @racket[port-count-lines!] to enable source location tracking:
108 | 
109 | @(syntax-interaction
110 |   (define (lex-simple str)
111 |     (define in (open-input-string str))
112 |     (port-count-lines! in)
113 |     (let loop ([v (simple-lexer in)])
114 |       (cond [(void? (position-token-token v)) (loop (simple-lexer in))]
115 |             [(eof-object? (position-token-token v)) '()]
116 |             [else (cons v (loop (simple-lexer in)))])))
117 |   (lex-simple "f(1, g(3, 4))"))
118 | 
119 | Next, we can write a trivial parser to actually parse these tokens. Since we’ve written a lexer, most
120 | of the heavy lifting is already done, and we can just focus on assigning semantics:
121 | 
122 | @(syntax-interaction
123 |   (code:comment @#,elem{some wrappers around tokens that use @racket[syntax/p]})
124 |   (define number/p (syntax/p (token/p 'NUMBER)))
125 |   (define identifier/p (syntax/p (token/p 'IDENTIFIER)))
126 |   (code:comment @#,elem{a simple function invokation})
127 |   (define funcall/p
128 |     (syntax/p
129 |      (do [func <- identifier/p]
130 |          (token/p 'OPEN-PAREN)
131 |          [args <- (many/p expression/p #:sep (token/p 'COMMA))]
132 |          (token/p 'CLOSE-PAREN)
133 |          (pure (list* func args)))))
134 |   (code:comment @#,elem{an expression can be a number or a function invokation})
135 |   (define expression/p
136 |     (or/p number/p
137 |           funcall/p)))
138 | 
139 | Now, with our simple parser in place, we can actually parse arbitrary C-style function calls into
140 | S-expressions:
141 | 
142 | @(syntax-interaction
143 |   (define expr-stx
144 |     (parse-result! (parse-tokens expression/p (lex-simple "f(1, g(3, 4))"))))
145 |   expr-stx)
146 | 
147 | As expected, the source locations for each datum will automatically be assigned to the resulting
148 | syntax object due to the use of @racket[syntax/p] on the base datums and around @racket[funcall/p]:
149 | 
150 | @(syntax-interaction
151 |   (syntax->list expr-stx))
152 | 
153 | In just a couple dozens lines of code, we’ve managed to implement a fairly robust parser that produces
154 | syntax objects ready to be handed off to Racket as the result of parsing a module body, which can be
155 | compiled into working Racket code.
156 | 
157 | @(close-syntax!)
158 | 


--------------------------------------------------------------------------------
/megaparsack-doc/scribblings/megaparsack/util.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require (for-label data/functor
 4 |                     data/applicative
 5 |                     data/monad
 6 |                     data/either
 7 |                     megaparsack
 8 |                     megaparsack/text
 9 |                     megaparsack/parser-tools/lex
10 |                     parser-tools/lex
11 |                     (prefix-in : parser-tools/lex-sre)
12 |                     (except-in racket/base do map)
13 |                     racket/contract
14 |                     racket/function)
15 |          (for-syntax racket/base)
16 |          scribble/manual
17 |          scribble/example
18 |          syntax/parse/define)
19 | 
20 | (provide (for-label (all-from-out data/functor
21 |                                   data/applicative
22 |                                   data/monad
23 |                                   data/either
24 |                                   megaparsack
25 |                                   megaparsack/text
26 |                                   megaparsack/parser-tools/lex
27 |                                   parser-tools/lex
28 |                                   parser-tools/lex-sre
29 |                                   racket/base
30 |                                   racket/contract
31 |                                   racket/function))
32 |          reftech functech
33 |          make-parser-eval parser-examples parser-interaction define-parser-interaction)
34 | 
35 | (define (reftech . content)
36 |   (apply tech #:doc '(lib "scribblings/reference/reference.scrbl") content))
37 | 
38 | (define (functech . content)
39 |   (apply tech #:doc '(lib "scribblings/data/functional.scrbl") content))
40 | 
41 | (define (make-parser-eval [require-paths '()])
42 |   (let ([eval ((make-eval-factory '()))])
43 |     (eval '(require data/functor
44 |                     data/applicative
45 |                     data/monad
46 |                     data/either
47 |                     megaparsack
48 |                     megaparsack/text
49 |                     megaparsack/parser-tools/lex
50 |                     parser-tools/lex
51 |                     (prefix-in : parser-tools/lex-sre)
52 |                     (except-in racket/base do map)
53 |                     racket/function))
54 |     eval))
55 | 
56 | (define-syntax-rule (parser-examples body ...)
57 |   (examples #:eval (make-parser-eval) body ...))
58 | 
59 | (define-syntax-rule (parser-interaction body ...)
60 |   (parser-examples #:label #f body ...))
61 | 
62 | (define-simple-macro (define-parser-interaction interaction:id close-interaction!:id
63 |                        {~optional {~seq #:eval eval-id:id} #:defaults ([eval-id #'eval])})
64 |   (begin
65 |     (define eval-id (make-parser-eval))
66 |     (define-syntax-rule (interaction body (... ...))
67 |       (examples #:eval eval-id #:label #f body (... ...)))
68 |     (define (close-interaction!)
69 |       (close-eval eval-id))))
70 | 


--------------------------------------------------------------------------------
/megaparsack-lib/info.rkt:
--------------------------------------------------------------------------------
 1 | #lang info
 2 | 
 3 | (define version "1.8")
 4 | 
 5 | (define collection 'multi)
 6 | 
 7 | (define deps
 8 |   '(["base" #:version "6.5"]
 9 |     "curly-fn-lib"
10 |     ["functional-lib" #:version "0.6"]
11 |     "match-plus"))
12 | (define build-deps
13 |   '())
14 | 


--------------------------------------------------------------------------------
/megaparsack-lib/megaparsack/base.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require racket/require
  4 |          (multi-in data [functor applicative monad either])
  5 |          (prefix-in d: data/applicative)
  6 |          match-plus
  7 |          (multi-in racket [contract function generic list match port promise string])
  8 |          (prefix-in r: racket/base)
  9 |          (submod data/applicative coerce-delayed)
 10 |          (for-syntax racket/base)
 11 |          syntax/parse/define)
 12 | 
 13 | (provide lazy/p delay/p
 14 |          parser/c
 15 |          parser-parameter?
 16 |          make-parser-parameter
 17 |          parameterize/p
 18 |          (contract-out
 19 |           [struct syntax-box ([datum any/c] [srcloc srcloc?])]
 20 |           [struct message ([srcloc srcloc?] [unexpected any/c] [expected (listof string?)])]
 21 |           [rename parser?* parser? (any/c . -> . boolean?)]
 22 | 
 23 |           [rename parse-datum parse (parser?* (listof syntax-box?) . -> . (either/c message? any/c))]
 24 |           [parse-error->string (message? . -> . string?)]
 25 |           [parse-result! ((either/c message? any/c) . -> . any/c)]
 26 |           [struct (exn:fail:read:megaparsack exn:fail)
 27 |             ([message any/c] [continuation-marks any/c] [srclocs any/c]
 28 |              [unexpected any/c] [expected (listof string?)])]
 29 | 
 30 |           [void/p (parser/c any/c void?)]
 31 |           [or/p (parser?* parser?* ... . -> . parser?*)]
 32 |           [try/p (parser?* . -> . parser?*)]
 33 |           [noncommittal/p (parser?* . -> . parser?*)]
 34 |           [lookahead/p (parser?* . -> . parser?*)]
 35 |           [satisfy/p ((any/c . -> . any/c) . -> . parser?*)]
 36 |           [eof/p (parser/c any/c void?)]
 37 |           [syntax-box/p (parser?* . -> . (parser/c any/c syntax-box?))]
 38 |           [syntax/p (parser?* . -> . (parser/c any/c syntax?))]
 39 |           [label/p (string? parser?* . -> . parser?*)]
 40 |           [hidden/p (parser?* . -> . parser?*)]
 41 |           [fail/p (message? . -> . parser?*)]
 42 |           [guard/p (->* [parser?* (any/c . -> . any/c)]
 43 |                         [(or/c string? #f) (any/c . -> . any/c)]
 44 |                         parser?*)]))
 45 | 
 46 | ;; supporting types
 47 | ;; ---------------------------------------------------------------------------------------------------
 48 | 
 49 | (struct syntax-box (datum srcloc)
 50 |   #:transparent
 51 |   #:methods gen:functor
 52 |   [(define/match* (map f (syntax-box datum srcloc))
 53 |      (syntax-box (f datum) srcloc))])
 54 | 
 55 | (struct parser-input (position ; the number of tokens advanced past to get to this point
 56 |                       last-loc ; the srcloc of the previous token
 57 |                       tokens)  ; the remaining sequence of tokens, in syntax boxes
 58 |   #:transparent)
 59 | 
 60 | (struct consumed (reply) #:transparent)
 61 | (struct empty (reply) #:transparent)
 62 | 
 63 | (struct error (message) #:transparent)
 64 | (struct ok (result rest message) #:transparent)
 65 | 
 66 | ; For backwards-compatibility reasons, the `message` struct cannot gain new fields. Therefore, we use
 67 | ; `message*` internally and `message` externally.
 68 | (struct message (srcloc unexpected expected) #:transparent)
 69 | (struct message* (position   ; the number of tokens successfully parsed before this error was generated
 70 |                   user?      ; #t if generated by `fail/p`, otherwise #f
 71 |                   srcloc
 72 |                   unexpected
 73 |                   expected)
 74 |   #:transparent)
 75 | 
 76 | ; Note that `ok` replies sometimes include an error message. Why? Because if a later parser fails
 77 | ; without consuming input, previous branches might have failed in the same location, which could
 78 | ; contribute to the error. For example, consider:
 79 | ;
 80 | ;   (do (or/p digit/p (pure #\0))
 81 | ;       letter/p)
 82 | ;
 83 | ; Given the input "*", the `digit/p` parser will fail, and the second branch will be taken, which
 84 | ; results in an `ok` response containing `#\0`. Next, the `letter/p` parser will fail, resulting in a
 85 | ; parse error. If the `ok` response contains no error information, the best error we can produce is
 86 | ;
 87 | ;   string:1:1: parse error
 88 | ;     unexpected: *
 89 | ;     expected: letter
 90 | ;
 91 | ; but this is not good, because a digit is also legal at that position. So we need to take care to
 92 | ; preserve error information from failed branches, even if we produce a successful result.
 93 | 
 94 | (define empty-srcloc (srcloc #f #f #f #f #f))
 95 | 
 96 | ;; core primitives
 97 | ;; ---------------------------------------------------------------------------------------------------
 98 | 
 99 | ; A parser is a procedure that obeys the following contract:
100 | ;
101 | ;   (-> parser-input?
102 | ;       parser-parameterization?
103 | ;       (values (or/c consumed? empty?)
104 | ;               parser-parameterization?))
105 | ;
106 | ; The parser parameterization tracks user-defined parser state, and it is monadically threaded
107 | ; through each parser.
108 | 
109 | (struct parser (proc)
110 |   #:methods gen:functor
111 |   [(define/generic -map map)
112 |    (define (map f p)
113 |      (parser
114 |       (λ (input paramz)
115 |         (define-values [result paramz*] ((parser-proc p) input paramz))
116 |         (values (match result
117 |                   [(consumed (ok v rest message)) (consumed (ok (-map f v) rest message))]
118 |                   [(empty (ok v rest message))    (empty (ok (-map f v) rest message))]
119 |                   [error                          error])
120 |                 paramz*))))]
121 | 
122 |   #:methods gen:applicative
123 |   [(define (pure _ x)
124 |      (pure/p x))
125 | 
126 |    (define (apply p ps)
127 |      (do [f  <- p]
128 |          [xs <- (map/m values ps)]
129 |          (d:pure (r:apply f xs))))]
130 | 
131 |   #:methods gen:monad
132 |   [(define (chain f p)
133 |      (parser
134 |       (λ (input paramz)
135 |         (define-values [result paramz*] ((parser-proc p) input paramz))
136 |         (match result
137 |           [(empty (ok (syntax-box x _) rest message))
138 |            (define-values [result paramz**] (parse (f x) rest paramz*))
139 |            (values (merge-message/result message result) paramz**)]
140 |           [(consumed (ok (syntax-box x srcloc) rest message-a))
141 |            (define-values [result* paramz**] (parse (f x) rest paramz*))
142 |            (values (consumed (match result*
143 |                                [(consumed (ok stx rest message-b))
144 |                                 (ok (merge-syntax-box/srcloc stx srcloc) rest (merge-messages message-a message-b))]
145 |                                [(empty (ok (syntax-box datum _) rest message-b))
146 |                                 (ok (syntax-box datum srcloc) rest (merge-messages message-a message-b))]
147 |                                [(consumed error) (merge-message/reply message-a error)]
148 |                                [(empty error) (merge-message/reply message-a error)]))
149 |                    paramz**)]
150 |           [error (values error paramz*)]))))])
151 | 
152 | (define (parser?* v)
153 |   (or (parser? v) (pure? v)))
154 | 
155 | (define (make-pure-result x input)
156 |   (empty (ok (syntax-box x empty-srcloc) input #f)))
157 | (define (make-stateless-parser proc)
158 |   (parser (λ (input paramz) (values (proc input) paramz))))
159 | (define (map-parser-result p proc)
160 |   (parser
161 |    (λ (input paramz)
162 |      (define-values [result paramz*] (parse p input paramz))
163 |      (values (proc result) paramz*))))
164 | 
165 | (define (pure/p x)
166 |   (make-stateless-parser (λ (input) (values (make-pure-result x input)))))
167 | (define void/p (pure/p (void)))
168 | (define coerce-parser
169 |   (coerce-pure void/p))
170 | 
171 | (define (parse p input paramz)
172 |   ((parser-proc (coerce-parser p)) input paramz))
173 | 
174 | ; these are used by `parser/c` to implement lazy input token contracts
175 | (define parse-prompt-tag (make-continuation-prompt-tag 'parse))
176 | (define mark:parser-token-ctc-proj (make-continuation-mark-key 'parser-token-ctc-proj))
177 | (define (current-parser-token-ctc-proj)
178 |   (continuation-mark-set-first #f
179 |                                mark:parser-token-ctc-proj
180 |                                (λ (val) val)
181 |                                parse-prompt-tag))
182 | 
183 | (define (parse-syntax-box p input)
184 |   (match-define-values [result _] (call-with-continuation-prompt
185 |                                    (λ () (parse p input (hasheq)))
186 |                                    parse-prompt-tag))
187 |   (match result
188 |     [(or (consumed (ok x _ _))
189 |          (empty    (ok x _ _)))
190 |      (success x)]
191 |     [(or (consumed (error (message* _ _ srcloc unexpected expected)))
192 |          (empty    (error (message* _ _ srcloc unexpected expected))))
193 |      (failure (message srcloc unexpected expected))]))
194 | 
195 | (define (parse-datum p tokens)
196 |   (map syntax-box-datum (parse-syntax-box p (parser-input 0 empty-srcloc tokens))))
197 | 
198 | (define/match* (parse-error->string (message srcloc unexpected expected))
199 |   (with-output-to-string
200 |    (thunk (display (or (srcloc->string srcloc) "?"))
201 |           (display ": parse error")
202 | 
203 |           (when (and (not unexpected) (null? expected))
204 |             (display ";")
205 |             (newline)
206 |             (display " unknown parse error"))
207 | 
208 |           (when unexpected
209 |             (newline)
210 |             (display "  unexpected: ")
211 |             (display unexpected))
212 | 
213 |           (define organized-expected (sort (remove-duplicates expected) string<=?))
214 |           (unless (null? organized-expected)
215 |             (newline)
216 |             (display "  expected: ")
217 |             (display (if (< (length organized-expected) 3)
218 |                          (string-join organized-expected " or ")
219 |                          (string-join organized-expected ", " #:before-last ", or ")))))))
220 | 
221 | (struct exn:fail:read:megaparsack exn:fail:read (unexpected expected) #:transparent)
222 | 
223 | (define/match (parse-result! result)
224 |   [((success result)) result]
225 |   [((failure (and message (message srcloc unexpected expected))))
226 |    (raise (exn:fail:read:megaparsack (parse-error->string message)
227 |                                      (current-continuation-marks)
228 |                                      (list srcloc) unexpected expected))])
229 | 
230 | ;; syntax object utilities
231 | ;; ---------------------------------------------------------------------------------------------------
232 | 
233 | (define some-original-syntax (read-syntax #f (open-input-string "()")))
234 | 
235 | (define/match* (syntax-box->syntax (syntax-box datum (srcloc name line col pos span)))
236 |   (datum->syntax #f datum (list name line col pos span) some-original-syntax))
237 | 
238 | (define (merge-srclocs srcloc-a srcloc-b)
239 |   (match (sort (list srcloc-a srcloc-b) < #:key (λ (x) (or (srcloc-position x) -1)))
240 |     [(list (srcloc name-a line-a col-a pos-a span-a) (srcloc name-b line-b col-b pos-b span-b))
241 |      (srcloc (or name-a name-b)
242 |              (or line-a line-b)
243 |              (or col-a col-b)
244 |              (or pos-a pos-b)
245 |              (cond [(and pos-a pos-b span-b) (+ (- pos-b pos-a) span-b)]
246 |                    [(and pos-a span-a)       span-a]
247 |                    [(and pos-b span-b)       span-b]
248 |                    [else                     (or span-a span-b)]))]))
249 | 
250 | (define/match* (merge-syntax-box/srcloc (syntax-box datum srcloc-b) srcloc-a)
251 |   (syntax-box datum (merge-srclocs srcloc-a srcloc-b)))
252 | 
253 | ;; error message reconciliation
254 | ;; ---------------------------------------------------------------------------------------------------
255 | 
256 | (define (merge-messages message-a message-b)
257 |   (match* {message-a message-b}
258 |     [{_ #f} message-a]
259 |     [{#f _} message-b]
260 |     [{(message* pos-a user?-a loc-a unexpected-a expected-a)
261 |       (message* pos-b user?-b loc-b unexpected-b expected-b)}
262 |      (cond
263 |        ; if the errors don’t occur at the same location, pick the one that made more progress
264 |        [(> pos-a pos-b) message-a]
265 |        [(< pos-a pos-b) message-b]
266 |        ; if one was user-generated and the other wasn’t, pick the user-generated one
267 |        [(and user?-a (not user?-b)) message-a]
268 |        [(and user?-b (not user?-a)) message-b]
269 |        ; otherwise, combine information from both errors
270 |        [else
271 |         (message* pos-a user?-a (merge-srclocs loc-a loc-b) (or unexpected-a unexpected-b) (append expected-a expected-b))])]))
272 | 
273 | (define (merge-message/reply message-a reply)
274 |   (match reply
275 |     [(ok x rest message-b) (ok x rest (merge-messages message-a message-b))]
276 |     [(error message-b)     (error (merge-messages message-a message-b))]))
277 | 
278 | (define (merge-message/result message result)
279 |   (match result
280 |     [(empty reply) (empty (merge-message/reply message reply))]
281 |     [(consumed reply) (consumed (merge-message/reply message reply))]))
282 | 
283 | ;; laziness (lazy/p, delay/p)
284 | ;; ---------------------------------------------------------------------------------------------------
285 | 
286 | ;; Not publicly documented, but provided for backwards compatibility.
287 | ;; Will likely be removed eventually in favor of `delay/p`.
288 | (define-simple-macro (lazy/p p:expr)
289 |   (parser (λ (input paramz) (parse p input paramz))))
290 | 
291 | (define-simple-macro (delay/p p)
292 |   #:declare p (expr/c #'parser?*)
293 |   (let ([pp (delay p.c)])
294 |     (parser (λ (input paramz) (parse (force pp) input paramz)))))
295 | 
296 | ;; choice (or/p)
297 | ;; ---------------------------------------------------------------------------------------------------
298 | 
299 | ; binary version of or/p
300 | (define (p . <or> . q)
301 |   (parser
302 |    (λ (input paramz)
303 |      (define-values [result paramz*] (parse p input paramz))
304 |      (match result
305 |        [(empty (error message))
306 |         (define-values [result paramz*] (parse q input paramz))
307 |         (values (merge-message/result message result) paramz*)]
308 |        [other (values other paramz*)]))))
309 | 
310 | (define (or/p . ps)
311 |   (let-values ([(ps p) (split-at ps (sub1 (length ps)))])
312 |     (foldr <or> (first p) ps)))
313 | 
314 | ;; lookahead (try/p, noncommittal/p, lookahead/p)
315 | ;; ---------------------------------------------------------------------------------------------------
316 | 
317 | (define (try/p p)
318 |   (map-parser-result
319 |    p (match-lambda
320 |        [(consumed (error message)) (empty (error message))]
321 |        [other                      other])))
322 | 
323 | (define (noncommittal/p p)
324 |   (map-parser-result
325 |    p (match-lambda
326 |        [(consumed (? ok? reply)) (empty reply)]
327 |        [other                    other])))
328 | 
329 | (define (lookahead/p p)
330 |   (parser
331 |    (λ (input paramz)
332 |      (define-values [result paramz*] (parse p input paramz))
333 |      (values (match result
334 |                [(consumed (ok x _ _)) (empty (ok x input #f))]
335 |                [(empty    (ok x _ _)) (empty (ok x input #f))]
336 |                [error                 error])
337 |              paramz*))))
338 | 
339 | ;; conditional (satisfy/p)
340 | ;; ---------------------------------------------------------------------------------------------------
341 | 
342 | (define (satisfy/p proc)
343 |   (make-stateless-parser
344 |    (match-lambda
345 |      [(parser-input pos last-loc tokens)
346 |       (match tokens
347 |         [(cons (syntax-box (app (current-parser-token-ctc-proj) c) loc) cs)
348 |          (if (proc c)
349 |              (consumed (ok (syntax-box c loc) (parser-input (add1 pos) loc cs) #f))
350 |              (empty (error (message* pos #f loc c '()))))]
351 |         ['()
352 |          (empty (error (message* pos #f last-loc "end of input" '())))])])))
353 | 
354 | ;; termination (eof/p)
355 | ;; ---------------------------------------------------------------------------------------------------
356 | 
357 | (define eof/p
358 |   (make-stateless-parser
359 |    (match-lambda
360 |      [(parser-input pos _ tokens)
361 |       (match tokens
362 |         ['()
363 |          (make-pure-result (void) '())]
364 |         [(cons (syntax-box (app (current-parser-token-ctc-proj) c) loc) _)
365 |          (empty (error (message* pos #f loc c '("end of input"))))])])))
366 | 
367 | ;; source location reification (syntax-box/p & syntax/p)
368 | ;; ---------------------------------------------------------------------------------------------------
369 | 
370 | (define (syntax-box/p p)
371 |   (map-parser-result
372 |    p (match-lambda
373 |        [(consumed (ok {and box (syntax-box _ srcloc)} rest message))
374 |         (consumed (ok (syntax-box box srcloc) rest message))]
375 |        [(empty (ok {and box (syntax-box _ srcloc)} rest message))
376 |         (empty (ok (syntax-box box srcloc) rest message))]
377 |        [error error])))
378 | 
379 | (define (syntax/p p)
380 |   (map syntax-box->syntax (syntax-box/p p)))
381 | 
382 | ;; parser annotation (label/p & hidden/p)
383 | ;; ---------------------------------------------------------------------------------------------------
384 | 
385 | (define/match* (expect pos-a expected (and message (message* pos-b user? srcloc unexpected _)))
386 |   (if (= pos-a pos-b)
387 |       (message* pos-a user? srcloc unexpected (list expected))
388 |       message))
389 | 
390 | (define/match* (hide pos-a (and message (message* pos-b user? srcloc unexpected _)))
391 |   (if (= pos-a pos-b)
392 |       (message* pos-a user? srcloc unexpected '())
393 |       message))
394 | 
395 | (define (label/p str p)
396 |   (parser
397 |    (λ (input paramz)
398 |      (define pos (parser-input-position input))
399 |      (define-values [result paramz*] (parse p input paramz))
400 |      (values (match result
401 |                [(empty (error message))     (empty (error (expect pos str message)))]
402 |                [(empty (ok x rest message)) (empty (ok x rest (expect pos str message)))]
403 |                [consumed                    consumed])
404 |              paramz*))))
405 | 
406 | (define (hidden/p p)
407 |   (parser
408 |    (λ (input paramz)
409 |      (define pos (parser-input-position input))
410 |      (define-values [result paramz*] (parse p input paramz))
411 |      (values (match result
412 |                [(empty (error message))     (empty (error (hide pos message)))]
413 |                [(empty (ok x rest message)) (empty (ok x rest (hide pos message)))]
414 |                [consumed                    consumed])
415 |              paramz*))))
416 | 
417 | ;; custom failure messages (fail/p, guard/p)
418 | ;; ---------------------------------------------------------------------------------------------------
419 | 
420 | (define/match* (fail/p (message srcloc unexpected expected))
421 |   (make-stateless-parser
422 |    (λ (input) (empty (error (message* (parser-input-position input) #t srcloc unexpected expected))))))
423 | 
424 | ; Providing guard/p as a primitive rather than as a derived concept is somewhat unsatisfying. In
425 | ; previous versions of this library, it /was/ implemented as a derived combinator, with the following
426 | ; implementation:
427 | ;
428 | ;   (define (guard/p p pred? [expected #f] [mk-unexpected values])
429 | ;     (do [s <- (syntax-box/p p)]
430 | ;         (define v (syntax-box-datum s))
431 | ;         (if (pred? v)
432 | ;             (pure v)
433 | ;             (fail/p (message (syntax-box-srcloc s)
434 | ;                              (mk-unexpected v)
435 | ;                              (if expected (list expected) '()))))))
436 | ;
437 | ; Sadly, changes to the way `label/p` chooses to adjust the list of expected values makes this
438 | ; implementation no longer satisfactory. The issue is that the token position of the error produced by
439 | ; `fail/p` is after the tokens consumed by the enclosed parser. This causes `label/p` to consider the
440 | ; parse error as more specific than the one it would generate, so it declines to modify it.
441 | ;
442 | ; This really reveals a deeper issue with the way megaparsack answers the question of which errors
443 | ; made the most progress. If an error from a different branch were to occur in the middle of the token
444 | ; stream consumed by the enclosed parser, megaparsack would select it over the `guard/p`-generated
445 | ; error, since `guard/p` reports its error at the /start/ of that sequence of tokens. This is not
446 | ; really right---a more correct approach would be to record the whole span of tokens that `guard/p`
447 | ; wraps. But it’s unclear how much this really matters, and that still wouldn’t make it any easier to
448 | ; define `guard/p` as a derived concept, so for now, the imperfect implementation remains.
449 | 
450 | (define (guard/p p pred? [expected #f] [mk-unexpected values])
451 |   (define (guard-reply pos reply)
452 |     (match reply
453 |       [(ok (syntax-box x loc) rest _)
454 |        #:when (not (pred? x))
455 |        (error (message* pos #t loc (mk-unexpected x) (if expected (list expected) '())))]
456 |       [other other]))
457 |   (parser
458 |    (λ (input paramz)
459 |      (define pos (parser-input-position input))
460 |      (define-values [result paramz*] (parse p input paramz))
461 |      (values (match result
462 |                [(empty reply) (empty (guard-reply pos reply))]
463 |                [(consumed reply) (consumed (guard-reply pos reply))])
464 |              paramz*))))
465 | 
466 | ;; contracts (parser/c)
467 | ;; ---------------------------------------------------------------------------------------------------
468 | 
469 | (define-syntax-parser parser/c
470 |   [(head in-ctc out-ctc)
471 |    (define ctc-tag (gensym 'ctc))
472 |    (syntax-property
473 |     (quasisyntax/loc this-syntax
474 |       (parser/c-proc
475 |        #,(syntax-property #'in-ctc 'racket/contract:negative-position ctc-tag)
476 |        #,(syntax-property #'out-ctc 'racket/contract:positive-position ctc-tag)))
477 |     'racket/contract:contract
478 |     (vector ctc-tag (list #'head) '()))]
479 |   [(head form ...)
480 |    (syntax-property (quasisyntax/loc this-syntax
481 |                       (parser/c-proc form ...))
482 |                     'racket/contract:contract
483 |                     (vector (gensym 'ctc) (list #'head) '()))]
484 |   [head:id
485 |    (syntax-property (quasisyntax/loc this-syntax
486 |                       parser/c-proc)
487 |                     'racket/contract:contract
488 |                     (vector (gensym 'ctc) (list #'head) '()))])
489 | 
490 | ; We define `parser/c` as a custom contract, which allows us to ensure that contracts on input tokens
491 | ; are applied /lazily/, when the tokens are actually consumed. If we didn’t do this, each individual
492 | ; parser would apply its input contract to every remaining token in the input stream, which is both
493 | ; unhelpful and extraordinarily slow.
494 | ;
495 | ; To keep contract checking minimal, `parser/c` does not directly enforce input contracts. Instead, it
496 | ; stores a contract projection in the `mark:parser-token-ctc-proj` continuation mark. The contract
497 | ; projection is only actually applied to input tokens as needed by primitive parser constructors like
498 | ; `satisfy/p`.
499 | ;
500 | ; This is all rather subtle, since it means all primitive parsers must individually take care to apply
501 | ; the contract projection before inspecting the token stream. Fortunately, the only parsers that
502 | ; actually inspect tokens directly are `satisfy/p` and `eof/p`, so the contract checking machinery
503 | ; can be localized there.
504 | 
505 | (define (parser/c-proc in-ctc out-ctc)
506 |   (let ([in-ctc (coerce-contract 'parser/c in-ctc)]
507 |         [out-ctc (coerce-contract 'parser/c out-ctc)])
508 |     (define in-proj (contract-late-neg-projection in-ctc))
509 |     (define out-proj (contract-late-neg-projection out-ctc))
510 |     (define pure-out-proj (contract-late-neg-projection (pure/c out-ctc)))
511 | 
512 |     (define chaperone? (and (chaperone-contract? in-ctc)
513 |                             (chaperone-contract? out-ctc)))
514 |     ((if chaperone? make-chaperone-contract make-contract)
515 |      #:name (build-compound-type-name 'parser/c in-ctc out-ctc)
516 |      #:first-order parser?*
517 |      #:late-neg-projection
518 |      (λ (blame)
519 |        (define in-elem-proj (in-proj (blame-add-context blame "the input to" #:swap? #t)))
520 |        (define out-blame (blame-add-context blame "the result of"))
521 |        (define out-elem-proj (out-proj out-blame))
522 |        (define pure-out-elem-proj (pure-out-proj out-blame))
523 |        (λ (val missing-party)
524 |          (cond
525 |            [(pure? val)
526 |             (pure-out-elem-proj val missing-party)]
527 |            [(parser? val)
528 |             ((if chaperone? chaperone-struct impersonate-struct)
529 |              val parser-proc
530 |              (λ (self parse-proc)
531 |                (define (wrap-ok x loc rest message)
532 |                  (ok (syntax-box (out-elem-proj x missing-party) loc) rest message))
533 |                ((if chaperone? chaperone-procedure impersonate-procedure)
534 |                 parse-proc
535 |                 (λ (input paramz)
536 |                   (define old-ctc-proj (current-parser-token-ctc-proj))
537 |                   (values (λ (result paramz)
538 |                             (values (match result
539 |                                       [(consumed (ok (syntax-box x loc) rest message))
540 |                                        (consumed (wrap-ok x loc rest message))]
541 |                                       [(empty (ok (syntax-box x loc) rest message))
542 |                                        (empty (wrap-ok x loc rest message))]
543 |                                       [error error])
544 |                                     paramz))
545 |                           'mark mark:parser-token-ctc-proj
546 |                           (λ (val) (in-elem-proj (old-ctc-proj val) missing-party))
547 |                           input paramz)))))]
548 |            [else
549 |             (raise-blame-error blame val #:missing-party missing-party
550 |                                '(expected: "parser?" given: "~e") val)]))))))
551 | 
552 | ;; state (make-parser-parameter, parameterize/p)
553 | ;; ---------------------------------------------------------------------------------------------------
554 | 
555 | (struct parser-parameter (initial-value)
556 |   #:constructor-name make-parser-parameter
557 |   #:authentic ; we look up parameters in the parameterization by `eq?`, so no contracts allowed
558 |   #:property prop:procedure
559 |   (case-lambda
560 |     [(self)
561 |      (parser
562 |       (λ (input paramz)
563 |         (define value (hash-ref paramz self (λ () (parser-parameter-initial-value self))))
564 |         (values (make-pure-result value input) paramz)))]
565 |     [(self value)
566 |      (parser
567 |       (λ (input paramz)
568 |         (values (make-pure-result (void) input)
569 |                 (hash-set paramz self value))))]))
570 | 
571 | (define-syntax-parser parameterize/p
572 |   [(_ ([param val:expr] ...) p)
573 |    #:declare param (expr/c #'parser-parameter? #:name "parameter")
574 |    #:declare p (expr/c #'parser? #:name "body parser")
575 |    (for/fold ([body #'p.c])
576 |              ([param (in-list (reverse (attribute param.c)))]
577 |               [val (in-list (reverse (attribute val)))])
578 |      (quasisyntax/loc this-syntax
579 |        (parameterize-one/p #,body #,param #,val)))])
580 | 
581 | (define (parameterize-one/p p param val)
582 |   (do [old-val <- (param)]
583 |       (param val)
584 |       [result <- p]
585 |       (param old-val)
586 |       (pure result)))
587 | 


--------------------------------------------------------------------------------
/megaparsack-lib/megaparsack/combinator.rkt:
--------------------------------------------------------------------------------
  1 | #lang curly-fn racket/base
  2 | 
  3 | (require data/applicative
  4 |          data/monad
  5 |          megaparsack/base
  6 |          racket/contract
  7 |          racket/list)
  8 | 
  9 | (provide (contract-out
 10 |           [many/p (->* [parser?]
 11 |                        [#:sep parser?
 12 |                         #:min exact-nonnegative-integer?
 13 |                         #:max (or/c exact-nonnegative-integer? +inf.0)]
 14 |                        (parser/c any/c list?))]
 15 |           [many+/p (->* [parser?]
 16 |                         [#:sep parser?
 17 |                          #:max (or/c exact-nonnegative-integer? +inf.0)]
 18 |                         (parser/c any/c list?))]
 19 |           [repeat/p (exact-nonnegative-integer? parser? . -> . (parser/c any/c list?))]
 20 | 
 21 |           [many-until/p (->* [parser?
 22 |                               #:end parser?]
 23 |                              [#:sep parser?
 24 |                               #:min exact-nonnegative-integer?]
 25 |                              (parser/c any/c (list/c list? any/c)))]
 26 |           [many+-until/p (->* [parser?
 27 |                                #:end parser?]
 28 |                               [#:sep parser?]
 29 |                               (parser/c any/c (list/c list? any/c)))]
 30 | 
 31 |           [many*/p (parser? . -> . parser?)]
 32 |           [many/sep*/p (parser? parser? . -> . parser?)]
 33 |           [many/sep+/p (parser? parser? . -> . parser?)]
 34 | 
 35 |           [==/p (->* [any/c] [(any/c any/c . -> . any/c)] parser?)]
 36 |           [one-of/p (->* [list?] [(any/c any/c . -> . any/c)] parser?)]
 37 |           [list/p (->* []
 38 |                        [#:sep parser?]
 39 |                        #:rest (listof parser?)
 40 |                        (parser/c any/c list?))]))
 41 | 
 42 | (define (==/p v [=? equal?])
 43 |   (label/p (format "~a" v)
 44 |            (satisfy/p #{=? v %})))
 45 | 
 46 | (define (one-of/p vs [=? equal?])
 47 |   (apply or/p (map #{==/p % =?} vs)))
 48 | 
 49 | (define (list/p #:sep [sep void/p] . ps)
 50 |   (cond [(empty? ps) (pure '())]
 51 |         [(empty? (rest ps)) ((pure list) (first ps))]
 52 |         [else
 53 |          ((pure list*) (do [v <- (first ps)] sep (pure v))
 54 |                        (apply list/p #:sep sep (rest ps)))]))
 55 | 
 56 | (define (many/p p
 57 |                 #:sep [sep-p void/p]
 58 |                 #:min [min-count 0]
 59 |                 #:max [max-count +inf.0])
 60 |   (define (loop-mandatory p [min-left min-count]
 61 |                           #:recur-parser [recur p])
 62 |     (if (zero? min-left)
 63 |         (loop-optional p #:recur-parser recur)
 64 |         ((pure cons) p (lazy/p (loop-mandatory recur (sub1 min-left))))))
 65 | 
 66 |   (define (loop-optional p [max-left (- max-count min-count)]
 67 |                          #:recur-parser [recur p])
 68 |     (if (<= max-left 0)
 69 |         (pure '())
 70 |         (or/p ((pure cons) p (lazy/p (loop-optional recur (sub1 max-left))))
 71 |               (pure '()))))
 72 | 
 73 |   (loop-mandatory p #:recur-parser (do sep-p p)))
 74 | 
 75 | (define (many+/p p #:sep [sep void/p] #:max [max-count +inf.0])
 76 |   (many/p p #:sep sep #:min 1 #:max max-count))
 77 | 
 78 | (define (repeat/p n p) (many/p p #:min n #:max n))
 79 | 
 80 | (define (many-until/p p #:end end-p
 81 |                       #:sep [sep-p void/p]
 82 |                       #:min [min-count 0])
 83 |   (define (loop-mandatory p
 84 |                           [results '()]
 85 |                           [min-left min-count]
 86 |                           #:recur-parser [recur p])
 87 |     (if (zero? min-left)
 88 |         (loop-optional p results #:recur-parser recur)
 89 |         (do [result <- p]
 90 |             (loop-mandatory recur (cons result results) (sub1 min-left)))))
 91 | 
 92 |   (define (loop-optional p results #:recur-parser [recur p])
 93 |     (or/p (do [end-result <- end-p]
 94 |               (pure (list (reverse results) end-result)))
 95 |           (do [result <- p]
 96 |               (loop-optional recur (cons result results)))))
 97 | 
 98 |   (loop-mandatory p #:recur-parser (do sep-p p)))
 99 | 
100 | (define (many+-until/p p #:end end-p #:sep [sep-p void/p])
101 |   (many-until/p p #:end end-p #:sep sep-p #:min 1))
102 | 
103 | ;; These combinators are special cases of many/p that predated many/p
104 | 
105 | (define (many*/p p) (many/p p))
106 | (define (many/sep*/p p sep) (many/p p #:sep sep))
107 | (define (many/sep+/p p sep) (many+/p p #:sep sep))
108 | 


--------------------------------------------------------------------------------
/megaparsack-lib/megaparsack/main.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | 
3 | (require megaparsack/base
4 |          megaparsack/combinator)
5 | 
6 | (provide (all-from-out megaparsack/base
7 |                        megaparsack/combinator))
8 | 


--------------------------------------------------------------------------------
/megaparsack-lib/megaparsack/text.rkt:
--------------------------------------------------------------------------------
  1 | #lang curly-fn racket/base
  2 | 
  3 | (require data/applicative
  4 |          data/monad
  5 |          megaparsack/base
  6 |          megaparsack/combinator
  7 |          racket/contract
  8 |          racket/list
  9 |          racket/function)
 10 | 
 11 | (provide (contract-out
 12 |           [parse-string (->* [parser? string?] [any/c] any/c)]
 13 |           [parse-syntax-string (-> parser? (syntax/c string?) any/c)]
 14 |           [char/p (char? . -> . (parser/c char? char?))]
 15 |           [char-not/p (char? . -> . (parser/c char? char?))]
 16 |           [char-ci/p (char? . -> . (parser/c char? char?))]
 17 |           [char-between/p (char? char? . -> . (parser/c char? char?))]
 18 |           [char-in/p (string? . -> . (parser/c char? char?))]
 19 |           [char-not-in/p (string? . -> . (parser/c char? char?))]
 20 |           [any-char/p (parser/c char? char?)]
 21 |           [letter/p (parser/c char? char?)]
 22 |           [digit/p (parser/c char? char?)]
 23 |           [symbolic/p (parser/c char? char?)]
 24 |           [space/p (parser/c char? char?)]
 25 |           [integer/p (parser/c char? integer?)]
 26 |           [string/p (string? . -> . (parser/c char? string?))]
 27 |           [string-ci/p (string? . -> . (parser/c char? string?))]))
 28 | 
 29 | (define (chars->syntax-boxes chars name pos line col)
 30 |   (if (empty? chars)
 31 |       '()
 32 |       (let ([c (first chars)])
 33 |         (cons (syntax-box c (srcloc name line col pos 1))
 34 |               (if (char=? c #\newline)
 35 |                   (chars->syntax-boxes (rest chars) name (add1 pos) (add1 line) 0)
 36 |                   (chars->syntax-boxes (rest chars) name (add1 pos) line (add1 col)))))))
 37 | 
 38 | (define (parse-string p input [name 'string])
 39 |   (parse p (chars->syntax-boxes (string->list input) name 1 1 0)))
 40 | 
 41 | (define (parse-syntax-string p stx-string)
 42 |   (parse p (chars->syntax-boxes (string->list (syntax->datum stx-string))
 43 |                                 (syntax-source stx-string)
 44 |                                 (syntax-position stx-string)
 45 |                                 (syntax-line stx-string)
 46 |                                 (syntax-column stx-string))))
 47 | 
 48 | (define (char/p c)    (label/p (format "'~a'" c) (satisfy/p #{char=? c})))
 49 | (define (char-not/p c) (label/p (format "not '~a'" c) (satisfy/p #{char=/? c})))
 50 | (define (char-ci/p c) (label/p (format "'~a'" c) (satisfy/p #{char-ci=? c})))
 51 | (define letter/p      (label/p "letter" (satisfy/p char-alphabetic?)))
 52 | (define digit/p       (label/p "number" (satisfy/p char-numeric?)))
 53 | (define symbolic/p    (label/p "symbolic" (satisfy/p char-symbolic?)))
 54 | (define space/p       (label/p "whitespace" (satisfy/p (disjoin char-whitespace? char-blank?))))
 55 | (define any-char/p    (label/p "any character" (satisfy/p (lambda (_) #t))))
 56 | 
 57 | (define integer/p
 58 |   (label/p "integer"
 59 |            (do [digits <- (many+/p digit/p)]
 60 |                (pure (string->number (apply string digits))))))
 61 | 
 62 | (define (string/p str)
 63 |   (define len (string-length str))
 64 |   (if (zero? len)
 65 |       (pure "")
 66 |       (label/p str
 67 |         (let loop ([i 0])
 68 |           (if (< i len)
 69 |               (do (char/p (string-ref str i))
 70 |                   (loop (add1 i)))
 71 |               (pure str))))))
 72 | 
 73 | (define (string-ci/p str)
 74 |   (define len (string-length str))
 75 |   (if (zero? len)
 76 |       (pure "")
 77 |       (label/p str
 78 |         (let loop ([i 0] [cs '()])
 79 |           (if (< i len)
 80 |               (do [c <- (char-ci/p (string-ref str i))]
 81 |                   (loop (add1 i) (cons c cs)))
 82 |               (pure (list->string (reverse cs))))))))
 83 | 
 84 | (define (char-between/p low high)
 85 |   (label/p (format "a character between '~a' and '~a'" low high)
 86 |            (satisfy/p #{char<=? low % high})))
 87 | 
 88 | (define (char-in/p str)
 89 |   (apply or/p (map char/p (string->list str))))
 90 | 
 91 | (define (char-not-in/p str)
 92 |   (satisfy/p #{string-not-member? str}))
 93 | 
 94 | ;;;
 95 | ;; char and string utility functions
 96 | (define (char=/? c k)
 97 |   (not (char=? c k)))
 98 | 
 99 | (define (char-ci=/? c k)
100 |   (not (char-ci=? c k)))
101 | 
102 | (define (string-member? str c)
103 |   (for/or ([k str])
104 |     (char=? k c)))
105 | 
106 | (define (string-not-member? str c)
107 |   (not (string-member? str c)))
108 | 


--------------------------------------------------------------------------------
/megaparsack-parser-tools/info.rkt:
--------------------------------------------------------------------------------
 1 | #lang info
 2 | 
 3 | (define version "1.8")
 4 | 
 5 | (define collection 'multi)
 6 | 
 7 | (define deps
 8 |   '("base"
 9 |     "functional-lib"
10 |     "megaparsack-lib"
11 |     "parser-tools-lib"))
12 | (define build-deps
13 |   '())
14 | 


--------------------------------------------------------------------------------
/megaparsack-parser-tools/megaparsack/parser-tools/lex.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require data/monad
 4 |          data/applicative
 5 |          megaparsack/base
 6 |          (prefix-in lex: parser-tools/lex)
 7 |          racket/contract
 8 |          racket/match)
 9 | 
10 | (provide (contract-out [parse-tokens ([parser? (listof lex:position-token?)] [any/c] . ->* . any/c)]
11 |                        [token/p (symbol? . -> . parser?)]))
12 | 
13 | (define (parse-tokens parser toks [srcname 'tokens])
14 |   (parse parser (map (position-token->parser-token srcname) toks)))
15 | 
16 | (define ((position-token->parser-token source-name) pos-token)
17 |   (match pos-token
18 |     [(lex:position-token tok (lex:position offset-a line col) (lex:position offset-b _ _))
19 |      (syntax-box tok (srcloc source-name line col offset-a (- offset-b offset-a)))]))
20 | 
21 | (define (token/p name)
22 |   (label/p
23 |    (symbol->string name)
24 |    (do [tok <- (satisfy/p (λ (x) (and (or (lex:token? x) (symbol? x))
25 |                                       (equal? (lex:token-name x) name))))]
26 |        (pure (lex:token-value tok)))))
27 | 


--------------------------------------------------------------------------------
/megaparsack-parser/info.rkt:
--------------------------------------------------------------------------------
 1 | #lang info
 2 | 
 3 | (define version "1.8")
 4 | 
 5 | (define collection 'multi)
 6 | 
 7 | (define deps
 8 |   '("base"
 9 |     "collections-lib"
10 |     "curly-fn-lib"
11 |     "functional-lib"
12 |     "megaparsack-lib"))
13 | (define build-deps
14 |   '())
15 | 


--------------------------------------------------------------------------------
/megaparsack-parser/megaparsack/parser/json.rkt:
--------------------------------------------------------------------------------
  1 | #lang curly-fn racket/base
  2 | 
  3 | (require racket/require
  4 |          (multi-in data [collection applicative monad])
  5 |          (multi-in megaparsack [base combinator text]))
  6 | 
  7 | (provide parse-json-string)
  8 | 
  9 | (define spaces/p (many*/p (hidden/p space/p)))
 10 | 
 11 | (define positive-decimal/p
 12 |   (or/p (try/p (do [i <- (or/p integer/p (pure 0))]
 13 |                    (char/p #\.)
 14 |                    [fs <- (many+/p digit/p)]
 15 |                    (define f (string->number (list->string fs)))
 16 |                    (pure (+ i (/ f (expt 10 (length fs)))))))
 17 |         integer/p))
 18 | 
 19 | (define exponent/p
 20 |   (do (char-ci/p #\e)
 21 |       [negate? <- (or/p (do (char/p #\+) (pure #f))
 22 |                         (do (char/p #\-) (pure #t))
 23 |                         (pure #f))]
 24 |       [i <- integer/p]
 25 |       (pure (if negate? (- i) i))))
 26 | 
 27 | (define positive-number/p
 28 |   (do [n <- positive-decimal/p]
 29 |       [exp <- (or/p (try/p exponent/p)
 30 |                     (pure 0))]
 31 |       (pure (* n (expt 10 exp)))))
 32 | 
 33 | (define number/p
 34 |   (label/p
 35 |    "number"
 36 |    (or/p (do (char/p #\-)
 37 |              [n <- positive-number/p]
 38 |              (pure (- n)))
 39 |          positive-number/p)))
 40 | 
 41 | (define hex-digit/p
 42 |   (or/p (do (char/p    #\0) (pure  0))
 43 |         (do (char/p    #\1) (pure  1))
 44 |         (do (char/p    #\2) (pure  2))
 45 |         (do (char/p    #\3) (pure  3))
 46 |         (do (char/p    #\4) (pure  4))
 47 |         (do (char/p    #\5) (pure  5))
 48 |         (do (char/p    #\6) (pure  6))
 49 |         (do (char/p    #\7) (pure  7))
 50 |         (do (char/p    #\8) (pure  8))
 51 |         (do (char/p    #\9) (pure  9))
 52 |         (do (char-ci/p #\a) (pure 10))
 53 |         (do (char-ci/p #\b) (pure 11))
 54 |         (do (char-ci/p #\c) (pure 12))
 55 |         (do (char-ci/p #\d) (pure 13))
 56 |         (do (char-ci/p #\e) (pure 14))
 57 |         (do (char-ci/p #\f) (pure 15))))
 58 | 
 59 | (define string-char-or-escape/p
 60 |   (or/p (do (char/p #\\)
 61 |             (or/p (char/p #\")
 62 |                   (char/p #\\)
 63 |                   (char/p #\/)
 64 |                   (do (char/p #\b) (pure #\backspace))
 65 |                   (do (char/p #\f) (pure #\page))
 66 |                   (do (char/p #\n) (pure #\newline))
 67 |                   (do (char/p #\r) (pure #\return))
 68 |                   (do (char/p #\t) (pure #\tab))
 69 |                   (do (char/p #\u)
 70 |                       [ns <- (repeat/p 4 hex-digit/p)]
 71 |                       (define code (foldl #{+ %2 (* 16 %1)} 0 ns))
 72 |                       (pure (integer->char code)))))
 73 |         (satisfy/p #{not (char=? #\" %)})))
 74 | 
 75 | (define quoted-string/p
 76 |   (label/p
 77 |    "string"
 78 |    (do (char/p #\")
 79 |        [chars <- (many*/p string-char-or-escape/p)]
 80 |        (char/p #\")
 81 |        (pure (list->string chars)))))
 82 | 
 83 | (define boolean/p
 84 |   (label/p
 85 |    "boolean"
 86 |    (or/p (do (string/p "true")  (pure #t))
 87 |          (do (string/p "false") (pure #f)))))
 88 | 
 89 | (define null/p
 90 |   (do (string/p "null") (pure 'null)))
 91 | 
 92 | (define object-pair/p
 93 |   (do spaces/p
 94 |       [k <- quoted-string/p]
 95 |       spaces/p
 96 |       (char/p #\:)
 97 |       spaces/p
 98 |       [v <- value/p]
 99 |       spaces/p
100 |       (pure (cons (string->symbol k) v))))
101 | 
102 | (define object/p
103 |   (label/p
104 |    "object"
105 |    (do (char/p #\{)
106 |        [pairs <- (many/sep*/p object-pair/p (char/p #\,))]
107 |        spaces/p
108 |        (char/p #\})
109 |        (pure (make-immutable-hasheq (sequence->list pairs))))))
110 | 
111 | (define array/p
112 |   (label/p
113 |    "array"
114 |    (do (char/p #\[)
115 |        [elems <- (many/sep*/p value/p (try/p (do spaces/p (char/p #\,))))]
116 |        spaces/p
117 |        (char/p #\])
118 |        (pure elems))))
119 | 
120 | (define value/p
121 |   (do spaces/p
122 |       (or/p number/p
123 |             quoted-string/p
124 |             boolean/p
125 |             null/p
126 |             object/p
127 |             array/p)))
128 | 
129 | (define entire-value/p
130 |   (do [v <- value/p]
131 |       spaces/p
132 |       eof/p
133 |       (pure v)))
134 | 
135 | (define (parse-json-string str [name 'json])
136 |   (parse-string entire-value/p str name))
137 | 


--------------------------------------------------------------------------------
/megaparsack-test/info.rkt:
--------------------------------------------------------------------------------
 1 | #lang info
 2 | 
 3 | (define version "1.8")
 4 | 
 5 | (define collection 'multi)
 6 | 
 7 | (define deps
 8 |   '())
 9 | (define build-deps
10 |   '("base"
11 |     "curly-fn-lib"
12 |     "functional-lib"
13 |     "megaparsack-lib"
14 |     "megaparsack-parser"
15 |     "rackunit-lib"
16 |     "rackunit-spec"))
17 | 


--------------------------------------------------------------------------------
/megaparsack-test/tests/megaparsack/base.rkt:
--------------------------------------------------------------------------------
  1 | #lang curly-fn racket/base
  2 | 
  3 | (require data/applicative
  4 |          data/either
  5 |          data/monad
  6 |          megaparsack
  7 |          megaparsack/text
  8 |          rackunit
  9 |          rackunit/spec)
 10 | 
 11 | (describe "or/p"
 12 |   (context "when no parsers consume input"
 13 |     (it "succeeds with the first successful parse"
 14 |       (check-equal? (parse-string (or/p (do eof/p (pure 'eof)) letter/p) "")
 15 |                     (success 'eof))))
 16 | 
 17 |   (it "fails when a parser consumes input and fails"
 18 |     (check-equal? (parse-string (or/p (do letter/p letter/p)
 19 |                                       (pure 'eof))
 20 |                                 "a")
 21 |                   (failure (message (srcloc 'string 1 0 1 1) "end of input" '("letter")))))
 22 | 
 23 |   (it "preserves errors from other branches on success"
 24 |     (check-equal? (parse-string (do (or/p digit/p (pure #\0)) letter/p) "*")
 25 |                   (failure (message (srcloc 'string 1 0 1 1) #\* '("number" "letter"))))
 26 |     (check-equal? (parse-string (do (many/p (char/p #\r)) eof/p) "ra")
 27 |                   (failure (message (srcloc 'string 1 1 2 1) #\a '("'r'" "end of input"))))))
 28 | 
 29 | (describe "fail/p"
 30 |   (it "always fails"
 31 |     (let* ([loc (srcloc #f #f #f #f #f)]
 32 |            [msg (message loc
 33 |                          "something"
 34 |                          (list "something else"))]
 35 |            [p (fail/p msg)])
 36 |       (check-equal? (parse-string p "")
 37 |                     (failure msg)))))
 38 | 
 39 | (describe "noncommittal/p"
 40 |   (it "allows backtracking as if input was not consumed upon success"
 41 |     (check-equal? (parse-string (many/p letter/p #:sep (noncommittal/p space/p))
 42 |                                 "a b c d .")
 43 |                   (success '(#\a #\b #\c #\d))))
 44 | 
 45 |   (it "still consumes input on failure"
 46 |     (check-equal? (parse-string (or/p (noncommittal/p (string/p "ab"))
 47 |                                       (char/p #\a))
 48 |                                 "a")
 49 |                   (failure (message (srcloc 'string 1 0 1 1) "end of input" '("'b'"))))))
 50 | 
 51 | (describe "lookahead/p"
 52 |   (it "succeeds without consuming input"
 53 |     (check-equal? (parse-string (do (lookahead/p (char/p #\a))
 54 |                                     (char/p #\a)
 55 |                                     eof/p)
 56 |                                 "a")
 57 |                   (success (void))))
 58 | 
 59 |   (it "still consumes input on failure"
 60 |     (check-equal? (parse-string (do (lookahead/p (string/p "ab"))
 61 |                                     (char/p #\a))
 62 |                                 "a")
 63 |                   (failure (message (srcloc 'string 1 0 1 1) "end of input" '("'b'"))))))
 64 | 
 65 | (describe "one-of/p"
 66 |   (it "succeeds if any of the provided elements are equal"
 67 |     (check-equal? (parse-string (one-of/p '(#\a #\b)) "a")
 68 |                   (success #\a))
 69 |     (check-equal? (parse-string (one-of/p '(#\a #\b)) "b")
 70 |                   (success #\b)))
 71 | 
 72 |   (it "fails if none of the provided elements are equal"
 73 |     (check-equal? (parse-string (one-of/p '(#\a #\b)) "c")
 74 |                   (failure (message (srcloc 'string 1 0 1 1) #\c '("a" "b"))))))
 75 | 
 76 | (describe "guard/p"
 77 |   (let ([byte/p (label/p "byte" (try/p (guard/p integer/p #{% . < . 256})))])
 78 |     (it "succeeds when the guard predicate is non-#f"
 79 |       (check-equal? (parse-string byte/p "128")
 80 |                     (success 128)))
 81 |     (it "fails when the guard predicate is #f"
 82 |       (check-equal? (parse-string byte/p "1024")
 83 |                     (failure (message (srcloc 'string 1 0 1 4) 1024
 84 |                                       '("byte")))))))
 85 | 
 86 | (describe "list/p"
 87 |   (context "when not given a separator"
 88 |     (define letter-digit-letter/p (list/p letter/p digit/p letter/p))
 89 |     (it "succeeds when given components in sequence"
 90 |       (check-equal? (parse-string letter-digit-letter/p "a1b")
 91 |                     (success (list #\a #\1 #\b))))
 92 |     (it "fails when given too few components"
 93 |       (check-equal? (parse-string letter-digit-letter/p "a1")
 94 |                     (failure (message (srcloc 'string 1 1 2 1)
 95 |                                       "end of input"
 96 |                                       '("letter"))))))
 97 |   (context "when given a separator"
 98 |     (define dotted-letter-digit-letter/p
 99 |       (list/p letter/p digit/p letter/p #:sep (char/p #\.)))
100 |     (it "succeeds when given separated components"
101 |       (check-equal? (parse-string dotted-letter-digit-letter/p "a.1.b")
102 |                     (success (list #\a #\1 #\b))))
103 |     (it "fails when given unseparated components"
104 |       (check-equal? (parse-string dotted-letter-digit-letter/p "a1b")
105 |                     (failure (message (srcloc 'string 1 1 2 1)
106 |                                       #\1 '("'.'")))))))
107 | 
108 | (describe "delay/p"
109 |   (define eval-count 0)
110 |   (define rec/p (or/p (char/p #\.)
111 |                       (list/p (char/p #\a)
112 |                               (delay/p (begin (set! eval-count (add1 eval-count))
113 |                                               rec/p)))))
114 | 
115 |   (it "delays evaluation of its argument"
116 |     (check-equal? eval-count 0))
117 | 
118 |   (it "allows a parser to be self-recursive"
119 |     (check-equal? (parse-string rec/p "aaa.")
120 |                   (success '(#\a (#\a (#\a #\.))))))
121 | 
122 |   (it "only evaluates its argument once"
123 |     (check-equal? (parse-string rec/p "aaa.")
124 |                   (success '(#\a (#\a (#\a #\.)))))
125 |     (check-equal? eval-count 1)))
126 | 
127 | (describe "many/p"
128 |   (context "when given a letter parser"
129 |     (define many-letters/p (many/p letter/p))
130 |     (it "succeeds when parsing multiple letters"
131 |       (check-equal? (parse-string many-letters/p "abc")
132 |                     (success (list #\a #\b #\c))))
133 |     (it "succeeds when parsing one letter"
134 |       (check-equal? (parse-string many-letters/p "a")
135 |                     (success (list #\a))))
136 |     (it "succeeds with an empty list when unable to parse"
137 |       (check-equal? (parse-string many-letters/p "123")
138 |                     (success (list)))))
139 |   (context "when given a letter parser and dot separator"
140 |     (define many-dotted-letters/p (many/p letter/p #:sep (char/p #\.)))
141 |     (it "succeeds with only letters when parsing dotted letters"
142 |       (check-equal? (parse-string many-dotted-letters/p "a.b.c")
143 |                     (success (list #\a #\b #\c)))))
144 |   (context "when given a letter parser and a maximum count of three"
145 |     (define at-most-three-letters/p (many/p letter/p #:max 3))
146 |     (it "succeeds when parsing three letters"
147 |       (check-equal? (parse-string at-most-three-letters/p "abc")
148 |                     (success (list #\a #\b #\c))))
149 |     (it "succeeds when parsing fewer than three letters"
150 |       (check-equal? (parse-string at-most-three-letters/p "a")
151 |                     (success (list #\a))))
152 |     (it "only consumes three letters when parsing four letters"
153 |       (check-equal? (parse-string at-most-three-letters/p "abcd")
154 |                     (success (list #\a #\b #\c)))))
155 |   (context "when given a maximum count of zero"
156 |     (it "consumes no input"
157 |       (check-equal? (parse-string (many/p letter/p #:max 0) "abc")
158 |                     (success (list)))))
159 |   (context "when given a letter parser and a minimum count of three"
160 |     (define at-least-three-letters/p (many/p letter/p #:min 3))
161 |     (it "succeeds when parsing three letters"
162 |       (check-equal? (parse-string at-least-three-letters/p "abc")
163 |                     (success (list #\a #\b #\c))))
164 |     (it "succeeds when parsing four letters"
165 |       (check-equal? (parse-string at-least-three-letters/p "abcd")
166 |                     (success (list #\a #\b #\c #\d))))
167 |     (it "fails when parsing two letters"
168 |       (check-equal? (parse-string at-least-three-letters/p "ab")
169 |                     (failure (message (srcloc 'string 1 1 2 1)
170 |                                       "end of input"
171 |                                       '("letter"))))))
172 |   (context "when given a minimum count of 2 and a maximum count of 4"
173 |     (define two-to-four-letters/p (many/p letter/p #:min 2 #:max 4))
174 |     (it "succeeds when parsing two letters"
175 |       (check-equal? (parse-string two-to-four-letters/p "ab")
176 |                     (success (list #\a #\b))))
177 |     (it "succeeds when parsing four letters"
178 |       (check-equal? (parse-string two-to-four-letters/p "abcd")
179 |                     (success (list #\a #\b #\c #\d))))
180 |     (it "fails when parsing one letter"
181 |       (check-equal? (parse-string two-to-four-letters/p "a")
182 |                     (failure (message (srcloc 'string 1 0 1 1)
183 |                                       "end of input"
184 |                                       '("letter")))))
185 |     (it "only consumes four letters when given five"
186 |       (check-equal? (parse-string two-to-four-letters/p "abcde")
187 |                     (success (list #\a #\b #\c #\d))))))
188 | 
189 | (describe "parser parameters"
190 |   (define param (make-parser-parameter #f))
191 | 
192 |   (it "returns the initial value before being set"
193 |     (check-equal? (parse-string (param) "")
194 |                   (success #f)))
195 | 
196 |   (it "returns the set value after being set"
197 |     (check-equal? (parse-string (do (param #t) (param)) "")
198 |                   (success #t)))
199 | 
200 |   (it "unsets the value when backtracking"
201 |     (check-equal? (parse-string (or/p (do (param #t) any-char/p)
202 |                                       (param))
203 |                                 "")
204 |                   (success #f)))
205 | 
206 |   (describe "parameterize/p"
207 |     (it "sets the value with a local extent"
208 |       (check-equal? (parse-string (do [(list a b) <- (parameterize/p ([param 1])
209 |                                                        (do [a <- (param)]
210 |                                                            (param 2)
211 |                                                            [b <- (param)]
212 |                                                            (pure (list a b))))]
213 |                                       [c <- (param)]
214 |                                       (pure (list a b c)))
215 |                                   "")
216 |                     (success (list 1 2 #f))))))
217 | 


--------------------------------------------------------------------------------
/megaparsack-test/tests/megaparsack/contract.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require data/applicative
 4 |          data/monad
 5 |          megaparsack
 6 |          megaparsack/text
 7 |          racket/list
 8 |          racket/contract
 9 |          rackunit
10 |          rackunit/spec)
11 | 
12 | (describe "parser/c"
13 |   (it "signals a contract violation on invalid input tokens"
14 |     (define exn (with-handlers ([exn:fail? values])
15 |                   (parse (char/p #\a) (list (syntax-box #f (srcloc #f #f #f #f #f))))))
16 |     (check-pred exn:fail:contract:blame? exn)
17 |     (define blame (exn:fail:contract:blame-object exn))
18 |     (check-equal? (blame-contract blame)
19 |                   '(-> char? (parser/c char? char?)))
20 |     (check-pred blame-swapped? blame)
21 |     (check-equal? (first (blame-context blame))
22 |                   "the input to"))
23 | 
24 |   (it "signals a contract violation on invalid results"
25 |     (define/contract foo/p
26 |       (parser/c any/c string?)
27 |       (do void/p (pure #f)))
28 |     (define exn (with-handlers ([exn:fail? values])
29 |                   (parse foo/p '())))
30 |     (check-pred exn:fail:contract:blame? exn)
31 |     (define blame (exn:fail:contract:blame-object exn))
32 |     (check-equal? (blame-contract blame)
33 |                   '(parser/c any/c string?))
34 |     (check-pred blame-original? blame)
35 |     (check-equal? (first (blame-context blame))
36 |                   "the result of")))
37 | 


--------------------------------------------------------------------------------
/megaparsack-test/tests/megaparsack/parser/json.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require megaparsack
 4 |          megaparsack/parser/json
 5 |          racket/function
 6 |          rackunit
 7 |          rackunit/spec)
 8 | 
 9 | (describe "parse-json-string"
10 |   (it "parses floating-point numbers"
11 |     (check-= (parse-result! (parse-json-string "42")) 42 0.001)
12 |     (check-= (parse-result! (parse-json-string ".3")) 0.3 0.001)
13 |     (check-= (parse-result! (parse-json-string "12.75")) 12.75 0.001)
14 |     (check-= (parse-result! (parse-json-string "1e+2")) 100 0.001)
15 |     (check-= (parse-result! (parse-json-string "1.2e-3")) 0.0012 0.00001))
16 | 
17 |   (it "parses strings"
18 |     (check-equal? (parse-result! (parse-json-string "\"hello, world\"")) "hello, world")
19 |     (check-equal? (parse-result! (parse-json-string "\"this (\\\") is a double quote\""))
20 |                   "this (\") is a double quote")
21 |     (check-equal? (parse-result! (parse-json-string "\"escape \\u1a3f!\""))
22 |                   "escape \u1a3f!"))
23 | 
24 |   (it "parses booleans"
25 |     (check-true (parse-result! (parse-json-string "true")))
26 |     (check-false (parse-result! (parse-json-string "false"))))
27 | 
28 |   (it "parses null as the symbol 'null"
29 |     (check-equal? (parse-result! (parse-json-string "null")) 'null))
30 | 
31 |   (it "parses arrays as lists"
32 |     (check-equal? (parse-result! (parse-json-string "[1, 2, 3]")) '(1 2 3))
33 |     (check-equal? (parse-result! (parse-json-string "[true, null, 42]")) '(#t null 42)))
34 | 
35 |   (it "parses objects as hasheqs with symbol keys"
36 |     (check-equal? (parse-result! (parse-json-string "{ \"a\": 1, \"b\": true }"))
37 |                   #hasheq((a . 1) (b . #t))))
38 | 
39 |   (it "fails when it cannot parse the entire string"
40 |     (check-exn #rx"unexpected: t\n  expected: end of input"
41 |                (thunk (parse-result! (parse-json-string "{}true"))))))
42 | 


--------------------------------------------------------------------------------
/megaparsack-test/tests/megaparsack/text.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require data/either
 4 |          megaparsack
 5 |          megaparsack/text
 6 |          rackunit
 7 |          rackunit/spec)
 8 | 
 9 | (describe "parse-syntax-string"
10 |   (it "uses source location information from the provided syntax object"
11 |     (let* ([stx #'"hello, world!"]
12 |            [result (parse-result! (parse-syntax-string (syntax/p (string/p "hello")) stx))])
13 |       (check-equal? (syntax-source stx) (syntax-source result))
14 |       (check-equal? (syntax-line stx) (syntax-line result))
15 |       (check-equal? (syntax-column stx) (syntax-column result))
16 |       (check-equal? (syntax-position stx) (syntax-position result))
17 |       (check-equal? (syntax-span result) 5))))
18 | 
19 | (describe "char-between/p"
20 |   (it "parses a single character"
21 |     (check-equal? (parse-string (char-between/p #\b #\f) "bcdef") (success #\b)))
22 |   (it "parses only characters between the given bounds"
23 |     (check-equal? (parse-string (char-between/p #\b #\f) "a")
24 |                   (failure (message (srcloc 'string 1 0 1 1) #\a
25 |                                     '("a character between 'b' and 'f'"))))
26 |     (check-equal? (parse-string (char-between/p #\b #\f) "g")
27 |                   (failure (message (srcloc 'string 1 0 1 1) #\g
28 |                                     '("a character between 'b' and 'f'"))))))
29 | 
30 | (describe "char-in/p"
31 |   (it "parses a single character"
32 |     (check-equal? (parse-string (char-in/p "aeiou") "a") (success #\a)))
33 |   (it "parses only characters in the given string"
34 |     (check-equal? (parse-string (char-in/p "aeiou") "b")
35 |                   (failure (message (srcloc 'string 1 0 1 1) #\b
36 |                                     '("'a'" "'e'" "'i'" "'o'" "'u'"))))))
37 | 
38 | (describe "string-ci/p"
39 |   (it "parses a string, ignoring case, and returns the input string"
40 |     (check-equal? (parse-string (string-ci/p "LDA") "lda") (success "lda"))
41 |     (check-equal? (parse-string (string-ci/p "lda") "LDA") (success "LDA"))
42 |     (check-equal? (parse-string (string-ci/p "LDA") "lDa") (success "lDa"))
43 |     (check-equal? (parse-string (string-ci/p "LDA") "LdA") (success "LdA"))))
44 | 


--------------------------------------------------------------------------------
/megaparsack/info.rkt:
--------------------------------------------------------------------------------
 1 | #lang info
 2 | 
 3 | (define version "1.8")
 4 | 
 5 | (define collection 'multi)
 6 | 
 7 | (define deps
 8 |   '("base"
 9 |     ["megaparsack-lib" #:version "1.8"]
10 |     ["megaparsack-doc" #:version "1.8"]
11 |     ["megaparsack-parser" #:version "1.8"]
12 |     ["megaparsack-parser-tools" #:version "1.8"]))
13 | (define build-deps
14 |   '())
15 | 
16 | (define implies
17 |   '("megaparsack-lib"
18 |     "megaparsack-doc"
19 |     "megaparsack-parser"
20 |     "megaparsack-parser-tools"))
21 | 


--------------------------------------------------------------------------------