├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── README.md ├── megaparsack-doc ├── info.rkt └── scribblings │ ├── info.rkt │ ├── megaparsack.scrbl │ └── megaparsack │ ├── basics.scrbl │ ├── choice.scrbl │ ├── differences-from-parsack.scrbl │ ├── reference.scrbl │ ├── state.scrbl │ ├── syntax.scrbl │ └── util.rkt ├── megaparsack-lib ├── info.rkt └── megaparsack │ ├── base.rkt │ ├── combinator.rkt │ ├── main.rkt │ └── text.rkt ├── megaparsack-parser-tools ├── info.rkt └── megaparsack │ └── parser-tools │ └── lex.rkt ├── megaparsack-parser ├── info.rkt └── megaparsack │ └── parser │ └── json.rkt ├── megaparsack-test ├── info.rkt └── tests │ └── megaparsack │ ├── base.rkt │ ├── contract.rkt │ ├── parser │ └── json.rkt │ └── text.rkt └── megaparsack └── info.rkt /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: [push] 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | fail-fast: false 8 | matrix: 9 | racket-version: [ '6.12', '7.0', '7.9', '8.0', stable ] 10 | steps: 11 | - uses: actions/checkout@v2 12 | with: { path: repo } 13 | - uses: Bogdanp/setup-racket@v1.5 14 | with: 15 | version: ${{ matrix.racket-version }} 16 | dest: '$GITHUB_WORKSPACE/racket' 17 | sudo: never 18 | - name: install 19 | run: raco pkg install --installation --auto --link 20 | repo/megaparsack-{lib,parser,parser-tools,doc,test} 21 | - name: test 22 | run: raco test -ep megaparsack-{lib,parser,parser-tools,test} 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | compiled/ 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2016, Alexis King 4 | 5 | Permission to use, copy, modify, and/or distribute this software 6 | for any purpose with or without fee is hereby granted, provided 7 | that the above copyright notice and this permission notice appear 8 | in all copies. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 11 | WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 12 | WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 13 | AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 14 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA 15 | OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 16 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 17 | PERFORMANCE OF THIS SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # megaparsack [![Build Status](https://github.com/lexi-lambda/megaparsack/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/lexi-lambda/megaparsack/actions/workflows/build.yml) 2 | 3 | This library implements a set of **practical parser combinators** inspired by libraries like [parsec][parsec], [megaparsec][megaparsec], and [parsack][parsack]. It can be used to build parsers for arbitrary input data, and it includes built-in facilities for parsing textual data and tokens produced by [`parser-tools/lex`][parser-tools/lex]. 4 | 5 | To install it, simply install the `megaparsack` package: 6 | 7 | ``` 8 | $ raco pkg install megaparsack 9 | ``` 10 | 11 | [For more information, see the full documentation.][megaparsack-doc] 12 | 13 | [megaparsack-doc]: http://docs.racket-lang.org/megaparsack/ 14 | [megaparsec]: https://hackage.haskell.org/package/megaparsec 15 | [parsack]: http://docs.racket-lang.org/parsack/ 16 | [parsec]: https://hackage.haskell.org/package/parsec 17 | [parser-tools/lex]: http://docs.racket-lang.org/parser-tools/Lexers.html 18 | -------------------------------------------------------------------------------- /megaparsack-doc/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define version "1.8") 4 | 5 | (define collection 'multi) 6 | 7 | (define deps 8 | '()) 9 | (define build-deps 10 | '("base" 11 | "functional-doc" 12 | "functional-lib" 13 | ["megaparsack-lib" #:version "1.8"] 14 | "megaparsack-parser-tools" 15 | "parser-tools-doc" 16 | "parser-tools-lib" 17 | "racket-doc" 18 | "scribble-lib")) 19 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define scribblings 4 | '(["megaparsack.scrbl" (multi-page) (parsing-library)])) 5 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require (for-label data/monad)) 4 | 5 | @title{Megaparsack: Practical Parser Combinators} 6 | 7 | @author{@author+email["Alexis King" "lexi.lambda@gmail.com"]} 8 | 9 | @defmodule[megaparsack] 10 | 11 | Megaparsack is a @emph{parser combinator} library: a composable set of simple parsers that can be used 12 | to create larger parsing systems that can parse arbitrary grammars, including context-sensitive ones. 13 | Megaparsack uses the @racket[gen:monad] generic interface to provide a uniform interface to sequence 14 | and compose different parsers using a base set of primitives. 15 | 16 | @table-of-contents[] 17 | 18 | @include-section["megaparsack/basics.scrbl"] 19 | @include-section["megaparsack/choice.scrbl"] 20 | @include-section["megaparsack/syntax.scrbl"] 21 | @include-section["megaparsack/state.scrbl"] 22 | @include-section["megaparsack/reference.scrbl"] 23 | @include-section["megaparsack/differences-from-parsack.scrbl"] 24 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack/basics.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require "util.rkt") 4 | 5 | @title[#:tag "parsing-basics"]{Parsing Basics} 6 | 7 | Megaparsack is a library for manipulating @deftech{parsers}, which are, very simply, functions that 8 | operate on streams of tokens. This is very broad: the tokens in question can simply be characters in a 9 | string, they can be tokens produced as the result of a lexer, they can be syntax objects, or they can 10 | even be completely arbitrary data. 11 | 12 | What’s special about parsers is that they can be @emph{sequenced}—that is, multiple parsers can be 13 | chained together to make a larger parser. For example, to make a parser that parses the string 14 | @racket["ab"], you might compose two parsers that parse the characters @racket[#\a] and @racket[#\b] 15 | individually. 16 | 17 | @section[#:tag "getting-started"]{Getting started with parsers} 18 | 19 | To get started, require the @racketmodname[megaparsack] and @racketmodname[megaparsack/text] 20 | libraries. 21 | 22 | @(racketinput 23 | (require #,(racketmodname megaparsack) #,(racketmodname megaparsack/text))) 24 | 25 | This will import the basic parser functions, as well as some built-in parsers for parsing textual 26 | data. Now, you can use the @racket[parse-string] function along with basic parsers to parse values 27 | from strings. Let’s start by parsing an integer: 28 | 29 | @(parser-interaction 30 | (eval:check (parse-string integer/p "42") (success 42))) 31 | 32 | Since the parser was successful, it returns a @racket[success] value. The @racket[parse-string] 33 | function returns an @functech{either} value that represents success and failure. For example, take a 34 | look at what would happen when a parse fails: 35 | 36 | @(parser-interaction 37 | (parse-string integer/p "not an integer")) 38 | 39 | When a parse fails, it returns a @racket[failure] value that encodes some information about what 40 | caused the parser to error. You can convert that information to a human-readable error message using 41 | the @racket[parse-error->string] function: 42 | 43 | @(parser-interaction 44 | (map-failure parse-error->string (parse-string integer/p "not an integer"))) 45 | 46 | You can also assert that a parse will succeed and just get the result out by using the 47 | @racket[parse-result!] function, which will throw an @racket[exn:fail:read:megaparsack] exception 48 | when the parser fails. 49 | 50 | @(parser-interaction 51 | (eval:check (parse-result! (parse-string integer/p "42")) 42) 52 | (eval:error (parse-result! (parse-string integer/p "not an integer")))) 53 | 54 | You may notice that the error message includes some useful information. Specifically, megaparsack will 55 | attempt to provide the following information to the user whenever a parse fails: 56 | 57 | @itemlist[ 58 | @item{the source location of the error that caused the parse to fail} 59 | @item{the token that was “unexpected”, which caused the parse to fail} 60 | @item{a set of values that were “expected”, which would have been accepted as valid values for the 61 | parse}] 62 | 63 | In the above case, the parser reports that it expected an integer, but it encountered the character 64 | @tt{n}, which obviously isn’t a valid piece of an integer. 65 | 66 | @section[#:tag "parsing-textual-data"]{Parsing textual data} 67 | 68 | The @racket[integer/p] parser, as would be expected, parses a single integer. However, this isn’t very 69 | useful on its own—most of the time, you will want to parse something much more complicated than that. 70 | However, it is a useful building block for creating larger parsers. Let’s look at some other “building 71 | block” parsers that work with strings. 72 | 73 | The @racket[letter/p], @racket[digit/p], and @racket[space/p] parsers parse a single letter, digit, or 74 | whitespace character, respectively: 75 | 76 | @margin-note{ 77 | Note that these parsers succeed even when only part of the input is consumed. This is important when 78 | combining parsers together, but if you want to ensure a parser parses the entire input, you can use 79 | @racket[eof/p].} 80 | 81 | @(parser-interaction 82 | (eval:check (parse-string letter/p "hello") (success #\h)) 83 | (eval:check (parse-string digit/p "123") (success #\1)) 84 | (eval:check (parse-string space/p " ") (success #\space))) 85 | 86 | The @racket[char/p] function creates a parser that parses a single character: 87 | 88 | @(parser-interaction 89 | (eval:check (parse-string (char/p #\a) "abc") (success #\a)) 90 | (eval:error (parse-result! (parse-string (char/p #\a) "xyz")))) 91 | 92 | It may not make very much sense why the @racket[char/p] parser is useful—after all, it just seems to 93 | return itself. Indeed, in these contrived examples, it’s not very useful at all! However, it becomes 94 | @emph{extremely} important when combining multiple parsers together. 95 | 96 | @section[#:tag "sequencing-parsers"]{Sequencing parsers} 97 | 98 | @(define-parser-interaction sequencing-interaction close-sequencing!) 99 | 100 | All @tech{parsers} are @functech{monads}, so it’s possible to use @racket[chain] and @racket[do] from 101 | @racketmodname[data/monad] to combine multiple parsers together to create a bigger parser. For 102 | example, let’s create a parser that parses the letters @tt{a} and @tt{b} in sequence: 103 | 104 | @(sequencing-interaction 105 | (eval:alts (require @#,racketmodname[data/monad]) 106 | (void)) 107 | (define ab/p 108 | (do (char/p #\a) 109 | (char/p #\b)))) 110 | 111 | Now we can use our new parser just like any other: 112 | 113 | @(sequencing-interaction 114 | (eval:check (parse-string ab/p "ab") (success #\b)) 115 | (eval:error (parse-result! (parse-string ab/p "ac")))) 116 | 117 | The parser succeeds when we supply it with the string @racket["ab"], but it fails when it doesn’t 118 | match, and we automatically get a pretty good error message. 119 | 120 | One thing to note is that the result of the parser is not particularly meaningful—it’s just 121 | @racket[#\b]. That’s because the last parser in the @racket[do] block was @racket[(char/p #\b)], so 122 | the result of @racket[ab/p] is just the result of its final parser. If we wanted to, we could change 123 | the result to be whatever we wanted (but only on a successful parse) by returning our own value at the 124 | end of the @racket[do] block: 125 | 126 | @(sequencing-interaction 127 | (define ab*/p 128 | (do (char/p #\a) 129 | (char/p #\b) 130 | (pure "success!")))) 131 | 132 | We need the @racket[pure] wrapper in order to properly “lift” our arbitrary value into the context of 133 | a parser. Now we can run our new parser and get back our custom value when it succeeds: 134 | 135 | @(sequencing-interaction 136 | (eval:check (parse-string ab*/p "ab") (success "success!"))) 137 | 138 | This parser is a little silly, but we can use these concepts to implement parsers that might actually 139 | be useful. For example, you might need to parser two integers, separated by a comma, then add them 140 | together. Using the monadic parser interface, this is extremely simple: 141 | 142 | @(sequencing-interaction 143 | (define add-two-ints/p 144 | (do [x <- integer/p] 145 | (char/p #\,) 146 | [y <- integer/p] 147 | (pure (+ x y))))) 148 | 149 | This definition is a little bit more complicated because we are using the results of the two integer 150 | parsers in our sequence, so we use the @racket[[_a <- _b]] syntax to “pull out” the result of each 151 | parser and bind it to a variable. Then we can add the two results together at the end. Actually using 152 | this parser works just as intended: 153 | 154 | @(sequencing-interaction 155 | (eval:check (parse-string add-two-ints/p "7,12") (success 19))) 156 | 157 | Using this technique, it’s possible to build up fairly complex parsers from small, self-contained 158 | units. 159 | 160 | @(close-sequencing!) 161 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack/choice.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require "util.rkt") 4 | 5 | @(define-parser-interaction choice-interaction close-choice!) 6 | 7 | @title[#:tag "choice" #:style 'toc]{Parsers with Choice} 8 | 9 | Most grammars are not completely static—usually, there is an element of @emph{choice}. For example, 10 | when parsing a boolean, a valid value is @tt{true} or @tt{false}. Even more complicated, when parsing 11 | a list of elements, a valid input might be @emph{any number of booleans}, all next to one another. To 12 | handle these kinds of grammars, a parser can be provided with multiple paths, each of which are valid. 13 | 14 | @local-table-of-contents[] 15 | 16 | @section[#:tag "parsing-branching"]{Providing multiple paths} 17 | 18 | To create a parser with multiple possibilities, use the @racket[or/p] combinator. It accepts any 19 | number of parsers, and it tries them one at a time until one of them matches. For example, let’s 20 | consider a parser that parses either the string @racket["true"] or @racket["false"], then returns the 21 | value as a Racket boolean: 22 | 23 | @(choice-interaction 24 | (define true/p 25 | (do (string/p "true") 26 | (pure #t))) 27 | (define false/p 28 | (do (string/p "false") 29 | (pure #f))) 30 | (define boolean/p 31 | (or/p true/p 32 | false/p))) 33 | 34 | By using @racket[or/p], we’ve created a choice point, where the parser will try each path before 35 | giving up. If none of the paths match, the parser will still fail, but if any of the paths match, it 36 | will consider the parse successful and return. To demonstrate that this works, we can use our new 37 | parser on some strings: 38 | 39 | @(choice-interaction 40 | (eval:check (parse-string boolean/p "true") (success #t)) 41 | (eval:check (parse-string boolean/p "false") (success #f))) 42 | 43 | The @racket[or/p] combinator also automatically cooperates with error handling to provide helpful 44 | error messages when parsing fails: 45 | 46 | @(choice-interaction 47 | (eval:error (parse-result! (parse-string boolean/p "not a boolean")))) 48 | 49 | Note that the error includes all the possible values that would have been considered valid at the 50 | point that the parser failed. 51 | 52 | Remember that the @racket[or/p] combinator is not magic: it does not attempt to predict which parse 53 | will be valid, and it does not even try to look ahead to see which parse will be the longest. This can 54 | cause problems when two different parses could @emph{both} succeed—@racket[or/p] will just pick the 55 | first one: 56 | 57 | @(parser-interaction 58 | (define overlapping/p 59 | (or/p (string/p "hello") 60 | (string/p "hello, world!"))) 61 | (eval:check (parse-string overlapping/p "hello, world!") (success "hello"))) 62 | 63 | Just like ordinary boolean @racket[or], keep in mind that order does matter with @racket[or/p]. 64 | 65 | @subsection{Parsing ambiguous grammars} 66 | 67 | So, if @racket[or/p] does not perform any lookahead, how exactly does it choose between parsers? It 68 | might @emph{seem} like it tries each parser completely, then backtracks when any of them fail, but 69 | this is not entirely true—consider the parser above, fixed so the longest match is first: 70 | 71 | @(choice-interaction 72 | (define overlapping/p 73 | (or/p (string/p "hello, world!") 74 | (string/p "hello")))) 75 | 76 | You might expect that, if the first match fails, it will try the second one, but in practice, this 77 | doesn’t actually work: 78 | 79 | @(choice-interaction 80 | (eval:check (parse-string overlapping/p "hello, world!") (success "hello, world!")) 81 | (eval:error (parse-result! (parse-string overlapping/p "hello")))) 82 | 83 | What gives? Take a close look at the error message: it is expecting the rest of @tt{hello, world!}, 84 | but obviously we only gave it @tt{hello}. Why isn’t the parser backtracking? Well, megaparsack 85 | actually does not backtrack by default. Instead, it implements a single-character lookahead: it tries 86 | to parse the first token from each branch, and if it succeeds, it @emph{commits} to that path. 87 | 88 | This means that, since part of the @tt{hello, world} parse was successful, the parser has already 89 | committed to that branch and will not try any of the other options. This turns out to provide far 90 | superior error reporting because it reports to the user precisely where the error occurred, not 91 | somewhere much earlier in the parse. However, this obviously causes problems in this case where the 92 | parse is @emph{ambiguous}, or more generally, the choice cannot be determined by a single character 93 | of lookahead. 94 | 95 | To solve this by allowing the parser to backtrack, use the @racket[try/p] combinator, which converts 96 | a parser into one that backtracks upon failure. We can use this to solve our issue with our parser: 97 | 98 | @(choice-interaction 99 | (define backtracking-overlapping/p 100 | (or/p (try/p (string/p "hello, world!")) 101 | (string/p "hello"))) 102 | (eval:check (parse-string backtracking-overlapping/p "hello, world!") (success "hello, world!")) 103 | (eval:check (parse-string backtracking-overlapping/p "hello") (success "hello"))) 104 | 105 | All that @racket[try/p] does is disable the “committing” behavior mentioned earlier: instead of 106 | committing to a particular path once any of the parse succeeds, any error that occurs within the 107 | parser provided to @racket[try/p] is non-fatal, and the parser will backtrack and try the next 108 | alternative. 109 | 110 | @subsection{Backtracking with caution} 111 | 112 | In this case, since the parse is truly ambiguous based on the first character, @racket[try/p] is the 113 | correct approach. Note that the error messages are still helpful upon failure: 114 | 115 | @(choice-interaction 116 | (eval:error (parse-result! (parse-string backtracking-overlapping/p "not hello")))) 117 | 118 | However, be deliberate about where you put @racket[try/p] because it is very easy to end up with a 119 | parser that provides completely useless error messages because all errors simply backtrack instead of 120 | failing fast and reporting the real problem. For an example of this, consider a parser that parses an 121 | integer or a boolean, depending on a label provided first: 122 | 123 | @(choice-interaction 124 | (define the-labeled-integer/p 125 | (do (string/p "the integer: ") 126 | integer/p)) 127 | (define the-labeled-boolean/p 128 | (do (string/p "the boolean: ") 129 | boolean/p))) 130 | 131 | It might be tempting to use @racket[try/p] here because we know that the integer case might fail. 132 | Therefore, you might write the parser like this: 133 | 134 | @(choice-interaction 135 | (define try-labeled/p 136 | (or/p (try/p the-labeled-integer/p) 137 | the-labeled-boolean/p))) 138 | 139 | This parser seems innocuous enough, right? It even works successfully: 140 | 141 | @(choice-interaction 142 | (eval:check (parse-string try-labeled/p "the integer: 42") (success 42)) 143 | (eval:check (parse-string try-labeled/p "the boolean: false") (success #f))) 144 | 145 | But there is a lurking problem with this parser, and that’s its error messages. Consider a mismatch, 146 | when we provide the @tt{the integer:} label but do not actually provide an integer: 147 | 148 | @(choice-interaction 149 | (eval:error (parse-result! (parse-string try-labeled/p "the integer: false")))) 150 | 151 | Oops. What happened? Well, the parser tried to parse an integer, but it failed, so it backtracked. It 152 | then tried to parse a boolean, and it parsed the @tt{the}, but then it failed, too, so it reported an 153 | error message. To a user, though, that error message is totally useless. The @emph{actual} issue is 154 | that they should have provided an integer, but instead provided a boolean. Unfortunately, the 155 | overzealous backtracking has eliminated that information. 156 | 157 | This is tricky, because we can’t just drop the @racket[try/p]—since both cases share @tt{the}, the 158 | parse is ambiguous without a little bit of lookahead. In order to fix this, what we really want to 159 | do is factor out the common @tt{the}, which will allow us to eliminate the @racket[try/p] altogether: 160 | 161 | @(choice-interaction 162 | (define labeled-integer/p 163 | (do (string/p "integer: ") 164 | integer/p)) 165 | (define labeled-boolean/p 166 | (do (string/p "boolean: ") 167 | boolean/p)) 168 | (define labeled/p 169 | (do (string/p "the ") 170 | (or/p labeled-integer/p 171 | labeled-boolean/p)))) 172 | 173 | Since we’ve removed all of the uses of @racket[try/p], now the parser can provide much more precise 174 | error messages when we provide invalid input. 175 | 176 | @(choice-interaction 177 | (eval:error (parse-result! (parse-string labeled/p "the integer: false")))) 178 | 179 | @section{Parsing sequences} 180 | 181 | Using @racket[or/p], it is possible to choose between alternatives when parsing, but what if a 182 | particular grammar permits @emph{any number of} elements in sequence? For that, you can use the 183 | @racket[many/p] combinator. It accepts a parser and attempts to parse it over and over again until 184 | it fails. For example, here is a parser that parses any number of occurrences of the letter @tt{a}: 185 | 186 | @(parser-interaction 187 | (eval:check (parse-string (many/p (char/p #\a)) "") (success '())) 188 | (eval:check (parse-string (many/p (char/p #\a)) "a") (success '(#\a))) 189 | (eval:check (parse-string (many/p (char/p #\a)) "aaaa") (success '(#\a #\a #\a #\a)))) 190 | 191 | This allows creating grammars that parse arbitrary numbers of values. The @racket[many/p] combinator 192 | accepts an optional keyword argument @racket[#:min] to specify a minimum number of values to 193 | parse. This can be used to parse two integers separated by some amount of whitespace, for example. 194 | 195 | @(parser-interaction 196 | (define two-integers/p 197 | (do [x <- integer/p] 198 | (many/p space/p #:min 1) 199 | [y <- integer/p] 200 | (pure (list x y)))) 201 | (eval:check (parse-string two-integers/p "13 102") (success '(13 102)))) 202 | 203 | Perhaps even more frequently, though, you may want to parse some number of values separated by some 204 | delimiter. For example, perhaps you want to parse a whole list of integers separated by commas. That 205 | can be accomplished by passing a parser for the @racket[#:sep] argument to @racket[many/p]. 206 | 207 | @(parser-interaction 208 | (define many-integers/p 209 | (many/p integer/p #:sep (char/p #\,))) 210 | (eval:check (parse-string many-integers/p "1,2,3,5,8,13") (success '(1 2 3 5 8 13)))) 211 | 212 | Often an unbounded number of values is undesirable: some limit is desired. The @racket[#:max] 213 | argument to @racket[many/p] allows specifying a max number of values to parse. For example, we may 214 | not wish to allow more than five comma-separated integers. 215 | 216 | @(parser-interaction 217 | (define at-most-five-integers/p 218 | (many/p integer/p #:sep (char/p #\,) #:max 5)) 219 | (eval:check (parse-string at-most-five-integers/p "1,2,3") (success '(1 2 3))) 220 | (eval:check (parse-string at-most-five-integers/p "1,2,3,5,8,13") (success '(1 2 3 5 8)))) 221 | 222 | @(close-choice!) 223 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack/differences-from-parsack.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require "util.rkt") 4 | 5 | @title[#:tag "differences-from-parsack"]{Appendix: Parsack vs Megaparsack} 6 | 7 | Megaparsack is not the only port of Parsec to Racket, and in fact, it isn’t even the first. The 8 | original Parsec port is @hyperlink["http://docs.racket-lang.org/parsack/index.html"]{Parsack}. When 9 | looking for a parser combinator library, you might be unsure which one to pick, so this attempts to 10 | provide an unbiased comparison between the two libraries. 11 | 12 | Without getting into the nitty gritty details, here’s a quick overview of the differences: 13 | 14 | @itemlist[ 15 | @item{Megaparsack is about two and a half years newer, initially released in the spring of 2016, 16 | while Parsack was released in the fall of 2013.} 17 | 18 | @item{Probably the most significant difference in the two libraries’ APIs is that Megaparsack can 19 | parse arbitrary tokens as input, while Parsack is specialized to exclusively operate on text or 20 | bytes. This allows Megaparsack to operate after an initial lexing phase (such as using 21 | @racketmodname[parser-tools/lex] with @racketmodname[megaparsack/parser-tools/lex]), while 22 | Parsack is designed to exclusively parse input directly.} 23 | 24 | @item{Megaparsack supports the production of syntax objects from parsers automatically, whereas 25 | Parsack does not.} 26 | 27 | @item{A less impactful difference but still significant design difference is that Megaparsack 28 | implements the @racket[gen:functor], @racket[gen:applicative], and @racket[gen:monad] 29 | interfaces from the @seclink["top" #:doc '(lib "scribblings/data/functional.scrbl")]{ 30 | @tt{functional}} library, while Parsack is entirely monomorphic and provides its own 31 | sequencing and composition operators.} 32 | 33 | @item{Megaparsack provides contracts on parsers, while Parsack only includes a simple predicate. This 34 | is more important for Megaparsack because of the different token types that parsers can accept, 35 | but it’s also useful in general for denoting what parsers can produce.} 36 | 37 | @item{As a consequence of the above four differences, Parsack is currently @italic{considerably} 38 | faster than Megaparsack, @bold{by more than an order of magnitude}.} 39 | 40 | @item{Megaparsack’s documentation is better than Parsack’s, and it includes a tutorial-style guide.} 41 | 42 | @item{Megaparsack’s naming conventions are somewhat closer to idiomatic Racket, whereas Parsack’s 43 | names are more directly ported from Haskell.} 44 | 45 | @item{Megaparsack provides @tech{parser parameters} for maintaining arbitrarily many distinct cells 46 | of user-defined parser state, whereas Parsack only provides a single cell of state.} 47 | 48 | @item{Both Megaparsack and Parsack use the same general model for parsing, backtracking, and error 49 | reporting, which is adapted from the common parent, Parsec.} 50 | 51 | @item{Both Megaparsack and Parsack expose a monadic interface for composing and sequencing parsers 52 | together, and they both provide a minimal set of combinators for producing new parsers from 53 | primitives.}] 54 | 55 | My general recommendation is to use Megaparsack unless performance is an issue, at which point it may 56 | be worth it to use Parsack, instead. However, while some of Megaparsack’s design decisions do make it 57 | inherently somewhat slower than Parsack, it’s likely that a lot of Megaparsack can be optimized much 58 | more than it currently is. If you run into performance problems with Megaparsack, feel free to open 59 | a bug report, and it might be possible to make Megaparsack palatably fast. 60 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack/reference.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require "util.rkt") 4 | 5 | @title[#:tag "reference"]{API Reference} 6 | 7 | A @tech{parser} is a value that represents a method of turning a syntax object or sequence of 8 | syntax objects an arbitrary Racket value. Parsers can be created using various primitives, then 9 | sequenced together using parser combinators to create larger parsers. 10 | 11 | Parsers are @functech{functors}, @functech{applicative functors}, and @functech{monads}, which allows 12 | them to be mapped over and sequenced together using the corresponding generic interfaces. 13 | 14 | @section[#:tag "primitives"]{Primitives} 15 | 16 | @defproc[(parser? [v any/c]) boolean?]{ 17 | Returns @racket[#t] if @racket[v] is a @tech{parser}, otherwise returns @racket[#f].} 18 | 19 | @defproc[(parser/c [in-ctc contract?] [out-ctc contract?]) contract?]{ 20 | Produces a @reftech{contract} that recognizes @tech{parsers}. Tokens consumed by the parser must match 21 | @racket[in-ctc], and values returned by the parser must match @racket[out-ctc]. 22 | 23 | If both @racket[in-ctc] and @racket[out-ctc] are @reftech{chaperone contracts}, then the result will 24 | also be a @reftech{chaperone contract}.} 25 | 26 | @defproc[(parse [parser parser?] [boxes (listof syntax-box?)]) (either/c message? any/c)]{ 27 | Runs @racket[parser] on @racket[boxes] and returns either the result of a successful parse or a value 28 | that includes information about the parse failure.} 29 | 30 | @defproc[(parse-error->string [message message?]) string?]{ 31 | Converts a parse error to a human-readable string. This is used by @racket[parse-result!] to format 32 | the message used in the exception, but it can also be used if you want to display the error in other 33 | ways.} 34 | 35 | @defproc[(parse-result! [result (either/c message? any/c)]) any/c]{ 36 | Extracts a successful parse value from @racket[result]. If @racket[result] is a failure, raises 37 | @racket[exn:fail:read:megaparsack] with the failure message converted to a string using 38 | @racket[parse-error->string].} 39 | 40 | @defstruct*[(exn:fail:read:megaparsack exn:fail:read) ([unexpected any/c] 41 | [expected (listof string?)]) 42 | #:transparent]{ 43 | Raised by @racket[parse-result!] when given a parse failure.} 44 | 45 | @defstruct*[syntax-box ([datum any/c] [srcloc srcloc?]) #:transparent]{ 46 | Represents a single parsable entity. Just like @reftech{syntax objects}, a @deftech{syntax box} 47 | associates some source location information with an arbitrary datum. However, unlike ordinary syntax 48 | objects, values like lists and vectors can be stored in a syntax box without being recursively wrapped 49 | in additional layers of syntax objects. 50 | 51 | The @racket[datum] can be anything at all, but usually it is either a character or some token produced 52 | as the result of lexing. It is unlikely that you will need to create @racket[syntax-box] values 53 | yourself; rather, use higher-level functions like @racket[parse-string] that create these values for 54 | you.} 55 | 56 | @defstruct*[message ([srcloc srcloc?] [unexpected any/c] [expected (listof string?)]) #:transparent]{ 57 | Represents a parse error. Generally you will not need to construct or use these yourself, since they 58 | will be automatically constructed when parsers fail, and you can convert them to a human-readable 59 | error message using @racket[parse-error->string]. For more complicated use cases, though, you may want 60 | to raise custom parse errors using @racket[fail/p] or format your own error messages, so you can use 61 | this structure directly.} 62 | 63 | @defthing[void/p (parser/c any/c void?)]{ 64 | A parser that always succeeds and always returns @|void-const|.} 65 | 66 | @defproc[(or/p [parser parser?] ...+) parser?]{ 67 | Tries each @racket[parser] in succession until one either succeeds or consumes input, at which point 68 | its result will be returned as the result of the overall parse. Parsers that consume input but fail 69 | will halt further parsers from being tried and will simply return an error; if backtracking is 70 | desired, the parser should be wrapped with @racket[try/p]. 71 | 72 | @history[#:changed "1.5" @elem{Changed to always return the first successful result, rather than 73 | continuing to try parsers until one consumes input. The new behavior is more predictable 74 | and more consistent with existing Parsec implementations, though the old behavior was 75 | more consistent with the presentation in the original paper.}]} 76 | 77 | @defproc[(try/p [parser parser?]) parser?]{ 78 | Creates a new parser like @racket[parser], except that it does not consume input if @racket[parser] 79 | fails. This allows the parser to backtrack and try other alternatives when used inside a 80 | @racket[or/p] combinator.} 81 | 82 | @defproc[(noncommittal/p [parser parser?]) parser?]{ 83 | @margin-note{ 84 | Note that unlike @racket[lookahead/p], @racket[noncommittal/p] @emph{only} affects backtracking; 85 | the consumed input is still removed from the input stream. If true lookahead is desired, use 86 | @racket[lookahead/p], instead.} 87 | 88 | Creates a new parser like @racket[parser], except that it is not considered to have consumed 89 | input for the purposes of backtracking if @racket[parser] succeeds. This allows a future failure to 90 | backtrack to an earlier choice point (assuming the failure does not itself consume input). 91 | 92 | @(parser-examples 93 | (eval:error (parse-result! 94 | (parse-string (or/p (do (char/p #\a) (char/p #\b)) 95 | (do (char/p #\a) (char/p #\c))) 96 | "ac"))) 97 | (eval:check (parse-result! 98 | (parse-string (or/p (do (noncommittal/p (char/p #\a)) (char/p #\b)) 99 | (do (char/p #\a) (char/p #\c))) 100 | "ac")) 101 | #\c)) 102 | 103 | This can be useful to allow backtracking within a parse sequence in a more limited fashion than 104 | @racket[try/p] would allow. In particular, contrast the meaning of 105 | @nested[#:style 'inset]{@racket[(try/p (do _parser-a _parser-b))]} 106 | with that of 107 | @nested[#:style 'inset]{@racket[(do (noncommittal/p _parser-a) _parser-b)].} 108 | The version using @racket[try/p] will always backtrack if either @racket[_parser-a] or 109 | @racket[_parser-b] fails in any way. However, the version using @racket[noncommittal/p] will not 110 | backtrack if either parser fails in a way that would not ordinarily backtrack; it just allows a 111 | recoverable failure in @racket[_parser-b] to backtrack to a choice point that occurred prior to 112 | @racket[_parser-a]. 113 | 114 | Note that @racket[noncommittal/p] does @emph{not} affect whether @racket[parser] is considered to have 115 | consumed input if it fails, which is to say that @racket[try/p] and @racket[noncommittal/p] are 116 | orthogonal and should be combined if both behaviors are desired. 117 | 118 | @history[#:added "1.7"]} 119 | 120 | @(define-parser-interaction lookahead-interaction close-lookahead!) 121 | 122 | @defproc[(lookahead/p [parser parser?]) parser?]{ 123 | Creates a new parser like @racket[parser], except that it does not consume input if @racket[parser] 124 | succeeds, so subsequent parsers will continue from the same location in the input stream. This 125 | allows a parser to ensure something will appear in future input without immediately consuming it. 126 | 127 | For example, @racket[lookahead/p] can be used to implement a parser that only succeeds at the end of a 128 | line, but does not consume the newline character itself: 129 | 130 | @(lookahead-interaction 131 | (define end-of-line/p (lookahead/p (char/p #\newline)))) 132 | 133 | This can be used to parse, for example, line comments that span to the end of the current line, while 134 | still allowing a later parser to consume the newline character: 135 | 136 | @(lookahead-interaction 137 | (define rest-of-line/p 138 | (or/p (do end-of-line/p (pure "")) 139 | (do [c <- any-char/p] 140 | [cs <- rest-of-line/p] 141 | (pure (string-append (string c) cs))))) 142 | (define line-comment/p 143 | (do (try/p (string/p "# ")) 144 | rest-of-line/p)) 145 | (eval:check (parse-string (many/p (do [line <- line-comment/p] 146 | (char/p #\newline) 147 | (pure line))) 148 | (string-append "# hello\n" 149 | "# world\n")) 150 | (success (list "hello" "world")))) 151 | 152 | Note that if @racket[parser] @emph{fails}, @racket[lookahead/p] has no effect; if it consumed input 153 | before failing, it will not try other alternatives in an enclosing @racket[or/p]. Wrap @racket[parser] 154 | with @racket[try/p] if this behavior is undesirable. 155 | 156 | @history[#:added "1.5"]} 157 | 158 | @(close-lookahead!) 159 | 160 | @defproc[(satisfy/p [proc (any/c . -> . any/c)]) parser?]{ 161 | Creates a parser that checks if @racket[proc] produces a non-@racket[#f] value when applied to a 162 | single datum. If so, it consumes the datum and returns successfully; otherwise, it fails without 163 | consuming input.} 164 | 165 | @defthing[eof/p (parser/c any/c void?)]{ 166 | A parser that only succeeds when there is no more input left to consume. It always returns 167 | @|void-const|.} 168 | 169 | @defproc[(label/p [label string?] [parser parser?]) parser?]{ 170 | Creates a parser like @racket[parser], except that failures are reported in terms of @racket[label] 171 | instead of whatever names would have been used instead.} 172 | 173 | @defproc[(hidden/p [parser parser?]) parser?]{ 174 | Like @racket[label/p], adjusts how failures are reported for @racket[parser], but @racket[hidden/p] 175 | completely hides any failure information produced by @racket[parser] when reporting errors. (This is 176 | useful when parsing things like whitespace which are usually not useful to include in error 177 | messages.)} 178 | 179 | @defproc[(syntax/p [parser parser?]) (parser/c any/c syntax?)]{ 180 | Produces a parser like @racket[parser], except that its result is wrapped in a @reftech{syntax object} 181 | that automatically incorporates source location information from the input stream. This allows parsers 182 | to add a sort of automated source location tracking to their output. 183 | 184 | The @racket[syntax/p] combinator makes source location wrapping opt-in, which is desirable since it is 185 | often useful to return values from combinators that are intermediate values not intended to be wrapped 186 | in syntax (for example, @racket[many/p] returns a list of results, not a syntax list).} 187 | 188 | @defproc[(syntax-box/p [parser parser?]) (parser/c any/c syntax-box?)]{ 189 | Like @racket[syntax/p], but wraps the result in a @racket[syntax-box] instead of a @reftech{syntax 190 | object}. This is useful if you want to get the source location information from a parse result, but 191 | you want to ensure the underlying datum remains untouched.} 192 | 193 | @defproc[(fail/p [msg message?]) (parser/c any/c none/c)]{ 194 | Produces a parser that always fails and produces @racket[msg] as the error message. This is the 195 | lowest-level way to report errors, but many cases in which you would want to raise a custom failure 196 | message can be replaced with @racket[guard/p] instead, which is slightly higher level.} 197 | 198 | @defform[(delay/p parser-expr) 199 | #:contracts ([parser-expr parser?])]{ 200 | Creates a parser that delays evaluation of @racket[parser-expr] until the first time it is applied to 201 | input. Otherwise, the parser’s behavior is identical to that of @racket[parser-expr]. The parser 202 | returned by @racket[delay/p] never evaluates @racket[parser-expr] more than once, even if it’s applied 203 | to input multiple times. 204 | 205 | @(parser-examples 206 | (define delayed/p (delay/p (begin (println 'evaluated) 207 | (char/p #\a)))) 208 | (eval:check (parse-result! (parse-string delayed/p "a")) #\a) 209 | (eval:check (parse-result! (parse-string delayed/p "a")) #\a)) 210 | 211 | @racket[delay/p] can be used to delay evaluation in situations where a (possibly mutually) recursive 212 | parser would otherwise require evaluating a parser before its definition. For example: 213 | 214 | @(parser-interaction 215 | (define one/p (or/p (char/p #\.) 216 | (delay/p two/p))) 217 | (define two/p (list/p (char/p #\a) one/p)) 218 | (eval:check (parse-result! (parse-string one/p "aa.")) '(#\a (#\a #\.)))) 219 | 220 | Without the use of @racket[delay/p], the reference to @racket[two/p] would be evaluated too soon 221 | (since @racket[or/p] is an ordinary function, unlike @racket[or]). 222 | 223 | Note that the @racket[delay/p] expression itself may be evaluated multiple times, in which case the 224 | @racket[parser-expr] may be as well (since each evaluation of @racket[delay/p] creates a separate 225 | parser). This can easily arise from uses of @racket[do], since @racket[do] is syntactic sugar for 226 | nested uses of @racket[lambda], though it might not be syntactically obvious that the @racket[delay/p] 227 | expression appears under one such @racket[lambda]. For example: 228 | 229 | @(parser-interaction 230 | (define sneaky-evaluation/p 231 | (do (char/p #\a) 232 | (delay/p (begin (println 'evaluated) 233 | (char/p #\b))))) 234 | (eval:check (parse-result! (parse-string sneaky-evaluation/p "ab")) #\b) 235 | (eval:check (parse-result! (parse-string sneaky-evaluation/p "ab")) #\b)) 236 | 237 | In other words, @racket[delay/p] doesn’t perform any magical memoization or caching, so it can’t be 238 | used to prevent a parser from being evaluated multiple times, only to delay its evaluation to a later 239 | point in time. 240 | 241 | @history[#:added "1.6"]} 242 | 243 | @defproc[(==/p [v any/c] [=? (any/c any/c . -> . any/c) equal?]) parser?]{ 244 | Produces a parser that succeeds when a single datum is equal to @racket[v], as determined by 245 | @racket[=?]. Like @racket[satisfy/p], it consumes a single datum upon success but does not consume 246 | anything upon failure.} 247 | 248 | @defproc[(one-of/p [vs list?] [=? (any/c any/c . -> . any/c) equal?]) parser?]{ 249 | Like @racket[(or/p (==/p _v =?) ...)]. Produces a parser that succeeds when a single datum is 250 | equal to any of the elements of @racket[vs], as determined by @racket[=?]. Like @racket[satisfy/p], 251 | it consumes a single datum upon success but does not consume anything upon failure. 252 | 253 | @(parser-examples 254 | (eval:check (parse-result! (parse-string (one-of/p '(#\a #\b)) "a")) #\a) 255 | (eval:check (parse-result! (parse-string (one-of/p '(#\a #\b)) "b")) #\b) 256 | (eval:error (parse-result! (parse-string (one-of/p '(#\a #\b)) "c")))) 257 | 258 | @history[#:added "1.2"]} 259 | 260 | @defproc[(guard/p [parser parser?] [pred? (any/c . -> . any/c)] 261 | [expected (or/c string? #f) #f] [make-unexpected (any/c . -> . any/c) identity]) 262 | parser?]{ 263 | Produces a parser that runs @racket[parser], then applies a guard predicate @racket[pred?] to the 264 | result. If the result of @racket[pred?] is @racket[#f], then the parser fails, otherwise the parser 265 | succeeds and produces the same result as @racket[parser]. 266 | 267 | If the parser fails and @racket[expected] is a string, then @racket[expected] is used to add 268 | expected information to the parser error. Additionally, the @racket[make-unexpected] function is 269 | applied to the result of @racket[parser] to produce the @racket[unexpected] field of the parse error. 270 | 271 | @(parser-examples 272 | (define small-integer/p 273 | (guard/p integer/p (λ (x) (<= x 100)) 274 | "integer in range [0,100]")) 275 | (eval:check (parse-result! (parse-string small-integer/p "42")) 42) 276 | (eval:error (parse-result! (parse-string small-integer/p "300"))))} 277 | 278 | @defproc[(list/p [parser parser?] ... [#:sep sep parser? void/p]) (parser/c any? list?)]{ 279 | Returns a @tech{parser} that runs each @racket[parser] in sequence separated by @racket[sep] and 280 | produces a list containing the results of each @racket[parser]. The results of @racket[sep] are 281 | ignored. 282 | 283 | @(parser-examples 284 | (define dotted-let-digit-let/p 285 | (list/p letter/p digit/p letter/p #:sep (char/p #\.))) 286 | (eval:check (parse-result! (parse-string dotted-let-digit-let/p "a.1.b")) (list #\a #\1 #\b)) 287 | (eval:error (parse-result! (parse-string dotted-let-digit-let/p "a1c"))) 288 | (eval:error (parse-result! (parse-string dotted-let-digit-let/p "a.1")))) 289 | 290 | Using a separator parser that consumes no input (such as the default separator, @racket[void/p]) is 291 | equivalent to not using a separator at all. 292 | 293 | @(parser-examples 294 | (define let-digit-let/p (list/p letter/p digit/p letter/p)) 295 | (eval:check (parse-result! (parse-string let-digit-let/p "a1b")) (list #\a #\1 #\b)))} 296 | 297 | @subsection[#:tag "repetition"]{Repetition} 298 | 299 | @defproc[(many/p [parser parser?] 300 | [#:sep sep parser? void/p] 301 | [#:min min-count exact-nonnegative-integer? 0] 302 | [#:max max-count (or/c exact-nonnegative-integer? +inf.0) +inf.0]) 303 | (parser/c any/c list?)]{ 304 | Produces a parser that attempts @racket[parser] at least @racket[min-count] times and at most 305 | @racket[max-count] times, with attempts separated by @racket[sep]. The returned parser produces a 306 | list of results of successful attempts of @racket[parser]. Results of @racket[sep] are ignored. 307 | 308 | @(parser-examples 309 | (define letters/p (many/p letter/p)) 310 | (eval:check (parse-result! (parse-string letters/p "abc")) (list #\a #\b #\c)) 311 | (define dotted-letters/p 312 | (many/p letter/p #:sep (char/p #\.) #:min 2 #:max 4)) 313 | (eval:check (parse-result! (parse-string dotted-letters/p "a.b.c")) (list #\a #\b #\c)) 314 | (eval:error (parse-result! (parse-string dotted-letters/p "abc"))) 315 | (eval:error (parse-result! (parse-string dotted-letters/p "a"))) 316 | (eval:check (parse-result! (parse-string dotted-letters/p "a.b.c.d.e")) (list #\a #\b #\c #\d))) 317 | 318 | @history[#:added "1.1"]} 319 | 320 | @defproc[(many+/p [parser parser?] 321 | [#:sep sep parser? void/p] 322 | [#:max max-count (or/c exact-nonnegative-integer? +inf.0) +inf.0]) 323 | (parser/c any/c list?)]{ 324 | Like @racket[many/p], but @racket[parser] must succeed at least once. Equivalent to 325 | @racket[(many/p parser #:sep sep #:min 1 #:max max-count)]. 326 | 327 | @history[#:changed "1.1" @elem{Added support for @racket[#:sep] and @racket[#:max] keyword arguments 328 | for consistency with @racket[many/p].}]} 329 | 330 | @defproc[(repeat/p [n exact-nonnegative-integer?] [parser parser?]) (parser/c any/c list?)]{ 331 | Produces a parser that attempts @racket[parser] @emph{exactly} @racket[n] times and returns a list 332 | of the results. Equivalent to @racket[(many/p parser #:min n #:max n)].} 333 | 334 | @defproc[(many-until/p [parser parser?] 335 | [#:end end parser?] 336 | [#:sep sep parser? void/p] 337 | [#:min min-count exact-nonnegative-integer? 0]) 338 | (parser/c (list/c list? any/c))]{ 339 | Like @racket[many/p], but repeats until @racket[end] succeeds. The result is a list of two values: 340 | a list of results produced by @racket[parser] and the result produced by @racket[end]. 341 | 342 | @(parser-examples 343 | (define letters-then-punctuation 344 | (many-until/p letter/p #:end (char-in/p ".!?,;"))) 345 | (eval:check (parse-result! (parse-string letters-then-punctuation "abc!")) 346 | (list '(#\a #\b #\c) #\!)) 347 | (eval:check (parse-result! (parse-string letters-then-punctuation "abc,efg")) 348 | (list '(#\a #\b #\c) #\,)) 349 | (eval:error (parse-result! (parse-string letters-then-punctuation "a1c;"))) 350 | (eval:check (parse-result! (parse-string letters-then-punctuation "?")) 351 | (list '() #\?))) 352 | 353 | To determine if the repetition should stop, @racket[end] is attempted before each optional attempt of 354 | @racket[parser], i.e. each attempt after the ones required by @racket[min-count] (if any). If 355 | @racket[end] succeeds, the repetition is terminated immediately, regardless of whether or not 356 | further attempts of @racket[parser] might succeed: 357 | 358 | @(parser-examples 359 | #:label #f 360 | (define digits-then-zero (many-until/p digit/p #:end (char/p #\0))) 361 | (eval:check (parse-result! (parse-string digits-then-zero "1230")) 362 | (list '(#\1 #\2 #\3) #\0)) 363 | (eval:check (parse-result! (parse-string digits-then-zero "12305670")) 364 | (list '(#\1 #\2 #\3) #\0))) 365 | 366 | If @racket[end] fails without consuming input, the repetition continues. However, note that if an 367 | attempt of @racket[end] fails @emph{after} consuming input, the failure is propagated to the 368 | entire repetition, as with @racket[or/p]. This allows a partial success of @racket[end] to “commit” 369 | to ending the repetition, even if further attempts @racket[parser] would succeed: 370 | 371 | @(parser-examples 372 | #:label #f 373 | (define telegram/p (many-until/p any-char/p #:end (string/p "STOP"))) 374 | (eval:check (parse-result! (parse-string telegram/p "HELLO STOP")) 375 | (list '(#\H #\E #\L #\L #\O #\space) "STOP")) 376 | (eval:error (parse-result! (parse-string telegram/p "MESSAGE STOP")))) 377 | 378 | This behavior can be useful in situations where @racket[end] is complex (so it’s helpful to report 379 | a parse error that occurs after some prefix of it has succeeded), but in cases like the above, it is 380 | usually not desired. As with @racket[or/p], this committing behavior can be suppressed by wrapping 381 | @racket[end] with @racket[try/p]: 382 | 383 | @(parser-examples 384 | #:label #f 385 | (define fixed-telegram/p 386 | (many-until/p any-char/p #:end (try/p (string/p "STOP")))) 387 | (eval:check (parse-result! (parse-string fixed-telegram/p "MESSAGE STOP")) 388 | (list '(#\M #\E #\S #\S #\A #\G #\E #\space) "STOP"))) 389 | 390 | @history[#:added "1.7"]} 391 | 392 | @defproc[(many+-until/p [parser parser?] 393 | [#:end end parser?] 394 | [#:sep sep parser? void/p]) 395 | (parser/c (list/c list? any/c))]{ 396 | Like @racket[many-until/p], but @racket[parser] must succeed at least once. Equivalent to 397 | @racket[(many-until/p parser #:end end #:sep sep #:min 1)]. 398 | 399 | @history[#:added "1.7"]} 400 | 401 | @subsection[#:tag "parser-parameters"]{Parser Parameters} 402 | 403 | @defproc[(make-parser-parameter [v any/c]) parser-parameter?]{ 404 | Returns a new @deftech{parser parameter}. A parser parameter is like an ordinary @reftech{parameter}, 405 | but instead of its state being scoped to a particular thread, a parser parameter’s state is scoped to 406 | a particular call to @racket[parse]. Furthermore, modifications to parser parameters are discarded if 407 | the parser backtracks past the point of modification, which ensures that only modifications from 408 | @emph{successful} parse branches are retained. 409 | 410 | Like ordinary parameters, parser parameters are procedures that accept zero or one argument. Unlike 411 | ordinary parameters, the result of applying a parser parameter procedure is a @tech{parser}, which 412 | must be monadically sequenced with other parsers to have any effect. 413 | 414 | @(parser-examples 415 | (define param (make-parser-parameter #f)) 416 | (eval:check (parse-result! (parse-string (param) "")) #f) 417 | (eval:check (parse-result! (parse-string (do (param #t) (param)) "")) #t)) 418 | 419 | Each call to @racket[parse] is executed with a distinct @deftech{parser parameterization}, which means 420 | modifications to parser parameters are only visible during that particular parser execution. The 421 | @racket[v] argument passed to @racket[make-parser-parameter] is used as the created parser parameter’s 422 | initial value in each distinct parser parameterization. 423 | 424 | Parser parameters are useful for tracking state needed by context-sensitive parsers, but they can also 425 | be used to provide values with dynamic extent using @racket[parameterize/p], just as ordinary 426 | parameters can be locally modified via @racket[parameterize]. 427 | 428 | @history[#:added "1.4"]} 429 | 430 | @defproc[(parser-parameter? [v any/c]) boolean?]{ 431 | Returns @racket[#t] if @racket[v] is a @tech{parser parameter}, otherwise returns @racket[#f]. 432 | 433 | @history[#:added "1.4"]} 434 | 435 | @defform[(parameterize/p ([param-expr val-expr] ...) parser-expr) 436 | #:contracts ([param-expr parser-parameter?] 437 | [parser-expr parser?])]{ 438 | Returns a new @tech{parser} that behaves just like @racket[parser-expr], except that the value of 439 | each @tech{parser parameter} @racket[param-expr] is given by the corresponding @racket[val-expr] 440 | during the dynamic extent of the parser’s execution. 441 | 442 | @(parser-examples 443 | (define param (make-parser-parameter #f)) 444 | (eval:check (parse-result! (parse-string (do [a <- (param)] 445 | [b <- (parameterize/p ([param #t]) 446 | (param))] 447 | [c <- (param)] 448 | (pure (list a b c))) 449 | "")) 450 | (list #f #t #f))) 451 | 452 | If any of the @racket[param-expr]’s values are modified by @racket[parser-expr] via a direct call to 453 | the parser parameter procedure, the value remains modified until control leaves the enclosing 454 | @racket[parameterize/p] parser, after which the value is restored. (This behavior is precisely 455 | analogous to modifying an ordinary @reftech{parameter} within the body of a @racket[parameterize] 456 | expression.) 457 | 458 | @(parser-examples 459 | (define param (make-parser-parameter #f)) 460 | (eval:check (parse-result! (parse-string (do (param 1) 461 | [a <- (parameterize/p ([param 2]) 462 | (do (param 3) 463 | (param)))] 464 | [b <- (param)] 465 | (pure (list a b))) 466 | "")) 467 | (list 3 1))) 468 | 469 | @history[#:added "1.4"]} 470 | 471 | @section[#:tag "parsing-text"]{Parsing Text} 472 | 473 | @defmodule[megaparsack/text] 474 | 475 | @defproc[(parse-string [parser (parser/c char? any/c)] [str string?] [src-name any/c 'string]) 476 | (either/c message? any/c)]{ 477 | Parses @racket[str] using @racket[parser], which must consume character datums. The value provided for 478 | @racket[src-name] is used in error reporting when displaying source location information.} 479 | 480 | @defproc[(parse-syntax-string [parser (parser/c char? any/c)] [stx-str (syntax/c string?)]) 481 | (either/c message? any/c)]{ 482 | Like @racket[parse-string], but uses the source location information from @racket[stx-str] to 483 | initialize the source location tracking. The result of @racket[(syntax-source stx-str)] is used in 484 | place of the @racket[_src-name] argument.} 485 | 486 | @defproc[(char/p [c char?]) (parser/c char? char?)]{ 487 | Parses a single datum that is equal to @racket[c].} 488 | 489 | @defproc[(char-not/p [c char?]) (parser/c char? char?)]{ 490 | Parses a single datum that is different from @racket[c]. 491 | @history[#:added "1.3"]} 492 | 493 | @defproc[(char-ci/p [c char?]) (parser/c char? char?)]{ 494 | Parses a single datum that is case-insensitively equal to @racket[c], as determined by 495 | @racket[char-ci=?].} 496 | 497 | @defproc[(char-between/p [low char?] [high char?]) (parser/c char? char?)]{ 498 | Parses a single character that is between @racket[low] and @racket[high] according to 499 | @racket[char<=?]. 500 | 501 | @(parser-examples 502 | (eval:check (parse-result! (parse-string (char-between/p #\a #\z) "d")) #\d) 503 | (eval:error (parse-result! (parse-string (char-between/p #\a #\z) "D")))) 504 | 505 | @history[#:added "1.2"]} 506 | 507 | @defproc[(char-in/p [alphabet string?]) (parser/c char? char?)]{ 508 | Returns a parser that parses a single character that is in @racket[alphabet]. 509 | 510 | @(parser-examples 511 | (eval:check (parse-result! (parse-string (char-in/p "aeiou") "i")) #\i) 512 | (eval:error (parse-result! (parse-string (char-in/p "aeiou") "z")))) 513 | 514 | @history[#:added "1.2"]} 515 | 516 | @defproc[(char-not-in/p [alphabet string?]) (parser/c char? char?)]{ 517 | Returns a parser that parses a single character that is not in @racket[alphabet]. 518 | @history[#:added "1.3"]} 519 | 520 | @defthing[any-char/p (parser/c char? char?)]{ 521 | Returns a parser that parses a single character. 522 | @history[#:added "1.3"]} 523 | 524 | @defthing[letter/p (parser/c char? char?)]{ 525 | Parses an alphabetic letter, as determined by @racket[char-alphabetic?].} 526 | 527 | @defthing[digit/p (parser/c char? char?)]{ 528 | Parses a single digit, as determined by @racket[char-numeric?].} 529 | 530 | @defthing[symbolic/p (parser/c char? char?)]{ 531 | Parses a symbolic character, as determined by @racket[char-symbolic?].} 532 | 533 | @defthing[space/p (parser/c char? char?)]{ 534 | Parses a single whitespace character, as determined by @racket[char-whitespace?] or 535 | @racket[char-blank?].} 536 | 537 | @defthing[integer/p (parser/c char? integer?)]{ 538 | Parses a sequence of digits as an integer. Does not handle negative numbers or numeric separators.} 539 | 540 | @defproc[(string/p [str string?]) (parser/c char? string?)]{ 541 | Parses a sequence of characters equal to @racket[str] and returns @racket[str] as its result.} 542 | 543 | @defproc[(string-ci/p [str string?]) (parser/c char? string?)]{ 544 | Parses a sequence of characters case-insensitively equal to @racket[str] (as determined by 545 | @racket[char-ci=?]) and returns the matched input string as its result. 546 | 547 | @(parser-examples 548 | (eval:check (parse-result! (parse-string (string-ci/p "hello") "HeLlO")) "HeLlO")) 549 | 550 | @history[#:added "1.3" 551 | #:changed "1.8" @elem{Changed to return the parsed input string rather 552 | than always returning @racket[str].}]} 553 | 554 | @section[#:tag "parsing-with-parser-tools"]{Parsing with @racketmodname[parser-tools/lex]} 555 | 556 | @defmodule[megaparsack/parser-tools/lex] 557 | 558 | Sometimes it is useful to run a lexing pass over an input stream before parsing, in which case 559 | @racketmodname[megaparsack/text] is not appropriate. The @tt{parser-tools} package provides the 560 | @racketmodname[parser-tools/lex] library, which implements a lexer that produces tokens. 561 | 562 | When using @racketmodname[parser-tools/lex], use @racket[lexer-src-pos] instead of @racket[lexer] to 563 | enable the built-in source location tracking. This will produce a sequence of @racket[position-token] 564 | elements, which can then be passed to @racket[parse-tokens] and detected with @racket[token/p]. 565 | 566 | @defproc[(parse-tokens [parser parser?] [tokens (listof position-token?)] [source-name any/c 'tokens]) 567 | syntax?]{ 568 | Parses a stream of tokens, @racket[tokens], produced from @racket[lexer-src-pos] from 569 | @racketmodname[parser-tools/lex].} 570 | 571 | @defproc[(token/p [name symbol?]) (parser/c (or/c symbol? token?) any/c)]{ 572 | Produces a parser that expects a single token with @racket[name], as produced by @racket[token-name].} 573 | 574 | @section[#:tag "deprecated-forms-and-functions"]{Deprecated Forms and Functions} 575 | 576 | @defproc[(many*/p [parser parser?]) (parser/c list?)]{ 577 | @deprecated[#:what "function" @racket[many/p]]} 578 | 579 | @defproc[(many/sep*/p [parser parser?] [sep parser?]) parser?]{ 580 | @deprecated[#:what "function" @racket[(many/p parser #:sep sep)]]} 581 | 582 | @defproc[(many/sep+/p [parser parser?] [sep parser?]) parser?]{ 583 | @deprecated[#:what "function" @racket[(many+/p parser #:sep sep)]]} 584 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack/state.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require (for-label racket/file) 4 | scribble/bnf 5 | "util.rkt") 6 | 7 | @(define-parser-interaction state-interaction close-state! #:eval state-eval) 8 | 9 | @title[#:tag "state"]{Parsers with State} 10 | 11 | So far, all of the languages we have attempted to parse have been 12 | @hyperlink["https://en.wikipedia.org/wiki/Context-free_grammar"]{context free}, but in practice, many 13 | languages have varying amounts of context sensitivity. Parsers for such languages are often made much 14 | simpler by the addition of state that tracks the necessary context. However, megaparsack parsers can 15 | backtrack, which makes maintaining mutable state somewhat subtle: if a parser abandons a parse branch, 16 | all its state modifications must be rolled back. 17 | 18 | To make this simpler, megaparsack provides built-in support for arbitrary, user-defined state in the 19 | form of @tech{parser parameters}. Parser parameters are similar to ordinary @reftech{parameters}, but 20 | their values are associated with a parse context rather than with a thread. This means their values 21 | are automatically rolled back whenever the parser backtracks, and they behave predictably regardless 22 | of parser evaluation order. 23 | 24 | @section[#:tag "state-context"]{Parsing with context} 25 | 26 | Suppose we have a simple language that consists of a sequence of variable declarations of the form 27 | 28 | @BNF[(list @nonterm{decl} @BNF-seq[@litchar{let} @nonterm{var} @litchar{=} @nonterm{integer}])] 29 | 30 | where each declaration appears on a separate line. We might write a parser for such a language like 31 | this: 32 | 33 | @(state-interaction 34 | (define name/p 35 | (map (compose1 string->symbol list->string) (many+/p letter/p))) 36 | (define declaration/p 37 | (do (string/p "let ") 38 | [name <- name/p] 39 | (string/p " = ") 40 | [value <- integer/p] 41 | (pure (cons name value)))) 42 | (define declarations/p 43 | (many/p (do [decl <- declaration/p] 44 | (char/p #\newline) 45 | (pure decl))))) 46 | 47 | This definition works alright: 48 | 49 | @(state-interaction 50 | (eval:check (parse-string declarations/p 51 | (string-append "let x = 1\n" 52 | "let y = 2\n")) 53 | (success '((x . 1) (y . 2))))) 54 | 55 | However, note that it also accepts multiple declarations with the same name, which may not be desired: 56 | 57 | @(state-interaction 58 | (eval:check (parse-string declarations/p 59 | (string-append "let x = 1\n" 60 | "let x = 2\n")) 61 | (success '((x . 1) (x . 2))))) 62 | 63 | One way to prevent this is to keep track of all the declarations that we’ve parsed so far using a 64 | @tech{parser parameter}: 65 | 66 | @(state-interaction 67 | (define declared-names (make-parser-parameter '()))) 68 | 69 | Just like an ordinary @reftech{parameter}, we can read the current value of a parser parameter simply 70 | by calling it as a procedure, like @racket[(declared-names)], and we can update its value by applying 71 | it to a single argument, like @racket[(declared-names _new-value)]. However, unlike an ordinary 72 | parameter, the applying a parser parameter procedure does not directly return or update the parser 73 | parameter’s value. Instead, it returns a @tech{parser} that, when executed, parses no input, but 74 | returns or updates the parser parameter’s value. 75 | 76 | This means we can sequence reads and writes to @racket[declared-names] the same way we sequence any 77 | other parser, using @racket[do]: 78 | 79 | @(state-interaction 80 | (define declaration/p 81 | (do (string/p "let ") 82 | [names <- (declared-names)] 83 | [name <- (guard/p name/p 84 | (λ (name) (not (memq name names))) 85 | "an unused variable name")] 86 | (declared-names (cons name names)) 87 | (string/p " = ") 88 | [value <- integer/p] 89 | (pure (cons name value))))) 90 | 91 | Now duplicate definitions are rejected with a helpful error: 92 | 93 | @(state-interaction 94 | #:hidden 95 | (define declarations/p 96 | (many/p (do [decl <- declaration/p] 97 | (char/p #\newline) 98 | (pure decl))))) 99 | @(state-interaction 100 | (eval:error (parse-result! (parse-string declarations/p 101 | (string-append "let x = 1\n" 102 | "let x = 2\n"))))) 103 | 104 | @section[#:tag "state-indentation"]{Indentation sensitivity} 105 | 106 | Another useful application of parser parameters is parsing languages that are sensitive to 107 | indentation. For example, we might wish to parse a bulleted list of items, like this: 108 | 109 | @(define groceries.txt @list{ 110 | * produce 111 | * apples 112 | * spinach 113 | * dairy 114 | * milk 115 | * whole milk 116 | * buttermilk 117 | * cheese 118 | * cheddar 119 | * feta}) 120 | 121 | @filebox["groceries.txt" (apply verbatim groceries.txt)] 122 | 123 | To track the current indentation level, we can use a parser parameter: 124 | 125 | @(state-interaction 126 | (define current-indent (make-parser-parameter 0)) 127 | (define indentation/p 128 | (do [indent <- (current-indent)] 129 | (repeat/p indent (char/p #\space))))) 130 | 131 | This makes defining a parser for an indentation-sensitive bulleted list remarkably straightforward: 132 | 133 | @(state-interaction 134 | (define tree-list/p 135 | (do (try/p indentation/p) 136 | (string/p "* ") 137 | [entry <- (many+/p (char-not/p #\newline))] 138 | (char/p #\newline) 139 | [indent <- (current-indent)] 140 | [children <- (parameterize/p ([current-indent (+ indent 2)]) 141 | (many/p tree-list/p))] 142 | (pure (list (list->string entry) children))))) 143 | 144 | The @racket[parameterize/p] form works just like @racket[parameterize], but with parser parameters 145 | instead of ordinary ones. This definition of @racket[tree-list/p] is enough to parse the 146 | @filepath{groceries.txt} file above: 147 | 148 | @(state-eval `(define groceries.txt ,(string-append (apply string-append groceries.txt) "\n"))) 149 | @(state-interaction 150 | (eval:alts (define grocery-list (file->string "groceries.txt")) 151 | (define grocery-list groceries.txt)) 152 | (eval:check (parse-string (many/p tree-list/p) grocery-list) 153 | (success 154 | '(("produce" (("apples" ()) ("spinach" ()))) 155 | ("dairy" 156 | (("milk" (("whole milk" ()) ("buttermilk" ()))) 157 | ("cheese" (("cheddar" ()) ("feta" ()))))))))) 158 | 159 | Admittedly, in such a simple example, using a parser parameter is not strictly necessary. An 160 | alternative definition of @racket[tree-list/p] could simply accept the indentation level as an 161 | argument: 162 | 163 | @(state-interaction 164 | (define (tree-list/p indent) 165 | (do (try/p (repeat/p indent (char/p #\space))) 166 | (string/p "* ") 167 | [entry <- (many+/p (char-not/p #\newline))] 168 | (char/p #\newline) 169 | [children <- (many/p (tree-list/p (+ indent 2)))] 170 | (pure (list (list->string entry) children))))) 171 | 172 | However, in more complex parsers, this approach can require threading additional arguments through 173 | several layers of nested parsers, which is difficult to read and even more difficult to maintain. 174 | Just as ordinary parameters can help avoid threading values through many layers of nested functions, 175 | parser parameters can help avoid threading them through nested parsers. 176 | 177 | @(close-state!) 178 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack/syntax.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require "util.rkt") 4 | 5 | @(define-parser-interaction syntax-interaction close-syntax!) 6 | 7 | @title[#:tag "producing-syntax"]{Producing Syntax} 8 | 9 | One of the properties of megaparsack is that it always tracks source locations, which is how it is 10 | able to include source location information in error messages. This can be leveraged for an entirely 11 | separate purpose, which is creating parsers that produce @reftech{syntax objects} as output. This 12 | functionality is extremely useful when creating custom @hash-lang[] languages. 13 | 14 | @section{Annotating parsers to produce syntax} 15 | 16 | Megaparsack does not opt to produce syntax objects as the result of every parse because it would make 17 | composing parsers extremely tedious. For example, if @racket[integer/p] produced syntax objects 18 | containing integers instead of integers themselves, they would need to be unwrapped before they could 19 | be added together or otherwise used as numbers. Instead, megaparsack requires that you opt-in to 20 | syntax object production by using the @racket[syntax/p] combinator. 21 | 22 | The @racket[syntax/p] combinator is “magic”—it takes @emph{any} parser and turns it into a parser that 23 | produces a value containing accurate source location information. This is because @racket[syntax/p] 24 | takes advantage of the internal parser state to track information that is otherwise not accessible 25 | to parsers. Fortunately, this makes the interface extremely simple to use—just wrap an ordinary parser 26 | with @racket[syntax/p] and use it as usual: 27 | 28 | @(parser-interaction 29 | (parse-string (syntax/p integer/p) "42")) 30 | 31 | The produced syntax objects automatically keep track of all the relevant syntax properties, including 32 | line, column, position, and span: 33 | 34 | @(parser-interaction 35 | (define stx (parse-result! (parse-string (syntax/p integer/p) "42"))) 36 | (syntax-line stx) 37 | (syntax-column stx) 38 | (syntax-span stx)) 39 | 40 | This syntax tracking is not specific to the built-in parsers, and you do not need to do anything 41 | special to use it with your custom parsers. For example, consider a relatively complex parser that 42 | parses a list of comma-delimited numbers surrounded by brackets: 43 | 44 | @(syntax-interaction 45 | (define integer-list/p 46 | (do (char/p #\[) 47 | [ints <- (many/p (syntax/p integer/p) #:sep (char/p #\,))] 48 | (char/p #\]) 49 | (pure ints)))) 50 | 51 | We’ve annotated the @racket[integer/p] parser with @racket[syntax/p] once again so we can get location 52 | tracking for each individual list element, but we’ll also annotate the whole thing with 53 | @racket[syntax/p] so we can track information about the entire list as well: 54 | 55 | @(syntax-interaction 56 | (define integer-list-stx 57 | (parse-result! (parse-string (syntax/p integer-list/p) "[1,2,3,5,8,13]"))) 58 | integer-list-stx 59 | (syntax-span integer-list-stx)) 60 | 61 | As expected, the top-level syntax object spans the entire input, including the brackets. We can also 62 | get information about the individual elements, since they are syntax objects as well: 63 | 64 | @(syntax-interaction 65 | (syntax->list integer-list-stx)) 66 | 67 | This makes writing a reader for a @hash-lang[] relatively straightforward because source location 68 | information is already encoded into a set of syntax objects which can be used as the source of a 69 | Racket module. 70 | 71 | @section{Parsing tokens from @racketmodname[parser-tools/lex]} 72 | 73 | While @racket[syntax/p] can be used with any megaparsack parser, it is sometimes useful to be able to 74 | perform a lexing phase before parsing to handle things like ignoring whitespace and tokenization in a 75 | separate pass. Currently, megaparsack does not include tools of its own specifically for lexing 76 | (though it would be perfectly possible to use the output of a separate simple parser as the input to 77 | another parser), but it does provide a function to interoperate with @racketmodname[parser-tools/lex], 78 | another Racket library that provides utilities designed specifically for lexing. 79 | 80 | When using @racketmodname[parser-tools/lex], make sure to use the @racket[lexer-src-pos] form, which 81 | enables the lexer’s own source location tracking. This configures the lexer to produce 82 | @racket[position-token] values as output, which can be fed to @racket[parse-tokens] from 83 | @racketmodname[megaparsack/parser-tools/lex] to parse with any megaparsack parser. 84 | 85 | Parsers that operate on strings, like @racket[char/p] and @racket[integer/p], will not work with 86 | tokens from @racketmodname[parser-tools/lex] because tokens can contain arbitrary data. Instead, 87 | use the @racket[token/p] function to create parsers that handle particular tokens. 88 | 89 | Here is a very simple lexer that produces lexemes for identifiers, numbers, parentheses, and commas: 90 | 91 | @(syntax-interaction 92 | (define-tokens simple [IDENTIFIER NUMBER]) 93 | (define-empty-tokens simple* [OPEN-PAREN CLOSE-PAREN COMMA]) 94 | (define simple-lexer 95 | (lexer-src-pos 96 | [#\( (token-OPEN-PAREN)] 97 | [#\) (token-CLOSE-PAREN)] 98 | [#\, (token-COMMA)] 99 | [(:+ (:or (:/ #\a #\z) (:/ #\A #\Z))) 100 | (token-IDENTIFIER (string->symbol lexeme))] 101 | [(:+ (:/ #\0 #\9)) 102 | (token-NUMBER (string->number lexeme))] 103 | [(:or whitespace blank iso-control) (void)] 104 | [(eof) eof]))) 105 | 106 | We can write a simple helper function to lex a string into a list of tokens, making sure to call 107 | @racket[port-count-lines!] to enable source location tracking: 108 | 109 | @(syntax-interaction 110 | (define (lex-simple str) 111 | (define in (open-input-string str)) 112 | (port-count-lines! in) 113 | (let loop ([v (simple-lexer in)]) 114 | (cond [(void? (position-token-token v)) (loop (simple-lexer in))] 115 | [(eof-object? (position-token-token v)) '()] 116 | [else (cons v (loop (simple-lexer in)))]))) 117 | (lex-simple "f(1, g(3, 4))")) 118 | 119 | Next, we can write a trivial parser to actually parse these tokens. Since we’ve written a lexer, most 120 | of the heavy lifting is already done, and we can just focus on assigning semantics: 121 | 122 | @(syntax-interaction 123 | (code:comment @#,elem{some wrappers around tokens that use @racket[syntax/p]}) 124 | (define number/p (syntax/p (token/p 'NUMBER))) 125 | (define identifier/p (syntax/p (token/p 'IDENTIFIER))) 126 | (code:comment @#,elem{a simple function invokation}) 127 | (define funcall/p 128 | (syntax/p 129 | (do [func <- identifier/p] 130 | (token/p 'OPEN-PAREN) 131 | [args <- (many/p expression/p #:sep (token/p 'COMMA))] 132 | (token/p 'CLOSE-PAREN) 133 | (pure (list* func args))))) 134 | (code:comment @#,elem{an expression can be a number or a function invokation}) 135 | (define expression/p 136 | (or/p number/p 137 | funcall/p))) 138 | 139 | Now, with our simple parser in place, we can actually parse arbitrary C-style function calls into 140 | S-expressions: 141 | 142 | @(syntax-interaction 143 | (define expr-stx 144 | (parse-result! (parse-tokens expression/p (lex-simple "f(1, g(3, 4))")))) 145 | expr-stx) 146 | 147 | As expected, the source locations for each datum will automatically be assigned to the resulting 148 | syntax object due to the use of @racket[syntax/p] on the base datums and around @racket[funcall/p]: 149 | 150 | @(syntax-interaction 151 | (syntax->list expr-stx)) 152 | 153 | In just a couple dozens lines of code, we’ve managed to implement a fairly robust parser that produces 154 | syntax objects ready to be handed off to Racket as the result of parsing a module body, which can be 155 | compiled into working Racket code. 156 | 157 | @(close-syntax!) 158 | -------------------------------------------------------------------------------- /megaparsack-doc/scribblings/megaparsack/util.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require (for-label data/functor 4 | data/applicative 5 | data/monad 6 | data/either 7 | megaparsack 8 | megaparsack/text 9 | megaparsack/parser-tools/lex 10 | parser-tools/lex 11 | (prefix-in : parser-tools/lex-sre) 12 | (except-in racket/base do map) 13 | racket/contract 14 | racket/function) 15 | (for-syntax racket/base) 16 | scribble/manual 17 | scribble/example 18 | syntax/parse/define) 19 | 20 | (provide (for-label (all-from-out data/functor 21 | data/applicative 22 | data/monad 23 | data/either 24 | megaparsack 25 | megaparsack/text 26 | megaparsack/parser-tools/lex 27 | parser-tools/lex 28 | parser-tools/lex-sre 29 | racket/base 30 | racket/contract 31 | racket/function)) 32 | reftech functech 33 | make-parser-eval parser-examples parser-interaction define-parser-interaction) 34 | 35 | (define (reftech . content) 36 | (apply tech #:doc '(lib "scribblings/reference/reference.scrbl") content)) 37 | 38 | (define (functech . content) 39 | (apply tech #:doc '(lib "scribblings/data/functional.scrbl") content)) 40 | 41 | (define (make-parser-eval [require-paths '()]) 42 | (let ([eval ((make-eval-factory '()))]) 43 | (eval '(require data/functor 44 | data/applicative 45 | data/monad 46 | data/either 47 | megaparsack 48 | megaparsack/text 49 | megaparsack/parser-tools/lex 50 | parser-tools/lex 51 | (prefix-in : parser-tools/lex-sre) 52 | (except-in racket/base do map) 53 | racket/function)) 54 | eval)) 55 | 56 | (define-syntax-rule (parser-examples body ...) 57 | (examples #:eval (make-parser-eval) body ...)) 58 | 59 | (define-syntax-rule (parser-interaction body ...) 60 | (parser-examples #:label #f body ...)) 61 | 62 | (define-simple-macro (define-parser-interaction interaction:id close-interaction!:id 63 | {~optional {~seq #:eval eval-id:id} #:defaults ([eval-id #'eval])}) 64 | (begin 65 | (define eval-id (make-parser-eval)) 66 | (define-syntax-rule (interaction body (... ...)) 67 | (examples #:eval eval-id #:label #f body (... ...))) 68 | (define (close-interaction!) 69 | (close-eval eval-id)))) 70 | -------------------------------------------------------------------------------- /megaparsack-lib/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define version "1.8") 4 | 5 | (define collection 'multi) 6 | 7 | (define deps 8 | '(["base" #:version "6.5"] 9 | "curly-fn-lib" 10 | ["functional-lib" #:version "0.6"] 11 | "match-plus")) 12 | (define build-deps 13 | '()) 14 | -------------------------------------------------------------------------------- /megaparsack-lib/megaparsack/base.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require racket/require 4 | (multi-in data [functor applicative monad either]) 5 | (prefix-in d: data/applicative) 6 | match-plus 7 | (multi-in racket [contract function generic list match port promise string]) 8 | (prefix-in r: racket/base) 9 | (submod data/applicative coerce-delayed) 10 | (for-syntax racket/base) 11 | syntax/parse/define) 12 | 13 | (provide lazy/p delay/p 14 | parser/c 15 | parser-parameter? 16 | make-parser-parameter 17 | parameterize/p 18 | (contract-out 19 | [struct syntax-box ([datum any/c] [srcloc srcloc?])] 20 | [struct message ([srcloc srcloc?] [unexpected any/c] [expected (listof string?)])] 21 | [rename parser?* parser? (any/c . -> . boolean?)] 22 | 23 | [rename parse-datum parse (parser?* (listof syntax-box?) . -> . (either/c message? any/c))] 24 | [parse-error->string (message? . -> . string?)] 25 | [parse-result! ((either/c message? any/c) . -> . any/c)] 26 | [struct (exn:fail:read:megaparsack exn:fail) 27 | ([message any/c] [continuation-marks any/c] [srclocs any/c] 28 | [unexpected any/c] [expected (listof string?)])] 29 | 30 | [void/p (parser/c any/c void?)] 31 | [or/p (parser?* parser?* ... . -> . parser?*)] 32 | [try/p (parser?* . -> . parser?*)] 33 | [noncommittal/p (parser?* . -> . parser?*)] 34 | [lookahead/p (parser?* . -> . parser?*)] 35 | [satisfy/p ((any/c . -> . any/c) . -> . parser?*)] 36 | [eof/p (parser/c any/c void?)] 37 | [syntax-box/p (parser?* . -> . (parser/c any/c syntax-box?))] 38 | [syntax/p (parser?* . -> . (parser/c any/c syntax?))] 39 | [label/p (string? parser?* . -> . parser?*)] 40 | [hidden/p (parser?* . -> . parser?*)] 41 | [fail/p (message? . -> . parser?*)] 42 | [guard/p (->* [parser?* (any/c . -> . any/c)] 43 | [(or/c string? #f) (any/c . -> . any/c)] 44 | parser?*)])) 45 | 46 | ;; supporting types 47 | ;; --------------------------------------------------------------------------------------------------- 48 | 49 | (struct syntax-box (datum srcloc) 50 | #:transparent 51 | #:methods gen:functor 52 | [(define/match* (map f (syntax-box datum srcloc)) 53 | (syntax-box (f datum) srcloc))]) 54 | 55 | (struct parser-input (position ; the number of tokens advanced past to get to this point 56 | last-loc ; the srcloc of the previous token 57 | tokens) ; the remaining sequence of tokens, in syntax boxes 58 | #:transparent) 59 | 60 | (struct consumed (reply) #:transparent) 61 | (struct empty (reply) #:transparent) 62 | 63 | (struct error (message) #:transparent) 64 | (struct ok (result rest message) #:transparent) 65 | 66 | ; For backwards-compatibility reasons, the `message` struct cannot gain new fields. Therefore, we use 67 | ; `message*` internally and `message` externally. 68 | (struct message (srcloc unexpected expected) #:transparent) 69 | (struct message* (position ; the number of tokens successfully parsed before this error was generated 70 | user? ; #t if generated by `fail/p`, otherwise #f 71 | srcloc 72 | unexpected 73 | expected) 74 | #:transparent) 75 | 76 | ; Note that `ok` replies sometimes include an error message. Why? Because if a later parser fails 77 | ; without consuming input, previous branches might have failed in the same location, which could 78 | ; contribute to the error. For example, consider: 79 | ; 80 | ; (do (or/p digit/p (pure #\0)) 81 | ; letter/p) 82 | ; 83 | ; Given the input "*", the `digit/p` parser will fail, and the second branch will be taken, which 84 | ; results in an `ok` response containing `#\0`. Next, the `letter/p` parser will fail, resulting in a 85 | ; parse error. If the `ok` response contains no error information, the best error we can produce is 86 | ; 87 | ; string:1:1: parse error 88 | ; unexpected: * 89 | ; expected: letter 90 | ; 91 | ; but this is not good, because a digit is also legal at that position. So we need to take care to 92 | ; preserve error information from failed branches, even if we produce a successful result. 93 | 94 | (define empty-srcloc (srcloc #f #f #f #f #f)) 95 | 96 | ;; core primitives 97 | ;; --------------------------------------------------------------------------------------------------- 98 | 99 | ; A parser is a procedure that obeys the following contract: 100 | ; 101 | ; (-> parser-input? 102 | ; parser-parameterization? 103 | ; (values (or/c consumed? empty?) 104 | ; parser-parameterization?)) 105 | ; 106 | ; The parser parameterization tracks user-defined parser state, and it is monadically threaded 107 | ; through each parser. 108 | 109 | (struct parser (proc) 110 | #:methods gen:functor 111 | [(define/generic -map map) 112 | (define (map f p) 113 | (parser 114 | (λ (input paramz) 115 | (define-values [result paramz*] ((parser-proc p) input paramz)) 116 | (values (match result 117 | [(consumed (ok v rest message)) (consumed (ok (-map f v) rest message))] 118 | [(empty (ok v rest message)) (empty (ok (-map f v) rest message))] 119 | [error error]) 120 | paramz*))))] 121 | 122 | #:methods gen:applicative 123 | [(define (pure _ x) 124 | (pure/p x)) 125 | 126 | (define (apply p ps) 127 | (do [f <- p] 128 | [xs <- (map/m values ps)] 129 | (d:pure (r:apply f xs))))] 130 | 131 | #:methods gen:monad 132 | [(define (chain f p) 133 | (parser 134 | (λ (input paramz) 135 | (define-values [result paramz*] ((parser-proc p) input paramz)) 136 | (match result 137 | [(empty (ok (syntax-box x _) rest message)) 138 | (define-values [result paramz**] (parse (f x) rest paramz*)) 139 | (values (merge-message/result message result) paramz**)] 140 | [(consumed (ok (syntax-box x srcloc) rest message-a)) 141 | (define-values [result* paramz**] (parse (f x) rest paramz*)) 142 | (values (consumed (match result* 143 | [(consumed (ok stx rest message-b)) 144 | (ok (merge-syntax-box/srcloc stx srcloc) rest (merge-messages message-a message-b))] 145 | [(empty (ok (syntax-box datum _) rest message-b)) 146 | (ok (syntax-box datum srcloc) rest (merge-messages message-a message-b))] 147 | [(consumed error) (merge-message/reply message-a error)] 148 | [(empty error) (merge-message/reply message-a error)])) 149 | paramz**)] 150 | [error (values error paramz*)]))))]) 151 | 152 | (define (parser?* v) 153 | (or (parser? v) (pure? v))) 154 | 155 | (define (make-pure-result x input) 156 | (empty (ok (syntax-box x empty-srcloc) input #f))) 157 | (define (make-stateless-parser proc) 158 | (parser (λ (input paramz) (values (proc input) paramz)))) 159 | (define (map-parser-result p proc) 160 | (parser 161 | (λ (input paramz) 162 | (define-values [result paramz*] (parse p input paramz)) 163 | (values (proc result) paramz*)))) 164 | 165 | (define (pure/p x) 166 | (make-stateless-parser (λ (input) (values (make-pure-result x input))))) 167 | (define void/p (pure/p (void))) 168 | (define coerce-parser 169 | (coerce-pure void/p)) 170 | 171 | (define (parse p input paramz) 172 | ((parser-proc (coerce-parser p)) input paramz)) 173 | 174 | ; these are used by `parser/c` to implement lazy input token contracts 175 | (define parse-prompt-tag (make-continuation-prompt-tag 'parse)) 176 | (define mark:parser-token-ctc-proj (make-continuation-mark-key 'parser-token-ctc-proj)) 177 | (define (current-parser-token-ctc-proj) 178 | (continuation-mark-set-first #f 179 | mark:parser-token-ctc-proj 180 | (λ (val) val) 181 | parse-prompt-tag)) 182 | 183 | (define (parse-syntax-box p input) 184 | (match-define-values [result _] (call-with-continuation-prompt 185 | (λ () (parse p input (hasheq))) 186 | parse-prompt-tag)) 187 | (match result 188 | [(or (consumed (ok x _ _)) 189 | (empty (ok x _ _))) 190 | (success x)] 191 | [(or (consumed (error (message* _ _ srcloc unexpected expected))) 192 | (empty (error (message* _ _ srcloc unexpected expected)))) 193 | (failure (message srcloc unexpected expected))])) 194 | 195 | (define (parse-datum p tokens) 196 | (map syntax-box-datum (parse-syntax-box p (parser-input 0 empty-srcloc tokens)))) 197 | 198 | (define/match* (parse-error->string (message srcloc unexpected expected)) 199 | (with-output-to-string 200 | (thunk (display (or (srcloc->string srcloc) "?")) 201 | (display ": parse error") 202 | 203 | (when (and (not unexpected) (null? expected)) 204 | (display ";") 205 | (newline) 206 | (display " unknown parse error")) 207 | 208 | (when unexpected 209 | (newline) 210 | (display " unexpected: ") 211 | (display unexpected)) 212 | 213 | (define organized-expected (sort (remove-duplicates expected) string<=?)) 214 | (unless (null? organized-expected) 215 | (newline) 216 | (display " expected: ") 217 | (display (if (< (length organized-expected) 3) 218 | (string-join organized-expected " or ") 219 | (string-join organized-expected ", " #:before-last ", or "))))))) 220 | 221 | (struct exn:fail:read:megaparsack exn:fail:read (unexpected expected) #:transparent) 222 | 223 | (define/match (parse-result! result) 224 | [((success result)) result] 225 | [((failure (and message (message srcloc unexpected expected)))) 226 | (raise (exn:fail:read:megaparsack (parse-error->string message) 227 | (current-continuation-marks) 228 | (list srcloc) unexpected expected))]) 229 | 230 | ;; syntax object utilities 231 | ;; --------------------------------------------------------------------------------------------------- 232 | 233 | (define some-original-syntax (read-syntax #f (open-input-string "()"))) 234 | 235 | (define/match* (syntax-box->syntax (syntax-box datum (srcloc name line col pos span))) 236 | (datum->syntax #f datum (list name line col pos span) some-original-syntax)) 237 | 238 | (define (merge-srclocs srcloc-a srcloc-b) 239 | (match (sort (list srcloc-a srcloc-b) < #:key (λ (x) (or (srcloc-position x) -1))) 240 | [(list (srcloc name-a line-a col-a pos-a span-a) (srcloc name-b line-b col-b pos-b span-b)) 241 | (srcloc (or name-a name-b) 242 | (or line-a line-b) 243 | (or col-a col-b) 244 | (or pos-a pos-b) 245 | (cond [(and pos-a pos-b span-b) (+ (- pos-b pos-a) span-b)] 246 | [(and pos-a span-a) span-a] 247 | [(and pos-b span-b) span-b] 248 | [else (or span-a span-b)]))])) 249 | 250 | (define/match* (merge-syntax-box/srcloc (syntax-box datum srcloc-b) srcloc-a) 251 | (syntax-box datum (merge-srclocs srcloc-a srcloc-b))) 252 | 253 | ;; error message reconciliation 254 | ;; --------------------------------------------------------------------------------------------------- 255 | 256 | (define (merge-messages message-a message-b) 257 | (match* {message-a message-b} 258 | [{_ #f} message-a] 259 | [{#f _} message-b] 260 | [{(message* pos-a user?-a loc-a unexpected-a expected-a) 261 | (message* pos-b user?-b loc-b unexpected-b expected-b)} 262 | (cond 263 | ; if the errors don’t occur at the same location, pick the one that made more progress 264 | [(> pos-a pos-b) message-a] 265 | [(< pos-a pos-b) message-b] 266 | ; if one was user-generated and the other wasn’t, pick the user-generated one 267 | [(and user?-a (not user?-b)) message-a] 268 | [(and user?-b (not user?-a)) message-b] 269 | ; otherwise, combine information from both errors 270 | [else 271 | (message* pos-a user?-a (merge-srclocs loc-a loc-b) (or unexpected-a unexpected-b) (append expected-a expected-b))])])) 272 | 273 | (define (merge-message/reply message-a reply) 274 | (match reply 275 | [(ok x rest message-b) (ok x rest (merge-messages message-a message-b))] 276 | [(error message-b) (error (merge-messages message-a message-b))])) 277 | 278 | (define (merge-message/result message result) 279 | (match result 280 | [(empty reply) (empty (merge-message/reply message reply))] 281 | [(consumed reply) (consumed (merge-message/reply message reply))])) 282 | 283 | ;; laziness (lazy/p, delay/p) 284 | ;; --------------------------------------------------------------------------------------------------- 285 | 286 | ;; Not publicly documented, but provided for backwards compatibility. 287 | ;; Will likely be removed eventually in favor of `delay/p`. 288 | (define-simple-macro (lazy/p p:expr) 289 | (parser (λ (input paramz) (parse p input paramz)))) 290 | 291 | (define-simple-macro (delay/p p) 292 | #:declare p (expr/c #'parser?*) 293 | (let ([pp (delay p.c)]) 294 | (parser (λ (input paramz) (parse (force pp) input paramz))))) 295 | 296 | ;; choice (or/p) 297 | ;; --------------------------------------------------------------------------------------------------- 298 | 299 | ; binary version of or/p 300 | (define (p . . q) 301 | (parser 302 | (λ (input paramz) 303 | (define-values [result paramz*] (parse p input paramz)) 304 | (match result 305 | [(empty (error message)) 306 | (define-values [result paramz*] (parse q input paramz)) 307 | (values (merge-message/result message result) paramz*)] 308 | [other (values other paramz*)])))) 309 | 310 | (define (or/p . ps) 311 | (let-values ([(ps p) (split-at ps (sub1 (length ps)))]) 312 | (foldr (first p) ps))) 313 | 314 | ;; lookahead (try/p, noncommittal/p, lookahead/p) 315 | ;; --------------------------------------------------------------------------------------------------- 316 | 317 | (define (try/p p) 318 | (map-parser-result 319 | p (match-lambda 320 | [(consumed (error message)) (empty (error message))] 321 | [other other]))) 322 | 323 | (define (noncommittal/p p) 324 | (map-parser-result 325 | p (match-lambda 326 | [(consumed (? ok? reply)) (empty reply)] 327 | [other other]))) 328 | 329 | (define (lookahead/p p) 330 | (parser 331 | (λ (input paramz) 332 | (define-values [result paramz*] (parse p input paramz)) 333 | (values (match result 334 | [(consumed (ok x _ _)) (empty (ok x input #f))] 335 | [(empty (ok x _ _)) (empty (ok x input #f))] 336 | [error error]) 337 | paramz*)))) 338 | 339 | ;; conditional (satisfy/p) 340 | ;; --------------------------------------------------------------------------------------------------- 341 | 342 | (define (satisfy/p proc) 343 | (make-stateless-parser 344 | (match-lambda 345 | [(parser-input pos last-loc tokens) 346 | (match tokens 347 | [(cons (syntax-box (app (current-parser-token-ctc-proj) c) loc) cs) 348 | (if (proc c) 349 | (consumed (ok (syntax-box c loc) (parser-input (add1 pos) loc cs) #f)) 350 | (empty (error (message* pos #f loc c '()))))] 351 | ['() 352 | (empty (error (message* pos #f last-loc "end of input" '())))])]))) 353 | 354 | ;; termination (eof/p) 355 | ;; --------------------------------------------------------------------------------------------------- 356 | 357 | (define eof/p 358 | (make-stateless-parser 359 | (match-lambda 360 | [(parser-input pos _ tokens) 361 | (match tokens 362 | ['() 363 | (make-pure-result (void) '())] 364 | [(cons (syntax-box (app (current-parser-token-ctc-proj) c) loc) _) 365 | (empty (error (message* pos #f loc c '("end of input"))))])]))) 366 | 367 | ;; source location reification (syntax-box/p & syntax/p) 368 | ;; --------------------------------------------------------------------------------------------------- 369 | 370 | (define (syntax-box/p p) 371 | (map-parser-result 372 | p (match-lambda 373 | [(consumed (ok {and box (syntax-box _ srcloc)} rest message)) 374 | (consumed (ok (syntax-box box srcloc) rest message))] 375 | [(empty (ok {and box (syntax-box _ srcloc)} rest message)) 376 | (empty (ok (syntax-box box srcloc) rest message))] 377 | [error error]))) 378 | 379 | (define (syntax/p p) 380 | (map syntax-box->syntax (syntax-box/p p))) 381 | 382 | ;; parser annotation (label/p & hidden/p) 383 | ;; --------------------------------------------------------------------------------------------------- 384 | 385 | (define/match* (expect pos-a expected (and message (message* pos-b user? srcloc unexpected _))) 386 | (if (= pos-a pos-b) 387 | (message* pos-a user? srcloc unexpected (list expected)) 388 | message)) 389 | 390 | (define/match* (hide pos-a (and message (message* pos-b user? srcloc unexpected _))) 391 | (if (= pos-a pos-b) 392 | (message* pos-a user? srcloc unexpected '()) 393 | message)) 394 | 395 | (define (label/p str p) 396 | (parser 397 | (λ (input paramz) 398 | (define pos (parser-input-position input)) 399 | (define-values [result paramz*] (parse p input paramz)) 400 | (values (match result 401 | [(empty (error message)) (empty (error (expect pos str message)))] 402 | [(empty (ok x rest message)) (empty (ok x rest (expect pos str message)))] 403 | [consumed consumed]) 404 | paramz*)))) 405 | 406 | (define (hidden/p p) 407 | (parser 408 | (λ (input paramz) 409 | (define pos (parser-input-position input)) 410 | (define-values [result paramz*] (parse p input paramz)) 411 | (values (match result 412 | [(empty (error message)) (empty (error (hide pos message)))] 413 | [(empty (ok x rest message)) (empty (ok x rest (hide pos message)))] 414 | [consumed consumed]) 415 | paramz*)))) 416 | 417 | ;; custom failure messages (fail/p, guard/p) 418 | ;; --------------------------------------------------------------------------------------------------- 419 | 420 | (define/match* (fail/p (message srcloc unexpected expected)) 421 | (make-stateless-parser 422 | (λ (input) (empty (error (message* (parser-input-position input) #t srcloc unexpected expected)))))) 423 | 424 | ; Providing guard/p as a primitive rather than as a derived concept is somewhat unsatisfying. In 425 | ; previous versions of this library, it /was/ implemented as a derived combinator, with the following 426 | ; implementation: 427 | ; 428 | ; (define (guard/p p pred? [expected #f] [mk-unexpected values]) 429 | ; (do [s <- (syntax-box/p p)] 430 | ; (define v (syntax-box-datum s)) 431 | ; (if (pred? v) 432 | ; (pure v) 433 | ; (fail/p (message (syntax-box-srcloc s) 434 | ; (mk-unexpected v) 435 | ; (if expected (list expected) '())))))) 436 | ; 437 | ; Sadly, changes to the way `label/p` chooses to adjust the list of expected values makes this 438 | ; implementation no longer satisfactory. The issue is that the token position of the error produced by 439 | ; `fail/p` is after the tokens consumed by the enclosed parser. This causes `label/p` to consider the 440 | ; parse error as more specific than the one it would generate, so it declines to modify it. 441 | ; 442 | ; This really reveals a deeper issue with the way megaparsack answers the question of which errors 443 | ; made the most progress. If an error from a different branch were to occur in the middle of the token 444 | ; stream consumed by the enclosed parser, megaparsack would select it over the `guard/p`-generated 445 | ; error, since `guard/p` reports its error at the /start/ of that sequence of tokens. This is not 446 | ; really right---a more correct approach would be to record the whole span of tokens that `guard/p` 447 | ; wraps. But it’s unclear how much this really matters, and that still wouldn’t make it any easier to 448 | ; define `guard/p` as a derived concept, so for now, the imperfect implementation remains. 449 | 450 | (define (guard/p p pred? [expected #f] [mk-unexpected values]) 451 | (define (guard-reply pos reply) 452 | (match reply 453 | [(ok (syntax-box x loc) rest _) 454 | #:when (not (pred? x)) 455 | (error (message* pos #t loc (mk-unexpected x) (if expected (list expected) '())))] 456 | [other other])) 457 | (parser 458 | (λ (input paramz) 459 | (define pos (parser-input-position input)) 460 | (define-values [result paramz*] (parse p input paramz)) 461 | (values (match result 462 | [(empty reply) (empty (guard-reply pos reply))] 463 | [(consumed reply) (consumed (guard-reply pos reply))]) 464 | paramz*)))) 465 | 466 | ;; contracts (parser/c) 467 | ;; --------------------------------------------------------------------------------------------------- 468 | 469 | (define-syntax-parser parser/c 470 | [(head in-ctc out-ctc) 471 | (define ctc-tag (gensym 'ctc)) 472 | (syntax-property 473 | (quasisyntax/loc this-syntax 474 | (parser/c-proc 475 | #,(syntax-property #'in-ctc 'racket/contract:negative-position ctc-tag) 476 | #,(syntax-property #'out-ctc 'racket/contract:positive-position ctc-tag))) 477 | 'racket/contract:contract 478 | (vector ctc-tag (list #'head) '()))] 479 | [(head form ...) 480 | (syntax-property (quasisyntax/loc this-syntax 481 | (parser/c-proc form ...)) 482 | 'racket/contract:contract 483 | (vector (gensym 'ctc) (list #'head) '()))] 484 | [head:id 485 | (syntax-property (quasisyntax/loc this-syntax 486 | parser/c-proc) 487 | 'racket/contract:contract 488 | (vector (gensym 'ctc) (list #'head) '()))]) 489 | 490 | ; We define `parser/c` as a custom contract, which allows us to ensure that contracts on input tokens 491 | ; are applied /lazily/, when the tokens are actually consumed. If we didn’t do this, each individual 492 | ; parser would apply its input contract to every remaining token in the input stream, which is both 493 | ; unhelpful and extraordinarily slow. 494 | ; 495 | ; To keep contract checking minimal, `parser/c` does not directly enforce input contracts. Instead, it 496 | ; stores a contract projection in the `mark:parser-token-ctc-proj` continuation mark. The contract 497 | ; projection is only actually applied to input tokens as needed by primitive parser constructors like 498 | ; `satisfy/p`. 499 | ; 500 | ; This is all rather subtle, since it means all primitive parsers must individually take care to apply 501 | ; the contract projection before inspecting the token stream. Fortunately, the only parsers that 502 | ; actually inspect tokens directly are `satisfy/p` and `eof/p`, so the contract checking machinery 503 | ; can be localized there. 504 | 505 | (define (parser/c-proc in-ctc out-ctc) 506 | (let ([in-ctc (coerce-contract 'parser/c in-ctc)] 507 | [out-ctc (coerce-contract 'parser/c out-ctc)]) 508 | (define in-proj (contract-late-neg-projection in-ctc)) 509 | (define out-proj (contract-late-neg-projection out-ctc)) 510 | (define pure-out-proj (contract-late-neg-projection (pure/c out-ctc))) 511 | 512 | (define chaperone? (and (chaperone-contract? in-ctc) 513 | (chaperone-contract? out-ctc))) 514 | ((if chaperone? make-chaperone-contract make-contract) 515 | #:name (build-compound-type-name 'parser/c in-ctc out-ctc) 516 | #:first-order parser?* 517 | #:late-neg-projection 518 | (λ (blame) 519 | (define in-elem-proj (in-proj (blame-add-context blame "the input to" #:swap? #t))) 520 | (define out-blame (blame-add-context blame "the result of")) 521 | (define out-elem-proj (out-proj out-blame)) 522 | (define pure-out-elem-proj (pure-out-proj out-blame)) 523 | (λ (val missing-party) 524 | (cond 525 | [(pure? val) 526 | (pure-out-elem-proj val missing-party)] 527 | [(parser? val) 528 | ((if chaperone? chaperone-struct impersonate-struct) 529 | val parser-proc 530 | (λ (self parse-proc) 531 | (define (wrap-ok x loc rest message) 532 | (ok (syntax-box (out-elem-proj x missing-party) loc) rest message)) 533 | ((if chaperone? chaperone-procedure impersonate-procedure) 534 | parse-proc 535 | (λ (input paramz) 536 | (define old-ctc-proj (current-parser-token-ctc-proj)) 537 | (values (λ (result paramz) 538 | (values (match result 539 | [(consumed (ok (syntax-box x loc) rest message)) 540 | (consumed (wrap-ok x loc rest message))] 541 | [(empty (ok (syntax-box x loc) rest message)) 542 | (empty (wrap-ok x loc rest message))] 543 | [error error]) 544 | paramz)) 545 | 'mark mark:parser-token-ctc-proj 546 | (λ (val) (in-elem-proj (old-ctc-proj val) missing-party)) 547 | input paramz)))))] 548 | [else 549 | (raise-blame-error blame val #:missing-party missing-party 550 | '(expected: "parser?" given: "~e") val)])))))) 551 | 552 | ;; state (make-parser-parameter, parameterize/p) 553 | ;; --------------------------------------------------------------------------------------------------- 554 | 555 | (struct parser-parameter (initial-value) 556 | #:constructor-name make-parser-parameter 557 | #:authentic ; we look up parameters in the parameterization by `eq?`, so no contracts allowed 558 | #:property prop:procedure 559 | (case-lambda 560 | [(self) 561 | (parser 562 | (λ (input paramz) 563 | (define value (hash-ref paramz self (λ () (parser-parameter-initial-value self)))) 564 | (values (make-pure-result value input) paramz)))] 565 | [(self value) 566 | (parser 567 | (λ (input paramz) 568 | (values (make-pure-result (void) input) 569 | (hash-set paramz self value))))])) 570 | 571 | (define-syntax-parser parameterize/p 572 | [(_ ([param val:expr] ...) p) 573 | #:declare param (expr/c #'parser-parameter? #:name "parameter") 574 | #:declare p (expr/c #'parser? #:name "body parser") 575 | (for/fold ([body #'p.c]) 576 | ([param (in-list (reverse (attribute param.c)))] 577 | [val (in-list (reverse (attribute val)))]) 578 | (quasisyntax/loc this-syntax 579 | (parameterize-one/p #,body #,param #,val)))]) 580 | 581 | (define (parameterize-one/p p param val) 582 | (do [old-val <- (param)] 583 | (param val) 584 | [result <- p] 585 | (param old-val) 586 | (pure result))) 587 | -------------------------------------------------------------------------------- /megaparsack-lib/megaparsack/combinator.rkt: -------------------------------------------------------------------------------- 1 | #lang curly-fn racket/base 2 | 3 | (require data/applicative 4 | data/monad 5 | megaparsack/base 6 | racket/contract 7 | racket/list) 8 | 9 | (provide (contract-out 10 | [many/p (->* [parser?] 11 | [#:sep parser? 12 | #:min exact-nonnegative-integer? 13 | #:max (or/c exact-nonnegative-integer? +inf.0)] 14 | (parser/c any/c list?))] 15 | [many+/p (->* [parser?] 16 | [#:sep parser? 17 | #:max (or/c exact-nonnegative-integer? +inf.0)] 18 | (parser/c any/c list?))] 19 | [repeat/p (exact-nonnegative-integer? parser? . -> . (parser/c any/c list?))] 20 | 21 | [many-until/p (->* [parser? 22 | #:end parser?] 23 | [#:sep parser? 24 | #:min exact-nonnegative-integer?] 25 | (parser/c any/c (list/c list? any/c)))] 26 | [many+-until/p (->* [parser? 27 | #:end parser?] 28 | [#:sep parser?] 29 | (parser/c any/c (list/c list? any/c)))] 30 | 31 | [many*/p (parser? . -> . parser?)] 32 | [many/sep*/p (parser? parser? . -> . parser?)] 33 | [many/sep+/p (parser? parser? . -> . parser?)] 34 | 35 | [==/p (->* [any/c] [(any/c any/c . -> . any/c)] parser?)] 36 | [one-of/p (->* [list?] [(any/c any/c . -> . any/c)] parser?)] 37 | [list/p (->* [] 38 | [#:sep parser?] 39 | #:rest (listof parser?) 40 | (parser/c any/c list?))])) 41 | 42 | (define (==/p v [=? equal?]) 43 | (label/p (format "~a" v) 44 | (satisfy/p #{=? v %}))) 45 | 46 | (define (one-of/p vs [=? equal?]) 47 | (apply or/p (map #{==/p % =?} vs))) 48 | 49 | (define (list/p #:sep [sep void/p] . ps) 50 | (cond [(empty? ps) (pure '())] 51 | [(empty? (rest ps)) ((pure list) (first ps))] 52 | [else 53 | ((pure list*) (do [v <- (first ps)] sep (pure v)) 54 | (apply list/p #:sep sep (rest ps)))])) 55 | 56 | (define (many/p p 57 | #:sep [sep-p void/p] 58 | #:min [min-count 0] 59 | #:max [max-count +inf.0]) 60 | (define (loop-mandatory p [min-left min-count] 61 | #:recur-parser [recur p]) 62 | (if (zero? min-left) 63 | (loop-optional p #:recur-parser recur) 64 | ((pure cons) p (lazy/p (loop-mandatory recur (sub1 min-left)))))) 65 | 66 | (define (loop-optional p [max-left (- max-count min-count)] 67 | #:recur-parser [recur p]) 68 | (if (<= max-left 0) 69 | (pure '()) 70 | (or/p ((pure cons) p (lazy/p (loop-optional recur (sub1 max-left)))) 71 | (pure '())))) 72 | 73 | (loop-mandatory p #:recur-parser (do sep-p p))) 74 | 75 | (define (many+/p p #:sep [sep void/p] #:max [max-count +inf.0]) 76 | (many/p p #:sep sep #:min 1 #:max max-count)) 77 | 78 | (define (repeat/p n p) (many/p p #:min n #:max n)) 79 | 80 | (define (many-until/p p #:end end-p 81 | #:sep [sep-p void/p] 82 | #:min [min-count 0]) 83 | (define (loop-mandatory p 84 | [results '()] 85 | [min-left min-count] 86 | #:recur-parser [recur p]) 87 | (if (zero? min-left) 88 | (loop-optional p results #:recur-parser recur) 89 | (do [result <- p] 90 | (loop-mandatory recur (cons result results) (sub1 min-left))))) 91 | 92 | (define (loop-optional p results #:recur-parser [recur p]) 93 | (or/p (do [end-result <- end-p] 94 | (pure (list (reverse results) end-result))) 95 | (do [result <- p] 96 | (loop-optional recur (cons result results))))) 97 | 98 | (loop-mandatory p #:recur-parser (do sep-p p))) 99 | 100 | (define (many+-until/p p #:end end-p #:sep [sep-p void/p]) 101 | (many-until/p p #:end end-p #:sep sep-p #:min 1)) 102 | 103 | ;; These combinators are special cases of many/p that predated many/p 104 | 105 | (define (many*/p p) (many/p p)) 106 | (define (many/sep*/p p sep) (many/p p #:sep sep)) 107 | (define (many/sep+/p p sep) (many+/p p #:sep sep)) 108 | -------------------------------------------------------------------------------- /megaparsack-lib/megaparsack/main.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require megaparsack/base 4 | megaparsack/combinator) 5 | 6 | (provide (all-from-out megaparsack/base 7 | megaparsack/combinator)) 8 | -------------------------------------------------------------------------------- /megaparsack-lib/megaparsack/text.rkt: -------------------------------------------------------------------------------- 1 | #lang curly-fn racket/base 2 | 3 | (require data/applicative 4 | data/monad 5 | megaparsack/base 6 | megaparsack/combinator 7 | racket/contract 8 | racket/list 9 | racket/function) 10 | 11 | (provide (contract-out 12 | [parse-string (->* [parser? string?] [any/c] any/c)] 13 | [parse-syntax-string (-> parser? (syntax/c string?) any/c)] 14 | [char/p (char? . -> . (parser/c char? char?))] 15 | [char-not/p (char? . -> . (parser/c char? char?))] 16 | [char-ci/p (char? . -> . (parser/c char? char?))] 17 | [char-between/p (char? char? . -> . (parser/c char? char?))] 18 | [char-in/p (string? . -> . (parser/c char? char?))] 19 | [char-not-in/p (string? . -> . (parser/c char? char?))] 20 | [any-char/p (parser/c char? char?)] 21 | [letter/p (parser/c char? char?)] 22 | [digit/p (parser/c char? char?)] 23 | [symbolic/p (parser/c char? char?)] 24 | [space/p (parser/c char? char?)] 25 | [integer/p (parser/c char? integer?)] 26 | [string/p (string? . -> . (parser/c char? string?))] 27 | [string-ci/p (string? . -> . (parser/c char? string?))])) 28 | 29 | (define (chars->syntax-boxes chars name pos line col) 30 | (if (empty? chars) 31 | '() 32 | (let ([c (first chars)]) 33 | (cons (syntax-box c (srcloc name line col pos 1)) 34 | (if (char=? c #\newline) 35 | (chars->syntax-boxes (rest chars) name (add1 pos) (add1 line) 0) 36 | (chars->syntax-boxes (rest chars) name (add1 pos) line (add1 col))))))) 37 | 38 | (define (parse-string p input [name 'string]) 39 | (parse p (chars->syntax-boxes (string->list input) name 1 1 0))) 40 | 41 | (define (parse-syntax-string p stx-string) 42 | (parse p (chars->syntax-boxes (string->list (syntax->datum stx-string)) 43 | (syntax-source stx-string) 44 | (syntax-position stx-string) 45 | (syntax-line stx-string) 46 | (syntax-column stx-string)))) 47 | 48 | (define (char/p c) (label/p (format "'~a'" c) (satisfy/p #{char=? c}))) 49 | (define (char-not/p c) (label/p (format "not '~a'" c) (satisfy/p #{char=/? c}))) 50 | (define (char-ci/p c) (label/p (format "'~a'" c) (satisfy/p #{char-ci=? c}))) 51 | (define letter/p (label/p "letter" (satisfy/p char-alphabetic?))) 52 | (define digit/p (label/p "number" (satisfy/p char-numeric?))) 53 | (define symbolic/p (label/p "symbolic" (satisfy/p char-symbolic?))) 54 | (define space/p (label/p "whitespace" (satisfy/p (disjoin char-whitespace? char-blank?)))) 55 | (define any-char/p (label/p "any character" (satisfy/p (lambda (_) #t)))) 56 | 57 | (define integer/p 58 | (label/p "integer" 59 | (do [digits <- (many+/p digit/p)] 60 | (pure (string->number (apply string digits)))))) 61 | 62 | (define (string/p str) 63 | (define len (string-length str)) 64 | (if (zero? len) 65 | (pure "") 66 | (label/p str 67 | (let loop ([i 0]) 68 | (if (< i len) 69 | (do (char/p (string-ref str i)) 70 | (loop (add1 i))) 71 | (pure str)))))) 72 | 73 | (define (string-ci/p str) 74 | (define len (string-length str)) 75 | (if (zero? len) 76 | (pure "") 77 | (label/p str 78 | (let loop ([i 0] [cs '()]) 79 | (if (< i len) 80 | (do [c <- (char-ci/p (string-ref str i))] 81 | (loop (add1 i) (cons c cs))) 82 | (pure (list->string (reverse cs)))))))) 83 | 84 | (define (char-between/p low high) 85 | (label/p (format "a character between '~a' and '~a'" low high) 86 | (satisfy/p #{char<=? low % high}))) 87 | 88 | (define (char-in/p str) 89 | (apply or/p (map char/p (string->list str)))) 90 | 91 | (define (char-not-in/p str) 92 | (satisfy/p #{string-not-member? str})) 93 | 94 | ;;; 95 | ;; char and string utility functions 96 | (define (char=/? c k) 97 | (not (char=? c k))) 98 | 99 | (define (char-ci=/? c k) 100 | (not (char-ci=? c k))) 101 | 102 | (define (string-member? str c) 103 | (for/or ([k str]) 104 | (char=? k c))) 105 | 106 | (define (string-not-member? str c) 107 | (not (string-member? str c))) 108 | -------------------------------------------------------------------------------- /megaparsack-parser-tools/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define version "1.8") 4 | 5 | (define collection 'multi) 6 | 7 | (define deps 8 | '("base" 9 | "functional-lib" 10 | "megaparsack-lib" 11 | "parser-tools-lib")) 12 | (define build-deps 13 | '()) 14 | -------------------------------------------------------------------------------- /megaparsack-parser-tools/megaparsack/parser-tools/lex.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require data/monad 4 | data/applicative 5 | megaparsack/base 6 | (prefix-in lex: parser-tools/lex) 7 | racket/contract 8 | racket/match) 9 | 10 | (provide (contract-out [parse-tokens ([parser? (listof lex:position-token?)] [any/c] . ->* . any/c)] 11 | [token/p (symbol? . -> . parser?)])) 12 | 13 | (define (parse-tokens parser toks [srcname 'tokens]) 14 | (parse parser (map (position-token->parser-token srcname) toks))) 15 | 16 | (define ((position-token->parser-token source-name) pos-token) 17 | (match pos-token 18 | [(lex:position-token tok (lex:position offset-a line col) (lex:position offset-b _ _)) 19 | (syntax-box tok (srcloc source-name line col offset-a (- offset-b offset-a)))])) 20 | 21 | (define (token/p name) 22 | (label/p 23 | (symbol->string name) 24 | (do [tok <- (satisfy/p (λ (x) (and (or (lex:token? x) (symbol? x)) 25 | (equal? (lex:token-name x) name))))] 26 | (pure (lex:token-value tok))))) 27 | -------------------------------------------------------------------------------- /megaparsack-parser/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define version "1.8") 4 | 5 | (define collection 'multi) 6 | 7 | (define deps 8 | '("base" 9 | "collections-lib" 10 | "curly-fn-lib" 11 | "functional-lib" 12 | "megaparsack-lib")) 13 | (define build-deps 14 | '()) 15 | -------------------------------------------------------------------------------- /megaparsack-parser/megaparsack/parser/json.rkt: -------------------------------------------------------------------------------- 1 | #lang curly-fn racket/base 2 | 3 | (require racket/require 4 | (multi-in data [collection applicative monad]) 5 | (multi-in megaparsack [base combinator text])) 6 | 7 | (provide parse-json-string) 8 | 9 | (define spaces/p (many*/p (hidden/p space/p))) 10 | 11 | (define positive-decimal/p 12 | (or/p (try/p (do [i <- (or/p integer/p (pure 0))] 13 | (char/p #\.) 14 | [fs <- (many+/p digit/p)] 15 | (define f (string->number (list->string fs))) 16 | (pure (+ i (/ f (expt 10 (length fs))))))) 17 | integer/p)) 18 | 19 | (define exponent/p 20 | (do (char-ci/p #\e) 21 | [negate? <- (or/p (do (char/p #\+) (pure #f)) 22 | (do (char/p #\-) (pure #t)) 23 | (pure #f))] 24 | [i <- integer/p] 25 | (pure (if negate? (- i) i)))) 26 | 27 | (define positive-number/p 28 | (do [n <- positive-decimal/p] 29 | [exp <- (or/p (try/p exponent/p) 30 | (pure 0))] 31 | (pure (* n (expt 10 exp))))) 32 | 33 | (define number/p 34 | (label/p 35 | "number" 36 | (or/p (do (char/p #\-) 37 | [n <- positive-number/p] 38 | (pure (- n))) 39 | positive-number/p))) 40 | 41 | (define hex-digit/p 42 | (or/p (do (char/p #\0) (pure 0)) 43 | (do (char/p #\1) (pure 1)) 44 | (do (char/p #\2) (pure 2)) 45 | (do (char/p #\3) (pure 3)) 46 | (do (char/p #\4) (pure 4)) 47 | (do (char/p #\5) (pure 5)) 48 | (do (char/p #\6) (pure 6)) 49 | (do (char/p #\7) (pure 7)) 50 | (do (char/p #\8) (pure 8)) 51 | (do (char/p #\9) (pure 9)) 52 | (do (char-ci/p #\a) (pure 10)) 53 | (do (char-ci/p #\b) (pure 11)) 54 | (do (char-ci/p #\c) (pure 12)) 55 | (do (char-ci/p #\d) (pure 13)) 56 | (do (char-ci/p #\e) (pure 14)) 57 | (do (char-ci/p #\f) (pure 15)))) 58 | 59 | (define string-char-or-escape/p 60 | (or/p (do (char/p #\\) 61 | (or/p (char/p #\") 62 | (char/p #\\) 63 | (char/p #\/) 64 | (do (char/p #\b) (pure #\backspace)) 65 | (do (char/p #\f) (pure #\page)) 66 | (do (char/p #\n) (pure #\newline)) 67 | (do (char/p #\r) (pure #\return)) 68 | (do (char/p #\t) (pure #\tab)) 69 | (do (char/p #\u) 70 | [ns <- (repeat/p 4 hex-digit/p)] 71 | (define code (foldl #{+ %2 (* 16 %1)} 0 ns)) 72 | (pure (integer->char code))))) 73 | (satisfy/p #{not (char=? #\" %)}))) 74 | 75 | (define quoted-string/p 76 | (label/p 77 | "string" 78 | (do (char/p #\") 79 | [chars <- (many*/p string-char-or-escape/p)] 80 | (char/p #\") 81 | (pure (list->string chars))))) 82 | 83 | (define boolean/p 84 | (label/p 85 | "boolean" 86 | (or/p (do (string/p "true") (pure #t)) 87 | (do (string/p "false") (pure #f))))) 88 | 89 | (define null/p 90 | (do (string/p "null") (pure 'null))) 91 | 92 | (define object-pair/p 93 | (do spaces/p 94 | [k <- quoted-string/p] 95 | spaces/p 96 | (char/p #\:) 97 | spaces/p 98 | [v <- value/p] 99 | spaces/p 100 | (pure (cons (string->symbol k) v)))) 101 | 102 | (define object/p 103 | (label/p 104 | "object" 105 | (do (char/p #\{) 106 | [pairs <- (many/sep*/p object-pair/p (char/p #\,))] 107 | spaces/p 108 | (char/p #\}) 109 | (pure (make-immutable-hasheq (sequence->list pairs)))))) 110 | 111 | (define array/p 112 | (label/p 113 | "array" 114 | (do (char/p #\[) 115 | [elems <- (many/sep*/p value/p (try/p (do spaces/p (char/p #\,))))] 116 | spaces/p 117 | (char/p #\]) 118 | (pure elems)))) 119 | 120 | (define value/p 121 | (do spaces/p 122 | (or/p number/p 123 | quoted-string/p 124 | boolean/p 125 | null/p 126 | object/p 127 | array/p))) 128 | 129 | (define entire-value/p 130 | (do [v <- value/p] 131 | spaces/p 132 | eof/p 133 | (pure v))) 134 | 135 | (define (parse-json-string str [name 'json]) 136 | (parse-string entire-value/p str name)) 137 | -------------------------------------------------------------------------------- /megaparsack-test/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define version "1.8") 4 | 5 | (define collection 'multi) 6 | 7 | (define deps 8 | '()) 9 | (define build-deps 10 | '("base" 11 | "curly-fn-lib" 12 | "functional-lib" 13 | "megaparsack-lib" 14 | "megaparsack-parser" 15 | "rackunit-lib" 16 | "rackunit-spec")) 17 | -------------------------------------------------------------------------------- /megaparsack-test/tests/megaparsack/base.rkt: -------------------------------------------------------------------------------- 1 | #lang curly-fn racket/base 2 | 3 | (require data/applicative 4 | data/either 5 | data/monad 6 | megaparsack 7 | megaparsack/text 8 | rackunit 9 | rackunit/spec) 10 | 11 | (describe "or/p" 12 | (context "when no parsers consume input" 13 | (it "succeeds with the first successful parse" 14 | (check-equal? (parse-string (or/p (do eof/p (pure 'eof)) letter/p) "") 15 | (success 'eof)))) 16 | 17 | (it "fails when a parser consumes input and fails" 18 | (check-equal? (parse-string (or/p (do letter/p letter/p) 19 | (pure 'eof)) 20 | "a") 21 | (failure (message (srcloc 'string 1 0 1 1) "end of input" '("letter"))))) 22 | 23 | (it "preserves errors from other branches on success" 24 | (check-equal? (parse-string (do (or/p digit/p (pure #\0)) letter/p) "*") 25 | (failure (message (srcloc 'string 1 0 1 1) #\* '("number" "letter")))) 26 | (check-equal? (parse-string (do (many/p (char/p #\r)) eof/p) "ra") 27 | (failure (message (srcloc 'string 1 1 2 1) #\a '("'r'" "end of input")))))) 28 | 29 | (describe "fail/p" 30 | (it "always fails" 31 | (let* ([loc (srcloc #f #f #f #f #f)] 32 | [msg (message loc 33 | "something" 34 | (list "something else"))] 35 | [p (fail/p msg)]) 36 | (check-equal? (parse-string p "") 37 | (failure msg))))) 38 | 39 | (describe "noncommittal/p" 40 | (it "allows backtracking as if input was not consumed upon success" 41 | (check-equal? (parse-string (many/p letter/p #:sep (noncommittal/p space/p)) 42 | "a b c d .") 43 | (success '(#\a #\b #\c #\d)))) 44 | 45 | (it "still consumes input on failure" 46 | (check-equal? (parse-string (or/p (noncommittal/p (string/p "ab")) 47 | (char/p #\a)) 48 | "a") 49 | (failure (message (srcloc 'string 1 0 1 1) "end of input" '("'b'")))))) 50 | 51 | (describe "lookahead/p" 52 | (it "succeeds without consuming input" 53 | (check-equal? (parse-string (do (lookahead/p (char/p #\a)) 54 | (char/p #\a) 55 | eof/p) 56 | "a") 57 | (success (void)))) 58 | 59 | (it "still consumes input on failure" 60 | (check-equal? (parse-string (do (lookahead/p (string/p "ab")) 61 | (char/p #\a)) 62 | "a") 63 | (failure (message (srcloc 'string 1 0 1 1) "end of input" '("'b'")))))) 64 | 65 | (describe "one-of/p" 66 | (it "succeeds if any of the provided elements are equal" 67 | (check-equal? (parse-string (one-of/p '(#\a #\b)) "a") 68 | (success #\a)) 69 | (check-equal? (parse-string (one-of/p '(#\a #\b)) "b") 70 | (success #\b))) 71 | 72 | (it "fails if none of the provided elements are equal" 73 | (check-equal? (parse-string (one-of/p '(#\a #\b)) "c") 74 | (failure (message (srcloc 'string 1 0 1 1) #\c '("a" "b")))))) 75 | 76 | (describe "guard/p" 77 | (let ([byte/p (label/p "byte" (try/p (guard/p integer/p #{% . < . 256})))]) 78 | (it "succeeds when the guard predicate is non-#f" 79 | (check-equal? (parse-string byte/p "128") 80 | (success 128))) 81 | (it "fails when the guard predicate is #f" 82 | (check-equal? (parse-string byte/p "1024") 83 | (failure (message (srcloc 'string 1 0 1 4) 1024 84 | '("byte"))))))) 85 | 86 | (describe "list/p" 87 | (context "when not given a separator" 88 | (define letter-digit-letter/p (list/p letter/p digit/p letter/p)) 89 | (it "succeeds when given components in sequence" 90 | (check-equal? (parse-string letter-digit-letter/p "a1b") 91 | (success (list #\a #\1 #\b)))) 92 | (it "fails when given too few components" 93 | (check-equal? (parse-string letter-digit-letter/p "a1") 94 | (failure (message (srcloc 'string 1 1 2 1) 95 | "end of input" 96 | '("letter")))))) 97 | (context "when given a separator" 98 | (define dotted-letter-digit-letter/p 99 | (list/p letter/p digit/p letter/p #:sep (char/p #\.))) 100 | (it "succeeds when given separated components" 101 | (check-equal? (parse-string dotted-letter-digit-letter/p "a.1.b") 102 | (success (list #\a #\1 #\b)))) 103 | (it "fails when given unseparated components" 104 | (check-equal? (parse-string dotted-letter-digit-letter/p "a1b") 105 | (failure (message (srcloc 'string 1 1 2 1) 106 | #\1 '("'.'"))))))) 107 | 108 | (describe "delay/p" 109 | (define eval-count 0) 110 | (define rec/p (or/p (char/p #\.) 111 | (list/p (char/p #\a) 112 | (delay/p (begin (set! eval-count (add1 eval-count)) 113 | rec/p))))) 114 | 115 | (it "delays evaluation of its argument" 116 | (check-equal? eval-count 0)) 117 | 118 | (it "allows a parser to be self-recursive" 119 | (check-equal? (parse-string rec/p "aaa.") 120 | (success '(#\a (#\a (#\a #\.)))))) 121 | 122 | (it "only evaluates its argument once" 123 | (check-equal? (parse-string rec/p "aaa.") 124 | (success '(#\a (#\a (#\a #\.))))) 125 | (check-equal? eval-count 1))) 126 | 127 | (describe "many/p" 128 | (context "when given a letter parser" 129 | (define many-letters/p (many/p letter/p)) 130 | (it "succeeds when parsing multiple letters" 131 | (check-equal? (parse-string many-letters/p "abc") 132 | (success (list #\a #\b #\c)))) 133 | (it "succeeds when parsing one letter" 134 | (check-equal? (parse-string many-letters/p "a") 135 | (success (list #\a)))) 136 | (it "succeeds with an empty list when unable to parse" 137 | (check-equal? (parse-string many-letters/p "123") 138 | (success (list))))) 139 | (context "when given a letter parser and dot separator" 140 | (define many-dotted-letters/p (many/p letter/p #:sep (char/p #\.))) 141 | (it "succeeds with only letters when parsing dotted letters" 142 | (check-equal? (parse-string many-dotted-letters/p "a.b.c") 143 | (success (list #\a #\b #\c))))) 144 | (context "when given a letter parser and a maximum count of three" 145 | (define at-most-three-letters/p (many/p letter/p #:max 3)) 146 | (it "succeeds when parsing three letters" 147 | (check-equal? (parse-string at-most-three-letters/p "abc") 148 | (success (list #\a #\b #\c)))) 149 | (it "succeeds when parsing fewer than three letters" 150 | (check-equal? (parse-string at-most-three-letters/p "a") 151 | (success (list #\a)))) 152 | (it "only consumes three letters when parsing four letters" 153 | (check-equal? (parse-string at-most-three-letters/p "abcd") 154 | (success (list #\a #\b #\c))))) 155 | (context "when given a maximum count of zero" 156 | (it "consumes no input" 157 | (check-equal? (parse-string (many/p letter/p #:max 0) "abc") 158 | (success (list))))) 159 | (context "when given a letter parser and a minimum count of three" 160 | (define at-least-three-letters/p (many/p letter/p #:min 3)) 161 | (it "succeeds when parsing three letters" 162 | (check-equal? (parse-string at-least-three-letters/p "abc") 163 | (success (list #\a #\b #\c)))) 164 | (it "succeeds when parsing four letters" 165 | (check-equal? (parse-string at-least-three-letters/p "abcd") 166 | (success (list #\a #\b #\c #\d)))) 167 | (it "fails when parsing two letters" 168 | (check-equal? (parse-string at-least-three-letters/p "ab") 169 | (failure (message (srcloc 'string 1 1 2 1) 170 | "end of input" 171 | '("letter")))))) 172 | (context "when given a minimum count of 2 and a maximum count of 4" 173 | (define two-to-four-letters/p (many/p letter/p #:min 2 #:max 4)) 174 | (it "succeeds when parsing two letters" 175 | (check-equal? (parse-string two-to-four-letters/p "ab") 176 | (success (list #\a #\b)))) 177 | (it "succeeds when parsing four letters" 178 | (check-equal? (parse-string two-to-four-letters/p "abcd") 179 | (success (list #\a #\b #\c #\d)))) 180 | (it "fails when parsing one letter" 181 | (check-equal? (parse-string two-to-four-letters/p "a") 182 | (failure (message (srcloc 'string 1 0 1 1) 183 | "end of input" 184 | '("letter"))))) 185 | (it "only consumes four letters when given five" 186 | (check-equal? (parse-string two-to-four-letters/p "abcde") 187 | (success (list #\a #\b #\c #\d)))))) 188 | 189 | (describe "parser parameters" 190 | (define param (make-parser-parameter #f)) 191 | 192 | (it "returns the initial value before being set" 193 | (check-equal? (parse-string (param) "") 194 | (success #f))) 195 | 196 | (it "returns the set value after being set" 197 | (check-equal? (parse-string (do (param #t) (param)) "") 198 | (success #t))) 199 | 200 | (it "unsets the value when backtracking" 201 | (check-equal? (parse-string (or/p (do (param #t) any-char/p) 202 | (param)) 203 | "") 204 | (success #f))) 205 | 206 | (describe "parameterize/p" 207 | (it "sets the value with a local extent" 208 | (check-equal? (parse-string (do [(list a b) <- (parameterize/p ([param 1]) 209 | (do [a <- (param)] 210 | (param 2) 211 | [b <- (param)] 212 | (pure (list a b))))] 213 | [c <- (param)] 214 | (pure (list a b c))) 215 | "") 216 | (success (list 1 2 #f)))))) 217 | -------------------------------------------------------------------------------- /megaparsack-test/tests/megaparsack/contract.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require data/applicative 4 | data/monad 5 | megaparsack 6 | megaparsack/text 7 | racket/list 8 | racket/contract 9 | rackunit 10 | rackunit/spec) 11 | 12 | (describe "parser/c" 13 | (it "signals a contract violation on invalid input tokens" 14 | (define exn (with-handlers ([exn:fail? values]) 15 | (parse (char/p #\a) (list (syntax-box #f (srcloc #f #f #f #f #f)))))) 16 | (check-pred exn:fail:contract:blame? exn) 17 | (define blame (exn:fail:contract:blame-object exn)) 18 | (check-equal? (blame-contract blame) 19 | '(-> char? (parser/c char? char?))) 20 | (check-pred blame-swapped? blame) 21 | (check-equal? (first (blame-context blame)) 22 | "the input to")) 23 | 24 | (it "signals a contract violation on invalid results" 25 | (define/contract foo/p 26 | (parser/c any/c string?) 27 | (do void/p (pure #f))) 28 | (define exn (with-handlers ([exn:fail? values]) 29 | (parse foo/p '()))) 30 | (check-pred exn:fail:contract:blame? exn) 31 | (define blame (exn:fail:contract:blame-object exn)) 32 | (check-equal? (blame-contract blame) 33 | '(parser/c any/c string?)) 34 | (check-pred blame-original? blame) 35 | (check-equal? (first (blame-context blame)) 36 | "the result of"))) 37 | -------------------------------------------------------------------------------- /megaparsack-test/tests/megaparsack/parser/json.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require megaparsack 4 | megaparsack/parser/json 5 | racket/function 6 | rackunit 7 | rackunit/spec) 8 | 9 | (describe "parse-json-string" 10 | (it "parses floating-point numbers" 11 | (check-= (parse-result! (parse-json-string "42")) 42 0.001) 12 | (check-= (parse-result! (parse-json-string ".3")) 0.3 0.001) 13 | (check-= (parse-result! (parse-json-string "12.75")) 12.75 0.001) 14 | (check-= (parse-result! (parse-json-string "1e+2")) 100 0.001) 15 | (check-= (parse-result! (parse-json-string "1.2e-3")) 0.0012 0.00001)) 16 | 17 | (it "parses strings" 18 | (check-equal? (parse-result! (parse-json-string "\"hello, world\"")) "hello, world") 19 | (check-equal? (parse-result! (parse-json-string "\"this (\\\") is a double quote\"")) 20 | "this (\") is a double quote") 21 | (check-equal? (parse-result! (parse-json-string "\"escape \\u1a3f!\"")) 22 | "escape \u1a3f!")) 23 | 24 | (it "parses booleans" 25 | (check-true (parse-result! (parse-json-string "true"))) 26 | (check-false (parse-result! (parse-json-string "false")))) 27 | 28 | (it "parses null as the symbol 'null" 29 | (check-equal? (parse-result! (parse-json-string "null")) 'null)) 30 | 31 | (it "parses arrays as lists" 32 | (check-equal? (parse-result! (parse-json-string "[1, 2, 3]")) '(1 2 3)) 33 | (check-equal? (parse-result! (parse-json-string "[true, null, 42]")) '(#t null 42))) 34 | 35 | (it "parses objects as hasheqs with symbol keys" 36 | (check-equal? (parse-result! (parse-json-string "{ \"a\": 1, \"b\": true }")) 37 | #hasheq((a . 1) (b . #t)))) 38 | 39 | (it "fails when it cannot parse the entire string" 40 | (check-exn #rx"unexpected: t\n expected: end of input" 41 | (thunk (parse-result! (parse-json-string "{}true")))))) 42 | -------------------------------------------------------------------------------- /megaparsack-test/tests/megaparsack/text.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require data/either 4 | megaparsack 5 | megaparsack/text 6 | rackunit 7 | rackunit/spec) 8 | 9 | (describe "parse-syntax-string" 10 | (it "uses source location information from the provided syntax object" 11 | (let* ([stx #'"hello, world!"] 12 | [result (parse-result! (parse-syntax-string (syntax/p (string/p "hello")) stx))]) 13 | (check-equal? (syntax-source stx) (syntax-source result)) 14 | (check-equal? (syntax-line stx) (syntax-line result)) 15 | (check-equal? (syntax-column stx) (syntax-column result)) 16 | (check-equal? (syntax-position stx) (syntax-position result)) 17 | (check-equal? (syntax-span result) 5)))) 18 | 19 | (describe "char-between/p" 20 | (it "parses a single character" 21 | (check-equal? (parse-string (char-between/p #\b #\f) "bcdef") (success #\b))) 22 | (it "parses only characters between the given bounds" 23 | (check-equal? (parse-string (char-between/p #\b #\f) "a") 24 | (failure (message (srcloc 'string 1 0 1 1) #\a 25 | '("a character between 'b' and 'f'")))) 26 | (check-equal? (parse-string (char-between/p #\b #\f) "g") 27 | (failure (message (srcloc 'string 1 0 1 1) #\g 28 | '("a character between 'b' and 'f'")))))) 29 | 30 | (describe "char-in/p" 31 | (it "parses a single character" 32 | (check-equal? (parse-string (char-in/p "aeiou") "a") (success #\a))) 33 | (it "parses only characters in the given string" 34 | (check-equal? (parse-string (char-in/p "aeiou") "b") 35 | (failure (message (srcloc 'string 1 0 1 1) #\b 36 | '("'a'" "'e'" "'i'" "'o'" "'u'")))))) 37 | 38 | (describe "string-ci/p" 39 | (it "parses a string, ignoring case, and returns the input string" 40 | (check-equal? (parse-string (string-ci/p "LDA") "lda") (success "lda")) 41 | (check-equal? (parse-string (string-ci/p "lda") "LDA") (success "LDA")) 42 | (check-equal? (parse-string (string-ci/p "LDA") "lDa") (success "lDa")) 43 | (check-equal? (parse-string (string-ci/p "LDA") "LdA") (success "LdA")))) 44 | -------------------------------------------------------------------------------- /megaparsack/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define version "1.8") 4 | 5 | (define collection 'multi) 6 | 7 | (define deps 8 | '("base" 9 | ["megaparsack-lib" #:version "1.8"] 10 | ["megaparsack-doc" #:version "1.8"] 11 | ["megaparsack-parser" #:version "1.8"] 12 | ["megaparsack-parser-tools" #:version "1.8"])) 13 | (define build-deps 14 | '()) 15 | 16 | (define implies 17 | '("megaparsack-lib" 18 | "megaparsack-doc" 19 | "megaparsack-parser" 20 | "megaparsack-parser-tools")) 21 | --------------------------------------------------------------------------------