├── src
    ├── pars.cr
    └── pars
    │   ├── parse_error.cr
    │   ├── parse_result.cr
    │   ├── parse_context.cr
    │   ├── parse.cr
    │   └── parser.cr
├── .gitignore
├── shard.yml
├── .github
    └── workflows
    │   └── crystal.yml
├── LICENSE
├── spec
    └── pars
    │   ├── parse_spec.cr
    │   └── parser_spec.cr
└── README.md


/src/pars.cr:
--------------------------------------------------------------------------------
1 | require "./pars/*"
2 | # Top-level namespace of the shard; its members are loaded from `./pars/*`.
3 | module Pars; end
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /docs/
 2 | /lib/
 3 | /bin/
 4 | /.shards/
 5 | *.dwarf
 6 | 
 7 | # Libraries don't need dependency lock
 8 | # Dependencies will be locked in applications that use them
 9 | /shard.lock
10 | 


--------------------------------------------------------------------------------
/shard.yml:
--------------------------------------------------------------------------------
 1 | name: pars
 2 | version: 1.2.0
 3 | 
 4 | authors:
 5 |   - voximity <vox@voximity.net>
 6 |   - Kim Burgess <kim@place.technology>
 7 | 
 8 | description: |
 9 |   Monadic parser combinator library
10 | 
11 | license: MIT
12 | 


--------------------------------------------------------------------------------
/src/pars/parse_error.cr:
--------------------------------------------------------------------------------
 1 | module Pars
 2 |   # Describes a failed parse: what went wrong and the context it relates to.
 3 |   struct ParseError
 4 |     # Human readable description of the failure.
 5 |     getter message
 6 | 
 7 |     # The `ParseContext` associated with the failure.
 8 |     getter context
 9 | 
10 |     def initialize(@message : String, @context : ParseContext)
11 |     end
12 | 
13 |     # Writes the error to *io* as `message (context)`.
14 |     def to_s(io : IO)
15 |       io << message << ' ' << '(' << context << ')'
16 |     end
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/.github/workflows/crystal.yml:
--------------------------------------------------------------------------------
 1 | name: Crystal CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 | 
12 |     runs-on: ubuntu-latest
13 | 
14 |     container:
15 |       image: crystallang/crystal
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v2
19 |     - name: Install dependencies
20 |       run: shards install
21 |     - name: Run tests
22 |       run: crystal spec
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Original work Copyright (c) 2019 "Voximity" (https://github.com/voximity)
 4 | Modified work Copyright (c) 2020 Place Technology Limited
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/src/pars/parse_result.cr:
--------------------------------------------------------------------------------
 1 | require "./parse_error"
 2 | require "./parse_context"
 3 | 
 4 | module Pars
 5 |   # ParseResult(T) is the result of running a Parser with return type T.
 6 |   struct ParseResult(T)
 7 |     # Creates an errored `ParseResult` that wraps *e*.
 8 |     def self.error(e : ParseError)
 9 |       inst = ParseResult(T).allocate # skips #initialize, which requires a value
10 |       inst.initialize_as_error e
11 |       inst
12 |     end
13 | 
14 |     # Creates an errored `ParseResult` from a *message* and the failing *context*.
15 |     def self.error(message : String, context : ParseContext)
16 |       ParseResult(T).error ParseError.new message, context
17 |     end
18 | 
19 |     @errored = uninitialized Bool
20 |     @error = uninitialized ParseError
21 |     @context = uninitialized ParseContext
22 | 
23 |     getter errored
24 |     getter context
25 | 
26 |     # Creates a new successful `ParseResult`.
27 |     def initialize(@value : T, @context)
28 |       @errored = false
29 |     end
30 | 
31 |     # :nodoc:
32 |     def initialize_as_error(e : ParseError)
33 |       @errored = true
34 |       @error = e
35 |       @context = e.context
36 |     end
37 | 
38 |     # Returns a `ParseError`, or `nil` if parsing was successful.
39 |     def error? : ParseError?
40 |       errored ? @error : nil
41 |     end
42 | 
43 |     # Returns the parsed value, or a `ParseError`.
44 |     def value : T | ParseError
45 |       errored ? @error : @value
46 |     end
47 | 
48 |     # Direct access to the parsed value.
49 |     #
50 |     # Note: this is unsafe and should only be used if `#errored == false`.
51 |     def value! : T
52 |       @value
53 |     end
54 | 
55 |     # Direct access to the `ParseError`.
56 |     #
57 |     # Note: this is unsafe and should only be used if `#errored == true`.
58 |     def error! : ParseError
59 |       @error
60 |     end
61 |   end
62 | end
63 | 


--------------------------------------------------------------------------------
/src/pars/parse_context.cr:
--------------------------------------------------------------------------------
 1 | module Pars
 2 |   # Describes a parse in progress: the input being read and the offset of the
 3 |   # parse head within it. Used to chain Parsers together.
 4 |   struct ParseContext
 5 |     def initialize(@input, @pos = 0)
 6 |     end
 7 | 
 8 |     # The input the parser is working across.
 9 |     getter input : String | Bytes
10 | 
11 |     # The current parse offset within *input*.
12 |     getter pos : Int32
13 | 
14 |     # Builds a new context *offset* positions further along the same input.
15 |     def next(offset = 1) : ParseContext
16 |       ParseContext.new input, pos + offset
17 |     end
18 | 
19 |     # `true` once the offset has reached (or passed) the end of the input.
20 |     def exhausted? : Bool
21 |       input.size <= pos
22 |     end
23 | 
24 |     # Reads the parse head as a `Char`, converting when the input is `Bytes`.
25 |     def char : Char
26 |       case source = input
27 |       when String
28 |         source.char_at pos
29 |       else
30 |         source[pos].chr
31 |       end
32 |     end
33 | 
34 |     # Reads the parse head as a byte.
35 |     def byte : UInt8
36 |       case source = input
37 |       when String
38 |         source.byte_at pos
39 |       else
40 |         source[pos]
41 |       end
42 |     end
43 | 
44 |     # Reads the element at the parse head without converting it.
45 |     def head : Char | UInt8
46 |       input[pos]
47 |     end
48 | 
49 |     # Provide a human readable version of the parse context, head in brackets.
50 |     def to_s(io : IO)
51 |       last = input.size - 1
52 |       # Windows of up to five elements on either side of the parse head.
53 |       leading = (Math.max(0, pos - 5)..(pos - 1))
54 |       trailing = ((pos + 1)..Math.min(pos + 5, last))
55 |       io << "..." if leading.begin > 0
56 |       case source = input
57 |       when String
58 |         io << source[leading] if pos > 0
59 |         io << '[' << (exhausted? ? "<EOF>" : char) << ']'
60 |         io << source[trailing] if pos < last
61 |       else
62 |         # Bytes are rendered as space separated, two digit hex values.
63 |         io << hex(source[leading]) if pos > 0
64 |         io << '[' << (exhausted? ? "<EOF>" : hex(byte)) << ']'
65 |         io << hex(source[trailing]) if pos < last
66 |       end
67 |       io << "..." unless trailing.end == last
68 |     end
69 | 
70 |     private def hex(byte : UInt8)
71 |       byte.to_s(16).rjust 2, '0' # zero padded to two hex digits
72 |     end
73 | 
74 |     private def hex(bytes : Bytes) : String
75 |       bytes.map { |b| hex b }.join ' '
76 |     end
77 |   end
78 | end
77 | 


--------------------------------------------------------------------------------
/spec/pars/parse_spec.cr:
--------------------------------------------------------------------------------
  1 | require "spec"
  2 | require "../../src/pars"
  3 | 
  4 | include Pars
  5 | 
  6 | describe Pars::Parse do
  7 |   describe ".const" do
  8 |     parser = Parse.const('a')
  9 |     it "returns a constant value for every input" do
 10 |       parser.parse("abc").should eq('a')
 11 |       parser.parse("123").should eq('a')
 12 |       parser.parse("").should eq('a') # succeeds even on empty input
 13 |     end
 14 |   end
 15 | 
 16 |   describe "do macro" do
 17 |     it "supports sequencing multiple parsers" do
 18 |       parser = Parse.do({
 19 |         alpha <= Parse.letter,
 20 |         digit <= Parse.digit,
 21 |         Parse.const({alpha, digit}),
 22 |       })
 23 |       parser.parse("a1").should eq({'a', '1'})
 24 |       parser.parse("42").should be_a(ParseError)
 25 |     end
 26 |   end
 27 | 
 28 |   describe ".cond" do
 29 |     it "success when the predicate is true" do
 30 |       parser = Parse.cond('a') { true }
 31 |       parser.parse("").should eq('a')
 32 |     end
 33 |     it "produces a ParseError when the prediciate is false" do
 34 |       parser = Parse.cond('a') { false }
 35 |       parser.parse("").should be_a(ParseError)
 36 |     end
 37 |   end
 38 | 
 39 |   describe ".eq" do
 40 |     parser = Parse.eq('a'.ord)
 41 |     it "checks equivalence at the parse position" do
 42 |       parser.parse("abc").should eq('a'.ord)
 43 |       parser.parse("bca").should be_a(ParseError)
 44 |       parser.parse("cab").should be_a(ParseError)
 45 |     end
 46 |   end
 47 | 
 48 |   describe ".char" do
 49 |     parser = Parse.char('a')
 50 |     it "matches against a char at the current parse position" do
 51 |       parser.parse("abc").should eq('a')
 52 |       parser.parse("bca").should be_a(ParseError)
 53 |       parser.parse("cab").should be_a(ParseError)
 54 |     end
 55 |   end
 56 | 
 57 |   describe ".byte" do
 58 |     parser = Parse.byte(0x0)
 59 |     it "matches for a byte value" do
 60 |       parser.parse(Bytes[0x0]).should eq(0x0)
 61 |       parser.parse("foo").should be_a(ParseError)
 62 |     end
 63 |   end
 64 | 
 65 |   describe ".string" do
 66 |     parser = Parse.string("cat")
 67 |     it "matches against the a string" do
 68 |       parser.parse("cat").should eq("cat")
 69 |       parser.parse("dog").should be_a(ParseError)
 70 |       parser.parse("").should be_a(ParseError)
 71 |     end
 72 |   end
 73 | 
 74 |   describe ".bytes" do
 75 |     parser = Parse.bytes(Bytes[0xDE, 0xAD, 0xBE, 0xEF])
 76 |     it "matches against byte values" do
 77 |       parser.parse(Bytes[0xDE, 0xAD, 0xBE, 0xEF]).should eq(Bytes[0xDE, 0xAD, 0xBE, 0xEF])
 78 |       parser.parse(Bytes[0xDE, 0xAD]).should be_a(ParseError) # input ends early
 79 |       parser.parse(Bytes[0x0]).should be_a(ParseError)
 80 |       parser.parse("foo").should be_a(ParseError)
 81 |     end
 82 |   end
 83 | 
 84 |   describe ".one_char_of" do
 85 |     parser = Parse.one_char_of("abc")
 86 |     it "matches any character from the passed string" do
 87 |       parser.parse("apple").should eq('a')
 88 |       parser.parse("banana").should eq('b')
 89 |       parser.parse("carrot").should eq('c')
 90 |       parser.parse("dragonfruit").should be_a(ParseError)
 91 |     end
 92 |   end
 93 | 
 94 |   describe ".no_char_of" do
 95 |     parser = Parse.no_char_of("abc")
 96 |     it "fails for any character in the passed string" do
 97 |       parser.parse("apple").should be_a(ParseError)
 98 |       parser.parse("banana").should be_a(ParseError)
 99 |       parser.parse("carrot").should be_a(ParseError)
100 |       parser.parse("dragonfruit").should eq('d')
101 |     end
102 |   end
103 | 
104 |   describe ".non_empty_list" do
105 |     space = Parse.whitespace * (0..)
106 |     comma = space >> Parse.char(',') << space
107 |     word = Parse.word
108 |     words = Parse.non_empty_list(word, comma)
109 |     it "builds an array from the wrapped element and delimiter parsers" do
110 |       words.parse("").should be_a(ParseError)
111 |       words.parse("test").should eq(["test"])
112 |       words.parse("hello, world").should eq(["hello", "world"])
113 |       words.parse("par , s3k").should eq(["par", "s3k"])
114 |     end
115 |   end
116 | end
117 | 


--------------------------------------------------------------------------------
/src/pars/parse.cr:
--------------------------------------------------------------------------------
  1 | require "./parser"
  2 | require "./parse_result"
  3 | 
  4 | module Pars
  5 |   # Tools for creating commonly useful `Parser` instances.
  6 |   module Parse
  7 |     extend self
  8 | 
  9 |     # Do-notation for sequencing parsers: `x <= parser` binds a result to `x`,
 10 |     # `x = expr` assigns a local, and the final entry is the resulting parser.
 11 |     macro do(body)
 12 |       {% non_expression_types = {"Assign", "TypeNode", "Splat", "Union",
 13 |                                  "UninitializedVar", "TypeDeclaration",
 14 |                                  "Generic", "ClassDef", "Def",
 15 |                                  "VisibilityModifier", "MultiAssign"} %}
 16 |       {% if non_expression_types.includes? body.last.class_name %}
 17 |         {{body.last.raise "expected last operation in monad to be an expression, got a '#{body.last.class_name}'"}}
 18 |       {% end %}
 19 |       ({{body[0].args[0]}}).bind do |{{body[0].receiver}}|
 20 |       {% for i in 1...body.size - 1 %}
 21 |         {% if body[i].class_name == "Assign" %}
 22 |             {{body[i].target}} = {{body[i].value}}
 23 |         {% else %}
 24 |           {% if body[i].class_name == "Call" && body[i].name == "<=" %}
 25 |             ({{body[i].args[0]}}).bind do |{{body[i].receiver}}|
 26 |           {% elsif non_expression_types.includes? body[i].class_name %}
 27 |             {{body[i].raise "expected operation '<=' or '=', got '#{body[i].class_name}'"}}
 28 |           {% else %}
 29 |             {{body[i]}}
 30 |           {% end %}
 31 |         {% end %}
 32 |       {% end %}
 33 |         {{body[body.size - 1]}}
 34 |       {% for i in 1...body.size - 1 %}
 35 |         {% if body[i].class_name == "Call" && body[i].name == "<=" %}
 36 |           end
 37 |         {% end %}
 38 |       {% end %}
 39 |       end
 40 |     end
 41 | 
 42 |     # Always succeeds with *value* and does not consume any input.
 43 |     def const(value : T) : Parser(T) forall T
 44 |       Parser(T).const value
 45 |     end
 46 | 
 47 |     # Parser that returns the parse head as a `Char`.
 48 |     def char : Parser(Char)
 49 |       Parser.char
 50 |     end
 51 | 
 52 |     # Parser that returns the byte value at the parse head.
 53 |     def byte : Parser(UInt8)
 54 |       Parser.byte
 55 |     end
 56 | 
 57 |     # Parser that succeeds with *value* if *block* evaluates to true when passed
 58 |     # the value.
 59 |     #
 60 |     # In most cases this should not be used externally and is instead a tool for
 61 |     # composing parsers.
 62 |     def cond(value : T, expected : T | String? = nil, &block : T -> Bool) : Parser(T) forall T
 63 |       Parser(T).new do |context|
 64 |         if block.call value
 65 |           ParseResult(T).new value, context
 66 |         else
 67 |           message = case expected
 68 |                     when T
 69 |                       "expected '#{expected}', got '#{value}'"
 70 |                     when String
 71 |                       "expected #{expected}, got '#{value}'"
 72 |                     else
 73 |                       "unsatisfied predicate, got '#{value}'"
 74 |                     end
 75 |           ParseResult(T).error message, context
 76 |         end
 77 |       end
 78 |     end
 79 | 
 80 |     # Parser that returns the context head if it satisfies *block*. *expected*
 81 |     # can optionally be specified to provide a human friendly `ParseError` on
 82 |     # failure.
 83 |     def char_if(expected = nil, &block : Char -> Bool) : Parser(Char)
 84 |       Parser.char.bind do |value|
 85 |         cond value, expected, &block
 86 |       end
 87 |     end
 88 | 
 89 |     # :ditto:
 90 |     def byte_if(expected = nil, &block : UInt8 -> Bool) : Parser(UInt8)
 91 |       Parser.byte.bind do |value|
 92 |         cond value, expected, &block
 93 |       end
 94 |     end
 95 | 
 96 |     # Parser that tests equivalence to *value* at the parse head. If equivalent,
 97 |     # *value* itself is returned and the parse head progresses.
 98 |     def eq(value : T) : Parser(T) forall T
 99 |       Parser.head.bind do |head|
100 |         cond value, value, &.===(head)
101 |       end
102 |     end
103 | 
104 |     # Parser that matches for a specific *char* at the parse head.
105 |     def char(char : Char) : Parser(Char)
106 |       char_if char, &.==(char)
107 |     end
108 | 
109 |     # Parser that matches for a specific *byte* at the parse head.
110 |     def byte(byte : UInt8) : Parser(UInt8)
111 |       byte_if byte, &.==(byte)
112 |     end
113 | 
114 |     # Creates a `Parser(String)` that expects every character of *string* to be
115 |     # present consecutively from the current parse position, succeeding with
116 |     # *string* itself.
117 |     def string(string : String) : Parser(String)
118 |       case string.size
119 |       when 0
120 |         const string
121 |       when 1
122 |         char(string[0]) >> const string
123 |       else
124 |         string.each_char.map(&->char(Char)).reduce do |a, b|
125 |           a >> b
126 |         end >> const string
127 |       end
128 |     end
129 | 
130 |     # Creates a `Parser(Bytes)` that looks at the current parse position and
131 |     # expects a series of bytes to be consecutively present.
132 |     def bytes(bytes : Bytes) : Parser(Bytes)
133 |       case bytes.size
134 |       when 0
135 |         const bytes
136 |       when 1
137 |         byte(bytes[0]) >> const bytes
138 |       else
139 |         bytes.each.map(&->byte(UInt8)).reduce do |a, b|
140 |           a >> b
141 |         end >> const bytes
142 |       end
143 |     end
144 | 
145 |     # Parser that succeeds when the parse head is one of the given characters.
146 |     def one_char_of(string_or_list : String | Enumerable(Char)) : Parser(Char)
147 |       char_if "a character from #{string_or_list}", &.in?(string_or_list)
148 |     end
149 | 
150 |     # Functions identically to `Parse.one_char_of`, but reverses the expected
151 |     # input. If the current character is one of those given, the parse fails.
152 |     def no_char_of(string_or_list : String | Enumerable(Char)) : Parser(Char)
153 |       char_if "no character in #{string_or_list}", &.in?(string_or_list).!
154 |     end
155 | 
156 |     # Creates a `Parser(Array(A))` that will continue to parse with *item*
157 |     # delimited by *delimiter* until an error with either occurs.
158 |     def list(item : Parser(A), delimiter : Parser(B)) : Parser(Array(A)) forall A, B
159 |       empty_list = const [] of A
160 |       non_empty_list(item, delimiter) | empty_list
161 |     end
162 | 
163 |     # As `#list`, but fails unless at least one *item* can be parsed.
164 |     def non_empty_list(item : Parser(A), delimiter : Parser(B)) : Parser(Array(A)) forall A, B
165 |       singleton = item * 1
166 |       plural = ((item << delimiter) * (1..) &+ item).map { |(xs, x)| xs << x }
167 |       plural | singleton
168 |     end
169 | 
170 |     # Parses a character of the lowercase alphabet.
171 |     def lowercase
172 |       char_if "a lowercase character", &.lowercase?
173 |     end
174 | 
175 |     # Parses a character of the uppercase alphabet.
176 |     def uppercase
177 |       char_if "an uppercase character", &.uppercase?
178 |     end
179 | 
180 |     # Parses a character in the alphabet regardless of case.
181 |     def letter
182 |       char_if "a letter", &.letter?
183 |     end
184 | 
185 |     # Parses an alphanumeric character (as decided by `Char#alphanumeric?`).
186 |     def alphanumeric
187 |       char_if "an alphanumeric character", &.alphanumeric?
188 |     end
189 | 
190 |     # Parses a full word of at least one character.
191 |     def word
192 |       (alphanumeric * (1..)).map &.join
193 |     end
194 | 
195 |     # Parses a single whitespace character (as decided by `Char#whitespace?`).
196 |     def whitespace
197 |       char_if "a whitespace character", &.whitespace?
198 |     end
199 | 
200 |     # Parses a digit as a character.
201 |     def digit
202 |       char_if "a digit", &.number?
203 |     end
204 | 
205 |     # Parses an integer as a String.
206 |     def integer
207 |       (digit * (1..)).map &.join
208 |     end
209 | 
210 |     # Parses a fractional number as a String.
211 |     def decimal
212 |       (integer + (char '.') + integer).map &.join
213 |     end
214 | 
215 |     # Parses a number as a String.
216 |     def number
217 |       decimal | integer
218 |     end
219 |   end
220 | end
221 | 


--------------------------------------------------------------------------------
/spec/pars/parser_spec.cr:
--------------------------------------------------------------------------------
  1 | require "spec"
  2 | require "../../src/pars"
  3 | 
  4 | include Pars
  5 | 
  6 | describe Parser do
  7 |   a = Parse.char 'a'
  8 |   b = Parse.char 'b'
  9 |   c = Parse.char 'c'
 10 |   str = Parse.string "foo"
 11 | 
 12 |   describe ".const" do
 13 |     p = Parser.const 42
 14 |     it "always returns the same value regardless of input" do
 15 |       p.parse("a").should eq 42
 16 |       p.parse("test").should eq 42
 17 |       p.parse("").should eq 42
 18 |       p.parse(Bytes[0xB, 0xE, 0xE, 0xF]).should eq 42
 19 |       p.parse(Bytes.empty).should eq 42
 20 |     end
 21 |     it "does not consume any of the input" do
 22 |       ctx = ParseContext.new "hello"
 23 |       ctx.pos.should eq 0
 24 |       res = p.run ctx
 25 |       res.value.should eq 42
 26 |       res.context.should eq ctx
 27 |       res.context.pos.should eq 0
 28 |     end
 29 |   end
 30 | 
 31 |   describe ".fail" do
 32 |     p = Parser(Char).fail "nope"
 33 |     it "fails for every input" do
 34 |       p.parse("a").should be_a ParseError
 35 |       p.parse("test").should be_a ParseError
 36 |       p.parse("").should be_a ParseError
 37 |       p.parse(Bytes[0xB, 0xE, 0xE, 0xF]).should be_a ParseError
 38 |       p.parse(Bytes.empty).should be_a ParseError
 39 |     end
 40 |     it "does not consume any of the input" do
 41 |       ctx = ParseContext.new "hello"
 42 |       ctx.pos.should eq 0
 43 |       res = p.run ctx
 44 |       res.value.should be_a ParseError
 45 |       res.context.should eq ctx
 46 |       res.context.pos.should eq 0
 47 |     end
 48 |   end
 49 | 
 50 |   describe ".head" do
 51 |     p = Parser.head
 52 |     it "returns the parse head" do
 53 |       p.parse("a").should eq 'a'
 54 |       p.parse("b").should eq 'b'
 55 |     end
 56 |     it "progresses the parse context" do
 57 |       ctx = ParseContext.new "ab"
 58 |       res = p.run ctx
 59 |       res.context.pos.should eq 1
 60 |     end
 61 |     it "provides a parse error when the end of input is reached" do
 62 |       p.parse("").should be_a ParseError
 63 |     end
 64 |   end
 65 | 
 66 |   describe "#map" do
 67 |     it "applies the transform to the parser output" do
 68 |       p = a.map &.to_s
 69 |       p.parse("a").should eq "a"
 70 |     end
 71 |     it "captures exception in the transform as a ParseError" do
 72 |       p = a.map { |_| raise Exception.new "oh no" }
 73 |       result = p.parse("a")
 74 |       result.should be_a ParseError
 75 |       result.message.should eq "oh no" # compare by value, not object identity
 76 |     end
 77 |   end
 78 | 
 79 |   describe "#&+" do
 80 |     it "sequences `self` with another parser as a Tuple" do
 81 |       p = a &+ b
 82 |       p.parse("a").should be_a ParseError
 83 |       p.parse("ab").should eq({'a', 'b'})
 84 |       p.parse("abc").should eq({'a', 'b'})
 85 |     end
 86 |     it "flattens the results when chaining" do
 87 |       p = a &+ b &+ c
 88 |       p.parse("abc").should eq({'a', 'b', 'c'})
 89 |     end
 90 |     it "preserve types as each parser position" do
 91 |       p = a &+ Parse.const("foo")
 92 |       typeof(p).should eq Parser({Char, String})
 93 |     end
 94 |     it "is associative" do
 95 |       p1 = (a &+ b) &+ str
 96 |       p2 = a &+ (b &+ str)
 97 |       typeof(p1).should eq typeof(p2)
 98 |     end
 99 |     it "returns a ParseError if any fail" do
100 |       p = a &+ b &+ c
101 |       p.parse("zbc").should be_a ParseError
102 |       p.parse("azc").should be_a ParseError
103 |       p.parse("abz").should be_a ParseError
104 |     end
105 |   end
106 | 
107 |   describe "#+" do
108 |     it "sequences `self` with another parser as an Array" do
109 |       p = a + b
110 |       p.parse("a").should be_a ParseError
111 |       p.parse("ab").should eq(['a', 'b'])
112 |       p.parse("abc").should eq(['a', 'b'])
113 |     end
114 |     it "flattens the results when chaining" do
115 |       p = a + b + c
116 |       p.parse("abc").should eq(['a', 'b', 'c'])
117 |     end
118 |     it "is associative" do
119 |       p1 = (a + b) + str
120 |       p2 = a + (b + str)
121 |       typeof(p1).should eq typeof(p2)
122 |     end
123 |     it "forms an array with elements of a union type" do
124 |       p = a + Parse.const("foo")
125 |       typeof(p).should eq Parser(Array(Char | String))
126 |     end
127 |     it "allows forming parsers of unbounded length" do
128 |       p = "foo".each_char.map(&->Parse.char(Char)).reduce(Parser.const [] of Char) { |a, b| a + b }
129 |       typeof(p).should eq Parser(Array(Char))
130 |     end
131 |     it "returns a ParseError if any fail" do
132 |       p = a + b + c
133 |       p.parse("zbc").should be_a ParseError
134 |       p.parse("azc").should be_a ParseError
135 |       p.parse("abz").should be_a ParseError
136 |     end
137 |   end
138 | 
139 |   describe "#<<" do
140 |     p = a << b
141 |     it "returns the result of self if both parsers succeed" do
142 |       p.parse("ab").should eq 'a'
143 |     end
144 |     it "returns a ParseError if self errors" do
145 |       p.parse("bb").should be_a ParseError
146 |     end
147 |     it "preserves the previous context when self fails" do
148 |       ctx = ParseContext.new "bb"
149 |       res = p.run ctx
150 |       res.value.should be_a ParseError
151 |       res.context.pos.should eq 0
152 |     end
153 |     it "preserves the parse context when other fails" do
154 |       ctx = ParseContext.new "aa"
155 |       res = p.run ctx
156 |       res.value.should be_a ParseError
157 |       res.context.pos.should eq 0
158 |     end
159 |   end
160 | 
161 |   describe "#>>" do
162 |     p = a >> b
163 |     it "returns the result of other if both parsers succeed" do
164 |       p.parse("ab").should eq 'b'
165 |     end
166 |     it "returns a parse error if other fails" do
167 |       p.parse("aa").should be_a ParseError
168 |     end
169 |     it "preserves the previous context when self fails" do
170 |       ctx = ParseContext.new "bb"
171 |       res = p.run ctx
172 |       res.value.should be_a ParseError
173 |       res.context.pos.should eq 0
174 |     end
175 |     it "preserves the parse context when other fails" do
176 |       ctx = ParseContext.new "aa"
177 |       res = p.run ctx
178 |       res.value.should be_a ParseError
179 |       res.context.pos.should eq 0
180 |     end
181 |   end
182 | 
183 |   describe "#|" do
184 |     p = a | b
185 |     it "returns the result if either parser succeeds" do
186 |       p.parse("a").should eq 'a'
187 |       p.parse("b").should eq 'b'
188 |     end
189 |     it "returns a ParseError if both fail" do
190 |       p.parse("c").should be_a ParseError
191 |     end
192 |     it "allows chaining with a custom error message" do
193 |       result = (p | "nope").parse "c"
194 |       result.should be_a ParseError
195 |       result.as(ParseError).message.should eq "nope"
196 |     end
197 |     it "builds a union type from component parsers" do
198 |       composite = p | str | Parse.byte(0x0).map(&->Box.new(UInt8)) | p
199 |       typeof(composite).should eq Parser(Char | String | Box(UInt8))
200 |       typeof(composite.parse("foo")).should eq (Char | String | Box(UInt8) | ParseError)
201 |     end
202 |   end
203 | 
204 |   describe "#&" do
205 |     it "succeeds when both succeed" do
206 |       p = a & Parse.letter
207 |       p.parse("a").should eq({'a', 'a'})
208 |     end
209 |     it "returns a ParseError if either fail" do
210 |       (a & b).parse("a").should be_a ParseError
211 |       (b & a).parse("a").should be_a ParseError
212 |     end
213 |   end
214 | 
215 |   describe "#^" do
216 |     it "succeeds if a succeeds" do
217 |       (a ^ b).parse("a").should eq 'a'
218 |     end
219 |     it "succeeds if b succeeds" do
220 |       (a ^ b).parse("b").should eq 'b'
221 |     end
222 |     it "fails if both fail" do
223 |       (a ^ b).parse("c").should be_a ParseError
224 |     end
225 |     it "fails if both succeed" do
226 |       (a ^ a).parse("a").should be_a ParseError
227 |     end
228 |     it "provides a union type as the result" do
229 |       (a ^ str).parse("a").should be_a Char | String
230 |     end
231 |   end
232 | 
233 |   describe "#*(Int)" do
234 |     it "repeats the parser the specified number of times" do
235 |       (a * 1).parse("aaa").should eq ['a']
236 |       (a * 2).parse("aaa").should eq ['a', 'a']
237 |       (a * 3).parse("aaa").should eq ['a', 'a', 'a']
238 |     end
239 |     it "returns an empty array for 0" do
240 |       (a * 0).parse("aaa").should eq [] of Char
241 |     end
242 |     it "fails if the count isn't met" do
243 |       (a * 3).parse("a").should be_a ParseError
244 |     end
245 |   end
246 | 
247 |   describe "#*(Range)" do
248 |     p = a * (1..2)
249 |     it "stops matching after range.end" do
250 |       p.parse("aab").should eq ['a', 'a']
251 |     end
252 |     it "succeeds if the number of matches is within the range" do
253 |       p.parse("ab").should eq ['a']
254 |     end
255 |     it "failes if the range.start is not met" do
256 |       p.parse("b").should be_a ParseError
257 |     end
258 |     it "succeeds on a endless range if range.start is met" do
259 |       (a * (0..)).parse("").should eq [] of Char
260 |       (a * (1..)).parse("a").should eq ['a']
261 |       (a * (0..)).parse("aab").should eq ['a', 'a']
262 |     end
263 |   end
264 | end
265 | 


--------------------------------------------------------------------------------
/src/pars/parser.cr:
--------------------------------------------------------------------------------
  1 | require "./parse_result"
  2 | require "./parse_context"
  3 | 
  4 | module Pars
  5 |   struct Parser(T)
  6 |     # Creates a `Parser` that always succeeds with *value*.
  7 |     def self.const(value : T)
  8 |       # Succeed in place: the context is handed back untouched, so no input
  9 |       # is consumed.
 10 |       new { |ctx| ParseResult(T).new(value, ctx) }
 11 |     end
 12 | 
 13 |     # Creates a `Parser` that always fails with *message*.
 14 |     def self.fail(message : String)
 15 |       # The error carries the incoming context, so the reported position is
 16 |       # wherever this parser was run.
 17 |       new { |ctx| ParseResult(T).error(message, ctx) }
 18 |     end
 19 | 
 20 |     {% for item in [:head, :char, :byte] %}
 21 |       # Creates a `Parser` that yields the parse head (read through the same-named
 22 |       # `ParseContext` accessor) and advances one position, or fails at end of input.
 23 |       def self.{{item.id}}
 24 |         new do |context|
 25 |           if context.exhausted?
 26 |             ParseResult(typeof(context.{{item.id}})).error "input ended", context
 27 |           else
 28 |             ParseResult(typeof(context.{{item.id}})).new context.{{item.id}}, context.next
 29 |           end
 30 |         end
 31 |       end
 32 |     {% end %}
 33 | 
 34 |     def initialize(&block : ParseContext -> ParseResult(T))
 35 |       @block = block
 36 |     end
 37 | 
 38 |     # Parses the input string `input` given the parser's logic provided by its
 39 |     # block at definition.
 40 |     def parse(input) : (T | ParseError)
 41 |       context = ParseContext.new input
 42 |       run(context).value
 43 |     end
 44 | 
 45 |     # Runs `self` for a given *context*.
 46 |     def run(context : ParseContext) : ParseResult(T)
 47 |       @block.call context
 48 |     end
 49 | 
 50 |     # Transforms the result of the parser such that, when the parser runs, the
 51 |     # output value becomes a different value.
 52 |     #
 53 |     # For example, if you took a `Parser(Char)` and wanted to transform it to a
 54 |     # `Parser(String)` by `Char#to_s`, then you could use
 55 |     # `char_parser.transform &.to_s`.
 56 |     def map(&block : T -> B) : Parser(B) forall B
 57 |       Parser(B).new do |context|
 58 |         result = run context
 59 |         if result.errored
 60 |           ParseResult(B).error result.error!
 61 |         else
 62 |           begin
 63 |             ParseResult(B).new block.call(result.value!), result.context
 64 |           rescue e
 65 |             ParseResult(B).error e.message || e.to_s, result.context
 66 |           end
 67 |         end
 68 |       end
 69 |     end
 70 | 
 71 |     # Sequences `self` with another parser.
 72 |     #
 73 |     # Expects a block that receives the result of the current parser and returns
 74 |     # a new parser of any type.
 75 |     def bind(&block : T -> Parser(B)) : Parser(B) forall B
 76 |       Parser(B).new do |context|
 77 |         result = run context
 78 |         if result.errored
 79 |           ParseResult(B).error result.error!
 80 |         else
 81 |           other = block.call result.value!
 82 |           other_result = other.run result.context
 83 |           if other_result.errored
 84 |             ParseResult(B).error other_result.error!.message, context
 85 |           else
 86 |             other_result
 87 |           end
 88 |         end
 89 |       end
 90 |     end
 91 | 
 92 |     # Sequences `self` with *other*, providing a new Parser that returns the
 93 |     # results as a Tuple.
 94 |     #
 95 |     # If multiple parsers are chained, the results are flattened.
 96 |     def &+(other : Parser(B)) forall B
 97 |       self.bind do |a|
 98 |         other.bind do |b|
 99 |           {% if T.name.starts_with?("Tuple(") && B.name.starts_with?("Tuple(") %}
100 |             Parser.const(a + b)
101 |           {% elsif T.name.starts_with? "Tuple(" %}
102 |             Parser.const(a + {b})
103 |           {% elsif B.name.starts_with? "Tuple(" %}
104 |             Parser.const({a} + b)
105 |           {% else %}
106 |             Parser.const({a, b})
107 |           {% end %}
108 |         end
109 |       end
110 |     end
111 | 
112 |     # Sequences `self` with *other*, providing a new Parser that returns the
113 |     # results as an Array.
114 |     #
115 |     # This may be preferred in place of `Parser(T)#.&+` when building parsers
116 |     # that enumerate or reduce over a structure of unknown size, such as when
117 |     # working within an Iterator.
118 |     #
119 |     # If multiple parsers are chained, the results are flattened.
120 |     def +(other : Parser(B)) forall B
121 |       self.bind do |a|
122 |         other.bind do |b|
123 |           {% if T.name.starts_with?("Array(") && B.name.starts_with?("Array(") %}
124 |             Parser.const a + b
125 |           {% elsif T.name.starts_with? "Array(" %}
126 |             Parser.const a + [b]
127 |           {% elsif B.name.starts_with? "Array(" %}
128 |             Parser.const [a] + b
129 |           {% else %}
130 |             Parser.const [a, b]
131 |           {% end %}
132 |         end
133 |       end
134 |     end
135 | 
136 |     # Sequences the current parser with another parser, and disregards the other
137 |     # parser's result, but ensures the two succeed.
138 |     def <<(other : Parser(B)) : Parser(T) forall B
139 |       Parser(T).new do |context|
140 |         result = run context
141 |         if result.errored
142 |           result
143 |         else
144 |           other_result = other.run result.context
145 |           if other_result.errored
146 |             ParseResult(T).error other_result.error!.message, context
147 |           else
148 |             ParseResult(T).new result.value!, other_result.context
149 |           end
150 |         end
151 |       end
152 |     end
153 | 
154 |     # Sequences the current parser with another parser, and disregards the
155 |     # original parser's result, but ensures the two succeed.
156 |     def >>(other : Parser(B)) : Parser(B) forall B
157 |       Parser(B).new do |context|
158 |         result = run context
159 |         if result.errored
160 |           ParseResult(B).error result.error!
161 |         else
162 |           other_result = other.run result.context
163 |           if other_result.errored
164 |             ParseResult(B).error other_result.error!.message, context
165 |           else
166 |             other_result
167 |           end
168 |         end
169 |       end
170 |     end
171 | 
172 |     # Given `A | B`, creates a new parser that succeeds when A succeeds or B
173 |     # succeeds. Checks A first, doesn't check B if A succeeds. Ignores type
174 |     # differences, gives union type.
175 |     def |(other : Parser(B)) : Parser(T | B) forall B
176 |       Parser(T | B).new do |context|
177 |         result = run context
178 |         if result.errored
179 |           {% if Union(T, B) == B %}
180 |             other.run context
181 |           {% else %}
182 |             other_result = other.run context
183 |             if other_result.errored
184 |               ParseResult(T | B).error other_result.error!
185 |             else
186 |               ParseResult(T | B).new other_result.value!, other_result.context
187 |             end
188 |           {% end %}
189 |         else
190 |           {% if Union(T, B) == T %}
191 |             result
192 |           {% else %}
193 |             ParseResult(T | B).new result.value!, result.context
194 |           {% end %}
195 |         end
196 |       end
197 |     end
198 | 
199 |     # Creates a new `Parser(T)` that fails with *message* if `self` is
200 |     # unsuccessful.
201 |     #
202 |     # This can be used to provide a custom error message when chaining parsers.
203 |     def |(message : String) : Parser(T)
204 |       Parser(T).new do |context|
205 |         result = run context
206 |         if result.errored
207 |           ParseResult(T).error message, result.context
208 |         else
209 |           result
210 |         end
211 |       end
212 |     end
213 | 
214 |     # Given `A & B`, creates a parser that succeeds when both A and B succeed
215 |     # for the same input, returning the results as a Tuple.
216 |     def &(other : Parser(B)) : Parser({T, B}) forall B
217 |       Parser({T, B}).new do |context|
218 |         result = run context
219 |         if result.errored
220 |           ParseResult({T, B}).error result.error!
221 |         else
222 |           other_result = other.run context
223 |           if other_result.errored
224 |             ParseResult({T, B}).error other_result.error!
225 |           else
226 |             ParseResult({T, B}).new({result.value!, other_result.value!}, result.context)
227 |           end
228 |         end
229 |       end
230 |     end
231 | 
232 |     # Given `A ^ B`, creates a parser that succeeds if A or B succeed
233 |     # exclusively for the same input.
234 |     #
235 |     # If both succeed, the parser will fail.
236 |     def ^(other : Parser(B)) : Parser(T | B) forall B
237 |       Parser(T | B).new do |context|
238 |         result = run context
239 |         other_result = other.run context
240 |         if result.errored && other_result.errored
241 |           ParseResult(T | B).error other_result.error!
242 |         elsif result.errored
243 |           ParseResult(T | B).new other_result.value!, other_result.context
244 |         elsif other_result.errored
245 |           ParseResult(T | B).new result.value!, result.context
246 |         else
247 |           ParseResult(T | B).error "expected only one parser to succeed", context
248 |         end
249 |       end
250 |     end
251 | 
252 |     # Creates a new parser that repeats `self` exactly *count* times.
253 |     def *(count : Int) : Parser(Array(T))
254 |       case count
255 |       when .< 0
256 |         raise ArgumentError.new "cannot match less than zero times"
257 |       when .== 0
258 |         Parser.const [] of T
259 |       else
260 |         self * (count..count)
261 |       end
262 |     end
263 | 
264 |     # Creates a new parser that repeats `self` continuously up to *range.end*
265 |     # times. If *range* is not bounded it will continue to repeat until failing.
266 |     def *(range : Range(Int, Int) | Range(Int, Nil)) : Parser(Array(T))
267 |       Parser(Array(T)).new do |context|
268 |         result = run context
269 |         if result.errored && !range.includes? 0
270 |           next ParseResult(Array(T)).error result.error!
271 |         end
272 | 
273 |         results = [] of T
274 |         if (max = range.end)
275 |           # Bounded range
276 |           max -= 1 if range.excludes_end?
277 |           while !result.errored
278 |             results << result.value!
279 |             break if results.size >= max
280 |             result = run result.context
281 |           end
282 |         else
283 |           # Unbounded - parse until error
284 |           while !result.errored
285 |             results << result.value!
286 |             result = run result.context
287 |           end
288 |         end
289 | 
290 |         unless range.includes? results.size
291 |           next ParseResult(Array(T)).error "expected #{range} parses, got #{results.size} parses", result.context
292 |         end
293 | 
294 |         ParseResult.new results, result.context
295 |       end
296 |     end
297 |   end
298 | end
299 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Pars
  2 | 
  3 | `Pars` is a library for building monadic parser combinators in crystal-lang.
  4 | It works with minimal object allocation to extract domain-specific representation from String or Bytes input.
  5 | 
  6 | A combinator parser is a system that allows for the creation of small parsers which can then combine to represent more complex semantics.
  7 | This process then repeats, allowing for increasing complexity.
  8 | Small parsers combine with logic (OR, AND, XOR) and sequencing to create larger, more meaningful parsers.
  9 | Ultimately, this provides a single parser that models a full domain grammar.
 10 | 
 11 | This style of parser allows for creating interpreted programming languages, decoding markup, reading files of different formats, decoding communication protocols and other uses where there is a need to extract information from String or Bytes data based on defined syntax.
 12 | 
 13 | For a more in-depth introduction, see [Monadic Parser Combinators](https://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf).
 14 | 
 15 | ## Example
 16 | 
 17 | Let's start with a domain model.
 18 | ```crystal
 19 | enum Greeting
 20 |   Hello
 21 |   Goodbye
 22 | end
 23 | 
 24 | alias Target = String
 25 | 
 26 | record Expression, greeting : Greeting, target : Target
 27 | ```
 28 | 
 29 | Now we can build some parsers.
 30 | ```crystal
 31 | # Creates a parser that returns a single letter character.
 32 | letter = Parse.char_if &.letter?
 33 | 
 34 | # Parser for 1 or more letters.
 35 | letters = letter * (1..)
 36 | 
 37 | # We can map these into a new Parser that joins the letters into a String.
 38 | word = letters.map &.join  # this is already available as `Parse.word` too
 39 | 
 40 | # This can now build a parser for one of our domain objects.
 41 | greeting = word.map &->Greeting.parse(String)
 42 | 
 43 | # But we still have a little more to consume...
 44 | comma = Parse.char ','
 45 | space = Parse.char ' '
 46 | sep = (comma + space) | space
 47 | # ...and some more...
 48 | target = word
 49 | exclamation = Parse.char('!') * (0..1)
 50 | 
 51 | # But now we can combine these into the full expression parser
 52 | expression = ((greeting << sep) &+ target << exclamation).map do |(g, t)|
 53 |   Expression.new g, t
 54 | end
 55 | 
 56 | typeof(expression) # => Parser(Expression)
 57 | 
 58 | expression.parse "Hello, world!" # => Expression(@greeting=Hello, @target="world")
 59 | 
 60 | expression.parse "Hello human" # => Expression(@greeting=Hello, @target="human")
 61 | 
 62 | expression.parse "Well then..." # => ParseError: unknown enum Greeting value: Well
 63 | ```
 64 | 
 65 | Importantly though this is only the start. We can continue to build complexity.
 66 | ```crystal
 67 | other_expression = Parse.string "Well then..."
 68 | new_parser = expression | other_expression
 69 | ```
 70 | 
 71 | And create results that use crystal's beautiful type system.
 72 | ```crystal
 73 | result = new_parser.parse("Well then...")
 74 | case result
 75 | in Expression
 76 |   # ...
 77 | in String
 78 |   # ...
 79 | in ParseError
 80 |   # ...
 81 | end
 82 | ```
 83 | 
 84 | ---
 85 | 
 86 | ## Usage
 87 | 
 88 | ```crystal
 89 | require "pars"
 90 | include Pars
 91 | ```
 92 | 
 93 | While not required, including `Pars` is _highly recommended_ for ease of access.
 94 | 
 95 | ### Primitive parsers
 96 | 
 97 | ```crystal
 98 | char_a = Parse.char 'a'
 99 | 
100 | puts char_a.parse "abc" #=> a
101 | ```
102 | 
103 | This example creates a `Parser(Char)` from `Parse.char`, and parses the string `"abc"` on it.
104 | The character parser looks at the beginning of the string, and looks for the first character.
105 | If the first character matches the character supplied, then the parse will succeed and the parse result will return the character that matched.
106 | 
107 | ```crystal
108 | puts char_a.parse "bca" #=> expected 'a', got 'b'
109 | ```
110 | 
111 | This example uses the same `char_a` parser, but parses string `"bca"` on it.
112 | Because it doesn't start with `'a'`, the parse fails and returns a `ParseError`.
113 | A `ParseError` contains a message about the parse failure, available via `ParseError#message`.
114 | As such, `Parser(T)#parse` returns a union of `(T | ParseError)`, as it can return either.
115 | 
116 | ```crystal
117 | str_cat = Parse.string "cat"
118 | 
119 | puts str_cat.parse "cat" #=> cat
120 | puts str_cat.parse "cats are cool" #=> cat
121 | puts str_cat.parse "dog" #=> expected 'cat', got 'd'
122 | ```
123 | 
124 | This example creates a new primitive parser, the `Parser(String)` created by `Parse.string(String)`.
125 | It expects an exact copy of the string provided; in this example the text `"cat"`.
126 | 
127 | When constructing parsers for non-string based input, `Parse.byte` is also provided.
128 | ```crystal
129 | null_byte = Parse.byte 0x0
130 | 
131 | puts null_byte.parse Bytes[0xDE, 0xAD, 0xBE, 0xEF] #=> expected `0`, got '222'
132 | ```
133 | 
134 | Similarly `Parse.bytes` is available for matching a specific byte sequence.
135 | ```crystal
136 | bovine = Parse.bytes Bytes[0xBE, 0xEF]
137 | 
138 | puts bovine.parse Bytes[0xBE, 0xEF] #=> Bytes[0xBE, 0xEF]
139 | ```
140 | 
141 | ### Conditional parsers
142 | 
143 | In some cases, you may want to retrieve a value from the input that matches certain criteria.
144 | Two base conditional parsers provide this:
145 | 
146 | ```crystal
147 | space = Parse.char_if &.whitespace?
148 | ```
149 | 
150 | or for binary inputs
151 | 
152 | ```crystal
153 | low_val = Parse.byte_if { |b| b <= 10 }
154 | ```
155 | 
156 | 
157 | ### Optional parsers
158 | 
159 | ```crystal
160 | char_a = Parse.char 'a'
161 | char_b = Parse.char 'b'
162 | parse_ab = char_a | char_b
163 | 
164 | puts parse_ab.parse "abc" #=> a
165 | puts parse_ab.parse "bca" #=> b
166 | puts parse_ab.parse "cab" #=> expected 'b', got 'c'
167 | ```
168 | 
169 | This example creates three parsers:
170 | - a `Parser(Char)` that expects a character of `'a'`,
171 | - a `Parser(Char)` that expects a character of `'b'`, and
172 | - a `Parser(Char)` created using the `|` operator that will try the left parser first, then the right, and use the successful parser.
173 | 
174 | The `|` operator allows you to create branching parsers by using OR logic.
175 | It first tries the parser on the left, then the right.
176 | If both fail, it will return the `ParseError` given by the rightmost parser.
177 | 
178 | This process is tedious for large masses of characters, such as if you wanted to accept all letters of the alphabet.
179 | For this sake, there exists `Parse.one_char_of`, which looks for any character in the provided string or list.
180 | 
181 | ```crystal
182 | parse_alphabet = Parse.one_char_of "abcdefghijklmnopqrstuvwxyz"
183 | 
184 | puts parse_alphabet.parse "abc" #=> a
185 | puts parse_alphabet.parse "bca" #=> b
186 | puts parse_alphabet.parse "xyz" #=> x
187 | puts parse_alphabet.parse "yzx" #=> y
188 | puts parse_alphabet.parse "123" #=> expected 'z', got '1'
189 | ```
190 | 
191 | This example creates a parser that accepts a char from the provided list.
192 | As seen, alphabetical characters parse, but numerical characters do not, as they were not in the original string of the alphabet.
193 | 
194 | Prebuilt parsers exist for common character types: `Parse.lowercase`, `Parse.uppercase`, `Parse.letter`, `Parse.digit`, `Parse.alphanumeric`, `Parse.whitespace`.
195 | 
196 | ### Repetitive parsers
197 | 
198 | To create a parser that repeats, use the `*` operator.
199 | This is available on any `Parser(T)`, and outputs a `Parser(Array(T))`.
200 | 
201 | When used with an integer, this creates a parser that matches an exact number of times.
202 | 
203 | ```crystal
204 | triple_a = Parse.char('a') * 3
205 | 
206 | triple_a.parse("aaa") #=> ['a', 'a', 'a']
207 | triple_a.parse("aa") #=> expected 3..3 parses, got 2 parses
208 | ```
209 | 
210 | To match a variable number of times, use a `Range`.
211 | 
212 | ```crystal
213 | some_a = Parse.char('a') * (1..3)
214 | 
215 | some_a.parse("aaa") #=> ['a', 'a', 'a']
216 | some_a.parse("aa") #=> ['a', 'a']
217 | ```
218 | 
219 | Endless ranges are also supported, which will continue to match until a ParseError occurs.
220 | 
221 | ```crystal
222 | existential_dread = Parse.char('a') * (5..)
223 | 
224 | existential_dread.parse("aaa") #=> expected 5.. parses, got 3 parses
225 | existential_dread.parse("aaaaaaaaaa") #=> ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
226 | ```
227 | 
228 | ```crystal
229 | word = Parse.letter * (1..)
230 | 
231 | puts word.parse "hello world" #=> ['h', 'e', 'l', 'l', 'o']
232 | puts word.parse "abc" #=> ['a', 'b', 'c']
233 | ```
234 | 
235 | A clear issue exists with the above example: it returns a list of the characters.
236 | If we want to convert this into a usable `String`, we have to transform the parser.
237 | 
238 | ### Transforming parsers
239 | 
240 | Existing parsers can be "transformed" to create new parsers with new logic.
241 | This provides the ability to move from primitive types to domain-specific types.
242 | To transform a parser, use the `Parser(T)#map(T -> B)` method.
243 | This accepts a block that receives the resulting value of a parse as a parameter, and outputs a transformed/mapped value.
244 | 
245 | For example, if you created a parser that accepted numbers:
246 | 
247 | ```crystal
248 | digit = Parse.one_char_of "0123456789"
249 | ```
250 | 
251 | Upon parsing it, it would yield characters on success:
252 | 
253 | ```crystal
254 | puts (digit.parse "1").class #=> Char
255 | ```
256 | 
257 | We find that the result is a `Char`, not any form of a `Number`! To solve this, we can transform the parser:
258 | 
259 | ```crystal
260 | digit = (Parse.one_char_of "0123456789").map &.to_i
261 | 
262 | puts digit.parse "1" #=> 1
263 | puts (digit.parse "1").class #=> Int32
264 | ```
265 | 
266 | Success! Now the parsed value from our parser is the correct type, `Int32`.
267 | 
268 | Back to the issue we found in the word parser from the previous section, we can transform the `Array(Char)` to
269 | a `String`.
270 | 
271 | ```crystal
272 | word = (Parse.letter * (1..)).map &.join
273 | 
274 | puts word.parse "hello world" #=> hello
275 | puts word.parse "abc" #=> abc
276 | ```
277 | 
278 | This identical `word` parser is available as `Parse.word` (`Parser(String)`).
279 | 
280 | ### Logical combinations
281 | 
282 | The `|` (OR) operator already discussed is accompanied by other logical operators.
283 | 
284 | - `A & B` (AND) creates a new parser that ensures both A and B successfully parse for the same input and returns the results as the Tuple `{A, B}`.
285 | - `A ^ B` (XOR) creates a new parser that succeeds with the result of A or B, but fails if both succeed.
286 | 
287 | ### Sequencing parsers
288 | 
289 | - `A >> B` creates a new parser that ensures both A and B parse sequentially, but results with the value of B.
290 | - `A << B` creates a new parser that ensures both A and B parse sequentially, but results with the value of A.
291 | - `A + B` creates a new parser that ensures both A and B parse sequentially, returning the results as an Array.
292 | - `A &+ B` creates a new parser that ensures both A and B parse sequentially, returning the results as a Tuple.
293 | 
294 | ```crystal
295 | letter = Parse.letter
296 | digit = Parse.digit
297 | parser_take_digit = letter >> digit
298 | parser_take_letter = letter << digit
299 | 
300 | puts parser_take_digit.parse "a1" #=> 1
301 | puts parser_take_digit.parse "b2" #=> 2
302 | 
303 | puts parser_take_letter.parse "a1" #=> a
304 | puts parser_take_letter.parse "b2" #=> b
305 | ```
306 | 
307 | In this example, two parsers are created, `letter` and `digit`.
308 | Then, two new parsers are created using the `>>` and `<<` operators.
309 | The first parses both sequentially but results with the result of `digit`, and the second does the same but results with the value of `letter`.
310 | When parsing, both parsers must succeed sequentially, but the result returned is that of the parser the operator points toward.
311 | 
312 | ### Parsing lists
313 | 
314 | `Parse` has a special parser that can parse a list of parsable items by parser `A`, delimited by parser `B`.
315 | Using this, we can create a parser that parses through a list of words (using `Parse.word`), delimited by a second parser that looks for commas.
316 | 
317 | ```crystal
318 | word = Parse.word
319 | optional_whitespace = Parse.whitespace * (0..)
320 | comma = (Parse.char ',') << optional_whitespace
321 | 
322 | list_parser = Parse.list word, comma
323 | 
324 | puts list_parser.parse "hello, world" #=> ["hello", "world"]
325 | puts list_parser.parse "how,are,    you" #=> ["how", "are", "you"]
326 | puts list_parser.parse "123, 456" #=> []
327 | puts list_parser.parse "hello world, how are you" #=> ["hello"]
328 | ```
329 | 
330 | ### Complex sequential parsers
331 | 
332 | In the event you need to create complex sequential parsers, you can use `Parser(T)#bind`.
333 | The `bind` method takes a block that receives the output of `Parser(T)` as a value, and must return
334 | a new `Parser` of any type, or `Parser(B)`.
335 | We can recreate the `parser_take_digit` and `parser_take_letter` parsers using this functionality:
336 | 
337 | ```crystal
338 | letter = Parse.letter
339 | digit = Parse.digit
340 | 
341 | parser_take_digit = letter.bind do |char_result|
342 |   digit.bind do |digit_result|
343 |     Parse.const digit_result
344 |   end
345 | end
346 | ```
347 | 
348 | The original two parsers chain their execution, and ultimately a `Parse.const` parser returns.
349 | `Parse.const` is a parser that takes in any value of type `T`.
350 | When parsed, it _always_ returns the value of type `T`.
351 | In this case, we create it with the `Char` result from `digit`.
352 | 
353 | ```crystal
354 | parser_letter_digit = letter.bind do |char_result|
355 |   digit.bind do |digit_result|
356 |     Parse.const({char_result, digit_result}) # a constant parser with a `Tuple(Char, Char)`
357 |   end
358 | end
359 | ```
360 | 
361 | This parser will parse strings like `a1`, `b2`, `c3`, etc., but return both of the retrieved values as a `Tuple`.
362 | 
363 | ```crystal
364 | result = parser_letter_digit.parse "a1"
365 | 
366 | puts result[0] #=> a
367 | puts result[1] #=> 1
368 | ```
369 | 
370 | This form of parser sequencing can become tedious.
371 | As a result, the library has a special macro inspired by Haskell's `do` statement.
372 | It allows you to chain parsers like above, but in a much more linear and organized manner.
373 | Here is the most recent sequential parser `parser_letter_digit` using `Parse.do`:
374 | 
375 | ```crystal
376 | parser_letter_digit = Parse.do({
377 |   char_result <= letter,
378 |   digit_result <= digit,
379 |   Parse.const({char_result, digit_result})
380 | })
381 | ```
382 | 
383 | The body of the `Parse.do` macro is a list of actions separated by commas.
384 | The last element of this list _must_ be an expression that is ultimately returned through the new parser.
385 | 
386 | For each of the other elements in the list, they must be either parser results or local variables.
387 | 
388 | - Parser results look like `result_variable_name <= parser,`. In this case, the result from `parser` is
389 |   stored as `result_variable_name`.
390 | - Local variables are `variable_name = value,`. In this case, `variable_name` is set to `value`.
391 | 
392 | Utilizing these tools, more complex parsers are expressible.
393 | 
394 | ```crystal
395 | word = Parse.word
396 | 
397 | optional_whitespace = Parse.whitespace * (0..)
398 | equals = optional_whitespace >> (Parse.char '=') << optional_whitespace
399 | 
400 | key_value_pair = Parse.do({
401 |   key <= word,
402 |   _ <= equals,
403 |   value <= word,
404 |   Parse.const({key, value})
405 | })
406 | 
407 | comma = (Parse.char ',') << optional_whitespace
408 | 
409 | key_value_list = Parse.list key_value_pair, comma
410 | 
411 | puts key_value_list.parse "hello = world" #=> [{"hello", "world"}]
412 | puts key_value_list.parse "how = are, you= sir" #=> [{"how", "are"}, {"you", "sir"}]
413 | puts key_value_list.parse "all=     sorts,of   =supported, white = spaces" #=> [{"all", "sorts"}, {"of", "supported"}, {"white", "spaces"}]
414 | ```
415 | 
416 | ### Custom parsers
417 | 
418 | Custom parsers can wrap arbitrary logic.
419 | This is sometimes necessary if existing primitive parsers cannot combine effectively or efficiently.
420 | 
421 | ```crystal
422 | def char_parser(char)
423 |   Parser(Char).new do |context|
424 |     if context.exhausted?
425 |       ParseResult(Char).error "expected '#{char}', got end of input", context
426 |     elsif context.head === char
427 |       ParseResult(Char).new char, context.next
428 |     else
429 |       ParseResult(Char).error "expected '#{char}', got '#{context.head}'", context
430 |     end
431 |   end
432 | end
433 | ```
434 | 
435 | This defines `char_parser(Char)`, which creates a parser that expects a character as specified.
436 | This implementation is the same as the internal implementation `Parse.char(Char)`.
437 | See the source code for more applications of Parsers derived from blocks.
438 | 
439 | ## Docs
440 | 
441 | Generate docs with `crystal docs`.
442 | 
443 | ## Acknowledgements
444 | 
445 | `Pars` is a fork of [Pars3k](https://github.com/voximity/pars3k).
446 | It shares much of the same internals and structure but is _not_ API compatible.
447 | The public API uses features, idioms and operators specific to crystal-lang.
448 | While it may look and feel different, a significant hat-tip needs to go to the original work by [Voximity](https://github.com/voximity) and the authors of libraries which inspired it.
449 | 


--------------------------------------------------------------------------------