├── src
    ├── pars.cr
    └── pars
    │   ├── parse_error.cr
    │   ├── parse_result.cr
    │   ├── parse_context.cr
    │   ├── parse.cr
    │   └── parser.cr
├── .gitignore
├── shard.yml
├── .github
    └── workflows
    │   └── crystal.yml
├── LICENSE
├── spec
    └── pars
    │   ├── parse_spec.cr
    │   └── parser_spec.cr
└── README.md

/src/pars.cr:
--------------------------------------------------------------------------------
require "./pars/*"

module Pars; end

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/docs/
/lib/
/bin/
/.shards/
*.dwarf

# Libraries don't need dependency lock
# Dependencies will be locked in applications that use them
/shard.lock

--------------------------------------------------------------------------------
/shard.yml:
--------------------------------------------------------------------------------
name: pars
version: 1.2.0

authors:
  - voximity
  - Kim Burgess

description: |
  Monadic parser combinator library

license: MIT

--------------------------------------------------------------------------------
/src/pars/parse_error.cr:
--------------------------------------------------------------------------------
module Pars
  # Describes a failed parse: a human readable *message* together with the
  # `ParseContext` the failure is attributed to.
  struct ParseError
    getter message
    getter context

    def initialize(@message : String, @context : ParseContext)
    end

    # Writes the error to *io* as `message (context)`.
    def to_s(io : IO)
      io << message << ' ' << '(' << context << ')'
    end
  end
end

--------------------------------------------------------------------------------
/.github/workflows/crystal.yml:
--------------------------------------------------------------------------------
name: Crystal CI

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-latest

    container:
      image: crystallang/crystal

    steps:
    - uses: actions/checkout@v2
    - name: Install dependencies
      run: shards install
    - name: Run tests
      run: crystal spec

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Original work Copyright (c) 2019 "Voximity" (https://github.com/voximity)
Modified work Copyright (c) 2020 Place Technology Limited

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

--------------------------------------------------------------------------------
/src/pars/parse_result.cr:
--------------------------------------------------------------------------------
require "./parse_error"
require "./parse_context"

module Pars
  # ParseResult(T) is a result of a parsed Parser with return type T.
  #
  # A result is either successful (it carries a value of type `T`) or errored
  # (it carries a `ParseError`). In both cases it carries a `ParseContext`.
  struct ParseResult(T)
    # Creates an errored `ParseResult` that wraps *e*.
    #
    # The instance is allocated directly and filled in by
    # `#initialize_as_error`, so no value of type `T` is needed.
    def self.error(e : ParseError)
      inst = ParseResult(T).allocate
      inst.initialize_as_error e
      inst
    end

    # Creates an errored `ParseResult` from a *message* and the *context* the
    # error is attributed to.
    def self.error(message : String, context : ParseContext)
      ParseResult(T).error ParseError.new message, context
    end

    # Declared `uninitialized` because only one of the two initialisation
    # paths (`#initialize` / `#initialize_as_error`) assigns each of them.
    @errored = uninitialized Bool
    @error = uninitialized ParseError
    @context = uninitialized ParseContext

    getter errored
    getter context

    # Creates a new successful `ParseResult`.
    def initialize(@value : T, @context)
      @errored = false
    end

    # :nodoc:
    def initialize_as_error(e : ParseError)
      @errored = true
      @error = e
      @context = e.context
    end

    # Returns a `ParseError`, or `nil` if parsing was successful.
    def error? : ParseError?
      # `nil` (the value), not `Nil` (the type): the latter does not satisfy
      # the declared `ParseError?` return type.
      errored ? @error : nil
    end

    # Returns the parsed value, or a `ParseError`.
    def value : T | ParseError
      errored ? @error : @value
    end

    # Direct access to the parsed value.
    #
    # Note: this is unsafe and should only be used if `#errored == false`.
    def value! : T
      @value
    end

    # Direct access to the `ParseError`.
    #
    # Note: this is unsafe and should only be used if `#errored == true`.
    def error! : ParseError
      @error
    end
  end
end

--------------------------------------------------------------------------------
/src/pars/parse_context.cr:
--------------------------------------------------------------------------------
module Pars
  # A struct containing information about a parsing context. Used to chain
  # Parsers together and retain input position.
  struct ParseContext
    def initialize(@input, @pos = 0)
    end

    # The input the parser is working across.
    getter input : String | Bytes

    # The current parse offset within *input*.
    getter pos : Int32

    # Creates a new context over the same input, advanced by *offset*.
    def next(offset = 1) : ParseContext
      ParseContext.new(input, pos + offset)
    end

    # `true` if all of the input has been consumed.
    def exhausted? : Bool
      pos >= input.size
    end

    # Provides the parse head as a `Char`.
    def char : Char
      if input.is_a? String
        input.as(String).char_at pos
      else
        input.as(Bytes)[pos].chr
      end
    end

    # Provides the parse head as a byte.
    def byte : UInt8
      if input.is_a? String
        input.as(String).byte_at pos
      else
        input.as(Bytes)[pos]
      end
    end

    # Provide the current parse head directly.
    def head : Char | UInt8
      input[pos]
    end

    # Provides a human readable version of the parse context.
    #
    # Up to five elements either side of the parse head are shown, with the
    # head itself wrapped in square brackets. `...` marks a side on which more
    # input exists than is shown. Byte input is rendered as hex pairs.
    def to_s(io : IO)
      before = ([0, pos - 5].max..(pos - 1))
      after = ((pos + 1)..[pos + 5, input.size - 1].min)
      io << "..." if before.begin > 0
      if input.is_a? String
        io << input[before] if pos > 0
        io << '['
        io << (exhausted? ? "" : char)
        io << ']'
        io << input[after] if pos < input.size - 1
      else
        io << hex(input[before].as(Bytes)) if pos > 0
        io << '['
        io << (exhausted? ? "" : hex(byte))
        io << ']'
        io << hex(input[after].as(Bytes)) if pos < input.size - 1
      end
      io << "..." unless after.end == input.size - 1
    end

    # Formats a single byte as two lowercase hex digits.
    private def hex(byte : UInt8)
      byte.to_s(16).rjust(2, '0')
    end

    # Formats a series of bytes as space separated hex pairs.
    private def hex(bytes : Bytes) : String
      bytes.map(&->hex(UInt8)).join ' '
    end
  end
end

--------------------------------------------------------------------------------
/spec/pars/parse_spec.cr:
--------------------------------------------------------------------------------
require "spec"
require "../../src/pars"

include Pars

describe Pars::Parse do
  describe ".const" do
    p = Parse.const 'a'
    it "returns a constant value for every input" do
      p.parse("abc").should eq 'a'
      p.parse("123").should eq 'a'
      p.parse("").should eq 'a'
    end
  end

  describe "do macro" do
    it "supports sequencing multiple parsers" do
      p = Parse.do({
        alpha <= Parse.letter,
        digit <= Parse.digit,
        Parse.const({alpha, digit}),
      })
      p.parse("a1").should eq({'a', '1'})
      p.parse("42").should be_a ParseError
    end
  end

  describe ".cond" do
    it "success when the predicate is true" do
      p = Parse.cond 'a' { true }
      p.parse("").should eq 'a'
    end
    it "produces a ParseError when the prediciate is false" do
      p = Parse.cond 'a' { false }
      p.parse("").should be_a ParseError
    end
  end

  describe ".eq" do
    p = Parse.eq 'a'.ord
    it "checks equivalence at the parse position" do
      p.parse("abc").should eq 'a'.ord
      p.parse("bca").should be_a ParseError
      p.parse("cab").should be_a ParseError
    end
  end

  describe ".char" do
    p = Parse.char 'a'
    it "matches against a char at the current parse position" do
      p.parse("abc").should eq 'a'
      p.parse("bca").should be_a ParseError
      p.parse("cab").should be_a ParseError
    end
  end

  describe ".byte" do
    p = Parse.byte 0x0
    it "matches for a byte value" do
      p.parse(Bytes[0x0]).should eq 0x0
      p.parse("foo").should be_a ParseError
    end
  end

  describe ".string" do
    p = Parse.string "cat"
    it "matches against the a string" do
      p.parse("cat").should eq "cat"
      p.parse("dog").should be_a ParseError
      p.parse("").should be_a ParseError
    end
  end

  describe ".bytes" do
    p = Parse.bytes Bytes[0xDE, 0xAD, 0xBE, 0xEF]
    it "matches against byte values" do
      p.parse(Bytes[0xDE, 0xAD, 0xBE, 0xEF]).should eq Bytes[0xDE, 0xAD, 0xBE, 0xEF]
      p.parse(Bytes[0xDE, 0xAD]).should be_a ParseError
      p.parse(Bytes[0x0]).should be_a ParseError
      p.parse("foo").should be_a ParseError
    end
  end

  describe ".one_char_of" do
    p = Parse.one_char_of "abc"
    it "matches any character from the passed string" do
      p.parse("apple").should eq 'a'
      p.parse("banana").should eq 'b'
      p.parse("carrot").should eq 'c'
      p.parse("dragonfruit").should be_a ParseError
    end
  end

  describe ".no_char_of" do
    p = Parse.no_char_of "abc"
    it "fails for any character in the passed string" do
      p.parse("apple").should be_a ParseError
      p.parse("banana").should be_a ParseError
      p.parse("carrot").should be_a ParseError
      p.parse("dragonfruit").should eq 'd'
    end
  end

  describe ".non_empty_list" do
    space = Parse.whitespace * (0..)
comma = space >> Parse.char(',') << space
word = Parse.word
p = Parse.non_empty_list word, comma
it "builds an array from the wrapped element and delimiter parsers" do
  p.parse("").should be_a ParseError
  p.parse("test").should eq ["test"]
  p.parse("hello, world").should eq ["hello", "world"]
  p.parse("par , s3k").should eq ["par", "s3k"]
end
end
end

--------------------------------------------------------------------------------
/src/pars/parse.cr:
--------------------------------------------------------------------------------
require "./parser"
require "./parse_result"

module Pars
  # Tools for creating commonly useful `Parser` instances.
  module Parse
    extend self

    # Provides a notation for building complex parsers that combine the result
    # of a number of component parsers.
    #
    # *body* is a sequence of operations:
    # - `name <= parser` binds the result of *parser* to *name* for the
    #   operations that follow (expanded into nested `Parser#bind` calls),
    # - `name = value` is emitted as a plain assignment,
    # - any other expression is emitted as is.
    #
    # The first operation must be of the `name <= parser` form and the last
    # must be an expression; it becomes the result of the innermost block.
    macro do(body)
      {% non_expression_types = {"Assign", "TypeNode", "Splat", "Union",
                                 "UninitializedVar", "TypeDeclaration",
                                 "Generic", "ClassDef", "Def",
                                 "VisibilityModifier", "MultiAssign"} %}
      {% if non_expression_types.includes? body.last.class_name %}
        {{body.last.raise "expected last operation in monad to be an expression, got a '#{body.last.class_name}'"}}
      {% end %}
      ({{body[0].args[0]}}).bind do |{{body[0].receiver}}|
      {% for i in 1...body.size - 1 %}
        {% if body[i].class_name == "Assign" %}
          {{body[i].target}} = {{body[i].value}}
        {% else %}
          {% if body[i].class_name == "Call" && body[i].name == "<=" %}
            ({{body[i].args[0]}}).bind do |{{body[i].receiver}}|
          {% elsif non_expression_types.includes? body[i].class_name %}
            {% # Report the node kind: not every node type listed above responds to `name`. %}
            {{body[i].raise "expected operation '<=' or '=', got '#{body[i].class_name}'"}}
          {% else %}
            {{body[i]}}
          {% end %}
        {% end %}
      {% end %}
        {{body[body.size - 1]}}
      {% for i in 1...body.size - 1 %}
        {% if body[i].class_name == "Call" && body[i].name == "<=" %}
          end
        {% end %}
      {% end %}
      end
    end

    # Always succeeds with *value* and does not consume any input.
    def const(value : T) : Parser(T) forall T
      Parser(T).const value
    end

    # Parser that returns the parse head as a `Char`.
    def char : Parser(Char)
      Parser.char
    end

    # Parser that returns the byte value at the parse head.
    def byte : Parser(UInt8)
      Parser.byte
    end

    # Parser that succeeds with *value* if *block* evaluates to true when passed
    # the value.
    #
    # *expected* only influences the error message: a `T` is quoted, a `String`
    # is used as a free-form description, and `nil` produces a generic
    # "unsatisfied predicate" message.
    #
    # In most cases this should not be used externally and is instead a tool for
    # composing parsers.
    def cond(value : T, expected : T | String? = nil, &block : T -> Bool) : Parser(T) forall T
      Parser(T).new do |context|
        if block.call value
          ParseResult(T).new value, context
        else
          message = case expected
                    when T
                      "expected '#{expected}', got '#{value}'"
                    when String
                      "expected #{expected}, got '#{value}'"
                    else
                      "unsatisfied predicate, got '#{value}'"
                    end
          ParseResult(T).error message, context
        end
      end
    end

    # Parser that return the context head if it satisfies *block*.
    #
    # *expected* can be optionally specified for providing a human friendly
    # ParseError on fail.
def char_if(expected = nil, &block : Char -> Bool) : Parser(Char)
  Parser.char.bind { |value| cond value, expected, &block }
end

# :ditto:
def byte_if(expected = nil, &block : UInt8 -> Bool) : Parser(UInt8)
  Parser.byte.bind { |value| cond value, expected, &block }
end

# Parser that compares *value* against the parse head using `===`.
#
# On a match *value* itself is returned and the parse head progresses.
def eq(value : T) : Parser(T) forall T
  Parser.head.bind do |head|
    cond(value, value) { |candidate| candidate === head }
  end
end

# Parser that matches for a specific *char* at the parse head.
def char(char : Char) : Parser(Char)
  char_if(char) { |found| found == char }
end

# Parser that matches for a specific *byte* at the parse head.
def byte(byte : UInt8) : Parser(UInt8)
  byte_if(byte) { |found| found == byte }
end

# Creates a `Parser(String)` that expects each character of *string* to be
# consecutively present from the current parse position, and yields *string*
# itself on success. An empty *string* always succeeds.
def string(string : String) : Parser(String)
  matched = const string
  if string.size == 0
    matched
  elsif string.size == 1
    char(string[0]) >> matched
  else
    # Chain one single-character parser per character, left to right.
    sequence = string.each_char.map { |c| char(c) }.reduce do |chain, following|
      chain >> following
    end
    sequence >> matched
  end
end

# Creates a `Parser(Bytes)` that looks at the current parse position and
# expects a series of bytes to be consecutively present.
def bytes(bytes : Bytes) : Parser(Bytes)
  matched = const bytes
  if bytes.size == 0
    matched
  elsif bytes.size == 1
    byte(bytes[0]) >> matched
  else
    # Chain one single-byte parser per byte, left to right.
    sequence = bytes.each.map { |b| byte(b) }.reduce do |chain, following|
      chain >> following
    end
    sequence >> matched
  end
end

# Creates a `Parser(Char)` that looks at the current parse position and
# expects the current character to be present in *string_or_list*.
def one_char_of(string_or_list : String | Enumerable(Char)) : Parser(Char)
  char_if("a character from #{string_or_list}") { |c| c.in?(string_or_list) }
end

# Functions identically to `Parse.one_char_of`, but reverses the expected
# input. If the current character is present in *string_or_list*, then the
# parse fails.
def no_char_of(string_or_list : String | Enumerable(Char)) : Parser(Char)
  char_if("no character in #{string_or_list}") { |c| !c.in?(string_or_list) }
end

# Creates a `Parser(Array(A))` that will continue to parse with *item*
# delimited by *delimiter* until an error with either occurs. Yields an
# empty array when not even a single item can be parsed.
def list(item : Parser(A), delimiter : Parser(B)) : Parser(Array(A)) forall A, B
  non_empty_list(item, delimiter) | const([] of A)
end

# Same as `#list`, but fails unless at least one *item* is present.
def non_empty_list(item : Parser(A), delimiter : Parser(B)) : Parser(Array(A)) forall A, B
  # One or more "item, delimiter" pairs followed by a closing item...
  leading = (item << delimiter) * (1..)
  plural = (leading &+ item).map { |(init, last)| init << last }
  # ...or, failing that, a lone item.
  plural | (item * 1)
end

# Parses a character of the lowercase alphabet.
def lowercase
  char_if("a lowercase character") { |c| c.lowercase? }
end

# Parses a character of the uppercase alphabet.
def uppercase
  char_if("an uppercase character") { |c| c.uppercase? }
end

# Parses a character in the alphabet regardless of case.
def letter
  char_if("a letter") { |c| c.letter? }
end

# Parses a single alphanumeric character.
def alphanumeric
  char_if("an alphanumeric character") { |c| c.alphanumeric? }
end

# Parses a full word of at least one character.
def word
  (alphanumeric * (1..)).map { |chars| chars.join }
end

# Parses a single whitespace character.
def whitespace
  char_if("a whitespace character") { |c| c.whitespace? }
end

# Parses a digit as a character.
def digit
  char_if("a digit") { |c| c.number? }
end

# Parses an integer as a String.
def integer
  (digit * (1..)).map { |digits| digits.join }
end

# Parses a fractional number as a String.
def decimal
  (integer + char('.') + integer).map { |parts| parts.join }
end

# Parses a number as a String.
def number
  decimal | integer
end
end
end

--------------------------------------------------------------------------------
/spec/pars/parser_spec.cr:
--------------------------------------------------------------------------------
require "spec"
require "../../src/pars"

include Pars

describe Parser do
  a = Parse.char 'a'
  b = Parse.char 'b'
  c = Parse.char 'c'
  str = Parse.string "foo"

  describe ".const" do
    p = Parser.const 42
    it "always returns the same value regardless of input" do
      p.parse("a").should eq 42
      p.parse("test").should eq 42
      p.parse("").should eq 42
      p.parse(Bytes[0xB, 0xE, 0xE, 0xF]).should eq 42
      p.parse(Bytes.empty).should eq 42
    end
    it "does not consume any of the input" do
      ctx = ParseContext.new "hello"
      ctx.pos.should eq 0
      res = p.run ctx
      res.value.should eq 42
      res.context.should eq ctx
      res.context.pos.should eq 0
    end
  end

  describe ".fail" do
    p = Parser(Char).fail "nope"
    it "fails for every input" do
      p.parse("a").should be_a ParseError
      p.parse("test").should be_a ParseError
      p.parse("").should be_a ParseError
      p.parse(Bytes[0xB, 0xE, 0xE, 0xF]).should be_a ParseError
      p.parse(Bytes.empty).should be_a ParseError
    end
    it "does not consume any of the input" do
      ctx = ParseContext.new "hello"
      ctx.pos.should eq 0
      res = p.run ctx
      res.value.should be_a ParseError
      res.context.should eq ctx
      res.context.pos.should eq 0
    end
  end

  describe ".head" do
    p = Parser.head
    it "returns the parse head" do
      p.parse("a").should eq 'a'
      p.parse("b").should eq 'b'
    end
    it "progresses the parse context" do
      ctx = ParseContext.new "ab"
      res = p.run ctx
      res.context.pos.should eq 1
    end
    it "provides a parse error when the end of input is reached" do
      p.parse("").should be_a ParseError
    end
  end

  describe "#map" do
    it "applies the transform to the parser output" do
      p = a.map &.to_s
      p.parse("a").should eq "a"
    end
    it "captures exception in the transform as a ParseError" do
      p = a.map { |_| raise Exception.new "oh no" }
      result = p.parse("a")
      result.should be_a ParseError
      result.message.should be "oh no"
    end
  end

  describe "#&+" do
    it "sequences `self` with another parser as a Tuple" do
      p = a &+ b
      p.parse("a").should be_a ParseError
      p.parse("ab").should eq({'a', 'b'})
      p.parse("abc").should eq({'a', 'b'})
    end
    it "flattens the results when chaining" do
      p = a &+ b &+ c
      p.parse("abc").should eq({'a', 'b', 'c'})
    end
    it "preserve types as each parser position" do
      p = a &+ Parse.const("foo")
      typeof(p).should eq Parser({Char, String})
    end
    it "is associative" do
      p1 = (a &+ b) &+ str
      p2 = a &+ (b &+ str)
      typeof(p1).should eq typeof(p2)
    end
    it "returns a ParseError if any fail" do
      p = a &+ b &+ c
      p.parse("zbc").should be_a ParseError
      p.parse("azc").should be_a ParseError
      p.parse("abz").should be_a ParseError
    end
  end

describe "#+" do
  it "sequences `self` with another parser as an Array" do
    p = a + b
    p.parse("a").should be_a ParseError
    p.parse("ab").should eq(['a', 'b'])
    p.parse("abc").should eq(['a', 'b'])
  end
  it "flattens the results when chaining" do
    p = a + b + c
    p.parse("abc").should eq(['a', 'b', 'c'])
  end
  it "is associative" do
    p1 = (a + b) + str
    p2 = a + (b + str)
    typeof(p1).should eq typeof(p2)
  end
  it "forms an array with elements of a union type" do
    p = a + Parse.const("foo")
    typeof(p).should eq Parser(Array(Char | String))
  end
  it "allows forming parsers of unbounded length" do
    p = "foo".each_char.map(&->Parse.char(Char)).reduce(Parser.const [] of Char) { |a, b| a + b }
    typeof(p).should eq Parser(Array(Char))
  end
  it "returns a ParseError if any fail" do
    p = a + b + c
    p.parse("zbc").should be_a ParseError
    p.parse("azc").should be_a ParseError
    p.parse("abz").should be_a ParseError
  end
end

describe "#<<" do
  p = a << b
  it "returns the result of self if both parsers succeed" do
    p.parse("ab").should eq 'a'
  end
  it "returns a ParseError if self errors" do
    p.parse("bb").should be_a ParseError
  end
  it "preserves the previous context when self fails" do
    ctx = ParseContext.new "bb"
    res = p.run ctx
    res.value.should be_a ParseError
    res.context.pos.should eq 0
  end
  it "preserves the parse context when other fails" do
    ctx = ParseContext.new "aa"
    res = p.run ctx
    res.value.should be_a ParseError
    res.context.pos.should eq 0
  end
end

describe "#>>" do
  p = a >> b
  it "returns the result of other if both parsers succeed" do
    p.parse("ab").should eq 'b'
  end
  it "returns a parse error if other fails" do
    p.parse("aa").should be_a ParseError
  end
  it "preserves the previous context when self fails" do
    ctx = ParseContext.new "bb"
    res = p.run ctx
    res.value.should be_a ParseError
    res.context.pos.should eq 0
  end
  it "preserves the parse context when other fails" do
    ctx = ParseContext.new "aa"
    res = p.run ctx
    res.value.should be_a ParseError
    res.context.pos.should eq 0
  end
end

describe "#|" do
  p = a | b
  it "returns the result if either parser succeeds" do
    p.parse("a").should eq 'a'
    p.parse("b").should eq 'b'
  end
  it "returns a ParseError if both fail" do
    p.parse("c").should be_a ParseError
  end
  it "allows chaining with a custom error message" do
    result = (p | "nope").parse "c"
    result.should be_a ParseError
    result.as(ParseError).message.should eq "nope"
  end
  it "builds a union type from component parsers" do
    composite = p | str | Parse.byte(0x0).map(&->Box.new(UInt8)) | p
    typeof(composite).should eq Parser(Char | String | Box(UInt8))
    typeof(composite.parse("foo")).should eq (Char | String | Box(UInt8) | ParseError)
  end
end

describe "#&" do
  it "succeeds when both succeed" do
    p = a & Parse.letter
    p.parse("a").should eq({'a', 'a'})
  end
  it "returns a ParseError if either fail" do
    (a & b).parse("a").should be_a ParseError
    (b & a).parse("a").should be_a ParseError
  end
end

describe "#^" do
  it "succeeds if a succeeds" do
    (a ^ b).parse("a").should eq 'a'
  end
  it "succeeds if b succeeds" do
    (a ^ b).parse("b").should eq 'b'
  end
  it "fails if both fail" do
    (a ^ b).parse("c").should be_a ParseError
  end
  it "fails if both succeed" do
    (a ^ a).parse("a").should be_a ParseError
  end
  it "provides a union type as the result" do
    (a ^ str).parse("a").should be_a Char | String
  end
end

describe "#*(Int)" do
  it "repeats the parser the specified number of times" do
    (a * 1).parse("aaa").should eq ['a']
    (a * 2).parse("aaa").should eq ['a', 'a']
    (a * 3).parse("aaa").should eq ['a', 'a', 'a']
  end
  it "returns an empty array for 0" do
    (a * 0).parse("aaa").should eq [] of Char
  end
  it "fails if the count isn't met" do
    (a * 3).parse("a").should be_a ParseError
  end
end

describe "#*(Range)" do
  p = a * (1..2)
  it "stops matching after range.end" do
    p.parse("aab").should eq ['a', 'a']
  end
  it "succeeds if the number of matches is within the range" do
    p.parse("ab").should eq ['a']
  end
  it "failes if the range.start is not met" do
    p.parse("b").should be_a ParseError
  end
  it "succeeds on a endless range if range.start is met" do
    (a * (0..)).parse("").should eq [] of Char
    (a * (1..)).parse("a").should eq ['a']
    (a * (0..)).parse("aab").should eq ['a', 'a']
  end
end
end

--------------------------------------------------------------------------------
/src/pars/parser.cr:
--------------------------------------------------------------------------------
require "./parse_result"
require "./parse_context"

module Pars
  # A parser producing values of type `T`.
  #
  # A `Parser` wraps a block that receives a `ParseContext` and returns a
  # `ParseResult(T)`. The operators defined on it build new parsers out of
  # existing ones.
  struct Parser(T)
    # Creates a `Parser` that always succeeds with *value*.
    #
    # The context is passed through untouched, so no input is consumed.
    def self.const(value : T)
      new do |context|
        ParseResult(T).new value, context
      end
    end

    # Creates a `Parser` that always fails with *message*.
    def self.fail(message : String)
      new do |context|
        ParseResult(T).error message, context
      end
    end

    # Generates `Parser.head`, `Parser.char` and `Parser.byte`. Each reads the
    # `ParseContext` method of the same name and advances the context by one.
    {% for item in [:head, :char, :byte] %}
      # Creates a `Parser` that consumes the parse head, or fails if the end of
      # input has been reached.
      def self.{{item.id}}
        new do |context|
          if context.exhausted?
            ParseResult(typeof(context.{{item.id}})).error "input ended", context
          else
            ParseResult(typeof(context.{{item.id}})).new context.{{item.id}}, context.next
          end
        end
      end
    {% end %}

    # Creates a `Parser` from the *block* holding its parsing logic.
    def initialize(&block : ParseContext -> ParseResult(T))
      @block = block
    end

    # Parses *input* from its start using the logic provided by the parser's
    # block at definition.
    #
    # Returns the parsed value, or a `ParseError` on failure.
    def parse(input) : (T | ParseError)
      context = ParseContext.new input
      run(context).value
    end

    # Runs `self` for a given *context*.
    def run(context : ParseContext) : ParseResult(T)
      @block.call context
    end

    # Transforms the result of the parser such that, when the parser runs, the
    # output value becomes a different value.
    #
    # For example, if you took a `Parser(Char)` and wanted to transform it to a
    # `Parser(String)` by `Char#to_s`, then you could use
    # `char_parser.map &.to_s`.
    #
    # An exception raised by *block* is captured and reported as a parse error
    # carrying the exception message.
    def map(&block : T -> B) : Parser(B) forall B
      Parser(B).new do |context|
        result = run context
        if result.errored
          ParseResult(B).error result.error!
        else
          begin
            ParseResult(B).new block.call(result.value!), result.context
          rescue e
            ParseResult(B).error e.message || e.to_s, result.context
          end
        end
      end
    end

    # Sequences `self` with another parser.
    #
    # Expects a block that receives the result of the current parser and returns
    # a new parser of any type.
    def bind(&block : T -> Parser(B)) : Parser(B) forall B
      Parser(B).new do |context|
        result = run context
        if result.errored
          ParseResult(B).error result.error!
        else
          other = block.call result.value!
          other_result = other.run result.context
          if other_result.errored
            # Report the failure against the context the sequence started from.
            ParseResult(B).error other_result.error!.message, context
          else
            other_result
          end
        end
      end
    end

    # Sequences `self` with *other*, providing a new Parser that returns the
    # results as a Tuple.
    #
    # If multiple parsers are chained, the results are flattened.
    def &+(other : Parser(B)) forall B
      self.bind do |a|
        other.bind do |b|
          # Pick the concatenation form based on which side already yields a Tuple.
          {% if T.name.starts_with?("Tuple(") && B.name.starts_with?("Tuple(") %}
            Parser.const(a + b)
          {% elsif T.name.starts_with? "Tuple(" %}
            Parser.const(a + {b})
          {% elsif B.name.starts_with? "Tuple(" %}
            Parser.const({a} + b)
          {% else %}
            Parser.const({a, b})
          {% end %}
        end
      end
    end

    # Sequences `self` with *other*, providing a new Parser that returns the
    # results as an Array.
    #
    # This may be preferred in place of `Parser(T)#.&+` when building parsers
    # that enumerate or reduce over a structure of unknown size, such as when
    # working within an Iterator.
    #
    # If multiple parsers are chained, the results are flattened.
    def +(other : Parser(B)) forall B
      self.bind do |a|
        other.bind do |b|
          # Pick the concatenation form based on which side already yields an Array.
          {% if T.name.starts_with?("Array(") && B.name.starts_with?("Array(") %}
            Parser.const a + b
          {% elsif T.name.starts_with? "Array(" %}
            Parser.const a + [b]
          {% elsif B.name.starts_with? "Array(" %}
            Parser.const [a] + b
          {% else %}
            Parser.const [a, b]
          {% end %}
        end
      end
    end

    # Sequences the current parser with another parser, and disregards the other
    # parser's result, but ensures the two succeed.
def <<(other : Parser(B)) : Parser(T) forall B
  Parser(T).new do |context|
    first = run context
    next first if first.errored

    second = other.run first.context
    if second.errored
      # Report the failure against the context the sequence started from.
      ParseResult(T).error second.error!.message, context
    else
      # Keep our own value, but continue from where *other* finished.
      ParseResult(T).new first.value!, second.context
    end
  end
end

# Sequences the current parser with another parser, and disregards the
# original parser's result, but ensures the two succeed.
def >>(other : Parser(B)) : Parser(B) forall B
  Parser(B).new do |context|
    first = run context
    next ParseResult(B).error(first.error!) if first.errored

    second = other.run first.context
    next second unless second.errored

    # Report the failure against the context the sequence started from.
    ParseResult(B).error second.error!.message, context
  end
end

# Given `A | B`, creates a new parser that succeeds when A succeeds or B
# succeeds. Checks A first, doesn't check B if A succeeds. Ignores type
# differences, gives union type.
def |(other : Parser(B)) : Parser(T | B) forall B
  Parser(T | B).new do |context|
    primary = run context
    unless primary.errored
      # When the union collapses to `T` the result already has the target
      # type; otherwise it is re-wrapped as a `ParseResult(T | B)`.
      {% if Union(T, B) == T %}
        primary
      {% else %}
        ParseResult(T | B).new primary.value!, primary.context
      {% end %}
    else
      # Same idea for the fallback: pass it through when the union is `B`.
      {% if Union(T, B) == B %}
        other.run context
      {% else %}
        fallback = other.run context
        if fallback.errored
          ParseResult(T | B).error fallback.error!
        else
          ParseResult(T | B).new fallback.value!, fallback.context
        end
      {% end %}
    end
  end
end

# Creates a new `Parser(T)` that fails with *message* if `self` is
# unsuccessful.
#
# This can be used to provide a custom error message when chaining parsers.
203 | def |(message : String) : Parser(T) 204 | Parser(T).new do |context| 205 | result = run context 206 | if result.errored 207 | ParseResult(T).error message, result.context 208 | else 209 | result 210 | end 211 | end 212 | end 213 | 214 | # Given `A & B`, creates a parser that succeeds when both A and B succeed 215 | # for the same input, returning the results as a Tuple. 216 | def &(other : Parser(B)) : Parser({T, B}) forall B 217 | Parser({T, B}).new do |context| 218 | result = run context 219 | if result.errored 220 | ParseResult({T, B}).error result.error! 221 | else 222 | other_result = other.run context 223 | if other_result.errored 224 | ParseResult({T, B}).error other_result.error! 225 | else 226 | ParseResult({T, B}).new({result.value!, other_result.value!}, result.context) 227 | end 228 | end 229 | end 230 | end 231 | 232 | # Given `A ^ B`, creates a parser that succeeds if A or B succeed 233 | # exclusively for the same input. 234 | # 235 | # If both succeed, the parser will fail. 236 | def ^(other : Parser(B)) : Parser(T | B) forall B 237 | Parser(T | B).new do |context| 238 | result = run context 239 | other_result = other.run context 240 | if result.errored && other_result.errored 241 | ParseResult(T | B).error other_result.error! 242 | elsif result.errored 243 | ParseResult(T | B).new other_result.value!, other_result.context 244 | elsif other_result.errored 245 | ParseResult(T | B).new result.value!, result.context 246 | else 247 | ParseResult(T | B).error "expected only one parser to succeed", context 248 | end 249 | end 250 | end 251 | 252 | # Creates a new parser that repeats `self` exactly *count* times. 
253 | def *(count : Int) : Parser(Array(T)) 254 | case count 255 | when .< 0 256 | raise ArgumentError.new "cannot match less than zero times" 257 | when .== 0 258 | Parser.const [] of T 259 | else 260 | self * (count..count) 261 | end 262 | end 263 | 264 | # Creates a new parser that repeats `self` continuously up to *range.end* 265 | # times. If *range* is not bounded it will continue to repeat until failing. 266 | def *(range : Range(Int, Int) | Range(Int, Nil)) : Parser(Array(T)) 267 | Parser(Array(T)).new do |context| 268 | result = run context 269 | if result.errored && !range.includes? 0 270 | next ParseResult(Array(T)).error result.error! 271 | end 272 | 273 | results = [] of T 274 | if (max = range.end) 275 | # Bounded range 276 | max -= 1 if range.excludes_end? 277 | while !result.errored 278 | results << result.value! 279 | break if results.size >= max 280 | result = run result.context 281 | end 282 | else 283 | # Unbounded - parse until error 284 | while !result.errored 285 | results << result.value! 286 | result = run result.context 287 | end 288 | end 289 | 290 | unless range.includes? results.size 291 | next ParseResult(Array(T)).error "expected #{range} parses, got #{results.size} parses", result.context 292 | end 293 | 294 | ParseResult.new results, result.context 295 | end 296 | end 297 | end 298 | end 299 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pars 2 | 3 | `Pars` is a library for building monadic parser combinators in crystal-lang. 4 | It works with minimal object allocation to extract domain-specific representation from String or Bytes input. 5 | 6 | A combinator parser is a system that allows for the creation of small parsers which can then combine to represent more complex semantics. 7 | This process then repeats, allowing for increasing complexity. 
8 | Small parsers combine with logic (OR, AND, XOR) and sequencing to create larger, more meaningful parsers. 9 | Ultimately, this provides a single parser that models a full domain grammar. 10 | 11 | This style of parser allows for creating interpreted programming languages, decoding markup, reading files of different formats, decoding communication protocols and other uses where there is a need to extract information from String or Bytes data based on defined syntax. 12 | 13 | For a more in-depth introduction, see [Monadic Parser Combinators](https://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf). 14 | 15 | ## Example 16 | 17 | Let's start with a domain model. 18 | ```crystal 19 | enum Greeting 20 | Hello 21 | Goodbye 22 | end 23 | 24 | alias Target = String 25 | 26 | record Expression, greeting : Greeting, target : Target 27 | ``` 28 | 29 | Now we can build some parsers. 30 | ```crystal 31 | # Creates a parser that returns a single letter character. 32 | letter = Parse.char_if &.letter? 33 | 34 | # Parser for 1 or more letters. 35 | letters = letter * (1..) 36 | 37 | # We can map this into a new Parser that joins these letters into a String. 38 | word = letters.map &.join # this is already available as `Parse.word` too 39 | 40 | # This can now build a parser for one of our domain objects. 41 | greeting = word.map &->Greeting.parse(String) 42 | 43 | # But we still have a little more to consume... 44 | comma = Parse.char ',' 45 | space = Parse.char ' ' 46 | sep = (comma + space) | space 47 | # ...and some more... 48 | target = word 49 | exclamation = Parse.char('!') * (0..1) 50 | 51 | # But now we can combine these into the full expression parser 52 | expression = ((greeting << sep) &+ target << exclamation).map do |(g, t)| 53 | Expression.new g, t 54 | end 55 | 56 | typeof(expression) # => Parser(Expression) 57 | 58 | expression.parse "Hello, world!"
# => Expression(@greeting=Hello, @target="world") 59 | 60 | expression.parse "Hello human" # => Expression(@greeting=Hello, @target="human") 61 | 62 | expression.parse "Well then..." # => ParseError: unknown enum Greeting value: Well 63 | ``` 64 | 65 | Importantly though this is only the start. We can continue to build complexity. 66 | ```crystal 67 | other_expression = Parse.string "Well then..." 68 | new_parser = expression | other_expression 69 | ``` 70 | 71 | And create results that use crystal's beautiful type system. 72 | ```crystal 73 | result = new_parser.parse("Well then...") 74 | case result 75 | in Expression 76 | # ... 77 | in String 78 | # ... 79 | in ParseError 80 | # ... 81 | end 82 | ``` 83 | 84 | --- 85 | 86 | ## Usage 87 | 88 | ```crystal 89 | require "pars" 90 | include Pars 91 | ``` 92 | 93 | While not required, including `Pars` is _highly recommended_ for ease of access. 94 | 95 | ### Primitive parsers 96 | 97 | ```crystal 98 | char_a = Parse.char 'a' 99 | 100 | puts char_a.parse "abc" #=> a 101 | ``` 102 | 103 | This example creates a `Parser(Char)` from `Parse.char`, and parses the string `"abc"` on it. 104 | The character parser looks at the beginning of the string, and looks for the first character. 105 | If the first character matches the character supplied, then the parse will succeed and the parse result will return the character that matched. 106 | 107 | ```crystal 108 | puts char_a.parse "bca" #=> expected 'a', got 'b' 109 | ``` 110 | 111 | This example uses the same `char_a` parser, but parses string `"bca"` on it. 112 | Because it doesn't start with `'a'`, the parse fails and returns a `ParseError`. 113 | A `ParseError` contains a message about the parse failure, available via `ParseError#message`. 114 | As such, `Parser(T)#parse` returns a union of `(T | ParseError)`, as it can return either. 
115 | 116 | ```crystal 117 | str_cat = Parse.string "cat" 118 | 119 | puts str_cat.parse "cat" #=> cat 120 | puts str_cat.parse "cats are cool" #=> cat 121 | puts str_cat.parse "dog" #=> expected 'cat', got 'd' 122 | ``` 123 | 124 | This example creates a new primitive parser, the `Parser(String)` created by `Parse.string(String)`. 125 | It expects an exact copy of the string provided; in this example the text `"cat"`. 126 | 127 | When constructing parsers for non-string based input, `Parse.byte` is also provided. 128 | ```crystal 129 | null_byte = Parse.byte 0x0 130 | 131 | puts null_byte.parse Bytes[0xDE, 0xAD, 0xBE, 0xEF] #=> expected `0`, got '222' 132 | ``` 133 | 134 | Similarly, `Parse.bytes` is available for matching a specific byte sequence. 135 | ```crystal 136 | bovine = Parse.bytes Bytes[0xBE, 0xEF] 137 | 138 | puts bovine.parse Bytes[0xBE, 0xEF] #=> Bytes[0xBE, 0xEF] 139 | ``` 140 | 141 | ### Conditional parsers 142 | 143 | In some cases, you may want to retrieve a value from the input that matches certain criteria. 144 | Two base conditional parsers provide this: 145 | 146 | ```crystal 147 | space = Parse.char_if &.whitespace? 148 | ``` 149 | 150 | or for binary inputs 151 | 152 | ```crystal 153 | low_val = Parse.byte_if { |b| b <= 10 } 154 | ``` 155 | 156 | 157 | ### Optional parsers 158 | 159 | ```crystal 160 | char_a = Parse.char 'a' 161 | char_b = Parse.char 'b' 162 | parse_ab = char_a | char_b 163 | 164 | puts parse_ab.parse "abc" #=> a 165 | puts parse_ab.parse "bca" #=> b 166 | puts parse_ab.parse "cab" #=> expected 'b', got 'c' 167 | ``` 168 | 169 | This example creates three parsers: 170 | - a `Parser(Char)` that expects a character of `'a'`, 171 | - a `Parser(Char)` that expects a character of `'b'`, and 172 | - a `Parser(Char)` created using the `|` operator that will try the left parser first, then the right, and use the successful parser. 173 | 174 | The `|` operator allows you to create branching parsers by using OR logic.
175 | It first tries the parser on the left, then the right. 176 | If both fail, it will return the `ParseError` given by the rightmost parser. 177 | 178 | This process is tedious for large masses of characters, such as if you wanted to accept all letters of the alphabet. 179 | For this purpose, there exists `Parse.one_char_of`, which looks for any character in the provided string or list. 180 | 181 | ```crystal 182 | parse_alphabet = Parse.one_char_of "abcdefghijklmnopqrstuvwxyz" 183 | 184 | puts parse_alphabet.parse "abc" #=> a 185 | puts parse_alphabet.parse "bca" #=> b 186 | puts parse_alphabet.parse "xyz" #=> x 187 | puts parse_alphabet.parse "yzx" #=> y 188 | puts parse_alphabet.parse "123" #=> expected 'z', got '1' 189 | ``` 190 | 191 | This example creates a parser that accepts a char from the provided list. 192 | As seen, alphabetical characters parse, but numerical characters do not, as they were not in the original string of the alphabet. 193 | 194 | Prebuilt parsers exist for common character types: `Parse.lowercase`, `Parse.uppercase`, `Parse.letter`, `Parse.digit`, `Parse.alphanumeric`, `Parse.whitespace`. 195 | 196 | ### Repetitive parsers 197 | 198 | To create a parser that repeats, use the `*` operator. 199 | This is available on any `Parser(T)`, and outputs a `Parser(Array(T))`. 200 | 201 | When used with an integer, this creates a parser that matches an exact number of times. 202 | 203 | ```crystal 204 | triple_a = Parse.char('a') * 3 205 | 206 | triple_a.parse("aaa") #=> ['a', 'a', 'a'] 207 | triple_a.parse("aa") #=> expected 'a', input ended 208 | ``` 209 | 210 | To match a variable number of times, use a `Range`. 211 | 212 | ```crystal 213 | some_a = Parse.char('a') * (1..3) 214 | 215 | some_a.parse("aaa") #=> ['a', 'a', 'a'] 216 | some_a.parse("aa") #=> ['a', 'a'] 217 | ``` 218 | 219 | Endless ranges are also supported, which will continue to match until a ParseError occurs. 220 | 221 | ```crystal 222 | existential_dread = Parse.char('a') * (5..)
223 | 224 | existential_dread.parse("aaa") #=> expected 'a', input ended 225 | existential_dread.parse("aaaaaaaaaa") #=> ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a'] 226 | ``` 227 | 228 | ```crystal 229 | word = Parse.letter * (1..) 230 | 231 | puts word.parse "hello world" #=> ['h', 'e', 'l', 'l', 'o'] 232 | puts word.parse "abc" #=> ['a', 'b', 'c'] 233 | ``` 234 | 235 | A clear issue exists with the above example: it returns a list of the characters. 236 | If we want to convert this into a usable `String`, we have to transform the parser. 237 | 238 | ### Transforming parsers 239 | 240 | Existing parsers can be "transformed" to create new parsers with new logic. 241 | This provides the ability to move from primitive types to domain-specific types. 242 | To transform a parser, use the `Parser(T)#map(T -> B)` method. 243 | This accepts a block that receives the resulting value of a parse as a parameter, and outputs a transformed/mapped value. 244 | 245 | For example, if you created a parser that accepted numbers: 246 | 247 | ```crystal 248 | digit = Parse.one_char_of "0123456789" 249 | ``` 250 | 251 | Upon parsing it, it would yield characters on success: 252 | 253 | ```crystal 254 | puts (digit.parse "1").class #=> Char 255 | ``` 256 | 257 | We find that the result is a `Char`, not any form of a `Number`! To solve this, we can transform the parser: 258 | 259 | ```crystal 260 | digit = (Parse.one_char_of "0123456789").map &.to_i 261 | 262 | puts digit.parse "1" #=> 1 263 | puts (digit.parse "1").class #=> Int32 264 | ``` 265 | 266 | Success! Now the parsed value from our parser is the correct type, `Int32`. 267 | 268 | Back to the issue we found in the word parser from the previous section, we can transform the `Array(Char)` to 269 | a `String`.
270 | 271 | ```crystal 272 | word = (Parse.letter * (1..)).map &.join 273 | 274 | puts word.parse "hello world" #=> hello 275 | puts word.parse "abc" #=> abc 276 | ``` 277 | 278 | This identical `word` parser is available as `Parse.word` (`Parser(String)`). 279 | 280 | ### Logical combinations 281 | 282 | The `|` (OR) operator already discussed is accompanied by other logical operators. 283 | 284 | - `A & B` (AND) creates a new parser that ensures both A and B successfully parse for the same input and returns the results as the Tuple `{A, B}`. 285 | - `A ^ B` (XOR) creates a new parser that succeeds with the result of A or B, but fails if both succeed. 286 | 287 | ### Sequencing parsers 288 | 289 | - `A >> B` creates a new parser that ensures both A and B parse sequentially, but results with the value of B. 290 | - `A << B` creates a new parser that ensures both A and B parse sequentially, but results with the value of A. 291 | - `A + B` creates a new parser that ensures both A and B parse sequentially, returning the results as an Array. 292 | - `A &+ B` creates a new parser that ensures both A and B parse sequentially, returning the results as a Tuple. 293 | 294 | ```crystal 295 | letter = Parse.letter 296 | digit = Parse.digit 297 | parser_take_digit = letter >> digit 298 | parser_take_letter = letter << digit 299 | 300 | puts parser_take_digit.parse "a1" #=> 1 301 | puts parser_take_digit.parse "b2" #=> 2 302 | 303 | puts parser_take_letter.parse "a1" #=> a 304 | puts parser_take_letter.parse "b2" #=> b 305 | ``` 306 | 307 | In this example, two parsers are created, `letter` and `digit`. 308 | Then, two new parsers are created using the `>>` and `<<` operators. 309 | The first parses both sequentially but results with the result of `digit`, and the second does the same but results with the value of `letter`. 310 | Upon parsing these, the two parsers must both succeed in sequence, but the combined parser returns the result of the parser the operator points toward.
311 | 312 | ### Parsing lists 313 | 314 | `Parse` has a special parser that can parse a list of parsable items by parser `A`, delimited by parser `B`. 315 | Using this, we can create a parser that parses through a list of words (using `Parse.word`), delimited by a second parser that looks for commas. 316 | 317 | ```crystal 318 | word = Parse.word 319 | optional_whitespace = Parse.whitespace * (0..) 320 | comma = (Parse.char ',') << optional_whitespace 321 | 322 | list_parser = Parse.list word, comma 323 | 324 | puts list_parser.parse "hello, world" #=> ["hello", "world"] 325 | puts list_parser.parse "how,are, you" #=> ["how", "are", "you"] 326 | puts list_parser.parse "123, 456" #=> [] 327 | puts list_parser.parse "hello world, how are you" #=> ["hello"] 328 | ``` 329 | 330 | ### Complex sequential parsers 331 | 332 | In the event you need to create complex sequential parsers, you can use `Parser(T)#bind`. 333 | The `bind` method takes a block that receives the output of `Parser(T)` as a value, and must return 334 | a new `Parser` of any type, or `Parser(B)`. 335 | We can recreate the `parser_take_digit` and `parser_take_letter` parsers using this functionality: 336 | 337 | ```crystal 338 | letter = Parse.letter 339 | digit = Parse.digit 340 | 341 | parser_take_digit = letter.bind do |char_result| 342 | digit.bind do |digit_result| 343 | Parse.const digit_result 344 | end 345 | end 346 | ``` 347 | 348 | The original two parsers chain their execution, and ultimately a `Parse.const` parser returns. 349 | `Parse.const` is a parser that takes in any value of type `T`. 350 | When parsed, it _always_ returns the value of type `T`. 351 | In this case, we create it with the `Char` result from `digit`.
352 | 353 | ```crystal 354 | parser_letter_digit = letter.bind do |char_result| 355 | digit.bind do |digit_result| 356 | Parse.const({char_result, digit_result}) # a constant parser with a `Tuple(Char, Char)` 357 | end 358 | end 359 | ``` 360 | 361 | This parser will parse strings like `a1`, `b2`, `c3`, etc., but return both of the retrieved values as a `Tuple`. 362 | 363 | ```crystal 364 | result = parser_letter_digit.parse "a1" 365 | 366 | puts result[0] #=> a 367 | puts result[1] #=> 1 368 | ``` 369 | 370 | This form of parser sequencing can become tedious. 371 | As a result, the library has a special macro inspired by Haskell's `do` statement. 372 | It allows you to chain parsers like above, but in a much more linear and organized manner. 373 | Here is the most recent sequential parser `parser_letter_digit` using `Parse.do`: 374 | 375 | ```crystal 376 | parser_letter_digit = Parse.do({ 377 | char_result <= letter, 378 | digit_result <= digit, 379 | Parse.const({char_result, digit_result}) 380 | }) 381 | ``` 382 | 383 | The body of the `Parse.do` macro is a list of actions separated by commas. 384 | The last element of this list _must_ be an expression that is ultimately returned through the new parser. 385 | 386 | For each of the other elements in the list, they must be either parser results or local variables. 387 | 388 | - Parser results look like `result_variable_name <= parser,`. In this case, the result from `parser` is 389 | stored as `result_variable_name`. 390 | - Local variables are `variable_name = value,`. In this case, `variable_name` is set to `value`. 391 | 392 | Utilizing these tools, more complex parsers are expressible. 393 | 394 | ```crystal 395 | word = Parse.word 396 | 397 | optional_whitespace = Parse.whitespace * (0..) 
398 | equals = optional_whitespace >> (Parse.char '=') << optional_whitespace 399 | 400 | key_value_pair = Parse.do({ 401 | key <= word, 402 | _ <= equals, 403 | value <= word, 404 | Parse.const({key, value}) 405 | }) 406 | 407 | comma = (Parse.char ',') << optional_whitespace 408 | 409 | key_value_list = Parse.list key_value_pair, comma 410 | 411 | puts key_value_list.parse "hello = world" #=> [{"hello", "world"}] 412 | puts key_value_list.parse "how = are, you= sir" #=> [{"how", "are"}, {"you", "sir"}] 413 | puts key_value_list.parse "all= sorts,of =supported, white = spaces" #=> [{"all", "sorts"}, {"of", "supported"}, {"white", "spaces"}] 414 | ``` 415 | 416 | ### Custom parsers 417 | 418 | Custom parsers can wrap arbitrary logic. 419 | This is sometimes necessary if existing primitive parsers cannot combine effectively or efficiently. 420 | 421 | ```crystal 422 | def char_parser(char) 423 | Parser(Char).new do |context| 424 | if context.exhausted? 425 | ParseResult(Char).error "expected '#{char}', got end of input", context 426 | elsif context.head === char 427 | ParseResult(Char).new char, context.next 428 | else 429 | ParseResult(Char).error "expected '#{char}', got '#{context.head}'", context 430 | end 431 | end 432 | end 433 | ``` 434 | 435 | This defines `char_parser(Char)`, which creates a parser that expects a character as specified. 436 | This implementation is the same as the internal implementation `Parse.char(Char)`. 437 | See the source code for more applications of Parsers derived from blocks. 438 | 439 | ## Docs 440 | 441 | Generate docs with `crystal docs`. 442 | 443 | ## Acknowledgements 444 | 445 | `Pars` is a fork of [Pars3k](https://github.com/voximity/pars3k). 446 | It shares much of the same internals and structure but is _not_ API compatible. 447 | The public API uses features, idioms and operators specific to crystal-lang.
448 | While it may look and feel different, a significant hat-tip needs to go to the original work by [Voximity](https://github.com/voximity) and the authors of libraries which inspired it. 449 | --------------------------------------------------------------------------------