├── .formatter.exs ├── .gitignore ├── README.md ├── example └── sql_parser.ex ├── lib ├── pegasus.ex └── pegasus │ ├── ast.ex │ ├── class.ex │ ├── components.ex │ ├── expression.ex │ ├── grammar.ex │ ├── identifier.ex │ ├── literal.ex │ ├── primary.ex │ ├── sequence.ex │ └── tokens.ex ├── mix.exs ├── mix.lock └── test ├── _assets └── peg_for_peg.y ├── _support └── case.ex ├── pegasus_test.exs ├── pegasus_test ├── class_test.exs ├── components_test.exs ├── identifier_test.exs ├── literal_test.exs └── sequence_test.exs ├── regresssion_test.exs └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | pegasus-*.tar 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pegasus 2 | 3 | Instrumentable Peg Parser for Elixir, based on NimbleParsec 4 | 5 | For documentation on this peg format: 6 | 7 | https://www.piumarta.com/software/peg/peg.1.html 8 | 9 | ## Installation 10 | 11 | ```elixir 12 | def deps do 13 | [ 14 | {:pegasus, "~> 0.2.4"} 15 | ] 16 | end 17 | ``` 18 | 19 | 20 | -------------------------------------------------------------------------------- /example/sql_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Example.Parser do 2 | @moduledoc """ 3 | Parses a SQL statement into a simplistic AST. 4 | 5 | The output is an AST with nodes in the following format: 6 | ``` 7 | %{ 8 | "type" => 9 | "opts" => [], 10 | "children" => [node()] 11 | } 12 | ``` 13 | Where `opts` is any extra matadata associated with that particular node, such 14 | as an identifier's name or a SELECT statement's select list or the operator of 15 | a comparision expression. 16 | """ 17 | require Pegasus 18 | require Logger 19 | 20 | import NimbleParsec 21 | 22 | @options [ 23 | # Exported sub-parsers 24 | expression: [parser: true, export: true], 25 | 26 | # Post traversed nodes get transformed into proper AST nodes. 27 | # This happens either with the `terminal`, `generic` or node specific post 28 | # traversal function. 
29 | ExpressionBinary: [tag: "expression_binary", post_traverse: :post_traverser], 30 | ExpressionFunCall: [tag: "fun_call", post_traverse: :post_traverser], 31 | Identifier: [tag: "identifier", post_traverse: :terminal_post_traverser], 32 | StatementSelect: [tag: "select", post_traverse: :statement_post_traverser], 33 | TableGet: [tag: "table_get", post_traverse: :post_traverser], 34 | TokenDynamic: [tag: "token_dynamic", post_traverse: :terminal_post_traverser], 35 | 36 | # Tagged Productions 37 | SelectList: [tag: "select_list"], 38 | StatementSubquery: [tag: "subquery"], 39 | PredicateGroupBy: [tag: "group_by"], 40 | PredicateWhere: [tag: "where"], 41 | 42 | # Constants 43 | ConstantInteger: [ 44 | tag: "constant_integer", 45 | collect: true, 46 | post_traverse: :terminal_post_traverser 47 | ], 48 | ConstantString: [ 49 | tag: "constant_string", 50 | collect: true, 51 | post_traverse: :terminal_post_traverser 52 | ], 53 | 54 | # These are node-level options. 55 | TokenDistinct: [tag: {:opt, "distinct"}], 56 | TokenEqual: [tag: {:opt, "operator"}], 57 | TokenAnd: [tag: {:opt, "operator"}], 58 | TokenOr: [tag: {:opt, "operator"}], 59 | TokenPlus: [tag: {:opt, "operator"}], 60 | SequenceGroupBy: [tag: {:opt, "group_by_list"}], 61 | 62 | # Ignore Tokens 63 | Spacing: [ignore: true], 64 | TokenComma: [ignore: true], 65 | TokenSemiColon: [ignore: true], 66 | TokenOpenParen: [ignore: true], 67 | TokenCloseParen: [ignore: true], 68 | TokenFrom: [ignore: true], 69 | TokenGroupBy: [ignore: true], 70 | TokenWhere: [ignore: true], 71 | TokenSelect: [ignore: true] 72 | ] 73 | 74 | Pegasus.parser_from_string( 75 | """ 76 | # Exported top level parser. 77 | SQL <- Statement 78 | 79 | # Exported partial expression parser. 80 | # Lower-case the name here to allow for exporting into Elixir. 81 | expression <- Expression 82 | 83 | Statement <- StatementSelect Spacing TokenSemiColon 84 | 85 | StatementSelect <- 86 | Spacing TokenSelect 87 | (Spacing TokenDistinct Spacing)? 88 | Spacing SelectList 89 | Spacing TokenFrom 90 | Spacing SelectTarget 91 | (Spacing PredicateWhere)? 92 | (Spacing PredicateGroupBy)? 93 | 94 | SelectList <- TokenStar / Sequence 95 | 96 | SelectTarget <- TableGet / StatementSubquery 97 | 98 | TableGet <- Identifier 99 | 100 | StatementSubquery <- TokenOpenParen StatementSelect TokenCloseParen 101 | 102 | PredicateGroupBy <- TokenGroupBy Spacing SequenceGroupBy 103 | 104 | SequenceGroupBy <- Sequence 105 | 106 | PredicateWhere <- TokenWhere Spacing Expression Spacing 107 | 108 | Sequence <- Expression ( Spacing? TokenComma Spacing? Sequence )* 109 | 110 | Expression <- 111 | TokenOpenParen Spacing Expression Spacing TokenCloseParen 112 | / ExpressionBinary 113 | / Expr 114 | 115 | ExpressionBinary <- 116 | Expr Spacing Operator Spacing Expression 117 | #/ Expr (Spacing ExpressionBinaryRest)* 118 | 119 | ExpressionBinaryRest <- 120 | Operator Spacing ExpressionBinary 121 | 122 | Operator <- 123 | TokenEqual 124 | / TokenAnd 125 | / TokenOr 126 | / TokenPlus 127 | 128 | Expr <- 129 | ExpressionFunCall 130 | / ExpressionConstant 131 | 132 | ExpressionFunCall <- 133 | Identifier Spacing TokenOpenParen Spacing Expression Spacing TokenCloseParen 134 | 135 | ExpressionConstant <- 136 | TokenDynamic 137 | / ConstantString 138 | / Identifier 139 | / ConstantInteger 140 | 141 | Identifier <- < IdentStart IdentCont* > Spacing 142 | IdentStart <- [a-zA-Z_\.] 143 | IdentCont <- IdentStart / [0-9] 144 | 145 | # These are semi-keyword semi-constants that get defined at runtime. 
146 | TokenDynamic <- TokenCurrentDate 147 | 148 | # Tokens 149 | TokenDistinct <- < [Dd][Ii][Ss][Tt][Ii][Nn][Cc][Tt] > 150 | TokenFrom <- < [Ff][Rr][Oo][Mm] > 151 | TokenGroupBy <- < [Gg][Rr][Oo][Uu][Pp] > Spacing < [Bb][Yy] > 152 | TokenSelect <- < [Ss][Ee][Ll][Ee][Cc][Tt] > 153 | TokenWhere <- < [Ww][Hh][Ee][Rr][Ee] > 154 | TokenCurrentDate <- < [Cc][Uu][Rr][Rr][Ee][Nn][Tt][_][Dd][Aa][Tt][Ee] > 155 | 156 | TokenSemiColon <- ";" 157 | TokenComma <- "," 158 | TokenStar <- "*" 159 | TokenOpenParen <- "(" 160 | TokenCloseParen <- ")" 161 | TokenEqual <- "=" 162 | TokenPlus <- "+" 163 | TokenAnd <- < [Aa][Nn][Dd] > 164 | TokenOr <- < [Oo][Rr] > 165 | 166 | # Constants 167 | ConstantInteger <- [0-9]* 168 | ConstantString <- ['] < ( !['] . )* > ['] 169 | 170 | # Misc 171 | Spacing <- ( Space / Comment )* 172 | Space <- ' ' / '\t' / EndOfLine 173 | Comment <- '//' ( !EndOfLine . )* EndOfLine 174 | EndOfLine <- '\r\n' / '\n' / '\r' 175 | """, 176 | @options 177 | ) 178 | 179 | defparsec(:parse, parsec(:SQL)) 180 | 181 | @doc "Prints the AST in a relativly reasonable format." 182 | def print(ast) do 183 | Logger.debug(inspect(ast, pretty: true, width: 150)) 184 | end 185 | 186 | @doc """ 187 | Prints the AST in a relativly reasonable format with the line and file of the 188 | caller. 189 | """ 190 | def print(ast, file_caller, line_caller) do 191 | Logger.debug("#{file_caller}:#{line_caller} #{inspect(ast, pretty: true, width: 150)}") 192 | end 193 | 194 | # The generic post_traverser is a helper to form a generic node from a parse node. 195 | # This basically just flattens the parse node into a consistent AST structure. 196 | defp post_traverser(rest, args, context, _line, _offset) do 197 | [{node_type, node}] = args 198 | {opts, children} = reduce_parse_node(node, {[], []}) 199 | 200 | node = %{ 201 | "type" => node_type, 202 | "opts" => opts, 203 | "children" => Enum.reverse(children) 204 | } 205 | 206 | {rest, [node], context} 207 | end 208 | 209 | defp reduce_parse_node([], acc), do: acc 210 | 211 | defp reduce_parse_node([{{:opt, type}, opt} | rest], {opts_acc, children_acc}) do 212 | reduce_parse_node(rest, {[{type, opt} | opts_acc], children_acc}) 213 | end 214 | 215 | defp reduce_parse_node([child | rest], {opts_acc, children_acc}) do 216 | reduce_parse_node(rest, {opts_acc, [child | children_acc]}) 217 | end 218 | 219 | # The statement_post_traverser transforms statement parse nodes into AST node, as they 220 | # are a bit more complex. 221 | defp statement_post_traverser(rest, [{"select", node_opts}] = args, context, _, _) do 222 | group_by = :proplists.get_value("group_by", node_opts, nil) 223 | 224 | if group_by !== nil do 225 | gbagg_post_traverser(rest, args, context) 226 | else 227 | select_post_traverser(rest, args, context) 228 | end 229 | end 230 | 231 | # Group By Aggregate post traversal node. 232 | # Strips the group by, post-traverses on the select statement, then assembles the node. 
233 | def gbagg_post_traverser(rest, [{"select", node_opts}], context) do 234 | node_opts_raw = List.keydelete(node_opts, "group_by", 0) 235 | select_traverser_input = [{"select", node_opts_raw}] 236 | {_, [ast_select], _} = select_post_traverser(rest, select_traverser_input, context) 237 | 238 | node_opts = :proplists.get_value("group_by", node_opts) 239 | {opts, _} = reduce_parse_node(node_opts, {[], []}) 240 | ast_gbagg = %{ 241 | "type" => "gbagg", 242 | "opts" => opts, 243 | "children" => [ast_select] 244 | } 245 | 246 | {rest, [ast_gbagg], context} 247 | end 248 | 249 | # Post traverse wrapper for SELECTs. 250 | defp select_post_traverser(rest, [{"select", node_opts}], context) do 251 | select_list = :proplists.get_value("select_list", node_opts) 252 | where = :proplists.get_value("where", node_opts, []) 253 | 254 | {opts, _} = reduce_parse_node(node_opts, {[], []}) 255 | 256 | target = 257 | node_opts 258 | |> List.keydelete("select_list", 0) 259 | |> List.keydelete("where", 0) 260 | |> strip_optionals() 261 | 262 | ast_select = %{ 263 | "type" => "select", 264 | "opts" => [{"select_list", select_list} | opts], 265 | "children" => target ++ where 266 | } 267 | 268 | {rest, [ast_select], context} 269 | end 270 | 271 | # The terminal post traverser is for simple "terminal" nodes, 272 | # which are nodes with no children and basically a constant interior. 273 | defp terminal_post_traverser(rest, args, context, _line, _offset) do 274 | [{type, [node_name]}] = args 275 | 276 | node = %{ 277 | "type" => type, 278 | "opts" => [{"value", node_name}], 279 | "children" => [] 280 | } 281 | 282 | {rest, [node], context} 283 | end 284 | 285 | # Removes optionals from a parse node. 286 | defp strip_optionals(list), do: strip_optionals(list, []) 287 | 288 | defp strip_optionals([], acc), do: Enum.reverse(acc) 289 | defp strip_optionals([{{:opt, _}, _} | rest], acc), do: strip_optionals(rest, acc) 290 | defp strip_optionals([head | rest], acc), do: strip_optionals(rest, [head | acc]) 291 | end 292 | -------------------------------------------------------------------------------- /lib/pegasus.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus do 2 | @moduledoc """ 3 | converts `peg` files into `NimbleParsec` parsers. 4 | 5 | For documentation on this peg format: https://www.piumarta.com/software/peg/peg.1.html 6 | 7 | To use, drop this in your model: 8 | 9 | ``` 10 | defmodule MyModule 11 | require Pegasus 12 | 13 | Pegasus.parser_from_string(\""" 14 | foo <- "foo" "bar" 15 | \""", foo: [parser: true]) 16 | end 17 | ``` 18 | 19 | See `NimbleParsec` for the description of the output. 20 | 21 | ``` 22 | MyModule.foo("foobar") # ==> {:ok, ["foo", "bar"], ...} 23 | ``` 24 | 25 | > #### Capitalized Identifiers {: .warning} 26 | > 27 | > for capitalized identifiers, you will have to use `apply/3` to call the 28 | > function, or you may wrap it in another combinator like so: 29 | > 30 | > ```elixir 31 | > defmodule Capitalized do 32 | > require Pegasus 33 | > import NimbleParsec 34 | > 35 | > Pegasus.parser_from_string("Foo <- 'foo'") 36 | > 37 | > defparsec :parse, parsec(:Foo) 38 | > end 39 | > ``` 40 | 41 | You may also load a parser from a file using `parser_from_file/2`. 42 | 43 | ## Parser Options 44 | 45 | Parser options are passed as a keyword list after the parser defintion 46 | string (or file). 
The keys for the options are the names of the combinators, 47 | followed by a keyword list of supplied options, which are applied in the 48 | specified order: 49 | 50 | ### `:start_position` 51 | 52 | When true, drops a map `%{line: , column: , offset: }` into 53 | the arguments for this keyword at the front of its list. 54 | 55 | ### `:collect` 56 | 57 | You may collect the contents of a combinator using the `collect: true` option. 58 | If this combinator calls other combinators, they must leave only iodata (no 59 | tags, no tokens) in the arguments list. 60 | 61 | ### `:token` 62 | 63 | You may substitute the contents of any combinator with a token (usually an atom). 64 | The following conditions apply: 65 | 66 | - `token: false` - no token (default) 67 | - `token: true` - token is set to the atom name of the combinator 68 | - `token: ` - token is set to the value of setting 69 | 70 | ### `:tag` 71 | 72 | You may tag the contents of your combinator using the `:tag` option. The 73 | following conditions apply: 74 | 75 | - `tag: false` - No tag (default) 76 | - `tag: true` - Use the combinator name as the tag. 77 | - `tag: ` - Use the supplied atom as the tag. 78 | 79 | ### `:post_traverse` 80 | 81 | You may supply a post_traversal for any parser. See `NimbleParsec` for how to 82 | implement post-traversal functions. These are defined by passing a keyword list 83 | to the `parser_from_file/2` or `parser_from_string/2` function. 84 | 85 | > #### Post-traversal arguments are reversed {: .info } 86 | > 87 | > Note that the second argument for a post-traversal function receives a list of 88 | > results from traversal in *reversed* order. 89 | 90 | #### Example 91 | 92 | ``` 93 | Pegasus.parser_from_string(\""" 94 | foo <- "foo" "bar" 95 | \""", 96 | foo: [post_traverse: {:some_function, []}] 97 | ) 98 | 99 | defp foo(rest, ["bar", "foo"], context, {_line, _col}, _bytes) do 100 | {rest, [:parsed], context} 101 | end 102 | ``` 103 | 104 | ### `:ignore` 105 | 106 | If true, clears the arguments from the list. 107 | 108 | ### `:parser` 109 | 110 | You may sepecify to export a combinator as a parser by specifying `parser: true`. 111 | By default, only a combinator will be generated. See `NimbleParsec.defparsec/3` 112 | to understand the difference. 113 | 114 | #### Example 115 | 116 | ``` 117 | Pegasus.parser_from_string(\""" 118 | foo <- "foo" "bar" 119 | \""", foo: [parser: true] 120 | ) 121 | ``` 122 | 123 | ### `:export` 124 | 125 | You may sepecify to export a combinator as a public function by specifying `export: true`. 126 | By default, the combinators are private functions. 127 | 128 | #### Example 129 | 130 | ``` 131 | Pegasus.parser_from_string(\""" 132 | foo <- "foo" "bar" 133 | \""", foo: [export: true] 134 | ) 135 | ``` 136 | 137 | ### `:alias` 138 | 139 | You may specify your own combinators to be run in place of what's in the grammar. 140 | This is useful if the grammar is wrong or contains content that can't be run for 141 | some reason. 142 | 143 | #### Example 144 | 145 | ``` 146 | Pegasus.parser_from_string(\""" 147 | foo <- "foo" 148 | \""", foo: [alias: :my_combinator]) 149 | ``` 150 | 151 | ## Not implemented features 152 | 153 | Actions, which imply the use of C code, are not implemented. These currently fail to parse 154 | but in the future they may silently do nothing. 
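## Combining options

A minimal sketch combining several of the options above; the two-rule grammar and
the option values here are illustrative, not taken from any shipped grammar:

```
# `word` is kept; `Spacing` is matched but dropped from the output.
Pegasus.parser_from_string(\"""
word <- [a-z]+ Spacing
Spacing <- ' '*
\""",
  # collect the matched characters into one binary, then tag the result
  word: [parser: true, collect: true, tag: :word],
  # whitespace is consumed but ignored
  Spacing: [ignore: true]
)
```

With these options a successful parse should leave a single tagged entry such as
`word: ["hello"]` in the result list, with any trailing whitespace consumed but
absent from the output.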
155 | """ 156 | 157 | import NimbleParsec 158 | 159 | defparsec(:parse, Pegasus.Grammar.parser()) 160 | 161 | defmacro parser_from_string(string, opts \\ []) do 162 | quote bind_quoted: [string: string, opts: opts] do 163 | string 164 | |> Pegasus.parse() 165 | |> Pegasus.parser_from_ast(opts) 166 | end 167 | end 168 | 169 | defmacro parser_from_file(file, opts \\ []) do 170 | quote bind_quoted: [file: file, opts: opts] do 171 | file 172 | |> File.read!() 173 | |> Pegasus.parse() 174 | |> Pegasus.parser_from_ast(opts) 175 | end 176 | end 177 | 178 | defmacro parser_from_ast(ast, opts) do 179 | quote bind_quoted: [ast: ast, opts: opts] do 180 | require NimbleParsec 181 | require Pegasus.Ast 182 | 183 | for ast = %{name: name, parsec: parsec} <- Pegasus.Ast.to_nimble_parsec(ast, opts) do 184 | name_opts = Keyword.get(opts, name, []) 185 | exported = !!Keyword.get(name_opts, :export) 186 | parser = Keyword.get(name_opts, :parser, false) 187 | 188 | Pegasus.Ast.traversals(ast) 189 | 190 | case {exported, parser} do 191 | {false, false} -> 192 | NimbleParsec.defcombinatorp(name, parsec) 193 | 194 | {false, true} -> 195 | NimbleParsec.defparsecp(name, parsec) 196 | 197 | {false, parser_name} -> 198 | NimbleParsec.defparsecp(parser_name, parsec) 199 | 200 | {true, false} -> 201 | NimbleParsec.defcombinator(name, parsec) 202 | 203 | {true, true} -> 204 | NimbleParsec.defparsec(name, parsec, export_combinator: true) 205 | 206 | {true, parser_name} -> 207 | NimbleParsec.defparsec(parser_name, parsec, export_combinator: true) 208 | end 209 | end 210 | end 211 | end 212 | end 213 | -------------------------------------------------------------------------------- /lib/pegasus/ast.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Ast do 2 | @moduledoc false 3 | import NimbleParsec 4 | 5 | @enforce_keys [:name] 6 | 7 | defstruct @enforce_keys ++ [:extract, start_pos?: false, parsec: empty()] 8 | 9 | @dummy_context %{parsec: empty()} 10 | 11 | def to_nimble_parsec({:ok, list, "", _, _, _}, opts) do 12 | to_nimble_parsec(list, opts) 13 | end 14 | 15 | def to_nimble_parsec(ast, opts) when is_list(ast) do 16 | Enum.map(ast, &to_nimble_parsec(&1, opts)) 17 | end 18 | 19 | def to_nimble_parsec({name, parser_ast}, opts) do 20 | name_opts = Keyword.get(opts, name, []) 21 | 22 | %__MODULE__{name: name} 23 | |> maybe_add_position(name_opts) 24 | |> translate_sequence(parser_ast) 25 | |> maybe_extract() 26 | |> maybe_collect(name_opts) 27 | |> maybe_token(name, name_opts) 28 | |> maybe_tag(name, name_opts) 29 | |> maybe_post_traverse(name_opts) 30 | |> maybe_ignore(name_opts) 31 | |> maybe_alias(name_opts) 32 | end 33 | 34 | defp maybe_add_position(context, name_opts) do 35 | if Keyword.get(name_opts, :start_position) do 36 | parsec = post_traverse(context.parsec, traversal_name(context.name, :start_pos)) 37 | %{context | parsec: parsec, start_pos?: true} 38 | else 39 | context 40 | end 41 | end 42 | 43 | defp maybe_extract(context) do 44 | if context.extract == :extract do 45 | %{context | parsec: post_traverse(context.parsec, traversal_name(context.name, :extract))} 46 | else 47 | context 48 | end 49 | end 50 | 51 | defp maybe_collect(context, name_opts) do 52 | if Keyword.get(name_opts, :collect) do 53 | %{context | parsec: reduce(context.parsec, {IO, :iodata_to_binary, []})} 54 | else 55 | context 56 | end 57 | end 58 | 59 | defp maybe_token(context = %{parsec: parsec}, name, name_opts) do 60 | case Keyword.get(name_opts, :token, false) do 61 | false -> 62 
| context 63 | 64 | true -> 65 | %{ 66 | context 67 | | parsec: parsec |> tag(name) |> post_traverse(traversal_name(name, :tag)), 68 | extract: :tag 69 | } 70 | 71 | token -> 72 | %{ 73 | context 74 | | parsec: parsec |> tag(token) |> post_traverse(traversal_name(name, :tag)), 75 | extract: :tag 76 | } 77 | end 78 | end 79 | 80 | defp maybe_tag(context = %{parsec: parsec}, name, name_opts) do 81 | case Keyword.get(name_opts, :tag, false) do 82 | false -> context 83 | true -> %{context | parsec: tag(parsec, name)} 84 | tag -> %{context | parsec: tag(parsec, tag)} 85 | end 86 | end 87 | 88 | defp maybe_post_traverse(context, name_opts) do 89 | if post_traverse = Keyword.get(name_opts, :post_traverse) do 90 | %{context | parsec: post_traverse(context.parsec, post_traverse)} 91 | else 92 | context 93 | end 94 | end 95 | 96 | defp maybe_ignore(context, name_opts) do 97 | if Keyword.get(name_opts, :ignore) do 98 | %{context | parsec: ignore(context.parsec)} 99 | else 100 | context 101 | end 102 | end 103 | 104 | defp maybe_alias(context, name_opts) do 105 | if substitution = Keyword.get(name_opts, :alias) do 106 | %{context | parsec: parsec(substitution)} 107 | else 108 | context 109 | end 110 | end 111 | 112 | def translate_sequence(context, ast) do 113 | Enum.reduce(ast, context, &translate/2) 114 | end 115 | 116 | defp translate(:dot, context) do 117 | %{context | parsec: utf8_char(context.parsec, not: 0)} 118 | end 119 | 120 | defp translate({:char, ranges}, context) do 121 | %{context | parsec: ascii_char(context.parsec, ranges)} 122 | end 123 | 124 | defp translate({:literal, literal}, context) do 125 | %{context | parsec: string(context.parsec, literal)} 126 | end 127 | 128 | defp translate({:lookahead, content}, context) do 129 | %{parsec: lookahead} = translate(content, @dummy_context) 130 | %{context | parsec: lookahead(context.parsec, lookahead)} 131 | end 132 | 133 | defp translate({:lookahead_not, content}, context) do 134 | %{parsec: lookahead_not} = translate(content, @dummy_context) 135 | %{context | parsec: lookahead_not(context.parsec, lookahead_not)} 136 | end 137 | 138 | defp translate({:optional, content}, context) do 139 | %{parsec: optional} = translate(content, @dummy_context) 140 | %{context | parsec: optional(context.parsec, optional)} 141 | end 142 | 143 | defp translate({:repeat, content}, context) do 144 | %{parsec: repeated} = translate(content, @dummy_context) 145 | %{context | parsec: repeat(context.parsec, repeated)} 146 | end 147 | 148 | defp translate({:times, content}, context) do 149 | %{parsec: repeated} = translate(content, @dummy_context) 150 | %{context | parsec: times(context.parsec, repeated, min: 1)} 151 | end 152 | 153 | defp translate({:identifier, identifier}, context) do 154 | %{context | parsec: parsec(context.parsec, identifier)} 155 | end 156 | 157 | defp translate({:choice, list_of_choices}, context) do 158 | choices = Enum.map(list_of_choices, &translate_sequence(@dummy_context, &1).parsec) 159 | %{context | parsec: choice(context.parsec, choices)} 160 | end 161 | 162 | defp translate({:ungroup, commands}, context) do 163 | grouped = translate_sequence(@dummy_context, commands) 164 | %{context | parsec: concat(context.parsec, grouped.parsec)} 165 | end 166 | 167 | defp translate({:extract, commands}, context) do 168 | grouped = translate_sequence(@dummy_context, commands) 169 | tagged = tag(grouped.parsec, :__extract__) 170 | %{context | parsec: concat(context.parsec, tagged), extract: :extract} 171 | end 172 | 173 | def traversal_name(name, 
tag), do: :"#{name}-#{tag}" 174 | 175 | defmacro traversals(ast) do 176 | quote bind_quoted: [ast: ast] do 177 | if ast.start_pos? do 178 | start_pos_name = Pegasus.Ast.traversal_name(ast.name, :start_pos) 179 | 180 | defp(unquote(start_pos_name)(rest, args, context, {line, offset}, col)) do 181 | {rest, [%{line: line, column: col - offset + 1, offset: offset} | args], context} 182 | end 183 | end 184 | 185 | case ast.extract do 186 | :tag -> 187 | extract_name = Pegasus.Ast.traversal_name(ast.name, :tag) 188 | 189 | defp(unquote(extract_name)(rest, [{tag, _} | args_rest], context, _, _)) do 190 | {rest, [tag | args_rest], context} 191 | end 192 | 193 | :extract -> 194 | extract_name = Pegasus.Ast.traversal_name(ast.name, :extract) 195 | 196 | defp unquote(extract_name)(rest, args, context, _, _) do 197 | extracted = 198 | Enum.flat_map(args, fn 199 | {:__extract__, what} -> 200 | what 201 | |> Enum.filter(&(is_binary(&1) or &1 in 1..0x10FFFF)) 202 | |> IO.iodata_to_binary() 203 | |> List.wrap() 204 | 205 | _ -> 206 | [] 207 | end) 208 | 209 | {rest, extracted, context} 210 | end 211 | 212 | _ -> 213 | [] 214 | end 215 | end 216 | end 217 | end 218 | -------------------------------------------------------------------------------- /lib/pegasus/class.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Class do 2 | @moduledoc false 3 | 4 | # Produces a "class" parser. 5 | # 6 | # Note that the output of a "class" parser leaves a NimbleParsec parser in the 7 | # arguments list. 8 | # 9 | # ```peg 10 | # Class <- '[' < ( !']' Range )* > ']' Spacing 11 | # ``` 12 | 13 | alias Pegasus.Components 14 | 15 | import NimbleParsec 16 | 17 | def parser(previous \\ empty()) do 18 | previous 19 | |> tag( 20 | ignore(string("[")) 21 | |> optional(string("^")) 22 | |> repeat( 23 | lookahead_not(string("]")) 24 | |> Components.range() 25 | ) 26 | |> ignore(string("]")), 27 | :class 28 | ) 29 | |> post_traverse({__MODULE__, :to_parser, []}) 30 | |> Components.spacing() 31 | end 32 | 33 | def to_parser(rest, [{:class, args} | args_rest], context, _, _) do 34 | classes = 35 | case args do 36 | ["^" | rest] -> 37 | Enum.map(rest, &{:not, &1}) 38 | 39 | args -> 40 | args 41 | end 42 | 43 | {rest, [{:char, classes} | args_rest], context} 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/pegasus/components.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Components do 2 | @moduledoc false 3 | 4 | # tools for the most simple parts of the PEG definition. 5 | # 6 | # None of these components *a priori* generate parsers. 7 | # 8 | # The following components are defined here: 9 | # 10 | # ```peg 11 | # Range <- Char '-' Char / Char 12 | # Char <- '\\' [abefnrtv'"\[\]\\] 13 | # / '\\' [0-3][0-7][0-7] 14 | # / '\\' [0-7][0-7]? 15 | # / '\\' '-' 16 | # / !'\\' . 17 | # Spacing <- ( Space / Comment )* 18 | # Comment <- '#' ( !EndOfLine . )* EndOfLine 19 | # Space <- ' ' / '\t' / EndOfLine 20 | # EndOfLine <- '\r\n' / '\n' / '\r' 21 | # EndOfFile <- !. 
22 | # ``` 23 | 24 | import NimbleParsec 25 | 26 | def range(previous \\ empty()) do 27 | choice(previous, [ 28 | tag(char() |> string("-") |> char(), :range) 29 | |> post_traverse({__MODULE__, :_to_range, []}), 30 | char() 31 | ]) 32 | end 33 | 34 | def char(previous \\ empty()) do 35 | escaped_char = ascii_char(~C(abefnrtv'"[]\-)) 36 | 37 | three_digit_octal = 38 | ascii_char([?0..?3]) 39 | |> ascii_char([?0..?7]) 40 | |> ascii_char([?0..?7]) 41 | 42 | two_or_one_digit_octal = 43 | ascii_char([?0..?7]) 44 | |> optional(ascii_char([?0..?7])) 45 | 46 | escaped = 47 | tag( 48 | string("\\") 49 | |> choice([ 50 | escaped_char, 51 | three_digit_octal, 52 | two_or_one_digit_octal 53 | ]), 54 | :escaped 55 | ) 56 | |> post_traverse({__MODULE__, :_parse_escaped, []}) 57 | 58 | not_escaped = 59 | lookahead_not(string("\\")) 60 | # need to provide *some* dummy variable for utf-8 characters 61 | |> utf8_char(not: 0) 62 | 63 | choice(previous, [ 64 | escaped, 65 | not_escaped 66 | ]) 67 | end 68 | 69 | def spacing(previous \\ empty()) do 70 | previous 71 | |> ignore( 72 | repeat( 73 | choice([ 74 | space(), 75 | comment() 76 | ]) 77 | ) 78 | ) 79 | end 80 | 81 | def comment(previous \\ empty()) do 82 | previous 83 | |> concat(string("#")) 84 | |> repeat( 85 | lookahead_not(end_of_line()) 86 | |> utf8_char(not: 0) 87 | ) 88 | |> end_of_line() 89 | end 90 | 91 | def space(previous \\ empty()) do 92 | previous 93 | |> choice([ 94 | ascii_char(~c' \t'), 95 | end_of_line() 96 | ]) 97 | end 98 | 99 | def end_of_line(previous \\ empty()) do 100 | previous 101 | |> choice([ 102 | string("\n\r"), 103 | ascii_char(~c'\n\r') 104 | ]) 105 | end 106 | 107 | def end_of_file(previous \\ empty()) do 108 | eos(previous) 109 | end 110 | 111 | @escape_lookup %{ 112 | ?a => ?\a, 113 | ?b => ?\b, 114 | ?e => ?\e, 115 | ?f => ?\f, 116 | ?n => ?\n, 117 | ?r => ?\r, 118 | ?t => ?\t, 119 | ?v => ?\v, 120 | ?' => ?', 121 | ?" => ?", 122 | ?[ => ?[, 123 | ?] => ?], 124 | ?- => ?-, 125 | 92 => 92 126 | } 127 | 128 | @escape_keys Map.keys(@escape_lookup) 129 | 130 | def _parse_escaped(rest, [{:escaped, ["\\", symbol]} | rest_args], context, _, _) 131 | when symbol in @escape_keys do 132 | {rest, [@escape_lookup[symbol] | rest_args], context} 133 | end 134 | 135 | def _parse_escaped(rest, [{:escaped, ["\\", o1, o2, o3]} | rest_args], context, _, _) 136 | when o1 in ?0..?3 and o2 in ?0..?7 and o3 in ?0..?7 do 137 | {rest, [deoctalize([o1, o2, o3]) | rest_args], context} 138 | end 139 | 140 | def _parse_escaped(rest, [{:escaped, ["\\", o1, o2]} | rest_args], context, _, _) 141 | when o1 in ?0..?7 and o2 in ?0..?7 do 142 | {rest, [deoctalize([o1, o2]) | rest_args], context} 143 | end 144 | 145 | def _parse_escaped(rest, [{:escaped, ["\\", o1]} | rest_args], context, _, _) 146 | when o1 in ?0..?7 do 147 | {rest, [deoctalize([o1]) | rest_args], context} 148 | end 149 | 150 | defp deoctalize(list) do 151 | list |> :erlang.list_to_integer(8) 152 | end 153 | 154 | def _to_range(rest, [{:range, [left, "-", right]} | rest_args], context, _, _) 155 | when left < right do 156 | {rest, [left..right | rest_args], context} 157 | end 158 | end 159 | -------------------------------------------------------------------------------- /lib/pegasus/expression.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Expression do 2 | @moduledoc false 3 | 4 | # Produces a "expression" parser. 
5 | # 6 | # ```peg 7 | # Expression <- Sequence ( SLASH Sequence )* 8 | # ``` 9 | 10 | import NimbleParsec 11 | 12 | alias Pegasus.Sequence 13 | alias Pegasus.Tokens 14 | 15 | require Sequence 16 | require Tokens 17 | 18 | expression = 19 | empty() 20 | |> tag( 21 | Sequence.parser() 22 | |> repeat( 23 | Tokens.slash() 24 | |> Sequence.parser() 25 | ), 26 | :sequences 27 | ) 28 | |> post_traverse({__MODULE__, :_separate_slashes, []}) 29 | 30 | defcombinator(:expression, expression) 31 | 32 | def parser(previous \\ empty()) do 33 | parsec(previous, {__MODULE__, :expression}) 34 | end 35 | 36 | def _separate_slashes(rest, [{:sequences, sequences} | other_args], context, _, _) do 37 | choice = 38 | case by_slashes(sequences) do 39 | [one_sequence] -> one_sequence 40 | many_sequences -> [choice: many_sequences] 41 | end 42 | 43 | {rest, [choice | other_args], context} 44 | end 45 | 46 | defp by_slashes(sequences, so_far \\ []) 47 | 48 | defp by_slashes([], so_far), do: Enum.reverse(so_far) 49 | defp by_slashes([:slash, this | rest], so_far), do: by_slashes(rest, [this | so_far]) 50 | defp by_slashes([this | rest], []), do: by_slashes(rest, [this]) 51 | end 52 | -------------------------------------------------------------------------------- /lib/pegasus/grammar.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Grammar do 2 | @moduledoc false 3 | 4 | # produces a fully parsed grammar. 5 | # 6 | # ``` 7 | # Grammar <- Spacing Definition+ EndOfFile 8 | # Definition <- Identifier LEFTARROW Expression 9 | # ``` 10 | 11 | import NimbleParsec 12 | 13 | alias Pegasus.Components 14 | alias Pegasus.Expression 15 | alias Pegasus.Identifier 16 | alias Pegasus.Tokens 17 | 18 | def parser do 19 | Components.spacing() 20 | |> times( 21 | Identifier.parser(empty()) 22 | |> Tokens.leftarrow() 23 | |> Expression.parser() 24 | |> post_traverse({__MODULE__, :collate, []}), 25 | min: 1 26 | ) 27 | |> Components.end_of_file() 28 | end 29 | 30 | def collate(rest, [parser, :leftarrow, {:identifier, name} | args_rest], context, _, _) do 31 | {rest, [{name, parser} | args_rest], context} 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/pegasus/identifier.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Identifier do 2 | @moduledoc false 3 | 4 | # Produces a "identifier" parser. 5 | # 6 | # the make_parser option should be set to false (default) when the identifier 7 | # is being assigned, and true when the identifier is being used as part of a 8 | # parser sequence. 
9 | # 10 | # ```peg 11 | # Identifier <- < IdentStart IdentCont* > Spacing 12 | # IdentStart <- [a-zA-Z_] 13 | # IdentCont <- IdentStart / [0-9] 14 | # ``` 15 | 16 | alias Pegasus.Components 17 | import NimbleParsec 18 | 19 | def parser(previous, make_parser \\ false) do 20 | previous 21 | |> tag( 22 | ident_start() 23 | |> repeat(ident_cont()), 24 | :identifier 25 | ) 26 | |> post_traverse({__MODULE__, :to_parser, [make_parser]}) 27 | |> Components.spacing() 28 | end 29 | 30 | def ident_start do 31 | ascii_char([?a..?z, ?A..?Z, ?_]) 32 | end 33 | 34 | def ident_cont() do 35 | ascii_char([?a..?z, ?A..?Z, ?_, ?0..?9]) 36 | end 37 | 38 | def to_parser(rest, [{:identifier, args} | other_args], context, _, _, make_parser) do 39 | identifier = 40 | args 41 | |> IO.iodata_to_binary() 42 | |> String.to_atom() 43 | 44 | tag = if make_parser, do: :parser, else: :identifier 45 | 46 | {rest, [{tag, identifier} | other_args], context} 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /lib/pegasus/literal.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Literal do 2 | @moduledoc false 3 | 4 | # Produces a "literal" parser. 5 | # 6 | # Note that the output of a "literal" parser leaves a NimbleParsec parser in the 7 | # arguments list. 8 | # 9 | # ```peg 10 | # Literal <- ['] < ( !['] Char )* > ['] Spacing 11 | # / ["] < ( !["] Char )* > ["] Spacing 12 | # ``` 13 | 14 | alias Pegasus.Components 15 | import NimbleParsec 16 | 17 | def parser(previous \\ empty()) do 18 | previous 19 | |> tag( 20 | choice([ 21 | quoted_literal(~S(')), 22 | quoted_literal(~S(")) 23 | ]), 24 | :literal 25 | ) 26 | |> post_traverse({__MODULE__, :to_parser, []}) 27 | |> Components.spacing() 28 | end 29 | 30 | defp quoted_literal(quote_bound) do 31 | ignore(string(quote_bound)) 32 | |> repeat( 33 | lookahead_not(string(quote_bound)) 34 | |> Components.char() 35 | ) 36 | |> ignore(string(quote_bound)) 37 | end 38 | 39 | def to_parser(rest, [{:literal, args} | args_rest], context, _, _) do 40 | literal = IO.iodata_to_binary(args) 41 | 42 | {rest, [{:literal, literal} | args_rest], context} 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/pegasus/primary.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Primary do 2 | @moduledoc false 3 | 4 | # Produces a "primary" parser. 
This is a single item which 5 | # 6 | # ```peg 7 | # Primary <- Identifier !LEFTARROW 8 | # / OPEN Expression CLOSE 9 | # / Literal 10 | # / Class 11 | # / DOT 12 | # / Action 13 | # / BEGIN Expression END 14 | # ``` 15 | 16 | import NimbleParsec 17 | alias Pegasus.Expression 18 | alias Pegasus.Identifier 19 | alias Pegasus.Literal 20 | alias Pegasus.Class 21 | alias Pegasus.Tokens 22 | 23 | def parser(previous \\ empty()) do 24 | previous 25 | |> choice([ 26 | bare_identifier(), 27 | paren_expression(), 28 | Literal.parser(), 29 | Class.parser(), 30 | Tokens.dot(), 31 | tagged_expression() 32 | ]) 33 | end 34 | 35 | defp bare_identifier do 36 | empty() 37 | |> Identifier.parser() 38 | |> lookahead_not(Tokens.leftarrow()) 39 | end 40 | 41 | defp paren_expression do 42 | tag( 43 | ignore(Tokens.open()) 44 | |> parsec({Expression, :expression}) 45 | |> ignore(Tokens.close()), 46 | :collect 47 | ) 48 | |> post_traverse({__MODULE__, :_group, [:ungroup]}) 49 | end 50 | 51 | defp tagged_expression do 52 | tag( 53 | ignore(Tokens.begin()) 54 | |> parsec({Expression, :expression}) 55 | |> ignore(Tokens.ender()), 56 | :collect 57 | ) 58 | |> post_traverse({__MODULE__, :_group, [:extract]}) 59 | end 60 | 61 | def _group(rest, [{:collect, [inner_args]} | args_rest], context, _, _, action) do 62 | {rest, [{action, inner_args} | args_rest], context} 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /lib/pegasus/sequence.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Sequence do 2 | @moduledoc false 3 | 4 | # Collects together parsers for all of the minor tokens for Peg parsers 5 | # 6 | # ```peg 7 | # Sequence <- Prefix* 8 | # Prefix <- AND Action # <== not implemented 9 | # / ( AND / NOT )? Suffix 10 | # Suffix <- Primary ( QUERY / STAR / PLUS )? 
11 | # ``` 12 | 13 | alias Pegasus.Tokens 14 | alias Pegasus.Primary 15 | 16 | import NimbleParsec 17 | 18 | def parser(previous \\ empty()) do 19 | previous 20 | |> tag( 21 | repeat( 22 | tag( 23 | optional(choice([Tokens.and(), Tokens.not()])) 24 | |> Primary.parser() 25 | |> optional( 26 | choice([ 27 | Tokens.query(), 28 | Tokens.star(), 29 | Tokens.plus() 30 | ]) 31 | ), 32 | :one_sequence_item 33 | ) 34 | ), 35 | :sequence 36 | ) 37 | |> post_traverse({__MODULE__, :sequence, []}) 38 | end 39 | 40 | def sequence(rest, [{:sequence, args} | rest_args], context, _, _) do 41 | new_args = Enum.map(args, &sequence_one/1) 42 | {rest, [new_args | rest_args], context} 43 | end 44 | 45 | def sequence_one({:one_sequence_item, [:and | args]}) do 46 | {:lookahead, sequence_internal(args)} 47 | end 48 | 49 | def sequence_one({:one_sequence_item, [:not | args]}) do 50 | {:lookahead_not, sequence_internal(args)} 51 | end 52 | 53 | def sequence_one({:one_sequence_item, args}), do: sequence_internal(args) 54 | 55 | defp sequence_internal([command, :query]), do: {:optional, command} 56 | 57 | defp sequence_internal([command, :star]), do: {:repeat, command} 58 | 59 | defp sequence_internal([command, :plus]), do: {:times, command} 60 | 61 | defp sequence_internal([command]), do: command 62 | end 63 | -------------------------------------------------------------------------------- /lib/pegasus/tokens.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Tokens do 2 | @moduledoc false 3 | 4 | # Collects together parsers for all of the minor tokens for Peg parsers 5 | # 6 | # ```peg 7 | # LEFTARROW <- '<-' Spacing 8 | # SLASH <- '/' Spacing 9 | # AND <- '&' Spacing 10 | # NOT <- '!' Spacing 11 | # QUERY <- '?' Spacing 12 | # STAR <- '*' Spacing 13 | # PLUS <- '+' Spacing 14 | # OPEN <- '(' Spacing 15 | # CLOSE <- ')' Spacing 16 | # DOT <- '.' 
Spacing 17 | # BEGIN <- '<' Spacing 18 | # END <- '>' Spacing 19 | # ``` 20 | 21 | import NimbleParsec 22 | alias Pegasus.Components 23 | 24 | @definitions %{ 25 | leftarrow: "<-", 26 | slash: "/", 27 | and: "&", 28 | not: "!", 29 | query: "?", 30 | star: "*", 31 | plus: "+", 32 | open: "(", 33 | close: ")", 34 | dot: ".", 35 | begin: "<", 36 | ender: ">" 37 | } 38 | 39 | for {name, token} <- @definitions do 40 | def unquote(name)(previous \\ empty()) do 41 | previous 42 | |> ignore(string(unquote(token))) 43 | |> post_traverse({__MODULE__, :tokenize, [unquote(name)]}) 44 | |> Components.spacing() 45 | end 46 | end 47 | 48 | def tokenize(rest, args, context, _, _, token) do 49 | {rest, [token | args], context} 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :pegasus, 7 | version: "0.2.6", 8 | elixir: "~> 1.12", 9 | start_permanent: Mix.env() == :prod, 10 | elixirc_paths: elixirc_paths(Mix.env()), 11 | description: "peg -> nimbleparsec", 12 | package: package(), 13 | docs: docs(), 14 | deps: deps() 15 | ] 16 | end 17 | 18 | def application do 19 | [ 20 | extra_applications: [:logger] 21 | ] 22 | end 23 | 24 | defp package do 25 | [ 26 | licenses: ["MIT"], 27 | links: %{ 28 | github: "https://github.com/ityonemo/pegasus" 29 | } 30 | ] 31 | end 32 | 33 | defp docs do 34 | [ 35 | main: "Pegasus", 36 | extras: ["README.md"] 37 | ] 38 | end 39 | 40 | defp elixirc_paths(:test), do: ["lib", "test/_support"] 41 | defp elixirc_paths(_), do: ["lib"] 42 | 43 | defp deps do 44 | [{:nimble_parsec, "~> 1.2"}, {:ex_doc, ">= 0.0.0", only: :dev, runtime: false}] 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "earmark_parser": {:hex, :earmark_parser, "1.4.41", "ab34711c9dc6212dda44fcd20ecb87ac3f3fce6f0ca2f28d4a00e4154f8cd599", [:mix], [], "hexpm", "a81a04c7e34b6617c2792e291b5a2e57ab316365c2644ddc553bb9ed863ebefa"}, 3 | "ex_doc": {:hex, :ex_doc, "0.34.2", "13eedf3844ccdce25cfd837b99bea9ad92c4e511233199440488d217c92571e8", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "5ce5f16b41208a50106afed3de6a2ed34f4acfd65715b82a0b84b49d995f95c1"}, 4 | "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, 5 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 6 | "makeup_erlang": {:hex, 
:makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, 7 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 8 | } 9 | -------------------------------------------------------------------------------- /test/_assets/peg_for_peg.y: -------------------------------------------------------------------------------- 1 | Grammar <- Spacing Definition+ EndOfFile 2 | 3 | Definition <- Identifier LEFTARROW Expression 4 | Expression <- Sequence ( SLASH Sequence )* 5 | Sequence <- Prefix* 6 | Prefix <- AND Action 7 | / ( AND | NOT )? Suffix 8 | Suffix <- Primary ( QUERY / STAR / PLUS )? 9 | Primary <- Identifier !LEFTARROW 10 | / OPEN Expression CLOSE 11 | / Literal 12 | / Class 13 | / DOT 14 | / Action 15 | / BEGIN 16 | / END 17 | 18 | Identifier <- < IdentStart IdentCont* > Spacing 19 | IdentStart <- [a-zA-Z_] 20 | IdentCont <- IdentStart / [0-9] 21 | Literal <- ['] < ( !['] Char )* > ['] Spacing 22 | / ["] < ( !["] Char )* > ["] Spacing 23 | Class <- '[' < ( !']' Range )* > ']' Spacing 24 | Range <- Char '-' Char / Char 25 | Char <- '\\' [abefnrtv'"\[\]\\] 26 | / '\\' [0-3][0-7][0-7] 27 | / '\\' [0-7][0-7]? 28 | / '\\' '-' 29 | / !'\\' . 30 | LEFTARROW <- '<-' Spacing 31 | SLASH <- '/' Spacing 32 | AND <- '&' Spacing 33 | NOT <- '!' Spacing 34 | QUERY <- '?' Spacing 35 | STAR <- '*' Spacing 36 | PLUS <- '+' Spacing 37 | OPEN <- '(' Spacing 38 | CLOSE <- ')' Spacing 39 | DOT <- '.' Spacing 40 | Spacing <- ( Space / Comment )* 41 | Comment <- '#' ( !EndOfLine . )* EndOfLine 42 | Space <- ' ' / '\t' / EndOfLine 43 | EndOfLine <- '\r\n' / '\n' / '\r' 44 | EndOfFile <- !. 
45 | Action <- '{' < [^}]* > '}' Spacing 46 | BEGIN <- '<' Spacing 47 | END <- '>' Spacing -------------------------------------------------------------------------------- /test/_support/case.ex: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.Case do 2 | defmacro assert_parsed( 3 | value, 4 | args \\ quote do 5 | _ 6 | end 7 | ) do 8 | quote do 9 | assert {:ok, unquote(args), "", %{}, _, _} = unquote(value) 10 | end 11 | end 12 | 13 | defmacro assert_parser(value, parser) do 14 | quote bind_quoted: [value: value, parser: parser] do 15 | assert {:ok, [^parser], "", %{}, _, _} = value 16 | end 17 | end 18 | 19 | defmacro refute_parsed(value = {_, _, [source]}) do 20 | quote bind_quoted: [value: value, source: source] do 21 | case value do 22 | error when elem(error, 0) == :error -> 23 | assert {:error, _msg, _rest, _context, _, _} = value 24 | 25 | _ -> 26 | assert {:ok, [], ^source, %{}, _, _} = value 27 | end 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /test/pegasus_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest do 2 | use ExUnit.Case, async: true 3 | 4 | require Pegasus 5 | import PegasusTest.Case 6 | 7 | Pegasus.parser_from_string("char_range <- [a-z]", char_range: [parser: true]) 8 | 9 | describe "char_range works" do 10 | test "char_range" do 11 | assert_parsed(char_range("a")) 12 | refute_parsed(char_range("A")) 13 | end 14 | end 15 | 16 | Pegasus.parser_from_string("literal <- 'foo'", literal: [parser: true]) 17 | 18 | describe "literal works" do 19 | test "literal" do 20 | assert_parsed(literal("foo")) 21 | refute_parsed(literal("bar")) 22 | end 23 | end 24 | 25 | Pegasus.parser_from_string("sequence <- 'foo' 'bar'", sequence: [parser: true]) 26 | 27 | describe "sequence works" do 28 | test "sequence" do 29 | assert_parsed(sequence("foobar")) 30 | refute_parsed(sequence("foo")) 31 | end 32 | end 33 | 34 | Pegasus.parser_from_string("lookahead <- &'f' 'foo'", lookahead: [parser: true]) 35 | 36 | describe "lookahead works" do 37 | test "lookahead" do 38 | assert_parsed(lookahead("foo")) 39 | end 40 | end 41 | 42 | Pegasus.parser_from_string("lookahead_not <- !'aaa' [a-z][a-z][a-z]", 43 | lookahead_not: [parser: true] 44 | ) 45 | 46 | describe "lookahead_not works" do 47 | test "lookahead_not" do 48 | assert_parsed(lookahead_not("aab")) 49 | refute_parsed(lookahead_not("aaa")) 50 | end 51 | end 52 | 53 | Pegasus.parser_from_string("optional <- 'foo' 'bar'?", optional: [parser: true]) 54 | 55 | describe "optional works" do 56 | test "optional" do 57 | assert_parsed(optional("foo")) 58 | assert_parsed(optional("foobar")) 59 | refute_parsed(optional("funbar")) 60 | assert {:ok, ["foo"], "baz", _, _, _} = optional("foobaz") 61 | end 62 | end 63 | 64 | Pegasus.parser_from_string("repeat <- 'foo' 'bar'*", repeat: [parser: true]) 65 | 66 | describe "repeat works" do 67 | test "repeat" do 68 | assert_parsed(repeat("foo")) 69 | assert_parsed(repeat("foobar")) 70 | assert_parsed(repeat("foobarbar")) 71 | refute_parsed(repeat("funbar")) 72 | end 73 | end 74 | 75 | Pegasus.parser_from_string("times <- 'foo' 'bar'+", times: [parser: true]) 76 | 77 | describe "times works" do 78 | test "times" do 79 | refute_parsed(times("foo")) 80 | assert_parsed(times("foobar")) 81 | assert_parsed(times("foobarbar")) 82 | refute_parsed(times("funbar")) 83 | end 84 | end 85 | 86 | Pegasus.parser_from_string( 87 | """ 88 
| identifier <- 'foo' IDENTIFIER # plus a comment, why not 89 | IDENTIFIER <- 'bar' 90 | """, 91 | identifier: [parser: true] 92 | ) 93 | 94 | describe "identifiers work" do 95 | test "identifier" do 96 | assert_parsed(identifier("foobar")) 97 | refute_parsed(identifier("foo")) 98 | refute_parsed(identifier("bar")) 99 | end 100 | end 101 | 102 | Pegasus.parser_from_string("choice <- 'foo' / 'bar'", choice: [parser: true]) 103 | 104 | describe "choice works" do 105 | test "choice" do 106 | assert_parsed(choice("foo")) 107 | assert_parsed(choice("bar")) 108 | refute_parsed(choice("baz")) 109 | end 110 | end 111 | 112 | Pegasus.parser_from_string("dumb_parens <- ('foo' [a-z]) 'bar' ", dumb_parens: [parser: true]) 113 | 114 | describe "dumb parens work" do 115 | test "dumb_parens" do 116 | assert_parsed(dumb_parens("fooabar")) 117 | refute_parsed(dumb_parens("fooZbar")) 118 | refute_parsed(dumb_parens("foo")) 119 | refute_parsed(dumb_parens("fooa")) 120 | refute_parsed(dumb_parens("bar")) 121 | end 122 | end 123 | 124 | Pegasus.parser_from_string("times_parens <- ('foo' [a-z])+ 'bar' ", 125 | times_parens: [parser: true] 126 | ) 127 | 128 | describe "smart parens work" do 129 | test "with times" do 130 | assert_parsed(times_parens("fooabar")) 131 | assert_parsed(times_parens("fooafooabar")) 132 | assert_parsed(times_parens("fooafooafooabar")) 133 | refute_parsed(times_parens("bar")) 134 | end 135 | end 136 | 137 | Pegasus.parser_from_string("begin_end <- < 'foo' [a-z] > 'bar' ", begin_end: [parser: true]) 138 | 139 | describe "begin-end works" do 140 | test "to group" do 141 | assert_parsed(begin_end("fooabar")) 142 | refute_parsed(begin_end("bar")) 143 | end 144 | end 145 | 146 | Pegasus.parser_from_string("dot <- 'foo' .", dot: [parser: true]) 147 | 148 | describe "dot works" do 149 | test "dot" do 150 | assert_parsed(dot("fooa")) 151 | refute_parsed(dot("foba")) 152 | refute_parsed(dot("foo")) 153 | end 154 | end 155 | 156 | describe "post_traverse settings work" do 157 | Pegasus.parser_from_string("post_traverse_ungrouped <- 'foo' [a-z]", 158 | post_traverse_ungrouped: [ 159 | parser: true, 160 | post_traverse: {:post_traverse_ungrouped, []} 161 | ] 162 | ) 163 | 164 | defp post_traverse_ungrouped("", [?a, "foo"], context, {1, 0}, 4) do 165 | {"", [], Map.put(context, :parsed, true)} 166 | end 167 | 168 | test "ungrouped content is presented as a list" do 169 | result = assert_parsed(post_traverse_ungrouped("fooa")) 170 | assert {:ok, [], "", %{parsed: true}, _, _} = result 171 | end 172 | 173 | Pegasus.parser_from_string("post_traverse_grouped <- ('foo' [a-z])", 174 | post_traverse_grouped: [ 175 | parser: true, 176 | post_traverse: {:post_traverse_grouped, [:test]} 177 | ] 178 | ) 179 | 180 | defp post_traverse_grouped("", [?a, "foo"], context, {1, 0}, 4, :test) do 181 | {"", [], Map.put(context, :parsed, true)} 182 | end 183 | 184 | test "grouped content is merged" do 185 | result = assert_parsed(post_traverse_grouped("fooa")) 186 | assert {:ok, [], "", %{parsed: true}, _, _} = result 187 | end 188 | 189 | Pegasus.parser_from_string("post_traverse_extracted <- <'foo' [a-z]> 'bar'", 190 | post_traverse_extracted: [ 191 | parser: true, 192 | post_traverse: {:post_traverse_extracted, [:test]} 193 | ] 194 | ) 195 | 196 | defp post_traverse_extracted("", ["fooa"], context, {1, 0}, _, :test) do 197 | {"", [], Map.put(context, :parsed, true)} 198 | end 199 | 200 | test "extracted content is merged and isolated" do 201 | result = assert_parsed(post_traverse_extracted("fooabar")) 202 | assert 
{:ok, [], "", %{parsed: true}, _, _} = result 203 | end 204 | end 205 | 206 | describe "tagging" do 207 | Pegasus.parser_from_string("tagged_true <- 'foo' [a-z]", 208 | tagged_true: [parser: true, tag: true] 209 | ) 210 | 211 | test "set to the parser name when true" do 212 | result = assert_parsed(tagged_true("fooa")) 213 | assert {:ok, [tagged_true: ["foo", ?a]], "", %{}, _, _} = result 214 | end 215 | 216 | Pegasus.parser_from_string("tagged_name <- 'foo' [a-z]", 217 | tagged_name: [parser: true, tag: :name] 218 | ) 219 | 220 | test "customizable" do 221 | result = assert_parsed(tagged_name("fooa")) 222 | assert {:ok, [name: ["foo", ?a]], "", %{}, _, _} = result 223 | end 224 | end 225 | 226 | describe "collected" do 227 | Pegasus.parser_from_string("collecting <- 'foo' [a-z]", 228 | collecting: [parser: true, collect: true] 229 | ) 230 | 231 | test "content is merged and isolated" do 232 | result = assert_parsed(collecting("fooa")) 233 | assert {:ok, ["fooa"], "", %{}, _, _} = result 234 | end 235 | end 236 | 237 | describe "tokening" do 238 | Pegasus.parser_from_string("token_true <- 'foo' [a-z]", 239 | token_true: [parser: true, token: true] 240 | ) 241 | 242 | test "set to the parser name when true" do 243 | result = assert_parsed(token_true("fooa")) 244 | assert {:ok, [:token_true], "", %{}, _, _} = result 245 | end 246 | 247 | Pegasus.parser_from_string("token_name <- 'foo' [a-z]", 248 | token_name: [parser: true, token: :name] 249 | ) 250 | 251 | test "customizable" do 252 | result = assert_parsed(token_name("fooa")) 253 | assert {:ok, [:name], "", %{}, _, _} = result 254 | end 255 | end 256 | 257 | describe "ignore" do 258 | Pegasus.parser_from_string( 259 | """ 260 | ignore_outside <- 'foo' ignore_inside 261 | ignore_inside <- 'bar' 262 | """, 263 | ignore_outside: [parser: true], 264 | ignore_inside: [ignore: true] 265 | ) 266 | 267 | test "ignores when you expect when true" do 268 | result = assert_parsed(ignore_outside("foobar")) 269 | assert {:ok, ["foo"], "", %{}, _, _} = result 270 | end 271 | end 272 | 273 | describe "start_position" do 274 | Pegasus.parser_from_string( 275 | """ 276 | start_position <- 'foo' needs_position 277 | needs_position <- 'bar' 278 | """, 279 | start_position: [parser: true], 280 | needs_position: [start_position: true, post_traverse: :post_traverse_needs_position] 281 | ) 282 | 283 | defp post_traverse_needs_position("", ["bar", %{line: 1, column: 4}], context, _, _) do 284 | {"", [], context} 285 | end 286 | 287 | test "can be given a start position" do 288 | assert_parsed(start_position("foobar")) 289 | end 290 | end 291 | 292 | describe "alias" do 293 | Pegasus.parser_from_string( 294 | """ 295 | aliased <- "not correct" 296 | """, 297 | aliased: [parser: true, alias: :substitution] 298 | ) 299 | 300 | import NimbleParsec 301 | 302 | defcombinatorp(:substitution, string("correct")) 303 | 304 | test "aliasing works" do 305 | assert_parsed(aliased("correct")) 306 | end 307 | end 308 | end 309 | -------------------------------------------------------------------------------- /test/pegasus_test/class_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.ClassTest do 2 | use ExUnit.Case, async: true 3 | 4 | alias Pegasus.Class 5 | 6 | import NimbleParsec 7 | import PegasusTest.Case 8 | 9 | defparsec(:parser, Class.parser()) 10 | 11 | describe "the class parser" do 12 | test "produces a single char class" do 13 | assert_parser(parser("[a]"), {:char, ~C(a)}) 14 | end 15 | 16 | test 
"produces a char range class" do 17 | assert_parser(parser("[a-z]"), {:char, [?a..?z]}) 18 | end 19 | 20 | test "can match multiple chars" do 21 | assert_parser(parser("[ac]"), {:char, ~C(ac)}) 22 | end 23 | 24 | test "can match an escaped chars" do 25 | assert_parser(parser(~S"[\nc]"), {:char, ~c(\nc)}) 26 | # assert_parser(parser(~S"[\\c]"), {:char, ~C(\c)}) 27 | end 28 | 29 | test "can match a char and a range" do 30 | assert_parser(parser("[ad-z]"), {:char, [?a, ?d..?z]}) 31 | end 32 | 33 | test "can negate a char" do 34 | assert_parser(parser("[^a]"), {:char, not: ?a}) 35 | end 36 | 37 | test "can negate a range" do 38 | assert_parser(parser("[^a-z]"), {:char, not: ?a..?z}) 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /test/pegasus_test/components_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.ComponentsTest do 2 | # tests basic components in the PEG grammar 3 | use ExUnit.Case, async: true 4 | 5 | alias Pegasus.Components 6 | 7 | import NimbleParsec 8 | import PegasusTest.Case 9 | 10 | for component <- ~w(end_of_file end_of_line space comment spacing char range)a do 11 | defparsecp(component, apply(Components, component, [])) 12 | end 13 | 14 | describe "end of file" do 15 | test "parses end of file" do 16 | assert_parsed(end_of_file("")) 17 | end 18 | 19 | test "fails if it's not eof" do 20 | refute_parsed(end_of_file("a")) 21 | end 22 | end 23 | 24 | describe "end of line" do 25 | test "parses end of line" do 26 | assert_parsed(end_of_line("\n"), ~c'\n') 27 | assert_parsed(end_of_line("\r"), ~c'\r') 28 | assert_parsed(end_of_line("\n\r"), ["\n\r"]) 29 | end 30 | 31 | test "fails if it's not eol" do 32 | refute_parsed(end_of_line("")) 33 | refute_parsed(end_of_line("a")) 34 | refute_parsed(end_of_line("a\n")) 35 | end 36 | end 37 | 38 | describe "space" do 39 | test "parses spaces" do 40 | assert_parsed(space(" "), ~c' ') 41 | assert_parsed(space("\t"), ~c'\t') 42 | assert_parsed(space("\n"), ~c'\n') 43 | assert_parsed(space("\n\r"), ["\n\r"]) 44 | end 45 | 46 | test "fails non-spaces" do 47 | refute_parsed(space("")) 48 | refute_parsed(space("a")) 49 | refute_parsed(space("a ")) 50 | refute_parsed(space("a\t")) 51 | end 52 | end 53 | 54 | describe "comment" do 55 | test "parses end of line comments" do 56 | assert_parsed(comment("# this is a comment\n")) 57 | assert_parsed(comment("# this is a # comment\n")) 58 | assert_parsed(comment("# windows comments\n\r")) 59 | end 60 | 61 | test "fails comments that are eof'd" do 62 | refute_parsed(comment("# this comment fails")) 63 | end 64 | 65 | test "fails non-comments" do 66 | refute_parsed(comment("a# comment\n")) 67 | end 68 | end 69 | 70 | describe "spacing" do 71 | test "parses spaces" do 72 | assert_parsed(spacing(" ")) 73 | assert_parsed(spacing(" ")) 74 | assert_parsed(spacing(" \t")) 75 | end 76 | 77 | test "parses comments" do 78 | assert_parsed(spacing("# comment\n")) 79 | assert_parsed(spacing("# comment\n# comment2\n")) 80 | end 81 | 82 | test "parses space then comments" do 83 | assert_parsed(spacing(" #comment\n")) 84 | end 85 | 86 | test "parses nothing" do 87 | assert_parsed(spacing("")) 88 | end 89 | 90 | test "fails non-space, non-comments" do 91 | refute_parsed(spacing("foo")) 92 | end 93 | end 94 | 95 | describe "char" do 96 | test "parses basic characters" do 97 | assert_parsed(char(" "), ~C' ') 98 | assert_parsed(char("f"), ~C'f') 99 | assert_parsed(char("A"), ~C'A') 100 | end 
101 | 102 |     test "parses escaped values" do 103 |       assert_parsed(char(~S"\a"), ~c'\a') 104 |       assert_parsed(char(~S"\b"), ~c'\b') 105 |       assert_parsed(char(~S"\n"), ~c'\n') 106 |       assert_parsed(char(~S"\f"), ~c'\f') 107 |       assert_parsed(char(~S"\e"), ~c'\e') 108 |       assert_parsed(char(~S"\r"), ~c'\r') 109 |       assert_parsed(char(~S"\t"), ~c'\t') 110 |       assert_parsed(char(~S"\v"), ~c'\v') 111 | 112 |       assert_parsed(char(~S(\')), ~C(')) 113 |       assert_parsed(char(~S(\")), ~C(")) 114 |       assert_parsed(char(~S(\[)), ~C([)) 115 |       assert_parsed(char(~S(\])), ~C(])) 116 |       assert_parsed(char(~S(\-)), ~C(-)) 117 |       # \\ -> '\' 118 |       assert_parsed(char(<<92, 92>>), [92]) 119 |     end 120 | 121 |     test "parses octal values" do 122 |       assert_parsed(char(~S(\123)), [0o123]) 123 |       assert_parsed(char(~S(\77)), [0o77]) 124 |       assert_parsed(char(~S(\7)), [0o7]) 125 |     end 126 | 127 |     test "fails when nothing" do 128 |       refute_parsed(char("")) 129 |     end 130 |   end 131 | 132 |   describe "range" do 133 |     test "correctly produces a range" do 134 |       assert_parsed(range(~S(a-z)), [?a..?z]) 135 |     end 136 | 137 |     test "correctly produces a range with octal escape" do 138 |       assert_parsed(range(~S(\123-Z)), [0o123..?Z]) 139 |     end 140 | 141 |     test "produces a correct range matcher with octal" do 142 |       assert_parsed(range(~S(\141-\172)), [?a..?z]) 143 |     end 144 | 145 |     test "correctly parses a single char" do 146 |       assert_parsed(range(~S(a)), [?a]) 147 |     end 148 | 149 |     test "correctly parses a single escaped char" do 150 |       assert_parsed(range(~S(\123)), [0o123]) 151 |       assert_parsed(range(<<92, 92>>), [92]) 152 |     end 153 |   end 154 | end 155 | -------------------------------------------------------------------------------- /test/pegasus_test/identifier_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.IdentifierTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   alias Pegasus.Identifier 5 | 6 |   import NimbleParsec 7 |   import PegasusTest.Case 8 | 9 |   defparsec(:parser, Identifier.parser(empty(), true)) 10 |   defparsec(:definer, Identifier.parser(empty())) 11 | 12 |   describe "the identifier parser" do 13 |     test "produces a tagged identifier parser" do 14 |       assert_parser(parser("foo"), {:parser, :foo}) 15 |     end 16 | 17 |     test "fails on a non-identifier" do 18 |       refute_parsed(parser("5oo")) 19 |     end 20 |   end 21 | 22 |   describe "the identifier definer" do 23 |     test "produces a tagged identifier definition" do 24 |       assert_parser(definer("foo"), {:identifier, :foo}) 25 |     end 26 | 27 |     test "fails on a non-identifier" do 28 |       refute_parsed(definer("5oo")) 29 |     end 30 |   end 31 | end 32 | -------------------------------------------------------------------------------- /test/pegasus_test/literal_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.LiteralTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   alias Pegasus.Literal 5 | 6 |   import NimbleParsec 7 |   import PegasusTest.Case 8 | 9 |   defparsec(:parser, Literal.parser()) 10 | 11 |   describe "the literal parser" do 12 |     test "produces a literal string matcher with double quotes" do 13 |       assert_parser(parser(~S("foo")), {:literal, "foo"}) 14 |     end 15 | 16 |     test "produces a literal string matcher with single quotes" do 17 |       assert_parser(parser(~S('foo')), {:literal, "foo"}) 18 |     end 19 | 20 |     test "produces a literal string matcher with double quotes and escaped quote" do 21 |       assert_parser(parser(~S("\"foo\"")), {:literal, ~S("foo")}) 22 |     end 23 | 24 |     test "produces a literal 
string matcher with single quotes and escaped quote" do 25 |       assert_parser(parser(~S('\'foo\'')), {:literal, ~S('foo')}) 26 |     end 27 | 28 |     test "produces a literal string matcher with double quotes and escaped return" do 29 |       assert_parser(parser(~S("foo\n")), {:literal, ~s(foo\n)}) 30 |     end 31 | 32 |     test "produces a literal string matcher with double quotes and escaped number" do 33 |       assert_parser(parser(~S("fo\157")), {:literal, ~s(foo)}) 34 |     end 35 |   end 36 | end 37 | -------------------------------------------------------------------------------- /test/pegasus_test/sequence_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.SequenceTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   alias Pegasus.Sequence 5 | 6 |   import NimbleParsec 7 |   import PegasusTest.Case 8 | 9 |   defparsec(:parser, Sequence.parser()) 10 | 11 |   describe "the sequence parser" do 12 |     test "produces a single normal sequence" do 13 |       assert_parser(parser(~S("foo")), literal: "foo") 14 |     end 15 | 16 |     test "produces sequential normal sequences" do 17 |       assert_parser(parser(~S("foo" 'bar')), literal: "foo", literal: "bar") 18 |     end 19 | 20 |     test "identifies lookahead" do 21 |       assert_parser(parser(~S(&"foo")), lookahead: {:literal, "foo"}) 22 |     end 23 | 24 |     test "identifies lookahead_not" do 25 |       assert_parser(parser(~S(!"foo")), lookahead_not: {:literal, "foo"}) 26 |     end 27 | 28 |     test "identifies optional" do 29 |       assert_parser(parser(~S("foo"?)), optional: {:literal, "foo"}) 30 |     end 31 | 32 |     test "identifies repeat" do 33 |       assert_parser(parser(~S("foo"*)), repeat: {:literal, "foo"}) 34 |     end 35 | 36 |     test "identifies times" do 37 |       assert_parser(parser(~S("foo"+)), times: {:literal, "foo"}) 38 |     end 39 | 40 |     test "identifies lookahead_not, times" do 41 |       assert_parser(parser(~S(!"foo"+)), lookahead_not: {:times, {:literal, "foo"}}) 42 |     end 43 |   end 44 | end 45 | -------------------------------------------------------------------------------- /test/regresssion_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.RegressionTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   require Pegasus 5 |   import PegasusTest.Case 6 | 7 |   Pegasus.parser_from_string(~S"slash <- [\\t]", slash: [parser: true]) 8 | 9 |   describe "slash in range works" do 10 |     test "slash" do 11 |       assert_parsed(slash("t")) 12 |       assert_parsed(slash("\\")) 13 |       refute_parsed(slash("a")) 14 |     end 15 |   end 16 | 17 |   Pegasus.parser_from_string( 18 |     ~S""" 19 |     hex <- [0-9a-fA-F] 20 | 21 |     char_escape 22 |         <- "\\x" hex hex 23 |          / "\\u{" hex+ "}" 24 |          / "\\" [nr\\t'"] 25 |     """, 26 |     char_escape: [parser: true] 27 |   ) 28 | 29 |   describe "char_escape" do 30 |     test "works with hex" do 31 |       assert_parsed(char_escape(~S"\x00")) 32 |     end 33 | 34 |     test "works with u" do 35 |       assert_parsed(char_escape(~S"\u{0a0a}")) 36 |     end 37 | 38 |     test "works with \\n" do 39 |       assert_parsed(char_escape(~S"\n")) 40 |     end 41 | 42 |     test "works with \\r" do 43 |       assert_parsed(char_escape(~S"\r")) 44 |     end 45 | 46 |     test "works with \\\\" do 47 |       assert_parsed(char_escape(~S"\\")) 48 |     end 49 | 50 |     test "works with \\t" do 51 |       assert_parsed(char_escape(~S"\t")) 52 |     end 53 | 54 |     test "works with \\'" do 55 |       assert_parsed(char_escape(~S"\'")) 56 |     end 57 | 58 |     test "works with \\\"" do 59 |       assert_parsed(char_escape(~S(\"))) 60 |     end 61 |   end 62 | 63 |   Pegasus.parser_from_string( 64 |     ~S""" 65 |     STRINGLITERALSINGLE <- "\"" string_char* "\"" 66 |     
string_char <- [^\\"\n] 67 | """, 68 | STRINGLITERALSINGLE: [parser: :string_literal] 69 | ) 70 | 71 | describe "string literal works" do 72 | test "optional, not used" do 73 | assert_parsed(string_literal(~S("string_literal"))) 74 | end 75 | end 76 | 77 | Pegasus.parser_from_string( 78 | ~S""" 79 | ox80_oxBF <- [\200-\277] 80 | oxF4 <- '\364' 81 | ox80_ox8F <- [\200-\217] 82 | oxF1_oxF3 <- [\361-\363] 83 | oxF0 <- '\360' 84 | ox90_0xBF <- [\220-\277] 85 | oxEE_oxEF <- [\356-\357] 86 | oxED <- '\355' 87 | ox80_ox9F <- [\200-\237] 88 | oxE1_oxEC <- [\341-\354] 89 | oxE0 <- '\340' 90 | oxA0_oxBF <- [\240-\277] 91 | oxC2_oxDF <- [\302-\337] 92 | 93 | mb_utf8_literal <- 94 | oxF4 ox80_ox8F ox80_oxBF ox80_oxBF 95 | / oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF 96 | / oxF0 ox90_0xBF ox80_oxBF ox80_oxBF 97 | / oxEE_oxEF ox80_oxBF ox80_oxBF 98 | / oxED ox80_ox9F ox80_oxBF 99 | / oxE1_oxEC ox80_oxBF ox80_oxBF 100 | / oxE0 oxA0_oxBF ox80_oxBF 101 | / oxC2_oxDF ox80_oxBF 102 | """, 103 | mb_utf8_literal: [parser: true] 104 | ) 105 | 106 | describe "utf-8 descriptor" do 107 | test "works" do 108 | assert_parsed(mb_utf8_literal("🚀")) 109 | end 110 | end 111 | 112 | Pegasus.parser_from_string( 113 | ~S""" 114 | byte_range <- [\302-\304] 115 | """, 116 | byte_range: [parser: true] 117 | ) 118 | 119 | describe "single byte range" do 120 | test "works" do 121 | assert_parsed(byte_range(<<0o303>>)) 122 | end 123 | end 124 | 125 | Pegasus.parser_from_string( 126 | ~S""" 127 | octal_escape_three_digit <- '\065' 128 | octal_escape_two_digit <- '\65' 129 | octal_escape_one_digit <- '\5' 130 | """, 131 | octal_escape_three_digit: [parser: true], 132 | octal_escape_two_digit: [parser: true], 133 | octal_escape_one_digit: [parser: true] 134 | ) 135 | 136 | describe "octal escape" do 137 | test "works with a leading zero" do 138 | assert_parsed(octal_escape_three_digit("5")) 139 | end 140 | 141 | test "works with two digit" do 142 | assert_parsed(octal_escape_two_digit("5")) 143 | end 144 | 145 | test "works with one digit" do 146 | assert_parsed(octal_escape_one_digit(<<5>>)) 147 | end 148 | end 149 | end 150 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | --------------------------------------------------------------------------------