├── .formatter.exs ├── .gitignore ├── README.md ├── example └── sql_parser.ex ├── lib ├── pegasus.ex └── pegasus │ ├── ast.ex │ ├── class.ex │ ├── components.ex │ ├── expression.ex │ ├── grammar.ex │ ├── identifier.ex │ ├── literal.ex │ ├── primary.ex │ ├── sequence.ex │ └── tokens.ex ├── mix.exs ├── mix.lock └── test ├── _assets └── peg_for_peg.y ├── _support └── case.ex ├── pegasus_test.exs ├── pegasus_test ├── class_test.exs ├── components_test.exs ├── identifier_test.exs ├── literal_test.exs └── sequence_test.exs ├── regresssion_test.exs └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | pegasus-*.tar 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pegasus 2 | 3 | Instrumentable Peg Parser for Elixir, based on NimbleParsec 4 | 5 | For documentation on this peg format: 6 | 7 | https://www.piumarta.com/software/peg/peg.1.html 8 | 9 | ## Installation 10 | 11 | ```elixir 12 | def deps do 13 | [ 14 | {:pegasus, "~> 0.2.4"} 15 | ] 16 | end 17 | ``` 18 | 19 | 20 | -------------------------------------------------------------------------------- /example/sql_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Example.Parser do 2 | @moduledoc """ 3 | Parses a SQL statement into a simplistic AST. 4 | 5 | The output is an AST with nodes in the following format: 6 | ``` 7 | %{ 8 | "type" => 9 | "opts" => [], 10 | "children" => [node()] 11 | } 12 | ``` 13 | Where `opts` is any extra matadata associated with that particular node, such 14 | as an identifier's name or a SELECT statement's select list or the operator of 15 | a comparision expression. 16 | """ 17 | require Pegasus 18 | require Logger 19 | 20 | import NimbleParsec 21 | 22 | @options [ 23 | # Exported sub-parsers 24 | expression: [parser: true, export: true], 25 | 26 | # Post traversed nodes get transformed into proper AST nodes. 27 | # This happens either with the `terminal`, `generic` or node specific post 28 | # traversal function. 
29 | ExpressionBinary: [tag: "expression_binary", post_traverse: :post_traverser], 30 | ExpressionFunCall: [tag: "fun_call", post_traverse: :post_traverser], 31 | Identifier: [tag: "identifier", post_traverse: :terminal_post_traverser], 32 | StatementSelect: [tag: "select", post_traverse: :statement_post_traverser], 33 | TableGet: [tag: "table_get", post_traverse: :post_traverser], 34 | TokenDynamic: [tag: "token_dynamic", post_traverse: :terminal_post_traverser], 35 | 36 | # Tagged Productions 37 | SelectList: [tag: "select_list"], 38 | StatementSubquery: [tag: "subquery"], 39 | PredicateGroupBy: [tag: "group_by"], 40 | PredicateWhere: [tag: "where"], 41 | 42 | # Constants 43 | ConstantInteger: [ 44 | tag: "constant_integer", 45 | collect: true, 46 | post_traverse: :terminal_post_traverser 47 | ], 48 | ConstantString: [ 49 | tag: "constant_string", 50 | collect: true, 51 | post_traverse: :terminal_post_traverser 52 | ], 53 | 54 | # These are node-level options. 55 | TokenDistinct: [tag: {:opt, "distinct"}], 56 | TokenEqual: [tag: {:opt, "operator"}], 57 | TokenAnd: [tag: {:opt, "operator"}], 58 | TokenOr: [tag: {:opt, "operator"}], 59 | TokenPlus: [tag: {:opt, "operator"}], 60 | SequenceGroupBy: [tag: {:opt, "group_by_list"}], 61 | 62 | # Ignore Tokens 63 | Spacing: [ignore: true], 64 | TokenComma: [ignore: true], 65 | TokenSemiColon: [ignore: true], 66 | TokenOpenParen: [ignore: true], 67 | TokenCloseParen: [ignore: true], 68 | TokenFrom: [ignore: true], 69 | TokenGroupBy: [ignore: true], 70 | TokenWhere: [ignore: true], 71 | TokenSelect: [ignore: true] 72 | ] 73 | 74 | Pegasus.parser_from_string( 75 | """ 76 | # Exported top level parser. 77 | SQL <- Statement 78 | 79 | # Exported partial expression parser. 80 | # Lower-case the name here to allow for exporting into Elixir. 81 | expression <- Expression 82 | 83 | Statement <- StatementSelect Spacing TokenSemiColon 84 | 85 | StatementSelect <- 86 | Spacing TokenSelect 87 | (Spacing TokenDistinct Spacing)? 88 | Spacing SelectList 89 | Spacing TokenFrom 90 | Spacing SelectTarget 91 | (Spacing PredicateWhere)? 92 | (Spacing PredicateGroupBy)? 93 | 94 | SelectList <- TokenStar / Sequence 95 | 96 | SelectTarget <- TableGet / StatementSubquery 97 | 98 | TableGet <- Identifier 99 | 100 | StatementSubquery <- TokenOpenParen StatementSelect TokenCloseParen 101 | 102 | PredicateGroupBy <- TokenGroupBy Spacing SequenceGroupBy 103 | 104 | SequenceGroupBy <- Sequence 105 | 106 | PredicateWhere <- TokenWhere Spacing Expression Spacing 107 | 108 | Sequence <- Expression ( Spacing? TokenComma Spacing? Sequence )* 109 | 110 | Expression <- 111 | TokenOpenParen Spacing Expression Spacing TokenCloseParen 112 | / ExpressionBinary 113 | / Expr 114 | 115 | ExpressionBinary <- 116 | Expr Spacing Operator Spacing Expression 117 | #/ Expr (Spacing ExpressionBinaryRest)* 118 | 119 | ExpressionBinaryRest <- 120 | Operator Spacing ExpressionBinary 121 | 122 | Operator <- 123 | TokenEqual 124 | / TokenAnd 125 | / TokenOr 126 | / TokenPlus 127 | 128 | Expr <- 129 | ExpressionFunCall 130 | / ExpressionConstant 131 | 132 | ExpressionFunCall <- 133 | Identifier Spacing TokenOpenParen Spacing Expression Spacing TokenCloseParen 134 | 135 | ExpressionConstant <- 136 | TokenDynamic 137 | / ConstantString 138 | / Identifier 139 | / ConstantInteger 140 | 141 | Identifier <- < IdentStart IdentCont* > Spacing 142 | IdentStart <- [a-zA-Z_\.] 143 | IdentCont <- IdentStart / [0-9] 144 | 145 | # These are semi-keyword semi-constants that get defined at runtime. 
146 | TokenDynamic <- TokenCurrentDate 147 | 148 | # Tokens 149 | TokenDistinct <- < [Dd][Ii][Ss][Tt][Ii][Nn][Cc][Tt] > 150 | TokenFrom <- < [Ff][Rr][Oo][Mm] > 151 | TokenGroupBy <- < [Gg][Rr][Oo][Uu][Pp] > Spacing < [Bb][Yy] > 152 | TokenSelect <- < [Ss][Ee][Ll][Ee][Cc][Tt] > 153 | TokenWhere <- < [Ww][Hh][Ee][Rr][Ee] > 154 | TokenCurrentDate <- < [Cc][Uu][Rr][Rr][Ee][Nn][Tt][_][Dd][Aa][Tt][Ee] > 155 | 156 | TokenSemiColon <- ";" 157 | TokenComma <- "," 158 | TokenStar <- "*" 159 | TokenOpenParen <- "(" 160 | TokenCloseParen <- ")" 161 | TokenEqual <- "=" 162 | TokenPlus <- "+" 163 | TokenAnd <- < [Aa][Nn][Dd] > 164 | TokenOr <- < [Oo][Rr] > 165 | 166 | # Constants 167 | ConstantInteger <- [0-9]* 168 | ConstantString <- ['] < ( !['] . )* > ['] 169 | 170 | # Misc 171 | Spacing <- ( Space / Comment )* 172 | Space <- ' ' / '\t' / EndOfLine 173 | Comment <- '//' ( !EndOfLine . )* EndOfLine 174 | EndOfLine <- '\r\n' / '\n' / '\r' 175 | """, 176 | @options 177 | ) 178 | 179 | defparsec(:parse, parsec(:SQL)) 180 | 181 | @doc "Prints the AST in a relativly reasonable format." 182 | def print(ast) do 183 | Logger.debug(inspect(ast, pretty: true, width: 150)) 184 | end 185 | 186 | @doc """ 187 | Prints the AST in a relativly reasonable format with the line and file of the 188 | caller. 189 | """ 190 | def print(ast, file_caller, line_caller) do 191 | Logger.debug("#{file_caller}:#{line_caller} #{inspect(ast, pretty: true, width: 150)}") 192 | end 193 | 194 | # The generic post_traverser is a helper to form a generic node from a parse node. 195 | # This basically just flattens the parse node into a consistent AST structure. 196 | defp post_traverser(rest, args, context, _line, _offset) do 197 | [{node_type, node}] = args 198 | {opts, children} = reduce_parse_node(node, {[], []}) 199 | 200 | node = %{ 201 | "type" => node_type, 202 | "opts" => opts, 203 | "children" => Enum.reverse(children) 204 | } 205 | 206 | {rest, [node], context} 207 | end 208 | 209 | defp reduce_parse_node([], acc), do: acc 210 | 211 | defp reduce_parse_node([{{:opt, type}, opt} | rest], {opts_acc, children_acc}) do 212 | reduce_parse_node(rest, {[{type, opt} | opts_acc], children_acc}) 213 | end 214 | 215 | defp reduce_parse_node([child | rest], {opts_acc, children_acc}) do 216 | reduce_parse_node(rest, {opts_acc, [child | children_acc]}) 217 | end 218 | 219 | # The statement_post_traverser transforms statement parse nodes into AST node, as they 220 | # are a bit more complex. 221 | defp statement_post_traverser(rest, [{"select", node_opts}] = args, context, _, _) do 222 | group_by = :proplists.get_value("group_by", node_opts, nil) 223 | 224 | if group_by !== nil do 225 | gbagg_post_traverser(rest, args, context) 226 | else 227 | select_post_traverser(rest, args, context) 228 | end 229 | end 230 | 231 | # Group By Aggregate post traversal node. 232 | # Strips the group by, post-traverses on the select statement, then assembles the node. 
233 | def gbagg_post_traverser(rest, [{"select", node_opts}], context) do 234 | node_opts_raw = List.keydelete(node_opts, "group_by", 0) 235 | select_traverser_input = [{"select", node_opts_raw}] 236 | {_, [ast_select], _} = select_post_traverser(rest, select_traverser_input, context) 237 | 238 | node_opts = :proplists.get_value("group_by", node_opts) 239 | {opts, _} = reduce_parse_node(node_opts, {[], []}) 240 | ast_gbagg = %{ 241 | "type" => "gbagg", 242 | "opts" => opts, 243 | "children" => [ast_select] 244 | } 245 | 246 | {rest, [ast_gbagg], context} 247 | end 248 | 249 | # Post traverse wrapper for SELECTs. 250 | defp select_post_traverser(rest, [{"select", node_opts}], context) do 251 | select_list = :proplists.get_value("select_list", node_opts) 252 | where = :proplists.get_value("where", node_opts, []) 253 | 254 | {opts, _} = reduce_parse_node(node_opts, {[], []}) 255 | 256 | target = 257 | node_opts 258 | |> List.keydelete("select_list", 0) 259 | |> List.keydelete("where", 0) 260 | |> strip_optionals() 261 | 262 | ast_select = %{ 263 | "type" => "select", 264 | "opts" => [{"select_list", select_list} | opts], 265 | "children" => target ++ where 266 | } 267 | 268 | {rest, [ast_select], context} 269 | end 270 | 271 | # The terminal post traverser is for simple "terminal" nodes, 272 | # which are nodes with no children and basically a constant interior. 273 | defp terminal_post_traverser(rest, args, context, _line, _offset) do 274 | [{type, [node_name]}] = args 275 | 276 | node = %{ 277 | "type" => type, 278 | "opts" => [{"value", node_name}], 279 | "children" => [] 280 | } 281 | 282 | {rest, [node], context} 283 | end 284 | 285 | # Removes optionals from a parse node. 286 | defp strip_optionals(list), do: strip_optionals(list, []) 287 | 288 | defp strip_optionals([], acc), do: Enum.reverse(acc) 289 | defp strip_optionals([{{:opt, _}, _} | rest], acc), do: strip_optionals(rest, acc) 290 | defp strip_optionals([head | rest], acc), do: strip_optionals(rest, [head | acc]) 291 | end 292 | -------------------------------------------------------------------------------- /lib/pegasus.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus do 2 | @moduledoc """ 3 | converts `peg` files into `NimbleParsec` parsers. 4 | 5 | For documentation on this peg format: https://www.piumarta.com/software/peg/peg.1.html 6 | 7 | To use, drop this in your model: 8 | 9 | ``` 10 | defmodule MyModule 11 | require Pegasus 12 | 13 | Pegasus.parser_from_string(\""" 14 | foo <- "foo" "bar" 15 | \""", foo: [parser: true]) 16 | end 17 | ``` 18 | 19 | See `NimbleParsec` for the description of the output. 20 | 21 | ``` 22 | MyModule.foo("foobar") # ==> {:ok, ["foo", "bar"], ...} 23 | ``` 24 | 25 | > #### Capitalized Identifiers {: .warning} 26 | > 27 | > for capitalized identifiers, you will have to use `apply/3` to call the 28 | > function, or you may wrap it in another combinator like so: 29 | > 30 | > ```elixir 31 | > defmodule Capitalized do 32 | > require Pegasus 33 | > import NimbleParsec 34 | > 35 | > Pegasus.parser_from_string("Foo <- 'foo'") 36 | > 37 | > defparsec :parse, parsec(:Foo) 38 | > end 39 | > ``` 40 | 41 | You may also load a parser from a file using `parser_from_file/2`. 42 | 43 | ## Parser Options 44 | 45 | Parser options are passed as a keyword list after the parser defintion 46 | string (or file). 
The keys for the options are the names of the combinators, 47 | followed by a keyword list of supplied options, which are applied in the 48 | specified order: 49 | 50 | ### `:start_position` 51 | 52 | When true, drops a map `%{line: , column: , offset: }` into 53 | the arguments for this keyword at the front of its list. 54 | 55 | ### `:collect` 56 | 57 | You may collect the contents of a combinator using the `collect: true` option. 58 | If this combinator calls other combinators, they must leave only iodata (no 59 | tags, no tokens) in the arguments list. 60 | 61 | ### `:token` 62 | 63 | You may substitute the contents of any combinator with a token (usually an atom). 64 | The following conditions apply: 65 | 66 | - `token: false` - no token (default) 67 | - `token: true` - token is set to the atom name of the combinator 68 | - `token: ` - token is set to the value of setting 69 | 70 | ### `:tag` 71 | 72 | You may tag the contents of your combinator using the `:tag` option. The 73 | following conditions apply: 74 | 75 | - `tag: false` - No tag (default) 76 | - `tag: true` - Use the combinator name as the tag. 77 | - `tag: ` - Use the supplied atom as the tag. 78 | 79 | ### `:post_traverse` 80 | 81 | You may supply a post_traversal for any parser. See `NimbleParsec` for how to 82 | implement post-traversal functions. These are defined by passing a keyword list 83 | to the `parser_from_file/2` or `parser_from_string/2` function. 84 | 85 | > #### Post-traversal arguments are reversed {: .info } 86 | > 87 | > Note that the second argument for a post-traversal function receives a list of 88 | > results from traversal in *reversed* order. 89 | 90 | #### Example 91 | 92 | ``` 93 | Pegasus.parser_from_string(\""" 94 | foo <- "foo" "bar" 95 | \""", 96 | foo: [post_traverse: {:some_function, []}] 97 | ) 98 | 99 | defp foo(rest, ["bar", "foo"], context, {_line, _col}, _bytes) do 100 | {rest, [:parsed], context} 101 | end 102 | ``` 103 | 104 | ### `:ignore` 105 | 106 | If true, clears the arguments from the list. 107 | 108 | ### `:parser` 109 | 110 | You may sepecify to export a combinator as a parser by specifying `parser: true`. 111 | By default, only a combinator will be generated. See `NimbleParsec.defparsec/3` 112 | to understand the difference. 113 | 114 | #### Example 115 | 116 | ``` 117 | Pegasus.parser_from_string(\""" 118 | foo <- "foo" "bar" 119 | \""", foo: [parser: true] 120 | ) 121 | ``` 122 | 123 | ### `:export` 124 | 125 | You may sepecify to export a combinator as a public function by specifying `export: true`. 126 | By default, the combinators are private functions. 127 | 128 | #### Example 129 | 130 | ``` 131 | Pegasus.parser_from_string(\""" 132 | foo <- "foo" "bar" 133 | \""", foo: [export: true] 134 | ) 135 | ``` 136 | 137 | ### `:alias` 138 | 139 | You may specify your own combinators to be run in place of what's in the grammar. 140 | This is useful if the grammar is wrong or contains content that can't be run for 141 | some reason. 142 | 143 | #### Example 144 | 145 | ``` 146 | Pegasus.parser_from_string(\""" 147 | foo <- "foo" 148 | \""", foo: [alias: :my_combinator]) 149 | ``` 150 | 151 | ## Not implemented features 152 | 153 | Actions, which imply the use of C code, are not implemented. These currently fail to parse 154 | but in the future they may silently do nothing. 
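## Combining options

A minimal sketch combining several of the options above; the two-rule grammar and
the option values here are illustrative, not taken from any shipped grammar:

```
# `word` is kept; `Spacing` is matched but dropped from the output.
Pegasus.parser_from_string(\"""
word <- [a-z]+ Spacing
Spacing <- ' '*
\""",
  # collect the matched characters into one binary, then tag the result
  word: [parser: true, collect: true, tag: :word],
  # whitespace is consumed but ignored
  Spacing: [ignore: true]
)
```

With these options a successful parse should leave a single tagged entry such as
`word: ["hello"]` in the result list, with any trailing whitespace consumed but
absent from the output.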
155 | """ 156 | 157 | import NimbleParsec 158 | 159 | defparsec(:parse, Pegasus.Grammar.parser()) 160 | 161 | defmacro parser_from_string(string, opts \\ []) do 162 | quote bind_quoted: [string: string, opts: opts] do 163 | string 164 | |> Pegasus.parse() 165 | |> Pegasus.parser_from_ast(opts) 166 | end 167 | end 168 | 169 | defmacro parser_from_file(file, opts \\ []) do 170 | quote bind_quoted: [file: file, opts: opts] do 171 | file 172 | |> File.read!() 173 | |> Pegasus.parse() 174 | |> Pegasus.parser_from_ast(opts) 175 | end 176 | end 177 | 178 | defmacro parser_from_ast(ast, opts) do 179 | quote bind_quoted: [ast: ast, opts: opts] do 180 | require NimbleParsec 181 | require Pegasus.Ast 182 | 183 | for ast = %{name: name, parsec: parsec} <- Pegasus.Ast.to_nimble_parsec(ast, opts) do 184 | name_opts = Keyword.get(opts, name, []) 185 | exported = !!Keyword.get(name_opts, :export) 186 | parser = Keyword.get(name_opts, :parser, false) 187 | 188 | Pegasus.Ast.traversals(ast) 189 | 190 | case {exported, parser} do 191 | {false, false} -> 192 | NimbleParsec.defcombinatorp(name, parsec) 193 | 194 | {false, true} -> 195 | NimbleParsec.defparsecp(name, parsec) 196 | 197 | {false, parser_name} -> 198 | NimbleParsec.defparsecp(parser_name, parsec) 199 | 200 | {true, false} -> 201 | NimbleParsec.defcombinator(name, parsec) 202 | 203 | {true, true} -> 204 | NimbleParsec.defparsec(name, parsec, export_combinator: true) 205 | 206 | {true, parser_name} -> 207 | NimbleParsec.defparsec(parser_name, parsec, export_combinator: true) 208 | end 209 | end 210 | end 211 | end 212 | end 213 | -------------------------------------------------------------------------------- /lib/pegasus/ast.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Ast do 2 | @moduledoc false 3 | import NimbleParsec 4 | 5 | @enforce_keys [:name] 6 | 7 | defstruct @enforce_keys ++ [:extract, start_pos?: false, parsec: empty()] 8 | 9 | @dummy_context %{parsec: empty()} 10 | 11 | def to_nimble_parsec({:ok, list, "", _, _, _}, opts) do 12 | to_nimble_parsec(list, opts) 13 | end 14 | 15 | def to_nimble_parsec(ast, opts) when is_list(ast) do 16 | Enum.map(ast, &to_nimble_parsec(&1, opts)) 17 | end 18 | 19 | def to_nimble_parsec({name, parser_ast}, opts) do 20 | name_opts = Keyword.get(opts, name, []) 21 | 22 | %__MODULE__{name: name} 23 | |> maybe_add_position(name_opts) 24 | |> translate_sequence(parser_ast) 25 | |> maybe_extract() 26 | |> maybe_collect(name_opts) 27 | |> maybe_token(name, name_opts) 28 | |> maybe_tag(name, name_opts) 29 | |> maybe_post_traverse(name_opts) 30 | |> maybe_ignore(name_opts) 31 | |> maybe_alias(name_opts) 32 | end 33 | 34 | defp maybe_add_position(context, name_opts) do 35 | if Keyword.get(name_opts, :start_position) do 36 | parsec = post_traverse(context.parsec, traversal_name(context.name, :start_pos)) 37 | %{context | parsec: parsec, start_pos?: true} 38 | else 39 | context 40 | end 41 | end 42 | 43 | defp maybe_extract(context) do 44 | if context.extract == :extract do 45 | %{context | parsec: post_traverse(context.parsec, traversal_name(context.name, :extract))} 46 | else 47 | context 48 | end 49 | end 50 | 51 | defp maybe_collect(context, name_opts) do 52 | if Keyword.get(name_opts, :collect) do 53 | %{context | parsec: reduce(context.parsec, {IO, :iodata_to_binary, []})} 54 | else 55 | context 56 | end 57 | end 58 | 59 | defp maybe_token(context = %{parsec: parsec}, name, name_opts) do 60 | case Keyword.get(name_opts, :token, false) do 61 | false -> 62 
| context 63 | 64 | true -> 65 | %{ 66 | context 67 | | parsec: parsec |> tag(name) |> post_traverse(traversal_name(name, :tag)), 68 | extract: :tag 69 | } 70 | 71 | token -> 72 | %{ 73 | context 74 | | parsec: parsec |> tag(token) |> post_traverse(traversal_name(name, :tag)), 75 | extract: :tag 76 | } 77 | end 78 | end 79 | 80 | defp maybe_tag(context = %{parsec: parsec}, name, name_opts) do 81 | case Keyword.get(name_opts, :tag, false) do 82 | false -> context 83 | true -> %{context | parsec: tag(parsec, name)} 84 | tag -> %{context | parsec: tag(parsec, tag)} 85 | end 86 | end 87 | 88 | defp maybe_post_traverse(context, name_opts) do 89 | if post_traverse = Keyword.get(name_opts, :post_traverse) do 90 | %{context | parsec: post_traverse(context.parsec, post_traverse)} 91 | else 92 | context 93 | end 94 | end 95 | 96 | defp maybe_ignore(context, name_opts) do 97 | if Keyword.get(name_opts, :ignore) do 98 | %{context | parsec: ignore(context.parsec)} 99 | else 100 | context 101 | end 102 | end 103 | 104 | defp maybe_alias(context, name_opts) do 105 | if substitution = Keyword.get(name_opts, :alias) do 106 | %{context | parsec: parsec(substitution)} 107 | else 108 | context 109 | end 110 | end 111 | 112 | def translate_sequence(context, ast) do 113 | Enum.reduce(ast, context, &translate/2) 114 | end 115 | 116 | defp translate(:dot, context) do 117 | %{context | parsec: utf8_char(context.parsec, not: 0)} 118 | end 119 | 120 | defp translate({:char, ranges}, context) do 121 | %{context | parsec: ascii_char(context.parsec, ranges)} 122 | end 123 | 124 | defp translate({:literal, literal}, context) do 125 | %{context | parsec: string(context.parsec, literal)} 126 | end 127 | 128 | defp translate({:lookahead, content}, context) do 129 | %{parsec: lookahead} = translate(content, @dummy_context) 130 | %{context | parsec: lookahead(context.parsec, lookahead)} 131 | end 132 | 133 | defp translate({:lookahead_not, content}, context) do 134 | %{parsec: lookahead_not} = translate(content, @dummy_context) 135 | %{context | parsec: lookahead_not(context.parsec, lookahead_not)} 136 | end 137 | 138 | defp translate({:optional, content}, context) do 139 | %{parsec: optional} = translate(content, @dummy_context) 140 | %{context | parsec: optional(context.parsec, optional)} 141 | end 142 | 143 | defp translate({:repeat, content}, context) do 144 | %{parsec: repeated} = translate(content, @dummy_context) 145 | %{context | parsec: repeat(context.parsec, repeated)} 146 | end 147 | 148 | defp translate({:times, content}, context) do 149 | %{parsec: repeated} = translate(content, @dummy_context) 150 | %{context | parsec: times(context.parsec, repeated, min: 1)} 151 | end 152 | 153 | defp translate({:identifier, identifier}, context) do 154 | %{context | parsec: parsec(context.parsec, identifier)} 155 | end 156 | 157 | defp translate({:choice, list_of_choices}, context) do 158 | choices = Enum.map(list_of_choices, &translate_sequence(@dummy_context, &1).parsec) 159 | %{context | parsec: choice(context.parsec, choices)} 160 | end 161 | 162 | defp translate({:ungroup, commands}, context) do 163 | grouped = translate_sequence(@dummy_context, commands) 164 | %{context | parsec: concat(context.parsec, grouped.parsec)} 165 | end 166 | 167 | defp translate({:extract, commands}, context) do 168 | grouped = translate_sequence(@dummy_context, commands) 169 | tagged = tag(grouped.parsec, :__extract__) 170 | %{context | parsec: concat(context.parsec, tagged), extract: :extract} 171 | end 172 | 173 | def traversal_name(name, 
tag), do: :"#{name}-#{tag}" 174 | 175 | defmacro traversals(ast) do 176 | quote bind_quoted: [ast: ast] do 177 | if ast.start_pos? do 178 | start_pos_name = Pegasus.Ast.traversal_name(ast.name, :start_pos) 179 | 180 | defp(unquote(start_pos_name)(rest, args, context, {line, offset}, col)) do 181 | {rest, [%{line: line, column: col - offset + 1, offset: offset} | args], context} 182 | end 183 | end 184 | 185 | case ast.extract do 186 | :tag -> 187 | extract_name = Pegasus.Ast.traversal_name(ast.name, :tag) 188 | 189 | defp(unquote(extract_name)(rest, [{tag, _} | args_rest], context, _, _)) do 190 | {rest, [tag | args_rest], context} 191 | end 192 | 193 | :extract -> 194 | extract_name = Pegasus.Ast.traversal_name(ast.name, :extract) 195 | 196 | defp unquote(extract_name)(rest, args, context, _, _) do 197 | extracted = 198 | Enum.flat_map(args, fn 199 | {:__extract__, what} -> 200 | what 201 | |> Enum.filter(&(is_binary(&1) or &1 in 1..0x10FFFF)) 202 | |> IO.iodata_to_binary() 203 | |> List.wrap() 204 | 205 | _ -> 206 | [] 207 | end) 208 | 209 | {rest, extracted, context} 210 | end 211 | 212 | _ -> 213 | [] 214 | end 215 | end 216 | end 217 | end 218 | -------------------------------------------------------------------------------- /lib/pegasus/class.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Class do 2 | @moduledoc false 3 | 4 | # Produces a "class" parser. 5 | # 6 | # Note that the output of a "class" parser leaves a NimbleParsec parser in the 7 | # arguments list. 8 | # 9 | # ```peg 10 | # Class <- '[' < ( !']' Range )* > ']' Spacing 11 | # ``` 12 | 13 | alias Pegasus.Components 14 | 15 | import NimbleParsec 16 | 17 | def parser(previous \\ empty()) do 18 | previous 19 | |> tag( 20 | ignore(string("[")) 21 | |> optional(string("^")) 22 | |> repeat( 23 | lookahead_not(string("]")) 24 | |> Components.range() 25 | ) 26 | |> ignore(string("]")), 27 | :class 28 | ) 29 | |> post_traverse({__MODULE__, :to_parser, []}) 30 | |> Components.spacing() 31 | end 32 | 33 | def to_parser(rest, [{:class, args} | args_rest], context, _, _) do 34 | classes = 35 | case args do 36 | ["^" | rest] -> 37 | Enum.map(rest, &{:not, &1}) 38 | 39 | args -> 40 | args 41 | end 42 | 43 | {rest, [{:char, classes} | args_rest], context} 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/pegasus/components.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Components do 2 | @moduledoc false 3 | 4 | # tools for the most simple parts of the PEG definition. 5 | # 6 | # None of these components *a priori* generate parsers. 7 | # 8 | # The following components are defined here: 9 | # 10 | # ```peg 11 | # Range <- Char '-' Char / Char 12 | # Char <- '\\' [abefnrtv'"\[\]\\] 13 | # / '\\' [0-3][0-7][0-7] 14 | # / '\\' [0-7][0-7]? 15 | # / '\\' '-' 16 | # / !'\\' . 17 | # Spacing <- ( Space / Comment )* 18 | # Comment <- '#' ( !EndOfLine . )* EndOfLine 19 | # Space <- ' ' / '\t' / EndOfLine 20 | # EndOfLine <- '\r\n' / '\n' / '\r' 21 | # EndOfFile <- !. 
22 | # ``` 23 | 24 | import NimbleParsec 25 | 26 | def range(previous \\ empty()) do 27 | choice(previous, [ 28 | tag(char() |> string("-") |> char(), :range) 29 | |> post_traverse({__MODULE__, :_to_range, []}), 30 | char() 31 | ]) 32 | end 33 | 34 | def char(previous \\ empty()) do 35 | escaped_char = ascii_char(~C(abefnrtv'"[]\-)) 36 | 37 | three_digit_octal = 38 | ascii_char([?0..?3]) 39 | |> ascii_char([?0..?7]) 40 | |> ascii_char([?0..?7]) 41 | 42 | two_or_one_digit_octal = 43 | ascii_char([?0..?7]) 44 | |> optional(ascii_char([?0..?7])) 45 | 46 | escaped = 47 | tag( 48 | string("\\") 49 | |> choice([ 50 | escaped_char, 51 | three_digit_octal, 52 | two_or_one_digit_octal 53 | ]), 54 | :escaped 55 | ) 56 | |> post_traverse({__MODULE__, :_parse_escaped, []}) 57 | 58 | not_escaped = 59 | lookahead_not(string("\\")) 60 | # need to provide *some* dummy variable for utf-8 characters 61 | |> utf8_char(not: 0) 62 | 63 | choice(previous, [ 64 | escaped, 65 | not_escaped 66 | ]) 67 | end 68 | 69 | def spacing(previous \\ empty()) do 70 | previous 71 | |> ignore( 72 | repeat( 73 | choice([ 74 | space(), 75 | comment() 76 | ]) 77 | ) 78 | ) 79 | end 80 | 81 | def comment(previous \\ empty()) do 82 | previous 83 | |> concat(string("#")) 84 | |> repeat( 85 | lookahead_not(end_of_line()) 86 | |> utf8_char(not: 0) 87 | ) 88 | |> end_of_line() 89 | end 90 | 91 | def space(previous \\ empty()) do 92 | previous 93 | |> choice([ 94 | ascii_char(~c' \t'), 95 | end_of_line() 96 | ]) 97 | end 98 | 99 | def end_of_line(previous \\ empty()) do 100 | previous 101 | |> choice([ 102 | string("\n\r"), 103 | ascii_char(~c'\n\r') 104 | ]) 105 | end 106 | 107 | def end_of_file(previous \\ empty()) do 108 | eos(previous) 109 | end 110 | 111 | @escape_lookup %{ 112 | ?a => ?\a, 113 | ?b => ?\b, 114 | ?e => ?\e, 115 | ?f => ?\f, 116 | ?n => ?\n, 117 | ?r => ?\r, 118 | ?t => ?\t, 119 | ?v => ?\v, 120 | ?' => ?', 121 | ?" => ?", 122 | ?[ => ?[, 123 | ?] => ?], 124 | ?- => ?-, 125 | 92 => 92 126 | } 127 | 128 | @escape_keys Map.keys(@escape_lookup) 129 | 130 | def _parse_escaped(rest, [{:escaped, ["\\", symbol]} | rest_args], context, _, _) 131 | when symbol in @escape_keys do 132 | {rest, [@escape_lookup[symbol] | rest_args], context} 133 | end 134 | 135 | def _parse_escaped(rest, [{:escaped, ["\\", o1, o2, o3]} | rest_args], context, _, _) 136 | when o1 in ?0..?3 and o2 in ?0..?7 and o3 in ?0..?7 do 137 | {rest, [deoctalize([o1, o2, o3]) | rest_args], context} 138 | end 139 | 140 | def _parse_escaped(rest, [{:escaped, ["\\", o1, o2]} | rest_args], context, _, _) 141 | when o1 in ?0..?7 and o2 in ?0..?7 do 142 | {rest, [deoctalize([o1, o2]) | rest_args], context} 143 | end 144 | 145 | def _parse_escaped(rest, [{:escaped, ["\\", o1]} | rest_args], context, _, _) 146 | when o1 in ?0..?7 do 147 | {rest, [deoctalize([o1]) | rest_args], context} 148 | end 149 | 150 | defp deoctalize(list) do 151 | list |> :erlang.list_to_integer(8) 152 | end 153 | 154 | def _to_range(rest, [{:range, [left, "-", right]} | rest_args], context, _, _) 155 | when left < right do 156 | {rest, [left..right | rest_args], context} 157 | end 158 | end 159 | -------------------------------------------------------------------------------- /lib/pegasus/expression.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Expression do 2 | @moduledoc false 3 | 4 | # Produces a "expression" parser. 
5 | # 6 | # ```peg 7 | # Expression <- Sequence ( SLASH Sequence )* 8 | # ``` 9 | 10 | import NimbleParsec 11 | 12 | alias Pegasus.Sequence 13 | alias Pegasus.Tokens 14 | 15 | require Sequence 16 | require Tokens 17 | 18 | expression = 19 | empty() 20 | |> tag( 21 | Sequence.parser() 22 | |> repeat( 23 | Tokens.slash() 24 | |> Sequence.parser() 25 | ), 26 | :sequences 27 | ) 28 | |> post_traverse({__MODULE__, :_separate_slashes, []}) 29 | 30 | defcombinator(:expression, expression) 31 | 32 | def parser(previous \\ empty()) do 33 | parsec(previous, {__MODULE__, :expression}) 34 | end 35 | 36 | def _separate_slashes(rest, [{:sequences, sequences} | other_args], context, _, _) do 37 | choice = 38 | case by_slashes(sequences) do 39 | [one_sequence] -> one_sequence 40 | many_sequences -> [choice: many_sequences] 41 | end 42 | 43 | {rest, [choice | other_args], context} 44 | end 45 | 46 | defp by_slashes(sequences, so_far \\ []) 47 | 48 | defp by_slashes([], so_far), do: Enum.reverse(so_far) 49 | defp by_slashes([:slash, this | rest], so_far), do: by_slashes(rest, [this | so_far]) 50 | defp by_slashes([this | rest], []), do: by_slashes(rest, [this]) 51 | end 52 | -------------------------------------------------------------------------------- /lib/pegasus/grammar.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Grammar do 2 | @moduledoc false 3 | 4 | # produces a fully parsed grammar. 5 | # 6 | # ``` 7 | # Grammar <- Spacing Definition+ EndOfFile 8 | # Definition <- Identifier LEFTARROW Expression 9 | # ``` 10 | 11 | import NimbleParsec 12 | 13 | alias Pegasus.Components 14 | alias Pegasus.Expression 15 | alias Pegasus.Identifier 16 | alias Pegasus.Tokens 17 | 18 | def parser do 19 | Components.spacing() 20 | |> times( 21 | Identifier.parser(empty()) 22 | |> Tokens.leftarrow() 23 | |> Expression.parser() 24 | |> post_traverse({__MODULE__, :collate, []}), 25 | min: 1 26 | ) 27 | |> Components.end_of_file() 28 | end 29 | 30 | def collate(rest, [parser, :leftarrow, {:identifier, name} | args_rest], context, _, _) do 31 | {rest, [{name, parser} | args_rest], context} 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/pegasus/identifier.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Identifier do 2 | @moduledoc false 3 | 4 | # Produces a "identifier" parser. 5 | # 6 | # the make_parser option should be set to false (default) when the identifier 7 | # is being assigned, and true when the identifier is being used as part of a 8 | # parser sequence. 
9 | # 10 | # ```peg 11 | # Identifier <- < IdentStart IdentCont* > Spacing 12 | # IdentStart <- [a-zA-Z_] 13 | # IdentCont <- IdentStart / [0-9] 14 | # ``` 15 | 16 | alias Pegasus.Components 17 | import NimbleParsec 18 | 19 | def parser(previous, make_parser \\ false) do 20 | previous 21 | |> tag( 22 | ident_start() 23 | |> repeat(ident_cont()), 24 | :identifier 25 | ) 26 | |> post_traverse({__MODULE__, :to_parser, [make_parser]}) 27 | |> Components.spacing() 28 | end 29 | 30 | def ident_start do 31 | ascii_char([?a..?z, ?A..?Z, ?_]) 32 | end 33 | 34 | def ident_cont() do 35 | ascii_char([?a..?z, ?A..?Z, ?_, ?0..?9]) 36 | end 37 | 38 | def to_parser(rest, [{:identifier, args} | other_args], context, _, _, make_parser) do 39 | identifier = 40 | args 41 | |> IO.iodata_to_binary() 42 | |> String.to_atom() 43 | 44 | tag = if make_parser, do: :parser, else: :identifier 45 | 46 | {rest, [{tag, identifier} | other_args], context} 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /lib/pegasus/literal.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Literal do 2 | @moduledoc false 3 | 4 | # Produces a "literal" parser. 5 | # 6 | # Note that the output of a "literal" parser leaves a NimbleParsec parser in the 7 | # arguments list. 8 | # 9 | # ```peg 10 | # Literal <- ['] < ( !['] Char )* > ['] Spacing 11 | # / ["] < ( !["] Char )* > ["] Spacing 12 | # ``` 13 | 14 | alias Pegasus.Components 15 | import NimbleParsec 16 | 17 | def parser(previous \\ empty()) do 18 | previous 19 | |> tag( 20 | choice([ 21 | quoted_literal(~S(')), 22 | quoted_literal(~S(")) 23 | ]), 24 | :literal 25 | ) 26 | |> post_traverse({__MODULE__, :to_parser, []}) 27 | |> Components.spacing() 28 | end 29 | 30 | defp quoted_literal(quote_bound) do 31 | ignore(string(quote_bound)) 32 | |> repeat( 33 | lookahead_not(string(quote_bound)) 34 | |> Components.char() 35 | ) 36 | |> ignore(string(quote_bound)) 37 | end 38 | 39 | def to_parser(rest, [{:literal, args} | args_rest], context, _, _) do 40 | literal = IO.iodata_to_binary(args) 41 | 42 | {rest, [{:literal, literal} | args_rest], context} 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/pegasus/primary.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Primary do 2 | @moduledoc false 3 | 4 | # Produces a "primary" parser. 
This is a single item which 5 | # 6 | # ```peg 7 | # Primary <- Identifier !LEFTARROW 8 | # / OPEN Expression CLOSE 9 | # / Literal 10 | # / Class 11 | # / DOT 12 | # / Action 13 | # / BEGIN Expression END 14 | # ``` 15 | 16 | import NimbleParsec 17 | alias Pegasus.Expression 18 | alias Pegasus.Identifier 19 | alias Pegasus.Literal 20 | alias Pegasus.Class 21 | alias Pegasus.Tokens 22 | 23 | def parser(previous \\ empty()) do 24 | previous 25 | |> choice([ 26 | bare_identifier(), 27 | paren_expression(), 28 | Literal.parser(), 29 | Class.parser(), 30 | Tokens.dot(), 31 | tagged_expression() 32 | ]) 33 | end 34 | 35 | defp bare_identifier do 36 | empty() 37 | |> Identifier.parser() 38 | |> lookahead_not(Tokens.leftarrow()) 39 | end 40 | 41 | defp paren_expression do 42 | tag( 43 | ignore(Tokens.open()) 44 | |> parsec({Expression, :expression}) 45 | |> ignore(Tokens.close()), 46 | :collect 47 | ) 48 | |> post_traverse({__MODULE__, :_group, [:ungroup]}) 49 | end 50 | 51 | defp tagged_expression do 52 | tag( 53 | ignore(Tokens.begin()) 54 | |> parsec({Expression, :expression}) 55 | |> ignore(Tokens.ender()), 56 | :collect 57 | ) 58 | |> post_traverse({__MODULE__, :_group, [:extract]}) 59 | end 60 | 61 | def _group(rest, [{:collect, [inner_args]} | args_rest], context, _, _, action) do 62 | {rest, [{action, inner_args} | args_rest], context} 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /lib/pegasus/sequence.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Sequence do 2 | @moduledoc false 3 | 4 | # Collects together parsers for all of the minor tokens for Peg parsers 5 | # 6 | # ```peg 7 | # Sequence <- Prefix* 8 | # Prefix <- AND Action # <== not implemented 9 | # / ( AND / NOT )? Suffix 10 | # Suffix <- Primary ( QUERY / STAR / PLUS )? 
11 | # ``` 12 | 13 | alias Pegasus.Tokens 14 | alias Pegasus.Primary 15 | 16 | import NimbleParsec 17 | 18 | def parser(previous \\ empty()) do 19 | previous 20 | |> tag( 21 | repeat( 22 | tag( 23 | optional(choice([Tokens.and(), Tokens.not()])) 24 | |> Primary.parser() 25 | |> optional( 26 | choice([ 27 | Tokens.query(), 28 | Tokens.star(), 29 | Tokens.plus() 30 | ]) 31 | ), 32 | :one_sequence_item 33 | ) 34 | ), 35 | :sequence 36 | ) 37 | |> post_traverse({__MODULE__, :sequence, []}) 38 | end 39 | 40 | def sequence(rest, [{:sequence, args} | rest_args], context, _, _) do 41 | new_args = Enum.map(args, &sequence_one/1) 42 | {rest, [new_args | rest_args], context} 43 | end 44 | 45 | def sequence_one({:one_sequence_item, [:and | args]}) do 46 | {:lookahead, sequence_internal(args)} 47 | end 48 | 49 | def sequence_one({:one_sequence_item, [:not | args]}) do 50 | {:lookahead_not, sequence_internal(args)} 51 | end 52 | 53 | def sequence_one({:one_sequence_item, args}), do: sequence_internal(args) 54 | 55 | defp sequence_internal([command, :query]), do: {:optional, command} 56 | 57 | defp sequence_internal([command, :star]), do: {:repeat, command} 58 | 59 | defp sequence_internal([command, :plus]), do: {:times, command} 60 | 61 | defp sequence_internal([command]), do: command 62 | end 63 | -------------------------------------------------------------------------------- /lib/pegasus/tokens.ex: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.Tokens do 2 | @moduledoc false 3 | 4 | # Collects together parsers for all of the minor tokens for Peg parsers 5 | # 6 | # ```peg 7 | # LEFTARROW <- '<-' Spacing 8 | # SLASH <- '/' Spacing 9 | # AND <- '&' Spacing 10 | # NOT <- '!' Spacing 11 | # QUERY <- '?' Spacing 12 | # STAR <- '*' Spacing 13 | # PLUS <- '+' Spacing 14 | # OPEN <- '(' Spacing 15 | # CLOSE <- ')' Spacing 16 | # DOT <- '.' 
Spacing 17 | # BEGIN <- '<' Spacing 18 | # END <- '>' Spacing 19 | # ``` 20 | 21 | import NimbleParsec 22 | alias Pegasus.Components 23 | 24 | @definitions %{ 25 | leftarrow: "<-", 26 | slash: "/", 27 | and: "&", 28 | not: "!", 29 | query: "?", 30 | star: "*", 31 | plus: "+", 32 | open: "(", 33 | close: ")", 34 | dot: ".", 35 | begin: "<", 36 | ender: ">" 37 | } 38 | 39 | for {name, token} <- @definitions do 40 | def unquote(name)(previous \\ empty()) do 41 | previous 42 | |> ignore(string(unquote(token))) 43 | |> post_traverse({__MODULE__, :tokenize, [unquote(name)]}) 44 | |> Components.spacing() 45 | end 46 | end 47 | 48 | def tokenize(rest, args, context, _, _, token) do 49 | {rest, [token | args], context} 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Pegasus.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :pegasus, 7 | version: "0.2.6", 8 | elixir: "~> 1.12", 9 | start_permanent: Mix.env() == :prod, 10 | elixirc_paths: elixirc_paths(Mix.env()), 11 | description: "peg -> nimbleparsec", 12 | package: package(), 13 | docs: docs(), 14 | deps: deps() 15 | ] 16 | end 17 | 18 | def application do 19 | [ 20 | extra_applications: [:logger] 21 | ] 22 | end 23 | 24 | defp package do 25 | [ 26 | licenses: ["MIT"], 27 | links: %{ 28 | github: "https://github.com/ityonemo/pegasus" 29 | } 30 | ] 31 | end 32 | 33 | defp docs do 34 | [ 35 | main: "Pegasus", 36 | extras: ["README.md"] 37 | ] 38 | end 39 | 40 | defp elixirc_paths(:test), do: ["lib", "test/_support"] 41 | defp elixirc_paths(_), do: ["lib"] 42 | 43 | defp deps do 44 | [{:nimble_parsec, "~> 1.2"}, {:ex_doc, ">= 0.0.0", only: :dev, runtime: false}] 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "earmark_parser": {:hex, :earmark_parser, "1.4.41", "ab34711c9dc6212dda44fcd20ecb87ac3f3fce6f0ca2f28d4a00e4154f8cd599", [:mix], [], "hexpm", "a81a04c7e34b6617c2792e291b5a2e57ab316365c2644ddc553bb9ed863ebefa"}, 3 | "ex_doc": {:hex, :ex_doc, "0.34.2", "13eedf3844ccdce25cfd837b99bea9ad92c4e511233199440488d217c92571e8", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "5ce5f16b41208a50106afed3de6a2ed34f4acfd65715b82a0b84b49d995f95c1"}, 4 | "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, 5 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 6 | "makeup_erlang": {:hex, 
:makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, 7 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 8 | } 9 | -------------------------------------------------------------------------------- /test/_assets/peg_for_peg.y: -------------------------------------------------------------------------------- 1 | Grammar <- Spacing Definition+ EndOfFile 2 | 3 | Definition <- Identifier LEFTARROW Expression 4 | Expression <- Sequence ( SLASH Sequence )* 5 | Sequence <- Prefix* 6 | Prefix <- AND Action 7 | / ( AND | NOT )? Suffix 8 | Suffix <- Primary ( QUERY / STAR / PLUS )? 9 | Primary <- Identifier !LEFTARROW 10 | / OPEN Expression CLOSE 11 | / Literal 12 | / Class 13 | / DOT 14 | / Action 15 | / BEGIN 16 | / END 17 | 18 | Identifier <- < IdentStart IdentCont* > Spacing 19 | IdentStart <- [a-zA-Z_] 20 | IdentCont <- IdentStart / [0-9] 21 | Literal <- ['] < ( !['] Char )* > ['] Spacing 22 | / ["] < ( !["] Char )* > ["] Spacing 23 | Class <- '[' < ( !']' Range )* > ']' Spacing 24 | Range <- Char '-' Char / Char 25 | Char <- '\\' [abefnrtv'"\[\]\\] 26 | / '\\' [0-3][0-7][0-7] 27 | / '\\' [0-7][0-7]? 28 | / '\\' '-' 29 | / !'\\' . 30 | LEFTARROW <- '<-' Spacing 31 | SLASH <- '/' Spacing 32 | AND <- '&' Spacing 33 | NOT <- '!' Spacing 34 | QUERY <- '?' Spacing 35 | STAR <- '*' Spacing 36 | PLUS <- '+' Spacing 37 | OPEN <- '(' Spacing 38 | CLOSE <- ')' Spacing 39 | DOT <- '.' Spacing 40 | Spacing <- ( Space / Comment )* 41 | Comment <- '#' ( !EndOfLine . )* EndOfLine 42 | Space <- ' ' / '\t' / EndOfLine 43 | EndOfLine <- '\r\n' / '\n' / '\r' 44 | EndOfFile <- !. 
45 | Action <- '{' < [^}]* > '}' Spacing 46 | BEGIN <- '<' Spacing 47 | END <- '>' Spacing -------------------------------------------------------------------------------- /test/_support/case.ex: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.Case do 2 | defmacro assert_parsed( 3 | value, 4 | args \\ quote do 5 | _ 6 | end 7 | ) do 8 | quote do 9 | assert {:ok, unquote(args), "", %{}, _, _} = unquote(value) 10 | end 11 | end 12 | 13 | defmacro assert_parser(value, parser) do 14 | quote bind_quoted: [value: value, parser: parser] do 15 | assert {:ok, [^parser], "", %{}, _, _} = value 16 | end 17 | end 18 | 19 | defmacro refute_parsed(value = {_, _, [source]}) do 20 | quote bind_quoted: [value: value, source: source] do 21 | case value do 22 | error when elem(error, 0) == :error -> 23 | assert {:error, _msg, _rest, _context, _, _} = value 24 | 25 | _ -> 26 | assert {:ok, [], ^source, %{}, _, _} = value 27 | end 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /test/pegasus_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest do 2 | use ExUnit.Case, async: true 3 | 4 | require Pegasus 5 | import PegasusTest.Case 6 | 7 | Pegasus.parser_from_string("char_range <- [a-z]", char_range: [parser: true]) 8 | 9 | describe "char_range works" do 10 | test "char_range" do 11 | assert_parsed(char_range("a")) 12 | refute_parsed(char_range("A")) 13 | end 14 | end 15 | 16 | Pegasus.parser_from_string("literal <- 'foo'", literal: [parser: true]) 17 | 18 | describe "literal works" do 19 | test "literal" do 20 | assert_parsed(literal("foo")) 21 | refute_parsed(literal("bar")) 22 | end 23 | end 24 | 25 | Pegasus.parser_from_string("sequence <- 'foo' 'bar'", sequence: [parser: true]) 26 | 27 | describe "sequence works" do 28 | test "sequence" do 29 | assert_parsed(sequence("foobar")) 30 | refute_parsed(sequence("foo")) 31 | end 32 | end 33 | 34 | Pegasus.parser_from_string("lookahead <- &'f' 'foo'", lookahead: [parser: true]) 35 | 36 | describe "lookahead works" do 37 | test "lookahead" do 38 | assert_parsed(lookahead("foo")) 39 | end 40 | end 41 | 42 | Pegasus.parser_from_string("lookahead_not <- !'aaa' [a-z][a-z][a-z]", 43 | lookahead_not: [parser: true] 44 | ) 45 | 46 | describe "lookahead_not works" do 47 | test "lookahead_not" do 48 | assert_parsed(lookahead_not("aab")) 49 | refute_parsed(lookahead_not("aaa")) 50 | end 51 | end 52 | 53 | Pegasus.parser_from_string("optional <- 'foo' 'bar'?", optional: [parser: true]) 54 | 55 | describe "optional works" do 56 | test "optional" do 57 | assert_parsed(optional("foo")) 58 | assert_parsed(optional("foobar")) 59 | refute_parsed(optional("funbar")) 60 | assert {:ok, ["foo"], "baz", _, _, _} = optional("foobaz") 61 | end 62 | end 63 | 64 | Pegasus.parser_from_string("repeat <- 'foo' 'bar'*", repeat: [parser: true]) 65 | 66 | describe "repeat works" do 67 | test "repeat" do 68 | assert_parsed(repeat("foo")) 69 | assert_parsed(repeat("foobar")) 70 | assert_parsed(repeat("foobarbar")) 71 | refute_parsed(repeat("funbar")) 72 | end 73 | end 74 | 75 | Pegasus.parser_from_string("times <- 'foo' 'bar'+", times: [parser: true]) 76 | 77 | describe "times works" do 78 | test "times" do 79 | refute_parsed(times("foo")) 80 | assert_parsed(times("foobar")) 81 | assert_parsed(times("foobarbar")) 82 | refute_parsed(times("funbar")) 83 | end 84 | end 85 | 86 | Pegasus.parser_from_string( 87 | """ 88 
| identifier <- 'foo' IDENTIFIER # plus a comment, why not 89 | IDENTIFIER <- 'bar' 90 | """, 91 | identifier: [parser: true] 92 | ) 93 | 94 | describe "identifiers work" do 95 | test "identifier" do 96 | assert_parsed(identifier("foobar")) 97 | refute_parsed(identifier("foo")) 98 | refute_parsed(identifier("bar")) 99 | end 100 | end 101 | 102 | Pegasus.parser_from_string("choice <- 'foo' / 'bar'", choice: [parser: true]) 103 | 104 | describe "choice works" do 105 | test "choice" do 106 | assert_parsed(choice("foo")) 107 | assert_parsed(choice("bar")) 108 | refute_parsed(choice("baz")) 109 | end 110 | end 111 | 112 | Pegasus.parser_from_string("dumb_parens <- ('foo' [a-z]) 'bar' ", dumb_parens: [parser: true]) 113 | 114 | describe "dumb parens work" do 115 | test "dumb_parens" do 116 | assert_parsed(dumb_parens("fooabar")) 117 | refute_parsed(dumb_parens("fooZbar")) 118 | refute_parsed(dumb_parens("foo")) 119 | refute_parsed(dumb_parens("fooa")) 120 | refute_parsed(dumb_parens("bar")) 121 | end 122 | end 123 | 124 | Pegasus.parser_from_string("times_parens <- ('foo' [a-z])+ 'bar' ", 125 | times_parens: [parser: true] 126 | ) 127 | 128 | describe "smart parens work" do 129 | test "with times" do 130 | assert_parsed(times_parens("fooabar")) 131 | assert_parsed(times_parens("fooafooabar")) 132 | assert_parsed(times_parens("fooafooafooabar")) 133 | refute_parsed(times_parens("bar")) 134 | end 135 | end 136 | 137 | Pegasus.parser_from_string("begin_end <- < 'foo' [a-z] > 'bar' ", begin_end: [parser: true]) 138 | 139 | describe "begin-end works" do 140 | test "to group" do 141 | assert_parsed(begin_end("fooabar")) 142 | refute_parsed(begin_end("bar")) 143 | end 144 | end 145 | 146 | Pegasus.parser_from_string("dot <- 'foo' .", dot: [parser: true]) 147 | 148 | describe "dot works" do 149 | test "dot" do 150 | assert_parsed(dot("fooa")) 151 | refute_parsed(dot("foba")) 152 | refute_parsed(dot("foo")) 153 | end 154 | end 155 | 156 | describe "post_traverse settings work" do 157 | Pegasus.parser_from_string("post_traverse_ungrouped <- 'foo' [a-z]", 158 | post_traverse_ungrouped: [ 159 | parser: true, 160 | post_traverse: {:post_traverse_ungrouped, []} 161 | ] 162 | ) 163 | 164 | defp post_traverse_ungrouped("", [?a, "foo"], context, {1, 0}, 4) do 165 | {"", [], Map.put(context, :parsed, true)} 166 | end 167 | 168 | test "ungrouped content is presented as a list" do 169 | result = assert_parsed(post_traverse_ungrouped("fooa")) 170 | assert {:ok, [], "", %{parsed: true}, _, _} = result 171 | end 172 | 173 | Pegasus.parser_from_string("post_traverse_grouped <- ('foo' [a-z])", 174 | post_traverse_grouped: [ 175 | parser: true, 176 | post_traverse: {:post_traverse_grouped, [:test]} 177 | ] 178 | ) 179 | 180 | defp post_traverse_grouped("", [?a, "foo"], context, {1, 0}, 4, :test) do 181 | {"", [], Map.put(context, :parsed, true)} 182 | end 183 | 184 | test "grouped content is merged" do 185 | result = assert_parsed(post_traverse_grouped("fooa")) 186 | assert {:ok, [], "", %{parsed: true}, _, _} = result 187 | end 188 | 189 | Pegasus.parser_from_string("post_traverse_extracted <- <'foo' [a-z]> 'bar'", 190 | post_traverse_extracted: [ 191 | parser: true, 192 | post_traverse: {:post_traverse_extracted, [:test]} 193 | ] 194 | ) 195 | 196 | defp post_traverse_extracted("", ["fooa"], context, {1, 0}, _, :test) do 197 | {"", [], Map.put(context, :parsed, true)} 198 | end 199 | 200 | test "extracted content is merged and isolated" do 201 | result = assert_parsed(post_traverse_extracted("fooabar")) 202 | assert 
{:ok, [], "", %{parsed: true}, _, _} = result 203 | end 204 | end 205 | 206 | describe "tagging" do 207 | Pegasus.parser_from_string("tagged_true <- 'foo' [a-z]", 208 | tagged_true: [parser: true, tag: true] 209 | ) 210 | 211 | test "set to the parser name when true" do 212 | result = assert_parsed(tagged_true("fooa")) 213 | assert {:ok, [tagged_true: ["foo", ?a]], "", %{}, _, _} = result 214 | end 215 | 216 | Pegasus.parser_from_string("tagged_name <- 'foo' [a-z]", 217 | tagged_name: [parser: true, tag: :name] 218 | ) 219 | 220 | test "customizable" do 221 | result = assert_parsed(tagged_name("fooa")) 222 | assert {:ok, [name: ["foo", ?a]], "", %{}, _, _} = result 223 | end 224 | end 225 | 226 | describe "collected" do 227 | Pegasus.parser_from_string("collecting <- 'foo' [a-z]", 228 | collecting: [parser: true, collect: true] 229 | ) 230 | 231 | test "content is merged and isolated" do 232 | result = assert_parsed(collecting("fooa")) 233 | assert {:ok, ["fooa"], "", %{}, _, _} = result 234 | end 235 | end 236 | 237 | describe "tokening" do 238 | Pegasus.parser_from_string("token_true <- 'foo' [a-z]", 239 | token_true: [parser: true, token: true] 240 | ) 241 | 242 | test "set to the parser name when true" do 243 | result = assert_parsed(token_true("fooa")) 244 | assert {:ok, [:token_true], "", %{}, _, _} = result 245 | end 246 | 247 | Pegasus.parser_from_string("token_name <- 'foo' [a-z]", 248 | token_name: [parser: true, token: :name] 249 | ) 250 | 251 | test "customizable" do 252 | result = assert_parsed(token_name("fooa")) 253 | assert {:ok, [:name], "", %{}, _, _} = result 254 | end 255 | end 256 | 257 | describe "ignore" do 258 | Pegasus.parser_from_string( 259 | """ 260 | ignore_outside <- 'foo' ignore_inside 261 | ignore_inside <- 'bar' 262 | """, 263 | ignore_outside: [parser: true], 264 | ignore_inside: [ignore: true] 265 | ) 266 | 267 | test "ignores when you expect when true" do 268 | result = assert_parsed(ignore_outside("foobar")) 269 | assert {:ok, ["foo"], "", %{}, _, _} = result 270 | end 271 | end 272 | 273 | describe "start_position" do 274 | Pegasus.parser_from_string( 275 | """ 276 | start_position <- 'foo' needs_position 277 | needs_position <- 'bar' 278 | """, 279 | start_position: [parser: true], 280 | needs_position: [start_position: true, post_traverse: :post_traverse_needs_position] 281 | ) 282 | 283 | defp post_traverse_needs_position("", ["bar", %{line: 1, column: 4}], context, _, _) do 284 | {"", [], context} 285 | end 286 | 287 | test "can be given a start position" do 288 | assert_parsed(start_position("foobar")) 289 | end 290 | end 291 | 292 | describe "alias" do 293 | Pegasus.parser_from_string( 294 | """ 295 | aliased <- "not correct" 296 | """, 297 | aliased: [parser: true, alias: :substitution] 298 | ) 299 | 300 | import NimbleParsec 301 | 302 | defcombinatorp(:substitution, string("correct")) 303 | 304 | test "aliasing works" do 305 | assert_parsed(aliased("correct")) 306 | end 307 | end 308 | end 309 | -------------------------------------------------------------------------------- /test/pegasus_test/class_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.ClassTest do 2 | use ExUnit.Case, async: true 3 | 4 | alias Pegasus.Class 5 | 6 | import NimbleParsec 7 | import PegasusTest.Case 8 | 9 | defparsec(:parser, Class.parser()) 10 | 11 | describe "the class parser" do 12 | test "produces a single char class" do 13 | assert_parser(parser("[a]"), {:char, ~C(a)}) 14 | end 15 | 16 | test 
"produces a char range class" do 17 | assert_parser(parser("[a-z]"), {:char, [?a..?z]}) 18 | end 19 | 20 | test "can match multiple chars" do 21 | assert_parser(parser("[ac]"), {:char, ~C(ac)}) 22 | end 23 | 24 | test "can match an escaped chars" do 25 | assert_parser(parser(~S"[\nc]"), {:char, ~c(\nc)}) 26 | # assert_parser(parser(~S"[\\c]"), {:char, ~C(\c)}) 27 | end 28 | 29 | test "can match a char and a range" do 30 | assert_parser(parser("[ad-z]"), {:char, [?a, ?d..?z]}) 31 | end 32 | 33 | test "can negate a char" do 34 | assert_parser(parser("[^a]"), {:char, not: ?a}) 35 | end 36 | 37 | test "can negate a range" do 38 | assert_parser(parser("[^a-z]"), {:char, not: ?a..?z}) 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /test/pegasus_test/components_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.ComponentsTest do 2 | # tests basic components in the PEG grammar 3 | use ExUnit.Case, async: true 4 | 5 | alias Pegasus.Components 6 | 7 | import NimbleParsec 8 | import PegasusTest.Case 9 | 10 | for component <- ~w(end_of_file end_of_line space comment spacing char range)a do 11 | defparsecp(component, apply(Components, component, [])) 12 | end 13 | 14 | describe "end of file" do 15 | test "parses end of file" do 16 | assert_parsed(end_of_file("")) 17 | end 18 | 19 | test "fails if it's not eof" do 20 | refute_parsed(end_of_file("a")) 21 | end 22 | end 23 | 24 | describe "end of line" do 25 | test "parses end of line" do 26 | assert_parsed(end_of_line("\n"), ~c'\n') 27 | assert_parsed(end_of_line("\r"), ~c'\r') 28 | assert_parsed(end_of_line("\n\r"), ["\n\r"]) 29 | end 30 | 31 | test "fails if it's not eol" do 32 | refute_parsed(end_of_line("")) 33 | refute_parsed(end_of_line("a")) 34 | refute_parsed(end_of_line("a\n")) 35 | end 36 | end 37 | 38 | describe "space" do 39 | test "parses spaces" do 40 | assert_parsed(space(" "), ~c' ') 41 | assert_parsed(space("\t"), ~c'\t') 42 | assert_parsed(space("\n"), ~c'\n') 43 | assert_parsed(space("\n\r"), ["\n\r"]) 44 | end 45 | 46 | test "fails non-spaces" do 47 | refute_parsed(space("")) 48 | refute_parsed(space("a")) 49 | refute_parsed(space("a ")) 50 | refute_parsed(space("a\t")) 51 | end 52 | end 53 | 54 | describe "comment" do 55 | test "parses end of line comments" do 56 | assert_parsed(comment("# this is a comment\n")) 57 | assert_parsed(comment("# this is a # comment\n")) 58 | assert_parsed(comment("# windows comments\n\r")) 59 | end 60 | 61 | test "fails comments that are eof'd" do 62 | refute_parsed(comment("# this comment fails")) 63 | end 64 | 65 | test "fails non-comments" do 66 | refute_parsed(comment("a# comment\n")) 67 | end 68 | end 69 | 70 | describe "spacing" do 71 | test "parses spaces" do 72 | assert_parsed(spacing(" ")) 73 | assert_parsed(spacing(" ")) 74 | assert_parsed(spacing(" \t")) 75 | end 76 | 77 | test "parses comments" do 78 | assert_parsed(spacing("# comment\n")) 79 | assert_parsed(spacing("# comment\n# comment2\n")) 80 | end 81 | 82 | test "parses space then comments" do 83 | assert_parsed(spacing(" #comment\n")) 84 | end 85 | 86 | test "parses nothing" do 87 | assert_parsed(spacing("")) 88 | end 89 | 90 | test "fails non-space, non-comments" do 91 | refute_parsed(spacing("foo")) 92 | end 93 | end 94 | 95 | describe "char" do 96 | test "parses basic characters" do 97 | assert_parsed(char(" "), ~C' ') 98 | assert_parsed(char("f"), ~C'f') 99 | assert_parsed(char("A"), ~C'A') 100 | end 
101 | 102 |     test "parses escaped values" do 103 |       assert_parsed(char(~S"\a"), ~c'\a') 104 |       assert_parsed(char(~S"\b"), ~c'\b') 105 |       assert_parsed(char(~S"\n"), ~c'\n') 106 |       assert_parsed(char(~S"\f"), ~c'\f') 107 |       assert_parsed(char(~S"\e"), ~c'\e') 108 |       assert_parsed(char(~S"\r"), ~c'\r') 109 |       assert_parsed(char(~S"\t"), ~c'\t') 110 |       assert_parsed(char(~S"\v"), ~c'\v') 111 | 112 |       assert_parsed(char(~S(\')), ~C(')) 113 |       assert_parsed(char(~S(\")), ~C(")) 114 |       assert_parsed(char(~S(\[)), ~C([)) 115 |       assert_parsed(char(~S(\])), ~C(])) 116 |       assert_parsed(char(~S(\-)), ~C(-)) 117 |       # \\ -> '\' 118 |       assert_parsed(char(<<92, 92>>), [92]) 119 |     end 120 | 121 |     test "parses octal values" do 122 |       assert_parsed(char(~S(\123)), [0o123]) 123 |       assert_parsed(char(~S(\77)), [0o77]) 124 |       assert_parsed(char(~S(\7)), [0o7]) 125 |     end 126 | 127 |     test "fails when nothing" do 128 |       refute_parsed(char("")) 129 |     end 130 |   end 131 | 132 |   describe "range" do 133 |     test "correctly produces a range" do 134 |       assert_parsed(range(~S(a-z)), [?a..?z]) 135 |     end 136 | 137 |     test "correctly produces a range with octal escape" do 138 |       assert_parsed(range(~S(\123-Z)), [0o123..?Z]) 139 |     end 140 | 141 |     test "produces a correct range matcher with octal" do 142 |       assert_parsed(range(~S(\141-\172)), [?a..?z]) 143 |     end 144 | 145 |     test "correctly parses a single char" do 146 |       assert_parsed(range(~S(a)), [?a]) 147 |     end 148 | 149 |     test "correctly parses a single escaped char" do 150 |       assert_parsed(range(~S(\123)), [0o123]) 151 |       assert_parsed(range(<<92, 92>>), [92]) 152 |     end 153 |   end 154 | end 155 | -------------------------------------------------------------------------------- /test/pegasus_test/identifier_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.IdentifierTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   alias Pegasus.Identifier 5 | 6 |   import NimbleParsec 7 |   import PegasusTest.Case 8 | 9 |   defparsec(:parser, Identifier.parser(empty(), true)) 10 |   defparsec(:definer, Identifier.parser(empty())) 11 | 12 |   describe "the identifier parser" do 13 |     test "produces a tagged identifier parser" do 14 |       assert_parser(parser("foo"), {:parser, :foo}) 15 |     end 16 | 17 |     test "fails on a non-identifier" do 18 |       refute_parsed(parser("5oo")) 19 |     end 20 |   end 21 | 22 |   describe "the identifier definer" do 23 |     test "produces a tagged identifier definition" do 24 |       assert_parser(definer("foo"), {:identifier, :foo}) 25 |     end 26 | 27 |     test "fails on a non-identifier" do 28 |       refute_parsed(definer("5oo")) 29 |     end 30 |   end 31 | end 32 | -------------------------------------------------------------------------------- /test/pegasus_test/literal_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.LiteralTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   alias Pegasus.Literal 5 | 6 |   import NimbleParsec 7 |   import PegasusTest.Case 8 | 9 |   defparsec(:parser, Literal.parser()) 10 | 11 |   describe "the literal parser" do 12 |     test "produces a literal string matcher with double quotes" do 13 |       assert_parser(parser(~S("foo")), {:literal, "foo"}) 14 |     end 15 | 16 |     test "produces a literal string matcher with single quotes" do 17 |       assert_parser(parser(~S('foo')), {:literal, "foo"}) 18 |     end 19 | 20 |     test "produces a literal string matcher with double quotes and escaped quote" do 21 |       assert_parser(parser(~S("\"foo\"")), {:literal, ~S("foo")}) 22 |     end 23 | 24 |     test "produces a literal 
string matcher with single quotes and escaped quote" do 25 |       assert_parser(parser(~S('\'foo\'')), {:literal, ~S('foo')}) 26 |     end 27 | 28 |     test "produces a literal string matcher with double quotes and escaped return" do 29 |       assert_parser(parser(~S("foo\n")), {:literal, ~s(foo\n)}) 30 |     end 31 | 32 |     test "produces a literal string matcher with double quotes and escaped number" do 33 |       assert_parser(parser(~S("fo\157")), {:literal, ~s(foo)}) 34 |     end 35 |   end 36 | end 37 | -------------------------------------------------------------------------------- /test/pegasus_test/sequence_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.SequenceTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   alias Pegasus.Sequence 5 | 6 |   import NimbleParsec 7 |   import PegasusTest.Case 8 | 9 |   defparsec(:parser, Sequence.parser()) 10 | 11 |   describe "the sequence parser" do 12 |     test "produces a single normal sequence" do 13 |       assert_parser(parser(~S("foo")), literal: "foo") 14 |     end 15 | 16 |     test "produces sequential normal sequences" do 17 |       assert_parser(parser(~S("foo" 'bar')), literal: "foo", literal: "bar") 18 |     end 19 | 20 |     test "identifies lookahead" do 21 |       assert_parser(parser(~S(&"foo")), lookahead: {:literal, "foo"}) 22 |     end 23 | 24 |     test "identifies lookahead_not" do 25 |       assert_parser(parser(~S(!"foo")), lookahead_not: {:literal, "foo"}) 26 |     end 27 | 28 |     test "identifies optional" do 29 |       assert_parser(parser(~S("foo"?)), optional: {:literal, "foo"}) 30 |     end 31 | 32 |     test "identifies repeat" do 33 |       assert_parser(parser(~S("foo"*)), repeat: {:literal, "foo"}) 34 |     end 35 | 36 |     test "identifies times" do 37 |       assert_parser(parser(~S("foo"+)), times: {:literal, "foo"}) 38 |     end 39 | 40 |     test "identifies lookahead_not, times" do 41 |       assert_parser(parser(~S(!"foo"+)), lookahead_not: {:times, {:literal, "foo"}}) 42 |     end 43 |   end 44 | end 45 | -------------------------------------------------------------------------------- /test/regresssion_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PegasusTest.RegressionTest do 2 |   use ExUnit.Case, async: true 3 | 4 |   require Pegasus 5 |   import PegasusTest.Case 6 | 7 |   Pegasus.parser_from_string(~S"slash <- [\\t]", slash: [parser: true]) 8 | 9 |   describe "slash in range works" do 10 |     test "slash" do 11 |       assert_parsed(slash("t")) 12 |       assert_parsed(slash("\\")) 13 |       refute_parsed(slash("a")) 14 |     end 15 |   end 16 | 17 |   Pegasus.parser_from_string( 18 |     ~S""" 19 |     hex <- [0-9a-fA-F] 20 | 21 |     char_escape 22 |         <- "\\x" hex hex 23 |          / "\\u{" hex+ "}" 24 |          / "\\" [nr\\t'"] 25 |     """, 26 |     char_escape: [parser: true] 27 |   ) 28 | 29 |   describe "char_escape" do 30 |     test "works with hex" do 31 |       assert_parsed(char_escape(~S"\x00")) 32 |     end 33 | 34 |     test "works with u" do 35 |       assert_parsed(char_escape(~S"\u{0a0a}")) 36 |     end 37 | 38 |     test "works with \\n" do 39 |       assert_parsed(char_escape(~S"\n")) 40 |     end 41 | 42 |     test "works with \\r" do 43 |       assert_parsed(char_escape(~S"\r")) 44 |     end 45 | 46 |     test "works with \\\\" do 47 |       assert_parsed(char_escape(~S"\\")) 48 |     end 49 | 50 |     test "works with \\t" do 51 |       assert_parsed(char_escape(~S"\t")) 52 |     end 53 | 54 |     test "works with \\'" do 55 |       assert_parsed(char_escape(~S"\'")) 56 |     end 57 | 58 |     test "works with \\\"" do 59 |       assert_parsed(char_escape(~S(\"))) 60 |     end 61 |   end 62 | 63 |   Pegasus.parser_from_string( 64 |     ~S""" 65 |     STRINGLITERALSINGLE <- "\"" string_char* "\"" 66 |     
string_char <- [^\\"\n] 67 | """, 68 | STRINGLITERALSINGLE: [parser: :string_literal] 69 | ) 70 | 71 | describe "string literal works" do 72 | test "optional, not used" do 73 | assert_parsed(string_literal(~S("string_literal"))) 74 | end 75 | end 76 | 77 | Pegasus.parser_from_string( 78 | ~S""" 79 | ox80_oxBF <- [\200-\277] 80 | oxF4 <- '\364' 81 | ox80_ox8F <- [\200-\217] 82 | oxF1_oxF3 <- [\361-\363] 83 | oxF0 <- '\360' 84 | ox90_0xBF <- [\220-\277] 85 | oxEE_oxEF <- [\356-\357] 86 | oxED <- '\355' 87 | ox80_ox9F <- [\200-\237] 88 | oxE1_oxEC <- [\341-\354] 89 | oxE0 <- '\340' 90 | oxA0_oxBF <- [\240-\277] 91 | oxC2_oxDF <- [\302-\337] 92 | 93 | mb_utf8_literal <- 94 | oxF4 ox80_ox8F ox80_oxBF ox80_oxBF 95 | / oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF 96 | / oxF0 ox90_0xBF ox80_oxBF ox80_oxBF 97 | / oxEE_oxEF ox80_oxBF ox80_oxBF 98 | / oxED ox80_ox9F ox80_oxBF 99 | / oxE1_oxEC ox80_oxBF ox80_oxBF 100 | / oxE0 oxA0_oxBF ox80_oxBF 101 | / oxC2_oxDF ox80_oxBF 102 | """, 103 | mb_utf8_literal: [parser: true] 104 | ) 105 | 106 | describe "utf-8 descriptor" do 107 | test "works" do 108 | assert_parsed(mb_utf8_literal("🚀")) 109 | end 110 | end 111 | 112 | Pegasus.parser_from_string( 113 | ~S""" 114 | byte_range <- [\302-\304] 115 | """, 116 | byte_range: [parser: true] 117 | ) 118 | 119 | describe "single byte range" do 120 | test "works" do 121 | assert_parsed(byte_range(<<0o303>>)) 122 | end 123 | end 124 | 125 | Pegasus.parser_from_string( 126 | ~S""" 127 | octal_escape_three_digit <- '\065' 128 | octal_escape_two_digit <- '\65' 129 | octal_escape_one_digit <- '\5' 130 | """, 131 | octal_escape_three_digit: [parser: true], 132 | octal_escape_two_digit: [parser: true], 133 | octal_escape_one_digit: [parser: true] 134 | ) 135 | 136 | describe "octal escape" do 137 | test "works with a leading zero" do 138 | assert_parsed(octal_escape_three_digit("5")) 139 | end 140 | 141 | test "works with two digit" do 142 | assert_parsed(octal_escape_two_digit("5")) 143 | end 144 | 145 | test "works with one digit" do 146 | assert_parsed(octal_escape_one_digit(<<5>>)) 147 | end 148 | end 149 | end 150 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | --------------------------------------------------------------------------------