├── test ├── data │ ├── empty.txt │ ├── nonchar.txt │ └── foo.txt ├── test_helper.exs ├── input │ ├── file_input_test.exs │ └── memory_input_test.exs ├── monad │ └── parse_test.exs └── parser_test.exs ├── .gitignore ├── mix.lock ├── bench ├── text_bench.exs ├── binary_bench.exs └── bench.exs ├── .travis.yml ├── docs.exs ├── lib ├── ex_parsec │ ├── monad │ │ └── parse.ex │ ├── token.ex │ ├── position.ex │ ├── input.ex │ ├── error.ex │ ├── input │ │ ├── file_input.ex │ │ └── memory_input.ex │ ├── reply.ex │ ├── binary.ex │ ├── parser.ex │ ├── helpers.ex │ ├── text.ex │ └── base.ex └── ex_parsec.ex ├── LICENSE ├── README.md └── mix.exs /test/data/empty.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/data/nonchar.txt: -------------------------------------------------------------------------------- 1 | ￾ -------------------------------------------------------------------------------- /test/data/foo.txt: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /bench/graphs 3 | /bench/snapshots 4 | /cover 5 | /deps 6 | /docs 7 | *.dump 8 | *.ez 9 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{"benchfella": {:git, "git://github.com/alco/benchfella.git", "aea9290e135afadaeda0c6098451741e14280643", []}, 2 | "coverex": {:package, "0.0.7"}, 3 | "dialyze": {:package, "0.1.2"}, 4 | "earmark": 
{:package, "0.1.10"}, 5 | "ex_doc": {:package, "0.5.2"}, 6 | "monad": {:package, "1.0.3"}} 7 | -------------------------------------------------------------------------------- /bench/text_bench.exs: -------------------------------------------------------------------------------- 1 | Code.require_file(Path.join("bench", "bench.exs")) 2 | 3 | defmodule Bench.ExParsec.Text do 4 | use Bench.ExParsec, mode: Text 5 | 6 | @chars "sdfgjakghvnlkasjlghavsdjlkfhgvaskljmtvmslkdgfdaskl" 7 | 8 | bench_text "many any_char", @chars do 9 | many(any_char()) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /bench/binary_bench.exs: -------------------------------------------------------------------------------- 1 | Code.require_file(Path.join("bench", "bench.exs")) 2 | 3 | defmodule Bench.ExParsec.Binary do 4 | use Bench.ExParsec, mode: Binary 5 | 6 | @bytes <<43, 63, 54, 134, 43, 64, 78, 43, 254, 65, 124, 186, 43, 56>> 7 | 8 | bench_binary "many bits", @bytes do 9 | many(bits(1)) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | otp_release: 3 | - 17.0 4 | - 17.1 5 | before_install: 6 | - git clone git://github.com/elixir-lang/elixir.git 7 | - cd elixir 8 | - git checkout master 9 | - make 10 | - cd .. 
11 | before_script: 12 | - export PATH=`pwd`/elixir/bin:$PATH 13 | - mix local.hex --force 14 | script: 15 | - mix make 16 | - mix test 17 | -------------------------------------------------------------------------------- /docs.exs: -------------------------------------------------------------------------------- 1 | Mix.Task.run("make") 2 | ghp = "gh-pages" 3 | {b, 0} = System.cmd("git", ["rev-parse", "--abbrev-ref", "HEAD"]) 4 | {_, 0} = System.cmd("git", ["checkout", ghp]) 5 | paths = Path.wildcard("*", [match_dot: true]) 6 | Enum.each(paths, fn(p) -> if !(p in [".git", "docs"]), do: File.rm_rf!(p) end) 7 | File.cp_r!("docs/.", ".") 8 | File.rm_rf!("docs") 9 | {_, 0} = System.cmd("git", ["add", "--all"]) 10 | {_, 0} = System.cmd("git", ["commit", "--message", "\"Update documentation.\""]) 11 | {_, 0} = System.cmd("git", ["push", "origin", ghp]) 12 | {_, 0} = System.cmd("git", ["checkout", String.strip(b)]) 13 | -------------------------------------------------------------------------------- /lib/ex_parsec/monad/parse.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Monad.Parse do 2 | @moduledoc """ 3 | Provides monadic syntax for writing parsers. 
4 | """ 5 | 6 | use Monad 7 | 8 | alias ExParsec.Base 9 | 10 | @doc false 11 | @spec bind(ExParsec.t(state, result1), 12 | ((result1) -> ExParsec.t(state, result2))) :: 13 | ExParsec.t(state, result2) 14 | when [state: var, result1: var, result2: var] 15 | def bind(p, f) do 16 | Base.bind(p, f) 17 | end 18 | 19 | @doc false 20 | @spec return(result) :: ExParsec.t(term(), result) when [result: var] 21 | def return(x) do 22 | Base.return(x) 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /bench/bench.exs: -------------------------------------------------------------------------------- 1 | defmodule Bench.ExParsec do 2 | defmacro __using__(opts) do 3 | quote do 4 | use ExParsec, unquote(opts) 5 | 6 | require Benchfella 7 | 8 | import unquote(__MODULE__) 9 | end 10 | end 11 | 12 | defmacro bench_text(name, value, [do: block]) do 13 | quote do 14 | Benchfella.bench unquote(name) do 15 | ExParsec.parse_value(unquote(value), unquote(block)) 16 | end 17 | end 18 | end 19 | 20 | defmacro bench_binary(name, value, [do: block]) do 21 | quote do 22 | Benchfella.bench unquote(name) do 23 | ExParsec.parse_binary(unquote(value), unquote(block)) 24 | end 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/ex_parsec/token.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Token do 2 | @moduledoc """ 3 | Represents an input token. 4 | 5 | * `position` is the token's position in the source text. 6 | * `data` is any data associated with the token. 7 | 8 | Tokens are commonly emitted by an initial lexing/tokenization pass and then 9 | consumed by the actual parsing pass. 10 | """ 11 | 12 | alias ExParsec.Position 13 | 14 | defstruct position: nil, 15 | data: nil 16 | 17 | @typedoc """ 18 | The type of an `ExParsec.Token` instance. 
19 | """ 20 | @type t(data) :: %__MODULE__{position: Position.t(), 21 | data: data} 22 | 23 | @doc """ 24 | Checks if `value` is an `ExParsec.Token` instance. 25 | """ 26 | @spec token?(term()) :: boolean() 27 | def token?(value) do 28 | match?(%__MODULE__{}, value) 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/ex_parsec/position.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Position do 2 | @moduledoc """ 3 | Represents a position in source code text. 4 | 5 | * `index` is the zero-based index into the file. 6 | * `line` is the one-based line number. 7 | * `column` is the one-based column number. 8 | 9 | Position tracking is done at the granularity of UTF-8 codepoints. 10 | """ 11 | 12 | defstruct index: 0, 13 | line: 1, 14 | column: 1 15 | 16 | @typedoc """ 17 | The type of an `ExParsec.Position` instance. 18 | """ 19 | @type t() :: %__MODULE__{index: non_neg_integer(), 20 | line: pos_integer(), 21 | column: pos_integer()} 22 | 23 | @doc """ 24 | Checks if `value` is an `ExParsec.Position` instance. 25 | """ 26 | @spec position?(term()) :: boolean() 27 | def position?(value) do 28 | match?(%__MODULE__{}, value) 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/ex_parsec/input.ex: -------------------------------------------------------------------------------- 1 | defprotocol ExParsec.Input do 2 | @moduledoc """ 3 | Represents a source of input data for parser functions. 4 | 5 | This protocol should be implemented for types that can be used to feed data 6 | (codepoints, tokens, etc) to parser functions. Note that an implementation 7 | must only emit one kind of input data, not several. 8 | 9 | This is a relatively low-level interface that doesn't even provide position 10 | tracking. In general, beyond implementing this protocol, you should not be 11 | using it. 
12 | """ 13 | 14 | @doc """ 15 | Fetches data from the input. If no more data is available, `:eof` is 16 | returned. If invalid input data is encountered, a tuple containing `:error` 17 | and a reason is returned. Otherwise, returns a tuple containing the 18 | advanced input and the fetched data. 19 | 20 | `opts` can be used for implementation-specific options. 21 | """ 22 | @spec get(t()) :: {t(), term()} | {:error, term()} | :eof 23 | def get(input, opts \\ []) 24 | end 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Alex Rønne Petersen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /test/input/file_input_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Test.ExParsec.Input.FileInput do 2 | use ExUnit.Case, async: true 3 | 4 | alias ExParsec.Input.FileInput 5 | alias ExParsec.Input 6 | 7 | @data_dir Path.join("test", "data") 8 | @foo_txt Path.join(@data_dir, "foo.txt") 9 | @nonchar_txt Path.join(@data_dir, "nonchar.txt") 10 | @empty_txt Path.join(@data_dir, "empty.txt") 11 | 12 | test "basic get" do 13 | File.open!(@foo_txt, [:read, :utf8], fn(dev) -> 14 | input = %FileInput{device: dev} 15 | 16 | assert {input, "f"} = Input.get(input) 17 | assert {input, "o"} = Input.get(input) 18 | assert {input, "o"} = Input.get(input) 19 | assert {input, "\n"} = Input.get(input) 20 | assert :eof = Input.get(input) 21 | end) 22 | end 23 | 24 | test "get noncharacter" do 25 | File.open!(@nonchar_txt, [:read, :utf8], fn(dev) -> 26 | input = %FileInput{device: dev} 27 | 28 | assert {:error, :noncharacter} = Input.get(input) 29 | end) 30 | end 31 | 32 | test "empty get" do 33 | File.open!(@empty_txt, [:read, :utf8], fn(dev) -> 34 | input = %FileInput{device: dev} 35 | 36 | assert :eof = Input.get(input) 37 | end) 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/ex_parsec/error.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Error do 2 | @moduledoc """ 3 | Represents a parse error encountered when executing a parser function. 4 | 5 | * `message` is the error message. 6 | * `kind` is the error kind. `nil` if the error doesn't fit into the list 7 | of standard error kinds. 8 | * `position` is the position in the input data where the error occurred. 9 | Can be `nil` if the input doesn't support position tracking. 
10 | """ 11 | 12 | alias ExParsec.Position 13 | 14 | defstruct message: nil, 15 | kind: nil, 16 | position: nil 17 | 18 | @typedoc """ 19 | The type of an `ExParsec.Error` instance. 20 | """ 21 | @type t() :: %__MODULE__{message: String.t(), 22 | kind: kind(), 23 | position: Position.t() | nil} 24 | 25 | @typedoc """ 26 | The various error kinds. 27 | """ 28 | @type kind() :: nil | 29 | :eof | 30 | :io | 31 | :expected | 32 | :expected_eof | 33 | :expected_char | 34 | :expected_string 35 | 36 | @doc """ 37 | Checks if `value` is an `ExParsec.Error` instance. 38 | """ 39 | @spec error?(term()) :: boolean() 40 | def error?(value) do 41 | match?(%__MODULE__{}, value) 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /test/monad/parse_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Test.ExParsec.Monad.Parse do 2 | use ExUnit.Case, async: true 3 | 4 | import ExParsec.Text 5 | 6 | require ExParsec.Monad.Parse 7 | 8 | alias ExParsec.Input.MemoryInput 9 | alias ExParsec.Monad.Parse 10 | alias ExParsec.Parser 11 | alias ExParsec.Reply 12 | 13 | test "successful binding" do 14 | value = "x" 15 | input = %MemoryInput{value: value} 16 | parser = %Parser{input: input} 17 | 18 | p = Parse.m do 19 | x <- any_char() 20 | return x 21 | end 22 | 23 | assert %Reply{status: :ok} = p.(parser) 24 | end 25 | 26 | test "successful let binding" do 27 | value = "" 28 | input = %MemoryInput{value: value} 29 | parser = %Parser{input: input} 30 | 31 | p = Parse.m do 32 | let x = "foo" 33 | let do 34 | y = "bar" 35 | z = "baz" 36 | end 37 | return x <> y <> z 38 | end 39 | 40 | assert %Reply{status: :ok} = p.(parser) 41 | end 42 | 43 | test "unsuccessful binding" do 44 | value = "xz" 45 | input = %MemoryInput{value: value} 46 | parser = %Parser{input: input} 47 | 48 | p = Parse.m do 49 | x <- char("x") 50 | y <- char("y") 51 | return x <> y 52 | end 53 | 54 | assert %Reply{status: 
:error} = p.(parser) 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /lib/ex_parsec/input/file_input.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Input.FileInput do 2 | @moduledoc """ 3 | Provides data from an I/O device in UTF-8 mode. 4 | 5 | * `device` is the I/O device. 6 | * `position` is the current position in the device. 7 | """ 8 | 9 | defstruct device: nil, 10 | position: 0 11 | 12 | @typedoc """ 13 | The type of an `ExParsec.Input.FileInput` instance. 14 | """ 15 | @type t() :: %__MODULE__{device: File.io_device(), 16 | position: non_neg_integer()} 17 | 18 | @doc """ 19 | Checks if `value` is an `ExParsec.Input.FileInput` instance. 20 | """ 21 | @spec file_input?(term()) :: boolean() 22 | def file_input?(value) do 23 | match?(%__MODULE__{}, value) 24 | end 25 | end 26 | 27 | defimpl ExParsec.Input, for: ExParsec.Input.FileInput do 28 | alias ExParsec.Input.FileInput 29 | 30 | @spec get(FileInput.t(), Keyword.t()) :: {FileInput.t(), String.codepoint()} | 31 | {:error, term()} | :eof 32 | def get(input, _) do 33 | {:ok, pos} = :file.position(input.device, input.position) 34 | 35 | case IO.read(input.device, 1) do 36 | {:error, r} -> {:error, r} 37 | :eof -> :eof 38 | cp -> 39 | if String.valid_character?(cp) do 40 | {%FileInput{input | :position => pos + byte_size(cp)}, cp} 41 | else 42 | {:error, :noncharacter} 43 | end 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /lib/ex_parsec/reply.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Reply do 2 | @moduledoc """ 3 | Represents the result of executing a parser function. 4 | 5 | * `parser` is the advanced `ExParsec.Parser` instance. If `status` is not 6 | `:ok`, this field must be `nil`. 
7 | * `status` is either `:ok` for a successful parse, `:error` for an 8 | unsuccessful parse, or `:fatal` for an unrecoverable parse error. 9 | * `errors` is the list of `ExParsec.Error` instances representing any 10 | errors encountered when running the parser function. This list can 11 | contain duplicate entries. 12 | * `result` is whatever result data the parser function returned. If 13 | `status` is not `:ok`, this field must be `nil`. 14 | 15 | If a parser function returns a `status` value of `:fatal`, the calling 16 | function must propagate this value further up the call stack such that the 17 | entire parse operation fails. 18 | """ 19 | 20 | alias ExParsec.Error 21 | alias ExParsec.Parser 22 | 23 | defstruct parser: nil, 24 | status: :ok, 25 | errors: [], 26 | result: nil 27 | 28 | @typedoc """ 29 | The type of an `ExParsec.Reply` instance. 30 | """ 31 | @type t(state, result) :: %__MODULE__{parser: Parser.t(state) | nil, 32 | status: :ok | :error | :fatal, 33 | errors: [Error.t()], 34 | result: result} 35 | 36 | @doc """ 37 | Checks if `value` is an `ExParsec.Reply` instance. 38 | """ 39 | @spec reply?(term()) :: boolean() 40 | def reply?(value) do 41 | match?(%__MODULE__{}, value) 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ExParsec 2 | 3 | [![Build Status](https://travis-ci.org/alexrp/ex_parsec.png?branch=master)](https://travis-ci.org/alexrp/ex_parsec) 4 | [![Hex Version](https://img.shields.io/hexpm/v/ex_parsec.svg)](https://hex.pm/packages/ex_parsec) 5 | [![Hex Downloads](https://img.shields.io/hexpm/dt/ex_parsec.svg)](https://hex.pm/packages/ex_parsec) 6 | 7 | A parser combinator library inspired by Parsec. 
8 | 9 | ## Usage 10 | 11 | Add ExParsec as a dependency in your `mix.exs` file: 12 | 13 | ```elixir 14 | def deps do 15 | [ {:ex_parsec, "~> x.y.z"} ] 16 | end 17 | ``` 18 | 19 | Replace `x.y.z` with whatever released version you would like to depend on. 20 | 21 | After you are done, run `mix deps.get` in your shell to fetch and compile 22 | ExParsec. Start an interactive Elixir shell with `iex -S mix`. 23 | 24 | ```iex 25 | iex> import ExParsec.Base; import ExParsec.Text 26 | nil 27 | iex> ExParsec.parse_value "foo", many(any_char()) 28 | {:ok, nil, ["f", "o", "o"]} 29 | iex> ExParsec.parse_value "[x]", between(char("["), char("x"), char("]")) 30 | {:ok, nil, "x"} 31 | iex> ExParsec.parse_value " spa ces ", 32 | sequence([skip(spaces), 33 | times(any_char(), 3), 34 | skip(space), 35 | times(any_char(), 3), 36 | skip(spaces), 37 | eof]) 38 | {:ok, nil, [nil, ["s", "p", "a"], nil, ["c", "e", "s"], nil, nil]} 39 | ``` 40 | 41 | ## Features 42 | 43 | * Can parse context-sensitive grammars. 44 | * High-quality, customizable error messages. 45 | * Full UTF-8 string support. 46 | * Non-text input such as binary data and tokens. 47 | * Support for theoretically infinitely large files. 48 | * Monadic parse blocks based on Elixir macros. 49 | * Simple, extensible API surface. 
# ## Examples
#
# --------------------------------------------------------------------------------
# /test/input/memory_input_test.exs:
# --------------------------------------------------------------------------------
defmodule Test.ExParsec.Input.MemoryInput do
  use ExUnit.Case, async: true

  alias ExParsec.Input.MemoryInput
  alias ExParsec.Input

  # A string input yields one codepoint per call and then reports end of input.
  test "basic get" do
    src = %MemoryInput{value: "foo"}

    assert {src, "f"} = Input.get(src)
    assert {src, "o"} = Input.get(src)
    assert {src, "o"} = Input.get(src)
    assert Input.get(src) == :eof
  end

  # U+FFFE is a Unicode noncharacter and must be rejected.
  test "get noncharacter" do
    src = %MemoryInput{value: "\x{0fffe}"}

    assert Input.get(src) == {:error, :noncharacter}
  end

  # An empty string is immediately at end of input.
  test "empty get" do
    src = %MemoryInput{value: ""}

    assert Input.get(src) == :eof
  end

  # With `is_string: false` a binary is consumed bit-wise; `:n` selects how
  # many bits to fetch and defaults to a single bit.
  test "bitstring get" do
    src = %MemoryInput{value: <<3>>, is_string: false}

    assert {src, <<0 :: size(6)>>} = Input.get(src, n: 6)
    assert {src, <<1 :: size(1)>>} = Input.get(src)
    assert {src, <<1 :: size(1)>>} = Input.get(src)
    assert Input.get(src) == :eof
  end

  # A list of arbitrary terms is consumed element by element.
  test "term get" do
    src = %MemoryInput{value: [:a, :b, :c], is_string: false}

    assert {src, :a} = Input.get(src)
    assert {src, :b} = Input.get(src)
    assert {src, :c} = Input.get(src)
    assert Input.get(src) == :eof
  end

  # A list of codepoints behaves like any other list of terms.
  test "codepoints get" do
    src = %MemoryInput{value: ["a", "b", "c"], is_string: false}

    assert {src, "a"} = Input.get(src)
    assert {src, "b"} = Input.get(src)
    assert {src, "c"} = Input.get(src)
    assert Input.get(src) == :eof
  end
end

# --------------------------------------------------------------------------------
# /mix.exs:
# --------------------------------------------------------------------------------
# Mix project definition for ExParsec: package metadata, dependencies,
# documentation settings and a few task aliases.
defmodule ExParsec.Mixfile do
  use Mix.Project

  # Project configuration consumed by Mix.
  def project() do
    [name: "ExParsec",
     description: "A parser combinator library inspired by Parsec.",
     app: :ex_parsec,
     version: "0.2.1",
     elixir: "~> 0.15.1",
     source_url: "https://github.com/alexrp/ex_parsec",
     homepage_url: "https://hex.pm/packages/ex_parsec",
     deps: deps(),
     docs: docs(),
     package: package(),
     aliases: aliases(),
     test_coverage: coverage()]
  end

  # OTP application configuration; `:monad` is the only listed application.
  def application() do
    [applications: [:monad]]
  end

  # Dependencies; everything except `:monad` is restricted to dev or test.
  defp deps() do
    [{:benchfella, github: "alco/benchfella", only: [:dev]},
     {:coverex, "~> 0.0.7", only: [:test]},
     {:dialyze, "~> 0.1.2", only: [:dev]},
     {:earmark, "~> 0.1.10", only: [:dev]},
     {:ex_doc, "~> 0.5.2", only: [:dev]},
     {:monad, "~> 1.0.3"}]
  end

  # Documentation options. `source_ref` is the current git commit, or
  # "master" when `git rev-parse` exits with a non-zero status.
  defp docs() do
    {out, status} = System.cmd("git", ["rev-parse", "--verify", "--quiet", "HEAD"])

    # The value is taken from the `if` expression itself: a rebinding made
    # inside an `if` body is not visible after it on current Elixir versions,
    # which silently dropped the "master" fallback. The command output ends
    # with a newline, so it is stripped before being used as a ref.
    ref = if status == 0, do: String.strip(out), else: "master"

    [main: "README",
     readme: true,
     source_ref: ref]
  end

  # Hex package metadata.
  defp package() do
    [contributors: ["Alex Rønne Petersen"],
     licenses: ["MIT"],
     links: %{"GitHub" => "https://github.com/alexrp/ex_parsec",
              "Documentation" => "http://alexrp.com/ex_parsec"}]
  end

  # Task aliases: `make` fetches/compiles deps and builds docs, `test` always
  # runs traced with coverage, and `wipe` cleans and removes generated
  # directories.
  defp aliases() do
    [make: ["deps.get", "deps.compile", "docs"],
     test: ["test --trace --cover"],
     wipe: ["clean", &wipe/1]]
  end

  # Test coverage tool configuration.
  defp coverage() do
    [tool: Coverex.Task]
  end

  # Removes build output, benchmark artifacts, coverage data and generated docs.
  defp wipe(_) do
    File.rm_rf!("_build")
    File.rm_rf!(Path.join("bench", "graphs"))
    File.rm_rf!(Path.join("bench", "snapshots"))
    File.rm_rf!("cover")
    File.rm_rf!("docs")
  end
end

# --------------------------------------------------------------------------------
# /lib/ex_parsec/input/memory_input.ex:
# --------------------------------------------------------------------------------
defmodule ExParsec.Input.MemoryInput do
  @moduledoc """
  Provides data from an in-memory UTF-8 string, a list of tokens, a list of
  any arbitrary term type, or from a bitstring.

  * `value` is the binary containing the encoded string, list of terms, or
    bitstring.
  * `is_string` indicates whether `value` is a string or something else. This
    is needed to disambiguate since strings and bitstrings are the same type.
    This should be `false` if `value` is a bitstring that is not meant to be
    read as UTF-8 text. It is not consulted when `value` is a list.
  """

  defstruct value: nil,
            is_string: true

  @typedoc """
  The type of an `ExParsec.Input.MemoryInput` instance.
  """
  @type t() :: %__MODULE__{value: bitstring() | [term()],
                           is_string: boolean()}

  @doc """
  Checks if `value` is an `ExParsec.Input.MemoryInput` instance.
  """
  @spec memory_input?(term()) :: boolean()
  def memory_input?(value) do
    match?(%__MODULE__{}, value)
  end
end

defimpl ExParsec.Input, for: ExParsec.Input.MemoryInput do
  alias ExParsec.Input.MemoryInput

  # Fetches the next piece of data from `input`:
  #
  #   * list value   - pops the head element; `:eof` on an empty list.
  #   * string value - pops the next codepoint; `{:error, :noncharacter}` if
  #                    `String.valid_character?/1` rejects it; `:eof` when
  #                    the string is exhausted.
  #   * otherwise    - pops `opts[:n]` bits (1 when the option is absent);
  #                    `:eof` when fewer than that many bits remain.
  @spec get(MemoryInput.t(), Keyword.t()) :: {MemoryInput.t(), term()} |
                                             {:error, term()} | :eof
  def get(input, opts) do
    cond do
      # Lists are checked first, so `is_string` is irrelevant for them.
      is_list(input.value) ->
        case input.value do
          [h | t] -> {%MemoryInput{input | value: t}, h}
          [] -> :eof
        end

      input.is_string ->
        case String.next_codepoint(input.value) do
          {cp, rest} ->
            if String.valid_character?(cp) do
              {%MemoryInput{input | value: rest}, cp}
            else
              {:error, :noncharacter}
            end

          nil ->
            :eof
        end

      true ->
        n = opts[:n] || 1

        case input.value do
          # Split off exactly `n` leading bits; the remainder may have any
          # bit length, hence `bitstring` rather than `binary`.
          <<bits :: bitstring-size(n), rest :: bitstring>> ->
            {%MemoryInput{input | value: rest}, bits}

          _ ->
            :eof
        end
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/ex_parsec/binary.ex:
-------------------------------------------------------------------------------- 1 | defmodule ExParsec.Binary do 2 | @moduledoc """ 3 | Provides common parsers that operate on binary data. 4 | 5 | These parsers all require that the input is bitstring data. They also 6 | assume that the `ExParsec.Input.get/2` implementation supports an `:n` 7 | option specifying how many bits to fetch. 8 | """ 9 | 10 | import ExParsec.Base 11 | import ExParsec.Helpers 12 | 13 | alias ExParsec.Parser 14 | 15 | @doc """ 16 | Parses `n` bits of data. 17 | """ 18 | @spec bits(non_neg_integer()) :: ExParsec.t(term(), bitstring()) 19 | defparser bits(n) in p do 20 | case Parser.get(p, [n: n]) do 21 | {:error, r} -> failure([error(p, :io, "encountered I/O error: #{inspect(r)}")]) 22 | :eof -> failure([error(p, :eof, "expected #{n} bits but encountered end of file")]) 23 | {p, bits} -> success(p, bits) 24 | end 25 | end 26 | 27 | @doc """ 28 | Parses `n` bytes of data. 29 | """ 30 | @spec bytes(non_neg_integer()) :: ExParsec.t(term(), binary()) 31 | defparser bytes(n) in p do 32 | case Parser.get(p, [n: n * 8]) do 33 | {:error, r} -> failure([error(p, :io, "encountered I/O error: #{inspect(r)}")]) 34 | :eof -> failure([error(p, :eof, "expected #{n} bytes but encountered end of file")]) 35 | {p, bytes} -> success(p, bytes) 36 | end 37 | end 38 | 39 | @doc """ 40 | Parses an unsigned `n`-bit integer encoded with the given `endianness`. 41 | """ 42 | @spec uint(pos_integer(), :be | :le) :: 43 | ExParsec.t(term(), non_neg_integer()) 44 | defparser uint(n, endianness) in p do 45 | map(bits(n), fn(bin) -> 46 | case endianness do 47 | :be -> <<b :: integer-size(n)-unsigned-big>> = bin 48 | :le -> <<b :: integer-size(n)-unsigned-little>> = bin 49 | end 50 | 51 | b 52 | end).(p) 53 | end 54 | 55 | @doc """ 56 | Parses a signed `n`-bit integer encoded with the given `endianness`.
57 | """ 58 | @spec sint(pos_integer(), :be | :le) :: 59 | ExParsec.t(term(), integer()) 60 | defparser sint(n, endianness) in p do 61 | map(bits(n), fn(bin) -> 62 | case endianness do 63 | :be -> <<b :: integer-size(n)-signed-big>> = bin 64 | :le -> <<b :: integer-size(n)-signed-little>> = bin 65 | end 66 | 67 | b 68 | end).(p) 69 | end 70 | 71 | @doc """ 72 | Parses an `n`-bit floating point value. 73 | """ 74 | @spec float(32 | 64) :: ExParsec.t(term(), float()) 75 | defparser float(n) in p do 76 | map(bits(n), fn(<<b :: float-size(n)>>) -> b end).(p) 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /lib/ex_parsec/parser.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Parser do 2 | @moduledoc """ 3 | Represents the state of an executing parse session. 4 | 5 | * `input` is the input data. 6 | * `position` is the current position in the input data. This is `nil` if 7 | the input data does not support text position tracking. 8 | * `state` is the current user state. 9 | """ 10 | 11 | alias ExParsec.Input 12 | alias ExParsec.Position 13 | alias ExParsec.Token 14 | 15 | defstruct input: nil, 16 | position: nil, 17 | state: nil 18 | 19 | @typedoc """ 20 | The type of an `ExParsec.Parser` instance. 21 | """ 22 | @type t(state) :: %__MODULE__{input: Input.t(), 23 | position: Position.t() | nil, 24 | state: state} 25 | 26 | @doc """ 27 | Checks if `value` is an `ExParsec.Parser` instance. 28 | """ 29 | @spec parser?(term()) :: boolean() 30 | def parser?(value) do 31 | match?(%__MODULE__{}, value) 32 | end 33 | 34 | @doc """ 35 | Fetches data from the input. If no more data is available, `:eof` is 36 | returned. If invalid input data is encountered, a tuple containing `:error` 37 | and a reason is returned. Otherwise, returns a tuple containing the 38 | advanced parser and the fetched data. 39 | 40 | `opts` is passed through to `ExParsec.Input.get/2`.
41 | 42 | This function is a wrapper on top of `ExParsec.Input.get/2`, adding 43 | position tracking (codepoint index and line/column numbers) for input data 44 | that supports it. Position information can be found on the `position` field 45 | of `ExParsec.Parser`. 46 | """ 47 | @spec get(t(state), Keyword.t()) :: {t(state), String.codepoint() | Token.t()} | 48 | {:error, term()} | :eof when [state: var] 49 | def get(parser, opts \\ []) do 50 | case Input.get(parser.input, opts) do 51 | e = {:error, _} -> e 52 | :eof -> :eof 53 | {inp, data} -> 54 | pos = cond do 55 | is_binary(data) -> 56 | pos = parser.position || %Position{} 57 | pos = %Position{pos | :index => pos.index + 1} 58 | 59 | if data == "\n" do 60 | %Position{pos | :line => pos.line + 1, :column => 1} 61 | else 62 | %Position{pos | :column => pos.column + 1} 63 | end 64 | Token.token?(data) -> data.position 65 | true -> nil 66 | end 67 | 68 | {%__MODULE__{input: inp, position: pos, state: parser.state}, data} 69 | end 70 | end 71 | end 72 | -------------------------------------------------------------------------------- /test/parser_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Test.ExParsec.Parser do 2 | use ExUnit.Case, async: true 3 | 4 | alias ExParsec.Input.MemoryInput 5 | alias ExParsec.Parser 6 | alias ExParsec.Position 7 | alias ExParsec.Token 8 | 9 | test "basic get" do 10 | value = "foo" 11 | input = %MemoryInput{value: value} 12 | parser = %Parser{input: input} 13 | 14 | assert {parser, "f"} = Parser.get(parser) 15 | assert {parser, "o"} = Parser.get(parser) 16 | assert {parser, "o"} = Parser.get(parser) 17 | assert :eof = Parser.get(parser) 18 | end 19 | 20 | test "get noncharacter" do 21 | value = "\x{0fffe}" 22 | input = %MemoryInput{value: value} 23 | parser = %Parser{input: input} 24 | 25 | assert {:error, :noncharacter} = Parser.get(parser) 26 | end 27 | 28 | test "empty get" do 29 | value = "" 30 | input = %MemoryInput{value: 
value} 31 | parser = %Parser{input: input} 32 | 33 | assert :eof = Parser.get(parser) 34 | end 35 | 36 | test "text position tracking" do 37 | value = "fo\no" 38 | input = %MemoryInput{value: value} 39 | parser = %Parser{input: input, position: %Position{}} 40 | 41 | assert %Position{index: 0, line: 1, column: 1} = parser.position 42 | assert {parser = %Parser{position: %Position{index: 1, line: 1, column: 2}}, "f"} = Parser.get(parser) 43 | assert {parser = %Parser{position: %Position{index: 2, line: 1, column: 3}}, "o"} = Parser.get(parser) 44 | assert {parser = %Parser{position: %Position{index: 3, line: 2, column: 1}}, "\n"} = Parser.get(parser) 45 | assert {%Parser{position: %Position{index: 4, line: 2, column: 2}}, "o"} = Parser.get(parser) 46 | end 47 | 48 | test "token position tracking" do 49 | value = [%Token{position: %Position{index: 3, line: 5, column: 32}}, 50 | %Token{position: %Position{index: 47, line: 9, column: 6}}] 51 | input = %MemoryInput{value: value} 52 | parser = %Parser{input: input} 53 | 54 | assert {parser = %Parser{position: %Position{index: 3, line: 5, column: 32}}, _} = Parser.get(parser) 55 | assert {%Parser{position: %Position{index: 47, line: 9, column: 6}}, _} = Parser.get(parser) 56 | end 57 | 58 | test "no position tracking" do 59 | value = [:a, :b, :c] 60 | input = %MemoryInput{value: value} 61 | parser = %Parser{input: input} 62 | 63 | assert {parser = %Parser{position: nil}, :a} = Parser.get(parser) 64 | assert {parser = %Parser{position: nil}, :b} = Parser.get(parser) 65 | assert {%Parser{position: nil}, :c} = Parser.get(parser) 66 | end 67 | 68 | test "option passing" do 69 | value = <<3, 2, 1, 0>> 70 | input = %MemoryInput{value: value} 71 | parser = %Parser{input: input} 72 | 73 | assert {_, <<3>>} = Parser.get(parser, [n: 8]) 74 | end 75 | 76 | test "state propagation" do 77 | value = "x" 78 | input = %MemoryInput{value: value} 79 | state = :foo 80 | parser = %Parser{input: input, state: state} 81 | 82 | assert 
{%Parser{state: :foo}, _} = Parser.get(parser) 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /lib/ex_parsec/helpers.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Helpers do 2 | @moduledoc """ 3 | Provides utility functions and macros for writing parser functions. 4 | """ 5 | 6 | require ExParsec.Monad.Parse 7 | 8 | alias ExParsec.Error 9 | alias ExParsec.Monad.Parse 10 | alias ExParsec.Parser 11 | alias ExParsec.Reply 12 | 13 | @doc """ 14 | Defines a parser function. This is a convenience macro that eliminates some 15 | very common syntax noise. 16 | 17 | Example: 18 | 19 | defparser return(value) in p do 20 | success(p, value) 21 | end 22 | 23 | The above is equivalent to: 24 | 25 | def return(value) do 26 | fn(p) -> 27 | success(p, value) 28 | end 29 | end 30 | """ 31 | defmacro defparser(sig, [do: block]) do 32 | [call, parg] = elem(sig, 2) 33 | 34 | quote [location: :keep] do 35 | def unquote(call) do 36 | fn(unquote(parg)) -> 37 | unquote(block) 38 | end 39 | end 40 | end 41 | end 42 | 43 | @doc """ 44 | Defines a monadic parser function. 45 | 46 | A monadic parser function's body is defined through the 47 | `ExParsec.Monad.Parse.m/1` macro. Inside such a function, the `<-` 48 | operator can be used to bind names to parser invocations. Values 49 | can be returned by using the special `return` macro. 50 | 51 | For example, consider this parser to parse a field declaration in a 52 | programming language: 53 | 54 | use ExParsec 55 | 56 | defparser field() in p do 57 | bind(type(), fn(ty) -> 58 | sequence([skip_many(space), 59 | char(":"), 60 | skip_many(space), 61 | bind(identifier(), fn(id) -> 62 | return(%Field{type: ty, name: id}) 63 | end)]) 64 | end) 65 | end 66 | 67 | This is quite unwieldy. Modifying this at a later point would be a rather 68 | complicated matter. 
69 | 70 | We can write this in a much more readable and maintainable way with monadic 71 | syntax: 72 | 73 | use ExParsec 74 | 75 | defmparser field() do 76 | ty <- type() 77 | skip_many(space) 78 | char(":") 79 | skip_many(space) 80 | id <- identifier() 81 | return %Field{type: ty, name: id} 82 | end 83 | 84 | It's now very clear what the parser is doing. There is virtually no syntax 85 | noise. Modifying this parser later down the road would also be much easier. 86 | """ 87 | defmacro defmparser(sig, [do: block]) do 88 | quote [location: :keep] do 89 | require ExParsec.Monad.Parse 90 | 91 | def unquote(sig) do 92 | Parse.m do 93 | unquote(block) 94 | end 95 | end 96 | end 97 | end 98 | 99 | @doc """ 100 | Constructs a successful `ExParsec.Reply` with `result` as the result value. 101 | `errors` can optionally be used to propagate error messages, if any. 102 | """ 103 | @spec success(Parser.t(state), result, [Error.t()]) :: 104 | Reply.t(state, result) when [state: var, result: var] 105 | def success(parser, result, errors \\ []) do 106 | %Reply{parser: parser, 107 | errors: errors, 108 | result: result} 109 | end 110 | 111 | @doc """ 112 | Constructs an unsuccessful `ExParsec.Reply` with `status` (either `:error` 113 | or `:fatal`) as the error kind and `errors` as the list of errors. 114 | """ 115 | @spec failure(:error | :fatal, [Error.t()]) :: Reply.t(term(), nil) 116 | def failure(status \\ :error, errors) do 117 | %Reply{status: status, 118 | errors: errors} 119 | end 120 | 121 | @doc """ 122 | Constructs an `ExParsec.Error` with the given `message` and the current 123 | position from `parser`. 
124 | """ 125 | @spec error(Parser.t(term()), Error.kind(), String.t()) :: Error.t() 126 | def error(parser, kind \\ nil, message) do 127 | %Error{message: message, 128 | kind: kind, 129 | position: parser.position} 130 | end 131 | end 132 | -------------------------------------------------------------------------------- /lib/ex_parsec.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec do 2 | @moduledoc """ 3 | A parser combinator library inspired by Parsec. 4 | 5 | This module provides convenient entry point functions for running parsers. 6 | 7 | This module can also be `use`d. Doing so `import`s the following modules: 8 | 9 | * `ExParsec` 10 | * `ExParsec.Base` 11 | * `ExParsec.Helpers` 12 | 13 | It will also `require` the following modules: 14 | 15 | * `ExParsec.Monad.Parse` 16 | 17 | It will also `alias` the following modules: 18 | 19 | * `ExParsec.Error` 20 | * `ExParsec.Input` 21 | * `ExParsec.Monad.Parse` 22 | * `ExParsec.Parser` 23 | * `ExParsec.Position` 24 | * `ExParsec.Reply` 25 | 26 | A `:mode` option can be given, indicating which module containing common 27 | parsers should be `import`ed. Currently, `Text` and `Binary` are valid 28 | values for this option. If the option isn't given, `Text` is the default. 29 | `nil` may be given to not `import` any module. The mode must be given as a 30 | compile-time literal. 31 | 32 | Examples: 33 | 34 | defmodule TextParsing do 35 | use ExParsec 36 | 37 | # ... 38 | end 39 | 40 | defmodule BinaryParsing do 41 | use ExParsec, mode: Binary 42 | 43 | # ... 44 | end 45 | 46 | defmodule TokenParsing do 47 | use ExParsec, mode: nil 48 | 49 | # ... 
50 | end 51 | """ 52 | 53 | alias ExParsec.Error 54 | alias ExParsec.Input.FileInput 55 | alias ExParsec.Input.MemoryInput 56 | alias ExParsec.Input 57 | alias ExParsec.Parser 58 | alias ExParsec.Position 59 | alias ExParsec.Reply 60 | 61 | @doc false 62 | defmacro __using__(opts) do 63 | mod = Module.concat(ExParsec, Macro.expand(opts[:mode], __ENV__) || Text) 64 | 65 | quote do 66 | import ExParsec 67 | import ExParsec.Base 68 | import ExParsec.Helpers 69 | 70 | import unquote(mod) 71 | 72 | require ExParsec.Monad.Parse 73 | 74 | alias ExParsec.Error 75 | alias ExParsec.Input 76 | alias ExParsec.Monad.Parse 77 | alias ExParsec.Parser 78 | alias ExParsec.Position 79 | alias ExParsec.Reply 80 | end 81 | end 82 | 83 | @typedoc """ 84 | The type of a parser function. 85 | 86 | A parser function receives the `ExParsec.Parser` instance as its first and 87 | only argument. It is expected to return an `ExParsec.Reply` instance that 88 | describes the result of applying the function. 89 | """ 90 | @type t(state, result) :: ((Parser.t(state)) -> {Reply.t(state, result)}) 91 | 92 | @doc """ 93 | Parses the given `input` by applying the parser `function` to it. `state` 94 | can optionally be given to parse with user state. `position` defines the 95 | initial input position. By default, it's set to a default-initialized 96 | `ExParsec.Position` instance. It should be `nil` for non-text, non-token 97 | inputs. 98 | 99 | Returns either: 100 | 101 | * A tuple containing `:ok`, the final user state, and the result. 102 | * A tuple containing `:error` and a list of `ExParsec.Error` instances. 
103 | """ 104 | @spec parse(Input.t(), t(state, result), Position.t() | nil, state) :: 105 | {:ok, state, result} | {:error, [Error.t()]} 106 | when [state: var, result: var] 107 | def parse(input, function, state \\ nil, position \\ %Position{}) do 108 | parser = %Parser{input: input, position: position, state: state} 109 | reply = function.(parser) 110 | 111 | case reply.status do 112 | :ok -> {:ok, reply.parser.state, reply.result} 113 | _ -> 114 | {:error, 115 | reply.errors |> 116 | Enum.uniq() |> 117 | Enum.sort(&(&1.message < &2.message))} 118 | end 119 | end 120 | 121 | @doc """ 122 | Constructs an `ExParsec.Input.MemoryInput` instance with the given `value` 123 | (a string or list of codepoints) and forwards to `parse/4`. 124 | """ 125 | @spec parse_text(String.t() | [String.codepoint()], t(state, result), state) :: 126 | {:ok, state, result} | {:error, [Error.t()]} 127 | when [state: var, result: var] 128 | def parse_text(value, function, state \\ nil) do 129 | parse(%MemoryInput{value: value}, function, state) 130 | end 131 | 132 | @doc """ 133 | Constructs an `ExParsec.Input.MemoryInput` instance with the given `value` 134 | (a bitstring) and forwards to `parse/4`. 135 | """ 136 | @spec parse_bitstring(bitstring(), t(state, result), state) :: 137 | {:ok, state, result} | {:error, [Error.t()]} 138 | when [state: var, result: var] 139 | def parse_bitstring(value, function, state \\ nil) do 140 | parse(%MemoryInput{value: value, is_string: false}, function, state, nil) 141 | end 142 | 143 | @doc """ 144 | Constructs an `ExParsec.Input.MemoryInput` instance with the given `list` 145 | of terms (e.g. tokens) and forwards to `parse/4`. 
146 | """ 147 | @spec parse_terms([term()], t(state, result), state) :: 148 | {:ok, state, result} | {:error, [Error.t()]} 149 | when [state: var, result: var] 150 | def parse_terms(list, function, state \\ nil) do 151 | parse(%MemoryInput{value: list, is_string: false}, function, state, nil) 152 | end 153 | 154 | @doc """ 155 | Constructs an `ExParsec.Input.FileInput` instance with the given `device` 156 | and forwards to `parse/4`. 157 | """ 158 | @spec parse_file(File.io_device(), t(state, result), state) :: 159 | {:ok, state, result} | {:error, [Error.t()]} 160 | when [state: var, result: var] 161 | def parse_file(device, function, state \\ nil) do 162 | parse(%FileInput{device: device}, function, state) 163 | end 164 | end 165 | -------------------------------------------------------------------------------- /lib/ex_parsec/text.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Text do 2 | @moduledoc """ 3 | Provides common parsers that operate on text. 4 | 5 | These parsers all require that the input data is codepoints. 6 | """ 7 | 8 | import ExParsec.Base 9 | import ExParsec.Helpers 10 | 11 | alias ExParsec.Parser 12 | 13 | @doc """ 14 | Parses a codepoint. Returns the codepoint as result. 15 | """ 16 | @spec any_char() :: ExParsec.t(term(), String.codepoint()) 17 | defparser any_char() in p do 18 | case Parser.get(p) do 19 | {:error, r} -> failure([error(p, :io, "encountered I/O error: #{inspect(r)}")]) 20 | :eof -> failure([error(p, :eof, "expected a character but encountered end of file")]) 21 | {p, cp} -> success(p, cp) 22 | end 23 | end 24 | 25 | @doc """ 26 | Expects and parses a codepoint that satisfies the criteria required by 27 | `function`. `name` is used for error message generation. 
28 | """ 29 | @spec satisfy(String.t(), ((String.codepoint()) -> boolean())) :: 30 | ExParsec.t(term(), String.codepoint()) 31 | defparser satisfy(name, function) in p do 32 | r = any_char().(p) 33 | 34 | if r.status == :ok do 35 | cp = r.result 36 | 37 | if function.(cp) do 38 | r 39 | else 40 | failure([error(p, :expected_char, "expected #{name} but found #{inspect(cp)}")]) 41 | end 42 | else 43 | r 44 | end 45 | end 46 | 47 | @doc """ 48 | Expects and parses the given `codepoint`. On success, returns the codepoint 49 | as result. 50 | """ 51 | @spec char(String.codepoint()) :: ExParsec.t(term(), String.codepoint()) 52 | defparser char(codepoint) in p do 53 | satisfy(inspect(codepoint), fn(c) -> c == codepoint end).(p) 54 | end 55 | 56 | @doc """ 57 | Expects and parses a codepoint that's present in `codepoints`, which can 58 | either be a list of codepoints, or a string that's converted to a list of 59 | codepoints. 60 | """ 61 | @spec one_of([String.codepoint()] | String.t()) :: 62 | ExParsec.t(term(), String.codepoint()) 63 | defparser one_of(codepoints) in p do 64 | if is_binary(codepoints) do 65 | codepoints = String.codepoints(codepoints) 66 | end 67 | 68 | name = codepoints |> 69 | Enum.map(&inspect/1) |> 70 | Enum.join(", ") 71 | 72 | satisfy(name, fn(c) -> c in codepoints end).(p) 73 | end 74 | 75 | @doc """ 76 | The opposite of `one_of/1`: Expects a codepoint that's *not* in 77 | `codepoints`. Otherwise, works like `one_of/1`. 78 | """ 79 | @spec none_of([String.codepoint()] | String.t()) :: 80 | ExParsec.t(term(), String.codepoint()) 81 | defparser none_of(codepoints) in p do 82 | if is_binary(codepoints) do 83 | codepoints = String.codepoints(codepoints) 84 | end 85 | 86 | name = codepoints |> 87 | Enum.map(&inspect/1) |> 88 | Enum.join(", ") 89 | 90 | satisfy(name, fn(c) -> !(c in codepoints) end).(p) 91 | end 92 | 93 | @doc """ 94 | Expects and parses any white space character. 
95 | """ 96 | @spec space() :: ExParsec.t(term(), String.codepoint()) 97 | defparser space() in p do 98 | satisfy("any white space character", fn(c) -> String.strip(c) == "" end).(p) 99 | end 100 | 101 | @doc """ 102 | Expects and parses a tab (`"\t"`) character. 103 | """ 104 | @spec tab() :: ExParsec.t(term(), String.codepoint()) 105 | defparser tab() in p do 106 | satisfy(inspect("\t"), fn(c) -> c == "\t" end).(p) 107 | end 108 | 109 | @doc """ 110 | Expects and parses a newline sequence. This can either be a `"\n"` or a 111 | `"\r"` followed by `"\n"`. Either way, returns `"\n"` as result. 112 | """ 113 | @spec newline() :: ExParsec.t(term(), String.codepoint()) 114 | defparser newline() in p do 115 | bind(option(char("\r")), fn(_) -> char("\n") end).(p) 116 | end 117 | 118 | @doc """ 119 | Expects and parses any letter in `?a .. ?z`. 120 | """ 121 | @spec lower() :: ExParsec.t(term(), String.codepoint()) 122 | defparser lower() in p do 123 | satisfy("any lower case letter", fn(<<c :: utf8>>) -> 124 | c in ?a .. ?z 125 | end).(p) 126 | end 127 | 128 | @doc """ 129 | Expects and parses any letter in `?A .. ?Z`. 130 | """ 131 | @spec upper() :: ExParsec.t(term(), String.codepoint()) 132 | defparser upper() in p do 133 | satisfy("any upper case letter", fn(<<c :: utf8>>) -> 134 | c in ?A .. ?Z 135 | end).(p) 136 | end 137 | 138 | @doc """ 139 | Expects and parses any letter in `?A .. ?Z` and `?a .. ?z`. 140 | """ 141 | @spec letter() :: ExParsec.t(term(), String.codepoint()) 142 | defparser letter() in p do 143 | either(lower(), upper()).(p) 144 | end 145 | 146 | @doc """ 147 | Expects and parses any digit in `?0 .. ?1`. 148 | """ 149 | @spec bin_digit() :: ExParsec.t(term(), String.codepoint()) 150 | defparser bin_digit() in p do 151 | satisfy("any binary digit", fn(<<c :: utf8>>) -> 152 | c in ?0 .. ?1 153 | end).(p) 154 | end 155 | 156 | @doc """ 157 | Expects and parses any digit in `?0 .. ?7`. 
158 | """ 159 | @spec oct_digit() :: ExParsec.t(term(), String.codepoint()) 160 | defparser oct_digit() in p do 161 | satisfy("any octal digit", fn(<<c :: utf8>>) -> 162 | c in ?0 .. ?7 163 | end).(p) 164 | end 165 | 166 | @doc """ 167 | Expects and parses any digit in `?0 .. ?9`. 168 | """ 169 | @spec digit() :: ExParsec.t(term(), String.codepoint()) 170 | defparser digit() in p do 171 | satisfy("any decimal digit", fn(<<c :: utf8>>) -> 172 | c in ?0 .. ?9 173 | end).(p) 174 | end 175 | 176 | @doc """ 177 | Expects and parses any digit in `?0 .. ?9`, `?A .. ?F`, and `?a .. ?f`. 178 | """ 179 | @spec hex_digit() :: ExParsec.t(term(), String.codepoint()) 180 | defparser hex_digit() in p do 181 | satisfy("any hexadecimal digit", fn(<<c :: utf8>>) -> 182 | c in ?0 .. ?9 || c in ?A .. ?F || c in ?a .. ?f 183 | end).(p) 184 | end 185 | 186 | @doc """ 187 | Expects and parses any alphanumeric character (i.e. `?A .. ?Z`, `?a .. ?z`, 188 | and `?0 .. ?9`). 189 | """ 190 | defparser alphanumeric() in p do 191 | either(letter(), digit()).(p) 192 | end 193 | 194 | @doc """ 195 | Expects and parses the given `string`. On success, returns the string as 196 | result. 197 | """ 198 | @spec string(String.t()) :: ExParsec.t(term(), String.t()) 199 | defparser string(string) in p do 200 | sz = length(String.codepoints(string)) 201 | 202 | loop = fn(loop, accp, acc) -> 203 | cond do 204 | acc == string -> success(accp, acc) 205 | length(String.codepoints(acc)) >= sz -> 206 | failure([error(p, :expected_string, "expected #{inspect(string)} but found #{inspect(acc)}")]) 207 | true -> 208 | case Parser.get(accp) do 209 | {:error, r} -> 210 | failure([error(accp, :io, "Encountered I/O error: #{inspect(r)}")]) 211 | :eof -> 212 | failure([error(accp, :eof, "expected #{inspect(string)} but encountered end of file")]) 213 | {accp, cp} -> loop.(loop, accp, acc <> cp) 214 | end 215 | end 216 | end 217 | 218 | loop.(loop, p, "") 219 | end 220 | 221 | @doc """ 222 | Parses as many white space characters as possible. 
223 | """ 224 | @spec spaces() :: ExParsec.t(term(), [String.codepoint()]) 225 | defparser spaces() in p do 226 | many(space()).(p) 227 | end 228 | end 229 | -------------------------------------------------------------------------------- /lib/ex_parsec/base.ex: -------------------------------------------------------------------------------- 1 | defmodule ExParsec.Base do 2 | @moduledoc """ 3 | Provides fundamental combinators and parsers. 4 | """ 5 | 6 | import ExParsec.Helpers 7 | 8 | alias ExParsec.Input 9 | alias ExParsec.Parser 10 | alias ExParsec.Position 11 | alias ExParsec.Reply 12 | 13 | @doc """ 14 | Returns the user state as result. 15 | """ 16 | @spec get_state() :: ExParsec.t(term(), term()) 17 | defparser get_state() in p do 18 | success(p, p.state) 19 | end 20 | 21 | @doc """ 22 | Sets the user state to `state`. 23 | """ 24 | @spec set_state(state) :: ExParsec.t(state, nil) when [state: var] 25 | defparser set_state(state) in p do 26 | success(%Parser{p | :state => state}, nil) 27 | end 28 | 29 | @doc """ 30 | Updates the user state by applying `updater` to it. 31 | """ 32 | @spec update_state(((state) -> state)) :: ExParsec.t(state, nil) when [state: var] 33 | defparser update_state(updater) in p do 34 | success(%Parser{p | :state => updater.(p.state)}, nil) 35 | end 36 | 37 | @doc """ 38 | Returns the current position as result. 39 | """ 40 | @spec get_position() :: ExParsec.t(term(), Position.t()) 41 | defparser get_position() in p do 42 | success(p, p.position) 43 | end 44 | 45 | @doc """ 46 | Returns `value` as result. 47 | """ 48 | @spec return(value) :: ExParsec.t(term(), value) when [value: var] 49 | defparser return(value) in p do 50 | success(p, value) 51 | end 52 | 53 | @doc """ 54 | Fails without an error message. 55 | """ 56 | @spec zero() :: ExParsec.t(term(), nil) 57 | defparser zero() in _ do 58 | failure([]) 59 | end 60 | 61 | @doc """ 62 | Fails with the given error `message`. 
63 | """ 64 | @spec fail(String.t()) :: ExParsec.t(term(), nil) 65 | defparser fail(message) in p do 66 | failure([error(p, message)]) 67 | end 68 | 69 | @doc """ 70 | Fails fatally with the given error `message`. 71 | """ 72 | @spec fail_fatal(String.t()) :: ExParsec.t(term(), nil) 73 | defparser fail_fatal(message) in p do 74 | failure(:fatal, [error(p, message)]) 75 | end 76 | 77 | @doc """ 78 | Only succeeds at the end of the input data. 79 | """ 80 | @spec eof() :: ExParsec.t(term(), nil) 81 | defparser eof() in p do 82 | # We can skip `ExParsec.Parser.get/2` since we just need to check for 83 | # EOF - we don't care about position info. 84 | if Input.get(p.input) == :eof do 85 | success(p, nil) 86 | else 87 | failure([error(p, :expected_eof, "expected end of file")]) 88 | end 89 | end 90 | 91 | @doc """ 92 | Applies `parser` and passes its result to `function`. `function`'s return 93 | value is returned as the result. 94 | """ 95 | @spec map(ExParsec.t(state, result1), ((result1) -> result2)) :: 96 | ExParsec.t(state, result2) 97 | when [state: var, result1: var, result2: var] 98 | defparser map(parser, function) in p do 99 | pipe([parser], fn([r]) -> function.(r) end).(p) 100 | end 101 | 102 | @doc """ 103 | Applies `parser` and discards its result. 104 | """ 105 | @spec ignore(ExParsec.t(state, term())) :: ExParsec.t(state, nil) 106 | when [state: var] 107 | defparser ignore(parser) in p do 108 | map(parser, fn(_) -> nil end).(p) 109 | end 110 | 111 | @doc """ 112 | Applies `parser` and passes its result as the only argument to `function`. 113 | `function` is expected to return a parser. That parser is then applied and 114 | its result is returned. 
115 | """ 116 | @spec bind(ExParsec.t(state, result1), ((result1) -> ExParsec.t(state, result2))) :: 117 | ExParsec.t(state, result2) when [state: var, result1: var, result2: var] 118 | defparser bind(parser, function) in p do 119 | r1 = parser.(p) 120 | 121 | if r1.status == :ok do 122 | parser2 = function.(r1.result) 123 | r2 = parser2.(r1.parser) 124 | errs = List.flatten([r2.errors | r1.errors]) 125 | 126 | %Reply{r2 | :errors => errs} 127 | else 128 | r1 129 | end 130 | end 131 | 132 | @doc """ 133 | Applies `parser` if possible. Returns a tuple containing `:ok` and the 134 | result, or `nil` if `parser` could not be applied. 135 | """ 136 | @spec option(ExParsec.t(state, result)) :: ExParsec.t(state, {:ok, result} | nil) 137 | when [state: var, result: var] 138 | defparser option(parser) in p do 139 | r = parser.(p) 140 | 141 | case r.status do 142 | :ok -> %Reply{r | :result => {:ok, r.result}} 143 | :error -> success(p, nil, r.errors) 144 | :fatal -> r 145 | end 146 | end 147 | 148 | @doc """ 149 | Identical to applying `parser` normally, except that if applying `parser` 150 | results in a fatal error, it will be turned into a regular error. 151 | """ 152 | @spec attempt(ExParsec.t(state, result)) :: ExParsec.t(state, result) 153 | when [state: var, result: var] 154 | defparser attempt(parser) in p do 155 | r = parser.(p) 156 | 157 | if r.status == :ok do 158 | r 159 | else 160 | failure(r.errors) 161 | end 162 | end 163 | 164 | @doc """ 165 | First tries to apply `parser1`. If that fails, tries to apply `parser2`. If 166 | that fails, this combinator fails. Otherwise, returns the first successful 167 | result value obtained. 168 | """ 169 | @spec either(ExParsec.t(state, term()), ExParsec.t(state, term())) :: 170 | ExParsec.t(state, term()) when [state: var] 171 | defparser either(parser1, parser2) in p do 172 | choice([parser1, parser2]).(p) 173 | end 174 | 175 | @doc """ 176 | Tries to apply each parser in `parsers` until one succeeds. 
This is a 177 | variant of `either/2` generalized for any number of parsers. 178 | """ 179 | @spec choice([ExParsec.t(state, term()), ...]) :: 180 | ExParsec.t(state, term()) when [state: var] 181 | defparser choice(parsers) in p do 182 | try do 183 | errs = Enum.reduce(parsers, [], fn(parser, errs) -> 184 | r = parser.(p) 185 | errs = List.flatten([r.errors | errs]) 186 | 187 | if r.status in [:ok, :fatal] do 188 | throw({:"$ex_parsec", %Reply{r | :errors => errs}}) 189 | end 190 | 191 | errs 192 | end) 193 | 194 | failure(errs) 195 | catch 196 | :throw, {:"$ex_parsec", r} -> r 197 | end 198 | end 199 | 200 | @doc """ 201 | Applies each parser in `parsers`. Passes all result values in a list to 202 | `function`. `function`'s return value is returned as the result. 203 | """ 204 | @spec pipe([ExParsec.t(state, term())], (([term()]) -> result)) :: 205 | ExParsec.t(state, result) when [state: var, result: var] 206 | defparser pipe(parsers, function) in p do 207 | try do 208 | {p, errs, ress} = Enum.reduce(parsers, {p, [], []}, fn(parser, acc) -> 209 | {p, errs, ress} = acc 210 | 211 | r = parser.(p) 212 | errs = List.flatten([r.errors | errs]) 213 | 214 | if r.status != :ok do 215 | throw({:"$ex_parsec", %Reply{r | :errors => errs}}) 216 | end 217 | 218 | {r.parser, errs, [r.result | ress]} 219 | end) 220 | 221 | res = function.(Enum.reverse(ress)) 222 | 223 | success(p, res, errs) 224 | catch 225 | :throw, {:"$ex_parsec", r} -> r 226 | end 227 | end 228 | 229 | @doc """ 230 | Applies each parser in `parsers`. Returns all results in a list. 231 | """ 232 | @spec sequence([ExParsec.t(state, term())]) :: ExParsec.t(state, term()) 233 | when [state: var] 234 | defparser sequence(parsers) in p do 235 | pipe(parsers, fn(list) -> list end).(p) 236 | end 237 | 238 | @doc """ 239 | Applies `parser1` and `parser2` in sequence. Passes the result values as 240 | two arguments to `function`. `function`'s return value is returned as the 241 | result. 
242 | """ 243 | @spec both(ExParsec.t(state, result1), ExParsec.t(state, result2), 244 | ((result1, result2) -> result3)) :: ExParsec.t(state, result3) 245 | when [state: var, result1: var, result2: var, result3: var] 246 | defparser both(parser1, parser2, function) in p do 247 | pipe([parser1, parser2], fn([a, b]) -> function.(a, b) end).(p) 248 | end 249 | 250 | @doc """ 251 | Applies `parser1` and `parser2` in sequence. Returns the result of 252 | `parser1`. 253 | """ 254 | @spec pair_left(ExParsec.t(state, result), ExParsec.t(state, term())) :: 255 | ExParsec.t(state, result) when [state: var, result: var] 256 | defparser pair_left(parser1, parser2) in p do 257 | both(parser1, parser2, fn(a, _) -> a end).(p) 258 | end 259 | 260 | @doc """ 261 | Applies `parser1` and `parser2` in sequence. Returns the result of 262 | `parser2`. 263 | """ 264 | @spec pair_right(ExParsec.t(state, term()), ExParsec.t(state, result)) :: 265 | ExParsec.t(state, result) when [state: var, result: var] 266 | defparser pair_right(parser1, parser2) in p do 267 | both(parser1, parser2, fn(_, b) -> b end).(p) 268 | end 269 | 270 | @doc """ 271 | Applies `parser1` and `parser2` in sequence. Returns the result of 272 | both parsers as a tuple. 273 | """ 274 | @spec pair_both(ExParsec.t(state, result1), ExParsec.t(state, result2)) :: 275 | ExParsec.t(state, {result1, result2}) 276 | when [state: var, result1: var, result2: var] 277 | defparser pair_both(parser1, parser2) in p do 278 | both(parser1, parser2, fn(a, b) -> {a, b} end).(p) 279 | end 280 | 281 | @doc """ 282 | Applies `parser1`, `parser2`, and `parser3` in sequence. Returns the result 283 | of `parser2`. 
284 | """ 285 | @spec between(ExParsec.t(state, term()), ExParsec.t(state, result), 286 | ExParsec.t(state, term())) :: ExParsec.t(state, result) 287 | when [state: var, result: var] 288 | defparser between(parser1, parser2, parser3) in p do 289 | pipe([parser1, parser2, parser3], fn([_, b, _]) -> b end).(p) 290 | end 291 | 292 | @doc """ 293 | Applies `parser` to the input data `n` times. Returns results in a list. 294 | """ 295 | @spec times(ExParsec.t(state, result), non_neg_integer()) :: 296 | ExParsec.t(state, [result]) when [state: var, result: var] 297 | defparser times(parser, n) in p do 298 | if n == 0 do 299 | success(p, []) 300 | else 301 | try do 302 | {p, errs, ress} = Enum.reduce(1 .. n, {p, [], []}, fn(_, acc) -> 303 | {p, errs, ress} = acc 304 | 305 | r = parser.(p) 306 | errs = List.flatten([r.errors | errs]) 307 | 308 | if r.status != :ok do 309 | throw({:"$ex_parsec", %Reply{r | :errors => errs}}) 310 | end 311 | 312 | {r.parser, errs, [r.result | ress]} 313 | end) 314 | 315 | success(p, Enum.reverse(ress), errs) 316 | catch 317 | :throw, {:"$ex_parsec", r} -> r 318 | end 319 | end 320 | end 321 | 322 | @doc """ 323 | Applies `parser` one or more times. Returns all results in a list. 324 | """ 325 | @spec many1(ExParsec.t(state, result)) :: ExParsec.t(state, [result, ...]) 326 | when [state: var, result: var] 327 | defparser many1(parser) in p do 328 | loop = fn(loop, p, ress, errs) -> 329 | # We can skip `ExParsec.Parser.get/2` since we just need to check for 330 | # EOF - we don't care about position info. 
331 | if Input.get(p.input) == :eof do 332 | success(p, Enum.reverse(ress), errs) 333 | else 334 | r = parser.(p) 335 | errs = List.flatten([r.errors | errs]) 336 | 337 | case r.status do 338 | :ok -> loop.(loop, r.parser, [r.result | ress], errs) 339 | :error -> success(p, Enum.reverse(ress), errs) 340 | :fatal -> %Reply{r | :errors => errs} 341 | end 342 | end 343 | end 344 | 345 | r = parser.(p) 346 | 347 | if r.status == :ok do 348 | loop.(loop, r.parser, [r.result], r.errors) 349 | else 350 | r 351 | end 352 | end 353 | 354 | @doc """ 355 | Applies `parser` as many times as possible. Returns all results in a list. 356 | """ 357 | @spec many(ExParsec.t(state, result)) :: ExParsec.t(state, [result]) 358 | when [state: var, result: var] 359 | defparser many(parser) in p do 360 | either(many1(parser), return([])).(p) 361 | end 362 | 363 | @doc """ 364 | Applies `parser1` one or more times, separated by `parser2`. Returns 365 | results of `parser1` in a list. 366 | """ 367 | @spec sep_by1(ExParsec.t(state, result), ExParsec.t(state, term())) :: 368 | ExParsec.t(state, [result, ...]) when [state: var, result: var] 369 | defparser sep_by1(parser1, parser2) in p do 370 | pipe([parser1, many(pair_right(parser2, parser1))], 371 | fn([h, t]) -> [h | t] end).(p) 372 | end 373 | 374 | @doc """ 375 | Applies `parser1` as many times as possible, separated by `parser2`. 376 | Returns results of `parser1` in a list. 377 | """ 378 | @spec sep_by(ExParsec.t(state, result), ExParsec.t(state, term())) :: 379 | ExParsec.t(state, [result]) when [state: var, result: var] 380 | defparser sep_by(parser1, parser2) in p do 381 | either(pipe([parser1, many(pair_right(parser2, parser1))], 382 | fn([h, t]) -> [h | t] end), 383 | return([])).(p) 384 | end 385 | 386 | @doc """ 387 | Applies `parser` if possible. Discards the result. 
388 | """ 389 | @spec skip(ExParsec.t(state, term())) :: ExParsec.t(state, nil) 390 | when [state: var] 391 | defparser skip(parser) in p do 392 | # TODO: Optimize this so we don't build up a ton of data. 393 | ignore(option(parser)).(p) 394 | end 395 | 396 | @doc """ 397 | Applies `parser` one or more times. Discards the results. 398 | """ 399 | @spec skip_many1(ExParsec.t(state, term())) :: ExParsec.t(state, nil) 400 | when [state: var] 401 | defparser skip_many1(parser) in p do 402 | # TODO: Optimize this so we don't build up a ton of data. 403 | ignore(many1(parser)).(p) 404 | end 405 | 406 | @doc """ 407 | Applies `parser` as many times as possible. Discards the results. 408 | """ 409 | @spec skip_many(ExParsec.t(state, term())) :: ExParsec.t(state, nil) 410 | when [state: var] 411 | defparser skip_many(parser) in p do 412 | # TODO: Optimize this so we don't build up a ton of data. 413 | ignore(many(parser)).(p) 414 | end 415 | 416 | @doc ~S""" 417 | Applies `parser`. If it fails, replaces its error with one generated based 418 | on `name` of the form `expected #{name}`. 419 | """ 420 | @spec label(ExParsec.t(state, result), String.t()) :: 421 | ExParsec.t(state, result) when [state: var, result: var] 422 | defparser label(parser, name) in p do 423 | r = parser.(p) 424 | 425 | if r.status != :ok do 426 | %Reply{r | :errors => error(p, :expected, "expected #{name}")} 427 | else 428 | r 429 | end 430 | end 431 | 432 | @doc ~S""" 433 | Applies `parser`. If it fails, its errors are propagated in addition to an 434 | extra error generated based on `name` of the form `"expected #{name}"`. 
435 | """ 436 | @spec describe(ExParsec.t(state, result), String.t()) :: 437 | ExParsec.t(state, result) when [state: var, result: var] 438 | defparser describe(parser, name) in p do 439 | r = parser.(p) 440 | 441 | if r.status != :ok do 442 | %Reply{r | :errors => [error(p, :expected, "expected #{name}") | r.errors]} 443 | else 444 | r 445 | end 446 | end 447 | end 448 | --------------------------------------------------------------------------------