├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── 00-introduction.md ├── 01-lexing.md ├── 02-parsing.md ├── 03-parse-contexts.md ├── 04-backtracking.md ├── 05-lexer-modes.md └── 06-pratt-parsing.md ├── gleam.toml ├── manifest.toml ├── src ├── glearray_ffi.erl ├── glearray_ffi.mjs ├── nibble.gleam └── nibble │ ├── lexer.gleam │ ├── pratt.gleam │ ├── predicates.gleam │ └── vendor │ └── glearray.gleam └── test ├── docs └── lexer_modes │ └── indentation_test.gleam ├── examples ├── calculator_test.gleam ├── env_test.gleam ├── json_test.gleam ├── readme_test.gleam ├── sexpr_test.gleam └── simple_test.gleam ├── nibble_test.gleam └── unit ├── lexer_test.gleam └── parser_test.gleam /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: ["v*"] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v3.1.0 13 | - uses: erlef/setup-beam@v1.16.0 14 | with: 15 | otp-version: "27.0" 16 | rebar3-version: "3" 17 | gleam-version: "1.5.1" 18 | 19 | - run: | 20 | version="v$(cat gleam.toml | grep -m 1 "version" | sed -r "s/version *= *\"([[:digit:].]+)\"/\1/")" 21 | if [ "$version" != "${{ github.ref_name }}" ]; then 22 | echo "tag '${{ github.ref_name }}' does not match the version in gleam.toml" 23 | echo "expected a tag name 'v$version'" 24 | exit 1 25 | fi 26 | name: check version 27 | 28 | - run: gleam format --check 29 | 30 | - run: gleam publish -y 31 | env: 32 | HEXPM_USER: ${{ secrets.HEX_USERNAME }} 33 | HEXPM_PASS: ${{ secrets.HEX_PASSWORD }} 34 | 35 | - uses: softprops/action-gh-release@v1 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.beam 2 | *.ez 3 | build 4 | erl_crash.dump 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Hayleigh Thompson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in the 5 | Software without restriction, including without limitation the rights to use, 6 | copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 7 | Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nibble 2 | 3 | A lexer and parser combinator library inspired by [`elm/parser`](https://github.com/elm/parser). 
4 | 5 | [![Package Version](https://img.shields.io/hexpm/v/nibble)](https://hex.pm/packages/nibble) 6 | [![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/nibble/) 7 | 8 | ✨ This project is written in **pure Gleam** so you can use it anywhere Gleam 9 | runs: Erlang, Elixir, Node, Deno, and the browser! 10 | 11 | ## Quick start 12 | 13 | If you just want to get a feel for what nibble can do, check out the example 14 | below. 15 | 16 | ```gleam 17 | import gleam/option.{None, Some} 18 | import nibble.{do, return} 19 | import nibble/lexer 20 | 21 | type Point { 22 | Point(x: Int, y: Int) 23 | } 24 | 25 | type Token { 26 | Num(Int) 27 | LParen 28 | RParen 29 | Comma 30 | } 31 | 32 | pub fn main() { 33 | // Your lexer knows how to take an input string and 34 | // turn it into a flat list of tokens. You define the 35 | // type of token you want to use, but nibble will wrap 36 | // that up in its own `Token` type that includes the 37 | // source span and original lexeme for each token. 38 | let lexer = 39 | lexer.simple([ 40 | lexer.int(Num), 41 | lexer.token("(", LParen), 42 | lexer.token(")", RParen), 43 | lexer.token(",", Comma), 44 | // Skip over whitespace, we don't care about it! 45 | lexer.whitespace(Nil) 46 | |> lexer.ignore, 47 | ]) 48 | 49 | // Your parser(s!) know how to transform a list of 50 | // tokens into whatever you want. You have the full 51 | // power of Gleam here, so you can go wild! 52 | let int_parser = { 53 | // Use `take_map` to only consume certain kinds of tokens and transform the 54 | // result. 55 | use tok <- nibble.take_map("expected number") 56 | case tok { 57 | Num(n) -> Some(n) 58 | _ -> None 59 | } 60 | } 61 | 62 | let parser = { 63 | use _ <- do(nibble.token(LParen)) 64 | use x <- do(int_parser) 65 | use _ <- do(nibble.token(Comma)) 66 | use y <- do(int_parser) 67 | use _ <- do(nibble.token(RParen)) 68 | 69 | return(Point(x, y)) 70 | } 71 | 72 | let assert Ok(tokens) = lexer.run("(1, 2)", lexer) 73 | let assert Ok(point) = nibble.run(tokens, parser) 74 | 75 | point.x //=> 1 76 | point.y //=> 2 77 | } 78 | 79 | ``` 80 | 81 | ## Installation 82 | 83 | If available on Hex this package can be added to your Gleam project: 84 | 85 | ```sh 86 | gleam add nibble 87 | ``` 88 | 89 | and its documentation can be found at . 90 | -------------------------------------------------------------------------------- /docs/00-introduction.md: -------------------------------------------------------------------------------- 1 | # 00 Introduction 2 | 3 | Nibble is a parser combinator library with a twist: it includes a lexer combinator 4 | library as well! If some of those words already started to sound like gibberish 5 | to you then don't worry, this introduction is going to get you up to speed on the 6 | core concepts! 7 | 8 | ## Your first parser! 
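The example below comes in four parts: a custom token type, a lexer built with `lexer.simple`, a parser built out of `nibble.do` and `nibble.take_map`, and a `main` function that runs the lexer and then the parser over the input string.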
9 | 10 | ```gleam 11 | type T { 12 | Hello 13 | Name(String) 14 | } 15 | ``` 16 | 17 | ```gleam 18 | fn lexer() { 19 | lexer.simple([ 20 | lexer.token("hello", Hello), 21 | lexer.variable(set.new(), Name), 22 | lexer.whitespace(Nil) 23 | |> lexer.ignore, 24 | ]) 25 | } 26 | ``` 27 | 28 | ```gleam 29 | fn parser() { 30 | use _ <- nibble.do(nibble.token(Hello)) 31 | use name <- nibble.do(name_parser()) 32 | 33 | nibble.return("You are greeting " <> name) 34 | } 35 | 36 | fn name_parser() { 37 | use tok <- nibble.take_map("Expected a name") 38 | 39 | case tok { 40 | Name(name) -> option.Some(name) 41 | _ -> option.None 42 | } 43 | } 44 | ``` 45 | 46 | ```gleam 47 | pub fn main() { 48 | let input = "hello joe" 49 | 50 | use tokens <- result.try( 51 | input 52 | |> lexer.run(lexer()), 53 | ) 54 | 55 | case tokens |> nibble.run(parser()) { 56 | Ok(value) -> io.println(value) 57 | Error(err) -> { 58 | let _ = io.debug(err) 59 | Nil 60 | } 61 | } 62 | 63 | Ok("") 64 | //=> "You are greeting joe" 65 | } 66 | ``` 67 | 68 | ## Terminology 69 | 70 | Throughout Nibble's docs we use words that not all Gleamlins might have come 71 | across before. Here's a quick rundown of the important terms and concepts to 72 | know: 73 | 74 | ### What is a combinator? 75 | 76 | Although you can find some more-formal definitions of what a combinator is – 77 | looking at you, combinatory logic – we're Gleamlins here and we like to keep 78 | things simple. For our purposes we can think of a combinators as functions that 79 | work together like building blocks for more complex behaviour. 80 | 81 | You'll have seen combinators already if you've ever written any code using 82 | `gleam/dynamic`! With `gleam/dynamic` you combine decoders together to create more 83 | complex ones: 84 | 85 | ```gleam 86 | dynamic.field("wibble", dynamic.list(dynamic.int)) 87 | ``` 88 | 89 | We can take the simple `dynamic.int` decoder and combine it with `dynamic.list` 90 | to get back a decoder that can decode a _list_ of ints. And we can combine _that_ 91 | decoder with `dynamic.field` to get back a decoder that can decode a list of ints 92 | from an object field called `"wibble"`! We can keep going, continuing to build 93 | decoders up from smaller pieces: this is the essence of combinators! 94 | 95 | ### What is a parser? 96 | 97 | In the broadest sense, a parser takes an unstructured sequence of stuff (often 98 | characters in a string or tokens in a list) and turns it into something more 99 | structured. You can imagine all parsers can be thought of as the same basic idea: 100 | 101 | ``` 102 | type Parser(a, b) = fn(List(a)) -> #(b, List(a)) 103 | ``` 104 | 105 | In the real world parsers tend to be a bit more complex than this, including things 106 | like errors and failure cases, position tracking, and so on. But in essence parsers 107 | are combinators, and just like `gleam/dynamic` that means we can combine them 108 | together to parse very complex things. 109 | 110 | ### What is a lexer? 
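A lexer takes the raw input string and turns it into a flat list of tokens for your parser to consume; Nibble ships its own lexer combinators in `nibble/lexer`, and the next guide covers them in depth. As a minimal sketch (reusing the `Num` token from the README's quick start):

```gleam
// Numbers become `Num` tokens and whitespace is consumed without
// producing any token at all.
let lexer =
  lexer.simple([
    lexer.int(Num),
    lexer.whitespace(Nil)
      |> lexer.ignore,
  ])

let assert Ok(tokens) = lexer.run("1 2 3", lexer)
```

From this point on the parser never sees the input string again: it only ever works with the list of tokens.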
111 | -------------------------------------------------------------------------------- /docs/01-lexing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/01-lexing.md -------------------------------------------------------------------------------- /docs/02-parsing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/02-parsing.md -------------------------------------------------------------------------------- /docs/03-parse-contexts.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/03-parse-contexts.md -------------------------------------------------------------------------------- /docs/04-backtracking.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/04-backtracking.md -------------------------------------------------------------------------------- /docs/05-lexer-modes.md: -------------------------------------------------------------------------------- 1 | # Lexer Modes 2 | 3 | Up until now we have been running our lexer using `lexer.simple`. As the name 4 | implies, this is the simplest way to use Nibble's lexer and it is context-free. 5 | Where possible we should try to stick to these simple lexers, but sometimes we 6 | need to be able to lex things that are context-sensitive. That's where lexer 7 | modes come in! 8 | 9 | ## Indentation Sensitivity 10 | 11 | Let's imagine we're writing a lexer for a Python-ish programming language and 12 | we want to produce `Indent` and `Dedent` tokens to represent indentation. We 13 | might define our tokens like this: 14 | 15 | ```gleam 16 | pub type TokenT { 17 | Var(String) 18 | Str(String) 19 | Num(Int) 20 | 21 | // Keywords 22 | Def 23 | For 24 | In 25 | Print 26 | 27 | // Indentation 28 | Indent(Int) 29 | Dedent(Int) 30 | } 31 | ``` 32 | 33 | We could represent a chunk of code like this: 34 | 35 | ```python 36 | def wibble arr 37 | for x in arr 38 | print x 39 | 40 | print "done!" 41 | 42 | def wobble 43 | wibble [1, 2, 3] 44 | ``` 45 | 46 | Indentation would change the meaning of this program, so we need to know when we 47 | are inside a block of indented code or not. Our `Indent` and `Dedent` tokens 48 | carry with them the level of indentation they represent such that when we come to 49 | parsing we can make sure everything is valid, but how do we produce the tokens in 50 | the first place? 51 | 52 | We'll need to do two things: (1) write a custom matcher using `lexer.custom` and 53 | (2) store the current indentation level as the lexer's mode. 54 | 55 | ```gleam 56 | pub opaque type Lexer(a, mode) 57 | pub opaque type Matcher(a, mode) 58 | ``` 59 | 60 | Modes allow us to chose different matchers for different contexts, or inject 61 | state into our matchers. 
For our indentation-sensitive lexer, that means we'll 62 | end up with `Lexer` and `Matcher` types like this: 63 | 64 | ```gleam 65 | type Lexer = nibble.Lexer(TokenT, Int) 66 | type Matcher = nibble.Matcher(TokenT, Int) 67 | ``` 68 | 69 | To write our `indentation` matcher, we'll count the number of spaces that immediately 70 | follow a newline and compare that to the current indentation level. If that number 71 | is less than the current indentation level, we'll produce a `Dedent` token, otherwise 72 | we'll produce an `Indent` token. In either case we'll also update the lexer's 73 | mode with the new indentation level for subsequent lines. 74 | 75 | ```gleam 76 | fn indentation() -> Matcher(TokenT, Int) { 77 | let assert Ok(is_indent) = regex.from_string("^\\n[ \\t]*") 78 | use current_indent, lexeme, lookahead <- lexer.custom 79 | 80 | case regex.check(is_indent, lexeme), lookahead { 81 | False, _ -> NoMatch 82 | True, " " | True, "\t" -> Skip 83 | True, "\n" -> Drop(current_indent) 84 | True, _ -> { 85 | let spaces = string.length(lexeme) - 1 86 | 87 | case int.compare(spaces, current_indent) { 88 | Lt -> Keep(Dedent(spaces), spaces) 89 | Eq if spaces == 0 -> Drop(0) 90 | Eq -> Keep(Indent(spaces), spaces) 91 | Gt -> Keep(Indent(spaces), spaces) 92 | } 93 | } 94 | } 95 | } 96 | ``` 97 | 98 | There's actually a little more going on here than I just described, so let's 99 | break the pattern matching down case by case. 100 | 101 | ```gleam 102 | False, _ -> NoMatch 103 | ``` 104 | 105 | - This branch should be self-explanatory. If the lexeme doesn't match the regex 106 | we have for indentation then we don't have a match and should tell the lexer to 107 | move on and attempt a different matcher. 108 | 109 | ```gleam 110 | True, " " | True, "\t" -> Skip 111 | ``` 112 | 113 | - If the lexeme matches the indentation regex but we can see from the lookahead 114 | that it is followed by more indentation, then we don't want to produce a token 115 | right now. We _also_ don't want the lexer to try any other matchers that might 116 | consume these lexeme (like `lexer.whitespace`) so we tell the lexer to _skip_ 117 | checking any other matchers and move on to the next lexeme. 118 | 119 | ```gleam 120 | True, "\n" -> Drop(current_indent) 121 | ``` 122 | 123 | - When the lexeme matches the indentation regex but it's followed by a new line 124 | that means we have an empty line of just whitespace. You could choose to emit a 125 | token here if you wanted to, but in this case we'll ignore empty lines entirely 126 | and instruct the lexer to _drop_ the lexeme without producing anything. Importantly 127 | we preserve the current indentation level as the lexer's mode going forward. 128 | 129 | ```gleam 130 | True, _ -> { 131 | let spaces = string.length(lexeme) - 1 132 | 133 | case int.compare(spaces, current_indent) { 134 | Lt -> Keep(Dedent(spaces), spaces) 135 | Eq if spaces == 0 -> Drop(0) 136 | Eq -> Keep(Indent(spaces), spaces) 137 | Gt -> Keep(Indent(spaces), spaces) 138 | } 139 | } 140 | ``` 141 | 142 | - This is the actual indentation logic; we hit this branch when the lookahead is 143 | any other grapheme. To get the new indentation level we just need to count the 144 | length of the string, remembering to subtract one for the leading newline. By 145 | comparing the new indentation to the current, we can work out what token to keep. 146 | 147 | - If the new indentation is less than the current, we keep a `Dedent` token and 148 | update the lexer's mode to the new indentation level. 
149 | - If the new indentation is is equal to the current _but_ that level is zero, 150 | then we drop the lexeme without producing any indentation token at all. 151 | - Otherwise, if the new indentation is equal or greater than the current, we 152 | keep an `Indent` token and update the lexer's mode to the new indentation level. 153 | 154 | ## String Interpolation 155 | -------------------------------------------------------------------------------- /docs/06-pratt-parsing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/06-pratt-parsing.md -------------------------------------------------------------------------------- /gleam.toml: -------------------------------------------------------------------------------- 1 | name = "nibble" 2 | version = "1.1.3" 3 | 4 | # Fill out these fields if you intend to generate HTML documentation or publish 5 | # your project to the Hex package manager. 6 | # 7 | licences = ["MIT"] 8 | description = "A string parsing library combining a traditional lexer with parser combinators.." 9 | repository = { type = "github", user = "hayleigh-dot-dev", repo = "gleam-nibble" } 10 | gleam = ">= 0.34.0" 11 | 12 | internal_modules = ["nibble/vendor/*"] 13 | 14 | 15 | [dependencies] 16 | gleam_stdlib = ">= 0.34.0 and < 1.0.0" 17 | gleam_regexp = ">= 1.0.0 and < 2.0.0" 18 | 19 | [dev-dependencies] 20 | gleeunit = ">= 1.0.0 and < 2.0.0" 21 | 22 | [documentation] 23 | pages = [ 24 | { title = "Introduction to Nibble", path = "intro.html", source = "./docs/00-introduction.md" }, 25 | { title = "Lexing", path = "lexing.html", source = "./docs/01-lexing.md" }, 26 | { title = "Parsing", path = "parsing.html", source = "./docs/02-parsing.md" }, 27 | { title = "Parse Contexts", path = "parse-contexts.html", source = "./docs/03-parse-contexts.md" }, 28 | { title = "Backtracking", path = "backtracking.html", source = "./docs/04-backtracking.md" }, 29 | { title = "Lexer Modes", path = "lexer-modes.html", source = "./docs/05-lexer-modes.md" }, 30 | { title = "Pratt Parsing", path = "pratt-parsing.html", source = "./docs/06-pratt-parsing.md" }, 31 | ] 32 | -------------------------------------------------------------------------------- /manifest.toml: -------------------------------------------------------------------------------- 1 | # This file was generated by Gleam 2 | # You typically do not need to edit this file 3 | 4 | packages = [ 5 | { name = "gleam_regexp", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "A3655FDD288571E90EE9C4009B719FEF59FA16AFCDF3952A76A125AF23CF1592" }, 6 | { name = "gleam_stdlib", version = "0.51.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "14AFA8D3DDD7045203D422715DBB822D1725992A31DF35A08D97389014B74B68" }, 7 | { name = "gleeunit", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "F7A7228925D3EE7D0813C922E062BFD6D7E9310F0BEE585D3A42F3307E3CFD13" }, 8 | ] 9 | 10 | [requirements] 11 | gleam_regexp = { version = ">= 1.0.0 and < 2.0.0" } 12 | gleam_stdlib = { version = ">= 0.34.0 and < 1.0.0" } 13 | gleeunit = { version = ">= 1.0.0 and < 2.0.0" } 14 | -------------------------------------------------------------------------------- /src/glearray_ffi.erl: 
-------------------------------------------------------------------------------- 1 | -module(glearray_ffi). 2 | 3 | -export([new/0, get/2, set/3, insert/3]). 4 | 5 | new() -> {}. 6 | 7 | get(Array, Index) -> element(Index + 1, Array). 8 | 9 | set(Array, Index, Value) -> setelement(Index + 1, Array, Value). 10 | 11 | insert(Array, Index, Value) -> erlang:insert_element(Index + 1, Array, Value). 12 | -------------------------------------------------------------------------------- /src/glearray_ffi.mjs: -------------------------------------------------------------------------------- 1 | export function newArray() { 2 | return []; 3 | } 4 | 5 | export function fromList(list) { 6 | return list.toArray(); 7 | } 8 | 9 | export function arrayLength(array) { 10 | return array.length; 11 | } 12 | 13 | export function get(array, index) { 14 | return array[index]; 15 | } 16 | 17 | export function set(array, index, value) { 18 | const copy = [...array]; 19 | copy[index] = value; 20 | return copy; 21 | } 22 | 23 | export function push(array, value) { 24 | const copy = [...array]; 25 | copy.push(value); 26 | return copy; 27 | } 28 | 29 | export function insert(array, index, value) { 30 | return array.toSpliced(index, 0, value); 31 | } 32 | -------------------------------------------------------------------------------- /src/nibble.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/bool 4 | import gleam/io 5 | import gleam/list 6 | import gleam/option.{type Option, None, Some} 7 | import gleam/string 8 | import nibble/lexer.{type Span, type Token, Span, Token} 9 | import nibble/vendor/glearray.{type Array} 10 | 11 | // TYPES ----------------------------------------------------------------------- 12 | 13 | /// The `Parser` type has three parameters, let's take a look at each of them: 14 | /// 15 | /// ``` 16 | /// Parser(a, tok, ctx) 17 | /// // (1) ^ 18 | /// // (2) ^^^ 19 | /// // (3) ^^^ 20 | /// ``` 21 | /// 22 | /// 1) `a` is the type of value that the parser knows how to produce. If you were 23 | /// writing a parser for a programming language, this might be your expression 24 | /// type. 25 | /// 26 | /// 2) `tok` is the type of tokens that the parser knows how to consume. You can 27 | /// take a look at the [`Token`](./nibble/lexer#Token) type for a bit more info, 28 | /// but note that it's not necessary for the token stream to come from nibble's 29 | /// lexer. 30 | /// 31 | /// 3) `ctx` is used to make error reporting nicer. You can place a parser into a 32 | /// custom context. When the parser runs the context gets pushed into a stack. 33 | /// If the parser fails you can see the context stack in the error message, 34 | /// which can make error reporting and debugging much easier! 35 | /// 36 | pub opaque type Parser(a, tok, ctx) { 37 | Parser(fn(State(tok, ctx)) -> Step(a, tok, ctx)) 38 | } 39 | 40 | type Step(a, tok, ctx) { 41 | Cont(CanBacktrack, a, State(tok, ctx)) 42 | Fail(CanBacktrack, Bag(tok, ctx)) 43 | } 44 | 45 | type State(tok, ctx) { 46 | State( 47 | // ❓ You might wonder why we're wanting an `Array` at all when we could just 48 | // use a `List` and backtrack to a previous state when we need to. By tracking 49 | // the index and indexing into the dict/array directly we save ever having to 50 | // allocate something new, which is a big deal for performance! 
51 | src: Array(Token(tok)), 52 | idx: Int, 53 | pos: Span, 54 | ctx: List(#(Span, ctx)), 55 | ) 56 | } 57 | 58 | type CanBacktrack { 59 | CanBacktrack(Bool) 60 | } 61 | 62 | // RUNNING PARSERS ------------------------------------------------------------- 63 | 64 | /// Parsers don't do anything until they're run! The `run` function takes a 65 | /// [`Parser`](#Parser) and a list of [`Token`](./nibble/lexer#Token)s and 66 | /// runs it; returning either the parsed value or a list of [`DeadEnds`](#DeadEnd) 67 | /// where the parser failed. 68 | /// 69 | pub fn run( 70 | src: List(Token(tok)), 71 | parser: Parser(a, tok, ctx), 72 | ) -> Result(a, List(DeadEnd(tok, ctx))) { 73 | let init = 74 | State(src: glearray.from_list(src), idx: 0, pos: Span(1, 1, 1, 1), ctx: []) 75 | 76 | case runwrap(init, parser) { 77 | Cont(_, a, _) -> Ok(a) 78 | Fail(_, bag) -> Error(to_deadends(bag, [])) 79 | } 80 | } 81 | 82 | fn runwrap( 83 | state: State(tok, ctx), 84 | parser: Parser(a, tok, ctx), 85 | ) -> Step(a, tok, ctx) { 86 | let Parser(parse) = parser 87 | parse(state) 88 | } 89 | 90 | fn next(state: State(tok, ctx)) -> #(Option(tok), State(tok, ctx)) { 91 | case glearray.get(state.src, state.idx) { 92 | Error(_) -> #(option.None, state) 93 | Ok(Token(span, _, tok)) -> #( 94 | option.Some(tok), 95 | State(..state, idx: state.idx + 1, pos: span), 96 | ) 97 | } 98 | } 99 | 100 | // CONSTRUCTORS ---------------------------------------------------------------- 101 | 102 | /// The simplest kind of parser. [`return`](#return) consumes no tokens and always 103 | /// produces the given value. Sometimes called [`succeed`](#succeed) instead. 104 | /// 105 | /// This function might seem useless at first, but it is very useful when used in 106 | /// combination with [`do`](#do) or [`then`](#then). 107 | /// 108 | /// ```gleam 109 | /// import nibble.{do, return} 110 | /// 111 | /// fn unit8_parser() { 112 | /// use int <- do(int_parser()) 113 | /// 114 | /// case int >= 0, int <= 255 { 115 | /// True, True -> 116 | /// return(int) 117 | /// 118 | /// False, _ -> 119 | /// throw("Expected an int >= 0") 120 | /// 121 | /// _, False -> 122 | /// throw("Expected an int <= 255") 123 | /// } 124 | /// } 125 | /// ``` 126 | /// 127 | /// 💡 [`return`](#return`) and [`succeed`](#succeed) are names for the same thing. 128 | /// We suggesting using `return` unqualified when using `do` and Gleam's `use` 129 | /// syntax, and `nibble.succeed` in a pipeline with `nibble.then`. 130 | pub fn return(value: a) -> Parser(a, tok, ctx) { 131 | use state <- Parser 132 | 133 | Cont(CanBacktrack(False), value, state) 134 | } 135 | 136 | /// The simplest kind of parser. [`succeed`](#succeed) consumes no tokens and always 137 | /// produces the given value. Sometimes called [`return`](#return) instead. 138 | /// 139 | /// This function might seem useless at first, but it is very useful when used in 140 | /// combination with [`do`](#do) or [`then`](#then). 141 | /// 142 | /// ```gleam 143 | /// import nibble 144 | /// 145 | /// fn unit8_parser() { 146 | /// int_parser() 147 | /// |> nibble.then(fn(int) { 148 | /// case int >= 0, int <= 255 { 149 | /// True, True -> succeed(int) 150 | /// False, _ -> fail("Expected an int >= 0") 151 | /// _, False -> fail("Expected an int <= 255") 152 | /// } 153 | /// }) 154 | /// } 155 | /// ``` 156 | /// 157 | /// 💡 [`succeed`](#succeed) and [`return`](#return) are names for the same thing. 
158 | /// We suggest using `succeed` in a pipeline with `nibble.then`, and `return` 159 | /// unqalified when using `do` with Gleam's `use` syntax. 160 | /// 161 | pub fn succeed(value: a) -> Parser(a, tok, ctx) { 162 | return(value) 163 | } 164 | 165 | /// The opposite of [`return`](#return), this parser always fails with the given 166 | /// message. Sometimes called [`fail`](#fail) instead. 167 | /// 168 | pub fn throw(message: String) -> Parser(a, tok, ctx) { 169 | use state <- Parser 170 | let error = Custom(message) 171 | let bag = bag_from_state(state, error) 172 | 173 | Fail(CanBacktrack(False), bag) 174 | } 175 | 176 | /// Create a parser that consumes no tokens and always fails with the given 177 | /// error message. 178 | /// 179 | pub fn fail(message: String) -> Parser(a, tok, ctx) { 180 | throw(message) 181 | } 182 | 183 | /// Defer the creation of a parser until it is needed. This is often most useful 184 | /// when creating a parser that is recursive and is *not* a function. 185 | /// 186 | pub fn lazy(parser: fn() -> Parser(a, tok, ctx)) -> Parser(a, tok, ctx) { 187 | use state <- Parser 188 | 189 | runwrap(state, parser()) 190 | } 191 | 192 | // BACKTRACKING ---------------------------------------------------------------- 193 | 194 | /// By default, parsers will not backtrack if they fail after consuming at least 195 | /// one token. Passing a parser to `backtrackable` will change this behaviour and 196 | /// allows us to jump back to the state of the parser before it consumed any input 197 | /// and try another one. 198 | /// 199 | /// This is most useful when you want to quickly try a few different parsers using 200 | /// [`one_of`](#one_of). 201 | /// 202 | /// 🚨 Backtracing parsers can drastically reduce performance, so you should avoid 203 | /// them where possible. A common reason folks reach for backtracking is when they 204 | /// want to try multiple branches that start with the same token or same sequence 205 | /// of tokens. 206 | /// 207 | /// To avoid backtracking in these cases, you can create an intermediate parser 208 | /// that consumes the common tokens _and then_ use [`one_of`](#one_of) to try 209 | /// the different branches. 
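///
/// For example (a sketch — every parser named here is hypothetical, and
/// `tuple_parser` and `group_parser` are assumed to both start by consuming
/// the same `LParen` token):
///
/// ```gleam
/// // Instead of backtracking over the shared `LParen`...
/// one_of([
///   backtrackable(tuple_parser()),
///   group_parser(),
/// ])
///
/// // ...consume it once and then branch:
/// {
///   use _ <- do(token(LParen))
///
///   one_of([tuple_after_paren(), group_after_paren()])
/// }
/// ```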
210 | /// 211 | pub fn backtrackable(parser: Parser(a, tok, ctx)) -> Parser(a, tok, ctx) { 212 | use state <- Parser 213 | 214 | case runwrap(state, parser) { 215 | Cont(_, a, state) -> Cont(CanBacktrack(False), a, state) 216 | Fail(_, bag) -> Fail(CanBacktrack(False), bag) 217 | } 218 | } 219 | 220 | fn should_commit(a: CanBacktrack, or b: CanBacktrack) -> CanBacktrack { 221 | let CanBacktrack(a) = a 222 | let CanBacktrack(b) = b 223 | 224 | CanBacktrack(a || b) 225 | } 226 | 227 | // MANIPULATING PARSERS -------------------------------------------------------- 228 | 229 | /// 230 | /// 231 | pub fn do( 232 | parser: Parser(a, tok, ctx), 233 | f: fn(a) -> Parser(b, tok, ctx), 234 | ) -> Parser(b, tok, ctx) { 235 | use state <- Parser 236 | 237 | case runwrap(state, parser) { 238 | Cont(to_a, a, state) -> 239 | case runwrap(state, f(a)) { 240 | Cont(to_b, b, state) -> Cont(should_commit(to_a, or: to_b), b, state) 241 | Fail(to_b, bag) -> Fail(should_commit(to_a, or: to_b), bag) 242 | } 243 | Fail(can_backtrack, bag) -> Fail(can_backtrack, bag) 244 | } 245 | } 246 | 247 | /// 248 | /// 249 | pub fn do_in( 250 | context: ctx, 251 | parser: Parser(a, tok, ctx), 252 | f: fn(a) -> Parser(b, tok, ctx), 253 | ) -> Parser(b, tok, ctx) { 254 | do(parser, f) 255 | |> in(context) 256 | } 257 | 258 | /// 259 | /// 260 | pub fn then( 261 | parser: Parser(a, tok, ctx), 262 | f: fn(a) -> Parser(b, tok, ctx), 263 | ) -> Parser(b, tok, ctx) { 264 | do(parser, f) 265 | } 266 | 267 | /// 268 | /// 269 | pub fn map(parser: Parser(a, tok, ctx), f: fn(a) -> b) -> Parser(b, tok, ctx) { 270 | use a <- do(parser) 271 | 272 | return(f(a)) 273 | } 274 | 275 | /// 276 | /// 277 | pub fn replace(parser: Parser(a, tok, ctx), with b: b) -> Parser(b, tok, ctx) { 278 | map(parser, fn(_) { b }) 279 | } 280 | 281 | // PARSER STATE ---------------------------------------------------------------- 282 | 283 | /// A parser that returns the current token position. 284 | /// 285 | pub fn span() -> Parser(Span, tok, ctx) { 286 | use state <- Parser 287 | 288 | Cont(CanBacktrack(False), state.pos, state) 289 | } 290 | 291 | // SIMPLE PARSERS -------------------------------------------------------------- 292 | 293 | /// Returns the next token in the input stream. Fails if there are no more 294 | /// tokens. 295 | pub fn any() -> Parser(tok, tok, ctx) { 296 | take_if("a single token", fn(_) { True }) 297 | } 298 | 299 | /// Returns nil if the token `tok` is the next token in the input stream. Fails 300 | /// if the next token is not `tok` or if the input stream is empty. 301 | pub fn token(tok: tok) -> Parser(Nil, tok, ctx) { 302 | use state <- Parser 303 | 304 | case next(state) { 305 | #(option.Some(t), state) if tok == t -> Cont(CanBacktrack(True), Nil, state) 306 | #(option.Some(t), state) -> 307 | Fail( 308 | CanBacktrack(False), 309 | bag_from_state(state, Expected(string.inspect(tok), t)), 310 | ) 311 | #(option.None, state) -> 312 | Fail(CanBacktrack(False), bag_from_state(state, EndOfInput)) 313 | } 314 | } 315 | 316 | /// Succeeds if the input stream is empty, fails otherwise. This is useful to 317 | /// verify that you've consumed all the tokens in the input stream. 
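///
/// For example (a sketch, assuming a `point_parser` like the one in the README):
///
/// ```gleam
/// let parser = {
///   use point <- do(point_parser())
///   // Fails with `Unexpected` if any tokens are left over.
///   use _ <- do(eof())
///
///   return(point)
/// }
/// ```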
318 | /// 319 | pub fn eof() -> Parser(Nil, tok, ctx) { 320 | use state <- Parser 321 | 322 | case next(state) { 323 | #(option.Some(tok), state) -> 324 | Fail(CanBacktrack(False), bag_from_state(state, Unexpected(tok))) 325 | #(option.None, _) -> Cont(CanBacktrack(False), Nil, state) 326 | } 327 | } 328 | 329 | // BRANCHING AND LOOPING ------------------------------------------------------- 330 | 331 | /// Try the given parsers in order until one succeeds. If all fail, the parser 332 | /// fails. 333 | pub fn one_of(parsers: List(Parser(a, tok, ctx))) -> Parser(a, tok, ctx) { 334 | use state <- Parser 335 | let init = Fail(CanBacktrack(False), Empty) 336 | 337 | use result, next <- list.fold_until(parsers, init) 338 | 339 | case result { 340 | Cont(_, _, _) -> list.Stop(result) 341 | Fail(CanBacktrack(True), _) -> list.Stop(result) 342 | Fail(_, bag) -> 343 | runwrap(state, next) 344 | |> add_bag_to_step(bag) 345 | |> list.Continue 346 | } 347 | } 348 | 349 | /// 350 | /// Consumes a sequence of tokens using the given parser, separated by the 351 | /// given `separator` parser. Returns a list of the parsed values, ignoring 352 | /// the results of the `separator` parser. 353 | /// 354 | pub fn sequence( 355 | parser: Parser(a, tok, ctx), 356 | separator sep: Parser(x, tok, ctx), 357 | ) -> Parser(List(a), tok, ctx) { 358 | one_of([ 359 | parser 360 | |> then(more(_, parser, sep)), 361 | return([]), 362 | ]) 363 | } 364 | 365 | /// 366 | /// Returns consecutive applications of the given parser. If you are parsing 367 | /// values with a separator, use [`sequence`](#sequence) instead. 368 | /// 369 | /// 💡 This parser can succeed without consuming any input. You can end up with 370 | /// an infinite loop if you're not careful. Use [`many1`](#many1) if you want 371 | /// to guarantee you take at least one token. 372 | /// 373 | pub fn many(parser: Parser(a, tok, ctx)) -> Parser(List(a), tok, ctx) { 374 | sequence(parser, return(Nil)) 375 | } 376 | 377 | /// 378 | /// This is the same as [`many1`](#many1), but is guaranteed to return at least 379 | /// one value. 380 | /// 381 | pub fn many1(parser: Parser(a, tok, ctx)) -> Parser(List(a), tok, ctx) { 382 | use x <- do(parser) 383 | use xs <- do(many(parser)) 384 | 385 | return([x, ..xs]) 386 | } 387 | 388 | fn more( 389 | x: a, 390 | parser: Parser(a, tok, ctx), 391 | separator: Parser(x, tok, ctx), 392 | ) -> Parser(List(a), tok, ctx) { 393 | use xs <- loop([x]) 394 | // `break` is lazy so we don't reverse `xs` every iteration if we don't need 395 | // to. 
396 | let break = fn() { return(Break(list.reverse(xs))) } 397 | let continue = { 398 | use _ <- do(separator) 399 | use x <- do(parser) 400 | 401 | return(Continue([x, ..xs])) 402 | } 403 | 404 | one_of([continue, lazy(break)]) 405 | } 406 | 407 | /// 408 | /// 409 | pub type Loop(a, state) { 410 | Continue(state) 411 | Break(a) 412 | } 413 | 414 | /// 415 | /// 416 | pub fn loop( 417 | init: state, 418 | step: fn(state) -> Parser(Loop(a, state), tok, ctx), 419 | ) -> Parser(a, tok, ctx) { 420 | use state <- Parser 421 | 422 | loop_help(step, CanBacktrack(False), init, state) 423 | } 424 | 425 | fn loop_help(f, commit, loop_state, state) { 426 | case runwrap(state, f(loop_state)) { 427 | Cont(can_backtrack, Continue(next_loop_state), next_state) -> 428 | loop_help( 429 | f, 430 | should_commit(commit, can_backtrack), 431 | next_loop_state, 432 | next_state, 433 | ) 434 | Cont(can_backtrack, Break(result), next_state) -> 435 | Cont(should_commit(commit, can_backtrack), result, next_state) 436 | Fail(can_backtrack, bag) -> Fail(should_commit(commit, can_backtrack), bag) 437 | } 438 | } 439 | 440 | // PREDICATES ------------------------------------------------------------------ 441 | 442 | /// 443 | /// Fails if the given condition is false, otherwise returns `Nil`. 444 | /// 445 | pub fn guard(cond: Bool, expecting: String) -> Parser(Nil, tok, ctx) { 446 | case cond { 447 | True -> return(Nil) 448 | False -> fail(expecting) 449 | } 450 | } 451 | 452 | /// 453 | /// Takes the next token off the stream if it satisfies the given predicate. 454 | /// 455 | pub fn take_if( 456 | expecting: String, 457 | predicate: fn(tok) -> Bool, 458 | ) -> Parser(tok, tok, ctx) { 459 | use state <- Parser 460 | let #(tok, next_state) = next(state) 461 | 462 | case tok, option.map(tok, predicate) { 463 | Some(tok), Some(True) -> Cont(CanBacktrack(False), tok, next_state) 464 | Some(tok), Some(False) -> 465 | Fail( 466 | CanBacktrack(False), 467 | bag_from_state(next_state, Expected(expecting, got: tok)), 468 | ) 469 | _, _ -> Fail(CanBacktrack(False), bag_from_state(next_state, EndOfInput)) 470 | } 471 | } 472 | 473 | /// 474 | /// Take tokens from the stream while the given predicate is satisfied. 475 | /// 476 | /// 💡 This parser can succeed without consuming any input (if the predicate 477 | /// immediately fails). You can end up with an infinite loop if you're not 478 | /// careful. Use [`take_while1`](#take_while1) if you want to guarantee you 479 | /// take at least one token. 480 | /// 481 | pub fn take_while(predicate: fn(tok) -> Bool) -> Parser(List(tok), tok, ctx) { 482 | use state <- Parser 483 | let #(tok, next_state) = next(state) 484 | 485 | case tok, option.map(tok, predicate) { 486 | Some(tok), Some(True) -> 487 | runwrap(next_state, { 488 | use toks <- do(take_while(predicate)) 489 | return([tok, ..toks]) 490 | }) 491 | Some(_), Some(False) -> Cont(CanBacktrack(False), [], state) 492 | _, _ -> Cont(CanBacktrack(False), [], state) 493 | } 494 | } 495 | 496 | /// 497 | /// Take tokens from the stream while the given predicate is satisfied. 498 | /// 499 | /// 💡 If this parser succeeds, the list produced is guaranteed to be non-empty. 500 | /// Feel free to `let assert` the result! 
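///
/// For example (a sketch, assuming a `Num(Int)` token like the one in the README):
///
/// ```gleam
/// let numbers =
///   take_while1("a number", fn(tok) {
///     case tok {
///       Num(_) -> True
///       _ -> False
///     }
///   })
/// ```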
501 | /// 502 | pub fn take_while1( 503 | expecting: String, 504 | predicate: fn(tok) -> Bool, 505 | ) -> Parser(List(tok), tok, ctx) { 506 | use x <- do(take_if(expecting, predicate)) 507 | use xs <- do(take_while(predicate)) 508 | 509 | return([x, ..xs]) 510 | } 511 | 512 | /// 513 | /// Take token from the stream until the given predicate is satisfied. 514 | /// 515 | /// 💡 This parser can succeed without consuming any input (if the predicate 516 | /// immediately succeeds). You can end up with an infinite loop if you're not 517 | /// careful. Use [`take_until1`](#take_until1) if you want to guarantee you 518 | /// take at least one token. 519 | /// 520 | pub fn take_until(predicate: fn(tok) -> Bool) -> Parser(List(tok), tok, ctx) { 521 | take_while(fn(tok) { bool.negate(predicate(tok)) }) 522 | } 523 | 524 | /// 525 | /// Take token from the stream until the given predicate is satisfied. 526 | /// 527 | /// 💡 If this parser succeeds, the list produced is guaranteed to be non-empty. 528 | /// Feel free to `let assert` the result! 529 | /// 530 | pub fn take_until1( 531 | expecting: String, 532 | predicate: fn(tok) -> Bool, 533 | ) -> Parser(List(tok), tok, ctx) { 534 | take_while1(expecting, fn(tok) { bool.negate(predicate(tok)) }) 535 | } 536 | 537 | /// 538 | /// Apply the parser up to `count` times, returning a list of the results. 539 | /// 540 | /// 💡 This parser can succeed without consuming any input (if the parser 541 | /// fails immediately) and return an empty list. You can end up with an 542 | /// infinite loop if you're not careful. 543 | /// 544 | pub fn take_up_to( 545 | parser: Parser(a, tok, ctx), 546 | count: Int, 547 | ) -> Parser(List(a), tok, ctx) { 548 | case count { 549 | 0 -> return([]) 550 | _ -> 551 | { 552 | use x <- do(parser) 553 | use xs <- do(take_up_to(parser, count - 1)) 554 | 555 | return([x, ..xs]) 556 | } 557 | |> or([]) 558 | } 559 | } 560 | 561 | /// 562 | /// Apply the parser a minimum of `count` times, returning a list of the results. 563 | /// 564 | pub fn take_at_least( 565 | parser: Parser(a, tok, ctx), 566 | count: Int, 567 | ) -> Parser(List(a), tok, ctx) { 568 | case count { 569 | 0 -> many(parser) 570 | _ -> { 571 | use x <- do(parser) 572 | use xs <- do(take_at_least(parser, count - 1)) 573 | 574 | return([x, ..xs]) 575 | } 576 | } 577 | } 578 | 579 | /// 580 | /// Take `count` consecutive tokens from the stream using the given parser. 581 | /// 582 | pub fn take_exactly( 583 | parser: Parser(a, tok, ctx), 584 | count: Int, 585 | ) -> Parser(List(a), tok, ctx) { 586 | case count { 587 | 0 -> return([]) 588 | _ -> { 589 | use x <- do(parser) 590 | use xs <- do(take_exactly(parser, count - 1)) 591 | 592 | return([x, ..xs]) 593 | } 594 | } 595 | } 596 | 597 | /// 598 | /// Try the given parser, but if it fails return the given default value instead 599 | /// of failing. 600 | /// 601 | pub fn or(parser: Parser(a, tok, ctx), default: a) -> Parser(a, tok, ctx) { 602 | one_of([parser, return(default)]) 603 | } 604 | 605 | /// 606 | /// Try the given parser, but if it fails return 607 | /// [`None`](#https://hexdocs.pm/gleam_stdlib/gleam/option.html#Option) instead 608 | /// of failing. 609 | /// 610 | pub fn optional(parser: Parser(a, tok, ctx)) -> Parser(Option(a), tok, ctx) { 611 | one_of([map(parser, Some), return(None)]) 612 | } 613 | 614 | /// 615 | /// Take the next token and attempt to transform it with the given function. 
This 616 | /// is useful when creating reusable primitive parsers for your own tokens such as 617 | /// `take_identifier` or `take_number`. 618 | /// 619 | pub fn take_map( 620 | expecting: String, 621 | f: fn(tok) -> Option(a), 622 | ) -> Parser(a, tok, ctx) { 623 | use state <- Parser 624 | let #(tok, next_state) = next(state) 625 | 626 | case tok, option.then(tok, f) { 627 | None, _ -> Fail(CanBacktrack(False), bag_from_state(next_state, EndOfInput)) 628 | Some(tok), None -> 629 | Fail( 630 | CanBacktrack(False), 631 | bag_from_state(next_state, Expected(expecting, got: tok)), 632 | ) 633 | _, Some(a) -> Cont(CanBacktrack(False), a, next_state) 634 | } 635 | } 636 | 637 | /// 638 | /// Applies a function to consecutive tokens while the given function returns 639 | /// `Some`. 640 | /// 641 | /// 💡 This parser can succeed without consuming any input (if the predicate 642 | /// immediately succeeds). You can end up with an infinite loop if you're not 643 | /// careful. Use [`take_map_while1`](#take_map_while1) if you want to guarantee you 644 | /// take at least one token. 645 | /// 646 | pub fn take_map_while(f: fn(tok) -> Option(a)) -> Parser(List(a), tok, ctx) { 647 | use state <- Parser 648 | let #(tok, next_state) = next(state) 649 | 650 | case tok, option.then(tok, f) { 651 | None, _ -> Cont(CanBacktrack(True), [], state) 652 | Some(_), None -> Cont(CanBacktrack(True), [], state) 653 | _, Some(x) -> 654 | runwrap( 655 | next_state, 656 | take_map_while(f) 657 | |> map(list.prepend(_, x)), 658 | ) 659 | } 660 | } 661 | 662 | /// Applies a function to consecutive tokens while the given function returns 663 | /// `Some`. 664 | /// 665 | /// 💡 If this parser succeeds, the list produced is guaranteed to be non-empty. 666 | /// Feel free to `let assert` the result! 667 | /// 668 | pub fn take_map_while1( 669 | expecting: String, 670 | f: fn(tok) -> Option(a), 671 | ) -> Parser(List(a), tok, ctx) { 672 | use x <- do(take_map(expecting, f)) 673 | use xs <- do(take_map_while(f)) 674 | 675 | return([x, ..xs]) 676 | } 677 | 678 | // ERRORS ---------------------------------------------------------------------- 679 | 680 | /// 681 | /// 682 | /// 683 | /// 684 | pub type Error(tok) { 685 | BadParser(String) 686 | Custom(String) 687 | EndOfInput 688 | Expected(String, got: tok) 689 | Unexpected(tok) 690 | } 691 | 692 | /// 693 | /// A dead end represents a the point where a parser that had committed down a 694 | /// path failed. It contains the position of the failure, the [`Error`](#Error) 695 | /// describing the failure, and the context stack for any parsers that had run. 
696 | /// 697 | pub type DeadEnd(tok, ctx) { 698 | DeadEnd(pos: Span, problem: Error(tok), context: List(#(Span, ctx))) 699 | } 700 | 701 | type Bag(tok, ctx) { 702 | Empty 703 | Cons(Bag(tok, ctx), DeadEnd(tok, ctx)) 704 | Append(Bag(tok, ctx), Bag(tok, ctx)) 705 | } 706 | 707 | fn bag_from_state(state: State(tok, ctx), problem: Error(tok)) -> Bag(tok, ctx) { 708 | Cons(Empty, DeadEnd(state.pos, problem, state.ctx)) 709 | } 710 | 711 | fn to_deadends( 712 | bag: Bag(tok, ctx), 713 | acc: List(DeadEnd(tok, ctx)), 714 | ) -> List(DeadEnd(tok, ctx)) { 715 | case bag { 716 | Empty -> acc 717 | Cons(Empty, deadend) -> [deadend, ..acc] 718 | Cons(bag, deadend) -> to_deadends(bag, [deadend, ..acc]) 719 | Append(left, right) -> to_deadends(left, to_deadends(right, acc)) 720 | } 721 | } 722 | 723 | fn add_bag_to_step( 724 | step: Step(a, tok, ctx), 725 | left: Bag(tok, ctx), 726 | ) -> Step(a, tok, ctx) { 727 | case step { 728 | Cont(can_backtrack, a, state) -> Cont(can_backtrack, a, state) 729 | Fail(can_backtrack, right) -> Fail(can_backtrack, Append(left, right)) 730 | } 731 | } 732 | 733 | // CONTEXT --------------------------------------------------------------------- 734 | 735 | /// 736 | /// 737 | pub fn in(parser: Parser(a, tok, ctx), context: ctx) -> Parser(a, tok, ctx) { 738 | use state <- Parser 739 | 740 | case runwrap(push_context(state, context), parser) { 741 | Cont(can_backtrack, a, state) -> Cont(can_backtrack, a, pop_context(state)) 742 | Fail(can_backtrack, bag) -> Fail(can_backtrack, bag) 743 | } 744 | } 745 | 746 | fn push_context(state: State(tok, ctx), context: ctx) -> State(tok, ctx) { 747 | State(..state, ctx: [#(state.pos, context), ..state.ctx]) 748 | } 749 | 750 | fn pop_context(state: State(tok, ctx)) -> State(tok, ctx) { 751 | case state.ctx { 752 | [] -> state 753 | [_, ..context] -> State(..state, ctx: context) 754 | } 755 | } 756 | 757 | /// 758 | /// Run the given parser and then inspect it's state. 759 | /// 760 | pub fn inspect( 761 | parser: Parser(a, tok, ctx), 762 | message: String, 763 | ) -> Parser(a, tok, ctx) { 764 | use state <- Parser 765 | io.println(message <> ": ") 766 | 767 | runwrap(state, parser) 768 | |> io.debug 769 | } 770 | -------------------------------------------------------------------------------- /src/nibble/lexer.gleam: -------------------------------------------------------------------------------- 1 | //// Nibble takes a different approach to many other parser combinator libraries 2 | //// by also providing a _lexer_ combinator module that you use to turn an input 3 | //// string into a list of tokens. 4 | //// 5 | //// Parser combinators are a powerful and flexible way to build parsers, but 6 | //// they offer come at a performance cost compared to hand-written parsers or 7 | //// parser generators. On the other hand, writing a lexer by hand can be a bit 8 | //// tedious and difficult. Nibble aims to provide a happy middle-ground by making 9 | //// it easy to produce OK-performing lexers and then use parser combinators that 10 | //// can be much faster working on the smaller token stream. 11 | //// 12 | //// To see how Nibble's lexer works, let's consider the example from the 13 | //// [introduction guide](#): 14 | //// 15 | //// ```gleam 16 | //// fn lexer() { 17 | //// lexer.simple([ 18 | //// lexer.token("hello", Hello), 19 | //// lexer.variable("[a-zA-Z]", "\w", Name), 20 | //// lexer.whitespace(Nil) 21 | //// |> lexer.ignore 22 | //// ]) 23 | //// } 24 | //// ``` 25 | //// 26 | //// We have three _matchers_ here. 
One for the exact token "hello", one for at 27 | //// least one letter followed by any number of word characters, and one for any 28 | //// amount of whitespace. 29 | //// 30 | //// To see how these matchers work we'll look at the input string `"Hello Joe"`. 31 | //// Nibble looks at the input string one grapheme at a time, and runs each of 32 | //// the matchers in order. At the same time, it accumulates a list of the tokens 33 | //// it has produced so far. 34 | //// 35 | //// ``` 36 | //// Tokens : [] 37 | //// Input : Hello Joe 38 | //// ^ 39 | //// ``` 40 | //// 41 | //// If no matcher matches the input, Nibble will store the current grapheme and 42 | //// move on to the next one: 43 | //// 44 | //// ``` 45 | //// Tokens : [] 46 | //// Input : Hello Joe 47 | //// -^ 48 | //// ``` 49 | //// 50 | //// This continues until a matcher _does_ match the input: 51 | //// 52 | //// ``` 53 | //// Tokens : [] 54 | //// Input : Hello Joe 55 | //// ----^ 56 | //// ``` 57 | //// 58 | //// The accumulated string (known as a _lexeme_) is consumed, and whatever token 59 | //// value the matcher produces is added to the list of tokens: 60 | //// 61 | //// ``` 62 | //// Tokens : [Hello] 63 | //// Input : Joe 64 | //// ^ 65 | //// ``` 66 | //// 67 | //// Here we have some whitespace that would be matched by `lexer.whitespace`, by 68 | //// we passed that matcher to the [`lexer.ignore`](#ignore) comabinator. The matcher 69 | //// will still consume the input, but this time it will not produce a new token 70 | //// value: 71 | //// 72 | //// ``` 73 | //// Tokens : [Hello] 74 | //// Input : Joe 75 | //// ^ 76 | //// ``` 77 | //// 78 | //// As we expect, the lexer continues on accumulating input. When it reaches the 79 | //// end of the input string with a value it checks all the matches one last time 80 | //// to see if they will produce a final token. In this case the `lexer.variable` 81 | //// matcher will match the accumulated "Joe" and we're left with the following: 82 | //// 83 | //// ``` 84 | //// Tokens : [Hello, Name("Joe")] 85 | //// Input : 86 | //// ``` 87 | //// 88 | 89 | // IMPORTS --------------------------------------------------------------------- 90 | 91 | import gleam/float 92 | import gleam/int 93 | import gleam/list 94 | import gleam/regexp 95 | import gleam/result 96 | import gleam/set.{type Set} 97 | import gleam/string 98 | 99 | // TYPES ----------------------------------------------------------------------- 100 | 101 | /// A `Matcher` is how we define the rules that match parts of the input string 102 | /// and turn them into tokens. At it's core, a `Match` is a function that takes 103 | /// three arguments: 104 | /// 105 | /// - The current mode of the lexer 106 | /// 107 | /// - Any input we've accumulated so far 108 | /// 109 | /// - A lookahead of one grapheme 110 | /// 111 | /// With just these three arguments we can define arbitrary rules for consuming 112 | /// (or not) input and producing tokens! 113 | /// 114 | pub opaque type Matcher(a, mode) { 115 | Matcher(run: fn(mode, String, String) -> Match(a, mode)) 116 | } 117 | 118 | /// When writing a custom matcher, a `Match` is what you return to tell the lexer 119 | /// what to do next. 120 | /// 121 | pub type Match(a, mode) { 122 | /// Consume the accumulated input and produce a token with the given value. A 123 | /// `Keep` match can also transition the lexer into a new mode. 
124 | Keep(a, mode) 125 | /// Skip running any additional matchers this iteration, add the next grapheme 126 | /// to the accumulated input, and run the next iteration. 127 | Skip 128 | /// Drop the accumulated input and move on to the next iteration. A `Drop` 129 | /// match can also transition the lexer into a new mode. This match is useful 130 | /// for discarding input like whitespace or comments. 131 | Drop(mode) 132 | /// The matcher did not match the input, so the lexer should try the next 133 | /// matcher in the list (or fail if there are no more matchers). 134 | NoMatch 135 | } 136 | 137 | /// You use Nibble's lexer to turn a string into a list of tokens that your parser 138 | /// will eventually consume. The `Token` type contains the lexeme that was consumed 139 | /// (aka the raw input string), the source [`Span`](#Span) of the consumed lexeme 140 | /// to locate it in the source, and whatever token value your lexer produces. 141 | /// 142 | pub type Token(a) { 143 | Token(span: Span, lexeme: String, value: a) 144 | } 145 | 146 | /// A source span is a range into the source string that represents the start and 147 | /// end of a lexeme in a human-readable way. That means instead of a straight index 148 | /// into the string you get a row and column for the start and end instead! 149 | /// 150 | pub type Span { 151 | Span(row_start: Int, col_start: Int, row_end: Int, col_end: Int) 152 | } 153 | 154 | /// 155 | /// 156 | pub type Error { 157 | NoMatchFound(row: Int, col: Int, lexeme: String) 158 | } 159 | 160 | /// 161 | /// 162 | pub opaque type Lexer(a, mode) { 163 | Lexer(matchers: fn(mode) -> List(Matcher(a, mode))) 164 | } 165 | 166 | type State(a) { 167 | State( 168 | source: List(String), 169 | tokens: List(Token(a)), 170 | current: #(Int, Int, String), 171 | row: Int, 172 | col: Int, 173 | ) 174 | } 175 | 176 | // LEXER CONSTRUCTORS ---------------------------------------------------------- 177 | 178 | /// 179 | /// 180 | pub fn simple(matchers: List(Matcher(a, Nil))) -> Lexer(a, Nil) { 181 | Lexer(fn(_) { matchers }) 182 | } 183 | 184 | /// An `advanced` lexer is one that can change what matchers it uses based on the 185 | /// current mode. This is useful for sophisticated lexers that might need to 186 | /// handle things like interpolated strings or indentation-sensitive syntax. 187 | /// 188 | pub fn advanced(matchers: fn(mode) -> List(Matcher(a, mode))) -> Lexer(a, mode) { 189 | Lexer(fn(mode) { matchers(mode) }) 190 | } 191 | 192 | // MATCHER CONSTRUCTORS -------------------------------------------------------- 193 | 194 | /// Create a custom [`Matcher`](#Matcher) that will consume the input and produce 195 | /// a token with the given value if it is `Ok` or return a `NoMatch` if it fails. 196 | /// The first parameter is a function that takes the current lexeme and the 197 | /// second parameter is a one-grapheme lookahead. 198 | /// 199 | /// Matchers created with this convenience function cannot change the lexer's 200 | /// mode or skip ahead to the next iteration without consuming the input. 201 | /// 202 | pub fn keep(f: fn(String, String) -> Result(a, Nil)) -> Matcher(a, mode) { 203 | use mode, lexeme, lookahead <- Matcher 204 | 205 | f(lexeme, lookahead) 206 | |> result.map(Keep(_, mode)) 207 | |> result.unwrap(NoMatch) 208 | } 209 | 210 | /// Create a custom [`Matcher`](#Matcher) that will consume the input and move 211 | /// to the next iteration without producing a token if it is `True` or return a 212 | /// `NoMatch` if it fails. 
The first parameter is a function that takes the 213 | /// current lexeme and the second parameter is a one-grapheme lookahead. 214 | /// 215 | /// Matchers created with this convenience function cannot change the lexer's 216 | /// mode or skip ahead to the next iteration without consuming the input. 217 | /// 218 | pub fn drop(f: fn(String, String) -> Bool) -> Matcher(a, mode) { 219 | use mode, lexeme, lookahead <- Matcher 220 | 221 | case f(lexeme, lookahead) { 222 | True -> Drop(mode) 223 | False -> NoMatch 224 | } 225 | } 226 | 227 | /// Create a custom [`Matcher`](#Matcher) that is flexible enough to do anything 228 | /// you want! The first parameter is a function that takes the current lexer mode, 229 | /// the current lexeme, and a one-grapheme lookahead. 230 | /// 231 | /// The function returns a [`Match`](#Match) that tells the lexer what to do next. 232 | /// 233 | pub fn custom(f: fn(mode, String, String) -> Match(a, mode)) -> Matcher(a, mode) { 234 | Matcher(f) 235 | } 236 | 237 | /// Take an existing matcher and transform it by applying a function to the value 238 | /// it produces. 239 | /// 240 | pub fn map(matcher: Matcher(a, mode), f: fn(a) -> b) -> Matcher(b, mode) { 241 | use mode, lexeme, lookahead <- Matcher 242 | 243 | case matcher.run(mode, lexeme, lookahead) { 244 | Keep(value, mode) -> Keep(f(value), mode) 245 | Skip -> Skip 246 | Drop(mode) -> Drop(mode) 247 | NoMatch -> NoMatch 248 | } 249 | } 250 | 251 | /// Take an existing matcher and transform it by applying a function to the value 252 | /// it produces. The function you provide can return a different [`Match`](#Match) 253 | /// so you can, for example, take a matcher that `Keep`s a value and turn it into 254 | /// a matcher that `Drop`s the value instead. This is how [`ignore`](#ignore) works! 255 | /// 256 | pub fn then( 257 | matcher: Matcher(a, mode), 258 | f: fn(a) -> Match(b, mode), 259 | ) -> Matcher(b, mode) { 260 | use mode, lexeme, lookahead <- Matcher 261 | 262 | case matcher.run(mode, lexeme, lookahead) { 263 | Keep(value, _) -> f(value) 264 | Skip -> Skip 265 | Drop(mode) -> Drop(mode) 266 | NoMatch -> NoMatch 267 | } 268 | } 269 | 270 | /// Take an existing matcher and transition to a new mode. This only runs if 271 | /// the matcher is successful and either `Keep`s or `Drop`s a value. 272 | /// 273 | /// 274 | pub fn into(matcher: Matcher(a, mode), f: fn(mode) -> mode) -> Matcher(a, mode) { 275 | use mode, lexeme, lookahead <- Matcher 276 | 277 | case matcher.run(mode, lexeme, lookahead) { 278 | Keep(value, mode) -> Keep(value, f(mode)) 279 | Skip -> Skip 280 | Drop(mode) -> Drop(f(mode)) 281 | NoMatch -> NoMatch 282 | } 283 | } 284 | 285 | /// Take a matcher that might `Keep` anything and silently `Drop` anything it 286 | /// produces instead. This is useful for things like whitespace or comments 287 | /// where you want to consume some input but you don't want to emit a token. 288 | /// 289 | pub fn ignore(matcher: Matcher(a, mode)) -> Matcher(b, mode) { 290 | use mode, lexeme, lookahead <- Matcher 291 | 292 | case matcher.run(mode, lexeme, lookahead) { 293 | Keep(_, mode) -> Drop(mode) 294 | Skip -> Skip 295 | Drop(mode) -> Drop(mode) 296 | NoMatch -> NoMatch 297 | } 298 | } 299 | 300 | // COMMON MATCHERS ------------------------------------------------------------- 301 | 302 | /// Match exactly the given string with no lookahead and produce the given value.
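///
/// For example, the README's lexer uses this matcher for its punctuation tokens:
///
/// ```gleam
/// lexer.simple([
///   lexer.token("(", LParen),
///   lexer.token(")", RParen),
///   lexer.token(",", Comma),
///   // ...
/// ])
/// ```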
303 | /// 304 | pub fn token(str: String, value: a) -> Matcher(a, mode) { 305 | use mode, lexeme, _ <- Matcher 306 | 307 | case lexeme == str { 308 | True -> Keep(value, mode) 309 | False -> NoMatch 310 | } 311 | } 312 | 313 | /// Match exactly the given string only when the lookahead is matched by the given 314 | /// breaker _regex_. This is an alias of [`keyword`](#keyword) but it can be 315 | /// helpful to separate the two concepts. 316 | /// 317 | pub fn symbol(str: String, breaker: String, value: a) -> Matcher(a, mode) { 318 | let assert Ok(break) = regexp.from_string(breaker) 319 | 320 | use mode, lexeme, lookahead <- Matcher 321 | 322 | case lexeme == str && { lookahead == "" || regexp.check(break, lookahead) } { 323 | True -> Keep(value, mode) 324 | False -> NoMatch 325 | } 326 | } 327 | 328 | /// Match exactly the given string only when the lookahead is matched by the given 329 | /// breaker _regex_. Keywords are exact strings like `let` but you wouldn't want 330 | /// to lex `letter` as `[Let, Var("tter")]` so the breaker is used so you can say 331 | /// what characters should trigger a match. 332 | /// 333 | pub fn keyword(str: String, breaker: String, value: a) -> Matcher(a, mode) { 334 | let assert Ok(break) = regexp.from_string(breaker) 335 | 336 | use mode, lexeme, lookahead <- Matcher 337 | 338 | case lexeme == str && { lookahead == "" || regexp.check(break, lookahead) } { 339 | True -> Keep(value, mode) 340 | False -> NoMatch 341 | } 342 | } 343 | 344 | /// 345 | /// 346 | pub fn int(to_value: fn(Int) -> a) -> Matcher(a, mode) { 347 | int_with_separator("", to_value) 348 | } 349 | 350 | /// 351 | /// 352 | pub fn int_with_separator( 353 | separator: String, 354 | to_value: fn(Int) -> a, 355 | ) -> Matcher(a, mode) { 356 | let assert Ok(digit) = regexp.from_string("[0-9" <> separator <> "]") 357 | let assert Ok(integer) = regexp.from_string("^-*[0-9" <> separator <> "]+$") 358 | 359 | use mode, lexeme, lookahead <- Matcher 360 | 361 | case !regexp.check(digit, lookahead) && regexp.check(integer, lexeme) { 362 | False -> NoMatch 363 | True -> { 364 | let assert Ok(num) = 365 | lexeme 366 | |> string.replace(separator, "") 367 | |> int.parse 368 | Keep(to_value(num), mode) 369 | } 370 | } 371 | } 372 | 373 | /// 374 | /// 375 | pub fn float(to_value: fn(Float) -> a) -> Matcher(a, mode) { 376 | float_with_separator("", to_value) 377 | } 378 | 379 | /// 380 | /// 381 | pub fn float_with_separator( 382 | separator: String, 383 | to_value: fn(Float) -> a, 384 | ) -> Matcher(a, mode) { 385 | let assert Ok(digit) = regexp.from_string("[0-9" <> separator <> "]") 386 | let assert Ok(integer) = regexp.from_string("^-*[0-9" <> separator <> "]+$") 387 | let assert Ok(number) = 388 | regexp.from_string( 389 | "^-*[0-9" <> separator <> "]+\\.[0-9" <> separator <> "]+$", 390 | ) 391 | 392 | use mode, lexeme, lookahead <- Matcher 393 | let is_int = !regexp.check(digit, lookahead) && regexp.check(integer, lexeme) 394 | let is_float = !regexp.check(digit, lookahead) && regexp.check(number, lexeme) 395 | 396 | case lexeme { 397 | "." 
if is_int -> NoMatch 398 | 399 | _ if is_float -> { 400 | let assert Ok(num) = 401 | lexeme 402 | |> string.replace(separator, "") 403 | |> float.parse 404 | Keep(to_value(num), mode) 405 | } 406 | 407 | _ -> NoMatch 408 | } 409 | } 410 | 411 | pub fn number( 412 | from_int: fn(Int) -> a, 413 | from_float: fn(Float) -> a, 414 | ) -> Matcher(a, mode) { 415 | number_with_separator("", from_int, from_float) 416 | } 417 | 418 | pub fn number_with_separator( 419 | separator: String, 420 | from_int: fn(Int) -> a, 421 | from_float: fn(Float) -> a, 422 | ) -> Matcher(a, mode) { 423 | let assert Ok(digit) = regexp.from_string("[0-9" <> separator <> "]") 424 | let assert Ok(integer) = regexp.from_string("^-*[0-9" <> separator <> "]+$") 425 | let assert Ok(number) = 426 | regexp.from_string( 427 | "^-*[0-9" <> separator <> "]+\\.[0-9" <> separator <> "]+$", 428 | ) 429 | 430 | use mode, lexeme, lookahead <- Matcher 431 | let is_int = !regexp.check(digit, lookahead) && regexp.check(integer, lexeme) 432 | let is_float = !regexp.check(digit, lookahead) && regexp.check(number, lexeme) 433 | 434 | case lexeme, lookahead { 435 | ".", _ if is_int -> NoMatch 436 | _, "." if is_int -> NoMatch 437 | 438 | _, _ if is_int -> { 439 | let assert Ok(num) = 440 | lexeme 441 | |> string.replace(separator, "") 442 | |> int.parse 443 | Keep(from_int(num), mode) 444 | } 445 | 446 | _, _ if is_float -> { 447 | let assert Ok(num) = 448 | lexeme 449 | |> string.replace(separator, "") 450 | |> float.parse 451 | Keep(from_float(num), mode) 452 | } 453 | 454 | _, _ -> NoMatch 455 | } 456 | } 457 | 458 | /// 459 | /// 460 | pub fn string(char: String, to_value: fn(String) -> a) -> Matcher(a, mode) { 461 | let assert Ok(is_string) = 462 | regexp.from_string( 463 | "^" <> char <> "([^" <> char <> "\\\\]|\\\\[\\s\\S])*" <> char <> "$", 464 | ) 465 | use mode, lexeme, _ <- Matcher 466 | 467 | case regexp.check(is_string, lexeme) { 468 | True -> 469 | lexeme 470 | |> string.drop_start(1) 471 | |> string.drop_end(1) 472 | |> to_value 473 | |> Keep(mode) 474 | False -> NoMatch 475 | } 476 | } 477 | 478 | /// 479 | /// 480 | pub fn identifier( 481 | start: String, 482 | inner: String, 483 | reserved: Set(String), 484 | to_value: fn(String) -> a, 485 | ) -> Matcher(a, mode) { 486 | let assert Ok(ident) = regexp.from_string("^" <> start <> inner <> "*$") 487 | let assert Ok(inner) = regexp.from_string(inner) 488 | 489 | use mode, lexeme, lookahead <- Matcher 490 | 491 | case regexp.check(inner, lookahead), regexp.check(ident, lexeme) { 492 | True, True -> Skip 493 | False, True -> 494 | case set.contains(reserved, lexeme) { 495 | True -> NoMatch 496 | False -> Keep(to_value(lexeme), mode) 497 | } 498 | _, _ -> NoMatch 499 | } 500 | } 501 | 502 | /// 503 | /// 504 | pub fn try_identifier( 505 | start: String, 506 | inner: String, 507 | reserved: Set(String), 508 | to_value: fn(String) -> a, 509 | ) -> Result(Matcher(a, mode), regexp.CompileError) { 510 | use ident <- result.then(regexp.from_string("^" <> start <> inner <> "*$")) 511 | use inner <- result.map(regexp.from_string(inner)) 512 | 513 | use mode, lexeme, lookahead <- Matcher 514 | 515 | case regexp.check(inner, lookahead), regexp.check(ident, lexeme) { 516 | True, True -> Skip 517 | False, True -> 518 | case set.contains(reserved, lexeme) { 519 | True -> NoMatch 520 | False -> Keep(to_value(lexeme), mode) 521 | } 522 | _, _ -> NoMatch 523 | } 524 | } 525 | 526 | /// 527 | /// 528 | pub fn variable( 529 | reserved: Set(String), 530 | to_value: fn(String) -> a, 531 | ) -> 
Matcher(a, mode) { 532 | identifier("[a-z]", "[a-zA-Z0-9_]", reserved, to_value) 533 | } 534 | 535 | /// 536 | /// 537 | pub fn spaces(token: a) -> Matcher(a, mode) { 538 | spaces_(fn(_) { token }) 539 | } 540 | 541 | /// 542 | /// 543 | pub fn spaces_(to_value: fn(String) -> a) -> Matcher(a, mode) { 544 | let assert Ok(spaces) = regexp.from_string("^[ \\t]+") 545 | 546 | use mode, lexeme, _ <- Matcher 547 | 548 | case regexp.check(spaces, lexeme) { 549 | True -> Keep(to_value(lexeme), mode) 550 | False -> NoMatch 551 | } 552 | } 553 | 554 | /// 555 | /// 556 | pub fn whitespace(token: a) -> Matcher(a, mode) { 557 | let assert Ok(whitespace) = regexp.from_string("^\\s+$") 558 | 559 | use mode, lexeme, _ <- Matcher 560 | 561 | case regexp.check(whitespace, lexeme) { 562 | True -> Keep(token, mode) 563 | False -> NoMatch 564 | } 565 | } 566 | 567 | /// 568 | pub fn comment(start: String, to_value: fn(String) -> a) -> Matcher(a, mode) { 569 | let drop_length = string.length(start) 570 | use mode, lexeme, lookahead <- Matcher 571 | 572 | case string.starts_with(lexeme, start), lookahead { 573 | True, "\n" -> 574 | lexeme 575 | |> string.drop_start(drop_length) 576 | |> to_value 577 | |> Keep(mode) 578 | True, _ -> Skip 579 | False, _ -> NoMatch 580 | } 581 | } 582 | 583 | // RUNNING A LEXER ------------------------------------------------------------- 584 | 585 | /// 586 | /// 587 | pub fn run( 588 | source: String, 589 | lexer: Lexer(a, Nil), 590 | ) -> Result(List(Token(a)), Error) { 591 | string.to_graphemes(source) 592 | |> State([], #(1, 1, ""), 1, 1) 593 | |> do_run(lexer, Nil, _) 594 | } 595 | 596 | /// 597 | /// 598 | pub fn run_advanced( 599 | source: String, 600 | mode: mode, 601 | lexer: Lexer(a, mode), 602 | ) -> Result(List(Token(a)), Error) { 603 | do_run(lexer, mode, State(string.to_graphemes(source), [], #(1, 1, ""), 1, 1)) 604 | } 605 | 606 | fn do_run( 607 | lexer: Lexer(a, mode), 608 | mode: mode, 609 | state: State(a), 610 | ) -> Result(List(Token(a)), Error) { 611 | let matchers = lexer.matchers(mode) 612 | 613 | case state.source, state.current { 614 | // If we're at the end of the source and there's no lexeme left to match, 615 | // we're done! 616 | // 617 | // We have to remember to reverse the list of tokens because we've been building 618 | // it backwards using `[token, ..state.tokens]`. This is much quicker than 619 | // trying to prepend to the list as we go. 620 | [], #(_, _, "") -> Ok(list.reverse(state.tokens)) 621 | 622 | // If we're at the end of the source but there's still a lexeme left to match, 623 | // we'll run the final `do_match` and return the result. If we get a `NoMatch` 624 | // at this point something went wrong. 625 | [], #(start_row, start_col, lexeme) -> 626 | case do_match(mode, lexeme, "", matchers) { 627 | NoMatch -> Error(NoMatchFound(start_row, start_col, lexeme)) 628 | Skip -> Error(NoMatchFound(start_row, start_col, lexeme)) 629 | Drop(_) -> Ok(list.reverse(state.tokens)) 630 | Keep(value, _) -> { 631 | let span = Span(start_row, start_col, state.row, state.col) 632 | let token = Token(span, lexeme, value) 633 | 634 | Ok(list.reverse([token, ..state.tokens])) 635 | } 636 | } 637 | 638 | // When lexing we include a one-grapheme lookahead to help us with things like 639 | // matching identifiers or other mode-aware tokens. This just takes the 640 | // skip grapheme from the source (we call it `lookahead` here) and calls the 641 | // `do_match` function with it and some other bits. 
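// As an illustrative sketch: given the input `let x` and a
// `keyword("let", "\\s", Let)` matcher (the token type and breaker here are
// made up for the example), the accumulated lexeme grows "l" -> "le" -> "let"
// across iterations (each returning `NoMatch`) until the lookahead is " ", at
// which point the matcher returns `Keep` and the lookahead grapheme becomes
// the start of the next lexeme.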
642 | [lookahead, ..rest], #(start_row, start_col, lexeme) -> { 643 | let row = next_row(state.row, lookahead) 644 | let col = next_col(state.col, lookahead) 645 | 646 | case do_match(mode, lexeme, lookahead, matchers) { 647 | Keep(value, mode) -> { 648 | let span = Span(start_row, start_col, state.row, state.col) 649 | let token = Token(span, lexeme, value) 650 | 651 | do_run( 652 | lexer, 653 | mode, 654 | State( 655 | source: rest, 656 | tokens: [token, ..state.tokens], 657 | current: #(state.row, state.col, lookahead), 658 | row: row, 659 | col: col, 660 | ), 661 | ) 662 | } 663 | 664 | // A skip says that a matcher has matched the lexeme but still wants to 665 | // consume more input. This is mostly useful for things like identifiers 666 | // where the current lexeme is in the set of reserved words but we can 667 | // see the lookahead and know that it's not a reserved word. 668 | Skip -> 669 | do_run( 670 | lexer, 671 | mode, 672 | State( 673 | source: rest, 674 | tokens: state.tokens, 675 | current: #(start_row, start_col, lexeme <> lookahead), 676 | row: row, 677 | col: col, 678 | ), 679 | ) 680 | 681 | // A drop says that we've matched the lexeme but we don't want to emit a 682 | // token. This is mostly useful for things like comments or whitespace that 683 | // users may not want to appear in the final token stream but do want to 684 | // handle in the lexer. 685 | Drop(mode) -> 686 | do_run( 687 | lexer, 688 | mode, 689 | State( 690 | source: rest, 691 | tokens: state.tokens, 692 | current: #(state.row, state.col, lookahead), 693 | row: row, 694 | col: col, 695 | ), 696 | ) 697 | 698 | NoMatch -> 699 | do_run( 700 | lexer, 701 | mode, 702 | State( 703 | source: rest, 704 | tokens: state.tokens, 705 | current: #(start_row, start_col, lexeme <> lookahead), 706 | row: row, 707 | col: col, 708 | ), 709 | ) 710 | } 711 | } 712 | } 713 | } 714 | 715 | fn do_match( 716 | mode: mode, 717 | str: String, 718 | lookahead: String, 719 | matchers: List(Matcher(a, mode)), 720 | ) -> Match(a, mode) { 721 | use _, matcher <- list.fold_until(matchers, NoMatch) 722 | 723 | case matcher.run(mode, str, lookahead) { 724 | Keep(_, _) as match -> list.Stop(match) 725 | Skip -> list.Stop(Skip) 726 | Drop(_) as match -> list.Stop(match) 727 | NoMatch -> list.Continue(NoMatch) 728 | } 729 | } 730 | 731 | // UTILS ----------------------------------------------------------------------- 732 | 733 | fn next_col(col: Int, str: String) -> Int { 734 | case str { 735 | "\n" -> 1 736 | _ -> col + 1 737 | } 738 | } 739 | 740 | fn next_row(row: Int, str: String) -> Int { 741 | case str { 742 | "\n" -> row + 1 743 | _ -> row 744 | } 745 | } 746 | -------------------------------------------------------------------------------- /src/nibble/pratt.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/list 4 | import nibble.{type Parser} 5 | 6 | // TYPES ----------------------------------------------------------------------- 7 | 8 | pub opaque type Config(a, tok, ctx) { 9 | Config( 10 | one_of: List(fn(Config(a, tok, ctx)) -> Parser(a, tok, ctx)), 11 | and_then_one_of: List(Operator(a, tok, ctx)), 12 | spaces: Parser(Nil, tok, ctx), 13 | ) 14 | } 15 | 16 | pub opaque type Operator(a, tok, ctx) { 17 | Operator(fn(Config(a, tok, ctx)) -> #(Int, fn(a) -> Parser(a, tok, ctx))) 18 | } 19 | 20 | // 21 | 22 | pub fn expression( 23 | one_of first: List(fn(Config(a, tok, ctx)) -> Parser(a, 
tok, ctx)), 24 | and_then then: List(Operator(a, tok, ctx)), 25 | dropping spaces: Parser(Nil, tok, ctx), 26 | ) -> Parser(a, tok, ctx) { 27 | let config = Config(first, then, spaces) 28 | sub_expression(config, 0) 29 | } 30 | 31 | pub fn sub_expression( 32 | config: Config(a, tok, ctx), 33 | precedence: Int, 34 | ) -> Parser(a, tok, ctx) { 35 | let expr = { 36 | use <- nibble.lazy 37 | config.one_of 38 | |> list.map(fn(p) { p(config) }) 39 | |> nibble.one_of 40 | } 41 | 42 | let go = fn(expr) { 43 | use _ <- nibble.do(config.spaces) 44 | 45 | nibble.one_of([ 46 | operation(expr, config, precedence) 47 | |> nibble.map(nibble.Continue), 48 | nibble.return(expr) 49 | |> nibble.map(nibble.Break), 50 | ]) 51 | } 52 | 53 | use _ <- nibble.do(config.spaces) 54 | use e <- nibble.do(expr) 55 | 56 | nibble.loop(e, go) 57 | } 58 | 59 | fn operation( 60 | expr: a, 61 | config: Config(a, tok, ctx), 62 | current_precedence: Int, 63 | ) -> Parser(a, tok, ctx) { 64 | config.and_then_one_of 65 | |> list.filter_map(fn(operator) { 66 | let Operator(op) = operator 67 | case op(config) { 68 | #(precedence, parser) if precedence > current_precedence -> 69 | Ok(parser(expr)) 70 | 71 | _ -> Error(Nil) 72 | } 73 | }) 74 | |> nibble.one_of() 75 | } 76 | 77 | // 78 | 79 | pub fn prefix( 80 | precedence: Int, 81 | operator: Parser(Nil, tok, ctx), 82 | apply: fn(a) -> a, 83 | ) -> fn(Config(a, tok, ctx)) -> Parser(a, tok, ctx) { 84 | fn(config) { 85 | use _ <- nibble.do(operator) 86 | use subexpr <- nibble.do(sub_expression(config, precedence)) 87 | 88 | nibble.return(apply(subexpr)) 89 | } 90 | } 91 | 92 | pub fn infix_left( 93 | precedence: Int, 94 | operator: Parser(Nil, tok, ctx), 95 | apply: fn(a, a) -> a, 96 | ) -> Operator(a, tok, ctx) { 97 | make_infix(#(precedence, precedence), operator, apply) 98 | } 99 | 100 | pub fn infix_right( 101 | precedence: Int, 102 | operator: Parser(Nil, tok, ctx), 103 | apply: fn(a, a) -> a, 104 | ) -> Operator(a, tok, ctx) { 105 | make_infix(#(precedence, precedence - 1), operator, apply) 106 | } 107 | 108 | pub fn postfix( 109 | precedence: Int, 110 | operator: Parser(Nil, tok, ctx), 111 | apply: fn(a) -> a, 112 | ) -> Operator(a, tok, ctx) { 113 | use _ <- Operator 114 | #(precedence, fn(lhs) { 115 | use _ <- nibble.do(operator) 116 | nibble.return(apply(lhs)) 117 | }) 118 | } 119 | 120 | fn make_infix( 121 | precedence: #(Int, Int), 122 | operator: Parser(Nil, tok, ctx), 123 | apply: fn(a, a) -> a, 124 | ) -> Operator(a, tok, ctx) { 125 | let #(left_precedence, right_precedence) = precedence 126 | use config <- Operator 127 | #(left_precedence, fn(lhs) { 128 | use _ <- nibble.do(operator) 129 | use subexpr <- nibble.do(sub_expression(config, right_precedence)) 130 | 131 | nibble.return(apply(lhs, subexpr)) 132 | }) 133 | } 134 | -------------------------------------------------------------------------------- /src/nibble/predicates.gleam: -------------------------------------------------------------------------------- 1 | import gleam/list 2 | import gleam/string 3 | 4 | pub fn string(str: String, predicate: fn(String) -> Bool) -> Bool { 5 | str != "" && list.all(string.to_graphemes(str), predicate) 6 | } 7 | 8 | pub fn is_lower_ascii(grapheme: String) -> Bool { 9 | case grapheme { 10 | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" -> True 11 | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" -> True 12 | "w" | "x" | "y" | "z" -> True 13 | _ -> False 14 | } 15 | } 16 | 17 | pub fn is_upper_ascii(grapheme: String) -> Bool { 18 | case grapheme { 
19 | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" -> True 20 | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" -> True 21 | "W" | "X" | "Y" | "Z" -> True 22 | _ -> False 23 | } 24 | } 25 | 26 | pub fn is_digit(grapheme: String) -> Bool { 27 | case grapheme { 28 | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -> True 29 | _ -> False 30 | } 31 | } 32 | 33 | pub fn is_whitespace(grapheme: String) -> Bool { 34 | case grapheme { 35 | " " | "\t" | "\r" | "\n" -> True 36 | _ -> False 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/nibble/vendor/glearray.gleam: -------------------------------------------------------------------------------- 1 | //// The following module is vendored from the Open Source package "glearray" with 2 | //// functions for working with iterators removed. The original source can be found 3 | //// here: 4 | //// 5 | //// https://github.com/lunagl/glearray/ 6 | //// 7 | //// The original license is included below: 8 | //// 9 | //// Apache License 10 | //// Version 2.0, January 2004 11 | //// http://www.apache.org/licenses/ 12 | //// 13 | //// TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 14 | //// 15 | //// 1. Definitions. 16 | //// 17 | //// "License" shall mean the terms and conditions for use, reproduction, 18 | //// and distribution as defined by Sections 1 through 9 of this document. 19 | //// 20 | //// "Licensor" shall mean the copyright owner or entity authorized by 21 | //// the copyright owner that is granting the License. 22 | //// 23 | //// "Legal Entity" shall mean the union of the acting entity and all 24 | //// other entities that control, are controlled by, or are under common 25 | //// control with that entity. For the purposes of this definition, 26 | //// "control" means (i) the power, direct or indirect, to cause the 27 | //// direction or management of such entity, whether by contract or 28 | //// otherwise, or (ii) ownership of fifty percent (50%) or more of the 29 | //// outstanding shares, or (iii) beneficial ownership of such entity. 30 | //// 31 | //// "You" (or "Your") shall mean an individual or Legal Entity 32 | //// exercising permissions granted by this License. 33 | //// 34 | //// "Source" form shall mean the preferred form for making modifications, 35 | //// including but not limited to software source code, documentation 36 | //// source, and configuration files. 37 | //// 38 | //// "Object" form shall mean any form resulting from mechanical 39 | //// transformation or translation of a Source form, including but 40 | //// not limited to compiled object code, generated documentation, 41 | //// and conversions to other media types. 42 | //// 43 | //// "Work" shall mean the work of authorship, whether in Source or 44 | //// Object form, made available under the License, as indicated by a 45 | //// copyright notice that is included in or attached to the work 46 | //// (an example is provided in the Appendix below). 47 | //// 48 | //// "Derivative Works" shall mean any work, whether in Source or Object 49 | //// form, that is based on (or derived from) the Work and for which the 50 | //// editorial revisions, annotations, elaborations, or other modifications 51 | //// represent, as a whole, an original work of authorship. 
For the purposes 52 | //// of this License, Derivative Works shall not include works that remain 53 | //// separable from, or merely link (or bind by name) to the interfaces of, 54 | //// the Work and Derivative Works thereof. 55 | //// 56 | //// "Contribution" shall mean any work of authorship, including 57 | //// the original version of the Work and any modifications or additions 58 | //// to that Work or Derivative Works thereof, that is intentionally 59 | //// submitted to Licensor for inclusion in the Work by the copyright owner 60 | //// or by an individual or Legal Entity authorized to submit on behalf of 61 | //// the copyright owner. For the purposes of this definition, "submitted" 62 | //// means any form of electronic, verbal, or written communication sent 63 | //// to the Licensor or its representatives, including but not limited to 64 | //// communication on electronic mailing lists, source code control systems, 65 | //// and issue tracking systems that are managed by, or on behalf of, the 66 | //// Licensor for the purpose of discussing and improving the Work, but 67 | //// excluding communication that is conspicuously marked or otherwise 68 | //// designated in writing by the copyright owner as "Not a Contribution." 69 | //// 70 | //// "Contributor" shall mean Licensor and any individual or Legal Entity 71 | //// on behalf of whom a Contribution has been received by Licensor and 72 | //// subsequently incorporated within the Work. 73 | //// 74 | //// 2. Grant of Copyright License. Subject to the terms and conditions of 75 | //// this License, each Contributor hereby grants to You a perpetual, 76 | //// worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | //// copyright license to reproduce, prepare Derivative Works of, 78 | //// publicly display, publicly perform, sublicense, and distribute the 79 | //// Work and such Derivative Works in Source or Object form. 80 | //// 81 | //// 3. Grant of Patent License. Subject to the terms and conditions of 82 | //// this License, each Contributor hereby grants to You a perpetual, 83 | //// worldwide, non-exclusive, no-charge, royalty-free, irrevocable 84 | //// (except as stated in this section) patent license to make, have made, 85 | //// use, offer to sell, sell, import, and otherwise transfer the Work, 86 | //// where such license applies only to those patent claims licensable 87 | //// by such Contributor that are necessarily infringed by their 88 | //// Contribution(s) alone or by combination of their Contribution(s) 89 | //// with the Work to which such Contribution(s) was submitted. If You 90 | //// institute patent litigation against any entity (including a 91 | //// cross-claim or counterclaim in a lawsuit) alleging that the Work 92 | //// or a Contribution incorporated within the Work constitutes direct 93 | //// or contributory patent infringement, then any patent licenses 94 | //// granted to You under this License for that Work shall terminate 95 | //// as of the date such litigation is filed. 96 | //// 97 | //// 4. Redistribution. 
You may reproduce and distribute copies of the 98 | //// Work or Derivative Works thereof in any medium, with or without 99 | //// modifications, and in Source or Object form, provided that You 100 | //// meet the following conditions: 101 | //// 102 | //// (a) You must give any other recipients of the Work or 103 | //// Derivative Works a copy of this License; and 104 | //// 105 | //// (b) You must cause any modified files to carry prominent notices 106 | //// stating that You changed the files; and 107 | //// 108 | //// (c) You must retain, in the Source form of any Derivative Works 109 | //// that You distribute, all copyright, patent, trademark, and 110 | //// attribution notices from the Source form of the Work, 111 | //// excluding those notices that do not pertain to any part of 112 | //// the Derivative Works; and 113 | //// 114 | //// (d) If the Work includes a "NOTICE" text file as part of its 115 | //// distribution, then any Derivative Works that You distribute must 116 | //// include a readable copy of the attribution notices contained 117 | //// within such NOTICE file, excluding those notices that do not 118 | //// pertain to any part of the Derivative Works, in at least one 119 | //// of the following places: within a NOTICE text file distributed 120 | //// as part of the Derivative Works; within the Source form or 121 | //// documentation, if provided along with the Derivative Works; or, 122 | //// within a display generated by the Derivative Works, if and 123 | //// wherever such third-party notices normally appear. The contents 124 | //// of the NOTICE file are for informational purposes only and 125 | //// do not modify the License. You may add Your own attribution 126 | //// notices within Derivative Works that You distribute, alongside 127 | //// or as an addendum to the NOTICE text from the Work, provided 128 | //// that such additional attribution notices cannot be construed 129 | //// as modifying the License. 130 | //// 131 | //// You may add Your own copyright statement to Your modifications and 132 | //// may provide additional or different license terms and conditions 133 | //// for use, reproduction, or distribution of Your modifications, or 134 | //// for any such Derivative Works as a whole, provided Your use, 135 | //// reproduction, and distribution of the Work otherwise complies with 136 | //// the conditions stated in this License. 137 | //// 138 | //// 5. Submission of Contributions. Unless You explicitly state otherwise, 139 | //// any Contribution intentionally submitted for inclusion in the Work 140 | //// by You to the Licensor shall be under the terms and conditions of 141 | //// this License, without any additional terms or conditions. 142 | //// Notwithstanding the above, nothing herein shall supersede or modify 143 | //// the terms of any separate license agreement you may have executed 144 | //// with Licensor regarding such Contributions. 145 | //// 146 | //// 6. Trademarks. This License does not grant permission to use the trade 147 | //// names, trademarks, service marks, or product names of the Licensor, 148 | //// except as required for reasonable and customary use in describing the 149 | //// origin of the Work and reproducing the content of the NOTICE file. 150 | //// 151 | //// 7. Disclaimer of Warranty. 
Unless required by applicable law or 152 | //// agreed to in writing, Licensor provides the Work (and each 153 | //// Contributor provides its Contributions) on an "AS IS" BASIS, 154 | //// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 155 | //// implied, including, without limitation, any warranties or conditions 156 | //// of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 157 | //// PARTICULAR PURPOSE. You are solely responsible for determining the 158 | //// appropriateness of using or redistributing the Work and assume any 159 | //// risks associated with Your exercise of permissions under this License. 160 | //// 161 | //// 8. Limitation of Liability. In no event and under no legal theory, 162 | //// whether in tort (including negligence), contract, or otherwise, 163 | //// unless required by applicable law (such as deliberate and grossly 164 | //// negligent acts) or agreed to in writing, shall any Contributor be 165 | //// liable to You for damages, including any direct, indirect, special, 166 | //// incidental, or consequential damages of any character arising as a 167 | //// result of this License or out of the use or inability to use the 168 | //// Work (including but not limited to damages for loss of goodwill, 169 | //// work stoppage, computer failure or malfunction, or any and all 170 | //// other commercial damages or losses), even if such Contributor 171 | //// has been advised of the possibility of such damages. 172 | //// 173 | //// 9. Accepting Warranty or Additional Liability. While redistributing 174 | //// the Work or Derivative Works thereof, You may choose to offer, 175 | //// and charge a fee for, acceptance of support, warranty, indemnity, 176 | //// or other liability obligations and/or rights consistent with this 177 | //// License. However, in accepting such obligations, You may act only 178 | //// on Your own behalf and on Your sole responsibility, not on behalf 179 | //// of any other Contributor, and only if You agree to indemnify, 180 | //// defend, and hold each Contributor harmless for any liability 181 | //// incurred by, or claims asserted against, such Contributor by reason 182 | //// of your accepting any such warranty or additional liability. 183 | //// 184 | //// END OF TERMS AND CONDITIONS 185 | //// 186 | //// Copyright 2023 Frederick Schwalbe 187 | //// 188 | //// Licensed under the Apache License, Version 2.0 (the "License"); 189 | //// you may not use this file except in compliance with the License. 190 | //// You may obtain a copy of the License at 191 | //// 192 | //// http://www.apache.org/licenses/LICENSE-2.0 193 | //// 194 | //// Unless required by applicable law or agreed to in writing, software 195 | //// distributed under the License is distributed on an "AS IS" BASIS, 196 | //// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 197 | //// See the License for the specific language governing permissions and 198 | //// limitations under the License. 199 | 200 | /// Arrays are ordered sequences of elements, similar to lists. 201 | /// 202 | /// Like everything in Gleam, arrays are immutable. 203 | /// As opposed to linked lists, arrays store their elements in a contiguous 204 | /// slice of memory, therefore allowing very fast indexed access. 205 | /// 206 | /// Modifying an array however takes linear time and memory because it requires 207 | /// copying the entire array. 
208 | /// 209 | /// ### Implementation 210 | /// 211 | /// Arrays are represented as tuples when compiled to Erlang, and JavaScript 212 | /// arrays when compiled to JavaScript. 213 | /// 214 | /// Also note that this library has no connection to Erlang's 215 | /// [`array`](https://www.erlang.org/doc/man/array.html) module, which 216 | /// implements a tree structure for efficient reading and writing. 217 | /// 218 | pub type Array(a) 219 | 220 | /// Returns an empty array. 221 | /// 222 | /// ## Examples 223 | /// 224 | /// ```gleam 225 | /// > new() 226 | /// from_list([]) 227 | /// ``` 228 | /// 229 | @external(erlang, "glearray_ffi", "new") 230 | @external(javascript, "../../glearray_ffi.mjs", "newArray") 231 | pub fn new() -> Array(a) 232 | 233 | /// Converts a list to an array. 234 | /// 235 | @external(erlang, "erlang", "list_to_tuple") 236 | @external(javascript, "../../glearray_ffi.mjs", "fromList") 237 | pub fn from_list(list: List(a)) -> Array(a) 238 | 239 | /// Converts an array to a list. 240 | /// 241 | @external(erlang, "erlang", "tuple_to_list") 242 | @external(javascript, "../../gleam.mjs", "toList") 243 | pub fn to_list(array: Array(a)) -> List(a) 244 | 245 | /// Returns the number of elements in the array. 246 | /// 247 | /// ## Performance 248 | /// 249 | /// This function is very efficient and runs in constant time. 250 | /// 251 | /// ## Examples 252 | /// 253 | /// ```gleam 254 | /// > length(new()) 255 | /// 0 256 | /// ``` 257 | /// 258 | /// ```gleam 259 | /// > from_list([8, 0, 0]) |> length 260 | /// 3 261 | /// ``` 262 | /// 263 | @external(erlang, "erlang", "tuple_size") 264 | @external(javascript, "../../glearray_ffi.mjs", "arrayLength") 265 | pub fn length(of array: Array(a)) -> Int 266 | 267 | /// Returns the element at the specified index, starting from 0. 268 | /// 269 | /// `Error(Nil)` is returned if `index` is less than 0 or greater than 270 | /// or equal to `length(array)`. 271 | /// 272 | /// ## Performance 273 | /// 274 | /// This function is very efficient and runs in constant time. 275 | /// 276 | /// ## Examples 277 | /// 278 | /// ```gleam 279 | /// > from_list([5, 6, 7]) |> get(1) 280 | /// Ok(6) 281 | /// ``` 282 | /// 283 | /// ```gleam 284 | /// > from_list([5, 6, 7]) |> get(3) 285 | /// Error(Nil) 286 | /// ``` 287 | /// 288 | pub fn get(in array: Array(a), at index: Int) -> Result(a, Nil) { 289 | case is_valid_index(array, index) { 290 | True -> Ok(do_get(array, index)) 291 | False -> Error(Nil) 292 | } 293 | } 294 | 295 | @external(erlang, "glearray_ffi", "get") 296 | @external(javascript, "../../glearray_ffi.mjs", "get") 297 | fn do_get(array: Array(a), index: Int) -> a 298 | 299 | /// Replaces the element at the given index with `value`. 300 | /// 301 | /// This function cannot extend an array and returns `Error(Nil)` if `index` is 302 | /// not valid. 303 | /// See also [`copy_insert`](#copy_insert) and [`copy_push`](#copy_push). 304 | /// 305 | /// ## Performance 306 | /// 307 | /// This function has to copy the entire array, making it very inefficient 308 | /// especially for larger arrays. 
309 | /// 310 | /// ## Examples 311 | /// 312 | /// ```gleam 313 | /// > from_list(["a", "b", "c"]) |> copy_set(1, "x") 314 | /// Ok(from_list(["a", "x", "c"])) 315 | /// ``` 316 | /// 317 | /// ```gleam 318 | /// > from_list(["a", "b", "c"]) |> copy_set(3, "x") 319 | /// Error(Nil) 320 | /// ``` 321 | /// 322 | pub fn copy_set( 323 | in array: Array(a), 324 | at index: Int, 325 | value value: a, 326 | ) -> Result(Array(a), Nil) { 327 | case is_valid_index(array, index) { 328 | True -> Ok(do_set(array, index, value)) 329 | False -> Error(Nil) 330 | } 331 | } 332 | 333 | @external(erlang, "glearray_ffi", "set") 334 | @external(javascript, "../../glearray_ffi.mjs", "set") 335 | fn do_set(array: Array(a), index: Int, value: a) -> Array(a) 336 | 337 | fn is_valid_index(array: Array(a), index: Int) -> Bool { 338 | index >= 0 && index < length(array) 339 | } 340 | 341 | /// Adds a single element to the back of the given array. 342 | /// 343 | /// ## Performance 344 | /// 345 | /// This function has to copy the entire array, making it very inefficient 346 | /// especially for larger arrays. 347 | /// 348 | /// ## Examples 349 | /// 350 | /// ```gleam 351 | /// > new() |> copy_push(1) |> copy_push(2) |> to_list 352 | /// [1, 2] 353 | /// ``` 354 | /// 355 | @external(erlang, "erlang", "append_element") 356 | @external(javascript, "../../glearray_ffi.mjs", "push") 357 | pub fn copy_push(onto array: Array(a), value value: a) -> Array(a) 358 | 359 | /// Inserts an element into the array at the given index. 360 | /// 361 | /// All following elements are shifted to the right, having their index 362 | /// incremented by one. 363 | /// 364 | /// `Error(Nil)` is returned if the index is less than 0 or greater than 365 | /// `length(array)`. 366 | /// If the index is equal to `length(array)`, this function behaves like 367 | /// [`copy_push`](#copy_push). 368 | /// 369 | /// ## Performance 370 | /// 371 | /// This function has to copy the entire array, making it very inefficient 372 | /// especially for larger arrays. 
373 | /// 374 | /// ## Examples 375 | /// 376 | /// ```gleam 377 | /// > from_list(["a", "b"]) |> copy_insert(0, "c") 378 | /// Ok(from_list(["c", "a", "b"])) 379 | /// ``` 380 | /// 381 | /// ```gleam 382 | /// > from_list(["a", "b"]) |> copy_insert(1, "c") 383 | /// Ok(from_list(["a", "c", "b"])) 384 | /// ``` 385 | /// 386 | /// ```gleam 387 | /// > from_list(["a", "b"]) |> copy_insert(2, "c") 388 | /// Ok(from_list(["a", "b", "c"])) 389 | /// ``` 390 | /// 391 | /// ```gleam 392 | /// > from_list(["a", "b"]) |> copy_insert(3, "c") 393 | /// Error(Nil) 394 | /// ``` 395 | /// 396 | pub fn copy_insert( 397 | into array: Array(a), 398 | at index: Int, 399 | value value: a, 400 | ) -> Result(Array(a), Nil) { 401 | case index >= 0 && index <= length(array) { 402 | True -> Ok(do_insert(array, index, value)) 403 | False -> Error(Nil) 404 | } 405 | } 406 | 407 | @external(erlang, "glearray_ffi", "insert") 408 | @external(javascript, "../../glearray_ffi.mjs", "insert") 409 | fn do_insert(array: Array(a), index: Int, value: a) -> Array(a) 410 | -------------------------------------------------------------------------------- /test/docs/lexer_modes/indentation_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/int 4 | import gleam/io 5 | import gleam/order.{Eq, Gt, Lt} 6 | import gleam/regexp 7 | import gleam/set 8 | import gleam/string 9 | import gleeunit/should 10 | import nibble/lexer.{ 11 | type Matcher, type Token, Drop, Keep, NoMatch, Skip, Span, Token, 12 | } 13 | 14 | // TYPES ----------------------------------------------------------------------- 15 | 16 | type TokenT { 17 | Var(String) 18 | Str(String) 19 | Num(Int) 20 | 21 | // Keywords 22 | Def 23 | For 24 | In 25 | Print 26 | 27 | // Indentation 28 | Indent(Int) 29 | Dedent(Int) 30 | } 31 | 32 | // TESTS ----------------------------------------------------------------------- 33 | 34 | pub fn indentation_test() { 35 | use run <- should("parse indent and dedent tokens") 36 | let input = 37 | "def wibble arr 38 | for x in arr 39 | print x 40 | 41 | print \"done!\" 42 | 43 | def wobble 44 | wibble numbers 45 | " 46 | let expected = [ 47 | Token(Span(1, 1, 1, 4), "def", Var("def")), 48 | Token(Span(1, 5, 1, 11), "wibble", Var("wibble")), 49 | Token(Span(1, 12, 1, 15), "arr", Var("arr")), 50 | Token(Span(1, 15, 2, 3), "\n ", Indent(2)), 51 | Token(Span(2, 3, 2, 6), "for", Var("for")), 52 | Token(Span(2, 7, 2, 8), "x", Var("x")), 53 | Token(Span(2, 9, 2, 11), "in", Var("in")), 54 | Token(Span(2, 12, 2, 15), "arr", Var("arr")), 55 | Token(Span(2, 15, 3, 5), "\n ", Indent(4)), 56 | Token(Span(3, 5, 3, 10), "print", Var("print")), 57 | Token(Span(3, 11, 3, 12), "x", Var("x")), 58 | Token(Span(4, 1, 5, 3), "\n ", Dedent(2)), 59 | Token(Span(5, 3, 5, 8), "print", Var("print")), 60 | Token(Span(5, 9, 5, 16), "\"done!\"", Str("done!")), 61 | Token(Span(6, 1, 7, 1), "\n", Dedent(0)), 62 | Token(Span(7, 1, 7, 4), "def", Var("def")), 63 | Token(Span(7, 5, 7, 11), "wobble", Var("wobble")), 64 | Token(Span(7, 11, 8, 3), "\n ", Indent(2)), 65 | Token(Span(8, 3, 8, 9), "wibble", Var("wibble")), 66 | Token(Span(8, 10, 8, 17), "numbers", Var("numbers")), 67 | Token(Span(8, 17, 9, 1), "\n", Dedent(0)), 68 | ] 69 | 70 | run(input, expected) 71 | } 72 | 73 | // UTILS ----------------------------------------------------------------------- 74 | 75 | fn should( 76 | description: String, 77 | run: fn(fn(String, 
List(Token(TokenT))) -> Nil) -> Nil, 78 | ) -> Nil { 79 | use input, expected <- run 80 | 81 | io.print("should " <> description) 82 | 83 | lexer.advanced(lexer) 84 | |> lexer.run_advanced(input, 0, _) 85 | |> should.be_ok 86 | |> should.equal(expected) 87 | 88 | io.println(" ✅") 89 | } 90 | 91 | fn lexer(_) -> List(Matcher(TokenT, Int)) { 92 | let assert Ok(is_indent) = regexp.from_string("^\\n[ \\t]*") 93 | let indentation = { 94 | use current_indent, lexeme, lookahead <- lexer.custom 95 | 96 | case regexp.check(is_indent, lexeme), lookahead { 97 | False, _ -> NoMatch 98 | True, " " | True, "\t" -> Skip 99 | True, "\n" -> Drop(current_indent) 100 | True, _ -> { 101 | let spaces = string.length(lexeme) - 1 102 | 103 | case int.compare(spaces, current_indent) { 104 | Lt -> Keep(Dedent(spaces), spaces) 105 | Eq if spaces == 0 -> Drop(0) 106 | Eq -> Keep(Indent(spaces), spaces) 107 | Gt -> Keep(Indent(spaces), spaces) 108 | } 109 | } 110 | } 111 | } 112 | 113 | [ 114 | lexer.variable(set.new(), Var), 115 | lexer.string("\"", Str), 116 | lexer.int(Num), 117 | // Keywords 118 | lexer.keyword("def", "[\\W\\D]", Def), 119 | lexer.keyword("for", "[\\W\\D]", For), 120 | lexer.keyword("in", "[\\W\\D]", In), 121 | lexer.keyword("print", "[\\W\\D]", Print), 122 | // Our custom indentation lexer 123 | indentation, 124 | // Ignore all other whitespace 125 | lexer.whitespace(Nil) 126 | |> lexer.ignore, 127 | ] 128 | } 129 | -------------------------------------------------------------------------------- /test/examples/calculator_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/function 4 | import gleam/int 5 | import gleam/io 6 | import gleam/option.{None, Some} 7 | import gleeunit/should 8 | import nibble.{do, do_in, return} 9 | import nibble/lexer 10 | import nibble/pratt 11 | 12 | // TYPES ----------------------------------------------------------------------- 13 | 14 | type TokenT { 15 | Add 16 | Sub 17 | Mul 18 | Div 19 | Num(Float) 20 | LParen 21 | RParen 22 | } 23 | 24 | type Context { 25 | InSubExpr 26 | } 27 | 28 | type DeadEnd = 29 | nibble.DeadEnd(TokenT, Context) 30 | 31 | // TESTS ----------------------------------------------------------------------- 32 | 33 | pub fn add_test() { 34 | use run <- should("add two numbers") 35 | let input = "1 + 2" 36 | let expected = 3.0 37 | 38 | run(input, expected) 39 | } 40 | 41 | pub fn multi_add_test() { 42 | use run <- should("add multiple numbers") 43 | let input = "1 + 2 + 3" 44 | let expected = 6.0 45 | 46 | run(input, expected) 47 | } 48 | 49 | pub fn sub_test() { 50 | use run <- should("subtract two numbers") 51 | let input = "3 - 2" 52 | let expected = 1.0 53 | 54 | run(input, expected) 55 | } 56 | 57 | pub fn multi_sub_test() { 58 | use run <- should("subtract multiple numbers") 59 | let input = "3 - 2 - 1" 60 | let expected = 0.0 61 | 62 | run(input, expected) 63 | } 64 | 65 | pub fn mul_test() { 66 | use run <- should("multiply two numbers") 67 | let input = "2 * 3" 68 | let expected = 6.0 69 | 70 | run(input, expected) 71 | } 72 | 73 | pub fn multi_mul_test() { 74 | use run <- should("multiply multiple numbers") 75 | let input = "2 * 3 * 4" 76 | let expected = 24.0 77 | 78 | run(input, expected) 79 | } 80 | 81 | pub fn precedence_test() { 82 | use run <- should("evaluate operators according to precedence") 83 | let input = "2 * 3 + 4" 84 | let expected = 10.0 85 | 86 | run(input, expected) 87 | 
} 88 | 89 | pub fn parens_test() { 90 | use run <- should("evaluate parens first") 91 | let input = "2 * (3 + 4)" 92 | let expected = 14.0 93 | 94 | run(input, expected) 95 | } 96 | 97 | pub fn complex_test() { 98 | use run <- should("evaluate complex expressions") 99 | let input = "2 * (3 + 4) / 2 - 1" 100 | let expected = 6.0 101 | 102 | run(input, expected) 103 | } 104 | 105 | pub fn mismatched_parens_test() { 106 | use run <- should_error("on mismatched parens") 107 | let input = "2 * (3 + 4" 108 | let expected = [ 109 | nibble.DeadEnd(lexer.Span(1, 10, 1, 11), nibble.EndOfInput, [ 110 | #(lexer.Span(1, 5, 1, 6), InSubExpr), 111 | ]), 112 | ] 113 | 114 | run(input, expected) 115 | } 116 | 117 | // UTILS ----------------------------------------------------------------------- 118 | 119 | fn should(description: String, run: fn(fn(String, Float) -> Nil) -> Nil) -> Nil { 120 | use input, expected <- run 121 | 122 | io.print("should " <> description) 123 | 124 | lexer.run(input, lexer()) 125 | |> should.be_ok 126 | |> nibble.run(parser()) 127 | |> should.be_ok 128 | |> should.equal(expected) 129 | 130 | io.println(" ✅") 131 | } 132 | 133 | fn should_error( 134 | description: String, 135 | run: fn(fn(String, List(DeadEnd)) -> Nil) -> Nil, 136 | ) -> Nil { 137 | use input, expected <- run 138 | 139 | io.print("should error " <> description) 140 | 141 | lexer.run(input, lexer()) 142 | |> should.be_ok 143 | |> nibble.run(parser()) 144 | |> should.equal(Error(expected)) 145 | 146 | io.println(" ✅") 147 | } 148 | 149 | fn lexer() { 150 | lexer.simple([ 151 | // Grouping 152 | lexer.token("(", LParen), 153 | lexer.token(")", RParen), 154 | // Operators 155 | lexer.token("+", Add), 156 | lexer.token("-", Sub), 157 | lexer.token("*", Mul), 158 | lexer.token("/", Div), 159 | // Numbers 160 | lexer.number(int.to_float, function.identity) 161 | |> lexer.map(Num), 162 | // Whitespace 163 | lexer.whitespace(Nil) 164 | |> lexer.ignore(), 165 | ]) 166 | } 167 | 168 | fn parser() { 169 | let add = fn(x, y) { x +. y } 170 | let sub = fn(x, y) { x -. y } 171 | let mul = fn(x, y) { x *. y } 172 | let div = fn(x, y) { x /. y } 173 | 174 | pratt.expression( 175 | one_of: [parens_parser, number_parser], 176 | and_then: [ 177 | pratt.infix_left(14, nibble.token(Mul), mul), 178 | pratt.infix_left(14, nibble.token(Div), div), 179 | pratt.infix_left(13, nibble.token(Add), add), 180 | pratt.infix_left(13, nibble.token(Sub), sub), 181 | ], 182 | // Whitespace is already ignored by the lexer so there's no tokens we need to 183 | // explicitly ignore here. 
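// `return(Nil)` always succeeds without consuming a token, so passing it as
// `dropping:` gives the pratt parser nothing extra to skip between operands
// and operators.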
184 | dropping: return(Nil), 185 | ) 186 | } 187 | 188 | fn number_parser(_) { 189 | use tok <- nibble.take_map("a number token") 190 | 191 | case tok { 192 | Num(n) -> Some(n) 193 | _ -> None 194 | } 195 | } 196 | 197 | fn parens_parser(_) { 198 | use _ <- do(nibble.token(LParen)) 199 | use n <- do_in(InSubExpr, nibble.lazy(parser)) 200 | use _ <- do(nibble.token(RParen)) 201 | 202 | return(n) 203 | } 204 | -------------------------------------------------------------------------------- /test/examples/env_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/dict.{type Dict} 4 | import gleam/float 5 | import gleam/int 6 | import gleam/io 7 | import gleam/option.{None, Some} 8 | import gleam/set 9 | import gleeunit/should 10 | import nibble.{Break, Continue, do, return} 11 | import nibble/lexer 12 | 13 | // TYPES ----------------------------------------------------------------------- 14 | 15 | type Env = 16 | Dict(String, String) 17 | 18 | type TokenT { 19 | Key(String) 20 | Str(String) 21 | Equals 22 | NewLine 23 | } 24 | 25 | // TESTS ----------------------------------------------------------------------- 26 | 27 | pub fn empty_env_test() { 28 | use run <- should("parse an empty env") 29 | let input = "" 30 | let expected = dict.new() 31 | 32 | run(input, expected) 33 | } 34 | 35 | pub fn single_env_test() { 36 | use run <- should("parse a single k/v pair") 37 | let input = "FOO=bar" 38 | let expected = dict.from_list([#("FOO", "bar")]) 39 | 40 | run(input, expected) 41 | } 42 | 43 | pub fn single_env_string_test() { 44 | use run <- should("parse a single k/v pair with a string value") 45 | let input = "FOO='bar'" 46 | let expected = dict.from_list([#("FOO", "bar")]) 47 | 48 | run(input, expected) 49 | } 50 | 51 | pub fn single_env_number_test() { 52 | use run <- should("parse a single k/v pair with a number value") 53 | let input = "FOO=123" 54 | let expected = dict.from_list([#("FOO", "123")]) 55 | 56 | run(input, expected) 57 | } 58 | 59 | pub fn single_env_float_test() { 60 | use run <- should("parse a single k/v pair with a float value") 61 | let input = "FOO=123.456" 62 | let expected = dict.from_list([#("FOO", "123.456")]) 63 | 64 | run(input, expected) 65 | } 66 | 67 | pub fn multi_env_test() { 68 | use run <- should("parse multiple k/v pairs") 69 | let input = 70 | " 71 | FOO=bar 72 | BAZ=qux 73 | " 74 | let expected = dict.from_list([#("FOO", "bar"), #("BAZ", "qux")]) 75 | 76 | run(input, expected) 77 | } 78 | 79 | // UTILS ----------------------------------------------------------------------- 80 | 81 | fn should(description: String, run: fn(fn(String, Env) -> Nil) -> Nil) -> Nil { 82 | use input, expected <- run 83 | 84 | io.print("should " <> description) 85 | 86 | lexer.run(input, lexer()) 87 | |> should.be_ok 88 | |> nibble.run(parser()) 89 | |> should.be_ok 90 | |> should.equal(expected) 91 | 92 | io.println(" ✅") 93 | } 94 | 95 | fn lexer() { 96 | lexer.simple([ 97 | lexer.token("=", Equals), 98 | lexer.token("\n", NewLine), 99 | // Strings values can use either double quotes or single quotes 100 | lexer.string("\"", Str), 101 | lexer.string("'", Str), 102 | // Keys can be any non-whitespace character 103 | lexer.identifier("[^\\s=#]", "[^\\s=]", set.new(), Key), 104 | // We'll allow number literals and just convert them to string values 105 | lexer.number(fn(int) { Str(int.to_string(int)) }, fn(float) { 106 | 
Str(float.to_string(float)) 107 | }), 108 | // Drop comments and whitespace 109 | lexer.comment("#", fn(_) { Nil }) 110 | |> lexer.ignore, 111 | lexer.spaces(Nil) 112 | |> lexer.ignore, 113 | ]) 114 | } 115 | 116 | fn parser() { 117 | use env <- nibble.loop(dict.new()) 118 | 119 | nibble.one_of([ 120 | key_value_parser(env) 121 | |> nibble.map(Continue), 122 | // The `key_value_parser` already consumes one new line. This parser makes 123 | // sure that if k/v pairs are separated by _multiple_ newlines that we still 124 | // consume them all. 125 | // 126 | // We use `many1` here because we need to consume at least _one_ token to 127 | // prevent an infinite loop. 128 | // 129 | nibble.many1(nibble.token(NewLine)) 130 | |> nibble.replace(Continue(env)), 131 | nibble.eof() 132 | |> nibble.replace(Break(env)), 133 | ]) 134 | } 135 | 136 | fn key_value_parser(env) { 137 | use k <- do(key_parser()) 138 | use _ <- do(nibble.token(Equals)) 139 | use v <- do(val_parser()) 140 | use _ <- do(nibble.one_of([nibble.token(NewLine), nibble.eof()])) 141 | 142 | return(dict.insert(env, k, v)) 143 | } 144 | 145 | fn key_parser() { 146 | use tok <- nibble.take_map("an env key") 147 | 148 | case tok { 149 | Key(k) -> Some(k) 150 | _ -> None 151 | } 152 | } 153 | 154 | fn val_parser() { 155 | use tok <- nibble.take_map("an env value") 156 | 157 | case tok { 158 | Str(v) -> Some(v) 159 | // We can treat a single unquoted word as a value 160 | Key(v) -> Some(v) 161 | _ -> None 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /test/examples/json_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/int 4 | import gleam/io 5 | import gleeunit/should 6 | import nibble.{type Parser} 7 | import nibble/lexer.{type Lexer} 8 | 9 | // TYPES ----------------------------------------------------------------------- 10 | 11 | type Json { 12 | Array(List(Json)) 13 | False 14 | Null 15 | Number(Float) 16 | Object(List(#(String, Json))) 17 | String(String) 18 | True 19 | } 20 | 21 | type JsonT { 22 | Colon 23 | Comma 24 | FalseT 25 | LBrace 26 | LBracket 27 | NullT 28 | NumT(Float) 29 | RBrace 30 | RBracket 31 | StrT(String) 32 | TrueT 33 | } 34 | 35 | type Context { 36 | InArray 37 | InObject 38 | } 39 | 40 | // LITERAL TESTS --------------------------------------------------------------- 41 | 42 | pub fn json_null_test() { 43 | use run <- should("parse a JSON null") 44 | let input = "null" 45 | let expected = Null 46 | 47 | run(input, expected) 48 | } 49 | 50 | pub fn json_true_test() { 51 | use run <- should("parse a JSON true") 52 | let input = "true" 53 | let expected = True 54 | 55 | run(input, expected) 56 | } 57 | 58 | pub fn json_false_test() { 59 | use run <- should("parse a JSON false") 60 | let input = "false" 61 | let expected = False 62 | 63 | run(input, expected) 64 | } 65 | 66 | pub fn json_number_test() { 67 | use run <- should("parse a JSON number") 68 | let input = "123.456" 69 | let expected = Number(123.456) 70 | 71 | run(input, expected) 72 | } 73 | 74 | pub fn json_string_test() { 75 | use run <- should("parse a JSON string") 76 | let input = "\"hello world\"" 77 | let expected = String("hello world") 78 | 79 | run(input, expected) 80 | } 81 | 82 | // ARRAY TESTS ----------------------------------------------------------------- 83 | 84 | pub fn json_empty_array_test() { 85 | use run <- should("parse an empty 
JSON array") 86 | let input = "[]" 87 | let expected = Array([]) 88 | 89 | run(input, expected) 90 | } 91 | 92 | pub fn json_singleton_array_test() { 93 | use run <- should("parse a JSON array with one element") 94 | let input = "[1]" 95 | let expected = Array([Number(1.0)]) 96 | 97 | run(input, expected) 98 | } 99 | 100 | pub fn json_array_test() { 101 | use run <- should("parse a JSON array with multiple elements") 102 | let input = "[1, 2, 3]" 103 | let expected = Array([Number(1.0), Number(2.0), Number(3.0)]) 104 | 105 | run(input, expected) 106 | } 107 | 108 | pub fn json_nested_array_test() { 109 | use run <- should("parse a nested JSON array") 110 | let input = "[1, [2, 3], 4]" 111 | let expected = 112 | Array([Number(1.0), Array([Number(2.0), Number(3.0)]), Number(4.0)]) 113 | 114 | run(input, expected) 115 | } 116 | 117 | // OBJECT TESTS ---------------------------------------------------------------- 118 | 119 | pub fn json_empty_object_test() { 120 | use run <- should("parse an empty JSON object") 121 | let input = "{}" 122 | let expected = Object([]) 123 | 124 | run(input, expected) 125 | } 126 | 127 | pub fn json_singleton_object_test() { 128 | use run <- should("parse a JSON object with one element") 129 | let input = "{\"a\": 1}" 130 | let expected = Object([#("a", Number(1.0))]) 131 | 132 | run(input, expected) 133 | } 134 | 135 | pub fn json_object_test() { 136 | use run <- should("parse a JSON object with multiple elements") 137 | let input = "{\"a\": 1, \"b\": 2, \"c\": 3}" 138 | let expected = 139 | Object([#("a", Number(1.0)), #("b", Number(2.0)), #("c", Number(3.0))]) 140 | 141 | run(input, expected) 142 | } 143 | 144 | pub fn json_nested_object_test() { 145 | use run <- should("parse a nested JSON object") 146 | let input = "{\"a\": 1, \"b\": {\"c\": 2}, \"d\": 3}" 147 | let expected = 148 | Object([ 149 | #("a", Number(1.0)), 150 | #("b", Object([#("c", Number(2.0))])), 151 | #("d", Number(3.0)), 152 | ]) 153 | 154 | run(input, expected) 155 | } 156 | 157 | // REAL WORLD TESTS ------------------------------------------------------------ 158 | 159 | pub fn json_schema_basic_test() { 160 | // https://json-schema.org/learn/miscellaneous-examples.html#basic 161 | use run <- should("parse the JSON Schema basic example") 162 | let input = 163 | "{ 164 | \"$id\": \"https://example.com/person.schema.json\", 165 | \"$schema\": \"https://json-schema.org/draft/2020-12/schema\", 166 | \"title\": \"Person\", 167 | \"type\": \"object\", 168 | \"properties\": { 169 | \"firstName\": { 170 | \"type\": \"string\", 171 | \"description\": \"The person's first name.\" 172 | }, 173 | \"lastName\": { 174 | \"type\": \"string\", 175 | \"description\": \"The person's last name.\" 176 | }, 177 | \"age\": { 178 | \"description\": \"Age in years which must be equal to or greater than zero.\", 179 | \"type\": \"integer\", 180 | \"minimum\": 0 181 | } 182 | } 183 | }" 184 | let expected = 185 | Object([ 186 | #("$id", String("https://example.com/person.schema.json")), 187 | #("$schema", String("https://json-schema.org/draft/2020-12/schema")), 188 | #("title", String("Person")), 189 | #("type", String("object")), 190 | #( 191 | "properties", 192 | Object([ 193 | #( 194 | "firstName", 195 | Object([ 196 | #("type", String("string")), 197 | #("description", String("The person's first name.")), 198 | ]), 199 | ), 200 | #( 201 | "lastName", 202 | Object([ 203 | #("type", String("string")), 204 | #("description", String("The person's last name.")), 205 | ]), 206 | ), 207 | #( 208 | "age", 209 | 
Object([ 210 | #( 211 | "description", 212 | String( 213 | "Age in years which must be equal to or greater than zero.", 214 | ), 215 | ), 216 | #("type", String("integer")), 217 | #("minimum", Number(0.0)), 218 | ]), 219 | ), 220 | ]), 221 | ), 222 | ]) 223 | 224 | run(input, expected) 225 | } 226 | 227 | pub fn json_scheme_arrays_of_things_test() { 228 | // https://json-schema.org/learn/miscellaneous-examples.html#arrays-of-things 229 | use run <- should("parse the JSON Schema 'arrays of things' example") 230 | let input = 231 | "{ 232 | \"$id\": \"https://example.com/arrays.schema.json\", 233 | \"$schema\": \"https://json-schema.org/draft/2020-12/schema\", 234 | \"description\": \"A representation of a person, company, organization, or place\", 235 | \"type\": \"object\", 236 | \"properties\": { 237 | \"fruits\": { 238 | \"type\": \"array\", 239 | \"items\": { 240 | \"type\": \"string\" 241 | } 242 | }, 243 | \"vegetables\": { 244 | \"type\": \"array\", 245 | \"items\": { \"$ref\": \"#/$defs/veggie\" } 246 | } 247 | }, 248 | \"$defs\": { 249 | \"veggie\": { 250 | \"type\": \"object\", 251 | \"required\": [ \"veggieName\", \"veggieLike\" ], 252 | \"properties\": { 253 | \"veggieName\": { 254 | \"type\": \"string\", 255 | \"description\": \"The name of the vegetable.\" 256 | }, 257 | \"veggieLike\": { 258 | \"type\": \"boolean\", 259 | \"description\": \"Do I like this vegetable?\" 260 | } 261 | } 262 | } 263 | } 264 | }" 265 | let expected = 266 | Object([ 267 | #("$id", String("https://example.com/arrays.schema.json")), 268 | #("$schema", String("https://json-schema.org/draft/2020-12/schema")), 269 | #( 270 | "description", 271 | String("A representation of a person, company, organization, or place"), 272 | ), 273 | #("type", String("object")), 274 | #( 275 | "properties", 276 | Object([ 277 | #( 278 | "fruits", 279 | Object([ 280 | #("type", String("array")), 281 | #("items", Object([#("type", String("string"))])), 282 | ]), 283 | ), 284 | #( 285 | "vegetables", 286 | Object([ 287 | #("type", String("array")), 288 | #("items", Object([#("$ref", String("#/$defs/veggie"))])), 289 | ]), 290 | ), 291 | ]), 292 | ), 293 | #( 294 | "$defs", 295 | Object([ 296 | #( 297 | "veggie", 298 | Object([ 299 | #("type", String("object")), 300 | #("required", Array([String("veggieName"), String("veggieLike")])), 301 | #( 302 | "properties", 303 | Object([ 304 | #( 305 | "veggieName", 306 | Object([ 307 | #("type", String("string")), 308 | #("description", String("The name of the vegetable.")), 309 | ]), 310 | ), 311 | #( 312 | "veggieLike", 313 | Object([ 314 | #("type", String("boolean")), 315 | #("description", String("Do I like this vegetable?")), 316 | ]), 317 | ), 318 | ]), 319 | ), 320 | ]), 321 | ), 322 | ]), 323 | ), 324 | ]) 325 | 326 | run(input, expected) 327 | } 328 | 329 | // UTILS ----------------------------------------------------------------------- 330 | 331 | fn should(description: String, run: fn(fn(String, Json) -> Nil) -> Nil) -> Nil { 332 | use input, expected <- run 333 | 334 | io.print("should " <> description) 335 | 336 | lexer.run(input, lexer()) 337 | |> should.be_ok 338 | |> nibble.run(parser()) 339 | |> should.be_ok 340 | |> should.equal(expected) 341 | 342 | io.println(" ✅") 343 | } 344 | 345 | fn lexer() -> Lexer(JsonT, Nil) { 346 | lexer.simple([ 347 | lexer.number(fn(int) { NumT(int.to_float(int)) }, NumT), 348 | lexer.token(":", Colon), 349 | lexer.token(",", Comma), 350 | lexer.token("false", FalseT), 351 | lexer.token("{", LBrace), 352 | lexer.token("[", LBracket), 
353 | lexer.token("null", NullT), 354 | lexer.token("true", TrueT), 355 | lexer.token("}", RBrace), 356 | lexer.token("]", RBracket), 357 | lexer.string("\"", StrT), 358 | // 359 | lexer.whitespace(Nil) 360 | |> lexer.ignore, 361 | ]) 362 | } 363 | 364 | fn parser() -> Parser(Json, JsonT, Context) { 365 | nibble.one_of([ 366 | // Structures 367 | array_parser() 368 | |> nibble.in(InArray), 369 | object_parser() 370 | |> nibble.in(InObject), 371 | literal_parser(), 372 | ]) 373 | } 374 | 375 | fn array_parser() -> Parser(Json, JsonT, Context) { 376 | use _ <- nibble.do(nibble.token(LBracket)) 377 | use elements <- nibble.do(nibble.sequence( 378 | nibble.lazy(parser), 379 | nibble.token(Comma), 380 | )) 381 | use _ <- nibble.do(nibble.token(RBracket)) 382 | 383 | nibble.return(Array(elements)) 384 | } 385 | 386 | fn object_parser() -> Parser(Json, JsonT, Context) { 387 | use _ <- nibble.do(nibble.token(LBrace)) 388 | use elements <- nibble.do(nibble.sequence( 389 | nibble.lazy(object_element_parser), 390 | nibble.token(Comma), 391 | )) 392 | use _ <- nibble.do(nibble.token(RBrace)) 393 | 394 | nibble.return(Object(elements)) 395 | } 396 | 397 | fn object_element_parser() -> Parser(#(String, Json), JsonT, Context) { 398 | use key <- nibble.do( 399 | nibble.backtrackable({ 400 | use t <- nibble.do(nibble.any()) 401 | 402 | case t { 403 | StrT(s) -> nibble.return(s) 404 | _ -> nibble.fail("Expected string object key") 405 | } 406 | }), 407 | ) 408 | use _ <- nibble.do(nibble.token(Colon)) 409 | use value <- nibble.do(nibble.lazy(parser)) 410 | 411 | nibble.return(#(key, value)) 412 | } 413 | 414 | fn literal_parser() -> Parser(Json, JsonT, Context) { 415 | nibble.backtrackable({ 416 | use t <- nibble.do(nibble.any()) 417 | 418 | case t { 419 | NumT(n) -> nibble.return(Number(n)) 420 | StrT(s) -> nibble.return(String(s)) 421 | TrueT -> nibble.return(True) 422 | FalseT -> nibble.return(False) 423 | NullT -> nibble.return(Null) 424 | _ -> nibble.fail("Expected a literal value") 425 | } 426 | }) 427 | } 428 | -------------------------------------------------------------------------------- /test/examples/readme_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleam/option.{None, Some} 5 | import gleeunit/should 6 | import nibble.{do, return} 7 | import nibble/lexer 8 | 9 | // TYPES ----------------------------------------------------------------------- 10 | 11 | type Point { 12 | Point(x: Int, y: Int) 13 | } 14 | 15 | type T { 16 | Num(Int) 17 | LParen 18 | RParen 19 | Comma 20 | } 21 | 22 | // TESTS ----------------------------------------------------------------------- 23 | 24 | pub fn readme_test() { 25 | use run <- should("parse the example in README.md") 26 | let input = "(1, 2)" 27 | let expected = Point(1, 2) 28 | 29 | run(input, expected) 30 | } 31 | 32 | // UTILS ----------------------------------------------------------------------- 33 | 34 | fn should(description: String, run: fn(fn(String, Point) -> Nil) -> Nil) -> Nil { 35 | use input, expected <- run 36 | 37 | io.print("should " <> description) 38 | 39 | lexer.run(input, lexer()) 40 | |> should.be_ok 41 | |> nibble.run(parser()) 42 | |> should.be_ok 43 | |> should.equal(expected) 44 | 45 | io.println(" ✅") 46 | } 47 | 48 | fn lexer() { 49 | lexer.simple([ 50 | lexer.int(Num), 51 | lexer.token("(", LParen), 52 | lexer.token(")", RParen), 53 | lexer.token(",", Comma), 54 | // 
Skip over whitespace, we don't care about it! 55 | lexer.whitespace(Nil) 56 | |> lexer.ignore, 57 | ]) 58 | } 59 | 60 | fn parser() { 61 | use _ <- do(nibble.token(LParen)) 62 | use x <- do(int_parser()) 63 | use _ <- do(nibble.token(Comma)) 64 | use y <- do(int_parser()) 65 | use _ <- do(nibble.token(RParen)) 66 | 67 | return(Point(x, y)) 68 | } 69 | 70 | fn int_parser() { 71 | use tok <- nibble.take_map("a `Num` token") 72 | 73 | case tok { 74 | Num(n) -> Some(n) 75 | _ -> None 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /test/examples/sexpr_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleeunit/should 5 | import nibble.{type Parser} 6 | import nibble/lexer.{type Lexer, type Span, Span} 7 | 8 | // TYPES ----------------------------------------------------------------------- 9 | 10 | type TokenT { 11 | LParenT 12 | RParenT 13 | NumT(Int) 14 | } 15 | 16 | type Expr { 17 | SExpr(body: List(Expr), location: Span) 18 | Num(value: Int, location: Span) 19 | } 20 | 21 | // TESTS ----------------------------------------------------------------------- 22 | 23 | pub fn basic_test() { 24 | use run <- should("parse a basic s-expression") 25 | let input = "(1 2)" 26 | let expected = 27 | SExpr( 28 | [Num(1, Span(1, 2, 1, 3)), Num(2, Span(1, 4, 1, 5))], 29 | Span(1, 1, 1, 6), 30 | ) 31 | 32 | run(input, expected) 33 | } 34 | 35 | pub fn nested_test() { 36 | use run <- should("parse nested s-expressions") 37 | let input = "(1 ((2 3) 4) (5) 6)" 38 | let expected = 39 | SExpr( 40 | [ 41 | Num(1, Span(1, 2, 1, 3)), 42 | SExpr( 43 | [ 44 | SExpr( 45 | [Num(2, Span(1, 6, 1, 7)), Num(3, Span(1, 8, 1, 9))], 46 | Span(1, 5, 1, 10), 47 | ), 48 | Num(4, Span(1, 11, 1, 12)), 49 | ], 50 | Span(1, 4, 1, 13), 51 | ), 52 | SExpr([Num(5, Span(1, 15, 1, 16))], Span(1, 14, 1, 17)), 53 | Num(6, Span(1, 18, 1, 19)), 54 | ], 55 | Span(1, 1, 1, 20), 56 | ) 57 | 58 | run(input, expected) 59 | } 60 | 61 | pub fn locations_test() { 62 | use run <- should("parse locations over multiple lines") 63 | let input = 64 | "( 65 | 1 66 | 67 | 2 3) 68 | " 69 | let expected = 70 | SExpr( 71 | [ 72 | Num(1, Span(2, 7, 2, 8)), 73 | Num(2, Span(4, 3, 4, 4)), 74 | Num(3, Span(4, 5, 4, 6)), 75 | ], 76 | Span(1, 1, 4, 7), 77 | ) 78 | 79 | run(input, expected) 80 | } 81 | 82 | // UTILS ----------------------------------------------------------------------- 83 | 84 | fn should(description: String, run: fn(fn(String, Expr) -> Nil) -> Nil) -> Nil { 85 | use input, expected <- run 86 | 87 | io.print("should " <> description) 88 | 89 | lexer.run(input, lexer()) 90 | |> should.be_ok 91 | |> nibble.run(parser()) 92 | |> should.be_ok 93 | |> should.equal(expected) 94 | 95 | io.println(" ✅") 96 | } 97 | 98 | fn lexer() -> Lexer(TokenT, Nil) { 99 | lexer.simple([ 100 | lexer.token("(", LParenT), 101 | lexer.token(")", RParenT), 102 | lexer.int(NumT), 103 | lexer.whitespace(Nil) 104 | |> lexer.ignore, 105 | ]) 106 | } 107 | 108 | fn parser() -> Parser(Expr, TokenT, Nil) { 109 | nibble.one_of([number_parser(), sexpr_parser()]) 110 | } 111 | 112 | fn number_parser() -> Parser(Expr, TokenT, Nil) { 113 | use t <- nibble.do(nibble.any()) 114 | // Get the position of the current token 115 | use pos <- nibble.do(nibble.span()) 116 | 117 | case t { 118 | NumT(n) -> nibble.return(Num(n, pos)) 119 | _ -> nibble.fail("expected a number literal") 
120 | } 121 | } 122 | 123 | fn sexpr_parser() -> Parser(Expr, TokenT, Nil) { 124 | use _ <- nibble.do(nibble.token(LParenT)) 125 | // Position of the left parenthesis 126 | use start <- nibble.do(nibble.span()) 127 | use body <- nibble.do(nibble.many1(parser())) 128 | use _ <- nibble.do(nibble.token(RParenT)) 129 | // Position of the right parenthesis 130 | use end <- nibble.do(nibble.span()) 131 | 132 | // Create a combined location from the start and end spans 133 | // 134 | // ( 1 2 3 ) 135 | // ~ start ~ end 136 | // ~~~~~~~~~~~~~ combined 137 | let combined = merge_spans(start, end) 138 | 139 | nibble.return(SExpr(body, combined)) 140 | } 141 | 142 | fn merge_spans(span1: Span, span2: Span) -> Span { 143 | Span(span1.row_start, span1.col_start, span2.row_end, span2.col_end) 144 | } 145 | -------------------------------------------------------------------------------- /test/examples/simple_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/option.{None, Some} 4 | import nibble.{ 5 | any, do, eof, fail, guard, many, many1, map, one_of, optional, or, replace, 6 | return, run, sequence, take_at_least, take_exactly, take_if, take_map, 7 | take_map_while, take_until, take_while, take_while1, token, 8 | } 9 | import nibble/lexer 10 | 11 | // TYPES ----------------------------------------------------------------------- 12 | 13 | pub type TokenT { 14 | AT 15 | BT 16 | CommaT 17 | LParenT 18 | RParenT 19 | NumT(Int) 20 | } 21 | 22 | // LEXER ----------------------------------------------------------------------- 23 | fn lexer() { 24 | lexer.simple([ 25 | lexer.token("a", AT), 26 | lexer.token("b", BT), 27 | lexer.token(",", CommaT), 28 | lexer.token("(", LParenT), 29 | lexer.token(")", RParenT), 30 | lexer.int(NumT), 31 | lexer.whitespace(Nil) 32 | |> lexer.ignore, 33 | ]) 34 | } 35 | 36 | // PARSERS --------------------------------------------------------------------- 37 | fn number_parser() { 38 | use tok <- take_map("expected a number") 39 | case tok { 40 | NumT(n) -> Some(n) 41 | _ -> None 42 | } 43 | } 44 | 45 | // TESTS ----------------------------------------------------------------------- 46 | 47 | pub fn any_test() { 48 | let assert Ok(non_empty_tokens) = lexer.run("a", lexer()) 49 | let assert Ok(AT) = run(non_empty_tokens, any()) 50 | let assert Ok(empty_tokens) = lexer.run("", lexer()) 51 | let assert Error(_) = run(empty_tokens, nibble.any()) 52 | } 53 | 54 | pub fn do_test() { 55 | let assert Ok(tokens) = lexer.run("a", lexer()) 56 | let parser = { 57 | use parsed_value <- do(token(AT)) 58 | return(parsed_value) 59 | } 60 | let assert Ok(Nil) = run(tokens, parser) 61 | } 62 | 63 | pub fn eof_test() { 64 | let assert Ok(tokens) = lexer.run("aba", lexer()) 65 | let successful_parser = { 66 | use _ <- do(token(AT)) 67 | use _ <- do(token(BT)) 68 | use _ <- do(token(AT)) 69 | use _ <- do(eof()) 70 | return(Nil) 71 | } 72 | let assert Ok(Nil) = run(tokens, successful_parser) 73 | let failing_parser = { 74 | use _ <- do(token(AT)) 75 | use _ <- do(token(BT)) 76 | use _ <- do(eof()) 77 | return(Nil) 78 | } 79 | let assert Error(_) = run(tokens, failing_parser) 80 | } 81 | 82 | pub fn fail_test() { 83 | let assert Error(_) = run([], fail("I never succeed")) 84 | } 85 | 86 | pub fn guard_test() { 87 | let even_number_parser = { 88 | use number_value <- do(number_parser()) 89 | use _ <- do(guard(number_value % 2 == 0, "expected an even number")) 
90 | return(number_value) 91 | } 92 | let assert Ok(even_number_token) = lexer.run("10", lexer()) 93 | let assert Ok(10) = run(even_number_token, even_number_parser) 94 | let assert Ok(odd_number_token) = lexer.run("13", lexer()) 95 | let assert Error(_) = run(odd_number_token, even_number_parser) 96 | } 97 | 98 | pub fn many_test() { 99 | let assert Ok(tokens) = lexer.run("aaab", lexer()) 100 | let assert Ok([Nil, Nil, Nil]) = run(tokens, many(token(AT))) 101 | let assert Ok(tokens) = lexer.run("b", lexer()) 102 | let assert Ok([]) = run(tokens, many(token(AT))) 103 | } 104 | 105 | pub fn many1_test() { 106 | let assert Ok(tokens) = lexer.run("aaab", lexer()) 107 | let assert Ok([Nil, Nil, Nil]) = run(tokens, many1(token(AT))) 108 | let assert Ok(tokens) = lexer.run("b", lexer()) 109 | let assert Error(_) = run(tokens, many1(token(AT))) 110 | } 111 | 112 | pub fn map_test() { 113 | let double_parser = { 114 | use doubled_value <- do(number_parser() |> map(fn(n) { n * 2 })) 115 | return(doubled_value) 116 | } 117 | let assert Ok(token) = lexer.run("400", lexer()) 118 | let assert Ok(800) = run(token, double_parser) 119 | } 120 | 121 | pub fn one_of_test() { 122 | let a_or_b_parser = { 123 | one_of([token(AT), token(BT)]) 124 | } 125 | let assert Ok(tokens) = lexer.run("a", lexer()) 126 | let assert Ok(Nil) = run(tokens, a_or_b_parser) 127 | let assert Ok(tokens) = lexer.run("b", lexer()) 128 | let assert Ok(Nil) = run(tokens, a_or_b_parser) 129 | let assert Ok(tokens) = lexer.run("22", lexer()) 130 | let assert Error(_) = run(tokens, a_or_b_parser) 131 | } 132 | 133 | pub fn optional_test() { 134 | let optional_a_parser = { 135 | token(AT) |> optional 136 | } 137 | let assert Ok(tokens) = lexer.run("a", lexer()) 138 | let assert Ok(Some(Nil)) = run(tokens, optional_a_parser) 139 | let assert Ok(tokens) = lexer.run("b", lexer()) 140 | let assert Ok(None) = run(tokens, optional_a_parser) 141 | } 142 | 143 | pub fn or_test() { 144 | let assert Ok(tokens) = lexer.run("a", lexer()) 145 | let assert Ok(0) = run(tokens, number_parser() |> or(0)) 146 | } 147 | 148 | pub fn replace_test() { 149 | let assert Ok(tokens) = lexer.run("a", lexer()) 150 | let assert Ok("a") = run(tokens, token(AT) |> replace("a")) 151 | } 152 | 153 | pub fn sequence_test() { 154 | let assert Ok(tokens) = lexer.run("a,a,a", lexer()) 155 | let assert Ok([Nil, Nil, Nil]) = 156 | run(tokens, sequence(token(AT), token(CommaT))) 157 | } 158 | 159 | pub fn take_at_least_test() { 160 | let assert Ok(tokens) = lexer.run("aaa", lexer()) 161 | let assert Ok([Nil, Nil, Nil]) = run(tokens, take_at_least(token(AT), 2)) 162 | let assert Ok(tokens) = lexer.run("abaa", lexer()) 163 | let assert Error(_) = run(tokens, take_at_least(token(AT), 2)) 164 | } 165 | 166 | pub fn take_exactly_test() { 167 | let assert Ok(tokens) = lexer.run("aaaaaa", lexer()) 168 | let assert Ok([Nil, Nil]) = run(tokens, take_exactly(token(AT), 2)) 169 | let assert Ok(tokens) = lexer.run("abaa", lexer()) 170 | let assert Error(_) = run(tokens, take_exactly(token(AT), 2)) 171 | } 172 | 173 | pub fn take_if_test() { 174 | let a_parser = { 175 | take_if("expected an 'a'", fn(tok) { 176 | case tok { 177 | AT -> True 178 | _ -> False 179 | } 180 | }) 181 | } 182 | let assert Ok(tokens) = lexer.run("a", lexer()) 183 | let assert Ok(AT) = run(tokens, a_parser) 184 | let assert Ok(tokens) = lexer.run("b", lexer()) 185 | let assert Error(_) = run(tokens, a_parser) 186 | } 187 | 188 | pub fn take_map_test() { 189 | // See the body of `number_parser` for the use of 
`take_map`. 190 | let assert Ok(tokens) = lexer.run("10", lexer()) 191 | let assert Ok(10) = run(tokens, number_parser()) 192 | let assert Ok(tokens) = lexer.run("b", lexer()) 193 | let assert Error(_) = run(tokens, number_parser()) 194 | } 195 | 196 | pub fn take_map_while_test() { 197 | let f = fn(tok) { 198 | case tok { 199 | NumT(n) if n > 0 -> Some("positive") 200 | NumT(n) if n == 0 -> Some("zero") 201 | NumT(n) if n < 0 -> Some("negative") 202 | CommaT -> Some("comma") 203 | _ -> None 204 | } 205 | } 206 | let assert Ok(tokens) = lexer.run("10,-10", lexer()) 207 | let assert Ok(["positive", "comma", "negative"]) = 208 | run(tokens, take_map_while(f)) 209 | } 210 | 211 | pub fn take_until_test() { 212 | let not_a_number = fn(tok) { 213 | case tok { 214 | NumT(_) -> False 215 | _ -> True 216 | } 217 | } 218 | let assert Ok(tokens) = lexer.run("a,b,10", lexer()) 219 | let assert Ok([]) = run(tokens, take_until(not_a_number)) 220 | let assert Ok(tokens) = lexer.run("10a", lexer()) 221 | let assert Ok([NumT(10)]) = run(tokens, take_until(not_a_number)) 222 | } 223 | 224 | pub fn take_while_test() { 225 | let not_a_number = fn(tok) { 226 | case tok { 227 | NumT(_) -> False 228 | _ -> True 229 | } 230 | } 231 | let assert Ok(tokens) = lexer.run("a,b,10", lexer()) 232 | let assert Ok([AT, CommaT, BT, CommaT]) = 233 | run(tokens, take_while(not_a_number)) 234 | let assert Ok(tokens) = lexer.run("10", lexer()) 235 | let assert Ok([]) = run(tokens, take_while(not_a_number)) 236 | } 237 | 238 | pub fn take_while1_test() { 239 | let not_a_number = fn(tok) { 240 | case tok { 241 | NumT(_) -> False 242 | _ -> True 243 | } 244 | } 245 | let assert Ok(tokens) = lexer.run("a,b,10", lexer()) 246 | let assert Ok([AT, CommaT, BT, CommaT]) = 247 | run(tokens, take_while1("expected a non-number", not_a_number)) 248 | let assert Ok(tokens) = lexer.run("10", lexer()) 249 | let assert Error(_) = 250 | run(tokens, take_while1("expected a non-number", not_a_number)) 251 | } 252 | -------------------------------------------------------------------------------- /test/nibble_test.gleam: -------------------------------------------------------------------------------- 1 | import gleeunit 2 | 3 | pub fn main() { 4 | gleeunit.main() 5 | } 6 | -------------------------------------------------------------------------------- /test/unit/lexer_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleam/set 5 | import gleam/string 6 | import gleeunit/should 7 | import nibble/lexer.{type Matcher, type Token, Span, Token} 8 | 9 | // TYPES ----------------------------------------------------------------------- 10 | 11 | type TokenT { 12 | Str(String) 13 | Int(Int) 14 | Num(Float) 15 | Kwd(String) 16 | Var(String) 17 | } 18 | 19 | // INTEGER TESTS --------------------------------------------------------------- 20 | 21 | pub fn single_digit_integer_test() { 22 | use run <- should("lex a single digit integer") 23 | 24 | let input = "1" 25 | let expected = [Token(Span(1, 1, 1, 2), "1", Int(1))] 26 | 27 | run(input, expected, [lexer.int(Int)]) 28 | } 29 | 30 | pub fn single_negative_digit_integer_test() { 31 | use run <- should("lex a single digit negative integer") 32 | 33 | let input = "-1" 34 | let expected = [Token(Span(1, 1, 1, 3), "-1", Int(-1))] 35 | 36 | run(input, expected, [lexer.int(Int)]) 37 | } 38 | 39 | pub fn multi_digit_integer_test() { 40 | use run <- 
should("lex a multi digit integer") 41 | 42 | let input = "123" 43 | let expected = [Token(Span(1, 1, 1, 4), "123", Int(123))] 44 | 45 | run(input, expected, [lexer.int(Int)]) 46 | } 47 | 48 | pub fn multi_digit_negative_integer_test() { 49 | use run <- should("lex a multi digit negative integer") 50 | 51 | let input = "-123" 52 | let expected = [Token(Span(1, 1, 1, 5), "-123", Int(-123))] 53 | 54 | run(input, expected, [lexer.int(Int)]) 55 | } 56 | 57 | // FLOAT TESTS ----------------------------------------------------------------- 58 | 59 | pub fn single_digit_float_test() { 60 | use run <- should("lex a single digit float") 61 | 62 | let input = "1.0" 63 | let expected = [Token(Span(1, 1, 1, 4), "1.0", Num(1.0))] 64 | 65 | run(input, expected, [lexer.float(Num)]) 66 | } 67 | 68 | pub fn single_nagive_digit_float_test() { 69 | use run <- should("lex a single digit negative float") 70 | 71 | let input = "-1.0" 72 | let expected = [Token(Span(1, 1, 1, 5), "-1.0", Num(-1.0))] 73 | 74 | run(input, expected, [lexer.float(Num)]) 75 | } 76 | 77 | pub fn multi_digit_float_test() { 78 | use run <- should("lex a multi digit float") 79 | 80 | let input = "123.456" 81 | let expected = [Token(Span(1, 1, 1, 8), "123.456", Num(123.456))] 82 | 83 | run(input, expected, [lexer.float(Num)]) 84 | } 85 | 86 | pub fn multi_digit_negative_float_test() { 87 | use run <- should("lex a multi digit negative float") 88 | 89 | let input = "-123.456" 90 | let expected = [Token(Span(1, 1, 1, 9), "-123.456", Num(-123.456))] 91 | 92 | run(input, expected, [lexer.float(Num)]) 93 | } 94 | 95 | // STRING TESTS ---------------------------------------------------------------- 96 | 97 | pub fn empty_string_test() { 98 | use run <- should("lex an empty string") 99 | 100 | let input = "''" 101 | let expected = [Token(Span(1, 1, 1, 3), "''", Str(""))] 102 | 103 | run(input, expected, [lexer.string("'", Str)]) 104 | } 105 | 106 | pub fn single_char_string_test() { 107 | use run <- should("lex a single char string") 108 | 109 | let input = "'a'" 110 | let expected = [Token(Span(1, 1, 1, 4), "'a'", Str("a"))] 111 | 112 | run(input, expected, [lexer.string("'", Str)]) 113 | } 114 | 115 | pub fn multi_char_string_test() { 116 | use run <- should("lex a multi char string") 117 | 118 | let input = "'abc'" 119 | let expected = [Token(Span(1, 1, 1, 6), "'abc'", Str("abc"))] 120 | 121 | run(input, expected, [lexer.string("'", Str)]) 122 | } 123 | 124 | pub fn escaped_string_test() { 125 | use run <- should("lex an escaped string") 126 | 127 | let input = "'\\''" 128 | let expected = [Token(Span(1, 1, 1, 5), "'\\''", Str("\\'"))] 129 | 130 | run(input, expected, [lexer.string("'", Str)]) 131 | } 132 | 133 | pub fn multiline_string_test() { 134 | use run <- should("lex a multi-line string") 135 | 136 | let input = string.join(["'this is a", "multi-line string'"], "\n") 137 | let expected = [ 138 | Token(Span(1, 1, 2, 19), input, Str("this is a\nmulti-line string")), 139 | ] 140 | 141 | run(input, expected, [lexer.string("'", Str)]) 142 | } 143 | 144 | // KEYWORD TESTS --------------------------------------------------------------- 145 | 146 | pub fn keyword_test() { 147 | use run <- should("lex a keyword") 148 | 149 | let input = "in" 150 | let expected = [Token(Span(1, 1, 1, 3), "in", Kwd("in"))] 151 | 152 | run(input, expected, [lexer.keyword("in", "\\s", Kwd("in"))]) 153 | } 154 | 155 | pub fn keyword_breaker_test() { 156 | use run <- should("lex a keyword in an identifier") 157 | 158 | let input = "as assert" 159 | let expected = [ 
160 | Token(Span(1, 1, 1, 3), "as", Kwd("as")), 161 | Token(Span(1, 4, 1, 10), "assert", Var("assert")), 162 | ] 163 | 164 | run(input, expected, [ 165 | lexer.keyword("as", "\\s", Kwd("as")), 166 | lexer.keyword("assert", "\\s", Var("assert")), 167 | lexer.token(" ", Nil) 168 | |> lexer.ignore(), 169 | ]) 170 | } 171 | 172 | // VARIABLE TESTS -------------------------------------------------------------- 173 | 174 | pub fn single_char_variable_test() { 175 | use run <- should("lex a single char variable") 176 | 177 | let input = "a" 178 | let expected = [Token(Span(1, 1, 1, 2), "a", Var("a"))] 179 | 180 | run(input, expected, [lexer.variable(set.new(), Var)]) 181 | } 182 | 183 | pub fn multi_char_variable_test() { 184 | use run <- should("lex a multi char variable") 185 | 186 | let input = "abc" 187 | let expected = [Token(Span(1, 1, 1, 4), "abc", Var("abc"))] 188 | 189 | run(input, expected, [lexer.variable(set.new(), Var)]) 190 | } 191 | 192 | pub fn multi_char_variable_with_numbers_and_underscores_test() { 193 | use run <- should("lex a multi char variable with numbers and underscores") 194 | 195 | let input = "abc_123" 196 | let expected = [Token(Span(1, 1, 1, 8), "abc_123", Var("abc_123"))] 197 | 198 | run(input, expected, [lexer.variable(set.new(), Var)]) 199 | } 200 | 201 | pub fn variable_containing_keyword_test() { 202 | use run <- should("lex a variable containing a keyword") 203 | 204 | let input = "insert" 205 | let expected = [Token(Span(1, 1, 1, 7), "insert", Var("insert"))] 206 | 207 | run(input, expected, [ 208 | lexer.keyword("in", "\\s", Kwd("in")), 209 | lexer.variable(set.from_list(["int"]), Var), 210 | ]) 211 | } 212 | 213 | // UTILS ----------------------------------------------------------------------- 214 | 215 | fn should( 216 | description: String, 217 | run: fn(fn(String, List(Token(a)), List(Matcher(a, Nil))) -> Nil) -> Nil, 218 | ) -> Nil { 219 | use input, expected, matchers <- run 220 | 221 | io.print("should " <> description) 222 | 223 | lexer.simple(matchers) 224 | |> lexer.run(input, _) 225 | |> should.be_ok 226 | |> should.equal(expected) 227 | 228 | io.println(" ✅") 229 | } 230 | -------------------------------------------------------------------------------- /test/unit/parser_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleam/list 5 | import gleeunit/should 6 | import nibble.{type Parser, do, return} 7 | import nibble/lexer.{type Token} 8 | 9 | // STRESS TESTS ---------------------------------------------------------------- 10 | 11 | pub fn do_tco_test() { 12 | use run <- should("Not stack overflow with many chained `do` calls") 13 | let input = [] 14 | let expected = 0 15 | let parser = 16 | list.repeat(return, 1_000_000) 17 | |> list.fold(return(0), do) 18 | 19 | run(input, expected, parser) 20 | } 21 | 22 | // UTILS ----------------------------------------------------------------------- 23 | 24 | fn should( 25 | description: String, 26 | run: fn(fn(List(Token(tok)), a, Parser(a, tok, ctx)) -> Nil) -> Nil, 27 | ) -> Nil { 28 | use input, expected, parser <- run 29 | 30 | io.print("should " <> description) 31 | 32 | nibble.run(input, parser) 33 | |> should.be_ok 34 | |> should.equal(expected) 35 | 36 | io.println(" ✅") 37 | } 38 | --------------------------------------------------------------------------------
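The test modules above all follow the same three-step pipeline: lex the input string, run a parser over the resulting tokens, then assert on the value with gleeunit's `should`. The sketch below is an illustrative example only — not a file in the repository — that inlines that pipeline into one self-contained test which parses a comma-separated list of integers. It uses only calls that already appear in `test/examples/simple_test.gleam` (`lexer.simple`, `lexer.int`, `lexer.token`, `lexer.ignore`, `nibble.take_map`, `nibble.sequence`, `nibble.run`); the token type, test name, and module layout are made up for the sketch.

```gleam
import gleam/option.{None, Some}
import gleeunit/should
import nibble
import nibble/lexer

// Token type for this sketch: integers and commas.
pub type TokenT {
  NumT(Int)
  CommaT
}

fn lexer() {
  lexer.simple([
    lexer.int(NumT),
    lexer.token(",", CommaT),
    // Whitespace is matched but dropped from the token stream.
    lexer.whitespace(Nil)
      |> lexer.ignore,
  ])
}

fn number_parser() {
  // `take_map` consumes one token and succeeds only when the callback
  // returns `Some`.
  use tok <- nibble.take_map("expected a number")
  case tok {
    NumT(n) -> Some(n)
    _ -> None
  }
}

pub fn int_list_test() {
  let assert Ok(tokens) = lexer.run("1, 2, 3", lexer())

  // `sequence` parses items separated by the given separator parser.
  nibble.run(tokens, nibble.sequence(number_parser(), nibble.token(CommaT)))
  |> should.be_ok
  |> should.equal([1, 2, 3])
}
```

The `use run <- should("...")` helpers defined in each test module are a lightweight way to share exactly this lex/parse/assert boilerplate and print a labelled "should ..." line per test; the sketch above simply inlines that boilerplate instead of defining a helper.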