├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── 00-introduction.md ├── 01-lexing.md ├── 02-parsing.md ├── 03-parse-contexts.md ├── 04-backtracking.md ├── 05-lexer-modes.md └── 06-pratt-parsing.md ├── gleam.toml ├── manifest.toml ├── src ├── glearray_ffi.erl ├── glearray_ffi.mjs ├── nibble.gleam └── nibble │ ├── lexer.gleam │ ├── pratt.gleam │ ├── predicates.gleam │ └── vendor │ └── glearray.gleam └── test ├── docs └── lexer_modes │ └── indentation_test.gleam ├── examples ├── calculator_test.gleam ├── env_test.gleam ├── json_test.gleam ├── readme_test.gleam ├── sexpr_test.gleam └── simple_test.gleam ├── nibble_test.gleam └── unit ├── lexer_test.gleam └── parser_test.gleam /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: ["v*"] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v3.1.0 13 | - uses: erlef/setup-beam@v1.16.0 14 | with: 15 | otp-version: "27.0" 16 | rebar3-version: "3" 17 | gleam-version: "1.5.1" 18 | 19 | - run: | 20 | version="v$(cat gleam.toml | grep -m 1 "version" | sed -r "s/version *= *\"([[:digit:].]+)\"/\1/")" 21 | if [ "$version" != "${{ github.ref_name }}" ]; then 22 | echo "tag '${{ github.ref_name }}' does not match the version in gleam.toml" 23 | echo "expected a tag name 'v$version'" 24 | exit 1 25 | fi 26 | name: check version 27 | 28 | - run: gleam format --check 29 | 30 | - run: gleam publish -y 31 | env: 32 | HEXPM_USER: ${{ secrets.HEX_USERNAME }} 33 | HEXPM_PASS: ${{ secrets.HEX_PASSWORD }} 34 | 35 | - uses: softprops/action-gh-release@v1 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.beam 2 | *.ez 3 | build 4 | erl_crash.dump 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Hayleigh Thompson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in the 5 | Software without restriction, including without limitation the rights to use, 6 | copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 7 | Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nibble 2 | 3 | A lexer and parser combinator library inspired by [`elm/parser`](https://github.com/elm/parser). 
4 | 5 | [![Package Version](https://img.shields.io/hexpm/v/nibble)](https://hex.pm/packages/nibble) 6 | [![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/nibble/) 7 | 8 | ✨ This project is written in **pure Gleam** so you can use it anywhere Gleam 9 | runs: Erlang, Elixir, Node, Deno, and the browser! 10 | 11 | ## Quick start 12 | 13 | If you just want to get a feel for what nibble can do, check out the example 14 | below. 15 | 16 | ```gleam 17 | import gleam/option.{None, Some} 18 | import nibble.{do, return} 19 | import nibble/lexer 20 | 21 | type Point { 22 | Point(x: Int, y: Int) 23 | } 24 | 25 | type Token { 26 | Num(Int) 27 | LParen 28 | RParen 29 | Comma 30 | } 31 | 32 | pub fn main() { 33 | // Your lexer knows how to take an input string and 34 | // turn it into a flat list of tokens. You define the 35 | // type of token you want to use, but nibble will wrap 36 | // that up in its own `Token` type that includes the 37 | // source span and original lexeme for each token. 38 | let lexer = 39 | lexer.simple([ 40 | lexer.int(Num), 41 | lexer.token("(", LParen), 42 | lexer.token(")", RParen), 43 | lexer.token(",", Comma), 44 | // Skip over whitespace, we don't care about it! 45 | lexer.whitespace(Nil) 46 | |> lexer.ignore, 47 | ]) 48 | 49 | // Your parser(s!) know how to transform a list of 50 | // tokens into whatever you want. You have the full 51 | // power of Gleam here, so you can go wild! 52 | let int_parser = { 53 | // Use `take_map` to only consume certain kinds of tokens and transform the 54 | // result. 55 | use tok <- nibble.take_map("expected number") 56 | case tok { 57 | Num(n) -> Some(n) 58 | _ -> None 59 | } 60 | } 61 | 62 | let parser = { 63 | use _ <- do(nibble.token(LParen)) 64 | use x <- do(int_parser) 65 | use _ <- do(nibble.token(Comma)) 66 | use y <- do(int_parser) 67 | use _ <- do(nibble.token(RParen)) 68 | 69 | return(Point(x, y)) 70 | } 71 | 72 | let assert Ok(tokens) = lexer.run("(1, 2)", lexer) 73 | let assert Ok(point) = nibble.run(tokens, parser) 74 | 75 | point.x //=> 1 76 | point.y //=> 2 77 | } 78 | 79 | ``` 80 | 81 | ## Installation 82 | 83 | If available on Hex this package can be added to your Gleam project: 84 | 85 | ```sh 86 | gleam add nibble 87 | ``` 88 | 89 | and its documentation can be found at . 90 | -------------------------------------------------------------------------------- /docs/00-introduction.md: -------------------------------------------------------------------------------- 1 | # 00 Introduction 2 | 3 | Nibble is a parser combinator library with a twist: it includes a lexer combinator 4 | library as well! If some of those words already started to sound like gibberish 5 | to you then don't worry, this introduction is going to get you up to speed on the 6 | core concepts! 7 | 8 | ## Your first parser! 
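The example below comes in four parts: a custom token type, a lexer built with `lexer.simple`, a parser built out of `nibble.do` and `nibble.take_map`, and a `main` function that runs the lexer and then the parser over the input string.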
9 | 10 | ```gleam 11 | type T { 12 | Hello 13 | Name(String) 14 | } 15 | ``` 16 | 17 | ```gleam 18 | fn lexer() { 19 | lexer.simple([ 20 | lexer.token("hello", Hello), 21 | lexer.variable(set.new(), Name), 22 | lexer.whitespace(Nil) 23 | |> lexer.ignore, 24 | ]) 25 | } 26 | ``` 27 | 28 | ```gleam 29 | fn parser() { 30 | use _ <- nibble.do(nibble.token(Hello)) 31 | use name <- nibble.do(name_parser()) 32 | 33 | nibble.return("You are greeting " <> name) 34 | } 35 | 36 | fn name_parser() { 37 | use tok <- nibble.take_map("Expected a name") 38 | 39 | case tok { 40 | Name(name) -> option.Some(name) 41 | _ -> option.None 42 | } 43 | } 44 | ``` 45 | 46 | ```gleam 47 | pub fn main() { 48 | let input = "hello joe" 49 | 50 | use tokens <- result.try( 51 | input 52 | |> lexer.run(lexer()), 53 | ) 54 | 55 | case tokens |> nibble.run(parser()) { 56 | Ok(value) -> io.println(value) 57 | Error(err) -> { 58 | let _ = io.debug(err) 59 | Nil 60 | } 61 | } 62 | 63 | Ok("") 64 | //=> "You are greeting joe" 65 | } 66 | ``` 67 | 68 | ## Terminology 69 | 70 | Throughout Nibble's docs we use words that not all Gleamlins might have come 71 | across before. Here's a quick rundown of the important terms and concepts to 72 | know: 73 | 74 | ### What is a combinator? 75 | 76 | Although you can find some more-formal definitions of what a combinator is – 77 | looking at you, combinatory logic – we're Gleamlins here and we like to keep 78 | things simple. For our purposes we can think of a combinators as functions that 79 | work together like building blocks for more complex behaviour. 80 | 81 | You'll have seen combinators already if you've ever written any code using 82 | `gleam/dynamic`! With `gleam/dynamic` you combine decoders together to create more 83 | complex ones: 84 | 85 | ```gleam 86 | dynamic.field("wibble", dynamic.list(dynamic.int)) 87 | ``` 88 | 89 | We can take the simple `dynamic.int` decoder and combine it with `dynamic.list` 90 | to get back a decoder that can decode a _list_ of ints. And we can combine _that_ 91 | decoder with `dynamic.field` to get back a decoder that can decode a list of ints 92 | from an object field called `"wibble"`! We can keep going, continuing to build 93 | decoders up from smaller pieces: this is the essence of combinators! 94 | 95 | ### What is a parser? 96 | 97 | In the broadest sense, a parser takes an unstructured sequence of stuff (often 98 | characters in a string or tokens in a list) and turns it into something more 99 | structured. You can imagine all parsers can be thought of as the same basic idea: 100 | 101 | ``` 102 | type Parser(a, b) = fn(List(a)) -> #(b, List(a)) 103 | ``` 104 | 105 | In the real world parsers tend to be a bit more complex than this, including things 106 | like errors and failure cases, position tracking, and so on. But in essence parsers 107 | are combinators, and just like `gleam/dynamic` that means we can combine them 108 | together to parse very complex things. 109 | 110 | ### What is a lexer? 
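A lexer takes the raw input string and turns it into a flat list of tokens for your parser to consume; Nibble ships its own lexer combinators in `nibble/lexer`, and the next guide covers them in depth. As a minimal sketch (reusing the `Num` token from the README's quick start):

```gleam
// Numbers become `Num` tokens and whitespace is consumed without
// producing any token at all.
let lexer =
  lexer.simple([
    lexer.int(Num),
    lexer.whitespace(Nil)
      |> lexer.ignore,
  ])

let assert Ok(tokens) = lexer.run("1 2 3", lexer)
```

From this point on the parser never sees the input string again: it only ever works with the list of tokens.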
111 | -------------------------------------------------------------------------------- /docs/01-lexing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/01-lexing.md -------------------------------------------------------------------------------- /docs/02-parsing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/02-parsing.md -------------------------------------------------------------------------------- /docs/03-parse-contexts.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/03-parse-contexts.md -------------------------------------------------------------------------------- /docs/04-backtracking.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/04-backtracking.md -------------------------------------------------------------------------------- /docs/05-lexer-modes.md: -------------------------------------------------------------------------------- 1 | # Lexer Modes 2 | 3 | Up until now we have been running our lexer using `lexer.simple`. As the name 4 | implies, this is the simplest way to use Nibble's lexer and it is context-free. 5 | Where possible we should try to stick to these simple lexers, but sometimes we 6 | need to be able to lex things that are context-sensitive. That's where lexer 7 | modes come in! 8 | 9 | ## Indentation Sensitivity 10 | 11 | Let's imagine we're writing a lexer for a Python-ish programming language and 12 | we want to produce `Indent` and `Dedent` tokens to represent indentation. We 13 | might define our tokens like this: 14 | 15 | ```gleam 16 | pub type TokenT { 17 | Var(String) 18 | Str(String) 19 | Num(Int) 20 | 21 | // Keywords 22 | Def 23 | For 24 | In 25 | Print 26 | 27 | // Indentation 28 | Indent(Int) 29 | Dedent(Int) 30 | } 31 | ``` 32 | 33 | We could represent a chunk of code like this: 34 | 35 | ```python 36 | def wibble arr 37 | for x in arr 38 | print x 39 | 40 | print "done!" 41 | 42 | def wobble 43 | wibble [1, 2, 3] 44 | ``` 45 | 46 | Indentation would change the meaning of this program, so we need to know when we 47 | are inside a block of indented code or not. Our `Indent` and `Dedent` tokens 48 | carry with them the level of indentation they represent such that when we come to 49 | parsing we can make sure everything is valid, but how do we produce the tokens in 50 | the first place? 51 | 52 | We'll need to do two things: (1) write a custom matcher using `lexer.custom` and 53 | (2) store the current indentation level as the lexer's mode. 54 | 55 | ```gleam 56 | pub opaque type Lexer(a, mode) 57 | pub opaque type Matcher(a, mode) 58 | ``` 59 | 60 | Modes allow us to chose different matchers for different contexts, or inject 61 | state into our matchers. 
For our indentation-sensitive lexer, that means we'll 62 | end up with `Lexer` and `Matcher` types like this: 63 | 64 | ```gleam 65 | type Lexer = nibble.Lexer(TokenT, Int) 66 | type Matcher = nibble.Matcher(TokenT, Int) 67 | ``` 68 | 69 | To write our `indentation` matcher, we'll count the number of spaces that immediately 70 | follow a newline and compare that to the current indentation level. If that number 71 | is less than the current indentation level, we'll produce a `Dedent` token, otherwise 72 | we'll produce an `Indent` token. In either case we'll also update the lexer's 73 | mode with the new indentation level for subsequent lines. 74 | 75 | ```gleam 76 | fn indentation() -> Matcher(TokenT, Int) { 77 | let assert Ok(is_indent) = regex.from_string("^\\n[ \\t]*") 78 | use current_indent, lexeme, lookahead <- lexer.custom 79 | 80 | case regex.check(is_indent, lexeme), lookahead { 81 | False, _ -> NoMatch 82 | True, " " | True, "\t" -> Skip 83 | True, "\n" -> Drop(current_indent) 84 | True, _ -> { 85 | let spaces = string.length(lexeme) - 1 86 | 87 | case int.compare(spaces, current_indent) { 88 | Lt -> Keep(Dedent(spaces), spaces) 89 | Eq if spaces == 0 -> Drop(0) 90 | Eq -> Keep(Indent(spaces), spaces) 91 | Gt -> Keep(Indent(spaces), spaces) 92 | } 93 | } 94 | } 95 | } 96 | ``` 97 | 98 | There's actually a little more going on here than I just described, so let's 99 | break the pattern matching down case by case. 100 | 101 | ```gleam 102 | False, _ -> NoMatch 103 | ``` 104 | 105 | - This branch should be self-explanatory. If the lexeme doesn't match the regex 106 | we have for indentation then we don't have a match and should tell the lexer to 107 | move on and attempt a different matcher. 108 | 109 | ```gleam 110 | True, " " | True, "\t" -> Skip 111 | ``` 112 | 113 | - If the lexeme matches the indentation regex but we can see from the lookahead 114 | that it is followed by more indentation, then we don't want to produce a token 115 | right now. We _also_ don't want the lexer to try any other matchers that might 116 | consume these lexeme (like `lexer.whitespace`) so we tell the lexer to _skip_ 117 | checking any other matchers and move on to the next lexeme. 118 | 119 | ```gleam 120 | True, "\n" -> Drop(current_indent) 121 | ``` 122 | 123 | - When the lexeme matches the indentation regex but it's followed by a new line 124 | that means we have an empty line of just whitespace. You could choose to emit a 125 | token here if you wanted to, but in this case we'll ignore empty lines entirely 126 | and instruct the lexer to _drop_ the lexeme without producing anything. Importantly 127 | we preserve the current indentation level as the lexer's mode going forward. 128 | 129 | ```gleam 130 | True, _ -> { 131 | let spaces = string.length(lexeme) - 1 132 | 133 | case int.compare(spaces, current_indent) { 134 | Lt -> Keep(Dedent(spaces), spaces) 135 | Eq if spaces == 0 -> Drop(0) 136 | Eq -> Keep(Indent(spaces), spaces) 137 | Gt -> Keep(Indent(spaces), spaces) 138 | } 139 | } 140 | ``` 141 | 142 | - This is the actual indentation logic; we hit this branch when the lookahead is 143 | any other grapheme. To get the new indentation level we just need to count the 144 | length of the string, remembering to subtract one for the leading newline. By 145 | comparing the new indentation to the current, we can work out what token to keep. 146 | 147 | - If the new indentation is less than the current, we keep a `Dedent` token and 148 | update the lexer's mode to the new indentation level. 
149 | - If the new indentation is is equal to the current _but_ that level is zero, 150 | then we drop the lexeme without producing any indentation token at all. 151 | - Otherwise, if the new indentation is equal or greater than the current, we 152 | keep an `Indent` token and update the lexer's mode to the new indentation level. 153 | 154 | ## String Interpolation 155 | -------------------------------------------------------------------------------- /docs/06-pratt-parsing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hayleigh-dot-dev/nibble/6ce357f4f0f08219147c5c27d753ec642152c767/docs/06-pratt-parsing.md -------------------------------------------------------------------------------- /gleam.toml: -------------------------------------------------------------------------------- 1 | name = "nibble" 2 | version = "1.1.3" 3 | 4 | # Fill out these fields if you intend to generate HTML documentation or publish 5 | # your project to the Hex package manager. 6 | # 7 | licences = ["MIT"] 8 | description = "A string parsing library combining a traditional lexer with parser combinators.." 9 | repository = { type = "github", user = "hayleigh-dot-dev", repo = "gleam-nibble" } 10 | gleam = ">= 0.34.0" 11 | 12 | internal_modules = ["nibble/vendor/*"] 13 | 14 | 15 | [dependencies] 16 | gleam_stdlib = ">= 0.34.0 and < 1.0.0" 17 | gleam_regexp = ">= 1.0.0 and < 2.0.0" 18 | 19 | [dev-dependencies] 20 | gleeunit = ">= 1.0.0 and < 2.0.0" 21 | 22 | [documentation] 23 | pages = [ 24 | { title = "Introduction to Nibble", path = "intro.html", source = "./docs/00-introduction.md" }, 25 | { title = "Lexing", path = "lexing.html", source = "./docs/01-lexing.md" }, 26 | { title = "Parsing", path = "parsing.html", source = "./docs/02-parsing.md" }, 27 | { title = "Parse Contexts", path = "parse-contexts.html", source = "./docs/03-parse-contexts.md" }, 28 | { title = "Backtracking", path = "backtracking.html", source = "./docs/04-backtracking.md" }, 29 | { title = "Lexer Modes", path = "lexer-modes.html", source = "./docs/05-lexer-modes.md" }, 30 | { title = "Pratt Parsing", path = "pratt-parsing.html", source = "./docs/06-pratt-parsing.md" }, 31 | ] 32 | -------------------------------------------------------------------------------- /manifest.toml: -------------------------------------------------------------------------------- 1 | # This file was generated by Gleam 2 | # You typically do not need to edit this file 3 | 4 | packages = [ 5 | { name = "gleam_regexp", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "A3655FDD288571E90EE9C4009B719FEF59FA16AFCDF3952A76A125AF23CF1592" }, 6 | { name = "gleam_stdlib", version = "0.51.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "14AFA8D3DDD7045203D422715DBB822D1725992A31DF35A08D97389014B74B68" }, 7 | { name = "gleeunit", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "F7A7228925D3EE7D0813C922E062BFD6D7E9310F0BEE585D3A42F3307E3CFD13" }, 8 | ] 9 | 10 | [requirements] 11 | gleam_regexp = { version = ">= 1.0.0 and < 2.0.0" } 12 | gleam_stdlib = { version = ">= 0.34.0 and < 1.0.0" } 13 | gleeunit = { version = ">= 1.0.0 and < 2.0.0" } 14 | -------------------------------------------------------------------------------- /src/glearray_ffi.erl: 
-------------------------------------------------------------------------------- 1 | -module(glearray_ffi). 2 | 3 | -export([new/0, get/2, set/3, insert/3]). 4 | 5 | new() -> {}. 6 | 7 | get(Array, Index) -> element(Index + 1, Array). 8 | 9 | set(Array, Index, Value) -> setelement(Index + 1, Array, Value). 10 | 11 | insert(Array, Index, Value) -> erlang:insert_element(Index + 1, Array, Value). 12 | -------------------------------------------------------------------------------- /src/glearray_ffi.mjs: -------------------------------------------------------------------------------- 1 | export function newArray() { 2 | return []; 3 | } 4 | 5 | export function fromList(list) { 6 | return list.toArray(); 7 | } 8 | 9 | export function arrayLength(array) { 10 | return array.length; 11 | } 12 | 13 | export function get(array, index) { 14 | return array[index]; 15 | } 16 | 17 | export function set(array, index, value) { 18 | const copy = [...array]; 19 | copy[index] = value; 20 | return copy; 21 | } 22 | 23 | export function push(array, value) { 24 | const copy = [...array]; 25 | copy.push(value); 26 | return copy; 27 | } 28 | 29 | export function insert(array, index, value) { 30 | return array.toSpliced(index, 0, value); 31 | } 32 | -------------------------------------------------------------------------------- /src/nibble.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/bool 4 | import gleam/io 5 | import gleam/list 6 | import gleam/option.{type Option, None, Some} 7 | import gleam/string 8 | import nibble/lexer.{type Span, type Token, Span, Token} 9 | import nibble/vendor/glearray.{type Array} 10 | 11 | // TYPES ----------------------------------------------------------------------- 12 | 13 | /// The `Parser` type has three parameters, let's take a look at each of them: 14 | /// 15 | /// ``` 16 | /// Parser(a, tok, ctx) 17 | /// // (1) ^ 18 | /// // (2) ^^^ 19 | /// // (3) ^^^ 20 | /// ``` 21 | /// 22 | /// 1) `a` is the type of value that the parser knows how to produce. If you were 23 | /// writing a parser for a programming language, this might be your expression 24 | /// type. 25 | /// 26 | /// 2) `tok` is the type of tokens that the parser knows how to consume. You can 27 | /// take a look at the [`Token`](./nibble/lexer#Token) type for a bit more info, 28 | /// but note that it's not necessary for the token stream to come from nibble's 29 | /// lexer. 30 | /// 31 | /// 3) `ctx` is used to make error reporting nicer. You can place a parser into a 32 | /// custom context. When the parser runs the context gets pushed into a stack. 33 | /// If the parser fails you can see the context stack in the error message, 34 | /// which can make error reporting and debugging much easier! 35 | /// 36 | pub opaque type Parser(a, tok, ctx) { 37 | Parser(fn(State(tok, ctx)) -> Step(a, tok, ctx)) 38 | } 39 | 40 | type Step(a, tok, ctx) { 41 | Cont(CanBacktrack, a, State(tok, ctx)) 42 | Fail(CanBacktrack, Bag(tok, ctx)) 43 | } 44 | 45 | type State(tok, ctx) { 46 | State( 47 | // ❓ You might wonder why we're wanting an `Array` at all when we could just 48 | // use a `List` and backtrack to a previous state when we need to. By tracking 49 | // the index and indexing into the dict/array directly we save ever having to 50 | // allocate something new, which is a big deal for performance! 
51 | src: Array(Token(tok)), 52 | idx: Int, 53 | pos: Span, 54 | ctx: List(#(Span, ctx)), 55 | ) 56 | } 57 | 58 | type CanBacktrack { 59 | CanBacktrack(Bool) 60 | } 61 | 62 | // RUNNING PARSERS ------------------------------------------------------------- 63 | 64 | /// Parsers don't do anything until they're run! The `run` function takes a 65 | /// [`Parser`](#Parser) and a list of [`Token`](./nibble/lexer#Token)s and 66 | /// runs it; returning either the parsed value or a list of [`DeadEnds`](#DeadEnd) 67 | /// where the parser failed. 68 | /// 69 | pub fn run( 70 | src: List(Token(tok)), 71 | parser: Parser(a, tok, ctx), 72 | ) -> Result(a, List(DeadEnd(tok, ctx))) { 73 | let init = 74 | State(src: glearray.from_list(src), idx: 0, pos: Span(1, 1, 1, 1), ctx: []) 75 | 76 | case runwrap(init, parser) { 77 | Cont(_, a, _) -> Ok(a) 78 | Fail(_, bag) -> Error(to_deadends(bag, [])) 79 | } 80 | } 81 | 82 | fn runwrap( 83 | state: State(tok, ctx), 84 | parser: Parser(a, tok, ctx), 85 | ) -> Step(a, tok, ctx) { 86 | let Parser(parse) = parser 87 | parse(state) 88 | } 89 | 90 | fn next(state: State(tok, ctx)) -> #(Option(tok), State(tok, ctx)) { 91 | case glearray.get(state.src, state.idx) { 92 | Error(_) -> #(option.None, state) 93 | Ok(Token(span, _, tok)) -> #( 94 | option.Some(tok), 95 | State(..state, idx: state.idx + 1, pos: span), 96 | ) 97 | } 98 | } 99 | 100 | // CONSTRUCTORS ---------------------------------------------------------------- 101 | 102 | /// The simplest kind of parser. [`return`](#return) consumes no tokens and always 103 | /// produces the given value. Sometimes called [`succeed`](#succeed) instead. 104 | /// 105 | /// This function might seem useless at first, but it is very useful when used in 106 | /// combination with [`do`](#do) or [`then`](#then). 107 | /// 108 | /// ```gleam 109 | /// import nibble.{do, return} 110 | /// 111 | /// fn unit8_parser() { 112 | /// use int <- do(int_parser()) 113 | /// 114 | /// case int >= 0, int <= 255 { 115 | /// True, True -> 116 | /// return(int) 117 | /// 118 | /// False, _ -> 119 | /// throw("Expected an int >= 0") 120 | /// 121 | /// _, False -> 122 | /// throw("Expected an int <= 255") 123 | /// } 124 | /// } 125 | /// ``` 126 | /// 127 | /// 💡 [`return`](#return`) and [`succeed`](#succeed) are names for the same thing. 128 | /// We suggesting using `return` unqualified when using `do` and Gleam's `use` 129 | /// syntax, and `nibble.succeed` in a pipeline with `nibble.then`. 130 | pub fn return(value: a) -> Parser(a, tok, ctx) { 131 | use state <- Parser 132 | 133 | Cont(CanBacktrack(False), value, state) 134 | } 135 | 136 | /// The simplest kind of parser. [`succeed`](#succeed) consumes no tokens and always 137 | /// produces the given value. Sometimes called [`return`](#return) instead. 138 | /// 139 | /// This function might seem useless at first, but it is very useful when used in 140 | /// combination with [`do`](#do) or [`then`](#then). 141 | /// 142 | /// ```gleam 143 | /// import nibble 144 | /// 145 | /// fn unit8_parser() { 146 | /// int_parser() 147 | /// |> nibble.then(fn(int) { 148 | /// case int >= 0, int <= 255 { 149 | /// True, True -> succeed(int) 150 | /// False, _ -> fail("Expected an int >= 0") 151 | /// _, False -> fail("Expected an int <= 255") 152 | /// } 153 | /// }) 154 | /// } 155 | /// ``` 156 | /// 157 | /// 💡 [`succeed`](#succeed) and [`return`](#return) are names for the same thing. 
158 | /// We suggest using `succeed` in a pipeline with `nibble.then`, and `return` 159 | /// unqalified when using `do` with Gleam's `use` syntax. 160 | /// 161 | pub fn succeed(value: a) -> Parser(a, tok, ctx) { 162 | return(value) 163 | } 164 | 165 | /// The opposite of [`return`](#return), this parser always fails with the given 166 | /// message. Sometimes called [`fail`](#fail) instead. 167 | /// 168 | pub fn throw(message: String) -> Parser(a, tok, ctx) { 169 | use state <- Parser 170 | let error = Custom(message) 171 | let bag = bag_from_state(state, error) 172 | 173 | Fail(CanBacktrack(False), bag) 174 | } 175 | 176 | /// Create a parser that consumes no tokens and always fails with the given 177 | /// error message. 178 | /// 179 | pub fn fail(message: String) -> Parser(a, tok, ctx) { 180 | throw(message) 181 | } 182 | 183 | /// Defer the creation of a parser until it is needed. This is often most useful 184 | /// when creating a parser that is recursive and is *not* a function. 185 | /// 186 | pub fn lazy(parser: fn() -> Parser(a, tok, ctx)) -> Parser(a, tok, ctx) { 187 | use state <- Parser 188 | 189 | runwrap(state, parser()) 190 | } 191 | 192 | // BACKTRACKING ---------------------------------------------------------------- 193 | 194 | /// By default, parsers will not backtrack if they fail after consuming at least 195 | /// one token. Passing a parser to `backtrackable` will change this behaviour and 196 | /// allows us to jump back to the state of the parser before it consumed any input 197 | /// and try another one. 198 | /// 199 | /// This is most useful when you want to quickly try a few different parsers using 200 | /// [`one_of`](#one_of). 201 | /// 202 | /// 🚨 Backtracing parsers can drastically reduce performance, so you should avoid 203 | /// them where possible. A common reason folks reach for backtracking is when they 204 | /// want to try multiple branches that start with the same token or same sequence 205 | /// of tokens. 206 | /// 207 | /// To avoid backtracking in these cases, you can create an intermediate parser 208 | /// that consumes the common tokens _and then_ use [`one_of`](#one_of) to try 209 | /// the different branches. 
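///
/// For example (a sketch — every parser named here is hypothetical, and
/// `tuple_parser` and `group_parser` are assumed to both start by consuming
/// the same `LParen` token):
///
/// ```gleam
/// // Instead of backtracking over the shared `LParen`...
/// one_of([
///   backtrackable(tuple_parser()),
///   group_parser(),
/// ])
///
/// // ...consume it once and then branch:
/// {
///   use _ <- do(token(LParen))
///
///   one_of([tuple_after_paren(), group_after_paren()])
/// }
/// ```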
210 | /// 211 | pub fn backtrackable(parser: Parser(a, tok, ctx)) -> Parser(a, tok, ctx) { 212 | use state <- Parser 213 | 214 | case runwrap(state, parser) { 215 | Cont(_, a, state) -> Cont(CanBacktrack(False), a, state) 216 | Fail(_, bag) -> Fail(CanBacktrack(False), bag) 217 | } 218 | } 219 | 220 | fn should_commit(a: CanBacktrack, or b: CanBacktrack) -> CanBacktrack { 221 | let CanBacktrack(a) = a 222 | let CanBacktrack(b) = b 223 | 224 | CanBacktrack(a || b) 225 | } 226 | 227 | // MANIPULATING PARSERS -------------------------------------------------------- 228 | 229 | /// 230 | /// 231 | pub fn do( 232 | parser: Parser(a, tok, ctx), 233 | f: fn(a) -> Parser(b, tok, ctx), 234 | ) -> Parser(b, tok, ctx) { 235 | use state <- Parser 236 | 237 | case runwrap(state, parser) { 238 | Cont(to_a, a, state) -> 239 | case runwrap(state, f(a)) { 240 | Cont(to_b, b, state) -> Cont(should_commit(to_a, or: to_b), b, state) 241 | Fail(to_b, bag) -> Fail(should_commit(to_a, or: to_b), bag) 242 | } 243 | Fail(can_backtrack, bag) -> Fail(can_backtrack, bag) 244 | } 245 | } 246 | 247 | /// 248 | /// 249 | pub fn do_in( 250 | context: ctx, 251 | parser: Parser(a, tok, ctx), 252 | f: fn(a) -> Parser(b, tok, ctx), 253 | ) -> Parser(b, tok, ctx) { 254 | do(parser, f) 255 | |> in(context) 256 | } 257 | 258 | /// 259 | /// 260 | pub fn then( 261 | parser: Parser(a, tok, ctx), 262 | f: fn(a) -> Parser(b, tok, ctx), 263 | ) -> Parser(b, tok, ctx) { 264 | do(parser, f) 265 | } 266 | 267 | /// 268 | /// 269 | pub fn map(parser: Parser(a, tok, ctx), f: fn(a) -> b) -> Parser(b, tok, ctx) { 270 | use a <- do(parser) 271 | 272 | return(f(a)) 273 | } 274 | 275 | /// 276 | /// 277 | pub fn replace(parser: Parser(a, tok, ctx), with b: b) -> Parser(b, tok, ctx) { 278 | map(parser, fn(_) { b }) 279 | } 280 | 281 | // PARSER STATE ---------------------------------------------------------------- 282 | 283 | /// A parser that returns the current token position. 284 | /// 285 | pub fn span() -> Parser(Span, tok, ctx) { 286 | use state <- Parser 287 | 288 | Cont(CanBacktrack(False), state.pos, state) 289 | } 290 | 291 | // SIMPLE PARSERS -------------------------------------------------------------- 292 | 293 | /// Returns the next token in the input stream. Fails if there are no more 294 | /// tokens. 295 | pub fn any() -> Parser(tok, tok, ctx) { 296 | take_if("a single token", fn(_) { True }) 297 | } 298 | 299 | /// Returns nil if the token `tok` is the next token in the input stream. Fails 300 | /// if the next token is not `tok` or if the input stream is empty. 301 | pub fn token(tok: tok) -> Parser(Nil, tok, ctx) { 302 | use state <- Parser 303 | 304 | case next(state) { 305 | #(option.Some(t), state) if tok == t -> Cont(CanBacktrack(True), Nil, state) 306 | #(option.Some(t), state) -> 307 | Fail( 308 | CanBacktrack(False), 309 | bag_from_state(state, Expected(string.inspect(tok), t)), 310 | ) 311 | #(option.None, state) -> 312 | Fail(CanBacktrack(False), bag_from_state(state, EndOfInput)) 313 | } 314 | } 315 | 316 | /// Succeeds if the input stream is empty, fails otherwise. This is useful to 317 | /// verify that you've consumed all the tokens in the input stream. 
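///
/// For example (a sketch, assuming a `point_parser` like the one in the README):
///
/// ```gleam
/// let parser = {
///   use point <- do(point_parser())
///   // Fails with `Unexpected` if any tokens are left over.
///   use _ <- do(eof())
///
///   return(point)
/// }
/// ```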
318 | /// 319 | pub fn eof() -> Parser(Nil, tok, ctx) { 320 | use state <- Parser 321 | 322 | case next(state) { 323 | #(option.Some(tok), state) -> 324 | Fail(CanBacktrack(False), bag_from_state(state, Unexpected(tok))) 325 | #(option.None, _) -> Cont(CanBacktrack(False), Nil, state) 326 | } 327 | } 328 | 329 | // BRANCHING AND LOOPING ------------------------------------------------------- 330 | 331 | /// Try the given parsers in order until one succeeds. If all fail, the parser 332 | /// fails. 333 | pub fn one_of(parsers: List(Parser(a, tok, ctx))) -> Parser(a, tok, ctx) { 334 | use state <- Parser 335 | let init = Fail(CanBacktrack(False), Empty) 336 | 337 | use result, next <- list.fold_until(parsers, init) 338 | 339 | case result { 340 | Cont(_, _, _) -> list.Stop(result) 341 | Fail(CanBacktrack(True), _) -> list.Stop(result) 342 | Fail(_, bag) -> 343 | runwrap(state, next) 344 | |> add_bag_to_step(bag) 345 | |> list.Continue 346 | } 347 | } 348 | 349 | /// 350 | /// Consumes a sequence of tokens using the given parser, separated by the 351 | /// given `separator` parser. Returns a list of the parsed values, ignoring 352 | /// the results of the `separator` parser. 353 | /// 354 | pub fn sequence( 355 | parser: Parser(a, tok, ctx), 356 | separator sep: Parser(x, tok, ctx), 357 | ) -> Parser(List(a), tok, ctx) { 358 | one_of([ 359 | parser 360 | |> then(more(_, parser, sep)), 361 | return([]), 362 | ]) 363 | } 364 | 365 | /// 366 | /// Returns consecutive applications of the given parser. If you are parsing 367 | /// values with a separator, use [`sequence`](#sequence) instead. 368 | /// 369 | /// 💡 This parser can succeed without consuming any input. You can end up with 370 | /// an infinite loop if you're not careful. Use [`many1`](#many1) if you want 371 | /// to guarantee you take at least one token. 372 | /// 373 | pub fn many(parser: Parser(a, tok, ctx)) -> Parser(List(a), tok, ctx) { 374 | sequence(parser, return(Nil)) 375 | } 376 | 377 | /// 378 | /// This is the same as [`many1`](#many1), but is guaranteed to return at least 379 | /// one value. 380 | /// 381 | pub fn many1(parser: Parser(a, tok, ctx)) -> Parser(List(a), tok, ctx) { 382 | use x <- do(parser) 383 | use xs <- do(many(parser)) 384 | 385 | return([x, ..xs]) 386 | } 387 | 388 | fn more( 389 | x: a, 390 | parser: Parser(a, tok, ctx), 391 | separator: Parser(x, tok, ctx), 392 | ) -> Parser(List(a), tok, ctx) { 393 | use xs <- loop([x]) 394 | // `break` is lazy so we don't reverse `xs` every iteration if we don't need 395 | // to. 
396 | let break = fn() { return(Break(list.reverse(xs))) } 397 | let continue = { 398 | use _ <- do(separator) 399 | use x <- do(parser) 400 | 401 | return(Continue([x, ..xs])) 402 | } 403 | 404 | one_of([continue, lazy(break)]) 405 | } 406 | 407 | /// 408 | /// 409 | pub type Loop(a, state) { 410 | Continue(state) 411 | Break(a) 412 | } 413 | 414 | /// 415 | /// 416 | pub fn loop( 417 | init: state, 418 | step: fn(state) -> Parser(Loop(a, state), tok, ctx), 419 | ) -> Parser(a, tok, ctx) { 420 | use state <- Parser 421 | 422 | loop_help(step, CanBacktrack(False), init, state) 423 | } 424 | 425 | fn loop_help(f, commit, loop_state, state) { 426 | case runwrap(state, f(loop_state)) { 427 | Cont(can_backtrack, Continue(next_loop_state), next_state) -> 428 | loop_help( 429 | f, 430 | should_commit(commit, can_backtrack), 431 | next_loop_state, 432 | next_state, 433 | ) 434 | Cont(can_backtrack, Break(result), next_state) -> 435 | Cont(should_commit(commit, can_backtrack), result, next_state) 436 | Fail(can_backtrack, bag) -> Fail(should_commit(commit, can_backtrack), bag) 437 | } 438 | } 439 | 440 | // PREDICATES ------------------------------------------------------------------ 441 | 442 | /// 443 | /// Fails if the given condition is false, otherwise returns `Nil`. 444 | /// 445 | pub fn guard(cond: Bool, expecting: String) -> Parser(Nil, tok, ctx) { 446 | case cond { 447 | True -> return(Nil) 448 | False -> fail(expecting) 449 | } 450 | } 451 | 452 | /// 453 | /// Takes the next token off the stream if it satisfies the given predicate. 454 | /// 455 | pub fn take_if( 456 | expecting: String, 457 | predicate: fn(tok) -> Bool, 458 | ) -> Parser(tok, tok, ctx) { 459 | use state <- Parser 460 | let #(tok, next_state) = next(state) 461 | 462 | case tok, option.map(tok, predicate) { 463 | Some(tok), Some(True) -> Cont(CanBacktrack(False), tok, next_state) 464 | Some(tok), Some(False) -> 465 | Fail( 466 | CanBacktrack(False), 467 | bag_from_state(next_state, Expected(expecting, got: tok)), 468 | ) 469 | _, _ -> Fail(CanBacktrack(False), bag_from_state(next_state, EndOfInput)) 470 | } 471 | } 472 | 473 | /// 474 | /// Take tokens from the stream while the given predicate is satisfied. 475 | /// 476 | /// 💡 This parser can succeed without consuming any input (if the predicate 477 | /// immediately fails). You can end up with an infinite loop if you're not 478 | /// careful. Use [`take_while1`](#take_while1) if you want to guarantee you 479 | /// take at least one token. 480 | /// 481 | pub fn take_while(predicate: fn(tok) -> Bool) -> Parser(List(tok), tok, ctx) { 482 | use state <- Parser 483 | let #(tok, next_state) = next(state) 484 | 485 | case tok, option.map(tok, predicate) { 486 | Some(tok), Some(True) -> 487 | runwrap(next_state, { 488 | use toks <- do(take_while(predicate)) 489 | return([tok, ..toks]) 490 | }) 491 | Some(_), Some(False) -> Cont(CanBacktrack(False), [], state) 492 | _, _ -> Cont(CanBacktrack(False), [], state) 493 | } 494 | } 495 | 496 | /// 497 | /// Take tokens from the stream while the given predicate is satisfied. 498 | /// 499 | /// 💡 If this parser succeeds, the list produced is guaranteed to be non-empty. 500 | /// Feel free to `let assert` the result! 
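///
/// For example (a sketch, assuming a `Num(Int)` token like the one in the README):
///
/// ```gleam
/// let numbers =
///   take_while1("a number", fn(tok) {
///     case tok {
///       Num(_) -> True
///       _ -> False
///     }
///   })
/// ```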
501 | /// 502 | pub fn take_while1( 503 | expecting: String, 504 | predicate: fn(tok) -> Bool, 505 | ) -> Parser(List(tok), tok, ctx) { 506 | use x <- do(take_if(expecting, predicate)) 507 | use xs <- do(take_while(predicate)) 508 | 509 | return([x, ..xs]) 510 | } 511 | 512 | /// 513 | /// Take token from the stream until the given predicate is satisfied. 514 | /// 515 | /// 💡 This parser can succeed without consuming any input (if the predicate 516 | /// immediately succeeds). You can end up with an infinite loop if you're not 517 | /// careful. Use [`take_until1`](#take_until1) if you want to guarantee you 518 | /// take at least one token. 519 | /// 520 | pub fn take_until(predicate: fn(tok) -> Bool) -> Parser(List(tok), tok, ctx) { 521 | take_while(fn(tok) { bool.negate(predicate(tok)) }) 522 | } 523 | 524 | /// 525 | /// Take token from the stream until the given predicate is satisfied. 526 | /// 527 | /// 💡 If this parser succeeds, the list produced is guaranteed to be non-empty. 528 | /// Feel free to `let assert` the result! 529 | /// 530 | pub fn take_until1( 531 | expecting: String, 532 | predicate: fn(tok) -> Bool, 533 | ) -> Parser(List(tok), tok, ctx) { 534 | take_while1(expecting, fn(tok) { bool.negate(predicate(tok)) }) 535 | } 536 | 537 | /// 538 | /// Apply the parser up to `count` times, returning a list of the results. 539 | /// 540 | /// 💡 This parser can succeed without consuming any input (if the parser 541 | /// fails immediately) and return an empty list. You can end up with an 542 | /// infinite loop if you're not careful. 543 | /// 544 | pub fn take_up_to( 545 | parser: Parser(a, tok, ctx), 546 | count: Int, 547 | ) -> Parser(List(a), tok, ctx) { 548 | case count { 549 | 0 -> return([]) 550 | _ -> 551 | { 552 | use x <- do(parser) 553 | use xs <- do(take_up_to(parser, count - 1)) 554 | 555 | return([x, ..xs]) 556 | } 557 | |> or([]) 558 | } 559 | } 560 | 561 | /// 562 | /// Apply the parser a minimum of `count` times, returning a list of the results. 563 | /// 564 | pub fn take_at_least( 565 | parser: Parser(a, tok, ctx), 566 | count: Int, 567 | ) -> Parser(List(a), tok, ctx) { 568 | case count { 569 | 0 -> many(parser) 570 | _ -> { 571 | use x <- do(parser) 572 | use xs <- do(take_at_least(parser, count - 1)) 573 | 574 | return([x, ..xs]) 575 | } 576 | } 577 | } 578 | 579 | /// 580 | /// Take `count` consecutive tokens from the stream using the given parser. 581 | /// 582 | pub fn take_exactly( 583 | parser: Parser(a, tok, ctx), 584 | count: Int, 585 | ) -> Parser(List(a), tok, ctx) { 586 | case count { 587 | 0 -> return([]) 588 | _ -> { 589 | use x <- do(parser) 590 | use xs <- do(take_exactly(parser, count - 1)) 591 | 592 | return([x, ..xs]) 593 | } 594 | } 595 | } 596 | 597 | /// 598 | /// Try the given parser, but if it fails return the given default value instead 599 | /// of failing. 600 | /// 601 | pub fn or(parser: Parser(a, tok, ctx), default: a) -> Parser(a, tok, ctx) { 602 | one_of([parser, return(default)]) 603 | } 604 | 605 | /// 606 | /// Try the given parser, but if it fails return 607 | /// [`None`](#https://hexdocs.pm/gleam_stdlib/gleam/option.html#Option) instead 608 | /// of failing. 609 | /// 610 | pub fn optional(parser: Parser(a, tok, ctx)) -> Parser(Option(a), tok, ctx) { 611 | one_of([map(parser, Some), return(None)]) 612 | } 613 | 614 | /// 615 | /// Take the next token and attempt to transform it with the given function. 
This 616 | /// is useful when creating reusable primitive parsers for your own tokens such as 617 | /// `take_identifier` or `take_number`. 618 | /// 619 | pub fn take_map( 620 | expecting: String, 621 | f: fn(tok) -> Option(a), 622 | ) -> Parser(a, tok, ctx) { 623 | use state <- Parser 624 | let #(tok, next_state) = next(state) 625 | 626 | case tok, option.then(tok, f) { 627 | None, _ -> Fail(CanBacktrack(False), bag_from_state(next_state, EndOfInput)) 628 | Some(tok), None -> 629 | Fail( 630 | CanBacktrack(False), 631 | bag_from_state(next_state, Expected(expecting, got: tok)), 632 | ) 633 | _, Some(a) -> Cont(CanBacktrack(False), a, next_state) 634 | } 635 | } 636 | 637 | /// 638 | /// Applies a function to consecutive tokens while the given function returns 639 | /// `Some`. 640 | /// 641 | /// 💡 This parser can succeed without consuming any input (if the predicate 642 | /// immediately succeeds). You can end up with an infinite loop if you're not 643 | /// careful. Use [`take_map_while1`](#take_map_while1) if you want to guarantee you 644 | /// take at least one token. 645 | /// 646 | pub fn take_map_while(f: fn(tok) -> Option(a)) -> Parser(List(a), tok, ctx) { 647 | use state <- Parser 648 | let #(tok, next_state) = next(state) 649 | 650 | case tok, option.then(tok, f) { 651 | None, _ -> Cont(CanBacktrack(True), [], state) 652 | Some(_), None -> Cont(CanBacktrack(True), [], state) 653 | _, Some(x) -> 654 | runwrap( 655 | next_state, 656 | take_map_while(f) 657 | |> map(list.prepend(_, x)), 658 | ) 659 | } 660 | } 661 | 662 | /// Applies a function to consecutive tokens while the given function returns 663 | /// `Some`. 664 | /// 665 | /// 💡 If this parser succeeds, the list produced is guaranteed to be non-empty. 666 | /// Feel free to `let assert` the result! 667 | /// 668 | pub fn take_map_while1( 669 | expecting: String, 670 | f: fn(tok) -> Option(a), 671 | ) -> Parser(List(a), tok, ctx) { 672 | use x <- do(take_map(expecting, f)) 673 | use xs <- do(take_map_while(f)) 674 | 675 | return([x, ..xs]) 676 | } 677 | 678 | // ERRORS ---------------------------------------------------------------------- 679 | 680 | /// 681 | /// 682 | /// 683 | /// 684 | pub type Error(tok) { 685 | BadParser(String) 686 | Custom(String) 687 | EndOfInput 688 | Expected(String, got: tok) 689 | Unexpected(tok) 690 | } 691 | 692 | /// 693 | /// A dead end represents a the point where a parser that had committed down a 694 | /// path failed. It contains the position of the failure, the [`Error`](#Error) 695 | /// describing the failure, and the context stack for any parsers that had run. 
696 | /// 697 | pub type DeadEnd(tok, ctx) { 698 | DeadEnd(pos: Span, problem: Error(tok), context: List(#(Span, ctx))) 699 | } 700 | 701 | type Bag(tok, ctx) { 702 | Empty 703 | Cons(Bag(tok, ctx), DeadEnd(tok, ctx)) 704 | Append(Bag(tok, ctx), Bag(tok, ctx)) 705 | } 706 | 707 | fn bag_from_state(state: State(tok, ctx), problem: Error(tok)) -> Bag(tok, ctx) { 708 | Cons(Empty, DeadEnd(state.pos, problem, state.ctx)) 709 | } 710 | 711 | fn to_deadends( 712 | bag: Bag(tok, ctx), 713 | acc: List(DeadEnd(tok, ctx)), 714 | ) -> List(DeadEnd(tok, ctx)) { 715 | case bag { 716 | Empty -> acc 717 | Cons(Empty, deadend) -> [deadend, ..acc] 718 | Cons(bag, deadend) -> to_deadends(bag, [deadend, ..acc]) 719 | Append(left, right) -> to_deadends(left, to_deadends(right, acc)) 720 | } 721 | } 722 | 723 | fn add_bag_to_step( 724 | step: Step(a, tok, ctx), 725 | left: Bag(tok, ctx), 726 | ) -> Step(a, tok, ctx) { 727 | case step { 728 | Cont(can_backtrack, a, state) -> Cont(can_backtrack, a, state) 729 | Fail(can_backtrack, right) -> Fail(can_backtrack, Append(left, right)) 730 | } 731 | } 732 | 733 | // CONTEXT --------------------------------------------------------------------- 734 | 735 | /// 736 | /// 737 | pub fn in(parser: Parser(a, tok, ctx), context: ctx) -> Parser(a, tok, ctx) { 738 | use state <- Parser 739 | 740 | case runwrap(push_context(state, context), parser) { 741 | Cont(can_backtrack, a, state) -> Cont(can_backtrack, a, pop_context(state)) 742 | Fail(can_backtrack, bag) -> Fail(can_backtrack, bag) 743 | } 744 | } 745 | 746 | fn push_context(state: State(tok, ctx), context: ctx) -> State(tok, ctx) { 747 | State(..state, ctx: [#(state.pos, context), ..state.ctx]) 748 | } 749 | 750 | fn pop_context(state: State(tok, ctx)) -> State(tok, ctx) { 751 | case state.ctx { 752 | [] -> state 753 | [_, ..context] -> State(..state, ctx: context) 754 | } 755 | } 756 | 757 | /// 758 | /// Run the given parser and then inspect it's state. 759 | /// 760 | pub fn inspect( 761 | parser: Parser(a, tok, ctx), 762 | message: String, 763 | ) -> Parser(a, tok, ctx) { 764 | use state <- Parser 765 | io.println(message <> ": ") 766 | 767 | runwrap(state, parser) 768 | |> io.debug 769 | } 770 | -------------------------------------------------------------------------------- /src/nibble/lexer.gleam: -------------------------------------------------------------------------------- 1 | //// Nibble takes a different approach to many other parser combinator libraries 2 | //// by also providing a _lexer_ combinator module that you use to turn an input 3 | //// string into a list of tokens. 4 | //// 5 | //// Parser combinators are a powerful and flexible way to build parsers, but 6 | //// they offer come at a performance cost compared to hand-written parsers or 7 | //// parser generators. On the other hand, writing a lexer by hand can be a bit 8 | //// tedious and difficult. Nibble aims to provide a happy middle-ground by making 9 | //// it easy to produce OK-performing lexers and then use parser combinators that 10 | //// can be much faster working on the smaller token stream. 11 | //// 12 | //// To see how Nibble's lexer works, let's consider the example from the 13 | //// [introduction guide](#): 14 | //// 15 | //// ```gleam 16 | //// fn lexer() { 17 | //// lexer.simple([ 18 | //// lexer.token("hello", Hello), 19 | //// lexer.variable("[a-zA-Z]", "\w", Name), 20 | //// lexer.whitespace(Nil) 21 | //// |> lexer.ignore 22 | //// ]) 23 | //// } 24 | //// ``` 25 | //// 26 | //// We have three _matchers_ here. 
One for the exact token "hello", one for at 27 | //// least one letter followed by any number of word characters, and one for any 28 | //// amount of whitespace. 29 | //// 30 | //// To see how these matchers work we'll look at the input string `"Hello Joe"`. 31 | //// Nibble looks at the input string one grapheme at a time, and runs each of 32 | //// the matchers in order. At the same time, it accumulates a list of the tokens 33 | //// it has produced so far. 34 | //// 35 | //// ``` 36 | //// Tokens : [] 37 | //// Input : Hello Joe 38 | //// ^ 39 | //// ``` 40 | //// 41 | //// If no matcher matches the input, Nibble will store the current grapheme and 42 | //// move on to the next one: 43 | //// 44 | //// ``` 45 | //// Tokens : [] 46 | //// Input : Hello Joe 47 | //// -^ 48 | //// ``` 49 | //// 50 | //// This continues until a matcher _does_ match the input: 51 | //// 52 | //// ``` 53 | //// Tokens : [] 54 | //// Input : Hello Joe 55 | //// ----^ 56 | //// ``` 57 | //// 58 | //// The accumulated string (known as a _lexeme_) is consumed, and whatever token 59 | //// value the matcher produces is added to the list of tokens: 60 | //// 61 | //// ``` 62 | //// Tokens : [Hello] 63 | //// Input : Joe 64 | //// ^ 65 | //// ``` 66 | //// 67 | //// Here we have some whitespace that would be matched by `lexer.whitespace`, by 68 | //// we passed that matcher to the [`lexer.ignore`](#ignore) comabinator. The matcher 69 | //// will still consume the input, but this time it will not produce a new token 70 | //// value: 71 | //// 72 | //// ``` 73 | //// Tokens : [Hello] 74 | //// Input : Joe 75 | //// ^ 76 | //// ``` 77 | //// 78 | //// As we expect, the lexer continues on accumulating input. When it reaches the 79 | //// end of the input string with a value it checks all the matches one last time 80 | //// to see if they will produce a final token. In this case the `lexer.variable` 81 | //// matcher will match the accumulated "Joe" and we're left with the following: 82 | //// 83 | //// ``` 84 | //// Tokens : [Hello, Name("Joe")] 85 | //// Input : 86 | //// ``` 87 | //// 88 | 89 | // IMPORTS --------------------------------------------------------------------- 90 | 91 | import gleam/float 92 | import gleam/int 93 | import gleam/list 94 | import gleam/regexp 95 | import gleam/result 96 | import gleam/set.{type Set} 97 | import gleam/string 98 | 99 | // TYPES ----------------------------------------------------------------------- 100 | 101 | /// A `Matcher` is how we define the rules that match parts of the input string 102 | /// and turn them into tokens. At it's core, a `Match` is a function that takes 103 | /// three arguments: 104 | /// 105 | /// - The current mode of the lexer 106 | /// 107 | /// - Any input we've accumulated so far 108 | /// 109 | /// - A lookahead of one grapheme 110 | /// 111 | /// With just these three arguments we can define arbitrary rules for consuming 112 | /// (or not) input and producing tokens! 113 | /// 114 | pub opaque type Matcher(a, mode) { 115 | Matcher(run: fn(mode, String, String) -> Match(a, mode)) 116 | } 117 | 118 | /// When writing a custom matcher, a `Match` is what you return to tell the lexer 119 | /// what to do next. 120 | /// 121 | pub type Match(a, mode) { 122 | /// Consume the accumulated input and produce a token with the given value. A 123 | /// `Keep` match can also transition the lexer into a new mode. 
124 | Keep(a, mode) 125 | /// Skip running any additional matchers this iteration, add the next grapheme 126 | /// to the accumulated input, and run the next iteration. 127 | Skip 128 | /// Drop the accumulated input and move on to the next iteration. A `Drop` 129 | /// match can also transition the lexer into a new mode. This match is useful 130 | /// for discarding input like whitespace or comments. 131 | Drop(mode) 132 | /// The matcher did not match the input, so the lexer should try the next 133 | /// matcher in the list (or fail if there are no more matchers). 134 | NoMatch 135 | } 136 | 137 | /// You use Nibble's lexer to turn a string into a list of tokens that your parser 138 | /// will eventually consume. The `Token` type contains the lexeme that was consumed 139 | /// (aka the raw input string), the source [`Span`](#Span) of the consumed lexeme 140 | /// to locate it in the source, and whatever token value your lexer produces. 141 | /// 142 | pub type Token(a) { 143 | Token(span: Span, lexeme: String, value: a) 144 | } 145 | 146 | /// A source span is a range into the source string that represents the start and 147 | /// end of a lexeme in a human-readable way. That means instead of a straight index 148 | /// into the string you get a row and column for the start and end instead! 149 | /// 150 | pub type Span { 151 | Span(row_start: Int, col_start: Int, row_end: Int, col_end: Int) 152 | } 153 | 154 | /// 155 | /// 156 | pub type Error { 157 | NoMatchFound(row: Int, col: Int, lexeme: String) 158 | } 159 | 160 | /// 161 | /// 162 | pub opaque type Lexer(a, mode) { 163 | Lexer(matchers: fn(mode) -> List(Matcher(a, mode))) 164 | } 165 | 166 | type State(a) { 167 | State( 168 | source: List(String), 169 | tokens: List(Token(a)), 170 | current: #(Int, Int, String), 171 | row: Int, 172 | col: Int, 173 | ) 174 | } 175 | 176 | // LEXER CONSTRUCTORS ---------------------------------------------------------- 177 | 178 | /// 179 | /// 180 | pub fn simple(matchers: List(Matcher(a, Nil))) -> Lexer(a, Nil) { 181 | Lexer(fn(_) { matchers }) 182 | } 183 | 184 | /// An `advanced` lexer is one that can change what matchers it uses based on the 185 | /// current mode. This is useful for sophisticated lexers that might need to 186 | /// handle things like interpolated strings or indentation-sensitive syntax. 187 | /// 188 | pub fn advanced(matchers: fn(mode) -> List(Matcher(a, mode))) -> Lexer(a, mode) { 189 | Lexer(fn(mode) { matchers(mode) }) 190 | } 191 | 192 | // MATCHER CONSTRUCTORS -------------------------------------------------------- 193 | 194 | /// Create a custom [`Matcher`](#Matcher) that will consume the input and produce 195 | /// a token with the given value if it is `Ok` or return a `NoMatch` if it fails. 196 | /// The first parameter is a function that takes the current lexeme and the 197 | /// second parameter is a one-grapheme lookahead. 198 | /// 199 | /// Matchers created with this convenience function cannot change the lexer's 200 | /// mode or skip ahead to the next iteration without consuming the input. 201 | /// 202 | pub fn keep(f: fn(String, String) -> Result(a, Nil)) -> Matcher(a, mode) { 203 | use mode, lexeme, lookahead <- Matcher 204 | 205 | f(lexeme, lookahead) 206 | |> result.map(Keep(_, mode)) 207 | |> result.unwrap(NoMatch) 208 | } 209 | 210 | /// Create a custom [`Matcher`](#Matcher) that will consume the input and move 211 | /// to the next iteration without producing a token if it is `True` or return a 212 | /// `NoMatch` if it fails. 
The first parameter is a function that takes the 213 | /// current lexeme and the second parameter is a one-grapheme lookahead. 214 | /// 215 | /// Matchers created with this convenience function cannot change the lexer's 216 | /// mode or skip ahead to the next iteration without consuming the input. 217 | /// 218 | pub fn drop(f: fn(String, String) -> Bool) -> Matcher(a, mode) { 219 | use mode, lexeme, lookahead <- Matcher 220 | 221 | case f(lexeme, lookahead) { 222 | True -> Drop(mode) 223 | False -> NoMatch 224 | } 225 | } 226 | 227 | /// Create a custom [`Matcher`](#Matcher) that is flexible enough to do anything 228 | /// you want! The first parameter is a function that takes the current lexer mode, 229 | /// the current lexeme, and a one-grapheme lookahead. 230 | /// 231 | /// The function returns a [`Match`](#Match) that tells the lexer what to do next. 232 | /// 233 | pub fn custom(f: fn(mode, String, String) -> Match(a, mode)) -> Matcher(a, mode) { 234 | Matcher(f) 235 | } 236 | 237 | /// Take an existing matcher and transform it by applying a function to the value 238 | /// it produces. 239 | /// 240 | pub fn map(matcher: Matcher(a, mode), f: fn(a) -> b) -> Matcher(b, mode) { 241 | use mode, lexeme, lookahead <- Matcher 242 | 243 | case matcher.run(mode, lexeme, lookahead) { 244 | Keep(value, mode) -> Keep(f(value), mode) 245 | Skip -> Skip 246 | Drop(mode) -> Drop(mode) 247 | NoMatch -> NoMatch 248 | } 249 | } 250 | 251 | /// Take an existing matcher and transform it by applying a function to the value 252 | /// it produces. The function you provide can return a different [`Match`](#Match) 253 | /// so you can, for example, take a matcher that `Keep`s a value and turn it into 254 | /// a matcher that `Drop`s the value instead. This is how [`ignore`](#ignore) works! 255 | /// 256 | pub fn then( 257 | matcher: Matcher(a, mode), 258 | f: fn(a) -> Match(b, mode), 259 | ) -> Matcher(b, mode) { 260 | use mode, lexeme, lookahead <- Matcher 261 | 262 | case matcher.run(mode, lexeme, lookahead) { 263 | Keep(value, _) -> f(value) 264 | Skip -> Skip 265 | Drop(mode) -> Drop(mode) 266 | NoMatch -> NoMatch 267 | } 268 | } 269 | 270 | /// Take an existing matcher and transition to a new mode. This only runs if 271 | /// the matcher is successful and either `Keep`s or `Drop`s a value. 272 | /// 273 | /// 274 | pub fn into(matcher: Matcher(a, mode), f: fn(mode) -> mode) -> Matcher(a, mode) { 275 | use mode, lexeme, lookahead <- Matcher 276 | 277 | case matcher.run(mode, lexeme, lookahead) { 278 | Keep(value, mode) -> Keep(value, f(mode)) 279 | Skip -> Skip 280 | Drop(mode) -> Drop(f(mode)) 281 | NoMatch -> NoMatch 282 | } 283 | } 284 | 285 | /// Take a matcher that might `Keep` anything and silently `Drop` anything it 286 | /// produces instead. This is useful for things like whitespace or comments 287 | /// where you want to consume some input but you don't want to emit a token. 288 | /// 289 | pub fn ignore(matcher: Matcher(a, mode)) -> Matcher(b, mode) { 290 | use mode, lexeme, lookahead <- Matcher 291 | 292 | case matcher.run(mode, lexeme, lookahead) { 293 | Keep(_, mode) -> Drop(mode) 294 | Skip -> Skip 295 | Drop(mode) -> Drop(mode) 296 | NoMatch -> NoMatch 297 | } 298 | } 299 | 300 | // COMMON MATCHERS ------------------------------------------------------------- 301 | 302 | /// Match exactly the given string with no lookahead and produce the given value.
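///
/// For example, the README's lexer uses this matcher for its punctuation tokens:
///
/// ```gleam
/// lexer.simple([
///   lexer.token("(", LParen),
///   lexer.token(")", RParen),
///   lexer.token(",", Comma),
///   // ...
/// ])
/// ```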
303 | /// 304 | pub fn token(str: String, value: a) -> Matcher(a, mode) { 305 | use mode, lexeme, _ <- Matcher 306 | 307 | case lexeme == str { 308 | True -> Keep(value, mode) 309 | False -> NoMatch 310 | } 311 | } 312 | 313 | /// Match exactly the given string only when the lookahead is matched by the given 314 | /// breaker _regex_. This is an alias of [`keyword`](#keyword) but it can be 315 | /// helpful to separate the two concepts. 316 | /// 317 | pub fn symbol(str: String, breaker: String, value: a) -> Matcher(a, mode) { 318 | let assert Ok(break) = regexp.from_string(breaker) 319 | 320 | use mode, lexeme, lookahead <- Matcher 321 | 322 | case lexeme == str && { lookahead == "" || regexp.check(break, lookahead) } { 323 | True -> Keep(value, mode) 324 | False -> NoMatch 325 | } 326 | } 327 | 328 | /// Match exactly the given string only when the lookahead is matched by the given 329 | /// breaker _regex_. Keywords are exact strings like `let` but you wouldn't want 330 | /// to lex `letter` as `[Let, Var("tter")]` so the breaker is used so you can say 331 | /// what characters should trigger a match. 332 | /// 333 | pub fn keyword(str: String, breaker: String, value: a) -> Matcher(a, mode) { 334 | let assert Ok(break) = regexp.from_string(breaker) 335 | 336 | use mode, lexeme, lookahead <- Matcher 337 | 338 | case lexeme == str && { lookahead == "" || regexp.check(break, lookahead) } { 339 | True -> Keep(value, mode) 340 | False -> NoMatch 341 | } 342 | } 343 | 344 | /// 345 | /// 346 | pub fn int(to_value: fn(Int) -> a) -> Matcher(a, mode) { 347 | int_with_separator("", to_value) 348 | } 349 | 350 | /// 351 | /// 352 | pub fn int_with_separator( 353 | separator: String, 354 | to_value: fn(Int) -> a, 355 | ) -> Matcher(a, mode) { 356 | let assert Ok(digit) = regexp.from_string("[0-9" <> separator <> "]") 357 | let assert Ok(integer) = regexp.from_string("^-*[0-9" <> separator <> "]+$") 358 | 359 | use mode, lexeme, lookahead <- Matcher 360 | 361 | case !regexp.check(digit, lookahead) && regexp.check(integer, lexeme) { 362 | False -> NoMatch 363 | True -> { 364 | let assert Ok(num) = 365 | lexeme 366 | |> string.replace(separator, "") 367 | |> int.parse 368 | Keep(to_value(num), mode) 369 | } 370 | } 371 | } 372 | 373 | /// 374 | /// 375 | pub fn float(to_value: fn(Float) -> a) -> Matcher(a, mode) { 376 | float_with_separator("", to_value) 377 | } 378 | 379 | /// 380 | /// 381 | pub fn float_with_separator( 382 | separator: String, 383 | to_value: fn(Float) -> a, 384 | ) -> Matcher(a, mode) { 385 | let assert Ok(digit) = regexp.from_string("[0-9" <> separator <> "]") 386 | let assert Ok(integer) = regexp.from_string("^-*[0-9" <> separator <> "]+$") 387 | let assert Ok(number) = 388 | regexp.from_string( 389 | "^-*[0-9" <> separator <> "]+\\.[0-9" <> separator <> "]+$", 390 | ) 391 | 392 | use mode, lexeme, lookahead <- Matcher 393 | let is_int = !regexp.check(digit, lookahead) && regexp.check(integer, lexeme) 394 | let is_float = !regexp.check(digit, lookahead) && regexp.check(number, lexeme) 395 | 396 | case lexeme { 397 | "." 
if is_int -> NoMatch 398 | 399 | _ if is_float -> { 400 | let assert Ok(num) = 401 | lexeme 402 | |> string.replace(separator, "") 403 | |> float.parse 404 | Keep(to_value(num), mode) 405 | } 406 | 407 | _ -> NoMatch 408 | } 409 | } 410 | 411 | pub fn number( 412 | from_int: fn(Int) -> a, 413 | from_float: fn(Float) -> a, 414 | ) -> Matcher(a, mode) { 415 | number_with_separator("", from_int, from_float) 416 | } 417 | 418 | pub fn number_with_separator( 419 | separator: String, 420 | from_int: fn(Int) -> a, 421 | from_float: fn(Float) -> a, 422 | ) -> Matcher(a, mode) { 423 | let assert Ok(digit) = regexp.from_string("[0-9" <> separator <> "]") 424 | let assert Ok(integer) = regexp.from_string("^-*[0-9" <> separator <> "]+$") 425 | let assert Ok(number) = 426 | regexp.from_string( 427 | "^-*[0-9" <> separator <> "]+\\.[0-9" <> separator <> "]+$", 428 | ) 429 | 430 | use mode, lexeme, lookahead <- Matcher 431 | let is_int = !regexp.check(digit, lookahead) && regexp.check(integer, lexeme) 432 | let is_float = !regexp.check(digit, lookahead) && regexp.check(number, lexeme) 433 | 434 | case lexeme, lookahead { 435 | ".", _ if is_int -> NoMatch 436 | _, "." if is_int -> NoMatch 437 | 438 | _, _ if is_int -> { 439 | let assert Ok(num) = 440 | lexeme 441 | |> string.replace(separator, "") 442 | |> int.parse 443 | Keep(from_int(num), mode) 444 | } 445 | 446 | _, _ if is_float -> { 447 | let assert Ok(num) = 448 | lexeme 449 | |> string.replace(separator, "") 450 | |> float.parse 451 | Keep(from_float(num), mode) 452 | } 453 | 454 | _, _ -> NoMatch 455 | } 456 | } 457 | 458 | /// 459 | /// 460 | pub fn string(char: String, to_value: fn(String) -> a) -> Matcher(a, mode) { 461 | let assert Ok(is_string) = 462 | regexp.from_string( 463 | "^" <> char <> "([^" <> char <> "\\\\]|\\\\[\\s\\S])*" <> char <> "$", 464 | ) 465 | use mode, lexeme, _ <- Matcher 466 | 467 | case regexp.check(is_string, lexeme) { 468 | True -> 469 | lexeme 470 | |> string.drop_start(1) 471 | |> string.drop_end(1) 472 | |> to_value 473 | |> Keep(mode) 474 | False -> NoMatch 475 | } 476 | } 477 | 478 | /// 479 | /// 480 | pub fn identifier( 481 | start: String, 482 | inner: String, 483 | reserved: Set(String), 484 | to_value: fn(String) -> a, 485 | ) -> Matcher(a, mode) { 486 | let assert Ok(ident) = regexp.from_string("^" <> start <> inner <> "*$") 487 | let assert Ok(inner) = regexp.from_string(inner) 488 | 489 | use mode, lexeme, lookahead <- Matcher 490 | 491 | case regexp.check(inner, lookahead), regexp.check(ident, lexeme) { 492 | True, True -> Skip 493 | False, True -> 494 | case set.contains(reserved, lexeme) { 495 | True -> NoMatch 496 | False -> Keep(to_value(lexeme), mode) 497 | } 498 | _, _ -> NoMatch 499 | } 500 | } 501 | 502 | /// 503 | /// 504 | pub fn try_identifier( 505 | start: String, 506 | inner: String, 507 | reserved: Set(String), 508 | to_value: fn(String) -> a, 509 | ) -> Result(Matcher(a, mode), regexp.CompileError) { 510 | use ident <- result.then(regexp.from_string("^" <> start <> inner <> "*$")) 511 | use inner <- result.map(regexp.from_string(inner)) 512 | 513 | use mode, lexeme, lookahead <- Matcher 514 | 515 | case regexp.check(inner, lookahead), regexp.check(ident, lexeme) { 516 | True, True -> Skip 517 | False, True -> 518 | case set.contains(reserved, lexeme) { 519 | True -> NoMatch 520 | False -> Keep(to_value(lexeme), mode) 521 | } 522 | _, _ -> NoMatch 523 | } 524 | } 525 | 526 | /// 527 | /// 528 | pub fn variable( 529 | reserved: Set(String), 530 | to_value: fn(String) -> a, 531 | ) -> 
Matcher(a, mode) { 532 | identifier("[a-z]", "[a-zA-Z0-9_]", reserved, to_value) 533 | } 534 | 535 | /// 536 | /// 537 | pub fn spaces(token: a) -> Matcher(a, mode) { 538 | spaces_(fn(_) { token }) 539 | } 540 | 541 | /// 542 | /// 543 | pub fn spaces_(to_value: fn(String) -> a) -> Matcher(a, mode) { 544 | let assert Ok(spaces) = regexp.from_string("^[ \\t]+") 545 | 546 | use mode, lexeme, _ <- Matcher 547 | 548 | case regexp.check(spaces, lexeme) { 549 | True -> Keep(to_value(lexeme), mode) 550 | False -> NoMatch 551 | } 552 | } 553 | 554 | /// 555 | /// 556 | pub fn whitespace(token: a) -> Matcher(a, mode) { 557 | let assert Ok(whitespace) = regexp.from_string("^\\s+$") 558 | 559 | use mode, lexeme, _ <- Matcher 560 | 561 | case regexp.check(whitespace, lexeme) { 562 | True -> Keep(token, mode) 563 | False -> NoMatch 564 | } 565 | } 566 | 567 | /// 568 | pub fn comment(start: String, to_value: fn(String) -> a) -> Matcher(a, mode) { 569 | let drop_length = string.length(start) 570 | use mode, lexeme, lookahead <- Matcher 571 | 572 | case string.starts_with(lexeme, start), lookahead { 573 | True, "\n" -> 574 | lexeme 575 | |> string.drop_start(drop_length) 576 | |> to_value 577 | |> Keep(mode) 578 | True, _ -> Skip 579 | False, _ -> NoMatch 580 | } 581 | } 582 | 583 | // RUNNING A LEXER ------------------------------------------------------------- 584 | 585 | /// 586 | /// 587 | pub fn run( 588 | source: String, 589 | lexer: Lexer(a, Nil), 590 | ) -> Result(List(Token(a)), Error) { 591 | string.to_graphemes(source) 592 | |> State([], #(1, 1, ""), 1, 1) 593 | |> do_run(lexer, Nil, _) 594 | } 595 | 596 | /// 597 | /// 598 | pub fn run_advanced( 599 | source: String, 600 | mode: mode, 601 | lexer: Lexer(a, mode), 602 | ) -> Result(List(Token(a)), Error) { 603 | do_run(lexer, mode, State(string.to_graphemes(source), [], #(1, 1, ""), 1, 1)) 604 | } 605 | 606 | fn do_run( 607 | lexer: Lexer(a, mode), 608 | mode: mode, 609 | state: State(a), 610 | ) -> Result(List(Token(a)), Error) { 611 | let matchers = lexer.matchers(mode) 612 | 613 | case state.source, state.current { 614 | // If we're at the end of the source and there's no lexeme left to match, 615 | // we're done! 616 | // 617 | // We have to remember to reverse the list of tokens because we've been building 618 | // it backwards using `[token, ..state.tokens]`. This is much quicker than 619 | // trying to prepend to the list as we go. 620 | [], #(_, _, "") -> Ok(list.reverse(state.tokens)) 621 | 622 | // If we're at the end of the source but there's still a lexeme left to match, 623 | // we'll run the final `do_match` and return the result. If we get a `NoMatch` 624 | // at this point something went wrong. 625 | [], #(start_row, start_col, lexeme) -> 626 | case do_match(mode, lexeme, "", matchers) { 627 | NoMatch -> Error(NoMatchFound(start_row, start_col, lexeme)) 628 | Skip -> Error(NoMatchFound(start_row, start_col, lexeme)) 629 | Drop(_) -> Ok(list.reverse(state.tokens)) 630 | Keep(value, _) -> { 631 | let span = Span(start_row, start_col, state.row, state.col) 632 | let token = Token(span, lexeme, value) 633 | 634 | Ok(list.reverse([token, ..state.tokens])) 635 | } 636 | } 637 | 638 | // When lexing we include a one-grapheme lookahead to help us with things like 639 | // matching identifiers or other mode-aware tokens. This just takes the 640 | // skip grapheme from the source (we call it `lookahead` here) and calls the 641 | // `do_match` function with it and some other bits. 
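// As an illustrative sketch: given the input `let x` and a
// `keyword("let", "\\s", Let)` matcher (the token type and breaker here are
// made up for the example), the accumulated lexeme grows "l" -> "le" -> "let"
// across iterations (each returning `NoMatch`) until the lookahead is " ", at
// which point the matcher returns `Keep` and the lookahead grapheme becomes
// the start of the next lexeme.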
642 | [lookahead, ..rest], #(start_row, start_col, lexeme) -> { 643 | let row = next_row(state.row, lookahead) 644 | let col = next_col(state.col, lookahead) 645 | 646 | case do_match(mode, lexeme, lookahead, matchers) { 647 | Keep(value, mode) -> { 648 | let span = Span(start_row, start_col, state.row, state.col) 649 | let token = Token(span, lexeme, value) 650 | 651 | do_run( 652 | lexer, 653 | mode, 654 | State( 655 | source: rest, 656 | tokens: [token, ..state.tokens], 657 | current: #(state.row, state.col, lookahead), 658 | row: row, 659 | col: col, 660 | ), 661 | ) 662 | } 663 | 664 | // A skip says that a matcher has matched the lexeme but still wants to 665 | // consume more input. This is mostly useful for things like identifiers 666 | // where the current lexeme is in the set of reserved words but we can 667 | // see the lookahead and know that it's not a reserved word. 668 | Skip -> 669 | do_run( 670 | lexer, 671 | mode, 672 | State( 673 | source: rest, 674 | tokens: state.tokens, 675 | current: #(start_row, start_col, lexeme <> lookahead), 676 | row: row, 677 | col: col, 678 | ), 679 | ) 680 | 681 | // A drop says that we've matched the lexeme but we don't want to emit a 682 | // token. This is mostly useful for things like comments or whitespace that 683 | // users may not want to appear in the final token stream but do want to 684 | // handle in the lexer. 685 | Drop(mode) -> 686 | do_run( 687 | lexer, 688 | mode, 689 | State( 690 | source: rest, 691 | tokens: state.tokens, 692 | current: #(state.row, state.col, lookahead), 693 | row: row, 694 | col: col, 695 | ), 696 | ) 697 | 698 | NoMatch -> 699 | do_run( 700 | lexer, 701 | mode, 702 | State( 703 | source: rest, 704 | tokens: state.tokens, 705 | current: #(start_row, start_col, lexeme <> lookahead), 706 | row: row, 707 | col: col, 708 | ), 709 | ) 710 | } 711 | } 712 | } 713 | } 714 | 715 | fn do_match( 716 | mode: mode, 717 | str: String, 718 | lookahead: String, 719 | matchers: List(Matcher(a, mode)), 720 | ) -> Match(a, mode) { 721 | use _, matcher <- list.fold_until(matchers, NoMatch) 722 | 723 | case matcher.run(mode, str, lookahead) { 724 | Keep(_, _) as match -> list.Stop(match) 725 | Skip -> list.Stop(Skip) 726 | Drop(_) as match -> list.Stop(match) 727 | NoMatch -> list.Continue(NoMatch) 728 | } 729 | } 730 | 731 | // UTILS ----------------------------------------------------------------------- 732 | 733 | fn next_col(col: Int, str: String) -> Int { 734 | case str { 735 | "\n" -> 1 736 | _ -> col + 1 737 | } 738 | } 739 | 740 | fn next_row(row: Int, str: String) -> Int { 741 | case str { 742 | "\n" -> row + 1 743 | _ -> row 744 | } 745 | } 746 | -------------------------------------------------------------------------------- /src/nibble/pratt.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/list 4 | import nibble.{type Parser} 5 | 6 | // TYPES ----------------------------------------------------------------------- 7 | 8 | pub opaque type Config(a, tok, ctx) { 9 | Config( 10 | one_of: List(fn(Config(a, tok, ctx)) -> Parser(a, tok, ctx)), 11 | and_then_one_of: List(Operator(a, tok, ctx)), 12 | spaces: Parser(Nil, tok, ctx), 13 | ) 14 | } 15 | 16 | pub opaque type Operator(a, tok, ctx) { 17 | Operator(fn(Config(a, tok, ctx)) -> #(Int, fn(a) -> Parser(a, tok, ctx))) 18 | } 19 | 20 | // 21 | 22 | pub fn expression( 23 | one_of first: List(fn(Config(a, tok, ctx)) -> Parser(a, 
tok, ctx)), 24 | and_then then: List(Operator(a, tok, ctx)), 25 | dropping spaces: Parser(Nil, tok, ctx), 26 | ) -> Parser(a, tok, ctx) { 27 | let config = Config(first, then, spaces) 28 | sub_expression(config, 0) 29 | } 30 | 31 | pub fn sub_expression( 32 | config: Config(a, tok, ctx), 33 | precedence: Int, 34 | ) -> Parser(a, tok, ctx) { 35 | let expr = { 36 | use <- nibble.lazy 37 | config.one_of 38 | |> list.map(fn(p) { p(config) }) 39 | |> nibble.one_of 40 | } 41 | 42 | let go = fn(expr) { 43 | use _ <- nibble.do(config.spaces) 44 | 45 | nibble.one_of([ 46 | operation(expr, config, precedence) 47 | |> nibble.map(nibble.Continue), 48 | nibble.return(expr) 49 | |> nibble.map(nibble.Break), 50 | ]) 51 | } 52 | 53 | use _ <- nibble.do(config.spaces) 54 | use e <- nibble.do(expr) 55 | 56 | nibble.loop(e, go) 57 | } 58 | 59 | fn operation( 60 | expr: a, 61 | config: Config(a, tok, ctx), 62 | current_precedence: Int, 63 | ) -> Parser(a, tok, ctx) { 64 | config.and_then_one_of 65 | |> list.filter_map(fn(operator) { 66 | let Operator(op) = operator 67 | case op(config) { 68 | #(precedence, parser) if precedence > current_precedence -> 69 | Ok(parser(expr)) 70 | 71 | _ -> Error(Nil) 72 | } 73 | }) 74 | |> nibble.one_of() 75 | } 76 | 77 | // 78 | 79 | pub fn prefix( 80 | precedence: Int, 81 | operator: Parser(Nil, tok, ctx), 82 | apply: fn(a) -> a, 83 | ) -> fn(Config(a, tok, ctx)) -> Parser(a, tok, ctx) { 84 | fn(config) { 85 | use _ <- nibble.do(operator) 86 | use subexpr <- nibble.do(sub_expression(config, precedence)) 87 | 88 | nibble.return(apply(subexpr)) 89 | } 90 | } 91 | 92 | pub fn infix_left( 93 | precedence: Int, 94 | operator: Parser(Nil, tok, ctx), 95 | apply: fn(a, a) -> a, 96 | ) -> Operator(a, tok, ctx) { 97 | make_infix(#(precedence, precedence), operator, apply) 98 | } 99 | 100 | pub fn infix_right( 101 | precedence: Int, 102 | operator: Parser(Nil, tok, ctx), 103 | apply: fn(a, a) -> a, 104 | ) -> Operator(a, tok, ctx) { 105 | make_infix(#(precedence, precedence - 1), operator, apply) 106 | } 107 | 108 | pub fn postfix( 109 | precedence: Int, 110 | operator: Parser(Nil, tok, ctx), 111 | apply: fn(a) -> a, 112 | ) -> Operator(a, tok, ctx) { 113 | use _ <- Operator 114 | #(precedence, fn(lhs) { 115 | use _ <- nibble.do(operator) 116 | nibble.return(apply(lhs)) 117 | }) 118 | } 119 | 120 | fn make_infix( 121 | precedence: #(Int, Int), 122 | operator: Parser(Nil, tok, ctx), 123 | apply: fn(a, a) -> a, 124 | ) -> Operator(a, tok, ctx) { 125 | let #(left_precedence, right_precedence) = precedence 126 | use config <- Operator 127 | #(left_precedence, fn(lhs) { 128 | use _ <- nibble.do(operator) 129 | use subexpr <- nibble.do(sub_expression(config, right_precedence)) 130 | 131 | nibble.return(apply(lhs, subexpr)) 132 | }) 133 | } 134 | -------------------------------------------------------------------------------- /src/nibble/predicates.gleam: -------------------------------------------------------------------------------- 1 | import gleam/list 2 | import gleam/string 3 | 4 | pub fn string(str: String, predicate: fn(String) -> Bool) -> Bool { 5 | str != "" && list.all(string.to_graphemes(str), predicate) 6 | } 7 | 8 | pub fn is_lower_ascii(grapheme: String) -> Bool { 9 | case grapheme { 10 | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" -> True 11 | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" -> True 12 | "w" | "x" | "y" | "z" -> True 13 | _ -> False 14 | } 15 | } 16 | 17 | pub fn is_upper_ascii(grapheme: String) -> Bool { 18 | case grapheme { 
19 | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" -> True 20 | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" -> True 21 | "W" | "X" | "Y" | "Z" -> True 22 | _ -> False 23 | } 24 | } 25 | 26 | pub fn is_digit(grapheme: String) -> Bool { 27 | case grapheme { 28 | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -> True 29 | _ -> False 30 | } 31 | } 32 | 33 | pub fn is_whitespace(grapheme: String) -> Bool { 34 | case grapheme { 35 | " " | "\t" | "\r" | "\n" -> True 36 | _ -> False 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/nibble/vendor/glearray.gleam: -------------------------------------------------------------------------------- 1 | //// The following module is vendored from the Open Source package "glearray" with 2 | //// functions for working with iterators removed. The original source can be found 3 | //// here: 4 | //// 5 | //// https://github.com/lunagl/glearray/ 6 | //// 7 | //// The original license is included below: 8 | //// 9 | //// Apache License 10 | //// Version 2.0, January 2004 11 | //// http://www.apache.org/licenses/ 12 | //// 13 | //// TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 14 | //// 15 | //// 1. Definitions. 16 | //// 17 | //// "License" shall mean the terms and conditions for use, reproduction, 18 | //// and distribution as defined by Sections 1 through 9 of this document. 19 | //// 20 | //// "Licensor" shall mean the copyright owner or entity authorized by 21 | //// the copyright owner that is granting the License. 22 | //// 23 | //// "Legal Entity" shall mean the union of the acting entity and all 24 | //// other entities that control, are controlled by, or are under common 25 | //// control with that entity. For the purposes of this definition, 26 | //// "control" means (i) the power, direct or indirect, to cause the 27 | //// direction or management of such entity, whether by contract or 28 | //// otherwise, or (ii) ownership of fifty percent (50%) or more of the 29 | //// outstanding shares, or (iii) beneficial ownership of such entity. 30 | //// 31 | //// "You" (or "Your") shall mean an individual or Legal Entity 32 | //// exercising permissions granted by this License. 33 | //// 34 | //// "Source" form shall mean the preferred form for making modifications, 35 | //// including but not limited to software source code, documentation 36 | //// source, and configuration files. 37 | //// 38 | //// "Object" form shall mean any form resulting from mechanical 39 | //// transformation or translation of a Source form, including but 40 | //// not limited to compiled object code, generated documentation, 41 | //// and conversions to other media types. 42 | //// 43 | //// "Work" shall mean the work of authorship, whether in Source or 44 | //// Object form, made available under the License, as indicated by a 45 | //// copyright notice that is included in or attached to the work 46 | //// (an example is provided in the Appendix below). 47 | //// 48 | //// "Derivative Works" shall mean any work, whether in Source or Object 49 | //// form, that is based on (or derived from) the Work and for which the 50 | //// editorial revisions, annotations, elaborations, or other modifications 51 | //// represent, as a whole, an original work of authorship. 
For the purposes 52 | //// of this License, Derivative Works shall not include works that remain 53 | //// separable from, or merely link (or bind by name) to the interfaces of, 54 | //// the Work and Derivative Works thereof. 55 | //// 56 | //// "Contribution" shall mean any work of authorship, including 57 | //// the original version of the Work and any modifications or additions 58 | //// to that Work or Derivative Works thereof, that is intentionally 59 | //// submitted to Licensor for inclusion in the Work by the copyright owner 60 | //// or by an individual or Legal Entity authorized to submit on behalf of 61 | //// the copyright owner. For the purposes of this definition, "submitted" 62 | //// means any form of electronic, verbal, or written communication sent 63 | //// to the Licensor or its representatives, including but not limited to 64 | //// communication on electronic mailing lists, source code control systems, 65 | //// and issue tracking systems that are managed by, or on behalf of, the 66 | //// Licensor for the purpose of discussing and improving the Work, but 67 | //// excluding communication that is conspicuously marked or otherwise 68 | //// designated in writing by the copyright owner as "Not a Contribution." 69 | //// 70 | //// "Contributor" shall mean Licensor and any individual or Legal Entity 71 | //// on behalf of whom a Contribution has been received by Licensor and 72 | //// subsequently incorporated within the Work. 73 | //// 74 | //// 2. Grant of Copyright License. Subject to the terms and conditions of 75 | //// this License, each Contributor hereby grants to You a perpetual, 76 | //// worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | //// copyright license to reproduce, prepare Derivative Works of, 78 | //// publicly display, publicly perform, sublicense, and distribute the 79 | //// Work and such Derivative Works in Source or Object form. 80 | //// 81 | //// 3. Grant of Patent License. Subject to the terms and conditions of 82 | //// this License, each Contributor hereby grants to You a perpetual, 83 | //// worldwide, non-exclusive, no-charge, royalty-free, irrevocable 84 | //// (except as stated in this section) patent license to make, have made, 85 | //// use, offer to sell, sell, import, and otherwise transfer the Work, 86 | //// where such license applies only to those patent claims licensable 87 | //// by such Contributor that are necessarily infringed by their 88 | //// Contribution(s) alone or by combination of their Contribution(s) 89 | //// with the Work to which such Contribution(s) was submitted. If You 90 | //// institute patent litigation against any entity (including a 91 | //// cross-claim or counterclaim in a lawsuit) alleging that the Work 92 | //// or a Contribution incorporated within the Work constitutes direct 93 | //// or contributory patent infringement, then any patent licenses 94 | //// granted to You under this License for that Work shall terminate 95 | //// as of the date such litigation is filed. 96 | //// 97 | //// 4. Redistribution. 
You may reproduce and distribute copies of the 98 | //// Work or Derivative Works thereof in any medium, with or without 99 | //// modifications, and in Source or Object form, provided that You 100 | //// meet the following conditions: 101 | //// 102 | //// (a) You must give any other recipients of the Work or 103 | //// Derivative Works a copy of this License; and 104 | //// 105 | //// (b) You must cause any modified files to carry prominent notices 106 | //// stating that You changed the files; and 107 | //// 108 | //// (c) You must retain, in the Source form of any Derivative Works 109 | //// that You distribute, all copyright, patent, trademark, and 110 | //// attribution notices from the Source form of the Work, 111 | //// excluding those notices that do not pertain to any part of 112 | //// the Derivative Works; and 113 | //// 114 | //// (d) If the Work includes a "NOTICE" text file as part of its 115 | //// distribution, then any Derivative Works that You distribute must 116 | //// include a readable copy of the attribution notices contained 117 | //// within such NOTICE file, excluding those notices that do not 118 | //// pertain to any part of the Derivative Works, in at least one 119 | //// of the following places: within a NOTICE text file distributed 120 | //// as part of the Derivative Works; within the Source form or 121 | //// documentation, if provided along with the Derivative Works; or, 122 | //// within a display generated by the Derivative Works, if and 123 | //// wherever such third-party notices normally appear. The contents 124 | //// of the NOTICE file are for informational purposes only and 125 | //// do not modify the License. You may add Your own attribution 126 | //// notices within Derivative Works that You distribute, alongside 127 | //// or as an addendum to the NOTICE text from the Work, provided 128 | //// that such additional attribution notices cannot be construed 129 | //// as modifying the License. 130 | //// 131 | //// You may add Your own copyright statement to Your modifications and 132 | //// may provide additional or different license terms and conditions 133 | //// for use, reproduction, or distribution of Your modifications, or 134 | //// for any such Derivative Works as a whole, provided Your use, 135 | //// reproduction, and distribution of the Work otherwise complies with 136 | //// the conditions stated in this License. 137 | //// 138 | //// 5. Submission of Contributions. Unless You explicitly state otherwise, 139 | //// any Contribution intentionally submitted for inclusion in the Work 140 | //// by You to the Licensor shall be under the terms and conditions of 141 | //// this License, without any additional terms or conditions. 142 | //// Notwithstanding the above, nothing herein shall supersede or modify 143 | //// the terms of any separate license agreement you may have executed 144 | //// with Licensor regarding such Contributions. 145 | //// 146 | //// 6. Trademarks. This License does not grant permission to use the trade 147 | //// names, trademarks, service marks, or product names of the Licensor, 148 | //// except as required for reasonable and customary use in describing the 149 | //// origin of the Work and reproducing the content of the NOTICE file. 150 | //// 151 | //// 7. Disclaimer of Warranty. 
Unless required by applicable law or 152 | //// agreed to in writing, Licensor provides the Work (and each 153 | //// Contributor provides its Contributions) on an "AS IS" BASIS, 154 | //// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 155 | //// implied, including, without limitation, any warranties or conditions 156 | //// of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 157 | //// PARTICULAR PURPOSE. You are solely responsible for determining the 158 | //// appropriateness of using or redistributing the Work and assume any 159 | //// risks associated with Your exercise of permissions under this License. 160 | //// 161 | //// 8. Limitation of Liability. In no event and under no legal theory, 162 | //// whether in tort (including negligence), contract, or otherwise, 163 | //// unless required by applicable law (such as deliberate and grossly 164 | //// negligent acts) or agreed to in writing, shall any Contributor be 165 | //// liable to You for damages, including any direct, indirect, special, 166 | //// incidental, or consequential damages of any character arising as a 167 | //// result of this License or out of the use or inability to use the 168 | //// Work (including but not limited to damages for loss of goodwill, 169 | //// work stoppage, computer failure or malfunction, or any and all 170 | //// other commercial damages or losses), even if such Contributor 171 | //// has been advised of the possibility of such damages. 172 | //// 173 | //// 9. Accepting Warranty or Additional Liability. While redistributing 174 | //// the Work or Derivative Works thereof, You may choose to offer, 175 | //// and charge a fee for, acceptance of support, warranty, indemnity, 176 | //// or other liability obligations and/or rights consistent with this 177 | //// License. However, in accepting such obligations, You may act only 178 | //// on Your own behalf and on Your sole responsibility, not on behalf 179 | //// of any other Contributor, and only if You agree to indemnify, 180 | //// defend, and hold each Contributor harmless for any liability 181 | //// incurred by, or claims asserted against, such Contributor by reason 182 | //// of your accepting any such warranty or additional liability. 183 | //// 184 | //// END OF TERMS AND CONDITIONS 185 | //// 186 | //// Copyright 2023 Frederick Schwalbe 187 | //// 188 | //// Licensed under the Apache License, Version 2.0 (the "License"); 189 | //// you may not use this file except in compliance with the License. 190 | //// You may obtain a copy of the License at 191 | //// 192 | //// http://www.apache.org/licenses/LICENSE-2.0 193 | //// 194 | //// Unless required by applicable law or agreed to in writing, software 195 | //// distributed under the License is distributed on an "AS IS" BASIS, 196 | //// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 197 | //// See the License for the specific language governing permissions and 198 | //// limitations under the License. 199 | 200 | /// Arrays are ordered sequences of elements, similar to lists. 201 | /// 202 | /// Like everything in Gleam, arrays are immutable. 203 | /// As opposed to linked lists, arrays store their elements in a contiguous 204 | /// slice of memory, therefore allowing very fast indexed access. 205 | /// 206 | /// Modifying an array however takes linear time and memory because it requires 207 | /// copying the entire array. 
208 | /// 209 | /// ### Implementation 210 | /// 211 | /// Arrays are represented as tuples when compiled to Erlang, and JavaScript 212 | /// arrays when compiled to JavaScript. 213 | /// 214 | /// Also note that this library has no connection to Erlang's 215 | /// [`array`](https://www.erlang.org/doc/man/array.html) module, which 216 | /// implements a tree structure for efficient reading and writing. 217 | /// 218 | pub type Array(a) 219 | 220 | /// Returns an empty array. 221 | /// 222 | /// ## Examples 223 | /// 224 | /// ```gleam 225 | /// > new() 226 | /// from_list([]) 227 | /// ``` 228 | /// 229 | @external(erlang, "glearray_ffi", "new") 230 | @external(javascript, "../../glearray_ffi.mjs", "newArray") 231 | pub fn new() -> Array(a) 232 | 233 | /// Converts a list to an array. 234 | /// 235 | @external(erlang, "erlang", "list_to_tuple") 236 | @external(javascript, "../../glearray_ffi.mjs", "fromList") 237 | pub fn from_list(list: List(a)) -> Array(a) 238 | 239 | /// Converts an array to a list. 240 | /// 241 | @external(erlang, "erlang", "tuple_to_list") 242 | @external(javascript, "../../gleam.mjs", "toList") 243 | pub fn to_list(array: Array(a)) -> List(a) 244 | 245 | /// Returns the number of elements in the array. 246 | /// 247 | /// ## Performance 248 | /// 249 | /// This function is very efficient and runs in constant time. 250 | /// 251 | /// ## Examples 252 | /// 253 | /// ```gleam 254 | /// > length(new()) 255 | /// 0 256 | /// ``` 257 | /// 258 | /// ```gleam 259 | /// > from_list([8, 0, 0]) |> length 260 | /// 3 261 | /// ``` 262 | /// 263 | @external(erlang, "erlang", "tuple_size") 264 | @external(javascript, "../../glearray_ffi.mjs", "arrayLength") 265 | pub fn length(of array: Array(a)) -> Int 266 | 267 | /// Returns the element at the specified index, starting from 0. 268 | /// 269 | /// `Error(Nil)` is returned if `index` is less than 0 or greater than 270 | /// or equal to `length(array)`. 271 | /// 272 | /// ## Performance 273 | /// 274 | /// This function is very efficient and runs in constant time. 275 | /// 276 | /// ## Examples 277 | /// 278 | /// ```gleam 279 | /// > from_list([5, 6, 7]) |> get(1) 280 | /// Ok(6) 281 | /// ``` 282 | /// 283 | /// ```gleam 284 | /// > from_list([5, 6, 7]) |> get(3) 285 | /// Error(Nil) 286 | /// ``` 287 | /// 288 | pub fn get(in array: Array(a), at index: Int) -> Result(a, Nil) { 289 | case is_valid_index(array, index) { 290 | True -> Ok(do_get(array, index)) 291 | False -> Error(Nil) 292 | } 293 | } 294 | 295 | @external(erlang, "glearray_ffi", "get") 296 | @external(javascript, "../../glearray_ffi.mjs", "get") 297 | fn do_get(array: Array(a), index: Int) -> a 298 | 299 | /// Replaces the element at the given index with `value`. 300 | /// 301 | /// This function cannot extend an array and returns `Error(Nil)` if `index` is 302 | /// not valid. 303 | /// See also [`copy_insert`](#copy_insert) and [`copy_push`](#copy_push). 304 | /// 305 | /// ## Performance 306 | /// 307 | /// This function has to copy the entire array, making it very inefficient 308 | /// especially for larger arrays. 
309 | /// 310 | /// ## Examples 311 | /// 312 | /// ```gleam 313 | /// > from_list(["a", "b", "c"]) |> copy_set(1, "x") 314 | /// Ok(from_list(["a", "x", "c"])) 315 | /// ``` 316 | /// 317 | /// ```gleam 318 | /// > from_list(["a", "b", "c"]) |> copy_set(3, "x") 319 | /// Error(Nil) 320 | /// ``` 321 | /// 322 | pub fn copy_set( 323 | in array: Array(a), 324 | at index: Int, 325 | value value: a, 326 | ) -> Result(Array(a), Nil) { 327 | case is_valid_index(array, index) { 328 | True -> Ok(do_set(array, index, value)) 329 | False -> Error(Nil) 330 | } 331 | } 332 | 333 | @external(erlang, "glearray_ffi", "set") 334 | @external(javascript, "../../glearray_ffi.mjs", "set") 335 | fn do_set(array: Array(a), index: Int, value: a) -> Array(a) 336 | 337 | fn is_valid_index(array: Array(a), index: Int) -> Bool { 338 | index >= 0 && index < length(array) 339 | } 340 | 341 | /// Adds a single element to the back of the given array. 342 | /// 343 | /// ## Performance 344 | /// 345 | /// This function has to copy the entire array, making it very inefficient 346 | /// especially for larger arrays. 347 | /// 348 | /// ## Examples 349 | /// 350 | /// ```gleam 351 | /// > new() |> copy_push(1) |> copy_push(2) |> to_list 352 | /// [1, 2] 353 | /// ``` 354 | /// 355 | @external(erlang, "erlang", "append_element") 356 | @external(javascript, "../../glearray_ffi.mjs", "push") 357 | pub fn copy_push(onto array: Array(a), value value: a) -> Array(a) 358 | 359 | /// Inserts an element into the array at the given index. 360 | /// 361 | /// All following elements are shifted to the right, having their index 362 | /// incremented by one. 363 | /// 364 | /// `Error(Nil)` is returned if the index is less than 0 or greater than 365 | /// `length(array)`. 366 | /// If the index is equal to `length(array)`, this function behaves like 367 | /// [`copy_push`](#copy_push). 368 | /// 369 | /// ## Performance 370 | /// 371 | /// This function has to copy the entire array, making it very inefficient 372 | /// especially for larger arrays. 
373 | /// 374 | /// ## Examples 375 | /// 376 | /// ```gleam 377 | /// > from_list(["a", "b"]) |> copy_insert(0, "c") 378 | /// Ok(from_list(["c", "a", "b"])) 379 | /// ``` 380 | /// 381 | /// ```gleam 382 | /// > from_list(["a", "b"]) |> copy_insert(1, "c") 383 | /// Ok(from_list(["a", "c", "b"])) 384 | /// ``` 385 | /// 386 | /// ```gleam 387 | /// > from_list(["a", "b"]) |> copy_insert(2, "c") 388 | /// Ok(from_list(["a", "b", "c"])) 389 | /// ``` 390 | /// 391 | /// ```gleam 392 | /// > from_list(["a", "b"]) |> copy_insert(3, "c") 393 | /// Error(Nil) 394 | /// ``` 395 | /// 396 | pub fn copy_insert( 397 | into array: Array(a), 398 | at index: Int, 399 | value value: a, 400 | ) -> Result(Array(a), Nil) { 401 | case index >= 0 && index <= length(array) { 402 | True -> Ok(do_insert(array, index, value)) 403 | False -> Error(Nil) 404 | } 405 | } 406 | 407 | @external(erlang, "glearray_ffi", "insert") 408 | @external(javascript, "../../glearray_ffi.mjs", "insert") 409 | fn do_insert(array: Array(a), index: Int, value: a) -> Array(a) 410 | -------------------------------------------------------------------------------- /test/docs/lexer_modes/indentation_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/int 4 | import gleam/io 5 | import gleam/order.{Eq, Gt, Lt} 6 | import gleam/regexp 7 | import gleam/set 8 | import gleam/string 9 | import gleeunit/should 10 | import nibble/lexer.{ 11 | type Matcher, type Token, Drop, Keep, NoMatch, Skip, Span, Token, 12 | } 13 | 14 | // TYPES ----------------------------------------------------------------------- 15 | 16 | type TokenT { 17 | Var(String) 18 | Str(String) 19 | Num(Int) 20 | 21 | // Keywords 22 | Def 23 | For 24 | In 25 | Print 26 | 27 | // Indentation 28 | Indent(Int) 29 | Dedent(Int) 30 | } 31 | 32 | // TESTS ----------------------------------------------------------------------- 33 | 34 | pub fn indentation_test() { 35 | use run <- should("parse indent and dedent tokens") 36 | let input = 37 | "def wibble arr 38 | for x in arr 39 | print x 40 | 41 | print \"done!\" 42 | 43 | def wobble 44 | wibble numbers 45 | " 46 | let expected = [ 47 | Token(Span(1, 1, 1, 4), "def", Var("def")), 48 | Token(Span(1, 5, 1, 11), "wibble", Var("wibble")), 49 | Token(Span(1, 12, 1, 15), "arr", Var("arr")), 50 | Token(Span(1, 15, 2, 3), "\n ", Indent(2)), 51 | Token(Span(2, 3, 2, 6), "for", Var("for")), 52 | Token(Span(2, 7, 2, 8), "x", Var("x")), 53 | Token(Span(2, 9, 2, 11), "in", Var("in")), 54 | Token(Span(2, 12, 2, 15), "arr", Var("arr")), 55 | Token(Span(2, 15, 3, 5), "\n ", Indent(4)), 56 | Token(Span(3, 5, 3, 10), "print", Var("print")), 57 | Token(Span(3, 11, 3, 12), "x", Var("x")), 58 | Token(Span(4, 1, 5, 3), "\n ", Dedent(2)), 59 | Token(Span(5, 3, 5, 8), "print", Var("print")), 60 | Token(Span(5, 9, 5, 16), "\"done!\"", Str("done!")), 61 | Token(Span(6, 1, 7, 1), "\n", Dedent(0)), 62 | Token(Span(7, 1, 7, 4), "def", Var("def")), 63 | Token(Span(7, 5, 7, 11), "wobble", Var("wobble")), 64 | Token(Span(7, 11, 8, 3), "\n ", Indent(2)), 65 | Token(Span(8, 3, 8, 9), "wibble", Var("wibble")), 66 | Token(Span(8, 10, 8, 17), "numbers", Var("numbers")), 67 | Token(Span(8, 17, 9, 1), "\n", Dedent(0)), 68 | ] 69 | 70 | run(input, expected) 71 | } 72 | 73 | // UTILS ----------------------------------------------------------------------- 74 | 75 | fn should( 76 | description: String, 77 | run: fn(fn(String, 
List(Token(TokenT))) -> Nil) -> Nil, 78 | ) -> Nil { 79 | use input, expected <- run 80 | 81 | io.print("should " <> description) 82 | 83 | lexer.advanced(lexer) 84 | |> lexer.run_advanced(input, 0, _) 85 | |> should.be_ok 86 | |> should.equal(expected) 87 | 88 | io.println(" ✅") 89 | } 90 | 91 | fn lexer(_) -> List(Matcher(TokenT, Int)) { 92 | let assert Ok(is_indent) = regexp.from_string("^\\n[ \\t]*") 93 | let indentation = { 94 | use current_indent, lexeme, lookahead <- lexer.custom 95 | 96 | case regexp.check(is_indent, lexeme), lookahead { 97 | False, _ -> NoMatch 98 | True, " " | True, "\t" -> Skip 99 | True, "\n" -> Drop(current_indent) 100 | True, _ -> { 101 | let spaces = string.length(lexeme) - 1 102 | 103 | case int.compare(spaces, current_indent) { 104 | Lt -> Keep(Dedent(spaces), spaces) 105 | Eq if spaces == 0 -> Drop(0) 106 | Eq -> Keep(Indent(spaces), spaces) 107 | Gt -> Keep(Indent(spaces), spaces) 108 | } 109 | } 110 | } 111 | } 112 | 113 | [ 114 | lexer.variable(set.new(), Var), 115 | lexer.string("\"", Str), 116 | lexer.int(Num), 117 | // Keywords 118 | lexer.keyword("def", "[\\W\\D]", Def), 119 | lexer.keyword("for", "[\\W\\D]", For), 120 | lexer.keyword("in", "[\\W\\D]", In), 121 | lexer.keyword("print", "[\\W\\D]", Print), 122 | // Our custom indentation lexer 123 | indentation, 124 | // Ignore all other whitespace 125 | lexer.whitespace(Nil) 126 | |> lexer.ignore, 127 | ] 128 | } 129 | -------------------------------------------------------------------------------- /test/examples/calculator_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/function 4 | import gleam/int 5 | import gleam/io 6 | import gleam/option.{None, Some} 7 | import gleeunit/should 8 | import nibble.{do, do_in, return} 9 | import nibble/lexer 10 | import nibble/pratt 11 | 12 | // TYPES ----------------------------------------------------------------------- 13 | 14 | type TokenT { 15 | Add 16 | Sub 17 | Mul 18 | Div 19 | Num(Float) 20 | LParen 21 | RParen 22 | } 23 | 24 | type Context { 25 | InSubExpr 26 | } 27 | 28 | type DeadEnd = 29 | nibble.DeadEnd(TokenT, Context) 30 | 31 | // TESTS ----------------------------------------------------------------------- 32 | 33 | pub fn add_test() { 34 | use run <- should("add two numbers") 35 | let input = "1 + 2" 36 | let expected = 3.0 37 | 38 | run(input, expected) 39 | } 40 | 41 | pub fn multi_add_test() { 42 | use run <- should("add multiple numbers") 43 | let input = "1 + 2 + 3" 44 | let expected = 6.0 45 | 46 | run(input, expected) 47 | } 48 | 49 | pub fn sub_test() { 50 | use run <- should("subtract two numbers") 51 | let input = "3 - 2" 52 | let expected = 1.0 53 | 54 | run(input, expected) 55 | } 56 | 57 | pub fn multi_sub_test() { 58 | use run <- should("subtract multiple numbers") 59 | let input = "3 - 2 - 1" 60 | let expected = 0.0 61 | 62 | run(input, expected) 63 | } 64 | 65 | pub fn mul_test() { 66 | use run <- should("multiply two numbers") 67 | let input = "2 * 3" 68 | let expected = 6.0 69 | 70 | run(input, expected) 71 | } 72 | 73 | pub fn multi_mul_test() { 74 | use run <- should("multiply multiple numbers") 75 | let input = "2 * 3 * 4" 76 | let expected = 24.0 77 | 78 | run(input, expected) 79 | } 80 | 81 | pub fn precedence_test() { 82 | use run <- should("evaluate operators according to precedence") 83 | let input = "2 * 3 + 4" 84 | let expected = 10.0 85 | 86 | run(input, expected) 87 | 
} 88 | 89 | pub fn parens_test() { 90 | use run <- should("evaluate parens first") 91 | let input = "2 * (3 + 4)" 92 | let expected = 14.0 93 | 94 | run(input, expected) 95 | } 96 | 97 | pub fn complex_test() { 98 | use run <- should("evaluate complex expressions") 99 | let input = "2 * (3 + 4) / 2 - 1" 100 | let expected = 6.0 101 | 102 | run(input, expected) 103 | } 104 | 105 | pub fn mismatched_parens_test() { 106 | use run <- should_error("on mismatched parens") 107 | let input = "2 * (3 + 4" 108 | let expected = [ 109 | nibble.DeadEnd(lexer.Span(1, 10, 1, 11), nibble.EndOfInput, [ 110 | #(lexer.Span(1, 5, 1, 6), InSubExpr), 111 | ]), 112 | ] 113 | 114 | run(input, expected) 115 | } 116 | 117 | // UTILS ----------------------------------------------------------------------- 118 | 119 | fn should(description: String, run: fn(fn(String, Float) -> Nil) -> Nil) -> Nil { 120 | use input, expected <- run 121 | 122 | io.print("should " <> description) 123 | 124 | lexer.run(input, lexer()) 125 | |> should.be_ok 126 | |> nibble.run(parser()) 127 | |> should.be_ok 128 | |> should.equal(expected) 129 | 130 | io.println(" ✅") 131 | } 132 | 133 | fn should_error( 134 | description: String, 135 | run: fn(fn(String, List(DeadEnd)) -> Nil) -> Nil, 136 | ) -> Nil { 137 | use input, expected <- run 138 | 139 | io.print("should error " <> description) 140 | 141 | lexer.run(input, lexer()) 142 | |> should.be_ok 143 | |> nibble.run(parser()) 144 | |> should.equal(Error(expected)) 145 | 146 | io.println(" ✅") 147 | } 148 | 149 | fn lexer() { 150 | lexer.simple([ 151 | // Grouping 152 | lexer.token("(", LParen), 153 | lexer.token(")", RParen), 154 | // Operators 155 | lexer.token("+", Add), 156 | lexer.token("-", Sub), 157 | lexer.token("*", Mul), 158 | lexer.token("/", Div), 159 | // Numbers 160 | lexer.number(int.to_float, function.identity) 161 | |> lexer.map(Num), 162 | // Whitespace 163 | lexer.whitespace(Nil) 164 | |> lexer.ignore(), 165 | ]) 166 | } 167 | 168 | fn parser() { 169 | let add = fn(x, y) { x +. y } 170 | let sub = fn(x, y) { x -. y } 171 | let mul = fn(x, y) { x *. y } 172 | let div = fn(x, y) { x /. y } 173 | 174 | pratt.expression( 175 | one_of: [parens_parser, number_parser], 176 | and_then: [ 177 | pratt.infix_left(14, nibble.token(Mul), mul), 178 | pratt.infix_left(14, nibble.token(Div), div), 179 | pratt.infix_left(13, nibble.token(Add), add), 180 | pratt.infix_left(13, nibble.token(Sub), sub), 181 | ], 182 | // Whitespace is already ignored by the lexer so there's no tokens we need to 183 | // explicitly ignore here. 
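// `return(Nil)` always succeeds without consuming a token, so passing it as
// `dropping:` gives the pratt parser nothing extra to skip between operands
// and operators.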
184 | dropping: return(Nil), 185 | ) 186 | } 187 | 188 | fn number_parser(_) { 189 | use tok <- nibble.take_map("a number token") 190 | 191 | case tok { 192 | Num(n) -> Some(n) 193 | _ -> None 194 | } 195 | } 196 | 197 | fn parens_parser(_) { 198 | use _ <- do(nibble.token(LParen)) 199 | use n <- do_in(InSubExpr, nibble.lazy(parser)) 200 | use _ <- do(nibble.token(RParen)) 201 | 202 | return(n) 203 | } 204 | -------------------------------------------------------------------------------- /test/examples/env_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/dict.{type Dict} 4 | import gleam/float 5 | import gleam/int 6 | import gleam/io 7 | import gleam/option.{None, Some} 8 | import gleam/set 9 | import gleeunit/should 10 | import nibble.{Break, Continue, do, return} 11 | import nibble/lexer 12 | 13 | // TYPES ----------------------------------------------------------------------- 14 | 15 | type Env = 16 | Dict(String, String) 17 | 18 | type TokenT { 19 | Key(String) 20 | Str(String) 21 | Equals 22 | NewLine 23 | } 24 | 25 | // TESTS ----------------------------------------------------------------------- 26 | 27 | pub fn empty_env_test() { 28 | use run <- should("parse an empty env") 29 | let input = "" 30 | let expected = dict.new() 31 | 32 | run(input, expected) 33 | } 34 | 35 | pub fn single_env_test() { 36 | use run <- should("parse a single k/v pair") 37 | let input = "FOO=bar" 38 | let expected = dict.from_list([#("FOO", "bar")]) 39 | 40 | run(input, expected) 41 | } 42 | 43 | pub fn single_env_string_test() { 44 | use run <- should("parse a single k/v pair with a string value") 45 | let input = "FOO='bar'" 46 | let expected = dict.from_list([#("FOO", "bar")]) 47 | 48 | run(input, expected) 49 | } 50 | 51 | pub fn single_env_number_test() { 52 | use run <- should("parse a single k/v pair with a number value") 53 | let input = "FOO=123" 54 | let expected = dict.from_list([#("FOO", "123")]) 55 | 56 | run(input, expected) 57 | } 58 | 59 | pub fn single_env_float_test() { 60 | use run <- should("parse a single k/v pair with a float value") 61 | let input = "FOO=123.456" 62 | let expected = dict.from_list([#("FOO", "123.456")]) 63 | 64 | run(input, expected) 65 | } 66 | 67 | pub fn multi_env_test() { 68 | use run <- should("parse multiple k/v pairs") 69 | let input = 70 | " 71 | FOO=bar 72 | BAZ=qux 73 | " 74 | let expected = dict.from_list([#("FOO", "bar"), #("BAZ", "qux")]) 75 | 76 | run(input, expected) 77 | } 78 | 79 | // UTILS ----------------------------------------------------------------------- 80 | 81 | fn should(description: String, run: fn(fn(String, Env) -> Nil) -> Nil) -> Nil { 82 | use input, expected <- run 83 | 84 | io.print("should " <> description) 85 | 86 | lexer.run(input, lexer()) 87 | |> should.be_ok 88 | |> nibble.run(parser()) 89 | |> should.be_ok 90 | |> should.equal(expected) 91 | 92 | io.println(" ✅") 93 | } 94 | 95 | fn lexer() { 96 | lexer.simple([ 97 | lexer.token("=", Equals), 98 | lexer.token("\n", NewLine), 99 | // Strings values can use either double quotes or single quotes 100 | lexer.string("\"", Str), 101 | lexer.string("'", Str), 102 | // Keys can be any non-whitespace character 103 | lexer.identifier("[^\\s=#]", "[^\\s=]", set.new(), Key), 104 | // We'll allow number literals and just convert them to string values 105 | lexer.number(fn(int) { Str(int.to_string(int)) }, fn(float) { 106 | 
Str(float.to_string(float)) 107 | }), 108 | // Drop comments and whitespace 109 | lexer.comment("#", fn(_) { Nil }) 110 | |> lexer.ignore, 111 | lexer.spaces(Nil) 112 | |> lexer.ignore, 113 | ]) 114 | } 115 | 116 | fn parser() { 117 | use env <- nibble.loop(dict.new()) 118 | 119 | nibble.one_of([ 120 | key_value_parser(env) 121 | |> nibble.map(Continue), 122 | // The `key_value_parser` already consumes one new line. This parser makes 123 | // sure that if k/v pairs are separated by _multiple_ newlines that we still 124 | // consume them all. 125 | // 126 | // We use `many1` here because we need to consume at least _one_ token to 127 | // prevent an infinite loop. 128 | // 129 | nibble.many1(nibble.token(NewLine)) 130 | |> nibble.replace(Continue(env)), 131 | nibble.eof() 132 | |> nibble.replace(Break(env)), 133 | ]) 134 | } 135 | 136 | fn key_value_parser(env) { 137 | use k <- do(key_parser()) 138 | use _ <- do(nibble.token(Equals)) 139 | use v <- do(val_parser()) 140 | use _ <- do(nibble.one_of([nibble.token(NewLine), nibble.eof()])) 141 | 142 | return(dict.insert(env, k, v)) 143 | } 144 | 145 | fn key_parser() { 146 | use tok <- nibble.take_map("an env key") 147 | 148 | case tok { 149 | Key(k) -> Some(k) 150 | _ -> None 151 | } 152 | } 153 | 154 | fn val_parser() { 155 | use tok <- nibble.take_map("an env value") 156 | 157 | case tok { 158 | Str(v) -> Some(v) 159 | // We can treat a single unquoted word as a value 160 | Key(v) -> Some(v) 161 | _ -> None 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /test/examples/json_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/int 4 | import gleam/io 5 | import gleeunit/should 6 | import nibble.{type Parser} 7 | import nibble/lexer.{type Lexer} 8 | 9 | // TYPES ----------------------------------------------------------------------- 10 | 11 | type Json { 12 | Array(List(Json)) 13 | False 14 | Null 15 | Number(Float) 16 | Object(List(#(String, Json))) 17 | String(String) 18 | True 19 | } 20 | 21 | type JsonT { 22 | Colon 23 | Comma 24 | FalseT 25 | LBrace 26 | LBracket 27 | NullT 28 | NumT(Float) 29 | RBrace 30 | RBracket 31 | StrT(String) 32 | TrueT 33 | } 34 | 35 | type Context { 36 | InArray 37 | InObject 38 | } 39 | 40 | // LITERAL TESTS --------------------------------------------------------------- 41 | 42 | pub fn json_null_test() { 43 | use run <- should("parse a JSON null") 44 | let input = "null" 45 | let expected = Null 46 | 47 | run(input, expected) 48 | } 49 | 50 | pub fn json_true_test() { 51 | use run <- should("parse a JSON true") 52 | let input = "true" 53 | let expected = True 54 | 55 | run(input, expected) 56 | } 57 | 58 | pub fn json_false_test() { 59 | use run <- should("parse a JSON false") 60 | let input = "false" 61 | let expected = False 62 | 63 | run(input, expected) 64 | } 65 | 66 | pub fn json_number_test() { 67 | use run <- should("parse a JSON number") 68 | let input = "123.456" 69 | let expected = Number(123.456) 70 | 71 | run(input, expected) 72 | } 73 | 74 | pub fn json_string_test() { 75 | use run <- should("parse a JSON string") 76 | let input = "\"hello world\"" 77 | let expected = String("hello world") 78 | 79 | run(input, expected) 80 | } 81 | 82 | // ARRAY TESTS ----------------------------------------------------------------- 83 | 84 | pub fn json_empty_array_test() { 85 | use run <- should("parse an empty 
JSON array") 86 | let input = "[]" 87 | let expected = Array([]) 88 | 89 | run(input, expected) 90 | } 91 | 92 | pub fn json_singleton_array_test() { 93 | use run <- should("parse a JSON array with one element") 94 | let input = "[1]" 95 | let expected = Array([Number(1.0)]) 96 | 97 | run(input, expected) 98 | } 99 | 100 | pub fn json_array_test() { 101 | use run <- should("parse a JSON array with multiple elements") 102 | let input = "[1, 2, 3]" 103 | let expected = Array([Number(1.0), Number(2.0), Number(3.0)]) 104 | 105 | run(input, expected) 106 | } 107 | 108 | pub fn json_nested_array_test() { 109 | use run <- should("parse a nested JSON array") 110 | let input = "[1, [2, 3], 4]" 111 | let expected = 112 | Array([Number(1.0), Array([Number(2.0), Number(3.0)]), Number(4.0)]) 113 | 114 | run(input, expected) 115 | } 116 | 117 | // OBJECT TESTS ---------------------------------------------------------------- 118 | 119 | pub fn json_empty_object_test() { 120 | use run <- should("parse an empty JSON object") 121 | let input = "{}" 122 | let expected = Object([]) 123 | 124 | run(input, expected) 125 | } 126 | 127 | pub fn json_singleton_object_test() { 128 | use run <- should("parse a JSON object with one element") 129 | let input = "{\"a\": 1}" 130 | let expected = Object([#("a", Number(1.0))]) 131 | 132 | run(input, expected) 133 | } 134 | 135 | pub fn json_object_test() { 136 | use run <- should("parse a JSON object with multiple elements") 137 | let input = "{\"a\": 1, \"b\": 2, \"c\": 3}" 138 | let expected = 139 | Object([#("a", Number(1.0)), #("b", Number(2.0)), #("c", Number(3.0))]) 140 | 141 | run(input, expected) 142 | } 143 | 144 | pub fn json_nested_object_test() { 145 | use run <- should("parse a nested JSON object") 146 | let input = "{\"a\": 1, \"b\": {\"c\": 2}, \"d\": 3}" 147 | let expected = 148 | Object([ 149 | #("a", Number(1.0)), 150 | #("b", Object([#("c", Number(2.0))])), 151 | #("d", Number(3.0)), 152 | ]) 153 | 154 | run(input, expected) 155 | } 156 | 157 | // REAL WORLD TESTS ------------------------------------------------------------ 158 | 159 | pub fn json_schema_basic_test() { 160 | // https://json-schema.org/learn/miscellaneous-examples.html#basic 161 | use run <- should("parse the JSON Schema basic example") 162 | let input = 163 | "{ 164 | \"$id\": \"https://example.com/person.schema.json\", 165 | \"$schema\": \"https://json-schema.org/draft/2020-12/schema\", 166 | \"title\": \"Person\", 167 | \"type\": \"object\", 168 | \"properties\": { 169 | \"firstName\": { 170 | \"type\": \"string\", 171 | \"description\": \"The person's first name.\" 172 | }, 173 | \"lastName\": { 174 | \"type\": \"string\", 175 | \"description\": \"The person's last name.\" 176 | }, 177 | \"age\": { 178 | \"description\": \"Age in years which must be equal to or greater than zero.\", 179 | \"type\": \"integer\", 180 | \"minimum\": 0 181 | } 182 | } 183 | }" 184 | let expected = 185 | Object([ 186 | #("$id", String("https://example.com/person.schema.json")), 187 | #("$schema", String("https://json-schema.org/draft/2020-12/schema")), 188 | #("title", String("Person")), 189 | #("type", String("object")), 190 | #( 191 | "properties", 192 | Object([ 193 | #( 194 | "firstName", 195 | Object([ 196 | #("type", String("string")), 197 | #("description", String("The person's first name.")), 198 | ]), 199 | ), 200 | #( 201 | "lastName", 202 | Object([ 203 | #("type", String("string")), 204 | #("description", String("The person's last name.")), 205 | ]), 206 | ), 207 | #( 208 | "age", 209 | 
Object([ 210 | #( 211 | "description", 212 | String( 213 | "Age in years which must be equal to or greater than zero.", 214 | ), 215 | ), 216 | #("type", String("integer")), 217 | #("minimum", Number(0.0)), 218 | ]), 219 | ), 220 | ]), 221 | ), 222 | ]) 223 | 224 | run(input, expected) 225 | } 226 | 227 | pub fn json_scheme_arrays_of_things_test() { 228 | // https://json-schema.org/learn/miscellaneous-examples.html#arrays-of-things 229 | use run <- should("parse the JSON Schema 'arrays of things' example") 230 | let input = 231 | "{ 232 | \"$id\": \"https://example.com/arrays.schema.json\", 233 | \"$schema\": \"https://json-schema.org/draft/2020-12/schema\", 234 | \"description\": \"A representation of a person, company, organization, or place\", 235 | \"type\": \"object\", 236 | \"properties\": { 237 | \"fruits\": { 238 | \"type\": \"array\", 239 | \"items\": { 240 | \"type\": \"string\" 241 | } 242 | }, 243 | \"vegetables\": { 244 | \"type\": \"array\", 245 | \"items\": { \"$ref\": \"#/$defs/veggie\" } 246 | } 247 | }, 248 | \"$defs\": { 249 | \"veggie\": { 250 | \"type\": \"object\", 251 | \"required\": [ \"veggieName\", \"veggieLike\" ], 252 | \"properties\": { 253 | \"veggieName\": { 254 | \"type\": \"string\", 255 | \"description\": \"The name of the vegetable.\" 256 | }, 257 | \"veggieLike\": { 258 | \"type\": \"boolean\", 259 | \"description\": \"Do I like this vegetable?\" 260 | } 261 | } 262 | } 263 | } 264 | }" 265 | let expected = 266 | Object([ 267 | #("$id", String("https://example.com/arrays.schema.json")), 268 | #("$schema", String("https://json-schema.org/draft/2020-12/schema")), 269 | #( 270 | "description", 271 | String("A representation of a person, company, organization, or place"), 272 | ), 273 | #("type", String("object")), 274 | #( 275 | "properties", 276 | Object([ 277 | #( 278 | "fruits", 279 | Object([ 280 | #("type", String("array")), 281 | #("items", Object([#("type", String("string"))])), 282 | ]), 283 | ), 284 | #( 285 | "vegetables", 286 | Object([ 287 | #("type", String("array")), 288 | #("items", Object([#("$ref", String("#/$defs/veggie"))])), 289 | ]), 290 | ), 291 | ]), 292 | ), 293 | #( 294 | "$defs", 295 | Object([ 296 | #( 297 | "veggie", 298 | Object([ 299 | #("type", String("object")), 300 | #("required", Array([String("veggieName"), String("veggieLike")])), 301 | #( 302 | "properties", 303 | Object([ 304 | #( 305 | "veggieName", 306 | Object([ 307 | #("type", String("string")), 308 | #("description", String("The name of the vegetable.")), 309 | ]), 310 | ), 311 | #( 312 | "veggieLike", 313 | Object([ 314 | #("type", String("boolean")), 315 | #("description", String("Do I like this vegetable?")), 316 | ]), 317 | ), 318 | ]), 319 | ), 320 | ]), 321 | ), 322 | ]), 323 | ), 324 | ]) 325 | 326 | run(input, expected) 327 | } 328 | 329 | // UTILS ----------------------------------------------------------------------- 330 | 331 | fn should(description: String, run: fn(fn(String, Json) -> Nil) -> Nil) -> Nil { 332 | use input, expected <- run 333 | 334 | io.print("should " <> description) 335 | 336 | lexer.run(input, lexer()) 337 | |> should.be_ok 338 | |> nibble.run(parser()) 339 | |> should.be_ok 340 | |> should.equal(expected) 341 | 342 | io.println(" ✅") 343 | } 344 | 345 | fn lexer() -> Lexer(JsonT, Nil) { 346 | lexer.simple([ 347 | lexer.number(fn(int) { NumT(int.to_float(int)) }, NumT), 348 | lexer.token(":", Colon), 349 | lexer.token(",", Comma), 350 | lexer.token("false", FalseT), 351 | lexer.token("{", LBrace), 352 | lexer.token("[", LBracket), 
353 | lexer.token("null", NullT), 354 | lexer.token("true", TrueT), 355 | lexer.token("}", RBrace), 356 | lexer.token("]", RBracket), 357 | lexer.string("\"", StrT), 358 | // 359 | lexer.whitespace(Nil) 360 | |> lexer.ignore, 361 | ]) 362 | } 363 | 364 | fn parser() -> Parser(Json, JsonT, Context) { 365 | nibble.one_of([ 366 | // Structures 367 | array_parser() 368 | |> nibble.in(InArray), 369 | object_parser() 370 | |> nibble.in(InObject), 371 | literal_parser(), 372 | ]) 373 | } 374 | 375 | fn array_parser() -> Parser(Json, JsonT, Context) { 376 | use _ <- nibble.do(nibble.token(LBracket)) 377 | use elements <- nibble.do(nibble.sequence( 378 | nibble.lazy(parser), 379 | nibble.token(Comma), 380 | )) 381 | use _ <- nibble.do(nibble.token(RBracket)) 382 | 383 | nibble.return(Array(elements)) 384 | } 385 | 386 | fn object_parser() -> Parser(Json, JsonT, Context) { 387 | use _ <- nibble.do(nibble.token(LBrace)) 388 | use elements <- nibble.do(nibble.sequence( 389 | nibble.lazy(object_element_parser), 390 | nibble.token(Comma), 391 | )) 392 | use _ <- nibble.do(nibble.token(RBrace)) 393 | 394 | nibble.return(Object(elements)) 395 | } 396 | 397 | fn object_element_parser() -> Parser(#(String, Json), JsonT, Context) { 398 | use key <- nibble.do( 399 | nibble.backtrackable({ 400 | use t <- nibble.do(nibble.any()) 401 | 402 | case t { 403 | StrT(s) -> nibble.return(s) 404 | _ -> nibble.fail("Expected string object key") 405 | } 406 | }), 407 | ) 408 | use _ <- nibble.do(nibble.token(Colon)) 409 | use value <- nibble.do(nibble.lazy(parser)) 410 | 411 | nibble.return(#(key, value)) 412 | } 413 | 414 | fn literal_parser() -> Parser(Json, JsonT, Context) { 415 | nibble.backtrackable({ 416 | use t <- nibble.do(nibble.any()) 417 | 418 | case t { 419 | NumT(n) -> nibble.return(Number(n)) 420 | StrT(s) -> nibble.return(String(s)) 421 | TrueT -> nibble.return(True) 422 | FalseT -> nibble.return(False) 423 | NullT -> nibble.return(Null) 424 | _ -> nibble.fail("Expected a literal value") 425 | } 426 | }) 427 | } 428 | -------------------------------------------------------------------------------- /test/examples/readme_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleam/option.{None, Some} 5 | import gleeunit/should 6 | import nibble.{do, return} 7 | import nibble/lexer 8 | 9 | // TYPES ----------------------------------------------------------------------- 10 | 11 | type Point { 12 | Point(x: Int, y: Int) 13 | } 14 | 15 | type T { 16 | Num(Int) 17 | LParen 18 | RParen 19 | Comma 20 | } 21 | 22 | // TESTS ----------------------------------------------------------------------- 23 | 24 | pub fn readme_test() { 25 | use run <- should("parse the example in README.md") 26 | let input = "(1, 2)" 27 | let expected = Point(1, 2) 28 | 29 | run(input, expected) 30 | } 31 | 32 | // UTILS ----------------------------------------------------------------------- 33 | 34 | fn should(description: String, run: fn(fn(String, Point) -> Nil) -> Nil) -> Nil { 35 | use input, expected <- run 36 | 37 | io.print("should " <> description) 38 | 39 | lexer.run(input, lexer()) 40 | |> should.be_ok 41 | |> nibble.run(parser()) 42 | |> should.be_ok 43 | |> should.equal(expected) 44 | 45 | io.println(" ✅") 46 | } 47 | 48 | fn lexer() { 49 | lexer.simple([ 50 | lexer.int(Num), 51 | lexer.token("(", LParen), 52 | lexer.token(")", RParen), 53 | lexer.token(",", Comma), 54 | // 
Skip over whitespace, we don't care about it! 55 | lexer.whitespace(Nil) 56 | |> lexer.ignore, 57 | ]) 58 | } 59 | 60 | fn parser() { 61 | use _ <- do(nibble.token(LParen)) 62 | use x <- do(int_parser()) 63 | use _ <- do(nibble.token(Comma)) 64 | use y <- do(int_parser()) 65 | use _ <- do(nibble.token(RParen)) 66 | 67 | return(Point(x, y)) 68 | } 69 | 70 | fn int_parser() { 71 | use tok <- nibble.take_map("a `Num` token") 72 | 73 | case tok { 74 | Num(n) -> Some(n) 75 | _ -> None 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /test/examples/sexpr_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleeunit/should 5 | import nibble.{type Parser} 6 | import nibble/lexer.{type Lexer, type Span, Span} 7 | 8 | // TYPES ----------------------------------------------------------------------- 9 | 10 | type TokenT { 11 | LParenT 12 | RParenT 13 | NumT(Int) 14 | } 15 | 16 | type Expr { 17 | SExpr(body: List(Expr), location: Span) 18 | Num(value: Int, location: Span) 19 | } 20 | 21 | // TESTS ----------------------------------------------------------------------- 22 | 23 | pub fn basic_test() { 24 | use run <- should("parse a basic s-expression") 25 | let input = "(1 2)" 26 | let expected = 27 | SExpr( 28 | [Num(1, Span(1, 2, 1, 3)), Num(2, Span(1, 4, 1, 5))], 29 | Span(1, 1, 1, 6), 30 | ) 31 | 32 | run(input, expected) 33 | } 34 | 35 | pub fn nested_test() { 36 | use run <- should("parse nested s-expressions") 37 | let input = "(1 ((2 3) 4) (5) 6)" 38 | let expected = 39 | SExpr( 40 | [ 41 | Num(1, Span(1, 2, 1, 3)), 42 | SExpr( 43 | [ 44 | SExpr( 45 | [Num(2, Span(1, 6, 1, 7)), Num(3, Span(1, 8, 1, 9))], 46 | Span(1, 5, 1, 10), 47 | ), 48 | Num(4, Span(1, 11, 1, 12)), 49 | ], 50 | Span(1, 4, 1, 13), 51 | ), 52 | SExpr([Num(5, Span(1, 15, 1, 16))], Span(1, 14, 1, 17)), 53 | Num(6, Span(1, 18, 1, 19)), 54 | ], 55 | Span(1, 1, 1, 20), 56 | ) 57 | 58 | run(input, expected) 59 | } 60 | 61 | pub fn locations_test() { 62 | use run <- should("parse locations over multiple lines") 63 | let input = 64 | "( 65 | 1 66 | 67 | 2 3) 68 | " 69 | let expected = 70 | SExpr( 71 | [ 72 | Num(1, Span(2, 7, 2, 8)), 73 | Num(2, Span(4, 3, 4, 4)), 74 | Num(3, Span(4, 5, 4, 6)), 75 | ], 76 | Span(1, 1, 4, 7), 77 | ) 78 | 79 | run(input, expected) 80 | } 81 | 82 | // UTILS ----------------------------------------------------------------------- 83 | 84 | fn should(description: String, run: fn(fn(String, Expr) -> Nil) -> Nil) -> Nil { 85 | use input, expected <- run 86 | 87 | io.print("should " <> description) 88 | 89 | lexer.run(input, lexer()) 90 | |> should.be_ok 91 | |> nibble.run(parser()) 92 | |> should.be_ok 93 | |> should.equal(expected) 94 | 95 | io.println(" ✅") 96 | } 97 | 98 | fn lexer() -> Lexer(TokenT, Nil) { 99 | lexer.simple([ 100 | lexer.token("(", LParenT), 101 | lexer.token(")", RParenT), 102 | lexer.int(NumT), 103 | lexer.whitespace(Nil) 104 | |> lexer.ignore, 105 | ]) 106 | } 107 | 108 | fn parser() -> Parser(Expr, TokenT, Nil) { 109 | nibble.one_of([number_parser(), sexpr_parser()]) 110 | } 111 | 112 | fn number_parser() -> Parser(Expr, TokenT, Nil) { 113 | use t <- nibble.do(nibble.any()) 114 | // Get the position of the current token 115 | use pos <- nibble.do(nibble.span()) 116 | 117 | case t { 118 | NumT(n) -> nibble.return(Num(n, pos)) 119 | _ -> nibble.fail("expected a number literal") 
120 | } 121 | } 122 | 123 | fn sexpr_parser() -> Parser(Expr, TokenT, Nil) { 124 | use _ <- nibble.do(nibble.token(LParenT)) 125 | // Position of the left parenthesis 126 | use start <- nibble.do(nibble.span()) 127 | use body <- nibble.do(nibble.many1(parser())) 128 | use _ <- nibble.do(nibble.token(RParenT)) 129 | // Position of the right parenthesis 130 | use end <- nibble.do(nibble.span()) 131 | 132 | // Create a combined location from the start and end spans 133 | // 134 | // ( 1 2 3 ) 135 | // ~ start ~ end 136 | // ~~~~~~~~~~~~~ combined 137 | let combined = merge_spans(start, end) 138 | 139 | nibble.return(SExpr(body, combined)) 140 | } 141 | 142 | fn merge_spans(span1: Span, span2: Span) -> Span { 143 | Span(span1.row_start, span1.col_start, span2.row_end, span2.col_end) 144 | } 145 | -------------------------------------------------------------------------------- /test/examples/simple_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/option.{None, Some} 4 | import nibble.{ 5 | any, do, eof, fail, guard, many, many1, map, one_of, optional, or, replace, 6 | return, run, sequence, take_at_least, take_exactly, take_if, take_map, 7 | take_map_while, take_until, take_while, take_while1, token, 8 | } 9 | import nibble/lexer 10 | 11 | // TYPES ----------------------------------------------------------------------- 12 | 13 | pub type TokenT { 14 | AT 15 | BT 16 | CommaT 17 | LParenT 18 | RParenT 19 | NumT(Int) 20 | } 21 | 22 | // LEXER ----------------------------------------------------------------------- 23 | fn lexer() { 24 | lexer.simple([ 25 | lexer.token("a", AT), 26 | lexer.token("b", BT), 27 | lexer.token(",", CommaT), 28 | lexer.token("(", LParenT), 29 | lexer.token(")", RParenT), 30 | lexer.int(NumT), 31 | lexer.whitespace(Nil) 32 | |> lexer.ignore, 33 | ]) 34 | } 35 | 36 | // PARSERS --------------------------------------------------------------------- 37 | fn number_parser() { 38 | use tok <- take_map("expected a number") 39 | case tok { 40 | NumT(n) -> Some(n) 41 | _ -> None 42 | } 43 | } 44 | 45 | // TESTS ----------------------------------------------------------------------- 46 | 47 | pub fn any_test() { 48 | let assert Ok(non_empty_tokens) = lexer.run("a", lexer()) 49 | let assert Ok(AT) = run(non_empty_tokens, any()) 50 | let assert Ok(empty_tokens) = lexer.run("", lexer()) 51 | let assert Error(_) = run(empty_tokens, nibble.any()) 52 | } 53 | 54 | pub fn do_test() { 55 | let assert Ok(tokens) = lexer.run("a", lexer()) 56 | let parser = { 57 | use parsed_value <- do(token(AT)) 58 | return(parsed_value) 59 | } 60 | let assert Ok(Nil) = run(tokens, parser) 61 | } 62 | 63 | pub fn eof_test() { 64 | let assert Ok(tokens) = lexer.run("aba", lexer()) 65 | let successful_parser = { 66 | use _ <- do(token(AT)) 67 | use _ <- do(token(BT)) 68 | use _ <- do(token(AT)) 69 | use _ <- do(eof()) 70 | return(Nil) 71 | } 72 | let assert Ok(Nil) = run(tokens, successful_parser) 73 | let failing_parser = { 74 | use _ <- do(token(AT)) 75 | use _ <- do(token(BT)) 76 | use _ <- do(eof()) 77 | return(Nil) 78 | } 79 | let assert Error(_) = run(tokens, failing_parser) 80 | } 81 | 82 | pub fn fail_test() { 83 | let assert Error(_) = run([], fail("I never succeed")) 84 | } 85 | 86 | pub fn guard_test() { 87 | let even_number_parser = { 88 | use number_value <- do(number_parser()) 89 | use _ <- do(guard(number_value % 2 == 0, "expected an even number")) 
90 | return(number_value) 91 | } 92 | let assert Ok(even_number_token) = lexer.run("10", lexer()) 93 | let assert Ok(10) = run(even_number_token, even_number_parser) 94 | let assert Ok(odd_number_token) = lexer.run("13", lexer()) 95 | let assert Error(_) = run(odd_number_token, even_number_parser) 96 | } 97 | 98 | pub fn many_test() { 99 | let assert Ok(tokens) = lexer.run("aaab", lexer()) 100 | let assert Ok([Nil, Nil, Nil]) = run(tokens, many(token(AT))) 101 | let assert Ok(tokens) = lexer.run("b", lexer()) 102 | let assert Ok([]) = run(tokens, many(token(AT))) 103 | } 104 | 105 | pub fn many1_test() { 106 | let assert Ok(tokens) = lexer.run("aaab", lexer()) 107 | let assert Ok([Nil, Nil, Nil]) = run(tokens, many1(token(AT))) 108 | let assert Ok(tokens) = lexer.run("b", lexer()) 109 | let assert Error(_) = run(tokens, many1(token(AT))) 110 | } 111 | 112 | pub fn map_test() { 113 | let double_parser = { 114 | use doubled_value <- do(number_parser() |> map(fn(n) { n * 2 })) 115 | return(doubled_value) 116 | } 117 | let assert Ok(token) = lexer.run("400", lexer()) 118 | let assert Ok(800) = run(token, double_parser) 119 | } 120 | 121 | pub fn one_of_test() { 122 | let a_or_b_parser = { 123 | one_of([token(AT), token(BT)]) 124 | } 125 | let assert Ok(tokens) = lexer.run("a", lexer()) 126 | let assert Ok(Nil) = run(tokens, a_or_b_parser) 127 | let assert Ok(tokens) = lexer.run("b", lexer()) 128 | let assert Ok(Nil) = run(tokens, a_or_b_parser) 129 | let assert Ok(tokens) = lexer.run("22", lexer()) 130 | let assert Error(_) = run(tokens, a_or_b_parser) 131 | } 132 | 133 | pub fn optional_test() { 134 | let optional_a_parser = { 135 | token(AT) |> optional 136 | } 137 | let assert Ok(tokens) = lexer.run("a", lexer()) 138 | let assert Ok(Some(Nil)) = run(tokens, optional_a_parser) 139 | let assert Ok(tokens) = lexer.run("b", lexer()) 140 | let assert Ok(None) = run(tokens, optional_a_parser) 141 | } 142 | 143 | pub fn or_test() { 144 | let assert Ok(tokens) = lexer.run("a", lexer()) 145 | let assert Ok(0) = run(tokens, number_parser() |> or(0)) 146 | } 147 | 148 | pub fn replace_test() { 149 | let assert Ok(tokens) = lexer.run("a", lexer()) 150 | let assert Ok("a") = run(tokens, token(AT) |> replace("a")) 151 | } 152 | 153 | pub fn sequence_test() { 154 | let assert Ok(tokens) = lexer.run("a,a,a", lexer()) 155 | let assert Ok([Nil, Nil, Nil]) = 156 | run(tokens, sequence(token(AT), token(CommaT))) 157 | } 158 | 159 | pub fn take_at_least_test() { 160 | let assert Ok(tokens) = lexer.run("aaa", lexer()) 161 | let assert Ok([Nil, Nil, Nil]) = run(tokens, take_at_least(token(AT), 2)) 162 | let assert Ok(tokens) = lexer.run("abaa", lexer()) 163 | let assert Error(_) = run(tokens, take_at_least(token(AT), 2)) 164 | } 165 | 166 | pub fn take_exactly_test() { 167 | let assert Ok(tokens) = lexer.run("aaaaaa", lexer()) 168 | let assert Ok([Nil, Nil]) = run(tokens, take_exactly(token(AT), 2)) 169 | let assert Ok(tokens) = lexer.run("abaa", lexer()) 170 | let assert Error(_) = run(tokens, take_exactly(token(AT), 2)) 171 | } 172 | 173 | pub fn take_if_test() { 174 | let a_parser = { 175 | take_if("expected an 'a'", fn(tok) { 176 | case tok { 177 | AT -> True 178 | _ -> False 179 | } 180 | }) 181 | } 182 | let assert Ok(tokens) = lexer.run("a", lexer()) 183 | let assert Ok(AT) = run(tokens, a_parser) 184 | let assert Ok(tokens) = lexer.run("b", lexer()) 185 | let assert Error(_) = run(tokens, a_parser) 186 | } 187 | 188 | pub fn take_map_test() { 189 | // See the body of `number_parser` for the use of 
`take_map`. 190 | let assert Ok(tokens) = lexer.run("10", lexer()) 191 | let assert Ok(10) = run(tokens, number_parser()) 192 | let assert Ok(tokens) = lexer.run("b", lexer()) 193 | let assert Error(_) = run(tokens, number_parser()) 194 | } 195 | 196 | pub fn take_map_while_test() { 197 | let f = fn(tok) { 198 | case tok { 199 | NumT(n) if n > 0 -> Some("positive") 200 | NumT(n) if n == 0 -> Some("zero") 201 | NumT(n) if n < 0 -> Some("negative") 202 | CommaT -> Some("comma") 203 | _ -> None 204 | } 205 | } 206 | let assert Ok(tokens) = lexer.run("10,-10", lexer()) 207 | let assert Ok(["positive", "comma", "negative"]) = 208 | run(tokens, take_map_while(f)) 209 | } 210 | 211 | pub fn take_until_test() { 212 | let not_a_number = fn(tok) { 213 | case tok { 214 | NumT(_) -> False 215 | _ -> True 216 | } 217 | } 218 | let assert Ok(tokens) = lexer.run("a,b,10", lexer()) 219 | let assert Ok([]) = run(tokens, take_until(not_a_number)) 220 | let assert Ok(tokens) = lexer.run("10a", lexer()) 221 | let assert Ok([NumT(10)]) = run(tokens, take_until(not_a_number)) 222 | } 223 | 224 | pub fn take_while_test() { 225 | let not_a_number = fn(tok) { 226 | case tok { 227 | NumT(_) -> False 228 | _ -> True 229 | } 230 | } 231 | let assert Ok(tokens) = lexer.run("a,b,10", lexer()) 232 | let assert Ok([AT, CommaT, BT, CommaT]) = 233 | run(tokens, take_while(not_a_number)) 234 | let assert Ok(tokens) = lexer.run("10", lexer()) 235 | let assert Ok([]) = run(tokens, take_while(not_a_number)) 236 | } 237 | 238 | pub fn take_while1_test() { 239 | let not_a_number = fn(tok) { 240 | case tok { 241 | NumT(_) -> False 242 | _ -> True 243 | } 244 | } 245 | let assert Ok(tokens) = lexer.run("a,b,10", lexer()) 246 | let assert Ok([AT, CommaT, BT, CommaT]) = 247 | run(tokens, take_while1("expected a non-number", not_a_number)) 248 | let assert Ok(tokens) = lexer.run("10", lexer()) 249 | let assert Error(_) = 250 | run(tokens, take_while1("expected a non-number", not_a_number)) 251 | } 252 | -------------------------------------------------------------------------------- /test/nibble_test.gleam: -------------------------------------------------------------------------------- 1 | import gleeunit 2 | 3 | pub fn main() { 4 | gleeunit.main() 5 | } 6 | -------------------------------------------------------------------------------- /test/unit/lexer_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleam/set 5 | import gleam/string 6 | import gleeunit/should 7 | import nibble/lexer.{type Matcher, type Token, Span, Token} 8 | 9 | // TYPES ----------------------------------------------------------------------- 10 | 11 | type TokenT { 12 | Str(String) 13 | Int(Int) 14 | Num(Float) 15 | Kwd(String) 16 | Var(String) 17 | } 18 | 19 | // INTEGER TESTS --------------------------------------------------------------- 20 | 21 | pub fn single_digit_integer_test() { 22 | use run <- should("lex a single digit integer") 23 | 24 | let input = "1" 25 | let expected = [Token(Span(1, 1, 1, 2), "1", Int(1))] 26 | 27 | run(input, expected, [lexer.int(Int)]) 28 | } 29 | 30 | pub fn single_negative_digit_integer_test() { 31 | use run <- should("lex a single digit negative integer") 32 | 33 | let input = "-1" 34 | let expected = [Token(Span(1, 1, 1, 3), "-1", Int(-1))] 35 | 36 | run(input, expected, [lexer.int(Int)]) 37 | } 38 | 39 | pub fn multi_digit_integer_test() { 40 | use run <- 
should("lex a multi digit integer") 41 | 42 | let input = "123" 43 | let expected = [Token(Span(1, 1, 1, 4), "123", Int(123))] 44 | 45 | run(input, expected, [lexer.int(Int)]) 46 | } 47 | 48 | pub fn multi_digit_negative_integer_test() { 49 | use run <- should("lex a multi digit negative integer") 50 | 51 | let input = "-123" 52 | let expected = [Token(Span(1, 1, 1, 5), "-123", Int(-123))] 53 | 54 | run(input, expected, [lexer.int(Int)]) 55 | } 56 | 57 | // FLOAT TESTS ----------------------------------------------------------------- 58 | 59 | pub fn single_digit_float_test() { 60 | use run <- should("lex a single digit float") 61 | 62 | let input = "1.0" 63 | let expected = [Token(Span(1, 1, 1, 4), "1.0", Num(1.0))] 64 | 65 | run(input, expected, [lexer.float(Num)]) 66 | } 67 | 68 | pub fn single_nagive_digit_float_test() { 69 | use run <- should("lex a single digit negative float") 70 | 71 | let input = "-1.0" 72 | let expected = [Token(Span(1, 1, 1, 5), "-1.0", Num(-1.0))] 73 | 74 | run(input, expected, [lexer.float(Num)]) 75 | } 76 | 77 | pub fn multi_digit_float_test() { 78 | use run <- should("lex a multi digit float") 79 | 80 | let input = "123.456" 81 | let expected = [Token(Span(1, 1, 1, 8), "123.456", Num(123.456))] 82 | 83 | run(input, expected, [lexer.float(Num)]) 84 | } 85 | 86 | pub fn multi_digit_negative_float_test() { 87 | use run <- should("lex a multi digit negative float") 88 | 89 | let input = "-123.456" 90 | let expected = [Token(Span(1, 1, 1, 9), "-123.456", Num(-123.456))] 91 | 92 | run(input, expected, [lexer.float(Num)]) 93 | } 94 | 95 | // STRING TESTS ---------------------------------------------------------------- 96 | 97 | pub fn empty_string_test() { 98 | use run <- should("lex an empty string") 99 | 100 | let input = "''" 101 | let expected = [Token(Span(1, 1, 1, 3), "''", Str(""))] 102 | 103 | run(input, expected, [lexer.string("'", Str)]) 104 | } 105 | 106 | pub fn single_char_string_test() { 107 | use run <- should("lex a single char string") 108 | 109 | let input = "'a'" 110 | let expected = [Token(Span(1, 1, 1, 4), "'a'", Str("a"))] 111 | 112 | run(input, expected, [lexer.string("'", Str)]) 113 | } 114 | 115 | pub fn multi_char_string_test() { 116 | use run <- should("lex a multi char string") 117 | 118 | let input = "'abc'" 119 | let expected = [Token(Span(1, 1, 1, 6), "'abc'", Str("abc"))] 120 | 121 | run(input, expected, [lexer.string("'", Str)]) 122 | } 123 | 124 | pub fn escaped_string_test() { 125 | use run <- should("lex an escaped string") 126 | 127 | let input = "'\\''" 128 | let expected = [Token(Span(1, 1, 1, 5), "'\\''", Str("\\'"))] 129 | 130 | run(input, expected, [lexer.string("'", Str)]) 131 | } 132 | 133 | pub fn multiline_string_test() { 134 | use run <- should("lex a multi-line string") 135 | 136 | let input = string.join(["'this is a", "multi-line string'"], "\n") 137 | let expected = [ 138 | Token(Span(1, 1, 2, 19), input, Str("this is a\nmulti-line string")), 139 | ] 140 | 141 | run(input, expected, [lexer.string("'", Str)]) 142 | } 143 | 144 | // KEYWORD TESTS --------------------------------------------------------------- 145 | 146 | pub fn keyword_test() { 147 | use run <- should("lex a keyword") 148 | 149 | let input = "in" 150 | let expected = [Token(Span(1, 1, 1, 3), "in", Kwd("in"))] 151 | 152 | run(input, expected, [lexer.keyword("in", "\\s", Kwd("in"))]) 153 | } 154 | 155 | pub fn keyword_breaker_test() { 156 | use run <- should("lex a keyword in an identifier") 157 | 158 | let input = "as assert" 159 | let expected = [ 
160 | Token(Span(1, 1, 1, 3), "as", Kwd("as")), 161 | Token(Span(1, 4, 1, 10), "assert", Var("assert")), 162 | ] 163 | 164 | run(input, expected, [ 165 | lexer.keyword("as", "\\s", Kwd("as")), 166 | lexer.keyword("assert", "\\s", Var("assert")), 167 | lexer.token(" ", Nil) 168 | |> lexer.ignore(), 169 | ]) 170 | } 171 | 172 | // VARIABLE TESTS -------------------------------------------------------------- 173 | 174 | pub fn single_char_variable_test() { 175 | use run <- should("lex a single char variable") 176 | 177 | let input = "a" 178 | let expected = [Token(Span(1, 1, 1, 2), "a", Var("a"))] 179 | 180 | run(input, expected, [lexer.variable(set.new(), Var)]) 181 | } 182 | 183 | pub fn multi_char_variable_test() { 184 | use run <- should("lex a multi char variable") 185 | 186 | let input = "abc" 187 | let expected = [Token(Span(1, 1, 1, 4), "abc", Var("abc"))] 188 | 189 | run(input, expected, [lexer.variable(set.new(), Var)]) 190 | } 191 | 192 | pub fn multi_char_variable_with_numbers_and_underscores_test() { 193 | use run <- should("lex a multi char variable with numbers and underscores") 194 | 195 | let input = "abc_123" 196 | let expected = [Token(Span(1, 1, 1, 8), "abc_123", Var("abc_123"))] 197 | 198 | run(input, expected, [lexer.variable(set.new(), Var)]) 199 | } 200 | 201 | pub fn variable_containing_keyword_test() { 202 | use run <- should("lex a variable containing a keyword") 203 | 204 | let input = "insert" 205 | let expected = [Token(Span(1, 1, 1, 7), "insert", Var("insert"))] 206 | 207 | run(input, expected, [ 208 | lexer.keyword("in", "\\s", Kwd("in")), 209 | lexer.variable(set.from_list(["int"]), Var), 210 | ]) 211 | } 212 | 213 | // UTILS ----------------------------------------------------------------------- 214 | 215 | fn should( 216 | description: String, 217 | run: fn(fn(String, List(Token(a)), List(Matcher(a, Nil))) -> Nil) -> Nil, 218 | ) -> Nil { 219 | use input, expected, matchers <- run 220 | 221 | io.print("should " <> description) 222 | 223 | lexer.simple(matchers) 224 | |> lexer.run(input, _) 225 | |> should.be_ok 226 | |> should.equal(expected) 227 | 228 | io.println(" ✅") 229 | } 230 | -------------------------------------------------------------------------------- /test/unit/parser_test.gleam: -------------------------------------------------------------------------------- 1 | // IMPORTS --------------------------------------------------------------------- 2 | 3 | import gleam/io 4 | import gleam/list 5 | import gleeunit/should 6 | import nibble.{type Parser, do, return} 7 | import nibble/lexer.{type Token} 8 | 9 | // STRESS TESTS ---------------------------------------------------------------- 10 | 11 | pub fn do_tco_test() { 12 | use run <- should("Not stack overflow with many chained `do` calls") 13 | let input = [] 14 | let expected = 0 15 | let parser = 16 | list.repeat(return, 1_000_000) 17 | |> list.fold(return(0), do) 18 | 19 | run(input, expected, parser) 20 | } 21 | 22 | // UTILS ----------------------------------------------------------------------- 23 | 24 | fn should( 25 | description: String, 26 | run: fn(fn(List(Token(tok)), a, Parser(a, tok, ctx)) -> Nil) -> Nil, 27 | ) -> Nil { 28 | use input, expected, parser <- run 29 | 30 | io.print("should " <> description) 31 | 32 | nibble.run(input, parser) 33 | |> should.be_ok 34 | |> should.equal(expected) 35 | 36 | io.println(" ✅") 37 | } 38 | --------------------------------------------------------------------------------
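The test modules above all follow the same three-step pipeline: lex the input string, run a parser over the resulting tokens, then assert on the value with gleeunit's `should`. The sketch below is an illustrative example only — not a file in the repository — that inlines that pipeline into one self-contained test which parses a comma-separated list of integers. It uses only calls that already appear in `test/examples/simple_test.gleam` (`lexer.simple`, `lexer.int`, `lexer.token`, `lexer.ignore`, `nibble.take_map`, `nibble.sequence`, `nibble.run`); the token type, test name, and module layout are made up for the sketch.

```gleam
import gleam/option.{None, Some}
import gleeunit/should
import nibble
import nibble/lexer

// Token type for this sketch: integers and commas.
pub type TokenT {
  NumT(Int)
  CommaT
}

fn lexer() {
  lexer.simple([
    lexer.int(NumT),
    lexer.token(",", CommaT),
    // Whitespace is matched but dropped from the token stream.
    lexer.whitespace(Nil)
      |> lexer.ignore,
  ])
}

fn number_parser() {
  // `take_map` consumes one token and succeeds only when the callback
  // returns `Some`.
  use tok <- nibble.take_map("expected a number")
  case tok {
    NumT(n) -> Some(n)
    _ -> None
  }
}

pub fn int_list_test() {
  let assert Ok(tokens) = lexer.run("1, 2, 3", lexer())

  // `sequence` parses items separated by the given separator parser.
  nibble.run(tokens, nibble.sequence(number_parser(), nibble.token(CommaT)))
  |> should.be_ok
  |> should.equal([1, 2, 3])
}
```

The `use run <- should("...")` helpers defined in each test module are a lightweight way to share exactly this lex/parse/assert boilerplate and print a labelled "should ..." line per test; the sketch above simply inlines that boilerplate instead of defining a helper.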