├── .github ├── FUNDING.yml └── workflows │ └── rust.yml ├── .gitignore ├── misc └── example.png ├── examples ├── sample.foo ├── sample.bf ├── sample.json ├── sample.nrs ├── brainfuck.rs ├── foo.rs ├── json.rs └── nano_rust.rs ├── LICENSE ├── Cargo.toml ├── src ├── chain.rs ├── span.rs ├── debug.rs ├── recursive.rs ├── text.rs ├── stream.rs ├── recovery.rs ├── error.rs └── primitive.rs ├── CHANGELOG.md ├── benches └── json.rs ├── README.md └── tutorial.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [zesterer] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | flamegraph.svg 4 | perf.data* 5 | -------------------------------------------------------------------------------- /misc/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erikdesjardins/chumsky/master/misc/example.png -------------------------------------------------------------------------------- /examples/sample.foo: -------------------------------------------------------------------------------- 1 | let five = 5; 2 | let eight = 3 + five; 3 | fn add x y = x + y; 4 | add(five, eight) 5 | -------------------------------------------------------------------------------- /examples/sample.bf: -------------------------------------------------------------------------------- 1 | --[>--->->->++>-<<<<<-------]>--.>---------.>--..+++.>----.>+++++++++.<<.+++.------.<-.>>+. 2 | -------------------------------------------------------------------------------- /examples/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "leaving": { 3 | "tail": [{ 4 | -2063823378.8597813, 5 | !true, 6 | false, 7 | !null,! 
8 | -153646.6402, 9 | "board" 10 | ], 11 | "fed": -283765067.9149623, 12 | "cowboy": --355139449, 13 | "although": 794127593.3922591, 14 | "front": "college", 15 | "origin": 981339097 16 | }, 17 | "though": ~true, 18 | "invalid": "\uDFFF", 19 | "activity": "value", 20 | "office": -342325541.1937506, 21 | "noise": fallse, 22 | "acres": "home", 23 | "foo": [}] 24 | } 25 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | check: 14 | name: Check Chumsky 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Install latest nightly 19 | uses: actions-rs/toolchain@v1 20 | with: 21 | toolchain: stable 22 | override: true 23 | components: rustfmt, clippy 24 | - name: Run cargo check 25 | run: cargo check --verbose --no-default-features 26 | test: 27 | name: Test Chumsky 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v2 31 | - name: Install latest nightly 32 | uses: actions-rs/toolchain@v1 33 | with: 34 | toolchain: nightly 35 | override: true 36 | components: rustfmt, clippy 37 | - name: Run cargo check 38 | run: cargo test --verbose --all-features 39 | -------------------------------------------------------------------------------- /examples/sample.nrs: -------------------------------------------------------------------------------- 1 | // Run this example with `cargo run --example nano_rust -- examples/sample.nrs` 2 | // Feel free to play around with this sample to see what errors you can generate! 3 | // Spans are propagated to the interpreted AST so you can even invoke runtime 4 | // errors and still have an error message that points to source code emitted! 5 | 6 | // Calculate the factorial of a number 7 | fn factorial(x) { 8 | // Conditionals are supported! 9 | if x == 0 { 10 | 1 11 | } else { 12 | x * factorial(x - 1) 13 | } 14 | } 15 | 16 | // The main function 17 | fn main() { 18 | let three = 3; 19 | let meaning_of_life = three * 14 + 1; 20 | 21 | print("Hello, world!"); 22 | print("The meaning of life is..."); 23 | 24 | if meaning_of_life == 42 { 25 | print(meaning_of_life); 26 | } else { 27 | print("...something we cannot know"); 28 | 29 | print("However, I can tell you that the factorial of 10 is..."); 30 | // Function calling 31 | print(factorial(10)); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Joshua Barretto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "chumsky" 3 | version = "0.8.0" 4 | description = "A parser library for humans with powerful error recovery" 5 | authors = ["Joshua Barretto "] 6 | repository = "https://github.com/zesterer/chumsky" 7 | license = "MIT" 8 | keywords = ["parser", "combinator", "token", "language", "syntax"] 9 | categories = ["parsing", "text-processing"] 10 | edition = "2018" 11 | exclude = [ 12 | "/misc/*", 13 | "/benches/*", 14 | ] 15 | 16 | [features] 17 | default = ["ahash", "std", "spill-stack"] 18 | # Use `ahash` instead of the standard hasher for maintaining sets of expected inputs 19 | # (Also used if `std` is disabled) 20 | ahash = [] 21 | # Integrate with the standard library 22 | std = [] 23 | # Enable nightly-only features like better compiler diagnostics 24 | nightly = [] 25 | # Allows deeper recursion by dynamically spilling stack state on to the heap 26 | spill-stack = ["stacker", "std"] 27 | 28 | [dependencies] 29 | # Used if `std` is disabled. 30 | # Provides `ahash` for the corresponding feature as it uses it by default. 31 | # Due to https://github.com/rust-lang/cargo/issues/1839, this can't be optional 32 | hashbrown = "0.11" 33 | stacker = { version = "0.1", optional = true } 34 | 35 | [dev-dependencies] 36 | ariadne = "0.1.2" 37 | pom = "3.0" 38 | -------------------------------------------------------------------------------- /examples/brainfuck.rs: -------------------------------------------------------------------------------- 1 | //! This is a Brainfuck parser and interpreter 2 | //! Run it with the following command: 3 | //! 
cargo run --example brainfuck -- examples/sample.bf 4 | 5 | use chumsky::prelude::*; 6 | use std::{ 7 | env, fs, 8 | io::{self, Read}, 9 | }; 10 | 11 | #[derive(Clone)] 12 | enum Instr { 13 | Invalid, 14 | Left, 15 | Right, 16 | Incr, 17 | Decr, 18 | Read, 19 | Write, 20 | Loop(Vec<Self>), 21 | } 22 | 23 | fn parser() -> impl Parser<char, Vec<Instr>, Error = Simple<char>> { 24 | use Instr::*; 25 | recursive(|bf| { 26 | choice(( 27 | just('<').to(Left), 28 | just('>').to(Right), 29 | just('+').to(Incr), 30 | just('-').to(Decr), 31 | just(',').to(Read), 32 | just('.').to(Write), 33 | )) 34 | .or(bf.delimited_by(just('['), just(']')).map(Loop)) 35 | .recover_with(nested_delimiters('[', ']', [], |_| Invalid)) 36 | .recover_with(skip_then_retry_until([']'])) 37 | .repeated() 38 | }) 39 | .then_ignore(end()) 40 | } 41 | 42 | const TAPE_LEN: usize = 10_000; 43 | 44 | fn execute(ast: &[Instr], ptr: &mut usize, tape: &mut [u8; TAPE_LEN]) { 45 | use Instr::*; 46 | for symbol in ast { 47 | match symbol { 48 | Invalid => unreachable!(), 49 | Left => *ptr = (*ptr + TAPE_LEN - 1).rem_euclid(TAPE_LEN), 50 | Right => *ptr = (*ptr + 1).rem_euclid(TAPE_LEN), 51 | Incr => tape[*ptr] = tape[*ptr].wrapping_add(1), 52 | Decr => tape[*ptr] = tape[*ptr].wrapping_sub(1), 53 | Read => tape[*ptr] = io::stdin().bytes().next().unwrap().unwrap(), 54 | Write => print!("{}", tape[*ptr] as char), 55 | Loop(ast) => { 56 | while tape[*ptr] != 0 { 57 | execute(ast, ptr, tape) 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | fn main() { 65 | let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument")) 66 | .expect("Failed to read file"); 67 | 68 | // let src = "[!]+"; 69 | match parser().parse(src.trim()) { 70 | Ok(ast) => execute(&ast, &mut 0, &mut [0; TAPE_LEN]), 71 | Err(errs) => errs.into_iter().for_each(|e| println!("{:?}", e)), 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/chain.rs: -------------------------------------------------------------------------------- 1 | //! Traits that allow chaining parser outputs together. 2 | //! 3 | //! *“And what’s happened to the Earth?” “Ah. It’s been demolished.” “Has it,” said Arthur levelly. “Yes. It just 4 | //! boiled away into space.” “Look,” said Arthur, “I’m a bit upset about that.”* 5 | //! 6 | //! You usually don't need to interact with this trait, or even import it. It's only public so that you can see which 7 | //! types implement it. See [`Parser::chain`](super::Parser) for examples of its usage. 8 | 9 | use alloc::{string::String, vec::Vec}; 10 | 11 | mod private { 12 | pub trait Sealed {} 13 | 14 | impl<T> Sealed for T {} 15 | impl<T, A: super::Chain<T>> Sealed for (A, T) {} 16 | impl<T> Sealed for Option<T> {} 17 | impl<T> Sealed for alloc::vec::Vec<T> {} 18 | impl Sealed for alloc::string::String {} 19 | } 20 | 21 | /// A utility trait that facilitates chaining parser outputs together into [`Vec`]s. 22 | /// 23 | /// See [`Parser::chain`](super::Parser). 24 | #[allow(clippy::len_without_is_empty)] 25 | pub trait Chain<T>: private::Sealed { 26 | /// The number of items that this chain link consists of. 27 | fn len(&self) -> usize; 28 | /// Append the elements in this link to the chain.
29 | fn append_to(self, v: &mut Vec<T>); 30 | } 31 | 32 | impl<T> Chain<T> for T { 33 | fn len(&self) -> usize { 34 | 1 35 | } 36 | fn append_to(self, v: &mut Vec<T>) { 37 | v.push(self); 38 | } 39 | } 40 | 41 | impl<T, A: Chain<T>> Chain<T> for (A, T) { 42 | fn len(&self) -> usize { 43 | 1 44 | } 45 | fn append_to(self, v: &mut Vec<T>) { 46 | self.0.append_to(v); 47 | v.push(self.1); 48 | } 49 | } 50 | 51 | impl<T> Chain<T> for Option<T> { 52 | fn len(&self) -> usize { 53 | self.is_some() as usize 54 | } 55 | fn append_to(self, v: &mut Vec<T>) { 56 | if let Some(x) = self { 57 | v.push(x); 58 | } 59 | } 60 | } 61 | 62 | impl<T> Chain<T> for Vec<T> { 63 | fn len(&self) -> usize { 64 | self.as_slice().len() 65 | } 66 | fn append_to(mut self, v: &mut Vec<T>) { 67 | v.append(&mut self); 68 | } 69 | } 70 | 71 | impl Chain<char> for String { 72 | // TODO: Quite inefficient 73 | fn len(&self) -> usize { 74 | self.chars().count() 75 | } 76 | fn append_to(self, v: &mut Vec<char>) { 77 | v.extend(self.chars()); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/span.rs: -------------------------------------------------------------------------------- 1 | //! Types and traits related to spans. 2 | //! 3 | //! *“We demand rigidly defined areas of doubt and uncertainty!”* 4 | //! 5 | //! You can use the [`Span`] trait to connect up chumsky to your compiler's knowledge of the input source. 6 | 7 | use core::ops::Range; 8 | 9 | /// A trait that describes a span over a particular range of inputs. 10 | /// 11 | /// Spans typically consist of some context, such as the file they originated from, and a start/end offset. Spans are 12 | /// permitted to overlap one-another. The end offset must always be greater than or equal to the start offset. 13 | /// 14 | /// Span is automatically implemented for [`Range<T>`] and [`(C, Range<T>)`]. 15 | pub trait Span: Clone { 16 | /// Extra context used in a span. 17 | /// 18 | /// This is usually some way to uniquely identify the source file that a span originated in, such as the file's 19 | /// path, URL, etc. 20 | /// 21 | /// NOTE: Span contexts have no inherent meaning to Chumsky and can be anything. For example, [`Range`]'s 22 | /// implementation of [`Span`] simply uses [`()`] as its context. 23 | type Context: Clone; 24 | 25 | /// A type representing a span's start or end offset from the start of the input. 26 | /// 27 | /// Typically, [`usize`] is used. 28 | /// 29 | /// NOTE: Offsets have no inherent meaning to Chumsky and are not used to decide how to prioritise errors. This 30 | /// means that it's perfectly fine for tokens to have non-continuous spans that bear no relation to their actual 31 | /// location in the input stream. This is useful for languages with an AST-level macro system that need to 32 | /// correctly point to symbols in the macro input when producing errors. 33 | type Offset: Clone; 34 | 35 | /// Create a new span given a context and an offset range. 36 | fn new(context: Self::Context, range: Range<Self::Offset>) -> Self; 37 | 38 | /// Return the span's context. 39 | fn context(&self) -> Self::Context; 40 | 41 | /// Return the start offset of the span. 42 | fn start(&self) -> Self::Offset; 43 | 44 | /// Return the end offset of the span.
45 | fn end(&self) -> Self::Offset; 46 | } 47 | 48 | impl Span for Range { 49 | type Context = (); 50 | type Offset = T; 51 | 52 | fn new((): Self::Context, range: Self) -> Self { 53 | range 54 | } 55 | fn context(&self) -> Self::Context {} 56 | fn start(&self) -> Self::Offset { 57 | self.start.clone() 58 | } 59 | fn end(&self) -> Self::Offset { 60 | self.end.clone() 61 | } 62 | } 63 | 64 | impl Span for (C, Range) { 65 | type Context = C; 66 | type Offset = T; 67 | 68 | fn new(context: Self::Context, range: Range) -> Self { 69 | (context, range) 70 | } 71 | fn context(&self) -> Self::Context { 72 | self.0.clone() 73 | } 74 | fn start(&self) -> Self::Offset { 75 | self.1.start.clone() 76 | } 77 | fn end(&self) -> Self::Offset { 78 | self.1.end.clone() 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | # Unreleased 9 | 10 | ### Added 11 | 12 | ### Removed 13 | 14 | ### Changed 15 | 16 | ### Fixed 17 | 18 | # [0.8.0] - 2022-02-07 19 | 20 | ### Added 21 | 22 | - `then_with` combinator to allow limited support for parsing nested patterns 23 | - impl From<&[T; N]> for Stream 24 | - `SkipUntil/SkipThenRetryUntil::skip_start/consume_end` for more precise control over skip-based recovery 25 | 26 | ### Changed 27 | 28 | - Allowed `Validate` to map the output type 29 | - Switched to zero-size End Of Input spans for default implementations of `Stream` 30 | - Made `delimited_by` take combinators instead of specific tokens 31 | - Minor optimisations 32 | - Documentation improvements 33 | 34 | ### Fixed 35 | 36 | - Compilation error with `--no-default-features` 37 | - Made default behaviour of `skip_until` more sensible 38 | 39 | # [0.7.0] - 2021-12-16 40 | 41 | ### Added 42 | 43 | - A new [tutorial](tutorial.md) to help new users 44 | 45 | - `select` macro, a wrapper over `filter_map` that makes extracting data from specific tokens easy 46 | - `choice` parser, a better alternative to long `or` chains (which sometimes have poor compilation performance) 47 | - `todo` parser, that panics when used (but not when created) (akin to Rust's `todo!` macro, but for parsers) 48 | - `keyword` parser, that parses *exact* identifiers 49 | 50 | - `from_str` combinator to allow converting a pattern to a value inline, using `std::str::FromStr` 51 | - `unwrapped` combinator, to automatically unwrap an output value inline 52 | - `rewind` combinator, that allows reverting the input stream on success. 
It's most useful when requiring that a 53 | pattern is followed by some terminating pattern without the first parser greedily consuming it 54 | - `map_err_with_span` combinator, to allow fetching the span of the input that was parsed by a parser before an error 55 | was encountered 56 | 57 | - `or_else` combinator, to allow processing and potentially recovering from a parser error 58 | - `SeparatedBy::at_most` to require that a separated pattern appear at most a specific number of times 59 | - `SeparatedBy::exactly` to require that a separated pattern be repeated exactly a specific number of times 60 | - `Repeated::exactly` to require that a pattern be repeated exactly a specific number of times 61 | 62 | - More trait implementations for various things, making the crate more useful 63 | 64 | ### Changed 65 | 66 | - Made `just`, `one_of`, and `none_of` significant more useful. They can now accept strings, arrays, slices, vectors, 67 | sets, or just single tokens as before 68 | - Added the return type of each parser to its documentation 69 | - More explicit documentation of parser behaviour 70 | - More doc examples 71 | - Deprecated `seq` (`just` has been generalised and can now be used to parse specific input sequences) 72 | - Sealed the `Character` trait so that future changes are not breaking 73 | - Sealed the `Chain` trait and made it more powerful 74 | - Moved trait constraints on `Parser` to where clauses for improved readability 75 | 76 | ### Fixed 77 | 78 | - Fixed a subtle bug that allowed `separated_by` to parse an extra trailing separator when it shouldn't 79 | - Filled a 'hole' in the `Error` trait's API that conflated a lack of expected tokens with expectation of end of input 80 | - Made recursive parsers use weak reference-counting to avoid memory leaks 81 | 82 | # [0.6.0] - 2021-11-22 83 | 84 | ### Added 85 | 86 | - `skip_until` error recovery strategy 87 | - `SeparatedBy::at_least` and `SeparatedBy::at_most` for parsing a specific number of separated items 88 | - `Parser::validate` for integrated AST validation 89 | - `Recursive::declare` and `Recursive::define` for more precise control over recursive declarations 90 | 91 | ### Changed 92 | 93 | - Improved `separated_by` error messages 94 | - Improved documentation 95 | - Hid a new (probably) unused implementation details 96 | 97 | # [0.5.0] - 2021-10-30 98 | 99 | ### Added 100 | 101 | - `take_until` primitive 102 | 103 | ### Changed 104 | 105 | - Added span to fallback output function in `nested_delimiters` 106 | 107 | # [0.4.0] - 2021-10-28 108 | 109 | ### Added 110 | 111 | - Support for LL(k) parsing 112 | - Custom error recovery strategies 113 | - Debug mode 114 | - Nested input flattening 115 | 116 | ### Changed 117 | 118 | - Radically improved error quality 119 | -------------------------------------------------------------------------------- /src/debug.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for debugging parsers. 2 | //! 3 | //! *“He was staring at the instruments with the air of one who is trying to convert Fahrenheit to centigrade in his 4 | //! head while his house is burning down.”* 5 | 6 | use super::*; 7 | 8 | use alloc::borrow::Cow; 9 | use core::panic::Location; 10 | 11 | /// Information about a specific parser. 
12 | #[allow(dead_code)] 13 | pub struct ParserInfo { 14 | name: Cow<'static, str>, 15 | display: Rc, 16 | location: Location<'static>, 17 | } 18 | 19 | impl ParserInfo { 20 | pub(crate) fn new( 21 | name: impl Into>, 22 | display: Rc, 23 | location: Location<'static>, 24 | ) -> Self { 25 | Self { 26 | name: name.into(), 27 | display, 28 | location, 29 | } 30 | } 31 | } 32 | 33 | /// An event that occurred during parsing. 34 | pub enum ParseEvent { 35 | /// Debugging information was emitted. 36 | Info(String), 37 | } 38 | 39 | /// A trait implemented by parser debuggers. 40 | #[deprecated( 41 | note = "This trait is excluded from the semver guarantees of chumsky. If you decide to use it, broken builds are your fault." 42 | )] 43 | pub trait Debugger { 44 | /// Create a new debugging scope. 45 | fn scope ParserInfo, F: FnOnce(&mut Self) -> R>( 46 | &mut self, 47 | info: Info, 48 | f: F, 49 | ) -> R; 50 | /// Emit a parse event, if the debugger supports them. 51 | fn emit_with ParseEvent>(&mut self, f: F); 52 | /// Invoke the given parser with a mode specific to this debugger. 53 | fn invoke + ?Sized>( 54 | &mut self, 55 | parser: &P, 56 | stream: &mut StreamOf, 57 | ) -> PResult; 58 | } 59 | 60 | /// A verbose debugger that emits debugging messages to the console. 61 | pub struct Verbose { 62 | // TODO: Don't use `Result`, that's silly 63 | events: Vec>, 64 | } 65 | 66 | impl Verbose { 67 | pub(crate) fn new() -> Self { 68 | Self { events: Vec::new() } 69 | } 70 | 71 | #[allow(unused_variables)] 72 | fn print_inner(&self, depth: usize) { 73 | // a no-op on no_std! 74 | #[cfg(feature = "std")] 75 | for event in &self.events { 76 | for _ in 0..depth * 4 { 77 | print!(" "); 78 | } 79 | match event { 80 | Ok(ParseEvent::Info(s)) => println!("{}", s), 81 | Err((info, scope)) => { 82 | println!( 83 | "Entered {} at line {} in {}", 84 | info.display, 85 | info.location.line(), 86 | info.location.file() 87 | ); 88 | scope.print_inner(depth + 1); 89 | } 90 | } 91 | } 92 | } 93 | 94 | pub(crate) fn print(&self) { 95 | self.print_inner(0) 96 | } 97 | } 98 | 99 | impl Debugger for Verbose { 100 | fn scope ParserInfo, F: FnOnce(&mut Self) -> R>( 101 | &mut self, 102 | info: Info, 103 | f: F, 104 | ) -> R { 105 | let mut verbose = Verbose { events: Vec::new() }; 106 | let res = f(&mut verbose); 107 | self.events.push(Err((info(), verbose))); 108 | res 109 | } 110 | 111 | fn emit_with ParseEvent>(&mut self, f: F) { 112 | self.events.push(Ok(f())); 113 | } 114 | 115 | fn invoke + ?Sized>( 116 | &mut self, 117 | parser: &P, 118 | stream: &mut StreamOf, 119 | ) -> PResult { 120 | parser.parse_inner_verbose(self, stream) 121 | } 122 | } 123 | 124 | /// A silent debugger that emits no debugging messages nor collects any debugging data. 
125 | pub struct Silent { 126 | phantom: PhantomData<()>, 127 | } 128 | 129 | impl Silent { 130 | pub(crate) fn new() -> Self { 131 | Self { 132 | phantom: PhantomData, 133 | } 134 | } 135 | } 136 | 137 | impl Debugger for Silent { 138 | fn scope ParserInfo, F: FnOnce(&mut Self) -> R>( 139 | &mut self, 140 | _: Info, 141 | f: F, 142 | ) -> R { 143 | f(self) 144 | } 145 | fn emit_with ParseEvent>(&mut self, _: F) {} 146 | 147 | fn invoke + ?Sized>( 148 | &mut self, 149 | parser: &P, 150 | stream: &mut StreamOf, 151 | ) -> PResult { 152 | parser.parse_inner_silent(self, stream) 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /benches/json.rs: -------------------------------------------------------------------------------- 1 | #![feature(test, array_methods)] 2 | 3 | extern crate test; 4 | 5 | use test::{black_box, Bencher}; 6 | 7 | #[derive(Debug, Clone, PartialEq)] 8 | pub enum Json { 9 | Null, 10 | Bool(bool), 11 | Str(String), 12 | Num(f64), 13 | Array(Vec), 14 | Object(Vec<(String, Json)>), 15 | } 16 | 17 | static JSON: &'static [u8] = include_bytes!("sample.json"); 18 | 19 | #[bench] 20 | fn chumsky(b: &mut Bencher) { 21 | use ::chumsky::prelude::*; 22 | 23 | let json = chumsky::json(); 24 | b.iter(|| black_box(json.parse(JSON).unwrap())); 25 | } 26 | 27 | #[bench] 28 | fn pom(b: &mut Bencher) { 29 | let json = pom::json(); 30 | b.iter(|| black_box(json.parse(JSON).unwrap())); 31 | } 32 | 33 | mod chumsky { 34 | use chumsky::{error::Cheap, prelude::*}; 35 | 36 | use super::Json; 37 | use std::str; 38 | 39 | pub fn json() -> impl Parser> { 40 | recursive(|value| { 41 | let frac = just(b'.').chain(text::digits(10)); 42 | 43 | let exp = one_of(b"eE") 44 | .ignore_then(just(b'+').or(just(b'-')).or_not()) 45 | .chain(text::digits(10)); 46 | 47 | let number = just(b'-') 48 | .or_not() 49 | .chain(text::int(10)) 50 | .chain(frac.or_not().flatten()) 51 | .chain::(exp.or_not().flatten()) 52 | .map(|bytes| str::from_utf8(&bytes.as_slice()).unwrap().parse().unwrap()); 53 | 54 | let escape = just(b'\\').ignore_then(choice(( 55 | just(b'\\'), 56 | just(b'/'), 57 | just(b'"'), 58 | just(b'b').to(b'\x08'), 59 | just(b'f').to(b'\x0C'), 60 | just(b'n').to(b'\n'), 61 | just(b'r').to(b'\r'), 62 | just(b't').to(b'\t'), 63 | ))); 64 | 65 | let string = just(b'"') 66 | .ignore_then(filter(|c| *c != b'\\' && *c != b'"').or(escape).repeated()) 67 | .then_ignore(just(b'"')) 68 | .map(|bytes| String::from_utf8(bytes).unwrap()); 69 | 70 | let array = value 71 | .clone() 72 | .separated_by(just(b',').padded()) 73 | .padded() 74 | .delimited_by(just(b'['), just(b']')) 75 | .map(Json::Array); 76 | 77 | let member = string.then_ignore(just(b':').padded()).then(value); 78 | let object = member 79 | .separated_by(just(b',').padded()) 80 | .padded() 81 | .delimited_by(just(b'{'), just(b'}')) 82 | .collect::>() 83 | .map(Json::Object); 84 | 85 | choice(( 86 | just(b"null").to(Json::Null), 87 | just(b"true").to(Json::Bool(true)), 88 | just(b"false").to(Json::Bool(false)), 89 | number.map(Json::Num), 90 | string.map(Json::Str), 91 | array, 92 | object, 93 | )) 94 | .padded() 95 | }) 96 | .then_ignore(end()) 97 | } 98 | } 99 | 100 | mod pom { 101 | use pom::parser::*; 102 | use pom::Parser; 103 | 104 | use super::Json; 105 | use std::str::{self, FromStr}; 106 | 107 | fn space() -> Parser { 108 | one_of(b" \t\r\n").repeat(0..).discard() 109 | } 110 | 111 | fn number() -> Parser { 112 | let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) 
| sym(b'0'); 113 | let frac = sym(b'.') + one_of(b"0123456789").repeat(1..); 114 | let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); 115 | let number = sym(b'-').opt() + integer + frac.opt() + exp.opt(); 116 | number 117 | .collect() 118 | .convert(str::from_utf8) 119 | .convert(|s| f64::from_str(&s)) 120 | } 121 | 122 | fn string() -> Parser { 123 | let special_char = sym(b'\\') 124 | | sym(b'/') 125 | | sym(b'"') 126 | | sym(b'b').map(|_| b'\x08') 127 | | sym(b'f').map(|_| b'\x0C') 128 | | sym(b'n').map(|_| b'\n') 129 | | sym(b'r').map(|_| b'\r') 130 | | sym(b't').map(|_| b'\t'); 131 | let escape_sequence = sym(b'\\') * special_char; 132 | let string = sym(b'"') * (none_of(b"\\\"") | escape_sequence).repeat(0..) - sym(b'"'); 133 | string.convert(String::from_utf8) 134 | } 135 | 136 | fn array() -> Parser> { 137 | let elems = list(call(value), sym(b',') * space()); 138 | sym(b'[') * space() * elems - sym(b']') 139 | } 140 | 141 | fn object() -> Parser> { 142 | let member = string() - space() - sym(b':') - space() + call(value); 143 | let members = list(member, sym(b',') * space()); 144 | let obj = sym(b'{') * space() * members - sym(b'}'); 145 | obj.map(|members| members.into_iter().collect::>()) 146 | } 147 | 148 | fn value() -> Parser { 149 | (seq(b"null").map(|_| Json::Null) 150 | | seq(b"true").map(|_| Json::Bool(true)) 151 | | seq(b"false").map(|_| Json::Bool(false)) 152 | | number().map(|num| Json::Num(num)) 153 | | string().map(|text| Json::Str(text)) 154 | | array().map(|arr| Json::Array(arr)) 155 | | object().map(|obj| Json::Object(obj))) 156 | - space() 157 | } 158 | 159 | pub fn json() -> Parser { 160 | space() * value() - end() 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /examples/foo.rs: -------------------------------------------------------------------------------- 1 | /// This is the parser and interpreter for the 'Foo' language. See `tutorial.md` in the repository's root to learn 2 | /// about it. 
3 | use chumsky::prelude::*; 4 | 5 | #[derive(Debug)] 6 | enum Expr { 7 | Num(f64), 8 | Var(String), 9 | 10 | Neg(Box), 11 | Add(Box, Box), 12 | Sub(Box, Box), 13 | Mul(Box, Box), 14 | Div(Box, Box), 15 | 16 | Call(String, Vec), 17 | Let { 18 | name: String, 19 | rhs: Box, 20 | then: Box, 21 | }, 22 | Fn { 23 | name: String, 24 | args: Vec, 25 | body: Box, 26 | then: Box, 27 | }, 28 | } 29 | 30 | fn parser() -> impl Parser> { 31 | let ident = text::ident().padded(); 32 | 33 | let expr = recursive(|expr| { 34 | let int = text::int(10) 35 | .map(|s: String| Expr::Num(s.parse().unwrap())) 36 | .padded(); 37 | 38 | let call = ident 39 | .then( 40 | expr.clone() 41 | .separated_by(just(',')) 42 | .allow_trailing() 43 | .delimited_by(just('('), just(')')), 44 | ) 45 | .map(|(f, args)| Expr::Call(f, args)); 46 | 47 | let atom = int 48 | .or(expr.delimited_by(just('('), just(')'))) 49 | .or(call) 50 | .or(ident.map(Expr::Var)); 51 | 52 | let op = |c| just(c).padded(); 53 | 54 | let unary = op('-') 55 | .repeated() 56 | .then(atom) 57 | .foldr(|_op, rhs| Expr::Neg(Box::new(rhs))); 58 | 59 | let product = unary 60 | .clone() 61 | .then( 62 | op('*') 63 | .to(Expr::Mul as fn(_, _) -> _) 64 | .or(op('/').to(Expr::Div as fn(_, _) -> _)) 65 | .then(unary) 66 | .repeated(), 67 | ) 68 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 69 | 70 | let sum = product 71 | .clone() 72 | .then( 73 | op('+') 74 | .to(Expr::Add as fn(_, _) -> _) 75 | .or(op('-').to(Expr::Sub as fn(_, _) -> _)) 76 | .then(product) 77 | .repeated(), 78 | ) 79 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 80 | 81 | sum.padded() 82 | }); 83 | 84 | let decl = recursive(|decl| { 85 | let r#let = text::keyword("let") 86 | .ignore_then(ident) 87 | .then_ignore(just('=')) 88 | .then(expr.clone()) 89 | .then_ignore(just(';')) 90 | .then(decl.clone()) 91 | .map(|((name, rhs), then)| Expr::Let { 92 | name, 93 | rhs: Box::new(rhs), 94 | then: Box::new(then), 95 | }); 96 | 97 | let r#fn = text::keyword("fn") 98 | .ignore_then(ident) 99 | .then(ident.repeated()) 100 | .then_ignore(just('=')) 101 | .then(expr.clone()) 102 | .then_ignore(just(';')) 103 | .then(decl) 104 | .map(|(((name, args), body), then)| Expr::Fn { 105 | name, 106 | args, 107 | body: Box::new(body), 108 | then: Box::new(then), 109 | }); 110 | 111 | r#let.or(r#fn).or(expr).padded() 112 | }); 113 | 114 | decl.then_ignore(end()) 115 | } 116 | 117 | fn eval<'a>( 118 | expr: &'a Expr, 119 | vars: &mut Vec<(&'a String, f64)>, 120 | funcs: &mut Vec<(&'a String, &'a [String], &'a Expr)>, 121 | ) -> Result { 122 | match expr { 123 | Expr::Num(x) => Ok(*x), 124 | Expr::Neg(a) => Ok(-eval(a, vars, funcs)?), 125 | Expr::Add(a, b) => Ok(eval(a, vars, funcs)? + eval(b, vars, funcs)?), 126 | Expr::Sub(a, b) => Ok(eval(a, vars, funcs)? - eval(b, vars, funcs)?), 127 | Expr::Mul(a, b) => Ok(eval(a, vars, funcs)? * eval(b, vars, funcs)?), 128 | Expr::Div(a, b) => Ok(eval(a, vars, funcs)? 
/ eval(b, vars, funcs)?), 129 | Expr::Var(name) => { 130 | if let Some((_, val)) = vars.iter().rev().find(|(var, _)| *var == name) { 131 | Ok(*val) 132 | } else { 133 | Err(format!("Cannot find variable `{}` in scope", name)) 134 | } 135 | } 136 | Expr::Let { name, rhs, then } => { 137 | let rhs = eval(rhs, vars, funcs)?; 138 | vars.push((name, rhs)); 139 | let output = eval(then, vars, funcs); 140 | vars.pop(); 141 | output 142 | } 143 | Expr::Call(name, args) => { 144 | if let Some((_, arg_names, body)) = 145 | funcs.iter().rev().find(|(var, _, _)| *var == name).copied() 146 | { 147 | if arg_names.len() == args.len() { 148 | let mut args = args 149 | .iter() 150 | .map(|arg| eval(arg, vars, funcs)) 151 | .zip(arg_names.iter()) 152 | .map(|(val, name)| Ok((name, val?))) 153 | .collect::>()?; 154 | vars.append(&mut args); 155 | let output = eval(body, vars, funcs); 156 | vars.truncate(vars.len() - args.len()); 157 | output 158 | } else { 159 | Err(format!( 160 | "Wrong number of arguments for function `{}`: expected {}, found {}", 161 | name, 162 | arg_names.len(), 163 | args.len(), 164 | )) 165 | } 166 | } else { 167 | Err(format!("Cannot find function `{}` in scope", name)) 168 | } 169 | } 170 | Expr::Fn { 171 | name, 172 | args, 173 | body, 174 | then, 175 | } => { 176 | funcs.push((name, args, body)); 177 | let output = eval(then, vars, funcs); 178 | funcs.pop(); 179 | output 180 | } 181 | } 182 | } 183 | 184 | fn main() { 185 | let src = std::fs::read_to_string(std::env::args().nth(1).unwrap()).unwrap(); 186 | 187 | match parser().parse(src) { 188 | Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) { 189 | Ok(output) => println!("{}", output), 190 | Err(eval_err) => println!("Evaluation error: {}", eval_err), 191 | }, 192 | Err(parse_errs) => parse_errs 193 | .into_iter() 194 | .for_each(|e| println!("Parse error: {}", e)), 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /examples/json.rs: -------------------------------------------------------------------------------- 1 | //! This is a parser for JSON. 2 | //! Run it with the following command: 3 | //! 
cargo run --example json -- examples/sample.json 4 | 5 | use ariadne::{Color, Fmt, Label, Report, ReportKind, Source}; 6 | use chumsky::prelude::*; 7 | use std::{collections::HashMap, env, fs}; 8 | 9 | #[derive(Clone, Debug)] 10 | enum Json { 11 | Invalid, 12 | Null, 13 | Bool(bool), 14 | Str(String), 15 | Num(f64), 16 | Array(Vec), 17 | Object(HashMap), 18 | } 19 | 20 | fn parser() -> impl Parser> { 21 | recursive(|value| { 22 | let frac = just('.').chain(text::digits(10)); 23 | 24 | let exp = just('e') 25 | .or(just('E')) 26 | .chain(just('+').or(just('-')).or_not()) 27 | .chain(text::digits(10)); 28 | 29 | let number = just('-') 30 | .or_not() 31 | .chain(text::int(10)) 32 | .chain(frac.or_not().flatten()) 33 | .chain::(exp.or_not().flatten()) 34 | .collect::() 35 | .from_str() 36 | .unwrapped() 37 | .labelled("number"); 38 | 39 | let escape = just('\\').ignore_then( 40 | just('\\') 41 | .or(just('/')) 42 | .or(just('"')) 43 | .or(just('b').to('\x08')) 44 | .or(just('f').to('\x0C')) 45 | .or(just('n').to('\n')) 46 | .or(just('r').to('\r')) 47 | .or(just('t').to('\t')) 48 | .or(just('u').ignore_then( 49 | filter(|c: &char| c.is_digit(16)) 50 | .repeated() 51 | .exactly(4) 52 | .collect::() 53 | .validate(|digits, span, emit| { 54 | char::from_u32(u32::from_str_radix(&digits, 16).unwrap()) 55 | .unwrap_or_else(|| { 56 | emit(Simple::custom(span, "invalid unicode character")); 57 | '\u{FFFD}' // unicode replacement character 58 | }) 59 | }), 60 | )), 61 | ); 62 | 63 | let string = just('"') 64 | .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) 65 | .then_ignore(just('"')) 66 | .collect::() 67 | .labelled("string"); 68 | 69 | let array = value 70 | .clone() 71 | .chain(just(',').ignore_then(value.clone()).repeated()) 72 | .or_not() 73 | .flatten() 74 | .delimited_by(just('['), just(']')) 75 | .map(Json::Array) 76 | .labelled("array"); 77 | 78 | let member = string.clone().then_ignore(just(':').padded()).then(value); 79 | let object = member 80 | .clone() 81 | .chain(just(',').padded().ignore_then(member).repeated()) 82 | .or_not() 83 | .flatten() 84 | .padded() 85 | .delimited_by(just('{'), just('}')) 86 | .collect::>() 87 | .map(Json::Object) 88 | .labelled("object"); 89 | 90 | just("null") 91 | .to(Json::Null) 92 | .labelled("null") 93 | .or(just("true").to(Json::Bool(true)).labelled("true")) 94 | .or(just("false").to(Json::Bool(false)).labelled("false")) 95 | .or(number.map(Json::Num)) 96 | .or(string.map(Json::Str)) 97 | .or(array) 98 | .or(object) 99 | .recover_with(nested_delimiters('{', '}', [('[', ']')], |_| Json::Invalid)) 100 | .recover_with(nested_delimiters('[', ']', [('{', '}')], |_| Json::Invalid)) 101 | .recover_with(skip_then_retry_until(['}', ']'])) 102 | .padded() 103 | }) 104 | .then_ignore(end().recover_with(skip_then_retry_until([]))) 105 | } 106 | 107 | fn main() { 108 | let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument")) 109 | .expect("Failed to read file"); 110 | 111 | let (json, errs) = parser().parse_recovery(src.trim()); 112 | println!("{:#?}", json); 113 | errs.into_iter().for_each(|e| { 114 | let msg = if let chumsky::error::SimpleReason::Custom(msg) = e.reason() { 115 | msg.clone() 116 | } else { 117 | format!( 118 | "{}{}, expected {}", 119 | if e.found().is_some() { 120 | "Unexpected token" 121 | } else { 122 | "Unexpected end of input" 123 | }, 124 | if let Some(label) = e.label() { 125 | format!(" while parsing {}", label) 126 | } else { 127 | String::new() 128 | }, 129 | if e.expected().len() == 0 { 130 
| "something else".to_string() 131 | } else { 132 | e.expected() 133 | .map(|expected| match expected { 134 | Some(expected) => expected.to_string(), 135 | None => "end of input".to_string(), 136 | }) 137 | .collect::>() 138 | .join(", ") 139 | }, 140 | ) 141 | }; 142 | 143 | let report = Report::build(ReportKind::Error, (), e.span().start) 144 | .with_code(3) 145 | .with_message(msg) 146 | .with_label( 147 | Label::new(e.span()) 148 | .with_message(match e.reason() { 149 | chumsky::error::SimpleReason::Custom(msg) => msg.clone(), 150 | _ => format!( 151 | "Unexpected {}", 152 | e.found() 153 | .map(|c| format!("token {}", c.fg(Color::Red))) 154 | .unwrap_or_else(|| "end of input".to_string()) 155 | ), 156 | }) 157 | .with_color(Color::Red), 158 | ); 159 | 160 | let report = match e.reason() { 161 | chumsky::error::SimpleReason::Unclosed { span, delimiter } => report.with_label( 162 | Label::new(span.clone()) 163 | .with_message(format!( 164 | "Unclosed delimiter {}", 165 | delimiter.fg(Color::Yellow) 166 | )) 167 | .with_color(Color::Yellow), 168 | ), 169 | chumsky::error::SimpleReason::Unexpected => report, 170 | chumsky::error::SimpleReason::Custom(_) => report, 171 | }; 172 | 173 | report.finish().print(Source::from(&src)).unwrap(); 174 | }); 175 | } 176 | -------------------------------------------------------------------------------- /src/recursive.rs: -------------------------------------------------------------------------------- 1 | //! Recursive parsers (parser that include themselves within their patterns). 2 | //! 3 | //! *“It's unpleasantly like being drunk." 4 | //! "What's so unpleasant about being drunk?" 5 | //! "You ask a glass of water.”* 6 | //! 7 | //! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and 8 | //! definition of parsers more corefully, particularly for mutually-recursive parsers. In such cases, the functions on 9 | //! [`Recursive`] allow for this. 10 | 11 | use super::*; 12 | 13 | use alloc::rc::{Rc, Weak}; 14 | 15 | // TODO: Remove when `OnceCell` is stable 16 | struct OnceCell(core::cell::RefCell>); 17 | impl OnceCell { 18 | pub fn new() -> Self { 19 | Self(core::cell::RefCell::new(None)) 20 | } 21 | pub fn set(&self, x: T) -> Result<(), ()> { 22 | *self.0.try_borrow_mut().map_err(|_| ())? = Some(x); 23 | Ok(()) 24 | } 25 | pub fn get(&self) -> Option> { 26 | Some(core::cell::Ref::map(self.0.borrow(), |x| { 27 | x.as_ref().unwrap() 28 | })) 29 | } 30 | } 31 | 32 | enum RecursiveInner { 33 | Owned(Rc), 34 | Unowned(Weak), 35 | } 36 | 37 | type OnceParser<'a, I, O, E> = OnceCell + 'a>>; 38 | 39 | /// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its 40 | /// [definition](Recursive::define). 41 | /// 42 | /// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible. 43 | pub struct Recursive<'a, I, O, E: Error>(RecursiveInner>); 44 | 45 | impl<'a, I: Clone, O, E: Error> Recursive<'a, I, O, E> { 46 | fn cell(&self) -> Rc> { 47 | match &self.0 { 48 | RecursiveInner::Owned(x) => x.clone(), 49 | RecursiveInner::Unowned(x) => x 50 | .upgrade() 51 | .expect("Recursive parser used before being defined"), 52 | } 53 | } 54 | 55 | /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before 56 | /// being fulled defined. 57 | /// 58 | /// Declaring a parser before defining it is required for a parser to reference itself. 
59 | /// 60 | /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser 61 | /// for parsing (i.e: via the [`Parser::parse`] method or similar). 62 | /// 63 | /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if 64 | /// possible. 65 | /// 66 | /// # Examples 67 | /// 68 | /// ``` 69 | /// # use chumsky::prelude::*; 70 | /// #[derive(Debug, PartialEq)] 71 | /// enum Chain { 72 | /// End, 73 | /// Link(char, Box), 74 | /// } 75 | /// 76 | /// // Declare the existence of the parser before defining it so that it can reference itself 77 | /// let mut chain = Recursive::<_, _, Simple>::declare(); 78 | /// 79 | /// // Define the parser in terms of itself. 80 | /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list 81 | /// chain.define(just('+') 82 | /// .then(chain.clone()) 83 | /// .map(|(c, chain)| Chain::Link(c, Box::new(chain))) 84 | /// .or_not() 85 | /// .map(|chain| chain.unwrap_or(Chain::End))); 86 | /// 87 | /// assert_eq!(chain.parse(""), Ok(Chain::End)); 88 | /// assert_eq!( 89 | /// chain.parse("++"), 90 | /// Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))), 91 | /// ); 92 | /// ``` 93 | pub fn declare() -> Self { 94 | Recursive(RecursiveInner::Owned(Rc::new(OnceCell::new()))) 95 | } 96 | 97 | /// Defines the parser after declaring it, allowing it to be used for parsing. 98 | pub fn define + 'a>(&mut self, parser: P) { 99 | self.cell() 100 | .set(Box::new(parser)) 101 | .unwrap_or_else(|_| panic!("Parser defined more than once")); 102 | } 103 | } 104 | 105 | impl<'a, I: Clone, O, E: Error> Clone for Recursive<'a, I, O, E> { 106 | fn clone(&self) -> Self { 107 | Self(match &self.0 { 108 | RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()), 109 | RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()), 110 | }) 111 | } 112 | } 113 | 114 | impl<'a, I: Clone, O, E: Error> Parser for Recursive<'a, I, O, E> { 115 | type Error = E; 116 | 117 | fn parse_inner( 118 | &self, 119 | debugger: &mut D, 120 | stream: &mut StreamOf, 121 | ) -> PResult { 122 | #[cfg(feature = "stacker")] 123 | #[inline(always)] 124 | fn recurse R>(f: F) -> R { 125 | stacker::maybe_grow(1024 * 1024, 1024 * 1024, f) 126 | } 127 | #[cfg(not(feature = "stacker"))] 128 | #[inline(always)] 129 | fn recurse R>(f: F) -> R { 130 | f() 131 | } 132 | 133 | recurse(|| { 134 | #[allow(deprecated)] 135 | debugger.invoke( 136 | self.cell() 137 | .get() 138 | .expect("Recursive parser used before being defined") 139 | .as_ref(), 140 | stream, 141 | ) 142 | }) 143 | } 144 | 145 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 146 | #[allow(deprecated)] 147 | self.parse_inner(d, s) 148 | } 149 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 150 | #[allow(deprecated)] 151 | self.parse_inner(d, s) 152 | } 153 | } 154 | 155 | /// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern). 156 | /// 157 | /// The given function must create the parser. The parser must not be used to parse input before this function returns. 158 | /// 159 | /// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`]. 160 | /// 161 | /// The output type of this parser is `O`, the same as the inner parser. 
162 | /// 163 | /// # Examples 164 | /// 165 | /// ``` 166 | /// # use chumsky::prelude::*; 167 | /// #[derive(Debug, PartialEq)] 168 | /// enum Tree { 169 | /// Leaf(String), 170 | /// Branch(Vec), 171 | /// } 172 | /// 173 | /// // Parser that recursively parses nested lists 174 | /// let tree = recursive::<_, _, _, _, Simple>(|tree| tree 175 | /// .separated_by(just(',')) 176 | /// .delimited_by(just('['), just(']')) 177 | /// .map(Tree::Branch) 178 | /// .or(text::ident().map(Tree::Leaf)) 179 | /// .padded()); 180 | /// 181 | /// assert_eq!(tree.parse("hello"), Ok(Tree::Leaf("hello".to_string()))); 182 | /// assert_eq!(tree.parse("[a, b, c]"), Ok(Tree::Branch(vec![ 183 | /// Tree::Leaf("a".to_string()), 184 | /// Tree::Leaf("b".to_string()), 185 | /// Tree::Leaf("c".to_string()), 186 | /// ]))); 187 | /// // The parser can deal with arbitrarily complex nested lists 188 | /// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]"), Ok(Tree::Branch(vec![ 189 | /// Tree::Branch(vec![ 190 | /// Tree::Leaf("a".to_string()), 191 | /// Tree::Leaf("b".to_string()), 192 | /// ]), 193 | /// Tree::Leaf("c".to_string()), 194 | /// Tree::Branch(vec![ 195 | /// Tree::Leaf("d".to_string()), 196 | /// Tree::Branch(vec![ 197 | /// Tree::Leaf("e".to_string()), 198 | /// Tree::Leaf("f".to_string()), 199 | /// ]), 200 | /// ]), 201 | /// ]))); 202 | /// ``` 203 | pub fn recursive< 204 | 'a, 205 | I: Clone, 206 | O, 207 | P: Parser + 'a, 208 | F: FnOnce(Recursive<'a, I, O, E>) -> P, 209 | E: Error, 210 | >( 211 | f: F, 212 | ) -> Recursive<'a, I, O, E> { 213 | let mut parser = Recursive::declare(); 214 | parser.define(f(Recursive(match &parser.0 { 215 | RecursiveInner::Owned(x) => RecursiveInner::Unowned(Rc::downgrade(x)), 216 | RecursiveInner::Unowned(_) => unreachable!(), 217 | }))); 218 | parser 219 | } 220 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chumsky 2 | 3 | [![crates.io](https://img.shields.io/crates/v/chumsky.svg)](https://crates.io/crates/chumsky) 4 | [![crates.io](https://docs.rs/chumsky/badge.svg)](https://docs.rs/chumsky) 5 | [![License](https://img.shields.io/crates/l/chumsky.svg)](https://github.com/zesterer/chumsky) 6 | [![actions-badge](https://github.com/zesterer/chumsky/workflows/Rust/badge.svg?branch=master)](https://github.com/zesterer/chumsky/actions) 7 | 8 | A parser library for humans with powerful error recovery. 9 | 10 | 11 | Example usage with my own language, Tao 12 | 13 | 14 | *Note: Error diagnostic rendering is performed by [Ariadne](https://github.com/zesterer/ariadne)* 15 | 16 | ## Features 17 | 18 | - Lots of combinators! 19 | - Generic across input, output, error, and span types 20 | - Powerful error recovery strategies 21 | - Inline mapping to your AST 22 | - Text-specific parsers for both `u8`s and `char`s 23 | - Recursive parsers 24 | - Backtracking is fully supported, allowing the parsing of all known context-free grammars 25 | - Parsing of nesting inputs, allowing you to move delimiter parsing to the lexical stage (as Rust does!) 26 | - Built-in parser debugging 27 | 28 | ## Example [Brainfuck](https://en.wikipedia.org/wiki/Brainfuck) Parser 29 | 30 | See [`examples/brainfuck.rs`](https://github.com/zesterer/chumsky/blob/master/examples/brainfuck.rs) for the full 31 | interpreter (`cargo run --example brainfuck -- examples/sample.bf`). 
32 | 33 | ```rust 34 | use chumsky::prelude::*; 35 | 36 | #[derive(Clone)] 37 | enum Instr { 38 | Left, Right, 39 | Incr, Decr, 40 | Read, Write, 41 | Loop(Vec<Self>), 42 | } 43 | 44 | fn parser() -> impl Parser<char, Vec<Instr>, Error = Simple<char>> { 45 | recursive(|bf| choice(( 46 | just('<').to(Instr::Left), 47 | just('>').to(Instr::Right), 48 | just('+').to(Instr::Incr), 49 | just('-').to(Instr::Decr), 50 | just(',').to(Instr::Read), 51 | just('.').to(Instr::Write), 52 | bf.delimited_by(just('['), just(']')).map(Instr::Loop), 53 | )) 54 | .repeated()) 55 | } 56 | ``` 57 | 58 | Other examples include: 59 | 60 | - A [JSON parser](https://github.com/zesterer/chumsky/blob/master/examples/json.rs) (`cargo run --example json -- 61 | examples/sample.json`) 62 | - An [interpreter for a simple Rust-y language](https://github.com/zesterer/chumsky/blob/master/examples/nano_rust.rs) 63 | (`cargo run --example nano_rust -- examples/sample.nrs`) 64 | 65 | ## Tutorial 66 | 67 | Chumsky has [a tutorial](https://github.com/zesterer/chumsky/blob/master/tutorial.md) that teaches you how to write a 68 | parser and interpreter for a simple dynamic language with unary and binary operators, operator precedence, functions, 69 | let declarations, and calls. 70 | 71 | ## *What* is a parser combinator? 72 | 73 | Parser combinators are a technique for implementing parsers by defining them in terms of other parsers. The resulting 74 | parsers use a [recursive descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) strategy to transform a stream 75 | of tokens into an output. Using parser combinators to define parsers is roughly analogous to using Rust's 76 | [`Iterator`](https://doc.rust-lang.org/std/iter/trait.Iterator.html) trait to define iterative algorithms: the 77 | type-driven API of `Iterator` makes it more difficult to make mistakes and easier to encode complicated iteration logic 78 | than if one were to write the same code by hand. The same is true of parser combinators. 79 | 80 | ## *Why* use parser combinators? 81 | 82 | Writing parsers with good error recovery is conceptually difficult and time-consuming. It requires understanding the 83 | intricacies of the recursive descent algorithm, and then implementing recovery strategies on top of it. If you're 84 | developing a programming language, you'll almost certainly change your mind about syntax in the process, leading to some 85 | slow and painful parser refactoring. Parser combinators solve both problems by providing an ergonomic API that allows 86 | for rapidly iterating upon a syntax. 87 | 88 | Parser combinators are also a great fit for domain-specific languages for which an existing parser does not exist. 89 | Writing a reliable, fault-tolerant parser for such situations can go from being a multi-day task to a half-hour task 90 | with the help of a decent parser combinator library. 91 | 92 | ## Classification 93 | 94 | Chumsky's parsers are [recursive descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) parsers and are 95 | capable of parsing [parsing expression grammars (PEGs)](https://en.wikipedia.org/wiki/Parsing_expression_grammar), which 96 | includes all known context-free languages. It is theoretically possible to extend Chumsky further to accept limited 97 | context-sensitive grammars too, although this is rarely required.
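In practice, this means that alternatives combined with `or` (or `choice`) follow PEG-style ordered choice: each alternative is tried in turn, and the parser backtracks to try the next one when an earlier alternative fails. The snippet below is a minimal sketch of this behaviour; it is not taken from the repository's examples, and the `word` parser and its inputs are invented purely for illustration.

```rust
use chumsky::prelude::*;

fn main() {
    // Alternatives are tried in order. If `just("foobar")` fails part-way
    // through the input, the parser backtracks and tries `just("foo")` instead.
    let word = just::<_, _, Simple<char>>("foobar")
        .or(just("foo"))
        .then_ignore(end());

    assert_eq!(word.parse("foobar"), Ok("foobar"));
    assert_eq!(word.parse("foo"), Ok("foo"));
}
```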
98 | 99 | ## Error Recovery 100 | 101 | Chumsky has support for error recovery, meaning that it can encounter a syntax error, report the error, and then 102 | attempt to recover itself into a state in which it can continue parsing so that multiple errors can be produced at once 103 | and a partial [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) can still be generated from the input for future 104 | compilation stages to consume. 105 | 106 | However, there is no silver bullet strategy for error recovery. By definition, if the input to a parser is invalid then 107 | the parser can only make educated guesses as to the meaning of the input. Different recovery strategies will work better 108 | for different languages, and for different patterns within those languages. 109 | 110 | Chumsky provides a variety of recovery strategies (each implementing the `Strategy` trait), but it's important to 111 | understand that which strategies you apply, where you apply them, and in what order will greatly affect the quality of the errors 112 | that Chumsky is able to produce, along with the extent to which it is able to recover a useful AST. Where possible, you 113 | should attempt more 'specific' recovery strategies first rather than those that mindlessly skip large swathes of the 114 | input. 115 | 116 | It is recommended that you experiment with applying different strategies in different situations and at different levels 117 | of the parser to find a configuration that you are happy with. If none of the provided error recovery strategies cover 118 | the specific pattern you wish to catch, you can even create your own by digging into Chumsky's internals and 119 | implementing your own strategies! If you come up with a useful strategy, feel free to open a PR against the 120 | [main repository](https://github.com/zesterer/chumsky/)! A minimal sketch of applying recovery strategies with `recover_with` is shown below, after the performance notes. 121 | 122 | ## Performance 123 | 124 | Chumsky focuses on high-quality errors and ergonomics over performance. That said, it's important that Chumsky can keep 125 | up with the rest of your compiler! Unfortunately, it's *extremely* difficult to come up with sensible benchmarks given 126 | that exactly how Chumsky performs depends entirely on what you are parsing, how you structure your parser, which 127 | patterns the parser attempts to match first, how complex your error type is, what is involved in constructing your AST, 128 | etc. All that said, here are some numbers from the 129 | [JSON benchmark](https://github.com/zesterer/chumsky/blob/master/benches/json.rs) included in the repository running on 130 | my Ryzen 7 3700x. 131 | 132 | ```ignore 133 | test chumsky ... bench: 4,782,390 ns/iter (+/- 997,208) 134 | test pom ... bench: 12,793,490 ns/iter (+/- 1,954,583) 135 | ``` 136 | 137 | I've included results from [`pom`](https://github.com/J-F-Liu/pom), another parser combinator crate with a similar 138 | design, as a point of reference. The sample file being parsed is broadly representative of typical JSON data and has 139 | 3,018 lines. This translates to a little over 630,000 lines of JSON per second. 140 | 141 | Clearly, this is a little slower than a well-optimised hand-written parser: but that's okay! Chumsky's goal is to be 142 | *fast enough*. If you've written enough code in your language that parsing performance even starts to be a problem, 143 | you've already committed enough time and resources to your language that hand-writing a parser is the best choice going!
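To make the error recovery discussion above concrete, here is a minimal sketch of applying recovery strategies with `recover_with`, in the same spirit as `examples/json.rs` and `examples/brainfuck.rs`. The parser, input string, and placeholder value below are invented for illustration and are not part of the repository.

```rust
use chumsky::prelude::*;

fn main() {
    // A parenthesised identifier that recovers when its contents are malformed.
    let value = text::ident::<char, Simple<char>>()
        .delimited_by(just('('), just(')'))
        // Most specific strategy first: treat a balanced but invalid `( ... )`
        // group as a placeholder output and keep parsing.
        .recover_with(nested_delimiters('(', ')', [], |_span| String::new()))
        // Fall back to skipping input until something parseable is found.
        .recover_with(skip_then_retry_until([')']));

    // `parse_recovery` returns both a (possibly recovered) output and any errors.
    let (output, errors) = value.parse_recovery("(not an ident!)");
    println!("output: {:?}", output); // likely `Some("")`, the placeholder value
    println!("errors reported: {}", errors.len());
}
```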
144 | 145 | ## Planned Features 146 | 147 | - An optimised 'happy path' parser mode that skips error recovery & error generation 148 | - An even faster 'validation' parser mode, guaranteed to not allocate, that doesn't generate outputs but just verifies 149 | the validity of an input 150 | 151 | ## Philosophy 152 | 153 | Chumsky should: 154 | 155 | - Be easy to use, even if you don't understand exactly what the parser is doing under the hood 156 | - Be type-driven, pushing users away from anti-patterns at compile-time 157 | - Be a mature, 'batteries-included' solution for context-free parsing by default. If you need to implement either 158 | `Parser` or `Strategy` by hand, that's a problem that needs fixing 159 | - Be 'fast enough', but no faster (i.e: when there is a tradeoff between error quality and performance, Chumsky will 160 | always take the former option) 161 | - Be modular and extensible, allowing users to implement their own parsers, recovery strategies, error types, spans, and 162 | be generic over both input tokens and the output AST 163 | 164 | ## Notes 165 | 166 | My apologies to Noam for choosing such an absurd name. 167 | 168 | ## License 169 | 170 | Chumsky is licensed under the MIT license (see `LICENSE` in the main repository). 171 | -------------------------------------------------------------------------------- /src/text.rs: -------------------------------------------------------------------------------- 1 | //! Text-specific parsers and utilities. 2 | //! 3 | //! *“Ford!" he said, "there's an infinite number of monkeys outside who want to talk to us about this script for 4 | //! Hamlet they've worked out.”* 5 | //! 6 | //! The parsers in this module are generic over both Unicode ([`char`]) and ASCII ([`u8`]) characters. Most parsers take 7 | //! a type parameter, `C`, that can be either [`u8`] or [`char`] in order to handle either case. 8 | //! 9 | //! The [`TextParser`] trait is an extension on top of the main [`Parser`] trait that adds combinators unique to the 10 | //! parsing of text. 11 | 12 | use super::*; 13 | use core::iter::FromIterator; 14 | 15 | /// The type of a parser that accepts (and ignores) any number of whitespace characters. 16 | pub type Padding<I, E> = Custom<fn(&mut StreamOf<I, E>) -> PResult<I, (), E>, E>; 17 | 18 | /// The type of a parser that accepts (and ignores) any number of whitespace characters before or after another 19 | /// pattern. 20 | // pub type Padded<P, I, O> = ThenIgnore< 21 | // IgnoreThen<Padding<I, <P as Parser<I, O>>::Error>, P, (), O>, 22 | // Padding<I, <P as Parser<I, O>>::Error>, 23 | // O, 24 | // (), 25 | // >; 26 | 27 | /// A parser that accepts (and ignores) any number of whitespace characters before or after another pattern.
28 | #[derive(Copy, Clone)] 29 | pub struct Padded(A); 30 | 31 | impl, E: Error> Parser for Padded { 32 | type Error = E; 33 | 34 | #[inline] 35 | fn parse_inner( 36 | &self, 37 | debugger: &mut D, 38 | stream: &mut StreamOf, 39 | ) -> PResult { 40 | while stream.skip_if(|c| c.is_whitespace()) {} 41 | match self.0.parse_inner(debugger, stream) { 42 | (a_errors, Ok((a_out, a_alt))) => { 43 | while stream.skip_if(|c| c.is_whitespace()) {} 44 | (a_errors, Ok((a_out, a_alt))) 45 | } 46 | (a_errors, Err(err)) => (a_errors, Err(err)), 47 | } 48 | } 49 | 50 | #[inline] 51 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 52 | #[allow(deprecated)] 53 | self.parse_inner(d, s) 54 | } 55 | #[inline] 56 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 57 | #[allow(deprecated)] 58 | self.parse_inner(d, s) 59 | } 60 | } 61 | 62 | mod private { 63 | pub trait Sealed {} 64 | 65 | impl Sealed for u8 {} 66 | impl Sealed for char {} 67 | } 68 | 69 | /// A trait implemented by textual character types (currently, [`u8`] and [`char`]). 70 | /// 71 | /// Avoid implementing this trait yourself if you can: it's *very* likely to be expanded in future versions! 72 | pub trait Character: private::Sealed + Copy + PartialEq { 73 | /// The default unsized [`str`]-like type of a linear sequence of this character. 74 | /// 75 | /// For [`char`], this is [`str`]. For [`u8`], this is [`[u8]`]. 76 | type Str: ?Sized + PartialEq; 77 | 78 | /// The default type that this character collects into. 79 | /// 80 | /// For [`char`], this is [`String`]. For [`u8`], this is [`Vec`]. 81 | type Collection: Chain + FromIterator + AsRef + 'static; 82 | 83 | /// Returns true if the character is canonically considered to be whitespace. 84 | fn is_whitespace(&self) -> bool; 85 | 86 | /// Return the '0' digit of the character. 87 | fn digit_zero() -> Self; 88 | 89 | /// Returns true if the character is canonically considered to be a numeric digit. 90 | fn is_digit(&self, radix: u32) -> bool; 91 | 92 | /// Returns this character as a [`char`]. 93 | fn to_char(&self) -> char; 94 | } 95 | 96 | impl Character for u8 { 97 | type Str = [u8]; 98 | type Collection = Vec; 99 | 100 | fn is_whitespace(&self) -> bool { 101 | self.is_ascii_whitespace() 102 | } 103 | fn digit_zero() -> Self { 104 | b'0' 105 | } 106 | fn is_digit(&self, radix: u32) -> bool { 107 | (*self as char).is_digit(radix) 108 | } 109 | fn to_char(&self) -> char { 110 | *self as char 111 | } 112 | } 113 | 114 | impl Character for char { 115 | type Str = str; 116 | type Collection = String; 117 | 118 | fn is_whitespace(&self) -> bool { 119 | char::is_whitespace(*self) 120 | } 121 | fn digit_zero() -> Self { 122 | '0' 123 | } 124 | fn is_digit(&self, radix: u32) -> bool { 125 | char::is_digit(*self, radix) 126 | } 127 | fn to_char(&self) -> char { 128 | *self 129 | } 130 | } 131 | 132 | /// A trait containing text-specific functionality that extends the [`Parser`] trait. 133 | pub trait TextParser: Parser { 134 | /// Parse a pattern, ignoring any amount of whitespace both before and after the pattern. 135 | /// 136 | /// The output type of this parser is `O`, the same as the original parser. 
137 | /// 138 | /// # Examples 139 | /// 140 | /// ``` 141 | /// # use chumsky::prelude::*; 142 | /// let ident = text::ident::<_, Simple>().padded(); 143 | /// 144 | /// // A pattern with no whitespace surrounding it is accepted 145 | /// assert_eq!(ident.parse("hello"), Ok("hello".to_string())); 146 | /// // A pattern with arbitrary whitespace surrounding it is also accepted 147 | /// assert_eq!(ident.parse(" \t \n \t world \t "), Ok("world".to_string())); 148 | /// ``` 149 | fn padded(self) -> Padded 150 | where 151 | Self: Sized, 152 | { 153 | Padded(self) 154 | // whitespace().ignore_then(self).then_ignore(whitespace()) 155 | } 156 | } 157 | 158 | impl> TextParser for P {} 159 | 160 | /// A parser that accepts (and ignores) any number of whitespace characters. 161 | /// 162 | /// The output type of this parser is `()`. 163 | /// 164 | /// # Examples 165 | /// 166 | /// ``` 167 | /// # use chumsky::prelude::*; 168 | /// let whitespace = text::whitespace::<_, Simple>(); 169 | /// 170 | /// // Any amount of whitespace is parsed... 171 | /// assert_eq!(whitespace.parse(" \t \n\n \t "), Ok(())); 172 | /// // ...including none at all! 173 | /// assert_eq!(whitespace.parse(""), Ok(())); 174 | /// ``` 175 | pub fn whitespace>() -> Padding { 176 | custom(|stream: &mut StreamOf| loop { 177 | let state = stream.save(); 178 | if stream.next().2.map_or(true, |b| !b.is_whitespace()) { 179 | stream.revert(state); 180 | break (Vec::new(), Ok(((), None))); 181 | } 182 | }) 183 | } 184 | 185 | /// A parser that accepts (and ignores) any newline characters or character sequences. 186 | /// 187 | /// The output type of this parser is `()`. 188 | /// 189 | /// This parser is quite extensive, recognising: 190 | /// 191 | /// - Line feed (`\n`) 192 | /// - Carriage return (`\r`) 193 | /// - Carriage return + line feed (`\r\n`) 194 | /// - Vertical tab (`\x0B`) 195 | /// - Form feed (`\x0C`) 196 | /// - Next line (`\u{0085}`) 197 | /// - Line separator (`\u{2028}`) 198 | /// - Paragraph separator (`\u{2029}`) 199 | /// 200 | /// # Examples 201 | /// 202 | /// ``` 203 | /// # use chumsky::prelude::*; 204 | /// let newline = text::newline::>() 205 | /// .then_ignore(end()); 206 | /// 207 | /// assert_eq!(newline.parse("\n"), Ok(())); 208 | /// assert_eq!(newline.parse("\r"), Ok(())); 209 | /// assert_eq!(newline.parse("\r\n"), Ok(())); 210 | /// assert_eq!(newline.parse("\x0B"), Ok(())); 211 | /// assert_eq!(newline.parse("\x0C"), Ok(())); 212 | /// assert_eq!(newline.parse("\u{0085}"), Ok(())); 213 | /// assert_eq!(newline.parse("\u{2028}"), Ok(())); 214 | /// assert_eq!(newline.parse("\u{2029}"), Ok(())); 215 | /// ``` 216 | pub fn newline>() -> impl Parser + Copy + Clone { 217 | just('\r') 218 | .or_not() 219 | .ignore_then(just('\n')) 220 | .or(just('\r')) // Carriage return 221 | .or(just('\x0B')) // Vertical tab 222 | .or(just('\x0C')) // Form feed 223 | .or(just('\u{0085}')) // Next line 224 | .or(just('\u{2028}')) // Line separator 225 | .or(just('\u{2029}')) // Paragraph separator 226 | .ignored() 227 | } 228 | 229 | /// A parser that accepts one or more ASCII digits. 230 | /// 231 | /// The output type of this parser is [`Character::Collection`] (i.e: [`String`] when `C` is [`char`], and [`Vec`] 232 | /// when `C` is [`u8`]). 233 | /// 234 | /// The `radix` parameter functions identically to [`char::is_digit`]. If in doubt, choose `10`. 
235 | /// 236 | /// # Examples 237 | /// 238 | /// ``` 239 | /// # use chumsky::prelude::*; 240 | /// let digits = text::digits::<_, Simple>(10); 241 | /// 242 | /// assert_eq!(digits.parse("0"), Ok("0".to_string())); 243 | /// assert_eq!(digits.parse("1"), Ok("1".to_string())); 244 | /// assert_eq!(digits.parse("01234"), Ok("01234".to_string())); 245 | /// assert_eq!(digits.parse("98345"), Ok("98345".to_string())); 246 | /// // A string of zeroes is still valid. Use `int` if this is not desirable. 247 | /// assert_eq!(digits.parse("0000"), Ok("0000".to_string())); 248 | /// assert!(digits.parse("").is_err()); 249 | /// ``` 250 | pub fn digits>( 251 | radix: u32, 252 | ) -> impl Parser + Copy + Clone { 253 | filter(move |c: &C| c.is_digit(radix)) 254 | .repeated() 255 | .at_least(1) 256 | .collect() 257 | } 258 | 259 | /// A parser that accepts a positive integer. 260 | /// 261 | /// An integer is defined as a non-empty sequence of ASCII digits, where the first digit is non-zero or the sequence 262 | /// has length one. 263 | /// 264 | /// The output type of this parser is [`Character::Collection`] (i.e: [`String`] when `C` is [`char`], and [`Vec`] 265 | /// when `C` is [`u8`]). 266 | /// 267 | /// The `radix` parameter functions identically to [`char::is_digit`]. If in doubt, choose `10`. 268 | /// 269 | /// # Examples 270 | /// 271 | /// ``` 272 | /// # use chumsky::prelude::*; 273 | /// let dec = text::int::<_, Simple>(10) 274 | /// .then_ignore(end()); 275 | /// 276 | /// assert_eq!(dec.parse("0"), Ok("0".to_string())); 277 | /// assert_eq!(dec.parse("1"), Ok("1".to_string())); 278 | /// assert_eq!(dec.parse("1452"), Ok("1452".to_string())); 279 | /// // No leading zeroes are permitted! 280 | /// assert!(dec.parse("04").is_err()); 281 | /// 282 | /// let hex = text::int::<_, Simple>(16) 283 | /// .then_ignore(end()); 284 | /// 285 | /// assert_eq!(hex.parse("2A"), Ok("2A".to_string())); 286 | /// assert_eq!(hex.parse("d"), Ok("d".to_string())); 287 | /// assert_eq!(hex.parse("b4"), Ok("b4".to_string())); 288 | /// assert!(hex.parse("0B").is_err()); 289 | /// ``` 290 | pub fn int>( 291 | radix: u32, 292 | ) -> impl Parser + Copy + Clone { 293 | filter(move |c: &C| c.is_digit(radix) && c != &C::digit_zero()) 294 | .map(Some) 295 | .chain::, _>(filter(move |c: &C| c.is_digit(radix)).repeated()) 296 | .collect() 297 | .or(just(C::digit_zero()).map(|c| core::iter::once(c).collect())) 298 | } 299 | 300 | /// A parser that accepts a C-style identifier. 301 | /// 302 | /// The output type of this parser is [`Character::Collection`] (i.e: [`String`] when `C` is [`char`], and [`Vec`] 303 | /// when `C` is [`u8`]). 304 | /// 305 | /// An identifier is defined as an ASCII alphabetic character or an underscore followed by any number of alphanumeric 306 | /// characters or underscores. The regex pattern for it is `[a-zA-Z_][a-zA-Z0-9_]*`. 307 | pub fn ident>() -> impl Parser + Copy + Clone 308 | { 309 | filter(|c: &C| c.to_char().is_ascii_alphabetic() || c.to_char() == '_') 310 | .map(Some) 311 | .chain::, _>( 312 | filter(|c: &C| c.to_char().is_ascii_alphanumeric() || c.to_char() == '_').repeated(), 313 | ) 314 | .collect() 315 | } 316 | 317 | /// Like [`ident`], but only accepts an exact identifier while ignoring trailing identifier characters. 318 | /// 319 | /// The output type of this parser is `()`. 
320 | /// 321 | /// # Examples 322 | /// 323 | /// ``` 324 | /// # use chumsky::prelude::*; 325 | /// let def = text::keyword::<_, _, Simple>("def"); 326 | /// 327 | /// // Exactly 'def' was found 328 | /// assert_eq!(def.parse("def"), Ok(())); 329 | /// // Exactly 'def' was found, with non-identifier trailing characters 330 | /// assert_eq!(def.parse("def(foo, bar)"), Ok(())); 331 | /// // 'def' was found, but only as part of a larger identifier, so this fails to parse 332 | /// assert!(def.parse("define").is_err()); 333 | /// ``` 334 | pub fn keyword<'a, C: Character + 'a, S: AsRef + 'a + Clone, E: Error + 'a>( 335 | keyword: S, 336 | ) -> impl Parser + Clone + 'a { 337 | // TODO: use .filter(...), improve error messages 338 | ident().try_map(move |s: C::Collection, span| { 339 | if s.as_ref() == keyword.as_ref() { 340 | Ok(()) 341 | } else { 342 | Err(E::expected_input_found(span, None, None)) 343 | } 344 | }) 345 | } 346 | -------------------------------------------------------------------------------- /src/stream.rs: -------------------------------------------------------------------------------- 1 | //! Token streams and tools converting to and from them.. 2 | //! 3 | //! *“What’s up?” “I don’t know,” said Marvin, “I’ve never been there.”* 4 | //! 5 | //! [`Stream`] is the primary type used to feed input data into a chumsky parser. You can create them in a number of 6 | //! ways: from strings, iterators, arrays, etc. 7 | 8 | use super::*; 9 | use alloc::vec; 10 | 11 | trait StreamExtend: Iterator { 12 | /// Extend the vector with input. The actual amount can be more or less than `n`, but must be at least 1 (0 implies 13 | /// that the stream has been exhausted. 14 | fn extend(&mut self, v: &mut Vec, n: usize); 15 | } 16 | 17 | #[allow(deprecated)] 18 | impl StreamExtend for I { 19 | fn extend(&mut self, v: &mut Vec, n: usize) { 20 | v.reserve(n); 21 | v.extend(self.take(n)); 22 | } 23 | } 24 | 25 | /// A utility type used to flatten input trees. See [`Stream::from_nested`]. 26 | pub enum Flat { 27 | /// The input tree flattens into a single input. 28 | Single(I), 29 | /// The input tree flattens into many sub-trees. 30 | Many(Iter), 31 | } 32 | 33 | /// A type that represents a stream of input tokens. Unlike [`Iterator`], this type supports backtracking and a few 34 | /// other features required by the crate. 35 | #[allow(deprecated)] 36 | pub struct Stream< 37 | 'a, 38 | I, 39 | S: Span, 40 | Iter: Iterator + ?Sized = dyn Iterator + 'a, 41 | > { 42 | pub(crate) phantom: PhantomData<&'a ()>, 43 | pub(crate) eoi: S, 44 | pub(crate) offset: usize, 45 | pub(crate) buffer: Vec<(I, S)>, 46 | pub(crate) iter: Iter, 47 | } 48 | 49 | /// A [`Stream`] that pulls tokens from a boxed [`Iterator`]. 50 | pub type BoxStream<'a, I, S> = Stream<'a, I, S, Box + 'a>>; 51 | 52 | impl<'a, I, S: Span, Iter: Iterator> Stream<'a, I, S, Iter> { 53 | /// Create a new stream from an iterator of `(Token, Span)` pairs. A span representing the end of input must also 54 | /// be provided. 55 | /// 56 | /// There is no requirement that spans must map exactly to the position of inputs in the stream, but they should 57 | /// be non-overlapping and should appear in a monotonically-increasing order. 
58 | pub fn from_iter(eoi: S, iter: Iter) -> Self { 59 | Self { 60 | phantom: PhantomData, 61 | eoi, 62 | offset: 0, 63 | buffer: Vec::new(), 64 | iter, 65 | } 66 | } 67 | 68 | /// Eagerly evaluate the token stream, returning an iterator over the tokens in it (but without modifying the 69 | /// stream's state so that it can still be used for parsing). 70 | /// 71 | /// This is most useful when you wish to check the input of a parser during debugging. 72 | pub fn fetch_tokens(&mut self) -> impl Iterator + '_ 73 | where 74 | (I, S): Clone, 75 | { 76 | self.buffer.extend(&mut self.iter); 77 | self.buffer.iter().cloned() 78 | } 79 | } 80 | 81 | impl<'a, I: Clone, S: Span + 'a> BoxStream<'a, I, S> { 82 | /// Create a new `Stream` from an iterator of nested tokens and a function that flattens them. 83 | /// 84 | /// It's not uncommon for compilers to perform delimiter parsing during the lexing stage (Rust does this!). When 85 | /// this is done, the output of the lexing stage is usually a series of nested token trees. This functions allows 86 | /// you to easily flatten such token trees into a linear token stream so that they can be parsed (Chumsky currently 87 | /// only support parsing linear streams of inputs). 88 | /// 89 | /// For reference, [here](https://docs.rs/syn/0.11.1/syn/enum.TokenTree.html) is `syn`'s `TokenTree` type that it 90 | /// uses when parsing Rust syntax. 91 | /// 92 | /// # Examples 93 | /// 94 | /// ``` 95 | /// # use chumsky::{Stream, BoxStream, Flat}; 96 | /// type Span = std::ops::Range; 97 | /// 98 | /// fn span_at(at: usize) -> Span { at..at + 1 } 99 | /// 100 | /// #[derive(Clone)] 101 | /// enum Token { 102 | /// Local(String), 103 | /// Int(i64), 104 | /// Bool(bool), 105 | /// Add, 106 | /// Sub, 107 | /// OpenParen, 108 | /// CloseParen, 109 | /// OpenBrace, 110 | /// CloseBrace, 111 | /// // etc. 112 | /// } 113 | /// 114 | /// enum Delimiter { 115 | /// Paren, // ( ... ) 116 | /// Brace, // { ... } 117 | /// } 118 | /// 119 | /// // The structure of this token tree is very similar to that which Rust uses. 120 | /// // See: https://docs.rs/syn/0.11.1/syn/enum.TokenTree.html 121 | /// enum TokenTree { 122 | /// Token(Token), 123 | /// Tree(Delimiter, Vec<(TokenTree, Span)>), 124 | /// } 125 | /// 126 | /// // A function that turns a series of nested token trees into a linear stream that can be used for parsing. 127 | /// fn flatten_tts(eoi: Span, token_trees: Vec<(TokenTree, Span)>) -> BoxStream<'static, Token, Span> { 128 | /// use std::iter::once; 129 | /// // Currently, this is quite an explicit process: it will likely become easier in future versions of Chumsky. 130 | /// Stream::from_nested( 131 | /// eoi, 132 | /// token_trees.into_iter(), 133 | /// |(tt, span)| match tt { 134 | /// // For token trees that contain just a single token, no flattening needs to occur! 
135 | /// TokenTree::Token(token) => Flat::Single((token, span)), 136 | /// // Flatten a parenthesised token tree into an iterator of the inner token trees, surrounded by parenthesis tokens 137 | /// TokenTree::Tree(Delimiter::Paren, tree) => Flat::Many(once((TokenTree::Token(Token::OpenParen), span_at(span.start))) 138 | /// .chain(tree.into_iter()) 139 | /// .chain(once((TokenTree::Token(Token::CloseParen), span_at(span.end - 1))))), 140 | /// // Flatten a braced token tree into an iterator of the inner token trees, surrounded by brace tokens 141 | /// TokenTree::Tree(Delimiter::Brace, tree) => Flat::Many(once((TokenTree::Token(Token::OpenBrace), span_at(span.start))) 142 | /// .chain(tree.into_iter()) 143 | /// .chain(once((TokenTree::Token(Token::CloseBrace), span_at(span.end - 1))))), 144 | /// } 145 | /// ) 146 | /// } 147 | /// ``` 148 | pub fn from_nested< 149 | P: 'a, 150 | Iter: Iterator, 151 | Many: Iterator, 152 | F: FnMut((P, S)) -> Flat<(I, S), Many> + 'a, 153 | >( 154 | eoi: S, 155 | iter: Iter, 156 | mut flatten: F, 157 | ) -> Self { 158 | let mut v: Vec> = vec![iter.collect()]; 159 | Self::from_iter( 160 | eoi, 161 | Box::new(core::iter::from_fn(move || loop { 162 | if let Some(many) = v.last_mut() { 163 | match many.pop_front().map(&mut flatten) { 164 | Some(Flat::Single(input)) => break Some(input), 165 | Some(Flat::Many(many)) => v.push(many.collect()), 166 | None => { 167 | v.pop(); 168 | } 169 | } 170 | } else { 171 | break None; 172 | } 173 | })), 174 | ) 175 | } 176 | } 177 | 178 | impl<'a, I: Clone, S: Span> Stream<'a, I, S> { 179 | pub(crate) fn offset(&self) -> usize { 180 | self.offset 181 | } 182 | 183 | pub(crate) fn save(&self) -> usize { 184 | self.offset 185 | } 186 | pub(crate) fn revert(&mut self, offset: usize) { 187 | self.offset = offset; 188 | } 189 | 190 | fn pull_until(&mut self, offset: usize) -> Option<&(I, S)> { 191 | let additional = offset.saturating_sub(self.buffer.len()) + 1024; 192 | #[allow(deprecated)] 193 | (&mut &mut self.iter as &mut dyn StreamExtend<_>).extend(&mut self.buffer, additional); 194 | self.buffer.get(offset) 195 | } 196 | 197 | pub(crate) fn skip_if(&mut self, f: impl FnOnce(&I) -> bool) -> bool { 198 | match self.pull_until(self.offset).cloned() { 199 | Some((out, _)) if f(&out) => { 200 | self.offset += 1; 201 | true 202 | } 203 | Some(_) => false, 204 | None => false, 205 | } 206 | } 207 | 208 | pub(crate) fn next(&mut self) -> (usize, S, Option) { 209 | match self.pull_until(self.offset).cloned() { 210 | Some((out, span)) => { 211 | self.offset += 1; 212 | (self.offset - 1, span, Some(out)) 213 | } 214 | None => (self.offset, self.eoi.clone(), None), 215 | } 216 | } 217 | 218 | pub(crate) fn span_since(&mut self, start_offset: usize) -> S { 219 | debug_assert!( 220 | start_offset <= self.offset, 221 | "{} > {}", 222 | self.offset, 223 | start_offset 224 | ); 225 | let start = self 226 | .pull_until(start_offset) 227 | .as_ref() 228 | .map(|(_, s)| s.start()) 229 | .unwrap_or_else(|| self.eoi.start()); 230 | let end = self 231 | .pull_until(self.offset.saturating_sub(1).max(start_offset)) 232 | .as_ref() 233 | .map(|(_, s)| s.end()) 234 | .unwrap_or_else(|| self.eoi.end()); 235 | S::new(self.eoi.context(), start..end) 236 | } 237 | 238 | pub(crate) fn attempt (bool, R)>(&mut self, f: F) -> R { 239 | let old_offset = self.offset; 240 | let (commit, out) = f(self); 241 | if !commit { 242 | self.offset = old_offset; 243 | } 244 | out 245 | } 246 | 247 | pub(crate) fn try_parse PResult>( 248 | &mut self, 249 | f: F, 250 | ) 
-> PResult { 251 | self.attempt(move |stream| { 252 | let out = f(stream); 253 | (out.1.is_ok(), out) 254 | }) 255 | } 256 | } 257 | 258 | impl<'a> From<&'a str> 259 | for Stream<'a, char, Range, Box)> + 'a>> 260 | { 261 | /// Please note that Chumsky currently uses character indices and not byte offsets in this impl. This is likely to 262 | /// change in the future. If you wish to use byte offsets, you can do so with [`Stream::from_iter`]. 263 | fn from(s: &'a str) -> Self { 264 | let len = s.chars().count(); 265 | Self::from_iter( 266 | len..len, 267 | Box::new(s.chars().enumerate().map(|(i, c)| (c, i..i + 1))), 268 | ) 269 | } 270 | } 271 | 272 | impl<'a> From 273 | for Stream<'a, char, Range, Box)>>> 274 | { 275 | /// Please note that Chumsky currently uses character indices and not byte offsets in this impl. This is likely to 276 | /// change in the future. If you wish to use byte offsets, you can do so with [`Stream::from_iter`]. 277 | fn from(s: String) -> Self { 278 | let chars = s.chars().collect::>(); 279 | Self::from_iter( 280 | chars.len()..chars.len(), 281 | Box::new(chars.into_iter().enumerate().map(|(i, c)| (c, i..i + 1))), 282 | ) 283 | } 284 | } 285 | 286 | impl<'a, T: Clone> From<&'a [T]> 287 | for Stream<'a, T, Range, Box)> + 'a>> 288 | { 289 | fn from(s: &'a [T]) -> Self { 290 | let len = s.len(); 291 | Self::from_iter( 292 | len..len, 293 | Box::new(s.iter().cloned().enumerate().map(|(i, x)| (x, i..i + 1))), 294 | ) 295 | } 296 | } 297 | 298 | impl<'a, T: Clone + 'a> From> 299 | for Stream<'a, T, Range, Box)> + 'a>> 300 | { 301 | fn from(s: Vec) -> Self { 302 | let len = s.len(); 303 | Self::from_iter( 304 | len..len, 305 | Box::new(s.into_iter().enumerate().map(|(i, x)| (x, i..i + 1))), 306 | ) 307 | } 308 | } 309 | 310 | impl<'a, T: Clone + 'a, const N: usize> From<[T; N]> 311 | for Stream<'a, T, Range, Box)> + 'a>> 312 | { 313 | fn from(s: [T; N]) -> Self { 314 | Self::from_iter( 315 | N..N, 316 | Box::new( 317 | core::array::IntoIter::new(s) 318 | .enumerate() 319 | .map(|(i, x)| (x, i..i + 1)), 320 | ), 321 | ) 322 | } 323 | } 324 | 325 | impl<'a, T: Clone, const N: usize> From<&'a [T; N]> 326 | for Stream<'a, T, Range, Box)> + 'a>> 327 | { 328 | fn from(s: &'a [T; N]) -> Self { 329 | Self::from_iter( 330 | N..N, 331 | Box::new(s.iter().cloned().enumerate().map(|(i, x)| (x, i..i + 1))), 332 | ) 333 | } 334 | } 335 | 336 | // impl<'a, T: Clone, S: Clone + Span> From<&'a [(T, S)]> for Stream<'a, T, S, Box + 'a>> 337 | // where S::Offset: Default 338 | // { 339 | // fn from(s: &'a [(T, S)]) -> Self { 340 | // Self::from_iter(Default::default(), Box::new(s.iter().cloned())) 341 | // } 342 | // } 343 | -------------------------------------------------------------------------------- /src/recovery.rs: -------------------------------------------------------------------------------- 1 | //! Types and traits that facilitate error recovery. 2 | //! 3 | //! *“Do you find coming to terms with the mindless tedium of it all presents an interesting challenge?”* 4 | 5 | use super::*; 6 | 7 | /// A trait implemented by error recovery strategies. 8 | pub trait Strategy> { 9 | /// Recover from a parsing failure. 10 | fn recover>( 11 | &self, 12 | recovered_errors: Vec>, 13 | fatal_error: Located, 14 | parser: P, 15 | debugger: &mut D, 16 | stream: &mut StreamOf, 17 | ) -> PResult; 18 | } 19 | 20 | /// See [`skip_then_retry_until`]. 
21 | #[derive(Copy, Clone)] 22 | pub struct SkipThenRetryUntil( 23 | pub(crate) [I; N], 24 | pub(crate) bool, 25 | pub(crate) bool, 26 | ); 27 | 28 | impl SkipThenRetryUntil { 29 | /// Alters this recovery strategy so that the first token will always be skipped. 30 | /// 31 | /// This is useful when the input being searched for also appears at the beginning of the pattern that failed to 32 | /// parse. 33 | pub fn skip_start(self) -> Self { 34 | Self(self.0, self.1, true) 35 | } 36 | 37 | /// Alters this recovery strategy so that the synchronisation token will be consumed during recovery. 38 | /// 39 | /// This is useful when the input being searched for is a delimiter of a prior pattern rather than the start of a 40 | /// new pattern and hence is no longer important once recovery has occurred. 41 | pub fn consume_end(self) -> Self { 42 | Self(self.0, true, self.2) 43 | } 44 | } 45 | 46 | impl, const N: usize> Strategy 47 | for SkipThenRetryUntil 48 | { 49 | fn recover>( 50 | &self, 51 | a_errors: Vec>, 52 | a_err: Located, 53 | parser: P, 54 | debugger: &mut D, 55 | stream: &mut StreamOf, 56 | ) -> PResult { 57 | if self.2 { 58 | let _ = stream.next(); 59 | } 60 | loop { 61 | #[allow(clippy::blocks_in_if_conditions)] 62 | if !stream.attempt( 63 | |stream| match stream.next().2.map(|tok| self.0.contains(&tok)) { 64 | Some(true) => (self.1, false), 65 | Some(false) => (true, true), 66 | None => (false, false), 67 | }, 68 | ) { 69 | break (a_errors, Err(a_err)); 70 | } 71 | #[allow(deprecated)] 72 | let (mut errors, res) = debugger.invoke(&parser, stream); 73 | if let Ok(out) = res { 74 | errors.push(a_err); 75 | break (errors, Ok(out)); 76 | } 77 | } 78 | } 79 | } 80 | 81 | /// A recovery mode that simply skips to the next input on parser failure and tries again, until reaching one of 82 | /// several inputs. 83 | /// 84 | /// Also see [`SkipThenRetryUntil::consume_end`]. 85 | /// 86 | /// This strategy is very 'stupid' and can result in very poor error generation in some languages. Place this strategy 87 | /// after others as a last resort, and be careful about over-using it. 88 | pub fn skip_then_retry_until(until: [I; N]) -> SkipThenRetryUntil { 89 | SkipThenRetryUntil(until, false, false) 90 | } 91 | 92 | /// See [`skip_until`]. 93 | #[derive(Copy, Clone)] 94 | pub struct SkipUntil( 95 | pub(crate) [I; N], 96 | pub(crate) F, 97 | pub(crate) bool, 98 | pub(crate) bool, 99 | ); 100 | 101 | impl SkipUntil { 102 | /// Alters this recovery strategy so that the first token will always be skipped. 103 | /// 104 | /// This is useful when the input being searched for also appears at the beginning of the pattern that failed to 105 | /// parse. 106 | pub fn skip_start(self) -> Self { 107 | Self(self.0, self.1, self.2, true) 108 | } 109 | 110 | /// Alters this recovery strategy so that the synchronisation token will be consumed during recovery. 111 | /// 112 | /// This is useful when the input being searched for is a delimiter of a prior pattern rather than the start of a 113 | /// new pattern and hence is no longer important once recovery has occurred. 
114 | pub fn consume_end(self) -> Self { 115 | Self(self.0, self.1, true, self.3) 116 | } 117 | } 118 | 119 | impl O, E: Error, const N: usize> Strategy 120 | for SkipUntil 121 | { 122 | fn recover>( 123 | &self, 124 | mut a_errors: Vec>, 125 | a_err: Located, 126 | _parser: P, 127 | _debugger: &mut D, 128 | stream: &mut StreamOf, 129 | ) -> PResult { 130 | let pre_state = stream.save(); 131 | if self.3 { 132 | let _ = stream.next(); 133 | } 134 | a_errors.push(a_err); 135 | loop { 136 | match stream.attempt(|stream| { 137 | let (at, span, tok) = stream.next(); 138 | match tok.map(|tok| self.0.contains(&tok)) { 139 | Some(true) => (self.2, Ok(true)), 140 | Some(false) => (true, Ok(false)), 141 | None => (true, Err((at, span))), 142 | } 143 | }) { 144 | Ok(true) => break (a_errors, Ok(((self.1)(stream.span_since(pre_state)), None))), 145 | Ok(false) => {} 146 | Err(_) if stream.save() > pre_state => { 147 | break (a_errors, Ok(((self.1)(stream.span_since(pre_state)), None))) 148 | } 149 | Err((at, span)) => { 150 | break ( 151 | a_errors, 152 | Err(Located::at( 153 | at, 154 | E::expected_input_found(span, self.0.iter().cloned().map(Some), None), 155 | )), 156 | ) 157 | } 158 | } 159 | } 160 | } 161 | } 162 | 163 | /// A recovery mode that skips input until one of several inputs is found. 164 | /// 165 | /// Also see [`SkipUntil::consume_end`]. 166 | /// 167 | /// This strategy is very 'stupid' and can result in very poor error generation in some languages. Place this strategy 168 | /// after others as a last resort, and be careful about over-using it. 169 | pub fn skip_until(until: [I; N], fallback: F) -> SkipUntil { 170 | SkipUntil(until, fallback, false, false) 171 | } 172 | 173 | /// See [`nested_delimiters`]. 174 | #[derive(Copy, Clone)] 175 | pub struct NestedDelimiters( 176 | pub(crate) I, 177 | pub(crate) I, 178 | pub(crate) [(I, I); N], 179 | pub(crate) F, 180 | ); 181 | 182 | impl O, E: Error, const N: usize> Strategy 183 | for NestedDelimiters 184 | { 185 | // This looks like something weird with clippy, it warns in a weird spot and isn't fixed by 186 | // marking it at the spot. 
187 | #[allow(clippy::blocks_in_if_conditions)] 188 | fn recover>( 189 | &self, 190 | mut a_errors: Vec>, 191 | a_err: Located, 192 | _parser: P, 193 | _debugger: &mut D, 194 | stream: &mut StreamOf, 195 | ) -> PResult { 196 | let mut balance = 0; 197 | let mut balance_others = [0; N]; 198 | let mut starts = Vec::new(); 199 | let mut error = None; 200 | let pre_state = stream.save(); 201 | let recovered = loop { 202 | if match stream.next() { 203 | (_, span, Some(t)) if t == self.0 => { 204 | balance += 1; 205 | starts.push(span); 206 | true 207 | } 208 | (_, _, Some(t)) if t == self.1 => { 209 | balance -= 1; 210 | starts.pop(); 211 | true 212 | } 213 | (at, span, Some(t)) => { 214 | for (balance_other, others) in balance_others.iter_mut().zip(self.2.iter()) { 215 | if t == others.0 { 216 | *balance_other += 1; 217 | } else if t == others.1 { 218 | *balance_other -= 1; 219 | 220 | if *balance_other < 0 && balance == 1 { 221 | // stream.revert(pre_state); 222 | error.get_or_insert_with(|| { 223 | Located::at( 224 | at, 225 | P::Error::unclosed_delimiter( 226 | starts.pop().unwrap(), 227 | self.0.clone(), 228 | span.clone(), 229 | self.1.clone(), 230 | Some(t.clone()), 231 | ), 232 | ) 233 | }); 234 | } 235 | } 236 | } 237 | false 238 | } 239 | (at, span, None) => { 240 | if balance > 0 && balance == 1 { 241 | error.get_or_insert_with(|| match starts.pop() { 242 | Some(start) => Located::at( 243 | at, 244 | P::Error::unclosed_delimiter( 245 | start, 246 | self.0.clone(), 247 | span, 248 | self.1.clone(), 249 | None, 250 | ), 251 | ), 252 | None => Located::at( 253 | at, 254 | P::Error::expected_input_found( 255 | span, 256 | Some(Some(self.1.clone())), 257 | None, 258 | ), 259 | ), 260 | }); 261 | } 262 | break false; 263 | } 264 | } { 265 | match balance.cmp(&0) { 266 | Ordering::Equal => break true, 267 | // The end of a delimited section is not a valid recovery pattern 268 | Ordering::Less => break false, 269 | Ordering::Greater => (), 270 | } 271 | } else if balance == 0 { 272 | // A non-delimiter input before anything else is not a valid recovery pattern 273 | break false; 274 | } 275 | }; 276 | 277 | if let Some(e) = error { 278 | a_errors.push(e); 279 | } 280 | 281 | if recovered { 282 | if a_errors.last().map_or(true, |e| a_err.at < e.at) { 283 | a_errors.push(a_err); 284 | } 285 | (a_errors, Ok(((self.3)(stream.span_since(pre_state)), None))) 286 | } else { 287 | (a_errors, Err(a_err)) 288 | } 289 | } 290 | } 291 | 292 | /// A recovery strategy that searches for a start and end delimiter, respecting nesting. 293 | /// 294 | /// It is possible to specify additional delimiter pairs that are valid in the pattern's context for better errors. For 295 | /// example, you might want to also specify `[('[', ']'), ('{', '}')]` when recovering a parenthesised expression as 296 | /// this can aid in detecting delimiter mismatches. 297 | /// 298 | /// A function that generates a fallback output on recovery is also required. 299 | pub fn nested_delimiters( 300 | start: I, 301 | end: I, 302 | others: [(I, I); N], 303 | fallback: F, 304 | ) -> NestedDelimiters { 305 | assert!( 306 | start != end, 307 | "Start and end delimiters cannot be the same when using `NestedDelimiters`" 308 | ); 309 | NestedDelimiters(start, end, others, fallback) 310 | } 311 | 312 | /// A parser that includes a fallback recovery strategy should parsing result in an error. 
313 | #[derive(Copy, Clone)] 314 | pub struct Recovery(pub(crate) A, pub(crate) S); 315 | 316 | impl, S: Strategy, E: Error> Parser 317 | for Recovery 318 | { 319 | type Error = E; 320 | 321 | fn parse_inner( 322 | &self, 323 | debugger: &mut D, 324 | stream: &mut StreamOf, 325 | ) -> PResult { 326 | match stream.try_parse(|stream| { 327 | #[allow(deprecated)] 328 | debugger.invoke(&self.0, stream) 329 | }) { 330 | (a_errors, Ok(a_out)) => (a_errors, Ok(a_out)), 331 | (a_errors, Err(a_err)) => self.1.recover(a_errors, a_err, &self.0, debugger, stream), 332 | } 333 | } 334 | 335 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 336 | #[allow(deprecated)] 337 | self.parse_inner(d, s) 338 | } 339 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 340 | #[allow(deprecated)] 341 | self.parse_inner(d, s) 342 | } 343 | } 344 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! Error types, traits and utilities. 2 | //! 3 | //! *“I like the cover," he said. "Don't Panic. It's the first helpful or intelligible thing anybody's said to me all 4 | //! day.”* 5 | //! 6 | //! You can implement the [`Error`] trait to create your own parser errors, or you can use one provided by the crate 7 | //! like [`Simple`] or [`Cheap`]. 8 | 9 | use super::*; 10 | use alloc::{format, string::ToString}; 11 | use core::hash::Hash; 12 | 13 | #[cfg(not(feature = "std"))] 14 | use hashbrown::HashSet; 15 | #[cfg(feature = "std")] 16 | use std::collections::HashSet; 17 | 18 | // (ahash + std) => ahash 19 | // (ahash) => ahash 20 | // (std) => std 21 | // () => ahash 22 | #[cfg(any(feature = "ahash", not(feature = "std")))] 23 | type RandomState = hashbrown::hash_map::DefaultHashBuilder; 24 | #[cfg(all(not(feature = "ahash"), feature = "std"))] 25 | type RandomState = std::collections::hash_map::RandomState; 26 | 27 | /// A trait that describes parser error types. 28 | /// 29 | /// If you have a custom error type in your compiler, or your needs are not sufficiently met by [`Simple`], you should 30 | /// implement this trait. If your error type has 'extra' features that allow for more specific error messages, you can 31 | /// use the [`Parser::map_err`] or [`Parser::try_map`] functions to take advantage of these inline within your parser. 
32 | /// 33 | /// # Examples 34 | /// 35 | /// ``` 36 | /// # use chumsky::{prelude::*, error::Cheap}; 37 | /// type Span = std::ops::Range; 38 | /// 39 | /// // A custom error type 40 | /// #[derive(Debug, PartialEq)] 41 | /// enum MyError { 42 | /// ExpectedFound(Span, Vec>, Option), 43 | /// NotADigit(Span, char), 44 | /// } 45 | /// 46 | /// impl chumsky::Error for MyError { 47 | /// type Span = Span; 48 | /// type Label = (); 49 | /// 50 | /// fn expected_input_found>>( 51 | /// span: Span, 52 | /// expected: Iter, 53 | /// found: Option, 54 | /// ) -> Self { 55 | /// Self::ExpectedFound(span, expected.into_iter().collect(), found) 56 | /// } 57 | /// 58 | /// fn with_label(mut self, label: Self::Label) -> Self { self } 59 | /// 60 | /// fn merge(mut self, mut other: Self) -> Self { 61 | /// if let (Self::ExpectedFound(_, expected, _), Self::ExpectedFound(_, expected_other, _)) = ( 62 | /// &mut self, 63 | /// &mut other, 64 | /// ) { 65 | /// expected.append(expected_other); 66 | /// } 67 | /// self 68 | /// } 69 | /// } 70 | /// 71 | /// let numeral = filter_map(|span, c: char| match c.to_digit(10) { 72 | /// Some(x) => Ok(x), 73 | /// None => Err(MyError::NotADigit(span, c)), 74 | /// }); 75 | /// 76 | /// assert_eq!(numeral.parse("3"), Ok(3)); 77 | /// assert_eq!(numeral.parse("7"), Ok(7)); 78 | /// assert_eq!(numeral.parse("f"), Err(vec![MyError::NotADigit(0..1, 'f')])); 79 | /// ``` 80 | pub trait Error: Sized { 81 | /// The type of spans to be used in the error. 82 | type Span: Span; // TODO: Default to = Range; 83 | 84 | /// The label used to describe a syntatic structure currently being parsed. 85 | /// 86 | /// This can be used to generate errors that tell the user what syntactic structure was currently being parsed when 87 | /// the error occured. 88 | type Label; // TODO: Default to = &'static str; 89 | 90 | /// Create a new error describing a conflict between expected inputs and that which was actually found. 91 | /// 92 | /// `found` having the value `None` indicates that the end of input was reached, but was not expected. 93 | /// 94 | /// An expected input having the value `None` indicates that the end of input was expected. 95 | fn expected_input_found>>( 96 | span: Self::Span, 97 | expected: Iter, 98 | found: Option, 99 | ) -> Self; 100 | 101 | /// Create a new error describing a delimiter that was not correctly closed. 102 | /// 103 | /// Provided to this function is the span of the unclosed delimiter, the delimiter itself, the span of the input 104 | /// that was found in its place, the closing delimiter that was expected but not found, and the input that was 105 | /// found in its place. 106 | /// 107 | /// The default implementation of this function uses [`Error::expected_input_found`], but you'll probably want to 108 | /// implement it yourself to take full advantage of the extra diagnostic information. 109 | fn unclosed_delimiter( 110 | unclosed_span: Self::Span, 111 | unclosed: I, 112 | span: Self::Span, 113 | expected: I, 114 | found: Option, 115 | ) -> Self { 116 | #![allow(unused_variables)] 117 | Self::expected_input_found(span, Some(Some(expected)), found) 118 | } 119 | 120 | /// Indicate that the error occured while parsing a particular syntactic structure. 121 | /// 122 | /// How the error handles this information is up to it. It can append it to a list of structures to get a sort of 123 | /// 'parse backtrace', or it can just keep only the most recent label. If the latter, this method should have no 124 | /// effect when the error already has a label. 
125 | fn with_label(self, label: Self::Label) -> Self; 126 | 127 | /// Merge two errors that point to the same input together, combining their information. 128 | fn merge(self, other: Self) -> Self; 129 | } 130 | 131 | // /// A simple default input pattern that allows describing inputs and input patterns in error messages. 132 | // #[derive(Clone, Debug, PartialEq, Eq, Hash)] 133 | // pub enum SimplePattern { 134 | // /// A pattern with the given name was expected. 135 | // Labelled(&'static str), 136 | // /// A specific input was expected. 137 | // Token(I), 138 | // } 139 | 140 | // impl From<&'static str> for SimplePattern { 141 | // fn from(s: &'static str) -> Self { Self::Labelled(s) } 142 | // } 143 | 144 | // impl fmt::Display for SimplePattern { 145 | // fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 146 | // match self { 147 | // Self::Labelled(s) => write!(f, "{}", s), 148 | // Self::Token(x) => write!(f, "'{}'", x), 149 | // } 150 | // } 151 | // } 152 | 153 | /// A type representing possible reasons for an error. 154 | #[derive(Clone, Debug, PartialEq, Eq)] 155 | pub enum SimpleReason { 156 | /// An unexpected input was found. 157 | Unexpected, 158 | /// An unclosed delimiter was found. 159 | Unclosed { 160 | /// The span of the unclosed delimiter. 161 | span: S, 162 | /// The unclosed delimiter. 163 | delimiter: I, 164 | }, 165 | /// An error with a custom message occurred. 166 | Custom(String), 167 | } 168 | 169 | /// A simple default error type that tracks error spans, expected inputs, and the actual input found at an error site. 170 | /// 171 | /// Please note that it uses a [`HashSet`] to remember expected symbols. If you find this to be too slow, you can 172 | /// implement [`Error`] for your own error type or use [`Cheap`] instead. 173 | #[derive(Clone, Debug)] 174 | pub struct Simple> { 175 | span: S, 176 | reason: SimpleReason, 177 | expected: HashSet, RandomState>, 178 | found: Option, 179 | label: Option<&'static str>, 180 | } 181 | 182 | impl Simple { 183 | /// Create an error with a custom error message. 184 | pub fn custom(span: S, msg: M) -> Self { 185 | Self { 186 | span, 187 | reason: SimpleReason::Custom(msg.to_string()), 188 | expected: HashSet::default(), 189 | found: None, 190 | label: None, 191 | } 192 | } 193 | 194 | /// Returns the span that the error occured at. 195 | pub fn span(&self) -> S { 196 | self.span.clone() 197 | } 198 | 199 | /// Returns an iterator over possible expected patterns. 200 | pub fn expected(&self) -> impl ExactSizeIterator> + '_ { 201 | self.expected.iter() 202 | } 203 | 204 | /// Returns the input, if any, that was found instead of an expected pattern. 205 | pub fn found(&self) -> Option<&I> { 206 | self.found.as_ref() 207 | } 208 | 209 | /// Returns the reason for the error. 210 | pub fn reason(&self) -> &SimpleReason { 211 | &self.reason 212 | } 213 | 214 | /// Returns the error's label, if any. 215 | pub fn label(&self) -> Option<&'static str> { 216 | self.label 217 | } 218 | 219 | /// Map the error's inputs using the given function. 220 | /// 221 | /// This can be used to unify the errors between parsing stages that operate upon two forms of input (for example, 222 | /// the initial lexing stage and the parsing stage in most compilers). 
223 | pub fn map U>(self, mut f: F) -> Simple { 224 | Simple { 225 | span: self.span, 226 | reason: match self.reason { 227 | SimpleReason::Unclosed { span, delimiter } => SimpleReason::Unclosed { 228 | span, 229 | delimiter: f(delimiter), 230 | }, 231 | SimpleReason::Unexpected => SimpleReason::Unexpected, 232 | SimpleReason::Custom(msg) => SimpleReason::Custom(msg), 233 | }, 234 | expected: self.expected.into_iter().map(|e| e.map(&mut f)).collect(), 235 | found: self.found.map(f), 236 | label: self.label, 237 | } 238 | } 239 | } 240 | 241 | impl Error for Simple { 242 | type Span = S; 243 | type Label = &'static str; 244 | 245 | fn expected_input_found>>( 246 | span: Self::Span, 247 | expected: Iter, 248 | found: Option, 249 | ) -> Self { 250 | Self { 251 | span, 252 | reason: SimpleReason::Unexpected, 253 | expected: expected.into_iter().collect(), 254 | found, 255 | label: None, 256 | } 257 | } 258 | 259 | fn unclosed_delimiter( 260 | unclosed_span: Self::Span, 261 | delimiter: I, 262 | span: Self::Span, 263 | expected: I, 264 | found: Option, 265 | ) -> Self { 266 | Self { 267 | span, 268 | reason: SimpleReason::Unclosed { 269 | span: unclosed_span, 270 | delimiter, 271 | }, 272 | expected: core::iter::once(Some(expected)).collect(), 273 | found, 274 | label: None, 275 | } 276 | } 277 | 278 | fn with_label(mut self, label: Self::Label) -> Self { 279 | self.label.get_or_insert(label); 280 | self 281 | } 282 | 283 | fn merge(mut self, other: Self) -> Self { 284 | // TODO: Assert that `self.span == other.span` here? 285 | self.reason = match (&self.reason, &other.reason) { 286 | (SimpleReason::Unclosed { .. }, _) => self.reason, 287 | (_, SimpleReason::Unclosed { .. }) => other.reason, 288 | _ => self.reason, 289 | }; 290 | for expected in other.expected { 291 | self.expected.insert(expected); 292 | } 293 | self 294 | } 295 | } 296 | 297 | impl PartialEq for Simple { 298 | fn eq(&self, other: &Self) -> bool { 299 | self.span == other.span 300 | && self.found == other.found 301 | && self.reason == other.reason 302 | && self.label == other.label 303 | } 304 | } 305 | 306 | impl fmt::Display for Simple { 307 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 308 | // TODO: Take `self.reason` into account 309 | 310 | if let Some(found) = &self.found { 311 | write!(f, "found '{}'", found)?; 312 | } else { 313 | write!(f, "found end of input")?; 314 | } 315 | 316 | match self.expected.len() { 317 | 0 => {} //write!(f, " but end of input was expected")?, 318 | 1 => write!( 319 | f, 320 | " but {} was expected", 321 | match self.expected.iter().next().unwrap() { 322 | Some(x) => format!("'{}'", x), 323 | None => format!("end of input"), 324 | }, 325 | )?, 326 | _ => write!( 327 | f, 328 | " but one of {} was expected", 329 | self.expected 330 | .iter() 331 | .map(|expected| match expected { 332 | Some(x) => format!("'{}'", x), 333 | None => format!("end of input"), 334 | }) 335 | .collect::>() 336 | .join(", ") 337 | )?, 338 | } 339 | 340 | Ok(()) 341 | } 342 | } 343 | 344 | #[cfg(feature = "std")] 345 | impl 346 | std::error::Error for Simple 347 | { 348 | } 349 | 350 | /// A minimal error type that tracks only the error span and label. This type is most useful when you want fast parsing 351 | /// but do not particularly care about the quality of error messages. 
352 | #[derive(Clone, Debug, PartialEq, Eq)] 353 | pub struct Cheap> { 354 | span: S, 355 | label: Option<&'static str>, 356 | phantom: PhantomData, 357 | } 358 | 359 | impl Cheap { 360 | /// Returns the span that the error occured at. 361 | pub fn span(&self) -> S { 362 | self.span.clone() 363 | } 364 | 365 | /// Returns the error's label, if any. 366 | pub fn label(&self) -> Option<&'static str> { 367 | self.label 368 | } 369 | } 370 | 371 | impl Error for Cheap { 372 | type Span = S; 373 | type Label = &'static str; 374 | 375 | fn expected_input_found>>( 376 | span: Self::Span, 377 | _: Iter, 378 | _: Option, 379 | ) -> Self { 380 | Self { 381 | span, 382 | label: None, 383 | phantom: PhantomData, 384 | } 385 | } 386 | 387 | fn with_label(mut self, label: Self::Label) -> Self { 388 | self.label.get_or_insert(label); 389 | self 390 | } 391 | 392 | fn merge(self, _: Self) -> Self { 393 | self 394 | } 395 | } 396 | 397 | /// An internal type used to facilitate error prioritisation. You shouldn't need to interact with this type during 398 | /// normal use of the crate. 399 | pub struct Located { 400 | pub(crate) at: usize, 401 | pub(crate) error: E, 402 | pub(crate) phantom: PhantomData, 403 | } 404 | 405 | impl> Located { 406 | /// Create a new [`Located`] with the give input position and error. 407 | pub fn at(at: usize, error: E) -> Self { 408 | Self { 409 | at, 410 | error, 411 | phantom: PhantomData, 412 | } 413 | } 414 | 415 | /// Get the maximum of two located errors. If they hold the same position in the input, merge them. 416 | pub fn max(self, other: impl Into>) -> Self { 417 | let other = match other.into() { 418 | Some(other) => other, 419 | None => return self, 420 | }; 421 | match self.at.cmp(&other.at) { 422 | Ordering::Greater => self, 423 | Ordering::Less => other, 424 | Ordering::Equal => Self { 425 | error: self.error.merge(other.error), 426 | ..self 427 | }, 428 | } 429 | } 430 | 431 | /// Map the error with the given function. 432 | pub fn map U>(self, f: F) -> Located { 433 | Located { 434 | at: self.at, 435 | error: f(self.error), 436 | phantom: PhantomData, 437 | } 438 | } 439 | } 440 | 441 | // Merge two alternative errors 442 | pub(crate) fn merge_alts, T: IntoIterator>>( 443 | mut error: Option>, 444 | errors: T, 445 | ) -> Option> { 446 | for other in errors { 447 | match (error, other) { 448 | (Some(a), b) => { 449 | error = Some(b.max(a)); 450 | } 451 | (None, b) => { 452 | error = Some(b); 453 | } 454 | } 455 | } 456 | error 457 | } 458 | -------------------------------------------------------------------------------- /examples/nano_rust.rs: -------------------------------------------------------------------------------- 1 | //! This is an entire parser and interpreter for a dynamically-typed Rust-like expression-oriented 2 | //! programming language. See `sample.nrs` for sample source code. 3 | //! Run it with the following command: 4 | //! 
cargo run --example nano_rust -- examples/sample.nrs 5 | 6 | use ariadne::{Color, Fmt, Label, Report, ReportKind, Source}; 7 | use chumsky::{prelude::*, stream::Stream}; 8 | use std::{collections::HashMap, env, fmt, fs}; 9 | 10 | pub type Span = std::ops::Range; 11 | 12 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 13 | enum Token { 14 | Null, 15 | Bool(bool), 16 | Num(String), 17 | Str(String), 18 | Op(String), 19 | Ctrl(char), 20 | Ident(String), 21 | Fn, 22 | Let, 23 | Print, 24 | If, 25 | Else, 26 | } 27 | 28 | impl fmt::Display for Token { 29 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 30 | match self { 31 | Token::Null => write!(f, "null"), 32 | Token::Bool(x) => write!(f, "{}", x), 33 | Token::Num(n) => write!(f, "{}", n), 34 | Token::Str(s) => write!(f, "{}", s), 35 | Token::Op(s) => write!(f, "{}", s), 36 | Token::Ctrl(c) => write!(f, "{}", c), 37 | Token::Ident(s) => write!(f, "{}", s), 38 | Token::Fn => write!(f, "fn"), 39 | Token::Let => write!(f, "let"), 40 | Token::Print => write!(f, "print"), 41 | Token::If => write!(f, "if"), 42 | Token::Else => write!(f, "else"), 43 | } 44 | } 45 | } 46 | 47 | fn lexer() -> impl Parser, Error = Simple> { 48 | // A parser for numbers 49 | let num = text::int(10) 50 | .chain::(just('.').chain(text::digits(10)).or_not().flatten()) 51 | .collect::() 52 | .map(Token::Num); 53 | 54 | // A parser for strings 55 | let str_ = just('"') 56 | .ignore_then(filter(|c| *c != '"').repeated()) 57 | .then_ignore(just('"')) 58 | .collect::() 59 | .map(Token::Str); 60 | 61 | // A parser for operators 62 | let op = one_of("+-*/!=") 63 | .repeated() 64 | .at_least(1) 65 | .collect::() 66 | .map(Token::Op); 67 | 68 | // A parser for control characters (delimiters, semicolons, etc.) 69 | let ctrl = one_of("()[]{};,").map(|c| Token::Ctrl(c)); 70 | 71 | // A parser for identifiers and keywords 72 | let ident = text::ident().map(|ident: String| match ident.as_str() { 73 | "fn" => Token::Fn, 74 | "let" => Token::Let, 75 | "print" => Token::Print, 76 | "if" => Token::If, 77 | "else" => Token::Else, 78 | "true" => Token::Bool(true), 79 | "false" => Token::Bool(false), 80 | "null" => Token::Null, 81 | _ => Token::Ident(ident), 82 | }); 83 | 84 | // A single token can be one of the above 85 | let token = num 86 | .or(str_) 87 | .or(op) 88 | .or(ctrl) 89 | .or(ident) 90 | .recover_with(skip_then_retry_until([])); 91 | 92 | let comment = just("//").then(take_until(just('\n'))).padded(); 93 | 94 | token 95 | .padded_by(comment.repeated()) 96 | .map_with_span(|tok, span| (tok, span)) 97 | .padded() 98 | .repeated() 99 | } 100 | 101 | #[derive(Clone, Debug, PartialEq)] 102 | enum Value { 103 | Null, 104 | Bool(bool), 105 | Num(f64), 106 | Str(String), 107 | List(Vec), 108 | Func(String), 109 | } 110 | 111 | impl Value { 112 | fn num(self, span: Span) -> Result { 113 | if let Value::Num(x) = self { 114 | Ok(x) 115 | } else { 116 | Err(Error { 117 | span, 118 | msg: format!("'{}' is not a number", self), 119 | }) 120 | } 121 | } 122 | } 123 | 124 | impl std::fmt::Display for Value { 125 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 126 | match self { 127 | Self::Null => write!(f, "null"), 128 | Self::Bool(x) => write!(f, "{}", x), 129 | Self::Num(x) => write!(f, "{}", x), 130 | Self::Str(x) => write!(f, "{}", x), 131 | Self::List(xs) => write!( 132 | f, 133 | "[{}]", 134 | xs.iter() 135 | .map(|x| x.to_string()) 136 | .collect::>() 137 | .join(", ") 138 | ), 139 | Self::Func(name) => write!(f, "", name), 140 | } 141 | } 142 | } 143 | 144 | 
#[derive(Clone, Debug)] 145 | enum BinaryOp { 146 | Add, 147 | Sub, 148 | Mul, 149 | Div, 150 | Eq, 151 | NotEq, 152 | } 153 | 154 | pub type Spanned = (T, Span); 155 | 156 | // An expression node in the AST. Children are spanned so we can generate useful runtime errors. 157 | #[derive(Debug)] 158 | enum Expr { 159 | Error, 160 | Value(Value), 161 | List(Vec>), 162 | Local(String), 163 | Let(String, Box>, Box>), 164 | Then(Box>, Box>), 165 | Binary(Box>, BinaryOp, Box>), 166 | Call(Box>, Spanned>>), 167 | If(Box>, Box>, Box>), 168 | Print(Box>), 169 | } 170 | 171 | // A function node in the AST. 172 | #[derive(Debug)] 173 | struct Func { 174 | args: Vec, 175 | body: Spanned, 176 | } 177 | 178 | fn expr_parser() -> impl Parser, Error = Simple> + Clone { 179 | recursive(|expr| { 180 | let raw_expr = recursive(|raw_expr| { 181 | let val = filter_map(|span, tok| match tok { 182 | Token::Null => Ok(Expr::Value(Value::Null)), 183 | Token::Bool(x) => Ok(Expr::Value(Value::Bool(x))), 184 | Token::Num(n) => Ok(Expr::Value(Value::Num(n.parse().unwrap()))), 185 | Token::Str(s) => Ok(Expr::Value(Value::Str(s))), 186 | _ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))), 187 | }) 188 | .labelled("value"); 189 | 190 | let ident = filter_map(|span, tok| match tok { 191 | Token::Ident(ident) => Ok(ident.clone()), 192 | _ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))), 193 | }) 194 | .labelled("identifier"); 195 | 196 | // A list of expressions 197 | let items = expr 198 | .clone() 199 | .chain(just(Token::Ctrl(',')).ignore_then(expr.clone()).repeated()) 200 | .then_ignore(just(Token::Ctrl(',')).or_not()) 201 | .or_not() 202 | .map(|item| item.unwrap_or_else(Vec::new)); 203 | 204 | // A let expression 205 | let let_ = just(Token::Let) 206 | .ignore_then(ident) 207 | .then_ignore(just(Token::Op("=".to_string()))) 208 | .then(raw_expr) 209 | .then_ignore(just(Token::Ctrl(';'))) 210 | .then(expr.clone()) 211 | .map(|((name, val), body)| Expr::Let(name, Box::new(val), Box::new(body))); 212 | 213 | let list = items 214 | .clone() 215 | .delimited_by(just(Token::Ctrl('[')), just(Token::Ctrl(']'))) 216 | .map(Expr::List); 217 | 218 | // 'Atoms' are expressions that contain no ambiguity 219 | let atom = val 220 | .or(ident.map(Expr::Local)) 221 | .or(let_) 222 | .or(list) 223 | // In Nano Rust, `print` is just a keyword, just like Python 2, for simplicity 224 | .or(just(Token::Print) 225 | .ignore_then( 226 | expr.clone() 227 | .delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')'))), 228 | ) 229 | .map(|expr| Expr::Print(Box::new(expr)))) 230 | .map_with_span(|expr, span| (expr, span)) 231 | // Atoms can also just be normal expressions, but surrounded with parentheses 232 | .or(expr 233 | .clone() 234 | .delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')')))) 235 | // Attempt to recover anything that looks like a parenthesised expression but contains errors 236 | .recover_with(nested_delimiters( 237 | Token::Ctrl('('), 238 | Token::Ctrl(')'), 239 | [ 240 | (Token::Ctrl('['), Token::Ctrl(']')), 241 | (Token::Ctrl('{'), Token::Ctrl('}')), 242 | ], 243 | |span| (Expr::Error, span), 244 | )) 245 | // Attempt to recover anything that looks like a list but contains errors 246 | .recover_with(nested_delimiters( 247 | Token::Ctrl('['), 248 | Token::Ctrl(']'), 249 | [ 250 | (Token::Ctrl('('), Token::Ctrl(')')), 251 | (Token::Ctrl('{'), Token::Ctrl('}')), 252 | ], 253 | |span| (Expr::Error, span), 254 | )); 255 | 256 | // Function calls have very high precedence so we 
prioritise them 257 | let call = atom 258 | .then( 259 | items 260 | .delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')'))) 261 | .map_with_span(|args, span| (args, span)) 262 | .repeated(), 263 | ) 264 | .foldl(|f, args| { 265 | let span = f.1.start..args.1.end; 266 | (Expr::Call(Box::new(f), args), span) 267 | }); 268 | 269 | // Product ops (multiply and divide) have equal precedence 270 | let op = just(Token::Op("*".to_string())) 271 | .to(BinaryOp::Mul) 272 | .or(just(Token::Op("/".to_string())).to(BinaryOp::Div)); 273 | let product = call 274 | .clone() 275 | .then(op.then(call).repeated()) 276 | .foldl(|a, (op, b)| { 277 | let span = a.1.start..b.1.end; 278 | (Expr::Binary(Box::new(a), op, Box::new(b)), span) 279 | }); 280 | 281 | // Sum ops (add and subtract) have equal precedence 282 | let op = just(Token::Op("+".to_string())) 283 | .to(BinaryOp::Add) 284 | .or(just(Token::Op("-".to_string())).to(BinaryOp::Sub)); 285 | let sum = product 286 | .clone() 287 | .then(op.then(product).repeated()) 288 | .foldl(|a, (op, b)| { 289 | let span = a.1.start..b.1.end; 290 | (Expr::Binary(Box::new(a), op, Box::new(b)), span) 291 | }); 292 | 293 | // Comparison ops (equal, not-equal) have equal precedence 294 | let op = just(Token::Op("==".to_string())) 295 | .to(BinaryOp::Eq) 296 | .or(just(Token::Op("!=".to_string())).to(BinaryOp::NotEq)); 297 | let compare = sum 298 | .clone() 299 | .then(op.then(sum).repeated()) 300 | .foldl(|a, (op, b)| { 301 | let span = a.1.start..b.1.end; 302 | (Expr::Binary(Box::new(a), op, Box::new(b)), span) 303 | }); 304 | 305 | compare 306 | }); 307 | 308 | // Blocks are expressions but delimited with braces 309 | let block = expr 310 | .clone() 311 | .delimited_by(just(Token::Ctrl('{')), just(Token::Ctrl('}'))) 312 | // Attempt to recover anything that looks like a block but contains errors 313 | .recover_with(nested_delimiters( 314 | Token::Ctrl('{'), 315 | Token::Ctrl('}'), 316 | [ 317 | (Token::Ctrl('('), Token::Ctrl(')')), 318 | (Token::Ctrl('['), Token::Ctrl(']')), 319 | ], 320 | |span| (Expr::Error, span), 321 | )); 322 | 323 | let if_ = recursive(|if_| { 324 | just(Token::If) 325 | .ignore_then(expr.clone()) 326 | .then(block.clone()) 327 | .then( 328 | just(Token::Else) 329 | .ignore_then(block.clone().or(if_)) 330 | .or_not(), 331 | ) 332 | .map_with_span(|((cond, a), b), span| { 333 | ( 334 | Expr::If( 335 | Box::new(cond), 336 | Box::new(a), 337 | Box::new(match b { 338 | Some(b) => b, 339 | // If an `if` expression has no trailing `else` block, we magic up one that just produces null 340 | None => (Expr::Value(Value::Null), span.clone()), 341 | }), 342 | ), 343 | span, 344 | ) 345 | }) 346 | }); 347 | 348 | // Both blocks and `if` are 'block expressions' and can appear in the place of statements 349 | let block_expr = block.or(if_).labelled("block"); 350 | 351 | let block_chain = block_expr 352 | .clone() 353 | .then(block_expr.clone().repeated()) 354 | .foldl(|a, b| { 355 | let span = a.1.start..b.1.end; 356 | (Expr::Then(Box::new(a), Box::new(b)), span) 357 | }); 358 | 359 | block_chain 360 | // Expressions, chained by semicolons, are statements 361 | .or(raw_expr.clone()) 362 | .then(just(Token::Ctrl(';')).ignore_then(expr.or_not()).repeated()) 363 | .foldl(|a, b| { 364 | let span = a.1.clone(); // TODO: Not correct 365 | ( 366 | Expr::Then( 367 | Box::new(a), 368 | Box::new(match b { 369 | Some(b) => b, 370 | None => (Expr::Value(Value::Null), span.clone()), 371 | }), 372 | ), 373 | span, 374 | ) 375 | }) 376 | }) 377 | } 378 | 379 | fn 
funcs_parser() -> impl Parser, Error = Simple> + Clone { 380 | let ident = filter_map(|span, tok| match tok { 381 | Token::Ident(ident) => Ok(ident.clone()), 382 | _ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))), 383 | }); 384 | 385 | // Argument lists are just identifiers separated by commas, surrounded by parentheses 386 | let args = ident 387 | .clone() 388 | .separated_by(just(Token::Ctrl(','))) 389 | .allow_trailing() 390 | .delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')'))) 391 | .labelled("function args"); 392 | 393 | let func = just(Token::Fn) 394 | .ignore_then( 395 | ident 396 | .map_with_span(|name, span| (name, span)) 397 | .labelled("function name"), 398 | ) 399 | .then(args) 400 | .then( 401 | expr_parser() 402 | .delimited_by(just(Token::Ctrl('{')), just(Token::Ctrl('}'))) 403 | // Attempt to recover anything that looks like a function body but contains errors 404 | .recover_with(nested_delimiters( 405 | Token::Ctrl('{'), 406 | Token::Ctrl('}'), 407 | [ 408 | (Token::Ctrl('('), Token::Ctrl(')')), 409 | (Token::Ctrl('['), Token::Ctrl(']')), 410 | ], 411 | |span| (Expr::Error, span), 412 | )), 413 | ) 414 | .map(|((name, args), body)| (name, Func { args, body })) 415 | .labelled("function"); 416 | 417 | func.repeated() 418 | .try_map(|fs, _| { 419 | let mut funcs = HashMap::new(); 420 | for ((name, name_span), f) in fs { 421 | if funcs.insert(name.clone(), f).is_some() { 422 | return Err(Simple::custom( 423 | name_span.clone(), 424 | format!("Function '{}' already exists", name), 425 | )); 426 | } 427 | } 428 | Ok(funcs) 429 | }) 430 | .then_ignore(end()) 431 | } 432 | 433 | struct Error { 434 | span: Span, 435 | msg: String, 436 | } 437 | 438 | fn eval_expr( 439 | expr: &Spanned, 440 | funcs: &HashMap, 441 | stack: &mut Vec<(String, Value)>, 442 | ) -> Result { 443 | Ok(match &expr.0 { 444 | Expr::Error => unreachable!(), // Error expressions only get created by parser errors, so cannot exist in a valid AST 445 | Expr::Value(val) => val.clone(), 446 | Expr::List(items) => Value::List( 447 | items 448 | .iter() 449 | .map(|item| eval_expr(item, funcs, stack)) 450 | .collect::>()?, 451 | ), 452 | Expr::Local(name) => stack 453 | .iter() 454 | .rev() 455 | .find(|(l, _)| l == name) 456 | .map(|(_, v)| v.clone()) 457 | .or_else(|| Some(Value::Func(name.clone())).filter(|_| funcs.contains_key(name))) 458 | .ok_or_else(|| Error { 459 | span: expr.1.clone(), 460 | msg: format!("No such variable '{}' in scope", name), 461 | })?, 462 | Expr::Let(local, val, body) => { 463 | let val = eval_expr(val, funcs, stack)?; 464 | stack.push((local.clone(), val)); 465 | let res = eval_expr(body, funcs, stack)?; 466 | stack.pop(); 467 | res 468 | } 469 | Expr::Then(a, b) => { 470 | eval_expr(a, funcs, stack)?; 471 | eval_expr(b, funcs, stack)? 472 | } 473 | Expr::Binary(a, BinaryOp::Add, b) => Value::Num( 474 | eval_expr(a, funcs, stack)?.num(a.1.clone())? 475 | + eval_expr(b, funcs, stack)?.num(b.1.clone())?, 476 | ), 477 | Expr::Binary(a, BinaryOp::Sub, b) => Value::Num( 478 | eval_expr(a, funcs, stack)?.num(a.1.clone())? 479 | - eval_expr(b, funcs, stack)?.num(b.1.clone())?, 480 | ), 481 | Expr::Binary(a, BinaryOp::Mul, b) => Value::Num( 482 | eval_expr(a, funcs, stack)?.num(a.1.clone())? 483 | * eval_expr(b, funcs, stack)?.num(b.1.clone())?, 484 | ), 485 | Expr::Binary(a, BinaryOp::Div, b) => Value::Num( 486 | eval_expr(a, funcs, stack)?.num(a.1.clone())? 
487 | / eval_expr(b, funcs, stack)?.num(b.1.clone())?, 488 | ), 489 | Expr::Binary(a, BinaryOp::Eq, b) => { 490 | Value::Bool(eval_expr(a, funcs, stack)? == eval_expr(b, funcs, stack)?) 491 | } 492 | Expr::Binary(a, BinaryOp::NotEq, b) => { 493 | Value::Bool(eval_expr(a, funcs, stack)? != eval_expr(b, funcs, stack)?) 494 | } 495 | Expr::Call(func, (args, args_span)) => { 496 | let f = eval_expr(func, funcs, stack)?; 497 | match f { 498 | Value::Func(name) => { 499 | let f = &funcs[&name]; 500 | let mut stack = if f.args.len() != args.len() { 501 | return Err(Error { 502 | span: args_span.clone(), 503 | msg: format!("'{}' called with wrong number of arguments (expected {}, found {})", name, f.args.len(), args.len()), 504 | }); 505 | } else { 506 | f.args 507 | .iter() 508 | .zip(args.iter()) 509 | .map(|(name, arg)| Ok((name.clone(), eval_expr(arg, funcs, stack)?))) 510 | .collect::>()? 511 | }; 512 | eval_expr(&f.body, funcs, &mut stack)? 513 | } 514 | f => { 515 | return Err(Error { 516 | span: func.1.clone(), 517 | msg: format!("'{:?}' is not callable", f), 518 | }) 519 | } 520 | } 521 | } 522 | Expr::If(cond, a, b) => { 523 | let c = eval_expr(cond, funcs, stack)?; 524 | match c { 525 | Value::Bool(true) => eval_expr(a, funcs, stack)?, 526 | Value::Bool(false) => eval_expr(b, funcs, stack)?, 527 | c => { 528 | return Err(Error { 529 | span: cond.1.clone(), 530 | msg: format!("Conditions must be booleans, found '{:?}'", c), 531 | }) 532 | } 533 | } 534 | } 535 | Expr::Print(a) => { 536 | let val = eval_expr(a, funcs, stack)?; 537 | println!("{}", val); 538 | val 539 | } 540 | }) 541 | } 542 | 543 | fn main() { 544 | let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument")) 545 | .expect("Failed to read file"); 546 | 547 | let (tokens, mut errs) = lexer().parse_recovery(src.as_str()); 548 | 549 | let parse_errs = if let Some(tokens) = tokens { 550 | // println!("Tokens = {:?}", tokens); 551 | let len = src.chars().count(); 552 | let (ast, parse_errs) = 553 | funcs_parser().parse_recovery(Stream::from_iter(len..len + 1, tokens.into_iter())); 554 | 555 | println!("{:#?}", ast); 556 | if let Some(funcs) = ast.filter(|_| errs.len() + parse_errs.len() == 0) { 557 | if let Some(main) = funcs.get("main") { 558 | assert_eq!(main.args.len(), 0); 559 | match eval_expr(&main.body, &funcs, &mut Vec::new()) { 560 | Ok(val) => println!("Return value: {}", val), 561 | Err(e) => errs.push(Simple::custom(e.span, e.msg)), 562 | } 563 | } else { 564 | panic!("No main function!"); 565 | } 566 | } 567 | 568 | parse_errs 569 | } else { 570 | Vec::new() 571 | }; 572 | 573 | errs.into_iter() 574 | .map(|e| e.map(|c| c.to_string())) 575 | .chain(parse_errs.into_iter().map(|e| e.map(|tok| tok.to_string()))) 576 | .for_each(|e| { 577 | let report = Report::build(ReportKind::Error, (), e.span().start); 578 | 579 | let report = match e.reason() { 580 | chumsky::error::SimpleReason::Unclosed { span, delimiter } => report 581 | .with_message(format!( 582 | "Unclosed delimiter {}", 583 | delimiter.fg(Color::Yellow) 584 | )) 585 | .with_label( 586 | Label::new(span.clone()) 587 | .with_message(format!( 588 | "Unclosed delimiter {}", 589 | delimiter.fg(Color::Yellow) 590 | )) 591 | .with_color(Color::Yellow), 592 | ) 593 | .with_label( 594 | Label::new(e.span()) 595 | .with_message(format!( 596 | "Must be closed before this {}", 597 | e.found() 598 | .unwrap_or(&"end of file".to_string()) 599 | .fg(Color::Red) 600 | )) 601 | .with_color(Color::Red), 602 | ), 603 | 
chumsky::error::SimpleReason::Unexpected => report 604 | .with_message(format!( 605 | "{}, expected {}", 606 | if e.found().is_some() { 607 | "Unexpected token in input" 608 | } else { 609 | "Unexpected end of input" 610 | }, 611 | if e.expected().len() == 0 { 612 | "something else".to_string() 613 | } else { 614 | e.expected() 615 | .map(|expected| match expected { 616 | Some(expected) => expected.to_string(), 617 | None => "end of input".to_string(), 618 | }) 619 | .collect::>() 620 | .join(", ") 621 | } 622 | )) 623 | .with_label( 624 | Label::new(e.span()) 625 | .with_message(format!( 626 | "Unexpected token {}", 627 | e.found() 628 | .unwrap_or(&"end of file".to_string()) 629 | .fg(Color::Red) 630 | )) 631 | .with_color(Color::Red), 632 | ), 633 | chumsky::error::SimpleReason::Custom(msg) => report.with_message(msg).with_label( 634 | Label::new(e.span()) 635 | .with_message(format!("{}", msg.fg(Color::Red))) 636 | .with_color(Color::Red), 637 | ), 638 | }; 639 | 640 | report.finish().print(Source::from(&src)).unwrap(); 641 | }); 642 | } 643 | -------------------------------------------------------------------------------- /tutorial.md: -------------------------------------------------------------------------------- 1 | # Chumsky: A Tutorial 2 | 3 | *Please note that this tutorial is kept up to date with the `master` branch and not the most stable release: small 4 | details may differ!* 5 | 6 | In this tutorial, we'll develop a parser (and interpreter!) for a programming language called 'Foo'. 7 | 8 | Foo is a simple language, but it's enough for us to have some fun. It isn't 9 | [Turing-complete](https://en.wikipedia.org/wiki/Turing_completeness), but it is complex enough to 10 | allow us to get to grips with parsing using Chumsky. Here's some sample code written in Foo: 11 | 12 | ``` 13 | let seven = 7; 14 | fn add x y = x + y; 15 | add(2, 3) * -seven 16 | ``` 17 | 18 | You can find the source code for the full interpreter in `examples/foo.rs` in the main repository. 19 | 20 | ## Setting up 21 | 22 | Create a new project with `cargo new --bin foo`, add the latest version of Chumsky as a dependency, and place 23 | the following in your `main.rs`: 24 | 25 | ```rust 26 | use chumsky::prelude::*; 27 | 28 | fn main() { 29 | let src = std::fs::read_to_string(std::env::args().nth(1).unwrap()).unwrap(); 30 | 31 | println!("{}", src); 32 | } 33 | ``` 34 | 35 | This code is quite simple: it treats the first command-line argument as a path, reads the corresponding file, 36 | then prints the contents to the terminal. 37 | 38 | Create a file named `test.foo` and run `cargo run -- test.foo` (the `--` tells cargo to pass the remaining 39 | arguments to the program instead of cargo itself). You should see that the contents of `test.foo`, if any, get 40 | printed to the console. 41 | 42 | Next, we'll create a data type that represents a program written in Foo. All programs in Foo are expressions, 43 | so we'll call it `Expr`. 44 | 45 | ```rust 46 | #[derive(Debug)] 47 | enum Expr { 48 | Num(f64), 49 | Var(String), 50 | 51 | Neg(Box), 52 | Add(Box, Box), 53 | Sub(Box, Box), 54 | Mul(Box, Box), 55 | Div(Box, Box), 56 | 57 | Call(String, Vec), 58 | Let { 59 | name: String, 60 | rhs: Box, 61 | then: Box, 62 | }, 63 | Fn { 64 | name: String, 65 | args: Vec, 66 | body: Box, 67 | then: Box, 68 | }, 69 | } 70 | ``` 71 | 72 | This is Foo's [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST). 
It represents 73 | all possible Foo programs and is defined recursively in terms of itself (`Box` is used to avoid the type being 74 | infinitely large). Each expression may itself contain sub-expressions. 75 | 76 | We're also going to create a function that creates Foo's parser. Our parser takes in a `char` stream and 77 | produces an `Expr`, so we'll use those types for the `I` (input) and `O` (output) type parameters. 78 | 79 | ```rust 80 | fn parser() -> impl Parser<char, Expr, Error = Simple<char>> { 81 | // To be filled in later... 82 | } 83 | ``` 84 | 85 | The `Error` associated type allows us to customise the error type that Chumsky uses. For now, we'll stick to 86 | `Simple`, a built-in error type that does everything we need. 87 | 88 | In `main`, we'll alter the `println!` as follows: 89 | 90 | ```rust 91 | println!("{:?}", parser().parse(src)); 92 | ``` 93 | 94 | ## Parsing digits 95 | 96 | Chumsky is a 'parser combinator' library. It allows the creation of parsers by combining together many smaller 97 | parsers. The very smallest parsers are called 'primitives' and live in the 98 | [`primitive`](https://docs.rs/chumsky/latest/chumsky/primitive/index.html) module. 99 | 100 | We're going to want to start by parsing the simplest element of Foo's syntax: numbers. 101 | 102 | ```rust 103 | // In `parser`... 104 | filter(|c: &char| c.is_ascii_digit()) 105 | ``` 106 | 107 | The `filter` primitive allows us to read a single input and accept it if it passes a condition. In our case, 108 | that condition simply checks that the character is a digit. 109 | 110 | If we compile this code now, we'll encounter an error. Why? 111 | 112 | Although we promised that our parser would produce an `Expr`, the `filter` primitive only outputs the input 113 | it found. Right now, all we have is a parser from `char` to `char` instead of a parser from `char` to `Expr`! 114 | 115 | To solve this, we need to crack open the 'combinator' part of parser combinators. We'll use Chumsky's `map` 116 | method to convert the output of the parser to an `Expr`. This method is very similar to its namesake on 117 | `Iterator`. 118 | 119 | ```rust 120 | filter(|c: &char| c.is_ascii_digit()) 121 | .map(|c| Expr::Num(c.to_digit(10).unwrap() as f64)) 122 | ``` 123 | 124 | Here, we're converting the `char` digit to an `f64` (unwrapping is fine: `map` only gets applied to outputs 125 | that successfully parsed!) and then wrapping it in `Expr::Num(_)` to convert it to a Foo expression. 126 | 127 | Try running the code. You'll see that you can type a digit into `test.foo` and have our interpreter generate 128 | an AST like so: 129 | 130 | ``` 131 | Ok(Num(5.0)) 132 | ``` 133 | 134 | ## Parsing numbers 135 | 136 | If you're more than a little adventurous, you'll quickly notice that typing in a multi-digit number doesn't 137 | quite behave as expected. Inputting `42` will only produce a `Num(4.0)` AST. 138 | 139 | This is because `filter` only accepts a *single* input. But now another question arises: why did our interpreter 140 | *not* complain at the trailing digits that didn't get parsed? 141 | 142 | The answer is that Chumsky's parsers are *lazy*: they will consume all of the input that they can and then stop. 143 | If there's any trailing input, it'll be ignored. 144 | 145 | This is obviously not always desirable. If the user places random nonsense at the end of the file, we want to be 146 | able to generate an error about it!
Worse still, that 'nonsense' could be input the user intended to be part of 147 | the program, but that contained a syntax error and so was not properly parsed. How can we force the parser to consume 148 | all of the input? 149 | 150 | To do this, we can make use of two new parsers: the `then_ignore` combinator and the `end` primitive. 151 | 152 | ```rust 153 | filter(|c: &char| c.is_ascii_digit()) 154 | .map(|c| Expr::Num(c.to_digit(10).unwrap() as f64)) 155 | .then_ignore(end()) 156 | ``` 157 | 158 | The `then_ignore` combinator parses a second pattern after the first, but ignores its output in favour of that of the 159 | first. 160 | 161 | The `end` primitive succeeds if it encounters only the end of input. 162 | 163 | Combining these together, we now get an error for longer inputs. Unfortunately, this just reveals another problem 164 | (particularly if you're working on a Unix-like platform): any whitespace before or after our digit will upset our 165 | parser and trigger an error. 166 | 167 | We can handle whitespace by adding a call to `padded_by` (which ignores a given pattern before and after the first) 168 | after our digit parser, and a repeating filter for any whitespace characters. 169 | 170 | ```rust 171 | filter(|c: &char| c.is_ascii_digit()) 172 | .map(|c| Expr::Num(c.to_digit(10).unwrap() as f64)) 173 | .padded_by(filter(|c: &char| c.is_whitespace()).repeated()) 174 | .then_ignore(end()) 175 | ``` 176 | 177 | This example should have taught you a few important things about Chumsky's parsers: 178 | 179 | 1. Parsers are lazy: trailing input is ignored 180 | 181 | 2. Whitespace is not automatically ignored. Chumsky is a general-purpose parsing library, and some languages care very 182 | much about the structure of whitespace, so Chumsky does too 183 | 184 | ## Cleaning up and taking shortcuts 185 | 186 | At this point, things are starting to look a little messy. We've ended up writing 4 lines of code to properly parse a 187 | single digit. Let's clean things up a bit. We'll also make use of a bunch of text-based parser primitives that 188 | come with Chumsky to get rid of some of this cruft. 189 | 190 | ```rust 191 | let int = text::int(10) 192 | .map(|s: String| Expr::Num(s.parse().unwrap())) 193 | .padded(); 194 | 195 | int.then_ignore(end()) 196 | ``` 197 | 198 | That's better. We've also swapped out our custom digit parser with a built-in parser that parses any positive 199 | integer. 200 | 201 | ## Evaluating simple expressions 202 | 203 | We'll now take a diversion away from the parser to create a function that can evaluate our AST. This is the 'heart' of 204 | our interpreter and is the thing that actually performs the computation of programs. 205 | 206 | ```rust 207 | fn eval(expr: &Expr) -> Result { 208 | match expr { 209 | Expr::Num(x) => Ok(*x), 210 | Expr::Neg(a) => Ok(-eval(a)?), 211 | Expr::Add(a, b) => Ok(eval(a)? + eval(b)?), 212 | Expr::Sub(a, b) => Ok(eval(a)? - eval(b)?), 213 | Expr::Mul(a, b) => Ok(eval(a)? * eval(b)?), 214 | Expr::Div(a, b) => Ok(eval(a)? / eval(b)?), 215 | _ => todo!(), // We'll handle other cases later 216 | } 217 | } 218 | ``` 219 | 220 | This function is quite simple: it just recursively calls itself, evaluating each node of the AST until it has a final 221 | result. Any runtime errors simply get thrown back down the stack. 222 | 223 | We'll also change our `main` function a little so that we can pass our AST to `eval`. 
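Before we do, it's worth a quick sanity check that `eval` does what we expect on a hand-built AST. This snippet is purely illustrative (the expression `1 + 2 * 3` is made up for the example) and assumes the `Expr` and `eval` definitions above:

```rust
// Purely illustrative: evaluate a hand-built AST for `1 + 2 * 3`.
let ast = Expr::Add(
    Box::new(Expr::Num(1.0)),
    Box::new(Expr::Mul(
        Box::new(Expr::Num(2.0)),
        Box::new(Expr::Num(3.0)),
    )),
);
assert_eq!(eval(&ast), Ok(7.0));
```

With that in hand, here's the updated `main`: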
224 | 225 | ```rust 226 | fn main() { 227 | let src = std::fs::read_to_string(std::env::args().nth(1).unwrap()).unwrap(); 228 | 229 | match parser().parse(src) { 230 | Ok(ast) => match eval(&ast) { 231 | Ok(output) => println!("{}", output), 232 | Err(eval_err) => println!("Evaluation error: {}", eval_err), 233 | }, 234 | Err(parse_errs) => parse_errs 235 | .into_iter() 236 | .for_each(|e| println!("Parse error: {}", e)), 237 | } 238 | } 239 | ``` 240 | 241 | This looks like a big change, but it's actually quite simple. We're just taking the result of the parse, printing 242 | errors if they occured, or evaluating the AST otherwise. We'll allow for some evaluation operations to produce 243 | runtime errors later. 244 | 245 | ## Parsing unary operators 246 | 247 | Jumping back to our parser, let's handle unary operators. Currently, our only unary operator is `-`, the negation 248 | operator. We're looking to parse any number of `-`, followed by a number. More formally: 249 | 250 | ``` 251 | expr = op* + int 252 | ``` 253 | 254 | We'll also give our `int` parser a new name, 'atom', for reasons that will become clear later. 255 | 256 | ```rust 257 | let int = text::int(10) 258 | .map(|s: String| Expr::Num(s.parse().unwrap())) 259 | .padded(); 260 | 261 | let atom = int; 262 | 263 | let op = |c| just(c).padded(); 264 | 265 | let unary = op('-') 266 | .repeated() 267 | .then(atom) 268 | .foldr(|_op, rhs| Expr::Neg(Box::new(rhs))); 269 | 270 | unary.then_ignore(end()) 271 | ``` 272 | 273 | Here, we meet a few new combinators: 274 | 275 | - `repeated` will parse a given pattern any number of times (including zero!), collecting the outputs into a `Vec` 276 | 277 | - `then` will parse one pattern and then another immediately afterwards, collecting both outputs into a tuple pair 278 | 279 | - `foldr` will take an output of the form `(Vec, U)` and will fold it into a single `U` by repeatedly applying 280 | the given function to each element of the `Vec` 281 | 282 | This last combinator is worth a little more consideration. We're trying to parse *any number* of negation operators, 283 | followed by a single atom (for now, just a number). This might give us an output like this: 284 | 285 | ```rust 286 | (['-', '-', '-'], Num(42.0)) 287 | ``` 288 | 289 | The `foldr` function repeatedly applies the function to 'fold' the elements into a single element, like so: 290 | 291 | ``` 292 | ['-', '-', '-'], Num(42.0) 293 | | | | | 294 | | | \ / 295 | | | Neg(Num(42.0)) 296 | | | | 297 | | \ / 298 | | Neg(Neg(Num(42.0))) 299 | | | 300 | \ / 301 | Neg(Neg(Neg(Num(42.0)))) 302 | ``` 303 | 304 | This may be a little hard to conceptualise for those used to imperative programming, but for functional programmers 305 | it should come naturally: `foldr` is just equivalent to `reduce`! 306 | 307 | Give the interpreter a try. You'll be able to enter inputs as before, but also values like `-17`. You can even apply 308 | the negation operator multiple times: `--9` will yield a value of `9` in the command line. 309 | 310 | This is exciting: we've finally started to see our interpreter perform useful (sort of) computations! 311 | 312 | ## Parsing binary operators 313 | 314 | Let's keep the momentum going and move over to binary operators. Traditionally, these pose quite a problem for 315 | parsers. To parse an expression like `3 + 4 * 2`, it's necessary to understand that multiplication 316 | [binds more eagerly than addition](https://en.wikipedia.org/wiki/Order_of_operations) and hence is applied first. 
317 | Therefore, the result of this expression is `11` and not `14`. 318 | 319 | Parsers employ a range of strategies to handle these cases, but for Chumsky things are simple: the most eagerly binding 320 | (highest 'precedence') operators should be those that get considered first when parsing. 321 | 322 | It's worth noting that summation operators (`+` and `-`) are typically considered to have the *same* precedence as 323 | one-another. The same also applies to product operators (`*` and `/`). For this reason, we treat each group as a single 324 | pattern. 325 | 326 | At each stage, we're looking for a simple pattern: a unary expression, followed by any number of combinations of an 327 | operator and a unary expression. More formally: 328 | 329 | ``` 330 | expr = unary + (op + unary)* 331 | ``` 332 | 333 | Let's expand our parser. 334 | 335 | ```rust 336 | let int = text::int(10) 337 | .map(|s: String| Expr::Num(s.parse().unwrap())) 338 | .padded(); 339 | 340 | let atom = int; 341 | 342 | let op = |c| just(c).padded(); 343 | 344 | let unary = op('-') 345 | .repeated() 346 | .then(atom) 347 | .foldr(|_op, rhs| Expr::Neg(Box::new(rhs))); 348 | 349 | let product = unary.clone() 350 | .then(op('*').to(Expr::Mul as fn(_, _) -> _) 351 | .or(op('/').to(Expr::Div as fn(_, _) -> _)) 352 | .then(unary) 353 | .repeated()) 354 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 355 | 356 | let sum = product.clone() 357 | .then(op('+').to(Expr::Add as fn(_, _) -> _) 358 | .or(op('-').to(Expr::Sub as fn(_, _) -> _)) 359 | .then(product) 360 | .repeated()) 361 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 362 | 363 | sum.then_ignore(end()) 364 | ``` 365 | 366 | The `Expr::Mul as fn(_, _) -> _` syntax might look a little unfamiliar, but don't worry! In Rust, 367 | [tuple enum variants are implicitly functions](https://stackoverflow.com/questions/54802045/what-is-this-strange-syntax-where-an-enum-variant-is-used-as-a-function). 368 | All we're doing here is making sure that Rust treats each of them as if they had the same type using the `as` cast, and 369 | then letting type inference do the rest. Those functions then get passed through the internals of the parser and end up 370 | in `op` within the `foldl` call. 371 | 372 | Another three combinators are introduced here: 373 | 374 | - `or` attempts to parse a pattern and, if unsuccessful, instead attempts another pattern 375 | 376 | - `to` is similar to `map`, but instead of mapping the output, entirely overrides the output with a new value. In our 377 | case, we use it to convert each binary operator to a function that produces the relevant AST node for that operator. 378 | 379 | - `foldl` is very similar to `foldr` in the last section but, instead of operating on a `(Vec<_>, _)`, it operates 380 | upon a `(_, Vec<_>)`, going forwards to combine values together with the function 381 | 382 | Give the interpreter a try. You should find that the interpreter can correctly handle both unary and binary operations 383 | combined in arbitrary configurations, respecting precedence. You can use it as a calculator! 384 | 385 | ## Parsing parentheses 386 | 387 | A new challenger approaches: *nested expressions*. Sometimes, we want to override the default operator precedence rules 388 | entirely. We can do this by nesting expressions within parentheses, like `(3 + 4) * 2`. How do we handle this?
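Informally, and in the same loose notation we've been using for grammars, the answer is to make a parenthesised expression just another kind of atom:

```
atom = int | '(' + expr + ')'
```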
389 | 390 | The creation of the `atom` pattern a few sections before was no accident: parentheses have a greater precedence than 391 | any operator, so we should treat a parenthesised expression as if it were equivalent to a single value. We call things 392 | that behave like single values 'atoms' by convention. 393 | 394 | We're going to hoist our entire parser up into a closure, allowing us to define it in terms of itself. 395 | 396 | ```rust 397 | recursive(|expr| { 398 | let int = text::int(10) 399 | .map(|s: String| Expr::Num(s.parse().unwrap())) 400 | .padded(); 401 | 402 | let atom = int 403 | .or(expr.delimited_by(just('('), just(')'))); 404 | 405 | let op = |c| just(c).padded(); 406 | 407 | let unary = op('-') 408 | .repeated() 409 | .then(atom) 410 | .foldr(|_op, rhs| Expr::Neg(Box::new(rhs))); 411 | 412 | let product = unary.clone() 413 | .then(op('*').to(Expr::Mul as fn(_, _) -> _) 414 | .or(op('/').to(Expr::Div as fn(_, _) -> _)) 415 | .then(unary) 416 | .repeated()) 417 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 418 | 419 | let sum = product.clone() 420 | .then(op('+').to(Expr::Add as fn(_, _) -> _) 421 | .or(op('-').to(Expr::Sub as fn(_, _) -> _)) 422 | .then(product) 423 | .repeated()) 424 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 425 | 426 | sum.padded() 427 | }) 428 | .then_ignore(end()) 429 | ``` 430 | 431 | There are a few things worth paying attention to here. 432 | 433 | 1. `recursive` allows us to define a parser recursively in terms of itself by giving us a copy of it within the 434 | closure's scope 435 | 436 | 2. We use the recursive definition of `expr` within the definition of `atom`. We use the new `delimited_by` combinator 437 | to allow it to sit nested within a pair of parentheses 438 | 439 | 3. The `then_ignore(end())` call has *not* been hoisted inside the `recursive` call. This is because we only want to 440 | parse an end of input on the outermost expression, not at every level of nesting 441 | 442 | Try running the interpreter. You'll find that it can handle a surprising number of cases elegantly. Make sure that the 443 | following cases work correctly: 444 | 445 | | Expression | Expected result | 446 | |---------------|-----------------| 447 | | `3 * 4 + 2` | `14` | 448 | | `3 * (4 + 2)` | `18` | 449 | | `-4 + 2` | `-2` | 450 | | `-(4 + 2)` | `-6` | 451 | 452 | ## Parsing lets 453 | 454 | Our next step is to handle `let`. Unlike Rust and other imperative languages, `let` in Foo is an expression and not a 455 | statement (Foo has no statements). It takes the following form: 456 | 457 | ``` 458 | let <name> = <expr>; 459 | ``` 460 | 461 | We only want `let`s to appear at the outermost level of the expression, so we leave it out of the original recursive 462 | expression definition. However, we also want to be able to chain `let`s together, so we put them in their own recursive 463 | definition. We call it `decl` ('declaration') because we're eventually going to be adding `fn` syntax too.
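In the same informal notation as before, the shape we're aiming for is roughly:

```
decl = let | expr
let  = "let" + ident + "=" + expr + ";" + decl
```

The Rust below follows this structure directly.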
464 | 465 | ```rust 466 | let ident = text::ident() 467 | .padded(); 468 | 469 | let expr = recursive(|expr| { 470 | let int = text::int(10) 471 | .map(|s: String| Expr::Num(s.parse().unwrap())) 472 | .padded(); 473 | 474 | let atom = int 475 | .or(expr.delimited_by(just('('), just(')'))) 476 | .or(ident.map(Expr::Var)); 477 | 478 | let op = |c| just(c).padded(); 479 | 480 | let unary = op('-') 481 | .repeated() 482 | .then(atom) 483 | .foldr(|_op, rhs| Expr::Neg(Box::new(rhs))); 484 | 485 | let product = unary.clone() 486 | .then(op('*').to(Expr::Mul as fn(_, _) -> _) 487 | .or(op('/').to(Expr::Div as fn(_, _) -> _)) 488 | .then(unary) 489 | .repeated()) 490 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 491 | 492 | let sum = product.clone() 493 | .then(op('+').to(Expr::Add as fn(_, _) -> _) 494 | .or(op('-').to(Expr::Sub as fn(_, _) -> _)) 495 | .then(product) 496 | .repeated()) 497 | .foldl(|lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs))); 498 | 499 | sum.padded() 500 | }); 501 | 502 | let decl = recursive(|decl| { 503 | let r#let = text::keyword("let") 504 | .ignore_then(ident) 505 | .then_ignore(just('=')) 506 | .then(expr.clone()) 507 | .then_ignore(just(';')) 508 | .then(decl) 509 | .map(|((name, rhs), then)| Expr::Let { 510 | name, 511 | rhs: Box::new(rhs), 512 | then: Box::new(then), 513 | }); 514 | 515 | r#let 516 | // Must be later in the chain than `r#let` to avoid ambiguity 517 | .or(expr) 518 | .padded() 519 | }); 520 | 521 | decl 522 | .then_ignore(end()) 523 | ``` 524 | 525 | `keyword` is simply a parser that looks for an exact identifier (i.e: it doesn't match identifiers that only start with 526 | a keyword). 527 | 528 | Other than that, there's nothing in the definition of `r#let` that you haven't seen before: familiar combinators, but 529 | combined in different ways. It selectively ignores parts of the syntax that we don't care about after validating that 530 | it exists, then uses those elements that it does care about to create an `Expr::Let` AST node. 531 | 532 | Another thing to note is that the definition of `ident` will parse `"let"`. To avoid the parser accidentally deciding 533 | that `"let"` is a variable, we place `r#let` earlier in the or chain than `expr` so that it prioritises the correct 534 | interpretation. As mentioned in previous sections, Chumsky handles ambiguity simply by choosing the first successful 535 | parse it encounters, so making sure that we declare things in the right order can sometimes be important. 536 | 537 | You should now be able to run the interpreter and have it accept an input such as 538 | 539 | ``` 540 | let five = 5; 541 | five * 3 542 | ``` 543 | 544 | Unfortunately, the `eval` function will panic because we've not yet handled `Expr::Var` or `Expr::Let`. Let's do that 545 | now. 546 | 547 | ```rust 548 | fn eval<'a>(expr: &'a Expr, vars: &mut Vec<(&'a String, f64)>) -> Result { 549 | match expr { 550 | Expr::Num(x) => Ok(*x), 551 | Expr::Neg(a) => Ok(-eval(a, vars)?), 552 | Expr::Add(a, b) => Ok(eval(a, vars)? + eval(b, vars)?), 553 | Expr::Sub(a, b) => Ok(eval(a, vars)? - eval(b, vars)?), 554 | Expr::Mul(a, b) => Ok(eval(a, vars)? * eval(b, vars)?), 555 | Expr::Div(a, b) => Ok(eval(a, vars)? 
/ eval(b, vars)?), 556 | Expr::Var(name) => if let Some((_, val)) = vars.iter().rev().find(|(var, _)| *var == name) { 557 | Ok(*val) 558 | } else { 559 | Err(format!("Cannot find variable `{}` in scope", name)) 560 | }, 561 | Expr::Let { name, rhs, then } => { 562 | let rhs = eval(rhs, vars)?; 563 | vars.push((name, rhs)); 564 | let output = eval(then, vars); 565 | vars.pop(); 566 | output 567 | }, 568 | _ => todo!(), 569 | } 570 | } 571 | ``` 572 | 573 | Woo! That got a bit more complicated. Don't fear, there are only 3 important changes: 574 | 575 | 1. Because we need to keep track of variables that were previously defined, we use a `Vec` to remember them. Because 576 | `eval` is a recursive function, we also need to pass is to all recursive calls. 577 | 578 | 2. When we encounter an `Expr::Let`, we first evaluate the right-hand side (`rhs`). Once evaluated, we push it to the 579 | `vars` stack and evaluate the trailing `then` expression (i.e: all of the remaining code that appears after the 580 | semicolon). Popping it afterwards is not *technically* necessary because Foo does not permit nested declarations, 581 | but we do it anyway because it's good practice and it's what we'd want to do if we ever decided to add nesting. 582 | 583 | 3. When we encounter an `Expr::Var` (i.e: an inline variable) we search the stack *backwards* (because Foo permits 584 | [variable shadowing](https://en.wikipedia.org/wiki/Variable_shadowing) and we only want to find the most recently 585 | declared variable with the same name) to find the variables's value. If we can't find a variable of that name, we 586 | generate a runtime error which gets propagated back up the stack. 587 | 588 | Obviously, the signature of `eval` has changed so we'll update the call in `main` to become: 589 | 590 | ```rust 591 | eval(&ast, &mut Vec::new()) 592 | ``` 593 | 594 | Make sure to test the interpreter. Try experimenting with `let` declarations to make sure things aren't broken. In 595 | particular, it's worth testing variable shadowing by ensuring that the following program produces `8`: 596 | 597 | ``` 598 | let x = 5; 599 | let x = 3 + x; 600 | x 601 | ``` 602 | 603 | ## Parsing functions 604 | 605 | We're almost at a complete implementation of Foo. There's just one thing left: *functions*. 606 | 607 | Surprisingly, parsing functions is the easy part. All we need to modify is the definition of `decl` to add `r#fn`. It 608 | looks very much like the existing definition of `r#let`: 609 | 610 | ```rust 611 | let decl = recursive(|decl| { 612 | let r#let = text::keyword("let") 613 | .ignore_then(ident) 614 | .then_ignore(just('=')) 615 | .then(expr.clone()) 616 | .then_ignore(just(';')) 617 | .then(decl.clone()) 618 | .map(|((name, rhs), then)| Expr::Let { 619 | name, 620 | rhs: Box::new(rhs), 621 | then: Box::new(then), 622 | }); 623 | 624 | let r#fn = text::keyword("fn") 625 | .ignore_then(ident) 626 | .then(ident.repeated()) 627 | .then_ignore(just('=')) 628 | .then(expr.clone()) 629 | .then_ignore(just(';')) 630 | .then(decl) 631 | .map(|(((name, args), body), then)| Expr::Fn { 632 | name, 633 | args, 634 | body: Box::new(body), 635 | then: Box::new(then), 636 | }); 637 | 638 | r#let 639 | .or(r#fn) 640 | .or(expr) 641 | .padded() 642 | }); 643 | ``` 644 | 645 | There's nothing new here, you understand this all already. 
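At this point the parser will happily accept a function declaration, even though we can't *call* the function yet. For example, the following now parses (though evaluating it will still hit the `todo!()` arm of `eval` until we extend `eval` below):

```
fn add x y = x + y;
3 + 4
```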
646 | 647 | Obviously, we also need to add support for *calling* functions by modifying `atom`: 648 | 649 | ```rust 650 | let call = ident 651 | .then(expr.clone() 652 | .separated_by(just(',')) 653 | .allow_trailing() // Foo is Rust-like, so allow trailing commas to appear in arg lists 654 | .delimited_by(just('('), just(')'))) 655 | .map(|(f, args)| Expr::Call(f, args)); 656 | 657 | let atom = int 658 | .or(expr.delimited_by(just('('), just(')'))) 659 | .or(call) 660 | .or(ident.map(Expr::Var)); 661 | ``` 662 | 663 | The only new combinator here is `separated_by` which behaves like `repeated`, but requires a separator pattern between 664 | each element. It has a method called `allow_trailing` which allows for parsing a trailing separator at the end of the 665 | elements. 666 | 667 | Next, we modify our `eval` function to support a function stack. 668 | 669 | ```rust 670 | fn eval<'a>( 671 | expr: &'a Expr, 672 | vars: &mut Vec<(&'a String, f64)>, 673 | funcs: &mut Vec<(&'a String, &'a [String], &'a Expr)>, 674 | ) -> Result { 675 | match expr { 676 | Expr::Num(x) => Ok(*x), 677 | Expr::Neg(a) => Ok(-eval(a, vars, funcs)?), 678 | Expr::Add(a, b) => Ok(eval(a, vars, funcs)? + eval(b, vars, funcs)?), 679 | Expr::Sub(a, b) => Ok(eval(a, vars, funcs)? - eval(b, vars, funcs)?), 680 | Expr::Mul(a, b) => Ok(eval(a, vars, funcs)? * eval(b, vars, funcs)?), 681 | Expr::Div(a, b) => Ok(eval(a, vars, funcs)? / eval(b, vars, funcs)?), 682 | Expr::Var(name) => if let Some((_, val)) = vars.iter().rev().find(|(var, _)| *var == name) { 683 | Ok(*val) 684 | } else { 685 | Err(format!("Cannot find variable `{}` in scope", name)) 686 | }, 687 | Expr::Let { name, rhs, then } => { 688 | let rhs = eval(rhs, vars, funcs)?; 689 | vars.push((name, rhs)); 690 | let output = eval(then, vars, funcs); 691 | vars.pop(); 692 | output 693 | }, 694 | Expr::Call(name, args) => if let Some((_, arg_names, body)) = funcs 695 | .iter() 696 | .rev() 697 | .find(|(var, _, _)| *var == name) 698 | .copied() 699 | { 700 | if arg_names.len() == args.len() { 701 | let mut args = args 702 | .iter() 703 | .map(|arg| eval(arg, vars, funcs)) 704 | .zip(arg_names.iter()) 705 | .map(|(val, name)| Ok((name, val?))) 706 | .collect::>()?; 707 | vars.append(&mut args); 708 | let output = eval(body, vars, funcs); 709 | vars.truncate(vars.len() - args.len()); 710 | output 711 | } else { 712 | Err(format!( 713 | "Wrong number of arguments for function `{}`: expected {}, found {}", 714 | name, 715 | arg_names.len(), 716 | args.len(), 717 | )) 718 | } 719 | } else { 720 | Err(format!("Cannot find function `{}` in scope", name)) 721 | }, 722 | Expr::Fn { name, args, body, then } => { 723 | funcs.push((name, args, body)); 724 | let output = eval(then, vars, funcs); 725 | funcs.pop(); 726 | output 727 | }, 728 | } 729 | } 730 | ``` 731 | 732 | Another big change! On closer inspection, however, this looks a lot like the change we made previously when we added 733 | support for `let` declarations. Whenever we encounter an `Expr::Fn`, we just push the function to the `funcs` stack and 734 | continue. Whenever we encounter an `Expr::Call`, we search the function stack backwards, as we did for variables, and 735 | then execute the body of the function (making sure to evaluate and push the arguments!). 736 | 737 | As before, we'll need to change the `eval` call in `main` to: 738 | 739 | ```rust 740 | eval(&ast, &mut Vec::new(), &mut Vec::new()) 741 | ``` 742 | 743 | Give the interpreter a test - see what you can do with it! 
Here's an example program to get you started: 744 | 745 | ``` 746 | let five = 5; 747 | let eight = 3 + five; 748 | fn add x y = x + y; 749 | add(five, eight) 750 | ``` 751 | 752 | ## Conclusion 753 | 754 | Here ends our exploration into Chumsky's API. We only scratched the surface of what Chumsky can do, but now you'll need 755 | to rely on the examples in the repository and the API doc examples for further help. Nonetheless, I hope it was an 756 | interesting foray into the use of parser combinators for the development of parsers. 757 | 758 | If nothing else, you've now got a neat little calculator language to play with. 759 | 760 | Interestingly, there is a subtle bug in Foo's `eval` function that produces unexpected scoping behaviour with function 761 | calls. I'll leave finding it as an exercise for the reader. 762 | 763 | ## Extension tasks 764 | 765 | - Find the interesting function scoping bug and consider how it could be fixed 766 | 767 | - Split token lexing into a separate compilation stage to avoid the need for `.padded()` in the parser 768 | 769 | - Add more operators 770 | 771 | - Add an `if then else ` ternary operator 772 | 773 | - Add values of different types by turning `f64` into an `enum` 774 | 775 | - Add lambdas to the language 776 | 777 | - Format the error message in a more useful way, perhaps by providing a reference to the original code 778 | -------------------------------------------------------------------------------- /src/primitive.rs: -------------------------------------------------------------------------------- 1 | //! Parser primitives that accept specific token patterns. 2 | //! 3 | //! *“These creatures you call mice, you see, they are not quite as they appear. They are merely the protrusion into 4 | //! our dimension of vastly hyperintelligent pandimensional beings.”* 5 | //! 6 | //! Chumsky parsers are created by combining together smaller parsers. Right at the bottom of the pile are the parser 7 | //! primitives, a parser developer's bread & butter. Each of these primitives are very easy to understand in isolation, 8 | //! usually only doing one thing. 9 | //! 10 | //! ## The Important Ones 11 | //! 12 | //! - [`just`]: parses a specific input or sequence of inputs 13 | //! - [`filter`]: parses a single input, if the given filter function returns `true` 14 | //! - [`end`]: parses the end of input (i.e: if there any more inputs, this parse fails) 15 | 16 | use super::*; 17 | 18 | /// See [`custom`]. 19 | pub struct Custom(F, PhantomData); 20 | 21 | impl Copy for Custom {} 22 | impl Clone for Custom { 23 | fn clone(&self) -> Self { 24 | Self(self.0.clone(), PhantomData) 25 | } 26 | } 27 | 28 | impl) -> PResult, E: Error> Parser 29 | for Custom 30 | { 31 | type Error = E; 32 | 33 | fn parse_inner( 34 | &self, 35 | _debugger: &mut D, 36 | stream: &mut StreamOf, 37 | ) -> PResult { 38 | (self.0)(stream) 39 | } 40 | 41 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 42 | #[allow(deprecated)] 43 | self.parse_inner(d, s) 44 | } 45 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 46 | #[allow(deprecated)] 47 | self.parse_inner(d, s) 48 | } 49 | } 50 | 51 | /// A parser primitive that allows you to define your own custom parsers. 52 | /// 53 | /// In theory you shouldn't need to use this unless you have particularly bizarre requirements, but it's a cleaner and 54 | //// more sustainable alternative to implementing [`Parser`] by hand. 
55 | /// 56 | /// The output type of this parser is determined by the parse result of the function. 57 | pub fn custom(f: F) -> Custom { 58 | Custom(f, PhantomData) 59 | } 60 | 61 | /// See [`end`]. 62 | pub struct End(PhantomData); 63 | 64 | impl Clone for End { 65 | fn clone(&self) -> Self { 66 | Self(PhantomData) 67 | } 68 | } 69 | 70 | impl> Parser for End { 71 | type Error = E; 72 | 73 | fn parse_inner( 74 | &self, 75 | _debugger: &mut D, 76 | stream: &mut StreamOf, 77 | ) -> PResult { 78 | match stream.next() { 79 | (_, _, None) => (Vec::new(), Ok(((), None))), 80 | (at, span, found) => ( 81 | Vec::new(), 82 | Err(Located::at( 83 | at, 84 | E::expected_input_found(span, Some(None), found), 85 | )), 86 | ), 87 | } 88 | } 89 | 90 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 91 | #[allow(deprecated)] 92 | self.parse_inner(d, s) 93 | } 94 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 95 | #[allow(deprecated)] 96 | self.parse_inner(d, s) 97 | } 98 | } 99 | 100 | /// A parser that accepts only the end of input. 101 | /// 102 | /// This parser is very useful when you wish to force a parser to consume *all* of the input. It is typically combined 103 | /// with [`Parser::then_ignore`]. 104 | /// 105 | /// The output type of this parser is `()`. 106 | /// 107 | /// # Examples 108 | /// 109 | /// ``` 110 | /// # use chumsky::prelude::*; 111 | /// assert_eq!(end::>().parse(""), Ok(())); 112 | /// assert!(end::>().parse("hello").is_err()); 113 | /// ``` 114 | /// 115 | /// ``` 116 | /// # use chumsky::prelude::*; 117 | /// let digits = text::digits::<_, Simple>(10); 118 | /// 119 | /// // This parser parses digits! 120 | /// assert_eq!(digits.parse("1234"), Ok("1234".to_string())); 121 | /// 122 | /// // However, parsers are lazy and do not consume trailing input. 123 | /// // This can be inconvenient if we want to validate all of the input. 124 | /// assert_eq!(digits.parse("1234AhasjADSJAlaDJKSDAK"), Ok("1234".to_string())); 125 | /// 126 | /// // To fix this problem, we require that the end of input follows any successfully parsed input 127 | /// let only_digits = digits.then_ignore(end()); 128 | /// 129 | /// // Now our parser correctly produces an error if any trailing input is found... 130 | /// assert!(only_digits.parse("1234AhasjADSJAlaDJKSDAK").is_err()); 131 | /// // ...while still behaving correctly for inputs that only consist of valid patterns 132 | /// assert_eq!(only_digits.parse("1234"), Ok("1234".to_string())); 133 | /// ``` 134 | pub fn end() -> End { 135 | End(PhantomData) 136 | } 137 | 138 | mod private { 139 | pub trait Sealed {} 140 | 141 | impl Sealed for T {} 142 | impl Sealed for alloc::string::String {} 143 | impl<'a> Sealed for &'a str {} 144 | impl<'a, T> Sealed for &'a [T] {} 145 | impl Sealed for [T; N] {} 146 | impl<'a, T, const N: usize> Sealed for &'a [T; N] {} 147 | impl Sealed for alloc::vec::Vec {} 148 | impl Sealed for alloc::collections::LinkedList {} 149 | impl Sealed for alloc::collections::VecDeque {} 150 | impl Sealed for alloc::collections::BTreeSet {} 151 | impl Sealed for alloc::collections::BinaryHeap {} 152 | 153 | #[cfg(feature = "std")] 154 | impl Sealed for std::collections::HashSet {} 155 | #[cfg(not(feature = "std"))] 156 | impl Sealed for hashbrown::HashSet {} 157 | } 158 | 159 | /// A utility trait to abstract over linear container-like things. 160 | /// 161 | /// This trait is likely to change in future versions of the crate, so avoid implementing it yourself. 
162 | pub trait Container: private::Sealed { 163 | /// An iterator over the items within this container, by value. 164 | type Iter: Iterator; 165 | /// Iterate over the elements of the container (using internal iteration because GATs are unstable). 166 | fn get_iter(&self) -> Self::Iter; 167 | } 168 | 169 | impl Container for T { 170 | type Iter = core::iter::Once; 171 | fn get_iter(&self) -> Self::Iter { 172 | core::iter::once(self.clone()) 173 | } 174 | } 175 | 176 | impl Container for String { 177 | type Iter = alloc::vec::IntoIter; 178 | fn get_iter(&self) -> Self::Iter { 179 | self.chars().collect::>().into_iter() 180 | } 181 | } 182 | 183 | impl<'a> Container for &'a str { 184 | type Iter = alloc::str::Chars<'a>; 185 | fn get_iter(&self) -> Self::Iter { 186 | self.chars() 187 | } 188 | } 189 | 190 | impl<'a, T: Clone> Container for &'a [T] { 191 | type Iter = core::iter::Cloned>; 192 | fn get_iter(&self) -> Self::Iter { 193 | self.iter().cloned() 194 | } 195 | } 196 | 197 | impl<'a, T: Clone, const N: usize> Container for &'a [T; N] { 198 | type Iter = core::iter::Cloned>; 199 | fn get_iter(&self) -> Self::Iter { 200 | self.iter().cloned() 201 | } 202 | } 203 | 204 | impl Container for [T; N] { 205 | type Iter = core::array::IntoIter; 206 | fn get_iter(&self) -> Self::Iter { 207 | core::array::IntoIter::new(self.clone()) 208 | } 209 | } 210 | 211 | impl Container for Vec { 212 | type Iter = alloc::vec::IntoIter; 213 | fn get_iter(&self) -> Self::Iter { 214 | self.clone().into_iter() 215 | } 216 | } 217 | 218 | impl Container for alloc::collections::LinkedList { 219 | type Iter = alloc::collections::linked_list::IntoIter; 220 | fn get_iter(&self) -> Self::Iter { 221 | self.clone().into_iter() 222 | } 223 | } 224 | 225 | impl Container for alloc::collections::VecDeque { 226 | type Iter = alloc::collections::vec_deque::IntoIter; 227 | fn get_iter(&self) -> Self::Iter { 228 | self.clone().into_iter() 229 | } 230 | } 231 | 232 | #[cfg(feature = "std")] 233 | impl Container for std::collections::HashSet { 234 | type Iter = std::collections::hash_set::IntoIter; 235 | fn get_iter(&self) -> Self::Iter { 236 | self.clone().into_iter() 237 | } 238 | } 239 | 240 | #[cfg(not(feature = "std"))] 241 | impl Container for hashbrown::HashSet { 242 | type Iter = hashbrown::hash_set::IntoIter; 243 | fn get_iter(&self) -> Self::Iter { 244 | self.clone().into_iter() 245 | } 246 | } 247 | 248 | impl Container for alloc::collections::BTreeSet { 249 | type Iter = alloc::collections::btree_set::IntoIter; 250 | fn get_iter(&self) -> Self::Iter { 251 | self.clone().into_iter() 252 | } 253 | } 254 | 255 | impl Container for alloc::collections::BinaryHeap { 256 | type Iter = alloc::collections::binary_heap::IntoIter; 257 | fn get_iter(&self) -> Self::Iter { 258 | self.clone().into_iter() 259 | } 260 | } 261 | 262 | /// See [`just`]. 
263 | pub struct Just, E>(C, PhantomData<(I, E)>); 264 | 265 | impl, E> Copy for Just {} 266 | impl, E> Clone for Just { 267 | fn clone(&self) -> Self { 268 | Self(self.0.clone(), PhantomData) 269 | } 270 | } 271 | 272 | impl + Clone, E: Error> Parser for Just { 273 | type Error = E; 274 | 275 | fn parse_inner( 276 | &self, 277 | _debugger: &mut D, 278 | stream: &mut StreamOf, 279 | ) -> PResult { 280 | for expected in self.0.get_iter() { 281 | match stream.next() { 282 | (_, _, Some(tok)) if tok == expected => {} 283 | (at, span, found) => { 284 | return ( 285 | Vec::new(), 286 | Err(Located::at( 287 | at, 288 | E::expected_input_found(span, Some(Some(expected)), found), 289 | )), 290 | ) 291 | } 292 | } 293 | } 294 | 295 | (Vec::new(), Ok((self.0.clone(), None))) 296 | } 297 | 298 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 299 | #[allow(deprecated)] 300 | self.parse_inner(d, s) 301 | } 302 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 303 | #[allow(deprecated)] 304 | self.parse_inner(d, s) 305 | } 306 | } 307 | 308 | /// A parser that accepts only the given input. 309 | /// 310 | /// The output type of this parser is `C`, the input or sequence that was provided. 311 | /// 312 | /// # Examples 313 | /// 314 | /// ``` 315 | /// # use chumsky::{prelude::*, error::Cheap}; 316 | /// let question = just::<_, _, Cheap>('?'); 317 | /// 318 | /// assert_eq!(question.parse("?"), Ok('?')); 319 | /// assert!(question.parse("!").is_err()); 320 | /// // This works because parsers do not eagerly consume input, so the '!' is not parsed 321 | /// assert_eq!(question.parse("?!"), Ok('?')); 322 | /// // This fails because the parser expects an end to the input after the '?' 323 | /// assert!(question.then(end()).parse("?!").is_err()); 324 | /// ``` 325 | pub fn just, E: Error>(inputs: C) -> Just { 326 | Just(inputs, PhantomData) 327 | } 328 | 329 | /// See [`seq`]. 330 | pub struct Seq(Vec, PhantomData); 331 | 332 | impl Clone for Seq { 333 | fn clone(&self) -> Self { 334 | Self(self.0.clone(), PhantomData) 335 | } 336 | } 337 | 338 | impl> Parser for Seq { 339 | type Error = E; 340 | 341 | fn parse_inner( 342 | &self, 343 | _debugger: &mut D, 344 | stream: &mut StreamOf, 345 | ) -> PResult { 346 | for expected in &self.0 { 347 | match stream.next() { 348 | (_, _, Some(tok)) if &tok == expected => {} 349 | (at, span, found) => { 350 | return ( 351 | Vec::new(), 352 | Err(Located::at( 353 | at, 354 | E::expected_input_found(span, Some(Some(expected.clone())), found), 355 | )), 356 | ) 357 | } 358 | } 359 | } 360 | 361 | (Vec::new(), Ok(((), None))) 362 | } 363 | 364 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 365 | #[allow(deprecated)] 366 | self.parse_inner(d, s) 367 | } 368 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 369 | #[allow(deprecated)] 370 | self.parse_inner(d, s) 371 | } 372 | } 373 | 374 | /// A parser that accepts only a sequence of specific inputs. 375 | /// 376 | /// The output type of this parser is `()`. 
377 | /// 378 | /// # Examples 379 | /// 380 | /// ``` 381 | /// # use chumsky::{prelude::*, error::Cheap}; 382 | /// let hello = seq::<_, _, Cheap>("Hello".chars()); 383 | /// 384 | /// assert_eq!(hello.parse("Hello"), Ok(())); 385 | /// assert_eq!(hello.parse("Hello, world!"), Ok(())); 386 | /// assert!(hello.parse("Goodbye").is_err()); 387 | /// 388 | /// let onetwothree = seq::<_, _, Cheap>([1, 2, 3]); 389 | /// 390 | /// assert_eq!(onetwothree.parse([1, 2, 3]), Ok(())); 391 | /// assert_eq!(onetwothree.parse([1, 2, 3, 4, 5]), Ok(())); 392 | /// assert!(onetwothree.parse([2, 1, 3]).is_err()); 393 | /// ``` 394 | #[deprecated( 395 | since = "0.7.0", 396 | note = "Use `just` instead: it now works for many sequence-like types!" 397 | )] 398 | pub fn seq, E>(xs: Iter) -> Seq { 399 | Seq(xs.into_iter().collect(), PhantomData) 400 | } 401 | 402 | /// See [`one_of`]. 403 | pub struct OneOf(C, PhantomData<(I, E)>); 404 | 405 | impl Clone for OneOf { 406 | fn clone(&self) -> Self { 407 | Self(self.0.clone(), PhantomData) 408 | } 409 | } 410 | 411 | impl, E: Error> Parser for OneOf { 412 | type Error = E; 413 | 414 | fn parse_inner( 415 | &self, 416 | _debugger: &mut D, 417 | stream: &mut StreamOf, 418 | ) -> PResult { 419 | match stream.next() { 420 | (_, _, Some(tok)) if self.0.get_iter().any(|not| not == tok) => { 421 | (Vec::new(), Ok((tok, None))) 422 | } 423 | (at, span, found) => ( 424 | Vec::new(), 425 | Err(Located::at( 426 | at, 427 | E::expected_input_found(span, self.0.get_iter().map(Some), found), 428 | )), 429 | ), 430 | } 431 | } 432 | 433 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 434 | #[allow(deprecated)] 435 | self.parse_inner(d, s) 436 | } 437 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 438 | #[allow(deprecated)] 439 | self.parse_inner(d, s) 440 | } 441 | } 442 | 443 | /// A parser that accepts one of a sequence of specific inputs. 444 | /// 445 | /// The output type of this parser is `I`, the input that was found. 446 | /// 447 | /// # Examples 448 | /// 449 | /// ``` 450 | /// # use chumsky::{prelude::*, error::Cheap}; 451 | /// let digits = one_of::<_, _, Cheap>("0123456789") 452 | /// .repeated().at_least(1) 453 | /// .then_ignore(end()) 454 | /// .collect::(); 455 | /// 456 | /// assert_eq!(digits.parse("48791"), Ok("48791".to_string())); 457 | /// assert!(digits.parse("421!53").is_err()); 458 | /// ``` 459 | pub fn one_of, E: Error>(inputs: C) -> OneOf { 460 | OneOf(inputs, PhantomData) 461 | } 462 | 463 | /// See [`empty`]. 464 | pub struct Empty(PhantomData); 465 | 466 | impl Clone for Empty { 467 | fn clone(&self) -> Self { 468 | Self(PhantomData) 469 | } 470 | } 471 | 472 | impl> Parser for Empty { 473 | type Error = E; 474 | 475 | fn parse_inner( 476 | &self, 477 | _debugger: &mut D, 478 | _: &mut StreamOf, 479 | ) -> PResult { 480 | (Vec::new(), Ok(((), None))) 481 | } 482 | 483 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 484 | #[allow(deprecated)] 485 | self.parse_inner(d, s) 486 | } 487 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 488 | #[allow(deprecated)] 489 | self.parse_inner(d, s) 490 | } 491 | } 492 | 493 | /// A parser that parses no inputs. 494 | /// 495 | /// The output type of this parser is `()`. 496 | pub fn empty() -> Empty { 497 | Empty(PhantomData) 498 | } 499 | 500 | /// See [`none_of`]. 
501 | pub struct NoneOf(C, PhantomData<(I, E)>); 502 | 503 | impl Clone for NoneOf { 504 | fn clone(&self) -> Self { 505 | Self(self.0.clone(), PhantomData) 506 | } 507 | } 508 | 509 | impl, E: Error> Parser for NoneOf { 510 | type Error = E; 511 | 512 | fn parse_inner( 513 | &self, 514 | _debugger: &mut D, 515 | stream: &mut StreamOf, 516 | ) -> PResult { 517 | match stream.next() { 518 | (_, _, Some(tok)) if self.0.get_iter().all(|not| not != tok) => { 519 | (Vec::new(), Ok((tok, None))) 520 | } 521 | (at, span, found) => ( 522 | Vec::new(), 523 | Err(Located::at( 524 | at, 525 | E::expected_input_found(span, Vec::new(), found), 526 | )), 527 | ), 528 | } 529 | } 530 | 531 | fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf) -> PResult { 532 | #[allow(deprecated)] 533 | self.parse_inner(d, s) 534 | } 535 | fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf) -> PResult { 536 | #[allow(deprecated)] 537 | self.parse_inner(d, s) 538 | } 539 | } 540 | 541 | /// A parser that accepts any input that is *not* in a sequence of specific inputs. 542 | /// 543 | /// The output type of this parser is `I`, the input that was found. 544 | /// 545 | /// # Examples 546 | /// 547 | /// ``` 548 | /// # use chumsky::{prelude::*, error::Cheap}; 549 | /// let string = one_of::<_, _, Cheap>("\"'") 550 | /// .ignore_then(none_of("\"'").repeated()) 551 | /// .then_ignore(one_of("\"'")) 552 | /// .then_ignore(end()) 553 | /// .collect::(); 554 | /// 555 | /// assert_eq!(string.parse("'hello'"), Ok("hello".to_string())); 556 | /// assert_eq!(string.parse("\"world\""), Ok("world".to_string())); 557 | /// assert!(string.parse("\"421!53").is_err()); 558 | /// ``` 559 | pub fn none_of, E: Error>(inputs: C) -> NoneOf { 560 | NoneOf(inputs, PhantomData) 561 | } 562 | 563 | /// See [`take_until`]. 564 | #[derive(Copy, Clone)] 565 | pub struct TakeUntil(A); 566 | 567 | impl> Parser, O)> for TakeUntil { 568 | type Error = A::Error; 569 | 570 | fn parse_inner( 571 | &self, 572 | debugger: &mut D, 573 | stream: &mut StreamOf, 574 | ) -> PResult, O), A::Error> { 575 | let mut outputs = Vec::new(); 576 | let mut alt = None; 577 | 578 | loop { 579 | let (errors, err) = match stream.try_parse(|stream| { 580 | #[allow(deprecated)] 581 | self.0.parse_inner(debugger, stream) 582 | }) { 583 | (errors, Ok((out, a_alt))) => { 584 | break (errors, Ok(((outputs, out), merge_alts(alt, a_alt)))) 585 | } 586 | (errors, Err(err)) => (errors, err), 587 | }; 588 | 589 | match stream.next() { 590 | (_, _, Some(tok)) => outputs.push(tok), 591 | (_, _, None) => break (errors, Err(err)), 592 | } 593 | 594 | alt = merge_alts(alt.take(), Some(err)); 595 | } 596 | } 597 | 598 | fn parse_inner_verbose( 599 | &self, 600 | d: &mut Verbose, 601 | s: &mut StreamOf, 602 | ) -> PResult, O), A::Error> { 603 | #[allow(deprecated)] 604 | self.parse_inner(d, s) 605 | } 606 | fn parse_inner_silent( 607 | &self, 608 | d: &mut Silent, 609 | s: &mut StreamOf, 610 | ) -> PResult, O), A::Error> { 611 | #[allow(deprecated)] 612 | self.parse_inner(d, s) 613 | } 614 | } 615 | 616 | /// A parser that accepts any number of inputs until a terminating pattern is reached. 617 | /// 618 | /// The output type of this parser is `(Vec, O)`, a combination of the preceding inputs and the output of the 619 | /// final patterns. 
///
/// # Examples
///
/// ```
/// # use chumsky::{prelude::*, error::Cheap};
/// let single_line = just::<_, _, Simple<char>>("//")
///     .then(take_until(text::newline()))
///     .ignored();
///
/// let multi_line = just::<_, _, Simple<char>>("/*")
///     .then(take_until(just("*/")))
///     .ignored();
///
/// let comment = single_line.or(multi_line);
///
/// let tokens = text::ident()
///     .padded()
///     .padded_by(comment
///         .padded()
///         .repeated())
///     .repeated();
///
/// assert_eq!(tokens.parse(r#"
/// // These tokens...
/// these are
/// /*
/// ...have some
/// multi-line...
/// */
/// // ...and single-line...
/// tokens
/// // ...comments between them
/// "#), Ok(vec!["these".to_string(), "are".to_string(), "tokens".to_string()]));
/// ```
pub fn take_until<A>(until: A) -> TakeUntil<A> {
    TakeUntil(until)
}

/// See [`filter`].
pub struct Filter<F, E>(F, PhantomData<E>);

impl<F: Copy, E> Copy for Filter<F, E> {}
impl<F: Clone, E> Clone for Filter<F, E> {
    fn clone(&self) -> Self {
        Self(self.0.clone(), PhantomData)
    }
}

impl<I: Clone, F: Fn(&I) -> bool, E: Error<I>> Parser<I, I> for Filter<F, E> {
    type Error = E;

    fn parse_inner<D: Debugger>(
        &self,
        _debugger: &mut D,
        stream: &mut StreamOf<I, E>,
    ) -> PResult<I, I, E> {
        match stream.next() {
            (_, _, Some(tok)) if (self.0)(&tok) => (Vec::new(), Ok((tok, None))),
            (at, span, found) => (
                Vec::new(),
                Err(Located::at(
                    at,
                    E::expected_input_found(span, Vec::new(), found),
                )),
            ),
        }
    }

    fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf<I, E>) -> PResult<I, I, E> {
        #[allow(deprecated)]
        self.parse_inner(d, s)
    }
    fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf<I, E>) -> PResult<I, I, E> {
        #[allow(deprecated)]
        self.parse_inner(d, s)
    }
}

/// A parser that accepts only inputs that match the given predicate.
///
/// The output type of this parser is `I`, the input that was found.
///
/// # Examples
///
/// ```
/// # use chumsky::{prelude::*, error::Cheap};
/// let lowercase = filter::<_, _, Cheap<char>>(char::is_ascii_lowercase)
///     .repeated().at_least(1)
///     .then_ignore(end())
///     .collect::<String>();
///
/// assert_eq!(lowercase.parse("hello"), Ok("hello".to_string()));
/// assert!(lowercase.parse("Hello").is_err());
/// ```
pub fn filter<I, F: Fn(&I) -> bool, E>(f: F) -> Filter<F, E> {
    Filter(f, PhantomData)
}

/// See [`filter_map`].
pub struct FilterMap<F, E>(F, PhantomData<E>);

impl<F: Copy, E> Copy for FilterMap<F, E> {}
impl<F: Clone, E> Clone for FilterMap<F, E> {
    fn clone(&self) -> Self {
        Self(self.0.clone(), PhantomData)
    }
}

impl<I: Clone, O, F: Fn(E::Span, I) -> Result<O, E>, E: Error<I>> Parser<I, O> for FilterMap<F, E> {
    type Error = E;

    fn parse_inner<D: Debugger>(
        &self,
        _debugger: &mut D,
        stream: &mut StreamOf<I, E>,
    ) -> PResult<I, O, E> {
        let (at, span, tok) = stream.next();
        match tok.map(|tok| (self.0)(span.clone(), tok)) {
            Some(Ok(tok)) => (Vec::new(), Ok((tok, None))),
            Some(Err(err)) => (Vec::new(), Err(Located::at(at, err))),
            None => (
                Vec::new(),
                Err(Located::at(
                    at,
                    E::expected_input_found(span, Vec::new(), None),
                )),
            ),
        }
    }

    fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf<I, E>) -> PResult<I, O, E> {
        #[allow(deprecated)]
        self.parse_inner(d, s)
    }
    fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf<I, E>) -> PResult<I, O, E> {
        #[allow(deprecated)]
        self.parse_inner(d, s)
    }
}

/// A parser that accepts an input and tests it against the given fallible function.
///
/// This function allows integration with custom error types, allowing you to emit custom parser errors.
///
/// Before using this function, consider whether the [`select!`] macro would serve you better.
///
/// The output type of this parser is `O`, the output of the given fallible function.
///
/// # Examples
///
/// ```
/// # use chumsky::{prelude::*, error::Cheap};
/// let numeral = filter_map(|span, c: char| match c.to_digit(10) {
///     Some(x) => Ok(x),
///     None => Err(Simple::custom(span, format!("'{}' is not a digit", c))),
/// });
///
/// assert_eq!(numeral.parse("3"), Ok(3));
/// assert_eq!(numeral.parse("7"), Ok(7));
/// assert_eq!(numeral.parse("f"), Err(vec![Simple::custom(0..1, "'f' is not a digit")]));
/// ```
pub fn filter_map<I, O, F: Fn(E::Span, I) -> Result<O, E>, E: Error<I>>(f: F) -> FilterMap<F, E> {
    FilterMap(f, PhantomData)
}

/// See [`any`].
pub type Any<I, E> = Filter<fn(&I) -> bool, E>;

/// A parser that accepts any input (but not the end of input).
///
/// The output type of this parser is `I`, the input that was found.
///
/// # Examples
///
/// ```
/// # use chumsky::{prelude::*, error::Cheap};
/// let any = any::<char, Cheap<char>>();
///
/// assert_eq!(any.parse("a"), Ok('a'));
/// assert_eq!(any.parse("7"), Ok('7'));
/// assert_eq!(any.parse("\t"), Ok('\t'));
/// assert!(any.parse("").is_err());
/// ```
pub fn any<I, E>() -> Any<I, E> {
    Filter(|_| true, PhantomData)
}

/// See [`fn@todo`].
pub struct Todo<I, O, E>(PhantomData<(I, O, E)>);

/// A parser that can be used wherever you need to implement a parser later.
///
/// This parser is analogous to the [`todo!`] and [`unimplemented!`] macros, but will produce a panic when used to
/// parse input, not immediately when invoked.
///
/// This function is useful when developing your parser, allowing you to prototype and run parts of your parser without
/// committing to implementing the entire thing immediately.
///
/// The output type of this parser is whatever you want it to be: it'll never produce output!
///
/// # Examples
///
/// ```should_panic
/// # use chumsky::prelude::*;
/// let int = just::<_, _, Simple<char>>("0x").ignore_then(todo())
///     .or(just("0b").ignore_then(text::digits(2)))
///     .or(text::int(10));
///
/// // Decimal numbers are parsed
/// assert_eq!(int.parse("12"), Ok("12".to_string()));
/// // Binary numbers are parsed
/// assert_eq!(int.parse("0b00101"), Ok("00101".to_string()));
/// // Parsing hexadecimal numbers results in a panic because the parser is unimplemented
/// int.parse("0xd4");
/// ```
pub fn todo<I, O, E>() -> Todo<I, O, E> {
    Todo(PhantomData)
}

impl<I, O, E> Copy for Todo<I, O, E> {}
impl<I, O, E> Clone for Todo<I, O, E> {
    fn clone(&self) -> Self {
        Self(PhantomData)
    }
}

impl<I: Clone, O, E: Error<I>> Parser<I, O> for Todo<I, O, E> {
    type Error = E;

    fn parse_inner<D: Debugger>(
        &self,
        _debugger: &mut D,
        _stream: &mut StreamOf<I, E>,
    ) -> PResult<I, O, E> {
        todo!("Attempted to use an unimplemented parser.")
    }

    fn parse_inner_verbose(
        &self,
        d: &mut Verbose,
        s: &mut StreamOf<I, E>,
    ) -> PResult<I, O, E> {
        #[allow(deprecated)]
        self.parse_inner(d, s)
    }
    fn parse_inner_silent(
        &self,
        d: &mut Silent,
        s: &mut StreamOf<I, E>,
    ) -> PResult<I, O, E> {
        #[allow(deprecated)]
        self.parse_inner(d, s)
    }
}

/// See [`choice`].
pub struct Choice<T, E>(pub(crate) T, pub(crate) PhantomData<E>);

impl<T: Copy, E> Copy for Choice<T, E> {}
impl<T: Clone, E> Clone for Choice<T, E> {
    fn clone(&self) -> Self {
        Self(self.0.clone(), PhantomData)
    }
}

// Recursively implements `Parser` for `Choice` over tuples of every arity up to the length of
// the identifier list passed in the invocation below; all element parsers must share the same
// input, output, and error types.
macro_rules! impl_for_tuple {
    () => {};
    ($head:ident $($X:ident)*) => {
        impl_for_tuple!($($X)*);
        impl_for_tuple!(~ $head $($X)*);
    };
    (~ $($X:ident)*) => {
        #[allow(unused_variables, non_snake_case)]
        impl<I: Clone, O, E: Error<I>, $($X: Parser<I, O, Error = E>),*> Parser<I, O> for Choice<($($X,)*), E> {
            type Error = E;

            fn parse_inner<D: Debugger>(
                &self,
                debugger: &mut D,
                stream: &mut StreamOf<I, E>,
            ) -> PResult<I, O, E> {
                let Choice(($($X,)*), _) = self;
                let mut alt = None;
                $(
                    // Try each parser in turn; on failure, fold its error into the best
                    // alternative seen so far before moving on to the next parser.
                    match stream.try_parse(|stream| {
                        #[allow(deprecated)]
                        debugger.invoke($X, stream)
                    }) {
                        (errors, Ok(out)) => return (errors, Ok(out)),
                        (errors, Err(a_alt)) => {
                            alt = merge_alts(alt.take(), Some(a_alt));
                        },
                    };
                )*
                (Vec::new(), Err(alt.unwrap()))
            }

            fn parse_inner_verbose(
                &self,
                d: &mut Verbose,
                s: &mut StreamOf<I, E>,
            ) -> PResult<I, O, E> {
                #[allow(deprecated)]
                self.parse_inner(d, s)
            }
            fn parse_inner_silent(
                &self,
                d: &mut Silent,
                s: &mut StreamOf<I, E>,
            ) -> PResult<I, O, E> {
                #[allow(deprecated)]
                self.parse_inner(d, s)
            }
        }
    };
}

impl_for_tuple!(A_ B_ C_ D_ E_ F_ G_ H_ I_ J_ K_ L_ M_ N_ O_ P_ Q_ S_ T_ U_ V_ W_ X_ Y_ Z_);

/// Parse using a tuple of many parsers, producing the output of the first to successfully parse.
///
/// This primitive has a twofold improvement over a chain of [`Parser::or`] calls:
///
/// - Rust's trait solver seems to resolve the [`Parser`] impl for this type much faster, significantly reducing
///   compilation times.
///
/// - Parsing is likely a little faster in some cases because the resulting parser is 'less careful' about error
///   routing, and doesn't perform the same fine-grained error prioritisation that [`Parser::or`] does.
///
/// These qualities make this parser ideal for lexers.
///
/// The output type of this parser is the output type of the inner parsers.
///
/// # Examples
/// ```
/// # use chumsky::prelude::*;
/// #[derive(Clone, Debug, PartialEq)]
/// enum Token {
///     If,
///     For,
///     While,
///     Fn,
///     Int(u64),
///     Ident(String),
/// }
///
/// let tokens = choice::<_, Simple<char>>((
///     text::keyword("if").to(Token::If),
///     text::keyword("for").to(Token::For),
///     text::keyword("while").to(Token::While),
///     text::keyword("fn").to(Token::Fn),
///     text::int(10).from_str().unwrapped().map(Token::Int),
///     text::ident().map(Token::Ident),
/// ))
///     .padded()
///     .repeated();
///
/// use Token::*;
/// assert_eq!(
///     tokens.parse("if 56 for foo while 42 fn bar"),
///     Ok(vec![If, Int(56), For, Ident("foo".to_string()), While, Int(42), Fn, Ident("bar".to_string())]),
/// );
/// ```
pub fn choice<T, E>(parsers: T) -> Choice<T, E> {
    Choice(parsers, PhantomData)
}
--------------------------------------------------------------------------------