├── .github ├── FUNDING.yml └── workflows │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── backtrack.rs ├── cbor.rs ├── json.pest ├── json.rs ├── lex.rs ├── parser.rs ├── samples │ ├── sample.cbor │ └── sample.json ├── tokens.txt └── utils.rs ├── examples ├── brainfuck.rs ├── foo.rs ├── indent.rs ├── io.rs ├── json.rs ├── json_fast.rs ├── logos.rs ├── mini_ml.rs ├── nano_rust.rs ├── nested.rs ├── nested_spans.rs ├── pythonic.rs ├── sample.bf ├── sample.foo ├── sample.io ├── sample.json ├── sample.mini_ml ├── sample.nrs ├── sample.py └── zero-copy.rs ├── guide ├── README.md ├── debugging.md ├── error_and_recovery.md ├── getting_started.md ├── intro.md ├── key_concepts.md ├── meet_the_parsers.md ├── recursion.md ├── technical_notes.md └── tutorial.md ├── misc ├── example.png └── logo.svg └── src ├── blanket.rs ├── cache.rs ├── combinator.rs ├── container.rs ├── either.rs ├── error.rs ├── extension.rs ├── extra.rs ├── guide.rs ├── input.rs ├── inspector.rs ├── label.rs ├── lib.rs ├── number.rs ├── pratt.rs ├── primitive.rs ├── private.rs ├── recovery.rs ├── recursive.rs ├── regex.rs ├── span.rs ├── stream.rs ├── text.rs ├── tokio.rs └── util.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [zesterer] 2 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | permissions: read-all 13 | 14 | jobs: 15 | check: 16 | name: Check 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Install latest nightly 21 | uses: dtolnay/rust-toolchain@master 22 | with: 23 | toolchain: nightly 24 | components: rustfmt, clippy 25 | - 
name: Run cargo check (all features) 26 | run: cargo check --benches --examples --tests --verbose --all-features 27 | - name: Run cargo check (no features) 28 | run: cargo check --benches --examples --tests --verbose --no-default-features 29 | - name: Run cargo clippy 30 | run: cargo clippy --benches --examples --tests --verbose --all-features -- -D warnings 31 | - name: Run cargo fmt 32 | run: cargo fmt --verbose --check 33 | - name: Run cargo doc 34 | run: cargo doc --all-features --verbose 35 | env: 36 | RUSTDOCFLAGS: --cfg docsrs 37 | test: 38 | name: Test 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v3 42 | - name: Install latest nightly 43 | uses: dtolnay/rust-toolchain@master 44 | with: 45 | toolchain: nightly 46 | components: rustfmt, clippy 47 | - name: Run cargo test 48 | run: cargo test --verbose --all-features 49 | env: 50 | RUSTDOCFLAGS: --cfg docsrs 51 | msrv: 52 | name: MSRV 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/checkout@v3 56 | - name: Install MSRV 57 | uses: dtolnay/rust-toolchain@master 58 | with: 59 | toolchain: "1.65" 60 | components: rustfmt, clippy 61 | - name: Check MSRV compatibility 62 | run: cargo check --verbose --features _test_stable 63 | semver: 64 | name: SemVer 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v3 68 | - name: Check semver compatibility 69 | uses: obi1kenobi/cargo-semver-checks-action@v2 70 | with: 71 | rust-toolchain: stable 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE projects 2 | .idea/ 3 | 4 | # Project output 5 | /target 6 | flamegraph.svg 7 | perf.data* 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be 
documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | # Unreleased 9 | 10 | ### Added 11 | 12 | ### Removed 13 | 14 | ### Changed 15 | 16 | ### Fixed 17 | 18 | # [0.10.1] - 2025-04-13 19 | 20 | ### Added 21 | 22 | - Implemented `Container` for `VecDeque` 23 | - New section covering recursion in the guide 24 | 25 | ### Changed 26 | 27 | - `Boxed` types now have a default type parameter of `extra::Default`, like `Parser` and `IterParser` 28 | - The tutorial has been updated for `0.10` and has been moved to the guide 29 | 30 | ### Fixed 31 | 32 | - Nonsense spans occasionally generated for non-existent tokens 33 | - Improved docs have been added for several items 34 | - Many minor documentation issues have been fixed 35 | 36 | # [0.10.0] - 2025-03-22 37 | 38 | *Note: version 0.10 is a from-scratch rewrite of chumsky with innumerable small changes. 
To avoid this changelog being 39 | longer than the compiled works of Douglas Adams, the following is a high-level overview of the major feature additions 40 | and does not include small details.* 41 | 42 | ### Added 43 | 44 | - Support for zero-copy parsing (i.e: parser outputs that hold references to the parser input) 45 | - Support for parsing nested inputs like token trees 46 | - Support for parsing context-sensitive grammars such as Python-style indentation, Rust-style raw strings, and much 47 | more 48 | - Support for parsing by graphemes as well as unicode codepoints 49 | - Support for caching parsers independent of the lifetime of the parser 50 | - A new trait, `IterParser`, that allows expressing parsers that generate many outputs 51 | - Added the ability to collect iterable parsers into fixed-size arrays, along with a plethora of other container types 52 | - Support for manipulating shared state during parsing, elegantly allowing support for arena allocators, cstrees, 53 | interners, and much more 54 | - Support for a vast array of new input types: slices, strings, arrays, `impl Read`ers, iterators, etc. 
55 | - Experimental support for memoization, allowing chumsky to parse left-recursive grammars and reducing the 56 | computational complexity of parsing certain grammars 57 | - An extension API, allowing third-party crates to extend chumsky's capabilities and introduce new combinators 58 | - A `pratt` parser combinator, allowing for conveniently and simply creating expression parsers with precise operator 59 | precedence 60 | - A `regex` combinator, allowing the parsing of terms based on a specific regex pattern 61 | - Properly differentiated ASCII and Unicode text parsers 62 | 63 | ## Removed 64 | 65 | - `Parser::then_with` has been removed in favour of the new context-sensitive combinators 66 | 67 | ### Changed 68 | 69 | - Performance has *radically* improved 70 | - Error generation and handling is now significantly more flexible 71 | 72 | # [0.9.2] - 2023-03-02 73 | 74 | ### Fixed 75 | 76 | - Properly fixed `skip_then_retry_until` regression 77 | 78 | # [0.9.1] - 2023-03-02 79 | 80 | ### Fixed 81 | 82 | - Regression in `skip_then_retry_until` recovery strategy 83 | 84 | # [0.9.0] - 2023-02-07 85 | 86 | ### Added 87 | 88 | - A `spill-stack` feature that uses `stacker` to avoid stack overflow errors for deeply recursive parsers 89 | - The ability to access the token span when using `select!` like `select! { |span| Token::Num(x) => (x, span) }` 90 | - Added a `skip_parser` recovery strategy that allows you to implement your own recovery strategies in terms of other 91 | parsers. For example, `.recover_with(skip_parser(take_until(just(';'))))` skips tokens until after the next semicolon 92 | - A `not` combinator that consumes a single token if it is *not* the start of a given pattern. For example, 93 | `just("\\n").or(just('"')).not()` matches any `char` that is not either the final quote of a string, and is not the 94 | start of a newline escape sequence 95 | - A `semantic_indentation` parser for parsing indentation-sensitive languages. 
Note that this is likely to be 96 | deprecated/removed in the future in favour of a more powerful solution 97 | - `#[must_use]` attribute for parsers to ensure that they're not accidentally created without being used 98 | - `Option>` and `Vec>` now implement `Chain` and `Option` implements `Chain` 99 | - `choice` now supports both arrays and vectors of parsers in addition to tuples 100 | - The `Simple` error type now implements `Eq` 101 | 102 | ### Changed 103 | 104 | - `text::whitespace` returns a `Repeated` instead of an `impl Parser`, allowing you to call methods like `at_least` and 105 | `exactly` on it. 106 | - Improved `no_std` support 107 | - Improved examples and documentation 108 | - Use zero-width spans for EoI by default 109 | - Don't allow defining a recursive parser more than once 110 | - Various minor bug fixes 111 | - Improved `Display` implementations for various built-in error types and `SimpleReason` 112 | - Use an `OrderedContainer` trait to avoid unexpected behaviour for unordered containers in combination with `just` 113 | 114 | ### Fixed 115 | 116 | - Made several parsers (`todo`, `unwrapped`, etc.) 
more useful by reporting the parser's location on panic 117 | - Boxing a parser that is already boxed just gives you the original parser to avoid double indirection 118 | - Improved compilation speeds 119 | 120 | # [0.8.0] - 2022-02-07 121 | 122 | ### Added 123 | 124 | - `then_with` combinator to allow limited support for parsing nested patterns 125 | - impl From<&[T; N]> for Stream 126 | - `SkipUntil/SkipThenRetryUntil::skip_start/consume_end` for more precise control over skip-based recovery 127 | 128 | ### Changed 129 | 130 | - Allowed `Validate` to map the output type 131 | - Switched to zero-size End Of Input spans for default implementations of `Stream` 132 | - Made `delimited_by` take combinators instead of specific tokens 133 | - Minor optimisations 134 | - Documentation improvements 135 | 136 | ### Fixed 137 | 138 | - Compilation error with `--no-default-features` 139 | - Made default behaviour of `skip_until` more sensible 140 | 141 | # [0.7.0] - 2021-12-16 142 | 143 | ### Added 144 | 145 | - A new [tutorial](tutorial.md) to help new users 146 | 147 | - `select` macro, a wrapper over `filter_map` that makes extracting data from specific tokens easy 148 | - `choice` parser, a better alternative to long `or` chains (which sometimes have poor compilation performance) 149 | - `todo` parser, that panics when used (but not when created) (akin to Rust's `todo!` macro, but for parsers) 150 | - `keyword` parser, that parses *exact* identifiers 151 | 152 | - `from_str` combinator to allow converting a pattern to a value inline, using `std::str::FromStr` 153 | - `unwrapped` combinator, to automatically unwrap an output value inline 154 | - `rewind` combinator, that allows reverting the input stream on success. 
It's most useful when requiring that a 155 | pattern is followed by some terminating pattern without the first parser greedily consuming it 156 | - `map_err_with_span` combinator, to allow fetching the span of the input that was parsed by a parser before an error 157 | was encountered 158 | 159 | - `or_else` combinator, to allow processing and potentially recovering from a parser error 160 | - `SeparatedBy::at_most` to require that a separated pattern appear at most a specific number of times 161 | - `SeparatedBy::exactly` to require that a separated pattern be repeated exactly a specific number of times 162 | - `Repeated::exactly` to require that a pattern be repeated exactly a specific number of times 163 | 164 | - More trait implementations for various things, making the crate more useful 165 | 166 | ### Changed 167 | 168 | - Made `just`, `one_of`, and `none_of` significantly more useful. They can now accept strings, arrays, slices, vectors, 169 | sets, or just single tokens as before 170 | - Added the return type of each parser to its documentation 171 | - More explicit documentation of parser behaviour 172 | - More doc examples 173 | - Deprecated `seq` (`just` has been generalised and can now be used to parse specific input sequences) 174 | - Sealed the `Character` trait so that future changes are not breaking 175 | - Sealed the `Chain` trait and made it more powerful 176 | - Moved trait constraints on `Parser` to where clauses for improved readability 177 | 178 | ### Fixed 179 | 180 | - Fixed a subtle bug that allowed `separated_by` to parse an extra trailing separator when it shouldn't 181 | - Filled a 'hole' in the `Error` trait's API that conflated a lack of expected tokens with expectation of end of input 182 | - Made recursive parsers use weak reference-counting to avoid memory leaks 183 | 184 | # [0.6.0] - 2021-11-22 185 | 186 | ### Added 187 | 188 | - `skip_until` error recovery strategy 189 | - `SeparatedBy::at_least` and `SeparatedBy::at_most` for
parsing a specific number of separated items 190 | - `Parser::validate` for integrated AST validation 191 | - `Recursive::declare` and `Recursive::define` for more precise control over recursive declarations 192 | 193 | ### Changed 194 | 195 | - Improved `separated_by` error messages 196 | - Improved documentation 197 | - Hid a new (probably) unused implementation details 198 | 199 | # [0.5.0] - 2021-10-30 200 | 201 | ### Added 202 | 203 | - `take_until` primitive 204 | 205 | ### Changed 206 | 207 | - Added span to fallback output function in `nested_delimiters` 208 | 209 | # [0.4.0] - 2021-10-28 210 | 211 | ### Added 212 | 213 | - Support for LL(k) parsing 214 | - Custom error recovery strategies 215 | - Debug mode 216 | - Nested input flattening 217 | 218 | ### Changed 219 | 220 | - Radically improved error quality 221 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "chumsky" 3 | version = "0.11.0" 4 | description = "A parser library for humans with powerful error recovery" 5 | authors = ["Joshua Barretto ", "Elijah Hartvigsen "] 6 | repository = "https://github.com/zesterer/chumsky" 7 | license = "MIT" 8 | keywords = ["parser", "combinator", "token", "language", "syntax"] 9 | categories = ["parsing", "text-processing"] 10 | edition = "2021" 11 | exclude = [ 12 | "/misc/*", 13 | "/benches/samples/*", 14 | ] 15 | rust-version = "1.65" 16 | 17 | [features] 18 | default = ["std", "stacker"] 19 | 20 | # Integrate with the standard library. 21 | std = [ 22 | "regex-automata?/std", 23 | "serde?/std" 24 | ] 25 | 26 | # Enable nightly-only features like better compiler diagnostics and a Parser impl for ! (the never type). 27 | nightly = [] 28 | 29 | # Allows deeper recursion by dynamically spilling stack state on to the heap. 
30 | stacker = ["dep:stacker", "std"] 31 | 32 | # Allows parser memoization, speeding up heavily back-tracking parsers and allowing left recursion. 33 | memoization = [] 34 | 35 | # Allows extending chumsky by writing your own parser implementations. 36 | extension = [] 37 | 38 | # Make builtin parsers such as `Boxed` use atomic instead of non-atomic internals. 39 | # TODO: Remove or rework this 40 | sync = ["spin"] 41 | 42 | # Enable Pratt parsing combinator 43 | pratt = ["unstable"] 44 | 45 | # Allow the use of unstable features (aka features where the API is not settled) 46 | unstable = [] 47 | 48 | # Allows use of the `Number` parser, which is backed by the `lexical` crate 49 | lexical-numbers = ["lexical", "unstable"] 50 | 51 | # Adds impl of Parser for either::Either 52 | either = ["dep:either"] 53 | 54 | # Enables regex combinators 55 | regex = ["dep:regex-automata"] 56 | 57 | # Enable serde serialization support 58 | serde = ["dep:serde"] 59 | 60 | # Enable support for using Tokio's byte slices as inputs 61 | bytes = ["dep:bytes"] 62 | 63 | # Enable dependencies only needed for generation of documentation on docs.rs 64 | docsrs = [] 65 | 66 | # An alias of all features that work with the stable compiler. 67 | # Do not use this feature, its removal is not considered a breaking change and its behaviour may change. 68 | # If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list. 
69 | _test_stable = ["std", "stacker", "memoization", "extension", "sync"] 70 | 71 | [package.metadata.docs.rs] 72 | all-features = true 73 | rustdoc-args = ["--cfg", "docsrs"] 74 | 75 | [dependencies] 76 | hashbrown = "0.15" 77 | stacker = { version = "0.1", optional = true } 78 | regex-automata = { version = "0.3", default-features = false, optional = true, features = ["alloc", "meta", "perf", "unicode", "nfa", "dfa", "hybrid"] } 79 | spin = { version = "0.9", features = ["once"], default-features = false, optional = true } 80 | lexical = { version = "6.1.1", default-features = false, features = ["parse-integers", "parse-floats", "format"], optional = true } 81 | either = { version = "1.8.1", optional = true } 82 | serde = { version = "1.0", default-features = false, optional = true, features = ["derive"] } 83 | unicode-ident = "1.0.10" 84 | unicode-segmentation = "1" 85 | bytes = { version = "1", default-features = false, optional = true } 86 | 87 | [dev-dependencies] 88 | ariadne = "0.5" 89 | pom = "3.2" 90 | nom = "7.1" 91 | nom8 = { package = "nom", version = "8"} 92 | winnow = "0.7.0" 93 | serde_json = { version = "1.0", features = ["preserve_order"] } 94 | ciborium = { version = "0.2" } 95 | criterion = "0.4.0" 96 | pest = "2.5" 97 | pest_derive = "2.5" 98 | sn = "0.1" 99 | logos = "0.13" 100 | lasso = "0.7" 101 | slotmap = "1.0" 102 | 103 | [target.'cfg(unix)'.dev-dependencies] 104 | pprof = { version = "0.11", features = ["flamegraph", "criterion"] } 105 | 106 | [profile.bench] 107 | debug = true 108 | 109 | [[bench]] 110 | name = "json" 111 | harness = false 112 | required-features = ["std"] 113 | 114 | [[bench]] 115 | name = "lex" 116 | harness = false 117 | 118 | [[bench]] 119 | name = "parser" 120 | harness = false 121 | 122 | [[bench]] 123 | name = "backtrack" 124 | harness = false 125 | 126 | [[bench]] 127 | name = "cbor" 128 | harness = false 129 | 130 | [[example]] 131 | name = "nano_rust" 132 | 133 | [[example]] 134 | name = "json" 135 | 
required-features = ["std"] 136 | 137 | [[example]] 138 | name = "json_fast" 139 | required-features = ["std"] 140 | 141 | [[example]] 142 | name = "io" 143 | required-features = ["std"] 144 | 145 | [[example]] 146 | name = "foo" 147 | required-features = ["std"] 148 | 149 | [[example]] 150 | name = "mini_ml" 151 | required-features = ["pratt"] 152 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Joshua Barretto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![crates.io](https://img.shields.io/crates/v/chumsky.svg)](https://crates.io/crates/chumsky) 2 | [![crates.io](https://docs.rs/chumsky/badge.svg)](https://docs.rs/chumsky) 3 | [![License](https://img.shields.io/crates/l/chumsky.svg)](https://github.com/zesterer/chumsky) 4 | [![actions-badge](https://github.com/zesterer/chumsky/workflows/Rust/badge.svg?branch=main)](https://github.com/zesterer/chumsky/actions) 5 | 6 | Chumsky is a parser library for Rust that makes writing expressive, high-performance parsers easy. 7 | 8 | 9 | Example usage with my own language, Tao 10 | 11 | 12 | *Note: Error diagnostic rendering in this example is performed by [Ariadne](https://github.com/zesterer/ariadne)* 13 | 14 | Although chumsky is designed primarily for user-facing parsers such as compilers, chumsky is just as much at home 15 | parsing binary protocols at the networking layer, configuration files, or any other form of complex input validation 16 | that you may need. It also has `no_std` support, making it suitable for embedded environments.
17 | 18 | ## Features 19 | 20 | - 🪄 **Expressive combinators** that make writing your parser a joy 21 | - 🎛️ **Fully generic** across input, token, output, span, and error types 22 | - 📑 **Zero-copy parsing** minimises allocation by having outputs hold references/slices of the input 23 | - 🚦 **Flexible error recovery** strategies out of the box 24 | - ☑️ **Check-only mode** for fast verification of inputs, automatically supported 25 | - 🚀 **Internal optimiser** leverages the power of [GATs](https://smallcultfollowing.com/babysteps/blog/2022/06/27/many-modes-a-gats-pattern/) to optimise your parser for you 26 | - 📖 **Text-oriented parsers** for text inputs (i.e: `&[u8]` and `&str`) 27 | - 👁️‍🗨️ **Context-free grammars** are fully supported, with support for context-sensitivity 28 | - 🔄 **Left recursion and memoization** have opt-in support 29 | - 🪺 **Nested inputs** such as token trees are fully supported both as inputs and outputs 30 | - 🏷️ **Pattern labelling** for dynamic, user-friendly error messages 31 | - 🗃️ **Caching** allows parsers to be created once and reused many times 32 | - ↔️ **Pratt parsing** support for simple yet flexible expression parsing 33 | - 🪛 **no_std** support, allowing chumsky to run in embedded environments 34 | 35 | ## Example 36 | 37 | See [`examples/brainfuck.rs`](https://github.com/zesterer/chumsky/blob/main/examples/brainfuck.rs) for a full 38 | [Brainfuck](https://en.wikipedia.org/wiki/Brainfuck) interpreter 39 | (`cargo run --example brainfuck -- examples/sample.bf`). 
40 | 41 | ```rust,ignore 42 | use chumsky::prelude::*; 43 | 44 | /// An AST (Abstract Syntax Tree) for Brainfuck instructions 45 | #[derive(Clone)] 46 | enum Instr { 47 | Left, Right, 48 | Incr, Decr, 49 | Read, Write, 50 | Loop(Vec), // In Brainfuck, `[...]` loop instructions contain any number of instructions 51 | } 52 | 53 | /// A function that generates a Brainfuck parser 54 | fn brainfuck<'a>() -> impl Parser<'a, &'a str, Vec> { 55 | // Brainfuck syntax is recursive: each instruction can contain many sub-instructions (via `[...]` loops) 56 | recursive(|bf| choice(( 57 | // All of the basic instructions are just single characters 58 | just('<').to(Instr::Left), 59 | just('>').to(Instr::Right), 60 | just('+').to(Instr::Incr), 61 | just('-').to(Instr::Decr), 62 | just(',').to(Instr::Read), 63 | just('.').to(Instr::Write), 64 | // Loops are strings of Brainfuck instructions, delimited by square brackets 65 | bf.delimited_by(just('['), just(']')).map(Instr::Loop), 66 | )) 67 | // Brainfuck instructions appear sequentially, so parse as many as we need 68 | .repeated() 69 | .collect()) 70 | } 71 | 72 | // Parse some Brainfuck with our parser 73 | brainfuck().parse("--[>--->->->++>-<<<<<-------]>--.>---------.>--..+++.>----.>+++++++++.<<.+++.------.<-.>>+.") 74 | ``` 75 | 76 | You can find more examples [here](https://github.com/zesterer/chumsky/tree/main/examples). 77 | 78 | ## Guide and documentation 79 | 80 | Chumsky has an extensive [guide](https://docs.rs/chumsky/latest/chumsky/guide) that walks you through the library: all 81 | the way from setting up and basic theory to advanced uses of the crate. It includes technical details of chumsky's 82 | behaviour, examples of uses, a handy index for all of the combinators, technical details about the crate, and even a 83 | tutorial that leads you through the development of a fully-functioning interpreter for a simple programming language. 
84 | 85 | The crate docs should also be similarly useful: most important functions include at least one contextually-relevant 86 | example, and all crate items are fully documented. 87 | 88 | In addition, chumsky comes with a suite of fully-fledged 89 | [example projects](https://github.com/zesterer/chumsky/tree/main/examples). These include: 90 | 91 | - Parsers for existing syntaxes like Brainfuck and JSON 92 | - Integration demos for third-party crates, like [`logos`](https://crates.io/crates/logos) 93 | - Parsers for new toy programming languages: a Rust-like language and a full-on lexer, parser, type-checker, and 94 | interpreter for a miniature ML-like language. 95 | - Examples of parsing non-trivial inputs like token trees, `impl Read`ers, and zero-copy, zero-alloc parsing. 96 | 97 | ## Cargo features 98 | 99 | Chumsky contains several optional features that extend the crate's functionality. 100 | 101 | - `bytes`: adds support for parsing types from the [`bytes`](https://docs.rs/bytes/) crate.
102 | 103 | - `either`: implements `Parser` for `either::Either`, allowing dynamic configuration of parsers at run-time 104 | 105 | - `extension`: enables the extension API, allowing you to write your own first-class combinators that integrate with 106 | and extend chumsky 107 | 108 | - `lexical-numbers`: Enables use of the `Number` parser for parsing various numeric formats 109 | 110 | - `memoization`: enables [memoization](https://en.wikipedia.org/wiki/Memoization#Parsers) features 111 | 112 | - `nightly`: enable support for features only supported by the nightly Rust compiler 113 | 114 | - `pratt`: enables the [pratt parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) 115 | combinator 116 | 117 | - `regex`: enables the regex combinator 118 | 119 | - `serde`: enables `serde` (de)serialization support for several types 120 | 121 | - `stacker` (enabled by default): avoid stack overflows by spilling stack data to the heap via the `stacker` crate 122 | 123 | - `std` (enabled by default): support for standard library features 124 | 125 | - `unstable`: enables experimental chumsky features (API features enabled by `unstable` are NOT considered to fall 126 | under the semver guarantees of chumsky!) 127 | 128 | ## *What* is a parser combinator? 129 | 130 | Parser combinators are a technique for implementing parsers by defining them in terms of other parsers. The resulting 131 | parsers use a [recursive descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) strategy to transform a stream 132 | of tokens into an output. Using parser combinators to define parsers is roughly analogous to using Rust's 133 | [`Iterator`](https://doc.rust-lang.org/std/iter/trait.Iterator.html) trait to define iterative algorithms: the 134 | type-driven API of `Iterator` makes it more difficult to make mistakes and easier to encode complicated iteration logic 135 | than if one were to write the same code by hand. The same is true of parser combinators. 
136 | 137 | ## *Why* use parser combinators? 138 | 139 | Writing parsers with good error recovery is conceptually difficult and time-consuming. It requires understanding the 140 | intricacies of the recursive descent algorithm, and then implementing recovery strategies on top of it. If you're 141 | developing a programming language, you'll almost certainly change your mind about syntax in the process, leading to some 142 | slow and painful parser refactoring. Parser combinators solve both problems by providing an ergonomic API that allows 143 | for rapidly iterating upon a syntax. 144 | 145 | Parser combinators are also a great fit for domain-specific languages for which an existing parser does not exist. 146 | Writing a reliable, fault-tolerant parser for such situations can go from being a multi-day task to a half-hour task 147 | with the help of a decent parser combinator library. 148 | 149 | ## Classification 150 | 151 | Chumsky's parsers are [recursive descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) parsers and are 152 | capable of parsing [parsing expression grammars (PEGs)](https://en.wikipedia.org/wiki/Parsing_expression_grammar), which 153 | includes all known context-free languages. However, chumsky doesn't stop there: it also supports context-sensitive 154 | grammars via a set of dedicated combinators that integrate cleanly with the rest of the library. This allows it to 155 | additionally parse a number of context-sensitive syntaxes like Rust-style raw strings, Python-style semantic 156 | indentation, and much more. 
157 | 158 | ## Error recovery 159 | 160 | Chumsky has support for error recovery, meaning that it can encounter a syntax error, report the error, and then 161 | attempt to recover itself into a state in which it can continue parsing so that multiple errors can be produced at once 162 | and a partial [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) can still be generated from the input for future 163 | compilation stages to consume. 164 | 165 | ## Performance 166 | 167 | Chumsky allows you to choose your priorities. When needed, it can be configured for high-quality parser errors. It can 168 | also be configured for *performance*. 169 | 170 | It's difficult to produce general benchmark results for parser libraries. By their nature, the performance of a parser 171 | is intimately tied to exactly how the grammar they implement has been specified. That said, here are some numbers for a 172 | fairly routine JSON parsing benchmark implemented idiomatically in various libraries. As you can see, chumsky ranks 173 | quite well! 174 | 175 | | Ranking | Library | Time (smaller is better) | Throughput | 176 | |---------|------------------------------------------------------|--------------------------|------------| 177 | | 1 | `chumsky` (check-only) | 140.77 µs | 797 MB/s | 178 | | 2 | [`winnow`](https://github.com/winnow-rs/winnow) | 178.91 µs | 627 MB/s | 179 | | 3 | `chumsky` | 210.43 µs | 533 MB/s | 180 | | 4 | [`sn`](https://github.com/Jacherr/sn) (hand-written) | 237.94 µs | 472 MB/s | 181 | | 5 | [`serde_json`](https://github.com/serde-rs/json) | 477.41 µs | 235 MB/s | 182 | | 6 | [`nom`](https://github.com/rust-bakery/nom) | 526.52 µs | 213 MB/s | 183 | | 7 | [`pest`](https://github.com/pest-parser/pest) | 1.9706 ms | 57 MB/s | 184 | | 8 | [`pom`](https://github.com/J-F-Liu/pom) | 13.730 ms | 8 MB/s | 185 | 186 | What should you take from this? It's difficult to say. 
'Chumsky is faster than X' or 'chumsky is slower than Y' is too 187 | strong a statement: this is just one particular benchmark with one particular set of implementations and one 188 | particular workload. 189 | 190 | That said, there is something you can take: chumsky isn't going to be your bottleneck. In this benchmark, chumsky is 191 | within 20% of the performance of the 'pack leader' and has performance comparable to a hand-written parser. The 192 | performance standards for Rust libraries are already far above most language ecosystems, so you can be sure that 193 | chumsky will keep pace with your use-case. 194 | 195 | Benchmarks were performed on a single core of an AMD Ryzen 7 3700x. 196 | 197 | ## Notes 198 | 199 | My apologies to Noam for choosing such an absurd name. 200 | 201 | ## License 202 | 203 | Chumsky is licensed under the MIT license (see `LICENSE` in the main repository). 204 | -------------------------------------------------------------------------------- /benches/backtrack.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 3 | 4 | fn bench_backtrack(c: &mut Criterion) { 5 | let four = just::<_, &str, extra::Default>('!') 6 | .repeated() 7 | .collect::>() 8 | .then_ignore(just(';')) 9 | .repeated() 10 | .exactly(4) 11 | .collect::>() 12 | .then_ignore(just(';')); 13 | 14 | let five = just('!') 15 | .repeated() 16 | .collect::>() 17 | .then_ignore(just(';')) 18 | .repeated() 19 | .exactly(5) 20 | .collect::>() 21 | .then_ignore(just(';')); 22 | 23 | let xs = five.or(four).repeated().collect::>(); 24 | 25 | c.bench_function("backtrack", |b| { 26 | b.iter(|| { 27 | black_box(xs.parse(&black_box("!!!!;!!!!;!!!!;!!!!;;".repeat(1000)))) 28 | .into_result() 29 | .unwrap(); 30 | }) 31 | }); 32 | } 33 | 34 | criterion_group!(benches, bench_backtrack); 35 | criterion_main!(benches); 36 | 
--------------------------------------------------------------------------------
/benches/json.pest:
--------------------------------------------------------------------------------
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }

object = {
    "{" ~ "}" |
    "{" ~ pair ~ ("," ~ pair)* ~ "}"
}
pair = { string ~ ":" ~ value }

array = {
    "[" ~ "]" |
    "[" ~ value ~ ("," ~ value)* ~ "]"
}

value = _{ object | array | string | number | boolean | null }

boolean = { "true" | "false" }

null = { "null" }

string = ${ "\"" ~ inner ~ "\"" }
inner = @{ char* }
char = {
    !("\"" | "\\") ~ ANY
    | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
    | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})
}

number = @{
    "-"?
    ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*)
    ~ ("." ~ ASCII_DIGIT*)?
    ~ (^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+)?
}

json = _{ SOI ~ (object | array) ~ EOI }
--------------------------------------------------------------------------------
/benches/lex.rs:
--------------------------------------------------------------------------------
use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// A JSON-like value, kept so the token benchmarks mirror the JSON ones.
#[derive(Debug, Clone, PartialEq)]
pub enum Json {
    Null,
    Bool(bool),
    Str(String),
    Num(f64),
    Array(Vec<Json>),
    Object(Vec<(String, Json)>),
}

/// The token set recognised by both lexers under test.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
    Null,
    Bool(bool),
    Str(&'a [u8]),
    Num(f64),
    Ident(&'a [u8]),
    Less,
    More,
    LessEq,
    MoreEq,
    OpenParen,
    CloseParen,
    Comma,
}

static SAMPLE: &[u8] = include_bytes!("tokens.txt");

/// Lexes `tokens.txt` three ways: chumsky (building tokens), chumsky
/// (check-only, no token construction), and logos.
fn bench_lex(c: &mut Criterion) {
    c.bench_function("lex_chumsky_zero_copy", {
        use ::chumsky::prelude::*;
        let parser = chumsky_zero_copy::parser();
        move |b| {
            b.iter(|| {
                // The sample is known to contain exactly 4048 tokens.
                assert_eq!(
                    black_box(parser.parse(black_box(SAMPLE)))
                        .into_result()
                        .unwrap()
                        .len(),
                    4048
                )
            })
        }
    });

    c.bench_function("lex_chumsky_zero_copy_check", {
        use ::chumsky::prelude::*;
        let parser = chumsky_zero_copy::parser();
        move |b| {
            b.iter(|| {
                // Check-only parsing must produce no errors on the sample.
                assert!(black_box(parser.check(black_box(SAMPLE)))
                    .into_errors()
                    .is_empty())
            })
        }
    });

    c.bench_function("lex_logos", |b| {
        b.iter(|| {
            assert!(black_box(logos::lexer(black_box(SAMPLE))).all(|t| t != Ok(logos::Token::Error)))
        })
    });
}

criterion_group!(benches, bench_lex);
criterion_main!(benches);

mod logos {
    use logos::{Lexer, Logos};
    use std::str;

    /// Converts the matched `true`/`false` slice into a `bool`.
    fn to_bool<'a>(lex: &mut Lexer<'a, Token<'a>>) -> bool {
        match lex.slice() {
            b"true" => true,
            b"false" => false,
            _ => unreachable!(),
        }
    }

    /// Parses the matched byte slice as an `f64`.
    fn to_f64<'a>(lex: &mut Lexer<'a, Token<'a>>) -> f64 {
        str::from_utf8(lex.slice()).unwrap().parse().unwrap()
    }

    #[derive(Logos, Debug, Clone, PartialEq)]
    pub enum Token<'a> {
        #[token("null")]
        Null,
        #[regex("true|false", to_bool)]
        Bool(bool),
        #[regex(br#""([^\\"]|\\[\\"bfnrt/])*""#)]
        Str(&'a [u8]),
        #[regex(br"-?([1-9][0-9]*|0)(\.[0-9]*)?([eE][+-]?[0-9]*)?", to_f64)]
        Num(f64),
        #[regex(br"[a-zA-Z_][a-zA-Z0-9_]*")]
        Ident(&'a [u8]),
        #[token(b"<")]
        Less,
        #[token(b">")]
        More,
        #[token(b"<=")]
        LessEq,
        #[token(b">=")]
        MoreEq,
        #[token(b"(")]
        OpenParen,
        #[token(b")")]
        CloseParen,
        #[token(b",")]
        Comma,

        // Whitespace is skipped; anything unmatched becomes an error token.
        #[regex(br"\s", logos::skip)]
        Error,
    }

    pub fn lexer(src: &[u8]) -> Lexer<'_, Token<'_>> {
        Token::lexer(src)
    }
}

mod chumsky_zero_copy {
    use chumsky::prelude::*;

    use super::Token;
    use std::str;

    /// Builds a zero-copy lexer producing [`Token`]s from a byte slice.
    pub fn parser<'a>() -> impl Parser<'a, &'a [u8], Vec<Token<'a>>> {
        let digits = one_of(b'0'..=b'9').repeated().to_slice();

        // An integer is either `0` or a non-zero digit followed by digits.
        let int = one_of(b'1'..=b'9')
            .repeated()
            .at_least(1)
            .then(one_of(b'0'..=b'9').repeated())
            .ignored()
            .or(just(b'0').ignored())
            .ignored();

        let frac = just(b'.').then(digits.clone());

        let exp = just(b'e')
            .or(just(b'E'))
            .then(one_of(b"+-").or_not())
            .then(digits.clone());

        // Numbers are lexed as raw slices and reparsed via `str::parse`.
        let number = just(b'-')
            .or_not()
            .then(int)
            .then(frac.or_not())
            .then(exp.or_not())
            .to_slice()
            .map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
            .boxed();

        let escape = just(b'\\')
            .then(choice((
                just(b'\\'),
                just(b'/'),
                just(b'"'),
                just(b'b').to(b'\x08'),
                just(b'f').to(b'\x0C'),
                just(b'n').to(b'\n'),
                just(b'r').to(b'\r'),
                just(b't').to(b'\t'),
            )))
            .ignored()
            .boxed();

        let string = none_of(b"\\\"")
            .ignored()
            .or(escape)
            .repeated()
            .to_slice()
            .delimited_by(just(b'"'), just(b'"'))
            .boxed();

        let ident = text::ascii::ident().to_slice().map(Token::Ident);

        choice((
            just(b"null").to(Token::Null),
            just(b"true").to(Token::Bool(true)),
            just(b"false").to(Token::Bool(false)),
            number.map(Token::Num),
            string.map(Token::Str),
            ident,
            // Two-byte operators must be tried before their one-byte prefixes.
            just(b"<=").to(Token::LessEq),
            just(b">=").to(Token::MoreEq),
            just(b"<").to(Token::Less),
            just(b">").to(Token::More),
            just(b"(").to(Token::OpenParen),
            just(b")").to(Token::CloseParen),
            just(b",").to(Token::Comma),
        ))
        .padded()
        .repeated()
        .collect()
    }
}
--------------------------------------------------------------------------------
/benches/parser.rs:
--------------------------------------------------------------------------------
use
chumsky::prelude::*;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};

mod utils;

/// Benchmarks a 26-way `choice` over the uppercase alphabet: first branch
/// (`A`), last branch (`Z`), and the failure case (`0`).
fn bench_choice(c: &mut Criterion) {
    let alphabet_choice = choice((
        just::<_, &str, extra::Default>('A'),
        just('B'),
        just('C'),
        just('D'),
        just('E'),
        just('F'),
        just('G'),
        just('H'),
        just('I'),
        just('J'),
        just('K'),
        just('L'),
        just('M'),
        just('N'),
        just('O'),
        just('P'),
        just('Q'),
        just('R'),
        just('S'),
        just('T'),
        just('U'),
        just('V'),
        just('W'),
        just('X'),
        just('Y'),
        just('Z'),
    ));

    let mut group = c.benchmark_group("choice");

    group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "A"), |b| {
        b.iter(|| black_box(alphabet_choice.parse(black_box("A"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "Z"), |b| {
        b.iter(|| black_box(alphabet_choice.parse(black_box("Z"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_choice.parse(black_box("0"))).into_result().is_err()))
    });
}

/// The same alphabet as `bench_choice`, but built as a chain of `.or` calls so
/// the two strategies can be compared.
fn bench_or(c: &mut Criterion) {
    let alphabet_or = just::<_, _, extra::Default>('A')
        .or(just('B'))
        .or(just('C'))
        .or(just('D'))
        .or(just('E'))
        .or(just('F'))
        .or(just('G'))
        .or(just('H'))
        .or(just('I'))
        .or(just('J'))
        .or(just('K'))
        .or(just('L'))
        .or(just('M'))
        .or(just('N'))
        .or(just('O'))
        .or(just('P'))
        .or(just('Q'))
        .or(just('R'))
        .or(just('S'))
        .or(just('T'))
        .or(just('U'))
        .or(just('V'))
        .or(just('W'))
        .or(just('X'))
        .or(just('Y'))
        .or(just('Z'));

    let mut group = c.benchmark_group("or");

    group.bench_function(BenchmarkId::new("A.or(B)...or(Z)", "A"), |b| {
        b.iter(|| black_box(alphabet_or.parse(black_box("A"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("A.or(B)...or(Z)", "Z"), |b| {
        b.iter(|| black_box(alphabet_or.parse(black_box("Z"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("A.or(B)...or(Z)", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_or.parse(black_box("0"))).into_result().is_err()))
    });
}

/// Benchmarks a 26-element `group` (sequence) over the whole alphabet: full
/// match, failure on the final element, and failure on the first element.
fn bench_group(c: &mut Criterion) {
    let alphabet_group = group((
        just::<_, &str, extra::Default>('A'),
        just('B'),
        just('C'),
        just('D'),
        just('E'),
        just('F'),
        just('G'),
        just('H'),
        just('I'),
        just('J'),
        just('K'),
        just('L'),
        just('M'),
        just('N'),
        just('O'),
        just('P'),
        just('Q'),
        just('R'),
        just('S'),
        just('T'),
        just('U'),
        just('V'),
        just('W'),
        just('X'),
        just('Y'),
        just('Z'),
    ));

    let mut group = c.benchmark_group("group");

    group.bench_function(
        BenchmarkId::new("group::<(A..Z)>", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
        |b| {
            b.iter(|| {
                black_box(alphabet_group.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
                    .into_result()
                    .unwrap()
            })
        },
    );

    group.bench_function(
        BenchmarkId::new("group::<(A..Z)>", "ABCDEFGHIJKLMNOPQRSTUVWXY0"),
        |b| {
            b.iter(|| {
                assert!(
                    black_box(alphabet_group.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXY0")))
                        .into_result()
                        .is_err()
                )
            })
        },
    );

    group.bench_function(BenchmarkId::new("group::<(A..Z)>", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_group.parse(black_box("0"))).into_result().is_err()))
    });
}

/// The same sequence as `bench_group`, but built as a chain of `.then` calls.
fn bench_then(c: &mut Criterion) {
    let alphabet_then = just::<_, _, extra::Default>('A')
        .then(just('B'))
        .then(just('C'))
        .then(just('D'))
        .then(just('E'))
        .then(just('F'))
        .then(just('G'))
        .then(just('H'))
        .then(just('I'))
        .then(just('J'))
        .then(just('K'))
        .then(just('L'))
        .then(just('M'))
        .then(just('N'))
        .then(just('O'))
        .then(just('P'))
        .then(just('Q'))
        .then(just('R'))
        .then(just('S'))
        .then(just('T'))
        .then(just('U'))
        .then(just('V'))
        .then(just('W'))
        .then(just('X'))
        .then(just('Y'))
        .then(just('Z'));

    let mut group = c.benchmark_group("then");

    group.bench_function(
        BenchmarkId::new("A.then(B)...then(Z)", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
        |b| {
            b.iter(|| {
                black_box(alphabet_then.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
                    .into_result()
                    .unwrap()
            })
        },
    );

    group.bench_function(
        BenchmarkId::new("A.then(B)...then(Z)", "ABCDEFGHIJKLMNOPQRSTUVWXY0"),
        |b| {
            b.iter(|| {
                assert!(
                    black_box(alphabet_then.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXY0")))
                        .into_result()
                        .is_err()
                )
            })
        },
    );

    group.bench_function(BenchmarkId::new("A.then(B)...then(Z)", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_then.parse(black_box("0"))).into_result().is_err()))
    });
}

/// Benchmarks the `regex` combinator on literal, class, and repetition
/// patterns, including non-matching inputs.
#[cfg(feature = "regex")]
fn bench_regex(c: &mut Criterion) {
    let re_foo = regex::<_, extra::Default>("foo");
    let re_foo2 = regex::<_, extra::Default>("[fF]oo");
    let re_rep = regex::<_, extra::Default>("(?:abc){4}");

    let mut group = c.benchmark_group("regex");

    group.bench_function(BenchmarkId::new("foo", "foo"), |b| {
        b.iter(|| black_box(re_foo.parse(black_box("foo"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("foo", "barfoofoofoo"), |b| {
        b.iter(|| black_box(re_foo.parse(black_box("barfoofoofoo"))).into_result().unwrap_err())
    });

    group.bench_function(BenchmarkId::new("[fF]oo", "foo"), |b| {
        b.iter(|| black_box(re_foo2.parse(black_box("foo"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("[fF]oo", "Foo"), |b| {
        b.iter(|| black_box(re_foo2.parse(black_box("Foo"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("[fF]oo", "barFoofoo"), |b| {
        b.iter(|| black_box(re_foo2.parse(black_box("barFoofoo"))).into_result().unwrap_err())
    });

    group.bench_function(BenchmarkId::new("(?:abc){4}", "abcabcabcabc"), |b| {
        b.iter(|| black_box(re_rep.parse(black_box("abcabcabcabc"))).into_result().unwrap())
    });
}

// Stub so the criterion target list stays valid without the `regex` feature.
#[cfg(not(feature = "regex"))]
fn bench_regex(_: &mut Criterion) {}

criterion_group!(
    name = benches;
    config = utils::make_criterion();
    targets = bench_choice, bench_or, bench_group, bench_then, bench_regex,
);
criterion_main!(benches);
--------------------------------------------------------------------------------
/benches/samples/sample.cbor:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zesterer/chumsky/6d07aa3dfabf1b34b1135c07de321bbc8e0b1d89/benches/samples/sample.cbor
--------------------------------------------------------------------------------
/benches/tokens.txt:
--------------------------------------------------------------------------------
( ) "hello", 42.75
"world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 2 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 3 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 4 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 5 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 6 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 7 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 8 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 9 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 10 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 11 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 12 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 13 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 14 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 15 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 16 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 17 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 18 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 19 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 20 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 21 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 22 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 23 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 24 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 25 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 26 | ( 
) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 27 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 28 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 29 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 30 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 31 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 32 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 33 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 34 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 35 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 36 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 37 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 38 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 39 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 40 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 41 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 42 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 43 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 44 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 45 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 46 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 47 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 48 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 49 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 50 | ( ) "hello", 42.75 "world" haha test true false ,,, 
(((7,) 9 "foo" "bar" 75 51 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 52 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 53 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 54 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 55 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 56 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 57 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 58 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 59 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 60 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 61 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 62 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 63 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 64 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 65 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 66 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 67 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 68 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 69 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 70 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 71 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 72 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 73 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 74 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 75 | ( ) "hello", 42.75 "world" 
haha test true false ,,, (((7,) 9 "foo" "bar" 75 76 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 77 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 78 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 79 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 80 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 81 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 82 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 83 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 84 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 85 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 86 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 87 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 88 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 89 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 90 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 91 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 92 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 93 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 94 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 95 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 96 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 97 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 98 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 99 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 100 | 
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 101 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 102 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 103 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 104 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 105 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 106 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 107 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 108 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 109 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 110 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 111 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 112 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 113 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 114 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 115 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 116 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 117 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 118 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 119 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 120 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 121 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 122 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 123 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 124 | ( ) "hello", 42.75 "world" haha 
test true false ,,, (((7,) 9 "foo" "bar" 75 125 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 126 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 127 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 128 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 129 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 130 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 131 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 132 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 133 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 134 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 135 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 136 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 137 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 138 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 139 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 140 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 141 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 142 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 143 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 144 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 145 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 146 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 147 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 148 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 
"foo" "bar" 75 149 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 150 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 151 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 152 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 153 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 154 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 155 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 156 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 157 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 158 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 159 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 160 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 161 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 162 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 163 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 164 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 165 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 166 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 167 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 168 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 169 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 170 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 171 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 172 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 173 | ( ) 
"hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75

--------------------------------------------------------------------------------
/benches/utils.rs:
--------------------------------------------------------------------------------
use criterion::Criterion;

/// On Unix, criterion is configured with pprof's flamegraph profiler attached.
#[cfg(unix)]
pub fn make_criterion() -> Criterion {
    use pprof::criterion::{Output, PProfProfiler};
    Criterion::default()
        .with_profiler(PProfProfiler::new(1000, Output::Flamegraph(None)))
        .configure_from_args()
}

/// Elsewhere, plain criterion configured from the command line only.
#[cfg(not(unix))]
pub fn make_criterion() -> Criterion {
    Criterion::default().configure_from_args()
}
--------------------------------------------------------------------------------
/examples/brainfuck.rs:
--------------------------------------------------------------------------------
//! This is a Brainfuck parser and interpreter
//! Run it with the following command:
//! cargo run --example brainfuck -- examples/sample.bf

use chumsky::prelude::*;
use std::{
    env, fs,
    io::{self, Read},
};

/// One Brainfuck instruction. `Invalid` is only produced by error recovery on
/// unbalanced brackets and is never executed.
#[derive(Clone)]
enum Instr {
    Invalid,
    Left,
    Right,
    Incr,
    Decr,
    Read,
    Write,
    Loop(Vec<Instr>),
}

/// Parses Brainfuck source into a list of instructions, recovering from
/// mismatched `[`/`]` so multiple errors can be reported at once.
// NOTE(review): error type reconstructed as `Rich<'a, char>` from chumsky's
// published examples — the dump stripped the generics; confirm against upstream.
fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Instr>, extra::Err<Rich<'a, char>>> {
    use Instr::*;
    recursive(|bf| {
        choice((
            just('<').to(Left),
            just('>').to(Right),
            just('+').to(Incr),
            just('-').to(Decr),
            just(',').to(Read),
            just('.').to(Write),
        ))
        .or(bf.delimited_by(just('['), just(']')).map(Loop))
        .recover_with(via_parser(nested_delimiters('[', ']', [], |_| Invalid)))
        // .recover_with(skip_then_retry_until([']']))
        .repeated()
        .collect()
    })
}

const TAPE_LEN: usize = 10_000;

/// Interprets `ast` against a tape of `TAPE_LEN` wrapping byte cells, with the
/// data pointer wrapping at both ends of the tape.
fn execute(ast: &[Instr], ptr: &mut usize, tape: &mut [u8; TAPE_LEN]) {
    use Instr::*;
    for symbol in ast {
        match symbol {
            // Recovery artefacts are filtered out by a failed parse before we
            // ever get here.
            Invalid => unreachable!(),
            Left => *ptr = (*ptr + TAPE_LEN - 1).rem_euclid(TAPE_LEN),
            Right => *ptr = (*ptr + 1).rem_euclid(TAPE_LEN),
            Incr => tape[*ptr] = tape[*ptr].wrapping_add(1),
            Decr => tape[*ptr] = tape[*ptr].wrapping_sub(1),
            #[allow(clippy::unbuffered_bytes)]
            Read => tape[*ptr] = io::stdin().bytes().next().unwrap().unwrap(),
            Write => print!("{}", tape[*ptr] as char),
            Loop(ast) => {
                while tape[*ptr] != 0 {
                    execute(ast, ptr, tape)
                }
            }
        }
    }
}

fn main() {
    let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument"))
        .expect("Failed to read file");

    match parser().parse(src.trim()).into_result() {
        Ok(ast) => execute(&ast, &mut 0, &mut [0; TAPE_LEN]),
        Err(errs) => errs.into_iter().for_each(|e| println!("{e:?}")),
    };
}
--------------------------------------------------------------------------------
/examples/foo.rs:
-------------------------------------------------------------------------------- 1 | //! This is the parser and interpreter for the 'Foo' language. 2 | //! 3 | //! See the tutorial in the guide to learn more about it: https://docs.rs/chumsky/latest/chumsky/guide/index.html 4 | 5 | use chumsky::prelude::*; 6 | 7 | #[derive(Debug)] 8 | enum Expr<'src> { 9 | Num(f64), 10 | Var(&'src str), 11 | 12 | Neg(Box>), 13 | Add(Box>, Box>), 14 | Sub(Box>, Box>), 15 | Mul(Box>, Box>), 16 | Div(Box>, Box>), 17 | 18 | Call(&'src str, Vec>), 19 | Let { 20 | name: &'src str, 21 | rhs: Box>, 22 | then: Box>, 23 | }, 24 | Fn { 25 | name: &'src str, 26 | args: Vec<&'src str>, 27 | body: Box>, 28 | then: Box>, 29 | }, 30 | } 31 | 32 | #[allow(clippy::let_and_return)] 33 | fn parser<'src>() -> impl Parser<'src, &'src str, Expr<'src>> { 34 | let ident = text::ascii::ident().padded(); 35 | 36 | let expr = recursive(|expr| { 37 | let int = text::int(10).map(|s: &str| Expr::Num(s.parse().unwrap())); 38 | 39 | let call = ident 40 | .then( 41 | expr.clone() 42 | .separated_by(just(',')) 43 | .allow_trailing() 44 | .collect::>() 45 | .delimited_by(just('('), just(')')), 46 | ) 47 | .map(|(f, args)| Expr::Call(f, args)); 48 | 49 | let atom = int 50 | .or(expr.delimited_by(just('('), just(')'))) 51 | .or(call) 52 | .or(ident.map(Expr::Var)) 53 | .padded(); 54 | 55 | let op = |c| just(c).padded(); 56 | 57 | let unary = op('-') 58 | .repeated() 59 | .foldr(atom, |_op, rhs| Expr::Neg(Box::new(rhs))); 60 | 61 | let product = unary.clone().foldl( 62 | choice(( 63 | op('*').to(Expr::Mul as fn(_, _) -> _), 64 | op('/').to(Expr::Div as fn(_, _) -> _), 65 | )) 66 | .then(unary) 67 | .repeated(), 68 | |lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs)), 69 | ); 70 | 71 | let sum = product.clone().foldl( 72 | choice(( 73 | op('+').to(Expr::Add as fn(_, _) -> _), 74 | op('-').to(Expr::Sub as fn(_, _) -> _), 75 | )) 76 | .then(product) 77 | .repeated(), 78 | |lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs)), 79 
| ); 80 | 81 | sum 82 | }); 83 | 84 | let decl = recursive(|decl| { 85 | let r#let = text::ascii::keyword("let") 86 | .ignore_then(ident) 87 | .then_ignore(just('=')) 88 | .then(expr.clone()) 89 | .then_ignore(just(';')) 90 | .then(decl.clone()) 91 | .map(|((name, rhs), then)| Expr::Let { 92 | name, 93 | rhs: Box::new(rhs), 94 | then: Box::new(then), 95 | }); 96 | 97 | let r#fn = text::ascii::keyword("fn") 98 | .ignore_then(ident) 99 | .then(ident.repeated().collect::>()) 100 | .then_ignore(just('=')) 101 | .then(expr.clone()) 102 | .then_ignore(just(';')) 103 | .then(decl) 104 | .map(|(((name, args), body), then)| Expr::Fn { 105 | name, 106 | args, 107 | body: Box::new(body), 108 | then: Box::new(then), 109 | }); 110 | 111 | r#let.or(r#fn).or(expr).padded() 112 | }); 113 | 114 | decl 115 | } 116 | 117 | fn eval<'src>( 118 | expr: &'src Expr<'src>, 119 | vars: &mut Vec<(&'src str, f64)>, 120 | funcs: &mut Vec<(&'src str, &'src [&'src str], &'src Expr<'src>)>, 121 | ) -> Result { 122 | match expr { 123 | Expr::Num(x) => Ok(*x), 124 | Expr::Neg(a) => Ok(-eval(a, vars, funcs)?), 125 | Expr::Add(a, b) => Ok(eval(a, vars, funcs)? + eval(b, vars, funcs)?), 126 | Expr::Sub(a, b) => Ok(eval(a, vars, funcs)? - eval(b, vars, funcs)?), 127 | Expr::Mul(a, b) => Ok(eval(a, vars, funcs)? * eval(b, vars, funcs)?), 128 | Expr::Div(a, b) => Ok(eval(a, vars, funcs)? 
/ eval(b, vars, funcs)?), 129 | Expr::Var(name) => { 130 | if let Some((_, val)) = vars.iter().rev().find(|(var, _)| var == name) { 131 | Ok(*val) 132 | } else { 133 | Err(format!("Cannot find variable `{name}` in scope")) 134 | } 135 | } 136 | Expr::Let { name, rhs, then } => { 137 | let rhs = eval(rhs, vars, funcs)?; 138 | vars.push((*name, rhs)); 139 | let output = eval(then, vars, funcs); 140 | vars.pop(); 141 | output 142 | } 143 | Expr::Call(name, args) => { 144 | if let Some((_, arg_names, body)) = 145 | funcs.iter().rev().find(|(var, _, _)| var == name).copied() 146 | { 147 | if arg_names.len() == args.len() { 148 | let mut args = args 149 | .iter() 150 | .map(|arg| eval(arg, vars, funcs)) 151 | .zip(arg_names.iter()) 152 | .map(|(val, name)| Ok((*name, val?))) 153 | .collect::>()?; 154 | let old_vars = vars.len(); 155 | vars.append(&mut args); 156 | let output = eval(body, vars, funcs); 157 | vars.truncate(old_vars); 158 | output 159 | } else { 160 | Err(format!( 161 | "Wrong number of arguments for function `{name}`: expected {}, found {}", 162 | arg_names.len(), 163 | args.len(), 164 | )) 165 | } 166 | } else { 167 | Err(format!("Cannot find function `{name}` in scope")) 168 | } 169 | } 170 | Expr::Fn { 171 | name, 172 | args, 173 | body, 174 | then, 175 | } => { 176 | funcs.push((name, args, body)); 177 | let output = eval(then, vars, funcs); 178 | funcs.pop(); 179 | output 180 | } 181 | } 182 | } 183 | 184 | fn main() { 185 | let usage = "Run `cargo run --example foo -- examples/sample.foo`"; 186 | let src = std::fs::read_to_string(std::env::args().nth(1).expect(usage)).expect(usage); 187 | 188 | match parser().parse(&src).into_result() { 189 | Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) { 190 | Ok(output) => println!("{output}"), 191 | Err(eval_err) => println!("Evaluation error: {eval_err}"), 192 | }, 193 | Err(parse_errs) => parse_errs 194 | .into_iter() 195 | .for_each(|err| println!("Parse error: {err}")), 196 | }; 197 | } 198 | 
-------------------------------------------------------------------------------- /examples/indent.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | 3 | #[derive(Clone, Debug)] 4 | pub enum Stmt { 5 | Expr, 6 | Loop(Vec), 7 | } 8 | 9 | fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { 10 | let expr = just("expr"); // TODO 11 | 12 | let block = recursive(|block| { 13 | let indent = just(' ') 14 | .repeated() 15 | .configure(|cfg, parent_indent| cfg.exactly(*parent_indent)); 16 | 17 | let expr_stmt = expr.then_ignore(text::newline()).to(Stmt::Expr); 18 | let control_flow = just("loop:") 19 | .then(text::newline()) 20 | .ignore_then(block) 21 | .map(Stmt::Loop); 22 | let stmt = expr_stmt.or(control_flow); 23 | 24 | text::whitespace() 25 | .count() 26 | .ignore_with_ctx(stmt.separated_by(indent).collect()) 27 | }); 28 | 29 | block.with_ctx(0) 30 | } 31 | 32 | fn main() { 33 | let stmts = parser().padded().parse( 34 | r#" 35 | expr 36 | expr 37 | loop: 38 | expr 39 | loop: 40 | expr 41 | expr 42 | expr 43 | expr 44 | "#, 45 | ); 46 | println!("{:#?}", stmts.output()); 47 | println!("{:?}", stmts.errors().collect::>()); 48 | } 49 | -------------------------------------------------------------------------------- /examples/io.rs: -------------------------------------------------------------------------------- 1 | use chumsky::{error::LabelError, extra::ParserExtra, input::IoInput, prelude::*, util::MaybeRef}; 2 | use std::{env, fs::File}; 3 | 4 | #[allow(unused)] 5 | #[derive(Debug)] 6 | struct Foo { 7 | name: String, 8 | val: u32, 9 | } 10 | 11 | fn ident<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, String, E> { 12 | any() 13 | .filter(u8::is_ascii_alphabetic) 14 | .repeated() 15 | .at_least(1) 16 | .collect::>() 17 | .map(|v| String::from_utf8_lossy(&v).to_string()) 18 | } 19 | 20 | fn digits<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, String, E> { 21 | any() 22 | 
.filter(u8::is_ascii_digit) 23 | .repeated() 24 | .at_least(1) 25 | .collect::>() 26 | .map(|v| String::from_utf8_lossy(&v).to_string()) 27 | } 28 | 29 | fn parser<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, Vec, E> 30 | where 31 | E::Error: LabelError<'a, IoInput, MaybeRef<'a, u8>>, 32 | { 33 | group((ident(), just(b':').padded(), digits())) 34 | .map(|(name, _, digits)| Foo { 35 | name, 36 | val: digits.parse().unwrap(), 37 | }) 38 | .separated_by(just(b'\n')) 39 | .allow_trailing() 40 | .collect() 41 | } 42 | 43 | fn main() { 44 | let src = File::open(env::args().nth(1).expect("Expected file argument")) 45 | .expect("Failed to open file"); 46 | 47 | let json = parser::>>() 48 | .parse(IoInput::new(src)) 49 | .into_result(); 50 | println!("{json:#?}"); 51 | } 52 | -------------------------------------------------------------------------------- /examples/json.rs: -------------------------------------------------------------------------------- 1 | //! This is a parser for JSON. 2 | //! Run it with the following command: 3 | //! 
cargo run --example json -- examples/sample.json 4 | 5 | use ariadne::{Color, Label, Report, ReportKind, Source}; 6 | use chumsky::prelude::*; 7 | use std::{collections::HashMap, env, fs}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub enum Json { 11 | Invalid, 12 | Null, 13 | Bool(bool), 14 | Str(String), 15 | Num(f64), 16 | Array(Vec), 17 | Object(HashMap), 18 | } 19 | 20 | fn parser<'a>() -> impl Parser<'a, &'a str, Json, extra::Err>> { 21 | recursive(|value| { 22 | let digits = text::digits(10).to_slice(); 23 | 24 | let frac = just('.').then(digits); 25 | 26 | let exp = just('e') 27 | .or(just('E')) 28 | .then(one_of("+-").or_not()) 29 | .then(digits); 30 | 31 | let number = just('-') 32 | .or_not() 33 | .then(text::int(10)) 34 | .then(frac.or_not()) 35 | .then(exp.or_not()) 36 | .to_slice() 37 | .map(|s: &str| s.parse().unwrap()) 38 | .boxed(); 39 | 40 | let escape = just('\\') 41 | .then(choice(( 42 | just('\\'), 43 | just('/'), 44 | just('"'), 45 | just('b').to('\x08'), 46 | just('f').to('\x0C'), 47 | just('n').to('\n'), 48 | just('r').to('\r'), 49 | just('t').to('\t'), 50 | just('u').ignore_then(text::digits(16).exactly(4).to_slice().validate( 51 | |digits, e, emitter| { 52 | char::from_u32(u32::from_str_radix(digits, 16).unwrap()).unwrap_or_else( 53 | || { 54 | emitter.emit(Rich::custom(e.span(), "invalid unicode character")); 55 | '\u{FFFD}' // unicode replacement character 56 | }, 57 | ) 58 | }, 59 | )), 60 | ))) 61 | .ignored() 62 | .boxed(); 63 | 64 | let string = none_of("\\\"") 65 | .ignored() 66 | .or(escape) 67 | .repeated() 68 | .to_slice() 69 | .map(ToString::to_string) 70 | .delimited_by(just('"'), just('"')) 71 | .boxed(); 72 | 73 | let array = value 74 | .clone() 75 | .separated_by(just(',').padded().recover_with(skip_then_retry_until( 76 | any().ignored(), 77 | one_of(",]").ignored(), 78 | ))) 79 | .allow_trailing() 80 | .collect() 81 | .padded() 82 | .delimited_by( 83 | just('['), 84 | just(']') 85 | .ignored() 86 | 
.recover_with(via_parser(end())) 87 | .recover_with(skip_then_retry_until(any().ignored(), end())), 88 | ) 89 | .boxed(); 90 | 91 | let member = string.clone().then_ignore(just(':').padded()).then(value); 92 | let object = member 93 | .clone() 94 | .separated_by(just(',').padded().recover_with(skip_then_retry_until( 95 | any().ignored(), 96 | one_of(",}").ignored(), 97 | ))) 98 | .collect() 99 | .padded() 100 | .delimited_by( 101 | just('{'), 102 | just('}') 103 | .ignored() 104 | .recover_with(via_parser(end())) 105 | .recover_with(skip_then_retry_until(any().ignored(), end())), 106 | ) 107 | .boxed(); 108 | 109 | choice(( 110 | just("null").to(Json::Null), 111 | just("true").to(Json::Bool(true)), 112 | just("false").to(Json::Bool(false)), 113 | number.map(Json::Num), 114 | string.map(Json::Str), 115 | array.map(Json::Array), 116 | object.map(Json::Object), 117 | )) 118 | .recover_with(via_parser(nested_delimiters( 119 | '{', 120 | '}', 121 | [('[', ']')], 122 | |_| Json::Invalid, 123 | ))) 124 | .recover_with(via_parser(nested_delimiters( 125 | '[', 126 | ']', 127 | [('{', '}')], 128 | |_| Json::Invalid, 129 | ))) 130 | .recover_with(skip_then_retry_until( 131 | any().ignored(), 132 | one_of(",]}").ignored(), 133 | )) 134 | .padded() 135 | }) 136 | } 137 | 138 | fn main() { 139 | let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument")) 140 | .expect("Failed to read file"); 141 | 142 | let (json, errs) = parser().parse(src.trim()).into_output_errors(); 143 | println!("{json:#?}"); 144 | errs.into_iter().for_each(|e| { 145 | Report::build(ReportKind::Error, ((), e.span().into_range())) 146 | .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) 147 | .with_message(e.to_string()) 148 | .with_label( 149 | Label::new(((), e.span().into_range())) 150 | .with_message(e.reason().to_string()) 151 | .with_color(Color::Red), 152 | ) 153 | .finish() 154 | .print(Source::from(&src)) 155 | .unwrap() 156 | }); 157 | } 158 | 
-------------------------------------------------------------------------------- /examples/json_fast.rs: -------------------------------------------------------------------------------- 1 | //! This is a parser for JSON. Unlike `json.rs`, it is configured for speed over error quality. 2 | //! Run it with the following command: 3 | //! cargo run --example json_fast -- examples/sample.json 4 | 5 | use chumsky::prelude::*; 6 | use std::{collections::HashMap, env, fs}; 7 | 8 | #[derive(Clone, Debug)] 9 | pub enum Json { 10 | Null, 11 | Bool(bool), 12 | Str(String), 13 | Num(f64), 14 | Array(Vec), 15 | Object(HashMap), 16 | } 17 | 18 | fn parser<'a>() -> impl Parser<'a, &'a str, Json> { 19 | recursive(|value| { 20 | let digits = text::digits(10).to_slice(); 21 | 22 | let frac = just('.').then(digits); 23 | 24 | let exp = just('e') 25 | .or(just('E')) 26 | .then(one_of("+-").or_not()) 27 | .then(digits); 28 | 29 | let number = just('-') 30 | .or_not() 31 | .then(text::int(10)) 32 | .then(frac.or_not()) 33 | .then(exp.or_not()) 34 | .to_slice() 35 | .map(|s: &str| s.parse().unwrap()); 36 | 37 | let escape = just('\\') 38 | .then(choice(( 39 | just('\\'), 40 | just('/'), 41 | just('"'), 42 | just('b').to('\x08'), 43 | just('f').to('\x0C'), 44 | just('n').to('\n'), 45 | just('r').to('\r'), 46 | just('t').to('\t'), 47 | just('u').ignore_then(text::digits(16).exactly(4).to_slice().validate( 48 | |digits, _, emitter| { 49 | char::from_u32(u32::from_str_radix(digits, 16).unwrap()).unwrap_or_else( 50 | || { 51 | emitter.emit(Default::default()); 52 | '\u{FFFD}' // unicode replacement character 53 | }, 54 | ) 55 | }, 56 | )), 57 | ))) 58 | .ignored(); 59 | 60 | let string = none_of("\\\"") 61 | .ignored() 62 | .or(escape) 63 | .repeated() 64 | .to_slice() 65 | .map(ToString::to_string) 66 | .delimited_by(just('"'), just('"')); 67 | 68 | let array = value 69 | .clone() 70 | .separated_by(just(',').padded()) 71 | .allow_trailing() 72 | .collect() 73 | .padded() 74 | 
.delimited_by(just('['), just(']')); 75 | 76 | let member = string.then_ignore(just(':').padded()).then(value); 77 | let object = member 78 | .clone() 79 | .separated_by(just(',').padded()) 80 | .collect() 81 | .padded() 82 | .delimited_by(just('{'), just('}')); 83 | 84 | choice(( 85 | just("null").to(Json::Null), 86 | just("true").to(Json::Bool(true)), 87 | just("false").to(Json::Bool(false)), 88 | number.map(Json::Num), 89 | string.map(Json::Str), 90 | array.map(Json::Array), 91 | object.map(Json::Object), 92 | )) 93 | .padded() 94 | }) 95 | } 96 | 97 | fn main() { 98 | let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument")) 99 | .expect("Failed to read file"); 100 | 101 | let json = parser().parse(src.trim()).unwrap(); 102 | println!("{json:#?}"); 103 | } 104 | -------------------------------------------------------------------------------- /examples/logos.rs: -------------------------------------------------------------------------------- 1 | //! An example of using logos with chumsky to parse sexprs 2 | //! Run it with the following command: 3 | //! 
cargo run --example logos 4 | 5 | use ariadne::{Color, Label, Report, ReportKind, Source}; 6 | use chumsky::{ 7 | input::{Stream, ValueInput}, 8 | prelude::*, 9 | }; 10 | use logos::Logos; 11 | use std::fmt; 12 | 13 | #[derive(Logos, Clone, PartialEq)] 14 | enum Token<'a> { 15 | Error, 16 | 17 | #[regex(r"[+-]?([0-9]*[.])?[0-9]+")] 18 | Float(&'a str), 19 | 20 | #[token("+")] 21 | Add, 22 | #[token("-")] 23 | Sub, 24 | #[token("*")] 25 | Mul, 26 | #[token("/")] 27 | Div, 28 | 29 | #[token("(")] 30 | LParen, 31 | #[token(")")] 32 | RParen, 33 | 34 | #[regex(r"[ \t\f\n]+", logos::skip)] 35 | Whitespace, 36 | } 37 | 38 | impl fmt::Display for Token<'_> { 39 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 40 | match self { 41 | Self::Float(s) => write!(f, "{s}"), 42 | Self::Add => write!(f, "+"), 43 | Self::Sub => write!(f, "-"), 44 | Self::Mul => write!(f, "*"), 45 | Self::Div => write!(f, "/"), 46 | Self::LParen => write!(f, "("), 47 | Self::RParen => write!(f, ")"), 48 | Self::Whitespace => write!(f, ""), 49 | Self::Error => write!(f, ""), 50 | } 51 | } 52 | } 53 | 54 | #[derive(Debug)] 55 | enum SExpr { 56 | Float(f64), 57 | Add, 58 | Sub, 59 | Mul, 60 | Div, 61 | List(Vec), 62 | } 63 | 64 | // This function signature looks complicated, but don't fear! 
We're just saying that this function is generic over 65 | // inputs that: 66 | // - Can have tokens pulled out of them by-value, by cloning (`ValueInput`) 67 | // - Gives us access to slices of the original input (`SliceInput`) 68 | // - Produces tokens of type `Token`, the type we defined above (`Token = Token<'a>`) 69 | // - Produces spans of type `SimpleSpan`, a built-in span type provided by chumsky (`Span = SimpleSpan`) 70 | // The function then returns a parser that: 71 | // - Has an input type of type `I`, the one we declared as a type parameter 72 | // - Produces an `SExpr` as its output 73 | // - Uses `Rich`, a built-in error type provided by chumsky, for error generation 74 | fn parser<'tokens, 'src: 'tokens, I>( 75 | ) -> impl Parser<'tokens, I, SExpr, extra::Err>>> 76 | where 77 | I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>, 78 | { 79 | recursive(|sexpr| { 80 | let atom = select! { 81 | Token::Float(x) => SExpr::Float(x.parse().unwrap()), 82 | Token::Add => SExpr::Add, 83 | Token::Sub => SExpr::Sub, 84 | Token::Mul => SExpr::Mul, 85 | Token::Div => SExpr::Div, 86 | }; 87 | 88 | let list = sexpr 89 | .repeated() 90 | .collect() 91 | .map(SExpr::List) 92 | .delimited_by(just(Token::LParen), just(Token::RParen)); 93 | 94 | atom.or(list) 95 | }) 96 | } 97 | 98 | impl SExpr { 99 | // Recursively evaluate an s-expression 100 | fn eval(&self) -> Result { 101 | match self { 102 | Self::Float(x) => Ok(*x), 103 | Self::Add => Err("Cannot evaluate operator '+'"), 104 | Self::Sub => Err("Cannot evaluate operator '-'"), 105 | Self::Mul => Err("Cannot evaluate operator '*'"), 106 | Self::Div => Err("Cannot evaluate operator '/'"), 107 | Self::List(list) => match &list[..] { 108 | [Self::Add, tail @ ..] => tail.iter().map(SExpr::eval).sum(), 109 | [Self::Mul, tail @ ..] => tail.iter().map(SExpr::eval).product(), 110 | [Self::Sub, init, tail @ ..] => { 111 | Ok(init.eval()? - tail.iter().map(SExpr::eval).sum::>()?) 
112 | } 113 | [Self::Div, init, tail @ ..] => { 114 | Ok(init.eval()? / tail.iter().map(SExpr::eval).product::>()?) 115 | } 116 | _ => Err("Cannot evaluate list"), 117 | }, 118 | } 119 | } 120 | } 121 | 122 | const SRC: &str = r" 123 | (- 124 | (* (+ 4 7.3) 7) 125 | (/ 5 3) 126 | ) 127 | "; 128 | 129 | fn main() { 130 | // Create a logos lexer over the source code 131 | let token_iter = Token::lexer(SRC) 132 | .spanned() 133 | // Convert logos errors into tokens. We want parsing to be recoverable and not fail at the lexing stage, so 134 | // we have a dedicated `Token::Error` variant that represents a token error that was previously encountered 135 | .map(|(tok, span)| match tok { 136 | // Turn the `Range` spans logos gives us into chumsky's `SimpleSpan` via `Into`, because it's easier 137 | // to work with 138 | Ok(tok) => (tok, span.into()), 139 | Err(()) => (Token::Error, span.into()), 140 | }); 141 | 142 | // Turn the token iterator into a stream that chumsky can use for things like backtracking 143 | let token_stream = Stream::from_iter(token_iter) 144 | // Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us 145 | // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string 146 | .map((0..SRC.len()).into(), |(t, s): (_, _)| (t, s)); 147 | 148 | // Parse the token stream with our chumsky parser 149 | match parser().parse(token_stream).into_result() { 150 | // If parsing was successful, attempt to evaluate the s-expression 151 | Ok(sexpr) => match sexpr.eval() { 152 | Ok(out) => println!("Result = {out}"), 153 | Err(err) => println!("Runtime error: {err}"), 154 | }, 155 | // If parsing was unsuccessful, generate a nice user-friendly diagnostic with ariadne. You could also use 156 | // codespan, or whatever other diagnostic library you care about. 
You could even just display-print the errors 157 | // with Rust's built-in `Display` trait, but it's a little crude 158 | Err(errs) => { 159 | for err in errs { 160 | Report::build(ReportKind::Error, ((), err.span().into_range())) 161 | .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) 162 | .with_code(3) 163 | .with_message(err.to_string()) 164 | .with_label( 165 | Label::new(((), err.span().into_range())) 166 | .with_message(err.reason().to_string()) 167 | .with_color(Color::Red), 168 | ) 169 | .finish() 170 | .eprint(Source::from(SRC)) 171 | .unwrap(); 172 | } 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /examples/nested.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | 3 | // This token is a tree: it contains within it a sub-tree of tokens 4 | #[derive(PartialEq, Debug)] 5 | enum Token { 6 | Num(i64), 7 | Add, 8 | Mul, 9 | Parens(Vec), 10 | } 11 | 12 | #[allow(clippy::let_and_return)] 13 | fn parser<'a>() -> impl Parser<'a, &'a [Token], i64> { 14 | recursive(|expr| { 15 | let num = select_ref! { Token::Num(x) => *x }; 16 | let parens = expr 17 | // Here we specify how the parser should come up with the nested tokens 18 | .nested_in(select_ref! 
{ Token::Parens(xs) => xs.as_slice() }); 19 | 20 | let atom = num.or(parens); 21 | 22 | let product = atom 23 | .clone() 24 | .foldl(just(&Token::Mul).ignore_then(atom).repeated(), |a, b| a * b); 25 | 26 | let sum = product 27 | .clone() 28 | .foldl(just(&Token::Add).ignore_then(product).repeated(), |a, b| { 29 | a + b 30 | }); 31 | 32 | sum 33 | }) 34 | } 35 | 36 | fn main() { 37 | // This token tree represents the expression `(2 + 3) * 4` 38 | let tokens = [ 39 | Token::Parens(vec![Token::Num(2), Token::Add, Token::Num(3)]), 40 | Token::Mul, 41 | Token::Num(4), 42 | ]; 43 | 44 | assert_eq!(parser().parse(&tokens).into_result(), Ok(20)); 45 | } 46 | -------------------------------------------------------------------------------- /examples/nested_spans.rs: -------------------------------------------------------------------------------- 1 | use chumsky::{input::BorrowInput, prelude::*}; 2 | 3 | // This token is a tree: it contains within it a sub-tree of tokens 4 | #[derive(PartialEq, Debug)] 5 | enum Token { 6 | Num(i64), 7 | Add, 8 | Mul, 9 | Parens(Vec<(Token, SimpleSpan)>), 10 | } 11 | 12 | #[allow(clippy::let_and_return)] 13 | fn parser<'src, I, M>(make_input: M) -> impl Parser<'src, I, i64> 14 | where 15 | I: BorrowInput<'src, Token = Token, Span = SimpleSpan>, 16 | M: Fn(SimpleSpan, &'src [(Token, SimpleSpan)]) -> I + Clone + 'src, 17 | { 18 | recursive(|expr| { 19 | let num = select_ref! { Token::Num(x) => *x }; 20 | let parens = expr 21 | // Here we specify that `expr` should appear *inside* the parenthesised token tree 22 | .nested_in(select_ref! 
{ Token::Parens(xs) = e => make_input(e.span(), xs) }); 23 | 24 | let atom = num.or(parens); 25 | 26 | let product = atom 27 | .clone() 28 | .foldl(just(&Token::Mul).ignore_then(atom).repeated(), |a, b| a * b); 29 | 30 | let sum = product 31 | .clone() 32 | .foldl(just(&Token::Add).ignore_then(product).repeated(), |a, b| { 33 | a + b 34 | }); 35 | 36 | sum 37 | }) 38 | } 39 | 40 | fn make_input( 41 | eoi: SimpleSpan, 42 | toks: &[(Token, SimpleSpan)], 43 | ) -> impl BorrowInput<'_, Token = Token, Span = SimpleSpan> { 44 | toks.map(eoi, |(t, s)| (t, s)) 45 | } 46 | 47 | fn main() { 48 | // This token tree represents the expression `(2 + 3) * 4` 49 | let tokens = [ 50 | ( 51 | Token::Parens(vec![ 52 | (Token::Num(2), SimpleSpan::new((), 1..2)), 53 | (Token::Add, SimpleSpan::new((), 3..4)), 54 | (Token::Num(3), SimpleSpan::new((), 5..6)), 55 | ]), 56 | SimpleSpan::new((), 0..7), 57 | ), 58 | (Token::Mul, SimpleSpan::new((), 8..9)), 59 | (Token::Num(4), SimpleSpan::new((), 10..11)), 60 | ]; 61 | 62 | let eoi = SimpleSpan::new((), 11..11); // Example EoI 63 | 64 | assert_eq!( 65 | parser(make_input) 66 | .parse(make_input(eoi, &tokens)) 67 | .into_result(), 68 | Ok(20) 69 | ); 70 | } 71 | -------------------------------------------------------------------------------- /examples/pythonic.rs: -------------------------------------------------------------------------------- 1 | /*use chumsky::{zero_copy::prelude::*, BoxStream, Flat}; 2 | use std::ops::Range; 3 | 4 | // Represents the different kinds of delimiters we care about 5 | #[derive(Copy, Clone, Debug)] 6 | enum Delim { 7 | Paren, 8 | Block, 9 | } 10 | 11 | // An 'atomic' token (i.e: it has no child tokens) 12 | #[derive(Clone, Debug)] 13 | enum Token { 14 | Int(u64), 15 | Ident(String), 16 | Op(String), 17 | Open(Delim), 18 | Close(Delim), 19 | } 20 | 21 | // The output of the lexer: a recursive tree of nested tokens 22 | #[derive(Debug, Clone)] 23 | enum TokenTree { 24 | Token(Token), 25 | Tree(Delim, Vec>), 26 | } 
27 | 28 | type Span = Range; 29 | 30 | type Spanned = (T, Span); 31 | 32 | // A parser that turns pythonic code with semantic whitespace into a token tree 33 | fn lexer<'a>() -> impl Parser<'a, str, Vec>> { 34 | let tt = recursive::<'a, str, _, _, _, _, _>(|tt| { 35 | // Define some atomic tokens 36 | let int = text::int::<'a, str, _, _, _>(10) 37 | .from_str() 38 | .unwrapped() 39 | .map(Token::Int); 40 | let ident = text::ascii::ident::<'a, str, _, _, _>().map(|s| Token::Ident(s.to_string())); 41 | let op = one_of("=.:%,") 42 | .repeated() 43 | .at_least(1) 44 | .collect() 45 | .map(Token::Op); 46 | 47 | let single_token = int.or(op).or(ident).map(|t| TokenTree::Token(t.clone())); 48 | 49 | // Tokens surrounded by parentheses get turned into parenthesised token trees 50 | let token_tree = tt 51 | .padded() 52 | .repeated() 53 | .collect() 54 | .delimited_by(just('('), just(')')) 55 | .map(|tts| TokenTree::Tree(Delim::Paren, tts)); 56 | 57 | single_token 58 | .or(token_tree) 59 | .map_with_span(|tt, span| (tt, span)) 60 | }); 61 | 62 | // Whitespace indentation creates code block token trees 63 | text::semantic_indentation(tt, |tts, span| (TokenTree::Tree(Delim::Block, tts), span)) 64 | } 65 | 66 | /// Flatten a series of token trees into a single token stream, ready for feeding into the main parser 67 | fn tts_to_stream( 68 | eoi: Span, 69 | token_trees: Vec>, 70 | ) -> BoxStream<'static, Token, Span> { 71 | use std::iter::once; 72 | 73 | BoxStream::from_nested(eoi, token_trees.into_iter(), |(tt, span)| match tt { 74 | // Single tokens remain unchanged 75 | TokenTree::Token(token) => Flat::Single((token, span)), 76 | // Nested token trees get flattened into their inner contents, surrounded by `Open` and `Close` tokens 77 | TokenTree::Tree(delim, tree) => Flat::Many( 78 | once((TokenTree::Token(Token::Open(delim)), span.clone())) 79 | .chain(tree.into_iter()) 80 | .chain(once((TokenTree::Token(Token::Close(delim)), span))), 81 | ), 82 | }) 83 | } 84 | 85 | fn 
main() { 86 | let code = include_str!("sample.py"); 87 | 88 | // First, lex the code into some nested token trees 89 | let tts = lexer().parse(code).into_output().unwrap(); 90 | 91 | println!("--- Token Trees ---\n{:#?}", tts); 92 | 93 | // Next, flatten 94 | let eoi = 0..code.chars().count(); 95 | let mut token_stream = tts_to_stream(eoi, tts); 96 | 97 | // At this point, we have a token stream that can be fed into the main parser! Because this is just an example, 98 | // we're instead going to just collect the token stream into a vector and print it. 99 | 100 | let flattened_trees = token_stream.fetch_tokens().collect::>(); 101 | 102 | println!("--- Flattened Token Trees ---\n{:?}", flattened_trees); 103 | } 104 | */ 105 | 106 | fn main() {} 107 | -------------------------------------------------------------------------------- /examples/sample.bf: -------------------------------------------------------------------------------- 1 | --[>--->->->++>-<<<<<-------]>--.>---------.>--..+++.>----.>+++++++++.<<.+++.------.<-.>>+. 
2 | -------------------------------------------------------------------------------- /examples/sample.foo: -------------------------------------------------------------------------------- 1 | let five = 5; 2 | let eight = 3 + five; 3 | fn add x y = x + y; 4 | add(five, eight) 5 | -------------------------------------------------------------------------------- /examples/sample.io: -------------------------------------------------------------------------------- 1 | a: 1 2 | b: 2 3 | c: 3 4 | -------------------------------------------------------------------------------- /examples/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "leaving": { 3 | "tail": [ 4 | -2063823378.8597813, 5 | true, 6 | !false, 7 | null, 8 | -153646.6402, 9 | "board", 10 | ]], 11 | "fed": -283765067.9149623, 12 | "cowboy": -355139449!, 13 | "although": --794127593.3922591, 14 | "front": "college",, 15 | "origin": 981339097, 16 | }, 17 | "though": ttrue asasjk, 18 | "invalid": "\uDFFF", 19 | "activity": "value", 20 | "office": -342325541.1937506, 21 | "noise": false, 22 | "acres": "home", 23 | "foo": [!], 24 | } 25 | -------------------------------------------------------------------------------- /examples/sample.mini_ml: -------------------------------------------------------------------------------- 1 | let add = fn x y = x + y in 2 | let mul = fn x y = x * y in 3 | let x = mul (add 5 42) 2 in 4 | add x 3.5 5 | -------------------------------------------------------------------------------- /examples/sample.nrs: -------------------------------------------------------------------------------- 1 | // Run this example with `cargo run --example nano_rust -- examples/sample.nrs` 2 | // Feel free to play around with this sample to see what errors you can generate! 3 | // Spans are propagated to the interpreted AST so you can even invoke runtime 4 | // errors and still have an error message that points to source code emitted! 
5 | 6 | fn mul(x, y) { 7 | x * y 8 | } 9 | 10 | // Calculate the factorial of a number 11 | fn factorial(x) { 12 | // Conditionals are supported! 13 | if x == 0 { 14 | 1 15 | } else { 16 | mul(x, factorial(x - 1)) 17 | } 18 | } 19 | 20 | // The main function 21 | fn main() { 22 | let three = 3; 23 | let meaning_of_life = three * 14 + 1; 24 | 25 | print("Hello, world!"); 26 | print("The meaning of life is..."); 27 | 28 | if meaning_of_life == 42 { 29 | print(meaning_of_life); 30 | } else { 31 | print("...something we cannot know"); 32 | 33 | print("However, I can tell you that the factorial of 10 is..."); 34 | // Function calling 35 | print(factorial(10)); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/sample.py: -------------------------------------------------------------------------------- 1 | import turtle 2 | 3 | board = turtle.Turtle( 4 | foo, 5 | bar, 6 | baz, 7 | ) 8 | 9 | for i in range(6): 10 | board.forward(50) 11 | if i % 2 == 0: 12 | board.right(144) 13 | else: 14 | board.left(72) 15 | 16 | turtle.done() 17 | -------------------------------------------------------------------------------- /examples/zero-copy.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | 3 | #[derive(PartialEq, Debug)] 4 | enum Token<'a> { 5 | Ident(&'a str), 6 | String(&'a str), 7 | } 8 | 9 | // This parser is guaranteed to never allocate! 
10 | fn parser<'a>() -> impl Parser<'a, &'a str, [(SimpleSpan, Token<'a>); 6]> { 11 | let ident = any() 12 | .filter(|c: &char| c.is_alphanumeric()) 13 | .repeated() 14 | .at_least(1) 15 | .to_slice() 16 | .map(Token::Ident); 17 | 18 | let string = just('"') 19 | .then(any().filter(|c: &char| *c != '"').repeated()) 20 | .then(just('"')) 21 | .to_slice() 22 | .map(Token::String); 23 | 24 | ident 25 | .or(string) 26 | .map_with(|token, e| (e.span(), token)) 27 | .padded() 28 | .repeated() 29 | .collect_exactly() 30 | } 31 | 32 | fn main() { 33 | assert_eq!( 34 | parser() 35 | .parse(r#"hello "world" these are "test" tokens"#) 36 | .into_result(), 37 | Ok([ 38 | ((0..5).into(), Token::Ident("hello")), 39 | ((6..13).into(), Token::String("\"world\"")), 40 | ((14..19).into(), Token::Ident("these")), 41 | ((20..23).into(), Token::Ident("are")), 42 | ((24..30).into(), Token::String("\"test\"")), 43 | ((31..37).into(), Token::Ident("tokens")), 44 | ]), 45 | ); 46 | } 47 | -------------------------------------------------------------------------------- /guide/README.md: -------------------------------------------------------------------------------- 1 | # Guide 2 | 3 | Chumsky's guide is intended to be viewed through [docs.rs](https://docs.rs/chumsky/latest/chumsky/guide/index.html). 4 | 5 | ## For contributors 6 | 7 | When modifying the guide, please remember to test the docs via rustdoc. You can do this via this command: 8 | 9 | ``` 10 | RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features 11 | ``` 12 | 13 | Appending `--open` will cause the docs to open in your web browser when built. 
14 | -------------------------------------------------------------------------------- /guide/debugging.md: -------------------------------------------------------------------------------- 1 | # Debugging 2 | 3 | *TODO* 4 | -------------------------------------------------------------------------------- /guide/error_and_recovery.md: -------------------------------------------------------------------------------- 1 | # Error And Recovery 2 | 3 | *TODO* 4 | -------------------------------------------------------------------------------- /guide/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | Setting yourself up to use chumsky can be done in a few easy steps. 4 | 5 | - [Adding chumsky as a dependency](#adding-chumsky-as-a-dependency) 6 | 7 | - [Creating parsers](#creating-parsers) 8 | 9 | - [Using parsers](#using-parsers) 10 | 11 | - [Advice](#advice) 12 | 13 | - [Compiler errors](#compiler-errors) 14 | 15 | - [Compilation times](#compilation-times) 16 | 17 | - [Debugging parsers](#debugging-parsers) 18 | 19 | ## Adding chumsky as a dependency 20 | 21 | Chumsky can be added as a project dependency in one of two ways. 22 | 23 | 1) By executing the following command in your cargo project: 24 | 25 | ```sh 26 | $ cargo add chumsky 27 | ``` 28 | 29 | 2) By adding the following to your `Cargo.toml` file: 30 | 31 | ```toml 32 | chumsky = "0.10" 33 | ``` 34 | 35 |
36 | A note about Minimum Supported Rust Versions (MSRVs) 37 |

38 | Minimum Supported Rust Version (MSRV) 39 | 40 | Chumsky currently has a MSRV of **1.65** due to internal systems that require Generic Associated Types (GATs). If you 41 | find that chumsky fails to compile on versions of Rust later than or equal to 1.65, please 42 | [open a bug report](https://github.com/zesterer/chumsky/issues/new). 43 | 44 | Please note that chumsky's `nightly` feature is exempt from this minimum version requirement and may require up to and 45 | including the latest nightly Rust compiler to work. 46 |

47 |
48 | 49 | Back in your source code, you can use chumsky's prelude to import all commonly used types, traits, and functions. 50 | 51 | ``` 52 | use chumsky::prelude::*; 53 | ``` 54 | 55 | Alternatively, you can import whatever you need manually, but this can get rather tiresome. 56 | 57 | The prelude contains all of the pieces you need to get started, although more complex parsers will likely need to 58 | explicitly import less commonly used items. 59 | 60 | ## Creating parsers 61 | 62 | Because chumsky uses typed combinators to express parsers, parser type signatures can become a little unwieldy. For this 63 | reason, it's common practice to leave the heavy work of dealing with types to the compiler by making use of Rust's 64 | [`impl Trait`](https://doc.rust-lang.org/stable/rust-by-example/trait/impl_trait.html) syntax. 65 | 66 | Here's an example of a typical parser function. We'll go over what each part means. 67 | 68 | ``` 69 | # use chumsky::prelude::*; 70 | // (1) (2) (3) (4) 71 | // _|__ _____|_____ ____|____ |_ 72 | fn parser<'src>() -> impl Parser<'src, &'src str, ()> { 73 | end() // --(5) 74 | } 75 | ``` 76 | 77 | 1. Parsers are parameterised over the lifetime of their inputs. Because we don't yet know what input our parser will be 78 | used to parse, we declare a generic lifetime, `'src`, to allow the parser to work with whatever input lifetime it 79 | needs to work with. 80 | 81 | 2. Because large parsers can have rather unwieldy types, we save ourselves the need to declare the exact return type 82 | with Rust's `impl Trait` syntax. This says to the compiler "we don't actually care what type is returned here, but 83 | it needs to implement the `Parser<'src, &'src, str, ()>` trait, you figure it out". 
Note that, unlike `dyn Trait` 84 | syntax, `impl Trait` has no runtime cost: the compiler simply *hides* the type from you rather than performing 85 | *type erasure*, which would require performing [dynamic dispatch](https://en.wikipedia.org/wiki/Dynamic_dispatch) 86 | while your code is running. 87 | 88 | 3. The first type parameter (i.e: ignoring the lifetime parameter) of the [`Parser`] trait is the input type. Inputs 89 | must implement the [`Input`] trait. Examples of inputs include strings, slices, arrays, [`Stream`]s, and much more. 90 | For now we specify that this parser can only operate upon string slices: but it is also possible to introduce the 91 | input type as a generic type parameter like `I: Input<'src>` instead if you want your parser to be generic across 92 | more than just string slices. 93 | 94 | 4. The second type parameter of the [`Parser`] trait is the output type. This is the type of the value that your parser 95 | will eventually give you, assuming that parsing was successful. For now, we just use an output type of [`()`], i.e: 96 | nothing. 97 | 98 | 5. Because this is just an example parser, the implementation is just a single parser primitive, [`end`]. This is a 99 | primitive that recognises only the end of the input and generates an error if it does not find it. This means that 100 | our parser effectively just checks that we pass it an empty string: anything else will generate an error. 101 | 102 | Note that this function only *creates* the parser: it does not, by itself, perform any parsing. 103 | 104 | ## Using parsers 105 | 106 | It's all very well creating parsers but in order to write useful programs, we need to invoke them. 
Chumsky provides 107 | several functions for this, but the main two are: 108 | 109 | - [`Parser::parse`]: parses an input, generating an output value and/or any errors that were encountered along the way 110 | 111 | - [`Parser::check`]: checks that an input is valid, generating any errors that were encountered along the way 112 | 113 | Both functions give us back a [`ParseResult`]. You can think of this sort of like Rust's regular [`Result`] type, except 114 | it allows both outputs and errors to be generated at the same time (although we won't yet use this functionality). If 115 | you just want parsing to be an all-or-nothing affair, you can use [`ParseResult::into_result`] to convert this into a 116 | regular [`Result`]. 117 | 118 | Let's write some tests for the parser we wrote in the last section. 119 | 120 | ``` 121 | # use chumsky::prelude::*; 122 | # fn parser<'src>() -> impl Parser<'src, &'src str, ()> { end() } 123 | #[test] 124 | fn test_parser() { 125 | // Our parser expects empty strings, so this should parse successfully 126 | assert_eq!(parser().parse("").into_result(), Ok(())); 127 | 128 | // Anything other than an empty string should produce an error 129 | assert!(parser().parse("123").has_errors()); 130 | } 131 | ``` 132 | 133 | Hopefully, this code is fairly self-explanatory. We call `parse()` (the function we wrote in the previous section) to 134 | create an instance of our parsers, and then we call [`Parser::parse`] on it with the desired input to actually do some 135 | parsing. The return value is the result of the parse. 136 | 137 | From here, the world is your lobster: you can move on to the tutorial sections of this guide or you can jump write into 138 | writing parsers. The main repository has [plenty of examples](https://github.com/zesterer/chumsky/tree/main/examples) 139 | to use as a reference and the crate has documentation that will help guide you, with many examples. 
140 | 141 | ## Advice 142 | 143 | Chumsky is a powerful crate with a lot of bells and whistles. It makes sense that there also a lot of ways things can go 144 | wrong too. 145 | 146 | ### Compiler errors 147 | 148 | Chumsky is a combinator crate and leans heavily into Rust's type system (traits, generics, etc.) in order to combine 149 | high performance and ergonomics. Unfortunately, the Rust compiler can still struggle to generate useful error messages 150 | for large chumsky parsers (although things have improved substantially in recent releases!). When you hit a compiler 151 | error you're struggling to understand, you should: 152 | 153 | 1. Always solve the first error that Rust generates. Rust generates errors in the order that it finds them, so the first 154 | error is usually reliably accurate while later errors tend to get increasingly speculative as the compiler needs to 155 | make more and more assumptions about your program to handle prior errors. This often results in many additional 156 | 'phantom errors': errors that muddy the water and make it look like the problem is more complicated to solve than it 157 | actually is. 158 | 159 | 2. Reduce the size of types. Thankfully Rust has recently taken steps to avoid printing extremely long type signatures 160 | out to the terminal. Even so, parser types can still be rather large. You can reduce this problem by commenting out 161 | unnecessary parts of your parser, or using `.simplify()` on parsers that contribute to the error to simplify their 162 | types. 163 | 164 | 3. Complaints about types 'not implementing [`Parser`]' are more often than not a failure to fulfil the obligations that 165 | come with implementing the trait. 
For example, [`recursive()`] requires that the inner parser implements `Clone`: a 166 | parser that doesn't (because, say, you moved a non-cloneable type into the closure) can't be used with 167 | [`recursive()`] and so Rust will translate this, in its parlance, to the type not implementing [`Parser`]. 168 | 169 | ### Compilation times 170 | 171 | Chumsky's heavy use of Rust's type system can result in parsers taking some time to compile. In particular, a common 172 | cause of long compilation times are long chains of [`Parser::or`], which sadly tend to produce exponential behaviour in 173 | Rust's trait solver. 174 | 175 | **Don't fear! There are solutions.** 176 | 177 | 1. Replace long (more than a handful of cases) [`Parser::or`] chains with [`choice`], which has identical behaviour but 178 | gives Rust's trait solver a much easier time. 179 | 180 | 2. Use [`Parser::boxed`] at the end of longer parser chains to perform type erasure, thereby reducing the amount of work 181 | Rust needs to do to understand your parser. If you've been using Rust for a while, your first intention might be to 182 | feel nauseous as such a suggestion: "*allocation?* In *my* high-performance code? *No thanks*". However, remember 183 | that this allocation only occurs on parser *creation*, not during the parsing process. A few strategically placed 184 | `.boxed()` calls has almost no effect on parsing performance (modern CPU branch predictors have absolutely no trouble 185 | eliminating their cost), and in fact can sometimes *improve* performance! 186 | 187 | ### Debugging parsers 188 | 189 | TODO 190 | -------------------------------------------------------------------------------- /guide/intro.md: -------------------------------------------------------------------------------- 1 | # Welcome to chumsky 2 | 3 | Welcome to the guide for chumsky, a parser combinator library for Rust that lets you write high-performance, expressive 4 | parsers with ease. 
5 | 6 | Here you will find useful resources for learning about both chumsky and parsing more broadly, along with many examples. 7 | There is also a tutorial that will guide you through the implementation of a simple programming language (complete with 8 | a simple interpreter) using chumsky for both lexing and parsing. 9 | 10 | This guide is split up into several sections. 11 | 12 | ## Contents 13 | 14 | - [Getting Started](./_00_getting_started/index.html): Getting started with chumsky, including creating and using parsers 15 | - [Key Concepts](./_01_key_concepts/index.html): Some brief theory and an introduction to chumsky's core API features 16 | - [Meet The Parsers](./_02_meet_the_parsers/index.html): An overview of the primitives and combinators provided by chumsky 17 | - [Error and recovery](./_03_error_and_recovery/index.html): Using chumsky to generate and recover from errors 18 | - [Recursion](./_04_recursion/index.html): How chumsky handles recursion and self-reference 19 | - [Debugging](./_05_debugging/index.html): Tips and tricks for debugging chumsky parsers 20 | - [Technical Notes](./_06_technical_notes/index.html): Information about chumsky for advanced users 21 | 22 | - [Foo: A Tutorial](./_07_tutorial/index.html): Example implementation of a simple programming language using chumsky 23 | -------------------------------------------------------------------------------- /guide/key_concepts.md: -------------------------------------------------------------------------------- 1 | # Key Concepts 2 | 3 | This section is mostly a glossary of terms and concepts. Feel free to skip to the sections that most interest you. 
4 | 5 | - [What are parser combinators?](#what-are-parser-combinators) 6 | 7 | - [Parsers](#parsers) 8 | 9 | - [Declarative style](#declarative-style) 10 | 11 | - [Combinators](#combinators) 12 | 13 | - [Primitives](#primitives) 14 | 15 | - [API features](#api-features) 16 | 17 | - [The `Parser` trait](#the-parser-trait) 18 | 19 | - [The `Input` trait](#the-input-trait) 20 | 21 | - [The `Error` trait](#the-error-trait) 22 | 23 | - [The `Span` trait](#the-span-trait) 24 | 25 | # What are parser combinators? 26 | 27 | Chumsky is a **declarative parser combinator** library. Let's break that down to explain what it means. 28 | 29 | ## Parsers 30 | 31 | Parsers are programs (or, for our purposes, *functions*) which take **unstructured** inputs and produce 32 | **structured** outputs according to a set of rules called a **grammar**. 33 | 34 | What counts as structured and unstructured depends on the context. To a 35 | [lexer](https://en.wikipedia.org/wiki/Lexical_analysis), a list of tokens might count as a structured output, but to the 36 | parser that consumes them as an input, they look rather less structured. 37 | 38 | Because the set of possible unstructured inputs to a parser (such as bytes in a text file) is generally larger than 39 | those that can be correctly translated to the structured output according to the grammar rules (such as an 40 | [Abstract Syntax Tree](https://en.m.wikipedia.org/wiki/Abstract_syntax_tree)), parsers need a way to generate **errors** 41 | when these invalid inputs are encountered. 42 | 43 | ## Declarative style 44 | 45 | If you've hand-written a parser before, it was likely in the 46 | [**imperative**](https://en.wikipedia.org/wiki/Imperative_programming) style: which is to say that you used code to tell 47 | your program *how* to parse inputs. This is a valid approach to writing parsers, and many successful parsers are written 48 | in an imperative style. 
49 | 50 | However, imperative-style parsers are often extremely 'noisy': resulting in parser code that is long, difficult to 51 | maintain, is hard to read, time-consuming to optimise, and easy to break, and difficult to debug. 52 | 53 | In comparison, chumsky encourages you to write [**declarative**](https://en.wikipedia.org/wiki/Declarative_programming) 54 | parsers. In the declarative style, instead of telling your code *how* to parse inputs, you tell it *what* to parse. This 55 | is a much more grounded and to-the-point approach to implementing parsers, allowing you to focus on the grammar rules 56 | you want to parse instead of spending ages debugging and maintaining imperative-style parser logic. 57 | 58 | If you search for information about declarative parsers (and in particular, parser combinators), you'll often hear it 59 | said that they're slow and imprecise. While this might have been true in decades gone by, modern optimising compilers - 60 | and in particular Rust's powerful type system - make the development of expressive declarative parsers that are as fast (or 61 | faster!) than hand-written parsers both easy and quick. 62 | 63 | ## Combinators 64 | 65 | Modern software is written primarily through through the use of *functions*. Each function performs a specific task and 66 | may call out to sub-functions. To create a whole program, it is necessary to **combine** functions to get the desired 67 | behaviour of the program as a whole. 68 | 69 | Parser combinators take this approach and apply it to parsing: a parser written with a combinator approach is composed 70 | of many smaller sub-parsers that are each able to process a sub-section of the overall grammar rules. These sub-parsers 71 | are then *combined* with parser operators known as **combinators** that define how they relate to one-another. 72 | 73 | Chumsky comes with many [`combinator`]s that allow the creation of even very complex grammars. 
Indeed, parsers for 74 | entire programming languages may be easily written with chumsky. 75 | 76 | As with most things, it's turtles all the way down: each sub-parser is then composed of sub-sub-parsers, which is itself 77 | composed of sub-sub-sub-parsers, until we reach the most basic elements of the parser logic. 78 | 79 | 🐢 80 | 81 | ## Primitives 82 | 83 | Primitives are the most basic elements of chumsky's parser logic. They are built-in components provided by chumsky 84 | (although it is possible to write your own!). Primitives each perform a very simple action that by itself seems almost 85 | trivial. For example, they might recognise a specific keyword or even just a single character. 86 | 87 | Chumsky comes with several [`primitive`] parsers that each perform a specific job. 88 | 89 | # API features 90 | 91 | ## The [`Parser`] trait 92 | 93 | A fundamental concept in chumsky is that of the [`Parser`] trait. All parser (both combinators and primitives) implement 94 | it and the combinator methods on it are the primary way through which a parser is defined. 95 | 96 | [`Parser`] also provides several *invocation* methods such as [`Parser::parse`] and [`Parser::check`]: these functions 97 | allow you to actually give inputs to your parser and have it generate outputs and/or errors. 98 | 99 | Check out the [`primitive`], [`combinator`], [`mod@recursive`], and [`mod@regex`] modules for examples of some of the parsers 100 | that chumsky provides. 101 | 102 | ## The [`Input`] trait 103 | 104 | The [`Input`] trait is implemented by all types that can act as inputs to chumsky parsers. For example, it is 105 | implemented by types such as: 106 | 107 | - `&[T]`: Array slices 108 | 109 | - `&str`: String slices 110 | 111 | - [`Stream`]: Dynamically-growing token streams 112 | 113 | Certain inputs have special properties. 
For example, it is possible to borrow `&T` tokens from `&[T]` array slices, but 114 | not `char`s from `&str` string slices (due to their UTF-8 encoding). Additionally, some inputs can have sub-slices taken 115 | from them. All of these operations are potentially useful to a parser, so chumsky expresses them with a set of extension 116 | traits that add extra functionality on top of the base [`Input`] trait: 117 | 118 | - [`ValueInput`]: for inputs that can have tokens copied/cloned from them by-value 119 | 120 | - [`BorrowInput`]: for inputs that can have individual tokens borrowed from them 121 | 122 | - [`SliceInput`]: for inputs that can have entire sub-slices of tokens borrowed from them 123 | 124 | - [`StrInput`]: for inputs that 'look like' text strings: ASCII byte slices (`&[u8]`) and UTF-8 string slices (`&str`) 125 | 126 | Taken together, these traits give chumsky the power to use many different types as input: bytes, strings, tokens, 127 | token trees, iterators, and much more besides. 128 | 129 | ## The [`Error`] trait 130 | 131 | As discussed previously, parsers commonly need to be able to handle inputs that don't conform to the grammar rules that 132 | they implement. To do this, they need to be able to emit errors that can then be processed by either the system that 133 | invoked the parser, or by a human user, in order to communicate what went wrong. 
134 | 135 | Chumsky provides support for expressive error generation through its [`Error`] trait, along with a series of built-in 136 | error types that have different tradeoffs: 137 | 138 | - [`EmptyErr`]: the default 'null' error that doesn't record any useful information other than the fact that an error 139 | occurred 140 | 141 | - [`Cheap`]: a very efficient error type that records only the span of the input that triggered the error 142 | 143 | - [`Simple`]: a simplistic error type that records both the span that triggered the error and whatever token was 144 | erroneously found 145 | 146 | - [`Rich`]: a very information-rich error type that records: 147 | 148 | - The span that triggered the error 149 | 150 | - The token that was erroneously found instead 151 | 152 | - A list of tokens or patterns that were expected at the span location instead 153 | 154 | [`Rich`] also supports many additional features such as custom error messages, labelling (see [`Parser::labelled`]) and 155 | error merging. 156 | 157 | Obviously, errors that express more detailed information are also slower to generate and hence reduce the performance of 158 | the overall parser. In benchmarks, we tend to find that parsers using [`Rich`] typically run at about half the speed as 159 | those using [`EmptyErr`], although this is very likely to improve as time goes on. 160 | 161 | It is typical to take the data encoded in these types and give them to a 'diagnostic generator', a tool intended to turn 162 | error information into pretty human-readable displays suited for printing into a terminal, displaying in an IDE, or 163 | whatever other form of output is required. 164 | 165 | ## The [`Span`] trait 166 | 167 | Spans are ranges (usually byte offsets, but you can use whatever is most convenient for you) in the original source code 168 | that can be used to reference sections of the code in error or warning messages. 
169 | 170 | Chumsky has full support for spans and also allows you to define your own custom spans with ease by simply implementing 171 | the [`Span`] trait. Additionally, chumsky comes with a built-in span type, [`SimpleSpan`], and a variety of 172 | implementations for types in Rust's standard library such as [`std::ops::Range`]. 173 | 174 | Chumsky will use its internal knowledge of your parser to generate spans for you whenever you need them, such as for 175 | attaching to nodes of an abstract syntax tree. See [`Parser::map_with`] for more information. 176 | -------------------------------------------------------------------------------- /guide/recursion.md: -------------------------------------------------------------------------------- 1 | # Recursion 2 | 3 | Most non-trivial languages - both spoken and programmed - are *recursive*. Grammars that describe these languages can 4 | express recursion by having a term in the language contain itself (either directly or indirectly). Noam Chomsky 5 | believed that recursion was *so* fundamental to human language that he considered it the primary demarcation between 6 | human and non-human language. This is debated in academic circles, but chumsky treats recursion with similar reverance. 7 | 8 | ## The Problem 9 | 10 | In Rust, writing a recursive function is usually trivial. 11 | 12 | ```rust 13 | fn factorial(x: u32) -> u32 { 14 | if x <= 1 { 15 | 1 16 | } else { 17 | x * factorial(x - 1) 18 | } 19 | } 20 | ``` 21 | 22 | However, chumsky parsers are *values*, not *functions*. Just like [`Iterator`]s, they can be moved around, manipulated, 23 | and invoked in a lazy manner. 
Intuitively, we might think to write a recursive parser to parse `4 + (1 + 2) + 3` like so: 24 | 25 | ```rust compile_fail 26 | use chumsky::prelude::*; 27 | 28 | fn a_parser<'src>() -> impl Parser<'src, &'src str, i32> + Clone { 29 | let int = text::int(10).map(|s: &str| s.parse().unwrap()); 30 | 31 | let atom = choice(( 32 | int, 33 | a_parser().delimited_by(just('('), just(')')), 34 | )) 35 | .padded(); 36 | 37 | atom.clone().foldl( 38 | just('+').padded().ignore_then(atom).repeated(), 39 | |lhs, rhs| lhs + rhs, 40 | ) 41 | } 42 | ``` 43 | 44 | Unfortunately, we hit an error: 45 | 46 | ```text 47 | error[E0720]: cannot resolve opaque type 48 | --> recursion.rs:1:24 49 | | 50 | 1 | fn a_parser<'src>() -> impl Parser<'src, &'src str, i32> + Clone { 51 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ recursive opaque type 52 | ... 53 | 9 | / atom.clone().foldl( 54 | 10 | | just('+').padded().ignore_then(atom).repeated(), 55 | 11 | | |lhs, rhs| lhs + rhs, 56 | 12 | | ) 57 | | | - 58 | | |_____| 59 | | |_____returning here with type `...` 60 | ``` 61 | 62 | We can 'solve' this problem by boxing `a_parser()`, but all it does is convert the compilation error into a run-time 63 | stack overflow. Why? The answer, if we take a step back, should be obvious: our `a_parser` function isn't actually 64 | doing any parsing, it's just *creating* a parser. In order to create a parser, it needs to call itself... which means 65 | calling itself again... forever. We've created infinite recursion. No dice. 66 | 67 | ## A Solution 68 | 69 | To get us out of this somewhat sticky bind, chumsky provides a special combinator called `recursive`. I allows us to 70 | refer to a parser within its own definition - without getting us caught in recursive hot water. 
71 | 72 | ```rust 73 | use chumsky::prelude::*; 74 | 75 | fn a_parser<'src>() -> impl Parser<'src, &'src str, i32> { 76 | recursive(|a_parser| { 77 | let int = text::int(10).map(|s: &str| s.parse().unwrap()); 78 | 79 | let atom = choice(( 80 | int, 81 | a_parser.delimited_by(just('('), just(')')), 82 | )) 83 | .padded(); 84 | 85 | atom.clone().foldl( 86 | just('+').padded().ignore_then(atom).repeated(), 87 | |lhs, rhs| lhs + rhs, 88 | ) 89 | }) 90 | } 91 | ``` 92 | 93 | Notice how our `a_parser` function is no longer recursive: instead, we get the definition of `a_parser` from the 94 | closure parameter. 95 | 96 | ## More Complicated Cases 97 | 98 | More complicated parsers tend to have many mutually-recursive patterns. For example, in Rust's syntax, the 'expression' 99 | and 'type' terms are intertwined: expressions can contain types (in the form of 100 | [turbofish](https://techblog.tonsser.com/posts/what-is-rusts-turbofish) type annotations, or in `as` casts) and types 101 | can contain expressions (in array type sizes or in const generics). 102 | 103 | It is possible to use `recursive` in a 'nested' manner to express such a thing, but chumsky provides a simpler 104 | solution: 105 | [`Recursive::declare`] and [`Recursive::define`]. These functions allow us to *entirely* decouple the declaration and 106 | definition of a recursive parser, giving us the ability to easily declare our mutually-recursive parsers up-front and 107 | then use them in each other's definitions. 108 | -------------------------------------------------------------------------------- /guide/technical_notes.md: -------------------------------------------------------------------------------- 1 | # Technical Notes 2 | 3 | This section contains assorted details about chumsky. Most of this information is irrelevant to beginners, but we 4 | consider it important enough to include for advanced users. 
5 | 6 | - [Technical Notes](#technical-notes) 7 | - [Classification](#classification) 8 | - [Purity and optimisation](#purity-and-optimisation) 9 | 10 | # Classification 11 | 12 | Chumsky is a PEG parser by nature. That is to say, it is possible to parse all known context-free grammars with chumsky. 13 | It has not yet been formally proven that PEG parsers can parse _all_ context-free grammars but, for the sake of using 14 | the library, it is reasonable to assume as much. 15 | 16 | Chumsky also has limited support for context-sensitive parsing. Chumsky's context-sensitive parsing allows previously 17 | parsed elements of the grammar to inform the parsing of future elements in a limited way. 18 | See [`Parser::ignore_with_ctx`] and [`Parser::then_with_ctx`]for more information. 19 | 20 | The term 'PEG++' might be an appropriate description of chumsky, with 'CFG + left context' being a description of the 21 | grammars that it can parse. 22 | 23 | Chumsky can also be extended via [`custom`] and [`ExtParser`], permitting it to theoretically parse any parseable 24 | grammar: but this is probably cheating since doing so requires manually implementing such parser logic. 25 | 26 | # Purity and optimisation 27 | 28 | Chumsky uses a plethora of techniques to improve parser performance. For example, it may skip generating output values 29 | that go unused by the parser (such as the output of `a` in `a.ignore_then(b)`). This also includes combinators like 30 | [`Parser::map`], which accept a user-provided closure. However, chumsky has no control over the behaviour of this 31 | closure, and it's possible to observe the closure being 'optimised away'. 32 | 33 | For this reason, unless otherwise specified, any closures/functions used inline within a chumsky parser should be 34 | *semantically* [pure](https://en.wikipedia.org/wiki/Purely_functional_programming): that is, you should not assume that 35 | they are called any specific number of times. 
This does not mean that they are not permitted to have side effects, but 36 | that those side effects should be irrelevant to the correct functioning of the parser. For example, 37 | [string interning](https://en.wikipedia.org/wiki/String_interning) within [`Parser::map_with`] is an impure operation, 38 | but this impurity does not affect the correct functioning of the parser: interning a string that goes unused can be done 39 | any number of times or not at all without resulting in bad behaviour. 40 | -------------------------------------------------------------------------------- /misc/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zesterer/chumsky/6d07aa3dfabf1b34b1135c07de321bbc8e0b1d89/misc/example.png -------------------------------------------------------------------------------- /misc/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | 36 | 52 | 53 | 55 | 68 | 73 | 74 | 80 | 93 | 98 | 99 | 112 | 117 | 118 | 131 | 136 | 137 | 143 | 144 | 148 | 156 | 159 | 162 | 167 | 172 | 180 | 181 | 184 | 189 | 194 | 202 | 203 | 204 | chumsky 214 | 215 | 216 | -------------------------------------------------------------------------------- /src/blanket.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | impl<'src, T, I, O, E> Parser<'src, I, O, E> for &T 4 | where 5 | T: ?Sized + Parser<'src, I, O, E>, 6 | I: Input<'src>, 7 | E: ParserExtra<'src, I>, 8 | { 9 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult 10 | where 11 | Self: Sized, 12 | { 13 | M::invoke(*self, inp) 14 | } 15 | 16 | go_extra!(O); 17 | } 18 | 19 | impl<'src, T, I, O, E> ConfigParser<'src, I, O, E> for &T 20 | where 21 | T: ?Sized + ConfigParser<'src, I, O, E>, 22 | I: Input<'src>, 23 | E: ParserExtra<'src, I>, 24 | { 25 | type Config = T::Config; 26 | 27 | fn go_cfg( 28 | &self, 29 | inp: &mut 
InputRef<'src, '_, I, E>, 30 | cfg: Self::Config, 31 | ) -> PResult { 32 | M::invoke_cfg(*self, inp, cfg) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/cache.rs: -------------------------------------------------------------------------------- 1 | //! Traits and types that allow parsers to be cached between invocations. 2 | //! 3 | //! # Example 4 | //! 5 | //! ``` 6 | //! #![feature(lazy_cell)] 7 | //! use std::sync::{LazyLock, Arc}; 8 | //! use chumsky::{prelude::*, cache::{Cache, Cached}}; 9 | //! 10 | //! #[derive(Debug, PartialEq)] 11 | //! enum Token<'a> { Ident(&'a str), Int(u64) } 12 | //! 13 | //! #[derive(Default)] 14 | //! struct TokenParser; 15 | //! impl Cached for TokenParser { 16 | //! type Parser<'a> = Arc, extra::Default> + Send + Sync + 'a>; 17 | //! 18 | //! fn make_parser<'a>(self) -> Self::Parser<'a> { 19 | //! let ident = text::ident().map(Token::Ident); 20 | //! let num = text::int(10).from_str().unwrapped().map(Token::Int); 21 | //! Arc::new(ident.or(num)) 22 | //! } 23 | //! } 24 | //! 25 | //! // The parser cache doesn't have a lifetime and so can be stored pretty much anywhere: 26 | //! static PARSER: LazyLock> = LazyLock::new(Cache::default); 27 | //! 28 | //! // The parser can be used from any context simply by calling `.get()` on the cache 29 | //! assert_eq!(PARSER.get().parse("42").into_result(), Ok(Token::Int(42))); 30 | //! assert_eq!(PARSER.get().parse("hello").into_result(), Ok(Token::Ident("hello"))); 31 | //! ``` 32 | 33 | use super::*; 34 | 35 | /// Implementing this trait allows you to cache parsers for use with inputs of different lifetimes, avoiding the 36 | /// need to recreate the parser for each input lifetime. 37 | pub trait Cached { 38 | /// The type of the parser to be cached. 39 | /// 40 | /// Because parsers tend to have unwieldy types, it is recommended to perform type erasure here. 
For example, 41 | /// a parser with input type `&'src str` and output type `Token<'src>` might have one of the following types. 42 | /// 43 | /// ```ignore 44 | /// Boxed<'src, 'src, &'src str, Token<'src>, extra::Default> 45 | /// Arc, extra::Default> + Send + Sync + 'src> 46 | /// ``` 47 | type Parser<'src>; 48 | 49 | /// Create an instance of the parser 50 | fn make_parser<'src>(self) -> Self::Parser<'src>; 51 | } 52 | 53 | /// Allows a parser to be cached for reuse with inputs and outputs of different lifetimes. 54 | pub struct Cache { 55 | parser: C::Parser<'static>, 56 | #[allow(dead_code)] 57 | phantom: EmptyPhantom, 58 | } 59 | 60 | impl Default for Cache { 61 | fn default() -> Self { 62 | Self::new(C::default()) 63 | } 64 | } 65 | 66 | impl Cache { 67 | /// Create a new cached parser. 68 | pub fn new(cacher: C) -> Self { 69 | Self { 70 | parser: cacher.make_parser(), 71 | phantom: EmptyPhantom::new(), 72 | } 73 | } 74 | 75 | /// Get a reference to the cached parser. 76 | /// 77 | /// Because this function is generic over an input lifetime, the returned parser can be used in many 78 | /// different contexts. 79 | pub fn get<'src>(&self) -> &C::Parser<'src> { 80 | // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary 81 | // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to 82 | // 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser 83 | // must be valid for arbitrary lifetimes. 84 | unsafe { &*(&self.parser as *const C::Parser<'_>).cast() } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/either.rs: -------------------------------------------------------------------------------- 1 | //! A small module that implements the [`Parser`] trait for the 2 | //! [`either::Either`](https://docs.rs/either/latest/either/enum.Either.html) type. 
3 | 4 | use super::*; 5 | use ::either::Either; 6 | 7 | impl<'src, L, R, I, O, E> Parser<'src, I, O, E> for Either 8 | where 9 | I: Input<'src>, 10 | E: ParserExtra<'src, I>, 11 | L: Parser<'src, I, O, E>, 12 | R: Parser<'src, I, O, E>, 13 | { 14 | fn go( 15 | &self, 16 | inp: &mut crate::input::InputRef<'src, '_, I, E>, 17 | ) -> crate::private::PResult 18 | where 19 | Self: Sized, 20 | { 21 | match self { 22 | Either::Left(l) => L::go::(l, inp), 23 | Either::Right(r) => R::go::(r, inp), 24 | } 25 | } 26 | 27 | go_extra!(O); 28 | } 29 | 30 | #[cfg(test)] 31 | mod tests { 32 | use crate::{ 33 | prelude::{any, just}, 34 | IterParser, Parser, 35 | }; 36 | use either::Either; 37 | 38 | fn parser<'src>() -> impl Parser<'src, &'src str, Vec> { 39 | any() 40 | .filter(|c: &char| c.is_ascii_digit()) 41 | .repeated() 42 | .at_least(1) 43 | .at_most(3) 44 | .to_slice() 45 | .map(|b: &str| b.parse::().unwrap()) 46 | .padded() 47 | .separated_by(just(',').padded()) 48 | .allow_trailing() 49 | .collect() 50 | .delimited_by(just('['), just(']')) 51 | } 52 | 53 | #[test] 54 | fn either() { 55 | let parsers = [Either::Left(parser()), Either::Right(parser())]; 56 | for parser in parsers { 57 | assert_eq!( 58 | parser.parse("[122 , 23,43, 4, ]").into_result(), 59 | Ok(vec![122, 23, 43, 4]), 60 | ); 61 | assert_eq!( 62 | parser.parse("[0, 3, 6, 900,120]").into_result(), 63 | Ok(vec![0, 3, 6, 900, 120]), 64 | ); 65 | assert_eq!( 66 | parser.parse("[200,400,50 ,0,0, ]").into_result(), 67 | Ok(vec![200, 400, 50, 0, 0]), 68 | ); 69 | 70 | assert!(parser.parse("[1234,123,12,1]").has_errors()); 71 | assert!(parser.parse("[,0, 1, 456]").has_errors()); 72 | assert!(parser.parse("[3, 4, 5, 67 89,]").has_errors()); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/extension.rs: -------------------------------------------------------------------------------- 1 | //! Types and traits that let you write extensions for chumsky. 2 | //! 
3 | //! Chumsky is a complicated crate that performs many internal optimizations to keep your parsers fast. These 4 | //! optimizations mean that chumsky's core is rapidly changing, difficult to work with, and reveals a lot of 5 | //! often-superfluous implementation details that are necessary to account for. 6 | //! 7 | //! In short: it's not a good basis for a stable public API upon which to build a parser ecosystem. 8 | //! 9 | //! To get around this problem, chumsky provides an extension interface (the contents of this module). This is a set of 10 | //! types, traits, and functions that we've decided that we're comfortable providing long-term support for even if 11 | //! the core of chumsky changes in an otherwise breaking manner in the future. 12 | //! 13 | //! The extension API is versioned. See the [`v1`] module for the current implementation of the API. 14 | //! 15 | //! # Example 16 | //! 17 | //! ``` 18 | //! use chumsky::{ 19 | //! prelude::*, 20 | //! error::LabelError, 21 | //! input::InputRef, 22 | //! extension::v1::{ExtParser, Ext}, 23 | //! DefaultExpected, 24 | //! }; 25 | //! 26 | //! // An example extension parser that expects a null byte. 27 | //! pub struct Null_; 28 | //! 29 | //! // We implement `ExtParser` for our null byte parser, plugging us into the chumsky ecosystem 30 | //! impl<'src, I, E> ExtParser<'src, I, (), E> for Null_ 31 | //! where 32 | //! I: Input<'src, Token = u8>, 33 | //! E: extra::ParserExtra<'src, I>, 34 | //! { 35 | //! fn parse(&self, inp: &mut InputRef<'src, '_, I, E>) -> Result<(), E::Error> { 36 | //! let before = inp.cursor(); 37 | //! match inp.next_maybe().as_deref() { 38 | //! // The next token was a null byte, meaning that parsing was successful 39 | //! Some(b'\0') => Ok(()), 40 | //! // The next token was something that wasn't a null byte, generate an error instead 41 | //! found => Err(LabelError::::expected_found( 42 | //! // Expected a null byte 43 | //! [DefaultExpected::Token(b'\0'.into())], 44 | //! 
// Found whatever the token was instead 45 | //! found.copied().map(Into::into), 46 | //! // The span of the error is the span of the token that was found instead 47 | //! inp.span_since(&before), 48 | //! )), 49 | //! } 50 | //! } 51 | //! } 52 | //! 53 | //! // Finally, we create an easy way to name the parser type for users 54 | //! pub type Null = Ext; 55 | //! 56 | //! // It's also conventional to create a function to conveniently use the parser primitive 57 | //! pub fn null() -> Null { 58 | //! Ext(Null_) 59 | //! } 60 | //! 61 | //! // Let's give our parser a test! 62 | //! fn make_parser<'src>() -> impl Parser<'src, &'src [u8], ()> { 63 | //! null() 64 | //! } 65 | //! 66 | //! assert_eq!(make_parser().parse(b"\0").into_result(), Ok(())); 67 | //! assert!(make_parser().parse(b"!").has_errors()); 68 | //! assert!(make_parser().parse(b"").has_errors()); 69 | //! ``` 70 | 71 | use super::*; 72 | 73 | /// Version 1 of the extension API. 74 | /// 75 | /// Versioning the extension API allows us to make significant changes to it in the future without breaking crates that 76 | /// depend on it. 77 | pub mod v1 { 78 | pub use super::current::{Ext, ExtParser}; 79 | } 80 | 81 | mod current { 82 | use super::*; 83 | 84 | /// A trait implemented by extension parsers. 85 | /// 86 | /// Implement this trait, and chumsky will automatically make [`Ext`] implement [`Parser`] for free. 87 | /// 88 | /// This trait is a stable interface that can be used to build on top of chumsky without exposing extension crates to 89 | /// the complex inner workings of chumsky, allowing us to iterate on the core to improve performance without regularly 90 | /// breaking the public API. 
91 | /// 92 | /// If your parser is a combinator and you'd like it to be used like a method (such as chumsky's built-in `a.or(b)` 93 | /// combinator), it is recommended that you implement an extension trait in your own library and have users import 94 | /// it, like so: 95 | /// 96 | /// ``` 97 | /// use chumsky::prelude::*; 98 | /// 99 | /// pub struct FrobnicatedWith { a: A, b: B } 100 | /// 101 | /// pub trait ParserExt<'src, I, O, E> 102 | /// where 103 | /// I: Input<'src>, 104 | /// E: extra::ParserExtra<'src, I> 105 | /// { 106 | /// fn frobnicated_with(self, other: B) -> FrobnicatedWith 107 | /// where 108 | /// Self: Sized, 109 | /// B: Parser<'src, I, O, E>, 110 | /// { 111 | /// FrobnicatedWith { a: self, b: other } 112 | /// } 113 | /// } 114 | /// ``` 115 | /// 116 | /// Now, users can import your trait and do `a.frobnicate_with(b)` as if your parser were native to chumsky! 117 | pub trait ExtParser<'src, I: Input<'src>, O, E: ParserExtra<'src, I>> { 118 | /// Attempt parsing on the given input. 119 | /// 120 | /// See [`InputRef`] for more information about how you can work with parser inputs. 121 | fn parse(&self, inp: &mut InputRef<'src, '_, I, E>) -> Result; 122 | 123 | /// Attempt to check the given input. 124 | /// 125 | /// This function should have **exactly** the same behavior as [`ExtParser::parse`]. If the behavior differs, 126 | /// the result of using the parser is unspecified (note that chumsky tries to aggressively avoid generating 127 | /// outputs if it doesn't use them, and will readily swap between [`ExtParser::parse`] and [`ExtParser::check`] 128 | /// when it thinks that doing so might yield performance benefits). 129 | /// 130 | /// By default, this method just uses `ExtParser::parse`, dropping the output. You may want to override the 131 | /// implementation so that this output is never even generated, thereby improving performance. 
132 | fn check(&self, inp: &mut InputRef<'src, '_, I, E>) -> Result<(), E::Error> { 133 | self.parse(inp).map(|_| ()) 134 | } 135 | } 136 | 137 | /// A type used to wrap parser extensions. 138 | /// 139 | /// Sadly, Rust's trait coherence rules (often called 'orphan rules') prevent us from having a blanket 140 | /// implementation of [`Parser`] for any implementer of [`ExtParser`]. This wrapper type is the compromise solution 141 | /// that keeps things working: wrap your parser types in [`Ext`], and you can start talking to the rest of the 142 | /// chumsky ecosystem. See [`extension`] for an example of how to do this. 143 | /// 144 | /// It's possible that future changes to Rust's coherence rules, or to chumsky's core, may relax this requirement in 145 | /// the future. 146 | /// 147 | /// If you're writing an extension crate for chumsky, you can make things less confusing for your users by putting your 148 | /// parser behind a type alias. 149 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 150 | #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 151 | #[repr(transparent)] 152 | pub struct Ext(pub T); 153 | 154 | impl<'src, I, O, E, P> Parser<'src, I, O, E> for Ext

155 | where 156 | I: Input<'src>, 157 | E: ParserExtra<'src, I>, 158 | P: ExtParser<'src, I, O, E>, 159 | { 160 | #[inline(always)] 161 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 162 | let before = inp.cursor(); 163 | match M::choose(&mut *inp, |inp| self.0.parse(inp), |inp| self.0.check(inp)) { 164 | Ok(out) => Ok(out), 165 | Err(err) => { 166 | inp.add_alt_err(&before.inner, err); 167 | Err(()) 168 | } 169 | } 170 | } 171 | 172 | go_extra!(O); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/extra.rs: -------------------------------------------------------------------------------- 1 | //! Generic error, state and context types for parsers 2 | //! Useful for custom allocation, error handling, context-specific parsers, and more. 3 | 4 | use inspector::Inspector; 5 | pub use inspector::SimpleState; 6 | 7 | use super::*; 8 | 9 | type DefaultErr = EmptyErr; 10 | type DefaultState = (); 11 | type DefaultCtx = (); 12 | 13 | /// A trait for extra types on a [`Parser`] that control the behavior of certain combinators and output. 14 | /// 15 | /// Currently, this consists of the error type emitted, the state type used in the `*_state` combinators, 16 | /// and the context type used in the `*_ctx` and `*configure` parsers. 17 | /// 18 | /// This trait is sealed and so cannot be implemented by other crates because all uses should instead 19 | /// go through the types defined in this module. 20 | pub trait ParserExtra<'a, I>: 'a + Sealed 21 | where 22 | I: Input<'a>, 23 | { 24 | /// Error type to use for the parser. This type must implement [`Error`], and when it fails, 25 | /// the parser will return a set of this type to describe why the failure occurred. 26 | type Error: Error<'a, I> + 'a; 27 | /// State type to use for the parser. 
This is used to provide stateful *output* of the parser, 28 | /// such as interned identifiers or position-dependent name resolution, however *cannot* influence 29 | /// the actual progress of the parser - for that, use [`Self::Context`]. 30 | /// 31 | /// For examples of using this type, see [`Parser::map_with`] or [`Parser::foldl_with`]. 32 | type State: Inspector<'a, I> + 'a; 33 | /// Context used for parser configuration. This is used to provide context-sensitive parsing of *input*. 34 | /// Context-sensitive parsing in chumsky is always left-hand sensitive - context for the parse must originate 35 | /// from an earlier point in the stream than the parser relying on it. This can affect the output of a parser, 36 | /// but for things that don't wish to alter the actual rules of parsing, one should instead prefer [`Self::State`]. 37 | /// 38 | /// For examples of using this type, see [`Parser::ignore_with_ctx`], [`Parser::then_with_ctx`] and [`ConfigParser::configure`]. 39 | type Context: 'a; 40 | } 41 | 42 | /// Use all default extra types. See [`ParserExtra`] for more details. 43 | pub type Default = Full; 44 | 45 | /// Use specified error type, but default other types. See [`ParserExtra`] for more details. 46 | pub type Err = Full; 47 | 48 | /// Use specified state type, but default other types. See [`ParserExtra`] for more details. 49 | /// 50 | /// Use `State` or `Full` as the `Extra` type parameter of a parser to use a custom state type. 51 | /// You can then use `parser().parse_with_state(&mut S)` to parse with a custom state. 52 | /// 53 | /// See [`Parser::map_with`] for examples. 54 | pub type State = Full; 55 | 56 | /// Use specified context type, but default other types. See [`ParserExtra`] for more details. 57 | pub type Context = Full; 58 | 59 | /// Specify all extra types. See [`ParserExtra`] for more details. 
60 | pub struct Full(PhantomData<(E, S, C)>); 61 | 62 | impl Sealed for Full {} 63 | impl<'a, I, E, S, C> ParserExtra<'a, I> for Full 64 | where 65 | I: Input<'a>, 66 | E: Error<'a, I> + 'a, 67 | S: Inspector<'a, I> + 'a, 68 | C: 'a, 69 | { 70 | type Error = E; 71 | type State = S; 72 | type Context = C; 73 | } 74 | -------------------------------------------------------------------------------- /src/guide.rs: -------------------------------------------------------------------------------- 1 | // To generate docs with the guide, use `RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features` 2 | 3 | #![doc = include_str!("../guide/intro.md")] 4 | use super::*; 5 | 6 | pub mod _00_getting_started { 7 | #![doc = include_str!("../guide/getting_started.md")] 8 | use super::*; 9 | } 10 | 11 | pub mod _01_key_concepts { 12 | #![doc = include_str!("../guide/key_concepts.md")] 13 | use super::*; 14 | } 15 | 16 | pub mod _02_meet_the_parsers { 17 | #![doc = include_str!("../guide/meet_the_parsers.md")] 18 | use super::*; 19 | } 20 | 21 | pub mod _03_error_and_recovery { 22 | #![doc = include_str!("../guide/error_and_recovery.md")] 23 | use super::*; 24 | } 25 | 26 | pub mod _04_recursion { 27 | #![doc = include_str!("../guide/recursion.md")] 28 | use super::*; 29 | } 30 | 31 | pub mod _05_debugging { 32 | #![doc = include_str!("../guide/debugging.md")] 33 | use super::*; 34 | } 35 | 36 | pub mod _06_technical_notes { 37 | #![doc = include_str!("../guide/technical_notes.md")] 38 | use super::*; 39 | } 40 | 41 | pub mod _07_tutorial { 42 | #![doc = include_str!("../guide/tutorial.md")] 43 | use super::*; 44 | } 45 | -------------------------------------------------------------------------------- /src/inspector.rs: -------------------------------------------------------------------------------- 1 | //! Parser extensions that inspect the input without modifying it. 2 | //! 3 | //! *"Only one man stood and watched the sky, stood with terrible sadness in his eyes 4 | //! 
and rubber bungs in his ears. He knew exactly what was happening and had known 5 | //! ever since his Sub-Etha Sens-O-Matic had started winking in the dead of night 6 | //! beside his pillar and woken him with a start."* 7 | use super::*; 8 | use crate::input::{Checkpoint, Cursor}; 9 | use core::ops::{Deref, DerefMut}; 10 | 11 | #[allow(unused)] // for intra-doc links 12 | use crate::Parser; 13 | 14 | /// A type that receives event hooks when certain parsing actions occur. 15 | /// 16 | /// If you don't need to receive event hooks, use [`SimpleState`]. 17 | pub trait Inspector<'src, I: Input<'src>> { 18 | /// A type the Inspector can use to revert to a previous state. 19 | /// 20 | /// For implementation reasons, this is required to be `Clone`. 21 | type Checkpoint: Clone; 22 | 23 | /// This function is called when a new token is read from the input stream. 24 | // impl note: this should be called only when `self.cursor` is updated, not when we only peek at the next token. 25 | fn on_token(&mut self, token: &I::Token); 26 | /// This function is called when a combinator saves the current state of the parse. 27 | fn on_save<'parse>(&self, cursor: &Cursor<'src, 'parse, I>) -> Self::Checkpoint; 28 | /// This function is called when a combinator rewinds to an earlier state of the parser. 29 | /// 30 | /// You can use [`Checkpoint::inspector`] to get back the [`Checkpoint`][Self::Checkpoint] 31 | /// you originally created in [`on_save`][Self::on_save]. 
32 | fn on_rewind<'parse>(&mut self, marker: &Checkpoint<'src, 'parse, I, Self::Checkpoint>); 33 | } 34 | 35 | impl<'src, I: Input<'src>> Inspector<'src, I> for () { 36 | type Checkpoint = (); 37 | #[inline(always)] 38 | fn on_token(&mut self, _: &>::Token) {} 39 | #[inline(always)] 40 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint {} 41 | #[inline(always)] 42 | fn on_rewind<'parse>(&mut self, _: &Checkpoint<'src, 'parse, I, Self>) {} 43 | } 44 | 45 | /// A state type that should be accessible directly from `parser.state()` and has no special behavior. 46 | /// 47 | /// This wrapper implements the [`Inspector`] trait for you so you don't have to. 48 | #[derive(Copy, Clone, Default, Debug)] 49 | pub struct SimpleState(pub T); 50 | impl<'src, T, I: Input<'src>> Inspector<'src, I> for SimpleState { 51 | type Checkpoint = (); 52 | #[inline(always)] 53 | fn on_token(&mut self, _: &>::Token) {} 54 | #[inline(always)] 55 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint {} 56 | #[inline(always)] 57 | fn on_rewind<'parse>(&mut self, _: &Checkpoint<'src, 'parse, I, Self::Checkpoint>) {} 58 | } 59 | 60 | impl Deref for SimpleState { 61 | type Target = T; 62 | 63 | fn deref(&self) -> &Self::Target { 64 | &self.0 65 | } 66 | } 67 | 68 | impl DerefMut for SimpleState { 69 | fn deref_mut(&mut self) -> &mut Self::Target { 70 | &mut self.0 71 | } 72 | } 73 | 74 | impl From for SimpleState { 75 | fn from(value: T) -> Self { 76 | Self(value) 77 | } 78 | } 79 | 80 | /// A state type that clones and rolls back its contents during a rewind. 81 | /// 82 | /// This might be useful if you want to use the parser state to, say, count the parsed occurrences of a particular 83 | /// construct. 84 | /// 85 | /// Ideally, you should try to have the [`Clone`] implementation be fairly cheap. 
86 | #[derive(Copy, Clone, Default, Debug)] 87 | pub struct RollbackState(pub T); 88 | impl<'src, T: Clone, I: Input<'src>> Inspector<'src, I> for RollbackState { 89 | type Checkpoint = T; 90 | #[inline(always)] 91 | fn on_token(&mut self, _: &>::Token) {} 92 | #[inline(always)] 93 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint { 94 | self.0.clone() 95 | } 96 | #[inline(always)] 97 | fn on_rewind<'parse>(&mut self, cp: &Checkpoint<'src, 'parse, I, Self::Checkpoint>) { 98 | self.0 = cp.inspector.clone(); 99 | } 100 | } 101 | 102 | impl Deref for RollbackState { 103 | type Target = T; 104 | 105 | fn deref(&self) -> &Self::Target { 106 | &self.0 107 | } 108 | } 109 | 110 | impl DerefMut for RollbackState { 111 | fn deref_mut(&mut self) -> &mut Self::Target { 112 | &mut self.0 113 | } 114 | } 115 | 116 | impl From for RollbackState { 117 | fn from(value: T) -> Self { 118 | Self(value) 119 | } 120 | } 121 | 122 | /// A state type that encapsulates a vector, truncating the vector to its original size during a rewind. 123 | /// 124 | /// This might be useful for representing, say, an arena-style allocator. 
125 | #[derive(Clone, Default, Debug)] 126 | pub struct TruncateState(pub Vec); 127 | impl<'src, T: Clone, I: Input<'src>> Inspector<'src, I> for TruncateState { 128 | type Checkpoint = usize; 129 | #[inline(always)] 130 | fn on_token(&mut self, _: &>::Token) {} 131 | #[inline(always)] 132 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint { 133 | self.0.len() 134 | } 135 | #[inline(always)] 136 | fn on_rewind<'parse>(&mut self, cp: &Checkpoint<'src, 'parse, I, Self::Checkpoint>) { 137 | self.0.truncate(cp.inspector); 138 | } 139 | } 140 | 141 | impl Deref for TruncateState { 142 | type Target = Vec; 143 | 144 | fn deref(&self) -> &Self::Target { 145 | &self.0 146 | } 147 | } 148 | 149 | impl DerefMut for TruncateState { 150 | fn deref_mut(&mut self) -> &mut Self::Target { 151 | &mut self.0 152 | } 153 | } 154 | 155 | impl From> for TruncateState { 156 | fn from(value: Vec) -> Self { 157 | Self(value) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/label.rs: -------------------------------------------------------------------------------- 1 | //! Items related to parser labelling. 2 | 3 | use super::*; 4 | 5 | /// A trait implemented by [`Error`]s that can originate from labelled parsers. See [`Parser::labelled`]. 6 | pub trait LabelError<'src, I: Input<'src>, L>: Sized { 7 | /// Create a new error describing a conflict between expected inputs and that which was actually found. 8 | /// 9 | /// `found` having the value `None` indicates that the end of input was reached, but was not expected. 10 | /// 11 | /// An expected input having the value `None` indicates that the end of input was expected. 12 | fn expected_found>( 13 | expected: E, 14 | found: Option>, 15 | span: I::Span, 16 | ) -> Self; 17 | 18 | /// Fast path for `a.merge(LabelError::expected_found(...))` that may incur less overhead by, for example, reusing allocations. 
19 | #[inline(always)] 20 | fn merge_expected_found>( 21 | self, 22 | expected: E, 23 | found: Option>, 24 | span: I::Span, 25 | ) -> Self 26 | where 27 | Self: Error<'src, I>, 28 | { 29 | self.merge(LabelError::expected_found(expected, found, span)) 30 | } 31 | 32 | /// Fast path for `a = LabelError::expected_found(...)` that may incur less overhead by, for example, reusing allocations. 33 | #[inline(always)] 34 | fn replace_expected_found>( 35 | self, 36 | expected: E, 37 | found: Option>, 38 | span: I::Span, 39 | ) -> Self { 40 | LabelError::expected_found(expected, found, span) 41 | } 42 | 43 | /// Annotate the expected patterns within this parser with the given label. 44 | /// 45 | /// In practice, this usually removes all other labels and expected tokens in favor of a single label that 46 | /// represents the overall pattern. 47 | fn label_with(&mut self, label: L) { 48 | #![allow(unused_variables)] 49 | } 50 | 51 | /// Annotate this error, indicating that it occurred within the context denoted by the given label. 52 | /// 53 | /// A span that runs from the beginning of the context up until the error location is also provided. 54 | /// 55 | /// In practice, this usually means adding the context to a context 'stack', similar to a backtrace. 56 | fn in_context(&mut self, label: L, span: I::Span) { 57 | #![allow(unused_variables)] 58 | } 59 | } 60 | 61 | /// See [`Parser::labelled`]. 62 | #[derive(Copy, Clone)] 63 | pub struct Labelled { 64 | pub(crate) parser: A, 65 | pub(crate) label: L, 66 | pub(crate) is_context: bool, 67 | } 68 | 69 | impl Labelled { 70 | /// Specify that the label should be used as context when reporting errors. 71 | /// 72 | /// This allows error messages to use this label to add information to errors that occur *within* this parser. 
73 | pub fn as_context(self) -> Self { 74 | Self { 75 | is_context: true, 76 | ..self 77 | } 78 | } 79 | } 80 | 81 | impl<'src, I, O, E, A, L> Parser<'src, I, O, E> for Labelled 82 | where 83 | I: Input<'src>, 84 | E: ParserExtra<'src, I>, 85 | A: Parser<'src, I, O, E>, 86 | L: Clone, 87 | E::Error: LabelError<'src, I, L>, 88 | { 89 | #[inline] 90 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 91 | let old_alt = inp.errors.alt.take(); 92 | let before = inp.save(); 93 | let res = self.parser.go::(inp); 94 | 95 | // TODO: Label secondary errors too? 96 | let new_alt = inp.errors.alt.take(); 97 | inp.errors.alt = old_alt; 98 | 99 | if let Some(mut new_alt) = new_alt { 100 | let before_loc = I::cursor_location(&before.cursor().inner); 101 | let new_alt_loc = I::cursor_location(&new_alt.pos); 102 | if new_alt_loc == before_loc { 103 | new_alt.err.label_with(self.label.clone()); 104 | } else if self.is_context && new_alt_loc > before_loc { 105 | // SAFETY: cursors generated by previous call to `InputRef::next` (or similar). 106 | let span = unsafe { I::span(inp.cache, &before.cursor().inner..&new_alt.pos) }; 107 | new_alt.err.in_context(self.label.clone(), span); 108 | } 109 | inp.add_alt_err(&new_alt.pos, new_alt.err); 110 | } 111 | 112 | if self.is_context { 113 | for err in inp.errors.secondary_errors_since(before.err_count) { 114 | // SAFETY: cursors generated by previous call to `InputRef::next` (or similar). 115 | let span = unsafe { I::span(inp.cache, &before.cursor().inner..&err.pos) }; 116 | err.err.in_context(self.label.clone(), span); 117 | } 118 | } 119 | 120 | res 121 | } 122 | 123 | go_extra!(O); 124 | } 125 | -------------------------------------------------------------------------------- /src/number.rs: -------------------------------------------------------------------------------- 1 | //! 
TODO: Add documentation when approved 2 | 3 | use super::*; 4 | pub use lexical::format; 5 | 6 | use lexical::parse_partial; 7 | use lexical::FromLexical; 8 | 9 | /// TODO: Add documentation when approved 10 | pub struct Number { 11 | #[allow(dead_code)] 12 | phantom: EmptyPhantom<(I, E, O)>, 13 | } 14 | 15 | impl Copy for Number {} 16 | impl Clone for Number { 17 | fn clone(&self) -> Self { 18 | *self 19 | } 20 | } 21 | 22 | /// TODO: Add documentation when approved 23 | pub const fn number() -> Number { 24 | Number:: { 25 | phantom: EmptyPhantom::new(), 26 | } 27 | } 28 | 29 | /// A label denoting a parseable number. 30 | pub struct ExpectedNumber; 31 | 32 | impl<'src, const F: u128, I, O, E> Parser<'src, I, O, E> for Number 33 | where 34 | O: FromLexical, 35 | I: SliceInput<'src, Cursor = usize>, 36 | >::Slice: AsRef<[u8]>, 37 | E: ParserExtra<'src, I>, 38 | E::Error: LabelError<'src, I, ExpectedNumber>, 39 | { 40 | #[inline] 41 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 42 | let before = inp.cursor(); 43 | match parse_partial(inp.slice_trailing_inner().as_ref()) { 44 | Ok((out, skip)) => { 45 | // SAFETY: `skip` is no longer than the trailing input's byte length 46 | unsafe { inp.skip_bytes(skip) }; 47 | Ok(M::bind(|| out)) 48 | } 49 | Err(_err) => { 50 | // TODO: Improve error 51 | let span = inp.span_since(&before); 52 | inp.add_alt([ExpectedNumber], None, span); 53 | Err(()) 54 | } 55 | } 56 | } 57 | 58 | go_extra!(O); 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use crate::{extra, Parser}; 65 | use lexical::format::RUST_LITERAL; 66 | 67 | // These have been shamelessly yanked from the rust test-float-parse suite. 
68 | // More specifically: 69 | // 70 | // https://github.com/rust-lang/rust/tree/64185f205dcbd8db255ad6674e43c63423f2369a/src/etc/test-float-parse 71 | mod rust { 72 | use super::*; 73 | 74 | const FLOAT: Number = number(); 75 | 76 | fn validate(test: &str) { 77 | FLOAT.parse(test).unwrap(); 78 | } 79 | 80 | #[test] 81 | fn few_ones() { 82 | let mut pow = vec![]; 83 | for i in 0..63 { 84 | pow.push(1u64 << i); 85 | } 86 | for a in &pow { 87 | for b in &pow { 88 | for c in &pow { 89 | validate(&(a | b | c).to_string()); 90 | } 91 | } 92 | } 93 | } 94 | 95 | #[test] 96 | fn huge_pow10() { 97 | for e in 300..310 { 98 | for i in 0..100000 { 99 | validate(&format!("{i}e{e}")); 100 | } 101 | } 102 | } 103 | 104 | #[test] 105 | fn long_fraction() { 106 | for n in 0..10 { 107 | let digit = char::from_digit(n, 10).unwrap(); 108 | let mut s = "0.".to_string(); 109 | for _ in 0..400 { 110 | s.push(digit); 111 | if s.parse::().is_ok() { 112 | validate(&s); 113 | } 114 | } 115 | } 116 | } 117 | 118 | #[test] 119 | fn short_decimals() { 120 | for e in 1..301 { 121 | for i in 0..10000 { 122 | if i % 10 == 0 { 123 | continue; 124 | } 125 | 126 | validate(&format!("{i}e{e}")); 127 | validate(&format!("{i}e-{e}")); 128 | } 129 | } 130 | } 131 | 132 | #[test] 133 | fn subnorm() { 134 | for bits in 0u32..(1 << 21) { 135 | let single: f32 = f32::from_bits(bits); 136 | validate(&format!("{single:e}")); 137 | let double: f64 = f64::from_bits(bits as u64); 138 | validate(&format!("{double:e}")); 139 | } 140 | } 141 | 142 | #[test] 143 | fn tiny_pow10() { 144 | for e in 301..327 { 145 | for i in 0..100000 { 146 | validate(&format!("{i}e-{e}")); 147 | } 148 | } 149 | } 150 | 151 | #[test] 152 | fn u32_small() { 153 | for i in 0..(1 << 19) { 154 | validate(&i.to_string()); 155 | } 156 | } 157 | 158 | #[test] 159 | fn u64_pow2() { 160 | for exp in 19..64 { 161 | let power: u64 = 1 << exp; 162 | validate(&power.to_string()); 163 | for offset in 1..123 { 164 | validate(&(power + 
offset).to_string()); 165 | validate(&(power - offset).to_string()); 166 | } 167 | } 168 | for offset in 0..123 { 169 | validate(&(u64::MAX - offset).to_string()); 170 | } 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/recovery.rs: -------------------------------------------------------------------------------- 1 | //! Types and functions that relate to error recovery. 2 | //! 3 | //! When chumsky encounters an erroneous input that it cannot parse, it can be told to attempt to recover from the 4 | //! error using a variety of strategies (you can also create your own strategies). 5 | //! 6 | //! There is no silver bullet strategy for error recovery. By definition, if the input to a parser is invalid then the 7 | //! parser can only make educated guesses as to the meaning of the input. Different recovery strategies will work 8 | //! better for different languages, and for different patterns within those languages. 9 | //! 10 | //! Chumsky provides a variety of recovery strategies (each implementing the `Strategy` trait), but it's important to 11 | //! understand that all of 12 | //! 13 | //! - which you apply 14 | //! - where you apply them 15 | //! - what order you apply them 16 | //! 17 | //! will greatly affect the quality of the errors that Chumsky is able to produce, along with the extent to which it 18 | //! is able to recover a useful AST. Where possible, you should attempt more 'specific' recovery strategies first 19 | //! rather than those that mindlessly skip large swathes of the input. 20 | //! 21 | //! It is recommended that you experiment with applying different strategies in different situations and at different 22 | //! levels of the parser to find a configuration that you are happy with. If none of the provided error recovery 23 | //! strategies cover the specific pattern you wish to catch, you can even create your own by digging into Chumsky's 24 | //! 
internals and implementing your own strategies! If you come up with a useful strategy, feel free to open a PR 25 | //! against the [main repository](https://github.com/zesterer/chumsky/)! 26 | 27 | use super::*; 28 | 29 | /// A trait implemented by error recovery strategies. See [`Parser::recover_with`]. 30 | /// 31 | /// This trait is sealed and so cannot be implemented by other crates because it has an unstable API. This may 32 | /// eventually change. For now, if you wish to implement a new strategy, consider using [`via_parser`] or 33 | /// [opening an issue/PR](https://github.com/zesterer/chumsky/issues/new). 34 | pub trait Strategy<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Default>: 35 | Sealed 36 | { 37 | // Attempt to recover from a parsing failure. 38 | // The strategy should properly handle the alt error but is not required to handle rewinding. 39 | #[doc(hidden)] 40 | fn recover>( 41 | &self, 42 | inp: &mut InputRef<'src, '_, I, E>, 43 | parser: &P, 44 | ) -> PResult; 45 | } 46 | 47 | /// See [`via_parser`]. 48 | #[derive(Copy, Clone)] 49 | pub struct ViaParser(A); 50 | 51 | /// Recover via the given recovery parser. 52 | pub fn via_parser(parser: A) -> ViaParser { 53 | ViaParser(parser) 54 | } 55 | 56 | impl Sealed for ViaParser {} 57 | impl<'src, I, O, E, A> Strategy<'src, I, O, E> for ViaParser 58 | where 59 | I: Input<'src>, 60 | A: Parser<'src, I, O, E>, 61 | E: ParserExtra<'src, I>, 62 | { 63 | fn recover>( 64 | &self, 65 | inp: &mut InputRef<'src, '_, I, E>, 66 | _parser: &P, 67 | ) -> PResult { 68 | let alt = inp.take_alt().unwrap(); // Can't fail! 69 | let out = match self.0.go::(inp) { 70 | Ok(out) => out, 71 | Err(()) => { 72 | inp.errors.alt = Some(alt); 73 | return Err(()); 74 | } 75 | }; 76 | inp.emit(None, alt.err); 77 | Ok(out) 78 | } 79 | } 80 | 81 | /// See [`Parser::recover_with`]. 
82 | #[derive(Copy, Clone)] 83 | pub struct RecoverWith { 84 | pub(crate) parser: A, 85 | pub(crate) strategy: S, 86 | } 87 | 88 | impl<'src, I, O, E, A, S> Parser<'src, I, O, E> for RecoverWith 89 | where 90 | I: Input<'src>, 91 | E: ParserExtra<'src, I>, 92 | A: Parser<'src, I, O, E>, 93 | S: Strategy<'src, I, O, E>, 94 | { 95 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 96 | let before = inp.save(); 97 | match self.parser.go::(inp) { 98 | Ok(out) => Ok(out), 99 | Err(()) => { 100 | inp.rewind(before.clone()); 101 | match self.strategy.recover::(inp, &self.parser) { 102 | Ok(out) => Ok(out), 103 | Err(()) => { 104 | // Reset to before fallback attempt 105 | inp.rewind(before); 106 | Err(()) 107 | } 108 | } 109 | } 110 | } 111 | } 112 | 113 | go_extra!(O); 114 | } 115 | 116 | /// See [`skip_then_retry_until`]. 117 | #[must_use] 118 | #[derive(Copy, Clone)] 119 | pub struct SkipThenRetryUntil { 120 | skip: S, 121 | until: U, 122 | } 123 | 124 | impl Sealed for SkipThenRetryUntil {} 125 | impl<'src, I, O, E, S, U> Strategy<'src, I, O, E> for SkipThenRetryUntil 126 | where 127 | I: Input<'src>, 128 | S: Parser<'src, I, (), E>, 129 | U: Parser<'src, I, (), E>, 130 | E: ParserExtra<'src, I>, 131 | { 132 | fn recover>( 133 | &self, 134 | inp: &mut InputRef<'src, '_, I, E>, 135 | parser: &P, 136 | ) -> PResult { 137 | let alt = inp.take_alt().unwrap(); // Can't fail! 
138 | loop { 139 | let before = inp.save(); 140 | if let Ok(()) = self.until.go::(inp) { 141 | inp.errors.alt = Some(alt); 142 | inp.rewind(before); 143 | break Err(()); 144 | } else { 145 | inp.rewind(before); 146 | } 147 | 148 | if let Err(()) = self.skip.go::(inp) { 149 | inp.errors.alt = Some(alt); 150 | break Err(()); 151 | } 152 | 153 | let before = inp.save(); 154 | if let Some(out) = parser.go::(inp).ok().filter(|_| { 155 | inp.errors 156 | .secondary_errors_since(before.err_count) 157 | .is_empty() 158 | }) { 159 | inp.emit(None, alt.err); 160 | break Ok(out); 161 | } else { 162 | inp.errors.alt.take(); 163 | inp.rewind(before); 164 | } 165 | } 166 | } 167 | } 168 | 169 | /// TODO 170 | pub fn skip_then_retry_until(skip: S, until: U) -> SkipThenRetryUntil { 171 | SkipThenRetryUntil { skip, until } 172 | } 173 | 174 | /// See [`skip_until`]. 175 | #[must_use] 176 | #[derive(Copy, Clone)] 177 | pub struct SkipUntil { 178 | skip: S, 179 | until: U, 180 | fallback: F, 181 | } 182 | 183 | impl Sealed for SkipUntil {} 184 | impl<'src, I, O, E, S, U, F> Strategy<'src, I, O, E> for SkipUntil 185 | where 186 | I: Input<'src>, 187 | S: Parser<'src, I, (), E>, 188 | U: Parser<'src, I, (), E>, 189 | F: Fn() -> O, 190 | E: ParserExtra<'src, I>, 191 | { 192 | fn recover>( 193 | &self, 194 | inp: &mut InputRef<'src, '_, I, E>, 195 | _parser: &P, 196 | ) -> PResult { 197 | let alt = inp.take_alt().unwrap(); // Can't fail! 198 | loop { 199 | let before = inp.save(); 200 | if let Ok(()) = self.until.go::(inp) { 201 | inp.emit(None, alt.err); 202 | break Ok(M::bind(|| (self.fallback)())); 203 | } 204 | inp.rewind(before); 205 | 206 | if let Err(()) = self.skip.go::(inp) { 207 | inp.errors.alt = Some(alt); 208 | break Err(()); 209 | } 210 | } 211 | } 212 | } 213 | 214 | /// A recovery parser that skips input until one of several inputs is found. 215 | /// 216 | /// This strategy is very 'stupid' and can result in very poor error generation in some languages. 
Place this strategy 217 | /// after others as a last resort, and be careful about over-using it. 218 | pub fn skip_until(skip: S, until: U, fallback: F) -> SkipUntil { 219 | SkipUntil { 220 | skip, 221 | until, 222 | fallback, 223 | } 224 | } 225 | 226 | /// A recovery parser that searches for a start and end delimiter, respecting nesting. 227 | /// 228 | /// It is possible to specify additional delimiter pairs that are valid in the pattern's context for better errors. For 229 | /// example, you might want to also specify `[('[', ']'), ('{', '}')]` when recovering a parenthesized expression as 230 | /// this can aid in detecting delimiter mismatches. 231 | /// 232 | /// A function that generates a fallback output on recovery is also required. 233 | // TODO: Make this a strategy, add an unclosed_delimiter error 234 | pub fn nested_delimiters<'src, I, O, E, F, const N: usize>( 235 | start: I::Token, 236 | end: I::Token, 237 | others: [(I::Token, I::Token); N], 238 | fallback: F, 239 | ) -> impl Parser<'src, I, O, E> + Clone 240 | where 241 | I: ValueInput<'src>, 242 | I::Token: PartialEq + Clone, 243 | E: extra::ParserExtra<'src, I>, 244 | F: Fn(I::Span) -> O + Clone, 245 | { 246 | // TODO: Does this actually work? TESTS! 
247 | #[allow(clippy::tuple_array_conversions)] 248 | // Clippy is overly eager to fine pointless non-problems 249 | recursive({ 250 | let (start, end) = (start.clone(), end.clone()); 251 | |block| { 252 | let mut many_block = Parser::boxed( 253 | block 254 | .clone() 255 | .delimited_by(just(start.clone()), just(end.clone())), 256 | ); 257 | for (s, e) in &others { 258 | many_block = Parser::boxed( 259 | many_block.or(block.clone().delimited_by(just(s.clone()), just(e.clone()))), 260 | ); 261 | } 262 | 263 | let skip = [start, end] 264 | .into_iter() 265 | .chain(IntoIterator::into_iter(others).flat_map(|(s, e)| [s, e])) 266 | .collect::>(); 267 | 268 | many_block 269 | .or(any().and_is(none_of(skip)).ignored()) 270 | .repeated() 271 | } 272 | }) 273 | .delimited_by(just(start), just(end)) 274 | .map_with(move |_, e| fallback(e.span())) 275 | } 276 | -------------------------------------------------------------------------------- /src/recursive.rs: -------------------------------------------------------------------------------- 1 | //! Recursive parsers (parser that include themselves within their patterns). 2 | //! 3 | //! *“It's unpleasantly like being drunk." 4 | //! "What's so unpleasant about being drunk?" 5 | //! "You ask a glass of water.”* 6 | //! 7 | //! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and 8 | //! definition of parsers more carefully, particularly for mutually-recursive parsers. In such cases, the functions on 9 | //! [`Recursive`] allow for this. 
10 | 11 | use super::*; 12 | 13 | struct OnceCell(core::cell::Cell>); 14 | impl OnceCell { 15 | pub fn new() -> Self { 16 | Self(core::cell::Cell::new(None)) 17 | } 18 | pub fn set(&self, x: T) -> Result<(), ()> { 19 | // SAFETY: Function is not reentrant so we have exclusive access to the inner data 20 | unsafe { 21 | let vacant = (*self.0.as_ptr()).is_none(); 22 | if vacant { 23 | self.0.as_ptr().write(Some(x)); 24 | Ok(()) 25 | } else { 26 | Err(()) 27 | } 28 | } 29 | } 30 | #[inline] 31 | pub fn get(&self) -> Option<&T> { 32 | // SAFETY: We ensure that we never insert twice (so the inner `T` always lives as long as us, if it exists) and 33 | // neither function is possibly reentrant so there's no way we can invalidate mut xor shared aliasing 34 | unsafe { (*self.0.as_ptr()).as_ref() } 35 | } 36 | } 37 | 38 | // TODO: Ensure that this doesn't produce leaks 39 | enum RecursiveInner { 40 | Owned(Rc), 41 | Unowned(rc::Weak), 42 | } 43 | 44 | /// Type for recursive parsers that are defined through a call to `recursive`, and as such 45 | /// need no internal indirection 46 | pub type Direct<'src, 'b, I, O, Extra> = DynParser<'src, 'b, I, O, Extra>; 47 | 48 | /// Type for recursive parsers that are defined through a call to [`Recursive::declare`], and as 49 | /// such require an additional layer of allocation. 50 | pub struct Indirect<'src, 'b, I: Input<'src>, O, Extra: ParserExtra<'src, I>> { 51 | inner: OnceCell>>, 52 | } 53 | 54 | /// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its 55 | /// [definition](Recursive::define). 56 | /// 57 | /// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible. 58 | pub struct Recursive { 59 | inner: RecursiveInner

, 60 | } 61 | 62 | impl<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I>> Recursive> { 63 | /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before 64 | /// being fulled defined. 65 | /// 66 | /// Declaring a parser before defining it is required for a parser to reference itself. 67 | /// 68 | /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser 69 | /// for parsing (i.e: via the [`Parser::parse`] method or similar). 70 | /// 71 | /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if 72 | /// possible. 73 | /// 74 | /// # Examples 75 | /// 76 | /// ``` 77 | /// # use chumsky::prelude::*; 78 | /// #[derive(Debug, PartialEq)] 79 | /// enum Chain { 80 | /// End, 81 | /// Link(char, Box), 82 | /// } 83 | /// 84 | /// // Declare the existence of the parser before defining it so that it can reference itself 85 | /// let mut chain = Recursive::declare(); 86 | /// 87 | /// // Define the parser in terms of itself. 88 | /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list 89 | /// chain.define(just::<_, _, extra::Err>>('+') 90 | /// .then(chain.clone()) 91 | /// .map(|(c, chain)| Chain::Link(c, Box::new(chain))) 92 | /// .or_not() 93 | /// .map(|chain| chain.unwrap_or(Chain::End))); 94 | /// 95 | /// assert_eq!(chain.parse("").into_result(), Ok(Chain::End)); 96 | /// assert_eq!( 97 | /// chain.parse("++").into_result(), 98 | /// Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))), 99 | /// ); 100 | /// ``` 101 | pub fn declare() -> Self { 102 | Recursive { 103 | inner: RecursiveInner::Owned(Rc::new(Indirect { 104 | inner: OnceCell::new(), 105 | })), 106 | } 107 | } 108 | 109 | /// Defines the parser after declaring it, allowing it to be used for parsing. 
110 | // INFO: Clone bound not actually needed, but good to be safe for future compat 111 | #[track_caller] 112 | pub fn define + Clone + 'src + 'b>(&mut self, parser: P) { 113 | let location = *Location::caller(); 114 | self.parser() 115 | .inner 116 | .set(Box::new(parser)) 117 | .unwrap_or_else(|_| { 118 | panic!("recursive parsers can only be defined once, trying to redefine it at {location}") 119 | }); 120 | } 121 | } 122 | 123 | impl Recursive

{ 124 | #[inline] 125 | fn parser(&self) -> Rc

{ 126 | match &self.inner { 127 | RecursiveInner::Owned(x) => x.clone(), 128 | RecursiveInner::Unowned(x) => x 129 | .upgrade() 130 | .expect("Recursive parser used before being defined"), 131 | } 132 | } 133 | } 134 | 135 | impl Clone for Recursive

{ 136 | fn clone(&self) -> Self { 137 | Self { 138 | inner: match &self.inner { 139 | RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()), 140 | RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()), 141 | }, 142 | } 143 | } 144 | } 145 | 146 | #[cfg(feature = "stacker")] 147 | #[inline] 148 | pub(crate) fn recurse R>(f: F) -> R { 149 | stacker::maybe_grow(1024 * 64, 1024 * 1024, f) 150 | } 151 | #[cfg(not(feature = "stacker"))] 152 | #[inline] 153 | pub(crate) fn recurse R>(f: F) -> R { 154 | f() 155 | } 156 | 157 | impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive> 158 | where 159 | I: Input<'src>, 160 | E: ParserExtra<'src, I>, 161 | { 162 | #[inline] 163 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 164 | recurse(move || { 165 | M::invoke( 166 | self.parser() 167 | .inner 168 | .get() 169 | .expect("Recursive parser used before being defined") 170 | .as_ref(), 171 | inp, 172 | ) 173 | }) 174 | } 175 | 176 | go_extra!(O); 177 | } 178 | 179 | impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive> 180 | where 181 | I: Input<'src>, 182 | E: ParserExtra<'src, I>, 183 | { 184 | #[inline] 185 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 186 | recurse(move || M::invoke(&*self.parser(), inp)) 187 | } 188 | 189 | go_extra!(O); 190 | } 191 | 192 | /// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern). 193 | /// 194 | /// The given function must create the parser. The parser must not be used to parse input before this function returns. 195 | /// 196 | /// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`]. 197 | /// 198 | /// The output type of this parser is `O`, the same as the inner parser. 
199 | /// 200 | /// # Examples 201 | /// 202 | /// ``` 203 | /// # use chumsky::prelude::*; 204 | /// #[derive(Debug, PartialEq)] 205 | /// enum Tree<'src> { 206 | /// Leaf(&'src str), 207 | /// Branch(Vec>), 208 | /// } 209 | /// 210 | /// // Parser that recursively parses nested lists 211 | /// let tree = recursive::<_, _, extra::Err>, _, _>(|tree| tree 212 | /// .separated_by(just(',')) 213 | /// .collect::>() 214 | /// .delimited_by(just('['), just(']')) 215 | /// .map(Tree::Branch) 216 | /// .or(text::ascii::ident().map(Tree::Leaf)) 217 | /// .padded()); 218 | /// 219 | /// assert_eq!(tree.parse("hello").into_result(), Ok(Tree::Leaf("hello"))); 220 | /// assert_eq!(tree.parse("[a, b, c]").into_result(), Ok(Tree::Branch(vec![ 221 | /// Tree::Leaf("a"), 222 | /// Tree::Leaf("b"), 223 | /// Tree::Leaf("c"), 224 | /// ]))); 225 | /// // The parser can deal with arbitrarily complex nested lists 226 | /// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]").into_result(), Ok(Tree::Branch(vec![ 227 | /// Tree::Branch(vec![ 228 | /// Tree::Leaf("a"), 229 | /// Tree::Leaf("b"), 230 | /// ]), 231 | /// Tree::Leaf("c"), 232 | /// Tree::Branch(vec![ 233 | /// Tree::Leaf("d"), 234 | /// Tree::Branch(vec![ 235 | /// Tree::Leaf("e"), 236 | /// Tree::Leaf("f"), 237 | /// ]), 238 | /// ]), 239 | /// ]))); 240 | /// ``` 241 | // INFO: Clone bound not actually needed, but good to be safe for future compat 242 | pub fn recursive<'src, 'b, I, O, E, A, F>(f: F) -> Recursive> 243 | where 244 | I: Input<'src>, 245 | E: ParserExtra<'src, I>, 246 | A: Parser<'src, I, O, E> + Clone + 'b, 247 | F: FnOnce(Recursive>) -> A, 248 | { 249 | let rc = Rc::new_cyclic(|rc| { 250 | let rc: rc::Weak> = rc.clone() as _; 251 | let parser = Recursive { 252 | inner: RecursiveInner::Unowned(rc.clone()), 253 | }; 254 | 255 | f(parser) 256 | }); 257 | 258 | Recursive { 259 | inner: RecursiveInner::Owned(rc), 260 | } 261 | } 262 | 
-------------------------------------------------------------------------------- /src/regex.rs: -------------------------------------------------------------------------------- 1 | //! Implementations of regex-based parsers 2 | 3 | use super::*; 4 | use regex_automata::{meta, Anchored, Input as ReInput}; 5 | 6 | /// See [`regex()`]. 7 | pub struct Regex { 8 | regex: meta::Regex, 9 | #[allow(dead_code)] 10 | phantom: EmptyPhantom<(E, I)>, 11 | } 12 | 13 | impl Clone for Regex { 14 | fn clone(&self) -> Self { 15 | Self { 16 | regex: self.regex.clone(), 17 | phantom: EmptyPhantom::new(), 18 | } 19 | } 20 | } 21 | 22 | /// Match input based on a provided regex pattern 23 | pub fn regex(pattern: &str) -> Regex { 24 | Regex { 25 | regex: meta::Regex::new(pattern).expect("Failed to compile regex"), 26 | phantom: EmptyPhantom::new(), 27 | } 28 | } 29 | 30 | impl<'src, S, I, E> Parser<'src, I, &'src S, E> for Regex 31 | where 32 | I: StrInput<'src, Slice = &'src S>, 33 | I::Token: Char, 34 | S: ?Sized + AsRef<[u8]> + 'src, 35 | E: ParserExtra<'src, I>, 36 | { 37 | #[inline] 38 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 39 | let before = inp.cursor(); 40 | 41 | let re_in = ReInput::new(inp.full_slice()) 42 | .anchored(Anchored::Yes) 43 | .range(before.inner..); 44 | 45 | let res = self.regex.find(re_in).map(|m| m.len()); 46 | 47 | match res { 48 | Some(len) => { 49 | let before = inp.cursor(); 50 | // SAFETY: `len` *must* be no greater than the byte length of the remaining string 51 | unsafe { 52 | inp.skip_bytes(len); 53 | } 54 | let after = inp.cursor(); 55 | Ok(M::bind(|| inp.slice(&before..&after))) 56 | } 57 | None => { 58 | // TODO: Improve error 59 | let span = inp.span_since(&before); 60 | inp.add_alt([DefaultExpected::SomethingElse], None, span); 61 | Err(()) 62 | } 63 | } 64 | } 65 | 66 | go_extra!(&'src S); 67 | } 68 | 69 | #[cfg(test)] 70 | mod tests { 71 | use super::*; 72 | 73 | #[test] 74 | fn regex_parser() { 75 | use self::prelude::*; 76 
| use self::regex::*; 77 | 78 | fn parser<'src, S, I>() -> impl Parser<'src, I, Vec<&'src S>> 79 | where 80 | S: ?Sized + AsRef<[u8]> + 'src, 81 | I: StrInput<'src, Slice = &'src S>, 82 | I::Token: Char, 83 | { 84 | regex("[a-zA-Z_][a-zA-Z0-9_]*") 85 | .padded() 86 | .repeated() 87 | .collect() 88 | } 89 | assert_eq!( 90 | parser().parse("hello world this works").into_result(), 91 | Ok(vec!["hello", "world", "this", "works"]), 92 | ); 93 | 94 | assert_eq!( 95 | parser() 96 | .parse(b"hello world this works" as &[_]) 97 | .into_result(), 98 | Ok(vec![ 99 | b"hello" as &[_], 100 | b"world" as &[_], 101 | b"this" as &[_], 102 | b"works" as &[_], 103 | ]), 104 | ); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/span.rs: -------------------------------------------------------------------------------- 1 | //! Types and traits related to spans. 2 | //! 3 | //! *“We demand rigidly defined areas of doubt and uncertainty!”* 4 | //! 5 | //! You can use the [`Span`] trait to connect up chumsky to your compiler's knowledge of the input source. 6 | 7 | use super::*; 8 | 9 | /// A trait that describes a span over a particular range of inputs. 10 | /// 11 | /// Spans typically consist of some context, such as the file they originated from, and a start/end offset. Spans are 12 | /// permitted to overlap one-another. The end offset must always be greater than or equal to the start offset. 13 | /// 14 | /// Span is automatically implemented for [`Range`] and [`(C, Range)`]. 15 | pub trait Span { 16 | /// Extra context used in a span. 17 | /// 18 | /// This is usually some way to uniquely identity the source file that a span originated in such as the file's 19 | /// path, URL, etc. 20 | /// 21 | /// NOTE: Span contexts have no inherent meaning to Chumsky and can be anything. For example, [`Range`]'s 22 | /// implementation of [`Span`] simply uses [`()`] as its context. 
23 | type Context; 24 | 25 | /// A type representing a span's start or end offset from the start of the input. 26 | /// 27 | /// Typically, [`usize`] is used. 28 | /// 29 | /// NOTE: Offsets have no inherently meaning to Chumsky and are not used to decide how to prioritize errors. This 30 | /// means that it's perfectly fine for tokens to have non-continuous spans that bear no relation to their actual 31 | /// location in the input stream. This is useful for languages with an AST-level macro system that need to 32 | /// correctly point to symbols in the macro input when producing errors. 33 | type Offset: Clone; 34 | 35 | /// Create a new span given a context and an offset range. 36 | fn new(context: Self::Context, range: Range) -> Self; 37 | 38 | /// Return the span's context. 39 | fn context(&self) -> Self::Context; 40 | 41 | /// Return the start offset of the span. 42 | fn start(&self) -> Self::Offset; 43 | 44 | /// Return the end offset of the span. 45 | fn end(&self) -> Self::Offset; 46 | 47 | /// Turn this span into a zero-width span that starts and ends at the end of the original. 48 | /// 49 | /// For example, an original span like `3..7` will result in a new span of `7..7`. 50 | fn to_end(&self) -> Self 51 | where 52 | Self: Sized, 53 | { 54 | Self::new(self.context(), self.end()..self.end()) 55 | } 56 | 57 | /// Combine two assumed-contiguous spans together into a larger span that encompasses both (and anything between). 58 | /// 59 | /// For example, spans like `3..5` and `7..8` will result in a unioned span of `3..8`. 60 | /// 61 | /// The spans may overlap one-another, but the start offset must come before the end offset for each span (i.e: 62 | /// each span must be 'well-formed'). If this is not the case, the result is unspecified. 63 | /// 64 | /// # Panics 65 | /// 66 | /// Panics if the [`Self::Context`]s of both spans are not equal. 
67 | fn union(&self, other: Self) -> Self 68 | where 69 | Self::Context: PartialEq + fmt::Debug, 70 | Self::Offset: Ord, 71 | Self: Sized, 72 | { 73 | assert_eq!( 74 | self.context(), 75 | other.context(), 76 | "tried to union two spans with different contexts" 77 | ); 78 | Self::new( 79 | self.context(), 80 | self.start().min(other.start())..self.end().max(other.end()), 81 | ) 82 | } 83 | } 84 | 85 | /// The most basic implementor of `Span` - akin to `Range`, but `Copy` since it's not also 86 | /// an iterator. Also has a `Display` implementation 87 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 88 | #[derive(Copy, Clone, PartialEq, Eq, Hash)] 89 | pub struct SimpleSpan { 90 | /// The start offset of the span. 91 | pub start: T, 92 | /// The end (exclusive) offset of the span. 93 | pub end: T, 94 | /// The context of the span (usually some ID representing the file path the span relates to). 95 | pub context: C, 96 | } 97 | 98 | impl SimpleSpan { 99 | /// Convert this span into a [`std::ops::Range`]. 
100 | pub fn into_range(self) -> Range { 101 | self.start..self.end 102 | } 103 | } 104 | 105 | impl From> for SimpleSpan { 106 | fn from(range: Range) -> Self { 107 | SimpleSpan { 108 | start: range.start, 109 | end: range.end, 110 | context: (), 111 | } 112 | } 113 | } 114 | 115 | impl From> for Range { 116 | fn from(span: SimpleSpan) -> Self { 117 | Range { 118 | start: span.start, 119 | end: span.end, 120 | } 121 | } 122 | } 123 | 124 | impl fmt::Debug for SimpleSpan 125 | where 126 | T: fmt::Debug, 127 | { 128 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 129 | write!(f, "{:?}..{:?}", self.start, self.end) 130 | } 131 | } 132 | 133 | impl fmt::Display for SimpleSpan 134 | where 135 | T: fmt::Display, 136 | { 137 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 138 | write!(f, "{}..{}", self.start, self.end) 139 | } 140 | } 141 | 142 | impl IntoIterator for SimpleSpan 143 | where 144 | Range: Iterator, 145 | { 146 | type IntoIter = Range; 147 | type Item = T; 148 | 149 | fn into_iter(self) -> Self::IntoIter { 150 | self.start..self.end 151 | } 152 | } 153 | 154 | impl Span for SimpleSpan { 155 | type Context = C; 156 | type Offset = T; 157 | 158 | fn new(context: Self::Context, range: Range) -> Self { 159 | Self { 160 | start: range.start, 161 | end: range.end, 162 | context, 163 | } 164 | } 165 | fn context(&self) -> Self::Context { 166 | self.context.clone() 167 | } 168 | fn start(&self) -> Self::Offset { 169 | self.start.clone() 170 | } 171 | fn end(&self) -> Self::Offset { 172 | self.end.clone() 173 | } 174 | } 175 | 176 | impl> Span for (C, S) { 177 | type Context = C; 178 | type Offset = S::Offset; 179 | 180 | fn new(context: Self::Context, range: Range) -> Self { 181 | (context, S::new((), range)) 182 | } 183 | fn context(&self) -> Self::Context { 184 | self.0.clone() 185 | } 186 | fn start(&self) -> Self::Offset { 187 | self.1.start() 188 | } 189 | fn end(&self) -> Self::Offset { 190 | self.1.end() 191 | } 192 | } 193 | 194 | 
impl Span for Range { 195 | type Context = (); 196 | type Offset = T; 197 | 198 | fn new(_context: Self::Context, range: Range) -> Self { 199 | range 200 | } 201 | fn context(&self) -> Self::Context {} 202 | fn start(&self) -> Self::Offset { 203 | self.start.clone() 204 | } 205 | fn end(&self) -> Self::Offset { 206 | self.end.clone() 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/stream.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | /// An input that dynamically pulls tokens from a cached [`Iterator`]. 4 | /// 5 | /// Internally, the stream will pull tokens in batches and cache the results on the heap so as to avoid invoking the 6 | /// iterator every time a new token is required. 7 | /// 8 | /// Note: This input type should be used when the internal iterator type, `I`, is *expensive* to clone. This is usually 9 | /// not the case: you might find that [`IterInput`] performs better. 10 | pub struct Stream { 11 | tokens: Vec, 12 | iter: I, 13 | } 14 | 15 | impl Stream { 16 | /// Create a new stream from an [`Iterator`]. 17 | /// 18 | /// # Example 19 | /// 20 | /// ``` 21 | /// # use chumsky::{prelude::*, input::Stream}; 22 | /// let stream = Stream::from_iter((0..10).map(|i| char::from_digit(i, 10).unwrap())); 23 | /// 24 | /// let parser = any::<_, extra::Err>>().filter(|c: &char| c.is_ascii_digit()).repeated().collect::(); 25 | /// 26 | /// assert_eq!(parser.parse(stream).into_result().as_deref(), Ok("0123456789")); 27 | /// ``` 28 | pub fn from_iter>(iter: J) -> Self { 29 | Self { 30 | tokens: Vec::new(), 31 | iter: iter.into_iter(), 32 | } 33 | } 34 | 35 | /// Box this stream, turning it into a [BoxedStream]. This can be useful in cases where your parser accepts input 36 | /// from several different sources and it needs to work with all of them. 
37 | pub fn boxed<'a>(self) -> BoxedStream<'a, I::Item> 38 | where 39 | I: 'a, 40 | { 41 | Stream { 42 | tokens: self.tokens, 43 | iter: Box::new(self.iter), 44 | } 45 | } 46 | 47 | /// Like [`Stream::boxed`], but yields an [`BoxedExactSizeStream`], which implements [`ExactSizeInput`]. 48 | pub fn exact_size_boxed<'a>(self) -> BoxedExactSizeStream<'a, I::Item> 49 | where 50 | I: ExactSizeIterator + 'a, 51 | { 52 | Stream { 53 | tokens: self.tokens, 54 | iter: Box::new(self.iter), 55 | } 56 | } 57 | } 58 | 59 | /// A stream containing a boxed iterator. See [`Stream::boxed`]. 60 | pub type BoxedStream<'a, T> = Stream + 'a>>; 61 | 62 | /// A stream containing a boxed exact-sized iterator. See [`Stream::exact_size_boxed`]. 63 | pub type BoxedExactSizeStream<'a, T> = Stream + 'a>>; 64 | 65 | impl Sealed for Stream {} 66 | impl<'src, I: Iterator + 'src> Input<'src> for Stream 67 | where 68 | I::Item: Clone, 69 | { 70 | type Span = SimpleSpan; 71 | 72 | type Token = I::Item; 73 | type MaybeToken = I::Item; 74 | 75 | type Cursor = usize; 76 | 77 | type Cache = Self; 78 | 79 | #[inline(always)] 80 | fn begin(self) -> (Self::Cursor, Self::Cache) { 81 | (0, self) 82 | } 83 | 84 | #[inline] 85 | fn cursor_location(cursor: &Self::Cursor) -> usize { 86 | *cursor 87 | } 88 | 89 | #[inline(always)] 90 | unsafe fn next_maybe( 91 | this: &mut Self::Cache, 92 | cursor: &mut Self::Cursor, 93 | ) -> Option { 94 | Self::next(this, cursor) 95 | } 96 | 97 | #[inline(always)] 98 | unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { 99 | (*range.start..*range.end).into() 100 | } 101 | } 102 | 103 | impl<'src, I: ExactSizeIterator + 'src> ExactSizeInput<'src> for Stream 104 | where 105 | I::Item: Clone, 106 | { 107 | #[inline(always)] 108 | unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { 109 | (*range.start..this.tokens.len() + this.iter.len()).into() 110 | } 111 | } 112 | 113 | impl<'src, I: Iterator + 'src> 
ValueInput<'src> for Stream 114 | where 115 | I::Item: Clone, 116 | { 117 | #[inline] 118 | unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { 119 | // Pull new items into the vector if we need them 120 | if this.tokens.len() <= *cursor { 121 | this.tokens.extend((&mut this.iter).take(512)); 122 | } 123 | 124 | // Get the token at the given cursor 125 | this.tokens.get(*cursor).map(|tok| { 126 | *cursor += 1; 127 | tok.clone() 128 | }) 129 | } 130 | } 131 | 132 | /// An input that dynamically pulls tokens from an [`Iterator`]. 133 | /// 134 | /// This input type supports rewinding by [`Clone`]-ing the iterator. It is recommended that your iterator is very 135 | /// cheap to clone. If this is not the case, consider using [`Stream`] instead, which caches generated tokens 136 | /// internally. 137 | pub struct IterInput { 138 | iter: I, 139 | eoi: S, 140 | } 141 | 142 | impl IterInput { 143 | /// Create a new [`IterInput`] with the given iterator, and end of input span. 
144 | pub fn new(iter: I, eoi: S) -> Self { 145 | Self { iter, eoi } 146 | } 147 | } 148 | 149 | impl<'src, I, T: 'src, S> Input<'src> for IterInput 150 | where 151 | I: Iterator + Clone + 'src, 152 | S: Span + 'src, 153 | { 154 | type Cursor = (I, usize, Option); 155 | type Span = S; 156 | 157 | type Token = T; 158 | type MaybeToken = T; 159 | 160 | type Cache = S; // eoi 161 | 162 | #[inline] 163 | fn begin(self) -> (Self::Cursor, Self::Cache) { 164 | ((self.iter, 0, None), self.eoi) 165 | } 166 | 167 | #[inline] 168 | fn cursor_location(cursor: &Self::Cursor) -> usize { 169 | cursor.1 170 | } 171 | 172 | unsafe fn next_maybe( 173 | _eoi: &mut Self::Cache, 174 | cursor: &mut Self::Cursor, 175 | ) -> Option { 176 | cursor.0.next().map(|(tok, span)| { 177 | cursor.1 += 1; 178 | cursor.2 = Some(span.end()); 179 | tok 180 | }) 181 | } 182 | 183 | unsafe fn span(eoi: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { 184 | match range.start.0.clone().next() { 185 | Some((_, s)) => { 186 | let end = range.end.2.clone().unwrap_or_else(|| eoi.end()); 187 | S::new(eoi.context(), s.start()..end) 188 | } 189 | None => S::new(eoi.context(), eoi.end()..eoi.end()), 190 | } 191 | } 192 | } 193 | 194 | // impl<'src, I, S> ExactSizeInput<'src> for IterInput 195 | // where 196 | // I: Iterator + Clone + 'src, 197 | // S: Span + 'src, 198 | // { 199 | // #[inline(always)] 200 | // unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { 201 | // (*range.start..this.tokens.len() + cursor.0.len()).into() 202 | // } 203 | // } 204 | 205 | impl<'src, I, T: 'src, S> ValueInput<'src> for IterInput 206 | where 207 | I: Iterator + Clone + 'src, 208 | S: Span + 'src, 209 | { 210 | #[inline] 211 | unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { 212 | Self::next_maybe(this, cursor) 213 | } 214 | } 215 | 216 | #[test] 217 | fn map_tuple() { 218 | fn parser<'src, I: Input<'src, Token = char>>() -> impl Parser<'src, I, 
char> { 219 | just('h') 220 | } 221 | 222 | let stream = Stream::from_iter(core::iter::once(('h', 0..1))).boxed(); 223 | let stream = stream.map(0..10, |(t, s)| (t, s)); 224 | 225 | assert_eq!(parser().parse(stream).into_result(), Ok('h')); 226 | } 227 | -------------------------------------------------------------------------------- /src/tokio.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use bytes::Bytes; 4 | 5 | impl<'src> Input<'src> for Bytes { 6 | type Cursor = usize; 7 | type Span = SimpleSpan; 8 | 9 | type Token = u8; 10 | type MaybeToken = u8; 11 | 12 | type Cache = Self; 13 | 14 | #[inline] 15 | fn begin(self) -> (Self::Cursor, Self::Cache) { 16 | (0, self) 17 | } 18 | 19 | #[inline] 20 | fn cursor_location(cursor: &Self::Cursor) -> usize { 21 | *cursor 22 | } 23 | 24 | #[inline(always)] 25 | unsafe fn next_maybe( 26 | this: &mut Self::Cache, 27 | cursor: &mut Self::Cursor, 28 | ) -> Option { 29 | if let Some(tok) = this.get(*cursor) { 30 | *cursor += 1; 31 | Some(*tok) 32 | } else { 33 | None 34 | } 35 | } 36 | 37 | #[inline(always)] 38 | unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { 39 | (*range.start..*range.end).into() 40 | } 41 | } 42 | 43 | impl<'src> ExactSizeInput<'src> for Bytes { 44 | #[inline(always)] 45 | unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { 46 | (*range.start..this.len()).into() 47 | } 48 | } 49 | 50 | impl Sealed for Bytes {} 51 | impl<'src> StrInput<'src> for Bytes { 52 | #[doc(hidden)] 53 | fn stringify(slice: Self::Slice) -> String { 54 | slice 55 | .iter() 56 | // .map(|e| core::ascii::Char::from_u8(e).unwrap_or(AsciiChar::Substitute).to_char()) 57 | .map(|e| char::from(*e)) 58 | .collect() 59 | } 60 | } 61 | 62 | impl<'src> SliceInput<'src> for Bytes { 63 | type Slice = Bytes; 64 | 65 | #[inline(always)] 66 | fn full_slice(this: &mut Self::Cache) -> Self::Slice { 67 | 
this.clone() 68 | } 69 | 70 | #[inline(always)] 71 | unsafe fn slice(this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { 72 | this.slice(*range.start..*range.end) 73 | } 74 | 75 | #[inline(always)] 76 | unsafe fn slice_from(this: &mut Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { 77 | this.slice(*from.start..) 78 | } 79 | } 80 | 81 | impl<'src> ValueInput<'src> for Bytes { 82 | #[inline(always)] 83 | unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { 84 | Self::next_maybe(this, cursor) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | //! Utility items used throughout the crate. 2 | 3 | use super::*; 4 | 5 | use core::{ 6 | hash::Hasher, 7 | ops::{Deref, DerefMut}, 8 | }; 9 | 10 | /// A value that may be a `T` or a mutable reference to a `T`. 11 | pub type MaybeMut<'a, T> = Maybe; 12 | 13 | /// A value that may be a `T` or a shared reference to a `T`. 14 | pub type MaybeRef<'a, T> = Maybe; 15 | 16 | /// A type that can represent a borrowed reference to a `T` or a value of `T`. 17 | /// 18 | /// Used internally to facilitate zero-copy manipulation of tokens during error generation (see [`Error`]). 19 | #[derive(Copy, Clone)] 20 | pub enum Maybe> { 21 | /// We have a reference to `T`. 22 | Ref(R), 23 | /// We have a value of `T`. 
24 | Val(T), 25 | } 26 | 27 | impl> PartialEq for Maybe { 28 | #[inline] 29 | fn eq(&self, other: &Self) -> bool { 30 | **self == **other 31 | } 32 | } 33 | 34 | impl> Eq for Maybe {} 35 | 36 | impl> PartialOrd for Maybe { 37 | #[inline] 38 | fn partial_cmp(&self, other: &Self) -> Option { 39 | (**self).partial_cmp(&**other) 40 | } 41 | } 42 | 43 | impl> Ord for Maybe { 44 | #[inline] 45 | fn cmp(&self, other: &Self) -> Ordering { 46 | (**self).cmp(&**other) 47 | } 48 | } 49 | 50 | impl> Hash for Maybe { 51 | #[inline] 52 | fn hash(&self, state: &mut H) { 53 | T::hash(&**self, state) 54 | } 55 | } 56 | 57 | impl> fmt::Debug for Maybe { 58 | #[inline] 59 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 60 | T::fmt(&**self, f) 61 | } 62 | } 63 | 64 | impl> Maybe { 65 | /// Convert this [`Maybe`] into a `T`, cloning the inner value if necessary. 66 | #[inline] 67 | pub fn into_inner(self) -> T 68 | where 69 | T: Clone, 70 | { 71 | match self { 72 | Self::Ref(x) => x.clone(), 73 | Self::Val(x) => x, 74 | } 75 | } 76 | 77 | /// Convert this [`Maybe`] into an owned version of itself, cloning the inner reference if required. 
78 | #[inline] 79 | pub fn into_owned(self) -> Maybe 80 | where 81 | T: Clone, 82 | U: Deref, 83 | { 84 | Maybe::Val(self.into_inner()) 85 | } 86 | } 87 | 88 | impl> Deref for Maybe { 89 | type Target = T; 90 | 91 | #[inline] 92 | fn deref(&self) -> &Self::Target { 93 | match self { 94 | Self::Ref(x) => x, 95 | Self::Val(x) => x, 96 | } 97 | } 98 | } 99 | 100 | impl> DerefMut for Maybe { 101 | #[inline] 102 | fn deref_mut(&mut self) -> &mut Self::Target { 103 | match self { 104 | Self::Ref(x) => &mut *x, 105 | Self::Val(x) => x, 106 | } 107 | } 108 | } 109 | 110 | impl From for Maybe { 111 | #[inline] 112 | fn from(x: T) -> Self { 113 | Self::Val(x) 114 | } 115 | } 116 | 117 | impl From for Maybe { 118 | #[inline] 119 | fn from(x: T) -> Self { 120 | Self::Val(x) 121 | } 122 | } 123 | 124 | impl<'a, T> From<&'a T> for Maybe { 125 | #[inline] 126 | fn from(x: &'a T) -> Self { 127 | Self::Ref(x) 128 | } 129 | } 130 | 131 | impl<'a, T> From<&'a mut T> for Maybe { 132 | #[inline] 133 | fn from(x: &'a mut T) -> Self { 134 | Self::Ref(x) 135 | } 136 | } 137 | 138 | #[cfg(feature = "serde")] 139 | impl> Serialize for Maybe { 140 | fn serialize(&self, serializer: S) -> Result 141 | where 142 | S: Serializer, 143 | { 144 | serializer.serialize_newtype_struct("Maybe", &**self) 145 | } 146 | } 147 | 148 | #[cfg(feature = "serde")] 149 | impl<'de, T: Deserialize<'de>, R: Deref> Deserialize<'de> for Maybe { 150 | fn deserialize(deserializer: D) -> Result 151 | where 152 | D: Deserializer<'de>, 153 | { 154 | struct MaybeVisitor(PhantomData<(T, R)>); 155 | 156 | impl<'de2, T: Deserialize<'de2>, R: Deref> Visitor<'de2> for MaybeVisitor { 157 | type Value = Maybe; 158 | 159 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 160 | write!(formatter, "a Maybe") 161 | } 162 | 163 | fn visit_newtype_struct(self, deserializer: D) -> Result 164 | where 165 | D: Deserializer<'de2>, 166 | { 167 | T::deserialize(deserializer).map(Maybe::Val) 168 | } 169 | } 170 | 171 | 
deserializer.deserialize_newtype_struct("Maybe", MaybeVisitor(PhantomData)) 172 | } 173 | } 174 | 175 | mod ref_or_val_sealed { 176 | pub trait Sealed {} 177 | } 178 | 179 | /// An trait that allows abstracting over values of or references to a `T`. 180 | /// 181 | /// Some [`Input`]s can only generate tokens by-reference (like `&[T]` -> `&T`), and some can only generate tokens 182 | /// by-value (like `&str` -> `char`). This trait allows chumsky to handle both kinds of input. 183 | /// 184 | /// The trait is sealed: you cannot implement it yourself. 185 | pub trait IntoMaybe<'src, T: 'src>: 186 | ref_or_val_sealed::Sealed + Borrow + Into> 187 | { 188 | /// Project the referential properties of this type on to another type. 189 | /// 190 | /// For example, `<&Foo>::Proj = &Bar` but `::Proj = Bar`. 191 | #[doc(hidden)] 192 | type Proj: IntoMaybe<'src, U>; 193 | 194 | #[doc(hidden)] 195 | fn map_maybe( 196 | self, 197 | f: impl FnOnce(&'src T) -> &'src R, 198 | g: impl FnOnce(T) -> R, 199 | ) -> Self::Proj; 200 | } 201 | 202 | impl ref_or_val_sealed::Sealed for &T {} 203 | impl<'src, T> IntoMaybe<'src, T> for &'src T { 204 | type Proj = &'src U; 205 | fn map_maybe( 206 | self, 207 | f: impl FnOnce(&'src T) -> &'src R, 208 | _g: impl FnOnce(T) -> R, 209 | ) -> Self::Proj { 210 | f(self) 211 | } 212 | } 213 | 214 | impl ref_or_val_sealed::Sealed for T {} 215 | impl<'src, T: 'src> IntoMaybe<'src, T> for T { 216 | type Proj = U; 217 | fn map_maybe( 218 | self, 219 | _f: impl FnOnce(&'src T) -> &'src R, 220 | g: impl FnOnce(T) -> R, 221 | ) -> Self::Proj { 222 | g(self) 223 | } 224 | } 225 | --------------------------------------------------------------------------------