├── .github ├── FUNDING.yml └── workflows │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── backtrack.rs ├── cbor.rs ├── json.pest ├── json.rs ├── lex.rs ├── parser.rs ├── samples │ ├── sample.cbor │ └── sample.json ├── tokens.txt └── utils.rs ├── examples ├── brainfuck.rs ├── foo.rs ├── indent.rs ├── io.rs ├── json.rs ├── json_fast.rs ├── logos.rs ├── mini_ml.rs ├── nano_rust.rs ├── nested.rs ├── nested_spans.rs ├── pythonic.rs ├── sample.bf ├── sample.foo ├── sample.io ├── sample.json ├── sample.mini_ml ├── sample.nrs ├── sample.py └── zero-copy.rs ├── guide ├── README.md ├── debugging.md ├── error_and_recovery.md ├── getting_started.md ├── intro.md ├── key_concepts.md ├── meet_the_parsers.md ├── recursion.md ├── technical_notes.md └── tutorial.md ├── misc ├── example.png └── logo.svg └── src ├── blanket.rs ├── cache.rs ├── combinator.rs ├── container.rs ├── either.rs ├── error.rs ├── extension.rs ├── extra.rs ├── guide.rs ├── input.rs ├── inspector.rs ├── label.rs ├── lib.rs ├── number.rs ├── pratt.rs ├── primitive.rs ├── private.rs ├── recovery.rs ├── recursive.rs ├── regex.rs ├── span.rs ├── stream.rs ├── text.rs ├── tokio.rs └── util.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [zesterer] 2 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | permissions: read-all 13 | 14 | jobs: 15 | check: 16 | name: Check 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Install latest nightly 21 | uses: dtolnay/rust-toolchain@master 22 | with: 23 | toolchain: nightly 24 | components: rustfmt, clippy 25 | - 
name: Run cargo check (all features) 26 | run: cargo check --benches --examples --tests --verbose --all-features 27 | - name: Run cargo check (no features) 28 | run: cargo check --benches --examples --tests --verbose --no-default-features 29 | - name: Run cargo clippy 30 | run: cargo clippy --benches --examples --tests --verbose --all-features -- -D warnings 31 | - name: Run cargo fmt 32 | run: cargo fmt --verbose --check 33 | - name: Run cargo doc 34 | run: cargo doc --all-features --verbose 35 | env: 36 | RUSTDOCFLAGS: --cfg docsrs 37 | test: 38 | name: Test 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v3 42 | - name: Install latest nightly 43 | uses: dtolnay/rust-toolchain@master 44 | with: 45 | toolchain: nightly 46 | components: rustfmt, clippy 47 | - name: Run cargo test 48 | run: cargo test --verbose --all-features 49 | env: 50 | RUSTDOCFLAGS: --cfg docsrs 51 | msrv: 52 | name: MSRV 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/checkout@v3 56 | - name: Install MSRV 57 | uses: dtolnay/rust-toolchain@master 58 | with: 59 | toolchain: "1.65" 60 | components: rustfmt, clippy 61 | - name: Check MSRV compatibility 62 | run: cargo check --verbose --features _test_stable 63 | semver: 64 | name: SemVer 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v3 68 | - name: Check semver compatibility 69 | uses: obi1kenobi/cargo-semver-checks-action@v2 70 | with: 71 | rust-toolchain: stable 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE projects 2 | .idea/ 3 | 4 | # Project output 5 | /target 6 | flamegraph.svg 7 | perf.data* 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be 
documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | # Unreleased 9 | 10 | ### Added 11 | 12 | ### Removed 13 | 14 | ### Changed 15 | 16 | ### Fixed 17 | 18 | # [0.10.1] - 2025-04-13 19 | 20 | ### Added 21 | 22 | - Implemented `Container` for `VecDeque` 23 | - New section covering recursion in the guide 24 | 25 | ### Changed 26 | 27 | - `Boxed` types now have a default type parameter of `extra::Default`, like `Parser` and `IterParser` 28 | - The tutorial has been updated for `0.10` and has been moved to the guide 29 | 30 | ### Fixed 31 | 32 | - Nonsense spans occasionally generated for non-existent tokens 33 | - Improved docs have been added for several items 34 | - Many minor documentation issues have been fixed 35 | 36 | # [0.10.0] - 2025-03-22 37 | 38 | *Note: version 0.10 is a from-scratch rewrite of chumsky with innumerable small changes. 
To avoid this changelog being 39 | longer than the compiled works of Douglas Adams, the following is a high-level overview of the major feature additions 40 | and does not include small details.* 41 | 42 | ### Added 43 | 44 | - Support for zero-copy parsing (i.e: parser outputs that hold references to the parser input) 45 | - Support for parsing nested inputs like token trees 46 | - Support for parsing context-sensitive grammars such as Python-style indentation, Rust-style raw strings, and much 47 | more 48 | - Support for parsing by graphemes as well as unicode codepoints 49 | - Support for caching parsers independent of the lifetime of the parser 50 | - A new trait, `IterParser`, that allows expressing parsers that generate many outputs 51 | - Added the ability to collect iterable parsers into fixed-size arrays, along with a plethora of other container types 52 | - Support for manipulating shared state during parsing, elegantly allowing support for arena allocators, cstrees, 53 | interners, and much more 54 | - Support for a vast array of new input types: slices, strings, arrays, `impl Read`ers, iterators, etc. 
55 | - Experimental support for memoization, allowing chumsky to parse left-recursive grammars and reducing the 56 | computational complexity of parsing certain grammars 57 | - An extension API, allowing third-party crates to extend chumsky's capabilities and introduce new combinators 58 | - A `pratt` parser combinator, allowing for conveniently and simply creating expression parsers with precise operator 59 | precedence 60 | - A `regex` combinator, allowing the parsing of terms based on a specific regex pattern 61 | - Properly differentiated ASCII and Unicode text parsers 62 | 63 | ## Removed 64 | 65 | - `Parser::then_with` has been removed in favour of the new context-sensitive combinators 66 | 67 | ### Changed 68 | 69 | - Performance has *radically* improved 70 | - Error generation and handling is now significantly more flexible 71 | 72 | # [0.9.2] - 2023-03-02 73 | 74 | ### Fixed 75 | 76 | - Properly fixed `skip_then_retry_until` regression 77 | 78 | # [0.9.1] - 2023-03-02 79 | 80 | ### Fixed 81 | 82 | - Regression in `skip_then_retry_until` recovery strategy 83 | 84 | # [0.9.0] - 2023-02-07 85 | 86 | ### Added 87 | 88 | - A `spill-stack` feature that uses `stacker` to avoid stack overflow errors for deeply recursive parsers 89 | - The ability to access the token span when using `select!` like `select! { |span| Token::Num(x) => (x, span) }` 90 | - Added a `skip_parser` recovery strategy that allows you to implement your own recovery strategies in terms of other 91 | parsers. For example, `.recover_with(skip_parser(take_until(just(';'))))` skips tokens until after the next semicolon 92 | - A `not` combinator that consumes a single token if it is *not* the start of a given pattern. For example, 93 | `just("\\n").or(just('"')).not()` matches any `char` that is not either the final quote of a string, and is not the 94 | start of a newline escape sequence 95 | - A `semantic_indentation` parser for parsing indentation-sensitive languages. 
Note that this is likely to be 96 | deprecated/removed in the future in favour of a more powerful solution 97 | - `#[must_use]` attribute for parsers to ensure that they're not accidentally created without being used 98 | - `Option>` and `Vec>` now implement `Chain` and `Option` implements `Chain` 99 | - `choice` now supports both arrays and vectors of parsers in addition to tuples 100 | - The `Simple` error type now implements `Eq` 101 | 102 | ### Changed 103 | 104 | - `text::whitespace` returns a `Repeated` instead of an `impl Parser`, allowing you to call methods like `at_least` and 105 | `exactly` on it. 106 | - Improved `no_std` support 107 | - Improved examples and documentation 108 | - Use zero-width spans for EoI by default 109 | - Don't allow defining a recursive parser more than once 110 | - Various minor bug fixes 111 | - Improved `Display` implementations for various built-in error types and `SimpleReason` 112 | - Use an `OrderedContainer` trait to avoid unexpected behaviour for unordered containers in combination with `just` 113 | 114 | ### Fixed 115 | 116 | - Made several parsers (`todo`, `unwrapped`, etc.) 
more useful by reporting the parser's location on panic 117 | - Boxing a parser that is already boxed just gives you the original parser to avoid double indirection 118 | - Improved compilation speeds 119 | 120 | # [0.8.0] - 2022-02-07 121 | 122 | ### Added 123 | 124 | - `then_with` combinator to allow limited support for parsing nested patterns 125 | - impl From<&[T; N]> for Stream 126 | - `SkipUntil/SkipThenRetryUntil::skip_start/consume_end` for more precise control over skip-based recovery 127 | 128 | ### Changed 129 | 130 | - Allowed `Validate` to map the output type 131 | - Switched to zero-size End Of Input spans for default implementations of `Stream` 132 | - Made `delimited_by` take combinators instead of specific tokens 133 | - Minor optimisations 134 | - Documentation improvements 135 | 136 | ### Fixed 137 | 138 | - Compilation error with `--no-default-features` 139 | - Made default behaviour of `skip_until` more sensible 140 | 141 | # [0.7.0] - 2021-12-16 142 | 143 | ### Added 144 | 145 | - A new [tutorial](tutorial.md) to help new users 146 | 147 | - `select` macro, a wrapper over `filter_map` that makes extracting data from specific tokens easy 148 | - `choice` parser, a better alternative to long `or` chains (which sometimes have poor compilation performance) 149 | - `todo` parser, that panics when used (but not when created) (akin to Rust's `todo!` macro, but for parsers) 150 | - `keyword` parser, that parses *exact* identifiers 151 | 152 | - `from_str` combinator to allow converting a pattern to a value inline, using `std::str::FromStr` 153 | - `unwrapped` combinator, to automatically unwrap an output value inline 154 | - `rewind` combinator, that allows reverting the input stream on success. 
It's most useful when requiring that a 155 | pattern is followed by some terminating pattern without the first parser greedily consuming it 156 | - `map_err_with_span` combinator, to allow fetching the span of the input that was parsed by a parser before an error 157 | was encountered 158 | 159 | - `or_else` combinator, to allow processing and potentially recovering from a parser error 160 | - `SeparatedBy::at_most` to require that a separated pattern appear at most a specific number of times 161 | - `SeparatedBy::exactly` to require that a separated pattern be repeated exactly a specific number of times 162 | - `Repeated::exactly` to require that a pattern be repeated exactly a specific number of times 163 | 164 | - More trait implementations for various things, making the crate more useful 165 | 166 | ### Changed 167 | 168 | - Made `just`, `one_of`, and `none_of` significantly more useful. They can now accept strings, arrays, slices, vectors, 169 | sets, or just single tokens as before 170 | - Added the return type of each parser to its documentation 171 | - More explicit documentation of parser behaviour 172 | - More doc examples 173 | - Deprecated `seq` (`just` has been generalised and can now be used to parse specific input sequences) 174 | - Sealed the `Character` trait so that future changes are not breaking 175 | - Sealed the `Chain` trait and made it more powerful 176 | - Moved trait constraints on `Parser` to where clauses for improved readability 177 | 178 | ### Fixed 179 | 180 | - Fixed a subtle bug that allowed `separated_by` to parse an extra trailing separator when it shouldn't 181 | - Filled a 'hole' in the `Error` trait's API that conflated a lack of expected tokens with expectation of end of input 182 | - Made recursive parsers use weak reference-counting to avoid memory leaks 183 | 184 | # [0.6.0] - 2021-11-22 185 | 186 | ### Added 187 | 188 | - `skip_until` error recovery strategy 189 | - `SeparatedBy::at_least` and `SeparatedBy::at_most` for
parsing a specific number of separated items 190 | - `Parser::validate` for integrated AST validation 191 | - `Recursive::declare` and `Recursive::define` for more precise control over recursive declarations 192 | 193 | ### Changed 194 | 195 | - Improved `separated_by` error messages 196 | - Improved documentation 197 | - Hid a new (probably) unused implementation details 198 | 199 | # [0.5.0] - 2021-10-30 200 | 201 | ### Added 202 | 203 | - `take_until` primitive 204 | 205 | ### Changed 206 | 207 | - Added span to fallback output function in `nested_delimiters` 208 | 209 | # [0.4.0] - 2021-10-28 210 | 211 | ### Added 212 | 213 | - Support for LL(k) parsing 214 | - Custom error recovery strategies 215 | - Debug mode 216 | - Nested input flattening 217 | 218 | ### Changed 219 | 220 | - Radically improved error quality 221 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "chumsky" 3 | version = "0.11.0" 4 | description = "A parser library for humans with powerful error recovery" 5 | authors = ["Joshua Barretto ", "Elijah Hartvigsen "] 6 | repository = "https://github.com/zesterer/chumsky" 7 | license = "MIT" 8 | keywords = ["parser", "combinator", "token", "language", "syntax"] 9 | categories = ["parsing", "text-processing"] 10 | edition = "2021" 11 | exclude = [ 12 | "/misc/*", 13 | "/benches/samples/*", 14 | ] 15 | rust-version = "1.65" 16 | 17 | [features] 18 | default = ["std", "stacker"] 19 | 20 | # Integrate with the standard library. 21 | std = [ 22 | "regex-automata?/std", 23 | "serde?/std" 24 | ] 25 | 26 | # Enable nightly-only features like better compiler diagnostics and a Parser impl for ! (the never type). 27 | nightly = [] 28 | 29 | # Allows deeper recursion by dynamically spilling stack state on to the heap. 
30 | stacker = ["dep:stacker", "std"] 31 | 32 | # Allows parser memoization, speeding up heavily back-tracking parsers and allowing left recursion. 33 | memoization = [] 34 | 35 | # Allows extending chumsky by writing your own parser implementations. 36 | extension = [] 37 | 38 | # Make builtin parsers such as `Boxed` use atomic instead of non-atomic internals. 39 | # TODO: Remove or rework this 40 | sync = ["spin"] 41 | 42 | # Enable Pratt parsing combinator 43 | pratt = ["unstable"] 44 | 45 | # Allow the use of unstable features (aka features where the API is not settled) 46 | unstable = [] 47 | 48 | # Allows use of the `Number` parser, which is backed by the `lexical` crate 49 | lexical-numbers = ["lexical", "unstable"] 50 | 51 | # Adds impl of Parser for either::Either 52 | either = ["dep:either"] 53 | 54 | # Enables regex combinators 55 | regex = ["dep:regex-automata"] 56 | 57 | # Enable serde serialization support 58 | serde = ["dep:serde"] 59 | 60 | # Enable support for using Tokio's byte slices as inputs 61 | bytes = ["dep:bytes"] 62 | 63 | # Enable dependencies only needed for generation of documentation on docs.rs 64 | docsrs = [] 65 | 66 | # An alias of all features that work with the stable compiler. 67 | # Do not use this feature, its removal is not considered a breaking change and its behaviour may change. 68 | # If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list. 
69 | _test_stable = ["std", "stacker", "memoization", "extension", "sync"] 70 | 71 | [package.metadata.docs.rs] 72 | all-features = true 73 | rustdoc-args = ["--cfg", "docsrs"] 74 | 75 | [dependencies] 76 | hashbrown = "0.15" 77 | stacker = { version = "0.1", optional = true } 78 | regex-automata = { version = "0.3", default-features = false, optional = true, features = ["alloc", "meta", "perf", "unicode", "nfa", "dfa", "hybrid"] } 79 | spin = { version = "0.9", features = ["once"], default-features = false, optional = true } 80 | lexical = { version = "6.1.1", default-features = false, features = ["parse-integers", "parse-floats", "format"], optional = true } 81 | either = { version = "1.8.1", optional = true } 82 | serde = { version = "1.0", default-features = false, optional = true, features = ["derive"] } 83 | unicode-ident = "1.0.10" 84 | unicode-segmentation = "1" 85 | bytes = { version = "1", default-features = false, optional = true } 86 | 87 | [dev-dependencies] 88 | ariadne = "0.5" 89 | pom = "3.2" 90 | nom = "7.1" 91 | nom8 = { package = "nom", version = "8"} 92 | winnow = "0.7.0" 93 | serde_json = { version = "1.0", features = ["preserve_order"] } 94 | ciborium = { version = "0.2" } 95 | criterion = "0.4.0" 96 | pest = "2.5" 97 | pest_derive = "2.5" 98 | sn = "0.1" 99 | logos = "0.13" 100 | lasso = "0.7" 101 | slotmap = "1.0" 102 | 103 | [target.'cfg(unix)'.dev-dependencies] 104 | pprof = { version = "0.11", features = ["flamegraph", "criterion"] } 105 | 106 | [profile.bench] 107 | debug = true 108 | 109 | [[bench]] 110 | name = "json" 111 | harness = false 112 | required-features = ["std"] 113 | 114 | [[bench]] 115 | name = "lex" 116 | harness = false 117 | 118 | [[bench]] 119 | name = "parser" 120 | harness = false 121 | 122 | [[bench]] 123 | name = "backtrack" 124 | harness = false 125 | 126 | [[bench]] 127 | name = "cbor" 128 | harness = false 129 | 130 | [[example]] 131 | name = "nano_rust" 132 | 133 | [[example]] 134 | name = "json" 135 | 
required-features = ["std"] 136 | 137 | [[example]] 138 | name = "json_fast" 139 | required-features = ["std"] 140 | 141 | [[example]] 142 | name = "io" 143 | required-features = ["std"] 144 | 145 | [[example]] 146 | name = "foo" 147 | required-features = ["std"] 148 | 149 | [[example]] 150 | name = "mini_ml" 151 | required-features = ["pratt"] 152 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Joshua Barretto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![crates.io](https://img.shields.io/crates/v/chumsky.svg)](https://crates.io/crates/chumsky) 2 | [![crates.io](https://docs.rs/chumsky/badge.svg)](https://docs.rs/chumsky) 3 | [![License](https://img.shields.io/crates/l/chumsky.svg)](https://github.com/zesterer/chumsky) 4 | [![actions-badge](https://github.com/zesterer/chumsky/workflows/Rust/badge.svg?branch=main)](https://github.com/zesterer/chumsky/actions) 5 | 6 | Chumsky is a parser library for Rust that makes writing expressive, high-performance parsers easy. 7 | 8 | 9 | Example usage with my own language, Tao 10 | 11 | 12 | *Note: Error diagnostic rendering in this example is performed by [Ariadne](https://github.com/zesterer/ariadne)* 13 | 14 | Although chumsky is designed primarily for user-facing parsers such as compilers, chumsky is just as much at home 15 | parsing binary protocols at the networking layer, configuration files, or any other form of complex input validation 16 | that you may need. It also has `no_std` support, making it suitable for embedded environments.
17 | 18 | ## Features 19 | 20 | - 🪄 **Expressive combinators** that make writing your parser a joy 21 | - 🎛️ **Fully generic** across input, token, output, span, and error types 22 | - 📑 **Zero-copy parsing** minimises allocation by having outputs hold references/slices of the input 23 | - 🚦 **Flexible error recovery** strategies out of the box 24 | - ☑️ **Check-only mode** for fast verification of inputs, automatically supported 25 | - 🚀 **Internal optimiser** leverages the power of [GATs](https://smallcultfollowing.com/babysteps/blog/2022/06/27/many-modes-a-gats-pattern/) to optimise your parser for you 26 | - 📖 **Text-oriented parsers** for text inputs (i.e: `&[u8]` and `&str`) 27 | - 👁️‍🗨️ **Context-free grammars** are fully supported, with support for context-sensitivity 28 | - 🔄 **Left recursion and memoization** have opt-in support 29 | - 🪺 **Nested inputs** such as token trees are fully supported both as inputs and outputs 30 | - 🏷️ **Pattern labelling** for dynamic, user-friendly error messages 31 | - 🗃️ **Caching** allows parsers to be created once and reused many times 32 | - ↔️ **Pratt parsing** support for simple yet flexible expression parsing 33 | - 🪛 **no_std** support, allowing chumsky to run in embedded environments 34 | 35 | ## Example 36 | 37 | See [`examples/brainfuck.rs`](https://github.com/zesterer/chumsky/blob/main/examples/brainfuck.rs) for a full 38 | [Brainfuck](https://en.wikipedia.org/wiki/Brainfuck) interpreter 39 | (`cargo run --example brainfuck -- examples/sample.bf`). 
40 | 41 | ```rust,ignore 42 | use chumsky::prelude::*; 43 | 44 | /// An AST (Abstract Syntax Tree) for Brainfuck instructions 45 | #[derive(Clone)] 46 | enum Instr { 47 | Left, Right, 48 | Incr, Decr, 49 | Read, Write, 50 | Loop(Vec), // In Brainfuck, `[...]` loop instructions contain any number of instructions 51 | } 52 | 53 | /// A function that generates a Brainfuck parser 54 | fn brainfuck<'a>() -> impl Parser<'a, &'a str, Vec> { 55 | // Brainfuck syntax is recursive: each instruction can contain many sub-instructions (via `[...]` loops) 56 | recursive(|bf| choice(( 57 | // All of the basic instructions are just single characters 58 | just('<').to(Instr::Left), 59 | just('>').to(Instr::Right), 60 | just('+').to(Instr::Incr), 61 | just('-').to(Instr::Decr), 62 | just(',').to(Instr::Read), 63 | just('.').to(Instr::Write), 64 | // Loops are strings of Brainfuck instructions, delimited by square brackets 65 | bf.delimited_by(just('['), just(']')).map(Instr::Loop), 66 | )) 67 | // Brainfuck instructions appear sequentially, so parse as many as we need 68 | .repeated() 69 | .collect()) 70 | } 71 | 72 | // Parse some Brainfuck with our parser 73 | brainfuck().parse("--[>--->->->++>-<<<<<-------]>--.>---------.>--..+++.>----.>+++++++++.<<.+++.------.<-.>>+.") 74 | ``` 75 | 76 | You can find more examples [here](https://github.com/zesterer/chumsky/tree/main/examples). 77 | 78 | ## Guide and documentation 79 | 80 | Chumsky has an extensive [guide](https://docs.rs/chumsky/latest/chumsky/guide) that walks you through the library: all 81 | the way from setting up and basic theory to advanced uses of the crate. It includes technical details of chumsky's 82 | behaviour, examples of uses, a handy index for all of the combinators, technical details about the crate, and even a 83 | tutorial that leads you through the development of a fully-functioning interpreter for a simple programming language. 
84 | 85 | The crate docs should also be similarly useful: most important functions include at least one contextually-relevant 86 | example, and all crate items are fully documented. 87 | 88 | In addition, chumsky comes with a suite of fully-fledged 89 | [example projects](https://github.com/zesterer/chumsky/tree/main/examples). These include: 90 | 91 | - Parsers for existing syntaxes like Brainfuck and JSON 92 | - Integration demos for third-party crates, like [`logos`](https://crates.io/crates/logos) 93 | - Parsers for new toy programming languages: a Rust-like language and a full-on lexer, parser, type-checker, and 94 | interpreter for a miniature ML-like language. 95 | - Examples of parsing non-trivial inputs like token trees, `impl Read`ers, and zero-copy, zero-alloc parsing. 96 | 97 | ## Cargo features 98 | 99 | Chumsky contains several optional features that extend the crate's functionality. 100 | 101 | - `bytes`: adds support for parsing types from the [`bytes`](https://docs.rs/bytes/) crate.
102 | 103 | - `either`: implements `Parser` for `either::Either`, allowing dynamic configuration of parsers at run-time 104 | 105 | - `extension`: enables the extension API, allowing you to write your own first-class combinators that integrate with 106 | and extend chumsky 107 | 108 | - `lexical-numbers`: Enables use of the `Number` parser for parsing various numeric formats 109 | 110 | - `memoization`: enables [memoization](https://en.wikipedia.org/wiki/Memoization#Parsers) features 111 | 112 | - `nightly`: enable support for features only supported by the nightly Rust compiler 113 | 114 | - `pratt`: enables the [pratt parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) 115 | combinator 116 | 117 | - `regex`: enables the regex combinator 118 | 119 | - `serde`: enables `serde` (de)serialization support for several types 120 | 121 | - `stacker` (enabled by default): avoid stack overflows by spilling stack data to the heap via the `stacker` crate 122 | 123 | - `std` (enabled by default): support for standard library features 124 | 125 | - `unstable`: enables experimental chumsky features (API features enabled by `unstable` are NOT considered to fall 126 | under the semver guarantees of chumsky!) 127 | 128 | ## *What* is a parser combinator? 129 | 130 | Parser combinators are a technique for implementing parsers by defining them in terms of other parsers. The resulting 131 | parsers use a [recursive descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) strategy to transform a stream 132 | of tokens into an output. Using parser combinators to define parsers is roughly analogous to using Rust's 133 | [`Iterator`](https://doc.rust-lang.org/std/iter/trait.Iterator.html) trait to define iterative algorithms: the 134 | type-driven API of `Iterator` makes it more difficult to make mistakes and easier to encode complicated iteration logic 135 | than if one were to write the same code by hand. The same is true of parser combinators. 
136 | 137 | ## *Why* use parser combinators? 138 | 139 | Writing parsers with good error recovery is conceptually difficult and time-consuming. It requires understanding the 140 | intricacies of the recursive descent algorithm, and then implementing recovery strategies on top of it. If you're 141 | developing a programming language, you'll almost certainly change your mind about syntax in the process, leading to some 142 | slow and painful parser refactoring. Parser combinators solve both problems by providing an ergonomic API that allows 143 | for rapidly iterating upon a syntax. 144 | 145 | Parser combinators are also a great fit for domain-specific languages for which an existing parser does not exist. 146 | Writing a reliable, fault-tolerant parser for such situations can go from being a multi-day task to a half-hour task 147 | with the help of a decent parser combinator library. 148 | 149 | ## Classification 150 | 151 | Chumsky's parsers are [recursive descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) parsers and are 152 | capable of parsing [parsing expression grammars (PEGs)](https://en.wikipedia.org/wiki/Parsing_expression_grammar), which 153 | includes all known context-free languages. However, chumsky doesn't stop there: it also supports context-sensitive 154 | grammars via a set of dedicated combinators that integrate cleanly with the rest of the library. This allows it to 155 | additionally parse a number of context-sensitive syntaxes like Rust-style raw strings, Python-style semantic 156 | indentation, and much more. 
157 | 158 | ## Error recovery 159 | 160 | Chumsky has support for error recovery, meaning that it can encounter a syntax error, report the error, and then 161 | attempt to recover itself into a state in which it can continue parsing so that multiple errors can be produced at once 162 | and a partial [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) can still be generated from the input for future 163 | compilation stages to consume. 164 | 165 | ## Performance 166 | 167 | Chumsky allows you to choose your priorities. When needed, it can be configured for high-quality parser errors. It can 168 | also be configured for *performance*. 169 | 170 | It's difficult to produce general benchmark results for parser libraries. By their nature, the performance of a parser 171 | is intimately tied to exactly how the grammar they implement has been specified. That said, here are some numbers for a 172 | fairly routine JSON parsing benchmark implemented idiomatically in various libraries. As you can see, chumsky ranks 173 | quite well! 174 | 175 | | Ranking | Library | Time (smaller is better) | Throughput | 176 | |---------|------------------------------------------------------|--------------------------|------------| 177 | | 1 | `chumsky` (check-only) | 140.77 µs | 797 MB/s | 178 | | 2 | [`winnow`](https://github.com/winnow-rs/winnow) | 178.91 µs | 627 MB/s | 179 | | 3 | `chumsky` | 210.43 µs | 533 MB/s | 180 | | 4 | [`sn`](https://github.com/Jacherr/sn) (hand-written) | 237.94 µs | 472 MB/s | 181 | | 5 | [`serde_json`](https://github.com/serde-rs/json) | 477.41 µs | 235 MB/s | 182 | | 6 | [`nom`](https://github.com/rust-bakery/nom) | 526.52 µs | 213 MB/s | 183 | | 7 | [`pest`](https://github.com/pest-parser/pest) | 1.9706 ms | 57 MB/s | 184 | | 8 | [`pom`](https://github.com/J-F-Liu/pom) | 13.730 ms | 8 MB/s | 185 | 186 | What should you take from this? It's difficult to say. 
'Chumsky is faster than X' or 'chumsky is slower than Y' is too 187 | strong a statement: this is just one particular benchmark with one particular set of implementations and one 188 | particular workload. 189 | 190 | That said, there is something you can take: chumsky isn't going to be your bottleneck. In this benchmark, chumsky is 191 | within 20% of the performance of the 'pack leader' and has performance comparable to a hand-written parser. The 192 | performance standards for Rust libraries are already far above most language ecosystems, so you can be sure that 193 | chumsky will keep pace with your use-case. 194 | 195 | Benchmarks were performed on a single core of an AMD Ryzen 7 3700x. 196 | 197 | ## Notes 198 | 199 | My apologies to Noam for choosing such an absurd name. 200 | 201 | ## License 202 | 203 | Chumsky is licensed under the MIT license (see `LICENSE` in the main repository). 204 | -------------------------------------------------------------------------------- /benches/backtrack.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 3 | 4 | fn bench_backtrack(c: &mut Criterion) { 5 | let four = just::<_, &str, extra::Default>('!') 6 | .repeated() 7 | .collect::>() 8 | .then_ignore(just(';')) 9 | .repeated() 10 | .exactly(4) 11 | .collect::>() 12 | .then_ignore(just(';')); 13 | 14 | let five = just('!') 15 | .repeated() 16 | .collect::>() 17 | .then_ignore(just(';')) 18 | .repeated() 19 | .exactly(5) 20 | .collect::>() 21 | .then_ignore(just(';')); 22 | 23 | let xs = five.or(four).repeated().collect::>(); 24 | 25 | c.bench_function("backtrack", |b| { 26 | b.iter(|| { 27 | black_box(xs.parse(&black_box("!!!!;!!!!;!!!!;!!!!;;".repeat(1000)))) 28 | .into_result() 29 | .unwrap(); 30 | }) 31 | }); 32 | } 33 | 34 | criterion_group!(benches, bench_backtrack); 35 | criterion_main!(benches); 36 | 
--------------------------------------------------------------------------------
/benches/json.pest:
--------------------------------------------------------------------------------
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }

object = {
    "{" ~ "}" |
    "{" ~ pair ~ ("," ~ pair)* ~ "}"
}
pair = { string ~ ":" ~ value }

array = {
    "[" ~ "]" |
    "[" ~ value ~ ("," ~ value)* ~ "]"
}

value = _{ object | array | string | number | boolean | null }

boolean = { "true" | "false" }

null = { "null" }

string = ${ "\"" ~ inner ~ "\"" }
inner = @{ char* }
char = {
    !("\"" | "\\") ~ ANY
    | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
    | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})
}

number = @{
    "-"?
    ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*)
    ~ ("." ~ ASCII_DIGIT*)?
    ~ (^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+)?
}

json = _{ SOI ~ (object | array) ~ EOI }
--------------------------------------------------------------------------------
/benches/lex.rs:
--------------------------------------------------------------------------------
use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// A JSON-like value, kept so the token benchmarks mirror the JSON ones.
#[derive(Debug, Clone, PartialEq)]
pub enum Json {
    Null,
    Bool(bool),
    Str(String),
    Num(f64),
    Array(Vec<Json>),
    Object(Vec<(String, Json)>),
}

/// The token set recognised by both lexers under test.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
    Null,
    Bool(bool),
    Str(&'a [u8]),
    Num(f64),
    Ident(&'a [u8]),
    Less,
    More,
    LessEq,
    MoreEq,
    OpenParen,
    CloseParen,
    Comma,
}

static SAMPLE: &[u8] = include_bytes!("tokens.txt");

/// Lexes `tokens.txt` three ways: chumsky (building tokens), chumsky
/// (check-only, no token construction), and logos.
fn bench_lex(c: &mut Criterion) {
    c.bench_function("lex_chumsky_zero_copy", {
        use ::chumsky::prelude::*;
        let parser = chumsky_zero_copy::parser();
        move |b| {
            b.iter(|| {
                // The sample is known to contain exactly 4048 tokens.
                assert_eq!(
                    black_box(parser.parse(black_box(SAMPLE)))
                        .into_result()
                        .unwrap()
                        .len(),
                    4048
                )
            })
        }
    });

    c.bench_function("lex_chumsky_zero_copy_check", {
        use ::chumsky::prelude::*;
        let parser = chumsky_zero_copy::parser();
        move |b| {
            b.iter(|| {
                // Check-only parsing must produce no errors on the sample.
                assert!(black_box(parser.check(black_box(SAMPLE)))
                    .into_errors()
                    .is_empty())
            })
        }
    });

    c.bench_function("lex_logos", |b| {
        b.iter(|| {
            assert!(black_box(logos::lexer(black_box(SAMPLE))).all(|t| t != Ok(logos::Token::Error)))
        })
    });
}

criterion_group!(benches, bench_lex);
criterion_main!(benches);

mod logos {
    use logos::{Lexer, Logos};
    use std::str;

    /// Converts the matched `true`/`false` slice into a `bool`.
    fn to_bool<'a>(lex: &mut Lexer<'a, Token<'a>>) -> bool {
        match lex.slice() {
            b"true" => true,
            b"false" => false,
            _ => unreachable!(),
        }
    }

    /// Parses the matched byte slice as an `f64`.
    fn to_f64<'a>(lex: &mut Lexer<'a, Token<'a>>) -> f64 {
        str::from_utf8(lex.slice()).unwrap().parse().unwrap()
    }

    #[derive(Logos, Debug, Clone, PartialEq)]
    pub enum Token<'a> {
        #[token("null")]
        Null,
        #[regex("true|false", to_bool)]
        Bool(bool),
        #[regex(br#""([^\\"]|\\[\\"bfnrt/])*""#)]
        Str(&'a [u8]),
        #[regex(br"-?([1-9][0-9]*|0)(\.[0-9]*)?([eE][+-]?[0-9]*)?", to_f64)]
        Num(f64),
        #[regex(br"[a-zA-Z_][a-zA-Z0-9_]*")]
        Ident(&'a [u8]),
        #[token(b"<")]
        Less,
        #[token(b">")]
        More,
        #[token(b"<=")]
        LessEq,
        #[token(b">=")]
        MoreEq,
        #[token(b"(")]
        OpenParen,
        #[token(b")")]
        CloseParen,
        #[token(b",")]
        Comma,

        // Whitespace is skipped; anything unmatched becomes an error token.
        #[regex(br"\s", logos::skip)]
        Error,
    }

    pub fn lexer(src: &[u8]) -> Lexer<'_, Token<'_>> {
        Token::lexer(src)
    }
}

mod chumsky_zero_copy {
    use chumsky::prelude::*;

    use super::Token;
    use std::str;

    /// Builds a zero-copy lexer producing [`Token`]s from a byte slice.
    pub fn parser<'a>() -> impl Parser<'a, &'a [u8], Vec<Token<'a>>> {
        let digits = one_of(b'0'..=b'9').repeated().to_slice();

        // An integer is either `0` or a non-zero digit followed by digits.
        let int = one_of(b'1'..=b'9')
            .repeated()
            .at_least(1)
            .then(one_of(b'0'..=b'9').repeated())
            .ignored()
            .or(just(b'0').ignored())
            .ignored();

        let frac = just(b'.').then(digits.clone());

        let exp = just(b'e')
            .or(just(b'E'))
            .then(one_of(b"+-").or_not())
            .then(digits.clone());

        // Numbers are lexed as raw slices and reparsed via `str::parse`.
        let number = just(b'-')
            .or_not()
            .then(int)
            .then(frac.or_not())
            .then(exp.or_not())
            .to_slice()
            .map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
            .boxed();

        let escape = just(b'\\')
            .then(choice((
                just(b'\\'),
                just(b'/'),
                just(b'"'),
                just(b'b').to(b'\x08'),
                just(b'f').to(b'\x0C'),
                just(b'n').to(b'\n'),
                just(b'r').to(b'\r'),
                just(b't').to(b'\t'),
            )))
            .ignored()
            .boxed();

        let string = none_of(b"\\\"")
            .ignored()
            .or(escape)
            .repeated()
            .to_slice()
            .delimited_by(just(b'"'), just(b'"'))
            .boxed();

        let ident = text::ascii::ident().to_slice().map(Token::Ident);

        choice((
            just(b"null").to(Token::Null),
            just(b"true").to(Token::Bool(true)),
            just(b"false").to(Token::Bool(false)),
            number.map(Token::Num),
            string.map(Token::Str),
            ident,
            // Two-byte operators must be tried before their one-byte prefixes.
            just(b"<=").to(Token::LessEq),
            just(b">=").to(Token::MoreEq),
            just(b"<").to(Token::Less),
            just(b">").to(Token::More),
            just(b"(").to(Token::OpenParen),
            just(b")").to(Token::CloseParen),
            just(b",").to(Token::Comma),
        ))
        .padded()
        .repeated()
        .collect()
    }
}
--------------------------------------------------------------------------------
/benches/parser.rs:
--------------------------------------------------------------------------------
use
chumsky::prelude::*;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};

mod utils;

/// Benchmarks a 26-way `choice` over the uppercase alphabet: first branch
/// (`A`), last branch (`Z`), and the failure case (`0`).
fn bench_choice(c: &mut Criterion) {
    let alphabet_choice = choice((
        just::<_, &str, extra::Default>('A'),
        just('B'),
        just('C'),
        just('D'),
        just('E'),
        just('F'),
        just('G'),
        just('H'),
        just('I'),
        just('J'),
        just('K'),
        just('L'),
        just('M'),
        just('N'),
        just('O'),
        just('P'),
        just('Q'),
        just('R'),
        just('S'),
        just('T'),
        just('U'),
        just('V'),
        just('W'),
        just('X'),
        just('Y'),
        just('Z'),
    ));

    let mut group = c.benchmark_group("choice");

    group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "A"), |b| {
        b.iter(|| black_box(alphabet_choice.parse(black_box("A"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "Z"), |b| {
        b.iter(|| black_box(alphabet_choice.parse(black_box("Z"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("choice::<(A..Z)>", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_choice.parse(black_box("0"))).into_result().is_err()))
    });
}

/// The same alphabet as `bench_choice`, but built as a chain of `.or` calls so
/// the two strategies can be compared.
fn bench_or(c: &mut Criterion) {
    let alphabet_or = just::<_, _, extra::Default>('A')
        .or(just('B'))
        .or(just('C'))
        .or(just('D'))
        .or(just('E'))
        .or(just('F'))
        .or(just('G'))
        .or(just('H'))
        .or(just('I'))
        .or(just('J'))
        .or(just('K'))
        .or(just('L'))
        .or(just('M'))
        .or(just('N'))
        .or(just('O'))
        .or(just('P'))
        .or(just('Q'))
        .or(just('R'))
        .or(just('S'))
        .or(just('T'))
        .or(just('U'))
        .or(just('V'))
        .or(just('W'))
        .or(just('X'))
        .or(just('Y'))
        .or(just('Z'));

    let mut group = c.benchmark_group("or");

    group.bench_function(BenchmarkId::new("A.or(B)...or(Z)", "A"), |b| {
        b.iter(|| black_box(alphabet_or.parse(black_box("A"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("A.or(B)...or(Z)", "Z"), |b| {
        b.iter(|| black_box(alphabet_or.parse(black_box("Z"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("A.or(B)...or(Z)", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_or.parse(black_box("0"))).into_result().is_err()))
    });
}

/// Benchmarks a 26-element `group` (sequence) over the whole alphabet: full
/// match, failure on the final element, and failure on the first element.
fn bench_group(c: &mut Criterion) {
    let alphabet_group = group((
        just::<_, &str, extra::Default>('A'),
        just('B'),
        just('C'),
        just('D'),
        just('E'),
        just('F'),
        just('G'),
        just('H'),
        just('I'),
        just('J'),
        just('K'),
        just('L'),
        just('M'),
        just('N'),
        just('O'),
        just('P'),
        just('Q'),
        just('R'),
        just('S'),
        just('T'),
        just('U'),
        just('V'),
        just('W'),
        just('X'),
        just('Y'),
        just('Z'),
    ));

    let mut group = c.benchmark_group("group");

    group.bench_function(
        BenchmarkId::new("group::<(A..Z)>", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
        |b| {
            b.iter(|| {
                black_box(alphabet_group.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
                    .into_result()
                    .unwrap()
            })
        },
    );

    group.bench_function(
        BenchmarkId::new("group::<(A..Z)>", "ABCDEFGHIJKLMNOPQRSTUVWXY0"),
        |b| {
            b.iter(|| {
                assert!(
                    black_box(alphabet_group.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXY0")))
                        .into_result()
                        .is_err()
                )
            })
        },
    );

    group.bench_function(BenchmarkId::new("group::<(A..Z)>", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_group.parse(black_box("0"))).into_result().is_err()))
    });
}

/// The same sequence as `bench_group`, but built as a chain of `.then` calls.
fn bench_then(c: &mut Criterion) {
    let alphabet_then = just::<_, _, extra::Default>('A')
        .then(just('B'))
        .then(just('C'))
        .then(just('D'))
        .then(just('E'))
        .then(just('F'))
        .then(just('G'))
        .then(just('H'))
        .then(just('I'))
        .then(just('J'))
        .then(just('K'))
        .then(just('L'))
        .then(just('M'))
        .then(just('N'))
        .then(just('O'))
        .then(just('P'))
        .then(just('Q'))
        .then(just('R'))
        .then(just('S'))
        .then(just('T'))
        .then(just('U'))
        .then(just('V'))
        .then(just('W'))
        .then(just('X'))
        .then(just('Y'))
        .then(just('Z'));

    let mut group = c.benchmark_group("then");

    group.bench_function(
        BenchmarkId::new("A.then(B)...then(Z)", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
        |b| {
            b.iter(|| {
                black_box(alphabet_then.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
                    .into_result()
                    .unwrap()
            })
        },
    );

    group.bench_function(
        BenchmarkId::new("A.then(B)...then(Z)", "ABCDEFGHIJKLMNOPQRSTUVWXY0"),
        |b| {
            b.iter(|| {
                assert!(
                    black_box(alphabet_then.parse(black_box("ABCDEFGHIJKLMNOPQRSTUVWXY0")))
                        .into_result()
                        .is_err()
                )
            })
        },
    );

    group.bench_function(BenchmarkId::new("A.then(B)...then(Z)", "0"), |b| {
        b.iter(|| assert!(black_box(alphabet_then.parse(black_box("0"))).into_result().is_err()))
    });
}

/// Benchmarks the `regex` combinator on literal, class, and repetition
/// patterns, including non-matching inputs.
#[cfg(feature = "regex")]
fn bench_regex(c: &mut Criterion) {
    let re_foo = regex::<_, extra::Default>("foo");
    let re_foo2 = regex::<_, extra::Default>("[fF]oo");
    let re_rep = regex::<_, extra::Default>("(?:abc){4}");

    let mut group = c.benchmark_group("regex");

    group.bench_function(BenchmarkId::new("foo", "foo"), |b| {
        b.iter(|| black_box(re_foo.parse(black_box("foo"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("foo", "barfoofoofoo"), |b| {
        b.iter(|| black_box(re_foo.parse(black_box("barfoofoofoo"))).into_result().unwrap_err())
    });

    group.bench_function(BenchmarkId::new("[fF]oo", "foo"), |b| {
        b.iter(|| black_box(re_foo2.parse(black_box("foo"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("[fF]oo", "Foo"), |b| {
        b.iter(|| black_box(re_foo2.parse(black_box("Foo"))).into_result().unwrap())
    });

    group.bench_function(BenchmarkId::new("[fF]oo", "barFoofoo"), |b| {
        b.iter(|| black_box(re_foo2.parse(black_box("barFoofoo"))).into_result().unwrap_err())
    });

    group.bench_function(BenchmarkId::new("(?:abc){4}", "abcabcabcabc"), |b| {
        b.iter(|| black_box(re_rep.parse(black_box("abcabcabcabc"))).into_result().unwrap())
    });
}

// Stub so the criterion target list stays valid without the `regex` feature.
#[cfg(not(feature = "regex"))]
fn bench_regex(_: &mut Criterion) {}

criterion_group!(
    name = benches;
    config = utils::make_criterion();
    targets = bench_choice, bench_or, bench_group, bench_then, bench_regex,
);
criterion_main!(benches);
--------------------------------------------------------------------------------
/benches/samples/sample.cbor:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zesterer/chumsky/6d07aa3dfabf1b34b1135c07de321bbc8e0b1d89/benches/samples/sample.cbor
--------------------------------------------------------------------------------
/benches/tokens.txt:
--------------------------------------------------------------------------------
( ) "hello", 42.75
"world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 2 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 3 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 4 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 5 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 6 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 7 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 8 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 9 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 10 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 11 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 12 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 13 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 14 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 15 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 16 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 17 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 18 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 19 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 20 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 21 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 22 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 23 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 24 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 25 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 26 | ( 
) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 27 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 28 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 29 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 30 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 31 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 32 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 33 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 34 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 35 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 36 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 37 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 38 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 39 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 40 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 41 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 42 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 43 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 44 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 45 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 46 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 47 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 48 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 49 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 50 | ( ) "hello", 42.75 "world" haha test true false ,,, 
(((7,) 9 "foo" "bar" 75 51 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 52 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 53 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 54 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 55 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 56 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 57 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 58 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 59 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 60 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 61 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 62 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 63 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 64 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 65 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 66 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 67 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 68 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 69 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 70 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 71 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 72 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 73 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 74 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 75 | ( ) "hello", 42.75 "world" 
haha test true false ,,, (((7,) 9 "foo" "bar" 75 76 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 77 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 78 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 79 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 80 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 81 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 82 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 83 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 84 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 85 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 86 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 87 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 88 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 89 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 90 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 91 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 92 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 93 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 94 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 95 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 96 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 97 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 98 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 99 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 100 | 
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 101 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 102 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 103 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 104 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 105 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 106 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 107 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 108 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 109 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 110 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 111 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 112 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 113 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 114 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 115 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 116 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 117 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 118 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 119 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 120 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 121 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 122 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 123 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 124 | ( ) "hello", 42.75 "world" haha 
test true false ,,, (((7,) 9 "foo" "bar" 75 125 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 126 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 127 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 128 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 129 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 130 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 131 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 132 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 133 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 134 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 135 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 136 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 137 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 138 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 139 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 140 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 141 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 142 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 143 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 144 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 145 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 146 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 147 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 148 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 
"foo" "bar" 75 149 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 150 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 151 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 152 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 153 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 154 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 155 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 156 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 157 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 158 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 159 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 160 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 161 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 162 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 163 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 164 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 165 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 166 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 167 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 168 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 169 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 170 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 171 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 172 | ( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75 173 | ( ) 
"hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75
( ) "hello", 42.75 "world" haha test true false ,,, (((7,) 9 "foo" "bar" 75

--------------------------------------------------------------------------------
/benches/utils.rs:
--------------------------------------------------------------------------------
use criterion::Criterion;

/// On Unix, criterion is configured with pprof's flamegraph profiler attached.
#[cfg(unix)]
pub fn make_criterion() -> Criterion {
    use pprof::criterion::{Output, PProfProfiler};
    Criterion::default()
        .with_profiler(PProfProfiler::new(1000, Output::Flamegraph(None)))
        .configure_from_args()
}

/// Elsewhere, plain criterion configured from the command line only.
#[cfg(not(unix))]
pub fn make_criterion() -> Criterion {
    Criterion::default().configure_from_args()
}
--------------------------------------------------------------------------------
/examples/brainfuck.rs:
--------------------------------------------------------------------------------
//! This is a Brainfuck parser and interpreter
//! Run it with the following command:
//! cargo run --example brainfuck -- examples/sample.bf

use chumsky::prelude::*;
use std::{
    env, fs,
    io::{self, Read},
};

/// One Brainfuck instruction. `Invalid` is only produced by error recovery on
/// unbalanced brackets and is never executed.
#[derive(Clone)]
enum Instr {
    Invalid,
    Left,
    Right,
    Incr,
    Decr,
    Read,
    Write,
    Loop(Vec<Instr>),
}

/// Parses Brainfuck source into a list of instructions, recovering from
/// mismatched `[`/`]` so multiple errors can be reported at once.
// NOTE(review): error type reconstructed as `Rich<'a, char>` from chumsky's
// published examples — the dump stripped the generics; confirm against upstream.
fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Instr>, extra::Err<Rich<'a, char>>> {
    use Instr::*;
    recursive(|bf| {
        choice((
            just('<').to(Left),
            just('>').to(Right),
            just('+').to(Incr),
            just('-').to(Decr),
            just(',').to(Read),
            just('.').to(Write),
        ))
        .or(bf.delimited_by(just('['), just(']')).map(Loop))
        .recover_with(via_parser(nested_delimiters('[', ']', [], |_| Invalid)))
        // .recover_with(skip_then_retry_until([']']))
        .repeated()
        .collect()
    })
}

const TAPE_LEN: usize = 10_000;

/// Interprets `ast` against a tape of `TAPE_LEN` wrapping byte cells, with the
/// data pointer wrapping at both ends of the tape.
fn execute(ast: &[Instr], ptr: &mut usize, tape: &mut [u8; TAPE_LEN]) {
    use Instr::*;
    for symbol in ast {
        match symbol {
            // Recovery artefacts are filtered out by a failed parse before we
            // ever get here.
            Invalid => unreachable!(),
            Left => *ptr = (*ptr + TAPE_LEN - 1).rem_euclid(TAPE_LEN),
            Right => *ptr = (*ptr + 1).rem_euclid(TAPE_LEN),
            Incr => tape[*ptr] = tape[*ptr].wrapping_add(1),
            Decr => tape[*ptr] = tape[*ptr].wrapping_sub(1),
            #[allow(clippy::unbuffered_bytes)]
            Read => tape[*ptr] = io::stdin().bytes().next().unwrap().unwrap(),
            Write => print!("{}", tape[*ptr] as char),
            Loop(ast) => {
                while tape[*ptr] != 0 {
                    execute(ast, ptr, tape)
                }
            }
        }
    }
}

fn main() {
    let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument"))
        .expect("Failed to read file");

    match parser().parse(src.trim()).into_result() {
        Ok(ast) => execute(&ast, &mut 0, &mut [0; TAPE_LEN]),
        Err(errs) => errs.into_iter().for_each(|e| println!("{e:?}")),
    };
}
--------------------------------------------------------------------------------
/examples/foo.rs:
-------------------------------------------------------------------------------- 1 | //! This is the parser and interpreter for the 'Foo' language. 2 | //! 3 | //! See the tutorial in the guide to learn more about it: https://docs.rs/chumsky/latest/chumsky/guide/index.html 4 | 5 | use chumsky::prelude::*; 6 | 7 | #[derive(Debug)] 8 | enum Expr<'src> { 9 | Num(f64), 10 | Var(&'src str), 11 | 12 | Neg(Box>), 13 | Add(Box>, Box>), 14 | Sub(Box>, Box>), 15 | Mul(Box>, Box>), 16 | Div(Box>, Box>), 17 | 18 | Call(&'src str, Vec>), 19 | Let { 20 | name: &'src str, 21 | rhs: Box>, 22 | then: Box>, 23 | }, 24 | Fn { 25 | name: &'src str, 26 | args: Vec<&'src str>, 27 | body: Box>, 28 | then: Box>, 29 | }, 30 | } 31 | 32 | #[allow(clippy::let_and_return)] 33 | fn parser<'src>() -> impl Parser<'src, &'src str, Expr<'src>> { 34 | let ident = text::ascii::ident().padded(); 35 | 36 | let expr = recursive(|expr| { 37 | let int = text::int(10).map(|s: &str| Expr::Num(s.parse().unwrap())); 38 | 39 | let call = ident 40 | .then( 41 | expr.clone() 42 | .separated_by(just(',')) 43 | .allow_trailing() 44 | .collect::>() 45 | .delimited_by(just('('), just(')')), 46 | ) 47 | .map(|(f, args)| Expr::Call(f, args)); 48 | 49 | let atom = int 50 | .or(expr.delimited_by(just('('), just(')'))) 51 | .or(call) 52 | .or(ident.map(Expr::Var)) 53 | .padded(); 54 | 55 | let op = |c| just(c).padded(); 56 | 57 | let unary = op('-') 58 | .repeated() 59 | .foldr(atom, |_op, rhs| Expr::Neg(Box::new(rhs))); 60 | 61 | let product = unary.clone().foldl( 62 | choice(( 63 | op('*').to(Expr::Mul as fn(_, _) -> _), 64 | op('/').to(Expr::Div as fn(_, _) -> _), 65 | )) 66 | .then(unary) 67 | .repeated(), 68 | |lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs)), 69 | ); 70 | 71 | let sum = product.clone().foldl( 72 | choice(( 73 | op('+').to(Expr::Add as fn(_, _) -> _), 74 | op('-').to(Expr::Sub as fn(_, _) -> _), 75 | )) 76 | .then(product) 77 | .repeated(), 78 | |lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs)), 79 
| ); 80 | 81 | sum 82 | }); 83 | 84 | let decl = recursive(|decl| { 85 | let r#let = text::ascii::keyword("let") 86 | .ignore_then(ident) 87 | .then_ignore(just('=')) 88 | .then(expr.clone()) 89 | .then_ignore(just(';')) 90 | .then(decl.clone()) 91 | .map(|((name, rhs), then)| Expr::Let { 92 | name, 93 | rhs: Box::new(rhs), 94 | then: Box::new(then), 95 | }); 96 | 97 | let r#fn = text::ascii::keyword("fn") 98 | .ignore_then(ident) 99 | .then(ident.repeated().collect::>()) 100 | .then_ignore(just('=')) 101 | .then(expr.clone()) 102 | .then_ignore(just(';')) 103 | .then(decl) 104 | .map(|(((name, args), body), then)| Expr::Fn { 105 | name, 106 | args, 107 | body: Box::new(body), 108 | then: Box::new(then), 109 | }); 110 | 111 | r#let.or(r#fn).or(expr).padded() 112 | }); 113 | 114 | decl 115 | } 116 | 117 | fn eval<'src>( 118 | expr: &'src Expr<'src>, 119 | vars: &mut Vec<(&'src str, f64)>, 120 | funcs: &mut Vec<(&'src str, &'src [&'src str], &'src Expr<'src>)>, 121 | ) -> Result { 122 | match expr { 123 | Expr::Num(x) => Ok(*x), 124 | Expr::Neg(a) => Ok(-eval(a, vars, funcs)?), 125 | Expr::Add(a, b) => Ok(eval(a, vars, funcs)? + eval(b, vars, funcs)?), 126 | Expr::Sub(a, b) => Ok(eval(a, vars, funcs)? - eval(b, vars, funcs)?), 127 | Expr::Mul(a, b) => Ok(eval(a, vars, funcs)? * eval(b, vars, funcs)?), 128 | Expr::Div(a, b) => Ok(eval(a, vars, funcs)? 
/ eval(b, vars, funcs)?), 129 | Expr::Var(name) => { 130 | if let Some((_, val)) = vars.iter().rev().find(|(var, _)| var == name) { 131 | Ok(*val) 132 | } else { 133 | Err(format!("Cannot find variable `{name}` in scope")) 134 | } 135 | } 136 | Expr::Let { name, rhs, then } => { 137 | let rhs = eval(rhs, vars, funcs)?; 138 | vars.push((*name, rhs)); 139 | let output = eval(then, vars, funcs); 140 | vars.pop(); 141 | output 142 | } 143 | Expr::Call(name, args) => { 144 | if let Some((_, arg_names, body)) = 145 | funcs.iter().rev().find(|(var, _, _)| var == name).copied() 146 | { 147 | if arg_names.len() == args.len() { 148 | let mut args = args 149 | .iter() 150 | .map(|arg| eval(arg, vars, funcs)) 151 | .zip(arg_names.iter()) 152 | .map(|(val, name)| Ok((*name, val?))) 153 | .collect::>()?; 154 | let old_vars = vars.len(); 155 | vars.append(&mut args); 156 | let output = eval(body, vars, funcs); 157 | vars.truncate(old_vars); 158 | output 159 | } else { 160 | Err(format!( 161 | "Wrong number of arguments for function `{name}`: expected {}, found {}", 162 | arg_names.len(), 163 | args.len(), 164 | )) 165 | } 166 | } else { 167 | Err(format!("Cannot find function `{name}` in scope")) 168 | } 169 | } 170 | Expr::Fn { 171 | name, 172 | args, 173 | body, 174 | then, 175 | } => { 176 | funcs.push((name, args, body)); 177 | let output = eval(then, vars, funcs); 178 | funcs.pop(); 179 | output 180 | } 181 | } 182 | } 183 | 184 | fn main() { 185 | let usage = "Run `cargo run --example foo -- examples/sample.foo`"; 186 | let src = std::fs::read_to_string(std::env::args().nth(1).expect(usage)).expect(usage); 187 | 188 | match parser().parse(&src).into_result() { 189 | Ok(ast) => match eval(&ast, &mut Vec::new(), &mut Vec::new()) { 190 | Ok(output) => println!("{output}"), 191 | Err(eval_err) => println!("Evaluation error: {eval_err}"), 192 | }, 193 | Err(parse_errs) => parse_errs 194 | .into_iter() 195 | .for_each(|err| println!("Parse error: {err}")), 196 | }; 197 | } 198 | 
-------------------------------------------------------------------------------- /examples/indent.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | 3 | #[derive(Clone, Debug)] 4 | pub enum Stmt { 5 | Expr, 6 | Loop(Vec), 7 | } 8 | 9 | fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { 10 | let expr = just("expr"); // TODO 11 | 12 | let block = recursive(|block| { 13 | let indent = just(' ') 14 | .repeated() 15 | .configure(|cfg, parent_indent| cfg.exactly(*parent_indent)); 16 | 17 | let expr_stmt = expr.then_ignore(text::newline()).to(Stmt::Expr); 18 | let control_flow = just("loop:") 19 | .then(text::newline()) 20 | .ignore_then(block) 21 | .map(Stmt::Loop); 22 | let stmt = expr_stmt.or(control_flow); 23 | 24 | text::whitespace() 25 | .count() 26 | .ignore_with_ctx(stmt.separated_by(indent).collect()) 27 | }); 28 | 29 | block.with_ctx(0) 30 | } 31 | 32 | fn main() { 33 | let stmts = parser().padded().parse( 34 | r#" 35 | expr 36 | expr 37 | loop: 38 | expr 39 | loop: 40 | expr 41 | expr 42 | expr 43 | expr 44 | "#, 45 | ); 46 | println!("{:#?}", stmts.output()); 47 | println!("{:?}", stmts.errors().collect::>()); 48 | } 49 | -------------------------------------------------------------------------------- /examples/io.rs: -------------------------------------------------------------------------------- 1 | use chumsky::{error::LabelError, extra::ParserExtra, input::IoInput, prelude::*, util::MaybeRef}; 2 | use std::{env, fs::File}; 3 | 4 | #[allow(unused)] 5 | #[derive(Debug)] 6 | struct Foo { 7 | name: String, 8 | val: u32, 9 | } 10 | 11 | fn ident<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, String, E> { 12 | any() 13 | .filter(u8::is_ascii_alphabetic) 14 | .repeated() 15 | .at_least(1) 16 | .collect::>() 17 | .map(|v| String::from_utf8_lossy(&v).to_string()) 18 | } 19 | 20 | fn digits<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, String, E> { 21 | any() 22 | 
.filter(u8::is_ascii_digit) 23 | .repeated() 24 | .at_least(1) 25 | .collect::>() 26 | .map(|v| String::from_utf8_lossy(&v).to_string()) 27 | } 28 | 29 | fn parser<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, Vec, E> 30 | where 31 | E::Error: LabelError<'a, IoInput, MaybeRef<'a, u8>>, 32 | { 33 | group((ident(), just(b':').padded(), digits())) 34 | .map(|(name, _, digits)| Foo { 35 | name, 36 | val: digits.parse().unwrap(), 37 | }) 38 | .separated_by(just(b'\n')) 39 | .allow_trailing() 40 | .collect() 41 | } 42 | 43 | fn main() { 44 | let src = File::open(env::args().nth(1).expect("Expected file argument")) 45 | .expect("Failed to open file"); 46 | 47 | let json = parser::>>() 48 | .parse(IoInput::new(src)) 49 | .into_result(); 50 | println!("{json:#?}"); 51 | } 52 | -------------------------------------------------------------------------------- /examples/json.rs: -------------------------------------------------------------------------------- 1 | //! This is a parser for JSON. 2 | //! Run it with the following command: 3 | //! 
cargo run --example json -- examples/sample.json 4 | 5 | use ariadne::{Color, Label, Report, ReportKind, Source}; 6 | use chumsky::prelude::*; 7 | use std::{collections::HashMap, env, fs}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub enum Json { 11 | Invalid, 12 | Null, 13 | Bool(bool), 14 | Str(String), 15 | Num(f64), 16 | Array(Vec), 17 | Object(HashMap), 18 | } 19 | 20 | fn parser<'a>() -> impl Parser<'a, &'a str, Json, extra::Err>> { 21 | recursive(|value| { 22 | let digits = text::digits(10).to_slice(); 23 | 24 | let frac = just('.').then(digits); 25 | 26 | let exp = just('e') 27 | .or(just('E')) 28 | .then(one_of("+-").or_not()) 29 | .then(digits); 30 | 31 | let number = just('-') 32 | .or_not() 33 | .then(text::int(10)) 34 | .then(frac.or_not()) 35 | .then(exp.or_not()) 36 | .to_slice() 37 | .map(|s: &str| s.parse().unwrap()) 38 | .boxed(); 39 | 40 | let escape = just('\\') 41 | .then(choice(( 42 | just('\\'), 43 | just('/'), 44 | just('"'), 45 | just('b').to('\x08'), 46 | just('f').to('\x0C'), 47 | just('n').to('\n'), 48 | just('r').to('\r'), 49 | just('t').to('\t'), 50 | just('u').ignore_then(text::digits(16).exactly(4).to_slice().validate( 51 | |digits, e, emitter| { 52 | char::from_u32(u32::from_str_radix(digits, 16).unwrap()).unwrap_or_else( 53 | || { 54 | emitter.emit(Rich::custom(e.span(), "invalid unicode character")); 55 | '\u{FFFD}' // unicode replacement character 56 | }, 57 | ) 58 | }, 59 | )), 60 | ))) 61 | .ignored() 62 | .boxed(); 63 | 64 | let string = none_of("\\\"") 65 | .ignored() 66 | .or(escape) 67 | .repeated() 68 | .to_slice() 69 | .map(ToString::to_string) 70 | .delimited_by(just('"'), just('"')) 71 | .boxed(); 72 | 73 | let array = value 74 | .clone() 75 | .separated_by(just(',').padded().recover_with(skip_then_retry_until( 76 | any().ignored(), 77 | one_of(",]").ignored(), 78 | ))) 79 | .allow_trailing() 80 | .collect() 81 | .padded() 82 | .delimited_by( 83 | just('['), 84 | just(']') 85 | .ignored() 86 | 
.recover_with(via_parser(end())) 87 | .recover_with(skip_then_retry_until(any().ignored(), end())), 88 | ) 89 | .boxed(); 90 | 91 | let member = string.clone().then_ignore(just(':').padded()).then(value); 92 | let object = member 93 | .clone() 94 | .separated_by(just(',').padded().recover_with(skip_then_retry_until( 95 | any().ignored(), 96 | one_of(",}").ignored(), 97 | ))) 98 | .collect() 99 | .padded() 100 | .delimited_by( 101 | just('{'), 102 | just('}') 103 | .ignored() 104 | .recover_with(via_parser(end())) 105 | .recover_with(skip_then_retry_until(any().ignored(), end())), 106 | ) 107 | .boxed(); 108 | 109 | choice(( 110 | just("null").to(Json::Null), 111 | just("true").to(Json::Bool(true)), 112 | just("false").to(Json::Bool(false)), 113 | number.map(Json::Num), 114 | string.map(Json::Str), 115 | array.map(Json::Array), 116 | object.map(Json::Object), 117 | )) 118 | .recover_with(via_parser(nested_delimiters( 119 | '{', 120 | '}', 121 | [('[', ']')], 122 | |_| Json::Invalid, 123 | ))) 124 | .recover_with(via_parser(nested_delimiters( 125 | '[', 126 | ']', 127 | [('{', '}')], 128 | |_| Json::Invalid, 129 | ))) 130 | .recover_with(skip_then_retry_until( 131 | any().ignored(), 132 | one_of(",]}").ignored(), 133 | )) 134 | .padded() 135 | }) 136 | } 137 | 138 | fn main() { 139 | let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument")) 140 | .expect("Failed to read file"); 141 | 142 | let (json, errs) = parser().parse(src.trim()).into_output_errors(); 143 | println!("{json:#?}"); 144 | errs.into_iter().for_each(|e| { 145 | Report::build(ReportKind::Error, ((), e.span().into_range())) 146 | .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) 147 | .with_message(e.to_string()) 148 | .with_label( 149 | Label::new(((), e.span().into_range())) 150 | .with_message(e.reason().to_string()) 151 | .with_color(Color::Red), 152 | ) 153 | .finish() 154 | .print(Source::from(&src)) 155 | .unwrap() 156 | }); 157 | } 158 | 
-------------------------------------------------------------------------------- /examples/json_fast.rs: -------------------------------------------------------------------------------- 1 | //! This is a parser for JSON. Unlike `json.rs`, it is configured for speed over error quality. 2 | //! Run it with the following command: 3 | //! cargo run --example json_fast -- examples/sample.json 4 | 5 | use chumsky::prelude::*; 6 | use std::{collections::HashMap, env, fs}; 7 | 8 | #[derive(Clone, Debug)] 9 | pub enum Json { 10 | Null, 11 | Bool(bool), 12 | Str(String), 13 | Num(f64), 14 | Array(Vec), 15 | Object(HashMap), 16 | } 17 | 18 | fn parser<'a>() -> impl Parser<'a, &'a str, Json> { 19 | recursive(|value| { 20 | let digits = text::digits(10).to_slice(); 21 | 22 | let frac = just('.').then(digits); 23 | 24 | let exp = just('e') 25 | .or(just('E')) 26 | .then(one_of("+-").or_not()) 27 | .then(digits); 28 | 29 | let number = just('-') 30 | .or_not() 31 | .then(text::int(10)) 32 | .then(frac.or_not()) 33 | .then(exp.or_not()) 34 | .to_slice() 35 | .map(|s: &str| s.parse().unwrap()); 36 | 37 | let escape = just('\\') 38 | .then(choice(( 39 | just('\\'), 40 | just('/'), 41 | just('"'), 42 | just('b').to('\x08'), 43 | just('f').to('\x0C'), 44 | just('n').to('\n'), 45 | just('r').to('\r'), 46 | just('t').to('\t'), 47 | just('u').ignore_then(text::digits(16).exactly(4).to_slice().validate( 48 | |digits, _, emitter| { 49 | char::from_u32(u32::from_str_radix(digits, 16).unwrap()).unwrap_or_else( 50 | || { 51 | emitter.emit(Default::default()); 52 | '\u{FFFD}' // unicode replacement character 53 | }, 54 | ) 55 | }, 56 | )), 57 | ))) 58 | .ignored(); 59 | 60 | let string = none_of("\\\"") 61 | .ignored() 62 | .or(escape) 63 | .repeated() 64 | .to_slice() 65 | .map(ToString::to_string) 66 | .delimited_by(just('"'), just('"')); 67 | 68 | let array = value 69 | .clone() 70 | .separated_by(just(',').padded()) 71 | .allow_trailing() 72 | .collect() 73 | .padded() 74 | 
.delimited_by(just('['), just(']')); 75 | 76 | let member = string.then_ignore(just(':').padded()).then(value); 77 | let object = member 78 | .clone() 79 | .separated_by(just(',').padded()) 80 | .collect() 81 | .padded() 82 | .delimited_by(just('{'), just('}')); 83 | 84 | choice(( 85 | just("null").to(Json::Null), 86 | just("true").to(Json::Bool(true)), 87 | just("false").to(Json::Bool(false)), 88 | number.map(Json::Num), 89 | string.map(Json::Str), 90 | array.map(Json::Array), 91 | object.map(Json::Object), 92 | )) 93 | .padded() 94 | }) 95 | } 96 | 97 | fn main() { 98 | let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument")) 99 | .expect("Failed to read file"); 100 | 101 | let json = parser().parse(src.trim()).unwrap(); 102 | println!("{json:#?}"); 103 | } 104 | -------------------------------------------------------------------------------- /examples/logos.rs: -------------------------------------------------------------------------------- 1 | //! An example of using logos with chumsky to parse sexprs 2 | //! Run it with the following command: 3 | //! 
cargo run --example logos 4 | 5 | use ariadne::{Color, Label, Report, ReportKind, Source}; 6 | use chumsky::{ 7 | input::{Stream, ValueInput}, 8 | prelude::*, 9 | }; 10 | use logos::Logos; 11 | use std::fmt; 12 | 13 | #[derive(Logos, Clone, PartialEq)] 14 | enum Token<'a> { 15 | Error, 16 | 17 | #[regex(r"[+-]?([0-9]*[.])?[0-9]+")] 18 | Float(&'a str), 19 | 20 | #[token("+")] 21 | Add, 22 | #[token("-")] 23 | Sub, 24 | #[token("*")] 25 | Mul, 26 | #[token("/")] 27 | Div, 28 | 29 | #[token("(")] 30 | LParen, 31 | #[token(")")] 32 | RParen, 33 | 34 | #[regex(r"[ \t\f\n]+", logos::skip)] 35 | Whitespace, 36 | } 37 | 38 | impl fmt::Display for Token<'_> { 39 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 40 | match self { 41 | Self::Float(s) => write!(f, "{s}"), 42 | Self::Add => write!(f, "+"), 43 | Self::Sub => write!(f, "-"), 44 | Self::Mul => write!(f, "*"), 45 | Self::Div => write!(f, "/"), 46 | Self::LParen => write!(f, "("), 47 | Self::RParen => write!(f, ")"), 48 | Self::Whitespace => write!(f, ""), 49 | Self::Error => write!(f, ""), 50 | } 51 | } 52 | } 53 | 54 | #[derive(Debug)] 55 | enum SExpr { 56 | Float(f64), 57 | Add, 58 | Sub, 59 | Mul, 60 | Div, 61 | List(Vec), 62 | } 63 | 64 | // This function signature looks complicated, but don't fear! 
We're just saying that this function is generic over 65 | // inputs that: 66 | // - Can have tokens pulled out of them by-value, by cloning (`ValueInput`) 67 | // - Gives us access to slices of the original input (`SliceInput`) 68 | // - Produces tokens of type `Token`, the type we defined above (`Token = Token<'a>`) 69 | // - Produces spans of type `SimpleSpan`, a built-in span type provided by chumsky (`Span = SimpleSpan`) 70 | // The function then returns a parser that: 71 | // - Has an input type of type `I`, the one we declared as a type parameter 72 | // - Produces an `SExpr` as its output 73 | // - Uses `Rich`, a built-in error type provided by chumsky, for error generation 74 | fn parser<'tokens, 'src: 'tokens, I>( 75 | ) -> impl Parser<'tokens, I, SExpr, extra::Err>>> 76 | where 77 | I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>, 78 | { 79 | recursive(|sexpr| { 80 | let atom = select! { 81 | Token::Float(x) => SExpr::Float(x.parse().unwrap()), 82 | Token::Add => SExpr::Add, 83 | Token::Sub => SExpr::Sub, 84 | Token::Mul => SExpr::Mul, 85 | Token::Div => SExpr::Div, 86 | }; 87 | 88 | let list = sexpr 89 | .repeated() 90 | .collect() 91 | .map(SExpr::List) 92 | .delimited_by(just(Token::LParen), just(Token::RParen)); 93 | 94 | atom.or(list) 95 | }) 96 | } 97 | 98 | impl SExpr { 99 | // Recursively evaluate an s-expression 100 | fn eval(&self) -> Result { 101 | match self { 102 | Self::Float(x) => Ok(*x), 103 | Self::Add => Err("Cannot evaluate operator '+'"), 104 | Self::Sub => Err("Cannot evaluate operator '-'"), 105 | Self::Mul => Err("Cannot evaluate operator '*'"), 106 | Self::Div => Err("Cannot evaluate operator '/'"), 107 | Self::List(list) => match &list[..] { 108 | [Self::Add, tail @ ..] => tail.iter().map(SExpr::eval).sum(), 109 | [Self::Mul, tail @ ..] => tail.iter().map(SExpr::eval).product(), 110 | [Self::Sub, init, tail @ ..] => { 111 | Ok(init.eval()? - tail.iter().map(SExpr::eval).sum::>()?) 
112 | } 113 | [Self::Div, init, tail @ ..] => { 114 | Ok(init.eval()? / tail.iter().map(SExpr::eval).product::>()?) 115 | } 116 | _ => Err("Cannot evaluate list"), 117 | }, 118 | } 119 | } 120 | } 121 | 122 | const SRC: &str = r" 123 | (- 124 | (* (+ 4 7.3) 7) 125 | (/ 5 3) 126 | ) 127 | "; 128 | 129 | fn main() { 130 | // Create a logos lexer over the source code 131 | let token_iter = Token::lexer(SRC) 132 | .spanned() 133 | // Convert logos errors into tokens. We want parsing to be recoverable and not fail at the lexing stage, so 134 | // we have a dedicated `Token::Error` variant that represents a token error that was previously encountered 135 | .map(|(tok, span)| match tok { 136 | // Turn the `Range` spans logos gives us into chumsky's `SimpleSpan` via `Into`, because it's easier 137 | // to work with 138 | Ok(tok) => (tok, span.into()), 139 | Err(()) => (Token::Error, span.into()), 140 | }); 141 | 142 | // Turn the token iterator into a stream that chumsky can use for things like backtracking 143 | let token_stream = Stream::from_iter(token_iter) 144 | // Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us 145 | // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string 146 | .map((0..SRC.len()).into(), |(t, s): (_, _)| (t, s)); 147 | 148 | // Parse the token stream with our chumsky parser 149 | match parser().parse(token_stream).into_result() { 150 | // If parsing was successful, attempt to evaluate the s-expression 151 | Ok(sexpr) => match sexpr.eval() { 152 | Ok(out) => println!("Result = {out}"), 153 | Err(err) => println!("Runtime error: {err}"), 154 | }, 155 | // If parsing was unsuccessful, generate a nice user-friendly diagnostic with ariadne. You could also use 156 | // codespan, or whatever other diagnostic library you care about. 
You could even just display-print the errors 157 | // with Rust's built-in `Display` trait, but it's a little crude 158 | Err(errs) => { 159 | for err in errs { 160 | Report::build(ReportKind::Error, ((), err.span().into_range())) 161 | .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) 162 | .with_code(3) 163 | .with_message(err.to_string()) 164 | .with_label( 165 | Label::new(((), err.span().into_range())) 166 | .with_message(err.reason().to_string()) 167 | .with_color(Color::Red), 168 | ) 169 | .finish() 170 | .eprint(Source::from(SRC)) 171 | .unwrap(); 172 | } 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /examples/nested.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | 3 | // This token is a tree: it contains within it a sub-tree of tokens 4 | #[derive(PartialEq, Debug)] 5 | enum Token { 6 | Num(i64), 7 | Add, 8 | Mul, 9 | Parens(Vec), 10 | } 11 | 12 | #[allow(clippy::let_and_return)] 13 | fn parser<'a>() -> impl Parser<'a, &'a [Token], i64> { 14 | recursive(|expr| { 15 | let num = select_ref! { Token::Num(x) => *x }; 16 | let parens = expr 17 | // Here we specify how the parser should come up with the nested tokens 18 | .nested_in(select_ref! 
{ Token::Parens(xs) => xs.as_slice() }); 19 | 20 | let atom = num.or(parens); 21 | 22 | let product = atom 23 | .clone() 24 | .foldl(just(&Token::Mul).ignore_then(atom).repeated(), |a, b| a * b); 25 | 26 | let sum = product 27 | .clone() 28 | .foldl(just(&Token::Add).ignore_then(product).repeated(), |a, b| { 29 | a + b 30 | }); 31 | 32 | sum 33 | }) 34 | } 35 | 36 | fn main() { 37 | // This token tree represents the expression `(2 + 3) * 4` 38 | let tokens = [ 39 | Token::Parens(vec![Token::Num(2), Token::Add, Token::Num(3)]), 40 | Token::Mul, 41 | Token::Num(4), 42 | ]; 43 | 44 | assert_eq!(parser().parse(&tokens).into_result(), Ok(20)); 45 | } 46 | -------------------------------------------------------------------------------- /examples/nested_spans.rs: -------------------------------------------------------------------------------- 1 | use chumsky::{input::BorrowInput, prelude::*}; 2 | 3 | // This token is a tree: it contains within it a sub-tree of tokens 4 | #[derive(PartialEq, Debug)] 5 | enum Token { 6 | Num(i64), 7 | Add, 8 | Mul, 9 | Parens(Vec<(Token, SimpleSpan)>), 10 | } 11 | 12 | #[allow(clippy::let_and_return)] 13 | fn parser<'src, I, M>(make_input: M) -> impl Parser<'src, I, i64> 14 | where 15 | I: BorrowInput<'src, Token = Token, Span = SimpleSpan>, 16 | M: Fn(SimpleSpan, &'src [(Token, SimpleSpan)]) -> I + Clone + 'src, 17 | { 18 | recursive(|expr| { 19 | let num = select_ref! { Token::Num(x) => *x }; 20 | let parens = expr 21 | // Here we specify that `expr` should appear *inside* the parenthesised token tree 22 | .nested_in(select_ref! 
{ Token::Parens(xs) = e => make_input(e.span(), xs) }); 23 | 24 | let atom = num.or(parens); 25 | 26 | let product = atom 27 | .clone() 28 | .foldl(just(&Token::Mul).ignore_then(atom).repeated(), |a, b| a * b); 29 | 30 | let sum = product 31 | .clone() 32 | .foldl(just(&Token::Add).ignore_then(product).repeated(), |a, b| { 33 | a + b 34 | }); 35 | 36 | sum 37 | }) 38 | } 39 | 40 | fn make_input( 41 | eoi: SimpleSpan, 42 | toks: &[(Token, SimpleSpan)], 43 | ) -> impl BorrowInput<'_, Token = Token, Span = SimpleSpan> { 44 | toks.map(eoi, |(t, s)| (t, s)) 45 | } 46 | 47 | fn main() { 48 | // This token tree represents the expression `(2 + 3) * 4` 49 | let tokens = [ 50 | ( 51 | Token::Parens(vec![ 52 | (Token::Num(2), SimpleSpan::new((), 1..2)), 53 | (Token::Add, SimpleSpan::new((), 3..4)), 54 | (Token::Num(3), SimpleSpan::new((), 5..6)), 55 | ]), 56 | SimpleSpan::new((), 0..7), 57 | ), 58 | (Token::Mul, SimpleSpan::new((), 8..9)), 59 | (Token::Num(4), SimpleSpan::new((), 10..11)), 60 | ]; 61 | 62 | let eoi = SimpleSpan::new((), 11..11); // Example EoI 63 | 64 | assert_eq!( 65 | parser(make_input) 66 | .parse(make_input(eoi, &tokens)) 67 | .into_result(), 68 | Ok(20) 69 | ); 70 | } 71 | -------------------------------------------------------------------------------- /examples/pythonic.rs: -------------------------------------------------------------------------------- 1 | /*use chumsky::{zero_copy::prelude::*, BoxStream, Flat}; 2 | use std::ops::Range; 3 | 4 | // Represents the different kinds of delimiters we care about 5 | #[derive(Copy, Clone, Debug)] 6 | enum Delim { 7 | Paren, 8 | Block, 9 | } 10 | 11 | // An 'atomic' token (i.e: it has no child tokens) 12 | #[derive(Clone, Debug)] 13 | enum Token { 14 | Int(u64), 15 | Ident(String), 16 | Op(String), 17 | Open(Delim), 18 | Close(Delim), 19 | } 20 | 21 | // The output of the lexer: a recursive tree of nested tokens 22 | #[derive(Debug, Clone)] 23 | enum TokenTree { 24 | Token(Token), 25 | Tree(Delim, Vec>), 26 | } 
27 | 28 | type Span = Range; 29 | 30 | type Spanned = (T, Span); 31 | 32 | // A parser that turns pythonic code with semantic whitespace into a token tree 33 | fn lexer<'a>() -> impl Parser<'a, str, Vec>> { 34 | let tt = recursive::<'a, str, _, _, _, _, _>(|tt| { 35 | // Define some atomic tokens 36 | let int = text::int::<'a, str, _, _, _>(10) 37 | .from_str() 38 | .unwrapped() 39 | .map(Token::Int); 40 | let ident = text::ascii::ident::<'a, str, _, _, _>().map(|s| Token::Ident(s.to_string())); 41 | let op = one_of("=.:%,") 42 | .repeated() 43 | .at_least(1) 44 | .collect() 45 | .map(Token::Op); 46 | 47 | let single_token = int.or(op).or(ident).map(|t| TokenTree::Token(t.clone())); 48 | 49 | // Tokens surrounded by parentheses get turned into parenthesised token trees 50 | let token_tree = tt 51 | .padded() 52 | .repeated() 53 | .collect() 54 | .delimited_by(just('('), just(')')) 55 | .map(|tts| TokenTree::Tree(Delim::Paren, tts)); 56 | 57 | single_token 58 | .or(token_tree) 59 | .map_with_span(|tt, span| (tt, span)) 60 | }); 61 | 62 | // Whitespace indentation creates code block token trees 63 | text::semantic_indentation(tt, |tts, span| (TokenTree::Tree(Delim::Block, tts), span)) 64 | } 65 | 66 | /// Flatten a series of token trees into a single token stream, ready for feeding into the main parser 67 | fn tts_to_stream( 68 | eoi: Span, 69 | token_trees: Vec>, 70 | ) -> BoxStream<'static, Token, Span> { 71 | use std::iter::once; 72 | 73 | BoxStream::from_nested(eoi, token_trees.into_iter(), |(tt, span)| match tt { 74 | // Single tokens remain unchanged 75 | TokenTree::Token(token) => Flat::Single((token, span)), 76 | // Nested token trees get flattened into their inner contents, surrounded by `Open` and `Close` tokens 77 | TokenTree::Tree(delim, tree) => Flat::Many( 78 | once((TokenTree::Token(Token::Open(delim)), span.clone())) 79 | .chain(tree.into_iter()) 80 | .chain(once((TokenTree::Token(Token::Close(delim)), span))), 81 | ), 82 | }) 83 | } 84 | 85 | fn 
main() { 86 | let code = include_str!("sample.py"); 87 | 88 | // First, lex the code into some nested token trees 89 | let tts = lexer().parse(code).into_output().unwrap(); 90 | 91 | println!("--- Token Trees ---\n{:#?}", tts); 92 | 93 | // Next, flatten 94 | let eoi = 0..code.chars().count(); 95 | let mut token_stream = tts_to_stream(eoi, tts); 96 | 97 | // At this point, we have a token stream that can be fed into the main parser! Because this is just an example, 98 | // we're instead going to just collect the token stream into a vector and print it. 99 | 100 | let flattened_trees = token_stream.fetch_tokens().collect::>(); 101 | 102 | println!("--- Flattened Token Trees ---\n{:?}", flattened_trees); 103 | } 104 | */ 105 | 106 | fn main() {} 107 | -------------------------------------------------------------------------------- /examples/sample.bf: -------------------------------------------------------------------------------- 1 | --[>--->->->++>-<<<<<-------]>--.>---------.>--..+++.>----.>+++++++++.<<.+++.------.<-.>>+. 
2 | -------------------------------------------------------------------------------- /examples/sample.foo: -------------------------------------------------------------------------------- 1 | let five = 5; 2 | let eight = 3 + five; 3 | fn add x y = x + y; 4 | add(five, eight) 5 | -------------------------------------------------------------------------------- /examples/sample.io: -------------------------------------------------------------------------------- 1 | a: 1 2 | b: 2 3 | c: 3 4 | -------------------------------------------------------------------------------- /examples/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "leaving": { 3 | "tail": [ 4 | -2063823378.8597813, 5 | true, 6 | !false, 7 | null, 8 | -153646.6402, 9 | "board", 10 | ]], 11 | "fed": -283765067.9149623, 12 | "cowboy": -355139449!, 13 | "although": --794127593.3922591, 14 | "front": "college",, 15 | "origin": 981339097, 16 | }, 17 | "though": ttrue asasjk, 18 | "invalid": "\uDFFF", 19 | "activity": "value", 20 | "office": -342325541.1937506, 21 | "noise": false, 22 | "acres": "home", 23 | "foo": [!], 24 | } 25 | -------------------------------------------------------------------------------- /examples/sample.mini_ml: -------------------------------------------------------------------------------- 1 | let add = fn x y = x + y in 2 | let mul = fn x y = x * y in 3 | let x = mul (add 5 42) 2 in 4 | add x 3.5 5 | -------------------------------------------------------------------------------- /examples/sample.nrs: -------------------------------------------------------------------------------- 1 | // Run this example with `cargo run --example nano_rust -- examples/sample.nrs` 2 | // Feel free to play around with this sample to see what errors you can generate! 3 | // Spans are propagated to the interpreted AST so you can even invoke runtime 4 | // errors and still have an error message that points to source code emitted! 
5 | 6 | fn mul(x, y) { 7 | x * y 8 | } 9 | 10 | // Calculate the factorial of a number 11 | fn factorial(x) { 12 | // Conditionals are supported! 13 | if x == 0 { 14 | 1 15 | } else { 16 | mul(x, factorial(x - 1)) 17 | } 18 | } 19 | 20 | // The main function 21 | fn main() { 22 | let three = 3; 23 | let meaning_of_life = three * 14 + 1; 24 | 25 | print("Hello, world!"); 26 | print("The meaning of life is..."); 27 | 28 | if meaning_of_life == 42 { 29 | print(meaning_of_life); 30 | } else { 31 | print("...something we cannot know"); 32 | 33 | print("However, I can tell you that the factorial of 10 is..."); 34 | // Function calling 35 | print(factorial(10)); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/sample.py: -------------------------------------------------------------------------------- 1 | import turtle 2 | 3 | board = turtle.Turtle( 4 | foo, 5 | bar, 6 | baz, 7 | ) 8 | 9 | for i in range(6): 10 | board.forward(50) 11 | if i % 2 == 0: 12 | board.right(144) 13 | else: 14 | board.left(72) 15 | 16 | turtle.done() 17 | -------------------------------------------------------------------------------- /examples/zero-copy.rs: -------------------------------------------------------------------------------- 1 | use chumsky::prelude::*; 2 | 3 | #[derive(PartialEq, Debug)] 4 | enum Token<'a> { 5 | Ident(&'a str), 6 | String(&'a str), 7 | } 8 | 9 | // This parser is guaranteed to never allocate! 
10 | fn parser<'a>() -> impl Parser<'a, &'a str, [(SimpleSpan, Token<'a>); 6]> { 11 | let ident = any() 12 | .filter(|c: &char| c.is_alphanumeric()) 13 | .repeated() 14 | .at_least(1) 15 | .to_slice() 16 | .map(Token::Ident); 17 | 18 | let string = just('"') 19 | .then(any().filter(|c: &char| *c != '"').repeated()) 20 | .then(just('"')) 21 | .to_slice() 22 | .map(Token::String); 23 | 24 | ident 25 | .or(string) 26 | .map_with(|token, e| (e.span(), token)) 27 | .padded() 28 | .repeated() 29 | .collect_exactly() 30 | } 31 | 32 | fn main() { 33 | assert_eq!( 34 | parser() 35 | .parse(r#"hello "world" these are "test" tokens"#) 36 | .into_result(), 37 | Ok([ 38 | ((0..5).into(), Token::Ident("hello")), 39 | ((6..13).into(), Token::String("\"world\"")), 40 | ((14..19).into(), Token::Ident("these")), 41 | ((20..23).into(), Token::Ident("are")), 42 | ((24..30).into(), Token::String("\"test\"")), 43 | ((31..37).into(), Token::Ident("tokens")), 44 | ]), 45 | ); 46 | } 47 | -------------------------------------------------------------------------------- /guide/README.md: -------------------------------------------------------------------------------- 1 | # Guide 2 | 3 | Chumsky's guide is intended to be viewed through [docs.rs](https://docs.rs/chumsky/latest/chumsky/guide/index.html). 4 | 5 | ## For contributors 6 | 7 | When modifying the guide, please remember to test the docs via rustdoc. You can do this via this command: 8 | 9 | ``` 10 | RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features 11 | ``` 12 | 13 | Appending `--open` will cause the docs to open in your web browser when built. 
14 | -------------------------------------------------------------------------------- /guide/debugging.md: -------------------------------------------------------------------------------- 1 | # Debugging 2 | 3 | *TODO* 4 | -------------------------------------------------------------------------------- /guide/error_and_recovery.md: -------------------------------------------------------------------------------- 1 | # Error And Recovery 2 | 3 | *TODO* 4 | -------------------------------------------------------------------------------- /guide/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | Setting yourself up to use chumsky can be done in a few easy steps. 4 | 5 | - [Adding chumsky as a dependency](#adding-chumsky-as-a-dependency) 6 | 7 | - [Creating parsers](#creating-parsers) 8 | 9 | - [Using parsers](#using-parsers) 10 | 11 | - [Advice](#advice) 12 | 13 | - [Compiler errors](#compiler-errors) 14 | 15 | - [Compilation times](#compilation-times) 16 | 17 | - [Debugging parsers](#debugging-parsers) 18 | 19 | ## Adding chumsky as a dependency 20 | 21 | Chumsky can be added as a project dependency in one of two ways. 22 | 23 | 1) By executing the following command in your cargo project: 24 | 25 | ```sh 26 | $ cargo add chumsky 27 | ``` 28 | 29 | 2) By adding the following to your `Cargo.toml` file: 30 | 31 | ```toml 32 | chumsky = "0.10" 33 | ``` 34 | 35 |
36 | A note about Minimum Supported Rust Versions (MSRVs) 37 |

38 | Minimum Supported Rust Version (MSRV) 39 | 40 | Chumsky currently has a MSRV of **1.65** due to internal systems that require Generic Associated Types (GATs). If you 41 | find that chumsky fails to compile on versions of Rust later than or equal to 1.65, please 42 | [open a bug report](https://github.com/zesterer/chumsky/issues/new). 43 | 44 | Please note that chumsky's `nightly` feature is exempt from this minimum version requirement and may require up to and 45 | including the latest nightly Rust compiler to work. 46 |

47 |
48 | 49 | Back in your source code, you can use chumsky's prelude to import all commonly used types, traits, and functions. 50 | 51 | ``` 52 | use chumsky::prelude::*; 53 | ``` 54 | 55 | Alternatively, you can import whatever you need manually, but this can get rather tiresome. 56 | 57 | The prelude contains all of the pieces you need to get started, although more complex parsers will likely need to 58 | explicitly import less commonly used items. 59 | 60 | ## Creating parsers 61 | 62 | Because chumsky uses typed combinators to express parsers, parser type signatures can become a little unwieldy. For this 63 | reason, it's common practice to leave the heavy work of dealing with types to the compiler by making use of Rust's 64 | [`impl Trait`](https://doc.rust-lang.org/stable/rust-by-example/trait/impl_trait.html) syntax. 65 | 66 | Here's an example of a typical parser function. We'll go over what each part means. 67 | 68 | ``` 69 | # use chumsky::prelude::*; 70 | // (1) (2) (3) (4) 71 | // _|__ _____|_____ ____|____ |_ 72 | fn parser<'src>() -> impl Parser<'src, &'src str, ()> { 73 | end() // --(5) 74 | } 75 | ``` 76 | 77 | 1. Parsers are parameterised over the lifetime of their inputs. Because we don't yet know what input our parser will be 78 | used to parse, we declare a generic lifetime, `'src`, to allow the parser to work with whatever input lifetime it 79 | needs to work with. 80 | 81 | 2. Because large parsers can have rather unwieldy types, we save ourselves the need to declare the exact return type 82 | with Rust's `impl Trait` syntax. This says to the compiler "we don't actually care what type is returned here, but 83 | it needs to implement the `Parser<'src, &'src, str, ()>` trait, you figure it out". 
Note that, unlike `dyn Trait` 84 | syntax, `impl Trait` has no runtime cost: the compiler simply *hides* the type from you rather than performing 85 | *type erasure*, which would require performing [dynamic dispatch](https://en.wikipedia.org/wiki/Dynamic_dispatch) 86 | while your code is running. 87 | 88 | 3. The first type parameter (i.e: ignoring the lifetime parameter) of the [`Parser`] trait is the input type. Inputs 89 | must implement the [`Input`] trait. Examples of inputs include strings, slices, arrays, [`Stream`]s, and much more. 90 | For now we specify that this parser can only operate upon string slices: but it is also possible to introduce the 91 | input type as a generic type parameter like `I: Input<'src>` instead if you want your parser to be generic across 92 | more than just string slices. 93 | 94 | 4. The second type parameter of the [`Parser`] trait is the output type. This is the type of the value that your parser 95 | will eventually give you, assuming that parsing was successful. For now, we just use an output type of [`()`], i.e: 96 | nothing. 97 | 98 | 5. Because this is just an example parser, the implementation is just a single parser primitive, [`end`]. This is a 99 | primitive that recognises only the end of the input and generates an error if it does not find it. This means that 100 | our parser effectively just checks that we pass it an empty string: anything else will generate an error. 101 | 102 | Note that this function only *creates* the parser: it does not, by itself, perform any parsing. 103 | 104 | ## Using parsers 105 | 106 | It's all very well creating parsers but in order to write useful programs, we need to invoke them. 
Chumsky provides 107 | several functions for this, but the main two are: 108 | 109 | - [`Parser::parse`]: parses an input, generating an output value and/or any errors that were encountered along the way 110 | 111 | - [`Parser::check`]: checks that an input is valid, generating any errors that were encountered along the way 112 | 113 | Both functions give us back a [`ParseResult`]. You can think of this sort of like Rust's regular [`Result`] type, except 114 | it allows both outputs and errors to be generated at the same time (although we won't yet use this functionality). If 115 | you just want parsing to be an all-or-nothing affair, you can use [`ParseResult::into_result`] to convert this into a 116 | regular [`Result`]. 117 | 118 | Let's write some tests for the parser we wrote in the last section. 119 | 120 | ``` 121 | # use chumsky::prelude::*; 122 | # fn parser<'src>() -> impl Parser<'src, &'src str, ()> { end() } 123 | #[test] 124 | fn test_parser() { 125 | // Our parser expects empty strings, so this should parse successfully 126 | assert_eq!(parser().parse("").into_result(), Ok(())); 127 | 128 | // Anything other than an empty string should produce an error 129 | assert!(parser().parse("123").has_errors()); 130 | } 131 | ``` 132 | 133 | Hopefully, this code is fairly self-explanatory. We call `parse()` (the function we wrote in the previous section) to 134 | create an instance of our parsers, and then we call [`Parser::parse`] on it with the desired input to actually do some 135 | parsing. The return value is the result of the parse. 136 | 137 | From here, the world is your lobster: you can move on to the tutorial sections of this guide or you can jump write into 138 | writing parsers. The main repository has [plenty of examples](https://github.com/zesterer/chumsky/tree/main/examples) 139 | to use as a reference and the crate has documentation that will help guide you, with many examples. 
140 | 141 | ## Advice 142 | 143 | Chumsky is a powerful crate with a lot of bells and whistles. It makes sense that there also a lot of ways things can go 144 | wrong too. 145 | 146 | ### Compiler errors 147 | 148 | Chumsky is a combinator crate and leans heavily into Rust's type system (traits, generics, etc.) in order to combine 149 | high performance and ergonomics. Unfortunately, the Rust compiler can still struggle to generate useful error messages 150 | for large chumsky parsers (although things have improved substantially in recent releases!). When you hit a compiler 151 | error you're struggling to understand, you should: 152 | 153 | 1. Always solve the first error that Rust generates. Rust generates errors in the order that it finds them, so the first 154 | error is usually reliably accurate while later errors tend to get increasingly speculative as the compiler needs to 155 | make more and more assumptions about your program to handle prior errors. This often results in many additional 156 | 'phantom errors': errors that muddy the water and make it look like the problem is more complicated to solve than it 157 | actually is. 158 | 159 | 2. Reduce the size of types. Thankfully Rust has recently taken steps to avoid printing extremely long type signatures 160 | out to the terminal. Even so, parser types can still be rather large. You can reduce this problem by commenting out 161 | unnecessary parts of your parser, or using `.simplify()` on parsers that contribute to the error to simplify their 162 | types. 163 | 164 | 3. Complaints about types 'not implementing [`Parser`]' are more often than not a failure to fulfil the obligations that 165 | come with implementing the trait. 
For example, [`recursive()`] requires that the inner parser implements `Clone`: a 166 | parser that doesn't (because, say, you moved a non-cloneable type into the closure) can't be used with 167 | [`recursive()`] and so Rust will translate this, in its parlance, to the type not implementing [`Parser`]. 168 | 169 | ### Compilation times 170 | 171 | Chumsky's heavy use of Rust's type system can result in parsers taking some time to compile. In particular, a common 172 | cause of long compilation times are long chains of [`Parser::or`], which sadly tend to produce exponential behaviour in 173 | Rust's trait solver. 174 | 175 | **Don't fear! There are solutions.** 176 | 177 | 1. Replace long (more than a handful of cases) [`Parser::or`] chains with [`choice`], which has identical behaviour but 178 | gives Rust's trait solver a much easier time. 179 | 180 | 2. Use [`Parser::boxed`] at the end of longer parser chains to perform type erasure, thereby reducing the amount of work 181 | Rust needs to do to understand your parser. If you've been using Rust for a while, your first intention might be to 182 | feel nauseous as such a suggestion: "*allocation?* In *my* high-performance code? *No thanks*". However, remember 183 | that this allocation only occurs on parser *creation*, not during the parsing process. A few strategically placed 184 | `.boxed()` calls has almost no effect on parsing performance (modern CPU branch predictors have absolutely no trouble 185 | eliminating their cost), and in fact can sometimes *improve* performance! 186 | 187 | ### Debugging parsers 188 | 189 | TODO 190 | -------------------------------------------------------------------------------- /guide/intro.md: -------------------------------------------------------------------------------- 1 | # Welcome to chumsky 2 | 3 | Welcome to the guide for chumsky, a parser combinator library for Rust that lets you write high-performance, expressive 4 | parsers with ease. 
5 | 6 | Here you will find useful resources for learning about both chumsky and parsing more broadly, along with many examples. 7 | There is also a tutorial that will guide you through the implementation of a simple programming language (complete with 8 | a simple interpreter) using chumsky for both lexing and parsing. 9 | 10 | This guide is split up into several sections. 11 | 12 | ## Contents 13 | 14 | - [Getting Started](./_00_getting_started/index.html): Getting started with chumsky, including creating and using parsers 15 | - [Key Concepts](./_01_key_concepts/index.html): Some brief theory and an introduction to chumsky's core API features 16 | - [Meet The Parsers](./_02_meet_the_parsers/index.html): An overview of the primitives and combinators provided by chumsky 17 | - [Error and recovery](./_03_error_and_recovery/index.html): Using chumsky to generate and recover from errors 18 | - [Recursion](./_04_recursion/index.html): How chumsky handles recursion and self-reference 19 | - [Debugging](./_05_debugging/index.html): Tips and tricks for debugging chumsky parsers 20 | - [Technical Notes](./_06_technical_notes/index.html): Information about chumsky for advanced users 21 | 22 | - [Foo: A Tutorial](./_07_tutorial/index.html): Example implementation of a simple programming language using chumsky 23 | -------------------------------------------------------------------------------- /guide/key_concepts.md: -------------------------------------------------------------------------------- 1 | # Key Concepts 2 | 3 | This section is mostly a glossary of terms and concepts. Feel free to skip to the sections that most interest you. 
4 | 5 | - [What are parser combinators?](#what-are-parser-combinators) 6 | 7 | - [Parsers](#parsers) 8 | 9 | - [Declarative style](#declarative-style) 10 | 11 | - [Combinators](#combinators) 12 | 13 | - [Primitives](#primitives) 14 | 15 | - [API features](#api-features) 16 | 17 | - [The `Parser` trait](#the-parser-trait) 18 | 19 | - [The `Input` trait](#the-input-trait) 20 | 21 | - [The `Error` trait](#the-error-trait) 22 | 23 | - [The `Span` trait](#the-span-trait) 24 | 25 | # What are parser combinators? 26 | 27 | Chumsky is a **declarative parser combinator** library. Let's break that down to explain what it means. 28 | 29 | ## Parsers 30 | 31 | Parsers are programs (or, for our purposes, *functions*) which take **unstructured** inputs and produce 32 | **structured** outputs according to a set of rules called a **grammar**. 33 | 34 | What counts as structured and unstructured depends on the context. To a 35 | [lexer](https://en.wikipedia.org/wiki/Lexical_analysis), a list of tokens might count as a structured output, but to the 36 | parser that consumes them as an input, they look rather less structured. 37 | 38 | Because the set of possible unstructured inputs to a parser (such as bytes in a text file) is generally larger than 39 | those that can be correctly translated to the structured output according to the grammar rules (such as an 40 | [Abstract Syntax Tree](https://en.m.wikipedia.org/wiki/Abstract_syntax_tree)), parsers need a way to generate **errors** 41 | when these invalid inputs are encountered. 42 | 43 | ## Declarative style 44 | 45 | If you've hand-written a parser before, it was likely in the 46 | [**imperative**](https://en.wikipedia.org/wiki/Imperative_programming) style: which is to say that you used code to tell 47 | your program *how* to parse inputs. This is a valid approach to writing parsers, and many successful parsers are written 48 | in an imperative style. 
49 | 50 | However, imperative-style parsers are often extremely 'noisy': resulting in parser code that is long, difficult to 51 | maintain, is hard to read, time-consuming to optimise, and easy to break, and difficult to debug. 52 | 53 | In comparison, chumsky encourages you to write [**declarative**](https://en.wikipedia.org/wiki/Declarative_programming) 54 | parsers. In the declarative style, instead of telling your code *how* to parse inputs, you tell it *what* to parse. This 55 | is a much more grounded and to-the-point approach to implementing parsers, allowing you to focus on the grammar rules 56 | you want to parse instead of spending ages debugging and maintaining imperative-style parser logic. 57 | 58 | If you search for information about declarative parsers (and in particular, parser combinators), you'll often hear it 59 | said that they're slow and imprecise. While this might have been true in decades gone by, modern optimising compilers - 60 | and in particular Rust's powerful type system - make the development of expressive declarative parsers that are as fast (or 61 | faster!) than hand-written parsers both easy and quick. 62 | 63 | ## Combinators 64 | 65 | Modern software is written primarily through through the use of *functions*. Each function performs a specific task and 66 | may call out to sub-functions. To create a whole program, it is necessary to **combine** functions to get the desired 67 | behaviour of the program as a whole. 68 | 69 | Parser combinators take this approach and apply it to parsing: a parser written with a combinator approach is composed 70 | of many smaller sub-parsers that are each able to process a sub-section of the overall grammar rules. These sub-parsers 71 | are then *combined* with parser operators known as **combinators** that define how they relate to one-another. 72 | 73 | Chumsky comes with many [`combinator`]s that allow the creation of even very complex grammars. 
Indeed, parsers for 74 | entire programming languages may be easily written with chumsky. 75 | 76 | As with most things, it's turtles all the way down: each sub-parser is then composed of sub-sub-parsers, which is itself 77 | composed of sub-sub-sub-parsers, until we reach the most basic elements of the parser logic. 78 | 79 | 🐢 80 | 81 | ## Primitives 82 | 83 | Primitives are the most basic elements of chumsky's parser logic. They are built-in components provided by chumsky 84 | (although it is possible to write your own!). Primitives each perform a very simple action that by itself seems almost 85 | trivial. For example, they might recognise a specific keyword or even just a single character. 86 | 87 | Chumsky comes with several [`primitive`] parsers that each perform a specific job. 88 | 89 | # API features 90 | 91 | ## The [`Parser`] trait 92 | 93 | A fundamental concept in chumsky is that of the [`Parser`] trait. All parser (both combinators and primitives) implement 94 | it and the combinator methods on it are the primary way through which a parser is defined. 95 | 96 | [`Parser`] also provides several *invocation* methods such as [`Parser::parse`] and [`Parser::check`]: these functions 97 | allow you to actually give inputs to your parser and have it generate outputs and/or errors. 98 | 99 | Check out the [`primitive`], [`combinator`], [`mod@recursive`], and [`mod@regex`] modules for examples of some of the parsers 100 | that chumsky provides. 101 | 102 | ## The [`Input`] trait 103 | 104 | The [`Input`] trait is implemented by all types that can act as inputs to chumsky parsers. For example, it is 105 | implemented by types such as: 106 | 107 | - `&[T]`: Array slices 108 | 109 | - `&str`: String slices 110 | 111 | - [`Stream`]: Dynamically-growing token streams 112 | 113 | Certain inputs have special properties. 
For example, it is possible to borrow `&T` tokens from `&[T]` array slices, but 114 | not `char`s from `&str` string slices (due to their UTF-8 encoding). Additionally, some inputs can have sub-slices taken 115 | from them. All of these operations are potentially useful to a parser, so chumsky expresses them with a set of extension 116 | traits that add extra functionality on top of the base [`Input`] trait: 117 | 118 | - [`ValueInput`]: for inputs that can have tokens copied/cloned from them by-value 119 | 120 | - [`BorrowInput`]: for inputs that can have individual tokens borrowed from them 121 | 122 | - [`SliceInput`]: for inputs that can have entire sub-slices of tokens borrowed from them 123 | 124 | - [`StrInput`]: for inputs that 'look like' text strings: ASCII byte slices (`&[u8]`) and UTF-8 string slices (`&str`) 125 | 126 | Taken together, these traits give chumsky the power to use many different types as input: bytes, strings, tokens, 127 | token trees, iterators, and much more besides. 128 | 129 | ## The [`Error`] trait 130 | 131 | As discussed previously, parsers commonly need to be able to handle inputs that don't conform to the grammar rules that 132 | they implement. To do this, they need to be able to emit errors that can then be processed by either the system that 133 | invoked the parser, or by a human user, in order to communicate what went wrong. 
134 | 135 | Chumsky provides support for expressive error generation through its [`Error`] trait, along with a series of built-in 136 | error types that have different tradeoffs: 137 | 138 | - [`EmptyErr`]: the default 'null' error that doesn't record any useful information other than the fact that an error 139 | occurred 140 | 141 | - [`Cheap`]: a very efficient error type that records only the span of the input that triggered the error 142 | 143 | - [`Simple`]: a simplistic error type that records both the span that triggered the error and whatever token was 144 | erroneously found 145 | 146 | - [`Rich`]: a very information-rich error type that records: 147 | 148 | - The span that triggered the error 149 | 150 | - The token that was erroneously found instead 151 | 152 | - A list of tokens or patterns that were expected at the span location instead 153 | 154 | [`Rich`] also supports many additional features such as custom error messages, labelling (see [`Parser::labelled`]) and 155 | error merging. 156 | 157 | Obviously, errors that express more detailed information are also slower to generate and hence reduce the performance of 158 | the overall parser. In benchmarks, we tend to find that parsers using [`Rich`] typically run at about half the speed as 159 | those using [`EmptyErr`], although this is very likely to improve as time goes on. 160 | 161 | It is typical to take the data encoded in these types and give them to a 'diagnostic generator', a tool intended to turn 162 | error information into pretty human-readable displays suited for printing into a terminal, displaying in an IDE, or 163 | whatever other form of output is required. 164 | 165 | ## The [`Span`] trait 166 | 167 | Spans are ranges (usually byte offsets, but you can use whatever is most convenient for you) in the original source code 168 | that can be used to reference sections of the code in error or warning messages. 
169 | 170 | Chumsky has full support for spans and also allows you to define your own custom spans with ease by simply implementing 171 | the [`Span`] trait. Additionally, chumsky comes with a built-in span type, [`SimpleSpan`], and a variety of 172 | implementations for types in Rust's standard library such as [`std::ops::Range`]. 173 | 174 | Chumsky will use its internal knowledge of your parser to generate spans for you whenever you need them, such as for 175 | attaching to nodes of an abstract syntax tree. See [`Parser::map_with`] for more information. 176 | -------------------------------------------------------------------------------- /guide/recursion.md: -------------------------------------------------------------------------------- 1 | # Recursion 2 | 3 | Most non-trivial languages - both spoken and programmed - are *recursive*. Grammars that describe these languages can 4 | express recursion by having a term in the language contain itself (either directly or indirectly). Noam Chomsky 5 | believed that recursion was *so* fundamental to human language that he considered it the primary demarcation between 6 | human and non-human language. This is debated in academic circles, but chumsky treats recursion with similar reverance. 7 | 8 | ## The Problem 9 | 10 | In Rust, writing a recursive function is usually trivial. 11 | 12 | ```rust 13 | fn factorial(x: u32) -> u32 { 14 | if x <= 1 { 15 | 1 16 | } else { 17 | x * factorial(x - 1) 18 | } 19 | } 20 | ``` 21 | 22 | However, chumsky parsers are *values*, not *functions*. Just like [`Iterator`]s, they can be moved around, manipulated, 23 | and invoked in a lazy manner. 
Intuitively, we might think to write a recursive parser to parse `4 + (1 + 2) + 3` like so: 24 | 25 | ```rust compile_fail 26 | use chumsky::prelude::*; 27 | 28 | fn a_parser<'src>() -> impl Parser<'src, &'src str, i32> + Clone { 29 | let int = text::int(10).map(|s: &str| s.parse().unwrap()); 30 | 31 | let atom = choice(( 32 | int, 33 | a_parser().delimited_by(just('('), just(')')), 34 | )) 35 | .padded(); 36 | 37 | atom.clone().foldl( 38 | just('+').padded().ignore_then(atom).repeated(), 39 | |lhs, rhs| lhs + rhs, 40 | ) 41 | } 42 | ``` 43 | 44 | Unfortunately, we hit an error: 45 | 46 | ```text 47 | error[E0720]: cannot resolve opaque type 48 | --> recursion.rs:1:24 49 | | 50 | 1 | fn a_parser<'src>() -> impl Parser<'src, &'src str, i32> + Clone { 51 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ recursive opaque type 52 | ... 53 | 9 | / atom.clone().foldl( 54 | 10 | | just('+').padded().ignore_then(atom).repeated(), 55 | 11 | | |lhs, rhs| lhs + rhs, 56 | 12 | | ) 57 | | | - 58 | | |_____| 59 | | |_____returning here with type `...` 60 | ``` 61 | 62 | We can 'solve' this problem by boxing `a_parser()`, but all it does is convert the compilation error into a run-time 63 | stack overflow. Why? The answer, if we take a step back, should be obvious: our `a_parser` function isn't actually 64 | doing any parsing, it's just *creating* a parser. In order to create a parser, it needs to call itself... which means 65 | calling itself again... forever. We've created infinite recursion. No dice. 66 | 67 | ## A Solution 68 | 69 | To get us out of this somewhat sticky bind, chumsky provides a special combinator called `recursive`. I allows us to 70 | refer to a parser within its own definition - without getting us caught in recursive hot water. 
71 | 72 | ```rust 73 | use chumsky::prelude::*; 74 | 75 | fn a_parser<'src>() -> impl Parser<'src, &'src str, i32> { 76 | recursive(|a_parser| { 77 | let int = text::int(10).map(|s: &str| s.parse().unwrap()); 78 | 79 | let atom = choice(( 80 | int, 81 | a_parser.delimited_by(just('('), just(')')), 82 | )) 83 | .padded(); 84 | 85 | atom.clone().foldl( 86 | just('+').padded().ignore_then(atom).repeated(), 87 | |lhs, rhs| lhs + rhs, 88 | ) 89 | }) 90 | } 91 | ``` 92 | 93 | Notice how our `a_parser` function is no longer recursive: instead, we get the definition of `a_parser` from the 94 | closure parameter. 95 | 96 | ## More Complicated Cases 97 | 98 | More complicated parsers tend to have many mutually-recursive patterns. For example, in Rust's syntax, the 'expression' 99 | and 'type' terms are intertwined: expressions can contain types (in the form of 100 | [turbofish](https://techblog.tonsser.com/posts/what-is-rusts-turbofish) type annotations, or in `as` casts) and types 101 | can contain expressions (in array type sizes or in const generics). 102 | 103 | It is possible to use `recursive` in a 'nested' manner to express such a thing, but chumsky provides a simpler 104 | solution: 105 | [`Recursive::declare`] and [`Recursive::define`]. These functions allow us to *entirely* decouple the declaration and 106 | definition of a recursive parser, giving us the ability to easily declare our mutually-recursive parsers up-front and 107 | then use them in each other's definitions. 108 | -------------------------------------------------------------------------------- /guide/technical_notes.md: -------------------------------------------------------------------------------- 1 | # Technical Notes 2 | 3 | This section contains assorted details about chumsky. Most of this information is irrelevant to beginners, but we 4 | consider it important enough to include for advanced users. 
5 | 6 | - [Technical Notes](#technical-notes) 7 | - [Classification](#classification) 8 | - [Purity and optimisation](#purity-and-optimisation) 9 | 10 | # Classification 11 | 12 | Chumsky is a PEG parser by nature. That is to say, it is possible to parse all known context-free grammars with chumsky. 13 | It has not yet been formally proven that PEG parsers can parse _all_ context-free grammars but, for the sake of using 14 | the library, it is reasonable to assume as much. 15 | 16 | Chumsky also has limited support for context-sensitive parsing. Chumsky's context-sensitive parsing allows previously 17 | parsed elements of the grammar to inform the parsing of future elements in a limited way. 18 | See [`Parser::ignore_with_ctx`] and [`Parser::then_with_ctx`]for more information. 19 | 20 | The term 'PEG++' might be an appropriate description of chumsky, with 'CFG + left context' being a description of the 21 | grammars that it can parse. 22 | 23 | Chumsky can also be extended via [`custom`] and [`ExtParser`], permitting it to theoretically parse any parseable 24 | grammar: but this is probably cheating since doing so requires manually implementing such parser logic. 25 | 26 | # Purity and optimisation 27 | 28 | Chumsky uses a plethora of techniques to improve parser performance. For example, it may skip generating output values 29 | that go unused by the parser (such as the output of `a` in `a.ignore_then(b)`). This also includes combinators like 30 | [`Parser::map`], which accept a user-provided closure. However, chumsky has no control over the behaviour of this 31 | closure, and it's possible to observe the closure being 'optimised away'. 32 | 33 | For this reason, unless otherwise specified, any closures/functions used inline within a chumsky parser should be 34 | *semantically* [pure](https://en.wikipedia.org/wiki/Purely_functional_programming): that is, you should not assume that 35 | they are called any specific number of times. 
This does not mean that they are not permitted to have side effects, but 36 | that those side effects should be irrelevant to the correct functioning of the parser. For example, 37 | [string interning](https://en.wikipedia.org/wiki/String_interning) within [`Parser::map_with`] is an impure operation, 38 | but this impurity does not affect the correct functioning of the parser: interning a string that goes unused can be done 39 | any number of times or not at all without resulting in bad behaviour. 40 | -------------------------------------------------------------------------------- /misc/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zesterer/chumsky/6d07aa3dfabf1b34b1135c07de321bbc8e0b1d89/misc/example.png -------------------------------------------------------------------------------- /misc/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | 36 | 52 | 53 | 55 | 68 | 73 | 74 | 80 | 93 | 98 | 99 | 112 | 117 | 118 | 131 | 136 | 137 | 143 | 144 | 148 | 156 | 159 | 162 | 167 | 172 | 180 | 181 | 184 | 189 | 194 | 202 | 203 | 204 | chumsky 214 | 215 | 216 | -------------------------------------------------------------------------------- /src/blanket.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | impl<'src, T, I, O, E> Parser<'src, I, O, E> for &T 4 | where 5 | T: ?Sized + Parser<'src, I, O, E>, 6 | I: Input<'src>, 7 | E: ParserExtra<'src, I>, 8 | { 9 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult 10 | where 11 | Self: Sized, 12 | { 13 | M::invoke(*self, inp) 14 | } 15 | 16 | go_extra!(O); 17 | } 18 | 19 | impl<'src, T, I, O, E> ConfigParser<'src, I, O, E> for &T 20 | where 21 | T: ?Sized + ConfigParser<'src, I, O, E>, 22 | I: Input<'src>, 23 | E: ParserExtra<'src, I>, 24 | { 25 | type Config = T::Config; 26 | 27 | fn go_cfg( 28 | &self, 29 | inp: &mut 
InputRef<'src, '_, I, E>, 30 | cfg: Self::Config, 31 | ) -> PResult { 32 | M::invoke_cfg(*self, inp, cfg) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/cache.rs: -------------------------------------------------------------------------------- 1 | //! Traits and types that allow parsers to be cached between invocations. 2 | //! 3 | //! # Example 4 | //! 5 | //! ``` 6 | //! #![feature(lazy_cell)] 7 | //! use std::sync::{LazyLock, Arc}; 8 | //! use chumsky::{prelude::*, cache::{Cache, Cached}}; 9 | //! 10 | //! #[derive(Debug, PartialEq)] 11 | //! enum Token<'a> { Ident(&'a str), Int(u64) } 12 | //! 13 | //! #[derive(Default)] 14 | //! struct TokenParser; 15 | //! impl Cached for TokenParser { 16 | //! type Parser<'a> = Arc, extra::Default> + Send + Sync + 'a>; 17 | //! 18 | //! fn make_parser<'a>(self) -> Self::Parser<'a> { 19 | //! let ident = text::ident().map(Token::Ident); 20 | //! let num = text::int(10).from_str().unwrapped().map(Token::Int); 21 | //! Arc::new(ident.or(num)) 22 | //! } 23 | //! } 24 | //! 25 | //! // The parser cache doesn't have a lifetime and so can be stored pretty much anywhere: 26 | //! static PARSER: LazyLock> = LazyLock::new(Cache::default); 27 | //! 28 | //! // The parser can be used from any context simply by calling `.get()` on the cache 29 | //! assert_eq!(PARSER.get().parse("42").into_result(), Ok(Token::Int(42))); 30 | //! assert_eq!(PARSER.get().parse("hello").into_result(), Ok(Token::Ident("hello"))); 31 | //! ``` 32 | 33 | use super::*; 34 | 35 | /// Implementing this trait allows you to cache parsers for use with inputs of different lifetimes, avoiding the 36 | /// need to recreate the parser for each input lifetime. 37 | pub trait Cached { 38 | /// The type of the parser to be cached. 39 | /// 40 | /// Because parsers tend to have unwieldy types, it is recommended to perform type erasure here. 
For example, 41 | /// a parser with input type `&'src str` and output type `Token<'src>` might have one of the following types. 42 | /// 43 | /// ```ignore 44 | /// Boxed<'src, 'src, &'src str, Token<'src>, extra::Default> 45 | /// Arc, extra::Default> + Send + Sync + 'src> 46 | /// ``` 47 | type Parser<'src>; 48 | 49 | /// Create an instance of the parser 50 | fn make_parser<'src>(self) -> Self::Parser<'src>; 51 | } 52 | 53 | /// Allows a parser to be cached for reuse with inputs and outputs of different lifetimes. 54 | pub struct Cache { 55 | parser: C::Parser<'static>, 56 | #[allow(dead_code)] 57 | phantom: EmptyPhantom, 58 | } 59 | 60 | impl Default for Cache { 61 | fn default() -> Self { 62 | Self::new(C::default()) 63 | } 64 | } 65 | 66 | impl Cache { 67 | /// Create a new cached parser. 68 | pub fn new(cacher: C) -> Self { 69 | Self { 70 | parser: cacher.make_parser(), 71 | phantom: EmptyPhantom::new(), 72 | } 73 | } 74 | 75 | /// Get a reference to the cached parser. 76 | /// 77 | /// Because this function is generic over an input lifetime, the returned parser can be used in many 78 | /// different contexts. 79 | pub fn get<'src>(&self) -> &C::Parser<'src> { 80 | // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary 81 | // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to 82 | // 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser 83 | // must be valid for arbitrary lifetimes. 84 | unsafe { &*(&self.parser as *const C::Parser<'_>).cast() } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/either.rs: -------------------------------------------------------------------------------- 1 | //! A small module that implements the [`Parser`] trait for the 2 | //! [`either::Either`](https://docs.rs/either/latest/either/enum.Either.html) type. 
3 | 4 | use super::*; 5 | use ::either::Either; 6 | 7 | impl<'src, L, R, I, O, E> Parser<'src, I, O, E> for Either 8 | where 9 | I: Input<'src>, 10 | E: ParserExtra<'src, I>, 11 | L: Parser<'src, I, O, E>, 12 | R: Parser<'src, I, O, E>, 13 | { 14 | fn go( 15 | &self, 16 | inp: &mut crate::input::InputRef<'src, '_, I, E>, 17 | ) -> crate::private::PResult 18 | where 19 | Self: Sized, 20 | { 21 | match self { 22 | Either::Left(l) => L::go::(l, inp), 23 | Either::Right(r) => R::go::(r, inp), 24 | } 25 | } 26 | 27 | go_extra!(O); 28 | } 29 | 30 | #[cfg(test)] 31 | mod tests { 32 | use crate::{ 33 | prelude::{any, just}, 34 | IterParser, Parser, 35 | }; 36 | use either::Either; 37 | 38 | fn parser<'src>() -> impl Parser<'src, &'src str, Vec> { 39 | any() 40 | .filter(|c: &char| c.is_ascii_digit()) 41 | .repeated() 42 | .at_least(1) 43 | .at_most(3) 44 | .to_slice() 45 | .map(|b: &str| b.parse::().unwrap()) 46 | .padded() 47 | .separated_by(just(',').padded()) 48 | .allow_trailing() 49 | .collect() 50 | .delimited_by(just('['), just(']')) 51 | } 52 | 53 | #[test] 54 | fn either() { 55 | let parsers = [Either::Left(parser()), Either::Right(parser())]; 56 | for parser in parsers { 57 | assert_eq!( 58 | parser.parse("[122 , 23,43, 4, ]").into_result(), 59 | Ok(vec![122, 23, 43, 4]), 60 | ); 61 | assert_eq!( 62 | parser.parse("[0, 3, 6, 900,120]").into_result(), 63 | Ok(vec![0, 3, 6, 900, 120]), 64 | ); 65 | assert_eq!( 66 | parser.parse("[200,400,50 ,0,0, ]").into_result(), 67 | Ok(vec![200, 400, 50, 0, 0]), 68 | ); 69 | 70 | assert!(parser.parse("[1234,123,12,1]").has_errors()); 71 | assert!(parser.parse("[,0, 1, 456]").has_errors()); 72 | assert!(parser.parse("[3, 4, 5, 67 89,]").has_errors()); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/extension.rs: -------------------------------------------------------------------------------- 1 | //! Types and traits that let you write extensions for chumsky. 2 | //! 
3 | //! Chumsky is a complicated crate that performs many internal optimizations to keep your parsers fast. These 4 | //! optimizations mean that chumsky's core is rapidly changing, difficult to work with, and reveals a lot of 5 | //! often-superfluous implementation details that are necessary to account for. 6 | //! 7 | //! In short: it's not a good basis for a stable public API upon which to build a parser ecosystem. 8 | //! 9 | //! To get around this problem, chumsky provides an extension interface (the contents of this module). This is a set of 10 | //! types, traits, and functions that we've decided that we're comfortable providing long-term support for even if 11 | //! the core of chumsky changes in an otherwise breaking manner in the future. 12 | //! 13 | //! The extension API is versioned. See the [`v1`] module for the current implementation of the API. 14 | //! 15 | //! # Example 16 | //! 17 | //! ``` 18 | //! use chumsky::{ 19 | //! prelude::*, 20 | //! error::LabelError, 21 | //! input::InputRef, 22 | //! extension::v1::{ExtParser, Ext}, 23 | //! DefaultExpected, 24 | //! }; 25 | //! 26 | //! // An example extension parser that expects a null byte. 27 | //! pub struct Null_; 28 | //! 29 | //! // We implement `ExtParser` for our null byte parser, plugging us into the chumsky ecosystem 30 | //! impl<'src, I, E> ExtParser<'src, I, (), E> for Null_ 31 | //! where 32 | //! I: Input<'src, Token = u8>, 33 | //! E: extra::ParserExtra<'src, I>, 34 | //! { 35 | //! fn parse(&self, inp: &mut InputRef<'src, '_, I, E>) -> Result<(), E::Error> { 36 | //! let before = inp.cursor(); 37 | //! match inp.next_maybe().as_deref() { 38 | //! // The next token was a null byte, meaning that parsing was successful 39 | //! Some(b'\0') => Ok(()), 40 | //! // The next token was something that wasn't a null byte, generate an error instead 41 | //! found => Err(LabelError::::expected_found( 42 | //! // Expected a null byte 43 | //! [DefaultExpected::Token(b'\0'.into())], 44 | //! 
// Found whatever the token was instead 45 | //! found.copied().map(Into::into), 46 | //! // The span of the error is the span of the token that was found instead 47 | //! inp.span_since(&before), 48 | //! )), 49 | //! } 50 | //! } 51 | //! } 52 | //! 53 | //! // Finally, we create an easy way to name the parser type for users 54 | //! pub type Null = Ext; 55 | //! 56 | //! // It's also conventional to create a function to conveniently use the parser primitive 57 | //! pub fn null() -> Null { 58 | //! Ext(Null_) 59 | //! } 60 | //! 61 | //! // Let's give our parser a test! 62 | //! fn make_parser<'src>() -> impl Parser<'src, &'src [u8], ()> { 63 | //! null() 64 | //! } 65 | //! 66 | //! assert_eq!(make_parser().parse(b"\0").into_result(), Ok(())); 67 | //! assert!(make_parser().parse(b"!").has_errors()); 68 | //! assert!(make_parser().parse(b"").has_errors()); 69 | //! ``` 70 | 71 | use super::*; 72 | 73 | /// Version 1 of the extension API. 74 | /// 75 | /// Versioning the extension API allows us to make significant changes to it in the future without breaking crates that 76 | /// depend on it. 77 | pub mod v1 { 78 | pub use super::current::{Ext, ExtParser}; 79 | } 80 | 81 | mod current { 82 | use super::*; 83 | 84 | /// A trait implemented by extension parsers. 85 | /// 86 | /// Implement this trait, and chumsky will automatically make [`Ext`] implement [`Parser`] for free. 87 | /// 88 | /// This trait is a stable interface that can be used to build on top of chumsky without exposing extension crates to 89 | /// the complex inner workings of chumsky, allowing us to iterate on the core to improve performance without regularly 90 | /// breaking the public API. 
91 | /// 92 | /// If your parser is a combinator and you'd like it to be used like a method (such as chumsky's built-in `a.or(b)` 93 | /// combinator), it is recommended that you implement an extension trait in your own library and have users import 94 | /// it, like so: 95 | /// 96 | /// ``` 97 | /// use chumsky::prelude::*; 98 | /// 99 | /// pub struct FrobnicatedWith { a: A, b: B } 100 | /// 101 | /// pub trait ParserExt<'src, I, O, E> 102 | /// where 103 | /// I: Input<'src>, 104 | /// E: extra::ParserExtra<'src, I> 105 | /// { 106 | /// fn frobnicated_with(self, other: B) -> FrobnicatedWith 107 | /// where 108 | /// Self: Sized, 109 | /// B: Parser<'src, I, O, E>, 110 | /// { 111 | /// FrobnicatedWith { a: self, b: other } 112 | /// } 113 | /// } 114 | /// ``` 115 | /// 116 | /// Now, users can import your trait and do `a.frobnicate_with(b)` as if your parser were native to chumsky! 117 | pub trait ExtParser<'src, I: Input<'src>, O, E: ParserExtra<'src, I>> { 118 | /// Attempt parsing on the given input. 119 | /// 120 | /// See [`InputRef`] for more information about how you can work with parser inputs. 121 | fn parse(&self, inp: &mut InputRef<'src, '_, I, E>) -> Result; 122 | 123 | /// Attempt to check the given input. 124 | /// 125 | /// This function should have **exactly** the same behavior as [`ExtParser::parse`]. If the behavior differs, 126 | /// the result of using the parser is unspecified (note that chumsky tries to aggressively avoid generating 127 | /// outputs if it doesn't use them, and will readily swap between [`ExtParser::parse`] and [`ExtParser::check`] 128 | /// when it thinks that doing so might yield performance benefits). 129 | /// 130 | /// By default, this method just uses `ExtParser::parse`, dropping the output. You may want to override the 131 | /// implementation so that this output is never even generated, thereby improving performance. 
132 | fn check(&self, inp: &mut InputRef<'src, '_, I, E>) -> Result<(), E::Error> { 133 | self.parse(inp).map(|_| ()) 134 | } 135 | } 136 | 137 | /// A type used to wrap parser extensions. 138 | /// 139 | /// Sadly, Rust's trait coherence rules (often called 'orphan rules') prevent us from having a blanket 140 | /// implementation of [`Parser`] for any implementer of [`ExtParser`]. This wrapper type is the compromise solution 141 | /// that keeps things working: wrap your parser types in [`Ext`], and you can start talking to the rest of the 142 | /// chumsky ecosystem. See [`extension`] for an example of how to do this. 143 | /// 144 | /// It's possible that future changes to Rust's coherence rules, or to chumsky's core, may relax this requirement in 145 | /// the future. 146 | /// 147 | /// If you're writing an extension crate for chumsky, you can make things less confusing for your users by putting your 148 | /// parser behind a type alias. 149 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 150 | #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 151 | #[repr(transparent)] 152 | pub struct Ext(pub T); 153 | 154 | impl<'src, I, O, E, P> Parser<'src, I, O, E> for Ext

155 | where 156 | I: Input<'src>, 157 | E: ParserExtra<'src, I>, 158 | P: ExtParser<'src, I, O, E>, 159 | { 160 | #[inline(always)] 161 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 162 | let before = inp.cursor(); 163 | match M::choose(&mut *inp, |inp| self.0.parse(inp), |inp| self.0.check(inp)) { 164 | Ok(out) => Ok(out), 165 | Err(err) => { 166 | inp.add_alt_err(&before.inner, err); 167 | Err(()) 168 | } 169 | } 170 | } 171 | 172 | go_extra!(O); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/extra.rs: -------------------------------------------------------------------------------- 1 | //! Generic error, state and context types for parsers 2 | //! Useful for custom allocation, error handling, context-specific parsers, and more. 3 | 4 | use inspector::Inspector; 5 | pub use inspector::SimpleState; 6 | 7 | use super::*; 8 | 9 | type DefaultErr = EmptyErr; 10 | type DefaultState = (); 11 | type DefaultCtx = (); 12 | 13 | /// A trait for extra types on a [`Parser`] that control the behavior of certain combinators and output. 14 | /// 15 | /// Currently, this consists of the error type emitted, the state type used in the `*_state` combinators, 16 | /// and the context type used in the `*_ctx` and `*configure` parsers. 17 | /// 18 | /// This trait is sealed and so cannot be implemented by other crates because all uses should instead 19 | /// go through the types defined in this module. 20 | pub trait ParserExtra<'a, I>: 'a + Sealed 21 | where 22 | I: Input<'a>, 23 | { 24 | /// Error type to use for the parser. This type must implement [`Error`], and when it fails, 25 | /// the parser will return a set of this type to describe why the failure occurred. 26 | type Error: Error<'a, I> + 'a; 27 | /// State type to use for the parser. 
This is used to provide stateful *output* of the parser, 28 | /// such as interned identifiers or position-dependent name resolution, however *cannot* influence 29 | /// the actual progress of the parser - for that, use [`Self::Context`]. 30 | /// 31 | /// For examples of using this type, see [`Parser::map_with`] or [`Parser::foldl_with`]. 32 | type State: Inspector<'a, I> + 'a; 33 | /// Context used for parser configuration. This is used to provide context-sensitive parsing of *input*. 34 | /// Context-sensitive parsing in chumsky is always left-hand sensitive - context for the parse must originate 35 | /// from an earlier point in the stream than the parser relying on it. This can affect the output of a parser, 36 | /// but for things that don't wish to alter the actual rules of parsing, one should instead prefer [`Self::State`]. 37 | /// 38 | /// For examples of using this type, see [`Parser::ignore_with_ctx`], [`Parser::then_with_ctx`] and [`ConfigParser::configure`]. 39 | type Context: 'a; 40 | } 41 | 42 | /// Use all default extra types. See [`ParserExtra`] for more details. 43 | pub type Default = Full; 44 | 45 | /// Use specified error type, but default other types. See [`ParserExtra`] for more details. 46 | pub type Err = Full; 47 | 48 | /// Use specified state type, but default other types. See [`ParserExtra`] for more details. 49 | /// 50 | /// Use `State` or `Full` as the `Extra` type parameter of a parser to use a custom state type. 51 | /// You can then use `parser().parse_with_state(&mut S)` to parse with a custom state. 52 | /// 53 | /// See [`Parser::map_with`] for examples. 54 | pub type State = Full; 55 | 56 | /// Use specified context type, but default other types. See [`ParserExtra`] for more details. 57 | pub type Context = Full; 58 | 59 | /// Specify all extra types. See [`ParserExtra`] for more details. 
60 | pub struct Full(PhantomData<(E, S, C)>); 61 | 62 | impl Sealed for Full {} 63 | impl<'a, I, E, S, C> ParserExtra<'a, I> for Full 64 | where 65 | I: Input<'a>, 66 | E: Error<'a, I> + 'a, 67 | S: Inspector<'a, I> + 'a, 68 | C: 'a, 69 | { 70 | type Error = E; 71 | type State = S; 72 | type Context = C; 73 | } 74 | -------------------------------------------------------------------------------- /src/guide.rs: -------------------------------------------------------------------------------- 1 | // To generate docs with the guide, use `RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features` 2 | 3 | #![doc = include_str!("../guide/intro.md")] 4 | use super::*; 5 | 6 | pub mod _00_getting_started { 7 | #![doc = include_str!("../guide/getting_started.md")] 8 | use super::*; 9 | } 10 | 11 | pub mod _01_key_concepts { 12 | #![doc = include_str!("../guide/key_concepts.md")] 13 | use super::*; 14 | } 15 | 16 | pub mod _02_meet_the_parsers { 17 | #![doc = include_str!("../guide/meet_the_parsers.md")] 18 | use super::*; 19 | } 20 | 21 | pub mod _03_error_and_recovery { 22 | #![doc = include_str!("../guide/error_and_recovery.md")] 23 | use super::*; 24 | } 25 | 26 | pub mod _04_recursion { 27 | #![doc = include_str!("../guide/recursion.md")] 28 | use super::*; 29 | } 30 | 31 | pub mod _05_debugging { 32 | #![doc = include_str!("../guide/debugging.md")] 33 | use super::*; 34 | } 35 | 36 | pub mod _06_technical_notes { 37 | #![doc = include_str!("../guide/technical_notes.md")] 38 | use super::*; 39 | } 40 | 41 | pub mod _07_tutorial { 42 | #![doc = include_str!("../guide/tutorial.md")] 43 | use super::*; 44 | } 45 | -------------------------------------------------------------------------------- /src/inspector.rs: -------------------------------------------------------------------------------- 1 | //! Parser extensions that inspect the input without modifying it. 2 | //! 3 | //! *"Only one man stood and watched the sky, stood with terrible sadness in his eyes 4 | //! 
and rubber bungs in his ears. He knew exactly what was happening and had known 5 | //! ever since his Sub-Etha Sens-O-Matic had started winking in the dead of night 6 | //! beside his pillar and woken him with a start."* 7 | use super::*; 8 | use crate::input::{Checkpoint, Cursor}; 9 | use core::ops::{Deref, DerefMut}; 10 | 11 | #[allow(unused)] // for intra-doc links 12 | use crate::Parser; 13 | 14 | /// A type that receives event hooks when certain parsing actions occur. 15 | /// 16 | /// If you don't need to receive event hooks, use [`SimpleState`]. 17 | pub trait Inspector<'src, I: Input<'src>> { 18 | /// A type the Inspector can use to revert to a previous state. 19 | /// 20 | /// For implementation reasons, this is required to be `Clone`. 21 | type Checkpoint: Clone; 22 | 23 | /// This function is called when a new token is read from the input stream. 24 | // impl note: this should be called only when `self.cursor` is updated, not when we only peek at the next token. 25 | fn on_token(&mut self, token: &I::Token); 26 | /// This function is called when a combinator saves the current state of the parse. 27 | fn on_save<'parse>(&self, cursor: &Cursor<'src, 'parse, I>) -> Self::Checkpoint; 28 | /// This function is called when a combinator rewinds to an earlier state of the parser. 29 | /// 30 | /// You can use [`Checkpoint::inspector`] to get back the [`Checkpoint`][Self::Checkpoint] 31 | /// you originally created in [`on_save`][Self::on_save]. 
32 | fn on_rewind<'parse>(&mut self, marker: &Checkpoint<'src, 'parse, I, Self::Checkpoint>); 33 | } 34 | 35 | impl<'src, I: Input<'src>> Inspector<'src, I> for () { 36 | type Checkpoint = (); 37 | #[inline(always)] 38 | fn on_token(&mut self, _: &>::Token) {} 39 | #[inline(always)] 40 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint {} 41 | #[inline(always)] 42 | fn on_rewind<'parse>(&mut self, _: &Checkpoint<'src, 'parse, I, Self>) {} 43 | } 44 | 45 | /// A state type that should be accessible directly from `parser.state()` and has no special behavior. 46 | /// 47 | /// This wrapper implements the [`Inspector`] trait for you so you don't have to. 48 | #[derive(Copy, Clone, Default, Debug)] 49 | pub struct SimpleState(pub T); 50 | impl<'src, T, I: Input<'src>> Inspector<'src, I> for SimpleState { 51 | type Checkpoint = (); 52 | #[inline(always)] 53 | fn on_token(&mut self, _: &>::Token) {} 54 | #[inline(always)] 55 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint {} 56 | #[inline(always)] 57 | fn on_rewind<'parse>(&mut self, _: &Checkpoint<'src, 'parse, I, Self::Checkpoint>) {} 58 | } 59 | 60 | impl Deref for SimpleState { 61 | type Target = T; 62 | 63 | fn deref(&self) -> &Self::Target { 64 | &self.0 65 | } 66 | } 67 | 68 | impl DerefMut for SimpleState { 69 | fn deref_mut(&mut self) -> &mut Self::Target { 70 | &mut self.0 71 | } 72 | } 73 | 74 | impl From for SimpleState { 75 | fn from(value: T) -> Self { 76 | Self(value) 77 | } 78 | } 79 | 80 | /// A state type that clones and rolls back its contents during a rewind. 81 | /// 82 | /// This might be useful if you want to use the parser state to, say, count the parsed occurrences of a particular 83 | /// construct. 84 | /// 85 | /// Ideally, you should try to have the [`Clone`] implementation be fairly cheap. 
86 | #[derive(Copy, Clone, Default, Debug)] 87 | pub struct RollbackState(pub T); 88 | impl<'src, T: Clone, I: Input<'src>> Inspector<'src, I> for RollbackState { 89 | type Checkpoint = T; 90 | #[inline(always)] 91 | fn on_token(&mut self, _: &>::Token) {} 92 | #[inline(always)] 93 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint { 94 | self.0.clone() 95 | } 96 | #[inline(always)] 97 | fn on_rewind<'parse>(&mut self, cp: &Checkpoint<'src, 'parse, I, Self::Checkpoint>) { 98 | self.0 = cp.inspector.clone(); 99 | } 100 | } 101 | 102 | impl Deref for RollbackState { 103 | type Target = T; 104 | 105 | fn deref(&self) -> &Self::Target { 106 | &self.0 107 | } 108 | } 109 | 110 | impl DerefMut for RollbackState { 111 | fn deref_mut(&mut self) -> &mut Self::Target { 112 | &mut self.0 113 | } 114 | } 115 | 116 | impl From for RollbackState { 117 | fn from(value: T) -> Self { 118 | Self(value) 119 | } 120 | } 121 | 122 | /// A state type that encapsulates a vector, truncating the vector to its original size during a rewind. 123 | /// 124 | /// This might be useful for representing, say, an arena-style allocator. 
125 | #[derive(Clone, Default, Debug)] 126 | pub struct TruncateState(pub Vec); 127 | impl<'src, T: Clone, I: Input<'src>> Inspector<'src, I> for TruncateState { 128 | type Checkpoint = usize; 129 | #[inline(always)] 130 | fn on_token(&mut self, _: &>::Token) {} 131 | #[inline(always)] 132 | fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint { 133 | self.0.len() 134 | } 135 | #[inline(always)] 136 | fn on_rewind<'parse>(&mut self, cp: &Checkpoint<'src, 'parse, I, Self::Checkpoint>) { 137 | self.0.truncate(cp.inspector); 138 | } 139 | } 140 | 141 | impl Deref for TruncateState { 142 | type Target = Vec; 143 | 144 | fn deref(&self) -> &Self::Target { 145 | &self.0 146 | } 147 | } 148 | 149 | impl DerefMut for TruncateState { 150 | fn deref_mut(&mut self) -> &mut Self::Target { 151 | &mut self.0 152 | } 153 | } 154 | 155 | impl From> for TruncateState { 156 | fn from(value: Vec) -> Self { 157 | Self(value) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/label.rs: -------------------------------------------------------------------------------- 1 | //! Items related to parser labelling. 2 | 3 | use super::*; 4 | 5 | /// A trait implemented by [`Error`]s that can originate from labelled parsers. See [`Parser::labelled`]. 6 | pub trait LabelError<'src, I: Input<'src>, L>: Sized { 7 | /// Create a new error describing a conflict between expected inputs and that which was actually found. 8 | /// 9 | /// `found` having the value `None` indicates that the end of input was reached, but was not expected. 10 | /// 11 | /// An expected input having the value `None` indicates that the end of input was expected. 12 | fn expected_found>( 13 | expected: E, 14 | found: Option>, 15 | span: I::Span, 16 | ) -> Self; 17 | 18 | /// Fast path for `a.merge(LabelError::expected_found(...))` that may incur less overhead by, for example, reusing allocations. 
19 | #[inline(always)] 20 | fn merge_expected_found>( 21 | self, 22 | expected: E, 23 | found: Option>, 24 | span: I::Span, 25 | ) -> Self 26 | where 27 | Self: Error<'src, I>, 28 | { 29 | self.merge(LabelError::expected_found(expected, found, span)) 30 | } 31 | 32 | /// Fast path for `a = LabelError::expected_found(...)` that may incur less overhead by, for example, reusing allocations. 33 | #[inline(always)] 34 | fn replace_expected_found>( 35 | self, 36 | expected: E, 37 | found: Option>, 38 | span: I::Span, 39 | ) -> Self { 40 | LabelError::expected_found(expected, found, span) 41 | } 42 | 43 | /// Annotate the expected patterns within this parser with the given label. 44 | /// 45 | /// In practice, this usually removes all other labels and expected tokens in favor of a single label that 46 | /// represents the overall pattern. 47 | fn label_with(&mut self, label: L) { 48 | #![allow(unused_variables)] 49 | } 50 | 51 | /// Annotate this error, indicating that it occurred within the context denoted by the given label. 52 | /// 53 | /// A span that runs from the beginning of the context up until the error location is also provided. 54 | /// 55 | /// In practice, this usually means adding the context to a context 'stack', similar to a backtrace. 56 | fn in_context(&mut self, label: L, span: I::Span) { 57 | #![allow(unused_variables)] 58 | } 59 | } 60 | 61 | /// See [`Parser::labelled`]. 62 | #[derive(Copy, Clone)] 63 | pub struct Labelled { 64 | pub(crate) parser: A, 65 | pub(crate) label: L, 66 | pub(crate) is_context: bool, 67 | } 68 | 69 | impl Labelled { 70 | /// Specify that the label should be used as context when reporting errors. 71 | /// 72 | /// This allows error messages to use this label to add information to errors that occur *within* this parser. 
73 | pub fn as_context(self) -> Self { 74 | Self { 75 | is_context: true, 76 | ..self 77 | } 78 | } 79 | } 80 | 81 | impl<'src, I, O, E, A, L> Parser<'src, I, O, E> for Labelled 82 | where 83 | I: Input<'src>, 84 | E: ParserExtra<'src, I>, 85 | A: Parser<'src, I, O, E>, 86 | L: Clone, 87 | E::Error: LabelError<'src, I, L>, 88 | { 89 | #[inline] 90 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 91 | let old_alt = inp.errors.alt.take(); 92 | let before = inp.save(); 93 | let res = self.parser.go::(inp); 94 | 95 | // TODO: Label secondary errors too? 96 | let new_alt = inp.errors.alt.take(); 97 | inp.errors.alt = old_alt; 98 | 99 | if let Some(mut new_alt) = new_alt { 100 | let before_loc = I::cursor_location(&before.cursor().inner); 101 | let new_alt_loc = I::cursor_location(&new_alt.pos); 102 | if new_alt_loc == before_loc { 103 | new_alt.err.label_with(self.label.clone()); 104 | } else if self.is_context && new_alt_loc > before_loc { 105 | // SAFETY: cursors generated by previous call to `InputRef::next` (or similar). 106 | let span = unsafe { I::span(inp.cache, &before.cursor().inner..&new_alt.pos) }; 107 | new_alt.err.in_context(self.label.clone(), span); 108 | } 109 | inp.add_alt_err(&new_alt.pos, new_alt.err); 110 | } 111 | 112 | if self.is_context { 113 | for err in inp.errors.secondary_errors_since(before.err_count) { 114 | // SAFETY: cursors generated by previous call to `InputRef::next` (or similar). 115 | let span = unsafe { I::span(inp.cache, &before.cursor().inner..&err.pos) }; 116 | err.err.in_context(self.label.clone(), span); 117 | } 118 | } 119 | 120 | res 121 | } 122 | 123 | go_extra!(O); 124 | } 125 | -------------------------------------------------------------------------------- /src/number.rs: -------------------------------------------------------------------------------- 1 | //! 
TODO: Add documentation when approved 2 | 3 | use super::*; 4 | pub use lexical::format; 5 | 6 | use lexical::parse_partial; 7 | use lexical::FromLexical; 8 | 9 | /// TODO: Add documentation when approved 10 | pub struct Number { 11 | #[allow(dead_code)] 12 | phantom: EmptyPhantom<(I, E, O)>, 13 | } 14 | 15 | impl Copy for Number {} 16 | impl Clone for Number { 17 | fn clone(&self) -> Self { 18 | *self 19 | } 20 | } 21 | 22 | /// TODO: Add documentation when approved 23 | pub const fn number() -> Number { 24 | Number:: { 25 | phantom: EmptyPhantom::new(), 26 | } 27 | } 28 | 29 | /// A label denoting a parseable number. 30 | pub struct ExpectedNumber; 31 | 32 | impl<'src, const F: u128, I, O, E> Parser<'src, I, O, E> for Number 33 | where 34 | O: FromLexical, 35 | I: SliceInput<'src, Cursor = usize>, 36 | >::Slice: AsRef<[u8]>, 37 | E: ParserExtra<'src, I>, 38 | E::Error: LabelError<'src, I, ExpectedNumber>, 39 | { 40 | #[inline] 41 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 42 | let before = inp.cursor(); 43 | match parse_partial(inp.slice_trailing_inner().as_ref()) { 44 | Ok((out, skip)) => { 45 | // SAFETY: `skip` is no longer than the trailing input's byte length 46 | unsafe { inp.skip_bytes(skip) }; 47 | Ok(M::bind(|| out)) 48 | } 49 | Err(_err) => { 50 | // TODO: Improve error 51 | let span = inp.span_since(&before); 52 | inp.add_alt([ExpectedNumber], None, span); 53 | Err(()) 54 | } 55 | } 56 | } 57 | 58 | go_extra!(O); 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use crate::{extra, Parser}; 65 | use lexical::format::RUST_LITERAL; 66 | 67 | // These have been shamelessly yanked from the rust test-float-parse suite. 
68 | // More specifically: 69 | // 70 | // https://github.com/rust-lang/rust/tree/64185f205dcbd8db255ad6674e43c63423f2369a/src/etc/test-float-parse 71 | mod rust { 72 | use super::*; 73 | 74 | const FLOAT: Number = number(); 75 | 76 | fn validate(test: &str) { 77 | FLOAT.parse(test).unwrap(); 78 | } 79 | 80 | #[test] 81 | fn few_ones() { 82 | let mut pow = vec![]; 83 | for i in 0..63 { 84 | pow.push(1u64 << i); 85 | } 86 | for a in &pow { 87 | for b in &pow { 88 | for c in &pow { 89 | validate(&(a | b | c).to_string()); 90 | } 91 | } 92 | } 93 | } 94 | 95 | #[test] 96 | fn huge_pow10() { 97 | for e in 300..310 { 98 | for i in 0..100000 { 99 | validate(&format!("{i}e{e}")); 100 | } 101 | } 102 | } 103 | 104 | #[test] 105 | fn long_fraction() { 106 | for n in 0..10 { 107 | let digit = char::from_digit(n, 10).unwrap(); 108 | let mut s = "0.".to_string(); 109 | for _ in 0..400 { 110 | s.push(digit); 111 | if s.parse::().is_ok() { 112 | validate(&s); 113 | } 114 | } 115 | } 116 | } 117 | 118 | #[test] 119 | fn short_decimals() { 120 | for e in 1..301 { 121 | for i in 0..10000 { 122 | if i % 10 == 0 { 123 | continue; 124 | } 125 | 126 | validate(&format!("{i}e{e}")); 127 | validate(&format!("{i}e-{e}")); 128 | } 129 | } 130 | } 131 | 132 | #[test] 133 | fn subnorm() { 134 | for bits in 0u32..(1 << 21) { 135 | let single: f32 = f32::from_bits(bits); 136 | validate(&format!("{single:e}")); 137 | let double: f64 = f64::from_bits(bits as u64); 138 | validate(&format!("{double:e}")); 139 | } 140 | } 141 | 142 | #[test] 143 | fn tiny_pow10() { 144 | for e in 301..327 { 145 | for i in 0..100000 { 146 | validate(&format!("{i}e-{e}")); 147 | } 148 | } 149 | } 150 | 151 | #[test] 152 | fn u32_small() { 153 | for i in 0..(1 << 19) { 154 | validate(&i.to_string()); 155 | } 156 | } 157 | 158 | #[test] 159 | fn u64_pow2() { 160 | for exp in 19..64 { 161 | let power: u64 = 1 << exp; 162 | validate(&power.to_string()); 163 | for offset in 1..123 { 164 | validate(&(power + 
offset).to_string()); 165 | validate(&(power - offset).to_string()); 166 | } 167 | } 168 | for offset in 0..123 { 169 | validate(&(u64::MAX - offset).to_string()); 170 | } 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/recovery.rs: -------------------------------------------------------------------------------- 1 | //! Types and functions that relate to error recovery. 2 | //! 3 | //! When chumsky encounters an erroneous input that it cannot parse, it can be told to attempt to recover from the 4 | //! error using a variety of strategies (you can also create your own strategies). 5 | //! 6 | //! There is no silver bullet strategy for error recovery. By definition, if the input to a parser is invalid then the 7 | //! parser can only make educated guesses as to the meaning of the input. Different recovery strategies will work 8 | //! better for different languages, and for different patterns within those languages. 9 | //! 10 | //! Chumsky provides a variety of recovery strategies (each implementing the `Strategy` trait), but it's important to 11 | //! understand that all of 12 | //! 13 | //! - which you apply 14 | //! - where you apply them 15 | //! - what order you apply them 16 | //! 17 | //! will greatly affect the quality of the errors that Chumsky is able to produce, along with the extent to which it 18 | //! is able to recover a useful AST. Where possible, you should attempt more 'specific' recovery strategies first 19 | //! rather than those that mindlessly skip large swathes of the input. 20 | //! 21 | //! It is recommended that you experiment with applying different strategies in different situations and at different 22 | //! levels of the parser to find a configuration that you are happy with. If none of the provided error recovery 23 | //! strategies cover the specific pattern you wish to catch, you can even create your own by digging into Chumsky's 24 | //! 
internals and implementing your own strategies! If you come up with a useful strategy, feel free to open a PR 25 | //! against the [main repository](https://github.com/zesterer/chumsky/)! 26 | 27 | use super::*; 28 | 29 | /// A trait implemented by error recovery strategies. See [`Parser::recover_with`]. 30 | /// 31 | /// This trait is sealed and so cannot be implemented by other crates because it has an unstable API. This may 32 | /// eventually change. For now, if you wish to implement a new strategy, consider using [`via_parser`] or 33 | /// [opening an issue/PR](https://github.com/zesterer/chumsky/issues/new). 34 | pub trait Strategy<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Default>: 35 | Sealed 36 | { 37 | // Attempt to recover from a parsing failure. 38 | // The strategy should properly handle the alt error but is not required to handle rewinding. 39 | #[doc(hidden)] 40 | fn recover>( 41 | &self, 42 | inp: &mut InputRef<'src, '_, I, E>, 43 | parser: &P, 44 | ) -> PResult; 45 | } 46 | 47 | /// See [`via_parser`]. 48 | #[derive(Copy, Clone)] 49 | pub struct ViaParser(A); 50 | 51 | /// Recover via the given recovery parser. 52 | pub fn via_parser(parser: A) -> ViaParser { 53 | ViaParser(parser) 54 | } 55 | 56 | impl Sealed for ViaParser {} 57 | impl<'src, I, O, E, A> Strategy<'src, I, O, E> for ViaParser 58 | where 59 | I: Input<'src>, 60 | A: Parser<'src, I, O, E>, 61 | E: ParserExtra<'src, I>, 62 | { 63 | fn recover>( 64 | &self, 65 | inp: &mut InputRef<'src, '_, I, E>, 66 | _parser: &P, 67 | ) -> PResult { 68 | let alt = inp.take_alt().unwrap(); // Can't fail! 69 | let out = match self.0.go::(inp) { 70 | Ok(out) => out, 71 | Err(()) => { 72 | inp.errors.alt = Some(alt); 73 | return Err(()); 74 | } 75 | }; 76 | inp.emit(None, alt.err); 77 | Ok(out) 78 | } 79 | } 80 | 81 | /// See [`Parser::recover_with`]. 
82 | #[derive(Copy, Clone)] 83 | pub struct RecoverWith { 84 | pub(crate) parser: A, 85 | pub(crate) strategy: S, 86 | } 87 | 88 | impl<'src, I, O, E, A, S> Parser<'src, I, O, E> for RecoverWith 89 | where 90 | I: Input<'src>, 91 | E: ParserExtra<'src, I>, 92 | A: Parser<'src, I, O, E>, 93 | S: Strategy<'src, I, O, E>, 94 | { 95 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 96 | let before = inp.save(); 97 | match self.parser.go::(inp) { 98 | Ok(out) => Ok(out), 99 | Err(()) => { 100 | inp.rewind(before.clone()); 101 | match self.strategy.recover::(inp, &self.parser) { 102 | Ok(out) => Ok(out), 103 | Err(()) => { 104 | // Reset to before fallback attempt 105 | inp.rewind(before); 106 | Err(()) 107 | } 108 | } 109 | } 110 | } 111 | } 112 | 113 | go_extra!(O); 114 | } 115 | 116 | /// See [`skip_then_retry_until`]. 117 | #[must_use] 118 | #[derive(Copy, Clone)] 119 | pub struct SkipThenRetryUntil { 120 | skip: S, 121 | until: U, 122 | } 123 | 124 | impl Sealed for SkipThenRetryUntil {} 125 | impl<'src, I, O, E, S, U> Strategy<'src, I, O, E> for SkipThenRetryUntil 126 | where 127 | I: Input<'src>, 128 | S: Parser<'src, I, (), E>, 129 | U: Parser<'src, I, (), E>, 130 | E: ParserExtra<'src, I>, 131 | { 132 | fn recover>( 133 | &self, 134 | inp: &mut InputRef<'src, '_, I, E>, 135 | parser: &P, 136 | ) -> PResult { 137 | let alt = inp.take_alt().unwrap(); // Can't fail! 
138 | loop { 139 | let before = inp.save(); 140 | if let Ok(()) = self.until.go::(inp) { 141 | inp.errors.alt = Some(alt); 142 | inp.rewind(before); 143 | break Err(()); 144 | } else { 145 | inp.rewind(before); 146 | } 147 | 148 | if let Err(()) = self.skip.go::(inp) { 149 | inp.errors.alt = Some(alt); 150 | break Err(()); 151 | } 152 | 153 | let before = inp.save(); 154 | if let Some(out) = parser.go::(inp).ok().filter(|_| { 155 | inp.errors 156 | .secondary_errors_since(before.err_count) 157 | .is_empty() 158 | }) { 159 | inp.emit(None, alt.err); 160 | break Ok(out); 161 | } else { 162 | inp.errors.alt.take(); 163 | inp.rewind(before); 164 | } 165 | } 166 | } 167 | } 168 | 169 | /// TODO 170 | pub fn skip_then_retry_until(skip: S, until: U) -> SkipThenRetryUntil { 171 | SkipThenRetryUntil { skip, until } 172 | } 173 | 174 | /// See [`skip_until`]. 175 | #[must_use] 176 | #[derive(Copy, Clone)] 177 | pub struct SkipUntil { 178 | skip: S, 179 | until: U, 180 | fallback: F, 181 | } 182 | 183 | impl Sealed for SkipUntil {} 184 | impl<'src, I, O, E, S, U, F> Strategy<'src, I, O, E> for SkipUntil 185 | where 186 | I: Input<'src>, 187 | S: Parser<'src, I, (), E>, 188 | U: Parser<'src, I, (), E>, 189 | F: Fn() -> O, 190 | E: ParserExtra<'src, I>, 191 | { 192 | fn recover>( 193 | &self, 194 | inp: &mut InputRef<'src, '_, I, E>, 195 | _parser: &P, 196 | ) -> PResult { 197 | let alt = inp.take_alt().unwrap(); // Can't fail! 198 | loop { 199 | let before = inp.save(); 200 | if let Ok(()) = self.until.go::(inp) { 201 | inp.emit(None, alt.err); 202 | break Ok(M::bind(|| (self.fallback)())); 203 | } 204 | inp.rewind(before); 205 | 206 | if let Err(()) = self.skip.go::(inp) { 207 | inp.errors.alt = Some(alt); 208 | break Err(()); 209 | } 210 | } 211 | } 212 | } 213 | 214 | /// A recovery parser that skips input until one of several inputs is found. 215 | /// 216 | /// This strategy is very 'stupid' and can result in very poor error generation in some languages. 
Place this strategy 217 | /// after others as a last resort, and be careful about over-using it. 218 | pub fn skip_until(skip: S, until: U, fallback: F) -> SkipUntil { 219 | SkipUntil { 220 | skip, 221 | until, 222 | fallback, 223 | } 224 | } 225 | 226 | /// A recovery parser that searches for a start and end delimiter, respecting nesting. 227 | /// 228 | /// It is possible to specify additional delimiter pairs that are valid in the pattern's context for better errors. For 229 | /// example, you might want to also specify `[('[', ']'), ('{', '}')]` when recovering a parenthesized expression as 230 | /// this can aid in detecting delimiter mismatches. 231 | /// 232 | /// A function that generates a fallback output on recovery is also required. 233 | // TODO: Make this a strategy, add an unclosed_delimiter error 234 | pub fn nested_delimiters<'src, I, O, E, F, const N: usize>( 235 | start: I::Token, 236 | end: I::Token, 237 | others: [(I::Token, I::Token); N], 238 | fallback: F, 239 | ) -> impl Parser<'src, I, O, E> + Clone 240 | where 241 | I: ValueInput<'src>, 242 | I::Token: PartialEq + Clone, 243 | E: extra::ParserExtra<'src, I>, 244 | F: Fn(I::Span) -> O + Clone, 245 | { 246 | // TODO: Does this actually work? TESTS! 
247 | #[allow(clippy::tuple_array_conversions)] 248 | // Clippy is overly eager to fine pointless non-problems 249 | recursive({ 250 | let (start, end) = (start.clone(), end.clone()); 251 | |block| { 252 | let mut many_block = Parser::boxed( 253 | block 254 | .clone() 255 | .delimited_by(just(start.clone()), just(end.clone())), 256 | ); 257 | for (s, e) in &others { 258 | many_block = Parser::boxed( 259 | many_block.or(block.clone().delimited_by(just(s.clone()), just(e.clone()))), 260 | ); 261 | } 262 | 263 | let skip = [start, end] 264 | .into_iter() 265 | .chain(IntoIterator::into_iter(others).flat_map(|(s, e)| [s, e])) 266 | .collect::>(); 267 | 268 | many_block 269 | .or(any().and_is(none_of(skip)).ignored()) 270 | .repeated() 271 | } 272 | }) 273 | .delimited_by(just(start), just(end)) 274 | .map_with(move |_, e| fallback(e.span())) 275 | } 276 | -------------------------------------------------------------------------------- /src/recursive.rs: -------------------------------------------------------------------------------- 1 | //! Recursive parsers (parser that include themselves within their patterns). 2 | //! 3 | //! *“It's unpleasantly like being drunk." 4 | //! "What's so unpleasant about being drunk?" 5 | //! "You ask a glass of water.”* 6 | //! 7 | //! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and 8 | //! definition of parsers more carefully, particularly for mutually-recursive parsers. In such cases, the functions on 9 | //! [`Recursive`] allow for this. 
10 | 11 | use super::*; 12 | 13 | struct OnceCell(core::cell::Cell>); 14 | impl OnceCell { 15 | pub fn new() -> Self { 16 | Self(core::cell::Cell::new(None)) 17 | } 18 | pub fn set(&self, x: T) -> Result<(), ()> { 19 | // SAFETY: Function is not reentrant so we have exclusive access to the inner data 20 | unsafe { 21 | let vacant = (*self.0.as_ptr()).is_none(); 22 | if vacant { 23 | self.0.as_ptr().write(Some(x)); 24 | Ok(()) 25 | } else { 26 | Err(()) 27 | } 28 | } 29 | } 30 | #[inline] 31 | pub fn get(&self) -> Option<&T> { 32 | // SAFETY: We ensure that we never insert twice (so the inner `T` always lives as long as us, if it exists) and 33 | // neither function is possibly reentrant so there's no way we can invalidate mut xor shared aliasing 34 | unsafe { (*self.0.as_ptr()).as_ref() } 35 | } 36 | } 37 | 38 | // TODO: Ensure that this doesn't produce leaks 39 | enum RecursiveInner { 40 | Owned(Rc), 41 | Unowned(rc::Weak), 42 | } 43 | 44 | /// Type for recursive parsers that are defined through a call to `recursive`, and as such 45 | /// need no internal indirection 46 | pub type Direct<'src, 'b, I, O, Extra> = DynParser<'src, 'b, I, O, Extra>; 47 | 48 | /// Type for recursive parsers that are defined through a call to [`Recursive::declare`], and as 49 | /// such require an additional layer of allocation. 50 | pub struct Indirect<'src, 'b, I: Input<'src>, O, Extra: ParserExtra<'src, I>> { 51 | inner: OnceCell>>, 52 | } 53 | 54 | /// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its 55 | /// [definition](Recursive::define). 56 | /// 57 | /// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible. 58 | pub struct Recursive { 59 | inner: RecursiveInner

, 60 | } 61 | 62 | impl<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I>> Recursive> { 63 | /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before 64 | /// being fulled defined. 65 | /// 66 | /// Declaring a parser before defining it is required for a parser to reference itself. 67 | /// 68 | /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser 69 | /// for parsing (i.e: via the [`Parser::parse`] method or similar). 70 | /// 71 | /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if 72 | /// possible. 73 | /// 74 | /// # Examples 75 | /// 76 | /// ``` 77 | /// # use chumsky::prelude::*; 78 | /// #[derive(Debug, PartialEq)] 79 | /// enum Chain { 80 | /// End, 81 | /// Link(char, Box), 82 | /// } 83 | /// 84 | /// // Declare the existence of the parser before defining it so that it can reference itself 85 | /// let mut chain = Recursive::declare(); 86 | /// 87 | /// // Define the parser in terms of itself. 88 | /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list 89 | /// chain.define(just::<_, _, extra::Err>>('+') 90 | /// .then(chain.clone()) 91 | /// .map(|(c, chain)| Chain::Link(c, Box::new(chain))) 92 | /// .or_not() 93 | /// .map(|chain| chain.unwrap_or(Chain::End))); 94 | /// 95 | /// assert_eq!(chain.parse("").into_result(), Ok(Chain::End)); 96 | /// assert_eq!( 97 | /// chain.parse("++").into_result(), 98 | /// Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))), 99 | /// ); 100 | /// ``` 101 | pub fn declare() -> Self { 102 | Recursive { 103 | inner: RecursiveInner::Owned(Rc::new(Indirect { 104 | inner: OnceCell::new(), 105 | })), 106 | } 107 | } 108 | 109 | /// Defines the parser after declaring it, allowing it to be used for parsing. 
110 | // INFO: Clone bound not actually needed, but good to be safe for future compat 111 | #[track_caller] 112 | pub fn define + Clone + 'src + 'b>(&mut self, parser: P) { 113 | let location = *Location::caller(); 114 | self.parser() 115 | .inner 116 | .set(Box::new(parser)) 117 | .unwrap_or_else(|_| { 118 | panic!("recursive parsers can only be defined once, trying to redefine it at {location}") 119 | }); 120 | } 121 | } 122 | 123 | impl Recursive

{ 124 | #[inline] 125 | fn parser(&self) -> Rc

{ 126 | match &self.inner { 127 | RecursiveInner::Owned(x) => x.clone(), 128 | RecursiveInner::Unowned(x) => x 129 | .upgrade() 130 | .expect("Recursive parser used before being defined"), 131 | } 132 | } 133 | } 134 | 135 | impl Clone for Recursive

{ 136 | fn clone(&self) -> Self { 137 | Self { 138 | inner: match &self.inner { 139 | RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()), 140 | RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()), 141 | }, 142 | } 143 | } 144 | } 145 | 146 | #[cfg(feature = "stacker")] 147 | #[inline] 148 | pub(crate) fn recurse R>(f: F) -> R { 149 | stacker::maybe_grow(1024 * 64, 1024 * 1024, f) 150 | } 151 | #[cfg(not(feature = "stacker"))] 152 | #[inline] 153 | pub(crate) fn recurse R>(f: F) -> R { 154 | f() 155 | } 156 | 157 | impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive> 158 | where 159 | I: Input<'src>, 160 | E: ParserExtra<'src, I>, 161 | { 162 | #[inline] 163 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 164 | recurse(move || { 165 | M::invoke( 166 | self.parser() 167 | .inner 168 | .get() 169 | .expect("Recursive parser used before being defined") 170 | .as_ref(), 171 | inp, 172 | ) 173 | }) 174 | } 175 | 176 | go_extra!(O); 177 | } 178 | 179 | impl<'src, I, O, E> Parser<'src, I, O, E> for Recursive> 180 | where 181 | I: Input<'src>, 182 | E: ParserExtra<'src, I>, 183 | { 184 | #[inline] 185 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 186 | recurse(move || M::invoke(&*self.parser(), inp)) 187 | } 188 | 189 | go_extra!(O); 190 | } 191 | 192 | /// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern). 193 | /// 194 | /// The given function must create the parser. The parser must not be used to parse input before this function returns. 195 | /// 196 | /// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`]. 197 | /// 198 | /// The output type of this parser is `O`, the same as the inner parser. 
199 | /// 200 | /// # Examples 201 | /// 202 | /// ``` 203 | /// # use chumsky::prelude::*; 204 | /// #[derive(Debug, PartialEq)] 205 | /// enum Tree<'src> { 206 | /// Leaf(&'src str), 207 | /// Branch(Vec>), 208 | /// } 209 | /// 210 | /// // Parser that recursively parses nested lists 211 | /// let tree = recursive::<_, _, extra::Err>, _, _>(|tree| tree 212 | /// .separated_by(just(',')) 213 | /// .collect::>() 214 | /// .delimited_by(just('['), just(']')) 215 | /// .map(Tree::Branch) 216 | /// .or(text::ascii::ident().map(Tree::Leaf)) 217 | /// .padded()); 218 | /// 219 | /// assert_eq!(tree.parse("hello").into_result(), Ok(Tree::Leaf("hello"))); 220 | /// assert_eq!(tree.parse("[a, b, c]").into_result(), Ok(Tree::Branch(vec![ 221 | /// Tree::Leaf("a"), 222 | /// Tree::Leaf("b"), 223 | /// Tree::Leaf("c"), 224 | /// ]))); 225 | /// // The parser can deal with arbitrarily complex nested lists 226 | /// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]").into_result(), Ok(Tree::Branch(vec![ 227 | /// Tree::Branch(vec![ 228 | /// Tree::Leaf("a"), 229 | /// Tree::Leaf("b"), 230 | /// ]), 231 | /// Tree::Leaf("c"), 232 | /// Tree::Branch(vec![ 233 | /// Tree::Leaf("d"), 234 | /// Tree::Branch(vec![ 235 | /// Tree::Leaf("e"), 236 | /// Tree::Leaf("f"), 237 | /// ]), 238 | /// ]), 239 | /// ]))); 240 | /// ``` 241 | // INFO: Clone bound not actually needed, but good to be safe for future compat 242 | pub fn recursive<'src, 'b, I, O, E, A, F>(f: F) -> Recursive> 243 | where 244 | I: Input<'src>, 245 | E: ParserExtra<'src, I>, 246 | A: Parser<'src, I, O, E> + Clone + 'b, 247 | F: FnOnce(Recursive>) -> A, 248 | { 249 | let rc = Rc::new_cyclic(|rc| { 250 | let rc: rc::Weak> = rc.clone() as _; 251 | let parser = Recursive { 252 | inner: RecursiveInner::Unowned(rc.clone()), 253 | }; 254 | 255 | f(parser) 256 | }); 257 | 258 | Recursive { 259 | inner: RecursiveInner::Owned(rc), 260 | } 261 | } 262 | 
-------------------------------------------------------------------------------- /src/regex.rs: -------------------------------------------------------------------------------- 1 | //! Implementations of regex-based parsers 2 | 3 | use super::*; 4 | use regex_automata::{meta, Anchored, Input as ReInput}; 5 | 6 | /// See [`regex()`]. 7 | pub struct Regex { 8 | regex: meta::Regex, 9 | #[allow(dead_code)] 10 | phantom: EmptyPhantom<(E, I)>, 11 | } 12 | 13 | impl Clone for Regex { 14 | fn clone(&self) -> Self { 15 | Self { 16 | regex: self.regex.clone(), 17 | phantom: EmptyPhantom::new(), 18 | } 19 | } 20 | } 21 | 22 | /// Match input based on a provided regex pattern 23 | pub fn regex(pattern: &str) -> Regex { 24 | Regex { 25 | regex: meta::Regex::new(pattern).expect("Failed to compile regex"), 26 | phantom: EmptyPhantom::new(), 27 | } 28 | } 29 | 30 | impl<'src, S, I, E> Parser<'src, I, &'src S, E> for Regex 31 | where 32 | I: StrInput<'src, Slice = &'src S>, 33 | I::Token: Char, 34 | S: ?Sized + AsRef<[u8]> + 'src, 35 | E: ParserExtra<'src, I>, 36 | { 37 | #[inline] 38 | fn go(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult { 39 | let before = inp.cursor(); 40 | 41 | let re_in = ReInput::new(inp.full_slice()) 42 | .anchored(Anchored::Yes) 43 | .range(before.inner..); 44 | 45 | let res = self.regex.find(re_in).map(|m| m.len()); 46 | 47 | match res { 48 | Some(len) => { 49 | let before = inp.cursor(); 50 | // SAFETY: `len` *must* be no greater than the byte length of the remaining string 51 | unsafe { 52 | inp.skip_bytes(len); 53 | } 54 | let after = inp.cursor(); 55 | Ok(M::bind(|| inp.slice(&before..&after))) 56 | } 57 | None => { 58 | // TODO: Improve error 59 | let span = inp.span_since(&before); 60 | inp.add_alt([DefaultExpected::SomethingElse], None, span); 61 | Err(()) 62 | } 63 | } 64 | } 65 | 66 | go_extra!(&'src S); 67 | } 68 | 69 | #[cfg(test)] 70 | mod tests { 71 | use super::*; 72 | 73 | #[test] 74 | fn regex_parser() { 75 | use self::prelude::*; 76 
| use self::regex::*; 77 | 78 | fn parser<'src, S, I>() -> impl Parser<'src, I, Vec<&'src S>> 79 | where 80 | S: ?Sized + AsRef<[u8]> + 'src, 81 | I: StrInput<'src, Slice = &'src S>, 82 | I::Token: Char, 83 | { 84 | regex("[a-zA-Z_][a-zA-Z0-9_]*") 85 | .padded() 86 | .repeated() 87 | .collect() 88 | } 89 | assert_eq!( 90 | parser().parse("hello world this works").into_result(), 91 | Ok(vec!["hello", "world", "this", "works"]), 92 | ); 93 | 94 | assert_eq!( 95 | parser() 96 | .parse(b"hello world this works" as &[_]) 97 | .into_result(), 98 | Ok(vec![ 99 | b"hello" as &[_], 100 | b"world" as &[_], 101 | b"this" as &[_], 102 | b"works" as &[_], 103 | ]), 104 | ); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/span.rs: -------------------------------------------------------------------------------- 1 | //! Types and traits related to spans. 2 | //! 3 | //! *“We demand rigidly defined areas of doubt and uncertainty!”* 4 | //! 5 | //! You can use the [`Span`] trait to connect up chumsky to your compiler's knowledge of the input source. 6 | 7 | use super::*; 8 | 9 | /// A trait that describes a span over a particular range of inputs. 10 | /// 11 | /// Spans typically consist of some context, such as the file they originated from, and a start/end offset. Spans are 12 | /// permitted to overlap one-another. The end offset must always be greater than or equal to the start offset. 13 | /// 14 | /// Span is automatically implemented for [`Range`] and [`(C, Range)`]. 15 | pub trait Span { 16 | /// Extra context used in a span. 17 | /// 18 | /// This is usually some way to uniquely identity the source file that a span originated in such as the file's 19 | /// path, URL, etc. 20 | /// 21 | /// NOTE: Span contexts have no inherent meaning to Chumsky and can be anything. For example, [`Range`]'s 22 | /// implementation of [`Span`] simply uses [`()`] as its context. 
23 | type Context; 24 | 25 | /// A type representing a span's start or end offset from the start of the input. 26 | /// 27 | /// Typically, [`usize`] is used. 28 | /// 29 | /// NOTE: Offsets have no inherently meaning to Chumsky and are not used to decide how to prioritize errors. This 30 | /// means that it's perfectly fine for tokens to have non-continuous spans that bear no relation to their actual 31 | /// location in the input stream. This is useful for languages with an AST-level macro system that need to 32 | /// correctly point to symbols in the macro input when producing errors. 33 | type Offset: Clone; 34 | 35 | /// Create a new span given a context and an offset range. 36 | fn new(context: Self::Context, range: Range) -> Self; 37 | 38 | /// Return the span's context. 39 | fn context(&self) -> Self::Context; 40 | 41 | /// Return the start offset of the span. 42 | fn start(&self) -> Self::Offset; 43 | 44 | /// Return the end offset of the span. 45 | fn end(&self) -> Self::Offset; 46 | 47 | /// Turn this span into a zero-width span that starts and ends at the end of the original. 48 | /// 49 | /// For example, an original span like `3..7` will result in a new span of `7..7`. 50 | fn to_end(&self) -> Self 51 | where 52 | Self: Sized, 53 | { 54 | Self::new(self.context(), self.end()..self.end()) 55 | } 56 | 57 | /// Combine two assumed-contiguous spans together into a larger span that encompasses both (and anything between). 58 | /// 59 | /// For example, spans like `3..5` and `7..8` will result in a unioned span of `3..8`. 60 | /// 61 | /// The spans may overlap one-another, but the start offset must come before the end offset for each span (i.e: 62 | /// each span must be 'well-formed'). If this is not the case, the result is unspecified. 63 | /// 64 | /// # Panics 65 | /// 66 | /// Panics if the [`Self::Context`]s of both spans are not equal. 
67 | fn union(&self, other: Self) -> Self 68 | where 69 | Self::Context: PartialEq + fmt::Debug, 70 | Self::Offset: Ord, 71 | Self: Sized, 72 | { 73 | assert_eq!( 74 | self.context(), 75 | other.context(), 76 | "tried to union two spans with different contexts" 77 | ); 78 | Self::new( 79 | self.context(), 80 | self.start().min(other.start())..self.end().max(other.end()), 81 | ) 82 | } 83 | } 84 | 85 | /// The most basic implementor of `Span` - akin to `Range`, but `Copy` since it's not also 86 | /// an iterator. Also has a `Display` implementation 87 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 88 | #[derive(Copy, Clone, PartialEq, Eq, Hash)] 89 | pub struct SimpleSpan { 90 | /// The start offset of the span. 91 | pub start: T, 92 | /// The end (exclusive) offset of the span. 93 | pub end: T, 94 | /// The context of the span (usually some ID representing the file path the span relates to). 95 | pub context: C, 96 | } 97 | 98 | impl SimpleSpan { 99 | /// Convert this span into a [`std::ops::Range`]. 
100 | pub fn into_range(self) -> Range { 101 | self.start..self.end 102 | } 103 | } 104 | 105 | impl From> for SimpleSpan { 106 | fn from(range: Range) -> Self { 107 | SimpleSpan { 108 | start: range.start, 109 | end: range.end, 110 | context: (), 111 | } 112 | } 113 | } 114 | 115 | impl From> for Range { 116 | fn from(span: SimpleSpan) -> Self { 117 | Range { 118 | start: span.start, 119 | end: span.end, 120 | } 121 | } 122 | } 123 | 124 | impl fmt::Debug for SimpleSpan 125 | where 126 | T: fmt::Debug, 127 | { 128 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 129 | write!(f, "{:?}..{:?}", self.start, self.end) 130 | } 131 | } 132 | 133 | impl fmt::Display for SimpleSpan 134 | where 135 | T: fmt::Display, 136 | { 137 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 138 | write!(f, "{}..{}", self.start, self.end) 139 | } 140 | } 141 | 142 | impl IntoIterator for SimpleSpan 143 | where 144 | Range: Iterator, 145 | { 146 | type IntoIter = Range; 147 | type Item = T; 148 | 149 | fn into_iter(self) -> Self::IntoIter { 150 | self.start..self.end 151 | } 152 | } 153 | 154 | impl Span for SimpleSpan { 155 | type Context = C; 156 | type Offset = T; 157 | 158 | fn new(context: Self::Context, range: Range) -> Self { 159 | Self { 160 | start: range.start, 161 | end: range.end, 162 | context, 163 | } 164 | } 165 | fn context(&self) -> Self::Context { 166 | self.context.clone() 167 | } 168 | fn start(&self) -> Self::Offset { 169 | self.start.clone() 170 | } 171 | fn end(&self) -> Self::Offset { 172 | self.end.clone() 173 | } 174 | } 175 | 176 | impl> Span for (C, S) { 177 | type Context = C; 178 | type Offset = S::Offset; 179 | 180 | fn new(context: Self::Context, range: Range) -> Self { 181 | (context, S::new((), range)) 182 | } 183 | fn context(&self) -> Self::Context { 184 | self.0.clone() 185 | } 186 | fn start(&self) -> Self::Offset { 187 | self.1.start() 188 | } 189 | fn end(&self) -> Self::Offset { 190 | self.1.end() 191 | } 192 | } 193 | 194 | 
impl Span for Range { 195 | type Context = (); 196 | type Offset = T; 197 | 198 | fn new(_context: Self::Context, range: Range) -> Self { 199 | range 200 | } 201 | fn context(&self) -> Self::Context {} 202 | fn start(&self) -> Self::Offset { 203 | self.start.clone() 204 | } 205 | fn end(&self) -> Self::Offset { 206 | self.end.clone() 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/stream.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | /// An input that dynamically pulls tokens from a cached [`Iterator`]. 4 | /// 5 | /// Internally, the stream will pull tokens in batches and cache the results on the heap so as to avoid invoking the 6 | /// iterator every time a new token is required. 7 | /// 8 | /// Note: This input type should be used when the internal iterator type, `I`, is *expensive* to clone. This is usually 9 | /// not the case: you might find that [`IterInput`] performs better. 10 | pub struct Stream { 11 | tokens: Vec, 12 | iter: I, 13 | } 14 | 15 | impl Stream { 16 | /// Create a new stream from an [`Iterator`]. 17 | /// 18 | /// # Example 19 | /// 20 | /// ``` 21 | /// # use chumsky::{prelude::*, input::Stream}; 22 | /// let stream = Stream::from_iter((0..10).map(|i| char::from_digit(i, 10).unwrap())); 23 | /// 24 | /// let parser = any::<_, extra::Err>>().filter(|c: &char| c.is_ascii_digit()).repeated().collect::(); 25 | /// 26 | /// assert_eq!(parser.parse(stream).into_result().as_deref(), Ok("0123456789")); 27 | /// ``` 28 | pub fn from_iter>(iter: J) -> Self { 29 | Self { 30 | tokens: Vec::new(), 31 | iter: iter.into_iter(), 32 | } 33 | } 34 | 35 | /// Box this stream, turning it into a [BoxedStream]. This can be useful in cases where your parser accepts input 36 | /// from several different sources and it needs to work with all of them. 
37 | pub fn boxed<'a>(self) -> BoxedStream<'a, I::Item> 38 | where 39 | I: 'a, 40 | { 41 | Stream { 42 | tokens: self.tokens, 43 | iter: Box::new(self.iter), 44 | } 45 | } 46 | 47 | /// Like [`Stream::boxed`], but yields an [`BoxedExactSizeStream`], which implements [`ExactSizeInput`]. 48 | pub fn exact_size_boxed<'a>(self) -> BoxedExactSizeStream<'a, I::Item> 49 | where 50 | I: ExactSizeIterator + 'a, 51 | { 52 | Stream { 53 | tokens: self.tokens, 54 | iter: Box::new(self.iter), 55 | } 56 | } 57 | } 58 | 59 | /// A stream containing a boxed iterator. See [`Stream::boxed`]. 60 | pub type BoxedStream<'a, T> = Stream + 'a>>; 61 | 62 | /// A stream containing a boxed exact-sized iterator. See [`Stream::exact_size_boxed`]. 63 | pub type BoxedExactSizeStream<'a, T> = Stream + 'a>>; 64 | 65 | impl Sealed for Stream {} 66 | impl<'src, I: Iterator + 'src> Input<'src> for Stream 67 | where 68 | I::Item: Clone, 69 | { 70 | type Span = SimpleSpan; 71 | 72 | type Token = I::Item; 73 | type MaybeToken = I::Item; 74 | 75 | type Cursor = usize; 76 | 77 | type Cache = Self; 78 | 79 | #[inline(always)] 80 | fn begin(self) -> (Self::Cursor, Self::Cache) { 81 | (0, self) 82 | } 83 | 84 | #[inline] 85 | fn cursor_location(cursor: &Self::Cursor) -> usize { 86 | *cursor 87 | } 88 | 89 | #[inline(always)] 90 | unsafe fn next_maybe( 91 | this: &mut Self::Cache, 92 | cursor: &mut Self::Cursor, 93 | ) -> Option { 94 | Self::next(this, cursor) 95 | } 96 | 97 | #[inline(always)] 98 | unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { 99 | (*range.start..*range.end).into() 100 | } 101 | } 102 | 103 | impl<'src, I: ExactSizeIterator + 'src> ExactSizeInput<'src> for Stream 104 | where 105 | I::Item: Clone, 106 | { 107 | #[inline(always)] 108 | unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { 109 | (*range.start..this.tokens.len() + this.iter.len()).into() 110 | } 111 | } 112 | 113 | impl<'src, I: Iterator + 'src> 
ValueInput<'src> for Stream 114 | where 115 | I::Item: Clone, 116 | { 117 | #[inline] 118 | unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { 119 | // Pull new items into the vector if we need them 120 | if this.tokens.len() <= *cursor { 121 | this.tokens.extend((&mut this.iter).take(512)); 122 | } 123 | 124 | // Get the token at the given cursor 125 | this.tokens.get(*cursor).map(|tok| { 126 | *cursor += 1; 127 | tok.clone() 128 | }) 129 | } 130 | } 131 | 132 | /// An input that dynamically pulls tokens from an [`Iterator`]. 133 | /// 134 | /// This input type supports rewinding by [`Clone`]-ing the iterator. It is recommended that your iterator is very 135 | /// cheap to clone. If this is not the case, consider using [`Stream`] instead, which caches generated tokens 136 | /// internally. 137 | pub struct IterInput { 138 | iter: I, 139 | eoi: S, 140 | } 141 | 142 | impl IterInput { 143 | /// Create a new [`IterInput`] with the given iterator, and end of input span. 
144 | pub fn new(iter: I, eoi: S) -> Self { 145 | Self { iter, eoi } 146 | } 147 | } 148 | 149 | impl<'src, I, T: 'src, S> Input<'src> for IterInput 150 | where 151 | I: Iterator + Clone + 'src, 152 | S: Span + 'src, 153 | { 154 | type Cursor = (I, usize, Option); 155 | type Span = S; 156 | 157 | type Token = T; 158 | type MaybeToken = T; 159 | 160 | type Cache = S; // eoi 161 | 162 | #[inline] 163 | fn begin(self) -> (Self::Cursor, Self::Cache) { 164 | ((self.iter, 0, None), self.eoi) 165 | } 166 | 167 | #[inline] 168 | fn cursor_location(cursor: &Self::Cursor) -> usize { 169 | cursor.1 170 | } 171 | 172 | unsafe fn next_maybe( 173 | _eoi: &mut Self::Cache, 174 | cursor: &mut Self::Cursor, 175 | ) -> Option { 176 | cursor.0.next().map(|(tok, span)| { 177 | cursor.1 += 1; 178 | cursor.2 = Some(span.end()); 179 | tok 180 | }) 181 | } 182 | 183 | unsafe fn span(eoi: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { 184 | match range.start.0.clone().next() { 185 | Some((_, s)) => { 186 | let end = range.end.2.clone().unwrap_or_else(|| eoi.end()); 187 | S::new(eoi.context(), s.start()..end) 188 | } 189 | None => S::new(eoi.context(), eoi.end()..eoi.end()), 190 | } 191 | } 192 | } 193 | 194 | // impl<'src, I, S> ExactSizeInput<'src> for IterInput 195 | // where 196 | // I: Iterator + Clone + 'src, 197 | // S: Span + 'src, 198 | // { 199 | // #[inline(always)] 200 | // unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { 201 | // (*range.start..this.tokens.len() + cursor.0.len()).into() 202 | // } 203 | // } 204 | 205 | impl<'src, I, T: 'src, S> ValueInput<'src> for IterInput 206 | where 207 | I: Iterator + Clone + 'src, 208 | S: Span + 'src, 209 | { 210 | #[inline] 211 | unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { 212 | Self::next_maybe(this, cursor) 213 | } 214 | } 215 | 216 | #[test] 217 | fn map_tuple() { 218 | fn parser<'src, I: Input<'src, Token = char>>() -> impl Parser<'src, I, 
char> { 219 | just('h') 220 | } 221 | 222 | let stream = Stream::from_iter(core::iter::once(('h', 0..1))).boxed(); 223 | let stream = stream.map(0..10, |(t, s)| (t, s)); 224 | 225 | assert_eq!(parser().parse(stream).into_result(), Ok('h')); 226 | } 227 | -------------------------------------------------------------------------------- /src/tokio.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use bytes::Bytes; 4 | 5 | impl<'src> Input<'src> for Bytes { 6 | type Cursor = usize; 7 | type Span = SimpleSpan; 8 | 9 | type Token = u8; 10 | type MaybeToken = u8; 11 | 12 | type Cache = Self; 13 | 14 | #[inline] 15 | fn begin(self) -> (Self::Cursor, Self::Cache) { 16 | (0, self) 17 | } 18 | 19 | #[inline] 20 | fn cursor_location(cursor: &Self::Cursor) -> usize { 21 | *cursor 22 | } 23 | 24 | #[inline(always)] 25 | unsafe fn next_maybe( 26 | this: &mut Self::Cache, 27 | cursor: &mut Self::Cursor, 28 | ) -> Option { 29 | if let Some(tok) = this.get(*cursor) { 30 | *cursor += 1; 31 | Some(*tok) 32 | } else { 33 | None 34 | } 35 | } 36 | 37 | #[inline(always)] 38 | unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { 39 | (*range.start..*range.end).into() 40 | } 41 | } 42 | 43 | impl<'src> ExactSizeInput<'src> for Bytes { 44 | #[inline(always)] 45 | unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { 46 | (*range.start..this.len()).into() 47 | } 48 | } 49 | 50 | impl Sealed for Bytes {} 51 | impl<'src> StrInput<'src> for Bytes { 52 | #[doc(hidden)] 53 | fn stringify(slice: Self::Slice) -> String { 54 | slice 55 | .iter() 56 | // .map(|e| core::ascii::Char::from_u8(e).unwrap_or(AsciiChar::Substitute).to_char()) 57 | .map(|e| char::from(*e)) 58 | .collect() 59 | } 60 | } 61 | 62 | impl<'src> SliceInput<'src> for Bytes { 63 | type Slice = Bytes; 64 | 65 | #[inline(always)] 66 | fn full_slice(this: &mut Self::Cache) -> Self::Slice { 67 | 
this.clone() 68 | } 69 | 70 | #[inline(always)] 71 | unsafe fn slice(this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { 72 | this.slice(*range.start..*range.end) 73 | } 74 | 75 | #[inline(always)] 76 | unsafe fn slice_from(this: &mut Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { 77 | this.slice(*from.start..) 78 | } 79 | } 80 | 81 | impl<'src> ValueInput<'src> for Bytes { 82 | #[inline(always)] 83 | unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { 84 | Self::next_maybe(this, cursor) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | //! Utility items used throughout the crate. 2 | 3 | use super::*; 4 | 5 | use core::{ 6 | hash::Hasher, 7 | ops::{Deref, DerefMut}, 8 | }; 9 | 10 | /// A value that may be a `T` or a mutable reference to a `T`. 11 | pub type MaybeMut<'a, T> = Maybe; 12 | 13 | /// A value that may be a `T` or a shared reference to a `T`. 14 | pub type MaybeRef<'a, T> = Maybe; 15 | 16 | /// A type that can represent a borrowed reference to a `T` or a value of `T`. 17 | /// 18 | /// Used internally to facilitate zero-copy manipulation of tokens during error generation (see [`Error`]). 19 | #[derive(Copy, Clone)] 20 | pub enum Maybe> { 21 | /// We have a reference to `T`. 22 | Ref(R), 23 | /// We have a value of `T`. 
24 | Val(T), 25 | } 26 | 27 | impl> PartialEq for Maybe { 28 | #[inline] 29 | fn eq(&self, other: &Self) -> bool { 30 | **self == **other 31 | } 32 | } 33 | 34 | impl> Eq for Maybe {} 35 | 36 | impl> PartialOrd for Maybe { 37 | #[inline] 38 | fn partial_cmp(&self, other: &Self) -> Option { 39 | (**self).partial_cmp(&**other) 40 | } 41 | } 42 | 43 | impl> Ord for Maybe { 44 | #[inline] 45 | fn cmp(&self, other: &Self) -> Ordering { 46 | (**self).cmp(&**other) 47 | } 48 | } 49 | 50 | impl> Hash for Maybe { 51 | #[inline] 52 | fn hash(&self, state: &mut H) { 53 | T::hash(&**self, state) 54 | } 55 | } 56 | 57 | impl> fmt::Debug for Maybe { 58 | #[inline] 59 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 60 | T::fmt(&**self, f) 61 | } 62 | } 63 | 64 | impl> Maybe { 65 | /// Convert this [`Maybe`] into a `T`, cloning the inner value if necessary. 66 | #[inline] 67 | pub fn into_inner(self) -> T 68 | where 69 | T: Clone, 70 | { 71 | match self { 72 | Self::Ref(x) => x.clone(), 73 | Self::Val(x) => x, 74 | } 75 | } 76 | 77 | /// Convert this [`Maybe`] into an owned version of itself, cloning the inner reference if required. 
78 | #[inline] 79 | pub fn into_owned(self) -> Maybe 80 | where 81 | T: Clone, 82 | U: Deref, 83 | { 84 | Maybe::Val(self.into_inner()) 85 | } 86 | } 87 | 88 | impl> Deref for Maybe { 89 | type Target = T; 90 | 91 | #[inline] 92 | fn deref(&self) -> &Self::Target { 93 | match self { 94 | Self::Ref(x) => x, 95 | Self::Val(x) => x, 96 | } 97 | } 98 | } 99 | 100 | impl> DerefMut for Maybe { 101 | #[inline] 102 | fn deref_mut(&mut self) -> &mut Self::Target { 103 | match self { 104 | Self::Ref(x) => &mut *x, 105 | Self::Val(x) => x, 106 | } 107 | } 108 | } 109 | 110 | impl From for Maybe { 111 | #[inline] 112 | fn from(x: T) -> Self { 113 | Self::Val(x) 114 | } 115 | } 116 | 117 | impl From for Maybe { 118 | #[inline] 119 | fn from(x: T) -> Self { 120 | Self::Val(x) 121 | } 122 | } 123 | 124 | impl<'a, T> From<&'a T> for Maybe { 125 | #[inline] 126 | fn from(x: &'a T) -> Self { 127 | Self::Ref(x) 128 | } 129 | } 130 | 131 | impl<'a, T> From<&'a mut T> for Maybe { 132 | #[inline] 133 | fn from(x: &'a mut T) -> Self { 134 | Self::Ref(x) 135 | } 136 | } 137 | 138 | #[cfg(feature = "serde")] 139 | impl> Serialize for Maybe { 140 | fn serialize(&self, serializer: S) -> Result 141 | where 142 | S: Serializer, 143 | { 144 | serializer.serialize_newtype_struct("Maybe", &**self) 145 | } 146 | } 147 | 148 | #[cfg(feature = "serde")] 149 | impl<'de, T: Deserialize<'de>, R: Deref> Deserialize<'de> for Maybe { 150 | fn deserialize(deserializer: D) -> Result 151 | where 152 | D: Deserializer<'de>, 153 | { 154 | struct MaybeVisitor(PhantomData<(T, R)>); 155 | 156 | impl<'de2, T: Deserialize<'de2>, R: Deref> Visitor<'de2> for MaybeVisitor { 157 | type Value = Maybe; 158 | 159 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 160 | write!(formatter, "a Maybe") 161 | } 162 | 163 | fn visit_newtype_struct(self, deserializer: D) -> Result 164 | where 165 | D: Deserializer<'de2>, 166 | { 167 | T::deserialize(deserializer).map(Maybe::Val) 168 | } 169 | } 170 | 171 | 
deserializer.deserialize_newtype_struct("Maybe", MaybeVisitor(PhantomData)) 172 | } 173 | } 174 | 175 | mod ref_or_val_sealed { 176 | pub trait Sealed {} 177 | } 178 | 179 | /// An trait that allows abstracting over values of or references to a `T`. 180 | /// 181 | /// Some [`Input`]s can only generate tokens by-reference (like `&[T]` -> `&T`), and some can only generate tokens 182 | /// by-value (like `&str` -> `char`). This trait allows chumsky to handle both kinds of input. 183 | /// 184 | /// The trait is sealed: you cannot implement it yourself. 185 | pub trait IntoMaybe<'src, T: 'src>: 186 | ref_or_val_sealed::Sealed + Borrow + Into> 187 | { 188 | /// Project the referential properties of this type on to another type. 189 | /// 190 | /// For example, `<&Foo>::Proj = &Bar` but `::Proj = Bar`. 191 | #[doc(hidden)] 192 | type Proj: IntoMaybe<'src, U>; 193 | 194 | #[doc(hidden)] 195 | fn map_maybe( 196 | self, 197 | f: impl FnOnce(&'src T) -> &'src R, 198 | g: impl FnOnce(T) -> R, 199 | ) -> Self::Proj; 200 | } 201 | 202 | impl ref_or_val_sealed::Sealed for &T {} 203 | impl<'src, T> IntoMaybe<'src, T> for &'src T { 204 | type Proj = &'src U; 205 | fn map_maybe( 206 | self, 207 | f: impl FnOnce(&'src T) -> &'src R, 208 | _g: impl FnOnce(T) -> R, 209 | ) -> Self::Proj { 210 | f(self) 211 | } 212 | } 213 | 214 | impl ref_or_val_sealed::Sealed for T {} 215 | impl<'src, T: 'src> IntoMaybe<'src, T> for T { 216 | type Proj = U; 217 | fn map_maybe( 218 | self, 219 | _f: impl FnOnce(&'src T) -> &'src R, 220 | g: impl FnOnce(T) -> R, 221 | ) -> Self::Proj { 222 | g(self) 223 | } 224 | } 225 | --------------------------------------------------------------------------------