├── rustfmt.toml ├── images ├── error1.png ├── error2.png ├── title.png ├── tree.png ├── example1.png ├── example2.png ├── optimize.png ├── parse_error.png └── state_option.png ├── .github ├── FUNDING.yml ├── pull_request_template.md └── workflows │ └── bootstrap-test.yml ├── example ├── json │ ├── src │ │ ├── parser_expanded.rs │ │ ├── parser.rs │ │ └── main.rs │ ├── build.rs │ └── Cargo.toml ├── calculator │ ├── src │ │ ├── parser_expanded.rs │ │ ├── main.rs │ │ └── parser.rs │ ├── Cargo.toml │ └── build.rs ├── calculator_u8 │ ├── src │ │ ├── parser_expanded.rs │ │ ├── parser.rs │ │ └── main.rs │ ├── build.rs │ └── Cargo.toml └── glr │ ├── Cargo.toml │ └── src │ ├── parser.rs │ └── main.rs ├── rusty_lr_parser ├── src │ ├── parser │ │ ├── mod.rs │ │ ├── span_pair.rs │ │ └── lexer.rs │ ├── lib.rs │ ├── partition.rs │ ├── token.rs │ ├── utils.rs │ ├── terminal_info.rs │ ├── nonterminal_info.rs │ ├── rangeresolver.rs │ ├── terminalset.rs │ └── error.rs └── Cargo.toml ├── rusty_lr_core ├── src │ ├── parser │ │ ├── deterministic │ │ │ ├── mod.rs │ │ │ └── error.rs │ │ ├── nondeterministic │ │ │ ├── mod.rs │ │ │ ├── error.rs │ │ │ └── node.rs │ │ ├── terminalclass.rs │ │ ├── nonterminal.rs │ │ ├── mod.rs │ │ ├── data_stack.rs │ │ └── state.rs │ ├── builder │ │ ├── error.rs │ │ ├── mod.rs │ │ ├── state.rs │ │ └── diags.rs │ ├── hash.rs │ ├── backtrace.rs │ ├── location.rs │ ├── lib.rs │ ├── token.rs │ ├── rule.rs │ └── tree.rs └── Cargo.toml ├── Cargo.toml ├── rusty_lr_buildscript ├── src │ ├── output.rs │ ├── utils.rs │ └── split.rs └── Cargo.toml ├── .gitignore ├── rusty_lr_derive ├── Cargo.toml └── src │ └── lib.rs ├── rusty_lr_executable ├── Cargo.toml ├── src │ ├── arg.rs │ └── main.rs └── README.md ├── rusty_lr ├── Cargo.toml └── src │ └── lib.rs ├── LICENSE-MIT ├── scripts └── bootstrap_test.sh ├── GLR.md ├── LICENSE-APACHE └── README.md /rustfmt.toml: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /images/error1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/error1.png -------------------------------------------------------------------------------- /images/error2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/error2.png -------------------------------------------------------------------------------- /images/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/title.png -------------------------------------------------------------------------------- /images/tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/tree.png -------------------------------------------------------------------------------- /images/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/example1.png -------------------------------------------------------------------------------- /images/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/example2.png -------------------------------------------------------------------------------- /images/optimize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/optimize.png -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding 
model platforms 2 | 3 | github: ehwan 4 | -------------------------------------------------------------------------------- /example/json/src/parser_expanded.rs: -------------------------------------------------------------------------------- 1 | include!(concat!(env!("OUT_DIR"), "/parser.rs")); 2 | -------------------------------------------------------------------------------- /images/parse_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/parse_error.png -------------------------------------------------------------------------------- /example/calculator/src/parser_expanded.rs: -------------------------------------------------------------------------------- 1 | include!(concat!(env!("OUT_DIR"), "/parser.rs")); 2 | -------------------------------------------------------------------------------- /example/calculator_u8/src/parser_expanded.rs: -------------------------------------------------------------------------------- 1 | include!(concat!(env!("OUT_DIR"), "/parser.rs")); 2 | -------------------------------------------------------------------------------- /images/state_option.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/state_option.png -------------------------------------------------------------------------------- /rusty_lr_parser/src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod args; 2 | pub mod lexer; 3 | pub mod parser_expanded; 4 | pub mod span_pair; 5 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/deterministic/mod.rs: -------------------------------------------------------------------------------- 1 | mod context; 2 | mod error; 3 | 4 | pub use context::Context; 5 | pub use error::ParseError; 6 | 
-------------------------------------------------------------------------------- /example/glr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "glr" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr", features = ["tree"] } 8 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nondeterministic/mod.rs: -------------------------------------------------------------------------------- 1 | mod context; 2 | mod error; 3 | mod node; 4 | 5 | pub use context::Context; 6 | pub use error::ParseError; 7 | pub use node::Node; 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Please run `scripts/bootstrap_test.sh` and push all of the changes it make before making the PR. 2 | 3 | This will run brief tests, and make it easier to look at what part of generated code has been changed. 
4 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/error.rs: -------------------------------------------------------------------------------- 1 | /// Error type for building grammar 2 | #[derive(Debug, Clone, PartialEq, Eq)] 3 | pub enum BuildError { 4 | RuleNotFound(NonTerm), 5 | 6 | NoAugmented, 7 | 8 | __PhantomData__(Term), 9 | } 10 | -------------------------------------------------------------------------------- /example/json/build.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::build; 2 | 3 | fn main() { 4 | println!("cargo::rerun-if-changed=src/parser.rs"); 5 | let output = format!("{}/parser.rs", std::env::var("OUT_DIR").unwrap()); 6 | 7 | build::Builder::new().file("src/parser.rs").build(&output); 8 | } 9 | -------------------------------------------------------------------------------- /example/calculator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calculator" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr" } 8 | 9 | 10 | [build-dependencies] 11 | rusty_lr = { path = "../../rusty_lr", features = ["build"] } 12 | -------------------------------------------------------------------------------- /example/calculator/build.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::build; 2 | 3 | fn main() { 4 | println!("cargo::rerun-if-changed=src/parser.rs"); 5 | let output = format!("{}/parser.rs", std::env::var("OUT_DIR").unwrap()); 6 | 7 | build::Builder::new().file("src/parser.rs").build(&output); 8 | } 9 | -------------------------------------------------------------------------------- /example/json/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "json" 3 | version = "0.1.0" 4 | edition = "2024" 
5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr", features = ["tree"] } 8 | 9 | [build-dependencies] 10 | rusty_lr = { path = "../../rusty_lr", features = ["build"] } 11 | -------------------------------------------------------------------------------- /example/calculator_u8/build.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::build; 2 | 3 | fn main() { 4 | println!("cargo::rerun-if-changed=src/parser.rs"); 5 | let output = format!("{}/parser.rs", std::env::var("OUT_DIR").unwrap()); 6 | 7 | build::Builder::new().file("src/parser.rs").build(&output); 8 | } 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "rusty_lr", 4 | "rusty_lr_core", 5 | "rusty_lr_derive", 6 | "rusty_lr_parser", 7 | "rusty_lr_buildscript", 8 | "rusty_lr_executable", 9 | "example/calculator", 10 | "example/calculator_u8", 11 | "example/glr", 12 | "example/json", 13 | ] 14 | -------------------------------------------------------------------------------- /example/calculator_u8/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calculator_u8" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr", features = ["tree"] } 8 | 9 | 10 | [build-dependencies] 11 | rusty_lr = { path = "../../rusty_lr", features = ["build"] } 12 | -------------------------------------------------------------------------------- /rusty_lr_core/src/hash.rs: -------------------------------------------------------------------------------- 1 | // #[cfg(feature = "fxhash")] 2 | pub use rustc_hash::FxHashMap as HashMap; 3 | 4 | // #[cfg(feature = "fxhash")] 5 | pub use rustc_hash::FxHashSet as HashSet; 6 | 7 | // #[cfg(not(feature = "fxhash"))] 8 | // pub use 
std::collections::HashMap; 9 | 10 | // #[cfg(not(feature = "fxhash"))] 11 | // pub use std::collections::HashSet; 12 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/mod.rs: -------------------------------------------------------------------------------- 1 | mod diags; 2 | mod error; 3 | mod grammar; 4 | mod state; 5 | 6 | pub use diags::DiagnosticCollector; 7 | pub use error::BuildError; 8 | pub use grammar::Grammar; 9 | pub use state::State; 10 | 11 | /// struct for output of parser building. 12 | pub struct States { 13 | pub states: Vec>, 14 | } 15 | -------------------------------------------------------------------------------- /rusty_lr_buildscript/src/output.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::TokenStream; 2 | 3 | pub struct Output { 4 | /// token stream before '%%' 5 | pub user_stream: TokenStream, 6 | /// token stream after '%%' 7 | pub generated_stream: TokenStream, 8 | /// debug comments attatched to the output file 9 | pub debug_comments: String, 10 | 11 | pub grammar: rusty_lr_parser::grammar::Grammar, 12 | } 13 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/terminalclass.rs: -------------------------------------------------------------------------------- 1 | pub trait TerminalClass: Copy { 2 | type Term; 3 | 4 | const ERROR: Self; 5 | const EOF: Self; 6 | 7 | /// Gets the pretty name of this terminal class. 
8 | fn as_str(&self) -> &'static str; 9 | 10 | /// Converts this terminal class to a usize 11 | fn to_usize(&self) -> usize; 12 | 13 | fn from_term(term: &Self::Term) -> Self; 14 | 15 | fn precedence(&self) -> crate::parser::Precedence; 16 | } 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | **/out.tab.rs -------------------------------------------------------------------------------- /rusty_lr_core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_core" 3 | version = "3.39.1" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "core library for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [dependencies] 13 | rustc-hash = "2.1" 14 | termtree = { version = "0.5", optional = true } 15 | arrayvec = "0.7" 16 | 17 | [features] 18 | default = [] 19 | builder = [] 20 | tree = ["dep:termtree"] 21 | -------------------------------------------------------------------------------- /rusty_lr_derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_derive" 3 | version = "2.42.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = 
"proc-macro definitions for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [lib] 13 | proc-macro = true 14 | 15 | [dependencies] 16 | proc-macro2 = "1.0.86" 17 | rusty_lr_parser = { version = "3.63.0", path = "../rusty_lr_parser" } 18 | quote = "1.0" 19 | 20 | 21 | [features] 22 | default = [] 23 | -------------------------------------------------------------------------------- /rusty_lr_parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_parser" 3 | version = "3.63.1" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "grammar line parser for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [dependencies] 13 | proc-macro2 = "1.0.86" 14 | quote = "1.0" 15 | rusty_lr_core = { version = "3.39.0", path = "../rusty_lr_core", features = [ 16 | "builder", 17 | ] } 18 | syn = { version = "2.0", features = ["extra-traits"] } 19 | 20 | [features] 21 | default = [] 22 | -------------------------------------------------------------------------------- /.github/workflows/bootstrap-test.yml: -------------------------------------------------------------------------------- 1 | name: Bootstrap Test 2 | description: Tests parser bootstrapping with different configurations to ensure output consistency 3 | 4 | on: 5 | pull_request: 6 | branches: [ main ] 7 | push: 8 | branches: [ main ] 9 | 10 | jobs: 11 | bootstrap-test: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | 18 | - name: Run bootstrap test 19 | run: | 20 | cd scripts 21 | chmod +x ./bootstrap_test.sh 22 | ./bootstrap_test.sh true 23 | if [ $? 
-ne 0 ]; then 24 | exit 1 25 | fi 26 | working-directory: ${{ github.workspace }} -------------------------------------------------------------------------------- /rusty_lr_executable/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustylr" 3 | version = "1.27.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "Executable for rusty_lr, a Bison-like Parser generator & Compiler frontend framework for Rust generating IELR(1), LALR(1) parser tables, with deterministic LR and non-deterministic LR (GLR) parsing." 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [dependencies] 13 | clap = { version = "4.5.7", features = ["derive"] } 14 | rusty_lr_buildscript = { version = "0.61.0", path = "../rusty_lr_buildscript" } 15 | prettyplease = "0.2" 16 | syn = "2.0" 17 | -------------------------------------------------------------------------------- /example/glr/src/parser.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::lr1; 2 | 3 | lr1! 
{ 4 | %err String; 5 | %glr; 6 | %tokentype char; 7 | %start E; 8 | 9 | WS0: ' '*; 10 | 11 | Digit(char): ch=['0'-'9'] { ch }; 12 | 13 | Number(i32): WS0 Digit+ WS0 { Digit.into_iter().collect::().parse().unwrap() }; 14 | 15 | E(i32): E '+' e2=E { 16 | match lookahead.to_term() { 17 | Some('*') => { 18 | return Err("".to_string()); 19 | } 20 | _ => { 21 | *shift = false; 22 | E + e2 23 | } 24 | } 25 | } 26 | | E '*' e2=E { 27 | *shift = false; 28 | E * e2 29 | } 30 | | Number 31 | ; 32 | } 33 | -------------------------------------------------------------------------------- /rusty_lr_buildscript/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_buildscript" 3 | version = "0.61.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "buildscipt tools for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | 13 | [dependencies] 14 | rusty_lr_parser = { version = "3.63.0", path = "../rusty_lr_parser" } 15 | rusty_lr_core = { version = "3.39.0", path = "../rusty_lr_core", features = [ 16 | "builder", 17 | ] } 18 | codespan-reporting = "0.12" 19 | proc-macro2 = { version = "1.0.86", features = ["span-locations"] } 20 | quote = "1.0" 21 | 22 | [features] 23 | default = [] 24 | -------------------------------------------------------------------------------- /rusty_lr_buildscript/src/utils.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Span; 2 | use proc_macro2::TokenStream; 3 | 4 | use std::ops::Range; 5 | 6 | pub fn tokenstream_range(stream: TokenStream) -> Range { 7 | if stream.is_empty() { 8 | return 0..0; 9 | } 10 | let mut stream = stream.into_iter(); 11 | let first = stream.next().unwrap().span().byte_range(); 12 | let last = if let Some(last) = stream.last() { 13 | 
last.span().byte_range() 14 | } else { 15 | first.clone() 16 | }; 17 | 18 | first.start..last.end 19 | } 20 | pub fn span_stream_range(span: Span, stream: TokenStream) -> Range { 21 | let stream_range = tokenstream_range(stream); 22 | span.byte_range().start..stream_range.end 23 | } 24 | -------------------------------------------------------------------------------- /example/calculator_u8/src/parser.rs: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | %userdata i32; 4 | %tokentype char; 5 | %start E; 6 | 7 | %left '+'; 8 | %left '*'; 9 | %precedence UMINUS; 10 | 11 | WS0: ' '*; 12 | 13 | Digit(char): ['6'-'9'] | "0" {'0'} | '1' | '2' | '3' | '4' | '5'; 14 | 15 | Number(i32): WS0 Digit+ WS0 { Digit.into_iter().collect::().parse().unwrap() }; 16 | 17 | P(f32): Number { Number as f32 } 18 | | WS0 '(' E ')' WS0 { E } 19 | ; 20 | 21 | E(f32) : E Op e2=E %prec Op { 22 | *data += 1; // access userdata by `data` 23 | println!( "{:?} {:?} {:?}", E, Op, e2 ); 24 | match Op { 25 | '+' => E + e2, 26 | '*' => E * e2, 27 | _ => panic!("Unknown operator: {:?}", Op), 28 | } 29 | } 30 | | WS0 '-' E %prec UMINUS { 31 | -E 32 | } 33 | | P 34 | ; 35 | 36 | Op(char): '+' | '*' ; 37 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Macro line parser for Rusty LR. 2 | //! 3 | //! This crate is private and not intended to be used directly. 4 | //! Please use the [`rusty_lr`](https://crates.io/crates/rusty_lr) crate instead. 
5 | 6 | pub mod emit; 7 | pub mod error; 8 | pub mod grammar; 9 | pub(crate) mod nonterminal_info; 10 | pub(crate) mod parser; 11 | pub mod partition; 12 | pub(crate) mod pattern; 13 | pub mod rangeresolver; 14 | pub mod terminal_info; 15 | pub(crate) mod terminalset; 16 | pub(crate) mod token; 17 | pub mod utils; 18 | 19 | /// This, `rusty_lr_parser` is designed to generate a code, that will be relied on `rusty_lr`. 20 | /// 21 | /// Gets the version of the rusty_lr_core crate that current crate is targeting. 22 | /// If the version is not matched, there will be a compile-time error. 23 | pub fn target_rusty_lr_version() -> (usize, usize, usize) { 24 | (3, 34, 0) 25 | } 26 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/partition.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | /// return {setids} -> {values} map 4 | pub fn minimal_partition( 5 | sets: impl Iterator>, 6 | ) -> BTreeMap, Vec> 7 | where 8 | T: Ord, 9 | { 10 | let mut val_setids_map: BTreeMap = Default::default(); 11 | for (set_id, val_set) in sets.enumerate() { 12 | for val in val_set { 13 | val_setids_map 14 | .entry(val) 15 | .or_insert_with(Vec::new) 16 | .push(set_id); 17 | } 18 | } 19 | 20 | let mut setids_val_map: BTreeMap<_, Vec> = Default::default(); 21 | for (val, setids) in val_setids_map { 22 | setids_val_map 23 | .entry(setids) 24 | .or_insert_with(Vec::new) 25 | .push(val); 26 | } 27 | 28 | setids_val_map 29 | } 30 | -------------------------------------------------------------------------------- /rusty_lr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr" 3 | version = "3.34.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "A Bison-like Parser generator & Compiler frontend framework for Rust generating IELR(1), LALR(1) parser tables, with 
deterministic LR and non-deterministic LR (GLR) parsing." 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing", "compilers", "parser-implementations"] 11 | 12 | [dependencies] 13 | rusty_lr_core = { version = "3.39.0", path = "../rusty_lr_core" } 14 | rusty_lr_derive = { version = "2.42.0", path = "../rusty_lr_derive", optional = true } 15 | rusty_lr_buildscript = { version = "0.61.0", path = "../rusty_lr_buildscript", optional = true } 16 | 17 | [features] 18 | default = ["derive"] 19 | build = ["dep:rusty_lr_buildscript"] 20 | derive = ["dep:rusty_lr_derive"] 21 | tree = ["rusty_lr_core/tree"] 22 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /rusty_lr_buildscript/src/split.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Spacing; 2 | use proc_macro2::TokenStream; 3 | use proc_macro2::TokenTree; 4 | use quote::TokenStreamExt; 5 | 6 | // split stream by '%%' 7 | pub fn split_stream(token_stream: TokenStream) -> Result<(TokenStream, TokenStream), ()> { 8 | // input stream 9 | let mut token_stream = token_stream.into_iter().peekable(); 10 | 11 | // before '%%' 12 | let mut output_stream = TokenStream::new(); 13 | 14 | while let Some(token) = token_stream.next() { 15 | if let TokenTree::Punct(token) = &token { 16 | if token.as_char() == '%' && token.spacing() == Spacing::Joint { 17 | if let Some(TokenTree::Punct(next)) = token_stream.peek() { 18 | if next.as_char() == '%' && next.spacing() == Spacing::Alone { 19 | token_stream.next(); 20 | let macro_stream: TokenStream = token_stream.collect(); 21 | return Ok((output_stream, macro_stream)); 22 | } 23 | } 24 | } 25 | } 26 | output_stream.append(token); 27 | } 28 | 29 | Err(()) 30 | } 31 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/token.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::{Ident, Span}; 2 | 3 | /// for syntax '=' 4 | #[derive(Debug, Clone)] 5 | pub struct TokenMapped { 6 | /// terminal or non-terminal name 7 | pub token: rusty_lr_core::Token, usize>, 8 | 9 | /// variable name that the token's data will be mapped to 10 | pub mapto: Option, 11 | 12 | /// When optimizing out single-token production rules like A -> B { ... 
}, 13 | /// All occurrences of A will be replaced with B, but the reduce action `{ ... }` still needs to be called. 14 | /// 15 | /// Say we have production rule X -> a b A c d { ... }, 16 | /// and A is replaced with B, then X -> a b B c d { ... } is the new rule, 17 | /// but we still need to call A's reduce action before calling X's reduce action. 18 | /// So basically we need to keep a chain of reduce actions to be called. 19 | /// 20 | /// This token data has to be r_n( r_n-1( ... r_0(token) ... ) ) 21 | /// where r_i is the i'th reduce action in the chain. 22 | pub reduce_action_chains: Vec, 23 | 24 | /// span of the token 25 | pub begin_span: Span, 26 | pub end_span: Span, 27 | } 28 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/utils.rs: -------------------------------------------------------------------------------- 1 | // constants and utility functions for macro-generation 2 | 3 | use super::error::ParseError; 4 | use proc_macro2::Ident; 5 | use quote::format_ident; 6 | 7 | pub static AUGMENTED_NAME: &str = "Augmented"; 8 | pub static EOF_NAME: &str = "eof"; 9 | pub static ERROR_NAME: &str = "error"; 10 | pub static USER_DATA_PARAMETER_NAME: &str = "data"; 11 | pub static TERMINAL_STACK_NAME: &str = "__rustylr_generated_terminal_stack"; 12 | pub static OTHERS_TERMINAL_NAME: &str = "__rustylr_other_terminals"; 13 | pub static LOOKAHEAD_PARAMETER_NAME: &str = "lookahead"; 14 | 15 | /// check if the given identifier is reserved name 16 | pub(crate) fn check_reserved_name(ident: &Ident) -> Result<(), ParseError> { 17 | if ident == AUGMENTED_NAME { 18 | return Err(ParseError::ReservedName(ident.clone())); 19 | } 20 | if ident == EOF_NAME { 21 | return Err(ParseError::ReservedName(ident.clone())); 22 | } 23 | if ident == ERROR_NAME { 24 | return Err(ParseError::ReservedName(ident.clone())); 25 | } 26 | Ok(()) 27 | } 28 | 29 | pub(crate) fn location_variable_name(varname: &Ident) -> Ident { 30 | 
format_ident!("__rustylr_location_{}", varname) 31 | } 32 | -------------------------------------------------------------------------------- /example/calculator_u8/src/main.rs: -------------------------------------------------------------------------------- 1 | pub mod parser_expanded; 2 | use parser_expanded as parser; 3 | 4 | fn main() { 5 | let input = " 1 + -20 * (3 + 4 ) "; 6 | 7 | let parser = parser::EParser::new(); 8 | let mut context = parser::EContext::new(); 9 | let mut userdata: i32 = 0; 10 | for b in input.chars() { 11 | match context.feed(&parser, b, &mut userdata) { 12 | // feed userdata here 13 | Ok(_) => {} 14 | Err(e) => { 15 | eprintln!("error: {:?}", e); 16 | return; 17 | } 18 | } 19 | } 20 | println!("{:?}", context); 21 | 22 | let result = context.accept(&parser, &mut userdata).unwrap(); // get value of start 'E' 23 | println!("result: {}", result); 24 | println!("userdata: {}", userdata); 25 | 26 | // invalid input, expect error 27 | let error_input = "1+2**(3+4)"; 28 | let mut context = parser::EContext::new(); 29 | let mut userdata: i32 = 0; 30 | for b in error_input.chars() { 31 | match context.feed(&parser, b, &mut userdata) { 32 | // feed userdata here 33 | Ok(_) => {} 34 | Err(e) => { 35 | // this will print error messages 36 | eprintln!("error: {:?}", e); 37 | 38 | // eprintln!("{:?}", context.backtrace(&parser)); 39 | return; 40 | } 41 | } 42 | } 43 | context.feed(&parser, 0 as char, &mut userdata).unwrap(); // feed EOF 44 | } 45 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/parser/span_pair.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Span; 2 | 3 | /// type for %location for each token 4 | /// since `Span::join()` is only for nightly, 5 | /// we collect the first and last span pair of the token in the parsing tree. 
6 | #[derive(Clone, Debug, Copy)] 7 | pub struct SpanPair { 8 | /// `None` if this is a zero-length span 9 | pub pair: Option<(Span, Span)>, 10 | } 11 | impl Default for SpanPair { 12 | fn default() -> Self { 13 | SpanPair { pair: None } 14 | } 15 | } 16 | impl SpanPair { 17 | pub fn new_single(span: Span) -> Self { 18 | SpanPair { 19 | pair: Some((span, span)), 20 | } 21 | } 22 | pub fn span(&self) -> Span { 23 | self.pair 24 | .as_ref() 25 | .map_or(Span::call_site(), |(first, last)| { 26 | if let Some(joined) = first.join(*last) { 27 | joined 28 | } else { 29 | *first 30 | } 31 | }) 32 | } 33 | } 34 | impl rusty_lr_core::Location for SpanPair { 35 | fn new<'a>(stack: impl Iterator + Clone, len: usize) -> Self 36 | where 37 | Self: 'a, 38 | { 39 | let mut take = stack.take(len).filter_map(|x| x.pair); 40 | let pair = if let Some(last) = take.next() { 41 | let first = take.last().unwrap_or(last); 42 | 43 | Some((first.0, last.1)) 44 | } else { 45 | None 46 | }; 47 | SpanPair { pair } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /example/calculator/src/main.rs: -------------------------------------------------------------------------------- 1 | mod parser_expanded; 2 | 3 | use parser_expanded as parser; 4 | 5 | fn main() { 6 | use parser::Token; 7 | let input = vec![ 8 | Token::Num(1), 9 | Token::Plus, 10 | Token::Num(2), 11 | Token::Star, 12 | Token::LParen, 13 | Token::Num(3), 14 | Token::Plus, 15 | Token::Num(4), 16 | Token::RParen, 17 | ]; 18 | 19 | let parser = parser::EParser::new(); 20 | let mut context = parser::EContext::new(); 21 | let mut userdata: i32 = 0; 22 | for token in input { 23 | match context.feed(&parser, token, &mut userdata) { 24 | // ^^^^^ ^^^^^^^^^^^^ userdata passed here as `&mut i32` 25 | // |- feed token 26 | Ok(_) => {} 27 | Err(e) => { 28 | println!("{:?}", e); 29 | return; 30 | } 31 | } 32 | 33 | let (terms, nonterms) = context.expected_token_str(&parser); 34 | let terms = 
terms.map(String::from).collect::>().join(", "); 35 | let nonterms = nonterms.map(String::from).collect::>().join(", "); 36 | println!( 37 | "Expected tokens: [{}], non-terminals: [{}]", 38 | terms, nonterms 39 | ); 40 | } 41 | // res = value of start symbol ( E(i32) ) 42 | let res = context.accept(&parser, &mut userdata).unwrap(); 43 | println!("{}", res); 44 | println!("userdata: {}", userdata); 45 | } 46 | -------------------------------------------------------------------------------- /rusty_lr/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # rusty_lr 2 | //! ***A Bison-like parser generator for Rust supporting IELR(1), LALR(1) parser tables, with deterministic LR and 3 | //! non-deterministic LR (GLR) parsing strategies.*** 4 | //! 5 | //! RustyLR is a parser generator that converts context-free grammars into IELR(1)/LALR(1) tables with deterministic LR and non-deterministic GLR parsing strategies. It supports custom reduce actions in Rust, with beautiful diagnostics. 6 | //! Highly inspired by tools like *bison*, it uses a similar syntax while integrating seamlessly with Rust's ecosystem. 7 | //! It constructs an optimized state machine, ensuring efficient and reliable parsing. 8 | //! 9 | //! ## Features 10 | //! - **Custom Reduce Actions:** Define custom actions in Rust, allowing you to build into custom data structures easily. 11 | //! - **Automatic Optimization:** Reduces parser table size and improves performance by grouping terminals with identical behavior across parser states. 12 | //! - **Multiple Parsing Strategies:** Supports minimal-LR(1), LALR(1) parser table, and GLR parsing strategy. 13 | //! - **Detailed Diagnostics:** Detect grammar conflicts, verbose conflicts resolving stages, and optimization stages. 14 | //! - **Location Tracking:** Track the location of every token in the parse tree, useful for error reporting and debugging. 15 | //!
16 | // re-exports 17 | 18 | pub use rusty_lr_core::*; 19 | pub use rusty_lr_derive::*; 20 | 21 | /// tools for build.rs 22 | #[cfg(feature = "build")] 23 | pub mod build { 24 | pub use rusty_lr_buildscript::*; 25 | } 26 | -------------------------------------------------------------------------------- /rusty_lr_core/src/backtrace.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | 4 | /// Backtracing information for parsing context. 5 | /// What current parser was trying to parse, and what rules were applied. 6 | #[derive(Clone)] 7 | pub struct Backtrace { 8 | /// 0'th element is the current parsing state, and through the backtrace, it goes to the initial state. 9 | pub traces: Vec>>, 10 | } 11 | 12 | impl Display for Backtrace { 13 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 14 | for (idx, rules) in self.traces.iter().enumerate() { 15 | if idx == 0 { 16 | writeln!(f, "Trying to parse:")?; 17 | } else { 18 | writeln!(f, "Backtrace:")?; 19 | } 20 | for rule in rules { 21 | writeln!(f, "\t>>> {}", rule)?; 22 | } 23 | } 24 | Ok(()) 25 | } 26 | } 27 | impl Debug for Backtrace { 28 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 29 | for (idx, rules) in self.traces.iter().enumerate() { 30 | if idx == 0 { 31 | writeln!(f, "Trying to parse:")?; 32 | } else { 33 | writeln!(f, "Backtrace:")?; 34 | } 35 | for rule in rules { 36 | writeln!(f, "\t>>> {:?}", rule)?; 37 | } 38 | } 39 | Ok(()) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /rusty_lr_core/src/location.rs: -------------------------------------------------------------------------------- 1 | /// trait for token location in the input stream 2 | pub trait Location: Clone { 3 | /// Create a new merged location from `len` elements in the stack. 4 | /// The `stack` iterator should yield child locations in reverse order. 
5 | /// That is, if the input [a, b, c, ..., z] is fed and `len` is 3, 6 | /// `stack.next()` will yield `z`, then `y`, then `x`, 7 | /// and this function should return the merged location of `[x, y, z]`. 8 | fn new<'a>(stack: impl Iterator + Clone, len: usize) -> Self 9 | where 10 | Self: 'a; 11 | } 12 | 13 | /// Default location type that does nothing. 14 | #[derive(Clone, Default, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)] 15 | pub struct DefaultLocation; 16 | impl Location for DefaultLocation { 17 | fn new<'a>(_stack: impl Iterator + Clone, _len: usize) -> Self { 18 | DefaultLocation 19 | } 20 | } 21 | 22 | impl Location for std::ops::Range 23 | where 24 | T: Clone + Default + Ord, 25 | { 26 | fn new<'a>(mut stack: impl Iterator + Clone, len: usize) -> Self 27 | where 28 | Self: 'a, 29 | { 30 | if len == 0 { 31 | if let Some(last) = stack.next() { 32 | let end = last.end.clone(); 33 | end.clone()..end 34 | } else { 35 | T::default()..T::default() 36 | } 37 | } else { 38 | let mut stack = stack.take(len); 39 | let last = stack.next().unwrap(); 40 | let first = stack.last().unwrap_or(last); 41 | first.start.clone()..last.end.clone() 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nondeterministic/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | 4 | /// Error type for feed() 5 | #[derive(Clone, Debug)] 6 | pub struct ParseError { 7 | /// The terminal symbol that caused the error. 8 | pub term: crate::TerminalSymbol, 9 | /// Location of the terminal symbol. 10 | /// location will be `None` if the terminal was eof. 11 | pub location: Option, 12 | /// Error from reduce action (from every diverged paths) 13 | pub reduce_action_errors: Vec, 14 | /// Rule indices when shift/reduce conflict occur with no shift/reduce precedence defined. 
15 | /// This is same as when setting %nonassoc in Bison. 16 | pub no_precedences: Vec, 17 | 18 | /// States when the error occurred (from all diverged paths) 19 | pub(crate) states: Vec, 20 | } 21 | 22 | impl ParseError { 23 | /// location will be `None` if the terminal was eof. 24 | pub fn location(&self) -> &Option { 25 | &self.location 26 | } 27 | pub fn term(&self) -> &crate::TerminalSymbol { 28 | &self.term 29 | } 30 | /// States when the error occurred (from all diverged paths) 31 | pub fn states(&self) -> impl Iterator + '_ { 32 | self.states.iter().copied() 33 | } 34 | } 35 | 36 | impl Display for ParseError 37 | where 38 | Term: Display, 39 | { 40 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 41 | write!(f, "ParseError: {}, States: {:?}", self.term, self.states) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /example/json/src/parser.rs: -------------------------------------------------------------------------------- 1 | // https://www.json.org/json-en.html 2 | 3 | %% 4 | 5 | %tokentype char; 6 | %start Json; 7 | %dense; 8 | %location std::ops::Range; 9 | 10 | Json: Element; 11 | 12 | Value: Object 13 | | Array 14 | | String 15 | | Number 16 | | "true" 17 | | "false" 18 | | "null" 19 | ; 20 | 21 | Object: '{' WS '}' 22 | | '{' Members '}' 23 | | '{' error '}' { 24 | // @error is Range at this point 25 | let start = @error.start; 26 | let end = @error.end; 27 | println!("Error recovered with '}}' at {start}..{end}"); 28 | } 29 | ; 30 | 31 | Members: Member 32 | | Member ',' Members 33 | ; 34 | 35 | Member: WS String WS ':' Element; 36 | 37 | Array: '[' Elements ']' 38 | ; 39 | 40 | Elements: $sep(Element, ',', *) 41 | ; 42 | 43 | Element: WS Value WS; 44 | 45 | String: '"' Characters '"'; 46 | 47 | Characters: Character*; 48 | 49 | // WIP 50 | Character: 51 | '\\' Escape 52 | | ['\u{0020}'-'\u{10FFFF}'] - '"' - '\\' 53 | ; 54 | 55 | Escape: '"' 56 | | '\\' 57 | | '/' 58 | | 'b' 59 
| | 'f' 60 | | 'n' 61 | | 'r' 62 | | 't' 63 | | 'u' Hex Hex Hex Hex 64 | ; 65 | 66 | Hex: Digit 67 | | ['A'-'F'] 68 | | ['a'-'f'] 69 | ; 70 | 71 | Number: Integer Fraction Exponent; 72 | 73 | Integer: Digit 74 | | Onenine Digits 75 | | '-' Digit 76 | | '-' Onenine Digits 77 | ; 78 | 79 | Digits: Digit+; 80 | 81 | Digit: ['0'-'9']; 82 | 83 | Onenine: ['1'-'9']; 84 | 85 | Fraction: ('.' Digits)?; 86 | 87 | Exponent: "" 88 | | 'E' Sign Digits 89 | | 'e' Sign Digits 90 | ; 91 | 92 | Sign: "" | '+' | '-'; 93 | 94 | WS: "" 95 | | '\u{0020}' WS 96 | | '\u{000A}' WS 97 | | '\u{000D}' WS 98 | | '\u{0009}' WS 99 | ; -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nonterminal.rs: -------------------------------------------------------------------------------- 1 | /// A struct to hold information about non-terminal symbols 2 | pub trait NonTerminal: Copy { 3 | /// Whether this non-terminal is auto-generated by rustylr. 4 | /// Some non-terminals could be auto-generated to handle regex patterns, character sets, etc. 5 | fn is_auto_generated(&self) -> bool { 6 | self.nonterm_type().is_some() 7 | } 8 | /// Augmented rule will be generated for entry point of the grammar. 9 | fn is_augmented(&self) -> bool { 10 | self.nonterm_type() == Some(NonTerminalType::Augmented) 11 | } 12 | /// whether this non-terminal is set as %trace 13 | fn is_trace(&self) -> bool; 14 | 15 | /// for internal use only; 16 | /// If this non-terminal is auto-generated, gets the pattern where this non-terminal was generated from. 17 | fn nonterm_type(&self) -> Option; 18 | 19 | /// Gets the pretty name of this non-terminal. 20 | fn as_str(&self) -> &'static str; 21 | 22 | /// converts this non-terminal to a usize 23 | fn to_usize(&self) -> usize; 24 | } 25 | 26 | /// If the non-terminal is auto-generated, 27 | /// the pattern where this non-terminal was generated from. 
28 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 29 | pub enum NonTerminalType { 30 | /// zero or more repetitions 31 | Star, 32 | /// one or more repetitions with left recursion 33 | PlusLeft, 34 | /// one or more repetitions with right recursion 35 | PlusRight, 36 | /// zero or one repetition 37 | Optional, 38 | /// Augmented rule 39 | Augmented, 40 | /// error recovery non-terminal 41 | Error, 42 | 43 | /// terminal set enclosed in brackets ( [a-zA-Z0-9] ) 44 | TerminalSet, 45 | /// rule with explicit lookaheads 46 | Lookahead, 47 | 48 | /// sequence of tokens enclosed in parentheses ( a B c ... ) 49 | Group, 50 | 51 | /// "abc" or b"abc" 52 | LiteralString, 53 | } 54 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | /// Core parser functionality for deterministic parsers 2 | pub mod deterministic; 3 | 4 | /// Core parser functionality for non-deterministic parsers 5 | pub mod nondeterministic; 6 | 7 | pub mod data_stack; 8 | 9 | /// module for auto-generated types of non-terminals representation 10 | pub mod nonterminal; 11 | 12 | pub mod terminalclass; 13 | 14 | pub mod state; 15 | pub use state::State; 16 | 17 | #[derive(Clone, Copy)] 18 | pub struct Precedence(u8); 19 | 20 | impl Precedence { 21 | #[inline] 22 | pub fn none() -> Self { 23 | Precedence(u8::MAX) 24 | } 25 | #[inline] 26 | pub fn new(level: u8) -> Self { 27 | debug_assert!(level < u8::MAX); 28 | Precedence(level) 29 | } 30 | #[inline] 31 | pub fn is_some(&self) -> bool { 32 | self.0 < u8::MAX 33 | } 34 | 35 | pub fn unwrap(self) -> u8 { 36 | debug_assert!(self.0 < u8::MAX); 37 | self.0 38 | } 39 | } 40 | 41 | /// A trait for Parser that holds the entire parser table. 42 | /// This trait will be automatically implemented by rusty_lr 43 | pub trait Parser { 44 | /// whether the `error` token was used in the grammar. 
45 | const ERROR_USED: bool; 46 | 47 | /// The type of terminal symbols. 48 | type Term; 49 | /// The type of terminal classes. 50 | type TermClass: terminalclass::TerminalClass; 51 | /// The type of non-terminal symbols. 52 | type NonTerm: nonterminal::NonTerminal; 53 | /// The type of the parser state. 54 | type State: State; 55 | 56 | /// Get list of production rules 57 | fn get_rules(&self) -> &[crate::rule::ProductionRule]; 58 | /// Get list of states 59 | fn get_states(&self) -> &[Self::State]; 60 | /// Get the type of precedence for i'th level. 61 | /// `None` if i'th level was defined as %precedence (no reduce type). 62 | fn precedence_types(&self, level: u8) -> Option; 63 | } 64 | -------------------------------------------------------------------------------- /example/calculator/src/parser.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Copy)] 2 | pub enum Token { 3 | Num(i32), 4 | Plus, 5 | Star, 6 | LParen, 7 | RParen, 8 | } 9 | 10 | fn filter( term: &Token ) -> &Token { 11 | term 12 | } 13 | 14 | %% 15 | 16 | // this define struct `EParser` 17 | // where 'E' is the start symbol 18 | 19 | // lalr parser 20 | %lalr; 21 | 22 | // type of userdata 23 | %userdata i32; 24 | // type of token ( as Terminal symbol ) 25 | %tokentype Token; 26 | 27 | // start symbol 28 | %start E; 29 | 30 | // error type 31 | %err String; 32 | 33 | // define tokens 34 | %token num Token::Num(_); // `num` maps to `Token::Num(0)` 35 | %token plus Token::Plus; 36 | %token star Token::Star; 37 | %token lparen Token::LParen; 38 | %token rparen Token::RParen; 39 | 40 | // resolving shift/reduce conflict 41 | %left plus; 42 | %left star; 43 | 44 | %filter filter; 45 | 46 | // data that each token holds can be accessed by its name 47 | // s is slice of shifted terminal symbols captured by current rule 48 | // userdata can be accessed by `data` ( &mut i32, for this situation ) 49 | A(i32) : A plus a2=A { 50 | println!("{:?} 
{:?} {:?}", A, plus, a2 ); 51 | // ^ ^ ^ 52 | // | | |- value of 2nd 'A' 53 | // | |- Token 54 | // |- value of 1st 'A' 55 | *data += 1; 56 | A + a2 // --> this will be new value of current 'A' 57 | // ^ ^ 58 | // | |- value of 2nd 'A' 59 | // |- value of 1st 'A' 60 | } 61 | | M 62 | ; 63 | 64 | M(i32) : M_optim star m2=M_optim { M_optim * m2 } 65 | | P 66 | ; 67 | 68 | // check for single-rule optimization 69 | M_optim(i32): M { M * 1 }; 70 | 71 | P(i32) : num { 72 | if let Token::Num(n) = num { n } 73 | else { return Err(format!("{:?}", num)); } 74 | // ^^^^^^^^^^^^^^^^^^^^^^^^^^ 75 | // reduce action returns Result<(), String> 76 | } 77 | | lparen E rparen { E } 78 | ; 79 | 80 | E(i32) : A; 81 | -------------------------------------------------------------------------------- /rusty_lr_executable/src/arg.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | 3 | /// Converts a context-free grammar into a deterministic finite automaton (DFA) tables, 4 | /// and generates a Rust code that can be used as a parser for that grammar. 5 | /// 6 | /// For usage of the generated code, please refer to the documentation of [`rusty_lr`](https://github.com/ehwan/RustyLR). 7 | #[derive(Parser, Debug)] 8 | #[command(version)] 9 | #[command(about)] 10 | pub struct Args { 11 | /// Input_file to read 12 | pub input_file: String, 13 | 14 | /// Output_file to write 15 | #[arg(default_value = "out.tab.rs")] 16 | pub output_file: String, 17 | 18 | /// Do not rustfmt the output 19 | #[arg(long, default_value = "false")] 20 | pub no_format: bool, 21 | 22 | /// Do not print note information about any shift/reduce, reduce/reduce conflicts. 23 | /// 24 | /// If the target is deterministic parser, conflict will be treated as an error, so this option will be ignored. 25 | /// This option is only for non-deterministic GLR parser. 
26 | #[arg(short = 'c', long, default_value = "false")] 27 | pub no_conflict: bool, 28 | 29 | /// Do not print debug information about conflicts resolving process by any `%left`, `%right`, or `%precedence` directive. 30 | #[arg(short = 'r', long, default_value = "false")] 31 | pub no_conflict_resolve: bool, 32 | 33 | /// Do not print debug information about optimization process. 34 | #[arg(short = 'o', long, default_value = "false")] 35 | pub no_optimization: bool, 36 | 37 | /// Do not print backtrace of production rules when conflicts occurred. ruleset could be messed up 38 | #[arg(short = 'b', long, default_value = "false")] 39 | pub no_backtrace: bool, 40 | 41 | /// Override the written code and set generated parser use GLR parsing algorithm 42 | #[arg(long)] 43 | pub glr: Option, 44 | 45 | /// Override the written code and set generated parser table to use dense arrays 46 | #[arg(long)] 47 | pub dense: Option, 48 | 49 | /// Print the details of a specific state 50 | #[arg(long)] 51 | pub state: Option, 52 | } 53 | -------------------------------------------------------------------------------- /rusty_lr_core/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Core module for the Rusty LR parser. 2 | //! 3 | //! This crate is private and not intended to be used directly. 4 | //! Please use the [`rusty_lr`](https://crates.io/crates/rusty_lr) crate instead. 5 | 6 | /// FxHash for fast and non-cryptographic hashing 7 | pub mod hash; 8 | 9 | /// module for tree representation of parse results (feature `tree`). 
10 | #[cfg(feature = "tree")] 11 | pub mod tree; 12 | 13 | pub(crate) mod location; 14 | pub use location::DefaultLocation; 15 | pub use location::Location; 16 | 17 | /// module for build parser tables from CFG, (feature "builder") 18 | #[cfg(feature = "builder")] 19 | pub mod builder; 20 | 21 | /// module for core parser functionality 22 | pub mod parser; 23 | 24 | /// module for production rules representation 25 | pub mod rule; 26 | 27 | pub(crate) mod token; 28 | pub use token::TerminalSymbol; 29 | pub use token::Token; 30 | 31 | pub(crate) mod backtrace; 32 | pub use backtrace::Backtrace; 33 | 34 | /// Default error type for reduce action 35 | #[derive(Debug, Default)] 36 | pub struct DefaultReduceActionError; 37 | impl std::fmt::Display for DefaultReduceActionError { 38 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 39 | write!(f, "Default reduce action error") 40 | } 41 | } 42 | impl std::error::Error for DefaultReduceActionError { 43 | fn cause(&self) -> Option<&dyn std::error::Error> { 44 | None 45 | } 46 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 47 | None 48 | } 49 | fn description(&self) -> &str { 50 | "Default reduce action error" 51 | } 52 | } 53 | 54 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 55 | pub enum TriState { 56 | False, 57 | Maybe, 58 | True, 59 | } 60 | impl std::ops::BitOr for TriState { 61 | type Output = Self; 62 | fn bitor(self, rhs: Self) -> Self::Output { 63 | match (self, rhs) { 64 | (TriState::False, TriState::False) => TriState::False, 65 | (TriState::False, _) => TriState::Maybe, 66 | (_, TriState::False) => TriState::Maybe, 67 | (TriState::True, TriState::True) => TriState::True, 68 | _ => TriState::Maybe, 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /example/json/src/main.rs: -------------------------------------------------------------------------------- 1 | mod parser_expanded; 2 | 3 | use std::time::Instant; 4 | 5 
| use parser_expanded as parser; 6 | use rusty_lr::parser::Parser; 7 | 8 | const TEST_JSON: &'static str = r#" 9 | { 10 | "user": { 11 | "id": 12345, 12 | "name": "Anonymous", 13 | "active": true, 14 | "roles": ["engineer", "researcher"], 15 | "profile": { 16 | "email": "anonymous@example.com", 17 | "location": "Seoul", 18 | "social": { 19 | "github": "https://github.com/anonymous", 20 | "twitter": null 21 | } 22 | } 23 | }, 24 | "projects": [ 25 | { 26 | "id": "proj_001", 27 | "name": "High-Speed Simulation Engine", 28 | "description": "A tool used by scientists to simulate how air flows over objects like airplane wings or car bodies.", 29 | aa 30 | "technologies": ["High-performance computing", "Graphics cards"], 31 | "team_size": 5 32 | }, 33 | { 34 | "id": "proj_002", 35 | "name": "Smart Investment Assistant", 36 | "description": "An automated system that studies price movements and suggests when to buy or sell stocks.", 37 | "technologies": ["Artificial Intelligence", "Data Analysis"], 38 | "team_size": 1, 39 | "status": "experimental" 40 | } 41 | ], 42 | "timestamp": "2025-04-18T16:00:00+09:00", 43 | "metadata": null 44 | } 45 | "#; 46 | 47 | fn main() { 48 | let parser = parser::JsonParser::new(); 49 | println!("#rules: {}", parser.get_rules().len()); 50 | println!("#states: {}", parser.get_states().len()); 51 | 52 | fn try_once(parser: &parser::JsonParser) { 53 | let mut context = parser::JsonContext::new(); 54 | let mut range_start = 0; 55 | for ch in TEST_JSON.chars() { 56 | let range_end = range_start + ch.len_utf8(); 57 | context 58 | .feed_location(parser, ch, &mut (), range_start..range_end) 59 | .expect("Error parsing character"); 60 | range_start = range_end; 61 | } 62 | } 63 | 64 | let start = Instant::now(); 65 | for _ in 0..1000 { 66 | try_once(&parser); 67 | } 68 | let duration = start.elapsed(); 69 | println!("Parsed 1000 times in {:?}", duration); 70 | } 71 | -------------------------------------------------------------------------------- 
/example/glr/src/main.rs: -------------------------------------------------------------------------------- 1 | pub mod parser; 2 | 3 | fn main() { 4 | let parser = parser::EParser::new(); 5 | let mut context = parser::EContext::new(); 6 | 7 | let input = "1+2*3+4"; 8 | for ch in input.chars() { 9 | println!("feed: {}, possible: {}", ch, context.can_feed(&parser, &ch)); 10 | match context.feed(&parser, ch, &mut ()) { 11 | Ok(_) => { 12 | println!("nodes: {}", context.len_paths()); 13 | } 14 | Err(e) => { 15 | println!("Error: {}", e); 16 | return; 17 | } 18 | } 19 | context.debug_check(); 20 | } 21 | let result = match context.accept(&parser, &mut ()) { 22 | Ok(mut results) => results.next().unwrap(), 23 | Err(e) => { 24 | println!("Error: {}", e); 25 | return; 26 | } 27 | }; 28 | println!("Result: {}", result); 29 | 30 | let input = "1+2**3+4"; 31 | let mut context = parser::EContext::new(); 32 | for ch in input.chars() { 33 | println!( 34 | "feed: {}, can_feed(): {}", 35 | ch, 36 | context.can_feed(&parser, &ch) 37 | ); 38 | match context.feed(&parser, ch, &mut ()) { 39 | Ok(_) => { 40 | println!("nodes: {}", context.len_paths()); 41 | } 42 | Err(e) => { 43 | println!("Error: {}", e); 44 | return; 45 | } 46 | } 47 | } 48 | 49 | for result in context.accept(&parser, &mut ()).unwrap() { 50 | println!("Result: {}", result); 51 | } 52 | 53 | // for mut n in c.current_nodes.nodes.into_iter() { 54 | // loop { 55 | // println!("{}", n.state()); 56 | // if let Some(par) = n.parent() { 57 | // n = std::rc::Rc::clone(par); 58 | // } else { 59 | // break; 60 | // } 61 | // } 62 | // println!("---"); 63 | // } 64 | } 65 | 66 | #[test] 67 | fn test_parser() { 68 | let parser = parser::EParser::new(); 69 | let mut context = parser::EContext::new(); 70 | let input1 = " 1 + 2 * 3 * 4 + 5 * 6 + 7 "; 71 | for ch in input1.chars() { 72 | context.feed(&parser, ch, &mut ()).unwrap(); 73 | } 74 | 75 | let answer = 1 + 2 * 3 * 4 + 5 * 6 + 7; 76 | 77 | let mut results = context 78 | 
.accept(&parser, &mut ()) 79 | .unwrap() 80 | .collect::>(); 81 | results.sort(); 82 | assert_eq!(results.len(), 1); 83 | assert_eq!(results, [answer]); 84 | } 85 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/data_stack.rs: -------------------------------------------------------------------------------- 1 | /// A trait for data stack in the parser. 2 | /// 3 | /// Since each non-terminal could have different ruletypes, 4 | /// this effectively handles those rule types into separated `Vec` stack, 5 | /// instead of using enum of rule types (since it would be costful at memory aspects if the size differs significantly). 6 | /// For people who is curious about the implementation details, 7 | /// you should see the actual generated `DataStack` structs, like `GrammarDataStack` in `rusty_lr_parser/src/parser/parser_expanded.rs`. 8 | pub trait DataStack: Sized + Default { 9 | /// Type for terminal symbols 10 | type Term; 11 | /// Type for non-terminal symbols - this must be enum type that was auto-generated by rusty_lr 12 | type NonTerm: crate::parser::nonterminal::NonTerminal; 13 | /// Type for user data that is passed to the parser from the user. 
14 | type UserData; 15 | /// Type for `Err` variant returned by reduce action 16 | type ReduceActionError; 17 | /// The value of the start symbol 18 | type StartType; 19 | /// Type for location of the token 20 | type Location: crate::Location; 21 | 22 | fn pop_start(&mut self) -> Option; 23 | fn pop(&mut self); 24 | fn push_terminal(&mut self, term: Self::Term); 25 | fn push_empty(&mut self); 26 | 27 | fn clear(&mut self); 28 | fn reserve(&mut self, additional: usize); 29 | fn with_capacity(capacity: usize) -> Self { 30 | let mut self_: Self = Default::default(); 31 | self_.reserve(capacity); 32 | self_ 33 | } 34 | 35 | fn split_off(&mut self, at: usize) -> Self; 36 | fn truncate(&mut self, at: usize); 37 | fn append(&mut self, other: &mut Self); 38 | 39 | /// Performs a reduce action with the given rule index. 40 | /// Returns false if the empty tag was pushed by this reduce action, true otherwise. 41 | fn reduce_action( 42 | // the child tokens for the reduction 43 | // the caller (usually from generated code) must pops all of the tokens used for this reduce_action 44 | data_stack: &mut Self, 45 | location_stack: &mut Vec, 46 | push_data: bool, 47 | 48 | // the index of the production rule to reduce 49 | rule_index: usize, 50 | 51 | // for runtime-conflict-resolve. 52 | // if this variable is set to false in the action, the shift action will not be performed. (GLR parser) 53 | shift: &mut bool, 54 | // the lookahead token that caused this reduce action 55 | lookahead: &crate::TerminalSymbol, 56 | // user input data 57 | userdata: &mut Self::UserData, 58 | // location of this non-terminal, e.g. 
`@$` 59 | location0: &mut Self::Location, 60 | ) -> Result<(), Self::ReduceActionError>; 61 | } 62 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nondeterministic/node.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::data_stack::DataStack; 2 | use crate::parser::Precedence; 3 | 4 | /// To handle multiple paths in the non-deterministic GLR parsing, 5 | /// this node represents a subrange in stack of the parser. 6 | /// this constructs LinkedList tree of nodes, where parent node is the previous token in the parse tree. 7 | #[derive(Clone)] 8 | pub struct Node { 9 | /// parent node 10 | pub parent: Option, 11 | 12 | pub child_count: usize, 13 | 14 | /// index of state in parser 15 | pub state_stack: Vec, 16 | pub data_stack: Data, 17 | pub location_stack: Vec, 18 | pub precedence_stack: Vec, 19 | #[cfg(feature = "tree")] 20 | pub(crate) tree_stack: Vec>, 21 | } 22 | 23 | impl Default for Node { 24 | fn default() -> Self { 25 | Node { 26 | parent: None, 27 | child_count: 0, 28 | state_stack: Vec::new(), 29 | data_stack: Data::default(), 30 | location_stack: Vec::new(), 31 | precedence_stack: Vec::new(), 32 | #[cfg(feature = "tree")] 33 | tree_stack: Vec::new(), 34 | } 35 | } 36 | } 37 | 38 | impl Node { 39 | /// Clear this node to `Default::default()`. 
40 | pub fn clear(&mut self) { 41 | self.parent = None; 42 | self.child_count = 0; 43 | self.state_stack.clear(); 44 | self.data_stack.clear(); 45 | self.location_stack.clear(); 46 | self.precedence_stack.clear(); 47 | #[cfg(feature = "tree")] 48 | self.tree_stack.clear(); 49 | } 50 | pub fn len(&self) -> usize { 51 | self.state_stack.len() 52 | } 53 | pub fn is_leaf(&self) -> bool { 54 | self.child_count == 0 55 | } 56 | 57 | pub fn with_capacity(capacity: usize) -> Self { 58 | Node { 59 | parent: None, 60 | child_count: 0, 61 | state_stack: Vec::with_capacity(capacity), 62 | data_stack: Data::with_capacity(capacity), 63 | location_stack: Vec::with_capacity(capacity), 64 | precedence_stack: Vec::with_capacity(capacity), 65 | #[cfg(feature = "tree")] 66 | tree_stack: Vec::with_capacity(capacity), 67 | } 68 | } 69 | pub fn reserve(&mut self, additional: usize) { 70 | self.state_stack.reserve(additional); 71 | self.data_stack.reserve(additional); 72 | self.location_stack.reserve(additional); 73 | self.precedence_stack.reserve(additional); 74 | #[cfg(feature = "tree")] 75 | self.tree_stack.reserve(additional); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/state.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::collections::BTreeSet; 3 | 4 | use crate::TriState; 5 | 6 | /// state for internal usage during grammar building stage 7 | #[derive(Debug, Clone)] 8 | pub struct State { 9 | pub shift_goto_map_term: BTreeMap, 10 | pub shift_goto_map_nonterm: BTreeMap, 11 | pub reduce_map: BTreeMap>, 12 | pub ruleset: BTreeSet, 13 | pub can_accept_error: TriState, 14 | } 15 | impl State { 16 | pub fn new() -> Self { 17 | State { 18 | shift_goto_map_term: Default::default(), 19 | shift_goto_map_nonterm: Default::default(), 20 | reduce_map: Default::default(), 21 | ruleset: Default::default(), 22 | can_accept_error: 
TriState::False, 23 | } 24 | } 25 | 26 | /// shift -= 1 for all rules in the ruleset 27 | pub fn unshifted_ruleset(&self) -> impl Iterator + '_ { 28 | self.ruleset 29 | .iter() 30 | .filter(|rule| rule.shifted > 0) 31 | .map(|rule| { 32 | let mut rule = *rule; 33 | rule.shifted -= 1; 34 | rule 35 | }) 36 | } 37 | } 38 | 39 | impl Default for State { 40 | fn default() -> Self { 41 | Self::new() 42 | } 43 | } 44 | 45 | impl From> 46 | for crate::parser::state::IntermediateState 47 | where 48 | Term: Ord, 49 | { 50 | fn from(state: crate::builder::State) -> Self { 51 | use crate::parser::state::ShiftTarget; 52 | 53 | crate::parser::state::IntermediateState { 54 | shift_goto_map_term: state 55 | .shift_goto_map_term 56 | .into_iter() 57 | .map(|(term, state_index)| { 58 | ( 59 | term, 60 | ShiftTarget { 61 | state: state_index.into(), 62 | push: true, 63 | }, 64 | ) 65 | }) 66 | .collect(), 67 | shift_goto_map_nonterm: state 68 | .shift_goto_map_nonterm 69 | .into_iter() 70 | .map(|(nonterm, state_index)| { 71 | ( 72 | nonterm, 73 | ShiftTarget { 74 | state: state_index.into(), 75 | push: true, 76 | }, 77 | ) 78 | }) 79 | .collect(), 80 | reduce_map: state 81 | .reduce_map 82 | .into_iter() 83 | .map(|(term, rules)| (term, rules.into_iter().collect())) 84 | .collect(), 85 | ruleset: state.ruleset.into_iter().collect(), 86 | can_accept_error: state.can_accept_error, 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /rusty_lr_core/src/token.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | use std::hash::Hash; 4 | 5 | /// A type for terminal symbols in the grammar. 6 | /// just because we have to take care of the `error` token specially, 7 | /// and future support for other special tokens. 
/// A terminal symbol fed to the parser.
///
/// Wraps the user's `Term` so the special `error` and `eof` tokens can be
/// represented alongside ordinary terminals.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum TerminalSymbol<Term> {
    /// an ordinary terminal carrying the user's token value
    Term(Term),
    /// the special `error` token
    Error,
    /// the special end-of-file token
    Eof,
}
impl<Term> TerminalSymbol<Term> {
    /// `true` if this is the special `error` token.
    pub fn is_error(&self) -> bool {
        matches!(self, Self::Error)
    }
    /// `true` if this is an ordinary terminal.
    pub fn is_term(&self) -> bool {
        matches!(self, Self::Term(_))
    }
    /// `true` if this is the end-of-file token.
    pub fn is_eof(&self) -> bool {
        matches!(self, Self::Eof)
    }
    /// Borrows the inner terminal; `None` for `Error`/`Eof`.
    pub fn to_term(&self) -> Option<&Term> {
        if let Self::Term(term) = self {
            Some(term)
        } else {
            None
        }
    }
    /// Consumes self and returns the inner terminal; `None` for `Error`/`Eof`.
    pub fn into_term(self) -> Option<Term> {
        match self {
            Self::Term(term) => Some(term),
            Self::Error | Self::Eof => None,
        }
    }
}

impl<Term: std::fmt::Display> std::fmt::Display for TerminalSymbol<Term> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Term(term) => write!(f, "{}", term),
            Self::Error => f.write_str("error"),
            Self::Eof => f.write_str("eof"),
        }
    }
}
53 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 54 | pub enum Token { 55 | Term(Term), 56 | NonTerm(NonTerm), 57 | } 58 | impl Display for Token { 59 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 60 | match self { 61 | Token::Term(term) => write!(f, "{}", term), 62 | Token::NonTerm(nonterm) => write!(f, "{}", nonterm), 63 | } 64 | } 65 | } 66 | impl Debug for Token { 67 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 68 | match self { 69 | Token::Term(term) => write!(f, "{:?}", term), 70 | Token::NonTerm(nonterm) => write!(f, "{:?}", nonterm), 71 | } 72 | } 73 | } 74 | 75 | impl Token { 76 | pub fn map( 77 | self, 78 | term_map: impl Fn(Term) -> NewTerm, 79 | nonterm_map: impl Fn(NonTerm) -> NewNonTerm, 80 | ) -> Token { 81 | match self { 82 | Token::Term(term) => Token::Term(term_map(term)), 83 | Token::NonTerm(nonterm) => Token::NonTerm(nonterm_map(nonterm)), 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/deterministic/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | 4 | use crate::TerminalSymbol; 5 | 6 | #[derive(Clone, Debug)] 7 | pub struct NoActionError { 8 | pub term: TerminalSymbol, 9 | pub location: Option, 10 | pub state: usize, 11 | } 12 | #[derive(Clone, Debug)] 13 | pub struct ReduceActionError { 14 | pub term: TerminalSymbol, 15 | pub location: Option, 16 | pub state: usize, 17 | pub source: Source, 18 | } 19 | 20 | #[derive(Clone, Debug)] 21 | pub struct NoPrecedenceError { 22 | pub term: TerminalSymbol, 23 | pub location: Option, 24 | pub state: usize, 25 | pub rule: usize, 26 | } 27 | 28 | /// Error type for feed() 29 | #[derive(Clone, Debug)] 30 | pub enum ParseError { 31 | /// No action defined for the given terminal in the parser table. 32 | /// location will be `None` if the terminal was eof. 
33 | NoAction(NoActionError), 34 | 35 | /// Error from reduce action. 36 | /// location will be `None` if the terminal was eof. 37 | ReduceAction(ReduceActionError), 38 | 39 | /// Rule index when shift/reduce conflict occur with no shift/reduce precedence defined. 40 | /// This is same as when setting %nonassoc in Bison. 41 | /// location will be `None` if the terminal was eof. 42 | NoPrecedence(NoPrecedenceError), 43 | } 44 | 45 | impl ParseError { 46 | /// location will be `None` if the terminal was eof. 47 | pub fn location(&self) -> &Option { 48 | match self { 49 | ParseError::NoAction(err) => &err.location, 50 | ParseError::ReduceAction(err) => &err.location, 51 | ParseError::NoPrecedence(err) => &err.location, 52 | } 53 | } 54 | 55 | pub fn term(&self) -> &TerminalSymbol { 56 | match self { 57 | ParseError::NoAction(err) => &err.term, 58 | ParseError::ReduceAction(err) => &err.term, 59 | ParseError::NoPrecedence(err) => &err.term, 60 | } 61 | } 62 | 63 | pub fn state(&self) -> usize { 64 | match self { 65 | ParseError::NoAction(err) => err.state, 66 | ParseError::ReduceAction(err) => err.state, 67 | ParseError::NoPrecedence(err) => err.state, 68 | } 69 | } 70 | } 71 | 72 | impl Display for ParseError 73 | where 74 | Term: Display, 75 | ReduceAction: Display, 76 | { 77 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 78 | match self { 79 | ParseError::NoAction(err) => { 80 | write!(f, "NoAction: {}, State: {}", err.term, err.state) 81 | } 82 | ParseError::ReduceAction(err) => { 83 | write!( 84 | f, 85 | "ReduceAction: {}, State: {}\nSource: {}", 86 | err.term, err.state, err.source 87 | ) 88 | } 89 | ParseError::NoPrecedence(err) => { 90 | write!(f, "NoPrecedence: {}, State: {}", err.rule, err.state) 91 | } 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/terminal_info.rs: -------------------------------------------------------------------------------- 1 | use 
proc_macro2::Ident; 2 | use proc_macro2::Span; 3 | 4 | use proc_macro2::TokenStream; 5 | use quote::ToTokens; 6 | 7 | #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] 8 | pub enum TerminalName { 9 | /// defined in %token 10 | Ident(Ident), 11 | 12 | /// defined as literal anywhere in the grammar 13 | CharRange(char, char), 14 | } 15 | impl TerminalName { 16 | pub fn count(&self) -> usize { 17 | match self { 18 | TerminalName::Ident(_) => 1, 19 | TerminalName::CharRange(s, l) => { 20 | let s = *s as usize; 21 | let l = *l as usize; 22 | l + 1 - s 23 | } 24 | } 25 | } 26 | pub fn ident(&self) -> Option<&Ident> { 27 | match self { 28 | TerminalName::Ident(ident) => Some(ident), 29 | TerminalName::CharRange(_, _) => None, 30 | } 31 | } 32 | pub fn into_ident(self) -> Option { 33 | match self { 34 | TerminalName::Ident(ident) => Some(ident), 35 | TerminalName::CharRange(_, _) => None, 36 | } 37 | } 38 | // pub fn char(&self) -> Option { 39 | // match self { 40 | // TerminalName::Ident(_) => None, 41 | // TerminalName::Char(c) => Some(*c), 42 | // } 43 | // } 44 | pub fn name(self) -> Ident { 45 | match self { 46 | TerminalName::Ident(name) => name, 47 | TerminalName::CharRange(c, _) => { 48 | let s = format!("_Terminal{}", c as u32); 49 | Ident::new(&s, Span::call_site()) 50 | } 51 | } 52 | } 53 | pub fn pretty_name(&self, is_char: bool, is_u8: bool) -> String { 54 | match self { 55 | TerminalName::Ident(ident) => ident.to_string(), 56 | TerminalName::CharRange(start, last) => { 57 | if is_char { 58 | let start_tok = syn::LitChar::new(*start, Span::call_site()).to_token_stream(); 59 | let last_tok = syn::LitChar::new(*last, Span::call_site()).to_token_stream(); 60 | if start == last { 61 | format!("{start_tok}") 62 | } else { 63 | format!("{start_tok}-{last_tok}") 64 | } 65 | } else if is_u8 { 66 | let start_tok = 67 | syn::LitByte::new(*start as u8, Span::call_site()).to_token_stream(); 68 | let last_tok = 69 | syn::LitByte::new(*last as u8, 
Span::call_site()).to_token_stream(); 70 | if start == last { 71 | format!("{start_tok}") 72 | } else { 73 | format!("{start_tok}-{last_tok}") 74 | } 75 | } else { 76 | unreachable!("unexpected char type") 77 | } 78 | } 79 | } 80 | } 81 | } 82 | impl From for TerminalName { 83 | fn from(ident: Ident) -> Self { 84 | TerminalName::Ident(ident) 85 | } 86 | } 87 | impl From<(char, char)> for TerminalName { 88 | fn from(c: (char, char)) -> Self { 89 | TerminalName::CharRange(c.0, c.1) 90 | } 91 | } 92 | impl From<(u32, u32)> for TerminalName { 93 | fn from(c: (u32, u32)) -> Self { 94 | let s = unsafe { char::from_u32_unchecked(c.0) }; 95 | let l = unsafe { char::from_u32_unchecked(c.1) }; 96 | TerminalName::CharRange(s, l) 97 | } 98 | } 99 | 100 | pub struct TerminalInfo { 101 | pub name: TerminalName, 102 | 103 | /// the precedence level of this terminal 104 | pub precedence: Option<(usize, Span)>, 105 | 106 | /// the actual Rust expr to be emitted 107 | pub body: TokenStream, 108 | } 109 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/diags.rs: -------------------------------------------------------------------------------- 1 | use crate::rule::ShiftedRuleRef; 2 | 3 | use std::collections::BTreeMap; 4 | use std::collections::BTreeSet; 5 | 6 | pub struct DiagnosticCollector { 7 | pub enabled: bool, 8 | pub reduce_reduce_resolved: BTreeSet<(usize, BTreeSet, BTreeSet)>, 9 | pub shift_reduce_resolved_shift: BTreeMap< 10 | (Term, Vec), 11 | ( 12 | usize, // shift precedence 13 | BTreeMap, 14 | // (rule, reduce precedence) 15 | ), 16 | >, 17 | pub shift_reduce_resolved_reduce: BTreeMap< 18 | (Term, Vec), 19 | ( 20 | usize, // shift precedence 21 | BTreeMap, 22 | // (rule, reduce precedence) 23 | ), 24 | >, 25 | pub reduce_reduce_conflicts: BTreeMap)>, BTreeSet>, 26 | pub shift_reduce_conflicts: BTreeMap< 27 | (Term, Vec, Vec), 28 | BTreeMap>, 29 | >, 30 | } 31 | impl DiagnosticCollector { 32 | pub fn new(collect: 
bool) -> Self { 33 | DiagnosticCollector { 34 | enabled: collect, 35 | reduce_reduce_resolved: BTreeSet::new(), 36 | shift_reduce_resolved_shift: BTreeMap::new(), 37 | shift_reduce_resolved_reduce: BTreeMap::new(), 38 | shift_reduce_conflicts: BTreeMap::new(), 39 | reduce_reduce_conflicts: BTreeMap::new(), 40 | } 41 | } 42 | pub fn add_reduce_reduce_resolved( 43 | &mut self, 44 | max_priority: usize, 45 | reduce_rules: BTreeSet, 46 | removed_rules: BTreeSet, 47 | ) where 48 | Term: Ord, 49 | { 50 | if self.enabled { 51 | self.reduce_reduce_resolved 52 | .insert((max_priority, reduce_rules, removed_rules)); 53 | } 54 | } 55 | pub fn add_shift_reduce_resolved_shift( 56 | &mut self, 57 | term: Term, 58 | shift_rules: Vec, 59 | shift_precedence: usize, 60 | mut reduce_rules: BTreeMap, 61 | ) where 62 | Term: Ord, 63 | { 64 | if self.enabled { 65 | let value = self 66 | .shift_reduce_resolved_shift 67 | .entry((term, shift_rules)) 68 | .or_default(); 69 | value.0 = shift_precedence; 70 | value.1.append(&mut reduce_rules); 71 | } 72 | } 73 | pub fn add_shift_reduce_resolved_reduce( 74 | &mut self, 75 | term: Term, 76 | shift_rules: Vec, 77 | shift_precedence: usize, 78 | mut reduce_rules: BTreeMap, 79 | ) where 80 | Term: Ord, 81 | { 82 | if self.enabled { 83 | let value = self 84 | .shift_reduce_resolved_reduce 85 | .entry((term, shift_rules)) 86 | .or_default(); 87 | value.0 = shift_precedence; 88 | value.1.append(&mut reduce_rules); 89 | } 90 | } 91 | pub fn add_shift_reduce_conflict( 92 | &mut self, 93 | term: Term, 94 | shift_rules: Vec, 95 | shift_rules_backtrace: Vec, 96 | mut reduce_rules: BTreeMap>, 97 | ) where 98 | Term: Ord, 99 | { 100 | if self.enabled { 101 | self.shift_reduce_conflicts 102 | .entry((term, shift_rules, shift_rules_backtrace)) 103 | .or_default() 104 | .append(&mut reduce_rules); 105 | } 106 | } 107 | pub fn update_reduce_reduce_conflict( 108 | &mut self, 109 | reduce_rules: Vec<(usize, Vec)>, 110 | term: Term, 111 | ) where 112 | Term: Ord, 
113 | { 114 | if self.enabled { 115 | self.reduce_reduce_conflicts 116 | .entry(reduce_rules) 117 | .or_default() 118 | .insert(term); 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /scripts/bootstrap_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Bootstrap rusty_lr_parser/src/parser/parser.rs with different configurations 4 | # and check if the output files are identical. 5 | 6 | compare_files() { 7 | # Check if both arguments are provided 8 | if [ $# -ne 2 ]; then 9 | echo "Usage: compare_files " 10 | return 1 11 | fi 12 | 13 | local file1="$1" 14 | local file2="$2" 15 | 16 | # Check if both files exist 17 | if [ ! -f "$file1" ]; then 18 | echo "Error: File '$file1' does not exist" 19 | return 1 20 | fi 21 | 22 | if [ ! -f "$file2" ]; then 23 | echo "Error: File '$file2' does not exist" 24 | return 1 25 | fi 26 | 27 | # Run diff and capture the exit code 28 | if diff "$file1" "$file2" > /dev/null; then 29 | echo "Files are identical" 30 | return 0 31 | else 32 | echo "Error: Files are different" 33 | diff "$file1" "$file2" 34 | return 1 35 | fi 36 | } 37 | 38 | # Get the directory where this script is located and go up one level 39 | script_dir="$(dirname "$0")" 40 | rustylr_path="$(realpath "$script_dir/..")" 41 | is_from_github_actions=$1 42 | 43 | process_and_compare() { 44 | local config="$1" 45 | cargo run --bin rustylr -- "$rustylr_path/rusty_lr_parser/src/parser/parser.rs" out.tab.rs $config > /dev/null 46 | mv out.tab.rs "$rustylr_path/rusty_lr_parser/src/parser/parser_expanded.rs" 47 | cargo run --bin rustylr -- "$rustylr_path/rusty_lr_parser/src/parser/parser.rs" out.tab.rs $config > /dev/null 48 | compare_files "$rustylr_path/rusty_lr_parser/src/parser/parser_expanded.rs" out.tab.rs 49 | if [ $? 
-ne 0 ]; then 50 | exit 1 51 | fi 52 | } 53 | 54 | echo "RustyLR path: $rustylr_path" 55 | 56 | 57 | # to briefly see the difference of the generated parser in the PR, run for the sample calculator and json parsers 58 | cargo run --bin rustylr -- "$rustylr_path/example/calculator/src/parser.rs" "$rustylr_path/scripts/diff/calculator_new.rs" > /dev/null 59 | cargo run --bin rustylr -- "$rustylr_path/example/calculator_u8/src/parser.rs" "$rustylr_path/scripts/diff/calculator_u8_new.rs" > /dev/null 60 | cargo run --bin rustylr -- "$rustylr_path/example/json/src/parser.rs" "$rustylr_path/scripts/diff/json_new.rs" > /dev/null 61 | if [ "$is_from_github_actions" = "true" ]; then 62 | diff "$rustylr_path/scripts/diff/calculator.rs" "$rustylr_path/scripts/diff/calculator_new.rs" >/dev/null 63 | if [ $? -ne 0 ]; then 64 | echo "scripts/diff/ is not updated. Please run scripts/bootstrap_test.sh locally and commit the changes." 65 | exit 1 66 | fi 67 | 68 | diff "$rustylr_path/scripts/diff/calculator_u8.rs" "$rustylr_path/scripts/diff/calculator_u8_new.rs" >/dev/null 69 | if [ $? -ne 0 ]; then 70 | echo "scripts/diff/ is not updated. Please run scripts/bootstrap_test.sh locally and commit the changes." 71 | exit 1 72 | fi 73 | 74 | diff "$rustylr_path/scripts/diff/json.rs" "$rustylr_path/scripts/diff/json_new.rs" >/dev/null 75 | if [ $? -ne 0 ]; then 76 | echo "scripts/diff/ is not updated. Please run scripts/bootstrap_test.sh locally and commit the changes." 
77 | exit 1 78 | fi 79 | fi 80 | 81 | mv "$rustylr_path/scripts/diff/calculator_new.rs" "$rustylr_path/scripts/diff/calculator.rs" 82 | mv "$rustylr_path/scripts/diff/calculator_u8_new.rs" "$rustylr_path/scripts/diff/calculator_u8.rs" 83 | mv "$rustylr_path/scripts/diff/json_new.rs" "$rustylr_path/scripts/diff/json.rs" 84 | 85 | echo "Setting Dense = false, GLR = false" 86 | process_and_compare "--dense false --glr false" 87 | 88 | echo "Setting Dense = false, GLR = true" 89 | process_and_compare "--dense false --glr true" 90 | 91 | echo "Setting Dense = true, GLR = false" 92 | process_and_compare "--dense true --glr false" 93 | 94 | echo "Setting Dense = true, GLR = true" 95 | process_and_compare "--dense true --glr true" 96 | 97 | echo "Normal configuration" 98 | process_and_compare "" 99 | mv out.tab.rs "$rustylr_path/rusty_lr_parser/src/parser/parser_expanded.rs" 100 | 101 | cargo test --bin glr 102 | if [ $? -ne 0 ]; then 103 | exit 1 104 | fi 105 | 106 | echo "All tests passed." -------------------------------------------------------------------------------- /rusty_lr_derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate provides a procedural macro to generate a parser from a grammar. 2 | //! 3 | //! This crate is private and not intended to be used directly. 4 | //! Please use the [`rusty_lr`](https://crates.io/crates/rusty_lr) crate instead. 5 | 6 | use proc_macro::TokenStream; 7 | use quote::quote; 8 | 9 | /// Build a parser table from a grammar definition. 10 | /// 11 | /// This macro will generate a `Parser` and `Context` structs. 
12 | #[proc_macro] 13 | pub fn lr1(input: TokenStream) -> TokenStream { 14 | let input = input.into(); 15 | use rusty_lr_parser::grammar::Grammar; 16 | let mut grammar_args = match Grammar::parse_args(input) { 17 | Ok(grammar_args) => grammar_args, 18 | Err(e) => return e.to_compile_error().into(), 19 | }; 20 | match Grammar::arg_check_error(&mut grammar_args) { 21 | Ok(_) => {} 22 | Err(e) => return e.to_compile_error().into(), 23 | } 24 | 25 | // If there are any errors in the grammar arguments, emit compile errors. 26 | if !grammar_args.error_recovered.is_empty() { 27 | let mut output = proc_macro2::TokenStream::new(); 28 | for error in &grammar_args.error_recovered { 29 | let span = error.span.span(); 30 | let message = format!("{}\n >>> refer to: {}", error.message, error.link,); 31 | output.extend(quote::quote_spanned! { 32 | span=> 33 | compile_error!(#message); 34 | }); 35 | } 36 | return output.into(); 37 | } 38 | 39 | let mut grammar = match Grammar::from_grammar_args(grammar_args) { 40 | Ok(grammar) => grammar, 41 | Err(e) => return e.to_compile_error().into(), 42 | }; 43 | if grammar.optimize { 44 | grammar.optimize(15); 45 | } 46 | grammar.builder = grammar.create_builder(); 47 | let diags = grammar.build_grammar(); 48 | if !grammar.glr { 49 | if let Some(((term, shift_rules, _), reduce_rules)) = 50 | diags.shift_reduce_conflicts.into_iter().next() 51 | { 52 | let class_mapper = |term| grammar.class_pretty_name_list(term, 5); 53 | let nonterm_mapper = |term| grammar.nonterm_pretty_name(term); 54 | let term = class_mapper(term); 55 | let (reduce_rule, _) = reduce_rules.into_iter().next().unwrap(); 56 | let reduce_rule = grammar.builder.rules[reduce_rule] 57 | .rule 58 | .clone() 59 | .map(class_mapper, nonterm_mapper); 60 | let shift_rules = shift_rules 61 | .into_iter() 62 | .map(|rule| { 63 | format!( 64 | "\n>>> {}", 65 | grammar.builder.rules[rule.rule] 66 | .rule 67 | .clone() 68 | .map(class_mapper, nonterm_mapper) 69 | .into_shifted(rule.shifted) 
70 | ) 71 | }) 72 | .collect::>() 73 | .join(""); 74 | 75 | let message = format!( 76 | "Shift-Reduce conflict with terminal symbol: {}\n>>> Reduce: {}\n>>> Shifts: {}", 77 | term, reduce_rule, shift_rules 78 | ); 79 | return quote! { 80 | compile_error!(#message); 81 | } 82 | .into(); 83 | } 84 | if let Some((reduce_rules, reduce_terms)) = diags.reduce_reduce_conflicts.into_iter().next() 85 | { 86 | let class_mapper = |term| grammar.class_pretty_name_list(term, 5); 87 | let nonterm_mapper = |term| grammar.nonterm_pretty_name(term); 88 | let terms = reduce_terms 89 | .into_iter() 90 | .map(&class_mapper) 91 | .collect::>() 92 | .join(", "); 93 | let reduce_rules = reduce_rules 94 | .into_iter() 95 | .map(|(rule, _)| { 96 | format!( 97 | "\n>>> {}", 98 | grammar.builder.rules[rule] 99 | .rule 100 | .clone() 101 | .map(class_mapper, nonterm_mapper) 102 | ) 103 | }) 104 | .collect::>() 105 | .join(""); 106 | 107 | let message = format!( 108 | "Reduce-Reduce conflict with terminal symbols: {}\n>>> Reduce: {}", 109 | terms, reduce_rules 110 | ); 111 | return quote! 
{ 112 | compile_error!(#message); 113 | } 114 | .into(); 115 | } 116 | } 117 | 118 | grammar.emit_compiletime().into() 119 | } 120 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/nonterminal_info.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeSet; 2 | 3 | use super::token::TokenMapped; 4 | use proc_macro2::Ident; 5 | use proc_macro2::Span; 6 | use proc_macro2::TokenStream; 7 | 8 | #[derive(Clone)] 9 | pub struct CustomReduceAction { 10 | pub body: TokenStream, 11 | idents_used: BTreeSet, 12 | } 13 | 14 | impl CustomReduceAction { 15 | fn fetch_idents(set: &mut BTreeSet, ts: TokenStream) { 16 | for token in ts { 17 | match token { 18 | proc_macro2::TokenTree::Group(g) => { 19 | Self::fetch_idents(set, g.stream()); 20 | } 21 | proc_macro2::TokenTree::Ident(i) => { 22 | set.insert(i); 23 | } 24 | _ => {} 25 | } 26 | } 27 | } 28 | pub fn new(body: TokenStream) -> Self { 29 | let mut idents_used = BTreeSet::new(); 30 | Self::fetch_idents(&mut idents_used, body.clone()); 31 | Self { body, idents_used } 32 | } 33 | fn contains_ident(&self, ident: &Ident) -> bool { 34 | self.idents_used.contains(ident) 35 | } 36 | } 37 | 38 | pub enum ReduceAction { 39 | /// reduce action that is function-like TokenStream 40 | Custom(CustomReduceAction), 41 | /// reduce action that is auto-generated, and simply returns the i'th token itself 42 | Identity(usize), // index of the token in the rule 43 | } 44 | 45 | impl ReduceAction { 46 | pub fn new_custom(body: TokenStream) -> Self { 47 | ReduceAction::Custom(CustomReduceAction::new(body)) 48 | } 49 | pub fn is_identity(&self) -> bool { 50 | matches!(self, ReduceAction::Identity(_)) 51 | } 52 | pub fn is_custom(&self) -> bool { 53 | matches!(self, ReduceAction::Custom(_)) 54 | } 55 | } 56 | 57 | pub struct Rule { 58 | pub tokens: Vec, 59 | /// reduce action called when this rule is reduced 60 | pub reduce_action: 
Option, 61 | /// span of '|' or ':' before this production rule 62 | pub separator_span: Span, 63 | /// force lookahead tokens for this pattern. 64 | pub lookaheads: Option>, 65 | /// %prec definition 66 | pub prec: Option<(rusty_lr_core::rule::Precedence, Span)>, 67 | /// %dprec definition 68 | pub dprec: Option<(usize, Span)>, 69 | 70 | /// in `Grammar::build_grammar()`, some production rules will be optimized out and deleted 71 | pub(crate) is_used: bool, 72 | } 73 | 74 | impl Rule { 75 | pub fn span_pair(&self) -> (Span, Span) { 76 | let begin = self.separator_span; 77 | let end = if let Some(token) = self.tokens.last() { 78 | token.end_span 79 | } else { 80 | begin 81 | }; 82 | (begin, end) 83 | } 84 | pub fn reduce_action_contains_ident(&self, ident: &Ident) -> bool { 85 | match self.reduce_action.as_ref() { 86 | Some(ReduceAction::Custom(custom)) => custom.contains_ident(ident), 87 | Some(ReduceAction::Identity(identity_idx)) => { 88 | self.tokens[*identity_idx].mapto.as_ref() == Some(ident) 89 | } 90 | None => false, 91 | } 92 | } 93 | } 94 | 95 | pub struct NonTerminalInfo { 96 | pub name: Ident, 97 | 98 | /// Name of auto generated rule are in the format of `__AutoRule ...` 99 | /// So we need other abbreviation for auto generated rules. 100 | pub pretty_name: String, 101 | 102 | /// The rule type of this non-terminal 103 | pub ruletype: Option, 104 | 105 | /// Every set of production rules 106 | pub rules: Vec, 107 | 108 | /// If this non-terminal is auto-generated from regex pattern, 109 | /// the (begin, end) span-pair of the regex pattern. 110 | pub(crate) regex_span: Option<(Span, Span)>, 111 | 112 | pub(crate) trace: bool, 113 | /// protected from optimization removal; trace rules are always protected 114 | pub(crate) protected: bool, 115 | 116 | /// if this non-terminal is auto-generated, the pattern that generated this rule. 117 | /// This field is used in rusty_lr_core/tree.rs to unwrap left/right recursion parsing tree into flat array. 
118 | pub(crate) nonterm_type: Option, 119 | } 120 | 121 | impl NonTerminalInfo { 122 | pub fn is_auto_generated(&self) -> bool { 123 | self.regex_span.is_some() 124 | } 125 | /// only for auto-generated rules 126 | /// returns the span of the regex pattern that generated this rule 127 | pub fn origin_span(&self) -> Option<(Span, Span)> { 128 | self.regex_span 129 | } 130 | 131 | /// if this non-terminal is protected from optimization; will not be automatically deleted 132 | pub(crate) fn is_protected(&self) -> bool { 133 | self.protected 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /GLR.md: -------------------------------------------------------------------------------- 1 | # GLR Parsing in RustyLR 2 | RustyLR supports Generalized LR (GLR) parsing, enabling it to handle ambiguous or nondeterministic grammars that traditional LR(1) or LALR(1) parsers cannot process. 3 | 4 | When a GLR parser encounters a conflict (such as shift/reduce or reduce/reduce), 5 | it forks the current parsing state into multiple branches, 6 | each representing a different possible interpretation of the input. 7 | These branches are processed in parallel, and invalid paths are pruned as parsing progresses. 8 | 9 | ## Enabling GLR Parsing 10 | To use GLR parsing in RustyLR, include the `%glr;` directive in your grammar definition. 11 | This directive instructs RustyLR to generate a GLR parser, 12 | which can handle ambiguous grammars by exploring multiple parsing paths. 13 | 14 | Once the `%glr` directive is added, any conflicts in the grammar will not be reported as errors. 15 | It's important to be aware of points in your grammar where shift/reduce or reduce/reduce conflicts occur, as each divergence increases computational complexity. 16 | 17 | **Tip:** If you are using the `rustylr` executable, you can use the `--verbose` option to see any conflicts in the grammar and their divergent paths. 
18 | 19 | ## Example: Ambiguous Grammar 20 | 21 | ```rust 22 | %glr; 23 | %tokentype char; 24 | %start E; 25 | 26 | Digit(char): ['0'-'9'] ; 27 | 28 | E(i32): E '+' E { E + E } 29 | | E '*' E { E * E } 30 | | Digit { Digit.to_digit(10).unwrap() as i32 }; 31 | ``` 32 | 33 | In this grammar, the expression `1 + 2 * 3 + 4` has multiple valid parse trees due to the ambiguity in operator precedence and associativity: 34 | - `((1 + 2) * 3) + 4` 35 | - `(1 + (2 * 3)) + 4` 36 | - `1 + ((2 * 3) + 4)` 37 | - `1 + (2 * (3 + 4))` 38 | - `(1 + 2) * (3 + 4)` 39 | 40 | The GLR parser will explore all possible parsing paths to construct the parse forest. 41 | 42 | ## Resolving Ambiguities 43 | RustyLR allows you to resolve ambiguities dynamically within reduce actions. 44 | Simply returning `Err` from a reduce action will prune the current branch of the parse tree. 45 | By inspecting the lookahead token or other context, you can decide whether to proceed with a particular reduction. 46 | 47 | For example, to enforce operator precedence (e.g., `*` has higher precedence than `+`), you can modify the reduce actions as follows: 48 | 49 | ```rust 50 | E : E '+' E { 51 | match *lookahead.to_term().unwrap() { 52 | '*' => { 53 | // Don't reduce if the next token is '*' 54 | // This prevents: 55 | // E + E / * 56 | // ^ lookahead 57 | // from becoming: E * ... 58 | // ^ (E + E) 59 | return Err("".to_string()); 60 | } 61 | _ => { 62 | // Revoke the shift action 63 | // This prevents: 64 | // E + E / + 65 | // ^ lookahead 66 | // from becoming: E + E + ... 67 | // and enforces only the reduce action: 68 | // E + ... 69 | // ^ (E + E) 70 | *shift = false; 71 | } 72 | } 73 | E + E // Return the result of the addition 74 | } 75 | ``` 76 | 77 | ### Predefined Variables in Reduce Actions 78 | - `lookahead: &TerminalSymbol` - refers to the next token in the input stream. 
either a terminal fed by the user or a special token like `error`
self.ranges.iter().enumerate() { 13 | if l < start { 14 | new_ranges.push((s, l)); 15 | continue; 16 | } 17 | if s > last { 18 | if !added { 19 | new_ranges.push((start, last)); 20 | } 21 | new_ranges.extend_from_slice(&self.ranges[idx..]); 22 | added = true; 23 | break; 24 | } 25 | 26 | // must overlap here 27 | 28 | use std::cmp::Ordering; 29 | match start.cmp(&s) { 30 | Ordering::Less => { 31 | match last.cmp(&l) { 32 | Ordering::Less => { 33 | // nnnnnnn 34 | // rrrrr 35 | new_ranges.push((start, s - 1)); 36 | new_ranges.push((s, last)); 37 | new_ranges.push((last + 1, l)); 38 | added = true; 39 | } 40 | Ordering::Equal => { 41 | // nnnnnn 42 | // rrr 43 | new_ranges.push((start, s - 1)); 44 | new_ranges.push((s, l)); 45 | added = true; 46 | } 47 | Ordering::Greater => { 48 | // nnnnnnn 49 | // rrr 50 | new_ranges.push((start, s - 1)); 51 | new_ranges.push((s, l)); 52 | start = l + 1; 53 | } 54 | } 55 | } 56 | Ordering::Equal => { 57 | match last.cmp(&l) { 58 | Ordering::Less => { 59 | // nnnn 60 | // rrrrrr 61 | new_ranges.push((start, last)); 62 | added = true; 63 | new_ranges.push((last + 1, l)); 64 | } 65 | Ordering::Equal => { 66 | // nnnn 67 | // rrrr 68 | new_ranges.push((start, last)); 69 | added = true; 70 | } 71 | Ordering::Greater => { 72 | // nnnnnnnn 73 | // rrrr 74 | new_ranges.push((s, l)); 75 | start = l + 1; 76 | } 77 | } 78 | } 79 | Ordering::Greater => { 80 | // start > s 81 | match last.cmp(&l) { 82 | Ordering::Less => { 83 | // nnnnn 84 | // rrrrrrrrr 85 | new_ranges.push((s, start - 1)); 86 | new_ranges.push((start, last)); 87 | new_ranges.push((last + 1, l)); 88 | added = true; 89 | } 90 | Ordering::Equal => { 91 | // nnnnnnn 92 | // rrrrrrrrr 93 | new_ranges.push((s, start - 1)); 94 | new_ranges.push((start, last)); 95 | added = true; 96 | } 97 | Ordering::Greater => { 98 | // nnnnnnnnn 99 | // rrrrrrrrr 100 | new_ranges.push((s, start - 1)); 101 | new_ranges.push((start, l)); 102 | start = l + 1; 103 | } 104 | } 105 | } 106 | } 107 | } 
108 | if !added { 109 | new_ranges.push((start, last)); 110 | } 111 | 112 | self.ranges = new_ranges; 113 | } 114 | 115 | pub fn get_ranges(&self, start: u32, last: u32) -> impl Iterator { 116 | let first_idx = match self.ranges.binary_search(&(start, start)) { 117 | Ok(idx) => idx, 118 | Err(idx) => idx, 119 | }; 120 | let end_idx = match self.ranges.binary_search(&(last, last)) { 121 | Ok(idx) => idx + 1, 122 | Err(idx) => idx, 123 | }; 124 | first_idx..end_idx 125 | } 126 | 127 | pub fn iter(&self) -> impl Iterator + '_ { 128 | self.ranges.iter().copied() 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /rusty_lr_executable/README.md: -------------------------------------------------------------------------------- 1 | # rustylr 2 | Executable for rusty_lr, a bison-like parser generator & compiler frontend for Rust supporting IELR(1), LALR(1) parser tables, with deterministic LR and non-deterministic LR (GLR) parsing. 3 | 4 | 5 | ## Installation 6 | ```bash 7 | cargo install rustylr 8 | ``` 9 | 10 | ## Usage 11 | ```bash 12 | $ rustylr --help 13 | Usage: rustylr [OPTIONS] [OUTPUT_FILE] 14 | 15 | Arguments: 16 | 17 | Input_file to read 18 | 19 | [OUTPUT_FILE] 20 | Output_file to write 21 | 22 | [default: out.tab.rs] 23 | 24 | Options: 25 | --no-format 26 | Do not rustfmt the output 27 | 28 | -c, --no-conflict 29 | Do not print note information about any shift/reduce, reduce/reduce conflicts. 30 | 31 | If the target is deterministic parser, conflict will be treated as an error, so this option will be ignored. This option is only for non-deterministic GLR parser. 32 | 33 | -r, --no-conflict-resolve 34 | Do not print debug information about conflicts resolving process by any `%left`, `%right`, or `%precedence` directive 35 | 36 | -o, --no-optimization 37 | Do not print debug information about optimization process 38 | 39 | -b, --no-backtrace 40 | Do not print backtrace of production rules when conflicts occurred. 
ruleset could be messed up 41 | 42 | --glr 43 | Override the written code and set generated parser use GLR parsing algorithm 44 | 45 | [possible values: true, false] 46 | 47 | --dense 48 | Override the written code and set generated parser table to use dense arrays 49 | 50 | [possible values: true, false] 51 | 52 | --state 53 | Print the details of a specific state 54 | 55 | -h, --help 56 | Print help (see a summary with '-h') 57 | 58 | -V, --version 59 | Print version 60 | ``` 61 | 62 | ## Grammar File Format 63 | The program searches for `%%` in the input file to separate Rust code from grammar definitions. 64 | 65 | - **Before `%%`**: Regular Rust code (imports, type definitions, etc.) that will be copied to the output file as-is 66 | - **After `%%`**: Context-free grammar definition that must follow the [RustyLR syntax](../SYNTAX.md) 67 | 68 | ## Example 69 | 70 | Here's a simple example showing how to create a grammar file and generate a parser: 71 | 72 | **Input file** (`my_grammar.rs`): 73 | ```rust 74 | // Rust imports and type definitions 75 | use std::collections::HashMap; 76 | 77 | #[derive(Debug, Clone)] 78 | pub enum Token { 79 | Identifier(String), 80 | Number(i32), 81 | Punct(char), 82 | } 83 | 84 | %% // Grammar definition starts here 85 | 86 | %tokentype Token; 87 | %start E; 88 | 89 | %token id Token::Identifier(_); 90 | %token num Token::Number(_); 91 | %token lparen Token::Punct('('); 92 | %token rparen Token::Punct(')'); 93 | 94 | E: lparen E rparen { E } 95 | | id { 96 | if let Token::Identifier(name) = id { 97 | println!("Found identifier: {}", name); 98 | } 99 | } 100 | | num { 101 | if let Token::Number(value) = num { 102 | println!("Found number: {}", value); 103 | } 104 | } 105 | ; 106 | ``` 107 | 108 | **Generate the parser:** 109 | ```bash 110 | $ rustylr my_grammar.rs my_parser.rs 111 | ``` 112 | 113 | This will create `my_parser.rs` containing the generated parser code. 
114 | 115 | **Using the generated parser:** 116 | ```rust 117 | include!("my_parser.rs"); 118 | 119 | fn main() { 120 | let parser = EParser::new(); 121 | let mut context = EContext::new(); 122 | 123 | // Parse some tokens 124 | let tokens = vec![ 125 | Token::Punct('('), 126 | Token::Identifier("hello".to_string()), 127 | Token::Punct(')'), 128 | ]; 129 | 130 | for token in tokens { 131 | match context.feed(&parser, token, &mut ()) { 132 | Ok(_) => println!("Token accepted"), 133 | Err(e) => println!("Parse error: {}", e), 134 | } 135 | } 136 | 137 | // Get the final result 138 | if let Ok(result) = context.accept( &parser, &mut () ) { 139 | println!("Parse successful: {:?}", result); 140 | } 141 | } 142 | ``` 143 | 144 | ## The `--state` Option 145 | You can inspect the details of a specific parser state using the `--state` option. This is useful for debugging and understanding how the parser processes input. 146 | 147 | ```bash 148 | $ rustylr my_grammar.rs --state 5 // get details of state 5 149 | ``` 150 | ``` 151 | State 5: 152 | Production Rules: { 153 | Pattern -> TerminalSet • 154 | } 155 | Reduce on Terminals: { 156 | ident => { Pattern -> TerminalSet } 157 | semicolon => { Pattern -> TerminalSet } 158 | pipe => { Pattern -> TerminalSet } 159 | percent => { Pattern -> TerminalSet } 160 | plus => { Pattern -> TerminalSet } 161 | star => { Pattern -> TerminalSet } 162 | question => { Pattern -> TerminalSet } 163 | minus => { Pattern -> TerminalSet } 164 | exclamation => { Pattern -> TerminalSet } 165 | slash => { Pattern -> TerminalSet } 166 | dot => { Pattern -> TerminalSet } 167 | dollar => { Pattern -> TerminalSet } 168 | comma => { Pattern -> TerminalSet } 169 | literal => { Pattern -> TerminalSet } 170 | bracegroup => { Pattern -> TerminalSet } 171 | lparen => { Pattern -> TerminalSet } 172 | rparen => { Pattern -> TerminalSet } 173 | lbracket => { Pattern -> TerminalSet } 174 | error => { Pattern -> TerminalSet } 175 | } 176 | From States: { 177 | 
State 4 178 | State 6 179 | State 11 180 | State 13 181 | State 35 182 | State 38 183 | State 40 184 | State 44 185 | State 48 186 | State 66 187 | State 70 188 | } 189 | ``` 190 | 191 | For more usage examples and detailed documentation, see the [main README](../README.md). -------------------------------------------------------------------------------- /rusty_lr_parser/src/terminalset.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Ident; 2 | use proc_macro2::Literal; 3 | use proc_macro2::Span; 4 | use quote::ToTokens; 5 | 6 | use std::collections::BTreeSet; 7 | 8 | use crate::error::ParseError; 9 | use crate::grammar::Grammar; 10 | use crate::terminal_info::TerminalName; 11 | 12 | #[derive(Debug, Clone)] 13 | pub enum TerminalSetItem { 14 | Terminal(Ident), 15 | Range(Ident, Ident), 16 | Literal(Literal), 17 | LiteralRange(Literal, Literal), 18 | } 19 | 20 | impl std::fmt::Display for TerminalSetItem { 21 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 22 | match self { 23 | TerminalSetItem::Terminal(ident) => write!(f, "{}", ident), 24 | TerminalSetItem::Range(first, last) => write!(f, "{}-{}", first, last), 25 | TerminalSetItem::Literal(literal) => write!(f, "{}", literal), 26 | TerminalSetItem::LiteralRange(first, last) => write!(f, "{}-{}", first, last), 27 | } 28 | } 29 | } 30 | 31 | impl TerminalSetItem { 32 | pub fn to_terminal_set(&self, grammar: &mut Grammar) -> Result, ParseError> { 33 | match self { 34 | TerminalSetItem::Terminal(terminal) => { 35 | if let Some(idx) = grammar 36 | .terminals_index 37 | .get(&TerminalName::Ident(terminal.clone())) 38 | { 39 | Ok(BTreeSet::from([*idx])) 40 | } else { 41 | Err(ParseError::TerminalNotDefined(terminal.clone())) 42 | } 43 | } 44 | TerminalSetItem::Range(first, last) => { 45 | let first_index = match grammar 46 | .terminals_index 47 | .get(&TerminalName::Ident(first.clone())) 48 | { 49 | Some(f) => f, 50 | None => return 
Err(ParseError::TerminalNotDefined(first.clone())), 51 | }; 52 | let last_index = match grammar 53 | .terminals_index 54 | .get(&TerminalName::Ident(last.clone())) 55 | { 56 | Some(l) => l, 57 | None => return Err(ParseError::TerminalNotDefined(last.clone())), 58 | }; 59 | if last_index < first_index { 60 | return Err(ParseError::InvalidTerminalRange( 61 | ( 62 | first.clone(), 63 | *first_index, 64 | grammar.terminals[*first_index].body.clone(), 65 | ), 66 | ( 67 | last.clone(), 68 | *last_index, 69 | grammar.terminals[*last_index].body.clone(), 70 | ), 71 | )); 72 | } 73 | Ok((*first_index..=*last_index).collect()) 74 | } 75 | TerminalSetItem::Literal(literal) => { 76 | let lit = syn::parse2::(literal.to_token_stream()) 77 | .expect("failed on syn::parse2"); 78 | let val = grammar.get_char_value(&lit)?; 79 | let name: TerminalName = (val, val).into(); 80 | let idx = *grammar.terminals_index.get(&name).unwrap(); 81 | Ok(BTreeSet::from([idx])) 82 | } 83 | TerminalSetItem::LiteralRange(first_l, last_l) => { 84 | let first = syn::parse2::(first_l.to_token_stream()) 85 | .expect("failed on syn::parse2"); 86 | let first_ch = grammar.get_char_value(&first)?; 87 | 88 | let last = syn::parse2::(last_l.to_token_stream()) 89 | .expect("failed on syn::parse2"); 90 | let last_ch = grammar.get_char_value(&last)?; 91 | if first_ch > last_ch { 92 | return Err(ParseError::InvalidLiteralRange( 93 | first_l.clone(), 94 | last_l.clone(), 95 | )); 96 | } 97 | 98 | let set: BTreeSet = grammar 99 | .get_terminal_indices_from_char_range(first_ch, last_ch) 100 | .collect(); 101 | Ok(set) 102 | } 103 | } 104 | } 105 | pub fn range_resolve(&self, grammar: &mut Grammar) -> Result<(), ParseError> { 106 | match self { 107 | TerminalSetItem::Terminal(_) => Ok(()), 108 | TerminalSetItem::Range(_, _) => Ok(()), 109 | TerminalSetItem::Literal(literal) => { 110 | let lit = syn::parse2::(literal.to_token_stream()) 111 | .expect("failed on syn::parse2"); 112 | let val = 
grammar.get_char_value(&lit)?; 113 | grammar.range_resolver.insert(val, val); 114 | Ok(()) 115 | } 116 | TerminalSetItem::LiteralRange(first_l, last_l) => { 117 | let first = syn::parse2::(first_l.to_token_stream()) 118 | .expect("failed on syn::parse2"); 119 | let first_ch = grammar.get_char_value(&first)?; 120 | 121 | let last = syn::parse2::(last_l.to_token_stream()) 122 | .expect("failed on syn::parse2"); 123 | let last_ch = grammar.get_char_value(&last)?; 124 | if first_ch > last_ch { 125 | return Err(ParseError::InvalidLiteralRange( 126 | first_l.clone(), 127 | last_l.clone(), 128 | )); 129 | } 130 | grammar.range_resolver.insert(first_ch, last_ch); 131 | Ok(()) 132 | } 133 | } 134 | } 135 | } 136 | 137 | #[derive(Debug, Clone)] 138 | pub struct TerminalSet { 139 | pub negate: bool, 140 | pub items: Vec, 141 | // '[' 142 | pub open_span: Span, 143 | // ']' 144 | pub close_span: Span, 145 | } 146 | impl TerminalSet { 147 | // in case of negation, `include_eof` is true if the final terminal set contains eof 148 | pub fn to_terminal_set( 149 | &self, 150 | grammar: &mut Grammar, 151 | ) -> Result<(bool, BTreeSet), ParseError> { 152 | let mut terminal_set = BTreeSet::new(); 153 | for item in &self.items { 154 | let mut item_set = item.to_terminal_set(grammar)?; 155 | terminal_set.append(&mut item_set); 156 | } 157 | Ok((self.negate, terminal_set)) 158 | } 159 | pub fn range_resolve(&self, grammar: &mut Grammar) -> Result<(), ParseError> { 160 | for item in &self.items { 161 | item.range_resolve(grammar)?; 162 | } 163 | Ok(()) 164 | } 165 | } 166 | 167 | impl std::fmt::Display for TerminalSet { 168 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 169 | write!(f, "[")?; 170 | if self.negate { 171 | write!(f, "^")?; 172 | } 173 | for (i, item) in self.items.iter().enumerate() { 174 | if i > 0 { 175 | write!(f, ", ")?; 176 | } 177 | write!(f, "{}", item)?; 178 | } 179 | write!(f, "]") 180 | } 181 | } 182 | 
-------------------------------------------------------------------------------- /rusty_lr_executable/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | 3 | use std::fs::write; 4 | 5 | mod arg; 6 | 7 | fn main() { 8 | let args = match arg::Args::try_parse() { 9 | Ok(args) => args, 10 | Err(e) => { 11 | eprintln!("{}", e); 12 | return; 13 | } 14 | }; 15 | 16 | let mut builder = rusty_lr_buildscript::Builder::new(); 17 | builder.is_executable = true; 18 | builder.file(&args.input_file); 19 | if args.no_conflict { 20 | builder.note_conflicts(false); 21 | } 22 | if args.no_conflict_resolve { 23 | builder.note_conflicts_resolving(false); 24 | } 25 | if args.no_optimization { 26 | builder.note_optimization(false); 27 | } 28 | if args.no_backtrace { 29 | builder.note_backtrace(false); 30 | } 31 | if let Some(glr) = args.glr { 32 | builder.glr(glr); 33 | } 34 | if let Some(dense) = args.dense { 35 | builder.dense(dense); 36 | } 37 | 38 | let out = match builder.build_impl() { 39 | Ok(out) => out, 40 | Err(_) => { 41 | return; 42 | } 43 | }; 44 | 45 | if let Some(state_idx) = args.state { 46 | let term_class_map = |term| out.grammar.class_pretty_name_list(term, 4); 47 | let nonterm_map = |nonterm| out.grammar.nonterm_pretty_name(nonterm); 48 | 49 | if let Some(state) = out.grammar.states.get(state_idx) { 50 | let mut from_states = Vec::new(); 51 | for (i, s) in out.grammar.states.iter().enumerate() { 52 | if s.shift_goto_map_term 53 | .iter() 54 | .any(|(_, t)| t.state == state_idx) 55 | || s.shift_goto_map_nonterm 56 | .iter() 57 | .any(|(_, t)| t.state == state_idx) 58 | { 59 | from_states.push(i); 60 | } 61 | } 62 | 63 | let mut similar_states = Vec::new(); 64 | for (i, s) in out.grammar.states.iter().enumerate() { 65 | if state.ruleset == s.ruleset && i != state_idx { 66 | similar_states.push(i); 67 | } 68 | } 69 | 70 | println!("State {state_idx}:"); 71 | println!("Production Rules: {{"); 72 | for 
rule in &state.ruleset { 73 | let rule = out.grammar.builder.rules[rule.rule] 74 | .rule 75 | .clone() 76 | .map(&term_class_map, &nonterm_map) 77 | .into_shifted(rule.shifted); 78 | println!(" {}", rule); 79 | } 80 | println!("}}"); 81 | if !state.shift_goto_map_term.is_empty() { 82 | println!("Shift/Goto on Terminals: {{"); 83 | for (term, target) in &state.shift_goto_map_term { 84 | let term = term_class_map(*term); 85 | println!(" {term:>4} => State {}", target.state); 86 | } 87 | println!("}}"); 88 | } 89 | if !state.shift_goto_map_nonterm.is_empty() { 90 | println!("Shift/Goto on Non-Terminals: {{"); 91 | for (nonterm, target) in &state.shift_goto_map_nonterm { 92 | let nonterm = nonterm_map(*nonterm); 93 | println!(" {nonterm:>4} => State {}", target.state); 94 | } 95 | println!("}}"); 96 | } 97 | if !state.reduce_map.is_empty() { 98 | println!("Reduce on Terminals: {{"); 99 | for (term, rules) in &state.reduce_map { 100 | let term = term_class_map(*term); 101 | let preline = format!(" {term:>4} => {{ "); 102 | let mut rules = rules 103 | .iter() 104 | .map(|rule| { 105 | out.grammar.builder.rules[*rule] 106 | .rule 107 | .clone() 108 | .map(&term_class_map, &nonterm_map) 109 | .to_string() 110 | }) 111 | .collect::>(); 112 | for r in rules.iter_mut().skip(1) { 113 | *r = format!("\n{}{}", " ".repeat(preline.len()), r); 114 | } 115 | 116 | println!("{}{} }}", preline, rules.join("")); 117 | } 118 | println!("}}"); 119 | } 120 | if !from_states.is_empty() { 121 | println!("From States: {{"); 122 | for from_state in from_states { 123 | println!(" State {}", from_state); 124 | } 125 | println!("}}"); 126 | } 127 | if !similar_states.is_empty() { 128 | println!("Similar States: {{"); 129 | for similar_state in similar_states { 130 | println!(" State {}", similar_state); 131 | } 132 | println!("}}"); 133 | } 134 | } else { 135 | println!("State {state_idx} does not exist."); 136 | } 137 | } 138 | 139 | let (major, minor, patch) = 
rusty_lr_buildscript::target_rusty_lr_version(); 140 | println!(">> The generated code is targeting rusty_lr version {major}.{minor}.x."); 141 | println!(">> There might be a build error if the version is not matched."); 142 | 143 | // format the generated code 144 | let user_code = if args.no_format { 145 | out.user_stream.to_string() 146 | } else { 147 | match syn::parse2(out.user_stream.clone()) { 148 | Ok(file) => prettyplease::unparse(&file), 149 | Err(e) => { 150 | eprintln!("Error parsing user code: {}", e); 151 | out.user_stream.to_string() 152 | } 153 | } 154 | }; 155 | let generated_code = if args.no_format { 156 | out.generated_stream.to_string() 157 | } else { 158 | match syn::parse2(out.generated_stream.clone()) { 159 | Ok(file) => prettyplease::unparse(&file), 160 | Err(e) => { 161 | eprintln!("Error parsing generated code: {}", e); 162 | out.generated_stream.to_string() 163 | } 164 | } 165 | }; 166 | 167 | let this_name = env!("CARGO_PKG_NAME"); 168 | let this_version = env!("CARGO_PKG_VERSION"); 169 | let output_string = format!( 170 | r#" 171 | // This file was generated by {} {} 172 | // This generated code is targeting rusty_lr version {major}.{minor}.{patch}. 173 | // There might be a build error if the version is not matched. 
174 | // 175 | // {:=^80} 176 | {} 177 | // {:=^80} 178 | /* 179 | {} 180 | */ 181 | // {:=^80} 182 | {} 183 | // {:=^80} 184 | "#, 185 | this_name, 186 | this_version, 187 | "User Codes Begin", 188 | user_code, 189 | "User Codes End", 190 | out.debug_comments, 191 | "Generated Codes Begin", 192 | generated_code, 193 | "Generated Codes End" 194 | ); 195 | match write(args.output_file.clone(), output_string) { 196 | Ok(_) => {} 197 | Err(e) => { 198 | eprintln!("Error writing output file: {}", e); 199 | return; 200 | } 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /rusty_lr_core/src/rule.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::collections::BTreeSet; 3 | 4 | use std::fmt::Debug; 5 | use std::fmt::Display; 6 | 7 | use crate::token::Token; 8 | 9 | /// For resolving shift/reduce conflict 10 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 11 | pub enum ReduceType { 12 | /// reduce to the left, i.e. reduce first 13 | Left, 14 | /// reduce to the right, i.e. shift first 15 | Right, 16 | } 17 | impl std::fmt::Display for ReduceType { 18 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 19 | match self { 20 | ReduceType::Left => write!(f, "Left"), 21 | ReduceType::Right => write!(f, "Right"), 22 | } 23 | } 24 | } 25 | 26 | /// Operator precedence for production rules 27 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] 28 | pub enum Precedence { 29 | /// fixed precedence level 30 | Fixed(usize), // precedence level 31 | 32 | /// get precedence from i'th child token; for runtime conflict resolution 33 | Dynamic(usize), // token index 34 | } 35 | 36 | // Production rule. 37 | // 38 | // name -> Token0 Token1 Token2 ... 
39 | #[derive(Clone, Default)] 40 | pub struct ProductionRule { 41 | pub name: NonTerm, 42 | pub rule: Vec>, 43 | pub precedence: Option, 44 | } 45 | impl Display for ProductionRule { 46 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 47 | write!(f, "{} -> ", self.name)?; 48 | for (id, token) in self.rule.iter().enumerate() { 49 | write!(f, "{}", token)?; 50 | if id < self.rule.len() - 1 { 51 | write!(f, " ")?; 52 | } 53 | } 54 | Ok(()) 55 | } 56 | } 57 | impl Debug for ProductionRule { 58 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 59 | write!(f, "{:?} -> ", self.name)?; 60 | for (id, token) in self.rule.iter().enumerate() { 61 | write!(f, "{:?}", token)?; 62 | if id < self.rule.len() - 1 { 63 | write!(f, " ")?; 64 | } 65 | } 66 | 67 | if let Some(prec) = self.precedence { 68 | write!(f, " [prec: {:?}]", prec)?; 69 | } 70 | Ok(()) 71 | } 72 | } 73 | 74 | impl ProductionRule { 75 | /// Map terminal and non-terminal symbols to another type. 76 | /// This is useful when exporting & importing rules. 77 | pub fn map( 78 | self, 79 | term_map: impl Fn(Term) -> NewTerm, 80 | nonterm_map: impl Fn(NonTerm) -> NewNonTerm, 81 | ) -> ProductionRule { 82 | ProductionRule { 83 | name: nonterm_map(self.name), 84 | rule: self 85 | .rule 86 | .into_iter() 87 | .map(move |token| match token { 88 | Token::Term(term) => Token::Term(term_map(term)), 89 | Token::NonTerm(nonterm) => Token::NonTerm(nonterm_map(nonterm)), 90 | }) 91 | .collect(), 92 | precedence: self.precedence, 93 | } 94 | } 95 | 96 | /// shift this rule 97 | pub fn into_shifted(self, shifted: usize) -> ShiftedRule { 98 | ShiftedRule { 99 | rule: self, 100 | shifted, 101 | } 102 | } 103 | } 104 | 105 | /// A struct for single shifted named production rule. 106 | /// 107 | /// name -> Token1 Token2 . 
Token3 108 | /// 109 | /// ^^^^^^^^^^^^^ shifted = 2 110 | /// 111 | /// This struct has index of the Rule in Grammar::rules 112 | /// and it will be used for Eq, Ord, Hash 113 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Copy, Default)] 114 | pub struct ShiftedRuleRef { 115 | /// index of the rule in `rules` 116 | pub rule: usize, 117 | /// shifted index 118 | pub shifted: usize, 119 | } 120 | 121 | #[derive(Clone, Default)] 122 | pub struct ShiftedRule { 123 | pub rule: ProductionRule, 124 | pub shifted: usize, 125 | } 126 | impl Display for ShiftedRule { 127 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 128 | write!(f, "{} -> ", self.rule.name)?; 129 | for (id, token) in self.rule.rule.iter().enumerate() { 130 | if id == self.shifted { 131 | write!(f, "• ")?; 132 | } 133 | write!(f, "{}", token)?; 134 | if id < self.rule.rule.len() - 1 { 135 | write!(f, " ")?; 136 | } 137 | } 138 | if self.shifted == self.rule.rule.len() { 139 | write!(f, " •")?; 140 | } 141 | 142 | if let Some(prec) = self.rule.precedence { 143 | write!(f, " [prec: {:?}]", prec)?; 144 | } 145 | Ok(()) 146 | } 147 | } 148 | impl Debug for ShiftedRule { 149 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 150 | write!(f, "{:?} -> ", self.rule.name)?; 151 | for (id, token) in self.rule.rule.iter().enumerate() { 152 | if id == self.shifted { 153 | write!(f, "• ")?; 154 | } 155 | write!(f, "{:?}", token)?; 156 | if id < self.rule.rule.len() - 1 { 157 | write!(f, " ")?; 158 | } 159 | } 160 | if self.shifted == self.rule.rule.len() { 161 | write!(f, " •")?; 162 | } 163 | Ok(()) 164 | } 165 | } 166 | 167 | /// shifted rule with lookahead tokens 168 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Default)] 169 | pub struct LookaheadRuleRef { 170 | pub rule: ShiftedRuleRef, 171 | pub lookaheads: BTreeSet, 172 | } 173 | 174 | /// shifted rule with lookahead tokens 175 | #[derive(Clone)] 176 | pub struct LookaheadRule { 177 | pub rule: 
ShiftedRule, 178 | pub lookaheads: BTreeSet, 179 | } 180 | impl Display for LookaheadRule { 181 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 182 | write!(f, "{} / ", self.rule)?; 183 | for (id, lookahead) in self.lookaheads.iter().enumerate() { 184 | write!(f, "{}", lookahead)?; 185 | if id < self.lookaheads.len() - 1 { 186 | write!(f, ", ")?; 187 | } 188 | } 189 | Ok(()) 190 | } 191 | } 192 | impl Debug for LookaheadRule { 193 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 194 | write!(f, "{:?} / ", self.rule)?; 195 | for (id, lookahead) in self.lookaheads.iter().enumerate() { 196 | write!(f, "{:?}", lookahead)?; 197 | if id < self.lookaheads.len() - 1 { 198 | write!(f, ", ")?; 199 | } 200 | } 201 | Ok(()) 202 | } 203 | } 204 | 205 | /// set of lookahead rules 206 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Default)] 207 | pub struct LookaheadRuleRefSet { 208 | pub rules: BTreeMap>, 209 | } 210 | impl LookaheadRuleRefSet { 211 | pub fn new() -> Self { 212 | LookaheadRuleRefSet { 213 | rules: BTreeMap::new(), 214 | } 215 | } 216 | pub fn add(&mut self, rule: ShiftedRuleRef, mut lookaheads: BTreeSet) -> bool 217 | where 218 | Term: Ord, 219 | { 220 | let mut changed = false; 221 | let set = self.rules.entry(rule).or_insert_with(|| { 222 | changed = true; 223 | BTreeSet::new() 224 | }); 225 | let old = set.len(); 226 | set.append(&mut lookaheads); 227 | changed || old != set.len() 228 | } 229 | } 230 | // impl<'a, Term: TermTraitBound + Display, NonTerm: NonTermTraitBound + Display> Display 231 | // for LookaheadRuleRefSet<'a, Term, NonTerm> 232 | // { 233 | // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 234 | // for (id, rule) in self.rules.iter().enumerate() { 235 | // rule.fmt(f)?; 236 | // if id < self.rules.len() - 1 { 237 | // writeln!(f)?; 238 | // } 239 | // } 240 | // Ok(()) 241 | // } 242 | // } 243 | 
-------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Taehwan Kim 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /rusty_lr_core/src/tree.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | use std::ops::Deref; 4 | use std::ops::DerefMut; 5 | 6 | use termtree::Tree as TermTree; 7 | 8 | use crate::parser::nonterminal::NonTerminal; 9 | use crate::parser::nonterminal::NonTerminalType; 10 | use crate::TerminalSymbol; 11 | 12 | /// Tree represention of single non-terminal token. 13 | /// User must enable feature `tree` to use this. 
14 | #[derive(Clone)] 15 | pub struct TreeNonTerminal { 16 | /// non terminal symbol that this tree reduced to 17 | pub nonterm: NonTerm, 18 | 19 | /// children of this token consumed by reduction 20 | pub tokens: Vec>, 21 | } 22 | 23 | impl TreeNonTerminal { 24 | pub fn new(nonterm: NonTerm, tokens: Vec>) -> Self { 25 | Self { nonterm, tokens } 26 | } 27 | 28 | /// convert this tree to termtree::Tree using Display trait 29 | pub(crate) fn to_term_tree( 30 | &self, 31 | term_to_display: &impl Fn(&TerminalSymbol) -> D, 32 | nonterm_to_display: &impl Fn(&NonTerm) -> D, 33 | ) -> Vec> 34 | where 35 | NonTerm: NonTerminal, 36 | { 37 | // Manually configure the format for the auto-generated non-teminals. 38 | // for example, one or more repetitions A+ will be implemented as left recursion, 39 | // but we want to display the tree as flat array. 40 | let nonterm_name = nonterm_to_display(&self.nonterm); 41 | match self.nonterm.nonterm_type() { 42 | // normal tree 43 | None 44 | | Some(NonTerminalType::Augmented) 45 | | Some(NonTerminalType::Error) 46 | | Some(NonTerminalType::Group) => { 47 | let tree = TermTree::new(nonterm_name); 48 | vec![tree.with_leaves( 49 | self.tokens 50 | .iter() 51 | .flat_map(|token| token.to_term_tree(term_to_display, nonterm_to_display)), 52 | )] 53 | } 54 | 55 | // remove parent, directly add children 56 | Some(NonTerminalType::Lookahead) | Some(NonTerminalType::TerminalSet) => self 57 | .tokens 58 | .iter() 59 | .flat_map(|token| token.to_term_tree(term_to_display, nonterm_to_display)) 60 | .collect(), 61 | 62 | // remove left/right recursion, make it to flat array 63 | Some(NonTerminalType::Star) => { 64 | let tree = TermTree::new(nonterm_name); 65 | let tree = if self.tokens.is_empty() { 66 | tree 67 | } else { 68 | let plus = self.tokens[0] 69 | .to_term_tree(term_to_display, nonterm_to_display) 70 | .into_iter() 71 | .next() 72 | .unwrap(); 73 | tree.with_leaves(plus.leaves) 74 | }; 75 | vec![tree] 76 | } 77 | // remove left/right 
recursion, make it to flat array 78 | Some(NonTerminalType::PlusLeft) => { 79 | let tree = TermTree::new(nonterm_name); 80 | let tree = match self.tokens.len() { 81 | 1 => { 82 | let child = self.tokens[0] 83 | .to_term_tree(term_to_display, nonterm_to_display) 84 | .into_iter() 85 | .next() 86 | .unwrap(); 87 | tree.with_leaves([child]) 88 | } 89 | 2 => { 90 | let mut child_list = self.tokens[0] 91 | .to_term_tree(term_to_display, nonterm_to_display) 92 | .into_iter() 93 | .next() 94 | .unwrap() 95 | .leaves; 96 | let child = self.tokens[1] 97 | .to_term_tree(term_to_display, nonterm_to_display) 98 | .into_iter() 99 | .next() 100 | .unwrap(); 101 | child_list.push(child); 102 | tree.with_leaves(child_list) 103 | } 104 | 3 => { 105 | let mut child_list = self.tokens[0] 106 | .to_term_tree(term_to_display, nonterm_to_display) 107 | .into_iter() 108 | .next() 109 | .unwrap() 110 | .leaves; 111 | let separator = self.tokens[1] 112 | .to_term_tree(term_to_display, nonterm_to_display) 113 | .into_iter() 114 | .next() 115 | .unwrap(); 116 | let child = self.tokens[2] 117 | .to_term_tree(term_to_display, nonterm_to_display) 118 | .into_iter() 119 | .next() 120 | .unwrap(); 121 | child_list.push(separator); 122 | child_list.push(child); 123 | tree.with_leaves(child_list) 124 | } 125 | _ => { 126 | unreachable!("PlusLeft length of child: {}", self.tokens.len()) 127 | } 128 | }; 129 | vec![tree] 130 | } 131 | // remove left/right recursion, make it to flat array 132 | Some(NonTerminalType::PlusRight) => { 133 | let tree = TermTree::new(nonterm_name); 134 | let tree = match self.tokens.len() { 135 | 1 => { 136 | let child = self.tokens[0] 137 | .to_term_tree(term_to_display, nonterm_to_display) 138 | .into_iter() 139 | .next() 140 | .unwrap(); 141 | tree.with_leaves([child]) 142 | } 143 | 2 => { 144 | let child = self.tokens[0] 145 | .to_term_tree(term_to_display, nonterm_to_display) 146 | .into_iter() 147 | .next() 148 | .unwrap(); 149 | let mut child_list = self.tokens[1] 
150 | .to_term_tree(term_to_display, nonterm_to_display) 151 | .into_iter() 152 | .next() 153 | .unwrap() 154 | .leaves; 155 | let mut children = vec![child]; 156 | children.append(&mut child_list); 157 | 158 | tree.with_leaves(children) 159 | } 160 | 3 => { 161 | // with separator 162 | let child = self.tokens[0] 163 | .to_term_tree(term_to_display, nonterm_to_display) 164 | .into_iter() 165 | .next() 166 | .unwrap(); 167 | let separator = self.tokens[1] 168 | .to_term_tree(term_to_display, nonterm_to_display) 169 | .into_iter() 170 | .next() 171 | .unwrap(); 172 | let mut child_list = self.tokens[2] 173 | .to_term_tree(term_to_display, nonterm_to_display) 174 | .into_iter() 175 | .next() 176 | .unwrap() 177 | .leaves; 178 | let mut children = vec![child]; 179 | children.push(separator); 180 | children.append(&mut child_list); 181 | 182 | tree.with_leaves(children) 183 | } 184 | _ => { 185 | unreachable!("PlusRight length of child: {}", self.tokens.len()) 186 | } 187 | }; 188 | vec![tree] 189 | } 190 | // remove left/right recursion, make it to flat array 191 | Some(NonTerminalType::Optional) => { 192 | let tree = TermTree::new(nonterm_name); 193 | let tree = 194 | if self.tokens.is_empty() { 195 | tree 196 | } else { 197 | tree.with_leaves(self.tokens.iter().flat_map(|token| { 198 | token.to_term_tree(term_to_display, nonterm_to_display) 199 | })) 200 | }; 201 | vec![tree] 202 | } 203 | 204 | // show the literal directly 205 | Some(NonTerminalType::LiteralString) => { 206 | let tree = TermTree::new(nonterm_name); 207 | vec![tree] 208 | } 209 | } 210 | } 211 | } 212 | 213 | /// Tree representation of single token. 214 | /// User must enable feature `tree` to use this. 
215 | #[derive(Clone)] 216 | pub enum Tree { 217 | Terminal(TerminalSymbol), 218 | NonTerminal(TreeNonTerminal), 219 | } 220 | 221 | impl Tree { 222 | pub fn new_terminal(term: TerminalSymbol) -> Self { 223 | Tree::Terminal(term) 224 | } 225 | pub fn new_nonterminal(nonterm: NonTerm, tokens: Vec>) -> Self { 226 | Tree::NonTerminal(TreeNonTerminal::new(nonterm, tokens)) 227 | } 228 | 229 | /// convert this tree to termtree::Tree using Display trait 230 | pub(crate) fn to_term_tree( 231 | &self, 232 | term_to_display: &impl Fn(&TerminalSymbol) -> D, 233 | nonterm_to_display: &impl Fn(&NonTerm) -> D, 234 | ) -> Vec> 235 | where 236 | NonTerm: NonTerminal, 237 | { 238 | match self { 239 | Tree::Terminal(term) => vec![TermTree::new(term_to_display(term))], 240 | Tree::NonTerminal(nonterm) => nonterm.to_term_tree(term_to_display, nonterm_to_display), 241 | } 242 | } 243 | } 244 | 245 | impl Display for Tree { 246 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 247 | let child = self.to_term_tree(&|term| term.to_string(), &|nonterm| { 248 | nonterm.as_str().to_string() 249 | }); 250 | write!( 251 | f, 252 | "{}", 253 | TermTree::new("Tree".to_string()).with_leaves(child) 254 | ) 255 | } 256 | } 257 | impl Debug for Tree { 258 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 259 | let child = self.to_term_tree(&|term| format!("{:?}", term), &|nonterm| { 260 | nonterm.as_str().to_string() 261 | }); 262 | write!( 263 | f, 264 | "{}", 265 | TermTree::new("Tree".to_string()).with_leaves(child) 266 | ) 267 | } 268 | } 269 | 270 | /// List of [`Tree`] 271 | #[derive(Clone)] 272 | pub struct TreeList { 273 | pub trees: Vec>, 274 | } 275 | impl Deref for TreeList { 276 | type Target = Vec>; 277 | fn deref(&self) -> &Self::Target { 278 | &self.trees 279 | } 280 | } 281 | impl DerefMut for TreeList { 282 | fn deref_mut(&mut self) -> &mut Self::Target { 283 | &mut self.trees 284 | } 285 | } 286 | impl TreeList { 287 | /// create new empty 
tree list 288 | pub fn new() -> Self { 289 | Self { trees: Vec::new() } 290 | } 291 | } 292 | impl Display for TreeList { 293 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 294 | let tree = 295 | TermTree::new("TreeList".to_string()).with_leaves(self.trees.iter().flat_map(|tree| { 296 | tree.to_term_tree(&|term| term.to_string(), &|nonterm| { 297 | nonterm.as_str().to_string() 298 | }) 299 | })); 300 | write!(f, "{}", tree) 301 | } 302 | } 303 | impl Debug for TreeList { 304 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 305 | let tree = 306 | TermTree::new("TreeList".to_string()).with_leaves(self.trees.iter().flat_map(|tree| { 307 | tree.to_term_tree(&|term| format!("{:?}", term), &|nonterm| { 308 | nonterm.as_str().to_string() 309 | }) 310 | })); 311 | write!(f, "{}", tree) 312 | } 313 | } 314 | impl Default for TreeList { 315 | /// create new empty tree list 316 | fn default() -> Self { 317 | Self::new() 318 | } 319 | } 320 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rusty_lr 2 | [![crates.io](https://img.shields.io/crates/v/rusty_lr.svg)](https://crates.io/crates/rusty_lr) 3 | [![docs.rs](https://docs.rs/rusty_lr/badge.svg)](https://docs.rs/rusty_lr) 4 | 5 | ***A Bison-like Parser generator & Compiler frontend for Rust generating optimised IELR(1), LALR(1) parser tables, with deterministic LR and non-deterministic LR (GLR) parsing.*** 6 | 7 | RustyLR is a parser generator that converts context-free grammars into IELR(1)/LALR(1) tables and supporting deterministic LR and non-deterministic GLR parsing strategies. It supports custom reduce actions in Rust, with beautiful diagnostics. 8 | Highly inspired by tools like *bison*, it uses a similar syntax while integrating seamlessly with Rust's ecosystem. 
9 | It constructs optimized state machines, ensuring efficient and reliable parsing. 10 | 11 | ![title](images/title.png) 12 | 13 | ## Features 14 | - **Custom Reduce Actions:** Define custom actions in Rust, allowing you to build custom data structures easily. 15 | - **Automatic Optimization:** Reduces parser table size and improves performance by grouping terminals with identical behavior across parser states. 16 | - **Multiple Parsing Strategies:** Supports minimal-LR(1), LALR(1) parser tables, and GLR parsing strategy. 17 | - **Detailed Diagnostics:** Detects grammar conflicts, verbose conflict resolution stages, and optimization stages. 18 | - **Static & Runtime Conflict Resolution:** Provides mechanisms to resolve conflicts at compile time or runtime. 19 | - **Location Tracking:** Tracks the location of every token in the parse tree, useful for error reporting and debugging. 20 | - **State Machine Debugging:** The `rustylr` executable provides a `--state` option that allows you to debug and visualize the generated state machine. This is useful for understanding how the parser will behave and for identifying potential issues in the grammar. 21 | 22 | ## Quick Start: Using the `rustylr` Executable 23 | 24 | The recommended way to use RustyLR is with the standalone `rustylr` executable. It's faster, provides richer grammar diagnostics, and includes commands for debugging state machines directly. 25 | 26 | Here is a step-by-step guide to get you started. 27 | 28 | **1. Add `rusty_lr` to your dependencies** 29 | 30 | First, add the `rusty_lr` runtime library to your project's `Cargo.toml`. The generated parser code will depend on it. 31 | 32 | ```toml 33 | [dependencies] 34 | rusty_lr = "..." # Use the same version as the executable 35 | ``` 36 | 37 | **2. Install the `rustylr` executable** 38 | 39 | You can install the executable from crates.io using `cargo`: 40 | 41 | ```bash 42 | cargo install rustylr 43 | ``` 44 | 45 | **3. 
Create a grammar file** 46 | 47 | Create a file named `src/grammar.rs`. This file will contain your token definitions and grammar rules. Any Rust code above the `%%` separator will be copied directly to the generated output file. 48 | 49 | ```rust 50 | // src/grammar.rs 51 | // This code is copied to the generated file. 52 | pub enum MyToken { 53 | Num(i32), 54 | Plus, 55 | } 56 | 57 | %% // Grammar rules start here. 58 | 59 | %tokentype MyToken; 60 | %start E; 61 | %left plus; // Specify left-associativity for the 'plus' token. 62 | 63 | // Define tokens and how they map to MyToken variants. 64 | %token num MyToken::Num(_); 65 | %token plus MyToken::Plus; 66 | 67 | // Define grammar rules and their return types. 68 | // E(i32) means the non-terminal E returns an i32. 69 | // In the action blocks `{ ... }`, you can refer to the values of symbols 70 | // on the right-hand side by their names (e.g., `e1`, `e2`, `num`). 71 | E(i32): e1=E plus e2=E { e1 + e2 } 72 | | num { let MyToken::Num(num) = num else { unreachable!(); }; 73 | num 74 | } 75 | ; 76 | ``` 77 | 78 | **4. Generate the parser code** 79 | 80 | Run the `rustylr` executable to process your grammar file. This command will generate `src/parser.rs` from `src/grammar.rs`. 81 | 82 | ```bash 83 | rustylr src/grammar.rs src/parser.rs 84 | ``` 85 | 86 | **5. Use the generated parser in your code** 87 | 88 | Finally, include the newly generated `src/parser.rs` as a module in your `main.rs` or `lib.rs` and use it to parse a token stream. 89 | 90 | ```rust 91 | // In src/main.rs 92 | 93 | // Include the generated parser module. 94 | mod parser; 95 | // Bring the token enum into scope. 
96 | use parser::MyToken; 97 | 98 | fn main() { 99 | // Example token stream for "1 + 2" 100 | let tokens = vec![MyToken::Num(1), MyToken::Plus, MyToken::Num(2)]; 101 | 102 | let parser = parser::EParser::new(); // Assumes 'E' is your start symbol 103 | let mut context = parser::EContext::new(); 104 | let mut userdata = (); // No userdata in this example. 105 | 106 | for token in tokens { 107 | match context.feed(&parser, token, &mut userdata) { 108 | Ok(_) => {} 109 | Err(e) => { 110 | eprintln!("Parse error: {}", e); 111 | return; 112 | } 113 | } 114 | } 115 | 116 | // Get the final parsed result. 117 | match context.accept(&parser) { 118 | Ok(result) => { 119 | let final_result: i32 = result; 120 | println!("Parsed result: {}", final_result); // Should print "3" 121 | }, 122 | Err(e) => { 123 | eprintln!("Failed to produce a final result: {}", e); 124 | } 125 | } 126 | } 127 | ``` 128 | 129 | **Important:** Ensure the version of the `rustylr` executable you run matches the version of the `rusty_lr` crate in your `Cargo.toml`. Mismatched versions can lead to build errors. 130 | 131 | 132 | 133 | ## Generated Code Structure 134 | 135 | The generated code will include several structs and enums: 136 | - `Parser`: A struct that holds the parser table. [(LR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/lr/trait.Parser.html) [(GLR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/glr/trait.Parser.html) 137 | - `Context`: A struct that maintains the current parsing state and symbol values. [(LR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/lr/struct.Context.html) [(GLR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/glr/struct.Context.html) 138 | - `State`: A type representing a parser state and its associated table. 139 | - `Rule`: A type representing a production rule. [(docs)](https://docs.rs/rusty_lr/latest/rusty_lr/struct.ProductionRule.html) 140 | - `NonTerminals`: An enum representing all non-terminal symbols in the grammar. 
[(docs)](https://docs.rs/rusty_lr/latest/rusty_lr/trait.NonTerminal.html) 141 | 142 | 143 | ### Working with Context 144 | You can also get contextual information from the `Context` struct: 145 | ```rust 146 | let mut context = Context::new(); 147 | 148 | // ... parsing ... 149 | 150 | context.expected_token(); // Get expected (terminal, non-terminal) symbols for current state 151 | context.can_feed(&term); // Check if a terminal symbol can be fed 152 | context.trace(); // Get all `%trace` non-terminals currently being parsed 153 | println!("{}", context.backtrace()); // Print backtrace of the parser state 154 | println!("{}", context); // Print tree structure of the parser state (`tree` feature) 155 | ``` 156 | 157 | ### The Feed Method 158 | The generated code includes a `feed` method that processes tokens: 159 | 160 | ```rust 161 | context.feed(&parser, term, &mut userdata); // Feed a terminal symbol and update the state machine 162 | context.feed_location(&parser, term, &mut userdata, term_location); // Feed a terminal symbol with location tracking 163 | ``` 164 | 165 | This method returns `Ok(())` if the token was successfully parsed, or an `Err` if there was an error. 166 | 167 | **Note:** The actual method signatures differ slightly when building a GLR parser. 168 | 169 | ## GLR Parsing 170 | RustyLR offers built-in support for Generalized LR (GLR) parsing, enabling it to handle ambiguous or nondeterministic grammars that traditional LR(1) or LALR(1) parsers cannot process. 171 | See [GLR.md](GLR.md) for details. 
172 | 173 | ## Error Handling and Conflict Resolution 174 | RustyLR provides multiple mechanisms for handling semantic errors and resolving conflicts during parsing: 175 | - **Panic Mode Error Recovery:** Use the `error` token for panic-mode error recovery 176 | - **Operator Precedence:** Set precedence with `%left`, `%right`, `%precedence` for terminals 177 | - **Reduce Rule Priority:** Set priority with `%dprec` for production rules 178 | - **Runtime Errors:** Return `Err` from reduce actions to handle semantic errors 179 | 180 | See [SYNTAX.md - Resolving Conflicts](SYNTAX.md#resolving-conflicts) for detailed information. 181 | 182 | ## Location Tracking 183 | Track the location of tokens and non-terminals for better error reporting and debugging: 184 | 185 | ```rust 186 | Expr: exp1=Expr '+' exp2=Expr { 187 | println!("Location of exp1: {:?}", @exp1); 188 | println!("Location of exp2: {:?}", @exp2); 189 | println!("Location of this expression: {:?}", @$); // @$ is the location of the non-terminal itself 190 | exp1 + exp2 191 | } 192 | | Expr error Expr { 193 | println!("Error at: {:?}", @error); // @error is the location of the error token 194 | 0 // Return a default value 195 | } 196 | ``` 197 | 198 | See [SYNTAX.md - Location Tracking](SYNTAX.md#location-tracking) for detailed information. 199 | 200 | ## State Machine Debugging 201 | The `rustylr` executable includes a powerful `--state` option for debugging the generated parser's state machine. This feature allows you to inspect the details of each state, including its production rules, expected tokens, and transitions to other states. It is an invaluable tool for diagnosing grammar ambiguities, understanding shift/reduce conflicts, and verifying that the parser behaves as expected. 
202 | 203 | To use it, run `rustylr` with the `--state` flag, followed by your grammar file: 204 | 205 | ```bash 206 | rustylr --state src/grammar.rs 207 | ``` 208 | 209 | This will output a detailed, color-coded representation of the state machine directly in your terminal, making it easy to trace the parser's logic. 210 | 211 | ![State Machine Debug](images/state_option.png) 212 | 213 | This visualization helps you understand the parsing process step-by-step and is particularly useful for debugging complex grammars. 214 | 215 | ## Examples 216 | - [Calculator (enum version)](example/calculator/src/parser.rs): A numeric expression parser using custom token enums 217 | - [Calculator (u8 version)](example/calculator_u8/src/parser.rs): A numeric expression parser using byte tokens 218 | - [JSON Validator](example/json/src/parser.rs): A JSON syntax validator 219 | - [Lua 5.4 syntax parser](https://github.com/ehwan/lua_rust/blob/main/parser/src/parser.rs): A complete Lua language parser 220 | - [C language parser](https://github.com/ehwan/C-language-Parser-In-Rust/blob/main/src/ast/parser_lr.rs): A C language parser 221 | - [Bootstrap parser](rusty_lr_parser/src/parser/parser.rs): RustyLR's own syntax parser is written in RustyLR itself 222 | 223 | ## Cargo Features 224 | - `build`: Enables build script tools for generating parsers at compile time. 225 | - `tree`: Enables automatic syntax tree construction for debugging purposes. Makes `Context` implement `Display` for pretty-printing. 226 | 227 | ## Grammar Syntax 228 | RustyLR's grammar syntax is inspired by traditional Yacc/Bison formats. 229 | See [SYNTAX.md](SYNTAX.md) for detailed grammar definition syntax. 230 | 231 | ## Contributing 232 | Contributions are welcome! Please feel free to open an issue or submit a pull request.
233 | 234 | ### Project Structure 235 | This project is organized as a Cargo workspace with the following crates: 236 | 237 | - **`rusty_lr/`**: The main end-user library that provides the public API. This is what users add to their `Cargo.toml`. 238 | - **`rusty_lr_core/`**: Core parsing engine containing the fundamental data structures, algorithms, and runtime components for both deterministic (`src/parser/deterministic`) and non-deterministic (`src/parser/nondeterministic`) parsing. 239 | - **`rusty_lr_parser/`**: The main code generation engine that parses RustyLR's grammar syntax, builds parser tables, and generates the actual parser code. This is the core of the parser generation process. 240 | - **`rusty_lr_derive/`**: Procedural macro interface that wraps `rusty_lr_parser` to provide the `lr1!` macro for inline grammar definitions. 241 | - **`rusty_lr_buildscript/`**: Build script interface that wraps `rusty_lr_parser` for generating parser code at compile time when using the `build` feature. 242 | - **`rusty_lr_executable/`**: Standalone `rustylr` executable for command-line parser generation. 
243 | - **`scripts/`**: Development and testing scripts 244 | 245 | The crates have the following dependency relationships: 246 | - `rusty_lr` depends on `rusty_lr_core`, `rusty_lr_derive`, and `rusty_lr_buildscript` (optional) 247 | - `rusty_lr_derive` and `rusty_lr_buildscript` depend on `rusty_lr_parser` 248 | - `rusty_lr_parser` depends on `rusty_lr_core` 249 | - `rusty_lr_executable` depends on `rusty_lr_buildscript` 250 | 251 | ```mermaid 252 | graph TD; 253 | subgraph User Facing 254 | rusty_lr; 255 | rusty_lr_executable; 256 | end 257 | 258 | subgraph Internal 259 | rusty_lr_derive; 260 | rusty_lr_buildscript; 261 | rusty_lr_parser; 262 | rusty_lr_core; 263 | end 264 | 265 | rusty_lr --> rusty_lr_core; 266 | rusty_lr --> rusty_lr_derive; 267 | rusty_lr --> rusty_lr_buildscript; 268 | 269 | rusty_lr_derive --> rusty_lr_parser; 270 | rusty_lr_buildscript --> rusty_lr_parser; 271 | 272 | rusty_lr_executable --> rusty_lr_buildscript; 273 | 274 | rusty_lr_parser --> rusty_lr_core; 275 | ``` 276 | 277 | 278 | ### About the Versioning 279 | RustyLR consists of two big parts: 280 | - executable (`rustylr`), the code generator 281 | - runtime (`rusty_lr`), the main library 282 | 283 | Since the `cargo` automatically uses the latest patch in `major.minor.patch` version of a crate, we increase the patch number only if the generated code is compatible with the runtime. That is, for any user who is not using buildscript or proc-macro, and using the executable-generated code itself, 284 | any code change that could make compile errors with the previous generated code will result in a minor version bump. 
285 | 286 | ## License 287 | This project is dual-licensed under either of the following licenses, at your option: 288 | 289 | - MIT License ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 290 | - Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 291 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/error.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Ident; 2 | use proc_macro2::Literal; 3 | use proc_macro2::Span; 4 | use proc_macro2::TokenStream; 5 | 6 | use quote::quote_spanned; 7 | 8 | use crate::parser::args::IdentOrLiteral; 9 | 10 | /// failed to feed() the token 11 | #[non_exhaustive] 12 | #[derive(Debug)] 13 | pub enum ParseArgError { 14 | /// feed() failed 15 | MacroLineParse { span: Span, message: String }, 16 | } 17 | 18 | #[non_exhaustive] 19 | #[derive(Debug)] 20 | pub enum ArgError { 21 | MultipleModulePrefixDefinition((Span, TokenStream), (Span, TokenStream)), 22 | MultipleUserDataDefinition((Span, TokenStream), (Span, TokenStream)), 23 | MultipleErrorDefinition((Span, TokenStream), (Span, TokenStream)), 24 | MultipleTokenTypeDefinition((Span, TokenStream), (Span, TokenStream)), 25 | MultipleEofDefinition((Span, TokenStream), (Span, TokenStream)), 26 | MultipleStartDefinition(Ident, Ident), 27 | 28 | StartNotDefined, 29 | EofNotDefined, 30 | TokenTypeNotDefined, 31 | 32 | /// multiple %prec in the same rule 33 | MultiplePrecDefinition(Span), 34 | /// multiple %dprec in the same rule 35 | MultipleDPrecDefinition(Span), 36 | } 37 | 38 | #[non_exhaustive] 39 | #[derive(Debug)] 40 | pub enum ConflictError { 41 | /// error building given CFG 42 | ShiftReduceConflict { 43 | term: String, 44 | reduce_rule: (usize, rusty_lr_core::rule::ProductionRule), 45 | shift_rules: Vec<(usize, rusty_lr_core::rule::ShiftedRule)>, 46 | }, 47 | /// error building given CFG 48 | 
ReduceReduceConflict { 49 | lookahead: String, 50 | rule1: (usize, rusty_lr_core::rule::ProductionRule), 51 | rule2: (usize, rusty_lr_core::rule::ProductionRule), 52 | }, 53 | } 54 | 55 | #[non_exhaustive] 56 | #[derive(Debug)] 57 | pub enum ParseError { 58 | MultipleRuleDefinition(Ident, Ident), 59 | 60 | /// different reduce type applied to the same terminal symbol 61 | MultipleReduceDefinition { 62 | terminal: String, 63 | old: (Span, rusty_lr_core::rule::ReduceType), 64 | new: (Span, rusty_lr_core::rule::ReduceType), 65 | }, 66 | 67 | /// multiple %token definition 68 | MultipleTokenDefinition(Ident, Ident), 69 | 70 | /// same name for terminal and non-terminal exists 71 | TermNonTermConflict { 72 | name: Ident, 73 | terminal: Ident, 74 | non_terminal: Ident, 75 | }, 76 | 77 | InvalidTerminalRange((Ident, usize, TokenStream), (Ident, usize, TokenStream)), 78 | 79 | /// name given to %start not defined 80 | StartNonTerminalNotDefined(Ident), 81 | 82 | /// unknown terminal symbol name 83 | TerminalNotDefined(Ident), 84 | 85 | /// can't use reserved keyword as token name 86 | ReservedName(Ident), 87 | 88 | /// not supported literal type 89 | UnsupportedLiteralType(TokenStream), 90 | 91 | /// range in literal terminal set is not valid 92 | InvalidLiteralRange(Literal, Literal), 93 | 94 | /// TokenType in Literal mode is not supported 95 | TokenInLiteralMode(Span), 96 | 97 | /// conflicts in precedence definition 98 | MultiplePrecedenceOrderDefinition { 99 | cur: IdentOrLiteral, 100 | old: Span, 101 | }, 102 | 103 | /// Precedence not defined for the given token 104 | PrecedenceNotDefined(IdentOrLiteral), 105 | 106 | /// All production rules in this non-terminal must have %prec defined 107 | NonTerminalPrecedenceNotDefined(Span, usize), 108 | 109 | /// ReduceAction must be defined but not defined 110 | RuleTypeDefinedButActionNotDefined { 111 | name: Ident, 112 | span: (Span, Span), 113 | }, 114 | 115 | /// Only terminal or terminal set is allowed 116 | 
OnlyTerminalSet(Span, Span), 117 | 118 | /// unknown non-terminal symbol name 119 | NonTerminalNotDefined(Ident), 120 | 121 | /// only 'usize' literal is allowed for %dprec 122 | OnlyUsizeLiteral(Span), 123 | } 124 | #[allow(unused)] 125 | impl ArgError { 126 | pub fn to_compile_error(&self) -> TokenStream { 127 | let span = self.span(); 128 | let message = self.short_message(); 129 | quote_spanned! { 130 | span=> 131 | compile_error!(#message); 132 | } 133 | } 134 | 135 | pub fn span(&self) -> Span { 136 | match self { 137 | ArgError::MultipleModulePrefixDefinition( 138 | (span1, tokenstream1), 139 | (span2, tokenstream2), 140 | ) => *span2, 141 | ArgError::MultipleUserDataDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 142 | *span2 143 | } 144 | ArgError::MultipleErrorDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 145 | *span2 146 | } 147 | ArgError::MultipleTokenTypeDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 148 | *span2 149 | } 150 | ArgError::MultipleEofDefinition((span1, tokenstream1), (span2, tokenstream2)) => *span2, 151 | ArgError::MultipleStartDefinition(old, new) => new.span(), 152 | 153 | ArgError::StartNotDefined => Span::call_site(), 154 | ArgError::EofNotDefined => Span::call_site(), 155 | ArgError::TokenTypeNotDefined => Span::call_site(), 156 | 157 | ArgError::MultiplePrecDefinition(span) => *span, 158 | ArgError::MultipleDPrecDefinition(span) => *span, 159 | } 160 | } 161 | 162 | pub fn short_message(&self) -> String { 163 | match self { 164 | ArgError::MultipleModulePrefixDefinition( 165 | (span1, tokenstream1), 166 | (span2, tokenstream2), 167 | ) => "Multiple %moduleprefix definition".into(), 168 | ArgError::MultipleUserDataDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 169 | "Multiple %userdata definition".into() 170 | } 171 | ArgError::MultipleErrorDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 172 | "Multiple %error definition".into() 173 | } 174 | 
ArgError::MultipleTokenTypeDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 175 | "Multiple %tokentype definition".into() 176 | } 177 | ArgError::MultipleEofDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 178 | "Multiple %eof definition".into() 179 | } 180 | ArgError::MultipleStartDefinition(old, new) => { 181 | format!("Multiple %start definition: {} and {}", old, new) 182 | } 183 | 184 | ArgError::StartNotDefined => "Start rule not defined\n>>> %start ;".into(), 185 | ArgError::EofNotDefined => "Eof not defined\n>>> %eof ;".into(), 186 | ArgError::TokenTypeNotDefined => { 187 | "Token type not defined\n>>> %tokentype ;".into() 188 | } 189 | 190 | ArgError::MultiplePrecDefinition(span) => "Multiple %prec definition".into(), 191 | ArgError::MultipleDPrecDefinition(span) => "Multiple %dprec definition".into(), 192 | } 193 | } 194 | } 195 | #[allow(unused)] 196 | impl ParseArgError { 197 | pub fn to_compile_error(&self) -> TokenStream { 198 | let span = self.span(); 199 | let message = self.short_message(); 200 | quote_spanned! { 201 | span=> 202 | compile_error!(#message); 203 | } 204 | } 205 | 206 | pub fn span(&self) -> Span { 207 | match self { 208 | ParseArgError::MacroLineParse { span, message } => *span, 209 | } 210 | } 211 | 212 | pub fn short_message(&self) -> String { 213 | match self { 214 | ParseArgError::MacroLineParse { span, message } => message.clone(), 215 | } 216 | } 217 | } 218 | 219 | #[allow(unused)] 220 | impl ParseError { 221 | pub fn to_compile_error(&self) -> TokenStream { 222 | let span = self.span(); 223 | let message = self.short_message(); 224 | quote_spanned! 
{ 225 | span=> 226 | compile_error!(#message); 227 | } 228 | } 229 | 230 | pub fn span(&self) -> Span { 231 | match self { 232 | ParseError::MultipleRuleDefinition(old, new) => new.span(), 233 | 234 | ParseError::MultipleReduceDefinition { terminal, old, new } => new.0, 235 | 236 | ParseError::TermNonTermConflict { 237 | name, 238 | terminal, 239 | non_terminal, 240 | } => name.span(), 241 | 242 | ParseError::InvalidTerminalRange((first, first_index, _), (last, last_index, _)) => { 243 | first.span() 244 | } 245 | 246 | ParseError::StartNonTerminalNotDefined(ident) => ident.span(), 247 | 248 | ParseError::TerminalNotDefined(ident) => ident.span(), 249 | 250 | ParseError::MultipleTokenDefinition(old, new) => new.span(), 251 | 252 | ParseError::ReservedName(ident) => ident.span(), 253 | 254 | ParseError::UnsupportedLiteralType(stream) => { 255 | stream.clone().into_iter().next().unwrap().span() 256 | } 257 | 258 | ParseError::InvalidLiteralRange(first, last) => first.span(), 259 | 260 | ParseError::TokenInLiteralMode(open_span) => *open_span, 261 | 262 | ParseError::MultiplePrecedenceOrderDefinition { cur, old } => cur.span(), 263 | ParseError::PrecedenceNotDefined(name) => name.span(), 264 | ParseError::NonTerminalPrecedenceNotDefined(span, _) => *span, 265 | 266 | ParseError::RuleTypeDefinedButActionNotDefined { name, span } => span.0, 267 | ParseError::OnlyTerminalSet(span_begin, span_end) => *span_begin, 268 | ParseError::NonTerminalNotDefined(ident) => ident.span(), 269 | ParseError::OnlyUsizeLiteral(span) => *span, 270 | } 271 | } 272 | 273 | pub fn short_message(&self) -> String { 274 | match self { 275 | ParseError::MultipleRuleDefinition(old, new) => { 276 | format!("Multiple rule definition with same name: {}", old) 277 | } 278 | 279 | ParseError::MultipleReduceDefinition { terminal, old, new } => { 280 | format!("Differnt reduce type (%left and %right) applied to the same terminal symbol: {}", terminal) 281 | } 282 | 283 | ParseError::TermNonTermConflict { 
284 | name, 285 | terminal, 286 | non_terminal, 287 | } => { 288 | format!("Same name for terminal and non-terminal exists: {}", name) 289 | } 290 | 291 | ParseError::InvalidTerminalRange((first, first_index, _), (last, last_index, _)) => { 292 | format!( 293 | "Invalid terminal range: [{}({}) - {}({})]", 294 | first, first_index, last, last_index 295 | ) 296 | } 297 | 298 | ParseError::StartNonTerminalNotDefined(ident) => { 299 | format!("Name given to %start not defined: {}", ident) 300 | } 301 | 302 | ParseError::TerminalNotDefined(ident) => { 303 | format!("Unknown terminal symbol name: {}", ident) 304 | } 305 | 306 | ParseError::MultipleTokenDefinition(old, new) => { 307 | format!("Multiple %token definition with same name: {}", old) 308 | } 309 | 310 | ParseError::ReservedName(ident) => { 311 | format!("'{}' is reserved name", ident) 312 | } 313 | 314 | ParseError::UnsupportedLiteralType(literal) => { 315 | format!("Not supported literal type: {}", literal) 316 | } 317 | 318 | ParseError::InvalidLiteralRange(first, last) => { 319 | format!( 320 | "Range in literal terminal set is not valid: [{} - {}]", 321 | first, last 322 | ) 323 | } 324 | 325 | ParseError::TokenInLiteralMode(_) => { 326 | "%token with %tokentype `char` or `u8` is not supported. 
Use 'a' or b'a' instead" 327 | .to_string() 328 | } 329 | 330 | ParseError::MultiplePrecedenceOrderDefinition { cur, old } => { 331 | format!("Conflicts with precedence definition: {}", cur) 332 | } 333 | ParseError::PrecedenceNotDefined(name) => { 334 | format!("Precedence not defined for the given token: {}", name) 335 | } 336 | ParseError::NonTerminalPrecedenceNotDefined(span, nonterm_idx) => { 337 | "All production rules in this non-terminal must have %prec defined".into() 338 | } 339 | 340 | ParseError::RuleTypeDefinedButActionNotDefined { name, span } => { 341 | "ReduceAction must be defined for this rule".into() 342 | } 343 | ParseError::OnlyTerminalSet(_, _) => "Only terminal or terminal set is allowed".into(), 344 | ParseError::NonTerminalNotDefined(ident) => { 345 | format!("Unknown non-terminal symbol name: {}", ident) 346 | } 347 | ParseError::OnlyUsizeLiteral(_) => "Only 'usize' literal is allowed for %dprec".into(), 348 | } 349 | } 350 | } 351 | 352 | #[allow(unused)] 353 | impl ConflictError { 354 | pub fn to_compile_error(&self) -> TokenStream { 355 | let span = self.span(); 356 | let message = self.short_message(); 357 | quote_spanned! 
{ 358 | span=> 359 | compile_error!(#message); 360 | } 361 | } 362 | 363 | pub fn span(&self) -> Span { 364 | match self { 365 | ConflictError::ShiftReduceConflict { 366 | term, 367 | reduce_rule: (ruleid, rule), 368 | shift_rules, 369 | } => Span::call_site(), 370 | ConflictError::ReduceReduceConflict { 371 | lookahead, 372 | rule1: (ruleid1, rule1), 373 | rule2: (ruleid2, rule2), 374 | } => Span::call_site(), 375 | } 376 | } 377 | 378 | pub fn short_message(&self) -> String { 379 | match self { 380 | ConflictError::ShiftReduceConflict { 381 | term, 382 | reduce_rule: (ruleid, rule), 383 | shift_rules, 384 | } => { 385 | format!( 386 | "Shift-Reduce conflict with terminal symbol: {}\n>>> Reduce: {}\n>>> Shifts: {}", 387 | term, 388 | rule, 389 | shift_rules 390 | .iter() 391 | .map(|(ruleid, rule)| format!("{}", rule)) 392 | .collect::>() 393 | .join("\n>>>") 394 | ) 395 | } 396 | ConflictError::ReduceReduceConflict { 397 | lookahead, 398 | rule1: (ruleid1, rule1), 399 | rule2: (ruleid2, rule2), 400 | } => { 401 | format!( 402 | "Reduce-Reduce conflict with lookahead symbol: {}\n>>> Rule1: {}\n>>> Rule2: {}", 403 | lookahead, rule1, rule2 404 | ) 405 | } 406 | } 407 | } 408 | } 409 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/parser/lexer.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Delimiter; 2 | use proc_macro2::Group; 3 | use proc_macro2::Ident; 4 | use proc_macro2::Literal; 5 | use proc_macro2::Punct; 6 | use proc_macro2::TokenStream; 7 | use proc_macro2::TokenTree; 8 | 9 | use quote::TokenStreamExt; 10 | 11 | use super::args::GrammarArgs; 12 | use super::parser_expanded::GrammarContext; 13 | use super::parser_expanded::GrammarParseError; 14 | use super::parser_expanded::GrammarParser; 15 | 16 | #[allow(dead_code)] 17 | #[derive(Clone, Debug)] 18 | pub enum Lexed { 19 | Ident(Ident), 20 | Colon(Punct), 21 | Semicolon(Punct), 22 | Pipe(Punct), 
23 | Percent(Punct), 24 | Equal(Punct), 25 | Plus(Punct), 26 | Star(Punct), 27 | Question(Punct), 28 | Caret(Punct), 29 | Minus(Punct), 30 | Exclamation(Punct), 31 | Slash(Punct), 32 | Dot(Punct), 33 | Dollar(Punct), 34 | Comma(Punct), 35 | OtherPunct(Punct), 36 | 37 | Literal(Literal), 38 | 39 | ParenGroup(Group), 40 | BraceGroup(Group), 41 | BracketGroup(Group), 42 | NoneGroup(Group), 43 | LParen, 44 | RParen, 45 | LBrace, 46 | RBrace, 47 | LBracket, 48 | RBracket, 49 | 50 | Left(Ident), // %left, %l, %reduce 51 | Right(Ident), // %right, %r, %shift 52 | Token(Ident), // %token 53 | Start(Ident), // %start 54 | TokenType(Ident), // %tokentype 55 | UserData(Ident), // %userdata 56 | ErrorType(Ident), // %err %error 57 | ModulePrefix(Ident), // %moduleprefix 58 | Lalr(Ident), // %lalr 59 | Glr(Ident), // %glr 60 | Prec(Ident), // %prec 61 | Precedence(Ident), // %precedence 62 | NoOptim(Ident), // %nooptim 63 | Dense(Ident), // %dense 64 | Trace(Ident), // %trace 65 | DPrec(Ident), // %dprec 66 | Filter(Ident), // %filter 67 | Location(Ident), // %location 68 | } 69 | impl Lexed { 70 | pub fn append_to_stream(self, stream: &mut TokenStream) { 71 | match self { 72 | Lexed::Ident(ident) => stream.append(ident), 73 | Lexed::Colon(punct) => stream.append(punct), 74 | Lexed::Semicolon(punct) => stream.append(punct), 75 | Lexed::Pipe(punct) => stream.append(punct), 76 | Lexed::Percent(punct) => stream.append(punct), 77 | Lexed::Equal(punct) => stream.append(punct), 78 | Lexed::Plus(punct) => stream.append(punct), 79 | Lexed::Star(punct) => stream.append(punct), 80 | Lexed::Question(punct) => stream.append(punct), 81 | Lexed::Caret(punct) => stream.append(punct), 82 | Lexed::Minus(punct) => stream.append(punct), 83 | Lexed::Exclamation(punct) => stream.append(punct), 84 | Lexed::Slash(punct) => stream.append(punct), 85 | Lexed::Dot(punct) => stream.append(punct), 86 | Lexed::Dollar(punct) => stream.append(punct), 87 | Lexed::Comma(punct) => stream.append(punct), 88 | 
Lexed::OtherPunct(punct) => stream.append(punct), 89 | 90 | Lexed::Literal(lit) => stream.append(lit), 91 | 92 | Lexed::ParenGroup(group) => stream.append(group), 93 | Lexed::BraceGroup(group) => stream.append(group), 94 | Lexed::BracketGroup(group) => stream.append(group), 95 | Lexed::NoneGroup(group) => stream.append(group), 96 | 97 | Lexed::LParen => unreachable!("LParen::stream()"), 98 | Lexed::RParen => unreachable!("RParen::stream()"), 99 | Lexed::LBrace => unreachable!("LBrace::stream()"), 100 | Lexed::RBrace => unreachable!("RBrace::stream()"), 101 | Lexed::LBracket => unreachable!("LBracket::stream()"), 102 | Lexed::RBracket => unreachable!("RBracket::stream()"), 103 | 104 | Lexed::Left(ident) => { 105 | stream.append(ident); 106 | } 107 | Lexed::Right(ident) => { 108 | stream.append(ident); 109 | } 110 | Lexed::Token(ident) => { 111 | stream.append(ident); 112 | } 113 | Lexed::Start(ident) => { 114 | stream.append(ident); 115 | } 116 | Lexed::TokenType(ident) => { 117 | stream.append(ident); 118 | } 119 | Lexed::UserData(ident) => { 120 | stream.append(ident); 121 | } 122 | Lexed::ErrorType(ident) => { 123 | stream.append(ident); 124 | } 125 | Lexed::ModulePrefix(ident) => { 126 | stream.append(ident); 127 | } 128 | Lexed::Lalr(ident) => { 129 | stream.append(ident); 130 | } 131 | Lexed::Glr(ident) => { 132 | stream.append(ident); 133 | } 134 | Lexed::Prec(ident) => { 135 | stream.append(ident); 136 | } 137 | Lexed::Precedence(ident) => { 138 | stream.append(ident); 139 | } 140 | Lexed::NoOptim(ident) => { 141 | stream.append(ident); 142 | } 143 | Lexed::Dense(ident) => { 144 | stream.append(ident); 145 | } 146 | Lexed::Trace(ident) => { 147 | stream.append(ident); 148 | } 149 | Lexed::DPrec(ident) => { 150 | stream.append(ident); 151 | } 152 | Lexed::Filter(ident) => { 153 | stream.append(ident); 154 | } 155 | Lexed::Location(ident) => { 156 | stream.append(ident); 157 | } 158 | } 159 | } 160 | } 161 | impl std::fmt::Display for Lexed { 162 | fn 
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Human-readable token names, used in parser diagnostics.
        // NOTE(review): the empty "" format strings below appear to have lost
        // angle-bracketed placeholders (e.g. "<Ident>") during export — confirm
        // against the upstream source before relying on them.
        match self {
            Lexed::Ident(_) => write!(f, ""),
            Lexed::Colon(_) => write!(f, "':'"),
            Lexed::Semicolon(_) => write!(f, "';'"),
            Lexed::Pipe(_) => write!(f, "'|'"),
            Lexed::Percent(_) => write!(f, "'%'"),
            Lexed::Literal(_) => write!(f, ""),
            Lexed::Equal(_) => write!(f, "'='"),
            Lexed::Plus(_) => write!(f, "'+'"),
            Lexed::Star(_) => write!(f, "'*'"),
            Lexed::Question(_) => write!(f, "'?'"),
            Lexed::Caret(_) => write!(f, "'^'"),
            Lexed::Minus(_) => write!(f, "'-'"),
            Lexed::Exclamation(_) => write!(f, "'!'"),
            Lexed::Slash(_) => write!(f, "'/'"),
            Lexed::Dot(_) => write!(f, "'.'"),
            Lexed::Dollar(_) => write!(f, "'$'"),
            Lexed::Comma(_) => write!(f, "','"),
            Lexed::OtherPunct(p) => write!(f, "'{}'", p.as_char()),

            Lexed::ParenGroup(_) => write!(f, ""),
            Lexed::BraceGroup(_) => write!(f, ""),
            Lexed::BracketGroup(_) => write!(f, ""),
            Lexed::NoneGroup(_) => write!(f, ""),
            Lexed::LParen => write!(f, "'('"),
            Lexed::RParen => write!(f, "')'"),
            Lexed::LBrace => write!(f, "'{{'"),
            Lexed::RBrace => write!(f, "'}}'"),
            Lexed::LBracket => write!(f, "'['"),
            // bugfix: RBracket previously printed "'['" (copy-paste from the
            // LBracket arm); it must print the closing bracket.
            Lexed::RBracket => write!(f, "']'"),

            Lexed::Left(_) => write!(f, "left"),
            Lexed::Right(_) => write!(f, "right"),
            Lexed::Token(_) => write!(f, "token"),
            Lexed::Start(_) => write!(f, "start"),
            Lexed::TokenType(_) => write!(f, "tokentype"),
            Lexed::UserData(_) => write!(f, "userdata"),
            Lexed::ErrorType(_) => write!(f, "error"),
            Lexed::ModulePrefix(_) => write!(f, "moduleprefix"),
            Lexed::Lalr(_) => write!(f, "lalr"),
            Lexed::Glr(_) => write!(f, "glr"),
            Lexed::Prec(_) => write!(f, "prec"),
            Lexed::Precedence(_) => write!(f, "precedence"),
            Lexed::NoOptim(_) => write!(f, "nooptim"),
            Lexed::Dense(_) => write!(f, "dense"),
Lexed::Trace(_) => write!(f, "trace"), 209 | Lexed::DPrec(_) => write!(f, "dprec"), 210 | Lexed::Filter(_) => write!(f, "filter"), 211 | Lexed::Location(_) => write!(f, "location"), 212 | } 213 | } 214 | } 215 | 216 | fn ident_to_keyword(ident: Ident) -> Option { 217 | match ident.to_string().as_str() { 218 | "left" | "l" | "reduce" => Some(Lexed::Left(ident)), 219 | "right" | "r" | "shift" => Some(Lexed::Right(ident)), 220 | "token" => Some(Lexed::Token(ident)), 221 | "start" => Some(Lexed::Start(ident)), 222 | "tokentype" => Some(Lexed::TokenType(ident)), 223 | "userdata" => Some(Lexed::UserData(ident)), 224 | "err" | "error" => Some(Lexed::ErrorType(ident)), 225 | "moduleprefix" => Some(Lexed::ModulePrefix(ident)), 226 | "lalr" => Some(Lexed::Lalr(ident)), 227 | "glr" => Some(Lexed::Glr(ident)), 228 | "prec" => Some(Lexed::Prec(ident)), 229 | "precedence" => Some(Lexed::Precedence(ident)), 230 | "nooptim" => Some(Lexed::NoOptim(ident)), 231 | "dense" => Some(Lexed::Dense(ident)), 232 | "trace" => Some(Lexed::Trace(ident)), 233 | "dprec" => Some(Lexed::DPrec(ident)), 234 | "filter" => Some(Lexed::Filter(ident)), 235 | "location" => Some(Lexed::Location(ident)), 236 | _ => None, 237 | } 238 | } 239 | 240 | /// lex & feed stream to parser 241 | /// For '%' directives and 'Group' variants, 242 | /// First tries to feed the Compound token 243 | /// if it failed, then feed the internal splitted tokens recursively 244 | pub fn feed_recursive( 245 | input: TokenStream, 246 | parser: &GrammarParser, 247 | context: &mut GrammarContext, 248 | grammar_args: &mut GrammarArgs, 249 | ) -> Result<(), GrammarParseError> { 250 | use super::span_pair::SpanPair; 251 | let mut input = input.into_iter().peekable(); 252 | 253 | while let Some(next) = input.next() { 254 | let location = SpanPair::new_single(next.span()); 255 | match next { 256 | TokenTree::Ident(ident) => { 257 | if let Some(keyword) = ident_to_keyword(ident.clone()) { 258 | if context.can_feed(parser, &keyword) { 259 
| context.feed_location(parser, keyword, grammar_args, location)?; 260 | } else { 261 | context.feed_location( 262 | parser, 263 | Lexed::Ident(ident), 264 | grammar_args, 265 | location, 266 | )?; 267 | } 268 | } else { 269 | context.feed_location(parser, Lexed::Ident(ident), grammar_args, location)?; 270 | } 271 | } 272 | TokenTree::Punct(punct) => match punct.as_char() { 273 | ':' => { 274 | context.feed_location(parser, Lexed::Colon(punct), grammar_args, location)? 275 | } 276 | ';' => context.feed_location( 277 | parser, 278 | Lexed::Semicolon(punct), 279 | grammar_args, 280 | location, 281 | )?, 282 | '|' => context.feed_location(parser, Lexed::Pipe(punct), grammar_args, location)?, 283 | '+' => context.feed_location(parser, Lexed::Plus(punct), grammar_args, location)?, 284 | '*' => context.feed_location(parser, Lexed::Star(punct), grammar_args, location)?, 285 | '?' => { 286 | context.feed_location(parser, Lexed::Question(punct), grammar_args, location)? 287 | } 288 | '^' => { 289 | context.feed_location(parser, Lexed::Caret(punct), grammar_args, location)? 290 | } 291 | '-' => { 292 | context.feed_location(parser, Lexed::Minus(punct), grammar_args, location)? 293 | } 294 | '=' => { 295 | context.feed_location(parser, Lexed::Equal(punct), grammar_args, location)? 296 | } 297 | '!' => context.feed_location( 298 | parser, 299 | Lexed::Exclamation(punct), 300 | grammar_args, 301 | location, 302 | )?, 303 | '/' => { 304 | context.feed_location(parser, Lexed::Slash(punct), grammar_args, location)? 305 | } 306 | '.' => context.feed_location(parser, Lexed::Dot(punct), grammar_args, location)?, 307 | '%' => { 308 | context.feed_location(parser, Lexed::Percent(punct), grammar_args, location)? 309 | } 310 | '$' => { 311 | context.feed_location(parser, Lexed::Dollar(punct), grammar_args, location)? 312 | } 313 | ',' => { 314 | context.feed_location(parser, Lexed::Comma(punct), grammar_args, location)? 
315 | } 316 | _ => context.feed_location( 317 | parser, 318 | Lexed::OtherPunct(punct), 319 | grammar_args, 320 | location, 321 | )?, 322 | }, 323 | TokenTree::Group(group) => match group.delimiter() { 324 | Delimiter::Parenthesis => { 325 | let token = Lexed::ParenGroup(group); 326 | if context.can_feed(parser, &token) { 327 | context.feed_location(parser, token, grammar_args, location)?; 328 | } else { 329 | let Lexed::ParenGroup(group) = token else { 330 | unreachable!(); 331 | }; 332 | // feed the splitted tokens 333 | context.feed_location( 334 | parser, 335 | Lexed::LParen, 336 | grammar_args, 337 | SpanPair::new_single(group.span_open()), 338 | )?; 339 | feed_recursive(group.stream(), parser, context, grammar_args)?; 340 | context.feed_location( 341 | parser, 342 | Lexed::RParen, 343 | grammar_args, 344 | SpanPair::new_single(group.span_close()), 345 | )?; 346 | } 347 | } 348 | Delimiter::Brace => { 349 | // for now, splitted for brace is not in syntax, so ignore it 350 | context.feed_location( 351 | parser, 352 | Lexed::BraceGroup(group), 353 | grammar_args, 354 | location, 355 | )?; 356 | } 357 | Delimiter::Bracket => { 358 | let token = Lexed::BracketGroup(group); 359 | if context.can_feed(parser, &token) { 360 | context.feed_location(parser, token, grammar_args, location)?; 361 | } else { 362 | let Lexed::BracketGroup(group) = token else { 363 | unreachable!(); 364 | }; 365 | // feed the splitted tokens 366 | context.feed_location( 367 | parser, 368 | Lexed::LBracket, 369 | grammar_args, 370 | SpanPair::new_single(group.span_open()), 371 | )?; 372 | feed_recursive(group.stream(), parser, context, grammar_args)?; 373 | context.feed_location( 374 | parser, 375 | Lexed::RBracket, 376 | grammar_args, 377 | SpanPair::new_single(group.span_close()), 378 | )?; 379 | } 380 | } 381 | _ => { 382 | // for now, compound for nonegroup is not in syntax, so ignore it 383 | context.feed_location( 384 | parser, 385 | Lexed::NoneGroup(group), 386 | grammar_args, 387 | 
location, 388 | )?; 389 | } 390 | }, 391 | TokenTree::Literal(literal) => { 392 | context.feed_location(parser, Lexed::Literal(literal), grammar_args, location)? 393 | } 394 | }; 395 | } 396 | Ok(()) 397 | } 398 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/state.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | 3 | use crate::hash::HashMap; 4 | use crate::parser::nonterminal::NonTerminal; 5 | use crate::parser::terminalclass::TerminalClass; 6 | use crate::TriState; 7 | 8 | #[derive(Debug, Clone, Copy)] 9 | pub struct ShiftTarget { 10 | pub state: StateIndex, 11 | /// true if the data should be pushed, false if data should not be pushed (so `Empty` tag will be pushed) 12 | pub push: bool, 13 | } 14 | impl ShiftTarget { 15 | pub fn new(state: StateIndex, push: bool) -> Self { 16 | ShiftTarget { state, push } 17 | } 18 | } 19 | 20 | /// This intermediate state is a common structure to convert from generated code and grammar builder 21 | /// into various types of parser states (SparseState, DenseState, ...). 22 | pub struct IntermediateState { 23 | pub shift_goto_map_term: Vec<(TermClass, ShiftTarget)>, // must be sorted 24 | pub shift_goto_map_nonterm: Vec<(NonTerm, ShiftTarget)>, // must be sorted 25 | pub reduce_map: Vec<(TermClass, Vec)>, // must be sorted 26 | pub ruleset: Vec, 27 | pub can_accept_error: TriState, 28 | } 29 | 30 | /// For state, terminal and class indices, we use the most compact integer type that can hold the maximum value. 31 | /// This trait defines the conversion between {u8, u16, u32, usize} <-> usize. 
32 | pub trait Index: Copy { 33 | fn into_usize(self) -> usize; 34 | fn from_usize_unchecked(value: usize) -> Self; 35 | } 36 | impl Index for usize { 37 | fn into_usize(self) -> usize { 38 | self 39 | } 40 | fn from_usize_unchecked(value: usize) -> Self { 41 | value 42 | } 43 | } 44 | impl Index for u8 { 45 | fn into_usize(self) -> usize { 46 | self as usize 47 | } 48 | fn from_usize_unchecked(value: usize) -> Self { 49 | value as u8 50 | } 51 | } 52 | impl Index for u16 { 53 | fn into_usize(self) -> usize { 54 | self as usize 55 | } 56 | fn from_usize_unchecked(value: usize) -> Self { 57 | value as u16 58 | } 59 | } 60 | impl Index for u32 { 61 | fn into_usize(self) -> usize { 62 | self as usize 63 | } 64 | fn from_usize_unchecked(value: usize) -> Self { 65 | value as u32 66 | } 67 | } 68 | 69 | /// Since non-deterministic parsers can have multiple reduce rules for a single terminal, 70 | /// we need to handle the set of reduce rules efficiently, usually 2~3 items. 71 | /// this trait implements the stack-allocated vector for this purpose. 
72 | pub trait ReduceRules { 73 | const CAP: usize; 74 | type RuleIndex: Index; 75 | 76 | fn to_iter(&self) -> impl Iterator + Clone; 77 | fn from_set>(set: Vec) -> Self; 78 | } 79 | 80 | /// For deterministic parser behavior 81 | impl ReduceRules for Integral { 82 | const CAP: usize = 1; 83 | type RuleIndex = Integral; 84 | 85 | fn to_iter(&self) -> impl Iterator + Clone { 86 | std::iter::once(*self) 87 | } 88 | fn from_set>(set: Vec) -> Self { 89 | debug_assert!(set.len() == 1, "Expected a single element set"); 90 | set.into_iter().next().unwrap().try_into().ok().unwrap() 91 | } 92 | } 93 | 94 | pub use arrayvec::ArrayVec; 95 | impl ReduceRules for ArrayVec { 96 | const CAP: usize = CAP; 97 | type RuleIndex = T; 98 | 99 | fn to_iter(&self) -> impl Iterator + Clone { 100 | self.iter().copied() 101 | } 102 | fn from_set>(set: Vec) -> Self { 103 | set.into_iter() 104 | .map(|value| value.try_into().ok().unwrap()) 105 | .collect() 106 | } 107 | } 108 | 109 | /// A trait representing a parser state. 110 | pub trait State { 111 | type TermClass: TerminalClass; 112 | type NonTerm: NonTerminal; 113 | type ReduceRules: ReduceRules; 114 | type StateIndex: Index; 115 | 116 | /// Get the next state for a given terminal symbol. 117 | fn shift_goto_class(&self, class: Self::TermClass) -> Option>; 118 | 119 | /// Get the next state for a given non-terminal symbol. 120 | fn shift_goto_nonterm(&self, nonterm: Self::NonTerm) -> Option>; 121 | /// Get the reduce rule index for a given terminal symbol. 122 | fn reduce(&self, class: Self::TermClass) -> Option<&Self::ReduceRules>; 123 | 124 | /// Check if this state is an accept state. 
125 | fn is_accept(&self) -> bool; 126 | 127 | /// Get the set of expected terminal classes for shift in this state 128 | fn expected_shift_term(&self) -> impl Iterator + '_; 129 | 130 | /// Get the set of expected non-terminal symbols for shift in this state 131 | fn expected_shift_nonterm(&self) -> impl Iterator + '_; 132 | 133 | /// Get the set of production rule for reduce in this state 134 | fn expected_reduce_rule(&self) -> impl Iterator + '_; 135 | 136 | /// Get the set of rules that this state is trying to parse 137 | fn get_rules(&self) -> &[crate::rule::ShiftedRuleRef]; 138 | 139 | fn can_accept_error(&self) -> TriState; 140 | } 141 | 142 | /// `State` implementation for a sparse state representation using HashMap 143 | #[derive(Debug, Clone)] 144 | pub struct SparseState { 145 | /// terminal symbol -> next state 146 | pub(crate) shift_goto_map_class: HashMap>, 147 | 148 | /// non-terminal symbol -> next state 149 | pub(crate) shift_goto_map_nonterm: HashMap>, 150 | 151 | /// terminal symbol -> reduce rule index 152 | pub(crate) reduce_map: HashMap, 153 | 154 | /// set of rules that this state is trying to parse 155 | pub(crate) ruleset: Vec, 156 | 157 | pub(crate) can_accept_error: TriState, 158 | } 159 | 160 | impl< 161 | TermClass: TerminalClass + Hash + Eq, 162 | NonTerm: NonTerminal + Hash + Eq, 163 | RuleContainer: ReduceRules, 164 | StateIndex: Index, 165 | > State for SparseState 166 | { 167 | type TermClass = TermClass; 168 | type NonTerm = NonTerm; 169 | type ReduceRules = RuleContainer; 170 | type StateIndex = StateIndex; 171 | 172 | fn shift_goto_class(&self, class: Self::TermClass) -> Option> { 173 | self.shift_goto_map_class.get(&class).copied() 174 | } 175 | fn shift_goto_nonterm(&self, nonterm: Self::NonTerm) -> Option> { 176 | self.shift_goto_map_nonterm.get(&nonterm).copied() 177 | } 178 | fn reduce(&self, class: Self::TermClass) -> Option<&Self::ReduceRules> { 179 | self.reduce_map.get(&class) 180 | } 181 | fn is_accept(&self) -> bool { 
182 | self.reduce_map.is_empty() 183 | && self.shift_goto_map_class.is_empty() 184 | && self.shift_goto_map_nonterm.is_empty() 185 | } 186 | fn expected_shift_term(&self) -> impl Iterator + '_ { 187 | self.shift_goto_map_class.keys().copied() 188 | } 189 | fn expected_shift_nonterm(&self) -> impl Iterator + '_ { 190 | self.shift_goto_map_nonterm.keys().copied() 191 | } 192 | fn expected_reduce_rule(&self) -> impl Iterator + '_ { 193 | self.reduce_map.values().flat_map(RuleContainer::to_iter) 194 | } 195 | fn get_rules(&self) -> &[crate::rule::ShiftedRuleRef] { 196 | &self.ruleset 197 | } 198 | fn can_accept_error(&self) -> TriState { 199 | self.can_accept_error 200 | } 201 | } 202 | 203 | /// `State` implementation for a dense state representation using Vec 204 | #[derive(Debug, Clone)] 205 | pub struct DenseState { 206 | /// terminal symbol -> next state 207 | pub(crate) shift_goto_map_class: Vec>>, 208 | /// shift_goto_map_class[i] will contain i+offset 'th class's next state. 209 | pub(crate) shift_class_offset: usize, 210 | /// set of terminal classes that is keys of `shift_goto_map_class` 211 | pub(crate) shift_goto_map_class_keys: Vec, 212 | 213 | /// non-terminal symbol -> next state 214 | pub(crate) shift_goto_map_nonterm: Vec>>, 215 | pub(crate) shift_nonterm_offset: usize, 216 | /// set of non-terminal symbols that is keys of `shift_goto_map_nonterm` 217 | pub(crate) shift_goto_map_nonterm_keys: Vec, 218 | 219 | /// terminal symbol -> reduce rule index 220 | pub(crate) reduce_map: Vec>, 221 | /// reduce_map[i] will contain i+offset 'th class's reduce rule. 
222 | pub(crate) reduce_offset: usize, 223 | 224 | /// set of rules that this state is trying to parse 225 | pub(crate) ruleset: Vec, 226 | 227 | pub(crate) can_accept_error: TriState, 228 | } 229 | impl< 230 | TermClass: TerminalClass, 231 | NonTerm: NonTerminal, 232 | RuleContainer: ReduceRules, 233 | StateIndex: Index, 234 | > State for DenseState 235 | { 236 | type TermClass = TermClass; 237 | type NonTerm = NonTerm; 238 | type ReduceRules = RuleContainer; 239 | type StateIndex = StateIndex; 240 | 241 | fn shift_goto_class(&self, class: Self::TermClass) -> Option> { 242 | self.shift_goto_map_class 243 | .get(class.to_usize().wrapping_sub(self.shift_class_offset)) 244 | .copied() 245 | .flatten() 246 | } 247 | fn shift_goto_nonterm(&self, nonterm: Self::NonTerm) -> Option> { 248 | self.shift_goto_map_nonterm 249 | .get(nonterm.to_usize().wrapping_sub(self.shift_nonterm_offset)) 250 | .copied() 251 | .flatten() 252 | } 253 | fn reduce(&self, class: Self::TermClass) -> Option<&Self::ReduceRules> { 254 | self.reduce_map 255 | .get(class.to_usize().wrapping_sub(self.reduce_offset)) 256 | .and_then(|r| r.as_ref()) 257 | } 258 | fn is_accept(&self) -> bool { 259 | self.reduce_map.is_empty() 260 | && self.shift_goto_map_class.is_empty() 261 | && self.shift_goto_map_nonterm.is_empty() 262 | } 263 | fn expected_shift_term(&self) -> impl Iterator + '_ { 264 | self.shift_goto_map_class_keys.iter().copied() 265 | } 266 | fn expected_shift_nonterm(&self) -> impl Iterator + '_ { 267 | self.shift_goto_map_nonterm_keys.iter().copied() 268 | } 269 | fn expected_reduce_rule(&self) -> impl Iterator + '_ { 270 | self.reduce_map 271 | .iter() 272 | .filter_map(|r| r.as_ref()) 273 | .flat_map(RuleContainer::to_iter) 274 | } 275 | 276 | fn get_rules(&self) -> &[crate::rule::ShiftedRuleRef] { 277 | &self.ruleset 278 | } 279 | 280 | fn can_accept_error(&self) -> TriState { 281 | self.can_accept_error 282 | } 283 | } 284 | 285 | impl 286 | From> 287 | for SparseState 288 | where 289 | 
TermClass: Ord + Hash, 290 | NonTerm: Hash + Eq, 291 | RuleContainer: ReduceRules, 292 | RuleContainer::RuleIndex: TryFrom, 293 | { 294 | fn from(builder_state: IntermediateState) -> Self { 295 | // TerminalSymbol::Term(_) < TerminalSymbol::Error < TerminalSymbol::Eof 296 | // since maps are sorted, eof and error should be at the end of the array 297 | 298 | // make sure the order is preserved 299 | #[cfg(debug_assertions)] 300 | { 301 | let keys = builder_state 302 | .shift_goto_map_term 303 | .iter() 304 | .map(|(term, _)| term) 305 | .collect::>(); 306 | debug_assert!(keys.is_sorted()); 307 | 308 | let keys = builder_state 309 | .reduce_map 310 | .iter() 311 | .map(|(term, _)| term) 312 | .collect::>(); 313 | debug_assert!(keys.is_sorted()); 314 | } 315 | SparseState { 316 | shift_goto_map_class: builder_state.shift_goto_map_term.into_iter().collect(), 317 | shift_goto_map_nonterm: builder_state.shift_goto_map_nonterm.into_iter().collect(), 318 | reduce_map: builder_state 319 | .reduce_map 320 | .into_iter() 321 | .map(|(term, rule)| { 322 | ( 323 | term.try_into().expect("term conversion failed"), 324 | RuleContainer::from_set(rule), 325 | ) 326 | }) 327 | .collect(), 328 | ruleset: builder_state.ruleset.into_iter().collect(), 329 | can_accept_error: builder_state.can_accept_error, 330 | } 331 | } 332 | } 333 | impl 334 | From> 335 | for DenseState 336 | where 337 | TermClass: Ord + Copy, 338 | NonTerm: Hash + Eq + Copy + NonTerminal, 339 | StateIndex: Copy, 340 | RuleContainer: Clone + ReduceRules, 341 | RuleContainer::RuleIndex: TryFrom, 342 | { 343 | fn from(builder_state: IntermediateState) -> Self { 344 | // TerminalSymbol::Term(_) < TerminalSymbol::Error < TerminalSymbol::Eof 345 | // since maps are sorted, eof and error should be at the end of the array 346 | 347 | // make sure the order is preserved 348 | #[cfg(debug_assertions)] 349 | { 350 | let keys = builder_state 351 | .shift_goto_map_term 352 | .iter() 353 | .map(|(term, _)| term) 354 | 
.collect::>(); 355 | debug_assert!(keys.is_sorted()); 356 | 357 | let keys = builder_state 358 | .reduce_map 359 | .iter() 360 | .map(|(term, _)| term) 361 | .collect::>(); 362 | debug_assert!(keys.is_sorted()); 363 | } 364 | 365 | let (shift_min, shift_len) = { 366 | let mut iter = builder_state 367 | .shift_goto_map_term 368 | .iter() 369 | .map(|(term, _)| term); 370 | let min: Option = iter.next().map(|x| x.to_usize()); 371 | let max: Option = iter.next_back().map(|x| x.to_usize()).or(min); 372 | 373 | if let (Some(min), Some(max)) = (min, max) { 374 | (min, max - min + 1) 375 | } else { 376 | (0, 0) 377 | } 378 | }; 379 | let (reduce_min, reduce_len) = { 380 | let mut iter = builder_state.reduce_map.iter().map(|(term, _)| term); 381 | let min: Option = iter.next().map(|x| x.to_usize()); 382 | let max: Option = iter.next_back().map(|x| x.to_usize()).or(min); 383 | if let (Some(min), Some(max)) = (min, max) { 384 | (min, max - min + 1) 385 | } else { 386 | (0, 0) 387 | } 388 | }; 389 | let (nonterm_min, nonterm_len) = { 390 | let mut iter = builder_state 391 | .shift_goto_map_nonterm 392 | .iter() 393 | .map(|(nonterm, _)| nonterm); 394 | let min = iter.next().map(|x| x.to_usize()); 395 | let max = iter.next_back().map(|x| x.to_usize()).or(min); 396 | if let (Some(min), Some(max)) = (min, max) { 397 | (min, max - min + 1) 398 | } else { 399 | (0, 0) 400 | } 401 | }; 402 | 403 | let shift_term_keys = builder_state 404 | .shift_goto_map_term 405 | .iter() 406 | .map(|(term, _)| *term) 407 | .collect(); 408 | let mut shift_goto_map_class = vec![None; shift_len]; 409 | for (term, state) in builder_state.shift_goto_map_term { 410 | shift_goto_map_class[term.to_usize() - shift_min] = Some(state); 411 | } 412 | 413 | let mut reduce_map = vec![None; reduce_len]; 414 | for (term, rule) in builder_state.reduce_map { 415 | reduce_map[term.to_usize() - reduce_min] = Some(RuleContainer::from_set(rule)); 416 | } 417 | 418 | let nonterm_keys = builder_state 419 | 
.shift_goto_map_nonterm 420 | .iter() 421 | .map(|(nonterm, _)| *nonterm) 422 | .collect(); 423 | let mut shift_goto_map_nonterm = vec![None; nonterm_len]; 424 | for (nonterm, state) in builder_state.shift_goto_map_nonterm { 425 | shift_goto_map_nonterm[nonterm.to_usize() - nonterm_min] = Some(state); 426 | } 427 | 428 | DenseState { 429 | shift_goto_map_class, 430 | shift_class_offset: shift_min, 431 | shift_goto_map_class_keys: shift_term_keys, 432 | shift_goto_map_nonterm, 433 | shift_goto_map_nonterm_keys: nonterm_keys, 434 | shift_nonterm_offset: nonterm_min, 435 | reduce_map, 436 | reduce_offset: reduce_min, 437 | ruleset: builder_state.ruleset.into_iter().collect(), 438 | can_accept_error: builder_state.can_accept_error, 439 | } 440 | } 441 | } 442 | --------------------------------------------------------------------------------