├── rustfmt.toml ├── images ├── error1.png ├── error2.png ├── title.png ├── tree.png ├── example1.png ├── example2.png ├── optimize.png ├── parse_error.png └── state_option.png ├── .github ├── FUNDING.yml ├── pull_request_template.md └── workflows │ └── bootstrap-test.yml ├── example ├── json │ ├── src │ │ ├── parser_expanded.rs │ │ ├── parser.rs │ │ └── main.rs │ ├── build.rs │ └── Cargo.toml ├── calculator │ ├── src │ │ ├── parser_expanded.rs │ │ ├── main.rs │ │ └── parser.rs │ ├── Cargo.toml │ └── build.rs ├── calculator_u8 │ ├── src │ │ ├── parser_expanded.rs │ │ ├── parser.rs │ │ └── main.rs │ ├── build.rs │ └── Cargo.toml └── glr │ ├── Cargo.toml │ └── src │ ├── parser.rs │ └── main.rs ├── rusty_lr_parser ├── src │ ├── parser │ │ ├── mod.rs │ │ ├── span_pair.rs │ │ └── lexer.rs │ ├── lib.rs │ ├── partition.rs │ ├── token.rs │ ├── utils.rs │ ├── terminal_info.rs │ ├── nonterminal_info.rs │ ├── rangeresolver.rs │ ├── terminalset.rs │ └── error.rs └── Cargo.toml ├── rusty_lr_core ├── src │ ├── parser │ │ ├── deterministic │ │ │ ├── mod.rs │ │ │ └── error.rs │ │ ├── nondeterministic │ │ │ ├── mod.rs │ │ │ ├── error.rs │ │ │ └── node.rs │ │ ├── terminalclass.rs │ │ ├── nonterminal.rs │ │ ├── mod.rs │ │ ├── data_stack.rs │ │ └── state.rs │ ├── builder │ │ ├── error.rs │ │ ├── mod.rs │ │ ├── state.rs │ │ └── diags.rs │ ├── hash.rs │ ├── backtrace.rs │ ├── location.rs │ ├── lib.rs │ ├── token.rs │ ├── rule.rs │ └── tree.rs └── Cargo.toml ├── Cargo.toml ├── rusty_lr_buildscript ├── src │ ├── output.rs │ ├── utils.rs │ └── split.rs └── Cargo.toml ├── .gitignore ├── rusty_lr_derive ├── Cargo.toml └── src │ └── lib.rs ├── rusty_lr_executable ├── Cargo.toml ├── src │ ├── arg.rs │ └── main.rs └── README.md ├── rusty_lr ├── Cargo.toml └── src │ └── lib.rs ├── LICENSE-MIT ├── scripts └── bootstrap_test.sh ├── GLR.md ├── LICENSE-APACHE └── README.md /rustfmt.toml: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /images/error1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/error1.png -------------------------------------------------------------------------------- /images/error2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/error2.png -------------------------------------------------------------------------------- /images/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/title.png -------------------------------------------------------------------------------- /images/tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/tree.png -------------------------------------------------------------------------------- /images/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/example1.png -------------------------------------------------------------------------------- /images/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/example2.png -------------------------------------------------------------------------------- /images/optimize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/optimize.png -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding 
model platforms 2 | 3 | github: ehwan 4 | -------------------------------------------------------------------------------- /example/json/src/parser_expanded.rs: -------------------------------------------------------------------------------- 1 | include!(concat!(env!("OUT_DIR"), "/parser.rs")); 2 | -------------------------------------------------------------------------------- /images/parse_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/parse_error.png -------------------------------------------------------------------------------- /example/calculator/src/parser_expanded.rs: -------------------------------------------------------------------------------- 1 | include!(concat!(env!("OUT_DIR"), "/parser.rs")); 2 | -------------------------------------------------------------------------------- /example/calculator_u8/src/parser_expanded.rs: -------------------------------------------------------------------------------- 1 | include!(concat!(env!("OUT_DIR"), "/parser.rs")); 2 | -------------------------------------------------------------------------------- /images/state_option.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehwan/RustyLR/HEAD/images/state_option.png -------------------------------------------------------------------------------- /rusty_lr_parser/src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod args; 2 | pub mod lexer; 3 | pub mod parser_expanded; 4 | pub mod span_pair; 5 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/deterministic/mod.rs: -------------------------------------------------------------------------------- 1 | mod context; 2 | mod error; 3 | 4 | pub use context::Context; 5 | pub use error::ParseError; 6 | 
-------------------------------------------------------------------------------- /example/glr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "glr" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr", features = ["tree"] } 8 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nondeterministic/mod.rs: -------------------------------------------------------------------------------- 1 | mod context; 2 | mod error; 3 | mod node; 4 | 5 | pub use context::Context; 6 | pub use error::ParseError; 7 | pub use node::Node; 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Please run `scripts/bootstrap_test.sh` and push all of the changes it make before making the PR. 2 | 3 | This will run brief tests, and make it easier to look at what part of generated code has been changed. 
4 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/error.rs: -------------------------------------------------------------------------------- 1 | /// Error type for building grammar 2 | #[derive(Debug, Clone, PartialEq, Eq)] 3 | pub enum BuildError { 4 | RuleNotFound(NonTerm), 5 | 6 | NoAugmented, 7 | 8 | __PhantomData__(Term), 9 | } 10 | -------------------------------------------------------------------------------- /example/json/build.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::build; 2 | 3 | fn main() { 4 | println!("cargo::rerun-if-changed=src/parser.rs"); 5 | let output = format!("{}/parser.rs", std::env::var("OUT_DIR").unwrap()); 6 | 7 | build::Builder::new().file("src/parser.rs").build(&output); 8 | } 9 | -------------------------------------------------------------------------------- /example/calculator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calculator" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr" } 8 | 9 | 10 | [build-dependencies] 11 | rusty_lr = { path = "../../rusty_lr", features = ["build"] } 12 | -------------------------------------------------------------------------------- /example/calculator/build.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::build; 2 | 3 | fn main() { 4 | println!("cargo::rerun-if-changed=src/parser.rs"); 5 | let output = format!("{}/parser.rs", std::env::var("OUT_DIR").unwrap()); 6 | 7 | build::Builder::new().file("src/parser.rs").build(&output); 8 | } 9 | -------------------------------------------------------------------------------- /example/json/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "json" 3 | version = "0.1.0" 4 | edition = "2024" 
5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr", features = ["tree"] } 8 | 9 | [build-dependencies] 10 | rusty_lr = { path = "../../rusty_lr", features = ["build"] } 11 | -------------------------------------------------------------------------------- /example/calculator_u8/build.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::build; 2 | 3 | fn main() { 4 | println!("cargo::rerun-if-changed=src/parser.rs"); 5 | let output = format!("{}/parser.rs", std::env::var("OUT_DIR").unwrap()); 6 | 7 | build::Builder::new().file("src/parser.rs").build(&output); 8 | } 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "rusty_lr", 4 | "rusty_lr_core", 5 | "rusty_lr_derive", 6 | "rusty_lr_parser", 7 | "rusty_lr_buildscript", 8 | "rusty_lr_executable", 9 | "example/calculator", 10 | "example/calculator_u8", 11 | "example/glr", 12 | "example/json", 13 | ] 14 | -------------------------------------------------------------------------------- /example/calculator_u8/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calculator_u8" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusty_lr = { path = "../../rusty_lr", features = ["tree"] } 8 | 9 | 10 | [build-dependencies] 11 | rusty_lr = { path = "../../rusty_lr", features = ["build"] } 12 | -------------------------------------------------------------------------------- /rusty_lr_core/src/hash.rs: -------------------------------------------------------------------------------- 1 | // #[cfg(feature = "fxhash")] 2 | pub use rustc_hash::FxHashMap as HashMap; 3 | 4 | // #[cfg(feature = "fxhash")] 5 | pub use rustc_hash::FxHashSet as HashSet; 6 | 7 | // #[cfg(not(feature = "fxhash"))] 8 | // pub use 
std::collections::HashMap; 9 | 10 | // #[cfg(not(feature = "fxhash"))] 11 | // pub use std::collections::HashSet; 12 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/mod.rs: -------------------------------------------------------------------------------- 1 | mod diags; 2 | mod error; 3 | mod grammar; 4 | mod state; 5 | 6 | pub use diags::DiagnosticCollector; 7 | pub use error::BuildError; 8 | pub use grammar::Grammar; 9 | pub use state::State; 10 | 11 | /// struct for output of parser building. 12 | pub struct States { 13 | pub states: Vec>, 14 | } 15 | -------------------------------------------------------------------------------- /rusty_lr_buildscript/src/output.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::TokenStream; 2 | 3 | pub struct Output { 4 | /// token stream before '%%' 5 | pub user_stream: TokenStream, 6 | /// token stream after '%%' 7 | pub generated_stream: TokenStream, 8 | /// debug comments attatched to the output file 9 | pub debug_comments: String, 10 | 11 | pub grammar: rusty_lr_parser::grammar::Grammar, 12 | } 13 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/terminalclass.rs: -------------------------------------------------------------------------------- 1 | pub trait TerminalClass: Copy { 2 | type Term; 3 | 4 | const ERROR: Self; 5 | const EOF: Self; 6 | 7 | /// Gets the pretty name of this terminal class. 
8 | fn as_str(&self) -> &'static str; 9 | 10 | /// Converts this terminal class to a usize 11 | fn to_usize(&self) -> usize; 12 | 13 | fn from_term(term: &Self::Term) -> Self; 14 | 15 | fn precedence(&self) -> crate::parser::Precedence; 16 | } 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | **/out.tab.rs -------------------------------------------------------------------------------- /rusty_lr_core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_core" 3 | version = "3.39.1" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "core library for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [dependencies] 13 | rustc-hash = "2.1" 14 | termtree = { version = "0.5", optional = true } 15 | arrayvec = "0.7" 16 | 17 | [features] 18 | default = [] 19 | builder = [] 20 | tree = ["dep:termtree"] 21 | -------------------------------------------------------------------------------- /rusty_lr_derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_derive" 3 | version = "2.42.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = 
"proc-macro definitions for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [lib] 13 | proc-macro = true 14 | 15 | [dependencies] 16 | proc-macro2 = "1.0.86" 17 | rusty_lr_parser = { version = "3.63.0", path = "../rusty_lr_parser" } 18 | quote = "1.0" 19 | 20 | 21 | [features] 22 | default = [] 23 | -------------------------------------------------------------------------------- /rusty_lr_parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_parser" 3 | version = "3.63.1" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "grammar line parser for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [dependencies] 13 | proc-macro2 = "1.0.86" 14 | quote = "1.0" 15 | rusty_lr_core = { version = "3.39.0", path = "../rusty_lr_core", features = [ 16 | "builder", 17 | ] } 18 | syn = { version = "2.0", features = ["extra-traits"] } 19 | 20 | [features] 21 | default = [] 22 | -------------------------------------------------------------------------------- /.github/workflows/bootstrap-test.yml: -------------------------------------------------------------------------------- 1 | name: Bootstrap Test 2 | description: Tests parser bootstrapping with different configurations to ensure output consistency 3 | 4 | on: 5 | pull_request: 6 | branches: [ main ] 7 | push: 8 | branches: [ main ] 9 | 10 | jobs: 11 | bootstrap-test: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | 18 | - name: Run bootstrap test 19 | run: | 20 | cd scripts 21 | chmod +x ./bootstrap_test.sh 22 | ./bootstrap_test.sh true 23 | if [ $? 
-ne 0 ]; then 24 | exit 1 25 | fi 26 | working-directory: ${{ github.workspace }} -------------------------------------------------------------------------------- /rusty_lr_executable/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustylr" 3 | version = "1.27.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "Executable for rusty_lr, a Bison-like Parser generator & Compiler frontend framework for Rust generating IELR(1), LALR(1) parser tables, with deterministic LR and non-deterministic LR (GLR) parsing." 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | [dependencies] 13 | clap = { version = "4.5.7", features = ["derive"] } 14 | rusty_lr_buildscript = { version = "0.61.0", path = "../rusty_lr_buildscript" } 15 | prettyplease = "0.2" 16 | syn = "2.0" 17 | -------------------------------------------------------------------------------- /example/glr/src/parser.rs: -------------------------------------------------------------------------------- 1 | use rusty_lr::lr1; 2 | 3 | lr1! 
{ 4 | %err String; 5 | %glr; 6 | %tokentype char; 7 | %start E; 8 | 9 | WS0: ' '*; 10 | 11 | Digit(char): ch=['0'-'9'] { ch }; 12 | 13 | Number(i32): WS0 Digit+ WS0 { Digit.into_iter().collect::().parse().unwrap() }; 14 | 15 | E(i32): E '+' e2=E { 16 | match lookahead.to_term() { 17 | Some('*') => { 18 | return Err("".to_string()); 19 | } 20 | _ => { 21 | *shift = false; 22 | E + e2 23 | } 24 | } 25 | } 26 | | E '*' e2=E { 27 | *shift = false; 28 | E * e2 29 | } 30 | | Number 31 | ; 32 | } 33 | -------------------------------------------------------------------------------- /rusty_lr_buildscript/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr_buildscript" 3 | version = "0.61.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "buildscipt tools for rusty_lr" 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing"] 11 | 12 | 13 | [dependencies] 14 | rusty_lr_parser = { version = "3.63.0", path = "../rusty_lr_parser" } 15 | rusty_lr_core = { version = "3.39.0", path = "../rusty_lr_core", features = [ 16 | "builder", 17 | ] } 18 | codespan-reporting = "0.12" 19 | proc-macro2 = { version = "1.0.86", features = ["span-locations"] } 20 | quote = "1.0" 21 | 22 | [features] 23 | default = [] 24 | -------------------------------------------------------------------------------- /rusty_lr_buildscript/src/utils.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Span; 2 | use proc_macro2::TokenStream; 3 | 4 | use std::ops::Range; 5 | 6 | pub fn tokenstream_range(stream: TokenStream) -> Range { 7 | if stream.is_empty() { 8 | return 0..0; 9 | } 10 | let mut stream = stream.into_iter(); 11 | let first = stream.next().unwrap().span().byte_range(); 12 | let last = if let Some(last) = stream.last() { 13 | 
last.span().byte_range() 14 | } else { 15 | first.clone() 16 | }; 17 | 18 | first.start..last.end 19 | } 20 | pub fn span_stream_range(span: Span, stream: TokenStream) -> Range { 21 | let stream_range = tokenstream_range(stream); 22 | span.byte_range().start..stream_range.end 23 | } 24 | -------------------------------------------------------------------------------- /example/calculator_u8/src/parser.rs: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | %userdata i32; 4 | %tokentype char; 5 | %start E; 6 | 7 | %left '+'; 8 | %left '*'; 9 | %precedence UMINUS; 10 | 11 | WS0: ' '*; 12 | 13 | Digit(char): ['6'-'9'] | "0" {'0'} | '1' | '2' | '3' | '4' | '5'; 14 | 15 | Number(i32): WS0 Digit+ WS0 { Digit.into_iter().collect::().parse().unwrap() }; 16 | 17 | P(f32): Number { Number as f32 } 18 | | WS0 '(' E ')' WS0 { E } 19 | ; 20 | 21 | E(f32) : E Op e2=E %prec Op { 22 | *data += 1; // access userdata by `data` 23 | println!( "{:?} {:?} {:?}", E, Op, e2 ); 24 | match Op { 25 | '+' => E + e2, 26 | '*' => E * e2, 27 | _ => panic!("Unknown operator: {:?}", Op), 28 | } 29 | } 30 | | WS0 '-' E %prec UMINUS { 31 | -E 32 | } 33 | | P 34 | ; 35 | 36 | Op(char): '+' | '*' ; 37 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Macro line parser for Rusty LR. 2 | //! 3 | //! This crate is private and not intended to be used directly. 4 | //! Please use the [`rusty_lr`](https://crates.io/crates/rusty_lr) crate instead. 
5 | 6 | pub mod emit; 7 | pub mod error; 8 | pub mod grammar; 9 | pub(crate) mod nonterminal_info; 10 | pub(crate) mod parser; 11 | pub mod partition; 12 | pub(crate) mod pattern; 13 | pub mod rangeresolver; 14 | pub mod terminal_info; 15 | pub(crate) mod terminalset; 16 | pub(crate) mod token; 17 | pub mod utils; 18 | 19 | /// This, `rusty_lr_parser` is designed to generate a code, that will be relied on `rusty_lr`. 20 | /// 21 | /// Gets the version of the rusty_lr_core crate that current crate is targeting. 22 | /// If the version is not matched, there will be a compile-time error. 23 | pub fn target_rusty_lr_version() -> (usize, usize, usize) { 24 | (3, 34, 0) 25 | } 26 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/partition.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | /// return {setids} -> {values} map 4 | pub fn minimal_partition( 5 | sets: impl Iterator>, 6 | ) -> BTreeMap, Vec> 7 | where 8 | T: Ord, 9 | { 10 | let mut val_setids_map: BTreeMap = Default::default(); 11 | for (set_id, val_set) in sets.enumerate() { 12 | for val in val_set { 13 | val_setids_map 14 | .entry(val) 15 | .or_insert_with(Vec::new) 16 | .push(set_id); 17 | } 18 | } 19 | 20 | let mut setids_val_map: BTreeMap<_, Vec> = Default::default(); 21 | for (val, setids) in val_setids_map { 22 | setids_val_map 23 | .entry(setids) 24 | .or_insert_with(Vec::new) 25 | .push(val); 26 | } 27 | 28 | setids_val_map 29 | } 30 | -------------------------------------------------------------------------------- /rusty_lr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_lr" 3 | version = "3.34.0" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "A Bison-like Parser generator & Compiler frontend framework for Rust generating IELR(1), LALR(1) parser tables, with 
deterministic LR and non-deterministic LR (GLR) parsing." 7 | repository = "https://github.com/ehwan/RustyLR" 8 | readme = "../README.md" 9 | keywords = ["parser", "bison", "lr", "glr", "compiler"] 10 | categories = ["parsing", "compilers", "parser-implementations"] 11 | 12 | [dependencies] 13 | rusty_lr_core = { version = "3.39.0", path = "../rusty_lr_core" } 14 | rusty_lr_derive = { version = "2.42.0", path = "../rusty_lr_derive", optional = true } 15 | rusty_lr_buildscript = { version = "0.61.0", path = "../rusty_lr_buildscript", optional = true } 16 | 17 | [features] 18 | default = ["derive"] 19 | build = ["dep:rusty_lr_buildscript"] 20 | derive = ["dep:rusty_lr_derive"] 21 | tree = ["rusty_lr_core/tree"] 22 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /rusty_lr_buildscript/src/split.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Spacing; 2 | use proc_macro2::TokenStream; 3 | use proc_macro2::TokenTree; 4 | use quote::TokenStreamExt; 5 | 6 | // split stream by '%%' 7 | pub fn split_stream(token_stream: TokenStream) -> Result<(TokenStream, TokenStream), ()> { 8 | // input stream 9 | let mut token_stream = token_stream.into_iter().peekable(); 10 | 11 | // before '%%' 12 | let mut output_stream = TokenStream::new(); 13 | 14 | while let Some(token) = token_stream.next() { 15 | if let TokenTree::Punct(token) = &token { 16 | if token.as_char() == '%' && token.spacing() == Spacing::Joint { 17 | if let Some(TokenTree::Punct(next)) = token_stream.peek() { 18 | if next.as_char() == '%' && next.spacing() == Spacing::Alone { 19 | token_stream.next(); 20 | let macro_stream: TokenStream = token_stream.collect(); 21 | return Ok((output_stream, macro_stream)); 22 | } 23 | } 24 | } 25 | } 26 | output_stream.append(token); 27 | } 28 | 29 | Err(()) 30 | } 31 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/token.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::{Ident, Span}; 2 | 3 | /// for syntax '=' 4 | #[derive(Debug, Clone)] 5 | pub struct TokenMapped { 6 | /// terminal or non-terminal name 7 | pub token: rusty_lr_core::Token, usize>, 8 | 9 | /// variable name that the token's data will be mapped to 10 | pub mapto: Option, 11 | 12 | /// When optimizing out single-token production rules like A -> B { ... 
}, 13 | /// All occurrences of A will be replaced with B, but the reduce action `{ ... }` still needs to be called. 14 | /// 15 | /// Say we have production rule X -> a b A c d { ... }, 16 | /// and A is replaced with B, then X -> a b B c d { ... } is the new rule, 17 | /// but we still need to call A's reduce action before calling X's reduce action. 18 | /// So basically we need to keep a chain of reduce actions to be called. 19 | /// 20 | /// This token data has to be r_n( r_n-1( ... r_0(token) ... ) ) 21 | /// where r_i is the i'th reduce action in the chain. 22 | pub reduce_action_chains: Vec, 23 | 24 | /// span of the token 25 | pub begin_span: Span, 26 | pub end_span: Span, 27 | } 28 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/utils.rs: -------------------------------------------------------------------------------- 1 | // constants and utility functions for macro-generation 2 | 3 | use super::error::ParseError; 4 | use proc_macro2::Ident; 5 | use quote::format_ident; 6 | 7 | pub static AUGMENTED_NAME: &str = "Augmented"; 8 | pub static EOF_NAME: &str = "eof"; 9 | pub static ERROR_NAME: &str = "error"; 10 | pub static USER_DATA_PARAMETER_NAME: &str = "data"; 11 | pub static TERMINAL_STACK_NAME: &str = "__rustylr_generated_terminal_stack"; 12 | pub static OTHERS_TERMINAL_NAME: &str = "__rustylr_other_terminals"; 13 | pub static LOOKAHEAD_PARAMETER_NAME: &str = "lookahead"; 14 | 15 | /// check if the given identifier is reserved name 16 | pub(crate) fn check_reserved_name(ident: &Ident) -> Result<(), ParseError> { 17 | if ident == AUGMENTED_NAME { 18 | return Err(ParseError::ReservedName(ident.clone())); 19 | } 20 | if ident == EOF_NAME { 21 | return Err(ParseError::ReservedName(ident.clone())); 22 | } 23 | if ident == ERROR_NAME { 24 | return Err(ParseError::ReservedName(ident.clone())); 25 | } 26 | Ok(()) 27 | } 28 | 29 | pub(crate) fn location_variable_name(varname: &Ident) -> Ident { 30 | 
format_ident!("__rustylr_location_{}", varname) 31 | } 32 | -------------------------------------------------------------------------------- /example/calculator_u8/src/main.rs: -------------------------------------------------------------------------------- 1 | pub mod parser_expanded; 2 | use parser_expanded as parser; 3 | 4 | fn main() { 5 | let input = " 1 + -20 * (3 + 4 ) "; 6 | 7 | let parser = parser::EParser::new(); 8 | let mut context = parser::EContext::new(); 9 | let mut userdata: i32 = 0; 10 | for b in input.chars() { 11 | match context.feed(&parser, b, &mut userdata) { 12 | // feed userdata here 13 | Ok(_) => {} 14 | Err(e) => { 15 | eprintln!("error: {:?}", e); 16 | return; 17 | } 18 | } 19 | } 20 | println!("{:?}", context); 21 | 22 | let result = context.accept(&parser, &mut userdata).unwrap(); // get value of start 'E' 23 | println!("result: {}", result); 24 | println!("userdata: {}", userdata); 25 | 26 | // invalid input, expect error 27 | let error_input = "1+2**(3+4)"; 28 | let mut context = parser::EContext::new(); 29 | let mut userdata: i32 = 0; 30 | for b in error_input.chars() { 31 | match context.feed(&parser, b, &mut userdata) { 32 | // feed userdata here 33 | Ok(_) => {} 34 | Err(e) => { 35 | // this will print error messages 36 | eprintln!("error: {:?}", e); 37 | 38 | // eprintln!("{:?}", context.backtrace(&parser)); 39 | return; 40 | } 41 | } 42 | } 43 | context.feed(&parser, 0 as char, &mut userdata).unwrap(); // feed EOF 44 | } 45 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/parser/span_pair.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Span; 2 | 3 | /// type for %location for each token 4 | /// since `Span::join()` is only for nightly, 5 | /// we collect the first and last span pair of the token in the parsing tree. 
6 | #[derive(Clone, Debug, Copy)] 7 | pub struct SpanPair { 8 | /// `None` if this is a zero-length span 9 | pub pair: Option<(Span, Span)>, 10 | } 11 | impl Default for SpanPair { 12 | fn default() -> Self { 13 | SpanPair { pair: None } 14 | } 15 | } 16 | impl SpanPair { 17 | pub fn new_single(span: Span) -> Self { 18 | SpanPair { 19 | pair: Some((span, span)), 20 | } 21 | } 22 | pub fn span(&self) -> Span { 23 | self.pair 24 | .as_ref() 25 | .map_or(Span::call_site(), |(first, last)| { 26 | if let Some(joined) = first.join(*last) { 27 | joined 28 | } else { 29 | *first 30 | } 31 | }) 32 | } 33 | } 34 | impl rusty_lr_core::Location for SpanPair { 35 | fn new<'a>(stack: impl Iterator + Clone, len: usize) -> Self 36 | where 37 | Self: 'a, 38 | { 39 | let mut take = stack.take(len).filter_map(|x| x.pair); 40 | let pair = if let Some(last) = take.next() { 41 | let first = take.last().unwrap_or(last); 42 | 43 | Some((first.0, last.1)) 44 | } else { 45 | None 46 | }; 47 | SpanPair { pair } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /example/calculator/src/main.rs: -------------------------------------------------------------------------------- 1 | mod parser_expanded; 2 | 3 | use parser_expanded as parser; 4 | 5 | fn main() { 6 | use parser::Token; 7 | let input = vec![ 8 | Token::Num(1), 9 | Token::Plus, 10 | Token::Num(2), 11 | Token::Star, 12 | Token::LParen, 13 | Token::Num(3), 14 | Token::Plus, 15 | Token::Num(4), 16 | Token::RParen, 17 | ]; 18 | 19 | let parser = parser::EParser::new(); 20 | let mut context = parser::EContext::new(); 21 | let mut userdata: i32 = 0; 22 | for token in input { 23 | match context.feed(&parser, token, &mut userdata) { 24 | // ^^^^^ ^^^^^^^^^^^^ userdata passed here as `&mut i32` 25 | // |- feed token 26 | Ok(_) => {} 27 | Err(e) => { 28 | println!("{:?}", e); 29 | return; 30 | } 31 | } 32 | 33 | let (terms, nonterms) = context.expected_token_str(&parser); 34 | let terms = 
terms.map(String::from).collect::>().join(", "); 35 | let nonterms = nonterms.map(String::from).collect::>().join(", "); 36 | println!( 37 | "Expected tokens: [{}], non-terminals: [{}]", 38 | terms, nonterms 39 | ); 40 | } 41 | // res = value of start symbol ( E(i32) ) 42 | let res = context.accept(&parser, &mut userdata).unwrap(); 43 | println!("{}", res); 44 | println!("userdata: {}", userdata); 45 | } 46 | -------------------------------------------------------------------------------- /rusty_lr/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # rusty_lr 2 | //! ***A Bison-like parser generator for Rust supporting IELR(1), LALR(1) parser tables, with deterministic LR and 3 | //! non-deterministic LR (GLR) parsing strategies.*** 4 | //! 5 | //! RustyLR is a parser generator that converts context-free grammars into IELR(1)/LALR(1) tables with deterministic LR and non-deterministic GLR parsing strategies. It supports custom reduce actions in Rust, with beautiful diagnostics. 6 | //! Highly inspired by tools like *bison*, it uses a similar syntax while integrating seamlessly with Rust's ecosystem. 7 | //! It constructs an optimized state machine, ensuring efficient and reliable parsing. 8 | //! 9 | //! ## Features 10 | //! - **Custom Reduce Actions:** Define custom actions in Rust, allowing you to build into custom data structures easily. 11 | //! - **Automatic Optimization:** Reduces parser table size and improves performance by grouping terminals with identical behavior across parser states. 12 | //! - **Multiple Parsing Strategies:** Supports minimal-LR(1), LALR(1) parser table, and GLR parsing strategy. 13 | //! - **Detailed Diagnostics:** Detect grammar conflicts, verbose conflicts resolving stages, and optimization stages. 14 | //! - **Location Tracking:** Track the location of every token in the parse tree, useful for error reporting and debugging. 15 | //!
16 | // re-exports 17 | 18 | pub use rusty_lr_core::*; 19 | pub use rusty_lr_derive::*; 20 | 21 | /// tools for build.rs 22 | #[cfg(feature = "build")] 23 | pub mod build { 24 | pub use rusty_lr_buildscript::*; 25 | } 26 | -------------------------------------------------------------------------------- /rusty_lr_core/src/backtrace.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | 4 | /// Backtracing information for parsing context. 5 | /// What current parser was trying to parse, and what rules were applied. 6 | #[derive(Clone)] 7 | pub struct Backtrace { 8 | /// 0'th element is the current parsing state, and through the backtrace, it goes to the initial state. 9 | pub traces: Vec>>, 10 | } 11 | 12 | impl Display for Backtrace { 13 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 14 | for (idx, rules) in self.traces.iter().enumerate() { 15 | if idx == 0 { 16 | writeln!(f, "Trying to parse:")?; 17 | } else { 18 | writeln!(f, "Backtrace:")?; 19 | } 20 | for rule in rules { 21 | writeln!(f, "\t>>> {}", rule)?; 22 | } 23 | } 24 | Ok(()) 25 | } 26 | } 27 | impl Debug for Backtrace { 28 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 29 | for (idx, rules) in self.traces.iter().enumerate() { 30 | if idx == 0 { 31 | writeln!(f, "Trying to parse:")?; 32 | } else { 33 | writeln!(f, "Backtrace:")?; 34 | } 35 | for rule in rules { 36 | writeln!(f, "\t>>> {:?}", rule)?; 37 | } 38 | } 39 | Ok(()) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /rusty_lr_core/src/location.rs: -------------------------------------------------------------------------------- 1 | /// trait for token location in the input stream 2 | pub trait Location: Clone { 3 | /// Create a new merged location from `len` elements in the stack. 4 | /// The `stack` iterator should yield child locations in reverse order. 
5 | /// That is, if the input [a, b, c, ..., z] is fed and `len` is 3, 6 | /// `stack.next()` will yield `z`, then `y`, then `x`, 7 | /// and this function should return the merged location of `[x, y, z]`. 8 | fn new<'a>(stack: impl Iterator + Clone, len: usize) -> Self 9 | where 10 | Self: 'a; 11 | } 12 | 13 | /// Default location type that does nothing. 14 | #[derive(Clone, Default, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)] 15 | pub struct DefaultLocation; 16 | impl Location for DefaultLocation { 17 | fn new<'a>(_stack: impl Iterator + Clone, _len: usize) -> Self { 18 | DefaultLocation 19 | } 20 | } 21 | 22 | impl Location for std::ops::Range 23 | where 24 | T: Clone + Default + Ord, 25 | { 26 | fn new<'a>(mut stack: impl Iterator + Clone, len: usize) -> Self 27 | where 28 | Self: 'a, 29 | { 30 | if len == 0 { 31 | if let Some(last) = stack.next() { 32 | let end = last.end.clone(); 33 | end.clone()..end 34 | } else { 35 | T::default()..T::default() 36 | } 37 | } else { 38 | let mut stack = stack.take(len); 39 | let last = stack.next().unwrap(); 40 | let first = stack.last().unwrap_or(last); 41 | first.start.clone()..last.end.clone() 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nondeterministic/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | 4 | /// Error type for feed() 5 | #[derive(Clone, Debug)] 6 | pub struct ParseError { 7 | /// The terminal symbol that caused the error. 8 | pub term: crate::TerminalSymbol, 9 | /// Location of the terminal symbol. 10 | /// location will be `None` if the terminal was eof. 11 | pub location: Option, 12 | /// Error from reduce action (from every diverged paths) 13 | pub reduce_action_errors: Vec, 14 | /// Rule indices when shift/reduce conflict occur with no shift/reduce precedence defined. 
15 | /// This is same as when setting %nonassoc in Bison. 16 | pub no_precedences: Vec, 17 | 18 | /// States when the error occurred (from all diverged paths) 19 | pub(crate) states: Vec, 20 | } 21 | 22 | impl ParseError { 23 | /// location will be `None` if the terminal was eof. 24 | pub fn location(&self) -> &Option { 25 | &self.location 26 | } 27 | pub fn term(&self) -> &crate::TerminalSymbol { 28 | &self.term 29 | } 30 | /// States when the error occurred (from all diverged paths) 31 | pub fn states(&self) -> impl Iterator + '_ { 32 | self.states.iter().copied() 33 | } 34 | } 35 | 36 | impl Display for ParseError 37 | where 38 | Term: Display, 39 | { 40 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 41 | write!(f, "ParseError: {}, States: {:?}", self.term, self.states) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /example/json/src/parser.rs: -------------------------------------------------------------------------------- 1 | // https://www.json.org/json-en.html 2 | 3 | %% 4 | 5 | %tokentype char; 6 | %start Json; 7 | %dense; 8 | %location std::ops::Range; 9 | 10 | Json: Element; 11 | 12 | Value: Object 13 | | Array 14 | | String 15 | | Number 16 | | "true" 17 | | "false" 18 | | "null" 19 | ; 20 | 21 | Object: '{' WS '}' 22 | | '{' Members '}' 23 | | '{' error '}' { 24 | // @error is Range at this point 25 | let start = @error.start; 26 | let end = @error.end; 27 | println!("Error recovered with '}}' at {start}..{end}"); 28 | } 29 | ; 30 | 31 | Members: Member 32 | | Member ',' Members 33 | ; 34 | 35 | Member: WS String WS ':' Element; 36 | 37 | Array: '[' Elements ']' 38 | ; 39 | 40 | Elements: $sep(Element, ',', *) 41 | ; 42 | 43 | Element: WS Value WS; 44 | 45 | String: '"' Characters '"'; 46 | 47 | Characters: Character*; 48 | 49 | // WIP 50 | Character: 51 | '\\' Escape 52 | | ['\u{0020}'-'\u{10FFFF}'] - '"' - '\\' 53 | ; 54 | 55 | Escape: '"' 56 | | '\\' 57 | | '/' 58 | | 'b' 59 
| | 'f' 60 | | 'n' 61 | | 'r' 62 | | 't' 63 | | 'u' Hex Hex Hex Hex 64 | ; 65 | 66 | Hex: Digit 67 | | ['A'-'F'] 68 | | ['a'-'f'] 69 | ; 70 | 71 | Number: Integer Fraction Exponent; 72 | 73 | Integer: Digit 74 | | Onenine Digits 75 | | '-' Digit 76 | | '-' Onenine Digits 77 | ; 78 | 79 | Digits: Digit+; 80 | 81 | Digit: ['0'-'9']; 82 | 83 | Onenine: ['1'-'9']; 84 | 85 | Fraction: ('.' Digits)?; 86 | 87 | Exponent: "" 88 | | 'E' Sign Digits 89 | | 'e' Sign Digits 90 | ; 91 | 92 | Sign: "" | '+' | '-'; 93 | 94 | WS: "" 95 | | '\u{0020}' WS 96 | | '\u{000A}' WS 97 | | '\u{000D}' WS 98 | | '\u{0009}' WS 99 | ; -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nonterminal.rs: -------------------------------------------------------------------------------- 1 | /// A struct to hold information about non-terminal symbols 2 | pub trait NonTerminal: Copy { 3 | /// Whether this non-terminal is auto-generated by rustylr. 4 | /// Some non-terminals could be auto-generated to handle regex patterns, character sets, etc. 5 | fn is_auto_generated(&self) -> bool { 6 | self.nonterm_type().is_some() 7 | } 8 | /// Augmented rule will be generated for entry point of the grammar. 9 | fn is_augmented(&self) -> bool { 10 | self.nonterm_type() == Some(NonTerminalType::Augmented) 11 | } 12 | /// whether this non-terminal is set as %trace 13 | fn is_trace(&self) -> bool; 14 | 15 | /// for internal use only; 16 | /// If this non-terminal is auto-generated, gets the pattern where this non-terminal was generated from. 17 | fn nonterm_type(&self) -> Option; 18 | 19 | /// Gets the pretty name of this non-terminal. 20 | fn as_str(&self) -> &'static str; 21 | 22 | /// converts this non-terminal to a usize 23 | fn to_usize(&self) -> usize; 24 | } 25 | 26 | /// If the non-terminal is auto-generated, 27 | /// the pattern where this non-terminal was generated from. 
28 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 29 | pub enum NonTerminalType { 30 | /// zero or more repetitions 31 | Star, 32 | /// one or more repetitions with left recursion 33 | PlusLeft, 34 | /// one or more repetitions with right recursion 35 | PlusRight, 36 | /// zero or one repetition 37 | Optional, 38 | /// Augmented rule 39 | Augmented, 40 | /// error recovery non-terminal 41 | Error, 42 | 43 | /// terminal set enclosed in brackets ( [a-zA-Z0-9] ) 44 | TerminalSet, 45 | /// rule with explicit lookaheads 46 | Lookahead, 47 | 48 | /// sequence of tokens enclosed in parentheses ( a B c ... ) 49 | Group, 50 | 51 | /// "abc" or b"abc" 52 | LiteralString, 53 | } 54 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | /// Core parser functionality for deterministic parsers 2 | pub mod deterministic; 3 | 4 | /// Core parser functionality for non-deterministic parsers 5 | pub mod nondeterministic; 6 | 7 | pub mod data_stack; 8 | 9 | /// module for auto-generated types of non-terminals representation 10 | pub mod nonterminal; 11 | 12 | pub mod terminalclass; 13 | 14 | pub mod state; 15 | pub use state::State; 16 | 17 | #[derive(Clone, Copy)] 18 | pub struct Precedence(u8); 19 | 20 | impl Precedence { 21 | #[inline] 22 | pub fn none() -> Self { 23 | Precedence(u8::MAX) 24 | } 25 | #[inline] 26 | pub fn new(level: u8) -> Self { 27 | debug_assert!(level < u8::MAX); 28 | Precedence(level) 29 | } 30 | #[inline] 31 | pub fn is_some(&self) -> bool { 32 | self.0 < u8::MAX 33 | } 34 | 35 | pub fn unwrap(self) -> u8 { 36 | debug_assert!(self.0 < u8::MAX); 37 | self.0 38 | } 39 | } 40 | 41 | /// A trait for Parser that holds the entire parser table. 42 | /// This trait will be automatically implemented by rusty_lr 43 | pub trait Parser { 44 | /// whether the `error` token was used in the grammar. 
45 | const ERROR_USED: bool; 46 | 47 | /// The type of terminal symbols. 48 | type Term; 49 | /// The type of terminal classes. 50 | type TermClass: terminalclass::TerminalClass; 51 | /// The type of non-terminal symbols. 52 | type NonTerm: nonterminal::NonTerminal; 53 | /// The type of the parser state. 54 | type State: State; 55 | 56 | /// Get list of production rules 57 | fn get_rules(&self) -> &[crate::rule::ProductionRule]; 58 | /// Get list of states 59 | fn get_states(&self) -> &[Self::State]; 60 | /// Get the type of precedence for i'th level. 61 | /// `None` if i'th level was defined as %precedence (no reduce type). 62 | fn precedence_types(&self, level: u8) -> Option; 63 | } 64 | -------------------------------------------------------------------------------- /example/calculator/src/parser.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Copy)] 2 | pub enum Token { 3 | Num(i32), 4 | Plus, 5 | Star, 6 | LParen, 7 | RParen, 8 | } 9 | 10 | fn filter( term: &Token ) -> &Token { 11 | term 12 | } 13 | 14 | %% 15 | 16 | // this define struct `EParser` 17 | // where 'E' is the start symbol 18 | 19 | // lalr parser 20 | %lalr; 21 | 22 | // type of userdata 23 | %userdata i32; 24 | // type of token ( as Terminal symbol ) 25 | %tokentype Token; 26 | 27 | // start symbol 28 | %start E; 29 | 30 | // error type 31 | %err String; 32 | 33 | // define tokens 34 | %token num Token::Num(_); // `num` maps to `Token::Num(0)` 35 | %token plus Token::Plus; 36 | %token star Token::Star; 37 | %token lparen Token::LParen; 38 | %token rparen Token::RParen; 39 | 40 | // resolving shift/reduce conflict 41 | %left plus; 42 | %left star; 43 | 44 | %filter filter; 45 | 46 | // data that each token holds can be accessed by its name 47 | // s is slice of shifted terminal symbols captured by current rule 48 | // userdata can be accessed by `data` ( &mut i32, for this situation ) 49 | A(i32) : A plus a2=A { 50 | println!("{:?} 
{:?} {:?}", A, plus, a2 ); 51 | // ^ ^ ^ 52 | // | | |- value of 2nd 'A' 53 | // | |- Token 54 | // |- value of 1st 'A' 55 | *data += 1; 56 | A + a2 // --> this will be new value of current 'A' 57 | // ^ ^ 58 | // | |- value of 2nd 'A' 59 | // |- value of 1st 'A' 60 | } 61 | | M 62 | ; 63 | 64 | M(i32) : M_optim star m2=M_optim { M_optim * m2 } 65 | | P 66 | ; 67 | 68 | // check for single-rule optimization 69 | M_optim(i32): M { M * 1 }; 70 | 71 | P(i32) : num { 72 | if let Token::Num(n) = num { n } 73 | else { return Err(format!("{:?}", num)); } 74 | // ^^^^^^^^^^^^^^^^^^^^^^^^^^ 75 | // reduce action returns Result<(), String> 76 | } 77 | | lparen E rparen { E } 78 | ; 79 | 80 | E(i32) : A; 81 | -------------------------------------------------------------------------------- /rusty_lr_executable/src/arg.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | 3 | /// Converts a context-free grammar into a deterministic finite automaton (DFA) tables, 4 | /// and generates a Rust code that can be used as a parser for that grammar. 5 | /// 6 | /// For usage of the generated code, please refer to the documentation of [`rusty_lr`](https://github.com/ehwan/RustyLR). 7 | #[derive(Parser, Debug)] 8 | #[command(version)] 9 | #[command(about)] 10 | pub struct Args { 11 | /// Input_file to read 12 | pub input_file: String, 13 | 14 | /// Output_file to write 15 | #[arg(default_value = "out.tab.rs")] 16 | pub output_file: String, 17 | 18 | /// Do not rustfmt the output 19 | #[arg(long, default_value = "false")] 20 | pub no_format: bool, 21 | 22 | /// Do not print note information about any shift/reduce, reduce/reduce conflicts. 23 | /// 24 | /// If the target is deterministic parser, conflict will be treated as an error, so this option will be ignored. 25 | /// This option is only for non-deterministic GLR parser. 
26 | #[arg(short = 'c', long, default_value = "false")] 27 | pub no_conflict: bool, 28 | 29 | /// Do not print debug information about conflicts resolving process by any `%left`, `%right`, or `%precedence` directive. 30 | #[arg(short = 'r', long, default_value = "false")] 31 | pub no_conflict_resolve: bool, 32 | 33 | /// Do not print debug information about optimization process. 34 | #[arg(short = 'o', long, default_value = "false")] 35 | pub no_optimization: bool, 36 | 37 | /// Do not print backtrace of production rules when conflicts occurred. ruleset could be messed up 38 | #[arg(short = 'b', long, default_value = "false")] 39 | pub no_backtrace: bool, 40 | 41 | /// Override the written code and set generated parser use GLR parsing algorithm 42 | #[arg(long)] 43 | pub glr: Option, 44 | 45 | /// Override the written code and set generated parser table to use dense arrays 46 | #[arg(long)] 47 | pub dense: Option, 48 | 49 | /// Print the details of a specific state 50 | #[arg(long)] 51 | pub state: Option, 52 | } 53 | -------------------------------------------------------------------------------- /rusty_lr_core/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Core module for the Rusty LR parser. 2 | //! 3 | //! This crate is private and not intended to be used directly. 4 | //! Please use the [`rusty_lr`](https://crates.io/crates/rusty_lr) crate instead. 5 | 6 | /// FxHash for fast and non-cryptographic hashing 7 | pub mod hash; 8 | 9 | /// module for tree representation of parse results (feature `tree`). 
10 | #[cfg(feature = "tree")] 11 | pub mod tree; 12 | 13 | pub(crate) mod location; 14 | pub use location::DefaultLocation; 15 | pub use location::Location; 16 | 17 | /// module for build parser tables from CFG, (feature "builder") 18 | #[cfg(feature = "builder")] 19 | pub mod builder; 20 | 21 | /// module for core parser functionality 22 | pub mod parser; 23 | 24 | /// module for production rules representation 25 | pub mod rule; 26 | 27 | pub(crate) mod token; 28 | pub use token::TerminalSymbol; 29 | pub use token::Token; 30 | 31 | pub(crate) mod backtrace; 32 | pub use backtrace::Backtrace; 33 | 34 | /// Default error type for reduce action 35 | #[derive(Debug, Default)] 36 | pub struct DefaultReduceActionError; 37 | impl std::fmt::Display for DefaultReduceActionError { 38 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 39 | write!(f, "Default reduce action error") 40 | } 41 | } 42 | impl std::error::Error for DefaultReduceActionError { 43 | fn cause(&self) -> Option<&dyn std::error::Error> { 44 | None 45 | } 46 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 47 | None 48 | } 49 | fn description(&self) -> &str { 50 | "Default reduce action error" 51 | } 52 | } 53 | 54 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 55 | pub enum TriState { 56 | False, 57 | Maybe, 58 | True, 59 | } 60 | impl std::ops::BitOr for TriState { 61 | type Output = Self; 62 | fn bitor(self, rhs: Self) -> Self::Output { 63 | match (self, rhs) { 64 | (TriState::False, TriState::False) => TriState::False, 65 | (TriState::False, _) => TriState::Maybe, 66 | (_, TriState::False) => TriState::Maybe, 67 | (TriState::True, TriState::True) => TriState::True, 68 | _ => TriState::Maybe, 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /example/json/src/main.rs: -------------------------------------------------------------------------------- 1 | mod parser_expanded; 2 | 3 | use std::time::Instant; 4 | 5 
| use parser_expanded as parser; 6 | use rusty_lr::parser::Parser; 7 | 8 | const TEST_JSON: &'static str = r#" 9 | { 10 | "user": { 11 | "id": 12345, 12 | "name": "Anonymous", 13 | "active": true, 14 | "roles": ["engineer", "researcher"], 15 | "profile": { 16 | "email": "anonymous@example.com", 17 | "location": "Seoul", 18 | "social": { 19 | "github": "https://github.com/anonymous", 20 | "twitter": null 21 | } 22 | } 23 | }, 24 | "projects": [ 25 | { 26 | "id": "proj_001", 27 | "name": "High-Speed Simulation Engine", 28 | "description": "A tool used by scientists to simulate how air flows over objects like airplane wings or car bodies.", 29 | aa 30 | "technologies": ["High-performance computing", "Graphics cards"], 31 | "team_size": 5 32 | }, 33 | { 34 | "id": "proj_002", 35 | "name": "Smart Investment Assistant", 36 | "description": "An automated system that studies price movements and suggests when to buy or sell stocks.", 37 | "technologies": ["Artificial Intelligence", "Data Analysis"], 38 | "team_size": 1, 39 | "status": "experimental" 40 | } 41 | ], 42 | "timestamp": "2025-04-18T16:00:00+09:00", 43 | "metadata": null 44 | } 45 | "#; 46 | 47 | fn main() { 48 | let parser = parser::JsonParser::new(); 49 | println!("#rules: {}", parser.get_rules().len()); 50 | println!("#states: {}", parser.get_states().len()); 51 | 52 | fn try_once(parser: &parser::JsonParser) { 53 | let mut context = parser::JsonContext::new(); 54 | let mut range_start = 0; 55 | for ch in TEST_JSON.chars() { 56 | let range_end = range_start + ch.len_utf8(); 57 | context 58 | .feed_location(parser, ch, &mut (), range_start..range_end) 59 | .expect("Error parsing character"); 60 | range_start = range_end; 61 | } 62 | } 63 | 64 | let start = Instant::now(); 65 | for _ in 0..1000 { 66 | try_once(&parser); 67 | } 68 | let duration = start.elapsed(); 69 | println!("Parsed 1000 times in {:?}", duration); 70 | } 71 | -------------------------------------------------------------------------------- 
/example/glr/src/main.rs: -------------------------------------------------------------------------------- 1 | pub mod parser; 2 | 3 | fn main() { 4 | let parser = parser::EParser::new(); 5 | let mut context = parser::EContext::new(); 6 | 7 | let input = "1+2*3+4"; 8 | for ch in input.chars() { 9 | println!("feed: {}, possible: {}", ch, context.can_feed(&parser, &ch)); 10 | match context.feed(&parser, ch, &mut ()) { 11 | Ok(_) => { 12 | println!("nodes: {}", context.len_paths()); 13 | } 14 | Err(e) => { 15 | println!("Error: {}", e); 16 | return; 17 | } 18 | } 19 | context.debug_check(); 20 | } 21 | let result = match context.accept(&parser, &mut ()) { 22 | Ok(mut results) => results.next().unwrap(), 23 | Err(e) => { 24 | println!("Error: {}", e); 25 | return; 26 | } 27 | }; 28 | println!("Result: {}", result); 29 | 30 | let input = "1+2**3+4"; 31 | let mut context = parser::EContext::new(); 32 | for ch in input.chars() { 33 | println!( 34 | "feed: {}, can_feed(): {}", 35 | ch, 36 | context.can_feed(&parser, &ch) 37 | ); 38 | match context.feed(&parser, ch, &mut ()) { 39 | Ok(_) => { 40 | println!("nodes: {}", context.len_paths()); 41 | } 42 | Err(e) => { 43 | println!("Error: {}", e); 44 | return; 45 | } 46 | } 47 | } 48 | 49 | for result in context.accept(&parser, &mut ()).unwrap() { 50 | println!("Result: {}", result); 51 | } 52 | 53 | // for mut n in c.current_nodes.nodes.into_iter() { 54 | // loop { 55 | // println!("{}", n.state()); 56 | // if let Some(par) = n.parent() { 57 | // n = std::rc::Rc::clone(par); 58 | // } else { 59 | // break; 60 | // } 61 | // } 62 | // println!("---"); 63 | // } 64 | } 65 | 66 | #[test] 67 | fn test_parser() { 68 | let parser = parser::EParser::new(); 69 | let mut context = parser::EContext::new(); 70 | let input1 = " 1 + 2 * 3 * 4 + 5 * 6 + 7 "; 71 | for ch in input1.chars() { 72 | context.feed(&parser, ch, &mut ()).unwrap(); 73 | } 74 | 75 | let answer = 1 + 2 * 3 * 4 + 5 * 6 + 7; 76 | 77 | let mut results = context 78 | 
.accept(&parser, &mut ()) 79 | .unwrap() 80 | .collect::>(); 81 | results.sort(); 82 | assert_eq!(results.len(), 1); 83 | assert_eq!(results, [answer]); 84 | } 85 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/data_stack.rs: -------------------------------------------------------------------------------- 1 | /// A trait for data stack in the parser. 2 | /// 3 | /// Since each non-terminal could have different ruletypes, 4 | /// this effectively handles those rule types into separated `Vec` stack, 5 | /// instead of using enum of rule types (since it would be costful at memory aspects if the size differs significantly). 6 | /// For people who is curious about the implementation details, 7 | /// you should see the actual generated `DataStack` structs, like `GrammarDataStack` in `rusty_lr_parser/src/parser/parser_expanded.rs`. 8 | pub trait DataStack: Sized + Default { 9 | /// Type for terminal symbols 10 | type Term; 11 | /// Type for non-terminal symbols - this must be enum type that was auto-generated by rusty_lr 12 | type NonTerm: crate::parser::nonterminal::NonTerminal; 13 | /// Type for user data that is passed to the parser from the user. 
14 | type UserData; 15 | /// Type for `Err` variant returned by reduce action 16 | type ReduceActionError; 17 | /// The value of the start symbol 18 | type StartType; 19 | /// Type for location of the token 20 | type Location: crate::Location; 21 | 22 | fn pop_start(&mut self) -> Option; 23 | fn pop(&mut self); 24 | fn push_terminal(&mut self, term: Self::Term); 25 | fn push_empty(&mut self); 26 | 27 | fn clear(&mut self); 28 | fn reserve(&mut self, additional: usize); 29 | fn with_capacity(capacity: usize) -> Self { 30 | let mut self_: Self = Default::default(); 31 | self_.reserve(capacity); 32 | self_ 33 | } 34 | 35 | fn split_off(&mut self, at: usize) -> Self; 36 | fn truncate(&mut self, at: usize); 37 | fn append(&mut self, other: &mut Self); 38 | 39 | /// Performs a reduce action with the given rule index. 40 | /// Returns false if the empty tag was pushed by this reduce action, true otherwise. 41 | fn reduce_action( 42 | // the child tokens for the reduction 43 | // the caller (usually from generated code) must pops all of the tokens used for this reduce_action 44 | data_stack: &mut Self, 45 | location_stack: &mut Vec, 46 | push_data: bool, 47 | 48 | // the index of the production rule to reduce 49 | rule_index: usize, 50 | 51 | // for runtime-conflict-resolve. 52 | // if this variable is set to false in the action, the shift action will not be performed. (GLR parser) 53 | shift: &mut bool, 54 | // the lookahead token that caused this reduce action 55 | lookahead: &crate::TerminalSymbol, 56 | // user input data 57 | userdata: &mut Self::UserData, 58 | // location of this non-terminal, e.g. 
`@$` 59 | location0: &mut Self::Location, 60 | ) -> Result<(), Self::ReduceActionError>; 61 | } 62 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/nondeterministic/node.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::data_stack::DataStack; 2 | use crate::parser::Precedence; 3 | 4 | /// To handle multiple paths in the non-deterministic GLR parsing, 5 | /// this node represents a subrange in stack of the parser. 6 | /// this constructs LinkedList tree of nodes, where parent node is the previous token in the parse tree. 7 | #[derive(Clone)] 8 | pub struct Node { 9 | /// parent node 10 | pub parent: Option, 11 | 12 | pub child_count: usize, 13 | 14 | /// index of state in parser 15 | pub state_stack: Vec, 16 | pub data_stack: Data, 17 | pub location_stack: Vec, 18 | pub precedence_stack: Vec, 19 | #[cfg(feature = "tree")] 20 | pub(crate) tree_stack: Vec>, 21 | } 22 | 23 | impl Default for Node { 24 | fn default() -> Self { 25 | Node { 26 | parent: None, 27 | child_count: 0, 28 | state_stack: Vec::new(), 29 | data_stack: Data::default(), 30 | location_stack: Vec::new(), 31 | precedence_stack: Vec::new(), 32 | #[cfg(feature = "tree")] 33 | tree_stack: Vec::new(), 34 | } 35 | } 36 | } 37 | 38 | impl Node { 39 | /// Clear this node to `Default::default()`. 
40 | pub fn clear(&mut self) { 41 | self.parent = None; 42 | self.child_count = 0; 43 | self.state_stack.clear(); 44 | self.data_stack.clear(); 45 | self.location_stack.clear(); 46 | self.precedence_stack.clear(); 47 | #[cfg(feature = "tree")] 48 | self.tree_stack.clear(); 49 | } 50 | pub fn len(&self) -> usize { 51 | self.state_stack.len() 52 | } 53 | pub fn is_leaf(&self) -> bool { 54 | self.child_count == 0 55 | } 56 | 57 | pub fn with_capacity(capacity: usize) -> Self { 58 | Node { 59 | parent: None, 60 | child_count: 0, 61 | state_stack: Vec::with_capacity(capacity), 62 | data_stack: Data::with_capacity(capacity), 63 | location_stack: Vec::with_capacity(capacity), 64 | precedence_stack: Vec::with_capacity(capacity), 65 | #[cfg(feature = "tree")] 66 | tree_stack: Vec::with_capacity(capacity), 67 | } 68 | } 69 | pub fn reserve(&mut self, additional: usize) { 70 | self.state_stack.reserve(additional); 71 | self.data_stack.reserve(additional); 72 | self.location_stack.reserve(additional); 73 | self.precedence_stack.reserve(additional); 74 | #[cfg(feature = "tree")] 75 | self.tree_stack.reserve(additional); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/state.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::collections::BTreeSet; 3 | 4 | use crate::TriState; 5 | 6 | /// state for internal usage during grammar building stage 7 | #[derive(Debug, Clone)] 8 | pub struct State { 9 | pub shift_goto_map_term: BTreeMap, 10 | pub shift_goto_map_nonterm: BTreeMap, 11 | pub reduce_map: BTreeMap>, 12 | pub ruleset: BTreeSet, 13 | pub can_accept_error: TriState, 14 | } 15 | impl State { 16 | pub fn new() -> Self { 17 | State { 18 | shift_goto_map_term: Default::default(), 19 | shift_goto_map_nonterm: Default::default(), 20 | reduce_map: Default::default(), 21 | ruleset: Default::default(), 22 | can_accept_error: 
TriState::False, 23 | } 24 | } 25 | 26 | /// shift -= 1 for all rules in the ruleset 27 | pub fn unshifted_ruleset(&self) -> impl Iterator + '_ { 28 | self.ruleset 29 | .iter() 30 | .filter(|rule| rule.shifted > 0) 31 | .map(|rule| { 32 | let mut rule = *rule; 33 | rule.shifted -= 1; 34 | rule 35 | }) 36 | } 37 | } 38 | 39 | impl Default for State { 40 | fn default() -> Self { 41 | Self::new() 42 | } 43 | } 44 | 45 | impl From> 46 | for crate::parser::state::IntermediateState 47 | where 48 | Term: Ord, 49 | { 50 | fn from(state: crate::builder::State) -> Self { 51 | use crate::parser::state::ShiftTarget; 52 | 53 | crate::parser::state::IntermediateState { 54 | shift_goto_map_term: state 55 | .shift_goto_map_term 56 | .into_iter() 57 | .map(|(term, state_index)| { 58 | ( 59 | term, 60 | ShiftTarget { 61 | state: state_index.into(), 62 | push: true, 63 | }, 64 | ) 65 | }) 66 | .collect(), 67 | shift_goto_map_nonterm: state 68 | .shift_goto_map_nonterm 69 | .into_iter() 70 | .map(|(nonterm, state_index)| { 71 | ( 72 | nonterm, 73 | ShiftTarget { 74 | state: state_index.into(), 75 | push: true, 76 | }, 77 | ) 78 | }) 79 | .collect(), 80 | reduce_map: state 81 | .reduce_map 82 | .into_iter() 83 | .map(|(term, rules)| (term, rules.into_iter().collect())) 84 | .collect(), 85 | ruleset: state.ruleset.into_iter().collect(), 86 | can_accept_error: state.can_accept_error, 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /rusty_lr_core/src/token.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | use std::hash::Hash; 4 | 5 | /// A type for terminal symbols in the grammar. 6 | /// just because we have to take care of the `error` token specially, 7 | /// and future support for other special tokens. 
/// A terminal symbol fed to the parser.
///
/// Wraps the user's `Term` so the special `error` and `eof` tokens can be
/// represented alongside ordinary terminals.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum TerminalSymbol<Term> {
    /// an ordinary terminal carrying the user's token value
    Term(Term),
    /// the special `error` token
    Error,
    /// the special end-of-file token
    Eof,
}
impl<Term> TerminalSymbol<Term> {
    /// `true` if this is the special `error` token.
    pub fn is_error(&self) -> bool {
        matches!(self, Self::Error)
    }
    /// `true` if this is an ordinary terminal.
    pub fn is_term(&self) -> bool {
        matches!(self, Self::Term(_))
    }
    /// `true` if this is the end-of-file token.
    pub fn is_eof(&self) -> bool {
        matches!(self, Self::Eof)
    }
    /// Borrows the inner terminal; `None` for `Error`/`Eof`.
    pub fn to_term(&self) -> Option<&Term> {
        if let Self::Term(term) = self {
            Some(term)
        } else {
            None
        }
    }
    /// Consumes self and returns the inner terminal; `None` for `Error`/`Eof`.
    pub fn into_term(self) -> Option<Term> {
        match self {
            Self::Term(term) => Some(term),
            Self::Error | Self::Eof => None,
        }
    }
}

impl<Term: std::fmt::Display> std::fmt::Display for TerminalSymbol<Term> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Term(term) => write!(f, "{}", term),
            Self::Error => f.write_str("error"),
            Self::Eof => f.write_str("eof"),
        }
    }
}
53 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 54 | pub enum Token { 55 | Term(Term), 56 | NonTerm(NonTerm), 57 | } 58 | impl Display for Token { 59 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 60 | match self { 61 | Token::Term(term) => write!(f, "{}", term), 62 | Token::NonTerm(nonterm) => write!(f, "{}", nonterm), 63 | } 64 | } 65 | } 66 | impl Debug for Token { 67 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 68 | match self { 69 | Token::Term(term) => write!(f, "{:?}", term), 70 | Token::NonTerm(nonterm) => write!(f, "{:?}", nonterm), 71 | } 72 | } 73 | } 74 | 75 | impl Token { 76 | pub fn map( 77 | self, 78 | term_map: impl Fn(Term) -> NewTerm, 79 | nonterm_map: impl Fn(NonTerm) -> NewNonTerm, 80 | ) -> Token { 81 | match self { 82 | Token::Term(term) => Token::Term(term_map(term)), 83 | Token::NonTerm(nonterm) => Token::NonTerm(nonterm_map(nonterm)), 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/deterministic/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | 4 | use crate::TerminalSymbol; 5 | 6 | #[derive(Clone, Debug)] 7 | pub struct NoActionError { 8 | pub term: TerminalSymbol, 9 | pub location: Option, 10 | pub state: usize, 11 | } 12 | #[derive(Clone, Debug)] 13 | pub struct ReduceActionError { 14 | pub term: TerminalSymbol, 15 | pub location: Option, 16 | pub state: usize, 17 | pub source: Source, 18 | } 19 | 20 | #[derive(Clone, Debug)] 21 | pub struct NoPrecedenceError { 22 | pub term: TerminalSymbol, 23 | pub location: Option, 24 | pub state: usize, 25 | pub rule: usize, 26 | } 27 | 28 | /// Error type for feed() 29 | #[derive(Clone, Debug)] 30 | pub enum ParseError { 31 | /// No action defined for the given terminal in the parser table. 32 | /// location will be `None` if the terminal was eof. 
33 | NoAction(NoActionError), 34 | 35 | /// Error from reduce action. 36 | /// location will be `None` if the terminal was eof. 37 | ReduceAction(ReduceActionError), 38 | 39 | /// Rule index when shift/reduce conflict occur with no shift/reduce precedence defined. 40 | /// This is same as when setting %nonassoc in Bison. 41 | /// location will be `None` if the terminal was eof. 42 | NoPrecedence(NoPrecedenceError), 43 | } 44 | 45 | impl ParseError { 46 | /// location will be `None` if the terminal was eof. 47 | pub fn location(&self) -> &Option { 48 | match self { 49 | ParseError::NoAction(err) => &err.location, 50 | ParseError::ReduceAction(err) => &err.location, 51 | ParseError::NoPrecedence(err) => &err.location, 52 | } 53 | } 54 | 55 | pub fn term(&self) -> &TerminalSymbol { 56 | match self { 57 | ParseError::NoAction(err) => &err.term, 58 | ParseError::ReduceAction(err) => &err.term, 59 | ParseError::NoPrecedence(err) => &err.term, 60 | } 61 | } 62 | 63 | pub fn state(&self) -> usize { 64 | match self { 65 | ParseError::NoAction(err) => err.state, 66 | ParseError::ReduceAction(err) => err.state, 67 | ParseError::NoPrecedence(err) => err.state, 68 | } 69 | } 70 | } 71 | 72 | impl Display for ParseError 73 | where 74 | Term: Display, 75 | ReduceAction: Display, 76 | { 77 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 78 | match self { 79 | ParseError::NoAction(err) => { 80 | write!(f, "NoAction: {}, State: {}", err.term, err.state) 81 | } 82 | ParseError::ReduceAction(err) => { 83 | write!( 84 | f, 85 | "ReduceAction: {}, State: {}\nSource: {}", 86 | err.term, err.state, err.source 87 | ) 88 | } 89 | ParseError::NoPrecedence(err) => { 90 | write!(f, "NoPrecedence: {}, State: {}", err.rule, err.state) 91 | } 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/terminal_info.rs: -------------------------------------------------------------------------------- 1 | use 
proc_macro2::Ident; 2 | use proc_macro2::Span; 3 | 4 | use proc_macro2::TokenStream; 5 | use quote::ToTokens; 6 | 7 | #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] 8 | pub enum TerminalName { 9 | /// defined in %token 10 | Ident(Ident), 11 | 12 | /// defined as literal anywhere in the grammar 13 | CharRange(char, char), 14 | } 15 | impl TerminalName { 16 | pub fn count(&self) -> usize { 17 | match self { 18 | TerminalName::Ident(_) => 1, 19 | TerminalName::CharRange(s, l) => { 20 | let s = *s as usize; 21 | let l = *l as usize; 22 | l + 1 - s 23 | } 24 | } 25 | } 26 | pub fn ident(&self) -> Option<&Ident> { 27 | match self { 28 | TerminalName::Ident(ident) => Some(ident), 29 | TerminalName::CharRange(_, _) => None, 30 | } 31 | } 32 | pub fn into_ident(self) -> Option { 33 | match self { 34 | TerminalName::Ident(ident) => Some(ident), 35 | TerminalName::CharRange(_, _) => None, 36 | } 37 | } 38 | // pub fn char(&self) -> Option { 39 | // match self { 40 | // TerminalName::Ident(_) => None, 41 | // TerminalName::Char(c) => Some(*c), 42 | // } 43 | // } 44 | pub fn name(self) -> Ident { 45 | match self { 46 | TerminalName::Ident(name) => name, 47 | TerminalName::CharRange(c, _) => { 48 | let s = format!("_Terminal{}", c as u32); 49 | Ident::new(&s, Span::call_site()) 50 | } 51 | } 52 | } 53 | pub fn pretty_name(&self, is_char: bool, is_u8: bool) -> String { 54 | match self { 55 | TerminalName::Ident(ident) => ident.to_string(), 56 | TerminalName::CharRange(start, last) => { 57 | if is_char { 58 | let start_tok = syn::LitChar::new(*start, Span::call_site()).to_token_stream(); 59 | let last_tok = syn::LitChar::new(*last, Span::call_site()).to_token_stream(); 60 | if start == last { 61 | format!("{start_tok}") 62 | } else { 63 | format!("{start_tok}-{last_tok}") 64 | } 65 | } else if is_u8 { 66 | let start_tok = 67 | syn::LitByte::new(*start as u8, Span::call_site()).to_token_stream(); 68 | let last_tok = 69 | syn::LitByte::new(*last as u8, 
Span::call_site()).to_token_stream(); 70 | if start == last { 71 | format!("{start_tok}") 72 | } else { 73 | format!("{start_tok}-{last_tok}") 74 | } 75 | } else { 76 | unreachable!("unexpected char type") 77 | } 78 | } 79 | } 80 | } 81 | } 82 | impl From for TerminalName { 83 | fn from(ident: Ident) -> Self { 84 | TerminalName::Ident(ident) 85 | } 86 | } 87 | impl From<(char, char)> for TerminalName { 88 | fn from(c: (char, char)) -> Self { 89 | TerminalName::CharRange(c.0, c.1) 90 | } 91 | } 92 | impl From<(u32, u32)> for TerminalName { 93 | fn from(c: (u32, u32)) -> Self { 94 | let s = unsafe { char::from_u32_unchecked(c.0) }; 95 | let l = unsafe { char::from_u32_unchecked(c.1) }; 96 | TerminalName::CharRange(s, l) 97 | } 98 | } 99 | 100 | pub struct TerminalInfo { 101 | pub name: TerminalName, 102 | 103 | /// the precedence level of this terminal 104 | pub precedence: Option<(usize, Span)>, 105 | 106 | /// the actual Rust expr to be emitted 107 | pub body: TokenStream, 108 | } 109 | -------------------------------------------------------------------------------- /rusty_lr_core/src/builder/diags.rs: -------------------------------------------------------------------------------- 1 | use crate::rule::ShiftedRuleRef; 2 | 3 | use std::collections::BTreeMap; 4 | use std::collections::BTreeSet; 5 | 6 | pub struct DiagnosticCollector { 7 | pub enabled: bool, 8 | pub reduce_reduce_resolved: BTreeSet<(usize, BTreeSet, BTreeSet)>, 9 | pub shift_reduce_resolved_shift: BTreeMap< 10 | (Term, Vec), 11 | ( 12 | usize, // shift precedence 13 | BTreeMap, 14 | // (rule, reduce precedence) 15 | ), 16 | >, 17 | pub shift_reduce_resolved_reduce: BTreeMap< 18 | (Term, Vec), 19 | ( 20 | usize, // shift precedence 21 | BTreeMap, 22 | // (rule, reduce precedence) 23 | ), 24 | >, 25 | pub reduce_reduce_conflicts: BTreeMap)>, BTreeSet>, 26 | pub shift_reduce_conflicts: BTreeMap< 27 | (Term, Vec, Vec), 28 | BTreeMap>, 29 | >, 30 | } 31 | impl DiagnosticCollector { 32 | pub fn new(collect: 
bool) -> Self { 33 | DiagnosticCollector { 34 | enabled: collect, 35 | reduce_reduce_resolved: BTreeSet::new(), 36 | shift_reduce_resolved_shift: BTreeMap::new(), 37 | shift_reduce_resolved_reduce: BTreeMap::new(), 38 | shift_reduce_conflicts: BTreeMap::new(), 39 | reduce_reduce_conflicts: BTreeMap::new(), 40 | } 41 | } 42 | pub fn add_reduce_reduce_resolved( 43 | &mut self, 44 | max_priority: usize, 45 | reduce_rules: BTreeSet, 46 | removed_rules: BTreeSet, 47 | ) where 48 | Term: Ord, 49 | { 50 | if self.enabled { 51 | self.reduce_reduce_resolved 52 | .insert((max_priority, reduce_rules, removed_rules)); 53 | } 54 | } 55 | pub fn add_shift_reduce_resolved_shift( 56 | &mut self, 57 | term: Term, 58 | shift_rules: Vec, 59 | shift_precedence: usize, 60 | mut reduce_rules: BTreeMap, 61 | ) where 62 | Term: Ord, 63 | { 64 | if self.enabled { 65 | let value = self 66 | .shift_reduce_resolved_shift 67 | .entry((term, shift_rules)) 68 | .or_default(); 69 | value.0 = shift_precedence; 70 | value.1.append(&mut reduce_rules); 71 | } 72 | } 73 | pub fn add_shift_reduce_resolved_reduce( 74 | &mut self, 75 | term: Term, 76 | shift_rules: Vec, 77 | shift_precedence: usize, 78 | mut reduce_rules: BTreeMap, 79 | ) where 80 | Term: Ord, 81 | { 82 | if self.enabled { 83 | let value = self 84 | .shift_reduce_resolved_reduce 85 | .entry((term, shift_rules)) 86 | .or_default(); 87 | value.0 = shift_precedence; 88 | value.1.append(&mut reduce_rules); 89 | } 90 | } 91 | pub fn add_shift_reduce_conflict( 92 | &mut self, 93 | term: Term, 94 | shift_rules: Vec, 95 | shift_rules_backtrace: Vec, 96 | mut reduce_rules: BTreeMap>, 97 | ) where 98 | Term: Ord, 99 | { 100 | if self.enabled { 101 | self.shift_reduce_conflicts 102 | .entry((term, shift_rules, shift_rules_backtrace)) 103 | .or_default() 104 | .append(&mut reduce_rules); 105 | } 106 | } 107 | pub fn update_reduce_reduce_conflict( 108 | &mut self, 109 | reduce_rules: Vec<(usize, Vec)>, 110 | term: Term, 111 | ) where 112 | Term: Ord, 
113 | { 114 | if self.enabled { 115 | self.reduce_reduce_conflicts 116 | .entry(reduce_rules) 117 | .or_default() 118 | .insert(term); 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /scripts/bootstrap_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Bootstrap rusty_lr_parser/src/parser/parser.rs with different configurations 4 | # and check if the output files are identical. 5 | 6 | compare_files() { 7 | # Check if both arguments are provided 8 | if [ $# -ne 2 ]; then 9 | echo "Usage: compare_files " 10 | return 1 11 | fi 12 | 13 | local file1="$1" 14 | local file2="$2" 15 | 16 | # Check if both files exist 17 | if [ ! -f "$file1" ]; then 18 | echo "Error: File '$file1' does not exist" 19 | return 1 20 | fi 21 | 22 | if [ ! -f "$file2" ]; then 23 | echo "Error: File '$file2' does not exist" 24 | return 1 25 | fi 26 | 27 | # Run diff and capture the exit code 28 | if diff "$file1" "$file2" > /dev/null; then 29 | echo "Files are identical" 30 | return 0 31 | else 32 | echo "Error: Files are different" 33 | diff "$file1" "$file2" 34 | return 1 35 | fi 36 | } 37 | 38 | # Get the directory where this script is located and go up one level 39 | script_dir="$(dirname "$0")" 40 | rustylr_path="$(realpath "$script_dir/..")" 41 | is_from_github_actions=$1 42 | 43 | process_and_compare() { 44 | local config="$1" 45 | cargo run --bin rustylr -- "$rustylr_path/rusty_lr_parser/src/parser/parser.rs" out.tab.rs $config > /dev/null 46 | mv out.tab.rs "$rustylr_path/rusty_lr_parser/src/parser/parser_expanded.rs" 47 | cargo run --bin rustylr -- "$rustylr_path/rusty_lr_parser/src/parser/parser.rs" out.tab.rs $config > /dev/null 48 | compare_files "$rustylr_path/rusty_lr_parser/src/parser/parser_expanded.rs" out.tab.rs 49 | if [ $? 
-ne 0 ]; then 50 | exit 1 51 | fi 52 | } 53 | 54 | echo "RustyLR path: $rustylr_path" 55 | 56 | 57 | # to briefly see the difference of the generated parser in the PR, run for the sample calculator and json parsers 58 | cargo run --bin rustylr -- "$rustylr_path/example/calculator/src/parser.rs" "$rustylr_path/scripts/diff/calculator_new.rs" > /dev/null 59 | cargo run --bin rustylr -- "$rustylr_path/example/calculator_u8/src/parser.rs" "$rustylr_path/scripts/diff/calculator_u8_new.rs" > /dev/null 60 | cargo run --bin rustylr -- "$rustylr_path/example/json/src/parser.rs" "$rustylr_path/scripts/diff/json_new.rs" > /dev/null 61 | if [ "$is_from_github_actions" = "true" ]; then 62 | diff "$rustylr_path/scripts/diff/calculator.rs" "$rustylr_path/scripts/diff/calculator_new.rs" >/dev/null 63 | if [ $? -ne 0 ]; then 64 | echo "scripts/diff/ is not updated. Please run scripts/bootstrap_test.sh locally and commit the changes." 65 | exit 1 66 | fi 67 | 68 | diff "$rustylr_path/scripts/diff/calculator_u8.rs" "$rustylr_path/scripts/diff/calculator_u8_new.rs" >/dev/null 69 | if [ $? -ne 0 ]; then 70 | echo "scripts/diff/ is not updated. Please run scripts/bootstrap_test.sh locally and commit the changes." 71 | exit 1 72 | fi 73 | 74 | diff "$rustylr_path/scripts/diff/json.rs" "$rustylr_path/scripts/diff/json_new.rs" >/dev/null 75 | if [ $? -ne 0 ]; then 76 | echo "scripts/diff/ is not updated. Please run scripts/bootstrap_test.sh locally and commit the changes." 
77 | exit 1 78 | fi 79 | fi 80 | 81 | mv "$rustylr_path/scripts/diff/calculator_new.rs" "$rustylr_path/scripts/diff/calculator.rs" 82 | mv "$rustylr_path/scripts/diff/calculator_u8_new.rs" "$rustylr_path/scripts/diff/calculator_u8.rs" 83 | mv "$rustylr_path/scripts/diff/json_new.rs" "$rustylr_path/scripts/diff/json.rs" 84 | 85 | echo "Setting Dense = false, GLR = false" 86 | process_and_compare "--dense false --glr false" 87 | 88 | echo "Setting Dense = false, GLR = true" 89 | process_and_compare "--dense false --glr true" 90 | 91 | echo "Setting Dense = true, GLR = false" 92 | process_and_compare "--dense true --glr false" 93 | 94 | echo "Setting Dense = true, GLR = true" 95 | process_and_compare "--dense true --glr true" 96 | 97 | echo "Normal configuration" 98 | process_and_compare "" 99 | mv out.tab.rs "$rustylr_path/rusty_lr_parser/src/parser/parser_expanded.rs" 100 | 101 | cargo test --bin glr 102 | if [ $? -ne 0 ]; then 103 | exit 1 104 | fi 105 | 106 | echo "All tests passed." -------------------------------------------------------------------------------- /rusty_lr_derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate provides a procedural macro to generate a parser from a grammar. 2 | //! 3 | //! This crate is private and not intended to be used directly. 4 | //! Please use the [`rusty_lr`](https://crates.io/crates/rusty_lr) crate instead. 5 | 6 | use proc_macro::TokenStream; 7 | use quote::quote; 8 | 9 | /// Build a parser table from a grammar definition. 10 | /// 11 | /// This macro will generate a `Parser` and `Context` structs. 
12 | #[proc_macro] 13 | pub fn lr1(input: TokenStream) -> TokenStream { 14 | let input = input.into(); 15 | use rusty_lr_parser::grammar::Grammar; 16 | let mut grammar_args = match Grammar::parse_args(input) { 17 | Ok(grammar_args) => grammar_args, 18 | Err(e) => return e.to_compile_error().into(), 19 | }; 20 | match Grammar::arg_check_error(&mut grammar_args) { 21 | Ok(_) => {} 22 | Err(e) => return e.to_compile_error().into(), 23 | } 24 | 25 | // If there are any errors in the grammar arguments, emit compile errors. 26 | if !grammar_args.error_recovered.is_empty() { 27 | let mut output = proc_macro2::TokenStream::new(); 28 | for error in &grammar_args.error_recovered { 29 | let span = error.span.span(); 30 | let message = format!("{}\n >>> refer to: {}", error.message, error.link,); 31 | output.extend(quote::quote_spanned! { 32 | span=> 33 | compile_error!(#message); 34 | }); 35 | } 36 | return output.into(); 37 | } 38 | 39 | let mut grammar = match Grammar::from_grammar_args(grammar_args) { 40 | Ok(grammar) => grammar, 41 | Err(e) => return e.to_compile_error().into(), 42 | }; 43 | if grammar.optimize { 44 | grammar.optimize(15); 45 | } 46 | grammar.builder = grammar.create_builder(); 47 | let diags = grammar.build_grammar(); 48 | if !grammar.glr { 49 | if let Some(((term, shift_rules, _), reduce_rules)) = 50 | diags.shift_reduce_conflicts.into_iter().next() 51 | { 52 | let class_mapper = |term| grammar.class_pretty_name_list(term, 5); 53 | let nonterm_mapper = |term| grammar.nonterm_pretty_name(term); 54 | let term = class_mapper(term); 55 | let (reduce_rule, _) = reduce_rules.into_iter().next().unwrap(); 56 | let reduce_rule = grammar.builder.rules[reduce_rule] 57 | .rule 58 | .clone() 59 | .map(class_mapper, nonterm_mapper); 60 | let shift_rules = shift_rules 61 | .into_iter() 62 | .map(|rule| { 63 | format!( 64 | "\n>>> {}", 65 | grammar.builder.rules[rule.rule] 66 | .rule 67 | .clone() 68 | .map(class_mapper, nonterm_mapper) 69 | .into_shifted(rule.shifted) 
70 | ) 71 | }) 72 | .collect::>() 73 | .join(""); 74 | 75 | let message = format!( 76 | "Shift-Reduce conflict with terminal symbol: {}\n>>> Reduce: {}\n>>> Shifts: {}", 77 | term, reduce_rule, shift_rules 78 | ); 79 | return quote! { 80 | compile_error!(#message); 81 | } 82 | .into(); 83 | } 84 | if let Some((reduce_rules, reduce_terms)) = diags.reduce_reduce_conflicts.into_iter().next() 85 | { 86 | let class_mapper = |term| grammar.class_pretty_name_list(term, 5); 87 | let nonterm_mapper = |term| grammar.nonterm_pretty_name(term); 88 | let terms = reduce_terms 89 | .into_iter() 90 | .map(&class_mapper) 91 | .collect::>() 92 | .join(", "); 93 | let reduce_rules = reduce_rules 94 | .into_iter() 95 | .map(|(rule, _)| { 96 | format!( 97 | "\n>>> {}", 98 | grammar.builder.rules[rule] 99 | .rule 100 | .clone() 101 | .map(class_mapper, nonterm_mapper) 102 | ) 103 | }) 104 | .collect::>() 105 | .join(""); 106 | 107 | let message = format!( 108 | "Reduce-Reduce conflict with terminal symbols: {}\n>>> Reduce: {}", 109 | terms, reduce_rules 110 | ); 111 | return quote! 
{ 112 | compile_error!(#message); 113 | } 114 | .into(); 115 | } 116 | } 117 | 118 | grammar.emit_compiletime().into() 119 | } 120 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/nonterminal_info.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeSet; 2 | 3 | use super::token::TokenMapped; 4 | use proc_macro2::Ident; 5 | use proc_macro2::Span; 6 | use proc_macro2::TokenStream; 7 | 8 | #[derive(Clone)] 9 | pub struct CustomReduceAction { 10 | pub body: TokenStream, 11 | idents_used: BTreeSet, 12 | } 13 | 14 | impl CustomReduceAction { 15 | fn fetch_idents(set: &mut BTreeSet, ts: TokenStream) { 16 | for token in ts { 17 | match token { 18 | proc_macro2::TokenTree::Group(g) => { 19 | Self::fetch_idents(set, g.stream()); 20 | } 21 | proc_macro2::TokenTree::Ident(i) => { 22 | set.insert(i); 23 | } 24 | _ => {} 25 | } 26 | } 27 | } 28 | pub fn new(body: TokenStream) -> Self { 29 | let mut idents_used = BTreeSet::new(); 30 | Self::fetch_idents(&mut idents_used, body.clone()); 31 | Self { body, idents_used } 32 | } 33 | fn contains_ident(&self, ident: &Ident) -> bool { 34 | self.idents_used.contains(ident) 35 | } 36 | } 37 | 38 | pub enum ReduceAction { 39 | /// reduce action that is function-like TokenStream 40 | Custom(CustomReduceAction), 41 | /// reduce action that is auto-generated, and simply returns the i'th token itself 42 | Identity(usize), // index of the token in the rule 43 | } 44 | 45 | impl ReduceAction { 46 | pub fn new_custom(body: TokenStream) -> Self { 47 | ReduceAction::Custom(CustomReduceAction::new(body)) 48 | } 49 | pub fn is_identity(&self) -> bool { 50 | matches!(self, ReduceAction::Identity(_)) 51 | } 52 | pub fn is_custom(&self) -> bool { 53 | matches!(self, ReduceAction::Custom(_)) 54 | } 55 | } 56 | 57 | pub struct Rule { 58 | pub tokens: Vec, 59 | /// reduce action called when this rule is reduced 60 | pub reduce_action: 
Option, 61 | /// span of '|' or ':' before this production rule 62 | pub separator_span: Span, 63 | /// force lookahead tokens for this pattern. 64 | pub lookaheads: Option>, 65 | /// %prec definition 66 | pub prec: Option<(rusty_lr_core::rule::Precedence, Span)>, 67 | /// %dprec definition 68 | pub dprec: Option<(usize, Span)>, 69 | 70 | /// in `Grammar::build_grammar()`, some production rules will be optimized out and deleted 71 | pub(crate) is_used: bool, 72 | } 73 | 74 | impl Rule { 75 | pub fn span_pair(&self) -> (Span, Span) { 76 | let begin = self.separator_span; 77 | let end = if let Some(token) = self.tokens.last() { 78 | token.end_span 79 | } else { 80 | begin 81 | }; 82 | (begin, end) 83 | } 84 | pub fn reduce_action_contains_ident(&self, ident: &Ident) -> bool { 85 | match self.reduce_action.as_ref() { 86 | Some(ReduceAction::Custom(custom)) => custom.contains_ident(ident), 87 | Some(ReduceAction::Identity(identity_idx)) => { 88 | self.tokens[*identity_idx].mapto.as_ref() == Some(ident) 89 | } 90 | None => false, 91 | } 92 | } 93 | } 94 | 95 | pub struct NonTerminalInfo { 96 | pub name: Ident, 97 | 98 | /// Name of auto generated rule are in the format of `__AutoRule ...` 99 | /// So we need other abbreviation for auto generated rules. 100 | pub pretty_name: String, 101 | 102 | /// The rule type of this non-terminal 103 | pub ruletype: Option, 104 | 105 | /// Every set of production rules 106 | pub rules: Vec, 107 | 108 | /// If this non-terminal is auto-generated from regex pattern, 109 | /// the (begin, end) span-pair of the regex pattern. 110 | pub(crate) regex_span: Option<(Span, Span)>, 111 | 112 | pub(crate) trace: bool, 113 | /// protected from optimization removal; trace rules are always protected 114 | pub(crate) protected: bool, 115 | 116 | /// if this non-terminal is auto-generated, the pattern that generated this rule. 117 | /// This field is used in rusty_lr_core/tree.rs to unwrap left/right recursion parsing tree into flat array. 
118 | pub(crate) nonterm_type: Option, 119 | } 120 | 121 | impl NonTerminalInfo { 122 | pub fn is_auto_generated(&self) -> bool { 123 | self.regex_span.is_some() 124 | } 125 | /// only for auto-generated rules 126 | /// returns the span of the regex pattern that generated this rule 127 | pub fn origin_span(&self) -> Option<(Span, Span)> { 128 | self.regex_span 129 | } 130 | 131 | /// if this non-terminal is protected from optimization; will not be automatically deleted 132 | pub(crate) fn is_protected(&self) -> bool { 133 | self.protected 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /GLR.md: -------------------------------------------------------------------------------- 1 | # GLR Parsing in RustyLR 2 | RustyLR supports Generalized LR (GLR) parsing, enabling it to handle ambiguous or nondeterministic grammars that traditional LR(1) or LALR(1) parsers cannot process. 3 | 4 | When a GLR parser encounters a conflict (such as shift/reduce or reduce/reduce), 5 | it forks the current parsing state into multiple branches, 6 | each representing a different possible interpretation of the input. 7 | These branches are processed in parallel, and invalid paths are pruned as parsing progresses. 8 | 9 | ## Enabling GLR Parsing 10 | To use GLR parsing in RustyLR, include the `%glr;` directive in your grammar definition. 11 | This directive instructs RustyLR to generate a GLR parser, 12 | which can handle ambiguous grammars by exploring multiple parsing paths. 13 | 14 | Once the `%glr` directive is added, any conflicts in the grammar will not be reported as errors. 15 | It's important to be aware of points in your grammar where shift/reduce or reduce/reduce conflicts occur, as each divergence increases computational complexity. 16 | 17 | **Tip:** If you are using the `rustylr` executable, you can use the `--verbose` option to see any conflicts in the grammar and their divergent paths. 
18 | 19 | ## Example: Ambiguous Grammar 20 | 21 | ```rust 22 | %glr; 23 | %tokentype char; 24 | %start E; 25 | 26 | Digit(char): ['0'-'9'] ; 27 | 28 | E(i32): E '+' E { E + E } 29 | | E '*' E { E * E } 30 | | Digit { Digit.to_digit(10).unwrap() as i32 }; 31 | ``` 32 | 33 | In this grammar, the expression `1 + 2 * 3 + 4` has multiple valid parse trees due to the ambiguity in operator precedence and associativity: 34 | - `((1 + 2) * 3) + 4` 35 | - `(1 + (2 * 3)) + 4` 36 | - `1 + ((2 * 3) + 4)` 37 | - `1 + (2 * (3 + 4))` 38 | - `(1 + 2) * (3 + 4)` 39 | 40 | The GLR parser will explore all possible parsing paths to construct the parse forest. 41 | 42 | ## Resolving Ambiguities 43 | RustyLR allows you to resolve ambiguities dynamically within reduce actions. 44 | Simply returning `Err` from a reduce action will prune the current branch of the parse tree. 45 | By inspecting the lookahead token or other context, you can decide whether to proceed with a particular reduction. 46 | 47 | For example, to enforce operator precedence (e.g., `*` has higher precedence than `+`), you can modify the reduce actions as follows: 48 | 49 | ```rust 50 | E : E '+' E { 51 | match *lookahead.to_term().unwrap() { 52 | '*' => { 53 | // Don't reduce if the next token is '*' 54 | // This prevents: 55 | // E + E / * 56 | // ^ lookahead 57 | // from becoming: E * ... 58 | // ^ (E + E) 59 | return Err("".to_string()); 60 | } 61 | _ => { 62 | // Revoke the shift action 63 | // This prevents: 64 | // E + E / + 65 | // ^ lookahead 66 | // from becoming: E + E + ... 67 | // and enforces only the reduce action: 68 | // E + ... 69 | // ^ (E + E) 70 | *shift = false; 71 | } 72 | } 73 | E + E // Return the result of the addition 74 | } 75 | ``` 76 | 77 | ### Predefined Variables in Reduce Actions 78 | - `lookahead: &TerminalSymbol` - refers to the next token in the input stream. 
either a terminal fed by the user or a special token like `error`
self.ranges.iter().enumerate() { 13 | if l < start { 14 | new_ranges.push((s, l)); 15 | continue; 16 | } 17 | if s > last { 18 | if !added { 19 | new_ranges.push((start, last)); 20 | } 21 | new_ranges.extend_from_slice(&self.ranges[idx..]); 22 | added = true; 23 | break; 24 | } 25 | 26 | // must overlap here 27 | 28 | use std::cmp::Ordering; 29 | match start.cmp(&s) { 30 | Ordering::Less => { 31 | match last.cmp(&l) { 32 | Ordering::Less => { 33 | // nnnnnnn 34 | // rrrrr 35 | new_ranges.push((start, s - 1)); 36 | new_ranges.push((s, last)); 37 | new_ranges.push((last + 1, l)); 38 | added = true; 39 | } 40 | Ordering::Equal => { 41 | // nnnnnn 42 | // rrr 43 | new_ranges.push((start, s - 1)); 44 | new_ranges.push((s, l)); 45 | added = true; 46 | } 47 | Ordering::Greater => { 48 | // nnnnnnn 49 | // rrr 50 | new_ranges.push((start, s - 1)); 51 | new_ranges.push((s, l)); 52 | start = l + 1; 53 | } 54 | } 55 | } 56 | Ordering::Equal => { 57 | match last.cmp(&l) { 58 | Ordering::Less => { 59 | // nnnn 60 | // rrrrrr 61 | new_ranges.push((start, last)); 62 | added = true; 63 | new_ranges.push((last + 1, l)); 64 | } 65 | Ordering::Equal => { 66 | // nnnn 67 | // rrrr 68 | new_ranges.push((start, last)); 69 | added = true; 70 | } 71 | Ordering::Greater => { 72 | // nnnnnnnn 73 | // rrrr 74 | new_ranges.push((s, l)); 75 | start = l + 1; 76 | } 77 | } 78 | } 79 | Ordering::Greater => { 80 | // start > s 81 | match last.cmp(&l) { 82 | Ordering::Less => { 83 | // nnnnn 84 | // rrrrrrrrr 85 | new_ranges.push((s, start - 1)); 86 | new_ranges.push((start, last)); 87 | new_ranges.push((last + 1, l)); 88 | added = true; 89 | } 90 | Ordering::Equal => { 91 | // nnnnnnn 92 | // rrrrrrrrr 93 | new_ranges.push((s, start - 1)); 94 | new_ranges.push((start, last)); 95 | added = true; 96 | } 97 | Ordering::Greater => { 98 | // nnnnnnnnn 99 | // rrrrrrrrr 100 | new_ranges.push((s, start - 1)); 101 | new_ranges.push((start, l)); 102 | start = l + 1; 103 | } 104 | } 105 | } 106 | } 107 | } 
108 | if !added { 109 | new_ranges.push((start, last)); 110 | } 111 | 112 | self.ranges = new_ranges; 113 | } 114 | 115 | pub fn get_ranges(&self, start: u32, last: u32) -> impl Iterator { 116 | let first_idx = match self.ranges.binary_search(&(start, start)) { 117 | Ok(idx) => idx, 118 | Err(idx) => idx, 119 | }; 120 | let end_idx = match self.ranges.binary_search(&(last, last)) { 121 | Ok(idx) => idx + 1, 122 | Err(idx) => idx, 123 | }; 124 | first_idx..end_idx 125 | } 126 | 127 | pub fn iter(&self) -> impl Iterator + '_ { 128 | self.ranges.iter().copied() 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /rusty_lr_executable/README.md: -------------------------------------------------------------------------------- 1 | # rustylr 2 | Executable for rusty_lr, a bison-like parser generator & compiler frontend for Rust supporting IELR(1), LALR(1) parser tables, with deterministic LR and non-deterministic LR (GLR) parsing. 3 | 4 | 5 | ## Installation 6 | ```bash 7 | cargo install rustylr 8 | ``` 9 | 10 | ## Usage 11 | ```bash 12 | $ rustylr --help 13 | Usage: rustylr [OPTIONS] [OUTPUT_FILE] 14 | 15 | Arguments: 16 | 17 | Input_file to read 18 | 19 | [OUTPUT_FILE] 20 | Output_file to write 21 | 22 | [default: out.tab.rs] 23 | 24 | Options: 25 | --no-format 26 | Do not rustfmt the output 27 | 28 | -c, --no-conflict 29 | Do not print note information about any shift/reduce, reduce/reduce conflicts. 30 | 31 | If the target is deterministic parser, conflict will be treated as an error, so this option will be ignored. This option is only for non-deterministic GLR parser. 32 | 33 | -r, --no-conflict-resolve 34 | Do not print debug information about conflicts resolving process by any `%left`, `%right`, or `%precedence` directive 35 | 36 | -o, --no-optimization 37 | Do not print debug information about optimization process 38 | 39 | -b, --no-backtrace 40 | Do not print backtrace of production rules when conflicts occurred. 
ruleset could be messed up 41 | 42 | --glr 43 | Override the written code and set generated parser use GLR parsing algorithm 44 | 45 | [possible values: true, false] 46 | 47 | --dense 48 | Override the written code and set generated parser table to use dense arrays 49 | 50 | [possible values: true, false] 51 | 52 | --state 53 | Print the details of a specific state 54 | 55 | -h, --help 56 | Print help (see a summary with '-h') 57 | 58 | -V, --version 59 | Print version 60 | ``` 61 | 62 | ## Grammar File Format 63 | The program searches for `%%` in the input file to separate Rust code from grammar definitions. 64 | 65 | - **Before `%%`**: Regular Rust code (imports, type definitions, etc.) that will be copied to the output file as-is 66 | - **After `%%`**: Context-free grammar definition that must follow the [RustyLR syntax](../SYNTAX.md) 67 | 68 | ## Example 69 | 70 | Here's a simple example showing how to create a grammar file and generate a parser: 71 | 72 | **Input file** (`my_grammar.rs`): 73 | ```rust 74 | // Rust imports and type definitions 75 | use std::collections::HashMap; 76 | 77 | #[derive(Debug, Clone)] 78 | pub enum Token { 79 | Identifier(String), 80 | Number(i32), 81 | Punct(char), 82 | } 83 | 84 | %% // Grammar definition starts here 85 | 86 | %tokentype Token; 87 | %start E; 88 | 89 | %token id Token::Identifier(_); 90 | %token num Token::Number(_); 91 | %token lparen Token::Punct('('); 92 | %token rparen Token::Punct(')'); 93 | 94 | E: lparen E rparen { E } 95 | | id { 96 | if let Token::Identifier(name) = id { 97 | println!("Found identifier: {}", name); 98 | } 99 | } 100 | | num { 101 | if let Token::Number(value) = num { 102 | println!("Found number: {}", value); 103 | } 104 | } 105 | ; 106 | ``` 107 | 108 | **Generate the parser:** 109 | ```bash 110 | $ rustylr my_grammar.rs my_parser.rs 111 | ``` 112 | 113 | This will create `my_parser.rs` containing the generated parser code. 
114 | 115 | **Using the generated parser:** 116 | ```rust 117 | include!("my_parser.rs"); 118 | 119 | fn main() { 120 | let parser = EParser::new(); 121 | let mut context = EContext::new(); 122 | 123 | // Parse some tokens 124 | let tokens = vec![ 125 | Token::Punct('('), 126 | Token::Identifier("hello".to_string()), 127 | Token::Punct(')'), 128 | ]; 129 | 130 | for token in tokens { 131 | match context.feed(&parser, token, &mut ()) { 132 | Ok(_) => println!("Token accepted"), 133 | Err(e) => println!("Parse error: {}", e), 134 | } 135 | } 136 | 137 | // Get the final result 138 | if let Ok(result) = context.accept( &parser, &mut () ) { 139 | println!("Parse successful: {:?}", result); 140 | } 141 | } 142 | ``` 143 | 144 | ## The `--state` Option 145 | You can inspect the details of a specific parser state using the `--state` option. This is useful for debugging and understanding how the parser processes input. 146 | 147 | ```bash 148 | $ rustylr my_grammar.rs --state 5 // get details of state 5 149 | ``` 150 | ``` 151 | State 5: 152 | Production Rules: { 153 | Pattern -> TerminalSet • 154 | } 155 | Reduce on Terminals: { 156 | ident => { Pattern -> TerminalSet } 157 | semicolon => { Pattern -> TerminalSet } 158 | pipe => { Pattern -> TerminalSet } 159 | percent => { Pattern -> TerminalSet } 160 | plus => { Pattern -> TerminalSet } 161 | star => { Pattern -> TerminalSet } 162 | question => { Pattern -> TerminalSet } 163 | minus => { Pattern -> TerminalSet } 164 | exclamation => { Pattern -> TerminalSet } 165 | slash => { Pattern -> TerminalSet } 166 | dot => { Pattern -> TerminalSet } 167 | dollar => { Pattern -> TerminalSet } 168 | comma => { Pattern -> TerminalSet } 169 | literal => { Pattern -> TerminalSet } 170 | bracegroup => { Pattern -> TerminalSet } 171 | lparen => { Pattern -> TerminalSet } 172 | rparen => { Pattern -> TerminalSet } 173 | lbracket => { Pattern -> TerminalSet } 174 | error => { Pattern -> TerminalSet } 175 | } 176 | From States: { 177 | 
State 4 178 | State 6 179 | State 11 180 | State 13 181 | State 35 182 | State 38 183 | State 40 184 | State 44 185 | State 48 186 | State 66 187 | State 70 188 | } 189 | ``` 190 | 191 | For more usage examples and detailed documentation, see the [main README](../README.md). -------------------------------------------------------------------------------- /rusty_lr_parser/src/terminalset.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Ident; 2 | use proc_macro2::Literal; 3 | use proc_macro2::Span; 4 | use quote::ToTokens; 5 | 6 | use std::collections::BTreeSet; 7 | 8 | use crate::error::ParseError; 9 | use crate::grammar::Grammar; 10 | use crate::terminal_info::TerminalName; 11 | 12 | #[derive(Debug, Clone)] 13 | pub enum TerminalSetItem { 14 | Terminal(Ident), 15 | Range(Ident, Ident), 16 | Literal(Literal), 17 | LiteralRange(Literal, Literal), 18 | } 19 | 20 | impl std::fmt::Display for TerminalSetItem { 21 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 22 | match self { 23 | TerminalSetItem::Terminal(ident) => write!(f, "{}", ident), 24 | TerminalSetItem::Range(first, last) => write!(f, "{}-{}", first, last), 25 | TerminalSetItem::Literal(literal) => write!(f, "{}", literal), 26 | TerminalSetItem::LiteralRange(first, last) => write!(f, "{}-{}", first, last), 27 | } 28 | } 29 | } 30 | 31 | impl TerminalSetItem { 32 | pub fn to_terminal_set(&self, grammar: &mut Grammar) -> Result, ParseError> { 33 | match self { 34 | TerminalSetItem::Terminal(terminal) => { 35 | if let Some(idx) = grammar 36 | .terminals_index 37 | .get(&TerminalName::Ident(terminal.clone())) 38 | { 39 | Ok(BTreeSet::from([*idx])) 40 | } else { 41 | Err(ParseError::TerminalNotDefined(terminal.clone())) 42 | } 43 | } 44 | TerminalSetItem::Range(first, last) => { 45 | let first_index = match grammar 46 | .terminals_index 47 | .get(&TerminalName::Ident(first.clone())) 48 | { 49 | Some(f) => f, 50 | None => return 
Err(ParseError::TerminalNotDefined(first.clone())), 51 | }; 52 | let last_index = match grammar 53 | .terminals_index 54 | .get(&TerminalName::Ident(last.clone())) 55 | { 56 | Some(l) => l, 57 | None => return Err(ParseError::TerminalNotDefined(last.clone())), 58 | }; 59 | if last_index < first_index { 60 | return Err(ParseError::InvalidTerminalRange( 61 | ( 62 | first.clone(), 63 | *first_index, 64 | grammar.terminals[*first_index].body.clone(), 65 | ), 66 | ( 67 | last.clone(), 68 | *last_index, 69 | grammar.terminals[*last_index].body.clone(), 70 | ), 71 | )); 72 | } 73 | Ok((*first_index..=*last_index).collect()) 74 | } 75 | TerminalSetItem::Literal(literal) => { 76 | let lit = syn::parse2::(literal.to_token_stream()) 77 | .expect("failed on syn::parse2"); 78 | let val = grammar.get_char_value(&lit)?; 79 | let name: TerminalName = (val, val).into(); 80 | let idx = *grammar.terminals_index.get(&name).unwrap(); 81 | Ok(BTreeSet::from([idx])) 82 | } 83 | TerminalSetItem::LiteralRange(first_l, last_l) => { 84 | let first = syn::parse2::(first_l.to_token_stream()) 85 | .expect("failed on syn::parse2"); 86 | let first_ch = grammar.get_char_value(&first)?; 87 | 88 | let last = syn::parse2::(last_l.to_token_stream()) 89 | .expect("failed on syn::parse2"); 90 | let last_ch = grammar.get_char_value(&last)?; 91 | if first_ch > last_ch { 92 | return Err(ParseError::InvalidLiteralRange( 93 | first_l.clone(), 94 | last_l.clone(), 95 | )); 96 | } 97 | 98 | let set: BTreeSet = grammar 99 | .get_terminal_indices_from_char_range(first_ch, last_ch) 100 | .collect(); 101 | Ok(set) 102 | } 103 | } 104 | } 105 | pub fn range_resolve(&self, grammar: &mut Grammar) -> Result<(), ParseError> { 106 | match self { 107 | TerminalSetItem::Terminal(_) => Ok(()), 108 | TerminalSetItem::Range(_, _) => Ok(()), 109 | TerminalSetItem::Literal(literal) => { 110 | let lit = syn::parse2::(literal.to_token_stream()) 111 | .expect("failed on syn::parse2"); 112 | let val = 
grammar.get_char_value(&lit)?; 113 | grammar.range_resolver.insert(val, val); 114 | Ok(()) 115 | } 116 | TerminalSetItem::LiteralRange(first_l, last_l) => { 117 | let first = syn::parse2::(first_l.to_token_stream()) 118 | .expect("failed on syn::parse2"); 119 | let first_ch = grammar.get_char_value(&first)?; 120 | 121 | let last = syn::parse2::(last_l.to_token_stream()) 122 | .expect("failed on syn::parse2"); 123 | let last_ch = grammar.get_char_value(&last)?; 124 | if first_ch > last_ch { 125 | return Err(ParseError::InvalidLiteralRange( 126 | first_l.clone(), 127 | last_l.clone(), 128 | )); 129 | } 130 | grammar.range_resolver.insert(first_ch, last_ch); 131 | Ok(()) 132 | } 133 | } 134 | } 135 | } 136 | 137 | #[derive(Debug, Clone)] 138 | pub struct TerminalSet { 139 | pub negate: bool, 140 | pub items: Vec, 141 | // '[' 142 | pub open_span: Span, 143 | // ']' 144 | pub close_span: Span, 145 | } 146 | impl TerminalSet { 147 | // in case of negation, `include_eof` is true if the final terminal set contains eof 148 | pub fn to_terminal_set( 149 | &self, 150 | grammar: &mut Grammar, 151 | ) -> Result<(bool, BTreeSet), ParseError> { 152 | let mut terminal_set = BTreeSet::new(); 153 | for item in &self.items { 154 | let mut item_set = item.to_terminal_set(grammar)?; 155 | terminal_set.append(&mut item_set); 156 | } 157 | Ok((self.negate, terminal_set)) 158 | } 159 | pub fn range_resolve(&self, grammar: &mut Grammar) -> Result<(), ParseError> { 160 | for item in &self.items { 161 | item.range_resolve(grammar)?; 162 | } 163 | Ok(()) 164 | } 165 | } 166 | 167 | impl std::fmt::Display for TerminalSet { 168 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 169 | write!(f, "[")?; 170 | if self.negate { 171 | write!(f, "^")?; 172 | } 173 | for (i, item) in self.items.iter().enumerate() { 174 | if i > 0 { 175 | write!(f, ", ")?; 176 | } 177 | write!(f, "{}", item)?; 178 | } 179 | write!(f, "]") 180 | } 181 | } 182 | 
-------------------------------------------------------------------------------- /rusty_lr_executable/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | 3 | use std::fs::write; 4 | 5 | mod arg; 6 | 7 | fn main() { 8 | let args = match arg::Args::try_parse() { 9 | Ok(args) => args, 10 | Err(e) => { 11 | eprintln!("{}", e); 12 | return; 13 | } 14 | }; 15 | 16 | let mut builder = rusty_lr_buildscript::Builder::new(); 17 | builder.is_executable = true; 18 | builder.file(&args.input_file); 19 | if args.no_conflict { 20 | builder.note_conflicts(false); 21 | } 22 | if args.no_conflict_resolve { 23 | builder.note_conflicts_resolving(false); 24 | } 25 | if args.no_optimization { 26 | builder.note_optimization(false); 27 | } 28 | if args.no_backtrace { 29 | builder.note_backtrace(false); 30 | } 31 | if let Some(glr) = args.glr { 32 | builder.glr(glr); 33 | } 34 | if let Some(dense) = args.dense { 35 | builder.dense(dense); 36 | } 37 | 38 | let out = match builder.build_impl() { 39 | Ok(out) => out, 40 | Err(_) => { 41 | return; 42 | } 43 | }; 44 | 45 | if let Some(state_idx) = args.state { 46 | let term_class_map = |term| out.grammar.class_pretty_name_list(term, 4); 47 | let nonterm_map = |nonterm| out.grammar.nonterm_pretty_name(nonterm); 48 | 49 | if let Some(state) = out.grammar.states.get(state_idx) { 50 | let mut from_states = Vec::new(); 51 | for (i, s) in out.grammar.states.iter().enumerate() { 52 | if s.shift_goto_map_term 53 | .iter() 54 | .any(|(_, t)| t.state == state_idx) 55 | || s.shift_goto_map_nonterm 56 | .iter() 57 | .any(|(_, t)| t.state == state_idx) 58 | { 59 | from_states.push(i); 60 | } 61 | } 62 | 63 | let mut similar_states = Vec::new(); 64 | for (i, s) in out.grammar.states.iter().enumerate() { 65 | if state.ruleset == s.ruleset && i != state_idx { 66 | similar_states.push(i); 67 | } 68 | } 69 | 70 | println!("State {state_idx}:"); 71 | println!("Production Rules: {{"); 72 | for 
rule in &state.ruleset { 73 | let rule = out.grammar.builder.rules[rule.rule] 74 | .rule 75 | .clone() 76 | .map(&term_class_map, &nonterm_map) 77 | .into_shifted(rule.shifted); 78 | println!(" {}", rule); 79 | } 80 | println!("}}"); 81 | if !state.shift_goto_map_term.is_empty() { 82 | println!("Shift/Goto on Terminals: {{"); 83 | for (term, target) in &state.shift_goto_map_term { 84 | let term = term_class_map(*term); 85 | println!(" {term:>4} => State {}", target.state); 86 | } 87 | println!("}}"); 88 | } 89 | if !state.shift_goto_map_nonterm.is_empty() { 90 | println!("Shift/Goto on Non-Terminals: {{"); 91 | for (nonterm, target) in &state.shift_goto_map_nonterm { 92 | let nonterm = nonterm_map(*nonterm); 93 | println!(" {nonterm:>4} => State {}", target.state); 94 | } 95 | println!("}}"); 96 | } 97 | if !state.reduce_map.is_empty() { 98 | println!("Reduce on Terminals: {{"); 99 | for (term, rules) in &state.reduce_map { 100 | let term = term_class_map(*term); 101 | let preline = format!(" {term:>4} => {{ "); 102 | let mut rules = rules 103 | .iter() 104 | .map(|rule| { 105 | out.grammar.builder.rules[*rule] 106 | .rule 107 | .clone() 108 | .map(&term_class_map, &nonterm_map) 109 | .to_string() 110 | }) 111 | .collect::>(); 112 | for r in rules.iter_mut().skip(1) { 113 | *r = format!("\n{}{}", " ".repeat(preline.len()), r); 114 | } 115 | 116 | println!("{}{} }}", preline, rules.join("")); 117 | } 118 | println!("}}"); 119 | } 120 | if !from_states.is_empty() { 121 | println!("From States: {{"); 122 | for from_state in from_states { 123 | println!(" State {}", from_state); 124 | } 125 | println!("}}"); 126 | } 127 | if !similar_states.is_empty() { 128 | println!("Similar States: {{"); 129 | for similar_state in similar_states { 130 | println!(" State {}", similar_state); 131 | } 132 | println!("}}"); 133 | } 134 | } else { 135 | println!("State {state_idx} does not exist."); 136 | } 137 | } 138 | 139 | let (major, minor, patch) = 
rusty_lr_buildscript::target_rusty_lr_version(); 140 | println!(">> The generated code is targeting rusty_lr version {major}.{minor}.x."); 141 | println!(">> There might be a build error if the version is not matched."); 142 | 143 | // format the generated code 144 | let user_code = if args.no_format { 145 | out.user_stream.to_string() 146 | } else { 147 | match syn::parse2(out.user_stream.clone()) { 148 | Ok(file) => prettyplease::unparse(&file), 149 | Err(e) => { 150 | eprintln!("Error parsing user code: {}", e); 151 | out.user_stream.to_string() 152 | } 153 | } 154 | }; 155 | let generated_code = if args.no_format { 156 | out.generated_stream.to_string() 157 | } else { 158 | match syn::parse2(out.generated_stream.clone()) { 159 | Ok(file) => prettyplease::unparse(&file), 160 | Err(e) => { 161 | eprintln!("Error parsing generated code: {}", e); 162 | out.generated_stream.to_string() 163 | } 164 | } 165 | }; 166 | 167 | let this_name = env!("CARGO_PKG_NAME"); 168 | let this_version = env!("CARGO_PKG_VERSION"); 169 | let output_string = format!( 170 | r#" 171 | // This file was generated by {} {} 172 | // This generated code is targeting rusty_lr version {major}.{minor}.{patch}. 173 | // There might be a build error if the version is not matched. 
174 | // 175 | // {:=^80} 176 | {} 177 | // {:=^80} 178 | /* 179 | {} 180 | */ 181 | // {:=^80} 182 | {} 183 | // {:=^80} 184 | "#, 185 | this_name, 186 | this_version, 187 | "User Codes Begin", 188 | user_code, 189 | "User Codes End", 190 | out.debug_comments, 191 | "Generated Codes Begin", 192 | generated_code, 193 | "Generated Codes End" 194 | ); 195 | match write(args.output_file.clone(), output_string) { 196 | Ok(_) => {} 197 | Err(e) => { 198 | eprintln!("Error writing output file: {}", e); 199 | return; 200 | } 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /rusty_lr_core/src/rule.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::collections::BTreeSet; 3 | 4 | use std::fmt::Debug; 5 | use std::fmt::Display; 6 | 7 | use crate::token::Token; 8 | 9 | /// For resolving shift/reduce conflict 10 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 11 | pub enum ReduceType { 12 | /// reduce to the left, i.e. reduce first 13 | Left, 14 | /// reduce to the right, i.e. shift first 15 | Right, 16 | } 17 | impl std::fmt::Display for ReduceType { 18 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 19 | match self { 20 | ReduceType::Left => write!(f, "Left"), 21 | ReduceType::Right => write!(f, "Right"), 22 | } 23 | } 24 | } 25 | 26 | /// Operator precedence for production rules 27 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] 28 | pub enum Precedence { 29 | /// fixed precedence level 30 | Fixed(usize), // precedence level 31 | 32 | /// get precedence from i'th child token; for runtime conflict resolution 33 | Dynamic(usize), // token index 34 | } 35 | 36 | // Production rule. 37 | // 38 | // name -> Token0 Token1 Token2 ... 
39 | #[derive(Clone, Default)] 40 | pub struct ProductionRule { 41 | pub name: NonTerm, 42 | pub rule: Vec>, 43 | pub precedence: Option, 44 | } 45 | impl Display for ProductionRule { 46 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 47 | write!(f, "{} -> ", self.name)?; 48 | for (id, token) in self.rule.iter().enumerate() { 49 | write!(f, "{}", token)?; 50 | if id < self.rule.len() - 1 { 51 | write!(f, " ")?; 52 | } 53 | } 54 | Ok(()) 55 | } 56 | } 57 | impl Debug for ProductionRule { 58 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 59 | write!(f, "{:?} -> ", self.name)?; 60 | for (id, token) in self.rule.iter().enumerate() { 61 | write!(f, "{:?}", token)?; 62 | if id < self.rule.len() - 1 { 63 | write!(f, " ")?; 64 | } 65 | } 66 | 67 | if let Some(prec) = self.precedence { 68 | write!(f, " [prec: {:?}]", prec)?; 69 | } 70 | Ok(()) 71 | } 72 | } 73 | 74 | impl ProductionRule { 75 | /// Map terminal and non-terminal symbols to another type. 76 | /// This is useful when exporting & importing rules. 77 | pub fn map( 78 | self, 79 | term_map: impl Fn(Term) -> NewTerm, 80 | nonterm_map: impl Fn(NonTerm) -> NewNonTerm, 81 | ) -> ProductionRule { 82 | ProductionRule { 83 | name: nonterm_map(self.name), 84 | rule: self 85 | .rule 86 | .into_iter() 87 | .map(move |token| match token { 88 | Token::Term(term) => Token::Term(term_map(term)), 89 | Token::NonTerm(nonterm) => Token::NonTerm(nonterm_map(nonterm)), 90 | }) 91 | .collect(), 92 | precedence: self.precedence, 93 | } 94 | } 95 | 96 | /// shift this rule 97 | pub fn into_shifted(self, shifted: usize) -> ShiftedRule { 98 | ShiftedRule { 99 | rule: self, 100 | shifted, 101 | } 102 | } 103 | } 104 | 105 | /// A struct for single shifted named production rule. 106 | /// 107 | /// name -> Token1 Token2 . 
Token3 108 | /// 109 | /// ^^^^^^^^^^^^^ shifted = 2 110 | /// 111 | /// This struct has index of the Rule in Grammar::rules 112 | /// and it will be used for Eq, Ord, Hash 113 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Copy, Default)] 114 | pub struct ShiftedRuleRef { 115 | /// index of the rule in `rules` 116 | pub rule: usize, 117 | /// shifted index 118 | pub shifted: usize, 119 | } 120 | 121 | #[derive(Clone, Default)] 122 | pub struct ShiftedRule { 123 | pub rule: ProductionRule, 124 | pub shifted: usize, 125 | } 126 | impl Display for ShiftedRule { 127 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 128 | write!(f, "{} -> ", self.rule.name)?; 129 | for (id, token) in self.rule.rule.iter().enumerate() { 130 | if id == self.shifted { 131 | write!(f, "• ")?; 132 | } 133 | write!(f, "{}", token)?; 134 | if id < self.rule.rule.len() - 1 { 135 | write!(f, " ")?; 136 | } 137 | } 138 | if self.shifted == self.rule.rule.len() { 139 | write!(f, " •")?; 140 | } 141 | 142 | if let Some(prec) = self.rule.precedence { 143 | write!(f, " [prec: {:?}]", prec)?; 144 | } 145 | Ok(()) 146 | } 147 | } 148 | impl Debug for ShiftedRule { 149 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 150 | write!(f, "{:?} -> ", self.rule.name)?; 151 | for (id, token) in self.rule.rule.iter().enumerate() { 152 | if id == self.shifted { 153 | write!(f, "• ")?; 154 | } 155 | write!(f, "{:?}", token)?; 156 | if id < self.rule.rule.len() - 1 { 157 | write!(f, " ")?; 158 | } 159 | } 160 | if self.shifted == self.rule.rule.len() { 161 | write!(f, " •")?; 162 | } 163 | Ok(()) 164 | } 165 | } 166 | 167 | /// shifted rule with lookahead tokens 168 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Default)] 169 | pub struct LookaheadRuleRef { 170 | pub rule: ShiftedRuleRef, 171 | pub lookaheads: BTreeSet, 172 | } 173 | 174 | /// shifted rule with lookahead tokens 175 | #[derive(Clone)] 176 | pub struct LookaheadRule { 177 | pub rule: 
ShiftedRule, 178 | pub lookaheads: BTreeSet, 179 | } 180 | impl Display for LookaheadRule { 181 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 182 | write!(f, "{} / ", self.rule)?; 183 | for (id, lookahead) in self.lookaheads.iter().enumerate() { 184 | write!(f, "{}", lookahead)?; 185 | if id < self.lookaheads.len() - 1 { 186 | write!(f, ", ")?; 187 | } 188 | } 189 | Ok(()) 190 | } 191 | } 192 | impl Debug for LookaheadRule { 193 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 194 | write!(f, "{:?} / ", self.rule)?; 195 | for (id, lookahead) in self.lookaheads.iter().enumerate() { 196 | write!(f, "{:?}", lookahead)?; 197 | if id < self.lookaheads.len() - 1 { 198 | write!(f, ", ")?; 199 | } 200 | } 201 | Ok(()) 202 | } 203 | } 204 | 205 | /// set of lookahead rules 206 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Default)] 207 | pub struct LookaheadRuleRefSet { 208 | pub rules: BTreeMap>, 209 | } 210 | impl LookaheadRuleRefSet { 211 | pub fn new() -> Self { 212 | LookaheadRuleRefSet { 213 | rules: BTreeMap::new(), 214 | } 215 | } 216 | pub fn add(&mut self, rule: ShiftedRuleRef, mut lookaheads: BTreeSet) -> bool 217 | where 218 | Term: Ord, 219 | { 220 | let mut changed = false; 221 | let set = self.rules.entry(rule).or_insert_with(|| { 222 | changed = true; 223 | BTreeSet::new() 224 | }); 225 | let old = set.len(); 226 | set.append(&mut lookaheads); 227 | changed || old != set.len() 228 | } 229 | } 230 | // impl<'a, Term: TermTraitBound + Display, NonTerm: NonTermTraitBound + Display> Display 231 | // for LookaheadRuleRefSet<'a, Term, NonTerm> 232 | // { 233 | // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 234 | // for (id, rule) in self.rules.iter().enumerate() { 235 | // rule.fmt(f)?; 236 | // if id < self.rules.len() - 1 { 237 | // writeln!(f)?; 238 | // } 239 | // } 240 | // Ok(()) 241 | // } 242 | // } 243 | 
-------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Taehwan Kim 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /rusty_lr_core/src/tree.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::fmt::Display; 3 | use std::ops::Deref; 4 | use std::ops::DerefMut; 5 | 6 | use termtree::Tree as TermTree; 7 | 8 | use crate::parser::nonterminal::NonTerminal; 9 | use crate::parser::nonterminal::NonTerminalType; 10 | use crate::TerminalSymbol; 11 | 12 | /// Tree represention of single non-terminal token. 13 | /// User must enable feature `tree` to use this. 
14 | #[derive(Clone)] 15 | pub struct TreeNonTerminal { 16 | /// non terminal symbol that this tree reduced to 17 | pub nonterm: NonTerm, 18 | 19 | /// children of this token consumed by reduction 20 | pub tokens: Vec>, 21 | } 22 | 23 | impl TreeNonTerminal { 24 | pub fn new(nonterm: NonTerm, tokens: Vec>) -> Self { 25 | Self { nonterm, tokens } 26 | } 27 | 28 | /// convert this tree to termtree::Tree using Display trait 29 | pub(crate) fn to_term_tree( 30 | &self, 31 | term_to_display: &impl Fn(&TerminalSymbol) -> D, 32 | nonterm_to_display: &impl Fn(&NonTerm) -> D, 33 | ) -> Vec> 34 | where 35 | NonTerm: NonTerminal, 36 | { 37 | // Manually configure the format for the auto-generated non-teminals. 38 | // for example, one or more repetitions A+ will be implemented as left recursion, 39 | // but we want to display the tree as flat array. 40 | let nonterm_name = nonterm_to_display(&self.nonterm); 41 | match self.nonterm.nonterm_type() { 42 | // normal tree 43 | None 44 | | Some(NonTerminalType::Augmented) 45 | | Some(NonTerminalType::Error) 46 | | Some(NonTerminalType::Group) => { 47 | let tree = TermTree::new(nonterm_name); 48 | vec![tree.with_leaves( 49 | self.tokens 50 | .iter() 51 | .flat_map(|token| token.to_term_tree(term_to_display, nonterm_to_display)), 52 | )] 53 | } 54 | 55 | // remove parent, directly add children 56 | Some(NonTerminalType::Lookahead) | Some(NonTerminalType::TerminalSet) => self 57 | .tokens 58 | .iter() 59 | .flat_map(|token| token.to_term_tree(term_to_display, nonterm_to_display)) 60 | .collect(), 61 | 62 | // remove left/right recursion, make it to flat array 63 | Some(NonTerminalType::Star) => { 64 | let tree = TermTree::new(nonterm_name); 65 | let tree = if self.tokens.is_empty() { 66 | tree 67 | } else { 68 | let plus = self.tokens[0] 69 | .to_term_tree(term_to_display, nonterm_to_display) 70 | .into_iter() 71 | .next() 72 | .unwrap(); 73 | tree.with_leaves(plus.leaves) 74 | }; 75 | vec![tree] 76 | } 77 | // remove left/right 
recursion, make it to flat array 78 | Some(NonTerminalType::PlusLeft) => { 79 | let tree = TermTree::new(nonterm_name); 80 | let tree = match self.tokens.len() { 81 | 1 => { 82 | let child = self.tokens[0] 83 | .to_term_tree(term_to_display, nonterm_to_display) 84 | .into_iter() 85 | .next() 86 | .unwrap(); 87 | tree.with_leaves([child]) 88 | } 89 | 2 => { 90 | let mut child_list = self.tokens[0] 91 | .to_term_tree(term_to_display, nonterm_to_display) 92 | .into_iter() 93 | .next() 94 | .unwrap() 95 | .leaves; 96 | let child = self.tokens[1] 97 | .to_term_tree(term_to_display, nonterm_to_display) 98 | .into_iter() 99 | .next() 100 | .unwrap(); 101 | child_list.push(child); 102 | tree.with_leaves(child_list) 103 | } 104 | 3 => { 105 | let mut child_list = self.tokens[0] 106 | .to_term_tree(term_to_display, nonterm_to_display) 107 | .into_iter() 108 | .next() 109 | .unwrap() 110 | .leaves; 111 | let separator = self.tokens[1] 112 | .to_term_tree(term_to_display, nonterm_to_display) 113 | .into_iter() 114 | .next() 115 | .unwrap(); 116 | let child = self.tokens[2] 117 | .to_term_tree(term_to_display, nonterm_to_display) 118 | .into_iter() 119 | .next() 120 | .unwrap(); 121 | child_list.push(separator); 122 | child_list.push(child); 123 | tree.with_leaves(child_list) 124 | } 125 | _ => { 126 | unreachable!("PlusLeft length of child: {}", self.tokens.len()) 127 | } 128 | }; 129 | vec![tree] 130 | } 131 | // remove left/right recursion, make it to flat array 132 | Some(NonTerminalType::PlusRight) => { 133 | let tree = TermTree::new(nonterm_name); 134 | let tree = match self.tokens.len() { 135 | 1 => { 136 | let child = self.tokens[0] 137 | .to_term_tree(term_to_display, nonterm_to_display) 138 | .into_iter() 139 | .next() 140 | .unwrap(); 141 | tree.with_leaves([child]) 142 | } 143 | 2 => { 144 | let child = self.tokens[0] 145 | .to_term_tree(term_to_display, nonterm_to_display) 146 | .into_iter() 147 | .next() 148 | .unwrap(); 149 | let mut child_list = self.tokens[1] 
150 | .to_term_tree(term_to_display, nonterm_to_display) 151 | .into_iter() 152 | .next() 153 | .unwrap() 154 | .leaves; 155 | let mut children = vec![child]; 156 | children.append(&mut child_list); 157 | 158 | tree.with_leaves(children) 159 | } 160 | 3 => { 161 | // with separator 162 | let child = self.tokens[0] 163 | .to_term_tree(term_to_display, nonterm_to_display) 164 | .into_iter() 165 | .next() 166 | .unwrap(); 167 | let separator = self.tokens[1] 168 | .to_term_tree(term_to_display, nonterm_to_display) 169 | .into_iter() 170 | .next() 171 | .unwrap(); 172 | let mut child_list = self.tokens[2] 173 | .to_term_tree(term_to_display, nonterm_to_display) 174 | .into_iter() 175 | .next() 176 | .unwrap() 177 | .leaves; 178 | let mut children = vec![child]; 179 | children.push(separator); 180 | children.append(&mut child_list); 181 | 182 | tree.with_leaves(children) 183 | } 184 | _ => { 185 | unreachable!("PlusRight length of child: {}", self.tokens.len()) 186 | } 187 | }; 188 | vec![tree] 189 | } 190 | // remove left/right recursion, make it to flat array 191 | Some(NonTerminalType::Optional) => { 192 | let tree = TermTree::new(nonterm_name); 193 | let tree = 194 | if self.tokens.is_empty() { 195 | tree 196 | } else { 197 | tree.with_leaves(self.tokens.iter().flat_map(|token| { 198 | token.to_term_tree(term_to_display, nonterm_to_display) 199 | })) 200 | }; 201 | vec![tree] 202 | } 203 | 204 | // show the literal directly 205 | Some(NonTerminalType::LiteralString) => { 206 | let tree = TermTree::new(nonterm_name); 207 | vec![tree] 208 | } 209 | } 210 | } 211 | } 212 | 213 | /// Tree representation of single token. 214 | /// User must enable feature `tree` to use this. 
215 | #[derive(Clone)] 216 | pub enum Tree { 217 | Terminal(TerminalSymbol), 218 | NonTerminal(TreeNonTerminal), 219 | } 220 | 221 | impl Tree { 222 | pub fn new_terminal(term: TerminalSymbol) -> Self { 223 | Tree::Terminal(term) 224 | } 225 | pub fn new_nonterminal(nonterm: NonTerm, tokens: Vec>) -> Self { 226 | Tree::NonTerminal(TreeNonTerminal::new(nonterm, tokens)) 227 | } 228 | 229 | /// convert this tree to termtree::Tree using Display trait 230 | pub(crate) fn to_term_tree( 231 | &self, 232 | term_to_display: &impl Fn(&TerminalSymbol) -> D, 233 | nonterm_to_display: &impl Fn(&NonTerm) -> D, 234 | ) -> Vec> 235 | where 236 | NonTerm: NonTerminal, 237 | { 238 | match self { 239 | Tree::Terminal(term) => vec![TermTree::new(term_to_display(term))], 240 | Tree::NonTerminal(nonterm) => nonterm.to_term_tree(term_to_display, nonterm_to_display), 241 | } 242 | } 243 | } 244 | 245 | impl Display for Tree { 246 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 247 | let child = self.to_term_tree(&|term| term.to_string(), &|nonterm| { 248 | nonterm.as_str().to_string() 249 | }); 250 | write!( 251 | f, 252 | "{}", 253 | TermTree::new("Tree".to_string()).with_leaves(child) 254 | ) 255 | } 256 | } 257 | impl Debug for Tree { 258 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 259 | let child = self.to_term_tree(&|term| format!("{:?}", term), &|nonterm| { 260 | nonterm.as_str().to_string() 261 | }); 262 | write!( 263 | f, 264 | "{}", 265 | TermTree::new("Tree".to_string()).with_leaves(child) 266 | ) 267 | } 268 | } 269 | 270 | /// List of [`Tree`] 271 | #[derive(Clone)] 272 | pub struct TreeList { 273 | pub trees: Vec>, 274 | } 275 | impl Deref for TreeList { 276 | type Target = Vec>; 277 | fn deref(&self) -> &Self::Target { 278 | &self.trees 279 | } 280 | } 281 | impl DerefMut for TreeList { 282 | fn deref_mut(&mut self) -> &mut Self::Target { 283 | &mut self.trees 284 | } 285 | } 286 | impl TreeList { 287 | /// create new empty 
tree list 288 | pub fn new() -> Self { 289 | Self { trees: Vec::new() } 290 | } 291 | } 292 | impl Display for TreeList { 293 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 294 | let tree = 295 | TermTree::new("TreeList".to_string()).with_leaves(self.trees.iter().flat_map(|tree| { 296 | tree.to_term_tree(&|term| term.to_string(), &|nonterm| { 297 | nonterm.as_str().to_string() 298 | }) 299 | })); 300 | write!(f, "{}", tree) 301 | } 302 | } 303 | impl Debug for TreeList { 304 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 305 | let tree = 306 | TermTree::new("TreeList".to_string()).with_leaves(self.trees.iter().flat_map(|tree| { 307 | tree.to_term_tree(&|term| format!("{:?}", term), &|nonterm| { 308 | nonterm.as_str().to_string() 309 | }) 310 | })); 311 | write!(f, "{}", tree) 312 | } 313 | } 314 | impl Default for TreeList { 315 | /// create new empty tree list 316 | fn default() -> Self { 317 | Self::new() 318 | } 319 | } 320 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rusty_lr 2 | [![crates.io](https://img.shields.io/crates/v/rusty_lr.svg)](https://crates.io/crates/rusty_lr) 3 | [![docs.rs](https://docs.rs/rusty_lr/badge.svg)](https://docs.rs/rusty_lr) 4 | 5 | ***A Bison-like Parser generator & Compiler frontend for Rust generating optimised IELR(1), LALR(1) parser tables, with deterministic LR and non-deterministic LR (GLR) parsing.*** 6 | 7 | RustyLR is a parser generator that converts context-free grammars into IELR(1)/LALR(1) tables and supporting deterministic LR and non-deterministic GLR parsing strategies. It supports custom reduce actions in Rust, with beautiful diagnostics. 8 | Highly inspired by tools like *bison*, it uses a similar syntax while integrating seamlessly with Rust's ecosystem. 
9 | It constructs optimized state machines, ensuring efficient and reliable parsing. 10 | 11 | ![title](images/title.png) 12 | 13 | ## Features 14 | - **Custom Reduce Actions:** Define custom actions in Rust, allowing you to build custom data structures easily. 15 | - **Automatic Optimization:** Reduces parser table size and improves performance by grouping terminals with identical behavior across parser states. 16 | - **Multiple Parsing Strategies:** Supports minimal-LR(1), LALR(1) parser tables, and GLR parsing strategy. 17 | - **Detailed Diagnostics:** Detects grammar conflicts, verbose conflict resolution stages, and optimization stages. 18 | - **Static & Runtime Conflict Resolution:** Provides mechanisms to resolve conflicts at compile time or runtime. 19 | - **Location Tracking:** Tracks the location of every token in the parse tree, useful for error reporting and debugging. 20 | - **State Machine Debugging:** The `rustylr` executable provides a `--state` option that allows you to debug and visualize the generated state machine. This is useful for understanding how the parser will behave and for identifying potential issues in the grammar. 21 | 22 | ## Quick Start: Using the `rustylr` Executable 23 | 24 | The recommended way to use RustyLR is with the standalone `rustylr` executable. It's faster, provides richer grammar diagnostics, and includes commands for debugging state machines directly. 25 | 26 | Here is a step-by-step guide to get you started. 27 | 28 | **1. Add `rusty_lr` to your dependencies** 29 | 30 | First, add the `rusty_lr` runtime library to your project's `Cargo.toml`. The generated parser code will depend on it. 31 | 32 | ```toml 33 | [dependencies] 34 | rusty_lr = "..." # Use the same version as the executable 35 | ``` 36 | 37 | **2. Install the `rustylr` executable** 38 | 39 | You can install the executable from crates.io using `cargo`: 40 | 41 | ```bash 42 | cargo install rustylr 43 | ``` 44 | 45 | **3. 
Create a grammar file** 46 | 47 | Create a file named `src/grammar.rs`. This file will contain your token definitions and grammar rules. Any Rust code above the `%%` separator will be copied directly to the generated output file. 48 | 49 | ```rust 50 | // src/grammar.rs 51 | // This code is copied to the generated file. 52 | pub enum MyToken { 53 | Num(i32), 54 | Plus, 55 | } 56 | 57 | %% // Grammar rules start here. 58 | 59 | %tokentype MyToken; 60 | %start E; 61 | %left plus; // Specify left-associativity for the 'plus' token. 62 | 63 | // Define tokens and how they map to MyToken variants. 64 | %token num MyToken::Num(_); 65 | %token plus MyToken::Plus; 66 | 67 | // Define grammar rules and their return types. 68 | // E(i32) means the non-terminal E returns an i32. 69 | // In the action blocks `{ ... }`, you can refer to the values of symbols 70 | // on the right-hand side by their names (e.g., `e1`, `e2`, `num`). 71 | E(i32): e1=E plus e2=E { e1 + e2 } 72 | | num { let MyToken::Num(num) = num else { unreachable!(); }; 73 | num 74 | } 75 | ; 76 | ``` 77 | 78 | **4. Generate the parser code** 79 | 80 | Run the `rustylr` executable to process your grammar file. This command will generate `src/parser.rs` from `src/grammar.rs`. 81 | 82 | ```bash 83 | rustylr src/grammar.rs src/parser.rs 84 | ``` 85 | 86 | **5. Use the generated parser in your code** 87 | 88 | Finally, include the newly generated `src/parser.rs` as a module in your `main.rs` or `lib.rs` and use it to parse a token stream. 89 | 90 | ```rust 91 | // In src/main.rs 92 | 93 | // Include the generated parser module. 94 | mod parser; 95 | // Bring the token enum into scope. 
96 | use parser::MyToken; 97 | 98 | fn main() { 99 | // Example token stream for "1 + 2" 100 | let tokens = vec![MyToken::Num(1), MyToken::Plus, MyToken::Num(2)]; 101 | 102 | let parser = parser::EParser::new(); // Assumes 'E' is your start symbol 103 | let mut context = parser::EContext::new(); 104 | let mut userdata = (); // No userdata in this example. 105 | 106 | for token in tokens { 107 | match context.feed(&parser, token, &mut userdata) { 108 | Ok(_) => {} 109 | Err(e) => { 110 | eprintln!("Parse error: {}", e); 111 | return; 112 | } 113 | } 114 | } 115 | 116 | // Get the final parsed result. 117 | match context.accept(&parser) { 118 | Ok(result) => { 119 | let final_result: i32 = result; 120 | println!("Parsed result: {}", final_result); // Should print "3" 121 | }, 122 | Err(e) => { 123 | eprintln!("Failed to produce a final result: {}", e); 124 | } 125 | } 126 | } 127 | ``` 128 | 129 | **Important:** Ensure the version of the `rustylr` executable you run matches the version of the `rusty_lr` crate in your `Cargo.toml`. Mismatched versions can lead to build errors. 130 | 131 | 132 | 133 | ## Generated Code Structure 134 | 135 | The generated code will include several structs and enums: 136 | - `Parser`: A struct that holds the parser table. [(LR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/lr/trait.Parser.html) [(GLR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/glr/trait.Parser.html) 137 | - `Context`: A struct that maintains the current parsing state and symbol values. [(LR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/lr/struct.Context.html) [(GLR docs)](https://docs.rs/rusty_lr/latest/rusty_lr/glr/struct.Context.html) 138 | - `State`: A type representing a parser state and its associated table. 139 | - `Rule`: A type representing a production rule. [(docs)](https://docs.rs/rusty_lr/latest/rusty_lr/struct.ProductionRule.html) 140 | - `NonTerminals`: An enum representing all non-terminal symbols in the grammar. 
[(docs)](https://docs.rs/rusty_lr/latest/rusty_lr/trait.NonTerminal.html) 141 | 142 | 143 | ### Working with Context 144 | You can also get contextual information from the `Context` struct: 145 | ```rust 146 | let mut context = Context::new(); 147 | 148 | // ... parsing ... 149 | 150 | context.expected_token(); // Get expected (terminal, non-terminal) symbols for current state 151 | context.can_feed(&term); // Check if a terminal symbol can be fed 152 | context.trace(); // Get all `%trace` non-terminals currently being parsed 153 | println!("{}", context.backtrace()); // Print backtrace of the parser state 154 | println!("{}", context); // Print tree structure of the parser state (`tree` feature) 155 | ``` 156 | 157 | ### The Feed Method 158 | The generated code includes a `feed` method that processes tokens: 159 | 160 | ```rust 161 | context.feed(&parser, term, &mut userdata); // Feed a terminal symbol and update the state machine 162 | context.feed_location(&parser, term, &mut userdata, term_location); // Feed a terminal symbol with location tracking 163 | ``` 164 | 165 | This method returns `Ok(())` if the token was successfully parsed, or an `Err` if there was an error. 166 | 167 | **Note:** The actual method signatures differ slightly when building a GLR parser. 168 | 169 | ## GLR Parsing 170 | RustyLR offers built-in support for Generalized LR (GLR) parsing, enabling it to handle ambiguous or nondeterministic grammars that traditional LR(1) or LALR(1) parsers cannot process. 171 | See [GLR.md](GLR.md) for details. 
172 | 173 | ## Error Handling and Conflict Resolution 174 | RustyLR provides multiple mechanisms for handling semantic errors and resolving conflicts during parsing: 175 | - **Panic Mode Error Recovery:** Use the `error` token for panic-mode error recovery 176 | - **Operator Precedence:** Set precedence with `%left`, `%right`, `%precedence` for terminals 177 | - **Reduce Rule Priority:** Set priority with `%dprec` for production rules 178 | - **Runtime Errors:** Return `Err` from reduce actions to handle semantic errors 179 | 180 | See [SYNTAX.md - Resolving Conflicts](SYNTAX.md#resolving-conflicts) for detailed information. 181 | 182 | ## Location Tracking 183 | Track the location of tokens and non-terminals for better error reporting and debugging: 184 | 185 | ```rust 186 | Expr: exp1=Expr '+' exp2=Expr { 187 | println!("Location of exp1: {:?}", @exp1); 188 | println!("Location of exp2: {:?}", @exp2); 189 | println!("Location of this expression: {:?}", @$); // @$ is the location of the non-terminal itself 190 | exp1 + exp2 191 | } 192 | | Expr error Expr { 193 | println!("Error at: {:?}", @error); // @error is the location of the error token 194 | 0 // Return a default value 195 | } 196 | ``` 197 | 198 | See [SYNTAX.md - Location Tracking](SYNTAX.md#location-tracking) for detailed information. 199 | 200 | ## State Machine Debugging 201 | The `rustylr` executable includes a powerful `--state` option for debugging the generated parser's state machine. This feature allows you to inspect the details of each state, including its production rules, expected tokens, and transitions to other states. It is an invaluable tool for diagnosing grammar ambiguities, understanding shift/reduce conflicts, and verifying that the parser behaves as expected. 
202 | 203 | To use it, run `rustylr` with the `--state` flag, followed by your grammar file: 204 | 205 | ```bash 206 | rustylr --state src/grammar.rs 207 | ``` 208 | 209 | This will output a detailed, color-coded representation of the state machine directly in your terminal, making it easy to trace the parser's logic. 210 | 211 | ![State Machine Debug](images/state_option.png) 212 | 213 | This visualization helps you understand the parsing process step-by-step and is particularly useful for debugging complex grammars. 214 | 215 | ## Examples 216 | - [Calculator (enum version)](example/calculator/src/parser.rs): A numeric expression parser using custom token enums 217 | - [Calculator (u8 version)](example/calculator_u8/src/parser.rs): A numeric expression parser using byte tokens 218 | - [JSON Validator](example/json/src/parser.rs): A JSON syntax validator 219 | - [Lua 5.4 syntax parser](https://github.com/ehwan/lua_rust/blob/main/parser/src/parser.rs): A complete Lua language parser 220 | - [C language parser](https://github.com/ehwan/C-language-Parser-In-Rust/blob/main/src/ast/parser_lr.rs): A C language parser 221 | - [Bootstrap parser](rusty_lr_parser/src/parser/parser.rs): RustyLR's own syntax parser is written in RustyLR itself 222 | 223 | ## Cargo Features 224 | - `build`: Enables build script tools for generating parsers at compile time. 225 | - `tree`: Enables automatic syntax tree construction for debugging purposes. Makes `Context` implement `Display` for pretty-printing. 226 | 227 | ## Grammar Syntax 228 | RustyLR's grammar syntax is inspired by traditional Yacc/Bison formats. 229 | See [SYNTAX.md](SYNTAX.md) for detailed grammar definition syntax. 230 | 231 | ## Contributing 232 | Contributions are welcome! Please feel free to open an issue or submit a pull request.
233 | 234 | ### Project Structure 235 | This project is organized as a Cargo workspace with the following crates: 236 | 237 | - **`rusty_lr/`**: The main end-user library that provides the public API. This is what users add to their `Cargo.toml`. 238 | - **`rusty_lr_core/`**: Core parsing engine containing the fundamental data structures, algorithms, and runtime components for both deterministic (`src/parser/deterministic`) and non-deterministic (`src/parser/nondeterministic`) parsing. 239 | - **`rusty_lr_parser/`**: The main code generation engine that parses RustyLR's grammar syntax, builds parser tables, and generates the actual parser code. This is the core of the parser generation process. 240 | - **`rusty_lr_derive/`**: Procedural macro interface that wraps `rusty_lr_parser` to provide the `lr1!` macro for inline grammar definitions. 241 | - **`rusty_lr_buildscript/`**: Build script interface that wraps `rusty_lr_parser` for generating parser code at compile time when using the `build` feature. 242 | - **`rusty_lr_executable/`**: Standalone `rustylr` executable for command-line parser generation. 
243 | - **`scripts/`**: Development and testing scripts 244 | 245 | The crates have the following dependency relationships: 246 | - `rusty_lr` depends on `rusty_lr_core`, `rusty_lr_derive`, and `rusty_lr_buildscript` (optional) 247 | - `rusty_lr_derive` and `rusty_lr_buildscript` depend on `rusty_lr_parser` 248 | - `rusty_lr_parser` depends on `rusty_lr_core` 249 | - `rusty_lr_executable` depends on `rusty_lr_buildscript` 250 | 251 | ```mermaid 252 | graph TD; 253 | subgraph User Facing 254 | rusty_lr; 255 | rusty_lr_executable; 256 | end 257 | 258 | subgraph Internal 259 | rusty_lr_derive; 260 | rusty_lr_buildscript; 261 | rusty_lr_parser; 262 | rusty_lr_core; 263 | end 264 | 265 | rusty_lr --> rusty_lr_core; 266 | rusty_lr --> rusty_lr_derive; 267 | rusty_lr --> rusty_lr_buildscript; 268 | 269 | rusty_lr_derive --> rusty_lr_parser; 270 | rusty_lr_buildscript --> rusty_lr_parser; 271 | 272 | rusty_lr_executable --> rusty_lr_buildscript; 273 | 274 | rusty_lr_parser --> rusty_lr_core; 275 | ``` 276 | 277 | 278 | ### About the Versioning 279 | RustyLR consists of two big parts: 280 | - executable (`rustylr`), the code generator 281 | - runtime (`rusty_lr`), the main library 282 | 283 | Since the `cargo` automatically uses the latest patch in `major.minor.patch` version of a crate, we increase the patch number only if the generated code is compatible with the runtime. That is, for any user who is not using buildscript or proc-macro, and using the executable-generated code itself, 284 | any code change that could make compile errors with the previous generated code will result in a minor version bump. 
285 | 286 | ## License 287 | This project is dual-licensed under either of the following licenses, at your option: 288 | 289 | - MIT License ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 290 | - Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 291 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/error.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Ident; 2 | use proc_macro2::Literal; 3 | use proc_macro2::Span; 4 | use proc_macro2::TokenStream; 5 | 6 | use quote::quote_spanned; 7 | 8 | use crate::parser::args::IdentOrLiteral; 9 | 10 | /// failed to feed() the token 11 | #[non_exhaustive] 12 | #[derive(Debug)] 13 | pub enum ParseArgError { 14 | /// feed() failed 15 | MacroLineParse { span: Span, message: String }, 16 | } 17 | 18 | #[non_exhaustive] 19 | #[derive(Debug)] 20 | pub enum ArgError { 21 | MultipleModulePrefixDefinition((Span, TokenStream), (Span, TokenStream)), 22 | MultipleUserDataDefinition((Span, TokenStream), (Span, TokenStream)), 23 | MultipleErrorDefinition((Span, TokenStream), (Span, TokenStream)), 24 | MultipleTokenTypeDefinition((Span, TokenStream), (Span, TokenStream)), 25 | MultipleEofDefinition((Span, TokenStream), (Span, TokenStream)), 26 | MultipleStartDefinition(Ident, Ident), 27 | 28 | StartNotDefined, 29 | EofNotDefined, 30 | TokenTypeNotDefined, 31 | 32 | /// multiple %prec in the same rule 33 | MultiplePrecDefinition(Span), 34 | /// multiple %dprec in the same rule 35 | MultipleDPrecDefinition(Span), 36 | } 37 | 38 | #[non_exhaustive] 39 | #[derive(Debug)] 40 | pub enum ConflictError { 41 | /// error building given CFG 42 | ShiftReduceConflict { 43 | term: String, 44 | reduce_rule: (usize, rusty_lr_core::rule::ProductionRule), 45 | shift_rules: Vec<(usize, rusty_lr_core::rule::ShiftedRule)>, 46 | }, 47 | /// error building given CFG 48 | 
ReduceReduceConflict { 49 | lookahead: String, 50 | rule1: (usize, rusty_lr_core::rule::ProductionRule), 51 | rule2: (usize, rusty_lr_core::rule::ProductionRule), 52 | }, 53 | } 54 | 55 | #[non_exhaustive] 56 | #[derive(Debug)] 57 | pub enum ParseError { 58 | MultipleRuleDefinition(Ident, Ident), 59 | 60 | /// different reduce type applied to the same terminal symbol 61 | MultipleReduceDefinition { 62 | terminal: String, 63 | old: (Span, rusty_lr_core::rule::ReduceType), 64 | new: (Span, rusty_lr_core::rule::ReduceType), 65 | }, 66 | 67 | /// multiple %token definition 68 | MultipleTokenDefinition(Ident, Ident), 69 | 70 | /// same name for terminal and non-terminal exists 71 | TermNonTermConflict { 72 | name: Ident, 73 | terminal: Ident, 74 | non_terminal: Ident, 75 | }, 76 | 77 | InvalidTerminalRange((Ident, usize, TokenStream), (Ident, usize, TokenStream)), 78 | 79 | /// name given to %start not defined 80 | StartNonTerminalNotDefined(Ident), 81 | 82 | /// unknown terminal symbol name 83 | TerminalNotDefined(Ident), 84 | 85 | /// can't use reserved keyword as token name 86 | ReservedName(Ident), 87 | 88 | /// not supported literal type 89 | UnsupportedLiteralType(TokenStream), 90 | 91 | /// range in literal terminal set is not valid 92 | InvalidLiteralRange(Literal, Literal), 93 | 94 | /// TokenType in Literal mode is not supported 95 | TokenInLiteralMode(Span), 96 | 97 | /// conflicts in precedence definition 98 | MultiplePrecedenceOrderDefinition { 99 | cur: IdentOrLiteral, 100 | old: Span, 101 | }, 102 | 103 | /// Precedence not defined for the given token 104 | PrecedenceNotDefined(IdentOrLiteral), 105 | 106 | /// All production rules in this non-terminal must have %prec defined 107 | NonTerminalPrecedenceNotDefined(Span, usize), 108 | 109 | /// ReduceAction must be defined but not defined 110 | RuleTypeDefinedButActionNotDefined { 111 | name: Ident, 112 | span: (Span, Span), 113 | }, 114 | 115 | /// Only terminal or terminal set is allowed 116 | 
OnlyTerminalSet(Span, Span), 117 | 118 | /// unknown non-terminal symbol name 119 | NonTerminalNotDefined(Ident), 120 | 121 | /// only 'usize' literal is allowed for %dprec 122 | OnlyUsizeLiteral(Span), 123 | } 124 | #[allow(unused)] 125 | impl ArgError { 126 | pub fn to_compile_error(&self) -> TokenStream { 127 | let span = self.span(); 128 | let message = self.short_message(); 129 | quote_spanned! { 130 | span=> 131 | compile_error!(#message); 132 | } 133 | } 134 | 135 | pub fn span(&self) -> Span { 136 | match self { 137 | ArgError::MultipleModulePrefixDefinition( 138 | (span1, tokenstream1), 139 | (span2, tokenstream2), 140 | ) => *span2, 141 | ArgError::MultipleUserDataDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 142 | *span2 143 | } 144 | ArgError::MultipleErrorDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 145 | *span2 146 | } 147 | ArgError::MultipleTokenTypeDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 148 | *span2 149 | } 150 | ArgError::MultipleEofDefinition((span1, tokenstream1), (span2, tokenstream2)) => *span2, 151 | ArgError::MultipleStartDefinition(old, new) => new.span(), 152 | 153 | ArgError::StartNotDefined => Span::call_site(), 154 | ArgError::EofNotDefined => Span::call_site(), 155 | ArgError::TokenTypeNotDefined => Span::call_site(), 156 | 157 | ArgError::MultiplePrecDefinition(span) => *span, 158 | ArgError::MultipleDPrecDefinition(span) => *span, 159 | } 160 | } 161 | 162 | pub fn short_message(&self) -> String { 163 | match self { 164 | ArgError::MultipleModulePrefixDefinition( 165 | (span1, tokenstream1), 166 | (span2, tokenstream2), 167 | ) => "Multiple %moduleprefix definition".into(), 168 | ArgError::MultipleUserDataDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 169 | "Multiple %userdata definition".into() 170 | } 171 | ArgError::MultipleErrorDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 172 | "Multiple %error definition".into() 173 | } 174 | 
ArgError::MultipleTokenTypeDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 175 | "Multiple %tokentype definition".into() 176 | } 177 | ArgError::MultipleEofDefinition((span1, tokenstream1), (span2, tokenstream2)) => { 178 | "Multiple %eof definition".into() 179 | } 180 | ArgError::MultipleStartDefinition(old, new) => { 181 | format!("Multiple %start definition: {} and {}", old, new) 182 | } 183 | 184 | ArgError::StartNotDefined => "Start rule not defined\n>>> %start ;".into(), 185 | ArgError::EofNotDefined => "Eof not defined\n>>> %eof ;".into(), 186 | ArgError::TokenTypeNotDefined => { 187 | "Token type not defined\n>>> %tokentype ;".into() 188 | } 189 | 190 | ArgError::MultiplePrecDefinition(span) => "Multiple %prec definition".into(), 191 | ArgError::MultipleDPrecDefinition(span) => "Multiple %dprec definition".into(), 192 | } 193 | } 194 | } 195 | #[allow(unused)] 196 | impl ParseArgError { 197 | pub fn to_compile_error(&self) -> TokenStream { 198 | let span = self.span(); 199 | let message = self.short_message(); 200 | quote_spanned! { 201 | span=> 202 | compile_error!(#message); 203 | } 204 | } 205 | 206 | pub fn span(&self) -> Span { 207 | match self { 208 | ParseArgError::MacroLineParse { span, message } => *span, 209 | } 210 | } 211 | 212 | pub fn short_message(&self) -> String { 213 | match self { 214 | ParseArgError::MacroLineParse { span, message } => message.clone(), 215 | } 216 | } 217 | } 218 | 219 | #[allow(unused)] 220 | impl ParseError { 221 | pub fn to_compile_error(&self) -> TokenStream { 222 | let span = self.span(); 223 | let message = self.short_message(); 224 | quote_spanned! 
{ 225 | span=> 226 | compile_error!(#message); 227 | } 228 | } 229 | 230 | pub fn span(&self) -> Span { 231 | match self { 232 | ParseError::MultipleRuleDefinition(old, new) => new.span(), 233 | 234 | ParseError::MultipleReduceDefinition { terminal, old, new } => new.0, 235 | 236 | ParseError::TermNonTermConflict { 237 | name, 238 | terminal, 239 | non_terminal, 240 | } => name.span(), 241 | 242 | ParseError::InvalidTerminalRange((first, first_index, _), (last, last_index, _)) => { 243 | first.span() 244 | } 245 | 246 | ParseError::StartNonTerminalNotDefined(ident) => ident.span(), 247 | 248 | ParseError::TerminalNotDefined(ident) => ident.span(), 249 | 250 | ParseError::MultipleTokenDefinition(old, new) => new.span(), 251 | 252 | ParseError::ReservedName(ident) => ident.span(), 253 | 254 | ParseError::UnsupportedLiteralType(stream) => { 255 | stream.clone().into_iter().next().unwrap().span() 256 | } 257 | 258 | ParseError::InvalidLiteralRange(first, last) => first.span(), 259 | 260 | ParseError::TokenInLiteralMode(open_span) => *open_span, 261 | 262 | ParseError::MultiplePrecedenceOrderDefinition { cur, old } => cur.span(), 263 | ParseError::PrecedenceNotDefined(name) => name.span(), 264 | ParseError::NonTerminalPrecedenceNotDefined(span, _) => *span, 265 | 266 | ParseError::RuleTypeDefinedButActionNotDefined { name, span } => span.0, 267 | ParseError::OnlyTerminalSet(span_begin, span_end) => *span_begin, 268 | ParseError::NonTerminalNotDefined(ident) => ident.span(), 269 | ParseError::OnlyUsizeLiteral(span) => *span, 270 | } 271 | } 272 | 273 | pub fn short_message(&self) -> String { 274 | match self { 275 | ParseError::MultipleRuleDefinition(old, new) => { 276 | format!("Multiple rule definition with same name: {}", old) 277 | } 278 | 279 | ParseError::MultipleReduceDefinition { terminal, old, new } => { 280 | format!("Differnt reduce type (%left and %right) applied to the same terminal symbol: {}", terminal) 281 | } 282 | 283 | ParseError::TermNonTermConflict { 
284 | name, 285 | terminal, 286 | non_terminal, 287 | } => { 288 | format!("Same name for terminal and non-terminal exists: {}", name) 289 | } 290 | 291 | ParseError::InvalidTerminalRange((first, first_index, _), (last, last_index, _)) => { 292 | format!( 293 | "Invalid terminal range: [{}({}) - {}({})]", 294 | first, first_index, last, last_index 295 | ) 296 | } 297 | 298 | ParseError::StartNonTerminalNotDefined(ident) => { 299 | format!("Name given to %start not defined: {}", ident) 300 | } 301 | 302 | ParseError::TerminalNotDefined(ident) => { 303 | format!("Unknown terminal symbol name: {}", ident) 304 | } 305 | 306 | ParseError::MultipleTokenDefinition(old, new) => { 307 | format!("Multiple %token definition with same name: {}", old) 308 | } 309 | 310 | ParseError::ReservedName(ident) => { 311 | format!("'{}' is reserved name", ident) 312 | } 313 | 314 | ParseError::UnsupportedLiteralType(literal) => { 315 | format!("Not supported literal type: {}", literal) 316 | } 317 | 318 | ParseError::InvalidLiteralRange(first, last) => { 319 | format!( 320 | "Range in literal terminal set is not valid: [{} - {}]", 321 | first, last 322 | ) 323 | } 324 | 325 | ParseError::TokenInLiteralMode(_) => { 326 | "%token with %tokentype `char` or `u8` is not supported. 
Use 'a' or b'a' instead" 327 | .to_string() 328 | } 329 | 330 | ParseError::MultiplePrecedenceOrderDefinition { cur, old } => { 331 | format!("Conflicts with precedence definition: {}", cur) 332 | } 333 | ParseError::PrecedenceNotDefined(name) => { 334 | format!("Precedence not defined for the given token: {}", name) 335 | } 336 | ParseError::NonTerminalPrecedenceNotDefined(span, nonterm_idx) => { 337 | "All production rules in this non-terminal must have %prec defined".into() 338 | } 339 | 340 | ParseError::RuleTypeDefinedButActionNotDefined { name, span } => { 341 | "ReduceAction must be defined for this rule".into() 342 | } 343 | ParseError::OnlyTerminalSet(_, _) => "Only terminal or terminal set is allowed".into(), 344 | ParseError::NonTerminalNotDefined(ident) => { 345 | format!("Unknown non-terminal symbol name: {}", ident) 346 | } 347 | ParseError::OnlyUsizeLiteral(_) => "Only 'usize' literal is allowed for %dprec".into(), 348 | } 349 | } 350 | } 351 | 352 | #[allow(unused)] 353 | impl ConflictError { 354 | pub fn to_compile_error(&self) -> TokenStream { 355 | let span = self.span(); 356 | let message = self.short_message(); 357 | quote_spanned! 
{ 358 | span=> 359 | compile_error!(#message); 360 | } 361 | } 362 | 363 | pub fn span(&self) -> Span { 364 | match self { 365 | ConflictError::ShiftReduceConflict { 366 | term, 367 | reduce_rule: (ruleid, rule), 368 | shift_rules, 369 | } => Span::call_site(), 370 | ConflictError::ReduceReduceConflict { 371 | lookahead, 372 | rule1: (ruleid1, rule1), 373 | rule2: (ruleid2, rule2), 374 | } => Span::call_site(), 375 | } 376 | } 377 | 378 | pub fn short_message(&self) -> String { 379 | match self { 380 | ConflictError::ShiftReduceConflict { 381 | term, 382 | reduce_rule: (ruleid, rule), 383 | shift_rules, 384 | } => { 385 | format!( 386 | "Shift-Reduce conflict with terminal symbol: {}\n>>> Reduce: {}\n>>> Shifts: {}", 387 | term, 388 | rule, 389 | shift_rules 390 | .iter() 391 | .map(|(ruleid, rule)| format!("{}", rule)) 392 | .collect::>() 393 | .join("\n>>>") 394 | ) 395 | } 396 | ConflictError::ReduceReduceConflict { 397 | lookahead, 398 | rule1: (ruleid1, rule1), 399 | rule2: (ruleid2, rule2), 400 | } => { 401 | format!( 402 | "Reduce-Reduce conflict with lookahead symbol: {}\n>>> Rule1: {}\n>>> Rule2: {}", 403 | lookahead, rule1, rule2 404 | ) 405 | } 406 | } 407 | } 408 | } 409 | -------------------------------------------------------------------------------- /rusty_lr_parser/src/parser/lexer.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Delimiter; 2 | use proc_macro2::Group; 3 | use proc_macro2::Ident; 4 | use proc_macro2::Literal; 5 | use proc_macro2::Punct; 6 | use proc_macro2::TokenStream; 7 | use proc_macro2::TokenTree; 8 | 9 | use quote::TokenStreamExt; 10 | 11 | use super::args::GrammarArgs; 12 | use super::parser_expanded::GrammarContext; 13 | use super::parser_expanded::GrammarParseError; 14 | use super::parser_expanded::GrammarParser; 15 | 16 | #[allow(dead_code)] 17 | #[derive(Clone, Debug)] 18 | pub enum Lexed { 19 | Ident(Ident), 20 | Colon(Punct), 21 | Semicolon(Punct), 22 | Pipe(Punct), 
23 | Percent(Punct), 24 | Equal(Punct), 25 | Plus(Punct), 26 | Star(Punct), 27 | Question(Punct), 28 | Caret(Punct), 29 | Minus(Punct), 30 | Exclamation(Punct), 31 | Slash(Punct), 32 | Dot(Punct), 33 | Dollar(Punct), 34 | Comma(Punct), 35 | OtherPunct(Punct), 36 | 37 | Literal(Literal), 38 | 39 | ParenGroup(Group), 40 | BraceGroup(Group), 41 | BracketGroup(Group), 42 | NoneGroup(Group), 43 | LParen, 44 | RParen, 45 | LBrace, 46 | RBrace, 47 | LBracket, 48 | RBracket, 49 | 50 | Left(Ident), // %left, %l, %reduce 51 | Right(Ident), // %right, %r, %shift 52 | Token(Ident), // %token 53 | Start(Ident), // %start 54 | TokenType(Ident), // %tokentype 55 | UserData(Ident), // %userdata 56 | ErrorType(Ident), // %err %error 57 | ModulePrefix(Ident), // %moduleprefix 58 | Lalr(Ident), // %lalr 59 | Glr(Ident), // %glr 60 | Prec(Ident), // %prec 61 | Precedence(Ident), // %precedence 62 | NoOptim(Ident), // %nooptim 63 | Dense(Ident), // %dense 64 | Trace(Ident), // %trace 65 | DPrec(Ident), // %dprec 66 | Filter(Ident), // %filter 67 | Location(Ident), // %location 68 | } 69 | impl Lexed { 70 | pub fn append_to_stream(self, stream: &mut TokenStream) { 71 | match self { 72 | Lexed::Ident(ident) => stream.append(ident), 73 | Lexed::Colon(punct) => stream.append(punct), 74 | Lexed::Semicolon(punct) => stream.append(punct), 75 | Lexed::Pipe(punct) => stream.append(punct), 76 | Lexed::Percent(punct) => stream.append(punct), 77 | Lexed::Equal(punct) => stream.append(punct), 78 | Lexed::Plus(punct) => stream.append(punct), 79 | Lexed::Star(punct) => stream.append(punct), 80 | Lexed::Question(punct) => stream.append(punct), 81 | Lexed::Caret(punct) => stream.append(punct), 82 | Lexed::Minus(punct) => stream.append(punct), 83 | Lexed::Exclamation(punct) => stream.append(punct), 84 | Lexed::Slash(punct) => stream.append(punct), 85 | Lexed::Dot(punct) => stream.append(punct), 86 | Lexed::Dollar(punct) => stream.append(punct), 87 | Lexed::Comma(punct) => stream.append(punct), 88 | 
Lexed::OtherPunct(punct) => stream.append(punct), 89 | 90 | Lexed::Literal(lit) => stream.append(lit), 91 | 92 | Lexed::ParenGroup(group) => stream.append(group), 93 | Lexed::BraceGroup(group) => stream.append(group), 94 | Lexed::BracketGroup(group) => stream.append(group), 95 | Lexed::NoneGroup(group) => stream.append(group), 96 | 97 | Lexed::LParen => unreachable!("LParen::stream()"), 98 | Lexed::RParen => unreachable!("RParen::stream()"), 99 | Lexed::LBrace => unreachable!("LBrace::stream()"), 100 | Lexed::RBrace => unreachable!("RBrace::stream()"), 101 | Lexed::LBracket => unreachable!("LBracket::stream()"), 102 | Lexed::RBracket => unreachable!("RBracket::stream()"), 103 | 104 | Lexed::Left(ident) => { 105 | stream.append(ident); 106 | } 107 | Lexed::Right(ident) => { 108 | stream.append(ident); 109 | } 110 | Lexed::Token(ident) => { 111 | stream.append(ident); 112 | } 113 | Lexed::Start(ident) => { 114 | stream.append(ident); 115 | } 116 | Lexed::TokenType(ident) => { 117 | stream.append(ident); 118 | } 119 | Lexed::UserData(ident) => { 120 | stream.append(ident); 121 | } 122 | Lexed::ErrorType(ident) => { 123 | stream.append(ident); 124 | } 125 | Lexed::ModulePrefix(ident) => { 126 | stream.append(ident); 127 | } 128 | Lexed::Lalr(ident) => { 129 | stream.append(ident); 130 | } 131 | Lexed::Glr(ident) => { 132 | stream.append(ident); 133 | } 134 | Lexed::Prec(ident) => { 135 | stream.append(ident); 136 | } 137 | Lexed::Precedence(ident) => { 138 | stream.append(ident); 139 | } 140 | Lexed::NoOptim(ident) => { 141 | stream.append(ident); 142 | } 143 | Lexed::Dense(ident) => { 144 | stream.append(ident); 145 | } 146 | Lexed::Trace(ident) => { 147 | stream.append(ident); 148 | } 149 | Lexed::DPrec(ident) => { 150 | stream.append(ident); 151 | } 152 | Lexed::Filter(ident) => { 153 | stream.append(ident); 154 | } 155 | Lexed::Location(ident) => { 156 | stream.append(ident); 157 | } 158 | } 159 | } 160 | } 161 | impl std::fmt::Display for Lexed { 162 | fn 
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Human-readable token names, used in parser diagnostics.
        // NOTE(review): the empty "" format strings below appear to have lost
        // angle-bracketed placeholders (e.g. "<Ident>") during export — confirm
        // against the upstream source before relying on them.
        match self {
            Lexed::Ident(_) => write!(f, ""),
            Lexed::Colon(_) => write!(f, "':'"),
            Lexed::Semicolon(_) => write!(f, "';'"),
            Lexed::Pipe(_) => write!(f, "'|'"),
            Lexed::Percent(_) => write!(f, "'%'"),
            Lexed::Literal(_) => write!(f, ""),
            Lexed::Equal(_) => write!(f, "'='"),
            Lexed::Plus(_) => write!(f, "'+'"),
            Lexed::Star(_) => write!(f, "'*'"),
            Lexed::Question(_) => write!(f, "'?'"),
            Lexed::Caret(_) => write!(f, "'^'"),
            Lexed::Minus(_) => write!(f, "'-'"),
            Lexed::Exclamation(_) => write!(f, "'!'"),
            Lexed::Slash(_) => write!(f, "'/'"),
            Lexed::Dot(_) => write!(f, "'.'"),
            Lexed::Dollar(_) => write!(f, "'$'"),
            Lexed::Comma(_) => write!(f, "','"),
            Lexed::OtherPunct(p) => write!(f, "'{}'", p.as_char()),

            Lexed::ParenGroup(_) => write!(f, ""),
            Lexed::BraceGroup(_) => write!(f, ""),
            Lexed::BracketGroup(_) => write!(f, ""),
            Lexed::NoneGroup(_) => write!(f, ""),
            Lexed::LParen => write!(f, "'('"),
            Lexed::RParen => write!(f, "')'"),
            Lexed::LBrace => write!(f, "'{{'"),
            Lexed::RBrace => write!(f, "'}}'"),
            Lexed::LBracket => write!(f, "'['"),
            // bugfix: RBracket previously printed "'['" (copy-paste from the
            // LBracket arm); it must print the closing bracket.
            Lexed::RBracket => write!(f, "']'"),

            Lexed::Left(_) => write!(f, "left"),
            Lexed::Right(_) => write!(f, "right"),
            Lexed::Token(_) => write!(f, "token"),
            Lexed::Start(_) => write!(f, "start"),
            Lexed::TokenType(_) => write!(f, "tokentype"),
            Lexed::UserData(_) => write!(f, "userdata"),
            Lexed::ErrorType(_) => write!(f, "error"),
            Lexed::ModulePrefix(_) => write!(f, "moduleprefix"),
            Lexed::Lalr(_) => write!(f, "lalr"),
            Lexed::Glr(_) => write!(f, "glr"),
            Lexed::Prec(_) => write!(f, "prec"),
            Lexed::Precedence(_) => write!(f, "precedence"),
            Lexed::NoOptim(_) => write!(f, "nooptim"),
            Lexed::Dense(_) => write!(f, "dense"),
Lexed::Trace(_) => write!(f, "trace"), 209 | Lexed::DPrec(_) => write!(f, "dprec"), 210 | Lexed::Filter(_) => write!(f, "filter"), 211 | Lexed::Location(_) => write!(f, "location"), 212 | } 213 | } 214 | } 215 | 216 | fn ident_to_keyword(ident: Ident) -> Option { 217 | match ident.to_string().as_str() { 218 | "left" | "l" | "reduce" => Some(Lexed::Left(ident)), 219 | "right" | "r" | "shift" => Some(Lexed::Right(ident)), 220 | "token" => Some(Lexed::Token(ident)), 221 | "start" => Some(Lexed::Start(ident)), 222 | "tokentype" => Some(Lexed::TokenType(ident)), 223 | "userdata" => Some(Lexed::UserData(ident)), 224 | "err" | "error" => Some(Lexed::ErrorType(ident)), 225 | "moduleprefix" => Some(Lexed::ModulePrefix(ident)), 226 | "lalr" => Some(Lexed::Lalr(ident)), 227 | "glr" => Some(Lexed::Glr(ident)), 228 | "prec" => Some(Lexed::Prec(ident)), 229 | "precedence" => Some(Lexed::Precedence(ident)), 230 | "nooptim" => Some(Lexed::NoOptim(ident)), 231 | "dense" => Some(Lexed::Dense(ident)), 232 | "trace" => Some(Lexed::Trace(ident)), 233 | "dprec" => Some(Lexed::DPrec(ident)), 234 | "filter" => Some(Lexed::Filter(ident)), 235 | "location" => Some(Lexed::Location(ident)), 236 | _ => None, 237 | } 238 | } 239 | 240 | /// lex & feed stream to parser 241 | /// For '%' directives and 'Group' variants, 242 | /// First tries to feed the Compound token 243 | /// if it failed, then feed the internal splitted tokens recursively 244 | pub fn feed_recursive( 245 | input: TokenStream, 246 | parser: &GrammarParser, 247 | context: &mut GrammarContext, 248 | grammar_args: &mut GrammarArgs, 249 | ) -> Result<(), GrammarParseError> { 250 | use super::span_pair::SpanPair; 251 | let mut input = input.into_iter().peekable(); 252 | 253 | while let Some(next) = input.next() { 254 | let location = SpanPair::new_single(next.span()); 255 | match next { 256 | TokenTree::Ident(ident) => { 257 | if let Some(keyword) = ident_to_keyword(ident.clone()) { 258 | if context.can_feed(parser, &keyword) { 259 
| context.feed_location(parser, keyword, grammar_args, location)?; 260 | } else { 261 | context.feed_location( 262 | parser, 263 | Lexed::Ident(ident), 264 | grammar_args, 265 | location, 266 | )?; 267 | } 268 | } else { 269 | context.feed_location(parser, Lexed::Ident(ident), grammar_args, location)?; 270 | } 271 | } 272 | TokenTree::Punct(punct) => match punct.as_char() { 273 | ':' => { 274 | context.feed_location(parser, Lexed::Colon(punct), grammar_args, location)? 275 | } 276 | ';' => context.feed_location( 277 | parser, 278 | Lexed::Semicolon(punct), 279 | grammar_args, 280 | location, 281 | )?, 282 | '|' => context.feed_location(parser, Lexed::Pipe(punct), grammar_args, location)?, 283 | '+' => context.feed_location(parser, Lexed::Plus(punct), grammar_args, location)?, 284 | '*' => context.feed_location(parser, Lexed::Star(punct), grammar_args, location)?, 285 | '?' => { 286 | context.feed_location(parser, Lexed::Question(punct), grammar_args, location)? 287 | } 288 | '^' => { 289 | context.feed_location(parser, Lexed::Caret(punct), grammar_args, location)? 290 | } 291 | '-' => { 292 | context.feed_location(parser, Lexed::Minus(punct), grammar_args, location)? 293 | } 294 | '=' => { 295 | context.feed_location(parser, Lexed::Equal(punct), grammar_args, location)? 296 | } 297 | '!' => context.feed_location( 298 | parser, 299 | Lexed::Exclamation(punct), 300 | grammar_args, 301 | location, 302 | )?, 303 | '/' => { 304 | context.feed_location(parser, Lexed::Slash(punct), grammar_args, location)? 305 | } 306 | '.' => context.feed_location(parser, Lexed::Dot(punct), grammar_args, location)?, 307 | '%' => { 308 | context.feed_location(parser, Lexed::Percent(punct), grammar_args, location)? 309 | } 310 | '$' => { 311 | context.feed_location(parser, Lexed::Dollar(punct), grammar_args, location)? 312 | } 313 | ',' => { 314 | context.feed_location(parser, Lexed::Comma(punct), grammar_args, location)? 
315 | } 316 | _ => context.feed_location( 317 | parser, 318 | Lexed::OtherPunct(punct), 319 | grammar_args, 320 | location, 321 | )?, 322 | }, 323 | TokenTree::Group(group) => match group.delimiter() { 324 | Delimiter::Parenthesis => { 325 | let token = Lexed::ParenGroup(group); 326 | if context.can_feed(parser, &token) { 327 | context.feed_location(parser, token, grammar_args, location)?; 328 | } else { 329 | let Lexed::ParenGroup(group) = token else { 330 | unreachable!(); 331 | }; 332 | // feed the splitted tokens 333 | context.feed_location( 334 | parser, 335 | Lexed::LParen, 336 | grammar_args, 337 | SpanPair::new_single(group.span_open()), 338 | )?; 339 | feed_recursive(group.stream(), parser, context, grammar_args)?; 340 | context.feed_location( 341 | parser, 342 | Lexed::RParen, 343 | grammar_args, 344 | SpanPair::new_single(group.span_close()), 345 | )?; 346 | } 347 | } 348 | Delimiter::Brace => { 349 | // for now, splitted for brace is not in syntax, so ignore it 350 | context.feed_location( 351 | parser, 352 | Lexed::BraceGroup(group), 353 | grammar_args, 354 | location, 355 | )?; 356 | } 357 | Delimiter::Bracket => { 358 | let token = Lexed::BracketGroup(group); 359 | if context.can_feed(parser, &token) { 360 | context.feed_location(parser, token, grammar_args, location)?; 361 | } else { 362 | let Lexed::BracketGroup(group) = token else { 363 | unreachable!(); 364 | }; 365 | // feed the splitted tokens 366 | context.feed_location( 367 | parser, 368 | Lexed::LBracket, 369 | grammar_args, 370 | SpanPair::new_single(group.span_open()), 371 | )?; 372 | feed_recursive(group.stream(), parser, context, grammar_args)?; 373 | context.feed_location( 374 | parser, 375 | Lexed::RBracket, 376 | grammar_args, 377 | SpanPair::new_single(group.span_close()), 378 | )?; 379 | } 380 | } 381 | _ => { 382 | // for now, compound for nonegroup is not in syntax, so ignore it 383 | context.feed_location( 384 | parser, 385 | Lexed::NoneGroup(group), 386 | grammar_args, 387 | 
location, 388 | )?; 389 | } 390 | }, 391 | TokenTree::Literal(literal) => { 392 | context.feed_location(parser, Lexed::Literal(literal), grammar_args, location)? 393 | } 394 | }; 395 | } 396 | Ok(()) 397 | } 398 | -------------------------------------------------------------------------------- /rusty_lr_core/src/parser/state.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | 3 | use crate::hash::HashMap; 4 | use crate::parser::nonterminal::NonTerminal; 5 | use crate::parser::terminalclass::TerminalClass; 6 | use crate::TriState; 7 | 8 | #[derive(Debug, Clone, Copy)] 9 | pub struct ShiftTarget { 10 | pub state: StateIndex, 11 | /// true if the data should be pushed, false if data should not be pushed (so `Empty` tag will be pushed) 12 | pub push: bool, 13 | } 14 | impl ShiftTarget { 15 | pub fn new(state: StateIndex, push: bool) -> Self { 16 | ShiftTarget { state, push } 17 | } 18 | } 19 | 20 | /// This intermediate state is a common structure to convert from generated code and grammar builder 21 | /// into various types of parser states (SparseState, DenseState, ...). 22 | pub struct IntermediateState { 23 | pub shift_goto_map_term: Vec<(TermClass, ShiftTarget)>, // must be sorted 24 | pub shift_goto_map_nonterm: Vec<(NonTerm, ShiftTarget)>, // must be sorted 25 | pub reduce_map: Vec<(TermClass, Vec)>, // must be sorted 26 | pub ruleset: Vec, 27 | pub can_accept_error: TriState, 28 | } 29 | 30 | /// For state, terminal and class indices, we use the most compact integer type that can hold the maximum value. 31 | /// This trait defines the conversion between {u8, u16, u32, usize} <-> usize. 
32 | pub trait Index: Copy { 33 | fn into_usize(self) -> usize; 34 | fn from_usize_unchecked(value: usize) -> Self; 35 | } 36 | impl Index for usize { 37 | fn into_usize(self) -> usize { 38 | self 39 | } 40 | fn from_usize_unchecked(value: usize) -> Self { 41 | value 42 | } 43 | } 44 | impl Index for u8 { 45 | fn into_usize(self) -> usize { 46 | self as usize 47 | } 48 | fn from_usize_unchecked(value: usize) -> Self { 49 | value as u8 50 | } 51 | } 52 | impl Index for u16 { 53 | fn into_usize(self) -> usize { 54 | self as usize 55 | } 56 | fn from_usize_unchecked(value: usize) -> Self { 57 | value as u16 58 | } 59 | } 60 | impl Index for u32 { 61 | fn into_usize(self) -> usize { 62 | self as usize 63 | } 64 | fn from_usize_unchecked(value: usize) -> Self { 65 | value as u32 66 | } 67 | } 68 | 69 | /// Since non-deterministic parsers can have multiple reduce rules for a single terminal, 70 | /// we need to handle the set of reduce rules efficiently, usually 2~3 items. 71 | /// this trait implements the stack-allocated vector for this purpose. 
72 | pub trait ReduceRules { 73 | const CAP: usize; 74 | type RuleIndex: Index; 75 | 76 | fn to_iter(&self) -> impl Iterator + Clone; 77 | fn from_set>(set: Vec) -> Self; 78 | } 79 | 80 | /// For deterministic parser behavior 81 | impl ReduceRules for Integral { 82 | const CAP: usize = 1; 83 | type RuleIndex = Integral; 84 | 85 | fn to_iter(&self) -> impl Iterator + Clone { 86 | std::iter::once(*self) 87 | } 88 | fn from_set>(set: Vec) -> Self { 89 | debug_assert!(set.len() == 1, "Expected a single element set"); 90 | set.into_iter().next().unwrap().try_into().ok().unwrap() 91 | } 92 | } 93 | 94 | pub use arrayvec::ArrayVec; 95 | impl ReduceRules for ArrayVec { 96 | const CAP: usize = CAP; 97 | type RuleIndex = T; 98 | 99 | fn to_iter(&self) -> impl Iterator + Clone { 100 | self.iter().copied() 101 | } 102 | fn from_set>(set: Vec) -> Self { 103 | set.into_iter() 104 | .map(|value| value.try_into().ok().unwrap()) 105 | .collect() 106 | } 107 | } 108 | 109 | /// A trait representing a parser state. 110 | pub trait State { 111 | type TermClass: TerminalClass; 112 | type NonTerm: NonTerminal; 113 | type ReduceRules: ReduceRules; 114 | type StateIndex: Index; 115 | 116 | /// Get the next state for a given terminal symbol. 117 | fn shift_goto_class(&self, class: Self::TermClass) -> Option>; 118 | 119 | /// Get the next state for a given non-terminal symbol. 120 | fn shift_goto_nonterm(&self, nonterm: Self::NonTerm) -> Option>; 121 | /// Get the reduce rule index for a given terminal symbol. 122 | fn reduce(&self, class: Self::TermClass) -> Option<&Self::ReduceRules>; 123 | 124 | /// Check if this state is an accept state. 
125 | fn is_accept(&self) -> bool; 126 | 127 | /// Get the set of expected terminal classes for shift in this state 128 | fn expected_shift_term(&self) -> impl Iterator + '_; 129 | 130 | /// Get the set of expected non-terminal symbols for shift in this state 131 | fn expected_shift_nonterm(&self) -> impl Iterator + '_; 132 | 133 | /// Get the set of production rule for reduce in this state 134 | fn expected_reduce_rule(&self) -> impl Iterator + '_; 135 | 136 | /// Get the set of rules that this state is trying to parse 137 | fn get_rules(&self) -> &[crate::rule::ShiftedRuleRef]; 138 | 139 | fn can_accept_error(&self) -> TriState; 140 | } 141 | 142 | /// `State` implementation for a sparse state representation using HashMap 143 | #[derive(Debug, Clone)] 144 | pub struct SparseState { 145 | /// terminal symbol -> next state 146 | pub(crate) shift_goto_map_class: HashMap>, 147 | 148 | /// non-terminal symbol -> next state 149 | pub(crate) shift_goto_map_nonterm: HashMap>, 150 | 151 | /// terminal symbol -> reduce rule index 152 | pub(crate) reduce_map: HashMap, 153 | 154 | /// set of rules that this state is trying to parse 155 | pub(crate) ruleset: Vec, 156 | 157 | pub(crate) can_accept_error: TriState, 158 | } 159 | 160 | impl< 161 | TermClass: TerminalClass + Hash + Eq, 162 | NonTerm: NonTerminal + Hash + Eq, 163 | RuleContainer: ReduceRules, 164 | StateIndex: Index, 165 | > State for SparseState 166 | { 167 | type TermClass = TermClass; 168 | type NonTerm = NonTerm; 169 | type ReduceRules = RuleContainer; 170 | type StateIndex = StateIndex; 171 | 172 | fn shift_goto_class(&self, class: Self::TermClass) -> Option> { 173 | self.shift_goto_map_class.get(&class).copied() 174 | } 175 | fn shift_goto_nonterm(&self, nonterm: Self::NonTerm) -> Option> { 176 | self.shift_goto_map_nonterm.get(&nonterm).copied() 177 | } 178 | fn reduce(&self, class: Self::TermClass) -> Option<&Self::ReduceRules> { 179 | self.reduce_map.get(&class) 180 | } 181 | fn is_accept(&self) -> bool { 
182 | self.reduce_map.is_empty() 183 | && self.shift_goto_map_class.is_empty() 184 | && self.shift_goto_map_nonterm.is_empty() 185 | } 186 | fn expected_shift_term(&self) -> impl Iterator + '_ { 187 | self.shift_goto_map_class.keys().copied() 188 | } 189 | fn expected_shift_nonterm(&self) -> impl Iterator + '_ { 190 | self.shift_goto_map_nonterm.keys().copied() 191 | } 192 | fn expected_reduce_rule(&self) -> impl Iterator + '_ { 193 | self.reduce_map.values().flat_map(RuleContainer::to_iter) 194 | } 195 | fn get_rules(&self) -> &[crate::rule::ShiftedRuleRef] { 196 | &self.ruleset 197 | } 198 | fn can_accept_error(&self) -> TriState { 199 | self.can_accept_error 200 | } 201 | } 202 | 203 | /// `State` implementation for a dense state representation using Vec 204 | #[derive(Debug, Clone)] 205 | pub struct DenseState { 206 | /// terminal symbol -> next state 207 | pub(crate) shift_goto_map_class: Vec>>, 208 | /// shift_goto_map_class[i] will contain i+offset 'th class's next state. 209 | pub(crate) shift_class_offset: usize, 210 | /// set of terminal classes that is keys of `shift_goto_map_class` 211 | pub(crate) shift_goto_map_class_keys: Vec, 212 | 213 | /// non-terminal symbol -> next state 214 | pub(crate) shift_goto_map_nonterm: Vec>>, 215 | pub(crate) shift_nonterm_offset: usize, 216 | /// set of non-terminal symbols that is keys of `shift_goto_map_nonterm` 217 | pub(crate) shift_goto_map_nonterm_keys: Vec, 218 | 219 | /// terminal symbol -> reduce rule index 220 | pub(crate) reduce_map: Vec>, 221 | /// reduce_map[i] will contain i+offset 'th class's reduce rule. 
222 | pub(crate) reduce_offset: usize, 223 | 224 | /// set of rules that this state is trying to parse 225 | pub(crate) ruleset: Vec, 226 | 227 | pub(crate) can_accept_error: TriState, 228 | } 229 | impl< 230 | TermClass: TerminalClass, 231 | NonTerm: NonTerminal, 232 | RuleContainer: ReduceRules, 233 | StateIndex: Index, 234 | > State for DenseState 235 | { 236 | type TermClass = TermClass; 237 | type NonTerm = NonTerm; 238 | type ReduceRules = RuleContainer; 239 | type StateIndex = StateIndex; 240 | 241 | fn shift_goto_class(&self, class: Self::TermClass) -> Option> { 242 | self.shift_goto_map_class 243 | .get(class.to_usize().wrapping_sub(self.shift_class_offset)) 244 | .copied() 245 | .flatten() 246 | } 247 | fn shift_goto_nonterm(&self, nonterm: Self::NonTerm) -> Option> { 248 | self.shift_goto_map_nonterm 249 | .get(nonterm.to_usize().wrapping_sub(self.shift_nonterm_offset)) 250 | .copied() 251 | .flatten() 252 | } 253 | fn reduce(&self, class: Self::TermClass) -> Option<&Self::ReduceRules> { 254 | self.reduce_map 255 | .get(class.to_usize().wrapping_sub(self.reduce_offset)) 256 | .and_then(|r| r.as_ref()) 257 | } 258 | fn is_accept(&self) -> bool { 259 | self.reduce_map.is_empty() 260 | && self.shift_goto_map_class.is_empty() 261 | && self.shift_goto_map_nonterm.is_empty() 262 | } 263 | fn expected_shift_term(&self) -> impl Iterator + '_ { 264 | self.shift_goto_map_class_keys.iter().copied() 265 | } 266 | fn expected_shift_nonterm(&self) -> impl Iterator + '_ { 267 | self.shift_goto_map_nonterm_keys.iter().copied() 268 | } 269 | fn expected_reduce_rule(&self) -> impl Iterator + '_ { 270 | self.reduce_map 271 | .iter() 272 | .filter_map(|r| r.as_ref()) 273 | .flat_map(RuleContainer::to_iter) 274 | } 275 | 276 | fn get_rules(&self) -> &[crate::rule::ShiftedRuleRef] { 277 | &self.ruleset 278 | } 279 | 280 | fn can_accept_error(&self) -> TriState { 281 | self.can_accept_error 282 | } 283 | } 284 | 285 | impl 286 | From> 287 | for SparseState 288 | where 289 | 
TermClass: Ord + Hash, 290 | NonTerm: Hash + Eq, 291 | RuleContainer: ReduceRules, 292 | RuleContainer::RuleIndex: TryFrom, 293 | { 294 | fn from(builder_state: IntermediateState) -> Self { 295 | // TerminalSymbol::Term(_) < TerminalSymbol::Error < TerminalSymbol::Eof 296 | // since maps are sorted, eof and error should be at the end of the array 297 | 298 | // make sure the order is preserved 299 | #[cfg(debug_assertions)] 300 | { 301 | let keys = builder_state 302 | .shift_goto_map_term 303 | .iter() 304 | .map(|(term, _)| term) 305 | .collect::>(); 306 | debug_assert!(keys.is_sorted()); 307 | 308 | let keys = builder_state 309 | .reduce_map 310 | .iter() 311 | .map(|(term, _)| term) 312 | .collect::>(); 313 | debug_assert!(keys.is_sorted()); 314 | } 315 | SparseState { 316 | shift_goto_map_class: builder_state.shift_goto_map_term.into_iter().collect(), 317 | shift_goto_map_nonterm: builder_state.shift_goto_map_nonterm.into_iter().collect(), 318 | reduce_map: builder_state 319 | .reduce_map 320 | .into_iter() 321 | .map(|(term, rule)| { 322 | ( 323 | term.try_into().expect("term conversion failed"), 324 | RuleContainer::from_set(rule), 325 | ) 326 | }) 327 | .collect(), 328 | ruleset: builder_state.ruleset.into_iter().collect(), 329 | can_accept_error: builder_state.can_accept_error, 330 | } 331 | } 332 | } 333 | impl 334 | From> 335 | for DenseState 336 | where 337 | TermClass: Ord + Copy, 338 | NonTerm: Hash + Eq + Copy + NonTerminal, 339 | StateIndex: Copy, 340 | RuleContainer: Clone + ReduceRules, 341 | RuleContainer::RuleIndex: TryFrom, 342 | { 343 | fn from(builder_state: IntermediateState) -> Self { 344 | // TerminalSymbol::Term(_) < TerminalSymbol::Error < TerminalSymbol::Eof 345 | // since maps are sorted, eof and error should be at the end of the array 346 | 347 | // make sure the order is preserved 348 | #[cfg(debug_assertions)] 349 | { 350 | let keys = builder_state 351 | .shift_goto_map_term 352 | .iter() 353 | .map(|(term, _)| term) 354 | 
.collect::>(); 355 | debug_assert!(keys.is_sorted()); 356 | 357 | let keys = builder_state 358 | .reduce_map 359 | .iter() 360 | .map(|(term, _)| term) 361 | .collect::>(); 362 | debug_assert!(keys.is_sorted()); 363 | } 364 | 365 | let (shift_min, shift_len) = { 366 | let mut iter = builder_state 367 | .shift_goto_map_term 368 | .iter() 369 | .map(|(term, _)| term); 370 | let min: Option = iter.next().map(|x| x.to_usize()); 371 | let max: Option = iter.next_back().map(|x| x.to_usize()).or(min); 372 | 373 | if let (Some(min), Some(max)) = (min, max) { 374 | (min, max - min + 1) 375 | } else { 376 | (0, 0) 377 | } 378 | }; 379 | let (reduce_min, reduce_len) = { 380 | let mut iter = builder_state.reduce_map.iter().map(|(term, _)| term); 381 | let min: Option = iter.next().map(|x| x.to_usize()); 382 | let max: Option = iter.next_back().map(|x| x.to_usize()).or(min); 383 | if let (Some(min), Some(max)) = (min, max) { 384 | (min, max - min + 1) 385 | } else { 386 | (0, 0) 387 | } 388 | }; 389 | let (nonterm_min, nonterm_len) = { 390 | let mut iter = builder_state 391 | .shift_goto_map_nonterm 392 | .iter() 393 | .map(|(nonterm, _)| nonterm); 394 | let min = iter.next().map(|x| x.to_usize()); 395 | let max = iter.next_back().map(|x| x.to_usize()).or(min); 396 | if let (Some(min), Some(max)) = (min, max) { 397 | (min, max - min + 1) 398 | } else { 399 | (0, 0) 400 | } 401 | }; 402 | 403 | let shift_term_keys = builder_state 404 | .shift_goto_map_term 405 | .iter() 406 | .map(|(term, _)| *term) 407 | .collect(); 408 | let mut shift_goto_map_class = vec![None; shift_len]; 409 | for (term, state) in builder_state.shift_goto_map_term { 410 | shift_goto_map_class[term.to_usize() - shift_min] = Some(state); 411 | } 412 | 413 | let mut reduce_map = vec![None; reduce_len]; 414 | for (term, rule) in builder_state.reduce_map { 415 | reduce_map[term.to_usize() - reduce_min] = Some(RuleContainer::from_set(rule)); 416 | } 417 | 418 | let nonterm_keys = builder_state 419 | 
.shift_goto_map_nonterm 420 | .iter() 421 | .map(|(nonterm, _)| *nonterm) 422 | .collect(); 423 | let mut shift_goto_map_nonterm = vec![None; nonterm_len]; 424 | for (nonterm, state) in builder_state.shift_goto_map_nonterm { 425 | shift_goto_map_nonterm[nonterm.to_usize() - nonterm_min] = Some(state); 426 | } 427 | 428 | DenseState { 429 | shift_goto_map_class, 430 | shift_class_offset: shift_min, 431 | shift_goto_map_class_keys: shift_term_keys, 432 | shift_goto_map_nonterm, 433 | shift_goto_map_nonterm_keys: nonterm_keys, 434 | shift_nonterm_offset: nonterm_min, 435 | reduce_map, 436 | reduce_offset: reduce_min, 437 | ruleset: builder_state.ruleset.into_iter().collect(), 438 | can_accept_error: builder_state.can_accept_error, 439 | } 440 | } 441 | } 442 | --------------------------------------------------------------------------------