├── doc
├── .gitignore
├── book.toml
└── src
│ ├── libsandtools.md
│ ├── thirdparty.md
│ ├── othertools.md
│ ├── lrpar.md
│ ├── README.md
│ ├── lrtable.md
│ ├── cfgrammar.md
│ ├── lexing.md
│ ├── parsing.md
│ ├── yaccextensions.md
│ ├── SUMMARY.md
│ ├── editions.md
│ ├── lrlex.md
│ ├── lexextensions.md
│ ├── lexcompatibility.md
│ ├── actioncode.md
│ ├── start_states.md
│ ├── manuallexer.md
│ ├── yacccompatibility.md
│ ├── ast_example.md
│ ├── nimbleparse.md
│ └── parsing_idioms.md
├── .gitignore
├── lrpar
├── examples
│ ├── calc_ast
│ │ ├── src
│ │ │ ├── input.txt
│ │ │ ├── calc.l
│ │ │ ├── calc.y
│ │ │ └── main.rs
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── build.rs
│ ├── calc_actions
│ │ ├── src
│ │ │ ├── input.txt
│ │ │ ├── calc.l
│ │ │ ├── calc.y
│ │ │ └── main.rs
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── build.rs
│ ├── calc_ast_arena
│ │ ├── src
│ │ │ ├── input.txt
│ │ │ ├── calc.l
│ │ │ ├── calc.y
│ │ │ └── main.rs
│ │ ├── Cargo.toml
│ │ └── build.rs
│ ├── calc_parsetree
│ │ ├── src
│ │ │ ├── input.txt
│ │ │ ├── calc.l
│ │ │ ├── calc.y
│ │ │ └── main.rs
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── build.rs
│ ├── clone_param
│ │ ├── src
│ │ │ ├── input.txt
│ │ │ ├── param.l
│ │ │ ├── param.y
│ │ │ └── main.rs
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── README.md
│ └── start_states
│ │ ├── src
│ │ ├── input.txt
│ │ ├── comment.y
│ │ ├── comment.l
│ │ └── main.rs
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── build.rs
├── cttests
│ ├── src
│ │ ├── storaget.l
│ │ ├── multi_start.l
│ │ ├── multi_start.y
│ │ ├── epp.test
│ │ ├── expect.test
│ │ ├── ctfails
│ │ │ ├── warnings.test
│ │ │ ├── missing.test
│ │ │ ├── warnings_flags.test
│ │ │ ├── test_files2.test
│ │ │ ├── test_files3.test
│ │ │ ├── test_files1.test
│ │ │ └── calc_bad_input.test
│ │ ├── expectrr.test
│ │ ├── lexer_lifetime.test
│ │ ├── parseparam.test
│ │ ├── regex_opt.test
│ │ ├── warnings.test
│ │ ├── quoting.test
│ │ ├── lex_flags.test
│ │ ├── storaget.y
│ │ ├── multitypes.test
│ │ ├── typeparams.test
│ │ ├── passthrough.test
│ │ ├── calc_noactions.test
│ │ ├── parseparam_copy.test
│ │ ├── calc_nodefault_yacckind.test
│ │ ├── calc_recoverer_none.test
│ │ ├── calc_multitypes.test
│ │ ├── calc_recoverer_cpctplus.test
│ │ ├── span.test
│ │ ├── calc_actiontype.test
│ │ ├── calc_unsafeaction.test
│ │ ├── calc_input.test
│ │ ├── calc_wasm.test
│ │ ├── calc_wasm.rs
│ │ ├── grmtools_section.test
│ │ └── cgen_helper.rs
│ ├── Cargo.toml
│ └── build.rs
├── build.rs
├── cttests_macro
│ ├── Cargo.toml
│ └── src
│ │ └── lib.rs
├── Cargo.toml
├── src
│ └── lib
│ │ ├── test_utils.rs
│ │ ├── dijkstra.rs
│ │ └── lex_api.rs
└── README.md
├── lrlex
├── build.rs
├── examples
│ ├── calclex
│ │ ├── src
│ │ │ ├── calc.l
│ │ │ └── main.rs
│ │ ├── build.rs
│ │ ├── Cargo.toml
│ │ └── README.md
│ └── calc_manual_lex
│ │ ├── README.md
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src
│ │ ├── calc.y
│ │ └── main.rs
├── README.md
├── Cargo.toml
└── src
│ ├── lib
│ └── defaults.rs
│ └── main.rs
├── deny.toml
├── .cargo
└── config.toml
├── cfgrammar
├── README.md
├── Cargo.toml
└── src
│ └── lib
│ ├── idxnewtype.rs
│ ├── yacc
│ └── mod.rs
│ ├── span.rs
│ └── mod.rs
├── lrtable
├── README.md
├── Cargo.toml
└── src
│ └── lib
│ └── mod.rs
├── .buildbot_dockerfile_debian
├── .github
└── workflows
│ └── sdci.yml
├── LICENSE-APACHE
├── nimbleparse
├── Cargo.toml
└── README.md
├── COPYRIGHT
├── LICENSE-MIT
├── Cargo.toml
├── .buildbot.sh
└── README.md
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | book
2 | 
release 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | *.swp 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast/src/input.txt: -------------------------------------------------------------------------------- 1 | 5 + 4 * 3 -------------------------------------------------------------------------------- /lrpar/examples/calc_actions/src/input.txt: -------------------------------------------------------------------------------- 1 | 5 + 4 * 3 -------------------------------------------------------------------------------- /lrpar/examples/calc_ast_arena/src/input.txt: -------------------------------------------------------------------------------- 1 | 5 + 4 * 3 -------------------------------------------------------------------------------- /lrpar/examples/calc_parsetree/src/input.txt: -------------------------------------------------------------------------------- 1 | 5 + 4 * 3 -------------------------------------------------------------------------------- /lrpar/examples/clone_param/src/input.txt: -------------------------------------------------------------------------------- 1 | 0++++--- 2 | -------------------------------------------------------------------------------- /doc/book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | src = "src" 3 | title = "grmtools" 4 | -------------------------------------------------------------------------------- /lrpar/cttests/src/storaget.l: -------------------------------------------------------------------------------- 1 | %% 2 | , "," 3 | [a-zA-Z]+ "word" 4 | [\n\t ]+ ; 5 | -------------------------------------------------------------------------------- /lrpar/examples/start_states/src/input.txt: -------------------------------------------------------------------------------- 1 | coment /* */ 2 | nested comment /* /* */ */ 3 | -------------------------------------------------------------------------------- /lrpar/cttests/src/multi_start.l: -------------------------------------------------------------------------------- 1 | %% 2 | A+ 'A' 3 | B+ 'B' 4 | C+ 'C' 5 | ; ';' 6 | : ':' 7 | , ',' 8 | [ \n\t] ; -------------------------------------------------------------------------------- /lrpar/examples/calc_actions/src/calc.l: -------------------------------------------------------------------------------- 1 | %% 2 | [0-9]+ "INT" 3 | \+ "+" 4 | \* "*" 5 | \( "(" 6 | \) ")" 7 | [\t\n ]+ ; 8 | -------------------------------------------------------------------------------- /lrpar/examples/calc_parsetree/src/calc.l: -------------------------------------------------------------------------------- 1 | %% 2 | [0-9]+ "INT" 3 | \+ "+" 4 | \* "*" 5 | \( "(" 6 | \) ")" 7 | [\t\n ]+ ; 8 | -------------------------------------------------------------------------------- /lrpar/examples/clone_param/src/param.l: -------------------------------------------------------------------------------- 1 | %% 2 | (\-?)[0-9]+ "INT" 3 | \- "Decr" 4 | \+ "Incr" 5 | [\n\t\ ] ; 6 | . 
'UNMATCHED' 7 | -------------------------------------------------------------------------------- /lrlex/build.rs: -------------------------------------------------------------------------------- 1 | use vergen::EmitBuilder; 2 | 3 | fn main() { 4 | EmitBuilder::builder().build_timestamp().emit().unwrap(); 5 | } 6 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast/src/calc.l: -------------------------------------------------------------------------------- 1 | %% 2 | [0-9]+ "INT" 3 | \+ "+" 4 | \* "*" 5 | \( "(" 6 | \) ")" 7 | [\t\n ]+ ; 8 | . "UNMATCHED" 9 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast_arena/src/calc.l: -------------------------------------------------------------------------------- 1 | %% 2 | [0-9]+ "INT" 3 | \+ "+" 4 | \* "*" 5 | \( "(" 6 | \) ")" 7 | [\t\n ]+ ; 8 | . "UNMATCHED" 9 | -------------------------------------------------------------------------------- /lrlex/examples/calclex/src/calc.l: -------------------------------------------------------------------------------- 1 | %% 2 | ([0-9]+\.[0-9]*)|([0-9]*\.[0-9]+) "FLOAT" 3 | [0-9]+ "INT" 4 | \+ "+" 5 | \* "*" 6 | - "-" 7 | / "/" 8 | \( "(" 9 | \) ")" 10 | [\t ]+ ; 11 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | [licenses] 2 | confidence-threshold = 1.0 3 | allow = [ 4 | "Apache-2.0", 5 | "MIT", 6 | "BSD-3-Clause", 7 | "Unicode-3.0", 8 | "Zlib", 9 | ] 10 | -------------------------------------------------------------------------------- /lrpar/build.rs: -------------------------------------------------------------------------------- 1 | use vergen::EmitBuilder; 2 | 3 | fn main() { 4 | println!("cargo::rustc-check-cfg=cfg(grmtools_extra_checks)"); 5 | EmitBuilder::builder().build_timestamp().emit().unwrap(); 6 | } 7 | -------------------------------------------------------------------------------- /doc/src/libsandtools.md: -------------------------------------------------------------------------------- 1 | # The individual libraries and tools 2 | 3 | [grmtools](https://github.com/softdevteam/grmtools/) consists of several 4 | libraries and command-line tools. The following sections describe each. 5 | -------------------------------------------------------------------------------- /lrpar/examples/start_states/src/comment.y: -------------------------------------------------------------------------------- 1 | %grmtools{ 2 | yacckind: Original(GenericParseTree), 3 | test_files: ["input*.txt"], 4 | } 5 | %start Expr 6 | %% 7 | Expr: Expr Text | ; 8 | 9 | Text: 'TEXT'; 10 | -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.wasm32-wasip2] 2 | runner = "workspace_runner --target wasm32-wasip2 --" 3 | 4 | [target.wasm32-unknown-unknown] 5 | # Provided by the crate wasm-bindgen-cli.
6 | runner = "wasm-bindgen-test-runner" 7 | -------------------------------------------------------------------------------- /lrlex/examples/calclex/build.rs: -------------------------------------------------------------------------------- 1 | use lrlex::CTLexerBuilder; 2 | 3 | fn main() { 4 | CTLexerBuilder::new() 5 | .lexer_in_src_dir("calc.l") 6 | .unwrap() 7 | .build() 8 | .unwrap(); 9 | } 10 | -------------------------------------------------------------------------------- /lrpar/cttests/src/multi_start.y: -------------------------------------------------------------------------------- 1 | %grmtools{yacckind: Grmtools} 2 | %start AStart 3 | %token A B C 4 | %% 5 | 6 | AStart -> () 7 | : A ':' BStart ';' {} 8 | ; 9 | 10 | BStart -> () 11 | : B ',' C {} 12 | | C ',' B {} 13 | ; 14 | -------------------------------------------------------------------------------- /lrpar/examples/start_states/src/comment.l: -------------------------------------------------------------------------------- 1 | %x COMMENT 2 | %% 3 | . "TEXT" 4 | /\* <+COMMENT>; 5 | <COMMENT>. ; 6 | <COMMENT>\n ; 7 | <COMMENT>\*/ <-COMMENT>; -------------------------------------------------------------------------------- /lrpar/cttests/src/epp.test: -------------------------------------------------------------------------------- 1 | name: Test %epp string 2 | yacckind: Original(YaccOriginalActionKind::GenericParseTree) 3 | grammar: | 4 | %start A 5 | %epp a '"\"a"' 6 | %% 7 | A : 'a'; 8 | lexer: | 9 | %% 10 | a 'a' 11 | -------------------------------------------------------------------------------- /cfgrammar/README.md: -------------------------------------------------------------------------------- 1 | # `cfgrammar` 2 | 3 | `cfgrammar` reads in grammar files, processes them, and provides a convenient 4 | API for operating with them. It may be of interest to those manipulating 5 | grammars directly, or who wish to use custom types of parsers. 6 | -------------------------------------------------------------------------------- /lrtable/README.md: -------------------------------------------------------------------------------- 1 | # `lrtable` 2 | 3 | `lrtable` takes in grammars from [`cfgrammar`](cfgrammar.html) and creates LR 4 | state tables from them. Few users will be interested in its functionality 5 | directly, except those doing advanced forms of grammar analysis. 6 | -------------------------------------------------------------------------------- /lrpar/cttests/src/expect.test: -------------------------------------------------------------------------------- 1 | name: Test %expect 2 | yacckind: Original(YaccOriginalActionKind::NoAction) 3 | grammar: | 4 | %start A 5 | %expect 1 6 | %% 7 | A: 'a' 'b' | B 'b'; 8 | B: 'a'; 9 | lexer: | 10 | %% 11 | a 'a' 12 | b 'b' 13 | -------------------------------------------------------------------------------- /.buildbot_dockerfile_debian: -------------------------------------------------------------------------------- 1 | FROM debian:latest 2 | ARG CI_UID 3 | RUN useradd -m -u ${CI_UID} ci 4 | RUN apt-get update && \ 5 | apt-get -y install build-essential curl procps file 6 | WORKDIR /ci 7 | RUN chown ${CI_UID}:${CI_UID} . 8 | COPY --chown=${CI_UID}:${CI_UID} . . 9 | CMD sh -x .buildbot.sh 10 | -------------------------------------------------------------------------------- /lrpar/cttests/src/ctfails/warnings.test: -------------------------------------------------------------------------------- 1 | name: Test warnings are treated as errors by default.
2 | yacckind: Original(YaccOriginalActionKind::GenericParseTree) 3 | grammar: | 4 | %start A 5 | %token b 6 | %% 7 | A : 'a'; 8 | B : 'b'; 9 | lexer: | 10 | %% 11 | a 'a' 12 | b 'b' 13 | -------------------------------------------------------------------------------- /.github/workflows/sdci.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | merge_group: 4 | 5 | # This is required to silence emails about the workflow having no jobs. 6 | # We simply define a dummy job that does nothing much. 7 | jobs: 8 | dummy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - run: /usr/bin/true 12 | -------------------------------------------------------------------------------- /lrpar/cttests/src/ctfails/missing.test: -------------------------------------------------------------------------------- 1 | name: Test missing terms in lexer and parser 2 | yacckind: Original(YaccOriginalActionKind::NoAction) 3 | lex_flags: [ '!allow_missing_terms_in_lexer', '!allow_missing_tokens_in_parser' ] 4 | grammar: | 5 | %% 6 | S: 'B'; 7 | 8 | lexer: | 9 | %% 10 | A "A" 11 | -------------------------------------------------------------------------------- /lrpar/cttests/src/expectrr.test: -------------------------------------------------------------------------------- 1 | name: Test %expect 2 | yacckind: Original(YaccOriginalActionKind::NoAction) 3 | grammar: | 4 | %start A 5 | %expect 1 6 | %expect-rr 1 7 | %% 8 | A : 'a' 'b' | B 'b'; 9 | B : 'a' | C; 10 | C : 'a'; 11 | lexer: | 12 | %% 13 | a 'a' 14 | b 'b' 15 | -------------------------------------------------------------------------------- /lrpar/cttests/src/lexer_lifetime.test: -------------------------------------------------------------------------------- 1 | name: Test that the lexer does not have to outlive the input's lifetime 2 | yacckind: Grmtools 3 | grammar: | 4 | %start T 5 | %% 6 | T -> &'input str: 7 | "ID" { $lexer.span_str($1.unwrap().span()) } 8 | ; 9 | lexer: | 10 | %% 11 | [a-z] "ID" -------------------------------------------------------------------------------- /doc/src/thirdparty.md: -------------------------------------------------------------------------------- 1 | # Libraries and tools developed by third parties 2 | 3 | The following items, developed by third parties, use grmtools to extend or 4 | augment its functionality; they may be useful to people developing parsers 5 | with grmtools.
6 | 7 | - [nimbleparse_lsp](https://github.com/ratmice/nimbleparse_lsp) 8 | -------------------------------------------------------------------------------- /lrpar/cttests/src/parseparam.test: -------------------------------------------------------------------------------- 1 | name: Test %parse-param 2 | yacckind: Grmtools 3 | grammar: | 4 | %start S 5 | %parse-param p: &u64 6 | %% 7 | S -> u64: 8 | 'INT' { *p + $lexer.span_str($1.unwrap().span()).parse::<u64>().unwrap() } 9 | ; 10 | %% 11 | lexer: | 12 | %% 13 | [0-9]+ 'INT' 14 | -------------------------------------------------------------------------------- /lrpar/examples/calc_parsetree/src/calc.y: -------------------------------------------------------------------------------- 1 | %grmtools{ 2 | yacckind: Original(GenericParseTree), 3 | test_files: ["input*.txt"], 4 | } 5 | %start Expr 6 | %avoid_insert "INT" 7 | %% 8 | Expr: Expr '+' Term 9 | | Term ; 10 | 11 | Term: Term '*' Factor 12 | | Factor ; 13 | 14 | Factor: '(' Expr ')' 15 | | 'INT'; 16 | -------------------------------------------------------------------------------- /lrlex/examples/calc_manual_lex/README.md: -------------------------------------------------------------------------------- 1 | # Parsing a simple calculator language 2 | 3 | This directory contains a very simple example of a calculator evaluator that 4 | uses a hand-written lexer alongside an `lrpar` parser. 5 | 6 | Look at `build.rs` and `src/main.rs` to see how `lrlex` can make it easier to 7 | use a hand-written lexer with `lrpar`. 8 | -------------------------------------------------------------------------------- /lrpar/cttests/src/ctfails/warnings_flags.test: -------------------------------------------------------------------------------- 1 | name: Test enabling warnings are errors. 2 | yacckind: Original(YaccOriginalActionKind::GenericParseTree) 3 | yacc_flags: [ warnings_are_errors ] 4 | grammar: | 5 | %start A 6 | %token b 7 | %% 8 | A : 'a'; 9 | B : 'b'; 10 | lexer: | 11 | %% 12 | a 'a' 13 | b 'b' 14 | -------------------------------------------------------------------------------- /lrpar/cttests/src/regex_opt.test: -------------------------------------------------------------------------------- 1 | name: Test regex options via builder. 2 | yacckind: Original(YaccOriginalActionKind::NoAction) 3 | lex_flags: ['!dot_matches_new_line', 'octal'] 4 | grammar: | 5 | %start Start 6 | %% 7 | Start: 'ANY' | 'a' | 'NL'; 8 | 9 | lexer: | 10 | %% 11 | \141 'a' 12 | . 'ANY' 13 | [\n] 'NL' 14 | -------------------------------------------------------------------------------- /lrpar/cttests/src/warnings.test: -------------------------------------------------------------------------------- 1 | name: Test disabling warnings are errors.
2 | yacckind: Original(YaccOriginalActionKind::GenericParseTree) 3 | yacc_flags: [ '!warnings_are_errors', '!show_warnings' ] 4 | grammar: | 5 | %start A 6 | %token b 7 | %% 8 | A : 'a'; 9 | B : 'b'; 10 | lexer: | 11 | %% 12 | a 'a' 13 | b 'b' -------------------------------------------------------------------------------- /lrpar/cttests/src/quoting.test: -------------------------------------------------------------------------------- 1 | name: Test NoAction using the calculator grammar 2 | yacckind: Original(YaccOriginalActionKind::NoAction) 3 | grammar: | 4 | %start S 5 | %% 6 | S: '\' | '"' | '<' | '+' '🦀' ; 7 | 8 | lexer: | 9 | %% 10 | " '"' 11 | \< '<' 12 | \\ '\' 13 | \+ '+' 14 | 🦀 '🦀' 15 | [\t ]+ ; 16 | -------------------------------------------------------------------------------- /lrpar/cttests/src/lex_flags.test: -------------------------------------------------------------------------------- 1 | name: Lex flags in the grmtools section 2 | grammar: | 3 | %grmtools{yacckind: Original(NoAction)} 4 | %start Start 5 | %% 6 | Start: 'ANY' | 'a' | 'NL'; 7 | 8 | lexer: | 9 | %grmtools{!dot_matches_new_line, octal, size_limit: 1048576} 10 | %% 11 | \141 'a' 12 | . 'ANY' 13 | [\n] 'NL' 14 | -------------------------------------------------------------------------------- /doc/src/othertools.md: -------------------------------------------------------------------------------- 1 | # Other tools 2 | 3 | When parsing text in Rust, you should also evaluate the following tools to see 4 | if they are more suitable for your purposes: 5 | 6 | * [LALRPOP](http://lalrpop.github.io/lalrpop/) 7 | * [nom](https://crates.io/crates/nom) 8 | * [pest](https://pest.rs/) 9 | * [rust-peg](https://github.com/kevinmehall/rust-peg) 10 | -------------------------------------------------------------------------------- /lrpar/cttests/src/storaget.y: -------------------------------------------------------------------------------- 1 | %grmtools{yacckind: Grmtools} 2 | %% 3 | word_seq -> Vec<String> 4 | : "word" {vec![$lexer.span_str($1.as_ref().unwrap().span()).to_string()] 5 | } 6 | | word_seq "," "word" { 7 | let w: String = $lexer.span_str($3.as_ref().unwrap().span()).to_string(); 8 | $1.push(w); 9 | $1 10 | } 11 | ; 12 | %% 13 | -------------------------------------------------------------------------------- /lrpar/cttests/src/multitypes.test: -------------------------------------------------------------------------------- 1 | name: Test multiple types 2 | yacckind: Grmtools 3 | grammar: | 4 | %start S 5 | %% 6 | S -> Vec<A>: 7 | A { vec![$1] } 8 | | S A { 9 | $1.push($2); 10 | $1 11 | } 12 | ; 13 | A -> A: 'a' { A } ; 14 | %% 15 | pub struct A; 16 | lexer: | 17 | %% 18 | a 'a' 19 | -------------------------------------------------------------------------------- /doc/src/lrpar.md: -------------------------------------------------------------------------------- 1 | # `lrpar` 2 | 3 | `lrpar` ([crate](https://crates.io/crates/lrpar); 4 | [source](https://github.com/softdevteam/grmtools/tree/master/lrpar)) is the LR 5 | parser library aspect of grmtools. It takes in streams of lexemes (using a 6 | lexer of the user's choice) and parses them, determining if they successfully 7 | match a grammar or not; if not, it can optionally recover from errors.
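
The following sketch shows the typical way `lrpar` is driven from a `main.rs`, in the style of this repository's examples; it assumes a `calc.l`/`calc.y` pair processed at build time (as in `lrpar/examples/calc_actions`) whose grammar actions evaluate to a `u64`:

```rust
use lrlex::lrlex_mod;
use lrpar::lrpar_mod;

// Bring the lexer and parser generated from `calc.l`/`calc.y` into scope.
lrlex_mod!("calc.l");
lrpar_mod!("calc.y");

fn main() {
    let lexerdef = calc_l::lexerdef();
    // Each lexer instance lexes exactly one input in its lifetime.
    let lexer = lexerdef.lexer("2 + 3 * 4");
    // `parse` runs the grammar's action code and collects lexing/parsing
    // errors; because of error recovery, a result and errors can coexist.
    let (res, errs) = calc_y::parse(&lexer);
    for e in errs {
        println!("{}", e.pp(&lexer, &calc_y::token_epp));
    }
    if let Some(Ok(r)) = res {
        println!("Result: {}", r);
    }
}
```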
8 | -------------------------------------------------------------------------------- /lrpar/cttests/src/typeparams.test: -------------------------------------------------------------------------------- 1 | name: Test %parse-param 2 | yacckind: Grmtools 3 | grammar: | 4 | %start S 5 | %parse-generics 'a, T: Into<u64> + Copy, R: From<u64> 6 | %parse-param p: &'a T 7 | %% 8 | S -> R: 9 | 'INT' { From::from((*p).into() + $lexer.span_str($1.unwrap().span()).parse::<u64>().unwrap()) } 10 | ; 11 | %% 12 | lexer: | 13 | %% 14 | [0-9]+ 'INT' 15 | -------------------------------------------------------------------------------- /doc/src/README.md: -------------------------------------------------------------------------------- 1 | # grmtools 2 | 3 | [grmtools](https://github.com/softdevteam/grmtools/) is a suite of Rust 4 | libraries and binaries for parsing text, both at compile-time, and run-time. 5 | Most users will probably be interested in the compile-time Yacc feature, which 6 | allows traditional `.y` files to be used mostly unchanged in Rust. See the 7 | [Quickstart Guide](quickstart.md) for a quick introduction to this feature. 8 | -------------------------------------------------------------------------------- /lrpar/cttests/src/passthrough.test: -------------------------------------------------------------------------------- 1 | name: Test that $$ is passed through correctly. 2 | yacckind: Grmtools 3 | grammar: | 4 | %start Expr 5 | %avoid_insert "INT" 6 | %% 7 | Expr -> Result<String, ()>: 8 | Num { $1 } 9 | ; 10 | Num -> Result<String, ()>: 11 | "INT" { Ok(format!("$${}", $lexer.span_str($1.unwrap().span()))) } 12 | ; 13 | lexer: | 14 | %% 15 | [0-9]+ "INT" 16 | 17 | -------------------------------------------------------------------------------- /lrpar/cttests_macro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cttests_macro" 3 | version = "0.1.0" 4 | edition = "2024" 5 | license = "Apache-2.0/MIT" 6 | 7 | [lib] 8 | proc-macro = true 9 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 10 | 11 | [dependencies] 12 | glob.workspace = true 13 | quote.workspace = true 14 | proc-macro2 = { version = "1.0", features=["proc-macro"]} 15 | syn.workspace = true 16 | -------------------------------------------------------------------------------- /lrpar/cttests/src/ctfails/test_files2.test: -------------------------------------------------------------------------------- 1 | name: Test non-string in array of %grmtools{test_files} 2 | grammar: | 3 | %grmtools { 4 | yacckind: Original(YaccOriginalActionKind::UserAction), 5 | recoverer: RecoveryKind::None, 6 | test_files: [ShouldBeAString] 7 | } 8 | %start Expr 9 | %actiontype () 10 | %% 11 | Expr: '(' ')' { () } ; 12 | lexer: | 13 | %% 14 | \( "(" 15 | \) ")" 16 | [\t\n ]+ ; 17 | -------------------------------------------------------------------------------- /lrlex/examples/calclex/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calclex" 3 | version = "0.1.0" 4 | authors = ["Laurence Tratt "] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | 8 | [[bin]] 9 | doc = false 10 | name = "calclex" 11 | 12 | [build-dependencies] 13 | lrlex = { path = "../.." } 14 | 15 | [dependencies] 16 | cfgrammar = { path = "../../../cfgrammar" } 17 | lrlex = { path = "../.."
} 18 | lrpar = { path = "../../../lrpar" } 19 | -------------------------------------------------------------------------------- /lrpar/cttests/src/ctfails/test_files3.test: -------------------------------------------------------------------------------- 1 | name: Test empty matchless glob in array of %grmtools{test_files} 2 | grammar: | 3 | %grmtools { 4 | yacckind: Original(YaccOriginalActionKind::UserAction), 5 | recoverer: RecoveryKind::None, 6 | test_files: ["*.nonexistent"] 7 | } 8 | %start Expr 9 | %actiontype () 10 | %% 11 | Expr: '(' ')' { () } ; 12 | lexer: | 13 | %% 14 | \( "(" 15 | \) ")" 16 | [\t\n ]+ ; 17 | -------------------------------------------------------------------------------- /lrpar/cttests/src/ctfails/test_files1.test: -------------------------------------------------------------------------------- 1 | name: Test string value type instead of array in %grmtools{test_files} 2 | grammar: | 3 | %grmtools { 4 | yacckind: Original(YaccOriginalActionKind::UserAction), 5 | recoverer: RecoveryKind::None, 6 | test_files: "should_be_an_array" 7 | } 8 | %start Expr 9 | %actiontype () 10 | %% 11 | Expr: '(' ')' { () } ; 12 | lexer: | 13 | %% 14 | \( "(" 15 | \) ")" 16 | [\t\n ]+ ; 17 | -------------------------------------------------------------------------------- /lrpar/examples/clone_param/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "clone_param" 3 | version = "0.1.0" 4 | edition = "2024" 5 | license = "Apache-2.0/MIT" 6 | 7 | [[bin]] 8 | doc = false 9 | name = "clone_param" 10 | 11 | [build-dependencies] 12 | cfgrammar = { path="../../../cfgrammar" } 13 | lrlex = { path="../../../lrlex" } 14 | lrpar = { path="../.." } 15 | 16 | [dependencies] 17 | cfgrammar = { path="../../../cfgrammar" } 18 | lrlex = { path="../../../lrlex" } 19 | lrpar = { path="../.." } 20 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calc_ast" 3 | version = "0.1.0" 4 | authors = ["Laurence Tratt "] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | 8 | [[bin]] 9 | doc = false 10 | name = "calc_ast" 11 | 12 | [build-dependencies] 13 | cfgrammar = { path="../../../cfgrammar" } 14 | lrlex = { path="../../../lrlex" } 15 | lrpar = { path="../.." } 16 | 17 | [dependencies] 18 | cfgrammar = { path="../../../cfgrammar" } 19 | lrlex = { path="../../../lrlex" } 20 | lrpar = { path="../.." } 21 | -------------------------------------------------------------------------------- /lrpar/examples/start_states/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "start_states" 3 | version = "0.1.0" 4 | authors = ["Simon Martin "] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | 8 | [[bin]] 9 | doc = false 10 | name = "start_states" 11 | 12 | [build-dependencies] 13 | cfgrammar = { path="../../../cfgrammar" } 14 | lrlex = { path="../../../lrlex" } 15 | lrpar = { path="../.." } 16 | 17 | [dependencies] 18 | cfgrammar = { path="../../../cfgrammar" } 19 | lrlex = { path="../../../lrlex" } 20 | lrpar = { path="../.." 
} 21 | -------------------------------------------------------------------------------- /lrpar/examples/calc_actions/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calc_actions" 3 | version = "0.1.0" 4 | authors = ["Laurence Tratt "] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | 8 | [[bin]] 9 | doc = false 10 | name = "calc_actions" 11 | 12 | [build-dependencies] 13 | cfgrammar = { path="../../../cfgrammar" } 14 | lrlex = { path="../../../lrlex" } 15 | lrpar = { path="../.." } 16 | 17 | [dependencies] 18 | cfgrammar = { path="../../../cfgrammar" } 19 | lrlex = { path="../../../lrlex" } 20 | lrpar = { path="../.." } 21 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_noactions.test: -------------------------------------------------------------------------------- 1 | name: Test NoAction using the calculator grammar 2 | yacckind: Original(YaccOriginalActionKind::NoAction) 3 | grammar: | 4 | %start Expr 5 | %avoid_insert 'INT' 6 | %% 7 | Expr: Expr '+' Term 8 | | Term 9 | ; 10 | 11 | Term: Term '*' Factor 12 | | Factor 13 | ; 14 | 15 | Factor: '(' Expr ')' 16 | | 'INT' 17 | ; 18 | 19 | lexer: | 20 | %% 21 | [0-9]+ "INT" 22 | \+ "+" 23 | \* "*" 24 | \( "(" 25 | \) ")" 26 | [\t ]+ ; 27 | -------------------------------------------------------------------------------- /lrpar/examples/calc_parsetree/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calc_parsetree" 3 | version = "0.1.0" 4 | authors = ["Laurence Tratt "] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | 8 | [[bin]] 9 | doc = false 10 | name = "calc_parsetree" 11 | 12 | [build-dependencies] 13 | cfgrammar = { path="../../../cfgrammar" } 14 | lrlex = { path="../../../lrlex" } 15 | lrpar = { path="../.." } 16 | 17 | [dependencies] 18 | cfgrammar = { path="../../../cfgrammar" } 19 | lrlex = { path="../../../lrlex" } 20 | lrpar = { path="../.." } 21 | -------------------------------------------------------------------------------- /lrlex/examples/calc_manual_lex/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calc_manual_lex" 3 | version = "0.1.0" 4 | authors = ["Laurence Tratt "] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | 8 | [[bin]] 9 | doc = false 10 | name = "calc_manual_lex" 11 | 12 | [build-dependencies] 13 | cfgrammar = { path="../../../cfgrammar" } 14 | lrlex = { path="../../" } 15 | lrpar = { path="../../../lrpar" } 16 | 17 | [dependencies] 18 | cfgrammar = { path="../../../cfgrammar" } 19 | lrlex = { path="../.." } 20 | lrpar = { path="../../../lrpar" } 21 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use 2 | this file except in compliance with the License. You may obtain a copy of the 3 | License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software distributed 8 | under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 9 | CONDITIONS OF ANY KIND, either express or implied. See the License for the 10 | specific language governing permissions and limitations under the License. 
11 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast_arena/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calc_ast_arena" 3 | version = "0.1.0" 4 | authors = ["Laurence Tratt "] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | 8 | [[bin]] 9 | doc = false 10 | name = "calc_ast_arena" 11 | 12 | [build-dependencies] 13 | cfgrammar = { path="../../../cfgrammar" } 14 | lrlex = { path="../../../lrlex" } 15 | lrpar = { path="../.." } 16 | 17 | [dependencies] 18 | cfgrammar = { path="../../../cfgrammar" } 19 | lrlex = { path="../../../lrlex" } 20 | lrpar = { path="../.." } 21 | bumpalo = "3" 22 | -------------------------------------------------------------------------------- /lrpar/examples/calc_parsetree/README.md: -------------------------------------------------------------------------------- 1 | # Parsing a simple calculator language 2 | 3 | This directory contains a very simple example of a calculator in `lrpar` that 4 | uses the generic parse tree output of `lrpar`. `cargo build` processes 5 | `src/calc.l` and `src/calc.y` at compile-time. The compiled program then takes 6 | input from stdin. You can type anything in here (though you'll only get useful 7 | output for valid input!) -- parsing and lexing errors are reported. 8 | 9 | Look at `build.rs`, `src/calc.y`, and `src/main.rs` to see how to use `lrpar` in 10 | your project. 11 | -------------------------------------------------------------------------------- /lrlex/examples/calclex/README.md: -------------------------------------------------------------------------------- 1 | # Lexing a simple calculator language 2 | 3 | This directory contains a very simple example of a calculator in `lrlex`. 4 | Executing `cargo run` processes `src/calc.l` at compile-time; the resulting 5 | binary then takes input from stdin. Each line should be a sequence of calculator 6 | lexemes (note that, since this is a lexer example, there is no notion of lexeme 7 | ordering: i.e. `1 2 +` is a valid sequence of lexemes as is `1 8 | + 2`). 9 | 10 | Look at `build.rs`, `src/calc.l`, and `src/main.rs` to see how to use `lrlex` in 11 | your project. 12 | -------------------------------------------------------------------------------- /lrpar/examples/calc_actions/README.md: -------------------------------------------------------------------------------- 1 | # Parsing a simple calculator language 2 | 3 | This directory contains a very simple example of a calculator in `lrpar` that 4 | executes user-specified actions as parsing is undertaken. `cargo build` 5 | processes `src/calc.l` and `src/calc.y` at compile-time. The compiled program 6 | then takes input from stdin. You can type anything in here (though you'll only 7 | get useful output for valid input!) -- parsing and lexing errors are reported. 8 | 9 | Look at `build.rs`, `src/calc.y`, and `src/main.rs` to see how to use `lrpar` in 10 | your project. 11 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast/README.md: -------------------------------------------------------------------------------- 1 | # Parsing a simple calculator language 2 | 3 | This directory contains a very simple example of a calculator in `lrpar` that 4 | builds up an AST and evaluates it once the AST is completely built. `cargo 5 | build` processes `src/calc.l` and `src/calc.y` at compile-time. The compiled 6 | program then takes input from stdin. 
You can type anything in here (though 7 | you'll only get useful output for valid input!) -- parsing and lexing errors 8 | are reported. 9 | 10 | Look at `build.rs`, `src/calc.y`, and `src/main.rs` to see how to use `lrpar` in 11 | your project. 12 | -------------------------------------------------------------------------------- /lrpar/examples/start_states/README.md: -------------------------------------------------------------------------------- 1 | # Parsing C-style block comments 2 | 3 | This directory contains a very simple example of (non-nested) comment removal in `lrpar` that 4 | uses the generic parse tree output of `lrpar`. `cargo build` processes 5 | `src/comment.l` and `src/comment.y` at compile-time. The compiled program then takes 6 | input from stdin. You can type anything in here (though you'll only get useful 7 | output for valid input!) -- parsing and lexing errors are reported. 8 | 9 | Look at `build.rs`, `src/comment.l`, `src/comment.y`, and `src/main.rs` to see how to use `lrpar` 10 | in your project. 11 | -------------------------------------------------------------------------------- /lrpar/examples/calc_parsetree/build.rs: -------------------------------------------------------------------------------- 1 | use lrlex::CTLexerBuilder; 2 | 3 | fn main() { 4 | // Since we're using both lrlex and lrpar, we use lrlex's `lrpar_config` convenience function 5 | // that makes it easy to a) create a lexer and parser and b) link them together. 6 | CTLexerBuilder::new() 7 | .rust_edition(lrlex::RustEdition::Rust2021) 8 | .lrpar_config(|ctp| { 9 | ctp.rust_edition(lrpar::RustEdition::Rust2021) 10 | .grammar_in_src_dir("calc.y") 11 | .unwrap() 12 | }) 13 | .lexer_in_src_dir("calc.l") 14 | .unwrap() 15 | .build() 16 | .unwrap(); 17 | } 18 | -------------------------------------------------------------------------------- /lrpar/examples/start_states/build.rs: -------------------------------------------------------------------------------- 1 | use lrlex::CTLexerBuilder; 2 | 3 | fn main() { 4 | // Since we're using both lrlex and lrpar, we use lrlex's `lrpar_config` convenience function 5 | // that makes it easy to a) create a lexer and parser and b) link them together. 
6 | CTLexerBuilder::new() 7 | .rust_edition(lrlex::RustEdition::Rust2021) 8 | .lrpar_config(|ctp| { 9 | ctp.rust_edition(lrpar::RustEdition::Rust2021) 10 | .grammar_in_src_dir("comment.y") 11 | .unwrap() 12 | }) 13 | .lexer_in_src_dir("comment.l") 14 | .unwrap() 15 | .build() 16 | .unwrap(); 17 | } 18 | -------------------------------------------------------------------------------- /nimbleparse/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nimbleparse" 3 | description = "Simple Yacc grammar debugging tool" 4 | repository = "https://github.com/softdevteam/grmtools" 5 | version = "0.14.0" 6 | edition = "2024" 7 | readme = "README.md" 8 | license = "Apache-2.0/MIT" 9 | categories = ["parsing"] 10 | 11 | [[bin]] 12 | doc = false 13 | name = "nimbleparse" 14 | 15 | [dependencies] 16 | cfgrammar = { path="../cfgrammar", version="0.14" } 17 | lrlex = { path="../lrlex", version="0.14" } 18 | lrpar = { path="../lrpar", version="0.14" } 19 | lrtable = { path="../lrtable", version="0.14" } 20 | 21 | getopts.workspace = true 22 | num-traits.workspace = true 23 | glob.workspace = true 24 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast/build.rs: -------------------------------------------------------------------------------- 1 | #![deny(rust_2018_idioms)] 2 | use lrlex::CTLexerBuilder; 3 | 4 | fn main() { 5 | // Since we're using both lrlex and lrpar, we use lrlex's `lrpar_config` convenience function 6 | // that makes it easy to a) create a lexer and parser and b) link them together. 7 | CTLexerBuilder::new() 8 | .rust_edition(lrlex::RustEdition::Rust2021) 9 | .lrpar_config(|ctp| { 10 | ctp.rust_edition(lrpar::RustEdition::Rust2021) 11 | .grammar_in_src_dir("calc.y") 12 | .unwrap() 13 | }) 14 | .lexer_in_src_dir("calc.l") 15 | .unwrap() 16 | .build() 17 | .unwrap(); 18 | } 19 | -------------------------------------------------------------------------------- /lrpar/examples/calc_actions/build.rs: -------------------------------------------------------------------------------- 1 | #![deny(rust_2018_idioms)] 2 | use lrlex::CTLexerBuilder; 3 | 4 | fn main() { 5 | // Since we're using both lrlex and lrpar, we use lrlex's `lrpar_config` convenience function 6 | // that makes it easy to a) create a lexer and parser and b) link them together. 7 | CTLexerBuilder::new() 8 | .rust_edition(lrlex::RustEdition::Rust2021) 9 | .lrpar_config(|ctp| { 10 | ctp.rust_edition(lrpar::RustEdition::Rust2021) 11 | .grammar_in_src_dir("calc.y") 12 | .unwrap() 13 | }) 14 | .lexer_in_src_dir("calc.l") 15 | .unwrap() 16 | .build() 17 | .unwrap(); 18 | } 19 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast_arena/build.rs: -------------------------------------------------------------------------------- 1 | #![deny(rust_2018_idioms)] 2 | use lrlex::CTLexerBuilder; 3 | 4 | fn main() { 5 | // Since we're using both lrlex and lrpar, we use lrlex's `lrpar_config` convenience function 6 | // that makes it easy to a) create a lexer and parser and b) link them together. 
7 | CTLexerBuilder::new() 8 | .rust_edition(lrlex::RustEdition::Rust2021) 9 | .lrpar_config(|ctp| { 10 | ctp.rust_edition(lrpar::RustEdition::Rust2021) 11 | .grammar_in_src_dir("calc.y") 12 | .unwrap() 13 | }) 14 | .lexer_in_src_dir("calc.l") 15 | .unwrap() 16 | .build() 17 | .unwrap(); 18 | } 19 | -------------------------------------------------------------------------------- /lrpar/examples/clone_param/build.rs: -------------------------------------------------------------------------------- 1 | #![deny(rust_2018_idioms)] 2 | use lrlex::CTLexerBuilder; 3 | 4 | fn main() { 5 | // Since we're using both lrlex and lrpar, we use lrlex's `lrpar_config` convenience function 6 | // that makes it easy to a) create a lexer and parser and b) link them together. 7 | CTLexerBuilder::new() 8 | .rust_edition(lrlex::RustEdition::Rust2021) 9 | .lrpar_config(|ctp| { 10 | ctp.rust_edition(lrpar::RustEdition::Rust2021) 11 | .grammar_in_src_dir("param.y") 12 | .unwrap() 13 | }) 14 | .lexer_in_src_dir("param.l") 15 | .unwrap() 16 | .build() 17 | .unwrap(); 18 | } 19 | -------------------------------------------------------------------------------- /lrlex/examples/calc_manual_lex/build.rs: -------------------------------------------------------------------------------- 1 | use lrlex::{CTTokenMapBuilder, DefaultLexerTypes}; 2 | use lrpar::CTParserBuilder; 3 | 4 | // Some of the token names in the parser do not lead to valid Rust identifiers, so we map them to 5 | // valid identifier names here. 6 | const TOKENS_MAP: &[(&str, &str)] = &[ 7 | ("+", "PLUS"), 8 | ("*", "STAR"), 9 | ("(", "LBRACK"), 10 | (")", "RBRACK"), 11 | ]; 12 | 13 | fn main() { 14 | let ctp = CTParserBuilder::<DefaultLexerTypes<u8>>::new() 15 | .grammar_in_src_dir("calc.y") 16 | .unwrap() 17 | .build() 18 | .unwrap(); 19 | CTTokenMapBuilder::<u8>::new("token_map", ctp.token_map()) 20 | .rename_map(Some(TOKENS_MAP)) 21 | .build() 22 | .unwrap(); 23 | } 24 | -------------------------------------------------------------------------------- /lrpar/cttests/src/parseparam_copy.test: -------------------------------------------------------------------------------- 1 | name: Test %parse-param copy 2 | yacckind: Grmtools 3 | grammar: | 4 | %start S 5 | %parse-param p: u64 6 | %% 7 | S -> u64: 8 | // Previously %parse-param required a `Copy` bounds. 9 | // Since then we relaxed the bounds to require `Clone`. 10 | // This tests backwards compatibility of actions that 11 | // rely on the older copy bounds. 12 | 'INT' { 13 | #[allow(clippy::redundant_closure_call)] 14 | (move |_| {})(p); 15 | check_copy(p); 16 | p + $lexer.span_str($1.unwrap().span()).parse::<u64>().unwrap() 17 | } 18 | ; 19 | %% 20 | fn check_copy<T: Copy>(_: T){} 21 | lexer: | 22 | %% 23 | [0-9]+ 'INT' 24 | -------------------------------------------------------------------------------- /lrpar/examples/clone_param/README.md: -------------------------------------------------------------------------------- 1 | # `clone_param` 2 | 3 | ## Description 4 | Example which shows how to use interior mutability with the `%parse-param` directive. 5 | As a parameter the parse function accepts a `Rc<RefCell<i64>>`. 6 | 7 | ## Input 8 | For input the parser accepts a positive or negative integer e.g. `-1`, `42`, etc. followed 9 | by any sequence of `+` or `-` characters. Except for the initial `-` on a negative integer, 10 | `+` or `-` are treated as `Increment` and `Decrement` operators. 11 | 12 | ## Evaluation 13 | Rather than building an AST, the param is directly mutated by the actions.
14 | As such, an input sequence like `-3++-` will evaluate to `-2`. 15 | 16 | ## Example 17 | ``` 18 | >>> -3++- 19 | Evaluated: RefCell { value: -2 } 20 | ``` 21 | -------------------------------------------------------------------------------- /doc/src/lrtable.md: -------------------------------------------------------------------------------- 1 | # `lrtable` 2 | 3 | `lrtable` ([crate](https://crates.io/crates/lrtable); 4 | [source](https://github.com/softdevteam/grmtools/tree/master/lrtable)) takes in 5 | grammars from [`cfgrammar`](cfgrammar.html) and creates LR state tables from 6 | them. Few users will be interested in its functionality directly, except those 7 | doing advanced forms of grammar analysis. 8 | 9 | One, admittedly fairly advanced, aspect worth noting is that 10 | `lrtable` uses [Pager's 11 | algorithm](https://link.springer.com/article/10.1007/BF00290336) to compress the 12 | resulting LR state tables. In rare cases this can provide surprising results: 13 | see [Denny and Malloy's 14 | paper](https://www.sciencedirect.com/science/article/pii/S0167642309001191) for 15 | more. 16 | -------------------------------------------------------------------------------- /doc/src/cfgrammar.md: -------------------------------------------------------------------------------- 1 | # `cfgrammar` 2 | 3 | `cfgrammar` ([crate](https://crates.io/crates/cfgrammar); 4 | [source](https://github.com/softdevteam/grmtools/tree/master/cfgrammar)) reads 5 | in grammar files, processes them, and provides a convenient API for operating 6 | with them. Most users only need to think about `cfgrammar` to the 7 | extent that they are required to use it to specify what Yacc variant they wish 8 | to use. 9 | 10 | `cfgrammar` may also be of interest to those manipulating grammars directly, or 11 | who wish to use custom types of parsers. Note that `cfgrammar`'s API should be 12 | considered semi-stable at best. As the needs of other parts of grmtools change, 13 | `cfgrammar` tends to have to change too. Since it is unlikely to have many direct 14 | users, the consequences of changing the API are relatively slight. 15 | -------------------------------------------------------------------------------- /cfgrammar/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cfgrammar" 3 | description = "Grammar manipulation" 4 | repository = "https://github.com/softdevteam/grmtools" 5 | version = "0.14.0" 6 | edition = "2024" 7 | readme = "README.md" 8 | license = "Apache-2.0/MIT" 9 | categories = ["parsing"] 10 | keywords = ["yacc", "grammar"] 11 | 12 | [features] 13 | serde = ["dep:serde", "serde/derive", "vob/serde"] 14 | bincode = ["dep:bincode", "vob/bincode"] 15 | 16 | [lib] 17 | name = "cfgrammar" 18 | path = "src/lib/mod.rs" 19 | 20 | [dependencies] 21 | bincode = { workspace = true, optional = true, features = ["derive"] } 22 | indexmap.workspace = true 23 | num-traits.workspace = true 24 | regex.workspace = true 25 | serde = { workspace = true, optional = true } 26 | vob = { workspace = true } 27 | quote.workspace = true 28 | proc-macro2.workspace = true 29 | -------------------------------------------------------------------------------- /doc/src/lexing.md: -------------------------------------------------------------------------------- 1 | # Lexing 2 | 3 | Lexing is the act of taking in an input stream and splitting it into lexemes. 4 | Colloquially, lexing is often described as splitting input into words.
In 5 | `grmtools`, a Lexeme has a type (e.g. "INT", "ID"), a value (e.g. "23", 6 | "xyz"), and knows which part of the user's input matched (e.g. "the input 7 | starting at index 7 to index 10"). There is also a simple mechanism to 8 | differentiate lexemes of zero length (e.g. `DEDENT` tokens in Python) from 9 | lexemes inserted by [error recovery](errorrecovery.md). 10 | 11 | `lrpar` provides a generic lexing interface that any lexer can plug into. 12 | Many lexing tasks can be carried out more easily by [`lrlex`](lrlex.md), a 13 | `lex` replacement. `lrlex` also provides helper functions which make it [easier 14 | to hand-write lexers](manuallexer.md). 15 | -------------------------------------------------------------------------------- /lrtable/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lrtable" 3 | description = "LR grammar table generation" 4 | repository = "https://github.com/softdevteam/grmtools" 5 | version = "0.14.0" 6 | edition = "2024" 7 | readme = "README.md" 8 | license = "Apache-2.0/MIT" 9 | categories = ["parsing"] 10 | 11 | [features] 12 | bincode = ["dep:bincode", "sparsevec/bincode", "cfgrammar/bincode"] 13 | serde = ["dep:serde", "sparsevec/serde", "cfgrammar/serde"] 14 | 15 | [lib] 16 | name = "lrtable" 17 | path = "src/lib/mod.rs" 18 | 19 | [dependencies] 20 | cfgrammar = { path="../cfgrammar", version="0.14" } 21 | 22 | bincode = { workspace = true, features = ["derive"], optional = true } 23 | fnv.workspace = true 24 | num-traits.workspace = true 25 | serde = { workspace = true, features = ["derive"], optional = true } 26 | vob.workspace = true 27 | sparsevec.workspace = true 28 | -------------------------------------------------------------------------------- /doc/src/parsing.md: -------------------------------------------------------------------------------- 1 | # Parsing 2 | 3 | Parsing is the act of checking whether a stream of lexemes matches a grammar. 4 | Since a simple "yes/no" answer is rarely useful, it is common to execute 5 | user-defined *actions* during parsing. 6 | 7 | `grmtools` contains libraries ([`cfgrammar`](cfgrammar.md) and 8 | [`lrtable`](lrtable.md)) which allow users to build their own LR parsers in 9 | whatever fashion they want. However, for 99% of cases, the [`lrpar`](lrpar.md) 10 | library is what users want and need: a (largely) Yacc-compatible parser. Roughly 11 | speaking, the core parts of grammars work identically in Yacc and `lrpar`, but 12 | some other parts of the system have been modernised (e.g. to avoid the use of 13 | global variables) and given a more idiomatic Rust feel. Notably, `lrpar` is 14 | built from the ground-up to have a powerful, flexible approach to [error 15 | recovery](errorrecovery.md).
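
To make the idea of actions concrete, here is a minimal sketch in the style of this repository's calculator grammars (the `Grmtools` yacckind and the `u64` return type are assumptions for illustration; actions can return any Rust type):

```
%grmtools{yacckind: Grmtools}
%start Expr
%%
Expr -> u64:
      Expr '+' Term { $1 + $3 }
    | Term { $1 }
    ;

Term -> u64:
      'INT' { $lexer.span_str($1.unwrap().span()).parse::<u64>().unwrap() }
    ;
```

Here `$1`, `$3`, etc. refer to the values of the corresponding symbols in the production: for rule references they are the value the sub-rule's action returned, while for tokens they are the matched lexeme (wrapped in a `Result` to account for error recovery).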
16 | -------------------------------------------------------------------------------- /lrpar/examples/clone_param/src/param.y: -------------------------------------------------------------------------------- 1 | %grmtools { 2 | yacckind: Grmtools, 3 | test_files: ["input*.txt"], 4 | } 5 | %expect-unused Unmatched "UNMATCHED" 6 | %token Incr Decr 7 | %parse-param val: Rc<RefCell<i64>> 8 | %% 9 | Expr -> () : "INT" Ops { 10 | *val.borrow_mut() += parse_int($lexer.span_str($1.map_err(|_| "").unwrap().span())).unwrap() 11 | }; 12 | Ops -> (): 13 | %empty {} 14 | | Ops Incr { *val.borrow_mut() += 1; } 15 | | Ops Decr { *val.borrow_mut() -= 1; } 16 | ; 17 | Unmatched -> (): 18 | "UNMATCHED" { } 19 | ; 20 | %% 21 | use std::{ rc::Rc, cell::RefCell, error::Error }; 22 | 23 | fn parse_int(s: &str) -> Result<i64, Box<dyn Error>> { 24 | match s.parse::<i64>() { 25 | Ok(val) => Ok(val), 26 | Err(_) => { 27 | Err(Box::from(format!("{} cannot be represented as a i64", s))) 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /doc/src/yaccextensions.md: -------------------------------------------------------------------------------- 1 | # Yacc Extensions 2 | 3 | At the beginning of a `.y` file is a `%grmtools{}` section; by default this section is required, 4 | but a default can be set or forced by using a `YaccKindResolver`. 5 | 6 | | Flag | Value | Required | 7 | |------------------|-------------------------------------------------|--------------| 8 | | `yacckind` | [YaccKind](yacccompatibility.md#yacckinds) | ✓ | 9 | | `recoverykind` | [RecoveryKind](errorrecovery.md#recoverykinds) | ✗ | 10 | | `test_files`[^†] | Array of string values | ✗ | 11 | 12 | [^†]: Strings containing globs are resolved relative to the yacc `.y` source file. 13 | `test_files` is currently experimental. 14 | 15 | ## Example 16 | 17 | ``` 18 | %grmtools{yacckind: Grmtools} 19 | %% 20 | Start: ; 21 | ``` 22 | -------------------------------------------------------------------------------- /doc/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | - [grmtools](README.md) 4 | - [Quickstart Guide](quickstart.md) 5 | - [Lexing](lexing.md) 6 | - [Lex compatibility](lexcompatibility.md) 7 | - [Extensions](lexextensions.md) 8 | - [Hand-written lexers](manuallexer.md) 9 | - [Start States](start_states.md) 10 | - [Parsing](parsing.md) 11 | - [Yacc compatibility](yacccompatibility.md) 12 | - [Extensions](yaccextensions.md) 13 | - [Return types and action code](actioncode.md) 14 | - [grmtools parsing idioms](parsing_idioms.md) 15 | - [Error recovery](errorrecovery.md) 16 | - [An AST evaluator](ast_example.md) 17 | - [Rust Editions](editions.md) 18 | - [The individual libraries and tools](libsandtools.md) 19 | - [lrpar](lrpar.md) 20 | - [lrlex](lrlex.md) 21 | - [nimbleparse](nimbleparse.md) 22 | - [cfgrammar](cfgrammar.md) 23 | - [lrtable](lrtable.md) 24 | - [third party](thirdparty.md) 25 | - [Other Rust parsing tools](othertools.md) 26 | -------------------------------------------------------------------------------- /lrlex/README.md: -------------------------------------------------------------------------------- 1 | # `lrlex` 2 | 3 | `lrlex` is a partial replacement for 4 | [`lex`](http://dinosaur.compilertools.net/lex/index.html) / 5 | [`flex`](https://westes.github.io/flex/manual/). It takes an input string and 6 | splits it into *lexemes* based on a `.l` file.
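For example, a small `.l` file in the same format as the calculator examples elsewhere in this repository looks as follows (one rule per line: a regular expression, then either a token name or `;` to discard the match):

```
%%
[0-9]+ "INT"
\+ "+"
\* "*"
\( "("
\) ")"
[\t\n ]+ ;
```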
Unfortunately, many real-world 7 | languages have corner cases which exceed the power that `lrlex` can provide. 8 | However, when it is suitable, it is a very convenient way of expressing lexing. 9 | 10 | `lrlex` also has a simple command-line interface, allowing you to check whether 11 | your lexing rules are working as expected: 12 | 13 | ```ignore 14 | $ cat C.java 15 | class C { 16 | int x = 0; 17 | } 18 | $ cargo run --bin lrlex java.l /tmp/C.java 19 | Finished dev [unoptimized + debuginfo] target(s) in 0.18s 20 | Running `target/debug/lrlex ../grammars/java7/java.l /tmp/C.java` 21 | CLASS class 22 | IDENTIFIER C 23 | LBRACE { 24 | INT int 25 | IDENTIFIER x 26 | EQ = 27 | INTEGER_LITERAL 0 28 | SEMICOLON ; 29 | RBRACE } 30 | ``` 31 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Except as otherwise noted (below and/or in individual files), this project is 2 | licensed under the Apache License, Version 2.0 3 | or the MIT license 4 | , at your option. 5 | 6 | Copyright is retained by contributors and/or the organisations they 7 | represent(ed) -- this project does not require copyright assignment. Please see 8 | version control history for a full list of contributors. Note that some files 9 | may include explicit copyright and/or licensing notices. 10 | 11 | The following contributors wish to explicitly make it known that the copyright 12 | of their contributions is retained by an organisation: 13 | 14 | Lukas Diekmann : copyright retained by 15 | King's College London 16 | Sarah Mount : copyright retained by 17 | King's College London 18 | Laurence Tratt : copyright retained by 19 | King's College London 20 | -------------------------------------------------------------------------------- /lrlex/examples/calclex/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Write}; 2 | 3 | use lrlex::lrlex_mod; 4 | use lrpar::Lexer; 5 | 6 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. By default the module name will be 7 | // `calc_l` (i.e. the file name, minus any extensions, with a suffix of `_l`). 8 | lrlex_mod!("calc.l"); 9 | 10 | fn main() { 11 | // Get the `LexerDef` for the `calc` language. 12 | let lexerdef = calc_l::lexerdef(); 13 | let stdin = io::stdin(); 14 | loop { 15 | print!(">>> "); 16 | io::stdout().flush().ok(); 17 | match stdin.lock().lines().next() { 18 | Some(Ok(ref l)) => { 19 | // Now we create a lexer with the `lexer` method with which we can lex an input. 20 | // Note that each lexer can only lex one input in its lifetime.
21 | let lexer = lexerdef.lexer(l); 22 | println!("{:?}", lexer.iter().collect::>()); 23 | } 24 | _ => break, 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /lrpar/cttests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lrpar-tests" 3 | version = "0.1.0" 4 | authors = ["test"] 5 | edition = "2024" 6 | license = "Apache-2.0/MIT" 7 | build = "build.rs" 8 | 9 | [lib] 10 | crate-type = ["cdylib"] 11 | 12 | [build-dependencies] 13 | cfgrammar = { path = "../../cfgrammar" } 14 | lrlex = { path = "../../lrlex" } 15 | lrpar = { path = "../", features = ["_unstable_api"] } 16 | glob.workspace = true 17 | yaml-rust2.workspace = true 18 | cfg_aliases = "0.2.1" 19 | 20 | [dependencies] 21 | cfgrammar = { path = "../../cfgrammar" } 22 | glob.workspace = true 23 | lrlex = { path = "../../lrlex" } 24 | lrpar = { path = "../" } 25 | yaml-rust2.workspace = true 26 | 27 | [dev-dependencies] 28 | cttests_macro = { path = "../cttests_macro" } 29 | 30 | [target.'cfg(all(target_arch = "wasm32", target_os="unknown", target_vendor="unknown"))'.dependencies] 31 | wasm-bindgen = {version = "0.2.100", default-features = false} 32 | 33 | [target.'cfg(all(target_arch = "wasm32", target_os="unknown", target_vendor="unknown"))'.dev-dependencies] 34 | wasm-bindgen-test = "0.3.50" 35 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person obtaining a copy of 2 | this software and associated documentation files (the "Software"), to deal in 3 | the Software without restriction, including without limitation the rights to 4 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 5 | of the Software, and to permit persons to whom the Software is furnished to do 6 | so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all 9 | copies or substantial portions of the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. 18 | -------------------------------------------------------------------------------- /lrlex/examples/calc_manual_lex/src/calc.y: -------------------------------------------------------------------------------- 1 | %grmtools{yacckind: Grmtools} 2 | %start Expr 3 | %avoid_insert "INT" 4 | %expect-unused Unmatched "UNMATCHED" 5 | %% 6 | Expr -> Result: 7 | Expr '+' Term { 8 | Ok(Expr::Add{ span: $span, lhs: Box::new($1?), rhs: Box::new($3?) }) 9 | } 10 | | Term { $1 } 11 | ; 12 | 13 | Term -> Result: 14 | Term '*' Factor { 15 | Ok(Expr::Mul{ span: $span, lhs: Box::new($1?), rhs: Box::new($3?) 
 })
16 | }
17 |   | Factor { $1 }
18 |   ;
19 | 
20 | Factor -> Result<Expr, ()>:
21 |     '(' Expr ')' { $2 }
22 |   | 'INT' { Ok(Expr::Number{ span: $span }) }
23 |   ;
24 | 
25 | Unmatched -> ():
26 |     "UNMATCHED" { }
27 |   ;
28 | %%
29 | 
30 | use cfgrammar::Span;
31 | 
32 | #[derive(Debug)]
33 | pub enum Expr {
34 |     Add {
35 |         span: Span,
36 |         lhs: Box<Expr>,
37 |         rhs: Box<Expr>,
38 |     },
39 |     Mul {
40 |         span: Span,
41 |         lhs: Box<Expr>,
42 |         rhs: Box<Expr>,
43 |     },
44 |     Number {
45 |         span: Span
46 |     }
47 | }
48 | 
--------------------------------------------------------------------------------
/doc/src/editions.md:
--------------------------------------------------------------------------------
 1 | # Rust Editions
 2 | 
 3 | The [edition](https://doc.rust-lang.org/edition-guide/rust-2021/index.html)
 4 | of rust used by `grmtools` updates as the rust language evolves. We try to
 5 | keep code generated by `CTParserBuilder` and `CTLexerBuilder` building with
 6 | older versions of rust, so that downstream users can use the edition that
 7 | suits their requirements.
 8 | 
 9 | ## Controlling edition used during code generation
10 | 
11 | `CTLexerBuilder` and `CTParserBuilder` both have a `rust_edition()` function,
12 | accepting an `lrlex::RustEdition` and an `lrpar::RustEdition` respectively.
13 | 
14 | ## Known edition incompatibility in the book
15 | 
16 | While there is a preference for keeping the code in this manual working with all
17 | editions, exceptions may be made for clarity.
18 | 
19 | * In [An AST evaluator](ast_example.md), the rust_2018_idioms lint deprecates
20 | some behavior which was previously accepted by the 2015 edition. The `eval` function has
21 | an elided lifetime that must be given explicitly as `lexer: &dyn NonStreamingLexer<'_, DefaultLexeme, u32>`.
--------------------------------------------------------------------------------
/lrlex/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "lrlex"
 3 | description = "Simple lexer generator"
 4 | repository = "https://github.com/softdevteam/grmtools"
 5 | version = "0.14.0"
 6 | edition = "2024"
 7 | readme = "README.md"
 8 | license = "Apache-2.0/MIT"
 9 | categories = ["parsing"]
10 | 
11 | [[bin]]
12 | doc = false
13 | name = "lrlex"
14 | 
15 | [lib]
16 | name = "lrlex"
17 | path = "src/lib/mod.rs"
18 | 
19 | [features]
20 | _unstable_api = []
21 | _unsealed_unstable_traits = ["_unstable_api"]
22 | 
23 | [build-dependencies]
24 | vergen = { version = "8", default-features = false, features = ["build"] }
25 | 
26 | [dependencies]
27 | cfgrammar = { path = "../cfgrammar", version = "0.14" }
28 | lrpar = { path = "../lrpar", version = "0.14" }
29 | 
30 | getopts.workspace = true
31 | regex.workspace = true
32 | regex-syntax.workspace = true
33 | num-traits.workspace = true
34 | proc-macro2.workspace = true
35 | quote.workspace = true
36 | bincode.workspace = true
37 | serde = { workspace = true, optional = true }
38 | prettyplease.workspace = true
39 | syn.workspace = true
40 | glob.workspace = true
41 | 
--------------------------------------------------------------------------------
/lrpar/examples/calc_ast/src/calc.y:
--------------------------------------------------------------------------------
 1 | %grmtools {
 2 |     yacckind: Grmtools,
 3 |     test_files: ["input*.txt"],
 4 | }
 5 | %start Expr
 6 | %avoid_insert "INT"
 7 | %expect-unused Unmatched "UNMATCHED"
 8 | %%
 9 | Expr -> Result<Expr, ()>:
10 |     Expr '+' Term {
11 |         Ok(Expr::Add{ span: $span, lhs: Box::new($1?), rhs: Box::new($3?)
}) 12 | } 13 | | Term { $1 } 14 | ; 15 | 16 | Term -> Result: 17 | Term '*' Factor { 18 | Ok(Expr::Mul{ span: $span, lhs: Box::new($1?), rhs: Box::new($3?) }) 19 | } 20 | | Factor { $1 } 21 | ; 22 | 23 | Factor -> Result: 24 | '(' Expr ')' { $2 } 25 | | 'INT' { Ok(Expr::Number{ span: $span }) } 26 | ; 27 | 28 | Unmatched -> (): 29 | "UNMATCHED" { } 30 | ; 31 | %% 32 | 33 | use cfgrammar::Span; 34 | 35 | #[derive(Debug)] 36 | pub enum Expr { 37 | Add { 38 | span: Span, 39 | lhs: Box, 40 | rhs: Box, 41 | }, 42 | Mul { 43 | span: Span, 44 | lhs: Box, 45 | rhs: Box, 46 | }, 47 | Number { 48 | span: Span 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /doc/src/lrlex.md: -------------------------------------------------------------------------------- 1 | # `lrlex` 2 | 3 | `lrlex` ([crate](https://crates.io/crates/lrlex); 4 | [source](https://github.com/softdevteam/grmtools/tree/master/lrlex)) is a 5 | partial replacement for [`lex`](https://web.archive.org/web/20220402195947/dinosaur.compilertools.net/lex/index.html) / 6 | [`flex`](https://westes.github.io/flex/manual/). It takes an input string and 7 | splits it into *lexemes* based on a `.l` file. Unfortunately, many real-world 8 | languages have corner cases which exceed the power that `lrlex` can provide. 9 | However, when it is suitable, it is a very convenient way of expressing lexing. 10 | 11 | `lrlex` also has a simple command-line interface, allowing you to check whether 12 | your lexing rules are working as expected: 13 | 14 | ``` 15 | $ cat C.java 16 | class C { 17 | int x = 0; 18 | } 19 | $ cargo run --lrlex java.l /tmp/C.java 20 | Finished dev [unoptimized + debuginfo] target(s) in 0.18s 21 | Running `target/debug/lrlex ../grammars/java7/java.l /tmp/C.java` 22 | CLASS class 23 | IDENTIFIER C 24 | LBRACE { 25 | INT int 26 | IDENTIFIER x 27 | EQ = 28 | INTEGER_LITERAL 0 29 | SEMICOLON ; 30 | RBRACE } 31 | ``` 32 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members=[ 3 | "cfgrammar", 4 | "lrlex", 5 | "lrlex/examples/calclex", 6 | "lrlex/examples/calc_manual_lex", 7 | "lrpar", 8 | "lrpar/cttests", 9 | "lrpar/cttests_macro", 10 | "lrpar/examples/calc_actions", 11 | "lrpar/examples/calc_ast", 12 | "lrpar/examples/calc_parsetree", 13 | "lrpar/examples/calc_ast_arena", 14 | "lrpar/examples/start_states", 15 | "lrpar/examples/clone_param", 16 | "lrtable", 17 | "nimbleparse", 18 | ] 19 | resolver = "2" 20 | 21 | [profile.release] 22 | opt-level = 3 23 | debug = false 24 | rpath = false 25 | lto = true 26 | debug-assertions = false 27 | codegen-units = 1 28 | panic = 'unwind' 29 | 30 | [workspace.dependencies] 31 | bincode = "2.0" 32 | cactus = "1.0" 33 | filetime = "0.2" 34 | fnv = "1.0" 35 | getopts = "0.2" 36 | glob = "0.3" 37 | indexmap = "2" 38 | num-traits = "0.2" 39 | packedvec = "1.2" 40 | quote = "1.0" 41 | regex = "1.3" 42 | regex-syntax = "0.8" 43 | serde = "1.0" 44 | sparsevec = "0.2.2" 45 | unicode-width = "0.1.11" 46 | vob = "3.0.4" 47 | proc-macro2 = "1.0" 48 | prettyplease = "0.2.31" 49 | syn = "2.0" 50 | yaml-rust2 = "0.10.1" 51 | -------------------------------------------------------------------------------- /lrpar/examples/calc_actions/src/calc.y: -------------------------------------------------------------------------------- 1 | %grmtools { 2 | yacckind: Grmtools, 3 | test_files: ["input*.txt"], 4 | } 5 | %start Expr 6 | 
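/* A note on the declaration below: `%avoid_insert "INT"` asks lrpar's error
   recovery to prefer repair sequences that do not insert an `INT` token,
   since an inserted `INT` has no underlying input text for actions to read. */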
%avoid_insert "INT" 7 | %% 8 | Expr -> Result>: 9 | Expr '+' Term { 10 | $1?.checked_add($3?) 11 | .ok_or_else(|| Box::::from("Overflow detected.")) 12 | } 13 | | Term { $1 } 14 | ; 15 | 16 | Term -> Result>: 17 | Term '*' Factor { 18 | $1?.checked_mul($3?) 19 | .ok_or_else(|| Box::::from("Overflow detected.")) 20 | } 21 | | Factor { $1 } 22 | ; 23 | 24 | Factor -> Result>: 25 | '(' Expr ')' { $2 } 26 | | 'INT' { 27 | parse_int($lexer.span_str($1.map_err(|_| "")?.span())) 28 | } 29 | ; 30 | %% 31 | // Any imports here are in scope for all the grammar actions above. 32 | 33 | use std::error::Error; 34 | 35 | fn parse_int(s: &str) -> Result> { 36 | match s.parse::() { 37 | Ok(val) => Ok(val), 38 | Err(_) => { 39 | Err(Box::from(format!("{} cannot be represented as a u64", s))) 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_nodefault_yacckind.test: -------------------------------------------------------------------------------- 1 | name: Test specification of yacckind in %grmtools section 2 | grammar: | 3 | %grmtools {yacckind: Original(UserAction)} 4 | %start Expr 5 | %actiontype Result 6 | %avoid_insert 'INT' 7 | %% 8 | Expr: Expr '+' Term { Ok($1? + $3?) } 9 | | Term { $1 } 10 | ; 11 | 12 | Term: Term '*' Factor { Ok($1? * $3?) } 13 | | Factor { $1 } 14 | ; 15 | 16 | Factor: '(' Expr ')' { $2 } 17 | | 'INT' { 18 | let l = $1.map_err(|_| ())?; 19 | match $lexer.span_str(l.span()).parse::() { 20 | Ok(v) => Ok(v), 21 | Err(_) => { 22 | let ((_, col), _) = $lexer.line_col(l.span()); 23 | eprintln!("Error at column {}: '{}' cannot be represented as a u64", 24 | col, 25 | $lexer.span_str(l.span())); 26 | Err(()) 27 | } 28 | } 29 | } 30 | ; 31 | 32 | lexer: | 33 | %% 34 | [0-9]+ "INT" 35 | \+ "+" 36 | \* "*" 37 | \( "(" 38 | \) ")" 39 | [\t ]+ ; 40 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_recoverer_none.test: -------------------------------------------------------------------------------- 1 | vname: Test %grmtools section RecoveryKind::None 2 | grammar: | 3 | %grmtools {yacckind: Original(UserAction), recoverer: RecoveryKind::None} 4 | %start Expr 5 | %actiontype Result 6 | %avoid_insert 'INT' 7 | %% 8 | Expr: Expr '+' Term { Ok($1? + $3?) } 9 | | Term { $1 } 10 | ; 11 | 12 | Term: Term '*' Factor { Ok($1? * $3?) } 13 | | Factor { $1 } 14 | ; 15 | 16 | Factor: '(' Expr ')' { $2 } 17 | | 'INT' { 18 | let l = $1.map_err(|_| ())?; 19 | match $lexer.span_str(l.span()).parse::() { 20 | Ok(v) => Ok(v), 21 | Err(_) => { 22 | let ((_, col), _) = $lexer.line_col(l.span()); 23 | eprintln!("Error at column {}: '{}' cannot be represented as a u64", 24 | col, 25 | $lexer.span_str(l.span())); 26 | Err(()) 27 | } 28 | } 29 | } 30 | ; 31 | 32 | lexer: | 33 | %% 34 | [0-9]+ "INT" 35 | \+ "+" 36 | \* "*" 37 | \( "(" 38 | \) ")" 39 | [\t ]+ ; 40 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_multitypes.test: -------------------------------------------------------------------------------- 1 | name: Test basic user actions using the calculator grammar (Grmtools yacckind) 2 | yacckind: Grmtools 3 | recoverer: RecoveryKind::CPCTPlus 4 | grammar: | 5 | %start Expr 6 | %avoid_insert "INT" 7 | %% 8 | Expr -> Result: 9 | Expr '+' Term { Ok($1? + $3?) } 10 | | Term { $1 } 11 | ; 12 | 13 | Term -> Result: 14 | Term '*' Factor { Ok($1? * $3?) 
} 15 | | Factor { $1 } 16 | ; 17 | 18 | Factor -> Result: 19 | '(' Expr ')' { $2 } 20 | | 'INT' 21 | { 22 | let v = $1.map_err(|_| ())?; 23 | parse_int($lexer.span_str(v.span())) 24 | } 25 | ; 26 | %% 27 | // Any functions here are in scope for all the grammar actions above. 28 | 29 | fn parse_int(s: &str) -> Result { 30 | match s.parse::() { 31 | Ok(val) => Ok(val), 32 | Err(_) => { 33 | eprintln!("{} cannot be represented as a u64", s); 34 | Err(()) 35 | } 36 | } 37 | } 38 | lexer: | 39 | %% 40 | [0-9]+ "INT" 41 | \+ "+" 42 | \* "*" 43 | \( "(" 44 | \) ")" 45 | [\t ]+ ; 46 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_recoverer_cpctplus.test: -------------------------------------------------------------------------------- 1 | name: Test multiple values in %grmtools section 2 | grammar: | 3 | %grmtools {yacckind: Original(UserAction), recoverer: RecoveryKind::CPCTPlus} 4 | %start Expr 5 | %actiontype Result 6 | %avoid_insert 'INT' 7 | %% 8 | Expr: Expr '+' Term { Ok($1? + $3?) } 9 | | Term { $1 } 10 | ; 11 | 12 | Term: Term '*' Factor { Ok($1? * $3?) } 13 | | Factor { $1 } 14 | ; 15 | 16 | Factor: '(' Expr ')' { $2 } 17 | | 'INT' { 18 | let l = $1.map_err(|_| ())?; 19 | match $lexer.span_str(l.span()).parse::() { 20 | Ok(v) => Ok(v), 21 | Err(_) => { 22 | let ((_, col), _) = $lexer.line_col(l.span()); 23 | eprintln!("Error at column {}: '{}' cannot be represented as a u64", 24 | col, 25 | $lexer.span_str(l.span())); 26 | Err(()) 27 | } 28 | } 29 | } 30 | ; 31 | 32 | lexer: | 33 | %% 34 | [0-9]+ "INT" 35 | \+ "+" 36 | \* "*" 37 | \( "(" 38 | \) ")" 39 | [\t ]+ ; 40 | -------------------------------------------------------------------------------- /lrpar/cttests/src/span.test: -------------------------------------------------------------------------------- 1 | name: Test the span variable 2 | yacckind: Grmtools 3 | grammar: | 4 | %start Expr 5 | %avoid_insert "INT" 6 | %% 7 | Expr -> Vec<::cfgrammar::Span>: 8 | Expr '+' Term { 9 | let mut spans = $1; 10 | spans.extend($3); 11 | spans.push($span); 12 | spans 13 | } 14 | | Term { 15 | let mut spans = $1; 16 | spans.push($span); 17 | spans 18 | } 19 | ; 20 | 21 | Term -> Vec<::cfgrammar::Span>: 22 | Term '*' Factor { 23 | let mut spans = $1; 24 | spans.extend($3); 25 | spans.push($span); 26 | spans 27 | } 28 | | Factor { 29 | let mut spans = $1; 30 | spans.push($span); 31 | spans 32 | } 33 | ; 34 | 35 | Factor -> Vec<::cfgrammar::Span>: 36 | '(' Expr ')' { 37 | let mut spans = $2; 38 | spans.push($span); 39 | spans 40 | } 41 | | 'INT' { vec![$span] } 42 | ; 43 | lexer: | 44 | %% 45 | [0-9]+ "INT" 46 | \+ "+" 47 | \* "*" 48 | \( "(" 49 | \) ")" 50 | [\t ]+ ; 51 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast_arena/src/calc.y: -------------------------------------------------------------------------------- 1 | %grmtools { 2 | yacckind: Grmtools, 3 | test_files: ["input*.txt"], 4 | } 5 | %start Expr 6 | %avoid_insert "INT" 7 | %expect-unused Unmatched "UNMATCHED" 8 | %parse-generics 'ast 9 | %parse-param arena: &'ast Bump 10 | %% 11 | Expr -> Result, ()>: 12 | Expr '+' Term { 13 | Ok(Expr::Add{ span: $span, lhs: arena.alloc($1?), rhs: arena.alloc($3?) }) 14 | } 15 | | Term { $1 } 16 | ; 17 | 18 | Term -> Result, ()>: 19 | Term '*' Factor { 20 | Ok(Expr::Mul{ span: $span, lhs: arena.alloc($1?), rhs: arena.alloc($3?) 
}) 21 | } 22 | | Factor { $1 } 23 | ; 24 | 25 | Factor -> Result, ()>: 26 | '(' Expr ')' { $2 } 27 | | 'INT' { Ok(Expr::Number{ span: $span }) } 28 | ; 29 | 30 | Unmatched -> (): 31 | "UNMATCHED" { } 32 | ; 33 | %% 34 | 35 | use cfgrammar::Span; 36 | use bumpalo::Bump; 37 | 38 | #[derive(Debug)] 39 | pub enum Expr<'ast> { 40 | Add { 41 | span: Span, 42 | lhs: &'ast Expr<'ast>, 43 | rhs: &'ast Expr<'ast>, 44 | }, 45 | Mul { 46 | span: Span, 47 | lhs: &'ast Expr<'ast>, 48 | rhs: &'ast Expr<'ast>, 49 | }, 50 | Number { 51 | span: Span 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_actiontype.test: -------------------------------------------------------------------------------- 1 | name: Test basic user actions using the calculator grammar (Original yacckind) 2 | yacckind: Original(YaccOriginalActionKind::UserAction) 3 | recoverer: RecoveryKind::None 4 | grammar: | 5 | %start Expr 6 | %actiontype Result 7 | %avoid_insert 'INT' 8 | %% 9 | Expr: Expr '+' Term { Ok($1? + $3?) } 10 | | Term { $1 } 11 | ; 12 | 13 | Term: Term '*' Factor { Ok($1? * $3?) } 14 | | Factor { $1 } 15 | ; 16 | 17 | Factor: '(' Expr ')' { $2 } 18 | | 'INT' { 19 | let l = $1.map_err(|_| ())?; 20 | match $lexer.span_str(l.span()).parse::() { 21 | Ok(v) => Ok(v), 22 | Err(_) => { 23 | let ((_, col), _) = $lexer.line_col(l.span()); 24 | eprintln!("Error at column {}: '{}' cannot be represented as a u64", 25 | col, 26 | $lexer.span_str(l.span())); 27 | Err(()) 28 | } 29 | } 30 | } 31 | ; 32 | 33 | lexer: | 34 | %% 35 | [0-9]+ "INT" 36 | \+ "+" 37 | \* "*" 38 | \( "(" 39 | \) ")" 40 | [\t ]+ ; 41 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_unsafeaction.test: -------------------------------------------------------------------------------- 1 | name: Test unsafe user actions using the calculator grammar 2 | yacckind: Original(YaccOriginalActionKind::UserAction) 3 | grammar: | 4 | %start Expr 5 | %actiontype Result 6 | %avoid_insert 'INT' 7 | %% 8 | Expr: Expr '+' Term { unsafe { unsafe_ok($1? + $3?) } } 9 | | Term { $1 } 10 | ; 11 | 12 | Term: Term '*' Factor { unsafe { unsafe_ok($1? * $3?) } } 13 | | Factor { $1 } 14 | ; 15 | 16 | Factor: '(' Expr ')' { $2 } 17 | | 'INT' { 18 | let l = $1.map_err(|_| ())?; 19 | match $lexer.span_str(l.span()).parse::() { 20 | Ok(v) => unsafe { unsafe_ok(v) }, 21 | Err(_) => { 22 | let ((_, col), _) = $lexer.line_col(l.span()); 23 | eprintln!("Error at column {}: '{}' cannot be represented as a u64", 24 | col, 25 | $lexer.span_str(l.span())); 26 | Err(()) 27 | } 28 | } 29 | } 30 | ; 31 | %% 32 | // Just check that unsafe blocks work in actions. 33 | unsafe fn unsafe_ok(x:T) -> Result { 34 | Ok(x) 35 | } 36 | 37 | lexer: | 38 | %% 39 | [0-9]+ "INT" 40 | \+ "+" 41 | \* "*" 42 | \( "(" 43 | \) ")" 44 | [\t ]+ ; 45 | -------------------------------------------------------------------------------- /lrpar/cttests/src/ctfails/calc_bad_input.test: -------------------------------------------------------------------------------- 1 | name: Test calculator with malformed input from %grmtools{test_files} 2 | grammar: | 3 | %grmtools { 4 | yacckind: Original(YaccOriginalActionKind::UserAction), 5 | recoverer: RecoveryKind::None, 6 | test_files: ["*.valid_input", "*.bad_input"] 7 | } 8 | %start Expr 9 | %actiontype Result 10 | %avoid_insert 'INT' 11 | %% 12 | Expr: Expr '+' Term { Ok($1? + $3?) } 13 | | Term { $1 } 14 | ; 15 | 16 | Term: Term '*' Factor { Ok($1? * $3?) 
} 17 | | Factor { $1 } 18 | ; 19 | 20 | Factor: '(' Expr ')' { $2 } 21 | | 'INT' { 22 | let l = $1.map_err(|_| ())?; 23 | match $lexer.span_str(l.span()).parse::() { 24 | Ok(v) => Ok(v), 25 | Err(_) => { 26 | let ((_, col), _) = $lexer.line_col(l.span()); 27 | eprintln!("Error at column {}: '{}' cannot be represented as a u64", 28 | col, 29 | $lexer.span_str(l.span())); 30 | Err(()) 31 | } 32 | } 33 | } 34 | ; 35 | 36 | lexer: | 37 | %% 38 | [0-9]+ "INT" 39 | \+ "+" 40 | \* "*" 41 | \( "(" 42 | \) ")" 43 | [\t\n ]+ ; 44 | extra_files: 45 | input1.valid_input: | 46 | (1 + 2) * 3 47 | input1.bad_input: | 48 | (1 + 2 * 3 49 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_input.test: -------------------------------------------------------------------------------- 1 | name: Test with calculator input from %grmtools{test_files} 2 | grammar: | 3 | %grmtools { 4 | yacckind: Original(YaccOriginalActionKind::UserAction), 5 | recoverer: RecoveryKind::None, 6 | test_files: ["*.calc_input", "*.calc_input2"], 7 | } 8 | %start Expr 9 | %actiontype Result 10 | %avoid_insert 'INT' 11 | %% 12 | Expr: Expr '+' Term { Ok($1? + $3?) } 13 | | Term { $1 } 14 | ; 15 | 16 | Term: Term '*' Factor { Ok($1? * $3?) } 17 | | Factor { $1 } 18 | ; 19 | 20 | Factor: '(' Expr ')' { $2 } 21 | | 'INT' { 22 | let l = $1.map_err(|_| ())?; 23 | match $lexer.span_str(l.span()).parse::() { 24 | Ok(v) => Ok(v), 25 | Err(_) => { 26 | let ((_, col), _) = $lexer.line_col(l.span()); 27 | eprintln!("Error at column {}: '{}' cannot be represented as a u64", 28 | col, 29 | $lexer.span_str(l.span())); 30 | Err(()) 31 | } 32 | } 33 | } 34 | ; 35 | 36 | lexer: | 37 | %% 38 | [0-9]+ "INT" 39 | \+ "+" 40 | \* "*" 41 | \( "(" 42 | \) ")" 43 | [\t\n ]+ ; 44 | extra_files: 45 | input1.calc_input: | 46 | 1 + 2 * 3 47 | input2.calc_input: | 48 | (1 + 2) * 3 49 | input1.calc_input2: | 50 | 2 * 3 51 | 52 | -------------------------------------------------------------------------------- /lrpar/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lrpar" 3 | description = "Yacc-compatible parser generator" 4 | repository = "https://github.com/softdevteam/grmtools" 5 | version = "0.14.0" 6 | edition = "2024" 7 | readme = "README.md" 8 | license = "Apache-2.0/MIT" 9 | build = "build.rs" 10 | categories = ["parsing"] 11 | keywords = ["parser", "LR", "yacc", "grammar"] 12 | 13 | [lib] 14 | name = "lrpar" 15 | path = "src/lib/mod.rs" 16 | 17 | [features] 18 | serde = ["dep:serde", "cfgrammar/serde", "lrtable/serde"] 19 | _unstable_api = [] 20 | _unsealed_unstable_traits = ["_unstable_api"] 21 | 22 | [build-dependencies] 23 | vergen = { version = "8", default-features = false, features = ["build"] } 24 | 25 | [dependencies] 26 | cfgrammar = { path="../cfgrammar", version = "0.14", features = ["bincode"] } 27 | lrtable = { path="../lrtable", version = "0.14", features = ["bincode"] } 28 | 29 | bincode = { workspace = true, features = ["derive"] } 30 | cactus.workspace = true 31 | filetime.workspace = true 32 | indexmap.workspace = true 33 | num-traits.workspace = true 34 | packedvec.workspace = true 35 | proc-macro2.workspace = true 36 | quote.workspace = true 37 | regex.workspace = true 38 | serde = { workspace = true, features = ["derive"], optional = true } 39 | vob.workspace = true 40 | syn.workspace = true 41 | prettyplease.workspace = true 42 | unicode-width.workspace = true 43 | 44 | [target.'cfg(target_arch = 
"wasm32")'.dependencies] 45 | web-time = "1.1.0" 46 | 47 | [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] 48 | tempfile = "3.0" 49 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_wasm.test: -------------------------------------------------------------------------------- 1 | name: Test running on wasm targets 2 | grammar: | 3 | %grmtools {yacckind: Grmtools} 4 | %start Expr 5 | %avoid_insert "INT" 6 | %expect-unused Unmatched "UNMATCHED" 7 | %epp INT "Int" 8 | %% 9 | Expr -> Result>: 10 | Expr '+' Term { 11 | $1?.checked_add($3?) 12 | .ok_or_else(|| Box::::from("Overflow detected.")) 13 | } 14 | | Term { $1 } 15 | ; 16 | 17 | Term -> Result>: 18 | Term '*' Factor { 19 | $1?.checked_mul($3?) 20 | .ok_or_else(|| Box::::from("Overflow detected.")) 21 | } 22 | | Factor { $1 } 23 | ; 24 | 25 | Factor -> Result>: 26 | '(' Expr ')' { $2 } 27 | | 'INT' { 28 | parse_int($lexer.span_str($1.map_err(|_| "")?.span())) 29 | } 30 | ; 31 | Unmatched -> (): "UNMATCHED" { }; 32 | %% 33 | // Any imports here are in scope for all the grammar actions above. 34 | 35 | use std::error::Error; 36 | 37 | fn parse_int(s: &str) -> Result> { 38 | match s.parse::() { 39 | Ok(val) => Ok(val), 40 | Err(_) => { 41 | Err(Box::from(format!("{} cannot be represented as a u64", s))) 42 | } 43 | } 44 | } 45 | lexer: | 46 | %% 47 | [0-9]+ "INT" 48 | \+ "+" 49 | \* "*" 50 | \( "(" 51 | \) ")" 52 | [\t ]+ ; 53 | . "UNMATCHED" 54 | 55 | -------------------------------------------------------------------------------- /lrpar/examples/clone_param/src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::unnecessary_wraps)] 2 | 3 | use lrlex::lrlex_mod; 4 | use lrpar::lrpar_mod; 5 | use std::io::{self, BufRead, Write}; 6 | use std::{cell::RefCell, rc::Rc}; 7 | 8 | // Using `lrlex_mod!` brings the lexer for `param.l` into scope. By default the module name will be 9 | // `param_l` (i.e. the file name, minus any extensions, with a suffix of `_l`). 10 | lrlex_mod!("param.l"); 11 | // Using `lrpar_mod!` brings the parser for `param.y` into scope. By default the module name will be 12 | // `param_y` (i.e. the file name, minus any extensions, with a suffix of `_y`). 13 | lrpar_mod!("param.y"); 14 | 15 | fn main() { 16 | // Get the `LexerDef` for the `param` language. 17 | let lexerdef = param_l::lexerdef(); 18 | let stdin = io::stdin(); 19 | loop { 20 | print!(">>> "); 21 | io::stdout().flush().ok(); 22 | match stdin.lock().lines().next() { 23 | Some(Ok(ref l)) => { 24 | if l.trim().is_empty() { 25 | continue; 26 | } 27 | // Now we create a lexer with the `lexer` method with which we can lex an input. 28 | let lexer = lexerdef.lexer(l); 29 | let param = Rc::new(RefCell::new(0)); 30 | // Pass the lexer to the parser and lex and parse the input. 31 | let (_opt, errs) = param_y::parse(&lexer, param.clone()); 32 | for e in errs { 33 | println!("{}", e.pp(&lexer, ¶m_y::token_epp)); 34 | } 35 | println!("Evaluated: {:?}", ¶m); 36 | } 37 | _ => break, 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /lrpar/examples/calc_actions/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Write}; 2 | 3 | use lrlex::lrlex_mod; 4 | use lrpar::lrpar_mod; 5 | 6 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. By default the module name will be 7 | // `calc_l` (i.e. 
the file name, minus any extensions, with a suffix of `_l`). 8 | lrlex_mod!("calc.l"); 9 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. By default the module name will be 10 | // `calc_y` (i.e. the file name, minus any extensions, with a suffix of `_y`). 11 | lrpar_mod!("calc.y"); 12 | 13 | fn main() { 14 | // Get the `LexerDef` for the `calc` language. 15 | let lexerdef = calc_l::lexerdef(); 16 | let stdin = io::stdin(); 17 | loop { 18 | print!(">>> "); 19 | io::stdout().flush().ok(); 20 | match stdin.lock().lines().next() { 21 | Some(Ok(ref l)) => { 22 | if l.trim().is_empty() { 23 | continue; 24 | } 25 | // Now we create a lexer with the `lexer` method with which we can lex an input. 26 | let lexer = lexerdef.lexer(l); 27 | // Pass the lexer to the parser and lex and parse the input. 28 | let (res, errs) = calc_y::parse(&lexer); 29 | for e in errs { 30 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 31 | } 32 | match res { 33 | Some(Ok(r)) => println!("Result: {}", r), 34 | Some(Err(e)) => eprintln!("{}", e), 35 | _ => eprintln!("Unable to evaluate expression."), 36 | } 37 | } 38 | _ => break, 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /lrpar/cttests_macro/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate proc_macro; 2 | use glob::glob; 3 | use proc_macro::TokenStream; 4 | use proc_macro2::Span; 5 | use quote::quote; 6 | use syn::{Ident, LitStr, parse_macro_input}; 7 | #[proc_macro] 8 | pub fn generate_codegen_fail_tests(item: TokenStream) -> TokenStream { 9 | let mut out = Vec::new(); 10 | let test_glob_str: LitStr = parse_macro_input!(item); 11 | // Not env!("CARGO_MANIFEST_DIR"), which would be relative to the cttests_macro crate. 12 | // An absolute path which may contain non-utf8 characters. 13 | let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); 14 | let cwd = std::env::current_dir().unwrap(); 15 | // We want a relative path to the glob from the working directory 16 | // such as: lrpar/cttests/ with any potentially non-utf8 leading characters removed. 17 | let manifest_dir = std::path::Path::new(&manifest_dir) 18 | .strip_prefix(cwd) 19 | .unwrap(); 20 | let test_glob_path = manifest_dir.join(test_glob_str.value()); 21 | let test_glob_str = test_glob_path.into_os_string().into_string().unwrap(); 22 | let test_files = glob(&test_glob_str).unwrap(); 23 | for file in test_files { 24 | let file = file.unwrap(); 25 | // Remove potentially non-utf8 leading characters again. 26 | // This time relative to the manifest dir e.g. `src/ctfails/foo.test` 27 | let file = file.as_path().strip_prefix(manifest_dir).unwrap(); 28 | // Need to convert to string, because `PathBuf` lacks 29 | // an impl for `ToTokens` a bounds given by `quote!`. 30 | let path = file.display().to_string(); 31 | let stem = file.file_stem().unwrap().to_string_lossy(); 32 | let ident = Ident::new(&format!("codegen_fail_{}", stem), Span::call_site()); 33 | out.push(quote! 
{ 34 | #[should_panic] 35 | #[test] 36 | fn #ident(){ 37 | run_test_path(#path).unwrap(); 38 | } 39 | }); 40 | } 41 | out.into_iter().collect::().into() 42 | } 43 | -------------------------------------------------------------------------------- /lrpar/src/lib/test_utils.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::len_without_is_empty)] 2 | #![allow(unused)] 3 | 4 | use std::{error::Error, fmt, hash::Hash}; 5 | 6 | use cfgrammar::Span; 7 | 8 | use crate::{LexError, Lexeme, LexerTypes}; 9 | 10 | type StorageT = u16; 11 | 12 | #[derive(Debug, Clone)] 13 | pub(crate) struct TestLexerTypes(); 14 | 15 | impl LexerTypes for TestLexerTypes { 16 | type LexemeT = TestLexeme; 17 | type StorageT = u16; 18 | type LexErrorT = TestLexError; 19 | } 20 | 21 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 22 | pub(crate) struct TestLexeme { 23 | start: usize, 24 | len: usize, 25 | faulty: bool, 26 | tok_id: u16, 27 | } 28 | 29 | impl Lexeme for TestLexeme { 30 | fn new(tok_id: StorageT, start: usize, len: usize) -> Self { 31 | TestLexeme { 32 | start, 33 | len, 34 | faulty: false, 35 | tok_id, 36 | } 37 | } 38 | 39 | fn new_faulty(tok_id: StorageT, start: usize, len: usize) -> Self { 40 | TestLexeme { 41 | start, 42 | len, 43 | faulty: true, 44 | tok_id, 45 | } 46 | } 47 | 48 | fn tok_id(&self) -> StorageT { 49 | self.tok_id 50 | } 51 | 52 | fn span(&self) -> Span { 53 | Span::new(self.start, self.start + self.len) 54 | } 55 | 56 | fn faulty(&self) -> bool { 57 | self.faulty 58 | } 59 | } 60 | 61 | impl fmt::Display for TestLexeme { 62 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 63 | write!( 64 | f, 65 | "TestLexeme[{}..{}]", 66 | self.span().start(), 67 | self.span().end() 68 | ) 69 | } 70 | } 71 | 72 | impl Error for TestLexeme {} 73 | 74 | #[derive(Debug)] 75 | pub(crate) struct TestLexError {} 76 | 77 | impl LexError for TestLexError { 78 | fn span(&self) -> Span { 79 | unreachable!() 80 | } 81 | } 82 | 83 | impl Error for TestLexError {} 84 | 85 | impl fmt::Display for TestLexError { 86 | fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result { 87 | unreachable!(); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /lrlex/src/lib/defaults.rs: -------------------------------------------------------------------------------- 1 | use std::{cmp, error::Error, fmt, hash::Hash, marker}; 2 | 3 | use cfgrammar::Span; 4 | use lrpar::{Lexeme, LexerTypes}; 5 | use num_traits::{AsPrimitive, PrimInt, Unsigned}; 6 | 7 | use crate::LRLexError; 8 | 9 | /// lrlex's standard [LexerTypes] `struct`, provided as a convenience. 10 | #[derive(Debug, Clone)] 11 | pub struct DefaultLexerTypes 12 | where 13 | T: 'static + fmt::Debug + Hash + PrimInt + Unsigned, 14 | usize: AsPrimitive, 15 | { 16 | phantom: std::marker::PhantomData, 17 | } 18 | 19 | impl LexerTypes for DefaultLexerTypes 20 | where 21 | usize: AsPrimitive, 22 | T: 'static + fmt::Debug + Hash + PrimInt + Unsigned, 23 | { 24 | type LexemeT = DefaultLexeme; 25 | type StorageT = T; 26 | type LexErrorT = LRLexError; 27 | } 28 | 29 | /// lrlex's standard lexeme struct, provided as a convenience. 
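// A usage sketch, based on the `Lexeme` impl below: `DefaultLexeme::new(3u16, 10, 4)`
// has `tok_id() == 3`, a `span()` covering bytes 10..14 and `faulty() == false`,
// while `new_faulty` builds a lexeme with `faulty() == true` (the kind lrpar's
// error recovery inserts when repairing input).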
30 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 31 | pub struct DefaultLexeme { 32 | start: usize, 33 | len: usize, 34 | faulty: bool, 35 | tok_id: StorageT, 36 | } 37 | 38 | impl Lexeme for DefaultLexeme { 39 | fn new(tok_id: StorageT, start: usize, len: usize) -> Self { 40 | DefaultLexeme { 41 | start, 42 | len, 43 | faulty: false, 44 | tok_id, 45 | } 46 | } 47 | 48 | fn new_faulty(tok_id: StorageT, start: usize, len: usize) -> Self { 49 | DefaultLexeme { 50 | start, 51 | len, 52 | faulty: true, 53 | tok_id, 54 | } 55 | } 56 | 57 | fn tok_id(&self) -> StorageT { 58 | self.tok_id 59 | } 60 | 61 | fn span(&self) -> Span { 62 | Span::new(self.start, self.start + self.len) 63 | } 64 | 65 | fn faulty(&self) -> bool { 66 | self.faulty 67 | } 68 | } 69 | 70 | impl fmt::Display 71 | for DefaultLexeme 72 | { 73 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 74 | write!( 75 | f, 76 | "DefaultLexeme[{}..{}]", 77 | self.span().start(), 78 | self.span().end() 79 | ) 80 | } 81 | } 82 | 83 | impl Error 84 | for DefaultLexeme 85 | { 86 | } 87 | -------------------------------------------------------------------------------- /cfgrammar/src/lib/idxnewtype.rs: -------------------------------------------------------------------------------- 1 | // This macro generates a struct which exposes a u32 API (but which may, internally, use a smaller 2 | // storage size). 3 | 4 | use std::mem::size_of; 5 | 6 | #[cfg(feature = "bincode")] 7 | use bincode::{Decode, Encode}; 8 | use num_traits::{PrimInt, Unsigned}; 9 | #[cfg(feature = "serde")] 10 | use serde::{Deserialize, Serialize}; 11 | 12 | macro_rules! IdxNewtype { 13 | ($(#[$attr:meta])* $n: ident) => { 14 | $(#[$attr])* 15 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 16 | #[cfg_attr(feature="serde", derive(Serialize, Deserialize))] 17 | #[cfg_attr(feature="bincode", derive(Encode, Decode))] 18 | pub struct $n(pub T); 19 | 20 | impl From<$n> for usize { 21 | fn from($n(st): $n) -> Self { 22 | debug_assert!(size_of::() >= size_of::()); 23 | num_traits::cast(st).unwrap() 24 | } 25 | } 26 | 27 | impl From<$n> for u32 { 28 | fn from($n(st): $n) -> Self { 29 | debug_assert!(size_of::() >= size_of::()); 30 | num_traits::cast(st).unwrap() 31 | } 32 | } 33 | 34 | impl $n { 35 | pub fn as_storaget(&self) -> T { 36 | let $n(st) = self; 37 | *st 38 | } 39 | } 40 | } 41 | } 42 | 43 | IdxNewtype!( 44 | /// A type specifically for rule indices. 45 | /// 46 | /// It is guaranteed that `RIdx` can be converted, without loss of precision, to `usize` with 47 | /// the idiom `usize::from(...)`. 48 | RIdx 49 | ); 50 | IdxNewtype!( 51 | /// A type specifically for production indices (e.g. a rule `E::=A|B` would 52 | /// have two productions for the single rule `E`). 53 | /// 54 | /// It is guaranteed that `PIdx` can be converted, without loss of precision, to `usize` with 55 | /// the idiom `usize::from(...)`. 56 | PIdx 57 | ); 58 | IdxNewtype!( 59 | /// A type specifically for symbol indices (within a production). 60 | /// 61 | /// It is guaranteed that `SIdx` can be converted, without loss of precision, to `usize` with 62 | /// the idiom `usize::from(...)`. 63 | SIdx 64 | ); 65 | IdxNewtype!( 66 | /// A type specifically for token indices. 67 | /// 68 | /// It is guaranteed that `TIdx` can be converted, without loss of precision, to `usize` with 69 | /// the idiom `usize::from(...)`. 
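// A worked example of that idiom (a sketch): with `let tidx = TIdx(5u8);`,
// both `usize::from(tidx)` and `u32::from(tidx)` convert losslessly via the
// `From` impls generated above, and `tidx.as_storaget()` returns the raw `5u8`.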
70 | TIdx 71 | ); 72 | -------------------------------------------------------------------------------- /lrtable/src/lib/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::cognitive_complexity)] 2 | #![allow(clippy::too_many_arguments)] 3 | #![allow(clippy::type_complexity)] 4 | #![forbid(unsafe_code)] 5 | #![deny(unreachable_pub)] 6 | 7 | use std::{hash::Hash, mem::size_of}; 8 | 9 | #[cfg(feature = "bincode")] 10 | use bincode::{Decode, Encode}; 11 | use num_traits::{AsPrimitive, PrimInt, Unsigned}; 12 | #[cfg(feature = "serde")] 13 | use serde::{Deserialize, Serialize}; 14 | 15 | mod itemset; 16 | mod pager; 17 | mod stategraph; 18 | pub mod statetable; 19 | 20 | pub use crate::{ 21 | stategraph::StateGraph, 22 | statetable::{Action, StateTable, StateTableError, StateTableErrorKind}, 23 | }; 24 | use cfgrammar::yacc::YaccGrammar; 25 | 26 | macro_rules! IdxNewtype { 27 | ($(#[$attr:meta])* $n: ident) => { 28 | $(#[$attr])* 29 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 30 | #[cfg_attr(feature="serde", derive(Serialize, Deserialize))] 31 | #[cfg_attr(feature="bincode", derive(Encode, Decode))] 32 | pub struct $n(pub T); 33 | 34 | impl From<$n> for usize { 35 | fn from($n(st): $n) -> Self { 36 | debug_assert!(size_of::() >= size_of::()); 37 | num_traits::cast(st).unwrap() 38 | } 39 | } 40 | 41 | impl From<$n> for u32 { 42 | fn from($n(st): $n) -> Self { 43 | debug_assert!(size_of::() >= size_of::()); 44 | num_traits::cast(st).unwrap() 45 | } 46 | } 47 | 48 | impl $n { 49 | pub fn as_storaget(&self) -> T { 50 | let $n(st) = self; 51 | *st 52 | } 53 | } 54 | } 55 | } 56 | 57 | IdxNewtype!( 58 | /// A type specifically for state table indices. 59 | /// 60 | /// It is guaranteed that `StIdx` can be converted, without loss of precision, to `usize` with 61 | /// the idiom `usize::from(...)`. 62 | StIdx 63 | ); 64 | 65 | #[derive(Clone, Copy)] 66 | pub enum Minimiser { 67 | Pager, 68 | } 69 | 70 | pub fn from_yacc( 71 | grm: &YaccGrammar, 72 | m: Minimiser, 73 | ) -> Result<(StateGraph, StateTable), StateTableError> 74 | where 75 | usize: AsPrimitive, 76 | { 77 | match m { 78 | Minimiser::Pager => { 79 | let sg = pager::pager_stategraph(grm); 80 | let st = StateTable::new(grm, &sg)?; 81 | Ok((sg, st)) 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /doc/src/lexextensions.md: -------------------------------------------------------------------------------- 1 | # Lex extensions 2 | 3 | Flags can be specified at compile time through `LexFlags` or at `.l` file parse time using 4 | a `%grmtools{ }` section. At compile time these flags can be enabled using 5 | [`CTLexerBuilder`](https://docs.rs/lrlex/latest/lrlex/struct.CTLexerBuilder.html) methods. 6 | 7 | Flags commonly affect the parsing of the lex file, the interpretation regular expressions, 8 | and set limits. 9 | 10 | Boolean flags are specified by their name, and can be negated by prefixing with `!` 11 | other flags should specify their value immediately after the flag name. 12 | 13 | 14 | ## Example 15 | 16 | ``` 17 | %grmtools { 18 | allow_wholeline_comments, 19 | !octal, 20 | size_limit: 1024, 21 | } 22 | %% 23 | . 
"rule" 24 | ``` 25 | 26 | 27 | ## List of flags: 28 | 29 | | Flag | Value | Required | Regex[^regex] | 30 | |-------------------------------|-----------|----------|---------------| 31 | | `lexerkind` | [LexerKind](lexcompatibility.md#lexerkinds) | ✗ | ✗ | 32 | | `posix_escapes`[^†] | bool | ✗ | ✗ | 33 | | `allow_wholeline_comment`[^‡] | bool | ✗ | ✗ | 34 | | `case_insensitive` | bool | ✗ | ✓ | 35 | | `dot_matches_new_line` | bool | ✗ | ✓ | 36 | | `multi_line` | bool | ✗ | ✓ | 37 | | `octal` | bool | ✗ | ✓ | 38 | | `swap_greed` | bool | ✗ | ✓ | 39 | | `ignore_whitespace` | bool | ✗ | ✓ | 40 | | `unicode` | bool | ✗ | ✓ | 41 | | `size_limit` | usize | ✗ | ✓ | 42 | | `dfa_size_limit` | usize | ✗ | ✓ | 43 | | `nest_limit` | u32 | ✗ | ✓ | 44 | 45 | [^†]: Enable compatibility with posix escape sequences. 46 | [^‡]: Enables rust style `// comments` at the start of lines. 47 | Which requires escaping of `/` when used in a regex. 48 | [^regex]: ✓ Flag gets passed directly to `regex::RegexBuilder`. 49 | 50 | 51 | ## Flags affecting Posix compatibility 52 | 53 | As discussed in [Lex compatibility](lexcompatibility.md) the default behaviors of grmtools and rust's regex 54 | library have differed from that of posix lex. 55 | 56 | The following flags can change the behavior to match posix lex more closely. 57 | 58 | ``` 59 | %grmtools { 60 | !dot_matches_new_line, 61 | posix_escapes 62 | } 63 | %% 64 | ... 65 | ``` 66 | -------------------------------------------------------------------------------- /cfgrammar/src/lib/yacc/mod.rs: -------------------------------------------------------------------------------- 1 | #![deny(unreachable_pub)] 2 | 3 | pub mod ast; 4 | pub mod firsts; 5 | pub mod follows; 6 | pub mod grammar; 7 | pub mod parser; 8 | 9 | pub use self::{ 10 | grammar::{AssocKind, Precedence, SentenceGenerator, YaccGrammar}, 11 | parser::{YaccGrammarError, YaccGrammarErrorKind, YaccGrammarWarning, YaccGrammarWarningKind}, 12 | }; 13 | use proc_macro2::TokenStream; 14 | use quote::quote; 15 | 16 | #[cfg(feature = "serde")] 17 | use serde::{Deserialize, Serialize}; 18 | 19 | /// The particular Yacc variant this grammar makes use of. 20 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 21 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 22 | #[non_exhaustive] 23 | pub enum YaccKind { 24 | /// The original Yacc style as documented by 25 | /// [Johnson](http://dinosaur.compilertools.net/yacc/index.html), 26 | Original(YaccOriginalActionKind), 27 | /// Similar to the original Yacc style, but allowing individual rules' actions to have their 28 | /// own return type. 29 | Grmtools, 30 | /// The variant used in the [Eco language composition editor](http://soft-dev.org/src/eco/) 31 | Eco, 32 | } 33 | 34 | impl quote::ToTokens for YaccKind { 35 | fn to_tokens(&self, tokens: &mut TokenStream) { 36 | tokens.extend(match *self { 37 | YaccKind::Grmtools => quote!(::cfgrammar::yacc::YaccKind::Grmtools), 38 | YaccKind::Original(action_kind) => { 39 | quote!(::cfgrammar::yacc::YaccKind::Original(#action_kind)) 40 | } 41 | YaccKind::Eco => quote!(::cfgrammar::yacc::YaccKind::Eco), 42 | }) 43 | } 44 | } 45 | 46 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 47 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 48 | pub enum YaccOriginalActionKind { 49 | /// Execute user-specified actions attached to each production; also requires a %actiontype 50 | /// declaration. 51 | UserAction, 52 | /// Automatically create a parse tree instead of user-specified actions. 
53 | GenericParseTree, 54 | /// Do not do execute actions of any sort. 55 | NoAction, 56 | } 57 | 58 | impl quote::ToTokens for YaccOriginalActionKind { 59 | fn to_tokens(&self, tokens: &mut TokenStream) { 60 | tokens.extend(match *self { 61 | YaccOriginalActionKind::UserAction => { 62 | quote!(::cfgrammar::yacc::YaccOriginalActionKind::UserAction) 63 | } 64 | YaccOriginalActionKind::GenericParseTree => { 65 | quote!(::cfgrammar::yacc::YaccOriginalActionKind::GenericParseTree) 66 | } 67 | YaccOriginalActionKind::NoAction => { 68 | quote!(::cfgrammar::yacc::YaccOriginalActionKind::NoAction) 69 | } 70 | }) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /cfgrammar/src/lib/span.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "bincode")] 2 | use bincode::{Decode, Encode}; 3 | use proc_macro2::TokenStream; 4 | use quote::{ToTokens, TokenStreamExt, quote}; 5 | #[cfg(feature = "serde")] 6 | use serde::{Deserialize, Serialize}; 7 | 8 | /// A `Span` records what portion of the user's input something (e.g. a lexeme or production) 9 | /// references (i.e. the `Span` doesn't hold a reference / copy of the actual input). 10 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] 11 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 12 | #[cfg_attr(feature = "bincode", derive(Encode, Decode))] 13 | pub struct Span { 14 | start: usize, 15 | end: usize, 16 | } 17 | 18 | impl Span { 19 | /// Create a new span starting at byte `start` and ending at byte `end`. 20 | /// 21 | /// # Panics 22 | /// 23 | /// If `end` is less than `start`. 24 | pub fn new(start: usize, end: usize) -> Self { 25 | if end < start { 26 | panic!("Span starts ({}) after it ends ({})!", start, end); 27 | } 28 | Span { start, end } 29 | } 30 | 31 | /// Byte offset of the start of the span. 32 | pub fn start(&self) -> usize { 33 | self.start 34 | } 35 | 36 | /// Byte offset of the end of the span. 37 | pub fn end(&self) -> usize { 38 | self.end 39 | } 40 | 41 | /// Length in bytes of the span. 42 | pub fn len(&self) -> usize { 43 | self.end - self.start 44 | } 45 | 46 | /// Returns `true` if this `Span` covers 0 bytes, or `false` otherwise. 47 | pub fn is_empty(&self) -> bool { 48 | self.len() == 0 49 | } 50 | } 51 | 52 | /// Implemented for errors and warnings to provide access to their spans. 53 | pub trait Spanned: std::fmt::Display { 54 | /// Returns the spans associated with the error, always containing at least 1 span. 55 | /// 56 | /// Refer to [SpansKind](crate::yacc::parser::SpansKind) via [spanskind](Self::spanskind) 57 | /// for the meaning and interpretation of spans and their ordering. 58 | fn spans(&self) -> &[Span]; 59 | /// Returns the `SpansKind` associated with this error. 60 | fn spanskind(&self) -> crate::yacc::parser::SpansKind; 61 | } 62 | 63 | impl ToTokens for Span { 64 | fn to_tokens(&self, tokens: &mut TokenStream) { 65 | let Span { start, end } = self; 66 | tokens.append_all(quote! {::cfgrammar::Span::new(#start, #end)}); 67 | } 68 | } 69 | 70 | /// A possibly inexact location which could either be a `Span`, 71 | /// a command-line option, or some other location described textually. 
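// A usage sketch: `Location::from(Span::new(0, 3))` produces `Location::Span(..)`
// via the `From` impl below, whereas an error arising from e.g. a command-line
// flag rather than from input text can be reported as `Location::CommandLine`.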
72 | #[derive(Clone, Debug, Eq, PartialEq)] 73 | pub enum Location { 74 | Span(Span), 75 | CommandLine, 76 | Other(String), 77 | } 78 | 79 | impl From for Location { 80 | fn from(span: Span) -> Location { 81 | Location::Span(span) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /lrpar/cttests/src/calc_wasm.rs: -------------------------------------------------------------------------------- 1 | #[cfg(wasm32_unknown)] 2 | use wasm_bindgen::prelude::*; 3 | 4 | use lrlex::lrlex_mod; 5 | use lrpar::lrpar_mod; 6 | 7 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. By default the module name will be 8 | // `calc_l` (i.e. the file name, minus any extensions, with a suffix of `_l`). 9 | lrlex_mod!("calc_wasm.l"); 10 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. By default the module name will be 11 | // `calc_y` (i.e. the file name, minus any extensions, with a suffix of `_y`). 12 | lrpar_mod!("calc_wasm.y"); 13 | 14 | #[cfg_attr(wasm32_unknown, wasm_bindgen)] 15 | #[allow(unused)] 16 | pub fn calculate(l: &str) -> Result { 17 | // Get the `LexerDef` for the `calc` language. 18 | let lexerdef = calc_wasm_l::lexerdef(); 19 | if l.trim().is_empty() { 20 | return Err("input is empty".to_string()); 21 | } 22 | // Now we create a lexer with the `lexer` method with which we can lex an input. 23 | let lexer = lexerdef.lexer(l); 24 | // Pass the lexer to the parser and lex and parse the input. 25 | let (res, errs) = calc_wasm_y::parse(&lexer); 26 | if !errs.is_empty() { 27 | let mut ret = String::new(); 28 | for e in errs { 29 | use lrpar::LexParseError; 30 | match e { 31 | LexParseError::ParseError(e) => { 32 | let repairs_flag = !e.repairs().is_empty(); 33 | ret.push_str(&format!("Error: {}\n Repairs: {}", e, repairs_flag)); 34 | } 35 | e => ret.push_str(&format!("{}\n", e)), 36 | }; 37 | } 38 | if let Some(Err(e)) = res { 39 | ret.push_str(&format!("{}\n", e)); 40 | } 41 | return Err(ret); 42 | } 43 | match res { 44 | Some(Ok(r)) => Ok(r), 45 | Some(Err(e)) => Err(e.to_string()), 46 | None => Err("Unable to parse".to_string()), 47 | } 48 | } 49 | 50 | #[cfg(test)] 51 | mod test { 52 | use super::calculate; 53 | #[cfg(wasm32_unknown)] 54 | use wasm_bindgen_test::*; 55 | 56 | #[cfg_attr(wasm32_unknown, wasm_bindgen_test)] 57 | #[test] 58 | fn test_calc_14() { 59 | assert_eq!(calculate("2 + 3 * 4").unwrap(), 14); 60 | } 61 | 62 | #[cfg_attr(wasm32_unknown, wasm_bindgen_test)] 63 | #[test] 64 | fn test_lex_error() { 65 | assert!(calculate("#1 + #2").is_err()); 66 | } 67 | 68 | #[cfg_attr(wasm32_unknown, wasm_bindgen_test)] 69 | #[test] 70 | fn test_recovery() { 71 | // We really want to test this recovery path, since it contains 72 | // calls to `Instant::now()` which panics on `std` 73 | // Thus we need to check that the `web_time` crate is working. 74 | let x = calculate("1+"); 75 | match x { 76 | Err(e) => assert!(e.contains("Repairs: true")), 77 | Ok(e) => panic!("unexpectedly parsed {}", e), 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast/src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::unnecessary_wraps)] 2 | 3 | use std::io::{self, BufRead, Write}; 4 | 5 | use cfgrammar::Span; 6 | use lrlex::{DefaultLexerTypes, lrlex_mod}; 7 | use lrpar::{NonStreamingLexer, lrpar_mod}; 8 | 9 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. 
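// (Compare `calc_wasm.rs` above, where `lrlex_mod!("calc_wasm.l")` brings a
// module named `calc_wasm_l` into scope; the naming convention is described
// next.)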
By default the module name will be 10 | // `calc_l` (i.e. the file name, minus any extensions, with a suffix of `_l`). 11 | lrlex_mod!("calc.l"); 12 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. By default the module name will be 13 | // `calc_y` (i.e. the file name, minus any extensions, with a suffix of `_y`). 14 | lrpar_mod!("calc.y"); 15 | 16 | use calc_y::Expr; 17 | 18 | fn main() { 19 | // Get the `LexerDef` for the `calc` language. 20 | let lexerdef = calc_l::lexerdef(); 21 | let stdin = io::stdin(); 22 | loop { 23 | print!(">>> "); 24 | io::stdout().flush().ok(); 25 | match stdin.lock().lines().next() { 26 | Some(Ok(ref l)) => { 27 | if l.trim().is_empty() { 28 | continue; 29 | } 30 | // Now we create a lexer with the `lexer` method with which we can lex an input. 31 | let lexer = lexerdef.lexer(l); 32 | // Pass the lexer to the parser and lex and parse the input. 33 | let (res, errs) = calc_y::parse(&lexer); 34 | for e in errs { 35 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 36 | } 37 | if let Some(Ok(r)) = res { 38 | match eval(&lexer, r) { 39 | Ok(i) => println!("Result: {}", i), 40 | Err((span, msg)) => { 41 | let ((line, col), _) = lexer.line_col(span); 42 | eprintln!( 43 | "Evaluation error at line {} column {}, '{}' {}.", 44 | line, 45 | col, 46 | lexer.span_str(span), 47 | msg 48 | ) 49 | } 50 | } 51 | } 52 | } 53 | _ => break, 54 | } 55 | } 56 | } 57 | 58 | fn eval( 59 | lexer: &dyn NonStreamingLexer>, 60 | e: Expr, 61 | ) -> Result { 62 | match e { 63 | Expr::Add { span, lhs, rhs } => eval(lexer, *lhs)? 64 | .checked_add(eval(lexer, *rhs)?) 65 | .ok_or((span, "overflowed")), 66 | Expr::Mul { span, lhs, rhs } => eval(lexer, *lhs)? 67 | .checked_mul(eval(lexer, *rhs)?) 68 | .ok_or((span, "overflowed")), 69 | Expr::Number { span } => lexer 70 | .span_str(span) 71 | .parse::() 72 | .map_err(|_| (span, "cannot be represented as a u64")), 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /lrpar/examples/calc_ast_arena/src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::unnecessary_wraps)] 2 | 3 | use std::io::{self, BufRead, Write}; 4 | 5 | use cfgrammar::Span; 6 | use lrlex::{DefaultLexerTypes, lrlex_mod}; 7 | use lrpar::{NonStreamingLexer, lrpar_mod}; 8 | 9 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. By default the module name will be 10 | // `calc_l` (i.e. the file name, minus any extensions, with a suffix of `_l`). 11 | lrlex_mod!("calc.l"); 12 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. By default the module name will be 13 | // `calc_y` (i.e. the file name, minus any extensions, with a suffix of `_y`). 14 | lrpar_mod!("calc.y"); 15 | 16 | use calc_y::Expr; 17 | 18 | fn main() { 19 | // Get the `LexerDef` for the `calc` language. 20 | let lexerdef = calc_l::lexerdef(); 21 | let stdin = io::stdin(); 22 | loop { 23 | print!(">>> "); 24 | io::stdout().flush().ok(); 25 | match stdin.lock().lines().next() { 26 | Some(Ok(ref l)) => { 27 | if l.trim().is_empty() { 28 | continue; 29 | } 30 | // Now we create a lexer with the `lexer` method with which we can lex an input. 31 | let lexer = lexerdef.lexer(l); 32 | let arena = bumpalo::Bump::new(); 33 | // Pass the lexer to the parser and lex and parse the input. 
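// The extra `&arena` argument below comes from the grammar's
// `%parse-param arena: &'ast Bump` declaration in `calc.y` above: the
// parse-param is threaded through to every action, letting them allocate
// `Expr` nodes in the arena instead of boxing them.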
34 | let (res, errs) = calc_y::parse(&lexer, &arena); 35 | for e in errs { 36 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 37 | } 38 | if let Some(Ok(r)) = res { 39 | match eval(&lexer, &r) { 40 | Ok(i) => println!("Result: {}", i), 41 | Err((span, msg)) => { 42 | let ((line, col), _) = lexer.line_col(span); 43 | eprintln!( 44 | "Evaluation error at line {} column {}, '{}' {}.", 45 | line, 46 | col, 47 | lexer.span_str(span), 48 | msg 49 | ) 50 | } 51 | } 52 | } 53 | } 54 | _ => break, 55 | } 56 | } 57 | } 58 | 59 | fn eval( 60 | lexer: &dyn NonStreamingLexer>, 61 | e: &Expr, 62 | ) -> Result { 63 | match e { 64 | Expr::Add { span, lhs, rhs } => eval(lexer, lhs)? 65 | .checked_add(eval(lexer, rhs)?) 66 | .ok_or((*span, "overflowed")), 67 | Expr::Mul { span, lhs, rhs } => eval(lexer, lhs)? 68 | .checked_mul(eval(lexer, rhs)?) 69 | .ok_or((*span, "overflowed")), 70 | Expr::Number { span } => lexer 71 | .span_str(*span) 72 | .parse::() 73 | .map_err(|_| (*span, "cannot be represented as a u64")), 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /lrpar/src/lib/dijkstra.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, hash::Hash}; 2 | 3 | use indexmap::{ 4 | indexmap, 5 | map::{Entry, IndexMap}, 6 | }; 7 | 8 | /// Starting at `start_node`, return, in arbitrary order, all least-cost success nodes. 9 | /// 10 | /// * `neighbours` takes a node `n` and returns an iterator consisting of all `n`'s neighbouring 11 | /// nodes. 12 | /// * `success` takes a node `n` and returns `true` if it is a success node or `false` otherwise. 13 | /// 14 | /// The name of this function isn't entirely accurate: this isn't Dijkstra's original algorithm or 15 | /// one of its well-known variants. However, unlike the astar_all function it doesn't expect a 16 | /// heuristic and it also filters out some duplicates. 17 | pub(crate) fn dijkstra( 18 | start_node: N, 19 | neighbours: FN, 20 | merge: FM, 21 | success: FS, 22 | ) -> Vec 23 | where 24 | N: Debug + Clone + Hash + Eq + PartialEq, 25 | FN: Fn(bool, &N, &mut Vec<(u16, N)>) -> bool, 26 | FM: Fn(&mut N, N), 27 | FS: Fn(&N) -> bool, 28 | { 29 | let mut scs_nodes = Vec::new(); 30 | let mut todo: Vec> = vec![indexmap![start_node.clone() => start_node]]; 31 | let mut c: u16 = 0; 32 | let mut next = Vec::new(); 33 | loop { 34 | if todo[usize::from(c)].is_empty() { 35 | c = c.checked_add(1).unwrap(); 36 | if usize::from(c) == todo.len() { 37 | return Vec::new(); 38 | } 39 | continue; 40 | } 41 | 42 | let (_, n) = todo[usize::from(c)].pop().unwrap(); 43 | if success(&n) { 44 | scs_nodes.push(n); 45 | break; 46 | } 47 | 48 | if !neighbours(true, &n, &mut next) { 49 | return Vec::new(); 50 | } 51 | for (nbr_cost, nbr) in next.drain(..) { 52 | let off = usize::from(nbr_cost); 53 | todo.resize(todo.len() + off + 1, IndexMap::new()); 54 | match todo[off].entry(nbr.clone()) { 55 | Entry::Vacant(e) => { 56 | e.insert(nbr); 57 | } 58 | Entry::Occupied(mut e) => { 59 | merge(e.get_mut(), nbr); 60 | } 61 | } 62 | } 63 | } 64 | 65 | let mut scs_todo = todo 66 | .drain(usize::from(c)..usize::from(c) + 1) 67 | .next() 68 | .unwrap(); 69 | while let Some((_, n)) = scs_todo.pop() { 70 | if success(&n) { 71 | scs_nodes.push(n); 72 | continue; 73 | } 74 | if !neighbours(false, &n, &mut next) { 75 | return Vec::new(); 76 | } 77 | for (nbr_cost, nbr) in next.drain(..) 
{ 78 | if nbr_cost == c { 79 | match scs_todo.entry(nbr.clone()) { 80 | Entry::Vacant(e) => { 81 | e.insert(nbr); 82 | } 83 | Entry::Occupied(mut e) => { 84 | merge(e.get_mut(), nbr); 85 | } 86 | } 87 | } 88 | } 89 | } 90 | 91 | scs_nodes 92 | } 93 | -------------------------------------------------------------------------------- /lrpar/examples/start_states/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Write}; 2 | 3 | use cfgrammar::RIdx; 4 | use lrlex::{DefaultLexeme, lrlex_mod}; 5 | use lrpar::{Lexeme, lrpar_mod}; 6 | 7 | // Using `lrlex_mod!` brings the lexer for `comment.l` into scope. By default the module name will be 8 | // `comment_l` (i.e. the file name, minus any extensions, with a suffix of `_l`). 9 | lrlex_mod!("comment.l"); 10 | // Using `lrpar_mod!` brings the parser for `comment.y` into scope. By default the module name will be 11 | // `comment_y` (i.e. the file name, minus any extensions, with a suffix of `_y`). 12 | lrpar_mod!("comment.y"); 13 | 14 | use comment_y::Node; 15 | 16 | fn main() { 17 | // Get the `LexerDef` for the `comment` language. 18 | let lexerdef = comment_l::lexerdef(); 19 | let stdin = io::stdin(); 20 | loop { 21 | print!(">>> "); 22 | io::stdout().flush().ok(); 23 | match stdin.lock().lines().next() { 24 | Some(Ok(ref l)) => { 25 | if l.trim().is_empty() { 26 | continue; 27 | } 28 | // Now we create a lexer with the `lexer` method with which we can lex an input. 29 | let lexer = lexerdef.lexer(l); 30 | // Pass the lexer to the parser and lex and parse the input. 31 | let (pt, errs) = comment_y::parse(&lexer); 32 | for e in errs { 33 | println!("{}", e.pp(&lexer, &comment_y::token_epp)); 34 | } 35 | if let Some(pt) = pt { 36 | // Success! We parsed the input and created a parse tree. 37 | println!("Result: {}", Eval::new(l).eval(&pt)); 38 | } 39 | } 40 | _ => break, 41 | } 42 | } 43 | } 44 | 45 | struct Eval<'a> { 46 | s: &'a str, 47 | } 48 | 49 | impl<'a> Eval<'a> { 50 | fn new(s: &'a str) -> Self { 51 | Eval { s } 52 | } 53 | 54 | fn eval(&self, n: &Node<DefaultLexeme<u32>, u32>) -> String { 55 | match *n { 56 | Node::Nonterm { 57 | ridx: RIdx(ridx), 58 | ref nodes, 59 | } if ridx == comment_y::R_EXPR => { 60 | let mut s = String::new(); 61 | for node in nodes { 62 | s.push_str(&self.eval(node)); 63 | } 64 | s 65 | } 66 | Node::Nonterm { 67 | ridx: RIdx(ridx), 68 | ref nodes, 69 | } if ridx == comment_y::R_TEXT => { 70 | if nodes.len() == 1 { 71 | if let Node::Term { lexeme } = nodes[0] { 72 | self.s[lexeme.span().start()..lexeme.span().end()].to_string() 73 | } else { 74 | unreachable!(); 75 | } 76 | } else { 77 | let mut s = String::new(); 78 | for node in nodes { 79 | s.push_str(&self.eval(node)); 80 | } 81 | s 82 | } 83 | } 84 | _ => unreachable!(), 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /lrpar/examples/calc_parsetree/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Write}; 2 | 3 | use cfgrammar::RIdx; 4 | use lrlex::{DefaultLexeme, lrlex_mod}; 5 | use lrpar::{Lexeme, lrpar_mod}; 6 | 7 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. By default the module name will be 8 | // `calc_l` (i.e. the file name, minus any extensions, with a suffix of `_l`). 9 | lrlex_mod!("calc.l"); 10 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. By default the module name will be 11 | // `calc_y` (i.e.
the file name, minus any extensions, with a suffix of `_y`). 12 | lrpar_mod!("calc.y"); 13 | 14 | use calc_y::Node; 15 | 16 | fn main() { 17 | // Get the `LexerDef` for the `calc` language. 18 | let lexerdef = calc_l::lexerdef(); 19 | let stdin = io::stdin(); 20 | loop { 21 | print!(">>> "); 22 | io::stdout().flush().ok(); 23 | match stdin.lock().lines().next() { 24 | Some(Ok(ref l)) => { 25 | if l.trim().is_empty() { 26 | continue; 27 | } 28 | // Now we create a lexer with the `lexer` method with which we can lex an input. 29 | let lexer = lexerdef.lexer(l); 30 | // Pass the lexer to the parser and lex and parse the input. 31 | let (pt, errs) = calc_y::parse(&lexer); 32 | for e in errs { 33 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 34 | } 35 | if let Some(pt) = pt { 36 | // Success! We parsed the input and created a parse tree. 37 | println!("Result: {}", Eval::new(l).eval(&pt)); 38 | } 39 | } 40 | _ => break, 41 | } 42 | } 43 | } 44 | 45 | struct Eval<'a> { 46 | s: &'a str, 47 | } 48 | 49 | impl<'a> Eval<'a> { 50 | fn new(s: &'a str) -> Self { 51 | Eval { s } 52 | } 53 | 54 | fn eval(&self, n: &Node<DefaultLexeme<u32>, u32>) -> i64 { 55 | match *n { 56 | Node::Nonterm { 57 | ridx: RIdx(ridx), 58 | ref nodes, 59 | } if ridx == calc_y::R_EXPR => { 60 | if nodes.len() == 1 { 61 | self.eval(&nodes[0]) 62 | } else { 63 | debug_assert_eq!(nodes.len(), 3); 64 | self.eval(&nodes[0]) + self.eval(&nodes[2]) 65 | } 66 | } 67 | Node::Nonterm { 68 | ridx: RIdx(ridx), 69 | ref nodes, 70 | } if ridx == calc_y::R_TERM => { 71 | if nodes.len() == 1 { 72 | self.eval(&nodes[0]) 73 | } else { 74 | debug_assert_eq!(nodes.len(), 3); 75 | self.eval(&nodes[0]) * self.eval(&nodes[2]) 76 | } 77 | } 78 | Node::Nonterm { 79 | ridx: RIdx(ridx), 80 | ref nodes, 81 | } if ridx == calc_y::R_FACTOR => { 82 | if nodes.len() == 1 { 83 | if let Node::Term { lexeme } = nodes[0] { 84 | self.s[lexeme.span().start()..lexeme.span().end()] 85 | .parse() 86 | .unwrap() 87 | } else { 88 | unreachable!(); 89 | } 90 | } else { 91 | debug_assert_eq!(nodes.len(), 3); 92 | self.eval(&nodes[1]) 93 | } 94 | } 95 | _ => unreachable!(), 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /cfgrammar/src/lib/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::cognitive_complexity)] 2 | #![allow(clippy::many_single_char_names)] 3 | #![allow(clippy::new_without_default)] 4 | #![allow(clippy::unnecessary_wraps)] 5 | #![allow(clippy::upper_case_acronyms)] 6 | #![forbid(unsafe_code)] 7 | #![deny(unreachable_pub)] 8 | 9 | //! A library for manipulating Context Free Grammars (CFG). It is impractical to fully homogenise 10 | //! all the types of grammars out there, so the aim is for different grammar types 11 | //! to have completely separate implementations. Code that wants to be generic over more than one 12 | //! grammar type can then use an "adapter" to homogenise the particular grammar types of interest. 13 | //! Currently this is a little academic, since only Yacc-style grammars are supported (albeit 14 | //! several variants of Yacc grammars). 15 | //! 16 | //! Unfortunately, CFG terminology is something of a mess. Some people use different terms for the 17 | //! same concept interchangeably; some use different terms to convey subtle differences of meaning 18 | //! (but without complete uniformity). "Token", "terminal", and "lexeme" are examples of this: they 19 | //! are synonyms in some tools and papers, but not in others. 20 | //!
21 | //! In order to make this library somewhat coherent, we therefore use some basic terminology 22 | //! guidelines for major concepts (acknowledging that this will cause clashes with some grammar 23 | //! types). 24 | //! 25 | //! * A *grammar* is an ordered sequence of *productions*. 26 | //! * A *production* is an ordered sequence of *symbols*. 27 | //! * A *rule* maps a name to one or more productions. 28 | //! * A *token* is the name of a syntactic element. 29 | //! 30 | //! For example, in the following Yacc grammar: 31 | //! 32 | //! R1: "a" "b" | R2; 33 | //! R2: "c"; 34 | //! 35 | //! the following statements are true: 36 | //! 37 | //! * There are 3 productions. 1: ["a", "b"] 2: ["R2"] 3: ["c"] 38 | //! * There are two rules: R1 and R2. The mapping to productions is {R1: {1, 2}, R2: {3}} 39 | //! * There are three tokens: a, b, and c. 40 | //! 41 | //! cfgrammar makes the following guarantees about grammars: 42 | //! 43 | //! * Productions are numbered from `0` to `prods_len() - 1` (inclusive). 44 | //! * Rules are numbered from `0` to `rules_len() - 1` (inclusive). 45 | //! * Tokens are numbered from `0` to `toks_len() - 1` (inclusive). 46 | //! * The StorageT type used to store productions, rules, and token indices can be infallibly 47 | //! converted into usize (see [`TIdx`](struct.TIdx.html) and friends for more details). 48 | //! 49 | //! For most current uses, the main function to investigate is 50 | //! [`YaccGrammar::new()`](yacc/grammar/struct.YaccGrammar.html#method.new) and/or 51 | //! [`YaccGrammar::new_with_storaget()`](yacc/grammar/struct.YaccGrammar.html#method.new_with_storaget) 52 | //! which take as input a Yacc grammar. 53 | 54 | #[cfg(feature = "bincode")] 55 | use bincode::{Decode, Encode}; 56 | #[cfg(feature = "serde")] 57 | use serde::{Deserialize, Serialize}; 58 | 59 | #[doc(hidden)] 60 | pub mod header; 61 | mod idxnewtype; 62 | #[doc(hidden)] 63 | pub mod markmap; 64 | pub mod newlinecache; 65 | pub mod span; 66 | pub mod yacc; 67 | 68 | pub use newlinecache::NewlineCache; 69 | pub use span::{Location, Span, Spanned}; 70 | 71 | /// A type specifically for rule indices. 72 | pub use crate::idxnewtype::{PIdx, RIdx, SIdx, TIdx}; 73 | 74 | #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] 75 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 76 | #[cfg_attr(feature = "bincode", derive(Encode, Decode))] 77 | pub enum Symbol<StorageT> { 78 | Rule(RIdx<StorageT>), 79 | Token(TIdx<StorageT>), 80 | } 81 | -------------------------------------------------------------------------------- /doc/src/lexcompatibility.md: -------------------------------------------------------------------------------- 1 | # Lex compatibility 2 | 3 | grmtools currently supports one common use of Lex, which is to produce a 4 | sequence of tokens. All Lex files require at least some porting to grmtools, 5 | though in many cases this is fairly trivial. Nevertheless, aspects such as 6 | the longest match rule are identical to Lex, and we assume familiarity with Lex 7 | syntax and its major features: the [Lex 8 | manual](https://web.archive.org/web/20220402195947/dinosaur.compilertools.net/lex/index.html) is recommended 9 | reading. 10 | 11 | 12 | ## Major differences 13 | 14 | There are several major differences between Lex and grmtools: 15 | 16 | * Lex has its own regular expression language whereas grmtools uses the well 17 | known Rust [regex crate](https://crates.io/crates/regex) for regular 18 | expressions.
These two regular expression languages are very similar, but 19 | complex regular expressions might not be supported under one or the other. 20 | 21 | * Lex files consist of a sequence of regular expressions and an action for each. 22 | grmtools lex files consist of a sequence of regular expressions and a token 23 | name. Actions are not currently supported (and, by extension, nor are 24 | special action expressions such as `ECHO` and `REJECT`). 25 | 26 | * Both Lex and grmtools lex files support start conditions as an optional prefix 27 | to regular expressions, listing necessary states for the input expression to 28 | be considered for matching against the input. Lex uses a special action 29 | expression `BEGIN(state)` to switch to the named `state`. Start states in grmtools 30 | are described in [start_states](start_states.md). 31 | 32 | * Character sets, and changes to internal array sizes, are not supported by grmtools. 33 | 34 | * Escape sequences: 35 | 36 | In addition to the escape sequences involved in the escaping of regular expressions, 37 | Lex and grmtools support the escape sequences `\123` (octal), `\x1234` (hexadecimal), 38 | and the ASCII escape sequences `\\` `\a` `\f` `\n` `\r` `\t` `\v`. 39 | 40 | Lex also interprets the escape sequence `\b` as backspace, while regex treats `\b` 41 | as a word boundary; consequently grmtools does too. The Lex behavior can be enabled 42 | using [posix_escapes](lexextensions.md). 43 | 44 | Additional escape sequences supported by regex: 45 | 46 | The `\u1234` and `\U12345678` escape sequences for unicode characters, 47 | the `\p`,`\P` unicode character classes, as well as the `\d` `\D` `\s` `\S` 48 | `\w` `\W` perl character classes, and `\A` `\b` `\B` `\z` escape sequences. 49 | 50 | Both Lex and grmtools support escaping arbitrary characters: for all other characters 51 | besides those listed above, when given an escaped character `\c` it will be passed to 52 | the regex engine as the character `c`. This is useful when a character is used within 53 | the lex format. 54 | 55 | An example of this is when the character `<` is used at the beginning of a regex. Both Lex 56 | and grmtools interpret this as the beginning of a start condition prefix, which can be 57 | escaped with `\<` to ensure it is treated as the start of a regular expression. 58 | 59 | The set of characters to which this behavior applies is affected by the escape sequence 60 | differences listed above. 61 | 62 | * Lex treats lines in the rules section beginning with whitespace as code to be copied verbatim 63 | into the generated lexer source. Grmtools lex does not support these and produces an error. 64 | 65 | ## LexerKinds 66 | 67 | ### LRNonStreamingLexerKind 68 | 69 | Currently lrlex only supports a single `LexKind::LRNonStreamingLexerKind`, which is the default if unspecified. 70 | -------------------------------------------------------------------------------- /doc/src/actioncode.md: -------------------------------------------------------------------------------- 1 | # Action code and return types 2 | 3 | ## Action code 4 | 5 | Action code is normal Rust code with the addition of the following special variables: 6 | 7 | * `$1` ... `$n` refer to the respective symbol in the production, numbered 8 | from 1 (i.e. `$1` refers to the first symbol in the production). If the 9 | symbol references a rule `R` then an instance of `R`'s type will be stored 10 | in the `$i` variable.
If the symbol references a lexeme then a 11 | `Result<Lexeme<StorageT>, Lexeme<StorageT>>` instance is returned where the 12 | `Ok` variant is used for lexemes that are directly derived from the user's 13 | input and the `Err` variant is used for lexemes that have been inserted by 14 | [error recovery](errorrecovery.md). 15 | 16 | * `$lexer` allows access to the lexer and its [various 17 | functions](https://softdevteam.github.io/grmtools/master/api/lrpar/trait.Lexer.html). 18 | The most commonly used of these is the `span_str` function, which allows us 19 | to extract `&'input str`s from a `Span` (e.g. to extract the string 20 | represented by a `Lexeme`, we would use `$lexer.span_str(lexeme.span())`). 21 | As this may suggest, actions may also reference the special lifetime 22 | `'input` (without any `$` prefix), which allows strings to be returned / 23 | stored by the grammar without copying memory. 24 | 25 | * `$span` is a 26 | [`cfgrammar::Span`](https://softdevteam.github.io/grmtools/master/api/cfgrammar/struct.Span.html) 27 | which captures how much of the user's input the current production matched. 28 | 29 | * `$$` is equivalent to `$` in normal Rust code. 30 | 31 | Any other variables beginning with `$` are treated as errors. 32 | 33 | 34 | ## Return types 35 | 36 | Productions' return types can be any arbitrary Rust type. You may in addition 37 | make use of the following: 38 | 39 | * The generic parameter `StorageT` references the type of lexemes and is 40 | typically used with the 41 | [`Lexeme`](https://softdevteam.github.io/grmtools/master/api/lrpar/struct.Lexeme.html) 42 | type i.e. `Lexeme<StorageT>`. This allows you to return lexemes from rules. 43 | 44 | * The lifetime `'input` allows you to extract strings whose lifetime is tied 45 | to the lexer and return them from rules / store them in structs without 46 | copying. `Lexer::span_str` returns such strings and the typical idiom of use 47 | is `&'input str`. 48 | 49 | 50 | ## Additional parse parameter 51 | 52 | A single extra parameter can be passed to action functions if the `%parse-param 53 | <var>: <type>` declaration is used. The variable `<var>` is then visible in all 54 | action code. `<type>` must implement the [`Clone` 55 | trait](https://doc.rust-lang.org/stable/std/clone/trait.Clone.html) (note that `Copy` 56 | bounds imply `Clone`, and `&` references implement `Copy`). 57 | 58 | For example if a grammar has a declaration: 59 | 60 | ``` 61 | %parse-param p: u64 62 | ``` 63 | 64 | then the statically generated `parse` function will take two parameters 65 | `(lexer: &..., p: u64)` and the variable `p` can be used in action code e.g.: 66 | 67 | ``` 68 | R -> ...: 69 | 'ID' { format!("{}{}", p, ...) } 70 | ; 71 | ``` 72 | 73 | # Generic parse parameter 74 | 75 | If `%parse-param` needs to be generic, additional type variables and lifetimes 76 | can be specified in the `%parse-generics T1, T2, ...` declaration. 77 | 78 | For example, if a grammar has following declarations: 79 | 80 | ``` 81 | %parse-generics T: FromStr 82 | %parse-param p: T 83 | ``` 84 | 85 | then the `parse` function will take an additional parameter of type `T`. 86 | 87 | This can be used, for example, [to allocate AST nodes in a memory arena.](https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/calc_ast_arena).
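As a minimal sketch of what `%parse-param` looks like at the call site (the module name `r_y` and the value `41` are purely illustrative, not from a shipped example):

```rust
// With `%parse-param p: u64` in the grammar, the generated `parse`
// function takes the parameter after the lexer argument.
let lexer = lexerdef.lexer("some input");
let (res, errs) = r_y::parse(&lexer, 41);
```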
88 | -------------------------------------------------------------------------------- /doc/src/start_states.md: -------------------------------------------------------------------------------- 1 | # Start States 2 | 3 | The following explains the syntax and semantics of Start States in lrlex.
4 | A working example can be found in the repository at [lrpar/examples/start_states][1] 5 | 6 | [1]: https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/start_states 7 | ## Motivation 8 | 9 | Start states are a feature from lex which can be used for context sensitive lexing. 10 | For instance, they can be used to implement nested comments (see the example in the repository), 11 | such that the start/end markers of tokens maintain balance. 12 | 13 | This is achieved by making rules which are qualified to match only when the lexer is in a 14 | particular state. Additionally the lexer has a stack of states, and matching rules perform actions 15 | which modify the stack. 16 | 17 | ## The INITIAL start state 18 | Unless specified otherwise all lex rules are members of the *INITIAL* start state. 19 | 20 | ``` 21 | %% 22 | <INITIAL>a "A" 23 | <INITIAL>[\t \n]+ ; 24 | ``` 25 | 26 | This is equivalent to the lex file below with no start states specified. 27 | 28 | ``` 29 | %% 30 | a "A" 31 | [\t \n]+ ; 32 | ``` 33 | 34 | ## Rules matching multiple states 35 | 36 | Rules can be matched in multiple states: just separate the states a rule should match in with commas. 37 | The following matches the `a` character when in either of the states `FirstState` or `SecondState`. 38 | 39 | ``` 40 | <FirstState, SecondState>a "A" 41 | ``` 42 | 43 | ## Differences from POSIX lex 44 | 45 | In POSIX lex start states are entered via code in the action, through either `BEGIN(STATE)` or 46 | calling combinations of `yy_push_state` and `yy_pop_state`. 47 | 48 | Because lrlex is actionless and does not support code actions, we instead have operators to 49 | perform the common modifications to the stack of start states. 50 | 51 | ### Push 52 | The push operator is given by adding '+' to the target state on the right hand side within 53 | angle brackets. When the following regex matches in *CURRENT_STATE*, it pushes *TARGET_STATE* onto 54 | the top of the stack of states. 55 | 56 | ``` 57 | <CURRENT_STATE>Regex <+TARGET_STATE>; 58 | ``` 59 | 60 | ### Pop 61 | The pop operator is given by adding '-' to the target state on the right hand side within angle 62 | brackets. When in the current state, the following pops the current state off of the 63 | stack of states, similarly to calling `yy_pop_state` from action code. 64 | ``` 65 | <CURRENT_STATE>Regex <-CURRENT_STATE>; 66 | ``` 67 | 68 | ### ReplaceStack 69 | The ReplaceStack operator is given by naming the target state within angle brackets. 70 | The ReplaceStack op clears the entire stack of states, then pushes the target state. 71 | 72 | ``` 73 | <CURRENT_STATE>Regex <TARGET_STATE>; 74 | ``` 75 | 76 | ### Returning a token while performing an operator. 77 | Start state operators can be combined with returning a token, for example: 78 | 79 | ``` 80 | Regex <+TARGET_STATE>"TOKEN" 81 | ``` 82 | 83 | ## Adding a start state 84 | Start states come in two forms, *exclusive* and *inclusive*. These are given by `%x` and `%s` 85 | respectively. 86 | 87 | ### Exclusive states 88 | In an exclusive state, a rule can be matched *only* if it is prefixed with the state specified. 89 | In the following, because `ExclState` is *exclusive*, the `#=` rule is only matched during the 90 | `INITIAL` state, while the `a` and `=#` characters are only matched while in the `ExclState`. 91 | 92 | ``` 93 | %x ExclState 94 | %% 95 | 96 | #= <+ExclState>; 97 | <ExclState>a "A" 98 | <ExclState>=# <-ExclState>; 99 | ``` 100 | 101 | ### Inclusive states 102 | 103 | Inclusive states are added to the set of rules to be matched when the start state is unspecified.
104 | 105 | ``` 106 | %s InclusiveState 107 | %% 108 | 109 | a "A" 110 | <InclusiveState>b "B" 111 | #= <+InclusiveState>; 112 | <InclusiveState>=# <-InclusiveState>; 113 | ``` 114 | 115 | Is equivalent to the following using exclusive states. 116 | 117 | ``` 118 | %x Excl 119 | %% 120 | 121 | <INITIAL, Excl>a "A" 122 | <Excl>b "B" 123 | <INITIAL, Excl>#= <+Excl>; 124 | <Excl>=# <-Excl>; 125 | ``` 126 | -------------------------------------------------------------------------------- /.buildbot.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | set -e 4 | 5 | export CARGO_HOME="`pwd`/.cargo_install" 6 | export RUSTUP_HOME="`pwd`/.rustup" 7 | export WASMTIME_HOME="`pwd`/.wasmtime" 8 | export NVM_DIR="`pwd`/.nodejs" 9 | export RUSTFLAGS="--cfg grmtools_extra_checks" 10 | 11 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > rustup.sh 12 | sh rustup.sh --default-host x86_64-unknown-linux-gnu --default-toolchain stable -y --no-modify-path 13 | 14 | export PATH=`pwd`/.cargo_install/bin/:$WASMTIME_HOME/bin:$PATH 15 | 16 | # Install wasmtime; once debian trixie is stabilized 17 | # we can likely just use rust-wasmtime. 18 | # 19 | # Needed for wasm32-wasip2 20 | touch .wasmtime_profile 21 | if [ "X`which wasmtime`" = "X" ]; then 22 | PROFILE=".wasmtime_profile" bash -c 'curl https://wasmtime.dev/install.sh -sSf | bash' 23 | fi 24 | . ./.wasmtime_profile 25 | 26 | # Needed for wasm32-unknown-unknown 27 | mkdir -p $NVM_DIR 28 | PROFILE=/dev/null bash -c 'curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.2/install.sh | bash' 29 | . "$NVM_DIR/nvm.sh" 30 | # Download and install Node.js: 31 | nvm install 22 32 | 33 | cargo fmt --all -- --check 34 | 35 | rustup toolchain install stable 36 | rustup default stable 37 | 38 | # Later on we are going to need to install cargo-deny and mdbook. We kick the 39 | # install jobs off now so that at least some work (e.g. downloading crates) can 40 | # happen in parallel, speeding up the overall process. 41 | 42 | cargo_deny_mdbook_tmp=$(mktemp) 43 | ( cargo install --locked cargo-deny ; cargo install --locked mdbook ) \ 44 | >"${cargo_deny_mdbook_tmp}" 2>&1 & 45 | cargo_deny_mdbook_pid=$!
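# (The tests below deliberately run while the backgrounded cargo-deny/mdbook installs above complete; the script only `wait`s on them near the end.)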
46 | 47 | cargo test 48 | cargo test --release 49 | 50 | rustup target add wasm32-unknown-unknown 51 | cargo install wasm-bindgen-cli 52 | cargo test --target wasm32-unknown-unknown 53 | 54 | rustup target add wasm32-wasip2 55 | cargo install workspace_runner 56 | cargo test --target wasm32-wasip2 57 | 58 | cargo test --lib cfgrammar --features serde 59 | cargo test --lib lrpar --features serde 60 | 61 | root=`pwd` 62 | cd $root/lrlex/examples/calc_manual_lex 63 | echo "2 + 3 * 4" | cargo run | grep "Result: 14" 64 | # Touching these files shouldn't invalidate the cache (via --cfg grmtools_extra_checks) 65 | touch src/main.rs && CACHE_EXPECTED=y cargo build 66 | cd $root/lrpar/examples/calc_actions 67 | echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - 68 | # Invoke `%grmtools{test_files}` 69 | cargo run --package nimbleparse -- src/calc.l src/calc.y 70 | echo "2 + 3 * 4" | cargo run | grep "Result: 14" 71 | touch src/main.rs && CACHE_EXPECTED=y cargo build 72 | cd $root/lrpar/examples/calc_ast 73 | echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - 74 | # Invoke `%grmtools{test_files}` 75 | cargo run --package nimbleparse -- src/calc.l src/calc.y 76 | echo "2 + 3 * 4" | cargo run | grep "Result: 14" 77 | cd $root/lrpar/examples/calc_ast_arena 78 | echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - 79 | # Invoke `%grmtools{test_files}` 80 | cargo run --package nimbleparse -- src/calc.l src/calc.y 81 | echo "2 + 3 * 4" | cargo run | grep "Result: 14" 82 | touch src/main.rs && CACHE_EXPECTED=y cargo build 83 | cd $root/lrpar/examples/calc_parsetree 84 | echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - 85 | # Invoke `%grmtools{test_files}` 86 | cargo run --package nimbleparse -- src/calc.l src/calc.y 87 | echo "2 + 3 * 4" | cargo run | grep "Result: 14" 88 | touch src/main.rs && CACHE_EXPECTED=y cargo build 89 | cd $root/lrpar/examples/clone_param 90 | echo "1+++" | cargo run --package nimbleparse -- src/param.l src/param.y - 91 | # Invoke `%grmtools{test_files}` 92 | cargo run --package nimbleparse -- src/param.l src/param.y 93 | cd $root/lrpar/examples/start_states 94 | echo "/* /* commented out */ */ uncommented text /* */" | cargo run --package nimbleparse -- src/comment.l src/comment.y - 95 | # Invoke `%grmtools{test_files}` 96 | cargo run --package nimbleparse -- src/comment.l src/comment.y 97 | cd $root 98 | 99 | RUSTDOCFLAGS="-Dwarnings" cargo doc --no-deps 100 | 101 | # Check licenses. 102 | wait "${cargo_deny_mdbook_pid}" || ( cat "${cargo_deny_mdbook_tmp}" && exit 1 ) 103 | cargo-deny check license 104 | 105 | # Build the docs 106 | cd $root/doc 107 | mdbook build 108 | test -d book 109 | cd .. 110 | -------------------------------------------------------------------------------- /doc/src/manuallexer.md: -------------------------------------------------------------------------------- 1 | # Hand-written lexers 2 | 3 | `lrpar` provides a generic lexing interface into which any lexer can plug. 4 | Users can provide 5 | one or both of a custom lexeme type -- conforming to 6 | [`lrpar::Lexeme`](https://softdevteam.github.io/grmtools/master/api/lrpar/trait.Lexeme.html) 7 | -- and a custom lexing type -- conforming to 8 | [`lrpar::NonStreamingLexer`](https://softdevteam.github.io/grmtools/master/api/lrpar/trait.NonStreamingLexer.html).
9 | If you wish to use a custom lexer, you will need to instantiate `lrpar` 10 | appropriately (both 11 | [`CTParserBuilder`](https://softdevteam.github.io/grmtools/master/api/lrpar/struct.CTParserBuilder.html) 12 | and 13 | [`RTParserBuilder`](https://softdevteam.github.io/grmtools/master/api/lrpar/struct.RTParserBuilder.html)). 14 | 15 | For many purposes, the low-level control and performance that `lrpar` gives you is unneeded, 16 | and the boiler-plate that comes with it unwanted. Fortunately, `lrlex` provides the following convenience mechanisms to make it easier to use a hand-written lexer with `lrpar`: 17 | 18 | 1. `lrlex`'s normal `LRNonStreamingLexer` struct can be instantiated by an 19 | end-user with an input stream, a list of lexemes created from that 20 | input stream, and the newlines encountered while lexing that input 21 | stream. This saves having to define a custom instance of the 22 | [`lrpar::NonStreamingLexer`](https://softdevteam.github.io/grmtools/master/api/lrpar/trait.NonStreamingLexer.html) 23 | trait. 24 | 25 | 2. `lrlex`'s [`DefaultLexeme`](https://softdevteam.github.io/grmtools/master/api/lrlex/struct.DefaultLexeme.html) 26 | struct can also be instantiated by end-users, saving having to define a 27 | custom instance of the 28 | [`lrpar::Lexeme`](https://softdevteam.github.io/grmtools/master/api/lrpar/trait.Lexeme.html) 29 | trait. 30 | 31 | 3. `lrlex` exposes 32 | [`CTTokenMapBuilder`](https://softdevteam.github.io/grmtools/master/api/lrlex/struct.CTTokenMapBuilder.html) 33 | to be used from `build.rs` scripts which automatically produces a 34 | Rust module with one constant per token ID. It is explicitly 35 | designed to be easy to use with `lrpar`'s compile-time building. 36 | 37 | Putting these together is then relatively easy. First a `build.rs` file for a 38 | hand-written lexer will look roughly as follows: 39 | 40 | ```rust 41 | use lrlex::{CTTokenMapBuilder, DefaultLexerTypes}; 42 | use lrpar::CTParserBuilder; 43 | 44 | fn main() { 45 | let ctp = CTParserBuilder::<DefaultLexerTypes<u8>>::new() 46 | .grammar_in_src_dir("grammar.y") 47 | .unwrap() 48 | .build() 49 | .unwrap(); 50 | CTTokenMapBuilder::<u8>::new("token_map", ctp.token_map()).build().unwrap() 51 | } 52 | ``` 53 | 54 | This produces a module that can be imported with `lrlex_mod!("token_map")`. The 55 | module will contain one constant, prefixed with `T_`, per token identifier in the 56 | grammar. For example, for the following grammar excerpt: 57 | 58 | ```rust,noplaypen 59 | Expr -> Result<u64, ()>: 60 | Expr 'PLUS' Term { Ok($1? + $3?) } 61 | | Term { $1 } 62 | ; 63 | ``` 64 | 65 | the module will contain `const T_PLUS: u8 = ...;`. 66 | 67 | Since Yacc grammars can contain token identifiers which are not valid Rust 68 | identifiers, `CTTokenMapBuilder` allows you to provide a map from the token 69 | identifier to a "Rust friendly" variant. For example, for the following grammar 70 | excerpt: 71 | 72 | ```rust,noplaypen 73 | Expr -> Result<u64, ()>: 74 | Expr '+' Term { Ok($1? + $3?) } 75 | | Term { $1 } 76 | ; 77 | ``` 78 | 79 | we would provide a map `'+' => 'PLUS'` leading, again, to a constant `T_PLUS` 80 | being defined.
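To make the mapping concrete, such a map can be expressed as ordinary Rust data. Note that this is only a sketch: the precise `CTTokenMapBuilder` method that accepts the map depends on your `lrlex` version, so consult its API docs.

```rust
use std::collections::HashMap;

// Illustrative rename table: grammar token identifiers on the left,
// Rust-friendly constant suffixes (yielding e.g. `T_PLUS`) on the right.
let rename_map = HashMap::from([("+", "PLUS"), ("*", "STAR")]);
```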
81 | 82 | One can then write a simple custom lexer which lexes all the input in one go 83 | and returns an `LRNonStreamingLexer` as follows: 84 | 85 | ```rust 86 | use cfgrammar::NewlineCache; 87 | use lrlex::{lrlex_mod, DefaultLexeme, DefaultLexerTypes, LRNonStreamingLexer}; 88 | use lrpar::{lrpar_mod, Lexeme, NonStreamingLexer, Span}; 89 | 90 | lrlex_mod!("token_map"); 91 | use token_map::*; 92 | 93 | fn lex(s: &str) -> LRNonStreamingLexer<DefaultLexerTypes<u8>> { 94 | let mut lexemes = Vec::new(); 95 | let mut newlines = NewlineCache::new(); 96 | let mut i = 0; 97 | while i < s.len() { 98 | if i == ... { 99 | lexemes.push(DefaultLexeme::new(T_PLUS, i, ...)); 100 | } else { 101 | ... 102 | } 103 | } 104 | LRNonStreamingLexer::new(s, lexemes, newlines) 105 | } 106 | ``` 107 | -------------------------------------------------------------------------------- /doc/src/yacccompatibility.md: -------------------------------------------------------------------------------- 1 | # Yacc compatibility 2 | 3 | grmtools supports most major Yacc features, to the extent that many Yacc 4 | grammars can be used unchanged with grmtools. In this book we assume 5 | familiarity with Yacc syntax and its major features: the 6 | [Yacc manual](https://web.archive.org/web/20220830093827/dinosaur.compilertools.net/yacc/index.html) is recommended 7 | reading. 8 | 9 | 10 | ## Major differences 11 | 12 | There are several differences between Yacc and grmtools including: 13 | 14 | * grmtools has no equivalent of any of the `yy*` functions (e.g. `yyerror`, 15 | `yylex`, `yylval`, `yyparse` and so on). This means, for example, that 16 | grammar actions cannot currently influence the lexer in any way. 17 | 18 | * grmtools has an entirely different approach to [error 19 | recovery](errorrecovery.md). The token `error` and the special action 20 | expressions `yyerrok` and `yyclearin` are not supported. In general, users 21 | can simply remove alternatives that consist solely of `error`. 22 | 23 | * `%union` can be mapped to `%actiontype` in grmtools, though this is rarely 24 | the best way of using a Yacc grammar in Rust. See the [Grmtools Yacc 25 | variant](#grmtools) below for the most common way of making grammars do 26 | something useful; in a limited number of cases (e.g. if you just want to 27 | build a parse tree), you may find the ["Original" Yacc 28 | variant](#original-yacc) useful. 29 | 30 | * grmtools allows both Yacc's `%expect` and Bison's `%expect-rr` declarations 31 | in its base "Yacc" mode. 32 | 33 | * Bison's `%parse-param` can take multiple arguments. grmtools' `%parse-param` 34 | takes a single argument which can be a tuple, thus emulating multiple 35 | arguments while integrating naturally into Rust's type system. 36 | 37 | * Although rare, it is possible to generate accept/reduce conflicts (e.g. for 38 | a grammar with the sole rule `A: A;`). grmtools considers accept/reduce 39 | conflicts to be a hard error, and refuses to generate anything for the 40 | resulting grammar, whereas Yacc allows them through (with unclear 41 | consequences). Bison also appears to consider accept/reduce conflicts a hard 42 | error, though it appears to detect them in a more generic way (reporting 43 | such rules as "not generating any sentences"). 44 | 45 | 46 | ## YaccKinds 47 | 48 | ### Grmtools 49 | 50 | `YaccKind::Grmtools` is grmtools' own variant of Yacc syntax, and the one that 51 | most users will want to use.
The most significant difference to "normal" Yacc 52 | is that rules are annotated with a Rust type to which all of their productions' 53 | actions must adhere. Note that whilst a rule's productions must all adhere 54 | to a single type, different rules can have different types. Consider the 55 | following snippet: 56 | 57 | ```rust,noplaypen 58 | R1 -> Result<u64, ()>: 59 | 'a' { Ok(5) } 60 | | 'b' { Err(()) } 61 | ; 62 | 63 | R2 -> u64: 64 | | { 0 } 65 | ; 66 | ``` 67 | 68 | Here the rule `R1` has a Rust return type of `Result<u64, ()>` (between `->` and 69 | `:`). Both of its productions adhere to this type, the first by instantiating 70 | `Ok(5)` and the second `Err(())`. The rule `R2` has a return type of `u64`. 71 | 72 | 73 | ### “Original” Yacc 74 | 75 | Although the name is not fully accurate (grmtools supports a slightly disjoint 76 | subset of original Yacc's input), this mode allows users to most easily test 77 | externally created Yacc files. Several sub-variants are allowed: 78 | 79 | * `YaccKind::Original(YaccOriginalActionKind::GenericParseTree)` does not 80 | execute user actions, but instead creates a generic parse tree, where elements 81 | are instances of the `lrpar::parser::Node` enum. This is useful for quickly 82 | testing whether a parser is accepting the intended language. 83 | 84 | * `YaccKind::Original(YaccOriginalActionKind::NoAction)` parses input and 85 | reports errors but does not execute any user actions. This is useful if you 86 | are trying to find out whether a corpus of input parses successfully against 87 | your grammar or not. 88 | 89 | * `YaccKind::Original(YaccOriginalActionKind::UserAction)` models original Yacc 90 | most closely but, in a Rust setting, is probably of little use beyond simple 91 | calculator like languages. Instead of Yacc's `%union` directive, users can 92 | specify `%actiontype` which is a Rust type to which every production's actions 93 | in the grammar must adhere to. Unless all actions happen to naturally return 94 | the same type, this quickly becomes cumbersome to use. For most use cases, 95 | `YaccKind::Grmtools` is a superior alternative. 96 | -------------------------------------------------------------------------------- /lrlex/examples/calc_manual_lex/src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::unnecessary_wraps)] 2 | 3 | use std::io::{self, BufRead, Write}; 4 | 5 | use cfgrammar::{NewlineCache, Span}; 6 | use lrlex::{DefaultLexeme, DefaultLexerTypes, LRNonStreamingLexer, lrlex_mod}; 7 | use lrpar::{Lexeme, NonStreamingLexer, lrpar_mod}; 8 | 9 | lrlex_mod!("token_map"); 10 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. By default the module name will be 11 | // `calc_y` (i.e. the file name, minus any extensions, with a suffix of `_y`). 12 | lrpar_mod!("calc.y"); 13 | 14 | use calc_y::Expr; 15 | use token_map::*; 16 | 17 | fn main() { 18 | let stdin = io::stdin(); 19 | loop { 20 | print!(">>> "); 21 | io::stdout().flush().ok(); 22 | match stdin.lock().lines().next() { 23 | Some(Ok(ref l)) => { 24 | if l.trim().is_empty() { 25 | continue; 26 | } 27 | let lexer = lex(l); 28 | // Pass the lexer to the parser and lex and parse the input.
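// (`lex` is the hand-written lexer defined further down this file; it returns an `LRNonStreamingLexer`, so the generated parser consumes it exactly as it would an `lrlex`-generated lexer.)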
29 | let (res, errs) = calc_y::parse(&lexer); 30 | for e in errs { 31 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 32 | } 33 | if let Some(Ok(r)) = res { 34 | match eval(&lexer, r) { 35 | Ok(i) => println!("Result: {}", i), 36 | Err((span, msg)) => { 37 | let ((line, col), _) = lexer.line_col(span); 38 | eprintln!( 39 | "Evaluation error at line {} column {}, '{}' {}.", 40 | line, 41 | col, 42 | lexer.span_str(span), 43 | msg 44 | ) 45 | } 46 | } 47 | } 48 | } 49 | _ => break, 50 | } 51 | } 52 | } 53 | 54 | fn lex(s: &str) -> LRNonStreamingLexer<'_, '_, DefaultLexerTypes<u8>> { 55 | let mut lexemes = Vec::new(); 56 | let mut i = 0; 57 | while i < s.len() { 58 | // Skip whitespace 59 | i += s[i..] 60 | .chars() 61 | .take_while(|c| c.is_whitespace()) 62 | .map(|c| c.len_utf8()) 63 | .sum::<usize>(); 64 | if i == s.len() { 65 | break; 66 | } 67 | match s[i..].chars().next().unwrap() { 68 | '+' => { 69 | lexemes.push(Ok(DefaultLexeme::new(T_PLUS, i, 1))); 70 | i += 1; 71 | } 72 | '*' => { 73 | lexemes.push(Ok(DefaultLexeme::new(T_STAR, i, 1))); 74 | i += 1; 75 | } 76 | '(' => { 77 | lexemes.push(Ok(DefaultLexeme::new(T_LBRACK, i, 1))); 78 | i += 1; 79 | } 80 | ')' => { 81 | lexemes.push(Ok(DefaultLexeme::new(T_RBRACK, i, 1))); 82 | i += 1; 83 | } 84 | _ => { 85 | let old_i = i; 86 | while let Some('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') = 87 | s[i..].chars().next() 88 | { 89 | i += 1; 90 | } 91 | if i > old_i { 92 | lexemes.push(Ok(DefaultLexeme::new(T_INT, old_i, i - old_i))); 93 | } else { 94 | let c_len = s[i..].chars().next().unwrap().len_utf8(); 95 | lexemes.push(Ok(DefaultLexeme::new(T_UNMATCHED, i, c_len))); 96 | i += c_len; 97 | } 98 | } 99 | } 100 | } 101 | LRNonStreamingLexer::new(s, lexemes, NewlineCache::new()) 102 | } 103 | 104 | fn eval( 105 | lexer: &dyn NonStreamingLexer<DefaultLexerTypes<u8>>, 106 | e: Expr, 107 | ) -> Result<u64, (Span, &'static str)> { 108 | match e { 109 | Expr::Add { span, lhs, rhs } => eval(lexer, *lhs)? 110 | .checked_add(eval(lexer, *rhs)?) 111 | .ok_or((span, "overflowed")), 112 | Expr::Mul { span, lhs, rhs } => eval(lexer, *lhs)? 113 | .checked_mul(eval(lexer, *rhs)?) 114 | .ok_or((span, "overflowed")), 115 | Expr::Number { span } => lexer 116 | .span_str(span) 117 | .parse::<u64>() 118 | .map_err(|_| (span, "cannot be represented as a u64")), 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /lrpar/src/lib/lex_api.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::len_without_is_empty)] 2 | 3 | use std::{cmp, error::Error, fmt, hash::Hash, marker}; 4 | 5 | use cfgrammar::Span; 6 | use num_traits::{AsPrimitive, PrimInt, Unsigned}; 7 | 8 | pub trait LexerTypes: fmt::Debug + Clone 9 | where 10 | usize: AsPrimitive<Self::StorageT>, 11 | { 12 | type LexemeT: Lexeme<Self::StorageT>; 13 | type StorageT: 'static + fmt::Debug + Hash + PrimInt + Unsigned; 14 | type LexErrorT: LexError; 15 | } 16 | 17 | /// The base trait which all lexers which want to interact with `lrpar` must implement. 18 | pub trait Lexer<LexerTypesT: LexerTypes> 19 | where 20 | usize: AsPrimitive<LexerTypesT::StorageT>, 21 | { 22 | /// Iterate over all the lexemes in this lexer. Note that: 23 | /// * The lexer may or may not stop after the first [LexError] is encountered. 24 | /// * There are no guarantees about what happens if this function is called more than once. 25 | /// For example, a streaming lexer may only produce [Lexeme]s on the first call.
26 | fn iter<'a>( 27 | &'a self, 28 | ) -> Box<dyn Iterator<Item = Result<LexerTypesT::LexemeT, LexerTypesT::LexErrorT>> + 'a>; 29 | } 30 | 31 | /// A `NonStreamingLexer` is one that takes input in one go, and is then able to hand out 32 | /// substrings to that input and calculate line and column numbers from a [Span]. 33 | pub trait NonStreamingLexer<'input, LexerTypesT: LexerTypes>: Lexer<LexerTypesT> 34 | where 35 | usize: AsPrimitive<LexerTypesT::StorageT>, 36 | { 37 | /// Return the user input associated with a [Span]. 38 | /// 39 | /// The [Span] must be well formed: 40 | /// * The start/end byte indexes must be valid UTF-8 character indexes. 41 | /// * The end byte index must not exceed the input's length. 42 | /// 43 | /// If these requirements are not respected this function may panic or return unexpected 44 | /// portions of the input. 45 | fn span_str(&self, span: Span) -> &'input str; 46 | 47 | /// Return the lines containing the input at `span` (including *all* the text on the lines 48 | /// that `span` starts and ends on). 49 | /// 50 | /// The [Span] must be well formed: 51 | /// * The start/end byte indexes must be valid UTF-8 character indexes. 52 | /// * The end byte index must not exceed the input's length. 53 | /// 54 | /// If these requirements are not respected this function may panic or return unexpected 55 | /// portions of the input. 56 | fn span_lines_str(&self, span: Span) -> &'input str; 57 | 58 | /// Return `((start line, start column), (end line, end column))` for `span`. Note that column 59 | /// *characters* (not bytes) are returned. 60 | /// 61 | /// The [Span] must be well formed: 62 | /// * The start/end byte indexes must be valid UTF-8 character indexes. 63 | /// * The end byte index must not exceed the input's length. 64 | /// 65 | /// If these requirements are not respected this function may panic or return unexpected 66 | /// portions of the input. 67 | fn line_col(&self, span: Span) -> ((usize, usize), (usize, usize)); 68 | } 69 | 70 | /// A lexeme represents a segment of the user's input that conforms to a known type: this trait 71 | /// captures the common behaviour of all lexeme structs. 72 | /// 73 | /// Lexemes are assumed to have a definition which describes all possible correct lexemes (e.g. the 74 | /// regular expression `[0-9]+` defines all integer lexemes). This trait also allows "faulty" 75 | /// lexemes to be represented -- that is, lexemes that have resulted from error recovery of some 76 | /// sort. Faulty lexemes can violate the lexeme's type definition in any possible way (e.g. they 77 | /// might span more or less input than the definition would suggest is possible). 78 | pub trait Lexeme<StorageT>: fmt::Debug + fmt::Display + cmp::Eq + Hash + marker::Copy { 79 | /// Create a new lexeme with ID `tok_id`, a starting position in the input `start`, and length 80 | /// `len`. 81 | /// 82 | /// Lexemes created using this function are expected to be "correct" in the sense that they 83 | /// fully respect the lexeme's definition semantics. To create faulty lexemes, use 84 | /// [new_faulty](Lexeme::new_faulty). 85 | fn new(tok_id: StorageT, start: usize, len: usize) -> Self 86 | where 87 | Self: Sized; 88 | 89 | /// Create a new faulty lexeme with ID `tok_id` and a starting position in the input `start`. 90 | fn new_faulty(tok_id: StorageT, start: usize, len: usize) -> Self 91 | where 92 | Self: Sized; 93 | 94 | /// The token ID. 95 | fn tok_id(&self) -> StorageT; 96 | 97 | /// Obtain this `Lexeme`'s [Span]. 98 | fn span(&self) -> Span; 99 | 100 | /// Returns `true` if this lexeme is "faulty" i.e. is the result of error recovery in some way.
101 | /// If `true`, note that the lexeme's span may be greater or less than you may expect from the 102 | /// lexeme's definition. 103 | fn faulty(&self) -> bool; 104 | } 105 | 106 | /// A lexing error. 107 | pub trait LexError: Error { 108 | /// Return the span associated with this error. 109 | fn span(&self) -> Span; 110 | } 111 | -------------------------------------------------------------------------------- /lrpar/cttests/build.rs: -------------------------------------------------------------------------------- 1 | use cfgrammar::yacc::ast::ASTWithValidityInfo; 2 | use glob::glob; 3 | #[path = "src/cgen_helper.rs"] 4 | mod cgen_helper; 5 | use cfg_aliases::cfg_aliases; 6 | use cgen_helper::run_test_path; 7 | use lrlex::{CTLexerBuilder, DefaultLexerTypes}; 8 | 9 | // Compiles the `*.test` files within `src`. Test files are written in Yaml syntax and have 4 10 | // mandatory sections: name (describing what the test does), yacckind (defining the grammar type 11 | // used), grammar (the grammar rules), and lexer (the lexing rules). The tests are compiled into 12 | // two modules `<test name>_y` and `<test name>_l`, which we can then import into src/lib.rs and 13 | // write tests for. 14 | fn main() -> Result<(), Box<dyn std::error::Error>> { 15 | for src in glob("src/*.rs")? { 16 | println!("cargo::rerun-if-changed={}", src?.display()); 17 | } 18 | for entry in glob("src/*.test")? { 19 | run_test_path(entry.unwrap())?; 20 | } 21 | 22 | cfg_aliases! { 23 | // Platforms 24 | wasm32_unknown: { all(target_arch = "wasm32", target_os="unknown", target_vendor="unknown") }, 25 | } 26 | 27 | // The generic `src/*.test` testing all use a `u32` StorageT 28 | // In this block we test `storaget.l`, and `storaget.y` with a `u8` instead. 29 | { 30 | // Because we're modifying the `StorageT` this isn't something `run_test_path` can do, 31 | // Since it modifies the type of the builder. 32 | CTLexerBuilder::<DefaultLexerTypes<u8>>::new_with_lexemet() 33 | .rust_edition(lrlex::RustEdition::Rust2021) 34 | .output_path(format!( 35 | "{}/storaget.l.rs", 36 | std::env::var("OUT_DIR").unwrap() 37 | )) 38 | .lrpar_config(|ctp| { 39 | ctp.rust_edition(lrpar::RustEdition::Rust2021) 40 | .output_path(format!( 41 | "{}/storaget.y.rs", 42 | std::env::var("OUT_DIR").unwrap() 43 | )) 44 | .grammar_in_src_dir("storaget.y") 45 | .unwrap() 46 | }) 47 | .lexer_in_src_dir("storaget.l") 48 | .unwrap() 49 | .build() 50 | .unwrap(); 51 | } 52 | println!("cargo::rerun-if-changed=src/storaget.l"); 53 | println!( 54 | "cargo::rerun-if-changed={}/storaget.l.rs", 55 | std::env::var("OUT_DIR").unwrap() 56 | ); 57 | println!("cargo::rerun-if-changed=src/storaget.y"); 58 | println!( 59 | "cargo::rerun-if-changed={}/storaget.y.rs", 60 | std::env::var("OUT_DIR").unwrap() 61 | ); 62 | 63 | // This block specific to `multi_start.test` 64 | // 65 | // We use `clone_and_change_start_rule` to generate multiple parsers with 66 | // different start rules from a single grammar source. 67 | { 68 | use lrpar::unstable_api::UnstableApi; 69 | // In this case we'll be building multiple grammars 70 | // 71 | // 1. Parse multi_start_rule.y into an AST 72 | // 2. Clone the original and change the start rule. 73 | // 3. Build a grammar for `multi_start_rule.y` unchanged. 74 | // 4. Build the modified grammar.
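// NB: `grammar_ast`/`with_grammar_src` (and `clone_and_change_start_rule` above) are gated behind `lrpar::unstable_api::UnstableApi`, hence the marker value threaded through the builder calls below.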
75 | let grammar_path = &std::env::current_dir().unwrap().join("src/multi_start.y"); 76 | let grammar_src = std::fs::read_to_string(grammar_path).unwrap(); 77 | let grammar_src_clone = grammar_src.clone(); 78 | let valid_ast = ASTWithValidityInfo::new(cfgrammar::yacc::YaccKind::Grmtools, &grammar_src); 79 | eprintln!("rules {:?}", valid_ast.ast().rules); 80 | let bstart_rule = valid_ast.ast().get_rule("BStart").unwrap().clone(); 81 | let modified_ast = valid_ast.clone_and_change_start_rule(bstart_rule).unwrap(); 82 | CTLexerBuilder::new() 83 | .lrpar_config(move |ctp| { 84 | ctp.grammar_ast(valid_ast.clone(), UnstableApi) 85 | .with_grammar_src(grammar_src.clone(), UnstableApi) 86 | .grammar_in_src_dir("multi_start.y") 87 | .unwrap() 88 | .mod_name("ast_unmodified_y") 89 | .output_path(format!( 90 | "{}/ast_unmodified.y.rs", 91 | std::env::var("OUT_DIR").unwrap() 92 | )) 93 | }) 94 | .lexer_in_src_dir("multi_start.l") 95 | .unwrap() 96 | .output_path(format!( 97 | "{}/ast_unmodified.l.rs", 98 | std::env::var("OUT_DIR").unwrap() 99 | )) 100 | .mod_name("ast_unmodified_l") 101 | .build() 102 | .unwrap(); 103 | CTLexerBuilder::new() 104 | .lrpar_config(move |ctp| { 105 | ctp.grammar_ast(modified_ast.clone(), UnstableApi) 106 | .with_grammar_src(grammar_src_clone.clone(), UnstableApi) 107 | .grammar_in_src_dir("multi_start.y") 108 | .unwrap() 109 | .mod_name("ast_modified_y") 110 | .output_path(format!( 111 | "{}/ast_modified.y.rs", 112 | std::env::var("OUT_DIR").unwrap() 113 | )) 114 | // We still need to disable these because they are checked after ast validation. 115 | .warnings_are_errors(false) 116 | .show_warnings(false) 117 | }) 118 | .lexer_in_src_dir("multi_start.l") 119 | .unwrap() 120 | .mod_name("ast_modified_l") 121 | .output_path(format!( 122 | "{}/ast_modified.l.rs", 123 | std::env::var("OUT_DIR").unwrap() 124 | )) 125 | .build() 126 | .unwrap(); 127 | } 128 | Ok(()) 129 | } 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Grammar and parsing libraries for Rust 2 | 3 | [![Bors enabled](https://bors.tech/images/badge_small.svg)](https://app.bors.tech/repositories/22484) [![lrpar on crates.io](https://img.shields.io/crates/v/lrpar.svg?label=lrpar)](https://crates.io/crates/lrpar) [![lrlex on crates.io](https://img.shields.io/crates/v/lrlex.svg?label=lrlex)](https://crates.io/crates/lrlex) [![lrtable on crates.io](https://img.shields.io/crates/v/lrtable.svg?label=lrtable)](https://crates.io/crates/lrtable) [![cfgrammar on crates.io](https://img.shields.io/crates/v/cfgrammar.svg?label=cfgrammar)](https://crates.io/crates/cfgrammar) 4 | 5 | grmtools is a suite of Rust libraries and binaries for parsing text, both at 6 | compile-time, and run-time. Most users will probably be interested in the 7 | compile-time Yacc feature, which allows traditional `.y` files to be used 8 | (mostly) unchanged in Rust. 9 | 10 | ## Quickstart 11 | 12 | A minimal example using this library consists of two files (in addition to the 13 | grammar and lexing definitions). 
First we need to create a file `build.rs` in 14 | the root of our project with the following content: 15 | 16 | ```rust 17 | use lrlex::CTLexerBuilder; 18 | 19 | fn main() { 20 | CTLexerBuilder::new() 21 | .lrpar_config(|ctp| { 22 | ctp.grammar_in_src_dir("calc.y") 23 | .unwrap() 24 | }) 25 | .lexer_in_src_dir("calc.l") 26 | .unwrap() 27 | .build() 28 | .unwrap(); 29 | } 30 | ``` 31 | 32 | This will generate and compile a parser and lexer, where the definitions for the 33 | lexer can be found in `src/calc.l`: 34 | 35 | ```rust 36 | %% 37 | [0-9]+ "INT" 38 | \+ "+" 39 | \* "*" 40 | \( "(" 41 | \) ")" 42 | [\t ]+ ; 43 | ``` 44 | 45 | and where the definitions for the parser can be found in `src/calc.y`: 46 | 47 | ```rust 48 | %grmtools{yacckind: Grmtools} 49 | %start Expr 50 | %avoid_insert "INT" 51 | %% 52 | Expr -> Result<u64, ()>: 53 | Expr '+' Term { Ok($1? + $3?) } 54 | | Term { $1 } 55 | ; 56 | 57 | Term -> Result<u64, ()>: 58 | Term '*' Factor { Ok($1? * $3?) } 59 | | Factor { $1 } 60 | ; 61 | 62 | Factor -> Result<u64, ()>: 63 | '(' Expr ')' { $2 } 64 | | 'INT' 65 | { 66 | let v = $1.map_err(|_| ())?; 67 | parse_int($lexer.span_str(v.span())) 68 | } 69 | ; 70 | %% 71 | // Any functions here are in scope for all the grammar actions above. 72 | 73 | fn parse_int(s: &str) -> Result<u64, ()> { 74 | match s.parse::<u64>() { 75 | Ok(val) => Ok(val), 76 | Err(_) => { 77 | eprintln!("{} cannot be represented as a u64", s); 78 | Err(()) 79 | } 80 | } 81 | } 82 | ``` 83 | 84 | We can then use the generated lexer and parser within our `src/main.rs` file as 85 | follows: 86 | 87 | ```rust 88 | use std::env; 89 | 90 | use lrlex::lrlex_mod; 91 | use lrpar::lrpar_mod; 92 | 93 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. By default the 94 | // module name will be `calc_l` (i.e. the file name, minus any extensions, 95 | // with a suffix of `_l`). 96 | lrlex_mod!("calc.l"); 97 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. By default the 98 | // module name will be `calc_y` (i.e. the file name, minus any extensions, 99 | // with a suffix of `_y`). 100 | lrpar_mod!("calc.y"); 101 | 102 | fn main() { 103 | // Get the `LexerDef` for the `calc` language. 104 | let lexerdef = calc_l::lexerdef(); 105 | let args: Vec<String> = env::args().collect(); 106 | // Now we create a lexer with the `lexer` method with which we can lex an 107 | // input. 108 | let lexer = lexerdef.lexer(&args[1]); 109 | // Pass the lexer to the parser and lex and parse the input. 110 | let (res, errs) = calc_y::parse(&lexer); 111 | for e in errs { 112 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 113 | } 114 | match res { 115 | Some(r) => println!("Result: {:?}", r), 116 | _ => eprintln!("Unable to evaluate expression.") 117 | } 118 | } 119 | ``` 120 | 121 | For more information on how to use this library please refer to the [grmtools 122 | book](https://softdevteam.github.io/grmtools/master/book/), which also includes 123 | a more detailed [quickstart 124 | guide](https://softdevteam.github.io/grmtools/master/book/quickstart.html).
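With those three files in place, a session looks roughly like this (output abbreviated; note that `res` is printed with `{:?}`, so a successful parse shows the `Ok` wrapper):

```
$ cargo run '2 + 3 * 4'
Result: Ok(14)
```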
125 | 126 | ## Examples 127 | 128 | [lrpar](https://github.com/softdevteam/grmtools/tree/master/lrpar/examples) 129 | contains several examples on how to use the `lrpar`/`lrlex` libraries, showing 130 | how to generate [parse 131 | trees](https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/calc_parsetree) 132 | and 133 | [ASTs](https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/calc_ast), use 134 | [start conditions/states](https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/start_states) 135 | or [execute 136 | code](https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/calc_actions) 137 | while parsing. 138 | 139 | ## Documentation 140 | 141 | | Latest release | master | 142 | |-----------------------------------------|--------| 143 | | [grmtools book](https://softdevteam.github.io/grmtools/latest_release/book/) | [grmtools book](https://softdevteam.github.io/grmtools/master/book) | 144 | | [cfgrammar](https://docs.rs/cfgrammar/) | [cfgrammar](https://softdevteam.github.io/grmtools/master/api/cfgrammar/) | 145 | | [lrpar](https://docs.rs/lrpar/) | [lrpar](https://softdevteam.github.io/grmtools/master/api/lrpar/) | 146 | | [lrlex](https://docs.rs/lrlex/) | [lrlex](https://softdevteam.github.io/grmtools/master/api/lrlex/) | 147 | | [lrtable](https://docs.rs/lrtable/) | [lrtable](https://softdevteam.github.io/grmtools/master/api/lrtable/) | 148 | 149 | [Documentation for all past and present releases](https://softdevteam.github.io/grmtools/) 150 | -------------------------------------------------------------------------------- /lrpar/cttests/src/grmtools_section.test: -------------------------------------------------------------------------------- 1 | grammar: | 2 | %grmtools{ 3 | yacckind: Grmtools, 4 | recoverer: RecoveryKind::CPCTPlus, 5 | test_files: ["*.input_grmtools_section"] 6 | } 7 | %token MAGIC IDENT NUM STRING 8 | %epp MAGIC "%grmtools" 9 | %% 10 | start -> Result<Header<Span>, Vec<HeaderError<Span>>> 11 | : MAGIC '{' contents '}' { $3 } 12 | ; 13 | 14 | contents -> Result<Header<Span>, Vec<HeaderError<Span>>> 15 | : %empty { Ok(Header::new()) } 16 | | val_seq comma_opt { $1 } 17 | ; 18 | 19 | val_seq -> Result<Header<Span>, Vec<HeaderError<Span>>> 20 | : valbind { 21 | let ((key, key_loc), val) = $1; 22 | let mut ret = Header::<Span>::new(); 23 | match ret.entry(key) { 24 | Entry::Occupied(orig) => { 25 | let HeaderValue(orig_loc, _) : &HeaderValue<Span> = orig.get(); 26 | // One difference between the manually written parser and this 27 | // is we don't try return multiple errors, or coalesce them. 28 | return Err(vec![HeaderError { 29 | kind: HeaderErrorKind::DuplicateEntry, 30 | locations: vec![*orig_loc, key_loc] 31 | }]); 32 | } 33 | Entry::Vacant(entry) => { 34 | entry.insert(HeaderValue(key_loc, val)); 35 | } 36 | } 37 | Ok(ret) 38 | } 39 | | val_seq ',' valbind { 40 | let ((key, key_loc), val) = $3; 41 | let mut ret = $1?; 42 | match ret.entry(key) { 43 | Entry::Occupied(orig) => { 44 | let HeaderValue(orig_loc, _): &HeaderValue<Span> = orig.get(); 45 | // One difference between the manually written parser and this 46 | // is we don't try return multiple errors, or coalesce them.
47 | return Err(vec![HeaderError { 48 | kind: HeaderErrorKind::DuplicateEntry, 49 | locations: vec![*orig_loc, key_loc] 50 | }]); 51 | } 52 | Entry::Vacant(entry) => { 53 | entry.insert(HeaderValue(key_loc, val)); 54 | } 55 | } 56 | Ok(ret) 57 | } 58 | ; 59 | 60 | namespaced -> Namespaced<Span> 61 | : IDENT { 62 | let ident_span = $1.as_ref().unwrap().span(); 63 | let ident = $lexer.span_str(ident_span).to_string().to_lowercase(); 64 | Namespaced{ 65 | namespace: None, 66 | member: (ident, ident_span) 67 | } 68 | } 69 | | IDENT '::' IDENT { 70 | let namespace_span = $1.as_ref().unwrap().span(); 71 | let namespace = $lexer.span_str(namespace_span).to_string().to_lowercase(); 72 | 73 | let ident_span = $3.as_ref().unwrap().span(); 74 | let ident = $lexer.span_str(ident_span).to_string().to_lowercase(); 75 | Namespaced { 76 | namespace: Some((namespace, namespace_span)), 77 | member: (ident, ident_span) 78 | } 79 | } 80 | ; 81 | 82 | valbind -> ((String, Span), Value<Span>) 83 | : IDENT ':' val { 84 | let key_span = $1.as_ref().unwrap().span(); 85 | let key = $lexer.span_str(key_span).to_string().to_lowercase(); 86 | ((key, key_span), Value::Setting($3)) 87 | } 88 | | IDENT { 89 | let key_span = $1.as_ref().unwrap().span(); 90 | let key = $lexer.span_str(key_span).to_string().to_lowercase(); 91 | ((key, key_span), Value::Flag(true, key_span)) 92 | } 93 | | '!' IDENT { 94 | let bang_span = $1.as_ref().unwrap().span(); 95 | let key_span = $2.as_ref().unwrap().span(); 96 | let key = $lexer.span_str(key_span).to_string().to_lowercase(); 97 | ((key, key_span), Value::Flag(false, Span::new(bang_span.start(), key_span.end()))) 98 | } 99 | ; 100 | 101 | val -> Setting<Span> 102 | : namespaced { Setting::Unitary($1) } 103 | | NUM { 104 | let num_span = $1.as_ref().unwrap().span(); 105 | let n = str::parse::<u64>($lexer.span_str(num_span)); 106 | Setting::Num(n.expect("convertible"), num_span) 107 | } 108 | | STRING { 109 | let string_span = $1.as_ref().unwrap().span(); 110 | // Trim the leading and trailing " characters. 111 | let string_span = Span::new(string_span.start() + 1, string_span.end() - 1); 112 | let s = $lexer.span_str(string_span).to_string(); 113 | Setting::String(s, string_span) 114 | } 115 | | namespaced '(' namespaced ')' { Setting::Constructor{ctor: $1, arg: $3} } 116 | | '[' array_seq ']' { Setting::Array($2, $1.as_ref().unwrap().span(), $3.as_ref().unwrap().span()) } 117 | ; 118 | 119 | array_seq -> Vec<Setting<Span>> 120 | : %empty { Vec::new() } 121 | | val { 122 | vec![$1] 123 | } 124 | | array_seq ',' val { 125 | $1.push($3); 126 | $1 127 | } 128 | ; 129 | comma_opt -> () 130 | : %empty { } 131 | | ',' { } 132 | ; 133 | %% 134 | #![allow(dead_code)] 135 | #![allow(unused)] 136 | 137 | use cfgrammar::{ 138 | Span, 139 | header::{ 140 | Value, 141 | Setting, 142 | HeaderError, 143 | HeaderErrorKind, 144 | Namespaced, 145 | Header, 146 | HeaderValue, 147 | }, 148 | markmap::Entry, 149 | }; 150 | 151 | lexer: | 152 | %grmtools{case_insensitive} 153 | %% 154 | %grmtools 'MAGIC' 155 | ! '!'
156 | [A-Z][A-Z_]* 'IDENT' 157 | [0-9]+ 'NUM' 158 | , ',' 159 | \{ '{' 160 | \} '}' 161 | \( '(' 162 | \) ')' 163 | \[ '[' 164 | \] ']' 165 | :: '::' 166 | : ':' 167 | \"(\\.|[^"\\])*\" 'STRING' 168 | \p{Pattern_White_Space} ; 169 | extra_files: 170 | test.input_grmtools_section: | 171 | %grmtools{yacckind: Grmtools, !b, !a} 172 | -------------------------------------------------------------------------------- /lrlex/src/main.rs: -------------------------------------------------------------------------------- 1 | use getopts::Options; 2 | use std::{ 3 | env, 4 | error::Error, 5 | fmt, 6 | fs::File, 7 | io::{Read, Write, stderr, stdin}, 8 | path::Path, 9 | process, 10 | }; 11 | 12 | use cfgrammar::header::{GrmtoolsSectionParser, HeaderValue}; 13 | use lrlex::{DefaultLexerTypes, LRNonStreamingLexerDef, LexFlags, LexerDef, LexerKind}; 14 | use lrpar::{ 15 | Lexeme, Lexer, 16 | diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter}, 17 | }; 18 | 19 | const ERROR: &str = "[Error]"; 20 | 21 | /// A string which uses `Display` for its `Debug` impl. 22 | struct ErrorString(String); 23 | impl fmt::Display for ErrorString { 24 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 25 | let ErrorString(s) = self; 26 | write!(f, "{}", s) 27 | } 28 | } 29 | impl fmt::Debug for ErrorString { 30 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 31 | let ErrorString(s) = self; 32 | write!(f, "{}", s) 33 | } 34 | } 35 | impl Error for ErrorString {} 36 | 37 | fn usage(prog: &str, msg: &str) { 38 | let path = Path::new(prog); 39 | let leaf = match path.file_name() { 40 | Some(m) => m.to_str().unwrap(), 41 | None => "lrlex", 42 | }; 43 | if !msg.is_empty() { 44 | writeln!(stderr(), "{}", msg).ok(); 45 | } 46 | writeln!(stderr(), "Usage: {} <lexer.l> <input file>", leaf).ok(); 47 | process::exit(1); 48 | } 49 | 50 | fn read_file(path: &str) -> String { 51 | let mut s = String::new(); 52 | if path == "-" { 53 | stdin().read_to_string(&mut s).unwrap(); 54 | return s; 55 | } 56 | let mut f = match File::open(path) { 57 | Ok(r) => r, 58 | Err(e) => { 59 | writeln!(stderr(), "Can't open file {}: {}", path, e).ok(); 60 | process::exit(1); 61 | } 62 | }; 63 | f.read_to_string(&mut s).unwrap(); 64 | s 65 | } 66 | 67 | fn main() -> Result<(), Box<dyn Error>> { 68 | let args: Vec<String> = env::args().collect(); 69 | let prog = args[0].clone(); 70 | let matches = match Options::new().optflag("h", "help", "").parse(&args[1..]) { 71 | Ok(m) => m, 72 | Err(f) => { 73 | usage(&prog, f.to_string().as_str()); 74 | return Ok(()); 75 | } 76 | }; 77 | if matches.opt_present("h") || matches.free.len() != 2 { 78 | usage(&prog, ""); 79 | return Ok(()); 80 | } 81 | 82 | let lex_l_path = &matches.free[0]; 83 | let lex_src = read_file(lex_l_path); 84 | let lex_diag = SpannedDiagnosticFormatter::new(&lex_src, Path::new(lex_l_path)); 85 | let (mut header, _) = match GrmtoolsSectionParser::new(&lex_src, false).parse() { 86 | Ok(x) => x, 87 | Err(es) => { 88 | eprintln!( 89 | "\n{ERROR}{}", 90 | lex_diag.file_location_msg(" parsing the `%grmtools` section", None) 91 | ); 92 | for e in es { 93 | eprintln!( 94 | "{}", 95 | &indent(" ", &lex_diag.format_error(e).to_string()) 96 | ); 97 | } 98 | process::exit(1); 99 | } 100 | }; 101 | header.mark_used(&"lexerkind".to_string()); 102 | let lexerkind = if let Some(HeaderValue(_, lk_val)) = header.get("lexerkind") { 103 | LexerKind::try_from(lk_val)?
104 | } else { 105 | LexerKind::LRNonStreamingLexer 106 | }; 107 | 108 | let lexerdef = match lexerkind { 109 | LexerKind::LRNonStreamingLexer => { 110 | let lex_flags = LexFlags::try_from(&mut header)?; 111 | match LRNonStreamingLexerDef::<DefaultLexerTypes<u32>>::new_with_options( 112 | &lex_src, lex_flags, 113 | ) { 114 | Ok(x) => x, 115 | Err(errs) => { 116 | eprintln!("\n{ERROR}{}", lex_diag.file_location_msg("", None)); 117 | for e in errs { 118 | eprintln!( 119 | "{}", 120 | &indent(" ", &lex_diag.format_error(e).to_string()) 121 | ); 122 | } 123 | process::exit(1); 124 | } 125 | } 126 | } 127 | _ => { 128 | return Err(ErrorString("Unrecognized lexer kind".to_string()))?; 129 | } 130 | }; 131 | { 132 | let unused_header_values = header.unused(); 133 | if !unused_header_values.is_empty() { 134 | Err(ErrorString(format!( 135 | "Unused header values: {}", 136 | unused_header_values.join(", ") 137 | )))? 138 | } 139 | } 140 | let input = &read_file(&matches.free[1]); 141 | for r in lexerdef.lexer(input).iter() { 142 | match r { 143 | Ok(l) => println!( 144 | "{} {}", 145 | lexerdef.get_rule_by_id(l.tok_id()).name().unwrap(), 146 | &input[l.span().start()..l.span().end()] 147 | ), 148 | Err(e) => { 149 | println!("{:?}", e); 150 | process::exit(1); 151 | } 152 | } 153 | } 154 | Ok(()) 155 | } 156 | 157 | /// Indents a multi-line string and trims any trailing newline. 158 | /// This currently assumes that indentation on blank lines does not matter. 159 | /// 160 | /// The algorithm used by this function is: 161 | /// 1. Prefix `s` with the indentation, indenting the first line. 162 | /// 2. Trim any trailing newlines. 163 | /// 3. Replace all newlines with `\n{indent}` to indent all lines after the first. 164 | /// 165 | /// It is plausible that we should add a step 4, but currently do not: 166 | /// 4. Replace all `\n{indent}\n` with `\n\n` 167 | fn indent(indent: &str, s: &str) -> String { 168 | format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent)) 169 | } 170 | -------------------------------------------------------------------------------- /lrpar/README.md: -------------------------------------------------------------------------------- 1 | # `lrpar` 2 | 3 | `lrpar` provides a Yacc-compatible parser (where grammars can be generated at 4 | compile-time or run-time). It can take in traditional `.y` files and convert 5 | them into an idiomatic Rust parser. 6 | 7 | If you're new to `lrpar`, please read the "quick start guide". The "grmtools 8 | book" and API reference have more detailed information. You can find the 9 | appropriate documentation for the version of lrpar you are using here: 10 | 11 | | Latest release | master | 12 | |-----------------------------------------|--------| 13 | | [Quickstart guide](https://softdevteam.github.io/grmtools/latest_release/book/quickstart.html) | [Quickstart guide](https://softdevteam.github.io/grmtools/master/book/quickstart.html) | 14 | | [grmtools book](https://softdevteam.github.io/grmtools/latest_release/book/) | [grmtools book](https://softdevteam.github.io/grmtools/master/book) | 15 | | [lrpar API](https://docs.rs/lrpar/) | [lrpar API](https://softdevteam.github.io/grmtools/master/api/lrpar/) | 16 | 17 | [Documentation for all past and present releases](https://softdevteam.github.io/grmtools/) 18 | 19 | 20 | ## Example 21 | 22 | Let's assume we want to statically generate a parser for a simple calculator 23 | language (and let's also assume we are able to use 24 | [`lrlex`](https://crates.io/crates/lrlex) for the lexer).
We need to add a 25 | `build.rs` file to our project which statically compiles both the lexer and 26 | parser. While we can perform both steps individually, it's easiest to use 27 | `lrlex` which does both jobs for us in one go. Our `build.rs` file thus looks 28 | as follows: 29 | 30 | ```rust 31 | use cfgrammar::yacc::YaccKind; 32 | use lrlex::CTLexerBuilder; 33 | 34 | fn main() { 35 | CTLexerBuilder::new() 36 | .lrpar_config(|ctp| { 37 | ctp.yacckind(YaccKind::Grmtools) 38 | .grammar_in_src_dir("calc.y") 39 | .unwrap() 40 | }) 41 | .lexer_in_src_dir("calc.l") 42 | .unwrap() 43 | .build() 44 | .unwrap(); 45 | } 46 | ``` 47 | 48 | where `src/calc.l` is as follows: 49 | 50 | ``` 51 | %% 52 | [0-9]+ "INT" 53 | \+ "+" 54 | \* "*" 55 | \( "(" 56 | \) ")" 57 | [\t ]+ ; 58 | ``` 59 | 60 | and `src/calc.y` is as follows: 61 | 62 | ``` 63 | %start Expr 64 | %avoid_insert "INT" 65 | %% 66 | Expr -> Result<u64, ()>: 67 | Expr '+' Term { Ok($1? + $3?) } 68 | | Term { $1 } 69 | ; 70 | 71 | Term -> Result<u64, ()>: 72 | Term '*' Factor { Ok($1? * $3?) } 73 | | Factor { $1 } 74 | ; 75 | 76 | Factor -> Result<u64, ()>: 77 | '(' Expr ')' { $2 } 78 | | 'INT' 79 | { 80 | let v = $1.map_err(|_| ())?; 81 | parse_int($lexer.span_str(v.span())) 82 | } 83 | ; 84 | %% 85 | // Any functions here are in scope for all the grammar actions above. 86 | 87 | fn parse_int(s: &str) -> Result<u64, ()> { 88 | match s.parse::<u64>() { 89 | Ok(val) => Ok(val), 90 | Err(_) => { 91 | eprintln!("{} cannot be represented as a u64", s); 92 | Err(()) 93 | } 94 | } 95 | } 96 | ``` 97 | 98 | Because we specified that our Yacc file is in `Grmtools` format, each rule has a 99 | separate Rust type to which all its functions conform (in this case, all the 100 | rules have the same type, but that's not a requirement). 101 | 102 | A simple `src/main.rs` is as follows: 103 | 104 | ```rust 105 | use std::io::{self, BufRead, Write}; 106 | 107 | use lrlex::lrlex_mod; 108 | use lrpar::lrpar_mod; 109 | 110 | // Using `lrlex_mod!` brings the lexer for `calc.l` into scope. 111 | lrlex_mod!("calc.l"); 112 | // Using `lrpar_mod!` brings the parser for `calc.y` into scope. 113 | lrpar_mod!("calc.y"); 114 | 115 | fn main() { 116 | // Get the `LexerDef` for the `calc` language. 117 | let lexerdef = calc_l::lexerdef(); 118 | let stdin = io::stdin(); 119 | loop { 120 | print!(">>> "); 121 | io::stdout().flush().ok(); 122 | match stdin.lock().lines().next() { 123 | Some(Ok(ref l)) => { 124 | if l.trim().is_empty() { 125 | continue; 126 | } 127 | // Now we create a lexer with the `lexer` method with which 128 | // we can lex an input. 129 | let lexer = lexerdef.lexer(l); 130 | // Pass the lexer to the parser and lex and parse the input. 131 | let (res, errs) = calc_y::parse(&lexer); 132 | for e in errs { 133 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 134 | } 135 | match res { 136 | Some(Ok(r)) => println!("Result: {}", r), 137 | _ => eprintln!("Unable to evaluate expression.") 138 | } 139 | } 140 | _ => break 141 | } 142 | } 143 | } 144 | ``` 145 | 146 | We can now `cargo run` our project and evaluate simple expressions: 147 | 148 | ``` 149 | >>> 2 + 3 150 | Result: 5 151 | >>> 2 + 3 * 4 152 | Result: 14 153 | >>> (2 + 3) * 4 154 | Result: 20 155 | ``` 156 | 157 | `lrpar` also comes with advanced [error 158 | recovery](https://softdevteam.github.io/grmtools/master/book/errorrecovery.html) built-in: 159 | 160 | ``` 161 | >>> 2 + + 3 162 | Parsing error at line 1 column 5.
Repair sequences found: 163 | 1: Delete + 164 | 2: Insert INT 165 | Result: 5 166 | >>> 2 + 3 3 167 | Parsing error at line 1 column 7. Repair sequences found: 168 | 1: Insert * 169 | 2: Insert + 170 | 3: Delete 3 171 | Result: 11 172 | >>> 2 + 3 4 5 173 | Parsing error at line 1 column 7. Repair sequences found: 174 | 1: Insert *, Delete 4 175 | 2: Insert +, Delete 4 176 | 3: Delete 4, Delete 5 177 | 4: Insert +, Shift 4, Delete 5 178 | 5: Insert +, Shift 4, Insert + 179 | 6: Insert *, Shift 4, Delete 5 180 | 7: Insert *, Shift 4, Insert * 181 | 8: Insert *, Shift 4, Insert + 182 | 9: Insert +, Shift 4, Insert * 183 | Result: 17 184 | ``` 185 | -------------------------------------------------------------------------------- /doc/src/ast_example.md: -------------------------------------------------------------------------------- 1 | # An AST evaluator 2 | 3 | We now know enough to put together a more sophisticated version of our simple 4 | calculator example that builds an Abstract Syntax Tree (AST) while parsing, 5 | which is then evaluated separately. This models a common way of building real 6 | compilers. The full example code can be found at 7 | [https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/calc_ast](https://github.com/softdevteam/grmtools/tree/master/lrpar/examples/calc_ast). 8 | 9 | The `calc.l` file remains unchanged from that in the [Quickstart 10 | guide](quickstart.md). However, the `calc.y` file is changed as follows: 11 | 12 | 13 | ```rust,noplaypen 14 | %start Expr 15 | %avoid_insert "INT" 16 | %% 17 | Expr -> Result<Expr, ()>: 18 | Expr '+' Term { Ok(Expr::Add{ span: $span, lhs: Box::new($1?), rhs: Box::new($3?) }) } 19 | | Term { $1 } 20 | ; 21 | 22 | Term -> Result<Expr, ()>: 23 | Term '*' Factor { Ok(Expr::Mul{ span: $span, lhs: Box::new($1?), rhs: Box::new($3?) }) } 24 | | Factor { $1 } 25 | ; 26 | 27 | Factor -> Result<Expr, ()>: 28 | '(' Expr ')' { $2 } 29 | | 'INT' { Ok(Expr::Number{ span: $span }) } 30 | ; 31 | %% 32 | 33 | use cfgrammar::Span; 34 | 35 | #[derive(Debug)] 36 | pub enum Expr { 37 | Add { 38 | span: Span, 39 | lhs: Box<Expr>, 40 | rhs: Box<Expr>, 41 | }, 42 | Mul { 43 | span: Span, 44 | lhs: Box<Expr>, 45 | rhs: Box<Expr>, 46 | }, 47 | Number { 48 | span: Span 49 | } 50 | } 51 | ``` 52 | 53 | The most obvious difference here is that we have defined a simple `enum` `Expr`, 54 | with three variants, for our AST. Each AST variant also records a `Span` which 55 | records how much input the AST element covers. By using the 56 | [`$span`](actioncode.md) variable we can ensure that AST elements record their 57 | relationship to portions of the user's input that span multiple tokens (e.g. 58 | for the expression `1 + 2` the resulting `Expr::Add` will have a `Span` 59 | starting at byte index 0 and ending at byte index 5 -- in other words covering 60 | the complete input string in this case). 61 | 62 | After parsing, we thus end up with a `Result<Expr, ()>`. In the case of a 63 | successful parse, this will give us an arbitrarily deeply nested `Expr`.
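To make the AST shape concrete, the following sketch (not part of the example's code) shows the value the parser produces for the input `2 + 3 * 4`; the `Span::new(start, end)` values are the byte ranges each node covers:

```rust,noplaypen
// A sketch of the AST for "2 + 3 * 4" (spans are byte offsets into the input).
Expr::Add {
    span: Span::new(0, 9),                                     // "2 + 3 * 4"
    lhs: Box::new(Expr::Number { span: Span::new(0, 1) }),     // "2"
    rhs: Box::new(Expr::Mul {
        span: Span::new(4, 9),                                 // "3 * 4"
        lhs: Box::new(Expr::Number { span: Span::new(4, 5) }), // "3"
        rhs: Box::new(Expr::Number { span: Span::new(8, 9) }), // "4"
    }),
}
```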
64 | 65 | Our `main.rs` file then looks as follows: 66 | 67 | ```rust,noplaypen 68 | use std::io::{self, BufRead, Write}; 69 | 70 | use lrlex::{lrlex_mod, DefaultLexeme, LRLexError}; 71 | use lrpar::{lrpar_mod, NonStreamingLexer, Span}; 72 | 73 | lrlex_mod!("calc.l"); 74 | lrpar_mod!("calc.y"); 75 | 76 | use calc_y::Expr; 77 | 78 | fn main() { 79 | let lexerdef = calc_l::lexerdef(); 80 | let stdin = io::stdin(); 81 | loop { 82 | print!(">>> "); 83 | io::stdout().flush().ok(); 84 | match stdin.lock().lines().next() { 85 | Some(Ok(ref l)) => { 86 | if l.trim().is_empty() { 87 | continue; 88 | } 89 | let lexer = lexerdef.lexer(l); 90 | let (res, errs) = calc_y::parse(&lexer); 91 | for e in errs { 92 | println!("{}", e.pp(&lexer, &calc_y::token_epp)); 93 | } 94 | if let Some(Ok(r)) = res { 95 | // We have a successful parse. 96 | match eval(&lexer, r) { 97 | Ok(i) => println!("Result: {}", i), 98 | Err((span, msg)) => { 99 | let ((line, col), _) = lexer.line_col(span); 100 | eprintln!( 101 | "Evaluation error at line {} column {}, '{}' {}.", 102 | line, 103 | col, 104 | lexer.span_str(span), 105 | msg 106 | ) 107 | } 108 | } 109 | } 110 | } 111 | _ => break 112 | } 113 | } 114 | } 115 | 116 | fn eval( 117 | lexer: &dyn NonStreamingLexer<DefaultLexeme<u32>, u32, LRLexError>, 118 | e: Expr) 119 | -> Result<u64, (Span, &'static str)> 120 | { 121 | match e { 122 | Expr::Add { span, lhs, rhs } => eval(lexer, *lhs)? 123 | .checked_add(eval(lexer, *rhs)?) 124 | .ok_or((span, "overflowed")), 125 | Expr::Mul { span, lhs, rhs } => eval(lexer, *lhs)? 126 | .checked_mul(eval(lexer, *rhs)?) 127 | .ok_or((span, "overflowed")), 128 | Expr::Number { span } => lexer 129 | .span_str(span) 130 | .parse::<u64>() 131 | .map_err(|_| (span, "cannot be represented as a u64")) 132 | } 133 | } 134 | ``` 135 | 136 | Let's start by running this and seeing what happens: 137 | 138 | ``` 139 | >>> 2+3*4 140 | Result: 14 141 | >>> 2++3*4 142 | Parsing error at line 1 column 3. Repair sequences found: 143 | 1: Delete + 144 | 2: Insert INT 145 | Result: 14 146 | >>> 999999*888888 + 777777*666666 147 | Result: 1407404592594 148 | >>> 9999999999*8888888888 + 7777777777*6666666666 149 | Evaluation error at line 1 column 6, '9999999999*8888888888' overflowed. 150 | ``` 151 | 152 | The first three expressions evaluate just as before. However, the fourth is 153 | interesting: we have explicitly captured the fact that the result of 154 | `9999999999*8888888888` is too big to fit into a `u64`; not only have we 155 | told the user at which character of the input the error starts, but we've printed out the 156 | precise sub-part of the input which caused that error. This works even when 157 | it's in the middle of the input: 158 | 159 | ``` 160 | >>> 10 + 9999999999*8888888888 + 20 161 | Evaluation error at line 1 column 6, '9999999999*8888888888' overflowed. 162 | ``` 163 | 164 | The key to this is that each AST element knows the `$span` of the production it 165 | is related to; and the user's input for the resulting `Span` can be extracted with 166 | `lexer.span_str(span)`. 167 | 168 | Happily, this facility composes nicely with error recovery: 169 | 170 | ``` 171 | >>> 10 ++ 9999999999*8888888888 + 20 172 | Parsing error at line 1 column 5. Repair sequences found: 173 | 1: Delete + 174 | 2: Insert INT 175 | Evaluation error at line 1 column 7, '9999999999*8888888888' overflowed.
176 | ``` 177 | -------------------------------------------------------------------------------- /nimbleparse/README.md: -------------------------------------------------------------------------------- 1 | # `nimbleparse` 2 | 3 | `nimbleparse` is a simple grammar debugging aid. It takes as input a Lex 4 | specification, a Yacc specification, and an input file and prints any warnings 5 | about the specifications (e.g. shift/reduce errors) as well as the resulting 6 | parse tree to stdout. If the parse is unsuccessful it will report parsing 7 | errors and, when possible, fixes. If parsing is successful, `nimbleparse` exits 8 | with 0; if an error is detected it exits with 1. 9 | 10 | The full command-line specification is as follows: 11 | 12 | ``` 13 | nimbleparse [-r <cpctplus|none>] [-y <eco|grmtools|original>] [-q] <lexer.l> <parser.y> <input file> 14 | ``` 15 | 16 | where: 17 | 18 | * `-r` selects the recovery algorithm to be used. Defaults to `cpctplus`. 19 | * `-y` selects the Yacc variant to be used. Defaults to `original`. 20 | * `-q` prevents warnings (e.g. shift/reduce errors) from being reported. 21 | 22 | You can use your own Lex/Yacc files. A small repository of example grammars can 23 | be found at https://github.com/softdevteam/grammars/. 24 | 25 | An example invocation is as follows: 26 | 27 | ``` 28 | $ cat Hello.java 29 | class Hello { 30 | public static void main(String[] args) { 31 | System.out.println("Hello world"); 32 | } 33 | } 34 | $ nimbleparse java7.l java7.y Hello.java 35 | goal 36 | compilation_unit 37 | type_declarations_opt 38 | type_declarations 39 | type_declaration 40 | class_declaration 41 | modifiers_opt 42 | CLASS class 43 | IDENTIFIER Hello 44 | type_parameters_opt 45 | super_opt 46 | interfaces_opt 47 | class_body 48 | LBRACE { 49 | class_body_declarations_opt 50 | class_body_declarations 51 | class_body_declaration 52 | class_member_declaration 53 | method_declaration 54 | method_header 55 | modifiers_opt 56 | modifiers 57 | modifiers 58 | modifier 59 | PUBLIC public 60 | modifier 61 | STATIC static 62 | VOID void 63 | method_declarator 64 | IDENTIFIER main 65 | LPAREN ( 66 | formal_parameter_list_opt 67 | formal_parameter_list 68 | formal_parameter 69 | type 70 | reference_type 71 | array_type 72 | name 73 | simple_name 74 | IDENTIFIER String 75 | dims 76 | LBRACK [ 77 | RBRACK ] 78 | variable_declarator_id 79 | IDENTIFIER args 80 | RPAREN ) 81 | throws_opt 82 | method_body 83 | block 84 | LBRACE { 85 | block_statements_opt 86 | block_statements 87 | block_statement 88 | statement 89 | statement_without_trailing_substatement 90 | expression_statement 91 | statement_expression 92 | method_invocation 93 | qualified_name 94 | name 95 | qualified_name 96 | name 97 | simple_name 98 | IDENTIFIER System 99 | DOT . 100 | IDENTIFIER out 101 | DOT .
102 | IDENTIFIER println 103 | LPAREN ( 104 | argument_list_opt 105 | argument_list 106 | expression 107 | assignment_expression 108 | conditional_expression 109 | conditional_or_expression 110 | conditional_and_expression 111 | inclusive_or_expression 112 | exclusive_or_expression 113 | and_expression 114 | equality_expression 115 | instanceof_expression 116 | relational_expression 117 | shift_expression 118 | additive_expression 119 | multiplicative_expression 120 | unary_expression 121 | unary_expression_not_plus_minus 122 | postfix_expression 123 | primary 124 | primary_no_new_array 125 | literal 126 | STRING_LITERAL "Hello world" 127 | RPAREN ) 128 | SEMICOLON ; 129 | RBRACE } 130 | RBRACE } 131 | $ cat SyntaxError.java 132 | class SyntaxError { 133 | int x y; 134 | } 135 | $ nimbleparse java7.l java7.y SyntaxError.java 136 | goal 137 | compilation_unit 138 | type_declarations_opt 139 | type_declarations 140 | type_declaration 141 | class_declaration 142 | modifiers_opt 143 | CLASS class 144 | IDENTIFIER SyntaxError 145 | type_parameters_opt 146 | super_opt 147 | interfaces_opt 148 | class_body 149 | LBRACE { 150 | class_body_declarations_opt 151 | class_body_declarations 152 | class_body_declaration 153 | class_member_declaration 154 | field_declaration 155 | modifiers_opt 156 | type 157 | primitive_type 158 | numeric_type 159 | integral_type 160 | INT int 161 | variable_declarators 162 | variable_declarators 163 | variable_declarator 164 | variable_declarator_id 165 | IDENTIFIER x 166 | COMMA 167 | variable_declarator 168 | variable_declarator_id 169 | IDENTIFIER y 170 | SEMICOLON ; 171 | RBRACE } 172 | 173 | Parsing error at line 2 column 11. Repair sequences found: 174 | 1: Insert , 175 | 2: Insert = 176 | 3: Delete y 177 | ``` 178 | -------------------------------------------------------------------------------- /doc/src/nimbleparse.md: -------------------------------------------------------------------------------- 1 | # nimbleparse 2 | 3 | `nimbleparse` is a simple grammar debugging aid. It takes as input a Lex 4 | specification, a Yacc specification, and an input file and prints any warnings 5 | about the specifications (e.g. shift/reduce errors) as well as the resulting 6 | parse tree to stdout. If the parse is unsuccessful it will report parsing 7 | errors and, when possible, fixes. If parsing is successful, `nimbleparse` exits 8 | with 0; if an error is detected it exits with 1. 9 | 10 | The full command-line specification is as follows: 11 | 12 | ``` 13 | nimbleparse [-r <cpctplus|none>] [-y <eco|grmtools|original>] [-q] <lexer.l> <parser.y> <input file> 14 | ``` 15 | 16 | where: 17 | 18 | * `-r` selects the recovery algorithm to be used. Defaults to `cpctplus`. 19 | * `-y` selects the Yacc variant to be used. Defaults to `original`. 20 | * `-q` prevents warnings (e.g. shift/reduce errors) from being reported. 21 | 22 | You can use your own Lex/Yacc files. A small repository of example grammars can 23 | be found at [https://github.com/softdevteam/grammars/](https://github.com/softdevteam/grammars/).
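For instance, to check a grammar written in grmtools's own Yacc variant with error recovery switched off, an invocation might look as follows (a hypothetical example: `calc.l`, `calc.y`, and `input.txt` stand in for your own files):

```
$ nimbleparse -y grmtools -r none calc.l calc.y input.txt
```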
24 | 25 | An example invocation is as follows: 26 | 27 | ``` 28 | $ cat Hello.java 29 | class Hello { 30 | public static void main(String[] args) { 31 | System.out.println("Hello world"); 32 | } 33 | } 34 | $ nimbleparse java7.l java7.y Hello.java 35 | goal 36 | compilation_unit 37 | type_declarations_opt 38 | type_declarations 39 | type_declaration 40 | class_declaration 41 | modifiers_opt 42 | CLASS class 43 | IDENTIFIER Hello 44 | type_parameters_opt 45 | super_opt 46 | interfaces_opt 47 | class_body 48 | LBRACE { 49 | class_body_declarations_opt 50 | class_body_declarations 51 | class_body_declaration 52 | class_member_declaration 53 | method_declaration 54 | method_header 55 | modifiers_opt 56 | modifiers 57 | modifiers 58 | modifier 59 | PUBLIC public 60 | modifier 61 | STATIC static 62 | VOID void 63 | method_declarator 64 | IDENTIFIER main 65 | LPAREN ( 66 | formal_parameter_list_opt 67 | formal_parameter_list 68 | formal_parameter 69 | type 70 | reference_type 71 | array_type 72 | name 73 | simple_name 74 | IDENTIFIER String 75 | dims 76 | LBRACK [ 77 | RBRACK ] 78 | variable_declarator_id 79 | IDENTIFIER args 80 | RPAREN ) 81 | throws_opt 82 | method_body 83 | block 84 | LBRACE { 85 | block_statements_opt 86 | block_statements 87 | block_statement 88 | statement 89 | statement_without_trailing_substatement 90 | expression_statement 91 | statement_expression 92 | method_invocation 93 | qualified_name 94 | name 95 | qualified_name 96 | name 97 | simple_name 98 | IDENTIFIER System 99 | DOT . 100 | IDENTIFIER out 101 | DOT . 102 | IDENTIFIER println 103 | LPAREN ( 104 | argument_list_opt 105 | argument_list 106 | expression 107 | assignment_expression 108 | conditional_expression 109 | conditional_or_expression 110 | conditional_and_expression 111 | inclusive_or_expression 112 | exclusive_or_expression 113 | and_expression 114 | equality_expression 115 | instanceof_expression 116 | relational_expression 117 | shift_expression 118 | additive_expression 119 | multiplicative_expression 120 | unary_expression 121 | unary_expression_not_plus_minus 122 | postfix_expression 123 | primary 124 | primary_no_new_array 125 | literal 126 | STRING_LITERAL "Hello world" 127 | RPAREN ) 128 | SEMICOLON ; 129 | RBRACE } 130 | RBRACE } 131 | $ cat SyntaxError.java 132 | class SyntaxError { 133 | int x y; 134 | } 135 | $ nimbleparse java7.l java7.y SyntaxError.java 136 | goal 137 | compilation_unit 138 | type_declarations_opt 139 | type_declarations 140 | type_declaration 141 | class_declaration 142 | modifiers_opt 143 | CLASS class 144 | IDENTIFIER SyntaxError 145 | type_parameters_opt 146 | super_opt 147 | interfaces_opt 148 | class_body 149 | LBRACE { 150 | class_body_declarations_opt 151 | class_body_declarations 152 | class_body_declaration 153 | class_member_declaration 154 | field_declaration 155 | modifiers_opt 156 | type 157 | primitive_type 158 | numeric_type 159 | integral_type 160 | INT int 161 | variable_declarators 162 | variable_declarators 163 | variable_declarator 164 | variable_declarator_id 165 | IDENTIFIER x 166 | COMMA 167 | variable_declarator 168 | variable_declarator_id 169 | IDENTIFIER y 170 | SEMICOLON ; 171 | RBRACE } 172 | 173 | Parsing error at line 2 column 11.
Repair sequences found: 174 | 1: Insert , 175 | 2: Insert = 176 | 3: Delete y 177 | ``` 178 | -------------------------------------------------------------------------------- /doc/src/parsing_idioms.md: -------------------------------------------------------------------------------- 1 | # grmtools parsing idioms 2 | 3 | grmtools is a flexible tool and can be used in many ways. However, for those 4 | using the `Grmtools` format, the simple idioms below can often make life easier. 5 | 6 | 7 | ## Return `Span`s when possible 8 | 9 | When executing grammar actions one is often building up an Abstract Syntax Tree 10 | (AST) or equivalent. For example consider a simple language with assignments: 11 | 12 | ``` 13 | Assign: "ID" "=" Expr; 14 | ``` 15 | 16 | Perhaps the "obvious" way to build this into an AST is to extract the string 17 | representing the identifier as follows: 18 | 19 | ```rust,noplaypen 20 | Assign -> ASTAssign: "ID" "=" Expr 21 | { 22 | let id = $lexer.span_str($1.as_ref().unwrap().span()).to_string(); 23 | ASTAssign::new(id, $3) 24 | } 25 | 26 | %% 27 | 28 | struct ASTAssign { 29 | id: String, expr: Expr 30 | } 31 | 32 | impl ASTAssign { 33 | fn new(id: String, expr: Expr) -> Self { 34 | ASTAssign { id, expr } 35 | } 36 | } 37 | ``` 38 | 39 | This approach is easy to work with, but isn't as performant as may be desired: 40 | the `to_string` call allocates memory and copies part of the user's input into 41 | it. It also loses information about the part of the user's input that the 42 | string relates to. 43 | 44 | An alternative approach is not to convert the lexeme into a `String` during 45 | parsing, but simply to return a 46 | [`Span`](https://docs.rs/lrpar/~0/lrpar/struct.Span.html). An outline of this 47 | is as follows: 48 | 49 | ```rust,noplaypen 50 | Assign -> ASTAssign: "ID" "=" Expr 51 | { 52 | ASTAssign { id: $1.as_ref().unwrap().span(), expr: Box::new($3) } 53 | } 54 | 55 | %% 56 | 57 | type StorageT = u32; 58 | 59 | struct ASTAssign { 60 | id: Span, 61 | expr: Box<Expr> 62 | } 63 | 64 | enum Expr { ... } 65 | ``` 66 | 67 | If this is not quite what you want to do, you can use largely the same trick with 68 | the [`Lexeme`](https://docs.rs/lrpar/~0/lrpar/lex/struct.Lexeme.html) `struct`. 69 | Working with `Lexeme`s has the advantage that you can tell what the type of the 70 | lexeme in question is, though generally this is entirely clear from AST 71 | context, and `Lexeme`'s type parameter makes it marginally more fiddly to work 72 | with than `Span`. 73 | 74 | Alternatively, if you really want to extract strings during parsing, consider 75 | using the `'input` lifetime to extract `&str`s, since this does not 76 | cause any additional memory to be allocated. 77 | 78 | 79 | ## Have rules return a `Result` type 80 | 81 | As described in the [error recovery 82 | section](errorrecovery.html#a-rule-of-thumb-have-rules-return-a-result-type), it 83 | is generally a good idea to give rules a `Result` return type as this allows 84 | you to easily stop, or change, action code execution if you encounter 85 | "important" inserted lexemes. There are many ways that you can use this, but 86 | many simple cases work well using either: 87 | 88 | * `Err(())` works well if you are creating a parse tree and simply want to 89 | stop creating the tree when you encounter an important inserted lexeme. 90 | 91 | * `Err(Box<dyn Error>)` works well if you are performing more detailed 92 | evaluation while parsing and wish to explain to the user why you stopped 93 | evaluating when you encountered an important inserted lexeme.
94 | 95 | 96 | ### Using `Err(())` 97 | 98 | The idea here is that we stop evaluating normal action code by returning 99 | `Err(())`. However, this can lead to endless instances of the following 100 | `map_err` idiom: 101 | 102 | ```rust,noplaypen 103 | R -> Result<..., ()>: 104 | "ID" { $1.map_err(|_| ())? } 105 | ; 106 | ``` 107 | 108 | It can be helpful to define a custom `map_err` function which hides some of this 109 | mess for you: 110 | 111 | ```rust,noplaypen 112 | R -> Result<Lexeme<StorageT>, ()>: 113 | "ID" { map_err($1)? } 114 | ; 115 | 116 | %% 117 | 118 | fn map_err<StorageT>(r: Result<Lexeme<StorageT>, Lexeme<StorageT>>) 119 | -> Result<Lexeme<StorageT>, ()> 120 | { 121 | r.map_err(|_| ()) 122 | } 123 | ``` 124 | 125 | 126 | ### Using `Err(Box<dyn Error>)` 127 | 128 | The idea here is that we both stop evaluating normal action code, and explain 129 | why, by returning `Err(Box<dyn Error>)`. Although `Box<dyn Error>` is something 130 | of a mouthful, it allows you significant flexibility in *what* you return in 131 | error situations. If you want to quickly experiment, then this is convenient 132 | because the token type `Result<Lexeme<StorageT>, Lexeme<StorageT>>` can be 133 | automatically coerced to `Box<dyn Error>` (e.g. `$1?` in action code will 134 | return the `Err` variant without additional code). You can also return 135 | strings-as-errors with `Box::<dyn Error>::from("...")`. 136 | 137 | Using this idiom we can change our calculator example to deal with many more 138 | possible sources of error: 139 | 140 | ```rust,noplaypen 141 | 142 | %start Expr 143 | %avoid_insert "INT" 144 | %% 145 | Expr -> Result<u64, Box<dyn Error>>: 146 | Expr '+' Term 147 | { 148 | Ok($1?.checked_add($3?) 149 | .ok_or(Box::<dyn Error>::from("Overflow detected."))?) 150 | } 151 | | Term { $1 } 152 | ; 153 | 154 | Term -> Result<u64, Box<dyn Error>>: 155 | Term '*' Factor 156 | { 157 | Ok($1?.checked_mul($3?) 158 | .ok_or(Box::<dyn Error>::from("Overflow detected."))?) 159 | } 160 | | Factor { $1 } 161 | ; 162 | 163 | Factor -> Result<u64, Box<dyn Error>>: 164 | '(' Expr ')' { $2 } 165 | | 'INT' 166 | { 167 | parse_int( 168 | $lexer.span_str( 169 | $1.map_err(|_| "")?.span())) 170 | } 171 | ; 172 | %% 173 | // Any imports here are in scope for all the grammar actions above. 174 | 175 | use std::error::Error; 176 | 177 | fn parse_int(s: &str) -> Result<u64, Box<dyn Error>> { 178 | match s.parse::<u64>() { 179 | Ok(val) => Ok(val), 180 | Err(_) => { 181 | Err(Box::from( 182 | format!("{} cannot be represented as a u64", s))) 183 | } 184 | } 185 | } 186 | ``` 187 | 188 | 189 | ## Define a `flatten` function 190 | 191 | Yacc grammars make specifying sequences of things something of a bore. A common 192 | idiom is thus: 193 | 194 | ```rust,noplaypen 195 | ListOfAs -> Result<Vec<A>, ()>: 196 | A { Ok(vec![$1?]) } 197 | | ListOfAs A 198 | { 199 | let mut lst = $1?; 200 | lst.push($2?); 201 | Ok(lst) 202 | } 203 | ; 204 | 205 | A -> Result<A, ()>: ... ; 206 | ``` 207 | Since this idiom is often present multiple times in a grammar, it's generally 208 | worth adding a `flatten` function to hide some of this: 209 | 210 | ```rust,noplaypen 211 | ListOfAs -> Result<Vec<A>, ()>: 212 | A { Ok(vec![$1?]) } 213 | | ListOfAs A { flatten($1, $2) } 214 | ; 215 | 216 | A -> Result<A, ()>: ... ; 217 | %% 218 | 219 | fn flatten<T>(lhs: Result<Vec<T>, ()>, rhs: Result<T, ()>) 220 | -> Result<Vec<T>, ()> 221 | { 222 | let mut flt = lhs?; 223 | flt.push(rhs?); 224 | Ok(flt) 225 | } 226 | ``` 227 | 228 | Note that `flatten` is generic with respect to `T` so that it can be used in 229 | multiple places in the grammar. 230 | 231 | 232 | ## Composing idioms 233 | 234 | 235 | The above idioms compose well together.
For example, `flatten`, `map_err`, and 236 | `Lexeme` can be used together as shown in the following example: 237 | 238 | ```rust,noplaypen 239 | ListOfIds -> Result<Vec<Lexeme<StorageT>>, ()>: 240 | "ID" { Ok(vec![map_err($1)?]) } 241 | | ListOfIds "ID" { flatten($1, map_err($2)) } 242 | ; 243 | 244 | %% 245 | 246 | type StorageT = u32; 247 | 248 | fn map_err(r: Result<Lexeme<StorageT>, Lexeme<StorageT>>) 249 | -> Result<Lexeme<StorageT>, ()> 250 | { 251 | r.map_err(|_| ()) 252 | } 253 | 254 | fn flatten<T>(lhs: Result<Vec<T>, ()>, rhs: Result<T, ()>) 255 | -> Result<Vec<T>, ()> 256 | { 257 | let mut flt = lhs?; 258 | flt.push(rhs?); 259 | Ok(flt) 260 | } 261 | ``` 262 | -------------------------------------------------------------------------------- /lrpar/cttests/src/cgen_helper.rs: -------------------------------------------------------------------------------- 1 | use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind}; 2 | use lrlex::CTLexerBuilder; 3 | use lrpar::RecoveryKind; 4 | use std::{ 5 | env, fs, 6 | path::{Path, PathBuf}, 7 | }; 8 | use yaml_rust2::YamlLoader; 9 | 10 | #[allow(dead_code)] 11 | pub(crate) fn run_test_path<P: AsRef<Path>>(path: P) -> Result<(), Box<dyn std::error::Error>> { 12 | let out_dir = env::var("OUT_DIR").unwrap(); 13 | if path.as_ref().is_file() { 14 | println!("cargo::rerun-if-changed={}", path.as_ref().display()); 15 | // Parse test file 16 | let s = fs::read_to_string(path.as_ref()).unwrap(); 17 | let docs = YamlLoader::load_from_str(&s).unwrap(); 18 | let grm = &docs[0]["grammar"].as_str().unwrap(); 19 | let lex = &docs[0]["lexer"].as_str().unwrap(); 20 | let yacckind = match docs[0]["yacckind"].as_str() { 21 | Some("Original(YaccOriginalActionKind::NoAction)") => { 22 | Some(YaccKind::Original(YaccOriginalActionKind::NoAction)) 23 | } 24 | Some("Original(YaccOriginalActionKind::UserAction)") => { 25 | Some(YaccKind::Original(YaccOriginalActionKind::UserAction)) 26 | } 27 | Some("Grmtools") => Some(YaccKind::Grmtools), 28 | Some("Original(YaccOriginalActionKind::GenericParseTree)") => { 29 | Some(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)) 30 | } 31 | Some(s) => panic!("YaccKind '{}' not supported", s), 32 | None => None, 33 | }; 34 | let recoverer = match docs[0]["recoverer"].as_str() { 35 | Some("RecoveryKind::CPCTPlus") => Some(RecoveryKind::CPCTPlus), 36 | Some("RecoveryKind::None") => Some(RecoveryKind::None), 37 | _ => None, 38 | }; 39 | let (negative_lex_flags, positive_lex_flags) = &docs[0]["lex_flags"] 40 | .as_vec() 41 | .map(|flags_vec| { 42 | flags_vec 43 | .iter() 44 | .partition(|flag| flag.as_str().unwrap().starts_with('!')) 45 | }) 46 | .unwrap_or_else(|| (Vec::new(), Vec::new())); 47 | let negative_lex_flags = negative_lex_flags 48 | .iter() 49 | .map(|flag| { 50 | let flag = flag.as_str().unwrap(); 51 | flag.strip_prefix('!').unwrap() 52 | }) 53 | .collect::<Vec<_>>(); 54 | let positive_lex_flags = positive_lex_flags 55 | .iter() 56 | .map(|flag| flag.as_str().unwrap()) 57 | .collect::<Vec<_>>(); 58 | let lex_flags = (&positive_lex_flags, &negative_lex_flags); 59 | 60 | // The code below, in essence, replicates lrlex and lrpar's internal / undocumented 61 | // filename conventions. If those change, this code will also have to change.
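// For orientation, the YAML `.test` files parsed above look roughly like the
// following sketch (not a verbatim test file; see e.g. `calc_input.test` in
// this directory for real examples). Only `grammar` and `lexer` are required
// (they are `unwrap`ped above); the other keys are optional and map onto the
// builder calls below:
//
//   yacckind: Grmtools
//   recoverer: RecoveryKind::CPCTPlus
//   yacc_flags: ["warnings_are_errors", "!show_warnings"]
//   lex_flags: ["case_insensitive", "!octal"]
//   grammar: |
//     %start Expr
//     ...
//   lexer: |
//     %%
//     ...
//   extra_files:
//     some.input: |
//       ...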
62 | 63 | // Create grammar files 64 | let base = path.as_ref().file_stem().unwrap().to_str().unwrap(); 65 | let mut pg = PathBuf::from(&out_dir); 66 | pg.push(format!("{}.test.y", base)); 67 | fs::write(&pg, grm).unwrap(); 68 | let mut pl = PathBuf::from(&out_dir); 69 | pl.push(format!("{}.test.l", base)); 70 | fs::write(&pl, lex).unwrap(); 71 | 72 | if let Some(extra_files) = docs[0]["extra_files"].as_hash() { 73 | for (filename, contents) in extra_files.iter() { 74 | let mut out_file = PathBuf::from(&out_dir); 75 | let filename = filename.as_str().unwrap(); 76 | out_file.push(filename); 77 | let contents = contents.as_str().unwrap(); 78 | fs::write(&out_file, contents).unwrap(); 79 | } 80 | } 81 | 82 | // Build parser and lexer 83 | let mut outl = PathBuf::from(&out_dir); 84 | outl.push(format!("{}.l.rs", base)); 85 | outl.set_extension("rs"); 86 | let mut cl_build = CTLexerBuilder::new() 87 | .lrpar_config(|mut cp_build| { 88 | let mut outp = PathBuf::from(&out_dir); 89 | outp.push(format!("{}.y.rs", base)); 90 | outp.set_extension("rs"); 91 | let (negative_yacc_flags, positive_yacc_flags) = &docs[0]["yacc_flags"] 92 | .as_vec() 93 | .map(|flags_vec| { 94 | flags_vec 95 | .iter() 96 | .partition(|flag| flag.as_str().unwrap().starts_with('!')) 97 | }) 98 | .unwrap_or_else(|| (Vec::new(), Vec::new())); 99 | let positive_yacc_flags = positive_yacc_flags 100 | .iter() 101 | .map(|flag| flag.as_str().unwrap()) 102 | .collect::<Vec<_>>(); 103 | let negative_yacc_flags = negative_yacc_flags 104 | .iter() 105 | .map(|flag| { 106 | let flag = flag.as_str().unwrap(); 107 | flag.strip_prefix('!').unwrap() 108 | }) 109 | .collect::<Vec<_>>(); 110 | let yacc_flags = (&positive_yacc_flags, &negative_yacc_flags); 111 | if let Some(yacckind) = yacckind { 112 | cp_build = cp_build.yacckind(yacckind); 113 | } 114 | if let Some(recoverer) = recoverer { 115 | cp_build = cp_build.recoverer(recoverer) 116 | } 117 | cp_build = cp_build 118 | .grammar_path(pg.to_str().unwrap()) 119 | .output_path(&outp); 120 | if let Some(flag) = check_flag(yacc_flags, "error_on_conflicts") { 121 | cp_build = cp_build.error_on_conflicts(flag) 122 | } 123 | if let Some(flag) = check_flag(yacc_flags, "warnings_are_errors") { 124 | cp_build = cp_build.warnings_are_errors(flag) 125 | } 126 | if let Some(flag) = check_flag(yacc_flags, "show_warnings") { 127 | cp_build = cp_build.show_warnings(flag) 128 | }; 129 | cp_build 130 | }) 131 | .lexer_path(pl.to_str().unwrap()) 132 | .output_path(&outl); 133 | if let Some(flag) = check_flag(lex_flags, "allow_missing_terms_in_lexer") { 134 | cl_build = cl_build.allow_missing_terms_in_lexer(flag) 135 | } 136 | if let Some(flag) = check_flag(lex_flags, "allow_missing_tokens_in_parser") { 137 | cl_build = cl_build.allow_missing_tokens_in_parser(flag) 138 | } 139 | if let Some(flag) = check_flag(lex_flags, "dot_matches_new_line") { 140 | cl_build = cl_build.dot_matches_new_line(flag) 141 | } 142 | if let Some(flag) = check_flag(lex_flags, "case_insensitive") { 143 | cl_build = cl_build.case_insensitive(flag) 144 | } 145 | if let Some(flag) = check_flag(lex_flags, "multi_line") { 146 | cl_build = cl_build.multi_line(flag) 147 | } 148 | if let Some(flag) = check_flag(lex_flags, "swap_greed") { 149 | cl_build = cl_build.swap_greed(flag) 150 | } 151 | if let Some(flag) = check_flag(lex_flags, "ignore_whitespace") { 152 | cl_build = cl_build.ignore_whitespace(flag) 153 | } 154 | if let Some(flag) = check_flag(lex_flags, "unicode") { 155 | cl_build = cl_build.unicode(flag) 156 | } 157 | if let Some(flag) =
check_flag(lex_flags, "octal") { 158 | cl_build = cl_build.octal(flag) 159 | } 160 | cl_build.build()?; 161 | } 162 | Ok(()) 163 | } 164 | 165 | fn check_flag((positive, negative): (&Vec<&str>, &Vec<&str>), flag: &str) -> Option<bool> { 166 | assert_eq!( 167 | positive.contains(&flag) | negative.contains(&flag), 168 | positive.contains(&flag) ^ negative.contains(&flag) 169 | ); 170 | if positive.contains(&flag) { 171 | Some(true) 172 | } else if negative.contains(&flag) { 173 | Some(false) 174 | } else { 175 | None 176 | } 177 | } 178 | --------------------------------------------------------------------------------