├── .watchmanconfig
├── .eslintignore
├── .gitignore
├── examples
    ├── example1.slr1
    ├── grammar.slr1
    ├── grammar.lr0
    ├── example.ll1
    ├── auto-tokens.g
    ├── binary.g
    ├── calc.slr1
    ├── calculator.g
    ├── and.hdl
    ├── calculator-assoc-conflict.g
    ├── test.letter
    ├── word-boundary.g
    ├── word-boundary.g.js
    ├── follow-follow-conflict.ll1
    ├── first-follow-conflict.ll1
    ├── calc.cpp.g
    ├── calc.jl.g
    ├── calc.py.g
    ├── calc.rb.g
    ├── calc.php.g
    ├── first-first-conflict.ll1
    ├── explicit-eof.g
    ├── calc.cs.g
    ├── calc.example.g
    ├── calc-eval.bnf
    ├── case-insensitive-lex.g
    ├── balanced-parens.slr1
    ├── s-expression.g
    ├── calc.ll1
    ├── module-include.bnf
    ├── calculator-assoc.g
    ├── calc.rs.g
    ├── calc.java.g
    ├── calc.cpp.ast.g
    ├── calc-eval.g
    ├── lambda-calculus.g
    ├── calc-loc.jl.bnf
    ├── bnf.bnf
    ├── on-token.bnf
    ├── calc-loc.py.bnf
    ├── calc-loc.php.bnf
    ├── boolean.bnf
    ├── calc-loc.rb.bnf
    ├── calc-ast.rs.g
    ├── hdl.g
    ├── grammar.clr1
    ├── module-include.rb.g
    ├── json.grammar.js
    ├── module-include.g.js
    ├── json.ast.js
    ├── s-expression.cpp.bnf
    ├── test.lang
    ├── calc-loc.cs.bnf
    ├── cnf.g
    ├── module-include.py.g
    ├── calc-ast-java.bnf
    ├── lexer-start-conditions.py.g
    ├── module-include.cs.g
    ├── lexer-start-conditions.rb.g
    ├── module-include.php.g
    ├── lexer-start-conditions.g.js
    ├── calc-loc.bnf
    ├── indent.g
    ├── lang.lex
    ├── parser-lexer-communication.g
    └── parser-lexer-communication.php.g
├── src
    ├── __tests__
    │   ├── rust-calc
    │   │   ├── .gitignore
    │   │   ├── Cargo.toml
    │   │   ├── calc-syntax
    │   │   │   ├── build.rs
    │   │   │   ├── Cargo.toml
    │   │   │   └── Makefile
    │   │   ├── calc-bin
    │   │   │   ├── Cargo.toml
    │   │   │   └── src
    │   │   │   │   └── main.rs
    │   │   └── Cargo.lock
    │   ├── rust-plugin-test.js
    │   └── code-unit-test.js
    ├── ll
    │   ├── __tests__
    │   │   ├── grammar1.bnf
    │   │   └── ll-parsing-table-test.js
    │   └── ll-parser-generator-default.js
    ├── special-symbols.js
    ├── grammar
    │   ├── __tests__
    │   │   ├── calc.bnf
    │   │   ├── calc.lex
    │   │   ├── calc.g
    │   │   ├── grammar-mode-test.js
    │   │   ├── grammar-symbol-test.js
    │   │   └── lex-grammar-test.js
    │   ├── grammar-mode.js
    │   └── grammar-symbol.js
    ├── table-printer.js
    ├── debug.js
    ├── syntax.js
    ├── plugins
    │   ├── python
    │   │   ├── ll
    │   │   │   └── ll-parser-generator-py.js
    │   │   ├── lr
    │   │   │   └── lr-parser-generator-py.js
    │   │   └── templates
    │   │   │   └── ll.template.py
    │   ├── php
    │   │   ├── ll
    │   │   │   └── ll-parser-generator-php.js
    │   │   ├── lr
    │   │   │   └── lr-parser-generator-php.js
    │   │   └── templates
    │   │   │   └── ll.template.php
    │   ├── ruby
    │   │   ├── ll
    │   │   │   └── ll-parser-generator-ruby.js
    │   │   ├── lr
    │   │   │   └── lr-parser-generator-ruby.js
    │   │   └── templates
    │   │   │   └── ll.template.rb
    │   ├── csharp
    │   │   └── lr
    │   │   │   └── lr-parser-generator-csharp.js
    │   ├── java
    │   │   └── lr
    │   │   │   └── lr-parser-generator-java.js
    │   ├── julia
    │   │   └── lr
    │   │   │   └── lr-parser-generator-julia.js
    │   ├── cpp
    │   │   └── lr
    │   │   │   └── lr-parser-generator-cpp.js
    │   ├── rust
    │   │   └── lr
    │   │   │   └── lr-parser-generator-rust.js
    │   └── example
    │   │   ├── ll
    │   │       └── ll-parser-generator-example.js
    │   │   └── lr
    │   │       └── lr-parser-generator-example.js
    ├── lr
    │   ├── __tests__
    │   │   ├── lr-parser-generator-test.js
    │   │   ├── lr-parsing-table-test.js
    │   │   └── state-test.js
    │   └── lr-parser-generator-default.js
    └── templates
    │   └── ll.template.js
├── .prettierignore
├── bin
    └── syntax
├── scripts
    ├── git-pre-push
    ├── git-pre-commit
    └── build.js
├── .npmignore
├── .prettierrc
├── index.js
├── .babelrc
├── .travis.yml
├── .eslintrc.json
├── LICENSE
└── package.json


/.watchmanconfig:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | src/generated/
2 | src/templates/
3 | src/plugins/
4 | dist/


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /dist
2 | node_modules/
3 | .npm-debug.log
4 | npm-debug.log


--------------------------------------------------------------------------------
/examples/example1.slr1:
--------------------------------------------------------------------------------
1 | %%
2 | 
3 | E -> "1" E
4 |    | "1"
5 |    ;
6 | 


--------------------------------------------------------------------------------
/examples/grammar.slr1:
--------------------------------------------------------------------------------
1 | %%
2 | 
3 | S -> S "a"
4 |    | "b"
5 |    ;
6 | 


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /calc-syntax/src/lib.rs


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | src/generated/
2 | src/templates/
3 | src/plugins/
4 | dist/


--------------------------------------------------------------------------------
/examples/grammar.lr0:
--------------------------------------------------------------------------------
1 | %%
2 | 
3 | S -> A A;
4 | 
5 | A -> "a" A
6 |    | "b"
7 |    ;


--------------------------------------------------------------------------------
/bin/syntax:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | 
3 | 'use strict';
4 | 
5 | require('../dist/bin/syntax')();


--------------------------------------------------------------------------------
/scripts/git-pre-push:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # Pre-push validations (the push is aborted as soon as one of them fails):
4 | 
5 | npm test || exit 1
6 | 
7 | npm run eslint


--------------------------------------------------------------------------------
/examples/example.ll1:
--------------------------------------------------------------------------------
 1 | %%
 2 | 
 3 | S
 4 |   : F
 5 |   | '(' S '+' F ')'
 6 |   ;
 7 | 
 8 | F
 9 |   : 'id'
10 |   ;


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | 
3 | members = [
4 |   "calc-bin",
5 |   "calc-syntax",
6 | ]
7 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | /examples/
2 | /scripts/
3 | /src/
4 | .gitignore
5 | .eslintrc
6 | .babelrc
7 | .travis.yml
8 | .module-cache
9 | __tests__


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "singleQuote": true,
3 |   "semi": true,
4 |   "useTabs": false,
5 |   "tabWidth": 2,
6 |   "trailingComma": "es5",
7 |   "bracketSpacing": false
8 | }


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | /**
2 |  * The MIT License (MIT)
3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
4 |  */
5 | 
6 | module.exports = require('./dist/syntax');


--------------------------------------------------------------------------------
/src/ll/__tests__/grammar1.bnf:
--------------------------------------------------------------------------------
 1 | // https://github.com/DmitrySoshnikov/syntax/issues/151
 2 | 
 3 | %%
 4 | 
 5 | S
 6 |   : A
 7 |   ;
 8 | A
 9 |   : 'a'
10 |   | /* empty */
11 |   ;
12 | 


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/calc-syntax/build.rs:
--------------------------------------------------------------------------------
1 | use std::process::Command;
2 | 
3 | fn main() {
4 |     // `status()` fails only if `make` cannot be spawned, so the exit
5 |     // status is checked explicitly to stop the build when `make` fails.
6 |     let status = Command::new("make").status().unwrap();
7 |     assert!(status.success(), "`make` failed: {}", status);
8 |     println!("act-file parser lib successfully generated");
9 | }


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/calc-syntax/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "calc-syntax"
 3 | version = "0.1.0"
 4 | authors = ["Andrey Rublev <box@anru.me>"]
 5 | edition = "2018"
 6 | build = "build.rs"
 7 | 
 8 | [dependencies]
 9 | onig = "4"
10 | lazy_static = "1"


--------------------------------------------------------------------------------
/examples/auto-tokens.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Tokens `PLUS` and `ZERO` automatically inferred.
 3 |  *
 4 |  *   ./bin/syntax --grammar examples/auto-tokens.g --mode slr1 --table
 5 |  */
 6 | 
 7 | %%
 8 | 
 9 | E -> E PLUS T
10 |    | T
11 |    ;
12 | 
13 | T -> ZERO
14 |    ;


--------------------------------------------------------------------------------
/examples/binary.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Binary numbers.
 3 |  *
 4 |  * Example:
 5 |  *
 6 |  *   ./bin/syntax -g examples/binary.g -p '101001101' -t -m slr1
 7 |  */
 8 | 
 9 | %%
10 | 
11 | N -> L;
12 | 
13 | L -> L B
14 |    | B
15 |    ;
16 | 
17 | B -> '1'
18 |    | '0'
19 |    ;


--------------------------------------------------------------------------------
/.babelrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "presets": [
 3 |     [
 4 |       "@babel/preset-env",
 5 |       {
 6 |         "targets": {
 7 |           "node": "0.12"
 8 |         }
 9 |       }
10 |     ],
11 |   ],
12 |   "plugins": [
13 |     "@babel/plugin-transform-object-rest-spread"
14 |   ]
15 | }


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: node_js
 3 | node_js:
 4 |   - "8.9.4"
 5 |   - "10.15.3"
 6 | before_install:
 7 |   - curl https://sh.rustup.rs -sSf | sh -s -- -y
 8 |   - source $HOME/.cargo/env
 9 | cache:
10 |   cargo: true
11 |   directories:
12 |     - node_modules


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/calc-bin/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "calc-bin"
3 | version = "0.1.0"
4 | authors = ["Andrey Rublev <box@anru.me>", "DmitrySoshnikov <dmitry.soshnikov@gmail.com>"]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | calc-syntax = { path = "../calc-syntax" }


--------------------------------------------------------------------------------
/examples/calc.slr1:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * An LR(1) grammar with precedence, and assocs.
 3 |  *
 4 |  *   ./bin/syntax -g examples/calc.slr1 --table --parse 'id + id * id' -w
 5 |  *
 6 |  */
 7 | 
 8 | %left '+' '-'
 9 | %left '*' '/'
10 | 
11 | %%
12 | 
13 | E
14 |   : E '+' E
15 |   | E '*' E
16 |   | 'id'
17 |   | '(' E ')'
18 |   ;
19 | 


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/calc-bin/src/main.rs:
--------------------------------------------------------------------------------
 1 | extern crate calc_syntax;
 2 | 
 3 | use calc_syntax::Parser;
 4 | 
 5 | // Parses a fixed arithmetic expression with the `calc_syntax` parser
 6 | // and prints the result.
 7 | fn main() {
 8 |     let mut parser = Parser::new();
 9 | 
10 |     let parse_string = String::from("2 + 2 * 2");
11 |     let result = parser.parse(&parse_string);
12 | 
13 |     println!("parse result: {}", result);
14 | }
15 | 


--------------------------------------------------------------------------------
/src/special-symbols.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | /**
 7 |  * Special "empty" symbol, Epsilon.
 8 |  */
 9 | export const EPSILON = 'ε';
10 | 
11 | /**
12 |  * End of input, and bottom of the stack, "Dollar".
13 |  */
14 | export const EOF = '$';
15 | 


--------------------------------------------------------------------------------
/examples/calculator.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Example:
 3 |  *
 4 |  *   ./bin/syntax \
 5 |  *     --grammar examples/calculator.g \
 6 |  *     --mode slr1 \
 7 |  *     --parse '(id + id) * id' \
 8 |  *     --ignore-whitespaces
 9 |  */
10 | 
11 | %%
12 | 
13 | E -> E '+' T
14 |    | T
15 |    ;
16 | 
17 | T -> T '*' F
18 |    | F
19 |    ;
20 | 
21 | F -> 'id'
22 |    | '(' E ')'
23 |    ;


--------------------------------------------------------------------------------
/src/grammar/__tests__/calc.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * An LR(1) grammar with precedence, and assocs.
 3 |  */
 4 | 
 5 | %{
 6 |   (() => 'module include code')();
 7 | %}
 8 | 
 9 | %left '+' '-'
10 | %left '*' '/'
11 | 
12 | %%
13 | 
14 | E
15 |   : E '+' E   { $$ = ['+', $1, $2] }
16 |   | E '*' E   { $$ = ['*', $1, $2] }
17 |   | 'id'      { $$ = $1 }
18 |   | '(' E ')' { $$ = $2 }
19 |   ;
20 | 


--------------------------------------------------------------------------------
/scripts/git-pre-commit:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Prettier
 4 | 
 5 | jsfiles=$(git diff HEAD --name-only --diff-filter=ACM "*.js" | tr '\n' ' ')
 6 | 
 7 | if [ ! -z "$jsfiles" ]
 8 | then
 9 |   # Prettify all added/copied/modified .js files (compared to HEAD)
10 |   echo "$jsfiles" | xargs ./node_modules/.bin/prettier --write
11 | 
12 |   # Add back the modified/prettified files to staging
13 |   echo "$jsfiles" | xargs git add
14 | fi
15 | 
16 | # Pre-commit validations (the commit is aborted as soon as one of them fails):
17 | 
18 | npm test || exit 1
19 | 
20 | npm run eslint


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/calc-syntax/Makefile:
--------------------------------------------------------------------------------
 1 | rust_plugin_sources := $(wildcard ../../../plugins/rust/*.js) \
 2 |                $(wildcard ../../../plugins/rust/lr/*.js) \
 3 |                $(wildcard ../../../plugins/rust/templates/*.rs)
 4 | 
 5 | src/lib.rs: ../../../../examples/calc.rs.g ../../../../dist/bin/syntax.js $(rust_plugin_sources)
 6 | 	mkdir -p $(@D)
 7 | 	../../../../bin/syntax -g $< -m LALR1 -o $@
 8 | 
 9 | ../../../../dist/bin/syntax.js: $(rust_plugin_sources)
10 | 	npm run build


--------------------------------------------------------------------------------
/examples/and.hdl:
--------------------------------------------------------------------------------
 1 | // This file is part of www.nand2tetris.org
 2 | // and the book "The Elements of Computing Systems"
 3 | // by Nisan and Schocken, MIT Press.
 4 | // File name: projects/01/And.hdl
 5 | 
 6 | /**
 7 |  * And gate:
 8 |  * out = 1 if (a == 1 and b == 1)
 9 |  *       0 otherwise
10 |  */
11 | 
12 | CHIP And {
13 |     IN a, b;
14 |     OUT out;
15 | 
16 |     PARTS:
17 |     // Put your code here:
18 |     Nand(a=a, b=b, out=n);
19 |     Nand(a=n, b=n, out=out);
20 | }
21 | 


--------------------------------------------------------------------------------
/examples/calculator-assoc-conflict.g:
--------------------------------------------------------------------------------
 1 | %%
 2 | 
 3 | /**
 4 |  * This grammar has "shift-reduce" conflicts. See how to resolve them using
 5 |  * operators precedence in the `./examples/calculator-assoc.g`.
 6 |  *
 7 |  * Also automatic conflicts resolution is possible (see `--resolve-conflicts`
 8 |  * flag), however it may not always help, and a more correct way is to specify
 9 |  * precedence and associativity, or to rewrite grammar.
10 |  */
11 | 
12 | E
13 |   : E '+' E
14 |   | E '*' E
15 |   | 'id'
16 |   ;
17 | 


--------------------------------------------------------------------------------
/examples/test.letter:
--------------------------------------------------------------------------------
 1 | 
 2 | class Point {
 3 |     def constructor(x, y) {
 4 |         this.x = x;
 5 |         this.y = y;
 6 |     }
 7 | 
 8 |     def getX() {
 9 |         return this.x;
10 |     }
11 | 
12 |     def getY() {
13 |         return this.y;
14 |     }
15 | }
16 | 
17 | class Point3D extends Point {
18 |     def constructor(x, y, z) {
19 |         super(x, y);
20 |         this.z = z;
21 |     }
22 | 
23 |     def getZ() {
24 |         return this.z;
25 |     }
26 | }
27 | 
28 | let p = new Point3D(10, 20, 30);


--------------------------------------------------------------------------------
/examples/word-boundary.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Word boundary example: `if` keyword vs. `ifi` identifier.
 3 |  *
 4 |  * ./bin/syntax -g examples/word-boundary.g -m lalr1 -p 'if'
 5 |  *   > if-keyword
 6 |  *
 7 |  * ./bin/syntax -g examples/word-boundary.g -m lalr1 -p 'ifi'
 8 |  *   > identifier
 9 |  */
10 | 
11 | %lex
12 | 
13 | %%
14 | 
15 | 'if'\b          return 'IF'
16 | \w+             return 'ID'
17 | 
18 | /lex
19 | 
20 | %%
21 | 
22 | Program
23 |   : IF { $$ = 'if-keyword' }
24 |   | ID { $$ = 'identifier' }
25 |   ;
26 | 


--------------------------------------------------------------------------------
/src/table-printer.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | import Table from 'cli-table3';
 7 | 
 8 | /**
 9 |  * Wrapper class over `cli-table3` with default options preset.
10 |  */
11 | export default class TablePrinter {
12 |   constructor(options) {
13 |     return new Table(
14 |       Object.assign({}, options, {
15 |         style: {
16 |           head: ['blue'],
17 |           border: ['gray'],
18 |         },
19 |       })
20 |     );
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/examples/word-boundary.g.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Word boundary example: `if` keyword vs. `ifi` identifier.
 3 |  *
 4 |  * ./bin/syntax -g examples/word-boundary.g.js -m lalr1 -p 'if'
 5 |  *   > if-keyword
 6 |  *
 7 |  * ./bin/syntax -g examples/word-boundary.g.js -m lalr1 -p 'ifi'
 8 |  *   > identifier
 9 |  */
10 | 
11 | {
12 |   lex: {
13 |     rules: [
14 |       ["if\\b", "return 'IF'"],
15 |       ["\\w+",  "return 'ID'"]
16 |     ]
17 |   },
18 | 
19 |   "bnf": {
20 |     "Program": [["IF", " $$ = 'if-keyword' "],
21 |                 ["ID", " $$ = 'identifier' "]],
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/examples/follow-follow-conflict.ll1:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * LL(1): FOLLOW/FOLLOW conflict (rare case)
 3 |  *
 4 |  * Since `a` symbol is both in the FOLLOW of `B` and `C`, on `(A, a)`,
 5 |  * there will be a conflict between `A → B` and `A → C`.
 6 |  *
 7 |  *  ┌───┬─────┬───┐
 8 |  *  │   │ 'a' │ $ │
 9 |  *  ├───┼─────┼───┤
10 |  *  │ S │ 1   │   │
11 |  *  ├───┼─────┼───┤
12 |  *  │ A │ 2/3 │   │
13 |  *  ├───┼─────┼───┤
14 |  *  │ B │ 4   │   │
15 |  *  ├───┼─────┼───┤
16 |  *  │ C │ 5   │   │
17 |  *  └───┴─────┴───┘
18 |  */
19 | 
20 | %%
21 | 
22 | S:
23 |   A 'a'
24 |   ;
25 | 
26 | A
27 |   : B
28 |   | C
29 |   ;
30 | 
31 | B
32 |   : /* ε */
33 |   ;
34 | 
35 | C
36 |   : /* ε */
37 |   ;
38 | 


--------------------------------------------------------------------------------
/examples/first-follow-conflict.ll1:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * LL(1): FIRST/FOLLOW conflict
 3 |  *
 4 |  * The FIRST and FOLLOW set of a grammar rule overlap. With an empty string (ε)
 5 |  * in the FIRST set it is unknown which alternative to select.
 6 |  *
 7 |  * The FIRST set of A now is {'a', ε} and the FOLLOW set {'a'}.
 8 |  *
 9 |  * ./bin/syntax -g examples/first-follow-conflict.ll1 -t
10 |  *
11 |  * ┌───┬─────┬─────┬───┐
12 |  * │   │ 'a' │ 'b' │ $ │
13 |  * ├───┼─────┼─────┼───┤
14 |  * │ S │ 1   │     │   │
15 |  * ├───┼─────┼─────┼───┤
16 |  * │ A │ 2/3 │     │   │
17 |  * └───┴─────┴─────┴───┘
18 |  */
19 | 
20 | %%
21 | 
22 | S
23 |    : A 'a' 'b'
24 |    ;
25 | 
26 | A
27 |    : 'a'
28 |    | /* epsilon */
29 |    ;


--------------------------------------------------------------------------------
/examples/calc.cpp.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in C++.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.cpp.g -m lalr1 -o CalcParser.h
 5 |  *
 6 |  *   #include "CalcParser.h"
 7 |  *
 8 |  *   CalcParser parser;
 9 |  *
10 |  *   std::cout << parser.parse("2 + 2 * 2"); // 6
11 |  */
12 | 
13 | %lex
14 | 
15 | %%
16 | 
17 | \s+    %empty
18 | 
19 | \d+    NUMBER
20 | 
21 | /lex
22 | 
23 | %{
24 | 
25 | // Type of the parsing value. Can either
26 | // be a type alias or an actual struct:
27 | 
28 | using Value = int;
29 | 
30 | %}
31 | 
32 | %left '+'
33 | %left '*'
34 | 
35 | %%
36 | 
37 | E
38 |   : E '+' E   { $$ = $1 + $3 }
39 |   | E '*' E   { $$ = $1 * $3 }
40 |   | '(' E ')' { $$ = $2 }
41 |   | NUMBER    { $$ = std::stoi($1) }
42 |   ;


--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "env": {
 3 |     "es6": true,
 4 |     "node": true,
 5 |     "jest": true
 6 |   },
 7 |   "extends": "eslint:recommended",
 8 |   "parserOptions": {
 9 |     "sourceType": "module"
10 |   },
11 |   "rules": {
12 |     "indent": 0,
13 |     "linebreak-style": [
14 |       "error",
15 |       "unix"
16 |     ],
17 |     "quotes": [
18 |       "error",
19 |       "single",
20 |       {
21 |         "allowTemplateLiterals": true,
22 |         "avoidEscape": true
23 |       }
24 |     ],
25 |     "semi": [
26 |       "error",
27 |       "always"
28 |     ],
29 |     "no-useless-escape": 0,
30 |     "no-prototype-builtins": 0,
31 |     "no-console": ["error", { "allow": ["warn", "error", "info", "timeEnd", "time"] }]
32 |   }
33 | }


--------------------------------------------------------------------------------
/examples/calc.jl.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in Julia language
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.jl.g -m lalr1 -o CalcParser.jl
 5 |  *
 6 |  */
 7 | 
 8 | {
 9 |   "lex": {
10 |     "rules": [
11 |       ["\\s+",  '# skip whitespace'],
12 |       ["\\d+",  'return "NUMBER"'],
13 |       ["\\*",   'return "*"'],
14 |       ["\\+",   'return "+"'],
15 |       ["\\(",   'return "("'],
16 |       ["\\)",   'return ")"'],
17 |     ]
18 |   },
19 | 
20 |   "operators": [
21 |     ["left", "+"],
22 |     ["left", "*"],
23 |   ],
24 | 
25 |   "bnf": {
26 |     "E": [
27 |       ["E + E",  "$$ = $1 + $3"],
28 |       ["E * E",  "$$ = $1 * $3"],
29 |       ["NUMBER", "$$ = tryparse(Int, $1)"],
30 |       ["( E )",  "$$ = $2"],
31 |     ],
32 |   },
33 | }


--------------------------------------------------------------------------------
/examples/calc.py.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in Python.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.py.g -m lalr1 -o calcparser.py
 5 |  *
 6 |  * >>> import calcparser
 7 |  * >>> calcparser.parse('2 + 2 * 2')
 8 |  * 6
 9 |  */
10 | 
11 | {
12 |   "lex": {
13 |     "rules": [
14 |       ["\\s+",  "# skip whitespace"],
15 |       ["\\d+",  "return 'NUMBER'"],
16 |       ["\\*",   "return '*'"],
17 |       ["\\+",   "return '+'"],
18 |       ["\\(",   "return '('"],
19 |       ["\\)",   "return ')'"],
20 |     ]
21 |   },
22 | 
23 |   "operators": [
24 |     ["left", "+"],
25 |     ["left", "*"],
26 |   ],
27 | 
28 |   "bnf": {
29 |     "E": [
30 |       ["E + E",  "$$ = $1 + $3"],
31 |       ["E * E",  "$$ = $1 * $3"],
32 |       ["NUMBER", "$$ = int($1)"],
33 |       ["( E )",  "$$ = $2"],
34 |     ],
35 |   },
36 | }


--------------------------------------------------------------------------------
/examples/calc.rb.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in Ruby.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.rb.g -m lalr1 -o CalcParser.rb
 5 |  *
 6 |  *   require('CalcParser.rb')
 7 |  *
 8 |  *   puts CalcParser.parse('2 + 2 * 2') # 6
 9 |  */
10 | 
11 | {
12 |   "lex": {
13 |     "rules": [
14 |       ["\\s+",  "# skip whitespace"],
15 |       ["\\d+",  "return 'NUMBER'"],
16 |       ["\\*",   "return '*'"],
17 |       ["\\+",   "return '+'"],
18 |       ["\\(",   "return '('"],
19 |       ["\\)",   "return ')'"],
20 |     ]
21 |   },
22 | 
23 |   "operators": [
24 |     ["left", "+"],
25 |     ["left", "*"],
26 |   ],
27 | 
28 |   "bnf": {
29 |     "E": [
30 |       ["E + E",  "$$ = $1 + $3"],
31 |       ["E * E",  "$$ = $1 * $3"],
32 |       ["NUMBER", "$$ = $1.to_i"],
33 |       ["( E )",  "$$ = $2"],
34 |     ],
35 |   },
36 | }


--------------------------------------------------------------------------------
/src/debug.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | import colors from 'colors';
 7 | 
 8 | function emptyFn() {}
 9 | 
10 | /**
11 |  * Debug module.
12 |  */
13 | const Debug = {
14 |   isEnabled() {
15 |     return !!global.SYNTAX_DEBUG;
16 |   },
17 | 
18 |   string(message) {
19 |     return `${colors.bold('[DEBUG]')} ${message}`;
20 |   },
21 | 
22 |   log(message) {
23 |     console.info(Debug.string(message));
24 |   },
25 | 
26 |   time(label) {
27 |     console.time(this.string(label));
28 |   },
29 | 
30 |   timeEnd(label) {
31 |     console.timeEnd(this.string(label));
32 |   },
33 | };
34 | 
35 | if (!global.SYNTAX_DEBUG) {
36 |   Object.keys(Debug).forEach(method => Debug[method] = emptyFn);
37 | }
38 | 
39 | export default Debug;
40 | 


--------------------------------------------------------------------------------
/examples/calc.php.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in PHP.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.php.g -m lalr1 -o CalcParser.php
 5 |  *
 6 |  * <?php
 7 |  *
 8 |  *   require('CalcParser.php');
 9 |  *
10 |  *   var_dump(CalcParser::parse('2 + 2 * 2')); // int(6)
11 |  */
12 | 
13 | {
14 |   "lex": {
15 |     "rules": [
16 |       ["\\s+",  "/* skip whitespace */"],
17 |       ["\\d+",  "return 'NUMBER'"],
18 |       ["\\*",   "return '*'"],
19 |       ["\\+",   "return '+'"],
20 |       ["\\(",   "return '('"],
21 |       ["\\)",   "return ')'"],
22 |     ]
23 |   },
24 | 
25 |   "operators": [
26 |     ["left", "+"],
27 |     ["left", "*"],
28 |   ],
29 | 
30 |   "bnf": {
31 |     "E": [
32 |       ["E + E",  "$$ = $1 + $3"],
33 |       ["E * E",  "$$ = $1 * $3"],
34 |       ["NUMBER", "$$ = intval($1)"],
35 |       ["( E )",  "$$ = $2"],
36 |     ],
37 |   },
38 | }


--------------------------------------------------------------------------------
/examples/first-first-conflict.ll1:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * LL(1): FIRST/FIRST conflict
 3 |  *
 4 |  * The FIRST sets of two different grammar rules for the same non-terminal
 5 |  * intersect.
 6 |  *
 7 |  * FIRST(E) = {'b', ε} and FIRST(E 'a') = {'b', 'a'}, so when the table is
 8 |  * drawn, there is conflict under terminal 'b' of production rule S.
 9 |  *
10 |  * ./bin/syntax -g examples/first-first-conflict.ll1 -t
11 |  *
12 |  * ┌───┬─────┬─────┬───┐
13 |  * │   │ 'a' │ 'b' │ $ │
14 |  * ├───┼─────┼─────┼───┤
15 |  * │ S │ 2   │ 1/2 │   │
16 |  * ├───┼─────┼─────┼───┤
17 |  * │ E │ 4   │ 3   │ 4 │
18 |  * └───┴─────┴─────┴───┘
19 |  *
20 |  * Note: Left Recursion is a special type of the FIRST/FIRST conflict, which
21 |  * is caused by all the alternative rules:
22 |  *
23 |  * E : E '+' term | alt1 | alt2
24 |  */
25 | 
26 | %%
27 | 
28 | S
29 |    : E
30 |    | E 'a'
31 |    ;
32 | 
33 | E
34 |    : 'b'
35 |    | /* epsilon */
36 |    ;


--------------------------------------------------------------------------------
/examples/explicit-eof.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Explicit <<EOF>> handling.
 3 |  *
 4 |  * By default Syntax handles end of file with special EOF token, which is
 5 |  *
 6 |  *   {type: '$', value: ''}
 7 |  *
 8 |  * However, a grammar may want to handle EOF explicitly in case it uses
 9 |  * EOF explicitly in some rules. In this case a lexical rule should match
10 |  * special `<<EOF>>` regexp, which corresponds to the empty string
11 |  * at the end of the parsing string, i.e. `/^$/`. Any token type may be
12 |  * returned in this case.
13 |  *
14 |  * ./bin/syntax -g examples/explicit-eof.g -m slr1 -p '10'
15 |  */
16 | 
17 | {
18 |   lex: {
19 |     rules: [
20 |       [`\\d+`,            `return "NUMBER"`],
21 |       [`<<EOF>>`,         `return "EOF"`],
22 |     ],
23 |   },
24 | 
25 |   bnf: {
26 |     // The whole string consists only of one number (followed by EOF).
27 |     Main: [[`NUMBER EOF`,  `$$ = $1`]],
28 |   },
29 | }


--------------------------------------------------------------------------------
/src/grammar/__tests__/calc.lex:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * A testing lexical grammar.
 3 |  */
 4 | 
 5 | module.exports = {
 6 |   "macros": {
 7 |     "id": "[a-zA-Z0-9_]",
 8 |   },
 9 | 
10 |   "startConditions": {
11 |     "comment": 1, // exclusive
12 |   },
13 | 
14 |   "rules": [
15 |     [["*"],       "\\s+", "/*skip whitespace*/"],
16 | 
17 |     [["*"],       "<<EOF>>", "return 'EOF'"],
18 | 
19 |     ["\\d+",      "return 'NUMBER'"],
20 |     ["{id}+",     "return 'IDENTIFIER'"],
21 |     ["\\(",       "return '('"],
22 |     ["\\)",       "return ')'"],
23 |     ["\\+",       "return '+'"],
24 |     ["\\*",       "return '*'"],
25 | 
26 |     ["\\/\\*",    "this.pushState('comment');"],
27 |     [["comment"], "\\*+\\/", "this.popState();"],
28 |     [["comment"], "\\d+", "return 'NUMBER_IN_COMMENT'"],
29 |     [["comment"], "{id}+", "return 'IDENTIFIER_IN_COMMENT'"],
30 |   ],
31 | 
32 |   "options": {
33 |     "case-insensitive": true,
34 |   },
35 | };


--------------------------------------------------------------------------------
/examples/calc.cs.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in C#.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.cs.g -m lalr1 -o CalcParser.cs
 5 |  *
 6 |  * using SyntaxParser;
 7 |  *
 8 |  * var parser = new CalcParser();
 9 |  *
10 |  * Console.WriteLine(parser.parse("2 + 2 * 2"));   // 6
11 |  * Console.WriteLine(parser.parse("(2 + 2) * 2")); // 8
12 |  */
13 | 
14 | {
15 |   "lex": {
16 |     "rules": [
17 |       ["\\s+",  '/* skip whitespace */ return null'],
18 |       ["\\d+",  'return "NUMBER"'],
19 |       ["\\*",   'return "*"'],
20 |       ["\\+",   'return "+"'],
21 |       ["\\(",   'return "("'],
22 |       ["\\)",   'return ")"'],
23 |     ]
24 |   },
25 | 
26 |   "operators": [
27 |     ["left", "+"],
28 |     ["left", "*"],
29 |   ],
30 | 
31 |   "bnf": {
32 |     "E": [
33 |       ["E + E",  "$$ = $1 + $3"],
34 |       ["E * E",  "$$ = $1 * $3"],
35 |       ["NUMBER", "$$ = Convert.ToInt32($1)"],
36 |       ["( E )",  "$$ = $2"],
37 |     ],
38 |   },
39 | }


--------------------------------------------------------------------------------
/examples/calc.example.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in Example language (actual JS, used in plugins example).
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.example.g -m lalr1 -o CalcParser.example
 5 |  *
 6 |  * const CalcParser = require('CalcParser.example');
 7 |  *
 8 |  * const parser = new CalcParser();
 9 |  * console.log(parser.parse("2 + 2 * 2")); // 6
10 |  */
11 | 
12 | {
13 |   "lex": {
14 |     "rules": [
15 |       ["\\s+",  '/* skip whitespace */'],
16 |       ["\\d+",  'return "NUMBER"'],
17 |       ["\\*",   'return "*"'],
18 |       ["\\+",   'return "+"'],
19 |       ["\\(",   'return "("'],
20 |       ["\\)",   'return ")"'],
21 |     ]
22 |   },
23 | 
24 |   "operators": [
25 |     ["left", "+"],
26 |     ["left", "*"],
27 |   ],
28 | 
29 |   "bnf": {
30 |     "E": [
31 |       ["E + E",  "$$ = $1 + $3"],
32 |       ["E * E",  "$$ = $1 * $3"],
33 |       ["NUMBER", "$$ = Number($1)"],
34 |       ["( E )",  "$$ = $2"],
35 |     ],
36 |   },
37 | }


--------------------------------------------------------------------------------
/examples/calc-eval.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Precedence and assoc in Yacc format.
 3 |  *
 4 |  * See also in JSON grammar:
 5 |  *
 6 |  * https://github.com/DmitrySoshnikov/syntax/blob/master/examples/calc-eval.g
 7 |  *
 8 |  * To run this grammar:
 9 |  *
10 |  *   ./bin/syntax -g examples/calc-eval.bnf -w -m slr1 -p '5 + 5 * 5'
11 |  *
12 |  *   > 30
13 |  *
14 |  *   ./bin/syntax -g examples/calc-eval.bnf -w -m slr1 -p '5 + 5 * -5'
15 |  *
16 |  *   > -20
17 |  */
18 | 
19 | %lex
20 | 
21 | %%
22 | 
23 | \s+             /* skip whitespace */
24 | \d+             return 'NUMBER'
25 | 
26 | /lex
27 | 
28 | %left '+' '-'
29 | %left '*' '/'
30 | %right '^'
31 | %left UMINUS
32 | 
33 | %%
34 | 
35 | E
36 |   : E '+' E     { $$ = $1 + $3 }
37 |   | E '-' E     { $$ = $1 - $3 }
38 |   | E '*' E     { $$ = $1 * $3 }
39 |   | E '/' E     { $$ = $1 / $3 }
40 |   | E '^' E     { $$ = Math.pow($1, $3) }
41 |   | '-' E       %prec UMINUS { $$ = -$2 }
42 |   | '(' E ')'   { $$ = $2 }
43 |   | NUMBER      { $$ = Number($1) }
44 |   ;


--------------------------------------------------------------------------------
/examples/case-insensitive-lex.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Case-insensitive lexical rules.
 3 |  *
 4 |  * Examples (accepted):
 5 |  *
 6 |  *   ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'x'
 7 |  *   ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'X'
 8 |  *   ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'y'
 9 |  *
10 |  *   ✓ Accepted
11 |  *
12 |  *
13 |  * Example (fail, "Y" is not case-insensitive):
14 |  *
15 |  *   ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'Y'
16 |  *
17 |  *   Rejected: Unexpected token: "Y" at 1:0.
18 |  */
19 | {
20 |   "lex": {
21 |     "rules": [
22 | 
23 |       // This rule is by default case-insensitive:
24 | 
25 |       [`x`, `return "X"`],
26 | 
27 |       // This rule overrides global options:
28 | 
29 |       [`y`, `return "Y"`, {"case-insensitive": false}],
30 |     ],
31 | 
32 |     // Global options for the whole lexical grammar.
33 | 
34 |     "options": {
35 |       "case-insensitive": true,
36 |     }
37 |   },
38 | 
39 |   "bnf": {
40 |     "E": ["X", "Y"],
41 |   }
42 | }


--------------------------------------------------------------------------------
/examples/balanced-parens.slr1:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * How to run:
 3 |  *
 4 |  *   ./bin/syntax \
 5 |  *     --grammar examples/balanced-parens.slr1 \
 6 |  *     --table \
 7 |  *     --parse '((()))'
 8 |  *
 9 |  * Result:
10 |  *
11 |  *   Parsing mode: SLR1.
12 |  *
13 |  *   Grammar:
14 |  *
15 |  *       0. S' -> S
16 |  *       -----------
17 |  *       1. S -> '(' S ')'
18 |  *       2.    | ε
19 |  *
20 |  *   SLR(1) parsing table:
21 |  *
22 |  *   ┌───┬─────┬─────┬─────┬───┐
23 |  *   │   │ '(' │ ')' │ $   │ S │
24 |  *   ├───┼─────┼─────┼─────┼───┤
25 |  *   │ 0 │ s1  │ r2  │ r2  │ 3 │
26 |  *   ├───┼─────┼─────┼─────┼───┤
27 |  *   │ 1 │ s1  │ r2  │ r2  │ 2 │
28 |  *   ├───┼─────┼─────┼─────┼───┤
29 |  *   │ 2 │     │ s4  │     │   │
30 |  *   ├───┼─────┼─────┼─────┼───┤
31 |  *   │ 3 │     │     │ acc │   │
32 |  *   ├───┼─────┼─────┼─────┼───┤
33 |  *   │ 4 │     │ r1  │ r1  │   │
34 |  *   └───┴─────┴─────┴─────┴───┘
35 |  *
36 |  *   Parsing: ((()))
37 |  *
38 |  *   Accepted.
39 |  */
40 | 
41 | %%
42 | 
43 | S -> '(' S ')'
44 |    | /* epsilon */
45 |    ;


--------------------------------------------------------------------------------
/examples/s-expression.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * How to run:
 3 |  *
 4 |  *   ./bin/syntax \
 5 |  *     --grammar examples/s-expression.g \
 6 |  *     --mode slr1 \
 7 |  *     --parse '(+ 1 15)'
 8 |  *
 9 |  *   > ['+', 1, 15]
10 |  *
11 |  * See also recursive descent version for this grammar:
12 |  * https://gist.github.com/DmitrySoshnikov/2a434dda67019a4a7c37
13 |  */
14 | 
15 | {
16 |   "lex": {
17 |     "rules": [
18 |       ["\\s+", "/* skip whitespace */"],
19 |       ["\\d+", "return 'NUMBER';"],
20 |       ["[a-zA-Z\\-\\+\\*\\?\\=/]+\\d*", "return 'SYMBOL';"],
21 |       ["\\(", "return '(';"],
22 |       ["\\)", "return ')';"],
23 |     ]
24 |   },
25 | 
26 |   "bnf": {
27 |     "s-exp": [["atom", "return $$ = $1;"],
28 |               ["list", "return $$ = $1;"]],
29 | 
30 |      "list": [["( list-entries )", "$$ = $2;"]],
31 | 
32 |      "list-entries": [["s-exp list-entries", "$2.unshift($1); $$ = $2;"],
33 |                       ["ε", "$$ = [];"]],
34 | 
35 |      "atom": [["NUMBER", "$$ = Number(yytext);"],
36 |               ["SYMBOL", "$$ = yytext;"]]
37 |   }
38 | }


--------------------------------------------------------------------------------
/src/grammar/__tests__/calc.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * An LR(1) grammar with precedence, and assocs, in JS format.
 3 |  */
 4 | 
 5 | {
 6 |   "lex": {
 7 |     "startConditions": {
 8 |       "comment": 1, // exclusive
 9 |     },
10 | 
11 |     "rules": [
12 |       [["*"],  "\\s+", "/*skip whitespace*/"],
13 |       ["\\d+", "return 'NUMBER'"],
14 |       ["\\(",  "return '('"],
15 |       ["\\)",  "return ')'"],
16 |       ["\\+",  "return '+'"],
17 |       ["\\*",  "return '*'"],
18 | 
19 |       ["\\/\\*", "this.pushState('comment');"],
20 |       [["comment"], "\\*+\\/", "this.popState();"],
21 |       [["comment"], "\\d+", "return 'NUMBER_IN_COMMENT'"],
22 |     ],
23 |   },
24 | 
25 |   "operators": [
26 |     ["left", "+", "-"],
27 |     ["left", "*", "/"],
28 |   ],
29 | 
30 |   "bnf": {
31 |     "E": [["E + E",  "$$ = $1 + $3"],
32 |           ["E * E",  "$$ = $1 * $3"],
33 |           ["E - E",  "$$ = $1 - $3"],
34 |           ["E / E",  "$$ = $1 / $3"],
35 |           ["NUMBER", "$$ = Number($1)"],
36 |           ["( E )",  "$$ = $2"]],
37 |   },
38 | 
39 |   "moduleInclude": `
40 |     (() => "module include code")();
41 |   `
42 | }


--------------------------------------------------------------------------------
/examples/calc.ll1:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Left-factored LL(1) calculator grammar.
 3 |  *
 4 |  *   ./bin/syntax -g examples/calc.ll1 --table --parse 'id + id * id' -w
 5 |  *
 6 |  * Corresponding parsing table:
 7 |  *
 8 |  *   ┌────┬─────┬─────┬──────┬─────┬─────┬───┐
 9 |  *   │    │ "+" │ "*" │ "id" │ "(" │ ")" │ $ │
10 |  *   ├────┼─────┼─────┼──────┼─────┼─────┼───┤
11 |  *   │ E  │     │     │ 1    │ 1   │     │   │
12 |  *   ├────┼─────┼─────┼──────┼─────┼─────┼───┤
13 |  *   │ E' │ 2   │     │      │     │ 3   │ 3 │
14 |  *   ├────┼─────┼─────┼──────┼─────┼─────┼───┤
15 |  *   │ T  │     │     │ 4    │ 4   │     │   │
16 |  *   ├────┼─────┼─────┼──────┼─────┼─────┼───┤
17 |  *   │ T' │ 6   │ 5   │      │     │ 6   │ 6 │
18 |  *   ├────┼─────┼─────┼──────┼─────┼─────┼───┤
19 |  *   │ F  │     │     │ 7    │ 8   │     │   │
20 |  *   └────┴─────┴─────┴──────┴─────┴─────┴───┘
21 |  */
22 | 
23 | %%
24 | 
25 | E
26 |   : T E'
27 |   ;
28 | 
29 | E'
30 |   : "+" T E'
31 |   | /* epsilon */
32 |   ;
33 | 
34 | T
35 |   : F T'
36 |   ;
37 | 
38 | T'
39 |   : "*" F T'
40 |     | /* epsilon */
41 |     ;
42 | 
43 | F
44 |   : "id"
45 |   | "(" E ")"
46 |   ;


--------------------------------------------------------------------------------
/examples/module-include.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Module includes.
 3 |  *
 4 |  * The "moduleInclude" prologue code allows including arbitrary code at the
 5 |  * beginning of the generated parser file. As an example, it can be the code
 6 |  * to require modules for corresponding AST nodes, or direct AST nodes
 7 |  * definitions.
 8 |  */
 9 | 
10 | %{
11 |   // Define AST node classes.
12 |   class Node {}
13 | 
14 |   /* JS comments work here */
15 |   class Binary extends Node {}
16 |   class Primary extends Node {}
17 | 
18 |   // Can define callbacks for parse events here,
19 |   // attaching to `yyparse` object.
20 | 
21 |   yyparse.onParseBegin = (string) => {
22 |     console.log('Parsing:', string);
23 |   };
24 | 
25 |   yyparse.onParseEnd = (value) => {
26 |     console.log('Parsed:', value);
27 |   };
28 | 
29 | %}
30 | 
31 | %%
32 | 
33 | E
34 |   : E '+' T       { $$ = new Binary($1, $3, '+') }
35 |   | T             { $$ = $1 }
36 |   ;
37 | 
38 | T
39 |   : T '*' F       { $$ = new Binary($1, $3, '*') }
40 |   | F             { $$ = $1 }
41 |   ;
42 | 
43 | F
44 |   : 'id'          { $$ = new Primary($1) }
45 |   | '(' E ')'     { $$ = $2 }
46 |   ;


--------------------------------------------------------------------------------
/src/syntax.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | // To require local modules from root.
 7 | global.ROOT = __dirname + '/';
 8 | 
 9 | // Tokenizer.
10 | export {default as Tokenizer} from './tokenizer';
11 | 
12 | // Grammar classes.
13 | export {default as Grammar} from './grammar/grammar';
14 | export {default as GrammarSymbol} from './grammar/grammar-symbol';
15 | export {default as LexRule} from './grammar/lex-rule';
16 | export {default as Production} from './grammar/production';
17 | 
18 | // Sets generator.
19 | export {default as SetsGenerator} from './sets-generator';
20 | 
21 | // LR parsing.
22 | export {default as CanonicalCollection} from './lr/canonical-collection';
23 | export {default as State} from './lr/state';
24 | export {default as LRItem} from './lr/lr-item';
25 | export {default as LRParser} from './lr/lr-parser';
26 | export {default as LRParserGenerator} from './lr/lr-parser-generator-default';
27 | export {default as LRParsingTable} from './lr/lr-parsing-table';
28 | 
29 | // LL parsing.
30 | export {default as LLParsingTable} from './ll/ll-parsing-table';
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/ll/__tests__/ll-parsing-table-test.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | import Grammar from '../../grammar/grammar';
 7 | import {MODES as GRAMMAR_MODE} from '../../grammar/grammar-mode';
 8 | import LLParsingTable from '../ll-parsing-table';
 9 | import LLParser from '../ll-parser';
10 | 
11 | describe('ll-parsing-table', () => {
12 |   it('ll1-grammar-1', () => {
13 |     const grammarFile = __dirname + '/grammar1.bnf';
14 |     const expectedTable = {
15 |       S: {
16 |         "'a'": '1',
17 |         $: '1',
18 |       },
19 |       A: {
20 |         "'a'": '2',
21 |         $: '3',
22 |       },
23 |     };
24 | 
25 |     const grammarBySLR = Grammar.fromGrammarFile(grammarFile, {
26 |       mode: GRAMMAR_MODE.LL1,
27 |     });
28 |     expect(new LLParsingTable({grammar: grammarBySLR}).get()).toEqual(
29 |       expectedTable
30 |     );
31 |     expect(new LLParser({grammar: grammarBySLR}).parse('a')).toEqual({
32 |       status: 'accept',
33 |       semanticValue: true,
34 |     });
35 |     expect(new LLParser({grammar: grammarBySLR}).parse('')).toEqual({
36 |       status: 'accept',
37 |       semanticValue: true,
38 |     });
39 |   });
40 | });
41 | 


--------------------------------------------------------------------------------
/examples/calculator-assoc.g:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Precedence and associativity rules:
 4 | 
 5 | - If the token's precedence is higher, the choice is to shift:
 6 | 
 7 |     E -> E + E •
 8 |     E -> E • * E (choose to shift on `*` since its precedence is higher than of `+`)
 9 | 
10 | - If the rule's precedence is higher, the choice is to reduce:
11 | 
12 |     E -> E * E • (choose to reduce since precedence of the production is higher than of `+`)
13 |     E -> E • + E
14 | 
15 | - If they have equal precedence, the choice is made based on the associativity of that precedence level:
16 | 
17 |     E -> E * E • (choose to reduce since precedence is the same and `*` is left-associative)
18 |     E -> E • * E
19 | 
20 |   In this case we want `id * id * id` to be left-associative, i.e.
21 |   `(id * id) * id`, not right-associative, that would be `id * (id * id)`.
22 | 
23 | */
24 | 
25 | {
26 |     "lex": {
27 |         "rules": [
28 |             ["id",  "return 'id'"],
29 |             ["\\*", "return '*'"],
30 |             ["\\+", "return '+'"]
31 |         ]
32 |     },
33 | 
34 |     "operators": [
35 |         ["left", "+"],
36 |         ["left", "*"]
37 |     ],
38 | 
39 |     "bnf": {
40 |         "E": [
41 |             "E + E",
42 |             "E * E",
43 |             "id"
44 |         ]
45 |     }
46 | }


--------------------------------------------------------------------------------
/examples/calc.rs.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in Rust.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.rs.g -m lalr1 -o lib.rs
 5 |  *
 6 |  * use syntax::Parser;
 7 |  *
 8 |  * let parser = Parser::new();
 9 |  *
10 |  * println!("{:?}", parser.parse("2 + 2 * 2"));   // 6
11 |  * println!("{:?}", parser.parse("(2 + 2) * 2")); // 8
12 |  */
13 | 
14 | {
15 |   "lex": {
16 |     "rules": [
17 |       ["\\s+",  '/* skip whitespace */ ""'],
18 |       ["\\d+",  '"NUMBER"'],
19 |       ["\\*",   '"*"'],
20 |       ["\\+",   '"+"'],
21 |       ["\\(",   '"("'],
22 |       ["\\)",   '")"'],
23 |     ]
24 |   },
25 | 
26 |   "operators": [
27 |     ["left", "+"],
28 |     ["left", "*"],
29 |   ],
30 | 
31 |   "moduleInclude": `
32 | 
33 |       type TResult = i32;
34 | 
35 |       fn on_parse_begin(_parser: &mut Parser, string: &str) {
36 |           println!("on_parse_begin: {:?}", string);
37 |       }
38 | 
39 |       fn on_parse_end(_parser: &mut Parser, parsed: &TResult) {
40 |           println!("on_parse_end: {:?}", parsed);
41 |       }
42 | 
43 |   `,
44 | 
45 |   "bnf": {
46 |     "E": [
47 |       ["E + E",  "|$1: i32, $3: i32| -> i32; $$ = $1 + $3"],
48 |       ["E * E",  "|$1: i32, $3: i32| -> i32; $$ = $1 * $3"],
49 |       ["NUMBER", "|| -> i32; $$ = yytext.parse::<i32>().unwrap()"],
50 |       ["( E )",  "$$ = $2"],
51 |     ],
52 |   },
53 | }


--------------------------------------------------------------------------------
/src/plugins/python/ll/ll-parser-generator-py.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LLParserGeneratorDefault = require(ROOT + 'll/ll-parser-generator-default').default;
 7 | const PyParserGeneratorTrait = require('../py-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | 
11 | /**
12 |  * Generic Python template for LL(1) parser.
13 |  */
14 | const PY_LL_PARSER_TEMPLATE = fs.readFileSync(
15 |   `${__dirname}/../templates/ll.template.py`,
16 |   'utf-8'
17 | );
18 | 
19 | /**
20 |  * LL parser generator for Python.
21 |  */
22 | export default class LLParserGeneratorPy extends LLParserGeneratorDefault {
23 | 
24 |   /**
25 |    * Instance constructor.
26 |    */
27 |   constructor({
28 |     grammar,
29 |     outputFile,
30 |     options = {},
31 |   }) {
32 |     super({grammar, outputFile, options})
33 |       .setTemplate(PY_LL_PARSER_TEMPLATE);
34 | 
35 |     this._lexHandlers = [];
36 |     this._productionHandlers = [];
37 | 
38 |     // Trait provides methods for lex and production handlers.
39 |     Object.assign(this, PyParserGeneratorTrait);
40 |   }
41 | 
42 |   /**
43 |    * Generates parser code.
44 |    */
45 |   generateParserData() {
46 |     super.generateParserData();
47 |     this.generateLexHandlers();
48 |     this.generateProductionHandlers();
49 |   }
50 | };
51 | 


--------------------------------------------------------------------------------
/src/plugins/python/lr/lr-parser-generator-py.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
 7 | const PyParserGeneratorTrait = require('../py-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | 
11 | /**
12 |  * Generic Python template for all LR parsers.
13 |  */
14 | const PY_LR_PARSER_TEMPLATE = fs.readFileSync(
15 |   `${__dirname}/../templates/lr.template.py`,
16 |   'utf-8',
17 | );
18 | 
19 | /**
20 |  * LR parser generator for Python.
21 |  */
22 | export default class LRParserGeneratorPy extends LRParserGeneratorDefault {
23 | 
24 |   /**
25 |    * Instance constructor.
26 |    */
27 |   constructor({
28 |     grammar,
29 |     outputFile,
30 |     options = {},
31 |   }) {
32 |     super({grammar, outputFile, options})
33 |       .setTemplate(PY_LR_PARSER_TEMPLATE);
34 | 
35 |     this._lexHandlers = [];
36 |     this._productionHandlers = [];
37 | 
38 |     // Trait provides methods for lex and production handlers.
39 |     Object.assign(this, PyParserGeneratorTrait);
40 |   }
41 | 
42 |   /**
43 |    * Generates parser code.
44 |    */
45 |   generateParserData() {
46 |     super.generateParserData();
47 |     this.generateLexHandlers();
48 |     this.generateProductionHandlers();
49 |   }
50 | };
51 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "syntax-cli",
 3 |   "version": "0.1.27",
 4 |   "description": "Syntactic analysis toolkit, language agnostic parsers generator.",
 5 |   "repository": {
 6 |     "type": "git",
 7 |     "url": "https://github.com/DmitrySoshnikov/syntax.git"
 8 |   },
 9 |   "bugs": "https://github.com/DmitrySoshnikov/syntax/issues",
10 |   "main": "index.js",
11 |   "scripts": {
12 |     "build": "node scripts/build.js",
13 |     "watch": "node scripts/build.js --watch",
14 |     "test": "jest",
15 |     "prepublish": "npm run build && npm test",
16 |     "eslint": "eslint src/ && eslint bin/syntax"
17 |   },
18 |   "bin": {
19 |     "syntax-cli": "./bin/syntax"
20 |   },
21 |   "keywords": [
22 |     "parser",
23 |     "LL(1)",
24 |     "LR(1)",
25 |     "LALR(1)",
26 |     "SLR(1)",
27 |     "generator",
28 |     "JavaScript",
29 |     "Python",
30 |     "PHP",
31 |     "Ruby",
32 |     "C#"
33 |   ],
34 |   "author": "Dmitry Soshnikov",
35 |   "license": "MIT",
36 |   "dependencies": {
37 |     "nomnom": "^1.8.1",
38 |     "cli-table3": "^0.5.0",
39 |     "colors": "^1.1.2"
40 |   },
41 |   "devDependencies": {
42 |     "@babel/cli": "^7.23.4",
43 |     "@babel/preset-env": "^7.23.7",
44 |     "@babel/plugin-transform-object-rest-spread": "7.23.4",
45 |     "shelljs": "^0.8.5",
46 |     "jest-cli": "^29.3.1",
47 |     "eslint": "^8.28.0",
48 |     "prettier": "^1.11.1"
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/plugins/php/ll/ll-parser-generator-php.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LLParserGeneratorDefault = require(ROOT + 'll/ll-parser-generator-default').default;
 7 | const PHPParserGeneratorTrait = require('../php-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | import path from 'path';
11 | 
12 | /**
13 |  * Generic PHP template for LL(1) parser.
14 |  */
15 | const PHP_LL_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/ll.template.php`,
17 |   'utf-8'
18 | );
19 | 
20 | /**
21 |  * LL parser generator for PHP.
22 |  */
23 | export default class LLParserGeneratorPHP extends LLParserGeneratorDefault {
24 | 
25 |   /**
26 |    * Instance constructor.
27 |    */
28 |   constructor({
29 |     grammar,
30 |     outputFile,
31 |     options = {},
32 |   }) {
33 |     super({grammar, outputFile, options})
34 |       .setTemplate(PHP_LL_PARSER_TEMPLATE);
35 | 
36 |     this._lexHandlers = [];
37 |     this._productionHandlers = [];
38 | 
39 |     this._parserClassName = path.basename(
40 |       outputFile,
41 |       path.extname(outputFile),
42 |     );
43 | 
44 |     // Trait provides methods for lex and production handlers.
45 |     Object.assign(this, PHPParserGeneratorTrait);
46 |   }
47 | 
48 |   /**
49 |    * Generates parser code.
50 |    */
51 |   generateParserData() {
52 |     super.generateParserData();
53 |     this.generateLexHandlers();
54 |     this.generateProductionHandlers();
55 |     this.generateParserClassName(this._parserClassName);
56 |   }
57 | };
58 | 


--------------------------------------------------------------------------------
/examples/calc.java.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in Java.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.java.g -m lalr1 -o CalcParser.java
 5 |  *
 6 |  * import com.syntax.*;
 7 |  *
 8 |  * CalcParser parser = new CalcParser();
 9 |  *
10 |  * System.out.println(parser.parse("2 + 2 * 2"));   // 6
11 |  * System.out.println(parser.parse("(2 + 2) * 2")); // 8
12 |  */
13 | 
14 | {
15 |   "lex": {
16 |     "rules": [
17 |       ["\\s+",  '/* skip whitespace */ return null'],
18 |       ["\\d+",  'return "NUMBER"'],
19 |       ["\\*",   'return "*"'],
20 |       ["\\+",   'return "+"'],
21 |       ["\\(",   'return "("'],
22 |       ["\\)",   'return ")"'],
23 |     ]
24 |   },
25 | 
26 |   "operators": [
27 |     ["left", "+"],
28 |     ["left", "*"],
29 |   ],
30 | 
31 |   "moduleInclude": `
32 | 
33 |     /**
34 |      * The ParserEvents class allows subscribing to
35 |      * different parsing events.
36 |      */
37 |     class ParserEvents {
38 |       public static void init() {
39 |         System.out.println("Parser is created.");
40 |       }
41 | 
42 |       public static void onParseBegin(String str) {
43 |         System.out.println("Parsing is started: " + str);
44 |       }
45 | 
46 |       public static void onParseEnd(Object result) {
47 |         System.out.println("Parsing is completed: " + result);
48 |       }
49 |     }
50 | 
51 |   `,
52 | 
53 |   "bnf": {
54 |     "E": [
55 |       ["E + E",  "$$ = (Integer)$1 + (Integer)$3"],
56 |       ["E * E",  "$$ = (Integer)$1 * (Integer)$3"],
57 |       ["NUMBER", "$$ = Integer.valueOf(yytext)"],
58 |       ["( E )",  "$$ = $2"],
59 |     ],
60 |   },
61 | }


--------------------------------------------------------------------------------
/src/plugins/php/lr/lr-parser-generator-php.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
 7 | const PHPParserGeneratorTrait = require('../php-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | import path from 'path';
11 | 
12 | /**
13 |  * Generic PHP template for all LR parsers.
14 |  */
15 | const PHP_LR_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/lr.template.php`,
17 |   'utf-8',
18 | );
19 | 
20 | /**
21 |  * LR parser generator for PHP.
22 |  */
23 | export default class LRParserGeneratorPHP extends LRParserGeneratorDefault {
24 | 
25 |   /**
26 |    * Instance constructor.
27 |    */
28 |   constructor({
29 |     grammar,
30 |     outputFile,
31 |     options = {},
32 |   }) {
33 |     super({grammar, outputFile, options})
34 |       .setTemplate(PHP_LR_PARSER_TEMPLATE);
35 | 
36 |     this._lexHandlers = [];
37 |     this._productionHandlers = [];
38 | 
39 |     this._parserClassName = path.basename(
40 |       outputFile,
41 |       path.extname(outputFile),
42 |     );
43 | 
44 |     // Trait provides methods for lex and production handlers.
45 |     Object.assign(this, PHPParserGeneratorTrait);
46 |   }
47 | 
48 |   /**
49 |    * Generates parser code.
50 |    */
51 |   generateParserData() {
52 |     super.generateParserData();
53 |     this.generateLexHandlers();
54 |     this.generateProductionHandlers();
55 |     this.generateParserClassName(this._parserClassName);
56 |   }
57 | };
58 | 


--------------------------------------------------------------------------------
/src/plugins/ruby/ll/ll-parser-generator-ruby.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LLParserGeneratorDefault = require(ROOT + 'll/ll-parser-generator-default').default;
 7 | const RubyParserGeneratorTrait = require('../ruby-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | import path from 'path';
11 | 
12 | /**
13 |  * Generic Ruby template for LL(1) parser.
14 |  */
15 | const RUBY_LL_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/ll.template.rb`,
17 |   'utf-8'
18 | );
19 | 
20 | /**
21 |  * LL parser generator for Ruby.
22 |  */
23 | export default class LLParserGeneratorRuby extends LLParserGeneratorDefault {
24 | 
25 |   /**
26 |    * Instance constructor.
27 |    */
28 |   constructor({
29 |     grammar,
30 |     outputFile,
31 |     options = {},
32 |   }) {
33 |     super({grammar, outputFile, options})
34 |       .setTemplate(RUBY_LL_PARSER_TEMPLATE);
35 | 
36 |     this._lexHandlers = [];
37 |     this._productionHandlers = [];
38 | 
39 |     this._parserClassName = path.basename(
40 |       outputFile,
41 |       path.extname(outputFile),
42 |     );
43 | 
44 |     // Trait provides methods for lex and production handlers.
45 |     Object.assign(this, RubyParserGeneratorTrait);
46 |   }
47 | 
48 |   /**
49 |    * Generates parser code.
50 |    */
51 |   generateParserData() {
52 |     super.generateParserData();
53 |     this.generateLexHandlers();
54 |     this.generateProductionHandlers();
55 |     this.generateParserClassName(this._parserClassName);
56 |   }
57 | };
58 | 


--------------------------------------------------------------------------------
/src/plugins/ruby/lr/lr-parser-generator-ruby.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
 7 | const RubyParserGeneratorTrait = require('../ruby-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | import path from 'path';
11 | 
12 | /**
13 |  * Generic Ruby template for all LR parsers.
14 |  */
15 | const RUBY_LR_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/lr.template.rb`,
17 |   'utf-8',
18 | );
19 | 
20 | /**
21 |  * LR parser generator for Ruby.
22 |  */
23 | export default class LRParserGeneratorRuby extends LRParserGeneratorDefault {
24 | 
25 |   /**
26 |    * Instance constructor.
27 |    */
28 |   constructor({
29 |     grammar,
30 |     outputFile,
31 |     options = {},
32 |   }) {
33 |     super({grammar, outputFile, options})
34 |       .setTemplate(RUBY_LR_PARSER_TEMPLATE);
35 | 
36 |     this._lexHandlers = [];
37 |     this._productionHandlers = [];
38 | 
39 |     this._parserClassName = path.basename(
40 |       outputFile,
41 |       path.extname(outputFile),
42 |     );
43 | 
44 |     // Trait provides methods for lex and production handlers.
45 |     Object.assign(this, RubyParserGeneratorTrait);
46 |   }
47 | 
48 |   /**
49 |    * Generates parser code.
50 |    */
51 |   generateParserData() {
52 |     super.generateParserData();
53 |     this.generateLexHandlers();
54 |     this.generateProductionHandlers();
55 |     this.generateParserClassName(this._parserClassName);
56 |   }
57 | };
58 | 


--------------------------------------------------------------------------------
/src/plugins/csharp/lr/lr-parser-generator-csharp.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
 7 | const CSharpParserGeneratorTrait = require('../csharp-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | import path from 'path';
11 | 
12 | /**
13 |  * Generic C# template for all LR parsers.
14 |  */
15 | const CSHARP_LR_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/lr.template.cs`,
17 |   'utf-8',
18 | );
19 | 
20 | /**
21 |  * LR parser generator for C#.
22 |  */
23 | export default class LRParserGeneratorCSharp extends LRParserGeneratorDefault {
24 | 
25 |   /**
26 |    * Instance constructor: forwards the arguments to the default LR generator and installs the C# LR template.
27 |    */
28 |   constructor({
29 |     grammar,
30 |     outputFile,
31 |     options = {},
32 |   }) {
33 |     super({grammar, outputFile, options})
34 |       .setTemplate(CSHARP_LR_PARSER_TEMPLATE);
35 | 
36 |     this._lexHandlers = [];
37 |     this._productionHandlers = [];
38 | 
39 |     this._parserClassName = path.basename(
40 |       outputFile,
41 |       path.extname(outputFile),
42 |     ); // output file name with its extension stripped
43 | 
44 |     // Trait provides methods for lex and production handlers; they are copied onto this instance.
45 |     Object.assign(this, CSharpParserGeneratorTrait);
46 |   }
47 | 
48 |   /**
49 |    * Generates parser data: the default LR data first, then lex handlers, production handlers, and the parser class name.
50 |    */
51 |   generateParserData() {
52 |     super.generateParserData();
53 |     this.generateLexHandlers();
54 |     this.generateProductionHandlers();
55 |     this.generateParserClassName(this._parserClassName);
56 |   }
57 | };
58 | 


--------------------------------------------------------------------------------
/examples/calc.cpp.ast.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Generated parser in C++.
 3 |  *
 4 |  * ./bin/syntax -g examples/calc.cpp.ast.g -m lalr1 -o CalcParser.h
 5 |  *
 6 |  *   #include "CalcParser.h"
 7 |  *
 8 |  *   CalcParser parser;
 9 |  *
10 |  *   std::cout << parser.parse("2 + 2 * 2"); // 6
11 |  */
12 | 
13 | %lex
14 | 
15 | %%
16 | 
17 | \s+    %empty
18 | 
19 | \d+    NUMBER
20 | 
21 | /lex
22 | 
23 | %{
24 | 
25 | #include <iostream>
26 | #include <memory>
27 | 
28 | /**
29 |  * Base class for AST nodes.
30 |  */
31 | class Node {};
32 | 
33 | /**
34 |  * Binary expressions.
35 |  */
36 | class BinaryExpression : public Node {
37 |  public:
38 |   BinaryExpression(std::string op, Node* left, Node* right)
39 |     : op(op), left(left), right(right) {}
40 | 
41 |   std::string op;
42 |   Node* left;
43 |   Node* right;
44 | };
45 | 
46 | /**
47 |  * AST node for numbers.
48 |  */
49 | class NumericLiteral : public Node {
50 |  public:
51 |   NumericLiteral(int value): value(value) {}
52 |   int value;
53 | };
54 | 
55 | // Type of the parsing value.
56 | using Value = Node*;
57 | 
58 | 
59 | // On parser begin hook:
60 | void onParseBegin(const std::string& str) {
61 |   std::cout << "Parsing: " << str << "\n";
62 | }
63 | 
64 | // On parser end hook:
65 | void onParseEnd(Node* result) {
66 |   std::cout << "Result: " << result << "\n";
67 | }
68 | 
69 | 
70 | %}
71 | 
72 | 
73 | %left '+'
74 | %left '*'
75 | 
76 | %%
77 | 
78 | E
79 |   : E '+' E
80 |     { $$ = new BinaryExpression($2, $1, $3) }
81 | 
82 |   | E '*' E
83 |     { $$ = new BinaryExpression($2, $1, $3) }
84 | 
85 |   | '(' E ')' { $$ = $2 }
86 | 
87 |   | NUMBER
88 |     { $$ = new NumericLiteral(std::stoi($1)) }
89 |   ;


--------------------------------------------------------------------------------
/examples/calc-eval.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Example:
 3 |  *
 4 |  *   ./bin/syntax -g examples/calc-eval.g -m slr1 -p '2 + 2 * 2'
 5 |  *
 6 |  *   > 6
 7 |  *
 8 |  *   ./bin/syntax -g examples/calc-eval.g -m slr1 -p '(2 + 2) * 2'
 9 |  *
10 |  *   > 8
11 |  */
12 | 
13 | {
14 |   "lex": {
15 |     "rules": [
16 |       ["\\s+",                    "/*skip whitespace*/"],
17 |       ["[0-9]+(?:\\.[0-9]+)?\\b", "return 'NUMBER'"],
18 |       ["\\+",                     "return '+'"],
19 |       ["\\*",                     "return '*'"],
20 |       ["-",                       "return '-'"],
21 |       ["\\/",                     "return '/'"],
22 |       ["\\(",                     "return '('"],
23 |       ["\\)",                     "return ')'"],
24 |       ["\\^",                     "return '^'"],
25 |       ["!",                       "return '!'"],
26 |       ["%",                       "return '%'"],
27 |       ["PI\\b",                   "return 'PI'"],
28 |       ["E\\b",                    "return 'E'"],
29 |     ]
30 |   },
31 | 
32 |   "operators": [
33 |     ["left", "+", "-"],
34 |     ["left", "*", "/"],
35 |     ["left", "^"],
36 |     ["right", "!"],
37 |     ["right", "%"],
38 |     ["left", "UMINUS"],
39 |   ],
40 | 
41 |   "bnf": {
42 |     "e": [["e + e",  "$$ = $1 + $3"],
43 |           ["e - e",  "$$ = $1 - $3"],
44 |           ["e * e",  "$$ = $1 * $3"],
45 |           ["e / e",  "$$ = $1 / $3"],
46 |           ["e ^ e",  "$$ = Math.pow($1, $3)"],
47 |           ["e !",    "$$ = (function _factorial(n) {if(n===0) return 1; return _factorial(n-1) * n})($1)"],
48 |           ["e %",    "$$ = $1 / 100"],
49 |           ["- e",    "$$ = -$2", {"prec": "UMINUS"}],
50 |           ["( e )",  "$$ = $2"],
51 |           ["NUMBER", "$$ = Number(yytext)"],
52 |           ["E",      "$$ = Math.E"],
53 |           ["PI",     "$$ = Math.PI"]],
54 |   }
55 | }


--------------------------------------------------------------------------------
/src/plugins/java/lr/lr-parser-generator-java.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
 7 | const JavaParserGeneratorTrait = require('../java-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | import path from 'path';
11 | 
12 | /**
13 |  * Generic Java template for all LR parsers.
14 |  */
15 | const JAVA_LR_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/lr.template.java`,
17 |   'utf-8',
18 | );
19 | 
20 | /**
21 |  * LR parser generator for Java.
22 |  */
23 | export default class LRParserGeneratorJava extends LRParserGeneratorDefault {
24 | 
25 |   /**
26 |    * Instance constructor: forwards the arguments to the default LR generator and installs the Java LR template.
27 |    */
28 |   constructor({
29 |     grammar,
30 |     outputFile,
31 |     options = {},
32 |   }) {
33 |     super({grammar, outputFile, options})
34 |       .setTemplate(JAVA_LR_PARSER_TEMPLATE);
35 | 
36 |     this._lexHandlers = [];
37 |     this._productionHandlers = [];
38 | 
39 |     this._parserClassName = path.basename(
40 |       outputFile,
41 |       path.extname(outputFile),
42 |     ); // output file name with its extension stripped
43 | 
44 |     // Trait provides methods for lex and production handlers; they are copied onto this instance.
45 |     Object.assign(this, JavaParserGeneratorTrait);
46 |   }
47 | 
48 |   /**
49 |    * Generates parser data step by step; this override does not delegate to `super.generateParserData()`.
50 |    */
51 |   generateParserData() {
52 |     this.generateParserClassName(this._parserClassName);
53 | 
54 |     // Lexical grammar.
55 |     this.generateTokenizer();
56 | 
57 |     // Syntactic grammar.
58 |     this.generateProductions();
59 | 
60 |     // Tables.
61 |     this.generateTokensTable();
62 |     this.generateParseTable();
63 | 
64 |     this.generateLexHandlers(); // handlers are emitted after the tables
65 |     this.generateProductionHandlers();
66 | 
67 |     this.generateModuleInclude(); // module include is generated last
68 |   }
69 | };
70 | 


--------------------------------------------------------------------------------
/examples/lambda-calculus.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  *
 4 |  * ./bin/syntax -g examples/lambda-calculus.g -m slr1 -p '(λx. x) (λy. y)'
 5 |  *
 6 |  *   Parsing mode: SLR(1).
 7 |  *
 8 |  *   Parsing: (λx. x) (λy. y)
 9 |  *
10 |  *   ✓ Accepted
11 |  *
12 |  *   Parsed value: { type: 'Application',
13 |  *     lhs:
14 |  *      { type: 'Abstraction',
15 |  *        param: { type: 'Identifier', value: 'x' },
16 |  *        body: { type: 'Identifier', value: 'x' } },
17 |  *     rhs:
18 |  *      { type: 'Abstraction',
19 |  *        param: { type: 'Identifier', value: 'y' },
20 |  *        body: { type: 'Identifier', value: 'y' } } }
21 |  *
22 |  *
23 |  * To generate a parser:
24 |  *
25 |  * ./bin/syntax -g examples/lambda-calculus.g -m slr1 -o lc-parser.js
26 |  *
27 |  * In Node:
28 |  *
29 |  * require('lc-parser.js').parse('(λx. x) (λy. y)');
30 |  *
31 |  */
32 | 
33 | {
34 |   "lex": {
35 |     "rules": [
36 |       ["\\s+",                                 "/* skip whitespace */"],
37 |       ["\\.",                                  "return 'DOT';"],
38 |       ["[a-z][a-zA-Z]*",                       "return 'LCID';"],
39 |       ["λ",                                    "return 'LAMBDA';"],
40 |       ["\\(",                                  "return 'LPAREN';"],
41 |       ["\\)",                                  "return 'RPAREN';"],
42 |     ]
43 |   },
44 | 
45 |   "bnf": {
46 |     "Term":         [["Application",           "$$ = $1;"],
47 |                      ["LAMBDA Lcid DOT Term",  "$$ = {type: 'Abstraction', param: $2, body: $4};"]],
48 | 
49 |      "Application": [["Application Atom",      "$$ = {type: 'Application', lhs: $1, rhs: $2};"],
50 |                      ["Atom", "$$ = $1;"]],
51 | 
52 |      "Atom":        [["LPAREN Term RPAREN",    "$$ = $2;"],
53 |                      ["Lcid",                  "$$ = $1;"]],
54 | 
55 |      "Lcid":        [["LCID",                  "$$ = {type: 'Identifier', value: $1};"]]
56 |   }
57 | }


--------------------------------------------------------------------------------
/examples/calc-loc.jl.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Captures location info. Julia version.
 3 |  *
 4 |  * In order to capture locations, pass the `--loc` option.
 5 |  *
 6 |  * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc.
 7 |  * Named accessors are available as well: @foo, @bar.
 8 |  *
 9 |  * A location is a vector of structure:
10 |  *
11 |  * [
12 |  *   startOffset,
13 |  *   endOffset,
14 |  *   startLine,
15 |  *   endLine,
16 |  *   startColumn,
17 |  *   endColumn,
18 |  * ]
19 |  *
20 |  * The resulting location is in the @$. It is calculated automatically from
21 |  * first and last symbol on a RHS handle, and it also can be overridden
22 |  * manually.
23 |  *
24 |  * ./bin/syntax -g examples/calc-loc.jl.bnf -m slr1 -o ~/CalcLoc.jl --loc
25 |  *
26 |  */
27 | 
28 |  %lex
29 | 
30 | %%
31 | 
32 | \s+       # ignore whitespace
33 | \d+       return "NUMBER"
34 | 
35 | /lex
36 | 
37 | %{
38 |   
39 |   function calcloc(s, e)
40 |     return [s.startoffset, e.endoffset, s.startline, e.endline, s.startcolumn, e.endcolumn]
41 |   end
42 | 
43 |   function singleloc(t)
44 |     return [t.startoffset, t.endoffset, t.startline, t.endline, t.startcolumn, t.endcolumn]
45 |   end
46 | 
47 |   function numericliteral(value, loc)
48 |     return [value, loc]
49 |   end
50 | 
51 |   function binaryexpression(op, left, right, loc)
52 |     return [op, left, right, loc]
53 |   end
54 | 
55 | %}
56 | 
57 | %left '+'
58 | %left '*'
59 | 
60 | %%
61 | 
62 | exp
63 |   : exp '+' exp
64 |     /* Explicitly calculate location */
65 |     { $$ = binaryexpression("+", $1, $3, calcloc(@1, @3)) }
66 | 
67 |   | exp '*' exp
68 |     /* Use default result location: @$ */
69 |     { $$ = binaryexpression("*", $1, $3, @$) }
70 | 
71 |   | '(' exp ')'
72 |     { $$ = $2 }
73 | 
74 |   | number
75 |     /* Named args and position */
76 |     { $$ = numericliteral($number, singleloc(@number)) }
77 |   ;
78 | 
79 | number
80 |   : NUMBER { $$ = tryparse(Int, parserdata.yytext) }
81 |   ;


--------------------------------------------------------------------------------
/examples/bnf.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * BNF grammar grammar.
 3 |  *
 4 |  * A BNF parser generated from it can parse its own grammar.
 5 |  *
 6 |  * Example:
 7 |  *
 8 |  *   ./bin/syntax -g examples/bnf.g -f examples/bnf.bnf -m slr1
 9 |  */
10 | 
11 | %%
12 | 
13 | Spec
14 |               : OptModInc '%%' ProductionList
15 |                   { return $$ = {bnf: $3, moduleInclude: $1 }; }
16 |               ;
17 | 
18 | OptModInc
19 |               : MODULE_INCLUDE
20 |                   { $$ = $1; }
21 |               |
22 |               ;
23 | 
24 | ProductionList
25 |               : ProductionList Production
26 |                   { $$ = $1; $$[$2[0]] = $2[1]; }
27 | 
28 |               | Production
29 |                   { $$ = {}; $$[$1[0]] = $1[1]; }
30 |               ;
31 | 
32 | Production
33 |               : LHS ':' HandleList ';'
34 |                   { $$ = [$1, $3]; }
35 |               ;
36 | 
37 | LHS
38 |               : ID
39 |                   { $$ = yytext; }
40 |               ;
41 | 
42 | HandleList
43 |               : HandleList '|' HandleAction
44 |                   { $$ = $1; $$.push($3); }
45 | 
46 |               | HandleAction
47 |                   { $$ = [$1]; }
48 |               ;
49 | 
50 | HandleAction
51 |               : Handle Action
52 |                   { $$ = [$1, $2]; }
53 |               ;
54 | 
55 | Handle
56 |               : Entries
57 |                   { $$ = $1; }
58 |               |
59 |                   { $$ = ''; }
60 |               ;
61 | 
62 | 
63 | Entries
64 |               : Entries Entry
65 |                   { $$ = $1 + ' ' + $2; }
66 | 
67 |               | Entry
68 |                   { $$ = $1; }
69 |               ;
70 | 
71 | Entry
72 |               : ID
73 |                   { $$ = yytext; }
74 | 
75 |               | STRING
76 |                   { $$ = yytext; }
77 |               ;
78 | 
79 | Action
80 |               : CODE
81 |                   { $$ = yytext; }
82 |               |
83 |                   { $$ = null; }
84 |               ;


--------------------------------------------------------------------------------
/src/lr/__tests__/lr-parser-generator-test.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | import Grammar from '../../grammar/grammar';
 7 | import {MODES as GRAMMAR_MODE} from '../../grammar/grammar-mode';
 8 | 
 9 | import LRParserGeneratorDefault from '../lr-parser-generator-default';
10 | 
11 | import path from 'path';
12 | import os from 'os';
13 | 
14 | function createParser(grammar, options) { // test helper: builds a default LR parser generator and returns the result of generate()
15 |   const outputFile = path.resolve(os.tmpdir(), '.syntax-parser.js'); // fixed file name inside the OS temp directory
16 | 
17 |   return new LRParserGeneratorDefault({
18 |     grammar,
19 |     outputFile,
20 |     options,
21 |   }).generate();
22 | }
23 | 
24 | const grammar = Grammar.fromGrammarFile(
25 |   __dirname + '/../../grammar/__tests__/calc.g',
26 |   {
27 |     mode: GRAMMAR_MODE.LALR1,
28 |     captureLocations: true,
29 |   }
30 | );
31 | 
32 | describe('LR parser generator', () => {
33 | 
34 |   it('parse options', () => {
35 | 
36 |     const options = {
37 |       captureLocations: true,
38 |     };
39 | 
40 |     const parser = createParser(grammar, options);
41 | 
42 |     // Global options equal the ones passed to the generator.
43 |     expect(parser.getOptions()).toEqual(options);
44 | 
45 |     const overrideOptions = {
46 |       captureLocations: false,
47 |       'x-flag': true,
48 |     };
49 | 
50 |     const parsingString = '2 + 2';
51 | 
52 |     // Setup on parse begin hook: it should observe the per-parse override options.
53 |     parser.onParseBegin = (string, tokenizer, options) => { // note: this `options` parameter shadows the outer constant
54 | 
55 |       expect(string).toBe(parsingString);
56 | 
57 |       expect(options).toEqual(overrideOptions);
58 |       expect(parser.getOptions()).toEqual(overrideOptions);
59 | 
60 |       if (options['x-flag']) {
61 |         tokenizer.pushState('x-flag');
62 |       }
63 | 
64 |       expect(tokenizer.getCurrentState()).toBe('x-flag');
65 |       tokenizer.popState(); // undo the pushState above before parsing continues
66 |     };
67 | 
68 |     parser.parse(parsingString, overrideOptions);
69 | 
70 |     // Check the global options are restored once parse() with overrides has finished.
71 |     expect(parser.getOptions()).toEqual(options);
72 |   });
73 | 
74 | });


--------------------------------------------------------------------------------
/src/plugins/julia/lr/lr-parser-generator-julia.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | /**
 7 |  * Implementation notes.
 8 |  *
 9 |  * 1. Extend `LRParserGeneratorDefault`
10 |  * 2. Implement `generateParserData()`
11 |  * 3. Implement all specific to the target language
12 |  *    functionality in the trait file.
13 |  */
14 | 
15 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
16 | const JuliaParserGeneratorTrait = require('../julia-parser-generator-trait');
17 | 
18 | import fs from 'fs';
19 | import path from 'path';
20 | 
21 | /**
22 |  * Generic Julia template for all LR parsers.
23 |  */
24 | const JL_LR_PARSER_TEMPLATE = fs.readFileSync(
25 |   `${__dirname}/../templates/lr.template.jl`,
26 |   'utf-8',
27 | );
28 | 
29 | /**
30 |  * LR parser generator for the Julia language.
31 |  */
32 | export default class LRParserGeneratorJulia extends LRParserGeneratorDefault {
33 | 
34 |   /**
35 |    * Instance constructor: forwards the arguments to the default LR generator and installs the Julia LR template.
36 |    */
37 |   constructor({
38 |     grammar,
39 |     outputFile,
40 |     options = {},
41 |   }) {
42 |     super({grammar, outputFile, options})
43 |       .setTemplate(JL_LR_PARSER_TEMPLATE);
44 |     this._lexHandlers = [];
45 |     this._productionHandlers = [];
46 | 
47 |     /**
48 |      * Parser class name, inferred from the output file name with its extension stripped.
49 |      */
50 |     this._parserClassName = path.basename(
51 |       outputFile,
52 |       path.extname(outputFile),
53 |     );
54 | 
55 |     /**
56 |      * The trait provides methods for lex and production handlers, as well
57 |      * as some very specific code generation for the target language.
58 |      */
59 |     Object.assign(this, JuliaParserGeneratorTrait); // trait members are copied onto this instance
60 |   }
61 | 
62 |   /**
63 |    * Generates parser data: the default LR data first, then the lex handlers and production handlers.
64 |    */
65 |   generateParserData() {
66 |     super.generateParserData();
67 |     this.generateLexHandlers();
68 |     this.generateProductionHandlers();
69 |   }
70 | };
71 | 


--------------------------------------------------------------------------------
/src/plugins/cpp/lr/lr-parser-generator-cpp.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
 7 | const CppParserGeneratorTrait = require('../cpp-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | import path from 'path';
11 | 
12 | /**
13 |  * Generic C++ template for all LR parsers.
14 |  */
15 | const CPP_LR_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/lr.template.h`,
17 |   'utf-8',
18 | );
19 | 
20 | /**
21 |  * LR parser generator for C++.
22 |  */
23 | export default class LRParserGeneratorCpp extends LRParserGeneratorDefault {
24 | 
25 |   /**
26 |    * Instance constructor: forwards the arguments to the default LR generator and installs the C++ LR template.
27 |    */
28 |   constructor({
29 |     grammar,
30 |     outputFile,
31 |     options = {},
32 |   }) {
33 |     super({grammar, outputFile, options})
34 |       .setTemplate(CPP_LR_PARSER_TEMPLATE);
35 | 
36 |     this._lexHandlers = [];
37 |     this._productionHandlers = [];
38 |     this._tokenTypes = []; // NOTE(review): presumably filled by the trait's token-type generation; confirm in the trait
39 |     this._terminalsMap = {};
40 |     this._terminalsIndexMap = {};
41 | 
42 |     this._parserClassName = path.basename(
43 |       outputFile,
44 |       path.extname(outputFile),
45 |     ); // output file name with its extension stripped
46 | 
47 |     // Trait provides methods for lex and production handlers; they are copied onto this instance.
48 |     Object.assign(this, CppParserGeneratorTrait);
49 |   }
50 | 
51 |   /**
52 |    * Generates parser data step by step; this override does not delegate to `super.generateParserData()`.
53 |    */
54 |   generateParserData() {
55 |     this.generateNamespace();
56 |     this.generateModuleInclude();
57 |     this.generateCaptureLocations();
58 |     this.generateBuiltInTokenizer();
59 |     this.generateTokenTypes();
60 |     this.generateTokensTable();
61 |     this.generateLexRules();
62 |     this.generateLexRulesByStartConditions();
63 |     this.generateLexHandlers();
64 |     this.generateProductions();
65 |     this.generateParseTable();
66 |     this.generateProductionHandlers();
67 |     this.generateParserClassName(this._parserClassName);
68 |     this.generateParsedResult();
69 |   }
70 | };
71 | 


--------------------------------------------------------------------------------
/examples/on-token.bnf:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Calculates parens in RegExp style.
  3 |  *
  4 |  *   /(((a)b)c)(d)(e)/
  5 |  *
  6 |  * Here "a" is 3, "b" - 2, "c" - 1, "d" - 4, and "e" is 5.
  7 |  *
  8 |  * Test:
  9 |  *
 10 |  *   ./bin/syntax -g examples/on-token.bnf -m lalr1 -p '(((a)b)c)(d)(e)'
 11 |  */
 12 | 
 13 | %lex
 14 | 
 15 | %%
 16 | 
 17 | \w+       return 'NAME'
 18 | '('       return 'L_PAREN'
 19 | ')'       return 'R_PAREN'
 20 | 
 21 | /lex
 22 | 
 23 | %{
 24 | 
 25 | /**
 26 |  * Lower group boundary:
 27 |  *
 28 |  *   /(((a)b)c)(d)(e)/
 29 |  *
 30 |  * The first paren in (((a)b)c) has lower bound 0, but when
 31 |  * we reach the (d), it is already 3.
 32 |  */
 33 | let currentLower = 0;
 34 | 
 35 | /**
 36 |  * Group number to assign to a group.
 37 |  */
 38 | let currentGroup = 0;
 39 | 
 40 | /**
 41 |  * Total number of groups.
 42 |  */
 43 | let totalGroups = 0;
 44 | 
 45 | yyparse.onParseBegin = (_string) => {
 46 |   currentGroup = 0;
 47 |   totalGroups = 0;
 48 | };
 49 | 
 50 | yyparse.onShift = token => {
 51 |   if (token.type === 'L_PAREN') {
 52 |     currentGroup++;
 53 |     totalGroups++;
 54 |   }
 55 |   return token;
 56 | };
 57 | 
 58 | %}
 59 | 
 60 | %%
 61 | 
 62 | Program
 63 |   : Items
 64 |     {
 65 |       $$ = {
 66 |         type: 'Program',
 67 |         items: $1,
 68 |       }
 69 |     }
 70 |   ;
 71 | 
 72 | Items
 73 |   : Item
 74 |     { $$ = [$1] }
 75 | 
 76 |   | Items Item
 77 |     { $$ = $1; $1.push($2) }
 78 |   ;
 79 | 
 80 | Item
 81 |   : Group
 82 |   | Name
 83 |   ;
 84 | 
 85 | Name
 86 |   : NAME
 87 |     {
 88 |       $$ = {
 89 |         type: 'Name',
 90 |         value: $1,
 91 |       }
 92 |     }
 93 |   ;
 94 | 
 95 | Group
 96 |   : L_PAREN Items R_PAREN
 97 |     {
 98 |       $$ = {
 99 |         type: 'Group',
100 |         number: currentGroup,
101 |         items: $2,
102 |       };
103 | 
104 |       // Go up.
105 |       currentGroup--;
106 | 
107 |       // We reached the top level, reset the current group:
108 |       if (currentGroup === currentLower) {
109 |         currentGroup = totalGroups;
110 |         currentLower = totalGroups;
111 |       }
112 |     }
113 |   ;
114 | 
115 | 


--------------------------------------------------------------------------------
/examples/calc-loc.py.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Captures location info. Python version.
 3 |  *
 4 |  * In order to capture locations, pass the `--loc` option.
 5 |  *
 6 |  * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc.
 7 |  * Named accessors are available as well: @foo, @bar.
 8 |  *
 9 |  * A location is an object of structure:
10 |  *
11 |  * {
12 |  *   start_offset,
13 |  *   end_offset,
14 |  *   start_line,
15 |  *   end_line,
16 |  *   start_column,
17 |  *   end_column,
18 |  * }
19 |  *
20 |  * The resulting location is in the @$. It is calculated automatically from
21 |  * first and last symbol on a RHS handle, and it also can be overridden
22 |  * manually.
23 |  *
24 |  * ./bin/syntax -g examples/calc-loc.py.bnf -m slr1 -o ~/Parser.py --loc
25 |  *
26 |  */
27 | 
28 | %lex
29 | 
30 | %%
31 | 
32 | \s+       return "" # skip whitespace
33 | \d+       return "NUMBER"
34 | 
35 | /lex
36 | 
37 | %{
38 | 
39 | class BinaryExpression(object):
40 |     def __init__(self, op, left, right, loc):
41 |         self.op = op
42 |         self.left = left
43 |         self.right = right
44 |         self.loc = loc
45 | 
46 | def Loc(s, e):
47 |     return {
48 |         'start_offset': s['start_offset'],
49 |         'end_offset': e['end_offset'],
50 |         'start_line': s['start_line'],
51 |         'end_line': e['end_line'],
52 |         'start_column': s['start_column'],
53 |         'end_column': e['end_column'],
54 |     }
55 | 
56 | class NumericLiteral(object):
57 |     def __init__(self, value, loc):
58 |         self.value = value
59 |         self.loc = loc
60 | 
61 | def on_parse_end(value):
62 |     print(value.loc)
63 | 
64 | %}
65 | 
66 | %left '+'
67 | %left '*'
68 | 
69 | %%
70 | 
71 | exp
72 |   : exp '+' exp
73 |     /* Explicitly calculate location */
74 |     { $$ = BinaryExpression('+', $1, $3, Loc(@1, @3)) }
75 | 
76 |   | exp '*' exp
77 |     /* Use default result location: @$ */
78 |     { $$ = BinaryExpression('*', $1, $3, @$) }
79 | 
80 |   | '(' exp ')'
81 |     { $$ = $2 }
82 | 
83 |   | number
84 |     /* Named args and position */
85 |     { $$ = NumericLiteral($number, @number) }
86 |   ;
87 | 
88 | number
89 |   : NUMBER { $$ = int(yytext) }
90 |   ;
91 | 


--------------------------------------------------------------------------------
/src/grammar/grammar-mode.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | export const MODES = { // all supported parsing modes; each key equals its value
 7 |   LR0: 'LR0',
 8 |   SLR1: 'SLR1',
 9 |   LALR1: 'LALR1', // reported as LALR(1) by GrammarMode#isLALR1, same as LALR1_BY_SLR1
10 |   LALR1_BY_SLR1: 'LALR1_BY_SLR1',
11 |   LALR1_BY_CLR1: 'LALR1_BY_CLR1',
12 |   LALR1_EXTENDED: 'LALR1_EXTENDED',
13 |   CLR1: 'CLR1',
14 |   LL1: 'LL1',
15 | };
16 | 
17 | /**
18 |  * Grammar/parser mode: a validated wrapper around one of the `MODES` keys, with predicates to query it.
19 |  */
20 | export default class GrammarMode {
21 | 
22 |   constructor(mode = MODES.LR0) { // defaults to LR0 when no mode is passed
23 |     mode = mode.toUpperCase(); // mode names are matched case-insensitively
24 | 
25 |     if (!MODES.hasOwnProperty(mode)) { // anything that is not a key of MODES is rejected
26 |       throw new TypeError(
27 |         `\n"${mode}" is not a valid parsing mode. ` +
28 |         `Valid modes are: ${Object.keys(MODES).join(', ')}.\n`
29 |       );
30 |     }
31 | 
32 |     this._mode = mode;
33 |   }
34 | 
35 |   getRaw() { // the upper-cased mode name as stored, e.g. 'SLR1'
36 |     return this._mode;
37 |   }
38 | 
39 |   isLL() {
40 |     return this._isMode(MODES.LL1);
41 |   }
42 | 
43 |   isLR() { // every mode other than LL1 counts as LR
44 |     return !this.isLL();
45 |   }
46 | 
47 |   usesLookaheadSet() { // true only for LALR1_BY_CLR1 and CLR1
48 |     return this.isLALR1ByCLR1() || this.isCLR1();
49 |   }
50 | 
51 |   isLR0() {
52 |     return this._isMode(MODES.LR0);
53 |   }
54 | 
55 |   isSLR1() {
56 |     return this._isMode(MODES.SLR1);
57 |   }
58 | 
59 |   isLALR1() {
60 |     // Default algorithm for LALR(1) is "LALR(1) by SLR(1)".
61 |     return this.isLALR1BySLR1() || this._isMode(MODES.LALR1);
62 |   }
63 | 
64 |   isLALR1BySLR1() {
65 |     return this._isMode(MODES.LALR1_BY_SLR1);
66 |   }
67 | 
68 |   isLALR1ByCLR1() {
69 |     return this._isMode(MODES.LALR1_BY_CLR1);
70 |   }
71 | 
72 |   isLALR1Extended() {
73 |     // Special grammar mode, where productions are built from
74 |     // the LR(0) automaton in the "LALR(1) by SLR(1)" algorithm.
75 |     return this._isMode(MODES.LALR1_EXTENDED);
76 |   }
77 | 
78 |   isCLR1() {
79 |     return this._isMode(MODES.CLR1);
80 |   }
81 | 
82 |   _isMode(mode) { // strict equality against the stored upper-cased mode
83 |     return this._mode === mode;
84 |   }
85 | 
86 |   /**
87 |    * Returns string representation of a mode: the last character is wrapped in parentheses.
88 |    * LR0 -> LR(0)
89 |    */
90 |   toString() {
91 |     return `${this._mode.slice(0, -1)}(${this._mode[this._mode.length - 1]})`;
92 |   }
93 | }


--------------------------------------------------------------------------------
/examples/calc-loc.php.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Captures location info. PHP version.
 3 |  *
 4 |  * In order to capture locations, pass the `--loc` option.
 5 |  *
 6 |  * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc.
 7 |  * Named accessors are available as well: @foo, @bar.
 8 |  *
 9 |  * A location is an object of structure:
10 |  *
11 |  * {
12 |  *   startOffset,
13 |  *   endOffset,
14 |  *   startLine,
15 |  *   endLine,
16 |  *   startColumn,
17 |  *   endColumn,
18 |  * }
19 |  *
20 |  * The resulting location is in the @$. It is calculated automatically from
21 |  * first and last symbol on a RHS handle, and it also can be overridden
22 |  * manually.
23 |  *
24 |  * ./bin/syntax -g examples/calc-loc.php.bnf -m slr1 -o ~/Parser.php --loc
25 |  *
26 |  */
27 | 
28 | %lex
29 | 
30 | %%
31 | 
32 | \s+       /* skip whitespace */ return ""
33 | \d+       return "NUMBER"
34 | 
35 | /lex
36 | 
37 | %{
38 | 
39 | class BinaryExpression {
40 |   public function __construct($op, $left, $right, $loc) {
41 |     $this->op = $op;
42 |     $this->left = $left;
43 |     $this->right = $right;
44 |     $this->loc = $loc;
45 |   }
46 | }
47 | 
48 | function Loc($s, $e) {
49 |   // Same as default result location.
50 |   return array(
51 |     'startOffset' => $s['startOffset'],
52 |     'endOffset' => $e['endOffset'],
53 |     'startLine' => $s['startLine'],
54 |     'endLine' => $e['endLine'],
55 |     'startColumn' => $s['startColumn'],
56 |     'endColumn' => $e['endColumn'],
57 |   );
58 | }
59 | 
60 | class NumericLiteral {
61 |   public function __construct($value, $loc) {
62 |     $this->value = $value;
63 |     $this->loc = $loc;
64 |   }
65 | }
66 | 
67 | %}
68 | 
69 | %left '+'
70 | %left '*'
71 | 
72 | %%
73 | 
74 | exp
75 |   : exp '+' exp
76 |     /* Explicitly calculate location */
77 |     { $$ = new BinaryExpression('+', $1, $3, Loc(@1, @3)) }
78 | 
79 |   | exp '*' exp
80 |     /* Use default result location: @$ */
81 |     { $$ = new BinaryExpression('*', $1, $3, @$) }
82 | 
83 |   | '(' exp ')'
84 |     { $$ = $2 }
85 | 
86 |   | number
87 |     /* Named args and position */
88 |     { $$ = new NumericLiteral($number, @number) }
89 |   ;
90 | 
91 | number
92 |   : NUMBER { $$ = intval(yytext) }
93 |   ;
94 | 


--------------------------------------------------------------------------------
/src/plugins/rust/lr/lr-parser-generator-rust.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
 7 | const RustParserGeneratorTrait = require('../rust-parser-generator-trait');
 8 | 
 9 | import fs from 'fs';
10 | 
11 | /**
12 |  * Generic Rust template for all LR parsers.
13 |  */
14 | const RUST_LR_PARSER_TEMPLATE = fs.readFileSync(
15 |   `${__dirname}/../templates/lr.template.rs`,
16 |   'utf-8',
17 | );
18 | 
19 | /**
20 |  * LR parser generator for Rust.
21 |  */
22 | export default class LRParserGeneratorRust extends LRParserGeneratorDefault {
23 | 
24 |   /**
25 |    * Instance constructor.
26 |    */
27 |   constructor({
28 |     grammar,
29 |     outputFile,
30 |     options = {},
31 |   }) {
32 |     super({grammar, outputFile, options})
33 |       .setTemplate(RUST_LR_PARSER_TEMPLATE);
34 | 
35 |     this._lexHandlers = [];
36 |     this._productionHandlers = [];
37 | 
38 |     /**
39 |      * Stores all used types of the arguments, and return values.
40 |      * This is used to generate `SV` (stack value) enum.
41 |      * Init to `Token` type which is always stored on the stack.
42 |      *
43 |      * enum SV {
44 |      *     _0(Token),
45 |      *     _1(...),
46 |      * }
47 |      */
48 |     this._allTypes = {
49 |       Token: 0,
50 |     };
51 | 
52 |     // Autoinc index in SV.
53 |     this._allTypesIndex = 1;
54 | 
55 |     // Trait provides methods for lex and production handlers.
56 |     Object.assign(this, RustParserGeneratorTrait);
57 |   }
58 | 
59 |   /**
60 |    * Generates parser code.
61 |    */
62 |   generateParserData() {
63 |     // Lexical grammar.
64 |     this.generateTokenizer();
65 | 
66 |     // Syntactic grammar.
67 |     this.generateProductions();
68 | 
69 |     // Tables.
70 |     this.generateTokensTable();
71 |     this.generateParseTable();
72 | 
73 |     this.generateLexHandlers();
74 |     this.generateProductionHandlers();
75 |     this.generateStackValueEnum();
76 | 
77 |     // The module include which should include at least
78 |     // result type: type TResult = <...>;
79 |     this.generateModuleInclude();
80 |   }
81 | };
82 | 


--------------------------------------------------------------------------------
/examples/boolean.bnf:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Boolean expressions.
 3 |  *
 4 |  * ./bin/syntax -g examples/boolean.bnf -m slr1 -p '5 == 5' true
 5 |  *
 6 |  * ... -p '5 > 2 and 5 < 10'    true
 7 |  *     -p 'x or y'              x
 8 |  *     -p '5 == 5 or  2 < 1'    true
 9 |  *     -p '5 == 5 and 2 < 1'    false
10 |  *     -p 'true and false'      false
11 |  *     -p 'false or true'       true
12 |  *     -p 'true'                true
13 |  *     -p 'false'               false
14 |  *     -p 'not true'            false
15 |  *     -p 'not false'           true
16 |  *     -p 'not not true'        true
17 |  */
18 | 
19 | {
20 |   "lex": {
21 |     "rules": [
22 |       ["\\s+",        "/* skip whitespace */"],
23 | 
24 |       ["or",          "return 'or'"],
25 |       ["and",         "return 'and'"],
26 |       ["not",         "return 'not'"],
27 | 
28 |       ["true",        "return 'true'"],
29 |       ["false",       "return 'false'"],
30 | 
31 |       ["<=",          "return '<='"],
32 |       [">=",          "return '>='"],
33 |       ["<",           "return '<'"],
34 |       [">",           "return '>'"],
35 |       ["==",          "return '=='"],
36 | 
37 |       ["\\(",         "return '('"],
38 |       ["\\)",         "return ')'"],
39 | 
40 |       ["\\d+",        "return 'NUMBER'"],
41 |       ["[a-zA-Z0-9]", "return 'ID'"],
42 |     ]
43 |   },
44 | 
45 |   "bnf": {
46 |     "Or":      [["Or or And",          "$$ = $1 || $3"],
47 |                 ["And",                "$$ = $1"]],
48 | 
49 |     "And":     [["And and Compare",    "$$ = $1 && $3"],
50 |                 ["Compare",            "$$ = $1"]],
51 | 
52 |     "Compare": [["Value <= Value",     "$$ = $1 <= $3"],
53 |                 ["Value >= Value",     "$$ = $1 >= $3"],
54 |                 ["Value <  Value",     "$$ = $1 < $3"],
55 |                 ["Value >  Value",     "$$ = $1 > $3"],
56 |                 ["Value == Value",     "$$ = $1 == $3"],
57 |                 ["Value",              "$$ = $1"]],
58 | 
59 |     "Value":   [["not Value",          "$$ = !$2"],
60 |                 ["Primary",            "$$ = $1"]],
61 | 
62 |     "Primary": [["NUMBER",      "$$ = Number($1)"],
63 |                 ["ID",          "$$ = $1"],
64 |                 ["true",        "$$ = true"],
65 |                 ["false",       "$$ = false"],
66 |                 ["( Or )",  "$$ = $2"]],
67 |   }
68 | }


--------------------------------------------------------------------------------
/examples/calc-loc.rb.bnf:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Captures location info. Ruby version.
  3 |  *
  4 |  * In order to capture locations, pass the `--loc` option.
  5 |  *
  6 |  * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc.
  7 |  * Named accessors are available as well: @foo, @bar.
  8 |  *
  9 |  * A location is an object of structure:
 10 |  *
 11 |  * {
 12 |  *   :start_offset,
 13 |  *   :end_offset,
 14 |  *   :start_line,
 15 |  *   :end_line,
 16 |  *   :start_column,
 17 |  *   :end_column,
 18 |  * }
 19 |  *
 20 |  * The resulting location is in the @$. It is calculated automatically from
 21 |  * first and last symbol on a RHS handle, and it also can be overridden
 22 |  * manually.
 23 |  *
 24 |  * ./bin/syntax -g examples/calc-loc.rb.bnf -m slr1 -o ~/Parser.rb -w --loc
 25 |  *
 26 |  */
 27 | 
 28 | %lex
 29 | 
 30 | %%
 31 | 
 32 | \s+       return "" # skip whitespace
 33 | \d+       return "NUMBER"
 34 | 
 35 | /lex
 36 | 
 37 | %{
 38 | 
 39 | class BinaryExpression
 40 |   attr_reader :loc, :left, :right, :op
 41 | 
 42 |   def initialize(op, left, right, loc)
 43 |     @op = op
 44 |     @left = left
 45 |     @right = right
 46 |     @loc = loc
 47 |   end
 48 | end
 49 | 
 50 | def Loc(s, e)
 51 |   return {
 52 |     :start_offset => s[:start_offset],
 53 |     :end_offset => e[:end_offset],
 54 |     :start_line => s[:start_line],
 55 |     :end_line => e[:end_line],
 56 |     :start_column => s[:start_column],
 57 |     :end_column => e[:end_column],
 58 |   }
 59 | end
 60 | 
 61 | class NumericLiteral
 62 |   attr_reader :loc, :value
 63 | 
 64 |   def initialize(value, loc)
 65 |     @value = value
 66 |     @loc = loc
 67 |   end
 68 | end
 69 | 
 70 | YYParse.on_parse_end {|value|
 71 |   print value.loc
 72 | }
 73 | 
 74 | %}
 75 | 
 76 | %left '+'
 77 | %left '*'
 78 | 
 79 | %%
 80 | 
 81 | exp
 82 |   : exp '+' exp
 83 |     /* Explicitly calculate location */
 84 |     { $$ = BinaryExpression.new('+', $1, $3, Loc(@1, @3)) }
 85 | 
 86 |   | exp '*' exp
 87 |     /* Use default result location: @$ */
 88 |     { $$ = BinaryExpression.new('*', $1, $3, @$) }
 89 | 
 90 |   | '(' exp ')'
 91 |     { $$ = $2 }
 92 | 
 93 |   | number
 94 |     /* Named args and position */
 95 |     { $$ = NumericLiteral.new($number, @number) }
 96 |   ;
 97 | 
 98 | number
 99 |   : NUMBER { $$ = yytext.to_i }
100 |   ;
101 | 


--------------------------------------------------------------------------------
/examples/calc-ast.rs.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Generated parser in Rust.
  3 |  *
  4 |  * ./bin/syntax -g examples/calc-ast.rs.g -m lalr1 -o lib.rs
  5 |  *
  6 |  * use syntax::Parser;
  7 |  *
  8 |  * let parser = Parser::new();
  9 |  *
 10 |  * let ast = parser.parse("2 + 2 * 2");
 11 |  */
 12 | 
 13 | %lex
 14 | 
 15 | %%
 16 | 
 17 | \s+         /* skip whitespace */ return "";
 18 | 
 19 | \d+         return "NUMBER";
 20 | 
 21 | "+"         return "+";
 22 | "*"         return "*";
 23 | 
 24 | "("         return "(";
 25 | ")"         return ")";
 26 | 
 27 | /lex
 28 | 
 29 | %left +
 30 | %left *
 31 | 
 32 | %{
 33 | 
 34 | /**
 35 |  * Recursive generic `Node` enum structure.
 36 |  */
 37 | #[derive(Debug)]
 38 | pub enum Node {
 39 | 
 40 |     Literal(i32),
 41 | 
 42 |     Binary {
 43 |         op: &'static str,
 44 |         left: Box<Node>,
 45 |         right: Box<Node>,
 46 |     },
 47 | }
 48 | 
 49 | /**
 50 |  * Final result type returned from `parse` method call.
 51 |  */
 52 | pub type TResult = Node;
 53 | 
 54 | /**
 55 |  * Hook executed on parse begin.
 56 |  */
 57 | fn on_parse_begin(_parser: &mut Parser, string: &str) {
 58 |     println!("Parsing: {:?}", string);
 59 | }
 60 | 
 61 | /**
 62 |  * Hook executed on parse end.
 63 |  */
 64 | fn on_parse_end(_parser: &mut Parser, result: &TResult) {
 65 |     println!("Parsed: {:?}", result);
 66 | }
 67 | 
 68 | %}
 69 | 
 70 | %%
 71 | 
 72 | Expr
 73 |     : Expr + Expr {
 74 | 
 75 |         // Types of used args ($1, $2, ...), and return type:
 76 |         |$1: Node; $3: Node| -> Node;
 77 | 
 78 |         $$ = Node::Binary {
 79 |             op: "+",
 80 |             left: Box::new($1),
 81 |             right: Box::new($3),
 82 |         }
 83 |     }
 84 | 
 85 |     | Expr * Expr {
 86 | 
 87 |         |$1: Node; $3: Node| -> Node;
 88 | 
 89 |         $$ = Node::Binary {
 90 |             op: "*",
 91 |             left: Box::new($1),
 92 |             right: Box::new($3),
 93 |         }
 94 |     }
 95 | 
 96 |     | ( Expr ) {
 97 | 
 98 |         // No need to define argument types, since we don't do any
 99 |         // operations here, and just propagate $2 further.
100 | 
101 |         $$ = $2;
102 | 
103 |     }
104 | 
105 |     | NUMBER {
106 | 
107 |         || -> Node;
108 | 
109 |         let n = yytext.parse::<i32>().unwrap();
110 | 
111 |         $$ = Node::Literal(n);
112 | 
113 |     };
114 | 
115 | 


--------------------------------------------------------------------------------
/examples/hdl.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * HDL (Hardware Description Language) syntactic grammar.
  3 |  *
  4 |  * How to run:
  5 |  *
  6 |  * ./bin/syntax -g examples/hdl.g -m lalr1 -f examples/and.hdl
  7 |  */
  8 | 
  9 | %lex
 10 | 
 11 | %%
 12 | 
 13 | \/\/.*              /* skip comments */
 14 | \/\*(.|\s)*?\*\/    /* skip comments */
 15 | 
 16 | \s+                 /* skip whitespace */
 17 | 
 18 | CHIP                return 'CHIP'
 19 | IN                  return 'IN'
 20 | OUT                 return 'OUT'
 21 | PARTS               return 'PARTS'
 22 | 
 23 | \w+                 return 'ID'
 24 | 
 25 | /lex
 26 | 
 27 | %{
 28 | 
 29 | /**
 30 |  * List of inputs for this chip.
 31 |  */
 32 | let inputs = [];
 33 | 
 34 | /**
 35 |  * List of outputs for this chip.
 36 |  */
 37 | let outputs = [];
 38 | 
 39 | /**
 40 |  * Actual definitions.
 41 |  */
 42 | let parts = [];
 43 | 
 44 | %}
 45 | 
 46 | %%
 47 | 
 48 | Chip
 49 |   : CHIP Name '{' Sections '}' {
 50 |       $$ = {
 51 |         type: 'Chip',
 52 |         name: $2,
 53 |         inputs,
 54 |         outputs,
 55 |         parts,
 56 |       };
 57 |     }
 58 |   ;
 59 | 
 60 | Sections
 61 |   : Section Section Section
 62 |   ;
 63 | 
 64 | Section
 65 |   : Inputs
 66 |   | Outputs
 67 |   | Parts
 68 |   ;
 69 | 
 70 | Inputs
 71 |   : IN Names ';' {
 72 |       inputs.push(...$2);
 73 |     }
 74 |   ;
 75 | 
 76 | Outputs
 77 |   : OUT Names ';' {
 78 |       outputs.push(...$2);
 79 |     }
 80 |   ;
 81 | 
 82 | Parts
 83 |   : PARTS ':' ChipCalls {
 84 |       parts.push(...$3);
 85 |     }
 86 |   ;
 87 | 
 88 | Names
 89 |   : Name
 90 |     { $$ = [$1]; }
 91 | 
 92 |   | Names ',' Name
 93 |     { $1.push($3); $$ = $1; }
 94 |   ;
 95 | 
 96 | Name
 97 |   : ID
 98 |   | CHIP
 99 |   | IN
100 |   | OUT
101 |   | PARTS
102 |   ;
103 | 
104 | ChipCalls
105 |   : ChipCall
106 |     { $$ = [$1] }
107 | 
108 |   | ChipCalls ChipCall
109 |     { $1.push($2); $$ = $1 }
110 |   ;
111 | 
112 | ChipCall
113 |   : ID '(' ArgsList ')' ';' {
114 |       $$ = {
115 |         type: 'ChipCall',
116 |         name: $1,
117 |         arguments: $3,
118 |       }
119 |     }
120 |   ;
121 | 
122 | ArgsList
123 |   : Arg
124 |     { $$ = [$1] }
125 | 
126 |   | ArgsList ',' Arg
127 |     { $1.push($3); $$ = $1 }
128 |   ;
129 | 
130 | Arg
131 |   : ID '=' ID {
132 |       $$ = {
133 |         type: 'Argument',
134 |         name: $1,
135 |         value: $3,
136 |       }
137 |     }
138 |   ;


--------------------------------------------------------------------------------
/examples/grammar.clr1:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This grammar can not be parsed neither by LR(0), nor by SLR(1), as well as
 3 |  * not by LALR(1), resulting to the "reduce-reduce" conflicts.
 4 |  *
 5 |  *
 6 |  * LR(0) mode, see `r3/r4` conflicts:
 7 |  *
 8 |  *   ./bin/syntax --grammar examples/grammar.clr1 --mode lr0 --table
 9 |  *
10 |  *   ┌───┬───────┬───────┬───────┬───┬───┬───┐
11 |  *   │   │ 'a'   │ 'b'   │ $     │ S │ A │ B │
12 |  *   ├───┼───────┼───────┼───────┼───┼───┼───┤
13 |  *   │ 0 │ r3/r4 │ r3/r4 │ r3/r4 │ 7 │ 1 │ 2 │
14 |  *
15 |  *   ....              .....              ....
16 |  *
17 |  *   │ 9 │ r2    │ r2    │ r2    │   │   │   │
18 |  *   └───┴───────┴───────┴───────┴───┴───┴───┘
19 |  *
20 |  *
21 |  * SLR(1) mode: one `r3/r4` conflict fewer, but conflicts remain:
22 |  *
23 |  *   ./bin/syntax --grammar examples/grammar.clr1 --mode slr1 --table
24 |  *
25 |  *   ┌───┬───────┬───────┬───────┬───┬───┬───┐
26 |  *   │   │ 'a'   │ 'b'   │ $     │ S │ A │ B │
27 |  *   ├───┼───────┼───────┼───────┼───┼───┼───┤
28 |  *   │ 0 │ r3/r4 │ r3/r4 │       │ 7 │ 1 │ 2 │
29 |  *
30 |  *   ....              .....              ....
31 |  *
32 |  *   │ 9 │ r2    │ r2    │ r2    │   │   │   │
33 |  *   └───┴───────┴───────┴───────┴───┴───┴───┘
34 |  *
35 |  * CLR(1) mode, no conflicts:
36 |  *
37 |  *   ./bin/syntax --grammar examples/grammar.clr1 --mode clr1 --table
38 |  *
39 |  *   ┌───┬─────┬─────┬─────┬───┬───┬───┐
40 |  *   │   │ 'a' │ 'b' │ $   │ S │ A │ B │
41 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
42 |  *   │ 0 │ r3  │ r4  │     │ 7 │ 1 │ 2 │
43 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
44 |  *   │ 1 │ s3  │     │     │   │   │   │
45 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
46 |  *   │ 2 │     │ s5  │     │   │   │   │
47 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
48 |  *   │ 3 │     │ r3  │     │   │ 4 │   │
49 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
50 |  *   │ 4 │     │ s8  │     │   │   │   │
51 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
52 |  *   │ 5 │ r4  │     │     │   │   │ 6 │
53 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
54 |  *   │ 6 │ s9  │     │     │   │   │   │
55 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
56 |  *   │ 7 │     │     │ acc │   │   │   │
57 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
58 |  *   │ 8 │     │     │ r1  │   │   │   │
59 |  *   ├───┼─────┼─────┼─────┼───┼───┼───┤
60 |  *   │ 9 │     │     │ r2  │   │   │   │
61 |  *   └───┴─────┴─────┴─────┴───┴───┴───┘
62 |  */
63 | 
64 | %%
65 | 
66 | S -> A 'a' A 'b'
67 |    | B 'b' B 'a'
68 |    ;
69 | 
70 | A -> /*epsilon*/ ;
71 | B -> /*epsilon*/ ;
72 | 


--------------------------------------------------------------------------------
/src/grammar/__tests__/grammar-mode-test.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | import GrammarMode from '../grammar-mode';
 7 | import {MODES} from '../grammar-mode';
 8 | 
 9 | describe('grammar-mode', () => {
10 | 
11 |   it('LR', () => {
12 |     // Default LR0
13 |     let mode = new GrammarMode();
14 |     expect(mode.getRaw()).toBe(MODES.LR0);
15 |     expect(mode.isLR()).toBe(true);
16 |     expect(mode.isLR0()).toBe(true);
17 |     expect(mode.toString()).toBe('LR(0)');
18 | 
19 |     mode = new GrammarMode(MODES.LR0);
20 |     expect(mode.getRaw()).toBe(MODES.LR0);
21 |     expect(mode.isLR()).toBe(true);
22 |     expect(mode.isLR0()).toBe(true);
23 |     expect(mode.toString()).toBe('LR(0)');
24 | 
25 |     mode = new GrammarMode(MODES.SLR1);
26 |     expect(mode.getRaw()).toBe(MODES.SLR1);
27 |     expect(mode.isLR()).toBe(true);
28 |     expect(mode.isSLR1()).toBe(true);
29 |     expect(mode.toString()).toBe('SLR(1)');
30 | 
31 |     mode = new GrammarMode(MODES.LALR1);
32 |     expect(mode.getRaw()).toBe(MODES.LALR1);
33 |     expect(mode.isLR()).toBe(true);
34 |     expect(mode.isLALR1()).toBe(true);
35 |     expect(mode.toString()).toBe('LALR(1)');
36 | 
37 |     mode = new GrammarMode(MODES.CLR1);
38 |     expect(mode.getRaw()).toBe(MODES.CLR1);
39 |     expect(mode.isLR()).toBe(true);
40 |     expect(mode.isCLR1()).toBe(true);
41 |     expect(mode.toString()).toBe('CLR(1)');
42 |   });
43 | 
44 |   it('LL', () => {
45 |     const mode = new GrammarMode(MODES.LL1);
46 |     expect(mode.getRaw()).toBe(MODES.LL1);
47 |     expect(mode.isLR()).toBe(false);
48 |     expect(mode.isLL()).toBe(true);
49 |     expect(mode.toString()).toBe('LL(1)');
50 |   });
51 | 
52 |   it('lookahead set', () => {
53 |     let mode = new GrammarMode(MODES.LL1);
54 |     expect(mode.usesLookaheadSet()).toBe(false);
55 | 
56 |     mode = new GrammarMode(MODES.LR0);
57 |     expect(mode.usesLookaheadSet()).toBe(false);
58 | 
59 |     mode = new GrammarMode(MODES.SLR1);
60 |     expect(mode.usesLookaheadSet()).toBe(false);
61 | 
62 |     mode = new GrammarMode(MODES.LALR1_BY_CLR1);
63 |     expect(mode.usesLookaheadSet()).toBe(true);
64 | 
65 |     mode = new GrammarMode(MODES.LALR1_BY_SLR1);
66 |     expect(mode.usesLookaheadSet()).toBe(false);
67 | 
68 |     mode = new GrammarMode(MODES.LALR1);
69 |     expect(mode.usesLookaheadSet()).toBe(false);
70 | 
71 |     mode = new GrammarMode(MODES.CLR1);
72 |     expect(mode.usesLookaheadSet()).toBe(true);
73 |   });
74 | 
75 | });


--------------------------------------------------------------------------------
/src/plugins/python/templates/ll.template.py:
--------------------------------------------------------------------------------
  1 | ##
  2 | # LL(1) parser generated by the Syntax tool.
  3 | #
  4 | # https://www.npmjs.com/package/syntax-cli
  5 | #
  6 | #   npm install -g syntax-cli
  7 | #
  8 | #   syntax-cli --help
  9 | #
 10 | # To regenerate run:
 11 | #
 12 | #   syntax-cli \
 13 | #     --grammar ~/path-to-grammar-file \
 14 | #     --mode LL1 \
 15 | #     --output ~/parsermodule.py
 16 | ##
 17 | 
 18 | yytext = ''
 19 | yyleng = 0
 20 | __ = None
 21 | 
 22 | EOF = '$'
 23 | 
 24 | def on_parse_begin(string):
 25 |     pass
 26 | 
 27 | def on_parse_end(parsed):
 28 |     pass
 29 | 
 30 | {{{MODULE_INCLUDE}}}
 31 | 
 32 | {{{PRODUCTION_HANDLERS}}}
 33 | 
 34 | ps = {{{PRODUCTIONS}}}
 35 | tks = {{{TOKENS}}}
 36 | tbl = {{{TABLE}}}
 37 | 
 38 | s = None
 39 | 
 40 | {{{TOKENIZER}}}
 41 | 
 42 | def set_tokenizer(custom_tokenizer):
 43 |     global _tokenizer
 44 |     _tokenizer = custom_tokenizer
 45 | 
 46 | def get_tokenizer():
 47 |     return _tokenizer
 48 | 
 49 | def parse(string):
 50 |     global s
 51 |     on_parse_begin(string)
 52 | 
 53 |     if _tokenizer is None:
 54 |         raise Exception('_tokenizer instance wasn\'t specified.')
 55 | 
 56 |     _tokenizer.init_string(string)
 57 | 
 58 |     s = [EOF, {{{START}}}]
 59 | 
 60 |     t = _tokenizer.get_next_token()
 61 |     to = None
 62 |     tt = None
 63 | 
 64 |     while True:
 65 |         to = s.pop()
 66 |         tt = tks[t['type']]
 67 | 
 68 |         if (to == tt):
 69 |             t = _tokenizer.get_next_token()
 70 |             continue
 71 | 
 72 |         der(to, t, tt)
 73 | 
 74 |         if not _tokenizer.has_more_tokens() and len(s) <= 1:
 75 |             break
 76 | 
 77 |     while len(s) != 1:
 78 |         der(s.pop(), t, tt)
 79 | 
 80 | 
 81 |     if s[0] != EOF or t['type'] != EOF:
 82 |         _parse_error('stack is not empty: ' + str(s) + ', ' + str(t['value']))
 83 | 
 84 |     return True
 85 | 
 86 | def der(to, t, tt):
 87 |     npn = tbl[to][tt]
 88 | 
 89 |     if npn is None:
 90 |       _unexpected_token(t)
 91 | 
 92 |     s.extend(ps[int(npn)][0])
 93 | 
 94 | def _unexpected_token(token):  # report `token` as a syntax error
 95 |     if token['type'] == EOF:
 96 |         _unexpected_end_of_input()  # raises, so the code below is skipped
 97 |     # Any other token: hand its value and start position to the tokenizer.
 98 |     _tokenizer.throw_unexpected_token(
 99 |         token['value'],
100 |         token['start_line'],
101 |         token['start_column']
102 |     )
103 | 
104 | def _unexpected_end_of_input():
105 |     _parse_error('Unexpected end of input.')
106 | 
107 | def _parse_error(message):
108 |     raise Exception('SyntaxError: ' + str(message))
109 | 


--------------------------------------------------------------------------------
/examples/module-include.rb.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Module includes. Ruby version.
 3 |  *
 4 |  * The "moduleInclude" directive allows including an arbitrary code at the
 5 |  * beginning of the generated parser file. As an example, it can be the code
 6 |  * to require modules for corresponding AST nodes, or direct AST nodes
 7 |  * definitions.
 8 |  *
 9 |  * The code may define callbacks for several parse events, in particular
10 |  * `on_parse_begin`, and `on_parse_end`:
11 |  *
12 |  *   YYParse.on_parse_begin {|string|
13 |  *     puts 'Parsing: ' + string
14 |  *   }
15 |  *
16 |  * ./bin/syntax -g ./examples/module-include.rb.g -m slr1 -o './CalcParser.rb'
17 |  *
18 |  *   require '<path-to>/CalcParser.rb'
19 |  *
20 |  *   puts CalcParser.parse('2 + 2 * 2')
21 |  *
22 |  * Custom hook on parse begin. Parsing: 2 + 2 * 2
23 |  * Custom hook on parse end. Parsed: #<BinaryExpression:0x007fda0b0ad880>
24 |  */
25 | 
26 | {
27 |   "lex": {
28 |     "rules": [
29 |       ["\\s+",  "# skip whitespace"],
30 |       ["\\d+",  "return 'NUMBER'"],
31 |       ["\\*",   "return '*'"],
32 |       ["\\+",   "return '+'"],
33 |       ["\\(",   "return '('"],
34 |       ["\\)",   "return ')'"],
35 |     ]
36 |   },
37 | 
38 |   "moduleInclude": `
39 |     # Can be "require" statments, or direct declarations.
40 | 
41 |     class Node
42 |       attr_accessor :type
43 | 
44 |       def initialize(type)
45 |         @type = type
46 |       end
47 |     end
48 | 
49 |     class BinaryExpression < Node
50 |       attr_accessor :left
51 |       attr_accessor :right
52 |       attr_accessor :op
53 | 
54 |       def initialize(left, right, op)
55 |         super('Binary')
56 |         @@left = left
57 |         @right = right
58 |         @op = op
59 |       end
60 |     end
61 | 
62 |     class PrimaryExpression < Node
63 |       attr_accessor :value
64 | 
65 |       def initialize(value)
66 |         super('Primary')
67 |         @value = value.to_i
68 |       end
69 |     end
70 | 
71 |     # Standard hook on parse beging, and end:
72 | 
73 |     YYParse.on_parse_begin {|string|
74 |       puts 'Custom hook on parse begin. Parsing: ' + string
75 |     }
76 | 
77 |     YYParse.on_parse_end {|value|
78 |       puts 'Custom hook on parse end. Parsed: ' + value.inspect
79 |     }
80 |   `,
81 | 
82 |   "operators": [
83 |     ["left", "+"],
84 |     ["left", "*"],
85 |   ],
86 | 
87 |   "bnf": {
88 |     "E": [
89 |       ["E + E",  "$$ = BinaryExpression.new($1, $3, $2)"],
90 |       ["E * E",  "$$ = BinaryExpression.new($1, $3, $2)"],
91 |       ["NUMBER", "$$ = PrimaryExpression.new($1)"],
92 |       ["( E )",  "$$ = $2"],
93 |     ],
94 |   },
95 | }


--------------------------------------------------------------------------------
/src/plugins/example/ll/ll-parser-generator-example.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | /**
 7 |  * Implementation notes.
 8 |  *
 9 |  * 1. Extend `LLParserGeneratorDefault`
10 |  * 2. Implement `generateParserData()`
11 |  * 3. Implement all specific to the target language
12 |  *    functionality in the trait file.
13 |  */
14 | 
15 | /**
16 |  * Base class to extend.
17 |  */
18 | const LLParserGeneratorDefault = require(ROOT + 'll/ll-parser-generator-default').default;
19 | 
20 | /**
21 |  * A trait file usually implements some very specific to a target language
22 |  * constructs, and transformations.
23 |  */
24 | const ExampleParserGeneratorTrait = require('../example-parser-generator-trait');
25 | 
26 | import fs from 'fs';
27 | import path from 'path';
28 | 
29 | /**
30 |  * Generic template for all LL parsers in the Example language.
31 |  */
32 | const EXAMPLE_LL_PARSER_TEMPLATE = fs.readFileSync(
33 |   `${__dirname}/../templates/ll.template.example`,
34 |   'utf-8',
35 | );
36 | 
37 | /**
38 |  * LL parser generator for Example language.
39 |  */
40 | export default class LLParserGeneratorExample extends LLParserGeneratorDefault {
41 | 
42 |   /**
43 |    * Instance constructor.
44 |    */
45 |   constructor({
46 |     grammar,
47 |     outputFile,
48 |     options = {},
49 |   }) {
50 |     super({grammar, outputFile, options})
51 |       .setTemplate(EXAMPLE_LL_PARSER_TEMPLATE);
52 | 
53 |     /**
54 |      * Contains the lexical rule handlers: _lexRule1, _lexRule2, etc.
55 |      * It's populated by the trait file.
56 |      */
57 |     this._lexHandlers = [];
58 | 
59 |     /**
60 |      * Contains production handlers: _handler1, _handler2, etc.
61 |      * It's populated by the trait file.
62 |      */
63 |     this._productionHandlers = [];
64 | 
65 |     /**
66 |      * Actual class name of your parser. Here we infer from the output filename.
67 |      */
68 |     this._parserClassName = path.basename(
69 |       outputFile,
70 |       path.extname(outputFile),
71 |     );
72 | 
73 |     /**
74 |      * The trait provides methods for lex and production handlers, as well
75 |      * as some very specific code generation for the target language.
76 |      */
77 |     Object.assign(this, ExampleParserGeneratorTrait);
78 |   }
79 | 
80 |   /**
81 |    * Generates parser code.
82 |    */
83 |   generateParserData() {
84 |     super.generateParserData();
85 |     this.generateLexHandlers();
86 |     this.generateProductionHandlers();
87 |     this.generateParserClassName(this._parserClassName);
88 |   }
89 | };
90 | 


--------------------------------------------------------------------------------
/src/plugins/example/lr/lr-parser-generator-example.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | /**
 7 |  * Implementation notes.
 8 |  *
 9 |  * 1. Extend `LRParserGeneratorDefault`
10 |  * 2. Implement `generateParserData()`
11 |  * 3. Implement all specific to the target language
12 |  *    functionality in the trait file.
13 |  */
14 | 
15 | /**
16 |  * Base class to extend.
17 |  */
18 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default;
19 | 
20 | /**
21 |  * A trait file usually implements some very specific to a target language
22 |  * constructs, and transformations.
23 |  */
24 | const ExampleParserGeneratorTrait = require('../example-parser-generator-trait');
25 | 
26 | import fs from 'fs';
27 | import path from 'path';
28 | 
29 | /**
30 |  * Generic template for all LR parsers in the Example language.
31 |  */
32 | const EXAMPLE_LR_PARSER_TEMPLATE = fs.readFileSync(
33 |   `${__dirname}/../templates/lr.template.example`,
34 |   'utf-8',
35 | );
36 | 
37 | /**
38 |  * LR parser generator for Example language.
39 |  */
40 | export default class LRParserGeneratorExample extends LRParserGeneratorDefault {
41 | 
42 |   /**
43 |    * Instance constructor.
44 |    */
45 |   constructor({
46 |     grammar,
47 |     outputFile,
48 |     options = {},
49 |   }) {
50 |     super({grammar, outputFile, options})
51 |       .setTemplate(EXAMPLE_LR_PARSER_TEMPLATE);
52 | 
53 |     /**
54 |      * Contains the lexical rule handlers: _lexRule1, _lexRule2, etc.
55 |      * It's populated by the trait file.
56 |      */
57 |     this._lexHandlers = [];
58 | 
59 |     /**
60 |      * Contains production handlers: _handler1, _handler2, etc.
61 |      * It's populated by the trait file.
62 |      */
63 |     this._productionHandlers = [];
64 | 
65 |     /**
66 |      * Actual class name of your parser. Here we infer from the output filename.
67 |      */
68 |     this._parserClassName = path.basename(
69 |       outputFile,
70 |       path.extname(outputFile),
71 |     );
72 | 
73 |     /**
74 |      * The trait provides methods for lex and production handlers, as well
75 |      * as some very specific code generation for the target language.
76 |      */
77 |     Object.assign(this, ExampleParserGeneratorTrait);
78 |   }
79 | 
80 |   /**
81 |    * Generates parser code.
82 |    */
83 |   generateParserData() {
84 |     super.generateParserData();
85 |     this.generateLexHandlers();
86 |     this.generateProductionHandlers();
87 |     this.generateParserClassName(this._parserClassName);
88 |   }
89 | };
90 | 


--------------------------------------------------------------------------------
/src/templates/ll.template.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * LL(1) parser generated by the Syntax tool.
  3 |  *
  4 |  * https://www.npmjs.com/package/syntax-cli
  5 |  *
  6 |  *   npm install -g syntax-cli
  7 |  *
  8 |  *   syntax-cli --help
  9 |  *
 10 |  * To regenerate run:
 11 |  *
 12 |  *   syntax-cli \
 13 |  *     --grammar ~/path-to-grammar-file \
 14 |  *     --mode LL1 \
 15 |  *     --output ~/path-to-output-parser-file.js
 16 |  */
 17 | 
 18 | 'use strict';
 19 | 
 20 | let yytext;
 21 | let yyleng;
 22 | let __;
 23 | 
 24 | const EOF = '$';
 25 | 
 26 | const ps = {{{PRODUCTIONS}}};
 27 | const tks = {{{TOKENS}}};
 28 | const tbl = {{{TABLE}}};
 29 | 
 30 | const s = [];
 31 | 
 32 | let tokenizer;
 33 | {{{TOKENIZER}}}
 34 | 
 35 | const yyparse = {
 36 |   parse(string) {
 37 |     yyparse.onParseBegin(string);
 38 | 
 39 |     if (!tokenizer) {
 40 |       throw new Error(`Tokenizer instance wasn't specified.`);
 41 |     }
 42 | 
 43 |     tokenizer.initString(string);
 44 | 
 45 |     s.length = 0;
 46 |     s.push(EOF, {{{START}}});
 47 | 
 48 |     let t = tokenizer.getNextToken();
 49 |     let to = null;
 50 |     let tt = null;
 51 | 
 52 |     do {
 53 |       to = s.pop();
 54 |       tt = tks[t.type];
 55 | 
 56 |       if (to === tt) {
 57 |         t = tokenizer.getNextToken();
 58 |         continue;
 59 |       }
 60 | 
 61 |       der(to, t, tt);
 62 |     } while (tokenizer.hasMoreTokens() || s.length > 1);
 63 | 
 64 |     while (s.length !== 1) {
 65 |       der(s.pop(), t, tt);
 66 |     }
 67 | 
 68 |     if (s[0] !== EOF || t.type !== EOF) {
 69 |       parseError(`stack is not empty: ${s}, ${t.value}`);
 70 |     }
 71 | 
 72 |     return true;
 73 |   },
 74 | 
 75 |   setTokenizer(customTokenizer) {
 76 |     tokenizer = customTokenizer;
 77 |     return yyparse;
 78 |   },
 79 | 
 80 |   getTokenizer() {
 81 |     return tokenizer;
 82 |   },
 83 | 
 84 |   onParseBegin(string) {},
 85 |   onParseEnd(parsed) {},
 86 | };
 87 | 
 88 | {{{MODULE_INCLUDE}}}
 89 | 
 90 | function der(to, t, tt) {
 91 |   let npn = tbl[to][tt];
 92 |   if (!npn) {
 93 |     unexpectedToken(t);
 94 |   }
 95 |   s.push(...ps[npn][0]);
 96 | }
 97 | 
 98 | function unexpectedToken(token) { // reports `token` as a syntax error
 99 |   if (token.type === EOF) {
100 |     unexpectedEndOfInput(); // throws, so the code below is skipped
101 |   }
102 |   // Any other token: hand its value and start position to the tokenizer.
103 |   tokenizer.throwUnexpectedToken(
104 |     token.value,
105 |     token.startLine,
106 |     token.startColumn
107 |   );
108 | }
109 | 
110 | function unexpectedEndOfInput() {
111 |   parseError(`Unexpected end of input.`);
112 | }
113 | 
114 | function parseError(message) {
115 |   throw new SyntaxError(message);
116 | }
117 | 
118 | module.exports = yyparse;


--------------------------------------------------------------------------------
/examples/json.grammar.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * How to run:
 3 |  *
 4 |  *   Basic example:
 5 |  *
 6 |  *   ./bin/syntax \
 7 |  *      --grammar examples/json.grammar.js \
 8 |  *      --mode slr1 \
 9 |  *      --parse '{"x": 10}'
10 |  *
11 |  *   Parsing table, canonical collection or LR items, etc:
12 |  *
13 |  *   ./bin/syntax \
14 |  *      --grammar examples/json.grammar.js \
15 |  *      --mode slr1 \
16 |  *      --collection \
17 |  *      --table \
18 |  *      --parse '{"x": 10, "y": {"z": [1, 2, 3]}}'
19 |  */
20 | 
21 | {
22 |     "_info": "Based on: https://github.com/zaach/jison/blob/master/examples/json.js",
23 | 
24 |     "lex": {
25 |         "macros": {
26 |             "digit": "[0-9]",
27 |             "esc": "\\\\",
28 |             "int": "-?(?:[0-9]|[1-9][0-9]+)",
29 |             "exp": "(?:[eE][-+]?[0-9]+)",
30 |             "frac": "(?:\\.[0-9]+)"
31 |         },
32 |         "rules": [
33 |             ["\\s+", "/* skip whitespace */"],
34 |             ["{int}{frac}?{exp}?\\b", "return 'NUMBER';"],
35 |             ["\"(?:{esc}[\"bfnrt/{esc}]|{esc}u[a-fA-F0-9]{4}|[^\"{esc}])*\"", "return 'STRING';"],
36 |             ["\\{", "return '{'"],
37 |             ["\\}", "return '}'"],
38 |             ["\\[", "return '['"],
39 |             ["\\]", "return ']'"],
40 |             [",", "return ','"],
41 |             [":", "return ':'"],
42 |             ["true\\b", "return 'TRUE'"],
43 |             ["false\\b", "return 'FALSE'"],
44 |             ["null\\b", "return 'NULL'"]
45 |         ]
46 |     },
47 | 
48 |     "tokens": "STRING NUMBER { } [ ] , : TRUE FALSE NULL",
49 |     "start": "JSONText",
50 | 
51 |     "bnf": {
52 |         "JSONText": [ "JSONValue" ],
53 | 
54 |         "JSONString": [ "STRING" ],
55 | 
56 |         "JSONNullLiteral": [ "NULL" ],
57 | 
58 |         "JSONNumber": [ "NUMBER" ],
59 | 
60 |         "JSONBooleanLiteral": [ "TRUE", "FALSE" ],
61 | 
62 |         "JSONValue": [ "JSONNullLiteral",
63 |                        "JSONBooleanLiteral",
64 |                        "JSONString",
65 |                        "JSONNumber",
66 |                        "JSONObject",
67 |                        "JSONArray" ],
68 | 
69 |         "JSONObject": [ "{ }",
70 |                         "{ JSONMemberList }" ],
71 | 
72 |         "JSONMember": [ "JSONString : JSONValue" ],
73 | 
74 |         "JSONMemberList": [ "JSONMember",
75 |                               "JSONMemberList , JSONMember" ],
76 | 
77 |         "JSONArray": [ "[ ]",
78 |                        "[ JSONElementList ]" ],
79 | 
80 |         "JSONElementList": [ "JSONValue",
81 |                              "JSONElementList , JSONValue" ]
82 |     }
83 | }


--------------------------------------------------------------------------------
/examples/module-include.g.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Module includes.
 3 |  *
 4 |  * The "moduleInclude" directive allows including arbitrary code at the
 5 |  * beginning of the generated parser file. For example, it can be code
 6 |  * that requires modules for the corresponding AST nodes, or direct AST
 7 |  * node definitions.
 8 |  *
 9 |  * The code may define callbacks for several parse events, attaching them
10 |  * to the `yyparse` object. For example:
11 |  *
12 |  *   yyparse.onParseBegin = (string) => {
13 |  *     console.log('Parsing:', string);
14 |  *   };
15 |  *
16 |  * ./bin/syntax -g ./examples/module-include.g.js -m slr1 -o './parser.js'
17 |  *
18 |  * > require('./parser').parse('2 + 2 * 2');
19 |  *
20 |  * BinaryExpression {
21 |  *   type: 'Binary',
22 |  *   left:  PrimaryExpression { type: 'Primary', value: '2' },
23 |  *   right: BinaryExpression {
24 |  *     type: 'Binary',
25 |  *     left:  PrimaryExpression { type: 'Primary', value: '2' },
26 |  *     right: PrimaryExpression { type: 'Primary', value: '2' },
27 |  *     op: '*',
28 |  *   },
29 |  *   op: '+',
30 |  * }
31 |  */
32 | 
33 | {
34 |   "lex": {
35 |     "rules": [
36 |       ["\\s+",  "/* skip whitespace */"],
37 |       ["\\d+",  "return 'NUMBER'"],
38 |       ["\\*",   "return '*'"],
39 |       ["\\+",   "return '+'"],
40 |       ["\\(",   "return '('"],
41 |       ["\\)",   "return ')'"],
42 |     ]
43 |   },
44 | 
45 |   "moduleInclude": `
46 |     // Can be "require" statments, or direct declarations.
47 | 
48 |     class Node {
49 |       constructor(type) {
50 |         this.type = type;
51 |       }
52 |     }
53 | 
54 |     class BinaryExpression extends Node {
55 |       constructor(left, right, op) {
56 |         super('Binary');
57 |         this.left = left;
58 |         this.right = right;
59 |         this.op = op;
60 |       }
61 |     }
62 | 
63 |     class PrimaryExpression extends Node {
64 |       constructor(value) {
65 |         super('Primary');
66 |         this.value = value;
67 |       }
68 |     }
69 | 
70 |     yyparse.onParseBegin = (string) => {
71 |       console.log('Custom hook on parse begin. Parsing:', string, '\\n');
72 |     };
73 | 
74 |     yyparse.onParseEnd = (value) => {
75 |       console.log('Custom hook on parse end. Parsed:\\n\\n', value, '\\n');
76 |     };
77 | 
78 |   `,
79 | 
80 |   "operators": [
81 |     ["left", "+"],
82 |     ["left", "*"],
83 |   ],
84 | 
85 |   "bnf": {
86 |     "E": [
87 |       ["E + E",  "$$ = new BinaryExpression($1, $3, $2)"],
88 |       ["E * E",  "$$ = new BinaryExpression($1, $3, $2)"],
89 |       ["NUMBER", "$$ = new PrimaryExpression($1)"],
90 |       ["( E )",  "$$ = $2"],
91 |     ],
92 |   },
93 | }


--------------------------------------------------------------------------------
/examples/json.ast.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * How to run:
 3 |  *
 4 |  *   ./bin/syntax \
 5 |  *     --grammar examples/json.ast.js \
 6 |  *     --mode slr1 \
 7 |  *     --parse '{"x": 10, "y": {"z": [1, 2, true]}}'
 8 |  */
 9 | 
10 | {
11 |     "lex": {
12 |         "macros": {
13 |             "digit": "[0-9]",
14 |             "esc": "\\\\",
15 |             "int": "-?(?:[0-9]|[1-9][0-9]+)",
16 |             "exp": "(?:[eE][-+]?[0-9]+)",
17 |             "frac": "(?:\\.[0-9]+)"
18 |         },
19 |         "rules": [
20 |             ["\\s+", "/* skip whitespace */"],
21 |             ["{int}{frac}?{exp}?\\b", "return 'NUMBER';"],
22 |             ["\"(?:{esc}[\"bfnrt/{esc}]|{esc}u[a-fA-F0-9]{4}|[^\"{esc}])*\"", "yytext = yytext.substr(1,yyleng-2); return 'STRING';"],
23 |             ["\\{", "return '{'"],
24 |             ["\\}", "return '}'"],
25 |             ["\\[", "return '['"],
26 |             ["\\]", "return ']'"],
27 |             [",", "return ','"],
28 |             [":", "return ':'"],
29 |             ["true\\b", "return 'TRUE'"],
30 |             ["false\\b", "return 'FALSE'"],
31 |             ["null\\b", "return 'NULL'"]
32 |         ]
33 |     },
34 | 
35 |     "tokens": "STRING NUMBER { } [ ] , : TRUE FALSE NULL",
36 |     "start": "JSONText",
37 | 
38 |     "bnf": {
39 |         "JSONString": [[ "STRING", "$$ = yytext;" ]],
40 | 
41 |         "JSONNumber": [[ "NUMBER", "$$ = Number(yytext);" ]],
42 | 
43 |         "JSONNullLiteral": [[ "NULL", "$$ = null;" ]],
44 | 
45 |         "JSONBooleanLiteral": [[ "TRUE", "$$ = true;" ],
46 |                                [ "FALSE", "$$ = false;" ]],
47 | 
48 | 
49 |         "JSONText": [[ "JSONValue", "return $$ = $1;" ]],
50 | 
51 |         "JSONValue": [[ "JSONNullLiteral",    "$$ = $1;" ],
52 |                       [ "JSONBooleanLiteral", "$$ = $1;" ],
53 |                       [ "JSONString",         "$$ = $1;" ],
54 |                       [ "JSONNumber",         "$$ = $1;" ],
55 |                       [ "JSONObject",         "$$ = $1;" ],
56 |                       [ "JSONArray",          "$$ = $1;" ]],
57 | 
58 |         "JSONObject": [[ "{ }", "$$ = {};" ],
59 |                        [ "{ JSONMemberList }", "$$ = $2;" ]],
60 | 
61 |         "JSONMember": [[ "JSONString : JSONValue", "$$ = [$1, $3];" ]],
62 | 
63 |         "JSONMemberList": [[ "JSONMember", "$$ = {}; $$[$1[0]] = $1[1];" ],
64 |                            [ "JSONMemberList , JSONMember", "$$ = $1; $1[$3[0]] = $3[1];" ]],
65 | 
66 |         "JSONArray": [[ "[ ]", "$$ = [];" ],
67 |                       [ "[ JSONElementList ]", "$$ = $2;" ]],
68 | 
69 |         "JSONElementList": [[ "JSONValue", "$$ = [$1];" ],
70 |                             [ "JSONElementList , JSONValue", "$$ = $1; $1.push($3);" ]]
71 |     }
72 | }


--------------------------------------------------------------------------------
/src/grammar/__tests__/grammar-symbol-test.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | import GrammarSymbol from '../grammar-symbol';
 7 | import {EOF, EPSILON} from '../../special-symbols';
 8 | 
 9 | describe('grammar-symbol', () => {
10 |   it('singleton', () => {
11 |     expect(GrammarSymbol.get('A')).toBe(GrammarSymbol.get('A'));
12 |   });
13 | 
14 |   it('instance', () => {
15 |     expect(new GrammarSymbol('A')).not.toBe(new GrammarSymbol('A'));
16 |   });
17 | 
18 |   it('terminal', () => {
19 |     expect(new GrammarSymbol(`"a"`).isTerminal()).toBe(true);
20 |     expect(new GrammarSymbol(`'a'`).isTerminal()).toBe(true);
21 |     expect(new GrammarSymbol(`A`).isTerminal()).toBe(false);
22 |   });
23 | 
24 |   it('terminal value', () => {
25 |     expect(new GrammarSymbol(`"a"`).getTerminalValue()).toBe('a');
26 |   });
27 | 
28 |   it('quoted terminal', () => {
29 |     expect(new GrammarSymbol(`"a"`).quotedTerminal()).toBe(`'"a"'`);
30 |     expect(new GrammarSymbol(`'a'`).quotedTerminal()).toBe(`"'a'"`);
31 |   });
32 | 
33 |   it('non-terminal', () => {
34 |     expect(new GrammarSymbol(`A`).isNonTerminal()).toBe(true);
35 |     expect(new GrammarSymbol(`"a"`).isNonTerminal()).toBe(false);
36 |     expect(new GrammarSymbol(`'a'`).isNonTerminal()).toBe(false);
37 |   });
38 | 
39 |   it('raw symbol', () => {
40 |     expect(new GrammarSymbol(`A`).getSymbol()).toBe('A');
41 |     expect(new GrammarSymbol(`"a"`).getSymbol()).toBe(`"a"`);
42 |     expect(new GrammarSymbol(`'a'`).getSymbol()).toBe(`'a'`);
43 |   });
44 | 
45 |   it('raw symbol', () => {
46 |     expect(new GrammarSymbol(`A`).getSymbol()).toBe('A');
47 |     expect(new GrammarSymbol(`"a"`).getSymbol()).toBe(`"a"`);
48 |     expect(new GrammarSymbol(`'a'`).getSymbol()).toBe(`'a'`);
49 |   });
50 | 
51 |   it('compare symbol', () => {
52 |     expect(new GrammarSymbol(`A`).isSymbol('A')).toBe(true);
53 |     expect(new GrammarSymbol(`A`).isSymbol('B')).toBe(false);
54 |     expect(new GrammarSymbol(`A`).isSymbol(`'a'`)).toBe(false);
55 |     expect(new GrammarSymbol(`"a"`).isSymbol(`"a"`)).toBe(true);
56 |     expect(new GrammarSymbol(`'a'`).isSymbol(`'a'`)).toBe(true);
57 |     expect(new GrammarSymbol(`'a'`).isSymbol(`'b'`)).toBe(false);
58 |     expect(new GrammarSymbol(`'a'`).isSymbol(`"b"`)).toBe(false);
59 |     expect(new GrammarSymbol(`'a'`).isSymbol('A')).toBe(false);
60 |   });
61 | 
62 |   it('special symbols', () => {
63 |     // EOF.
64 |     expect(new GrammarSymbol(EOF).isEOF()).toBe(true);
65 |     expect(GrammarSymbol.isEOF(EOF)).toBe(true);
66 | 
67 |     // Epsilon.
68 |     expect(new GrammarSymbol(EPSILON).isEpsilon()).toBe(true);
69 |     expect(GrammarSymbol.isEpsilon(EPSILON)).toBe(true);
70 |   });
71 | });
72 | 


--------------------------------------------------------------------------------
/src/lr/lr-parser-generator-default.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | import BaseParserGenerator from '../base-parser-generator';
 7 | import CanonicalCollection from './canonical-collection';
 8 | import LRParsingTable from './lr-parsing-table';
 9 | 
10 | import fs from 'fs';
11 | 
12 | /**
13 |  * Generic JS template for all LR parsers.
14 |  */
15 | const LR_PARSER_TEMPLATE = fs.readFileSync(
16 |   `${__dirname}/../templates/lr.template.js`,
17 |   'utf-8'
18 | );
19 | 
20 | /**
21 |  * LR parser generator. Creates a parser module for a given grammar, and
22 |  * saves it to the `outputFile`.
23 |  */
24 | export default class LRParserGeneratorDefault extends BaseParserGenerator {
25 |   /**
26 |    * Instance constructor.
27 |    */
28 |   constructor({grammar, outputFile, options = {}}) {
29 |     if (!grammar.getMode().isLR()) {
30 |       throw new Error(`LR parser generator: LR grammar is expected.`);
31 |     }
32 | 
33 |     const table = new LRParsingTable({
34 |       canonicalCollection: new CanonicalCollection({grammar}),
35 |       grammar,
36 |       resolveConflicts: options.resolveConflicts,
37 |     });
38 | 
39 |     super({grammar, outputFile, options})
40 |       .setTable(table)
41 |       .setTemplate(LR_PARSER_TEMPLATE);
42 |   }
43 | 
44 |   /**
45 |    * Default format in the [ ] array notation.
46 |    */
47 |   generateProductionsData() {
48 |     return this.generateRawProductionsData().map(data => `[${data}]`);
49 |   }
50 | 
51 |   /**
52 |    * Format of the production is:
53 |    * [Non-terminal index, RHS.length, semanticAction]
54 |    */
55 |   generateRawProductionsData() {
56 |     return this.getGrammar()
57 |       .getProductions()
58 |       .map(production => {
59 |         let LHS = production
60 |           .getLHS()
61 |           .getSymbol()
62 |           .replace(/'/g, "\\'");
63 |         let RHSLength = production.isEpsilon() ? 0 : production.getRHS().length;
64 |         let semanticAction = this.buildSemanticAction(production);
65 | 
66 |         let result = [this.getEncodedNonTerminal(LHS), RHSLength];
67 | 
68 |         if (semanticAction) {
69 |           result.push(semanticAction);
70 |         }
71 | 
72 |         return result;
73 |       });
74 |   }
75 | 
76 |   /**
77 |    * Actual parsing table.
78 |    */
79 |   generateParseTableData() {
80 |     let originalTable = this._table.get();
81 |     let table = [];
82 | 
83 |     for (let state in originalTable) {
84 |       let row = {};
85 |       let originalRow = originalTable[state];
86 | 
87 |       for (let symbol in originalRow) {
88 |         let entry = originalRow[symbol];
89 |         row[this.getEncodedSymbol(symbol)] = entry;
90 |       }
91 | 
92 |       table[state] = row;
93 |     }
94 | 
95 |     return table;
96 |   }
97 | }
98 | 


--------------------------------------------------------------------------------
/examples/s-expression.cpp.bnf:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * S-expression parser.
  3 |  *
  4 |  * ./bin/syntax -g examples/s-expression.cpp.bnf -m LALR1 -o ~/SExpressionParser.h
  5 |  *
  6 |  * Examples:
  7 |  *
  8 |  * Atom: 42, foo, bar, "Hello World"
  9 |  *
 10 |  * List: (), (+ 5 x), (print "hello")
 11 |  *
 12 |  * Usage:
 13 |  *
 14 |  *  string exp{R"( (var x (+ "hello" 10)) )"};
 15 |  *
 16 |  *  SExpressionParser parser;
 17 |  *
 18 |  *  auto ast = parser.parse(exp);
 19 |  *
 20 |  *  cout << "(";
 21 |  *  cout << ast->list[0]->symbol << " "; // var
 22 |  *  cout << ast->list[1]->symbol << " ("; // x (
 23 |  *  cout << ast->list[2]->list[0]->string << " "; // +
 24 |  *  cout << ast->list[2]->list[1]->string << " "; // "hello"
 25 |  *  cout << ast->list[2]->list[2]->number << "))\n"; // 10 ))
 26 |  *
 27 |  * Result: (var x (+ "hello" 10))
 28 |  */
 29 | 
 30 | // -----------------------------------------------
 31 | // Lexical grammar (tokens):
 32 | 
 33 | %lex
 34 | 
 35 | %%
 36 | 
 37 | \s+               %empty
 38 | 
 39 | \"[^\"]*\"        STRING
 40 | 
 41 | \d+               NUMBER
 42 | 
 43 | [\w\-+*=<>/]+     SYMBOL
 44 | 
 45 | /lex
 46 | 
 47 | // -----------------------------------------------
 48 | // Syntactic grammar (BNF):
 49 | 
 50 | %{
 51 | 
 52 | #include <string>
 53 | #include <initializer_list>
 54 | 
 55 | /**
 56 |  * Expression type.
 57 |  */
 58 | enum class ExpType {
 59 |   Number,
 60 |   String,
 61 |   Symbol,
 62 |   List,
 63 | };
 64 | 
 65 | /**
 66 |  * Expression.
 67 |  */
 68 | class Exp {
 69 |  public:
 70 |   ExpType type;
 71 | 
 72 |   int number;
 73 |   std::string string;
 74 |   std::vector<std::shared_ptr<Exp>> list;
 75 | 
 76 |   // Numbers:
 77 |   Exp(int number) : type(ExpType::Number), number(number) {}
 78 | 
 79 |   // Strings, Symbols:
 80 |   Exp(std::string& strVal) {
 81 |     if (strVal[0] == '"') {
 82 |       type = ExpType::String;
 83 |       string = strVal.substr(1, strVal.size() - 2);
 84 |     } else {
 85 |       type = ExpType::Symbol;
 86 |       string = strVal;
 87 |     }
 88 |   }
 89 | 
 90 |   // Lists:
 91 |   Exp(std::vector<std::shared_ptr<Exp>> list) : type(ExpType::List), list(list) {}
 92 | 
 93 |   // List init:
 94 |   Exp(const std::initializer_list<std::shared_ptr<Exp>> &list) : type(ExpType::List), list(list) {}
 95 | 
 96 |   ~Exp() {}
 97 | };
 98 | 
 99 | using Value = std::shared_ptr<Exp>;
100 | 
101 | %}
102 | 
103 | %%
104 | 
105 | Exp
106 |   : Atom
107 |   | List
108 |   ;
109 | 
110 | Atom
111 |   : NUMBER { $$ = std::make_shared<Exp>(std::stoi($1)) }
112 |   | STRING { $$ = std::make_shared<Exp>($1) }
113 |   | SYMBOL { $$ = std::make_shared<Exp>($1) }
114 |   ;
115 | 
116 | List
117 |   : '(' ListEntries ')' { $$ = $2 }
118 |   ;
119 | 
120 | ListEntries
121 |   : ListEntries Exp { $1->list.push_back($2); $$ = $1 }
122 |   | %empty          { $$ = std::make_shared<Exp>(std::vector<std::shared_ptr<Exp>>{}) }
123 |   ;
124 | 


--------------------------------------------------------------------------------
/examples/test.lang:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Example Lang file.
  3 |  *
  4 |  * BNF grammar is in: examples/lang.bnf
  5 |  * Lex grammar is in: examples/lang.lex
  6 |  */
  7 | 
  8 | // Variable declaration.
  9 | let x = 10, y = PI * 2;
 10 | 
 11 | /**
 12 |  * Function declaration.
 13 |  */
 14 | fn square(x) {
 15 |   return x * x;
 16 | }
 17 | 
 18 | /**
 19 |  * Lambda function.
 20 |  */
 21 | let square = (x) -> {
 22 |   return x * x;
 23 | };
 24 | 
 25 | /**
 26 |  * Function call.
 27 |  */
 28 | square(2);
 29 | 
 30 | /**
 31 |  * do-while loop.
 32 |  */
 33 | do {
 34 |   foo += x;
 35 |   x -= 1;
 36 | } while (x > 0);
 37 | 
 38 | /**
 39 |  * while-loop.
 40 |  */
 41 | while (x < 10) {
 42 |   foo -= x;
 43 |   x += 1;
 44 | }
 45 | 
 46 | /**
 47 |  * for-loop.
 48 |  */
 49 | for (let i = 0; i < 10; i += 1) {
 50 |   foo += i;
 51 | }
 52 | 
 53 | /**
 54 |  * break/continue.
 55 |  */
 56 | for (;;) {
 57 |   x += 1;
 58 |   if (x < 10) {
 59 |     continue;
 60 |   }
 61 |   x = 0;
 62 |   break;
 63 | }
 64 | 
 65 | /**
 66 |  * for-prop loop.
 67 |  */
 68 | for (let prop : data) {
 69 |   print(prop);
 70 | }
 71 | 
 72 | /**
 73 |  * for-prop loop.
 74 |  */
 75 | for (v : data);
 76 | 
 77 | /**
 78 |  * Array literals.
 79 |  */
 80 | let data = [1, 2, 3];
 81 | 
 82 | /**
 83 |  * Object literals.
 84 |  */
 85 | let config = {
 86 |   ip: "127.0.0.1",
 87 |   port: 3306,
 88 |   user: {
 89 |     name: "John Doe",
 90 |     alias: "jdoe",
 91 |   },
 92 |   0: index,
 93 |   "value scores": [1, 2],
 94 | };
 95 | 
 96 | /**
 97 |  * Simple block.
 98 |  */
 99 | {
100 |   let x = 10;
101 |   let y = 0.5;
102 | 
103 |   fn Point(x, y) {
104 |     return {
105 |       "x": x,
106 |       "y": y,
107 |     };
108 |   }
109 | }
110 | 
111 | /**
112 |  * Object literal.
113 |  */
114 | ({
115 |   "x": x,
116 |   "y": y,
117 | });
118 | 
119 | /**
120 |  * Two nested empty blocks.
121 |  */
122 | 
123 | {
124 |   {
125 | 
126 |   }
127 | }
128 | 
129 | /**
130 |  * Keywords as property names (object followed by keyword `if`).
131 |  */
132 | ({
133 |   if: 10,
134 |   while: 20,
135 | });
136 | 
137 | /**
138 |  * Block followed by keyword `if`.
139 |  */
140 | {
141 |   if (x) {
142 |     print(1);
143 |   }
144 | }
145 | 
146 | /**
147 |  * Member expressions.
148 |  */
149 | 
150 | foo.bar = 10;
151 | foo["bar"] = 20;
152 | foo["bar"].baz = 30;
153 | 
154 | root.action(10);
155 | 
156 | /**
157 |  * Class declaration.
158 |  */
159 | 
160 | class Point {
161 |   fn constructor(x, y) {
162 |     this.x = x;
163 |     this.y = y;
164 |   }
165 | 
166 |   fn getX() {
167 |     return this.x;
168 |   }
169 | 
170 |   fn getY() {
171 |     return this.y;
172 |   }
173 | }
174 | 
175 | class Point3D extends Point {
176 |   fn constructor(x, y, z) {
177 |     super(x, y);
178 |     this.z = z;
179 |   }
180 | 
181 |   fn getZ() {
182 |     return this.z;
183 |   }
184 | }
185 | 
186 | new Point.Other();
187 | 
188 | let p = new Point3D(10, 20, 30);
189 | 
190 | 
191 | 


--------------------------------------------------------------------------------
/examples/calc-loc.cs.bnf:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Captures location info. C# version.
  3 |  *
  4 |  * In order to capture locations, pass the `--loc` option.
  5 |  *
  6 |  * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc.
  7 |  * Named accessors are available as well: @foo, @bar.
  8 |  *
  9 |  * A location is an object of structure:
 10 |  *
 11 |  * {
 12 |  *   startOffset,
 13 |  *   endOffset,
 14 |  *   startLine,
 15 |  *   endLine,
 16 |  *   startColumn,
 17 |  *   endColumn,
 18 |  * }
 19 |  *
 20 |  * The resulting location is in the @$. It is calculated automatically from
 21 |  * first and last symbol on a RHS handle, and it also can be overridden
 22 |  * manually.
 23 |  *
 24 |  * ./bin/syntax -g examples/calc-loc.cs.bnf -m slr1 -o ~/Parser.cs --loc
 25 |  *
 26 |  */
 27 | 
 28 | %lex
 29 | 
 30 | %%
 31 | 
 32 | \s+       /* skip whitespace */ return "";
 33 | \d+       return "NUMBER";
 34 | 
 35 | /lex
 36 | 
 37 | %{
 38 | 
 39 | namespace SyntaxParser {
 40 | 
 41 | public class BinaryExpression
 42 | {
 43 |     public object Left;
 44 |     public object Right;
 45 |     public string Op;
 46 |     public dynamic Loc;
 47 | 
 48 |     public BinaryExpression(dynamic op, object left, object right, dynamic loc)
 49 |     {
 50 |         Op = op.ToString();
 51 |         Left = left;
 52 |         Right = right;
 53 |         Loc = loc;
 54 |     }
 55 | }
 56 | 
 57 | public class Loc
 58 | {
 59 |     public Loc() {}
 60 | 
 61 |     public int StartOffset;
 62 |     public int EndOffset;
 63 |     public int StartLine;
 64 |     public int EndLine;
 65 |     public int StartColumn;
 66 |     public int EndColumn;
 67 | 
 68 |     // Same as default result location.
 69 |     public static Loc capture(dynamic start, dynamic end)
 70 |     {
 71 |         return new Loc()
 72 |         {
 73 |           StartOffset = start.StartOffset,
 74 |           EndOffset = end.EndOffset,
 75 |           StartLine = start.StartLine,
 76 |           EndLine = end.EndLine,
 77 |           StartColumn = start.StartColumn,
 78 |           EndColumn = end.EndColumn
 79 |         };
 80 |     }
 81 | }
 82 | 
 83 | public class NumericLiteral
 84 | {
 85 |     public int Value;
 86 |     public dynamic Loc;
 87 | 
 88 |     public NumericLiteral(dynamic value, dynamic loc)
 89 |     {
 90 |         Value = Convert.ToInt32(value);
 91 |         Loc = loc;
 92 |     }
 93 | }
 94 | 
 95 | }
 96 | 
 97 | %}
 98 | 
 99 | %left '+'
100 | %left '*'
101 | 
102 | %%
103 | 
104 | exp
105 |   : exp '+' exp
106 |     /* Explicitly calculate location */
107 |     { $$ = new BinaryExpression("+", $1, $3, Loc.capture(@1, @3)) }
108 | 
109 |   | exp '*' exp
110 |     /* Use default result location: @$ */
111 |     { $$ = new BinaryExpression("*", $1, $3, @$) }
112 | 
113 |   | '(' exp ')'
114 |     { $$ = $2 }
115 | 
116 |   | number
117 |     /* Named args and position */
118 |     { $$ = new NumericLiteral($number, @number) }
119 |   ;
120 | 
121 | number
122 |   : NUMBER { $$ = Convert.ToInt32(yytext) }
123 |   ;
124 | 


--------------------------------------------------------------------------------
/src/__tests__/rust-calc/Cargo.lock:
--------------------------------------------------------------------------------
 1 | # This file is automatically @generated by Cargo.
 2 | # It is not intended for manual editing.
 3 | [[package]]
 4 | name = "bitflags"
 5 | version = "1.0.4"
 6 | source = "registry+https://github.com/rust-lang/crates.io-index"
 7 | 
 8 | [[package]]
 9 | name = "calc-bin"
10 | version = "0.1.0"
11 | dependencies = [
12 |  "calc-syntax 0.1.0",
13 | ]
14 | 
15 | [[package]]
16 | name = "calc-syntax"
17 | version = "0.1.0"
18 | dependencies = [
19 |  "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
20 |  "onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
21 | ]
22 | 
23 | [[package]]
24 | name = "cc"
25 | version = "1.0.36"
26 | source = "registry+https://github.com/rust-lang/crates.io-index"
27 | 
28 | [[package]]
29 | name = "lazy_static"
30 | version = "1.3.0"
31 | source = "registry+https://github.com/rust-lang/crates.io-index"
32 | 
33 | [[package]]
34 | name = "libc"
35 | version = "0.2.54"
36 | source = "registry+https://github.com/rust-lang/crates.io-index"
37 | 
38 | [[package]]
39 | name = "onig"
40 | version = "4.3.2"
41 | source = "registry+https://github.com/rust-lang/crates.io-index"
42 | dependencies = [
43 |  "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
44 |  "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
45 |  "libc 0.2.54 (registry+https://github.com/rust-lang/crates.io-index)",
46 |  "onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
47 | ]
48 | 
49 | [[package]]
50 | name = "onig_sys"
51 | version = "69.1.0"
52 | source = "registry+https://github.com/rust-lang/crates.io-index"
53 | dependencies = [
54 |  "cc 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
55 |  "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
56 | ]
57 | 
58 | [[package]]
59 | name = "pkg-config"
60 | version = "0.3.14"
61 | source = "registry+https://github.com/rust-lang/crates.io-index"
62 | 
63 | [metadata]
64 | "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
65 | "checksum cc 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a0c56216487bb80eec9c4516337b2588a4f2a2290d72a1416d930e4dcdb0c90d"
66 | "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
67 | "checksum libc 0.2.54 (registry+https://github.com/rust-lang/crates.io-index)" = "c6785aa7dd976f5fbf3b71cfd9cd49d7f783c1ff565a858d71031c6c313aa5c6"
68 | "checksum onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a646989adad8a19f49be2090374712931c3a59835cb5277b4530f48b417f26e7"
69 | "checksum onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388410bf5fa341f10e58e6db3975f4bea1ac30247dd79d37a9e5ced3cb4cc3b0"
70 | "checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
71 | 


--------------------------------------------------------------------------------
/src/__tests__/rust-plugin-test.js:
--------------------------------------------------------------------------------
 1 | import * as shelljs from 'shelljs';
 2 | import path from 'path';
 3 | 
 4 | // rust-calc test package uses Rust 2018, minimum version for that rust edition is 1.31
 5 | // https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html
 6 | const rustVersionRequired = [1, 31];
 7 | 
 8 | const whichCargo = shelljs.which('cargo');
 9 | const whichMake = shelljs.which('make');
10 | const rustInstalled = whichCargo && whichCargo.code === 0;
11 | const makeInstalled = whichMake && whichMake.code === 0;
12 | const rustCalcDir = path.join(__dirname, 'rust-calc');
13 | 
14 | function getRustVersion() {
15 |   const child = shelljs.exec('cargo --version');
16 |   if (child.code === 0) {
17 |     const semverRe = /\d+\.\d+\.\d+/;
18 |     const match = semverRe.exec(child.stdout);
19 |     if (match) {
20 |       return match[0].split('.').map(x => Number(x));
21 |     }
22 |   }
23 |   return [];
24 | }
25 | 
26 | function semverGte(aNumbers, bNumbers) {
27 |   if (!aNumbers || !bNumbers) {
28 |     return false;
29 |   }
30 |   const maxLen = Math.max(aNumbers.length, bNumbers.length);
31 |   let eq = false;
32 |   for (let i = 0; i < maxLen; i++) {
33 |     const aNum = aNumbers[i];
34 |     const bNum = bNumbers[i];
35 | 
36 |     if (aNum > bNum) {
37 |       return true;
38 |     } else if (aNum < bNum) {
39 |       return false;
40 |     } else {
41 |       eq = true;
42 |     }
43 |   }
44 |   return eq;
45 | }
46 | 
47 | const rustVersion = getRustVersion();
48 | const minimumVersionSatisfied = semverGte(rustVersion, rustVersionRequired);
49 | 
50 | if (makeInstalled && rustInstalled && minimumVersionSatisfied) {
51 |   xdescribe('rust plugin', () => {
52 |     beforeAll(() => {
53 |       shelljs.exec('make', {
54 |         cwd: path.join(rustCalcDir, 'calc-syntax'),
55 |       });
56 |     }, 10000);
57 | 
58 |     it('calc rust example should build, also output must match expected value', () => {
59 |       let runResult = shelljs.exec('cargo run --quiet', {
60 |         silent: true,
61 |         cwd: rustCalcDir,
62 |       });
63 | 
64 |       if (runResult.code !== 0) {
65 |         // something went wrong, rerun command with full debug output
66 |         runResult = shelljs.exec('cargo run', {
67 |           silent: false,
68 |           cwd: rustCalcDir,
69 |         });
70 |         console.error(runResult.stderr);
71 |       } else {
72 |         expect(runResult.stderr).toEqual('');
73 |       }
74 | 
75 |       expect(runResult.code).toEqual(0);
76 |       const stdout = runResult.stdout.toString('utf8');
77 | 
78 |       const match = /parse result: (\d+)/.exec(stdout);
79 |       expect(match).not.toBeNull();
80 |       expect(match[1]).toEqual('6');
81 |     });
82 |   });
83 | } else {
84 |   xdescribe('rust plugin mock', () => {
85 |     it('noop', () => {
86 |       console.warn(
87 |         `make and rust toolchain version ${rustVersionRequired.join(
88 |           '.'
89 |         )} or greater are not installed.`,
90 |         `Tests for rust plugin will be skipped.`
91 |       );
92 |     });
93 |   });
94 | }
95 | 


--------------------------------------------------------------------------------
/examples/cnf.g:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Conjunctive normal form parser.
 3 |  *
 4 |  * https://en.wikipedia.org/wiki/Conjunctive_normal_form
 5 |  *
 6 |  * ./bin/syntax -g examples/cnf.g -m slr1 -p '(A v B v ¬ C) ^ (D v E)'
 7 |  *
 8 |  *
 9 |  *   Parsing mode: SLR(1).
10 |  *
11 |  *   Parsing: (A v B v ¬ C) ^ (D v E)
12 |  *
13 |  *   ✓ Accepted
14 |  *
15 |  *   Parsed value:
16 |  *
17 |  *   {
18 |  *       "type": "Conjunction",
19 |  *       "disjunctions": [
20 |  *           {
21 |  *               "type": "Disjunction",
22 |  *               "variables": [
23 |  *                   {
24 |  *                       "type": "Variable",
25 |  *                       "value": "A"
26 |  *                   },
27 |  *                   {
28 |  *                       "type": "Variable",
29 |  *                       "value": "B"
30 |  *                   },
31 |  *                   {
32 |  *                       "type": "Negation",
33 |  *                       "variable": {
34 |  *                           "type": "Variable",
35 |  *                           "value": "C"
36 |  *                       }
37 |  *                   }
38 |  *               ]
39 |  *           },
40 |  *           {
41 |  *               "type": "Disjunction",
42 |  *               "variables": [
43 |  *                   {
44 |  *                       "type": "Variable",
45 |  *                       "value": "D"
46 |  *                   },
47 |  *                   {
48 |  *                       "type": "Variable",
49 |  *                       "value": "E"
50 |  *                   }
51 |  *               ]
52 |  *           }
53 |  *       ]
54 |  *   }
55 |  *
56 |  * To generate a parser:
57 |  *
58 |  * ./bin/syntax -g examples/cnf.g -m slr1 -o cnf-parser.js
59 |  *
60 |  * In Node:
61 |  *
62 |  * require('cnf-parser.js').parse('(A v B v ¬ C) ^ (D v E)');
63 |  *
64 |  */
65 | 
66 | {
67 |   "lex": {
68 |     "rules": [
69 |       ["\\s+",                                 "/* skip whitespace */"],
70 |       ["v",                                    "return 'OR';"],
71 |       ["\\^",                                  "return 'AND';"],
72 |       ["¬",                                    "return 'NOT';"],
73 |       ["[a-zA-Z]+",                            "return 'ID';"],
74 |       ["\\(",                                  "return 'LPAREN';"],
75 |       ["\\)",                                  "return 'RPAREN';"],
76 |     ]
77 |   },
78 | 
79 |   "bnf": {
80 |     "Conjunction":  [["Conjunction AND Disjunction",    "$$ = {type: 'Conjunction', disjunctions: [].concat($1, $3)};"],
81 |                      ["Disjunction",                    "$$ = $1;"]],
82 | 
83 |      "Disjunction": [["LPAREN Clauses RPAREN",          "$$ = {type: 'Disjunction', variables: $2};"]],
84 | 
85 |      "Clauses":     [["Clauses OR Var",                 "$$ = [].concat($1, $3);"],
86 |                      ["Var",                            "$$ = [$1];"]],
87 | 
88 |      "Var":         [["ID",                             "$$ = {type: 'Variable', value: $1};"],
89 |                      ["NOT Var",                        "$$ = {type: 'Negation', variable: $2};"]]
90 |   }
91 | }


--------------------------------------------------------------------------------
/src/ll/ll-parser-generator-default.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The MIT License (MIT)
  3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  4 |  */
  5 | 
  6 | import BaseParserGenerator from '../base-parser-generator';
  7 | import LLParsingTable from './ll-parsing-table';
  8 | 
  9 | import fs from 'fs';
 10 | 
 11 | /**
 12 |  * Template for LL(1) parser.
 13 |  */
 14 | const LL_PARSER_TEMPLATE = fs.readFileSync(
 15 |   `${__dirname}/../templates/ll.template.js`,
 16 |   'utf-8'
 17 | );
 18 | 
 19 | /**
 20 |  * LL parser generator. Creates a parser module for a given grammar, and
 21 |  * saves it to the `outputFile`.
 22 |  */
 23 | export default class LLParserGeneratorDefault extends BaseParserGenerator {
 24 |   /**
 25 |    * Instance constructor.
 26 |    */
 27 |   constructor({grammar, outputFile, options = {}}) {
 28 |     if (!grammar.getMode().isLL()) {
 29 |       throw new Error(`LL parser generator: LL(1) grammar is expected.`);
 30 |     }
 31 |     super({grammar, outputFile, options})
 32 |       .setTable(new LLParsingTable({grammar}))
 33 |       .setTemplate(LL_PARSER_TEMPLATE);
 34 |   }
 35 | 
 36 |   /**
 37 |    * Generates parser data.
 38 |    */
 39 |   generateParserData() {
 40 |     super.generateParserData();
 41 |     this._generateStartSymbol();
 42 |   }
 43 | 
 44 |   /**
 45 |    * Format of the production is:
 46 |    * [RHS.reverse().map(index)]
 47 |    * The RHS is reversed to push onto the stack at derivation.
 48 |    * LL parser doesn't implement yet semantic action.
 49 |    */
 50 |   generateRawProductionsData() {
 51 |     let productionsData = this.getGrammar()
 52 |       .getProductions()
 53 |       .map(production => {
 54 |         // RHS for derivation.
 55 |         let reversedRHS = [];
 56 |         if (!production.isEpsilon()) {
 57 |           reversedRHS = production
 58 |             .getRHS()
 59 |             .map(symbol => {
 60 |               return this.getEncodedSymbol(symbol.getSymbol()).toString();
 61 |             })
 62 |             .reverse();
 63 |         }
 64 |         return [reversedRHS];
 65 |       });
 66 | 
 67 |     // For 1-based index production.
 68 |     productionsData.unshift([-1]);
 69 |     return productionsData;
 70 |   }
 71 | 
 72 |   generateProductionsData() {
 73 |     return this.generateRawProductionsData().map(data => JSON.stringify(data));
 74 |   }
 75 | 
 76 |   /**
 77 |    * Actual parsing table.
 78 |    */
 79 |   generateParseTableData() {
 80 |     let originalTable = this._table.get();
 81 |     let table = {};
 82 | 
 83 |     for (let nonTerminal in originalTable) {
 84 |       let row = {};
 85 |       let originalRow = originalTable[nonTerminal];
 86 | 
 87 |       for (let symbol in originalRow) {
 88 |         let entry = originalRow[symbol];
 89 |         row[this.getEncodedSymbol(symbol)] = entry;
 90 |       }
 91 | 
 92 |       table[this.getEncodedNonTerminal(nonTerminal)] = row;
 93 |     }
 94 | 
 95 |     return table;
 96 |   }
 97 | 
 98 |   _generateStartSymbol() {
 99 |     let startSymbol = this.getEncodedNonTerminal(
100 |       this.getGrammar().getStartSymbol()
101 |     );
102 |     this.writeData('START', `'${startSymbol}'`);
103 |   }
104 | }
105 | 


--------------------------------------------------------------------------------
/examples/module-include.py.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Module includes. Python version.
  3 |  *
  4 |  * The "moduleInclude" directive allows including an arbitrary code at the
  5 |  * beginning of the generated parser file. As an example, can be the code
  6 |  * to require modules for corresponding AST nodes, or direct AST nodes
  7 |  * definitions.
  8 |  *
  9 |  * The code may define callbacks for several parse events, in particular
 10 |  * `on_parse_begin`, and `on_parse_end`:
 11 |  *
 12 |  *   def on_parse_begin(string):
 13 |  *       print('Parsing:', string)
 14 |  *
 15 |  * ./bin/syntax -g ./examples/module-include.py.g -m slr1 -o './parser.py'
 16 |  *
 17 |  * >>> import parser
 18 |  * >>> parser.parse('2 + 2 * 2')
 19 |  *
 20 |  * ('Custom hook on parse begin. Parsing:', '2 + 2 * 2')
 21 |  * ('Custom hook on parse end. Parsed:', <test.BinaryExpression object at 0x10d1ace10>)
 22 |  * <test.BinaryExpression object at 0x10d1ace10>
 23 |  */
 24 | 
 25 | {
 26 |   "lex": {
 27 |     "rules": [
 28 |       ["\\s+",  "# skip whitespace"],
 29 |       ["\\d+",  "return 'NUMBER'"],
 30 |       ["\\*",   "return '*'"],
 31 |       ["\\+",   "return '+'"],
 32 |       ["\\(",   "return '('"],
 33 |       ["\\)",   "return ')'"],
 34 |     ]
 35 |   },
 36 | 
 37 |   "moduleInclude": `
 38 |     # Can be "import" statements, or direct declarations.
 39 | 
 40 |     class Node(object):
 41 |         def __init__(self, type):
 42 |             self.type = type
 43 | 
 44 |     class BinaryExpression(Node):
 45 |         def __init__(self, left, right, op):
 46 |             super(BinaryExpression, self).__init__('Binary')
 47 |             self.left = left
 48 |             self.right = right
 49 |             self.op = op
 50 | 
 51 |     class PrimaryExpression(Node):
 52 |         def __init__(self, value):
 53 |             super(PrimaryExpression, self).__init__('Primary')
 54 |             self.value = int(value)
 55 | 
 56 |     # Standard hooks on parse begin, and end:
 57 | 
 58 |     _string = None
 59 | 
 60 |     def on_parse_begin(string):
 61 |         global _string
 62 |         _string = string
 63 |         print('Custom hook on parse begin. Parsing:', string)
 64 | 
 65 |     def on_parse_end(value):
 66 |         print('Custom hook on parse end. Parsed:', value)
 67 | 
 68 |         if _string != '2 + 2 * 2':
 69 |             return
 70 | 
 71 |         assert isinstance(value, BinaryExpression)
 72 |         assert value.op == '+'
 73 | 
 74 |         assert isinstance(value.left, PrimaryExpression)
 75 |         assert value.left.value == 2
 76 |         assert isinstance(value.right, BinaryExpression)
 77 | 
 78 |         assert value.right.op == '*'
 79 |         assert isinstance(value.right.left, PrimaryExpression)
 80 |         assert isinstance(value.right.right, PrimaryExpression)
 81 |         assert value.right.left.value == 2
 82 |         assert value.right.right.value == 2
 83 | 
 84 |         print('All assertions are passed!')
 85 |   `,
 86 | 
 87 |   "operators": [
 88 |     ["left", "+"],
 89 |     ["left", "*"],
 90 |   ],
 91 | 
 92 |   "bnf": {
 93 |     "E": [
 94 |       ["E + E",  "$$ = BinaryExpression($1, $3, $2)"],
 95 |       ["E * E",  "$$ = BinaryExpression($1, $3, $2)"],
 96 |       ["NUMBER", "$$ = PrimaryExpression($1)"],
 97 |       ["( E )",  "$$ = $2"],
 98 |     ],
 99 |   },
100 | }


--------------------------------------------------------------------------------
/scripts/build.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The MIT License (MIT)
 3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 4 |  */
 5 | 
 6 | require('shelljs/global');
 7 | 
 8 | const colors = require('colors');
 9 | 
10 | console.info(colors.bold('Building...\n'));
11 | 
12 | // Need to checkout to working copies of the generated parser if
13 | // they got corrupted with current changes. The parsers are regenerated
14 | // further in the build steps.
15 | exec(`git checkout "src/generated/lex-parser.gen.js"`);
16 | exec(`git checkout "src/generated/bnf-parser.gen.js"`);
17 | 
18 | // ----------------------------------------------------------
19 | // 1. Git hooks.
20 | 
21 | console.info(colors.bold('[1/6] Installing Git hooks...\n'));
22 | 
23 | // Setup pre-commit hook.
24 | console.info('  - pre-commit: .git/hooks/pre-commit');
25 | exec('unlink .git/hooks/pre-commit');
26 | chmod('+x', './scripts/git-pre-commit');
27 | ln('-s', '../../scripts/git-pre-commit', '.git/hooks/pre-commit');
28 | 
29 | // Setup pre-push hook.
30 | console.info('  - pre-push:   .git/hooks/pre-push\n');
31 | exec('unlink .git/hooks/pre-push');
32 | chmod('+x', './scripts/git-pre-push');
33 | ln('-s', '../../scripts/git-pre-push', '.git/hooks/pre-push');
34 | 
35 | // ----------------------------------------------------------
36 | // 2. Templates
37 | 
38 | console.info(colors.bold('[2/6] Installing templates...\n'));
39 | rm('-rf', 'dist');
40 | mkdir('dist');
41 | mkdir('dist/templates');
42 | 
43 | const templates = ls('src/templates').map(template => '  - ' + template);
44 | console.info(templates.join('\n'));
45 | 
46 | cp('-r', 'src/templates/*', 'dist/templates/');
47 | 
48 | // ----------------------------------------------------------
49 | // 3. Plugins
50 | 
51 | console.info(colors.bold('\n[3/6] Installing plugins...\n'));
52 | const plugins = ls('src/plugins/').filter(file => file !== 'README.md');
53 | 
54 | plugins.forEach(plugin => {
55 |   console.info('  - ' + plugin);
56 |   mkdir('-p', `dist/plugins/${plugin}/templates`);
57 |   cp(
58 |     '-r',
59 |     `src/plugins/${plugin}/templates/*`,
60 |     `dist/plugins/${plugin}/templates/`
61 |   );
62 | });
63 | 
64 | // ----------------------------------------------------------
65 | // 4. Transpiling JS code
66 | 
67 | console.info(colors.bold('\n[4/6] Transpiling JS code...\n'));
68 | exec(
69 |   `"node_modules/.bin/babel" ${process.argv[2] ||
70 |     ''} src/ --out-dir dist/ --ignore "**/templates/*" --ignore "**/__tests__/*"`
71 | );
72 | 
73 | // ----------------------------------------------------------
74 | // 5. Rebuilding LEX parser
75 | 
76 | console.info(colors.bold('\n[5/6] Rebuilding LEX parser...'));
77 | exec(
78 |   `node "./bin/syntax" -g src/generated/lex.bnf -l src/generated/lex.lex -m lalr1 -o src/generated/lex-parser.gen.js`
79 | );
80 | exec(
81 |   `"node_modules/.bin/babel" src/generated/lex-parser.gen.js -o dist/generated/lex-parser.gen.js`
82 | );
83 | 
84 | // ----------------------------------------------------------
85 | // 6. Rebuilding BNF parser
86 | 
87 | console.info(colors.bold('\n[6/6] Rebuilding BNF parser...'));
88 | exec(
89 |   `node "./bin/syntax" -g src/generated/bnf.g -m lalr1 -o src/generated/bnf-parser.gen.js`
90 | );
91 | exec(
92 |   `"node_modules/.bin/babel" src/generated/bnf-parser.gen.js -o dist/generated/bnf-parser.gen.js`
93 | );
94 | 
95 | console.info(colors.bold('All done.\n'));
96 | 


--------------------------------------------------------------------------------
/examples/calc-ast-java.bnf:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Generated parser in Java.
  3 |  *
  4 |  * ./bin/syntax -g examples/calc.java.g -m lalr1 -o CalcParser.java
  5 |  *
  6 |  * import com.syntax.*;
  7 |  *
  8 |  * CalcParser parser = new CalcParser();
  9 |  *
 10 |  * System.out.println(parser.parse("2 + 2 * 2"));
 11 |  * System.out.println(parser.parse("(2 + 2) * 2"));
 12 |  */
 13 | 
 14 | // -----------------------------------------------------------------------------
 15 | // Lexical grammar
 16 | 
 17 | /**
 18 |  * RegExp-based lexical grammar. Simple symbols like '*', '(', etc, can be
 19 |  * defined inline.
 20 |  */
 21 | 
 22 | %lex
 23 | 
 24 | %%
 25 | 
 26 | \s+               /* skip whitespace */ return null
 27 | \d+               return "NUMBER"
 28 | 
 29 | /lex
 30 | 
 31 | // -----------------------------------------------------------------------------
 32 | // Operator precedence
 33 | 
 34 | /**
 35 |  * Both, '+' and '*' are left-associative. I.e. 5 + 3 + 2 is parsed as
 36 |  * (5 + 3) + 2, and not as 5 + (3 + 2).
 37 |  *
 38 |  * The '*' goes after '+' in the list below, so it has higher precedence,
 39 |  * and 2 + 2 * 2 is parsed correctly as 2 + (2 * 2).
 40 |  */
 41 | 
 42 | %left '+'
 43 | %left '*'
 44 | 
 45 | 
 46 | // -----------------------------------------------------------------------------
 47 | // Module include
 48 | 
 49 | /**
 50 |  * The code in the module include section is included "as is".
 51 |  * It can contain the `ParserEvents` class, which defines parse event handlers.
 52 |  */
 53 | 
 54 | %{
 55 | 
 56 | /**
 57 |  * The ParserEvents class allows subscribing to
 58 |  * different parsing events.
 59 |  */
 60 | class ParserEvents {
 61 |   public static void init() {
 62 |     System.out.println("Parser is created.");
 63 |   }
 64 | 
 65 |   public static void onParseBegin(String str) {
 66 |     System.out.println("Parsing is started: " + str);
 67 |   }
 68 | 
 69 |   public static void onParseEnd(Object result) {
 70 |     System.out.println("Parsing is completed: " + result);
 71 |   }
 72 | }
 73 | 
 74 | // Define the node classes inline here, however in practice they can be
 75 | // located anywhere, and just imported here.
 76 | 
 77 | class Node {
 78 |   public String type;
 79 | }
 80 | 
 81 | class BinaryNode extends Node {
 82 |   public Node left;
 83 |   public Node right;
 84 |   public String operator;
 85 | 
 86 |   public BinaryNode(Object left, Object op, Object right) {
 87 |     this.type = "BinaryNode";
 88 |     this.left = (Node)left;
 89 |     this.right = (Node)right;
 90 |     this.operator = (String)op;
 91 |   }
 92 | }
 93 | 
 94 | class LiteralNode extends Node {
 95 |   public Integer value;
 96 | 
 97 |   public LiteralNode(Integer value) {
 98 |     this.type = "LiteralNode";
 99 |     this.value = value;
100 |   }
101 | }
102 | 
103 | 
104 | %}
105 | 
106 | // -----------------------------------------------------------------------------
107 | // Syntactic grammar (BNF)
108 | 
109 | %%
110 | 
111 | Expression
112 | 
113 |   : Expression '+' Expression
114 |     {
115 |       $$ = new BinaryNode($1, $2, $3)
116 |     }
117 | 
118 |   | Expression '*' Expression
119 |     {
120 |       $$ = new BinaryNode($1, $2, $3)
121 |     }
122 | 
123 |   | NUMBER
124 |     {
125 |       $$ = new LiteralNode(Integer.valueOf(yytext))
126 |     }
127 | 
128 |   | '(' Expression ')'
129 |     {
130 |       $$ = $2
131 |     }
132 |   ;
133 | 


--------------------------------------------------------------------------------
/examples/lexer-start-conditions.py.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Start conditions of lex rules. Tokenizer states. Python version.
  3 |  *
  4 |  * Tokenizer rules may provide start conditions. Such rules are executed
  5 |  * only when lexer enters the state corresponding to the names of the
  6 |  * start conditions.
  7 |  *
  8 |  * Start conditions can be inclusive (%s, 0), and exclusive (%x, 1).
  9 |  * Inclusive conditions also include rules without any start conditions.
 10 |  * Exclusive conditions do not include other rules when the parser enters
 11 |  * this state. The rules with `*` condition are always included.
 12 |  *
 13 |  * https://gist.github.com/DmitrySoshnikov/f5e2583b37e8f758c789cea9dcdf238a
 14 |  *
 15 |  * When a grammar is defined in the JSON format, the start conditions are
 16 |  * specified as:
 17 |  *
 18 |  *   "startConditions": {
 19 |  *     "name": 1,  // exclusive
 20 |  *     "other": 0, // inclusive
 21 |  *   }
 22 |  *
 23 |  * And a rule itself may specify a list of start conditions as the
 24 |  * first element:
 25 |  *
 26 |  *   // This lex-rule is applied only when parser enters `name` state.
 27 |  *
 28 |  *   [["name"], "\w+", "return 'NAME'"]
 29 |  *
 30 |  * At the beginning a lexer is in the `INITIAL` state. A new state is
 31 |  * entered either using `self.push_state(name)` or `self.begin(name)`. To
 32 |  * exit a state, use `self.pop_state()`.
 33 |  *
 34 |  * In the grammar below we have a `comment` tokenizer state, which allows us
 35 |  * to skip all the comment characters, but still to count number of lines.
 36 |  *
 37 |  *   ./bin/syntax -g examples/lexer-start-conditions.py.g -m slr1 -f ~/test.txt
 38 |  */
 39 | 
 40 | // Example of ~/test.txt
 41 | //
 42 | //  1.
 43 | //  2.  /* Hello world
 44 | //  3.      privet
 45 | //  4.
 46 | //  5.     OK **/
 47 | //  6.
 48 | //  7.  Main
 49 | //  8.
 50 | //
 51 | // Number of lines: 8
 52 | 
 53 | {
 54 |   "moduleInclude": `
 55 |     lines = 1
 56 | 
 57 |     def on_parse_end(_result):
 58 |       print('Number of lines: ' + str(lines))
 59 | 
 60 |   `,
 61 | 
 62 |   "lex": {
 63 |     "startConditions": {
 64 |       "comment": 1, // exclusive
 65 |     },
 66 | 
 67 |     "rules": [
 68 | 
 69 |       // On `/*` we enter the comment state:
 70 | 
 71 |       ["\\/\\*", "self.push_state('comment')      # skip comments"],
 72 | 
 73 |       // On `*/` being in `comment` state we return to the initial state:
 74 | 
 75 |       [["comment"], "\\*+\\/", "self.pop_state()  # skip comments"],
 76 | 
 77 |       // Being inside the `comment` state, skip all chars, except new lines
 78 |       // which we count.
 79 | 
 80 |       [["comment"], "[^*\\n]+",                  "# skip comments"],
 81 |       [["comment"], "\\*+[^*/\\n]*",             "# skip comments"],
 82 | 
 83 |       // Count lines in comments.
 84 |       [["comment"], "\\n", `
 85 |         global lines
 86 |         lines += 1                                # skip new lines in comments`
 87 |       ],
 88 | 
 89 |       // In INITIAL state, count line numbers as well:
 90 |       ["\\n", `
 91 |         global lines
 92 |         lines += 1                                # skip new lines
 93 |       `],
 94 | 
 95 |       [["*"], " +",                              "# skip spaces in any state "],
 96 | 
 97 |       // Main program consisting only of one word "Main"
 98 |       ["Main", "return 'MAIN'"],
 99 |     ],
100 |   },
101 | 
102 |   "bnf": {
103 |     "Program": ["MAIN"],
104 |   }
105 | }


--------------------------------------------------------------------------------
/src/plugins/ruby/templates/ll.template.rb:
--------------------------------------------------------------------------------
  1 | ##
  2 | # LL(1) parser generated by the Syntax tool.
  3 | #
  4 | # https://www.npmjs.com/package/syntax-cli
  5 | #
  6 | #   npm install -g syntax-cli
  7 | #
  8 | #   syntax-cli --help
  9 | #
 10 | # To regenerate run:
 11 | #
 12 | #   syntax-cli \
 13 | #     --grammar ~/path-to-grammar-file \
 14 | #     --mode LL1 \
 15 | #     --output ~/ParserClassName.rb
 16 | ##
 17 | 
 18 | class YYParse
 19 |   @@ps = {{{PRODUCTIONS}}}
 20 |   @@tks = {{{TOKENS}}}
 21 |   @@tbl = {{{TABLE}}}
 22 | 
 23 |   @@s = []
 24 |   @@__ = nil
 25 | 
 26 |   @@callbacks = {
 27 |     :on_parse_begin => nil,
 28 |     :on_parse_end => nil
 29 |   }
 30 | 
 31 |   EOF = '$'
 32 | 
 33 |   @@yytext = ''
 34 |   @@yyleng = 0
 35 | 
 36 |   def self.__=(__)
 37 |     @@__ = __
 38 |   end
 39 | 
 40 |   def self.yytext=(yytext)
 41 |     @@yytext = yytext
 42 |   end
 43 | 
 44 |   def self.yytext
 45 |     @@yytext
 46 |   end
 47 | 
 48 |   def self.yyleng=(yyleng)
 49 |     @@yyleng = yyleng
 50 |   end
 51 | 
 52 |   def self.yyleng
 53 |     @@yyleng
 54 |   end
 55 | 
 56 |   @@tokenizer = nil
 57 | 
 58 |   {{{PRODUCTION_HANDLERS}}}
 59 | 
 60 |   def self.tokenizer=(tokenizer)
 61 |     @@tokenizer = tokenizer
 62 |   end
 63 | 
 64 |   def self.tokenizer
 65 |     @@tokenizer
 66 |   end
 67 | 
 68 |   def self.on_parse_begin(&callback)
 69 |     @@callbacks[:on_parse_begin] = callback
 70 |   end
 71 | 
 72 |   def self.on_parse_end(&callback)
 73 |     @@callbacks[:on_parse_end] = callback
 74 |   end
 75 | 
 76 |   def self.parse(string)
 77 |     if (@@callbacks[:on_parse_begin])
 78 |       @@callbacks[:on_parse_begin].call(string)
 79 |     end
 80 | 
 81 |     tokenizer = self.tokenizer
 82 | 
 83 |     if not tokenizer
 84 |       raise "Tokenizer instance wasn't specified."
 85 |     end
 86 | 
 87 |     tokenizer.init_string(string)
 88 | 
 89 |     @@s = [self::EOF, {{{START}}}]
 90 | 
 91 |     t = tokenizer.get_next_token
 92 |     st = nil
 93 | 
 94 |     to = nil
 95 |     tt = nil
 96 | 
 97 |     loop do
 98 |       to = @@s.pop
 99 |       tt = @@tks[t[:type]]
100 | 
101 |       if to == tt
102 |         t = tokenizer.get_next_token
103 |         next
104 |       end
105 | 
106 |       self.der(to, t, tt)
107 | 
108 |       if not tokenizer.has_more_tokens and @@s.length <= 1
109 |         break
110 |       end
111 |     end
112 | 
113 |     while @@s.length != 1
114 |       self.der(@@s.length, t, tt)
115 |     end
116 | 
117 |     if @@s[0] != self::EOF || t[:type] != self::EOF
118 |       self.parse_error('stack is not empty');
119 |     end
120 | 
121 |     return true;
122 |   end
123 | 
124 |   def self.der(to, t, tt)
125 |     npn = @@tbl[to.to_s][tt.to_s]
126 |     if not npn
127 |       self.unexpected_token(t)
128 |     end
129 |     @@s.push(*@@ps[npn.to_i][0])
130 |   end
131 | 
132 |   def self.unexpected_token(token)
133 |     if token[:type] == self::EOF
134 |       self.unexpected_end_of_input()
135 |     end
136 | 
137 |     self.tokenizer.throw_unexpected_token(
138 |       token[:value],
139 |       token[:start_line],
140 |       token[:start_column]
141 |     )
142 |   end
143 | 
144 |   def self.unexpected_end_of_input
145 |     self.parse_error('Unexpected end of input.')
146 |   end
147 | 
148 |   def self.parse_error(message)
149 |     raise 'Parse error: ' + message
150 |   end
151 | end
152 | 
153 | {{{MODULE_INCLUDE}}}
154 | 
155 | {{{TOKENIZER}}}
156 | 
157 | class {{{PARSER_CLASS_NAME}}} < YYParse; end
158 | 


--------------------------------------------------------------------------------
/src/grammar/__tests__/lex-grammar-test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The MIT License (MIT)
  3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  4 |  */
  5 | 
  6 | import LexGrammar from '../lex-grammar';
  7 | import {EOF} from '../../special-symbols';
  8 | 
  9 | const lexGrammarData = require(__dirname + '/calc.lex');
 10 | const lexGrammar = new LexGrammar(lexGrammarData);
 11 | 
 12 | const rulesToIndices = rules => {
 13 |   return rules.map(rule => lexGrammar.getRuleIndex(rule));
 14 | };
 15 | 
 16 | const startConditions = {
 17 |   INITIAL: 0,
 18 |   comment: 1,
 19 | };
 20 | 
 21 | const lexRulesByStartConditions = {
 22 |   INITIAL: [0, 1, 2, 3, 4, 5, 6, 7, 8],
 23 |   comment: [0, 1, 9, 10, 11],
 24 | };
 25 | 
 26 | describe('lex-grammar', () => {
 27 | 
 28 |   it('rules', () => {
 29 |     const rulesData = lexGrammar.getRules().map(rule => rule.toData());
 30 |     expect(rulesData).toEqual(lexGrammarData.rules);
 31 |     expect(rulesData).toEqual(lexGrammar.getOriginalRules());
 32 |   });
 33 | 
 34 |   it('rule by index', () => {
 35 |     const firstRule = lexGrammar.getRuleByIndex(0);
 36 |     expect(firstRule).toBe(lexGrammar.getRules()[0]);
 37 |   });
 38 | 
 39 |   it('index of a rule', () => {
 40 |     const firstRule = lexGrammar.getRuleByIndex(0);
 41 |     expect(lexGrammar.getRuleIndex(firstRule)).toBe(0);
 42 |   });
 43 | 
 44 |   it('start conditions', () => {
 45 |     expect(lexGrammar.getStartConditions()).toEqual(startConditions);
 46 |   });
 47 | 
 48 |   it('macros', () => {
 49 |     expect(lexGrammar.getMacros()).toEqual(lexGrammarData.macros);
 50 |   });
 51 | 
 52 |   it('expanded macro', () => {
 53 |     const rule3 = lexGrammar.getRuleByIndex(3);
 54 |     const id = lexGrammarData.macros.id;
 55 | 
 56 |     expect(rule3.getMatcher().source).toEqual(`^${id}+`);
 57 |     expect(rule3.getOriginalMatcher()).toEqual(`${id}+`);
 58 |     expect(rule3.getRawMatcher()).toEqual(`^${id}+`);
 59 | 
 60 |     // Macro in a rule with a start condition.
 61 |     const rule11 = lexGrammar.getRuleByIndex(11);
 62 |     expect(rule11.getOriginalMatcher()).toEqual(`${id}+`);
 63 | 
 64 |     // Standard macro.
 65 |     const rule1 = lexGrammar.getRuleByIndex(1);
 66 |     // <<EOF>> -> $
 67 |     expect(rule1.getOriginalMatcher()).toEqual(EOF);
 68 |   });
 69 | 
 70 |   it('rules by start conditions', () => {
 71 |     const rulesByStartConditions = lexGrammar.getRulesByStartConditions();
 72 |     const rulesByConditionsData = {};
 73 | 
 74 |     Object.keys(rulesByStartConditions).forEach(startCondition => {
 75 |       const rules = rulesByStartConditions[startCondition];
 76 |       rulesByConditionsData[startCondition] = rulesToIndices(rules);
 77 |     });
 78 | 
 79 |     expect(rulesByConditionsData).toEqual(lexRulesByStartConditions);
 80 |   });
 81 | 
 82 |   it('rules for start conditions', () => {
 83 |     const rulesByStartConditions = lexGrammar.getRulesByStartConditions();
 84 | 
 85 |     Object.keys(rulesByStartConditions).forEach(startCondition => {
 86 |       const expectedLexRules = lexRulesByStartConditions[startCondition];
 87 | 
 88 |       const rules = rulesToIndices(lexGrammar.getRulesForState(startCondition));
 89 |       expect(rules).toEqual(expectedLexRules);
 90 |     });
 91 |   });
 92 | 
 93 |   it('options', () => {
 94 |     const options = lexGrammarData.options;
 95 | 
 96 |     expect(lexGrammar.getOptions()).toEqual(options);
 97 |     expect(lexGrammar.getRuleByIndex(0).getOptions()).toEqual(options);
 98 |   });
 99 | 
100 | });


--------------------------------------------------------------------------------
/examples/module-include.cs.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Module includes. C# version.
  3 |  *
  4 |  * The "moduleInclude" directive allows including an arbitrary code at the
  5 |  * beginning of the generated parser file. As an example, can be the code
  6 |  * to require modules for corresponding AST nodes, or direct AST nodes
  7 |  * definitions.
  8 |  *
  9 |  * The code may define callbacks for several parse events, in particular
 10 |  * `onParseBegin`, and `onParseEnd`, attaching to `yyparse`:
 11 |  *
 12 |  *   yyparse.onParseBegin = (string code) =>
 13 |  *   {
 14 |  *     Console.WriteLine("Parsing: " + code);
 15 |  *   };
 16 |  *
 17 |  * ./bin/syntax -g ./examples/module-include.cs.g -m slr1 -o './CalcParser.cs'
 18 |  *
 19 |  * using SyntaxParser;
 20 |  *
 21 |  * var parser = new CalcParser();
 22 |  *
 23 |  * Console.WriteLine(parser.parse("2 + 2 * 2"));
 24 |  *
 25 |  * > Custom hook on parse begin. Parsing: 2 + 2 * 2
 26 |  * > Custom hook on parse end. Parsed: SyntaxParser.BinaryExpression
 27 |  * > SyntaxParser.BinaryExpression
 28 |  */
 29 | 
 30 | {
 31 |   "lex": {
 32 |     "rules": [
 33 |       ["\\s+",  '/* skip whitespace */ return null'],
 34 |       ["\\d+",  'return "NUMBER"'],
 35 |       ["\\*",   'return "*"'],
 36 |       ["\\+",   'return "+"'],
 37 |       ["\\(",   'return "("'],
 38 |       ["\\)",   'return ")"'],
 39 |     ]
 40 |   },
 41 | 
 42 |   "moduleInclude": `
 43 |     // Can be "using" statements, or direct declarations.
 44 | 
 45 |     namespace SyntaxParser
 46 |     {
 47 | 
 48 |         public class Node
 49 |         {
 50 |           public string Type;
 51 | 
 52 |           public Node(string type)
 53 |           {
 54 |               Type = type;
 55 |           }
 56 |         }
 57 | 
 58 |         public class BinaryExpression : Node
 59 |         {
 60 |             public object Left;
 61 |             public object Right;
 62 |             public string Op;
 63 | 
 64 |             public BinaryExpression(object left, object right, string op): base("Binary")
 65 |             {
 66 |                 Left = left;
 67 |                 Right = right;
 68 |                 Op = op;
 69 |             }
 70 |         }
 71 | 
 72 |         public class PrimaryExpression : Node
 73 |         {
 74 |             public int Value;
 75 | 
 76 |             public PrimaryExpression(string value) : base("Primary")
 77 |             {
 78 |                 Value = Convert.ToInt32(value);
 79 |             }
 80 |         }
 81 | 
 82 |         // Setup of the parser hooks is done via Init.run();
 83 |         public class Init
 84 |         {
 85 |             public static void run()
 86 |             {
 87 |                 // Standard hooks on parse begin, and end:
 88 | 
 89 |                 yyparse.onParseBegin = (string code) =>
 90 |                 {
 91 |                   Console.WriteLine("Custom hook on parse begin. Parsing: " + code);
 92 |                 };
 93 | 
 94 |                 yyparse.onParseEnd = (object parsed) =>
 95 |                 {
 96 |                   Console.WriteLine("Custom hook on parse end. Parsed: " + parsed);
 97 |                 };
 98 |             }
 99 |         }
100 |     }
101 |   `,
102 | 
103 |   "operators": [
104 |     ["left", "+"],
105 |     ["left", "*"],
106 |   ],
107 | 
108 |   "bnf": {
109 |     "E": [
110 |       ["E + E",  "$$ = new BinaryExpression($1, $3, $2)"],
111 |       ["E * E",  "$$ = new BinaryExpression($1, $3, $2)"],
112 |       ["NUMBER", "$$ = new PrimaryExpression($1)"],
113 |       ["( E )",  "$$ = $2"],
114 |     ],
115 |   },
116 | }


--------------------------------------------------------------------------------
/examples/lexer-start-conditions.rb.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Start conditions of lex rules. Tokenizer states. Ruby version.
  3 |  *
  4 |  * Tokenizer rules may provide start conditions. Such rules are executed
  5 |  * only when lexer enters the state corresponding to the names of the
  6 |  * start conditions.
  7 |  *
  8 |  * Start conditions can be inclusive (%s, 0), and exclusive (%x, 1).
  9 |  * Inclusive conditions also include rules without any start conditions.
 10 |  * Exclusive conditions do not include other rules when the parser enters
 11 |  * this state. The rules with `*` condition are always included.
 12 |  *
 13 |  * https://gist.github.com/DmitrySoshnikov/f5e2583b37e8f758c789cea9dcdf238a
 14 |  *
 15 |  * When a grammar is defined in the JSON format, the start conditions are
 16 |  * specified as:
 17 |  *
 18 |  *   "startConditions": {
 19 |  *     "name": 1,  // exclusive
 20 |  *     "other": 0, // inclusive
 21 |  *   }
 22 |  *
 23 |  * And a rule itself may specify a list of start conditions as the
 24 |  * first element:
 25 |  *
 26 |  *   // This lex-rule is applied only when parser enters `name` state.
 27 |  *
 28 |  *   [["name"], "\w+", "return 'NAME'"]
 29 |  *
 30 |  * At the beginning a lexer is in the `INITIAL` state. A new state is
 31 |  * entered either using `this.pushState(name)` or `this.begin(name)`. To
 32 |  * exit a state, use `this.popState()`.
 33 |  *
 34 |  * In the grammar below we have a `comment` tokenizer state, which allows us
 35 |  * to skip all the comment characters, but still to count number of lines.
 36 |  *
 37 |  *   ./bin/syntax -g examples/lexer-start-conditions.rb.g -m slr1 -f ~/test.txt
 38 |  */
 39 | 
 40 | // Example of ~/test.txt
 41 | //
 42 | //  1.
 43 | //  2.  /* Hello world
 44 | //  3.      privet
 45 | //  4.
 46 | //  5.     OK **/
 47 | //  6.
 48 | //  7.  Main
 49 | //  8.
 50 | //
 51 | // Number of lines: 8
 52 | 
 53 | {
 54 |   "moduleInclude": `
 55 |     $lines = 1
 56 | 
 57 |     YYParse.on_parse_end {|_value|
 58 |       puts 'Number of lines: ' + $lines.to_s
 59 |     }
 60 | 
 61 |   `,
 62 | 
 63 |   "lex": {
 64 |     "startConditions": {
 65 |       "comment": 1, // exclusive
 66 |     },
 67 | 
 68 |     "rules": [
 69 | 
 70 |       // On `/*` we enter the comment state:
 71 | 
 72 |       ["\\/\\*", `
 73 |         push_state('comment')
 74 |         return nil                               # skip comments
 75 |       `],
 76 | 
 77 |       // On `*/` being in `comment` state we return to the initial state:
 78 | 
 79 |       [["comment"], "\\*+\\/", `
 80 |         pop_state()
 81 |         return nil                                # skip comments`
 82 |       ],
 83 | 
 84 |       // Being inside the `comment` state, skip all chars, except new lines
 85 |       // which we count.
 86 | 
 87 |       [["comment"], "[^*\\n]+",                  "# skip comments"],
 88 |       [["comment"], "\\*+[^*\\/\\n]*",           "# skip comments"],
 89 | 
 90 |       // Count lines in comments.
 91 |       [["comment"], "\\n", `
 92 |         $lines += 1
 93 |         return nil                                # skip new lines in comments
 94 |       `],
 95 | 
 96 |       // In INITIAL state, count line numbers as well:
 97 |       ["\\n", `
 98 |         $lines += 1
 99 |         return nil                                # skip new lines`],
100 | 
101 |       [["*"], " +",                              "# skip spaces in any state "],
102 | 
103 |       // Main program consisting only of one word "Main"
104 |       ["Main", "return 'MAIN'"],
105 |     ],
106 |   },
107 | 
108 |   "bnf": {
109 |     "Program": ["MAIN"],
110 |   }
111 | }


--------------------------------------------------------------------------------
/examples/module-include.php.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Module includes. PHP version.
  3 |  *
  4 |  * The "moduleInclude" directive allows including an arbitrary code at the
  5 |  * beginning of the generated parser file. As an example, it can be the code
  6 |  * to require modules for corresponding AST nodes, or direct AST nodes
  7 |  * definitions.
  8 |  *
  9 |  * The code may define callbacks for several parse events, in particular
 10 |  * `onParseBegin`, and `onParseEnd`, attaching to `yyparse`:
 11 |  *
 12 |  *   yyparse::setOnParseBegin(function($string) {
 13 |  *   var_dump('Parsing:', $string);
 14 |  *   });
 15 |  *
 16 |  * ./bin/syntax -g ./examples/module-include.php.g -m slr1 -o './Parser.php'
 17 |  *
 18 |  * <?php
 19 |  *
 20 |  *   require('Parser.php');
 21 |  *
 22 |  *   var_dump(Parser::parse('2 + 2 * 2'));
 23 |  *
 24 |  * string(36) "Custom hook on parse begin. Parsing:"
 25 |  * string(9) "2 + 2 * 2"
 26 |  * string(33) "Custom hook on parse end. Parsed:"
 27 |  *
 28 |  * object(BinaryExpression)#8 (4) {
 29 |  *   ["type"]=>
 30 |  *   string(6) "Binary"
 31 |  *   ["left"]=>
 32 |  *   object(PrimaryExpression)#4 (2) {
 33 |  *     ["type"]=>
 34 |  *     string(7) "Primary"
 35 |  *     ["value"]=>
 36 |  *     int(2)
 37 |  *   }
 38 |  *   ["right"]=>
 39 |  *   object(BinaryExpression)#7 (4) {
 40 |  *     ["type"]=>
 41 |  *     string(6) "Binary"
 42 |  *     ["left"]=>
 43 |  *     object(PrimaryExpression)#5 (2) {
 44 |  *       ["type"]=>
 45 |  *       string(7) "Primary"
 46 |  *       ["value"]=>
 47 |  *       int(2)
 48 |  *     }
 49 |  *     ["right"]=>
 50 |  *     object(PrimaryExpression)#6 (2) {
 51 |  *       ["type"]=>
 52 |  *       string(7) "Primary"
 53 |  *       ["value"]=>
 54 |  *       int(2)
 55 |  *     }
 56 |  *     ["op"]=>
 57 |  *     string(1) "*"
 58 |  *   }
 59 |  *   ["op"]=>
 60 |  *   string(1) "+"
 61 |  * }
 62 |  */
 63 | 
 64 | {
 65 |   "lex": {
 66 |     "rules": [
 67 |       ["\\s+",  "/* skip whitespace */"],
 68 |       ["\\d+",  "return 'NUMBER'"],
 69 |       ["\\*",   "return '*'"],
 70 |       ["\\+",   "return '+'"],
 71 |       ["\\(",   "return '('"],
 72 |       ["\\)",   "return ')'"],
 73 |     ]
 74 |   },
 75 | 
 76 |   "moduleInclude": `
 77 |     // Can be "require" statements, or direct declarations.
 78 | 
 79 |     class Node {
 80 |       public function __construct($type) {
 81 |         $this->type = $type;
 82 |       }
 83 |     }
 84 | 
 85 |     class BinaryExpression extends Node {
 86 |       public function __construct($left, $right, $op) {
 87 |         parent::__construct('Binary');
 88 |         $this->left = $left;
 89 |         $this->right = $right;
 90 |         $this->op = $op;
 91 |       }
 92 |     }
 93 | 
 94 |     class PrimaryExpression extends Node {
 95 |       public function __construct($value) {
 96 |         parent::__construct('Primary');
 97 |         $this->value = intval($value);
 98 |       }
 99 |     }
100 | 
101 |     // Standard hooks on parse begin and end:
102 | 
103 |     yyparse::setOnParseBegin(function($string) {
104 |       var_dump('Custom hook on parse begin. Parsing:', $string);
105 |     });
106 | 
107 |     yyparse::setOnParseEnd(function($value) {
108 |       var_dump('Custom hook on parse end. Parsed:', $value);
109 |     });
110 |   `,
111 | 
112 |   "operators": [
113 |     ["left", "+"],
114 |     ["left", "*"],
115 |   ],
116 | 
117 |   "bnf": {
118 |     "E": [
119 |       ["E + E",  "$$ = new BinaryExpression($1, $3, $2)"],
120 |       ["E * E",  "$$ = new BinaryExpression($1, $3, $2)"],
121 |       ["NUMBER", "$$ = new PrimaryExpression($1)"],
122 |       ["( E )",  "$$ = $2"],
123 |     ],
124 |   },
125 | }


--------------------------------------------------------------------------------
/examples/lexer-start-conditions.g.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Start conditions of lex rules. Tokenizer states.
  3 |  *
  4 |  * Tokenizer rules may provide start conditions. Such rules are executed
  5 |  * only when lexer enters the state corresponding to the names of the
  6 |  * start conditions.
  7 |  *
  8 |  * Start conditions can be inclusive (%s, 0), and exclusive (%x, 1).
  9 |  * Inclusive conditions also include rules without any start conditions.
 10 |  * Exclusive conditions do not include other rules when the parser enters
 11 |  * this state. The rules with `*` condition are always included.
 12 |  *
 13 |  * https://gist.github.com/DmitrySoshnikov/f5e2583b37e8f758c789cea9dcdf238a
 14 |  *
 15 |  * When a grammar is defined in the JSON format, the start conditions are
 16 |  * specified as:
 17 |  *
 18 |  *   "startConditions": {
 19 |  *     "name": 1,  // exclusive
 20 |  *     "other": 0, // inclusive
 21 |  *   }
 22 |  *
 23 |  * And a rule itself may specify a list of start conditions as the
 24 |  * first element:
 25 |  *
 26 |  *   // This lex-rule is applied only when parser enters `name` state.
 27 |  *
 28 |  *   [["name"], "\w+", "return 'NAME'"]
 29 |  *
 30 |  * At the beginning a lexer is in the `INITIAL` state. A new state is
 31 |  * entered either using `this.pushState(name)` or `this.begin(name)`. To
 32 |  * exit a state, use `this.popState()`.
 33 |  *
 34 |  * In the grammar below we have a `comment` tokenizer state, which allows us
 35 |  * to skip all the comment characters, but still to count number of lines.
 36 |  *
 37 |  *   ./bin/syntax -g examples/lexer-start-conditions.g.js -m slr1 -f ~/test.js
 38 |  */
 39 | 
 40 | // Example of ~/test.js
 41 | //
 42 | //  1.
 43 | //  2.  /* Hello world
 44 | //  3.      privet
 45 | //  4.
 46 | //  5.     OK **/
 47 | //  6.
 48 | //  7.  Main
 49 | //  8.
 50 | //
 51 | // Number of lines: 8
 52 | 
 53 | {
 54 |   "moduleInclude": `
 55 |     let lines = 1;
 56 | 
 57 |     yyparse.onParseBegin = (string) => {
 58 |       // Print the string with line numbers.
 59 | 
 60 |       let code = string
 61 |         .split('\\n')
 62 |         .map((s, line) => (line + 1) + '. ' + s)
 63 |         .join('\\n');
 64 | 
 65 |       console.log(code + '\\n');
 66 |     };
 67 | 
 68 |     yyparse.onParseEnd = () => {
 69 |       console.log('Number of lines: ' + lines + '\\n');
 70 |     };
 71 |   `,
 72 | 
 73 |   "lex": {
 74 |     "startConditions": {
 75 |       "comment": 1, // exclusive
 76 |     },
 77 | 
 78 |     "rules": [
 79 | 
 80 |       // On `/*` we enter the comment state:
 81 | 
 82 |       ["\\/\\*", "this.pushState('comment');      /* skip comments */"],
 83 | 
 84 |       // On `*/` being in `comment` state we return to the initial state:
 85 | 
 86 |       [["comment"], "\\*+\\/", "this.popState();  /* skip comments */"],
 87 | 
 88 |       // Being inside the `comment` state, skip all chars, except new lines
 89 |       // which we count.
 90 | 
 91 |       [["comment"], "[^*\\n]+",                  "/* skip comments */"],
 92 |       [["comment"], "\\*+[^*/\\n]*",             "/* skip comments */"],
 93 | 
 94 |       // Count lines in comments.
 95 |       [["comment"], "\\n",  "lines++;             /* skip new lines in comments */"],
 96 | 
 97 |       // In INITIAL state, count line numbers as well:
 98 |       ["\\n",               "lines++              /* skip new lines */"],
 99 | 
100 |       [["*"], " +",                              "/* skip spaces in any state */"],
101 | 
102 |       // Main program consisting only of one word "Main"
103 |       ["Main", "return 'MAIN'"],
104 |     ],
105 |   },
106 | 
107 |   "bnf": {
108 |     "Program": ["MAIN"],
109 |   }
110 | }


--------------------------------------------------------------------------------
/src/plugins/php/templates/ll.template.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * LL(1) parser generated by the Syntax tool.
  5 |  *
  6 |  * https://www.npmjs.com/package/syntax-cli
  7 |  *
  8 |  *   npm install -g syntax-cli
  9 |  *
 10 |  *   syntax-cli --help
 11 |  *
 12 |  * To regenerate run:
 13 |  *
 14 |  *   syntax-cli \
 15 |  *     --grammar ~/path-to-grammar-file \
 16 |  *     --mode LL1 \
 17 |  *     --output ~/ParserClassName.php
 18 |  */
 19 | 
 20 | {{{NAMESPACE}}}
 21 | 
 22 | class SyntaxException extends \Exception {}
 23 | 
 24 | {{{MODULE_INCLUDE}}}
 25 | 
 26 | class yyparse {
 27 |   private static $ps = {{{PRODUCTIONS}}};
 28 |   private static $tks = {{{TOKENS}}};
 29 |   private static $tbl = {{{TABLE}}};
 30 | 
 31 |   private static $s = [];
 32 |   private static $__ = null;
 33 | 
 34 |   private static $on_parse_begin = null;
 35 |   private static $on_parse_end = null;
 36 | 
 37 |   public static $yytext = '';
 38 |   public static $yyleng = 0;
 39 | 
 40 |   const EOF = '$';
 41 | 
 42 |   private static $tokenizer = null;
 43 | 
 44 |   {{{PRODUCTION_HANDLERS}}}
 45 | 
 46 |   public static function setTokenizer($tokenizer) {
 47 |     self::$tokenizer = $tokenizer;
 48 |   }
 49 | 
 50 |   public static function getTokenizer() {
 51 |     return self::$tokenizer;
 52 |   }
 53 | 
 54 |   public static function setOnParseBegin($on_parse_begin) {
 55 |     self::$on_parse_begin = $on_parse_begin;
 56 |   }
 57 | 
 58 |   public static function setOnParseEnd($on_parse_end) {
 59 |     self::$on_parse_end = $on_parse_end;
 60 |   }
 61 | 
 62 |   public static function parse($string) {
 63 |     if (is_callable(self::$on_parse_begin)) {
 64 |       call_user_func(self::$on_parse_begin, $string);
 65 |     }
 66 | 
 67 |     $tokenizer = self::getTokenizer();
 68 | 
 69 |     if (!$tokenizer) {
 70 |       throw new \Exception('Tokenizer instance wasn\'t specified.');
 71 |     }
 72 | 
 73 |     $tokenizer->initString($string);
 74 | 
 75 |     $s = &self::$s;
 76 |     $s = [self::EOF, {{{START}}}];
 77 | 
 78 |     $tks = &self::$tks;
 79 |     $tbl = &self::$tbl;
 80 |     $ps = &self::$ps;
 81 | 
 82 |     $t = $tokenizer->getNextToken();
 83 |     $st = null;
 84 | 
 85 |     $to = null;
 86 |     $tt = null;
 87 | 
 88 |     do {
 89 |       $to = array_pop($s);
 90 |       $tt = $tks[$t['type']];
 91 | 
 92 |       if ($to === $tt) {
 93 |         $t = $tokenizer->getNextToken();
 94 |         continue;
 95 |       }
 96 | 
 97 |       self::der($to, $t, $tt);
 98 |     } while ($tokenizer->hasMoreTokens() || count($s) > 1);
 99 | 
100 |     while (count($s) !== 1) {
101 |       self::der(array_pop($s), $t, $tt);
102 |     }
103 | 
104 |     if ($s[0] !== self::EOF || $t['type'] !== self::EOF) {
105 |       self::parseError('stack is not empty');
106 |     }
107 | 
108 |     return true;
109 |   }
110 | 
111 |   private static function der($to, $t, $tt) { // $to: symbol popped from the stack, $t: current token, $tt: its entry in $tks
112 |     $npn = self::$tbl[$to][$tt]; // parsing-table entry for (stack symbol, token)
113 |     if (!$npn) {
114 |       self::unexpectedToken($t); // empty entry: report the offending token
115 |     }
116 |     self::$s = array_merge(self::$s, self::$ps[intval($npn)][0]); // append element 0 of production $npn to the stack
117 |   }
118 | 
119 |   private static function unexpectedToken($token) { // Raises a parse error for $token; EOF gets a dedicated message.
120 |     if ($token['type'] === self::EOF) {
121 |       self::unexpectedEndOfInput(); // static method of this class: a bare call would hit an undefined global function
122 |     }
123 | 
124 |     self::getTokenizer()->throwUnexpectedToken(
125 |       $token['value'],
126 |       $token['startLine'],
127 |       $token['startColumn']
128 |     );
129 |   }
130 | 
131 |   private static function unexpectedEndOfInput() {
132 |     self::parseError('Unexpected end of input.');
133 |   }
134 | 
135 |   private static function parseError($message) {
136 |     throw new \Exception('SyntaxError: '.$message);
137 |   }
138 | }
139 | 
140 | {{{TOKENIZER}}}
141 | 
142 | class {{{PARSER_CLASS_NAME}}} extends yyparse {}
143 | 


--------------------------------------------------------------------------------
/examples/calc-loc.bnf:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Captures location info.
  3 |  *
  4 |  * In order to capture locations, pass the `--loc` option.
  5 |  *
  6 |  * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc.
  7 |  * Named accessors are available as well: @foo, @bar.
  8 |  *
  9 |  * A location is an object of structure:
 10 |  *
 11 |  * {
 12 |  *   startOffset,
 13 |  *   endOffset,
 14 |  *   startLine,
 15 |  *   endLine,
 16 |  *   startColumn,
 17 |  *   endColumn,
 18 |  * }
 19 |  *
 20 |  * The resulting location is in the @$. It is calculated automatically from
 21 |  * first and last symbol on a RHS handle, and it also can be overridden
 22 |  * manually.
 23 |  *
 24 |  * ./bin/syntax -g examples/calc-loc.bnf -m slr1 -p '15 + 10 * 3' -w --loc
 25 |  *
 26 |  * Parsed nodes with captured locations:
 27 |  *
 28 |  * {
 29 |  *   "type": "BinaryExpression",
 30 |  *   "op": "+",
 31 |  *   "left": {
 32 |  *     "type": "NumericLiteral",
 33 |  *     "value": 15,
 34 |  *     "loc": {
 35 |  *       "startOffset": 0,
 36 |  *       "endOffset": 2,
 37 |  *       "startLine": 1,
 38 |  *       "endLine": 1,
 39 |  *       "startColumn": 0,
 40 |  *       "endColumn": 2
 41 |  *     }
 42 |  *   },
 43 |  *   "right": {
 44 |  *     "type": "BinaryExpression",
 45 |  *     "op": "*",
 46 |  *     "left": {
 47 |  *       "type": "NumericLiteral",
 48 |  *       "value": 10,
 49 |  *       "loc": {
 50 |  *         "startOffset": 5,
 51 |  *         "endOffset": 7,
 52 |  *         "startLine": 1,
 53 |  *         "endLine": 1,
 54 |  *         "startColumn": 5,
 55 |  *         "endColumn": 7
 56 |  *       }
 57 |  *     },
 58 |  *     "right": {
 59 |  *       "type": "NumericLiteral",
 60 |  *       "value": 3,
 61 |  *       "loc": {
 62 |  *         "startOffset": 10,
 63 |  *         "endOffset": 11,
 64 |  *         "startLine": 1,
 65 |  *         "endLine": 1,
 66 |  *         "startColumn": 10,
 67 |  *         "endColumn": 11
 68 |  *       }
 69 |  *     },
 70 |  *     "loc": {
 71 |  *       "startOffset": 5,
 72 |  *       "endOffset": 11,
 73 |  *       "startLine": 1,
 74 |  *       "endLine": 1,
 75 |  *       "startColumn": 5,
 76 |  *       "endColumn": 11
 77 |  *     }
 78 |  *   },
 79 |  *   "loc": {
 80 |  *     "startOffset": 0,
 81 |  *     "endOffset": 11,
 82 |  *     "startLine": 1,
 83 |  *     "endLine": 1,
 84 |  *     "startColumn": 0,
 85 |  *     "endColumn": 11
 86 |  *   }
 87 |  * }
 88 |  */
 89 | 
 90 | %lex
 91 | 
 92 | %%
 93 | 
 94 | \s+       return '' /* skip whitespace */
 95 | \d+       return 'NUMBER'
 96 | 
 97 | /lex
 98 | 
 99 | %{
100 | 
101 | function BinaryExpression(op, left, right, loc) {
102 |   return {
103 |     type: 'BinaryExpression',
104 |     op,
105 |     left,
106 |     right,
107 |     loc,
108 |   };
109 | }
110 | 
111 | function Loc(s, e) {
112 |   // Same as default result location.
113 |   return {
114 |     startOffset: s.startOffset,
115 |     endOffset: e.endOffset,
116 |     startLine: s.startLine,
117 |     endLine: e.endLine,
118 |     startColumn: s.startColumn,
119 |     endColumn: e.endColumn,
120 |   };
121 | }
122 | 
123 | function NumericLiteral(value, loc) {
124 |   return {
125 |     type: 'NumericLiteral',
126 |     value,
127 |     loc,
128 |   };
129 | }
130 | 
131 | %}
132 | 
133 | %left '+'
134 | %left '*'
135 | 
136 | %%
137 | 
138 | exp
139 |   : exp '+' exp
140 |     /* Explicitly calculate location */
141 |     { $$ = BinaryExpression('+', $1, $3, Loc(@1, @3)) }
142 | 
143 |   | exp '*' exp
144 |     /* Use default result location: @$ */
145 |     { $$ = BinaryExpression('*', $1, $3, @$) }
146 | 
147 |   | '(' exp ')'
148 |     { $$ = $2 }
149 | 
150 |   | number
151 |     /* Named args and position */
152 |     { $$ = NumericLiteral($number, @number) }
153 |   ;
154 | 
155 | number
156 |   : NUMBER { $$ = Number(yytext) }
157 |   ;


--------------------------------------------------------------------------------
/src/lr/__tests__/lr-parsing-table-test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The MIT License (MIT)
  3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  4 |  */
  5 | 
  6 | import Grammar from '../../grammar/grammar';
  7 | import {MODES as GRAMMAR_MODE} from '../../grammar/grammar-mode';
  8 | import LRParsingTable from '../lr-parsing-table';
  9 | 
 10 | describe('lr-parsing-table', () => {
 11 |   it('lalr1-grammar-1', () => {
 12 |     const grammarString = `
 13 |       %%
 14 | 
 15 |       Start
 16 |         : OptPrefix1 SUFFIX1
 17 |         | OptPrefix2 SUFFIX2
 18 |         ;
 19 | 
 20 |       OptPrefix1
 21 |         : PREFIX1
 22 |         | /* empty */
 23 |         ;
 24 | 
 25 |       OptPrefix2
 26 |         : PREFIX2
 27 |         | /* empty */
 28 |         ;
 29 |     `;
 30 | 
 31 |     const expectedTable = {
 32 |       '0': {
 33 |         Start: 1,
 34 |         OptPrefix1: 2,
 35 |         OptPrefix2: 3,
 36 |         PREFIX1: 's4',
 37 |         SUFFIX1: 'r4',
 38 |         PREFIX2: 's5',
 39 |         SUFFIX2: 'r6',
 40 |       },
 41 |       '1': {$: 'acc'},
 42 |       '2': {SUFFIX1: 's6'},
 43 |       '3': {SUFFIX2: 's7'},
 44 |       '4': {SUFFIX1: 'r3'},
 45 |       '5': {SUFFIX2: 'r5'},
 46 |       '6': {$: 'r1'},
 47 |       '7': {$: 'r2'},
 48 |     };
 49 | 
 50 |     const grammarBySLR = Grammar.fromString(grammarString, {
 51 |       mode: GRAMMAR_MODE.LALR1_BY_SLR1,
 52 |     });
 53 |     expect(new LRParsingTable({grammar: grammarBySLR}).get()).toEqual(
 54 |       expectedTable
 55 |     );
 56 | 
 57 |     const grammarByCLR = Grammar.fromString(grammarString, {
 58 |       mode: GRAMMAR_MODE.LALR1_BY_CLR1,
 59 |     });
 60 |     expect(new LRParsingTable({grammar: grammarByCLR}).get()).toEqual(
 61 |       expectedTable
 62 |     );
 63 |   });
 64 | 
 65 |   it('lalr1-grammar-2', () => {
 66 |     const grammarString = `
 67 |       %%
 68 | 
 69 |       S
 70 |         : A 'a' A 'b'
 71 |         | B 'b' B 'a'
 72 |         ;
 73 | 
 74 |       A : /*epsilon*/ ;
 75 |       B : /*epsilon*/ ;
 76 |     `;
 77 | 
 78 |     const expectedTable = {
 79 |       '0': {S: 1, A: 2, B: 3, "'a'": 'r3', "'b'": 'r4'},
 80 |       '1': {$: 'acc'},
 81 |       '2': {"'a'": 's4'},
 82 |       '3': {"'b'": 's7'},
 83 |       '4': {A: 5, "'b'": 'r3'},
 84 |       '5': {"'b'": 's6'},
 85 |       '6': {$: 'r1'},
 86 |       '7': {B: 8, "'a'": 'r4'},
 87 |       '8': {"'a'": 's9'},
 88 |       '9': {$: 'r2'},
 89 |     };
 90 | 
 91 |     const grammarBySLR = Grammar.fromString(grammarString, {
 92 |       mode: GRAMMAR_MODE.LALR1_BY_SLR1,
 93 |     });
 94 |     expect(new LRParsingTable({grammar: grammarBySLR}).get()).toEqual(
 95 |       expectedTable
 96 |     );
 97 | 
 98 |     const grammarByCLR = Grammar.fromString(grammarString, {
 99 |       mode: GRAMMAR_MODE.LALR1_BY_CLR1,
100 |     });
101 |     expect(new LRParsingTable({grammar: grammarByCLR}).get()).toEqual(
102 |       expectedTable
103 |     );
104 |   });
105 | 
106 |   it('lalr1-grammar-3', () => {
107 |     const grammarString = `
108 |       %%
109 | 
110 |       Stmt
111 |         : Type ID ';'
112 |         | Expr ';'
113 |         ;
114 | 
115 |       Type
116 |         : ID
117 |         ;
118 | 
119 |       Expr
120 |         : ID
121 |         ;
122 |     `;
123 | 
124 |     const expectedTable = {
125 |       '0': {Stmt: 1, Type: 2, Expr: 3, ID: 's4'},
126 |       '1': {$: 'acc'},
127 |       '2': {ID: 's5'},
128 |       '3': {"';'": 's7'},
129 |       '4': {ID: 'r3', "';'": 'r4'},
130 |       '5': {"';'": 's6'},
131 |       '6': {$: 'r1'},
132 |       '7': {$: 'r2'},
133 |     };
134 | 
135 |     const grammarBySLR = Grammar.fromString(grammarString, {
136 |       mode: GRAMMAR_MODE.LALR1_BY_SLR1,
137 |     });
138 |     expect(new LRParsingTable({grammar: grammarBySLR}).get()).toEqual(
139 |       expectedTable
140 |     );
141 | 
142 |     const grammarByCLR = Grammar.fromString(grammarString, {
143 |       mode: GRAMMAR_MODE.LALR1_BY_CLR1,
144 |     });
145 |     expect(new LRParsingTable({grammar: grammarByCLR}).get()).toEqual(
146 |       expectedTable
147 |     );
148 |   });
149 | });
150 | 


--------------------------------------------------------------------------------
/examples/indent.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Handling nested blocks based on indentation (similar to Python).
  3 |  *
  4 |  * In this example we handle nested lists based on indentation (YAML-style):
  5 |  *
  6 |  * Example `~/test.list`:
  7 |  *
  8 |  *   - one
  9 |  *   - two
 10 |  *     - three
 11 |  *     - four
 12 |  *       - five
 13 |  *       - six
 14 |  *     - seven
 15 |  *     - eight
 16 |  *   - nine
 17 |  *   - ten
 18 |  *
 19 |  * Handling blocks based on indentation doesn't differ much from handling blocks
 20 |  * based on { } or []. In this case we have a recursive `List` production, which
 21 |  * consists of Entry items, separated by the `SEPARATOR` token.
 22 |  *
 23 |  * The `SEPARATOR` handles indentation (indent/dedent), tracking current level
 24 |  * of indentation, and current nested list where entries are added.
 25 |  *
 26 |  *   ./bin/syntax -g ~/indent.g -m slr1 -f ~/test.list
 27 |  *
 28 |  * Parsed value:
 29 |  *
 30 |  *   [
 31 |  *     "one",
 32 |  *     "two",
 33 |  *     [
 34 |  *       "three",
 35 |  *       "four",
 36 |  *       [
 37 |  *         "five",
 38 |  *         "six"
 39 |  *       ],
 40 |  *       "seven",
 41 |  *       "eight"
 42 |  *     ],
 43 |  *     "nine",
 44 |  *     "ten"
 45 |  *   ]
 46 |  *
 47 |  * See also `examples/indent-explicit.g.js` for explicit INDENT, and DEDENT
 48 |  * tokens handling.
 49 |  */
 50 | 
 51 | {
 52 |   lex: {
 53 |     rules: [
 54 |       [`[a-zA-Z0-9_]+`,    `return 'IDENTIFIER'`],
 55 | 
 56 |       // ------------------------------------------------
 57 |       // Indent/Dedent.
 58 | 
 59 |       [`\\n( *)`,  `
 60 | 
 61 |         yytext = yytext.slice(1); // strip leading NL
 62 |         matchedIndent = yytext.length;
 63 | 
 64 |         return 'SEPARATOR';
 65 |       `],
 66 | 
 67 |       [`\\s+`,    `/* skip whitespace */`],
 68 |       [`\\-`,     `return '-'`],
 69 |     ],
 70 |   },
 71 | 
 72 |   moduleInclude: `
 73 | 
 74 |     /**
 75 |      * Matched during tokenization indentation level
 76 |      * (step ahead from the "currentIndent").
 77 |      */
 78 |     let matchedIndent = 0;
 79 | 
 80 |     /**
 81 |      * Current level of indentation.
 82 |      */
 83 |     let currentIndent = 0;
 84 | 
 85 |     /**
 86 |      * Current list where we add entries.
 87 |      */
 88 |     let currentList = [];
 89 | 
 90 |     /**
 91 |      * Keeps track of the indentation levels to check
 92 |      * correct level on dedent.
 93 |      */
 94 |     const indentStack = [];
 95 |     indentStack.push(currentIndent);
 96 | 
 97 |     /**
 98 |      * Same as "indentStack" but to track nested lists.
 99 |      */
100 |     const listsStack = [];
101 |     listsStack.push(currentList);
102 |   `,
103 | 
104 |   bnf: {
105 |     Program: [[`List`,                  `$$ = currentList`]],
106 | 
107 |     List:    [[`Entry`,                 `currentList.push($1)`],
108 |               [`List SEPARATOR Entry`,  `
109 | 
110 |         // 1. We're on the same nested level, just push the entry
111 |         // to the current list.
112 | 
113 |         if (currentIndent === matchedIndent) {
114 |           currentList.push($3);
115 |         }
116 | 
117 |         // 2. Dedent. Pop the current list from the stack, pushing
118 |         // as a child to the previous.
119 | 
120 |         else if (currentIndent > matchedIndent) {
121 |           currentList.push($3);
122 | 
123 |           const poppsedList = listsStack.pop();
124 |           currentList = listsStack[listsStack.length - 1];
125 |           currentList.push(poppsedList);
126 | 
127 |           indentStack.pop();
128 |           currentIndent = indentStack[indentStack.length - 1];
129 |         }
130 | 
131 |         // 3. Indent. Allocate a new list for entries and push
132 |         // onto the stack.
133 | 
134 |         else {
135 |           currentList.push($3);
136 | 
137 |           currentIndent = matchedIndent;
138 |           currentList = [];
139 | 
140 |           listsStack.push(currentList);
141 |           indentStack.push(currentIndent);
142 |         }
143 | 
144 |     `]],
145 | 
146 |     Entry:   [[`- IDENTIFIER`,  `$$ = $2`]],
147 |   },
148 | }


--------------------------------------------------------------------------------
/src/grammar/grammar-symbol.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The MIT License (MIT)
  3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  4 |  */
  5 | 
  6 | import {EOF, EPSILON} from '../special-symbols';
  7 | 
  8 | /**
  9 |  * Symbols are stored in the registry, and retrieved from it
 10 |  * if the symbol was already created.
 11 |  */
 12 | const registry = {};
 13 | 
 14 | /**
 15 |  * Class encapsulates operations with one
 16 |  * grammar symbol (terminal or non-terminal)
 17 |  */
 18 | export default class GrammarSymbol {
 19 |   constructor(symbol) {
 20 |     this._symbol = symbol;
 21 |   }
 22 | 
 23 |   /**
 24 |    * Terminals in our grammar are quoted,
 25 |    * "a", " ", "var", etc.
 26 |    */
 27 |   isTerminal() {
 28 |     const first = this._symbol[0];
 29 |     const last = this._symbol[this._symbol.length - 1];
 30 | 
 31 |     return (first === '"' && last === '"') || (first === "'" && last === "'");
 32 |   }
 33 | 
 34 |   /**
 35 |    * Returns original symbol from an extended name. 1X3 => X
 36 |    */
 37 |   getOrignialSymbol() {
 38 |     if (!this._originalSymbol) {
 39 |       this._originalSymbol = this._symbol
 40 |         .replace(/^\d+\|/, '')
 41 |         .replace(/\|(?:\d+|\$)$/, '');
 42 |     }
 43 |     return this._originalSymbol;
 44 |   }
 45 | 
 46 |   /**
 47 |    * Returns start context (in extended LALR 1X3 => 1)
 48 |    */
 49 |   getStartContext() {
 50 |     if (!this._startContext) {
 51 |       this._startContext = Number(this._symbol.match(/^(\d+)\|/)[1]);
 52 |     }
 53 |     return this._startContext;
 54 |   }
 55 | 
 56 |   /**
 57 |    * Returns end context (in extended LALR 1X3 => 3)
 58 |    */
 59 |   getEndContext() {
 60 |     if (!this._endContext) {
 61 |       this._endContext = Number(this._symbol.match(/\|(\d+)$/)[1]);
 62 |     }
 63 |     return this._endContext;
 64 |   }
 65 | 
 66 |   /**
 67 |    * Returns a symbol from the registry, or creates one.
 68 |    */
 69 |   static get(symbol) {
 70 |     if (!registry.hasOwnProperty(symbol)) {
 71 |       registry[symbol] = new GrammarSymbol(symbol);
 72 |     }
 73 |     return registry[symbol];
 74 |   }
 75 | 
 76 |   /**
 77 |    * Returns raw terminal value (between quotes)
 78 |    */
 79 |   getTerminalValue() {
 80 |     this._checkTerminal();
 81 |     return this._symbol.slice(1, this._symbol.length - 1);
 82 |   }
 83 | 
 84 |   /**
 85 |    * Returns a terminal quoted into single or double-quotes,
 86 |    * depending on which quotes it's already wrapped itself.
 87 |    */
 88 |   quotedTerminal() {
 89 |     this._checkTerminal();
 90 |     let isSingleQuoted = this._symbol[0] === "'";
 91 | 
 92 |     let leftQuote = isSingleQuoted ? `"'` : `'"`;
 93 |     let rightQuote = isSingleQuoted ? `'"` : `"'`;
 94 | 
 95 |     return `${leftQuote}${this.getTerminalValue()}${rightQuote}`;
 96 |   }
 97 | 
 98 |   /**
 99 |    * Checks whether a symbol is a non-terminal.
100 |    */
101 |   isNonTerminal() {
102 |     return !this.isTerminal();
103 |   }
104 | 
105 |   /**
106 |    * Checks whether a symbol is Epsilon (instance method).
107 |    */
108 |   isEpsilon() {
109 |     return GrammarSymbol.isEpsilon(this._symbol);
110 |   }
111 | 
112 |   /**
113 |    * Checks whether a symbol is an end of file (instance method).
114 |    */
115 |   isEOF() {
116 |     return this._symbol === EOF;
117 |   }
118 | 
119 |   /**
120 |    * Checks whether a symbol is Epsilon (static method): true when the raw symbol contains EPSILON anywhere.
121 |    */
122 |   static isEpsilon(symbol) {
123 |     return symbol.includes(EPSILON);
124 |   }
125 | 
126 |   /**
127 |    * Checks whether a symbol is EOF (static method): true only when it strictly equals EOF.
128 |    */
129 |   static isEOF(symbol) {
130 |     return symbol === EOF;
131 |   }
132 | 
133 |   /**
134 |    * Returns raw symbol (the value stored in `_symbol`, unmodified).
135 |    */
136 |   getSymbol() {
137 |     return this._symbol;
138 |   }
139 | 
140 |   /**
141 |    * Checks whether the raw symbol strictly equals the passed one.
142 |    */
143 |   isSymbol(symbol) {
144 |     return this.getSymbol() === symbol;
145 |   }
146 | 
147 |   _checkTerminal() {
148 |     if (!this.isTerminal()) {
149 |       throw new TypeError(`Symbol ${this._symbol} is not terminal.`);
150 |     }
151 |   }
152 | }
153 | 


--------------------------------------------------------------------------------
/examples/lang.lex:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Lang: Lexical Grammar.
  3 |  *
  4 |  * BNF grammar is in: examples/lang.bnf
  5 |  * Test source code is in: examples/test.lang
  6 |  *
  7 |  * How to run:
  8 |  *
  9 |  *   ./bin/syntax \
 10 |  *     --grammar examples/lang.bnf \
 11 |  *     --lex examples/lang.lex \
 12 |  *     --mode lalr1 \
 13 |  *     --file examples/test.lang
 14 |  */
 15 | {
 16 |   macros: {
 17 |     id: `[a-zA-Z0-9_]`,
 18 |   },
 19 | 
 20 |   rules: [
 21 |     ["\\/\\/.*",                `/* skip comments */`],
 22 |     ["\/\\*[\\s\\S]*?\\*\/",    `/* skip comments */`],
 23 | 
 24 |     [`\\s+`,                    `/* skip whitespace */`],
 25 | 
 26 |     // ------------------------------------------------
 27 |     // Keywords.
 28 | 
 29 |     [`\\blet\\b`,                  `return 'LET'`],
 30 |     [`\\bif\\b`,                   `return 'IF'`],
 31 |     [`\\belse\\b`,                 `return 'ELSE'`],
 32 |     [`\\btrue\\b`,                 `return 'TRUE'`],
 33 |     [`\\bfalse\\b`,                `return 'FALSE'`],
 34 |     [`\\bnull\\b`,                 `return 'NULL'`],
 35 |     [`\\breturn\\b`,               `return 'RETURN'`],
 36 |     [`\\bfn\\b`,                   `return 'FN'`],
 37 |     [`\\bdo\\b`,                   `return 'DO'`],
 38 |     [`\\bwhile\\b`,                `return 'WHILE'`],
 39 |     [`\\bfor\\b`,                  `return 'FOR'`],
 40 |     [`\\bbreak\\b`,                `return 'BREAK'`],
 41 |     [`\\bcontinue\\b`,             `return 'CONTINUE'`],
 42 |     [`\\bclass\\b`,                `return 'CLASS'`],
 43 |     [`\\bextends\\b`,              `return 'EXTENDS'`],
 44 |     [`\\bnew\\b`,                  `return 'NEW'`],
 45 |     [`\\bsuper\\b`,                `return 'SUPER'`],
 46 |     [`\\bthis\\b`,                 `return 'THIS'`],
 47 | 
 48 |     // ------------------------------------------------
 49 |     // Symbols.
 50 | 
 51 |     [`\\->`,                    `return 'ARROW'`],
 52 | 
 53 |     [`\\(`,                     `return 'LPAREN'`],
 54 |     [`\\)`,                     `return 'RPAREN'`],
 55 | 
 56 |     [`\\{`,                     `return 'LCURLY'`],
 57 |     [`\\}`,                     `return 'RCURLY'`],
 58 | 
 59 |     [`\\[`,                     `return 'LBRACKET'`],
 60 |     [`\\]`,                     `return 'RBRACKET'`],
 61 | 
 62 |     [`:`,                       `return 'COLON'`],
 63 |     [`;`,                       `return 'SEMICOLON'`],
 64 |     [`,`,                       `return 'COMMA'`],
 65 | 
 66 |     [`\\.`,                     `return 'DOT'`],
 67 | 
 68 |     // ------------------------------------------------
 69 |     // Logical operators: &&, ||
 70 | 
 71 |     [`\\|\\|`,                  `return 'LOGICAL_OR'`],
 72 |     [`&&`,                      `return 'LOGICAL_AND'`],
 73 | 
 74 |     // ------------------------------------------------
 75 |     // Assignment operators: =, *=, /=, +=, -=,
 76 | 
 77 |     [`=`,                       `return 'SIMPLE_ASSIGN'`],
 78 |     [`(\\*|\\/|\\+|\\-)=`,      `return 'COMPLEX_ASSIGN'`],
 79 | 
 80 |     // ------------------------------------------------
 81 |     // Numbers.
 82 | 
 83 |     [`(\\d+(\\.\\d+)?)`,        `return 'NUMBER'`],
 84 | 
 85 |     // ------------------------------------------------
 86 |     // Equality operators: ==, !=
 87 | 
 88 |     [`(=|!)=`,                  `return 'EQUALITY_OPERATOR'`],
 89 | 
 90 |     // ------------------------------------------------
 91 |     // Math operators: +, -, *, /
 92 | 
 93 |     [`(\\+|\\-)`,               `return 'ADDITIVE_OPERATOR'`],
 94 |     [`(\\*|\\/)`,               `return 'MULTIPLICATIVE_OPERATOR'`],
 95 | 
 96 |     // ------------------------------------------------
 97 |     // Relational operators: >, >=, <, <=
 98 | 
 99 |     [`(>|<)=?`,                 `return 'RELATIONAL_OPERATOR'`],
100 | 
101 |     // ------------------------------------------------
102 |     // Strings.
103 | 
104 |     [`"[^"]*"`,                 `yytext = yytext.slice(1, -1); return 'STRING';`],
105 |     [`'[^']*'`,                 `yytext = yytext.slice(1, -1); return 'CHAR';`],
106 | 
107 |     [`{id}+`,                   `return 'IDENTIFIER'`],
108 |   ],
109 | }


--------------------------------------------------------------------------------
/src/__tests__/code-unit-test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The MIT License (MIT)
  3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  4 |  */
  5 | 
  6 | import CodeUnit from '../code-unit';
  7 | 
  8 | const environment = CodeUnit.getSandbox();
  9 | 
 10 | function MockSymbol(symbol) {
 11 |   return {
 12 |     getSymbol() {
 13 |       return symbol;
 14 |     },
 15 |   };
 16 | }
 17 | 
 18 | function MockProduction(RHS, handler = '$$ = $1 + $3', isEpsilon = false) {
 19 |   return {
 20 |     getRHS() {
 21 |       return RHS.map(symbol => MockSymbol(symbol));
 22 |     },
 23 | 
 24 |     getRawSemanticAction() {
 25 |       return handler;
 26 |     },
 27 | 
 28 |     isEpsilon() {
 29 |       return isEpsilon;
 30 |     },
 31 |   };
 32 | }
 33 | 
 34 | const defaultLoc = {
 35 |   startOffset: 1,
 36 |   endOffset: 2,
 37 |   startLine: 1,
 38 |   endLine: 1,
 39 |   startColumn: 1,
 40 |   endColumn: 2,
 41 | };
 42 | 
 43 | describe('code-unit', () => {
 44 |   it('default bindings', () => {
 45 |     expect(environment.yytext).toBe('');
 46 |     expect(environment.yyleng).toBe(0);
 47 |     expect(environment.yy).toEqual({});
 48 | 
 49 |     expect(environment.yyparse).not.toBe(null);
 50 |     expect(typeof environment.yyparse.onParseBegin).toBe('function');
 51 |     expect(typeof environment.yyparse.onParseEnd).toBe('function');
 52 | 
 53 |     expect(environment.__).toBe(null);
 54 |     expect(typeof environment.require).toBe('function');
 55 |   });
 56 | 
 57 |   it('create handler', () => {
 58 |     const handler = CodeUnit.createHandler('$1, $2', '$$ = $1 + $2');
 59 |     expect(typeof handler).toBe('function');
 60 | 
 61 |     handler(1, 2);
 62 |     expect(environment.__).toBe(3);
 63 |   });
 64 | 
 65 |   it('shared sandbox', () => {
 66 |     expect(environment).toBe(CodeUnit.getSandbox());
 67 |   });
 68 | 
 69 |   it('eval', () => {
 70 |     CodeUnit.eval('$$ = 2 * 5');
 71 |     expect(environment.__).toBe(10);
 72 |   });
 73 | 
 74 |   it('production action parameters', () => {
 75 |     let production = MockProduction(['additive', 'PLUS', 'multiplicative']);
 76 | 
 77 |     expect(CodeUnit.createProductionParams({production})).toBe('_1, _2, _3');
 78 | 
 79 |     expect(
 80 |       CodeUnit.createProductionParams({
 81 |         production,
 82 |         captureLocations: true,
 83 |       })
 84 |     ).toBe('_1, _2, _3, _1loc, _2loc, _3loc');
 85 |   });
 86 | 
 87 |   it('production handler', () => {
 88 |     const production = MockProduction(['additive', 'PLUS', 'multiplicative']);
 89 |     let handler = CodeUnit.createProductionHandler({production});
 90 | 
 91 |     expect(handler.toString()).toContain(
 92 |       '(' + '_1, _2, _3' + ') { __ = _1 + _3 }'
 93 |     );
 94 | 
 95 |     handler(1, '+', 2);
 96 |     expect(environment.__).toBe(3);
 97 | 
 98 |     handler = CodeUnit.createProductionHandler({
 99 |       production,
100 |       captureLocations: true,
101 |     });
102 | 
103 |     expect(handler.toString()).toContain(
104 |       '(' +
105 |         '_1, _2, _3, _1loc, _2loc, _3loc' +
106 |         ') { __loc = yyloc(_1loc, _3loc);__ = _1 + _3 }'
107 |     );
108 | 
109 |     handler(1, '+', 2, defaultLoc, defaultLoc, defaultLoc);
110 |     expect(environment.__).toBe(3);
111 |   });
112 | 
113 |   it('epsilon production loc', () => {
114 |     const production = MockProduction([], '', /* isEpsilon */ true);
115 | 
116 |     let handler = CodeUnit.createProductionHandler({
117 |       production,
118 |       captureLocations: true,
119 |     });
120 | 
121 |     expect(handler.toString()).toContain('(' + '' + ') { __loc = null; }');
122 |   });
123 | 
124 |   it('yyloc', () => {
125 |     const yyloc = environment.yyloc;
126 | 
127 |     const $1loc = {
128 |       startOffset: 0,
129 |       endOffset: 2,
130 |       startLine: 1,
131 |       endLine: 1,
132 |       startColumn: 0,
133 |       endColumn: 2,
134 |     };
135 | 
136 |     const $2loc = {
137 |       startOffset: 6,
138 |       endOffset: 8,
139 |       startLine: 1,
140 |       endLine: 1,
141 |       startColumn: 6,
142 |       endColumn: 8,
143 |     };
144 | 
145 |     const $$loc = {
146 |       startOffset: 0,
147 |       endOffset: 8,
148 |       startLine: 1,
149 |       endLine: 1,
150 |       startColumn: 0,
151 |       endColumn: 8,
152 |     };
153 | 
154 |     expect(yyloc($1loc, $2loc)).toEqual($$loc);
155 | 
156 |     // Epsilon loc (null)
157 |     expect(yyloc(null, $2loc)).toEqual($2loc);
158 |     expect(yyloc($1loc, null)).toEqual($1loc);
159 |   });
160 | 
161 |   it('set bindings', () => {
162 |     CodeUnit.setBindings({
163 |       yytext: 'Hi!',
164 |       yyleng: 3,
165 |     });
166 | 
167 |     expect(environment.yytext).toBe('Hi!');
168 |     expect(environment.yyleng).toBe(3);
169 |   });
170 | });
171 | 


--------------------------------------------------------------------------------
/examples/parser-lexer-communication.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Change lexer state from parser.
  3 |  *
  4 |  * Note: a tokenizer can be accessed in a semantic action as `yy.lexer`,
  5 |  * or `yy.tokenizer`.
  6 |  *
  7 |  * The grammar below solves the problem of parsing { } in statement position as
  8 |  * a "BlockStatement", and in the expression position as an "ObjectLiteral".
  9 |  *
 10 |  * Note: there are several other techniques for solving this: lookahead
 11 |  * restriction productions, or cover grammar.
 12 |  *
 13 |  * Example in the statement position:
 14 |  *
 15 |  *   ./bin/syntax -g examples/parser-lexer-communication.g -m lalr1 -p '{ 1; 2; }'
 16 |  *
 17 |  *   ✓ Accepted
 18 |  *
 19 |  *   Parsed value:
 20 |  *
 21 |  *   {
 22 |  *     "type": "Program",
 23 |  *     "body": [
 24 |  *       {
 25 |  *         "type": "BlockStatement",
 26 |  *         "body": [
 27 |  *           "1",
 28 |  *           "2"
 29 |  *         ]
 30 |  *       }
 31 |  *     ]
 32 |  *   }
 33 |  *
 34 |  * Two empty blocks:
 35 |  *
 36 |  *   ./bin/syntax -g examples/parser-lexer-communication.g -m lalr1 -p '{{}}'
 37 |  *
 38 |  * Example in the expression position:
 39 |  *
 40 |  *   ./bin/syntax -g examples/parser-lexer-communication.g -m lalr1 -p '({ 1, 2 });'
 41 |  *
 42 |  *    ✓ Accepted
 43 |  *
 44 |  *   Parsed value:
 45 |  *
 46 |  *   {
 47 |  *     "type": "Program",
 48 |  *     "body": [
 49 |  *       {
 50 |  *         "type": "ObjectLiteral",
 51 |  *         "properties": [
 52 |  *           "1",
 53 |  *           "2"
 54 |  *         ]
 55 |  *       }
 56 |  *     ]
 57 |  *   }
 58 |  */
 59 | 
 60 | {
 61 |   // --------------------------------------------------
 62 |   // Lexical grammar.
 63 | 
 64 |   lex: {
 65 | 
 66 |     // Lexer states.
 67 |     startConditions: {
 68 |       expression: 0,
 69 |     },
 70 | 
 71 |     rules: [
 72 |       [`\\s+`,                    `/* skip whitespace */`],
 73 | 
 74 |       // { and } in the expression position yield different token types:
 75 | 
 76 |       [['expression'], `\\{`,     `return '%{'`],
 77 |       [['expression'], `\\}`,     `return '}%'`],
 78 | 
 79 |       // { and } in the statement position yield default token types:
 80 | 
 81 |       [`\\{`,                     `return '{'`],
 82 |       [`\\}`,                     `return '}'`],
 83 | 
 84 |       [`\\d+`,                    `return 'NUMBER'`],
 85 | 
 86 |       [`;`,                       `return ';'`],
 87 |       [`,`,                       `return ','`],
 88 | 
 89 |       [`\\(`,                     `return '('`],
 90 |       [`\\)`,                     `return ')'`],
 91 |     ],
 92 |   },
 93 | 
 94 |   // --------------------------------------------------
 95 |   // Syntactic grammar.
 96 | 
 97 |   bnf: {
 98 |     Program:              [[`StatmentList`,                 `$$ = {type: 'Program', body: $1}`]],
 99 | 
100 |     StatmentList:         [[`Statment`,                     `$$ = [$1]`],
101 |                            [`StatmentList Statment`,        `$$ = $1; $1.push($2)`]],
102 | 
103 |     Statment:             [[`BlockStatement`,               `$$ = $1`],
104 |                            [`ExpressionStatement`,          `$$ = $1`]],
105 | 
106 |     BlockStatement:       [[`{ OptStatmentList }`,          `$$ = {type: 'BlockStatement', body: $2}`]],
107 | 
108 |     OptStatmentList:      [[`StatmentList`,                 `$$ = $1`],
109 |                            [`ε`,                            `$$ = null`]],
110 | 
111 |     ExpressionStatement:  [[`Expression ;`,                 `$$ = $1`]],
112 | 
113 |     Expression:           [[`expressionBegin ExpressionNode expressionEnd`,
114 |                             `$$ = $2`]],
115 | 
116 |     // Special "activation productions". They activate the needed lexer state,
117 |     // so the latter can yield different token types for the same chars.
118 | 
119 |     expressionBegin:      [[`ε`,                            `yy.lexer.pushState('expression')`]],
120 |     expressionEnd:        [[`ε`,                            `yy.lexer.popState()`]],
121 | 
122 |     ExpressionNode:       [[`NumericLiteral`,               `$$ = $1`],
123 |                            [`ObjectLiteral`,                `$$ = $1`],
124 |                            [`( Expression )`,               `$$ = $2`]],
125 | 
126 |     NumericLiteral:       [[`NUMBER`,                       `$$ = $1`]],
127 | 
128 |     ObjectLiteral:        [[`%{ OptPropertyList }%`,        `$$ = {type: 'ObjectLiteral', properties: $2}`]],
129 | 
130 |     OptPropertyList:      [[`PropertyList`,                 `$$ = $1`],
131 |                            [`ε`,                            `$$ = null`]],
132 | 
133 |     PropertyList:         [[`Property`,                     `$$ = [$1]`],
134 |                            [`PropertyList , Property`,      `$$ = $1; $1.push($3)`]],
135 | 
136 |     Property:             [`NumericLiteral`,                `$$ = $1`],
137 |   }
138 | }


--------------------------------------------------------------------------------
/src/lr/__tests__/state-test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The MIT License (MIT)
  3 |  * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  4 |  */
  5 | 
  6 | import Grammar from '../../grammar/grammar';
  7 | import LRItem from '../lr-item';
  8 | import SetsGenerator from '../../sets-generator';
  9 | import State from '../state';
 10 | import {MODES as GRAMMAR_MODE} from '../../grammar/grammar-mode';
 11 | import CanonicalCollection from '../canonical-collection';
 12 | 
 13 | const grammar = Grammar.fromGrammarFile(
 14 |   __dirname + '/../../grammar/__tests__/calc.g',
 15 |   {
 16 |     mode: GRAMMAR_MODE.LALR1,
 17 |   }
 18 | );
 19 | 
 20 | function laSet(arraySet) {
 21 |   const set = {};
 22 |   arraySet.forEach(symbol => (set[symbol] = true));
 23 |   return set;
 24 | }
 25 | 
 26 | const canonicalCollection = new CanonicalCollection({grammar});
 27 | const setsGenerator = new SetsGenerator({grammar});
 28 | 
 29 | const defaultLookaheadSet = laSet(['$', '/', '-', '*', '+']);
 30 | 
 31 | // E -> E • + E
 32 | const kernelItem1 = new LRItem(
 33 |   /* production */ grammar.getProduction(1),
 34 |   /* dotPosition */ 1,
 35 |   grammar,
 36 |   canonicalCollection,
 37 |   setsGenerator,
 38 |   /* lookaheadSet */ defaultLookaheadSet
 39 | );
 40 | 
 41 | // E -> E • * E
 42 | const kernelItem2 = new LRItem(
 43 |   /* production */ grammar.getProduction(2),
 44 |   /* dotPosition */ 1,
 45 |   grammar,
 46 |   canonicalCollection,
 47 |   setsGenerator,
 48 |   /* lookaheadSet */ defaultLookaheadSet
 49 | );
 50 | 
 51 | const kernelItems = [kernelItem1, kernelItem2];
 52 | 
 53 | const state = new State(kernelItems, grammar, canonicalCollection);
 54 | 
 55 | const otherItem = new LRItem(
 56 |   /* production */ grammar.getProduction(3),
 57 |   /* dotPosition */ 1,
 58 |   grammar,
 59 |   canonicalCollection,
 60 |   setsGenerator,
 61 |   /* lookaheadSet */ defaultLookaheadSet
 62 | );
 63 | 
 64 | state.addItem(otherItem);
 65 | 
 66 | const items = kernelItems.concat(otherItem);
 67 | 
 68 | // $accept -> E •
 69 | const acceptItem = new LRItem(
 70 |   /* production */ grammar.getAugmentedProduction(),
 71 |   /* dotPosition */ 1,
 72 |   grammar,
 73 |   canonicalCollection,
 74 |   setsGenerator,
 75 |   /* lookaheadSet */ laSet(['$'])
 76 | );
 77 | 
 78 | const acceptItems = [acceptItem];
 79 | 
 80 | const acceptState = new State(
 81 |   /* kernelItems */ acceptItems,
 82 |   grammar,
 83 |   canonicalCollection
 84 | );
 85 | 
 86 | // E -> E * E •
 87 | const finalItem = new LRItem(
 88 |   /* production */ grammar.getProduction(2),
 89 |   /* dotPosition */ 3,
 90 |   grammar,
 91 |   canonicalCollection,
 92 |   setsGenerator,
 93 |   /* lookaheadSet */ defaultLookaheadSet
 94 | );
 95 | 
 96 | const finalState = new State(
 97 |   /* kernelItems */ [finalItem],
 98 |   grammar,
 99 |   canonicalCollection
100 | );
101 | 
102 | function toKeys(items) { // maps each item to its `getKey()` value
103 |   return items.map(item => item.getKey());
104 | }
105 | 
106 | describe('state', () => {
107 |   it('kernal items', () => {
108 |     expect(state.getKernelItems()).toBe(kernelItems);
109 |     expect(acceptState.getKernelItems()).toBe(acceptItems);
110 |   });
111 | 
112 |   it('items', () => {
113 |     expect(toKeys(state.getItems())).toEqual(toKeys(items));
114 |     expect(toKeys(acceptState.getItems())).toEqual(toKeys(acceptItems));
115 |   });
116 | 
117 |   it('is kernel item', () => {
118 |     expect(state.isKernelItem(kernelItem1)).toBe(true);
119 |     expect(state.isKernelItem(kernelItem2)).toBe(true);
120 | 
121 |     const otherItem = new LRItem(
122 |       /* production */ grammar.getProduction(2),
123 |       /* dotPosition */ 1,
124 |       grammar,
125 |       canonicalCollection,
126 |       setsGenerator,
127 |       /* lookaheadSet */ defaultLookaheadSet
128 |     );
129 | 
130 |     expect(state.isKernelItem(otherItem)).toBe(false);
131 |     expect(acceptState.isKernelItem(acceptItem)).toBe(true);
132 |   });
133 | 
134 |   it('number', () => {
135 |     expect(state.getNumber()).toBe(null);
136 |     expect(acceptState.getNumber()).toBe(null);
137 | 
138 |     state.setNumber(1);
139 |     acceptState.setNumber(2);
140 | 
141 |     expect(state.getNumber()).toBe(1);
142 |     expect(acceptState.getNumber()).toBe(2);
143 |   });
144 | 
145 |   it('is final', () => {
146 |     expect(finalState.isFinal()).toBe(true);
147 |     expect(state.isFinal()).toBe(false);
148 |     expect(acceptState.isFinal()).toBe(true);
149 |   });
150 | 
151 |   it('is accept', () => {
152 |     expect(finalState.isAccept()).toBe(false);
153 |     expect(state.isAccept()).toBe(false);
154 |     expect(acceptState.isAccept()).toBe(true);
155 |   });
156 | 
157 |   it('reduce items', () => {
158 |     expect(finalState.getReduceItems().length).toBe(1);
159 |     expect(finalState.getReduceItems()[0]).toBe(finalItem);
160 | 
161 |     expect(state.getReduceItems().length).toBe(0);
162 |     expect(acceptState.getReduceItems().length).toBe(0);
163 |   });
164 | });
165 | 


--------------------------------------------------------------------------------
/examples/parser-lexer-communication.php.g:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Change lexer state from parser. PHP version.
  3 |  *
  4 |  * Note: a tokenizer can be accessed in a semantic action as `yy::$lexer`,
  5 |  * or `yy::$tokenizer`.
  6 |  *
  7 |  * The grammar below solves the problem of parsing { } in statement position as
  8 |  * a "BlockStatement", and in the expression position as an "ObjectLiteral".
  9 |  *
 10 |  * Note: there are several other techniques for solving this: lookahead
 11 |  * restriction productions, or cover grammar.
 12 |  *
 13 |  * Example in the statement position:
 14 |  *
 15 |  *   ./bin/syntax -g examples/parser-lexer-communication.php.g -m lalr1 -o ~/Parser.php
 16 |  *
 17 |  *   Parser::parse('{ 1; 2; }');
 18 |  *
 19 |  *   array(2) {
 20 |  *     ["type"]=>
 21 |  *     string(7) "Program"
 22 |  *     ["body"]=>
 23 |  *     array(1) {
 24 |  *       [0]=>
 25 |  *       array(2) {
 26 |  *         ["type"]=>
 27 |  *         string(14) "BlockStatement"
 28 |  *         ["body"]=>
 29 |  *         array(2) {
 30 |  *           [0]=>
 31 |  *           string(1) "1"
 32 |  *           [1]=>
 33 |  *           string(1) "2"
 34 |  *         }
 35 |  *       }
 36 |  *     }
 37 |  *   }
 38 |  *
 39 |  * Example in the expression position:
 40 |  *
 41 |  *   ./bin/syntax -g examples/parser-lexer-communication.php.g -m lalr1 -o ~/Parser.php
 42 |  *
 43 |  *   Parser::parse('({ 1, 2 });');
 44 |  *
 45 |  *   array(2) {
 46 |  *     ["type"]=>
 47 |  *     string(7) "Program"
 48 |  *     ["body"]=>
 49 |  *     array(1) {
 50 |  *       [0]=>
 51 |  *       array(2) {
 52 |  *         ["type"]=>
 53 |  *         string(13) "ObjectLiteral"
 54 |  *         ["properties"]=>
 55 |  *         array(2) {
 56 |  *           [0]=>
 57 |  *           string(1) "1"
 58 |  *           [1]=>
 59 |  *           string(1) "2"
 60 |  *         }
 61 |  *       }
 62 |  *     }
 63 |  *   }
 64 |  */
 65 | 
 66 | {
 67 |   // --------------------------------------------------
 68 |   // Lexical grammar.
 69 | 
 70 |   lex: {
 71 | 
 72 |     // Lexer states.
 73 |     startConditions: {
 74 |       expression: 0,
 75 |     },
 76 | 
 77 |     rules: [
 78 |       [`\\s+`,                    `/* skip whitespace */`],
 79 | 
 80 |       // { and } in the expression position yield different token types:
 81 | 
 82 |       [['expression'], `\\{`,     `return '%{'`],
 83 |       [['expression'], `\\}`,     `return '}%'`],
 84 | 
 85 |       // { and } in the statement position yield default token types:
 86 | 
 87 |       [`\\{`,                     `return '{'`],
 88 |       [`\\}`,                     `return '}'`],
 89 | 
 90 |       [`\\d+`,                    `return 'NUMBER'`],
 91 | 
 92 |       [`;`,                       `return ';'`],
 93 |       [`,`,                       `return ','`],
 94 | 
 95 |       [`\\(`,                     `return '('`],
 96 |       [`\\)`,                     `return ')'`],
 97 |     ],
 98 |   },
 99 | 
100 |   // --------------------------------------------------
101 |   // Syntactic grammar.
102 | 
103 |   bnf: {
104 |     Program:              [[`StatmentList`,                 `$$ = array('type' => 'Program', 'body' => $1)`]],
105 | 
106 |     StatmentList:         [[`Statment`,                     `$$ = [$1]`],
107 |                            [`StatmentList Statment`,        `array_push($1, $2); $$ = $1;`]],
108 | 
109 |     Statment:             [[`BlockStatement`,               `$$ = $1`],
110 |                            [`ExpressionStatement`,          `$$ = $1`]],
111 | 
112 |     BlockStatement:       [[`{ OptStatmentList }`,          `$$ = array('type' => 'BlockStatement', 'body' => $2)`]],
113 | 
114 |     OptStatmentList:      [[`StatmentList`,                 `$$ = $1`],
115 |                            [`ε`,                            `$$ = null`]],
116 | 
117 |     ExpressionStatement:  [[`Expression ;`,                 `$$ = $1`]],
118 | 
119 |     Expression:           [[`expressionBegin ExpressionNode expressionEnd`,
120 |                             `$$ = $2`]],
121 | 
122 |     // Special "activation productions". They activate the needed lexer state,
123 |     // so the latter can yield different token types for the same chars.
124 | 
125 |     expressionBegin:      [[`ε`,                            `yy::$lexer->pushState('expression');`]],
126 |     expressionEnd:        [[`ε`,                            `yy::$lexer->popState();`]],
127 | 
128 |     ExpressionNode:       [[`NumericLiteral`,               `$$ = $1`],
129 |                            [`ObjectLiteral`,                `$$ = $1`],
130 |                            [`( Expression )`,               `$$ = $2`]],
131 | 
132 |     NumericLiteral:       [[`NUMBER`,                       `$$ = $1`]],
133 | 
134 |     ObjectLiteral:        [[`%{ OptPropertyList }%`,        `$$ = array('type' => 'ObjectLiteral', 'properties' => $2)`]],
135 | 
136 |     OptPropertyList:      [[`PropertyList`,                 `$$ = $1`],
137 |                            [`ε`,                            `$$ = null`]],
138 | 
139 |     PropertyList:         [[`Property`,                     `$$ = [$1]`],
140 |                            [`PropertyList , Property`,      `array_push($1, $3); $$ = $1;`]],
141 | 
142 |     Property:             [`NumericLiteral`,                `$$ = $1`],
143 |   }
144 | }


--------------------------------------------------------------------------------