├── .watchmanconfig ├── .eslintignore ├── .gitignore ├── examples ├── example1.slr1 ├── grammar.slr1 ├── grammar.lr0 ├── example.ll1 ├── auto-tokens.g ├── binary.g ├── calc.slr1 ├── calculator.g ├── and.hdl ├── calculator-assoc-conflict.g ├── test.letter ├── word-boundary.g ├── word-boundary.g.js ├── follow-follow-conflict.ll1 ├── first-follow-conflict.ll1 ├── calc.cpp.g ├── calc.jl.g ├── calc.py.g ├── calc.rb.g ├── calc.php.g ├── first-first-conflict.ll1 ├── explicit-eof.g ├── calc.cs.g ├── calc.example.g ├── calc-eval.bnf ├── case-insensitive-lex.g ├── balanced-parens.slr1 ├── s-expression.g ├── calc.ll1 ├── module-include.bnf ├── calculator-assoc.g ├── calc.rs.g ├── calc.java.g ├── calc.cpp.ast.g ├── calc-eval.g ├── lambda-calculus.g ├── calc-loc.jl.bnf ├── bnf.bnf ├── on-token.bnf ├── calc-loc.py.bnf ├── calc-loc.php.bnf ├── boolean.bnf ├── calc-loc.rb.bnf ├── calc-ast.rs.g ├── hdl.g ├── grammar.clr1 ├── module-include.rb.g ├── json.grammar.js ├── module-include.g.js ├── json.ast.js ├── s-expression.cpp.bnf ├── test.lang ├── calc-loc.cs.bnf ├── cnf.g ├── module-include.py.g ├── calc-ast-java.bnf ├── lexer-start-conditions.py.g ├── module-include.cs.g ├── lexer-start-conditions.rb.g ├── module-include.php.g ├── lexer-start-conditions.g.js ├── calc-loc.bnf ├── indent.g ├── lang.lex ├── parser-lexer-communication.g └── parser-lexer-communication.php.g ├── src ├── __tests__ │ ├── rust-calc │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── calc-syntax │ │ │ ├── build.rs │ │ │ ├── Cargo.toml │ │ │ └── Makefile │ │ ├── calc-bin │ │ │ ├── Cargo.toml │ │ │ └── src │ │ │ │ └── main.rs │ │ └── Cargo.lock │ ├── rust-plugin-test.js │ └── code-unit-test.js ├── ll │ ├── __tests__ │ │ ├── grammar1.bnf │ │ └── ll-parsing-table-test.js │ └── ll-parser-generator-default.js ├── special-symbols.js ├── grammar │ ├── __tests__ │ │ ├── calc.bnf │ │ ├── calc.lex │ │ ├── calc.g │ │ ├── grammar-mode-test.js │ │ ├── grammar-symbol-test.js │ │ └── lex-grammar-test.js │ ├── grammar-mode.js │ 
└── grammar-symbol.js ├── table-printer.js ├── debug.js ├── syntax.js ├── plugins │ ├── python │ │ ├── ll │ │ │ └── ll-parser-generator-py.js │ │ ├── lr │ │ │ └── lr-parser-generator-py.js │ │ └── templates │ │ │ └── ll.template.py │ ├── php │ │ ├── ll │ │ │ └── ll-parser-generator-php.js │ │ ├── lr │ │ │ └── lr-parser-generator-php.js │ │ └── templates │ │ │ └── ll.template.php │ ├── ruby │ │ ├── ll │ │ │ └── ll-parser-generator-ruby.js │ │ ├── lr │ │ │ └── lr-parser-generator-ruby.js │ │ └── templates │ │ │ └── ll.template.rb │ ├── csharp │ │ └── lr │ │ │ └── lr-parser-generator-csharp.js │ ├── java │ │ └── lr │ │ │ └── lr-parser-generator-java.js │ ├── julia │ │ └── lr │ │ │ └── lr-parser-generator-julia.js │ ├── cpp │ │ └── lr │ │ │ └── lr-parser-generator-cpp.js │ ├── rust │ │ └── lr │ │ │ └── lr-parser-generator-rust.js │ └── example │ │ ├── ll │ │ └── ll-parser-generator-example.js │ │ └── lr │ │ └── lr-parser-generator-example.js ├── lr │ ├── __tests__ │ │ ├── lr-parser-generator-test.js │ │ ├── lr-parsing-table-test.js │ │ └── state-test.js │ └── lr-parser-generator-default.js └── templates │ └── ll.template.js ├── .prettierignore ├── bin └── syntax ├── scripts ├── git-pre-push ├── git-pre-commit └── build.js ├── .npmignore ├── .prettierrc ├── index.js ├── .babelrc ├── .travis.yml ├── .eslintrc.json ├── LICENSE └── package.json /.watchmanconfig: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | src/generated/ 2 | src/templates/ 3 | src/plugins/ 4 | dist/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist 2 | node_modules/ 3 | .npm-debug.log 4 | npm-debug.log 
-------------------------------------------------------------------------------- /examples/example1.slr1: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | E -> "1" E 4 | | "1" 5 | ; 6 | -------------------------------------------------------------------------------- /examples/grammar.slr1: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | S -> S "a" 4 | | "b" 5 | ; 6 | -------------------------------------------------------------------------------- /src/__tests__/rust-calc/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /calc-syntax/src/lib.rs -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | src/generated/ 2 | src/templates/ 3 | src/plugins/ 4 | dist/ -------------------------------------------------------------------------------- /examples/grammar.lr0: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | S -> A A; 4 | 5 | A -> "a" A 6 | | "b" 7 | ; -------------------------------------------------------------------------------- /bin/syntax: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | 'use strict'; 4 | 5 | require('../dist/bin/syntax')(); -------------------------------------------------------------------------------- /scripts/git-pre-push: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Pre-push validations: 4 | 5 | npm test 6 | 7 | npm run eslint -------------------------------------------------------------------------------- /examples/example.ll1: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | S 4 | : F 5 | | '(' S '+' F ')' 6 | ; 7 | 8 | F 9 | : 'id' 10 | ; 
-------------------------------------------------------------------------------- /src/__tests__/rust-calc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "calc-bin", 5 | "calc-syntax", 6 | ] 7 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | /examples/ 2 | /scripts/ 3 | /src/ 4 | .gitignore 5 | .eslintrc 6 | .babelrc 7 | .travis.yml 8 | .module-cache 9 | __tests__ -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "semi": true, 4 | "useTabs": false, 5 | "tabWidth": 2, 6 | "trailingComma": "es5", 7 | "bracketSpacing": false 8 | } -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | module.exports = require('./dist/syntax'); -------------------------------------------------------------------------------- /src/ll/__tests__/grammar1.bnf: -------------------------------------------------------------------------------- 1 | // https://github.com/DmitrySoshnikov/syntax/issues/151 2 | 3 | %% 4 | 5 | S 6 | : A 7 | ; 8 | A 9 | : 'a' 10 | | /* empty */ 11 | ; 12 | -------------------------------------------------------------------------------- /src/__tests__/rust-calc/calc-syntax/build.rs: -------------------------------------------------------------------------------- 1 | use std::process::Command; 2 | 3 | fn main() { 4 | Command::new("make") 5 | .status() 6 | .unwrap(); 7 | println!("act-file parser lib successfully generated"); 8 | } 
-------------------------------------------------------------------------------- /src/__tests__/rust-calc/calc-syntax/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calc-syntax" 3 | version = "0.1.0" 4 | authors = ["Andrey Rublev "] 5 | edition = "2018" 6 | build = "build.rs" 7 | 8 | [dependencies] 9 | onig = "4" 10 | lazy_static = "1" -------------------------------------------------------------------------------- /examples/auto-tokens.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Tokens `PLUS` and `ZERO` are automatically inferred. 3 | * 4 | * ./bin/syntax --grammar examples/auto-tokens.g --mode slr1 --table 5 | */ 6 | 7 | %% 8 | 9 | E -> E PLUS T 10 | | T 11 | ; 12 | 13 | T -> ZERO 14 | ; -------------------------------------------------------------------------------- /examples/binary.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Binary numbers. 
3 | * 4 | * Example: 5 | * 6 | * ./bin/syntax -g examples/binary.g -p '101001101' -t -m slr1 7 | */ 8 | 9 | %% 10 | 11 | N -> L; 12 | 13 | L -> L B 14 | | B 15 | ; 16 | 17 | B -> '1' 18 | | '0' 19 | ; -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | [ 4 | "@babel/preset-env", 5 | { 6 | "targets": { 7 | "node": "0.12" 8 | } 9 | } 10 | ], 11 | ], 12 | "plugins": [ 13 | "@babel/plugin-transform-object-rest-spread" 14 | ] 15 | } -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: node_js 3 | node_js: 4 | - "8.9.4" 5 | - "10.15.3" 6 | before_install: 7 | - curl https://sh.rustup.rs -sSf | sh -s -- -y 8 | - source $HOME/.cargo/env 9 | cache: 10 | cargo: true 11 | directories: 12 | - node_modules -------------------------------------------------------------------------------- /src/__tests__/rust-calc/calc-bin/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "calc-bin" 3 | version = "0.1.0" 4 | authors = ["Andrey Rublev ", "DmitrySoshnikov "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | calc-syntax = { path = "../calc-syntax" } -------------------------------------------------------------------------------- /examples/calc.slr1: -------------------------------------------------------------------------------- 1 | /** 2 | * An LR(1) grammar with precedence, and assocs. 
3 | * 4 | * ./bin/syntax -g examples/calc.slr1 --table --parse 'id + id * id' -w 5 | * 6 | */ 7 | 8 | %left '+' '-' 9 | %left '*' '/' 10 | 11 | %% 12 | 13 | E 14 | : E '+' E 15 | | E '*' E 16 | | 'id' 17 | | '(' E ')' 18 | ; 19 | -------------------------------------------------------------------------------- /src/__tests__/rust-calc/calc-bin/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate calc_syntax; 2 | 3 | use calc_syntax::Parser; 4 | 5 | fn main() { 6 | let mut parser = Parser::new(); 7 | 8 | let parse_string = String::from("2 + 2 * 2"); 9 | let result = parser.parse(&parse_string); 10 | 11 | println!("parse result: {}", result); 12 | } 13 | -------------------------------------------------------------------------------- /src/special-symbols.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | /** 7 | * Special "empty" symbol, Epsilon. 8 | */ 9 | export const EPSILON = 'ε'; 10 | 11 | /** 12 | * End of input, and bottom of the stack, "Dollar". 13 | */ 14 | export const EOF = '$'; 15 | -------------------------------------------------------------------------------- /examples/calculator.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Example: 3 | * 4 | * ./bin/syntax \ 5 | * --grammar examples/calculator.g \ 6 | * --mode slr1 7 | * --parse '(id + id) * id' 8 | * --ignore-whitespaces 9 | */ 10 | 11 | %% 12 | 13 | E -> E '+' T 14 | | T 15 | ; 16 | 17 | T -> T '*' F 18 | | F 19 | ; 20 | 21 | F -> 'id' 22 | | '(' E ')' 23 | ; -------------------------------------------------------------------------------- /src/grammar/__tests__/calc.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * An LR(1) grammar with precedence, and assocs. 
3 | */ 4 | 5 | %{ 6 | (() => 'module include code')(); 7 | %} 8 | 9 | %left '+' '-' 10 | %left '*' '/' 11 | 12 | %% 13 | 14 | E 15 | : E '+' E { $$ = ['+', $1, $2] } 16 | | E '*' E { $$ = ['*', $1, $2] } 17 | | 'id' { $$ = $1 } 18 | | '(' E ')' { $$ = $2 } 19 | ; 20 | -------------------------------------------------------------------------------- /scripts/git-pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Prettier 4 | 5 | jsfiles=$(git diff HEAD --name-only --diff-filter=ACM "*.js" | tr '\n' ' ') 6 | 7 | if [ ! -z "$jsfiles" ] 8 | then 9 | # Prettify all staged .js files 10 | echo "$jsfiles" | xargs ./node_modules/.bin/prettier --write 11 | 12 | # Add back the modified/prettified files to staging 13 | echo "$jsfiles" | xargs git add 14 | fi 15 | 16 | # Pre-commit validations: 17 | 18 | npm test 19 | 20 | npm run eslint -------------------------------------------------------------------------------- /src/__tests__/rust-calc/calc-syntax/Makefile: -------------------------------------------------------------------------------- 1 | rust_plugin_sources := $(wildcard ../../../plugins/rust/*.js) \ 2 | $(wildcard ../../../plugins/rust/lr/*.js) \ 3 | $(wildcard ../../../plugins/rust/templates/*.rs) 4 | 5 | src/lib.rs: ../../../../examples/calc.rs.g ../../../../dist/bin/syntax.js $(rust_plugin_sources) 6 | mkdir -p $(@D) 7 | ../../../../bin/syntax -g $< -m LALR1 -o $@ 8 | 9 | ../../../../dist/bin/syntax.js: $(rust_plugin_sources) 10 | npm run build -------------------------------------------------------------------------------- /examples/and.hdl: -------------------------------------------------------------------------------- 1 | // This file is part of www.nand2tetris.org 2 | // and the book "The Elements of Computing Systems" 3 | // by Nisan and Schocken, MIT Press. 
4 | // File name: projects/01/And.hdl 5 | 6 | /** 7 | * And gate: 8 | * out = 1 if (a == 1 and b == 1) 9 | * 0 otherwise 10 | */ 11 | 12 | CHIP And { 13 | IN a, b; 14 | OUT out; 15 | 16 | PARTS: 17 | // Put your code here: 18 | Nand(a=a, b=b, out=n); 19 | Nand(a=n, b=n, out=out); 20 | } 21 | -------------------------------------------------------------------------------- /examples/calculator-assoc-conflict.g: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | /** 4 | * This grammar has "shift-reduce" conflicts. See how to resolve them using 5 | * operators precedence in the `./examples/calculator-assoc.g`. 6 | * 7 | * Also automatic conflicts resolution is possible (see `--resolve-conflicts` 8 | * flag), however it may not always help, and a more correct way is to specify 9 | * precedence and associativity, or to rewrite grammar. 10 | */ 11 | 12 | E 13 | : E '+' E 14 | | E '*' E 15 | | 'id' 16 | ; 17 | -------------------------------------------------------------------------------- /examples/test.letter: -------------------------------------------------------------------------------- 1 | 2 | class Point { 3 | def constructor(x, y) { 4 | this.x = x; 5 | this.y = y; 6 | } 7 | 8 | def getX() { 9 | return this.x; 10 | } 11 | 12 | def getY() { 13 | return this.y; 14 | } 15 | } 16 | 17 | class Point3D extends Point { 18 | def constructor(x, y, z) { 19 | super(x, y); 20 | this.z = z; 21 | } 22 | 23 | def getZ() { 24 | return this.z; 25 | } 26 | } 27 | 28 | let p = new Point3D(10, 20, 30); -------------------------------------------------------------------------------- /examples/word-boundary.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Word boundary example: `if` keyword vs. `ifi` identifier. 
3 | * 4 | * ./bin/syntax -g examples/word-boundary.g -m lalr1 -p 'if' 5 | * > if-keyword 6 | * 7 | * ./bin/syntax -g examples/word-boundary.g -m lalr1 -p 'ifi' 8 | * > identifier 9 | */ 10 | 11 | %lex 12 | 13 | %% 14 | 15 | 'if'\b return 'IF' 16 | \w+ return 'ID' 17 | 18 | /lex 19 | 20 | %% 21 | 22 | Program 23 | : IF { $$ = 'if-keyword' } 24 | | ID { $$ = 'identifier' } 25 | ; 26 | -------------------------------------------------------------------------------- /src/table-printer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import Table from 'cli-table3'; 7 | 8 | /** 9 | * Wrapper class over `cli-table3` with default options preset. 10 | */ 11 | export default class TablePrinter { 12 | constructor(options) { 13 | return new Table( 14 | Object.assign({}, options, { 15 | style: { 16 | head: ['blue'], 17 | border: ['gray'], 18 | }, 19 | }) 20 | ); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/word-boundary.g.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Word boundary example: `if` keyword vs. `ifi` identifier. 
3 | * 4 | * ./bin/syntax -g examples/word-boundary.g.js -m lalr1 -p 'if' 5 | * > if-keyword 6 | * 7 | * ./bin/syntax -g examples/word-boundary.g.js -m lalr1 -p 'ifi' 8 | * > identifier 9 | */ 10 | 11 | { 12 | lex: { 13 | rules: [ 14 | ["if\\b", "return 'IF'"], 15 | ["\\w+", "return 'ID'"] 16 | ] 17 | }, 18 | 19 | "bnf": { 20 | "Program": [["IF", " $$ = 'if-keyword' "], 21 | ["ID", " $$ = 'identifier' "]], 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/follow-follow-conflict.ll1: -------------------------------------------------------------------------------- 1 | /** 2 | * LL(1): FOLLOW/FOLLOW conflict (rare case) 3 | * 4 | * Since `a` symbol is both in the FOLLOW of `B` and `C`, on `(A, a)`, 5 | * there will be a conflict between `A → B` and `A → C`. 6 | * 7 | * ┌───┬─────┬───┐ 8 | * │ │ 'a' │ $ │ 9 | * ├───┼─────┼───┤ 10 | * │ S │ 1 │ │ 11 | * ├───┼─────┼───┤ 12 | * │ A │ 2/3 │ │ 13 | * ├───┼─────┼───┤ 14 | * │ B │ 4 │ │ 15 | * ├───┼─────┼───┤ 16 | * │ C │ 5 │ │ 17 | * └───┴─────┴───┘ 18 | */ 19 | 20 | %% 21 | 22 | S: 23 | A 'a' 24 | ; 25 | 26 | A 27 | : B 28 | | C 29 | ; 30 | 31 | B 32 | : /* ε */ 33 | ; 34 | 35 | C 36 | : /* ε */ 37 | ; 38 | -------------------------------------------------------------------------------- /examples/first-follow-conflict.ll1: -------------------------------------------------------------------------------- 1 | /** 2 | * LL(1): FIRST/FOLLOW conflict 3 | * 4 | * The FIRST and FOLLOW set of a grammar rule overlap. With an empty string (ε) 5 | * in the FIRST set it is unknown which alternative to select. 6 | * 7 | * The FIRST set of A now is {'a', ε} and the FOLLOW set {'a'}. 
8 | * 9 | * ./bin/syntax -g examples/first-follow-conflict.ll1 -t 10 | * 11 | * ┌───┬─────┬─────┬───┐ 12 | * │ │ 'a' │ 'b' │ $ │ 13 | * ├───┼─────┼─────┼───┤ 14 | * │ S │ 1 │ │ │ 15 | * ├───┼─────┼─────┼───┤ 16 | * │ A │ 2/3 │ │ │ 17 | * └───┴─────┴─────┴───┘ 18 | */ 19 | 20 | %% 21 | 22 | S 23 | : A 'a' 'b' 24 | ; 25 | 26 | A 27 | : 'a' 28 | | /* epsilon */ 29 | ; -------------------------------------------------------------------------------- /examples/calc.cpp.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in C++. 3 | * 4 | * ./bin/syntax -g examples/calc.cpp.g -m lalr1 -o CalcParser.h 5 | * 6 | * #include "CalcParser.h" 7 | * 8 | * CalcParser parser; 9 | * 10 | * std::cout << parser.parse("2 + 2 * 2"); // 6 11 | */ 12 | 13 | %lex 14 | 15 | %% 16 | 17 | \s+ %empty 18 | 19 | \d+ NUMBER 20 | 21 | /lex 22 | 23 | %{ 24 | 25 | // Type of the parsing value. Can either 26 | // be a type alias or an actual struct: 27 | 28 | using Value = int; 29 | 30 | %} 31 | 32 | %left '+' 33 | %left '*' 34 | 35 | %% 36 | 37 | E 38 | : E '+' E { $$ = $1 + $3 } 39 | | E '*' E { $$ = $1 * $3 } 40 | | '(' E ')' { $$ = $2 } 41 | | NUMBER { $$ = std::stoi($1) } 42 | ; -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "es6": true, 4 | "node": true, 5 | "jest": true 6 | }, 7 | "extends": "eslint:recommended", 8 | "parserOptions": { 9 | "sourceType": "module" 10 | }, 11 | "rules": { 12 | "indent": 0, 13 | "linebreak-style": [ 14 | "error", 15 | "unix" 16 | ], 17 | "quotes": [ 18 | "error", 19 | "single", 20 | { 21 | "allowTemplateLiterals": true, 22 | "avoidEscape": true 23 | } 24 | ], 25 | "semi": [ 26 | "error", 27 | "always" 28 | ], 29 | "no-useless-escape": 0, 30 | "no-prototype-builtins": 0, 31 | "no-console": ["error", { "allow": ["warn", "error", "info", "timeEnd", 
"time"] }] 32 | } 33 | } -------------------------------------------------------------------------------- /examples/calc.jl.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in Julia language 3 | * 4 | * ./bin/syntax -g examples/calc.jl.g -m lalr1 -o CalcParser.jl 5 | * 6 | */ 7 | 8 | { 9 | "lex": { 10 | "rules": [ 11 | ["\\s+", '# skip whitespace'], 12 | ["\\d+", 'return "NUMBER"'], 13 | ["\\*", 'return "*"'], 14 | ["\\+", 'return "+"'], 15 | ["\\(", 'return "("'], 16 | ["\\)", 'return ")"'], 17 | ] 18 | }, 19 | 20 | "operators": [ 21 | ["left", "+"], 22 | ["left", "*"], 23 | ], 24 | 25 | "bnf": { 26 | "E": [ 27 | ["E + E", "$$ = $1 + $3"], 28 | ["E * E", "$$ = $1 * $3"], 29 | ["NUMBER", "$$ = tryparse(Int, $1)"], 30 | ["( E )", "$$ = $2"], 31 | ], 32 | }, 33 | } -------------------------------------------------------------------------------- /examples/calc.py.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in Python. 3 | * 4 | * ./bin/syntax -g examples/calc.py.g -m lalr1 -o calcparser.py 5 | * 6 | * >>> import calcparser 7 | * >>> calcparser.parse('2 + 2 * 2') 8 | * >>> 6 9 | */ 10 | 11 | { 12 | "lex": { 13 | "rules": [ 14 | ["\\s+", "# skip whitespace"], 15 | ["\\d+", "return 'NUMBER'"], 16 | ["\\*", "return '*'"], 17 | ["\\+", "return '+'"], 18 | ["\\(", "return '('"], 19 | ["\\)", "return ')'"], 20 | ] 21 | }, 22 | 23 | "operators": [ 24 | ["left", "+"], 25 | ["left", "*"], 26 | ], 27 | 28 | "bnf": { 29 | "E": [ 30 | ["E + E", "$$ = $1 + $3"], 31 | ["E * E", "$$ = $1 * $3"], 32 | ["NUMBER", "$$ = int($1)"], 33 | ["( E )", "$$ = $2"], 34 | ], 35 | }, 36 | } -------------------------------------------------------------------------------- /examples/calc.rb.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in Ruby. 
3 | * 4 | * ./bin/syntax -g examples/calc.rb.g -m lalr1 -o CalcParser.rb 5 | * 6 | * require('CalcParser.rb') 7 | * 8 | * puts CalcParser.parse('2 + 2 * 2') # 6 9 | */ 10 | 11 | { 12 | "lex": { 13 | "rules": [ 14 | ["\\s+", "# skip whitespace"], 15 | ["\\d+", "return 'NUMBER'"], 16 | ["\\*", "return '*'"], 17 | ["\\+", "return '+'"], 18 | ["\\(", "return '('"], 19 | ["\\)", "return ')'"], 20 | ] 21 | }, 22 | 23 | "operators": [ 24 | ["left", "+"], 25 | ["left", "*"], 26 | ], 27 | 28 | "bnf": { 29 | "E": [ 30 | ["E + E", "$$ = $1 + $3"], 31 | ["E * E", "$$ = $1 * $3"], 32 | ["NUMBER", "$$ = $1.to_i"], 33 | ["( E )", "$$ = $2"], 34 | ], 35 | }, 36 | } -------------------------------------------------------------------------------- /src/debug.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import colors from 'colors'; 7 | 8 | function emptyFn() {} 9 | 10 | /** 11 | * Debug module. 12 | */ 13 | const Debug = { 14 | isEnabled() { 15 | return !!global.SYNTAX_DEBUG; 16 | }, 17 | 18 | string(message) { 19 | return `${colors.bold('[DEBUG]')} ${message}`; 20 | }, 21 | 22 | log(message) { 23 | console.info(Debug.string(message)); 24 | }, 25 | 26 | time(label) { 27 | console.time(this.string(label)); 28 | }, 29 | 30 | timeEnd(label) { 31 | console.timeEnd(this.string(label)); 32 | }, 33 | }; 34 | 35 | if (!global.SYNTAX_DEBUG) { 36 | Object.keys(Debug).forEach(method => Debug[method] = emptyFn); 37 | } 38 | 39 | export default Debug; 40 | -------------------------------------------------------------------------------- /examples/calc.php.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in PHP. 3 | * 4 | * ./bin/syntax -g examples/calc.php.g -m lalr1 -o CalcParser.php 5 | * 6 | * > handling. 
3 | * 4 | * By default Syntax handles end of file with special EOF token, which is 5 | * 6 | * {type: '$', value: ''} 7 | * 8 | * However, a grammar may want to handle EOF explicitly in case it uses 9 | * EOF explicitly in some rules. In this case a lexical rule should match 10 | * special `<<EOF>>` regexp, which corresponds to the empty string 11 | * at the end of the parsing string, i.e. `/^$/`. The type of the token 12 | * returned in this case can be arbitrary. 13 | * 14 | * ./bin/syntax -g examples/explicit-eof.g -m slr1 -p '10' 15 | */ 16 | 17 | { 18 | lex: { 19 | rules: [ 20 | [`\\d+`, `return "NUMBER"`], 21 | [`<<EOF>>`, `return "EOF"`], 22 | ], 23 | }, 24 | 25 | bnf: { 26 | // The whole string consists only of one number (followed by EOF). 27 | Main: [[`NUMBER EOF`, `$$ = $1`]], 28 | }, 29 | } -------------------------------------------------------------------------------- /src/grammar/__tests__/calc.lex: -------------------------------------------------------------------------------- 1 | /** 2 | * A testing lexical grammar. 
3 | */ 4 | 5 | module.exports = { 6 | "macros": { 7 | "id": "[a-zA-Z0-9_]", 8 | }, 9 | 10 | "startConditions": { 11 | "comment": 1, // exclusive 12 | }, 13 | 14 | "rules": [ 15 | [["*"], "\\s+", "/*skip whitespace*/"], 16 | 17 | [["*"], "<<EOF>>", "return 'EOF'"], 18 | 19 | ["\\d+", "return 'NUMBER'"], 20 | ["{id}+", "return 'IDENTIFIER'"], 21 | ["\\(", "return '('"], 22 | ["\\)", "return ')'"], 23 | ["\\+", "return '+'"], 24 | ["\\*", "return '*'"], 25 | 26 | ["\\/\\*", "this.pushState('comment');"], 27 | [["comment"], "\\*+\\/", "this.popState();"], 28 | [["comment"], "\\d+", "return 'NUMBER_IN_COMMENT'"], 29 | [["comment"], "{id}+", "return 'IDENTIFIER_IN_COMMENT'"], 30 | ], 31 | 32 | "options": { 33 | "case-insensitive": true, 34 | }, 35 | }; -------------------------------------------------------------------------------- /examples/calc.cs.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in C#. 3 | * 4 | * ./bin/syntax -g examples/calc.cs.g -m lalr1 -o CalcParser.cs 5 | * 6 | * using SyntaxParser; 7 | * 8 | * var parser = new CalcParser(); 9 | * 10 | * Console.WriteLine(parser.parse("2 + 2 * 2")); // 6 11 | * Console.WriteLine(parser.parse("(2 + 2) * 2")); // 8 12 | */ 13 | 14 | { 15 | "lex": { 16 | "rules": [ 17 | ["\\s+", '/* skip whitespace */ return null'], 18 | ["\\d+", 'return "NUMBER"'], 19 | ["\\*", 'return "*"'], 20 | ["\\+", 'return "+"'], 21 | ["\\(", 'return "("'], 22 | ["\\)", 'return ")"'], 23 | ] 24 | }, 25 | 26 | "operators": [ 27 | ["left", "+"], 28 | ["left", "*"], 29 | ], 30 | 31 | "bnf": { 32 | "E": [ 33 | ["E + E", "$$ = $1 + $3"], 34 | ["E * E", "$$ = $1 * $3"], 35 | ["NUMBER", "$$ = Convert.ToInt32($1)"], 36 | ["( E )", "$$ = $2"], 37 | ], 38 | }, 39 | } -------------------------------------------------------------------------------- /examples/calc.example.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in 
Example language (actual JS, used in plugins example). 3 | * 4 | * ./bin/syntax -g examples/calc.example.g -m lalr1 -o CalcParser.example 5 | * 6 | * const CalcParser = require('CalcParser.example'); 7 | * 8 | * const parser = new CalcParser(); 9 | * console.log(parser.parse("2 + 2 * 2")); // 6 10 | */ 11 | 12 | { 13 | "lex": { 14 | "rules": [ 15 | ["\\s+", '/* skip whitespace */'], 16 | ["\\d+", 'return "NUMBER"'], 17 | ["\\*", 'return "*"'], 18 | ["\\+", 'return "+"'], 19 | ["\\(", 'return "("'], 20 | ["\\)", 'return ")"'], 21 | ] 22 | }, 23 | 24 | "operators": [ 25 | ["left", "+"], 26 | ["left", "*"], 27 | ], 28 | 29 | "bnf": { 30 | "E": [ 31 | ["E + E", "$$ = $1 + $3"], 32 | ["E * E", "$$ = $1 * $3"], 33 | ["NUMBER", "$$ = Number($1)"], 34 | ["( E )", "$$ = $2"], 35 | ], 36 | }, 37 | } -------------------------------------------------------------------------------- /examples/calc-eval.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Precedence and assoc in Yacc format. 
3 | * 4 | * See also in JSON grammar: 5 | * 6 | * https://github.com/DmitrySoshnikov/syntax/blob/master/examples/calc-eval.g 7 | * 8 | * To run this grammar: 9 | * 10 | * ./bin/syntax -g examples/calc-eval.bnf -w -m slr1 -p '5 + 5 * 5' 11 | * 12 | * > 30 13 | * 14 | * ./bin/syntax -g examples/calc-eval.bnf -w -m slr1 -p '5 + 5 * -5' 15 | * 16 | * > -20 17 | */ 18 | 19 | %lex 20 | 21 | %% 22 | 23 | \s+ /* skip whitespace */ 24 | \d+ return 'NUMBER' 25 | 26 | /lex 27 | 28 | %left '+' '-' 29 | %left '*' '/' 30 | %right '^' 31 | %left UMINUS 32 | 33 | %% 34 | 35 | E 36 | : E '+' E { $$ = $1 + $3 } 37 | | E '-' E { $$ = $1 - $3 } 38 | | E '*' E { $$ = $1 * $3 } 39 | | E '/' E { $$ = $1 / $3 } 40 | | E '^' E { $$ = Math.pow($1, $3) } 41 | | '-' E %prec UMINUS { $$ = -$2 } 42 | | '(' E ')' { $$ = $2 } 43 | | NUMBER { $$ = Number($1) } 44 | ; -------------------------------------------------------------------------------- /examples/case-insensitive-lex.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Case-insensitive lexical rules. 3 | * 4 | * Examples (accepted): 5 | * 6 | * ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'x' 7 | * ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'X' 8 | * ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'y' 9 | * 10 | * ✓ Accepted 11 | * 12 | * 13 | * Example (fail, "Y" is not case-insensitive): 14 | * 15 | * ./bin/syntax -g examples/case-insensitive-lex.g -m slr1 -p 'Y' 16 | * 17 | * Rejected: Unexpected token: "Y" at 1:0. 18 | */ 19 | { 20 | "lex": { 21 | "rules": [ 22 | 23 | // This rule is by default case-insensitive: 24 | 25 | [`x`, `return "X"`], 26 | 27 | // This rule overrides global options: 28 | 29 | [`y`, `return "Y"`, {"case-insensitive": false}], 30 | ], 31 | 32 | // Global options for the whole lexical grammar. 
33 | 34 | "options": { 35 | "case-insensitive": true, 36 | } 37 | }, 38 | 39 | "bnf": { 40 | "E": ["X", "Y"], 41 | } 42 | } -------------------------------------------------------------------------------- /examples/balanced-parens.slr1: -------------------------------------------------------------------------------- 1 | /** 2 | * How to run: 3 | * 4 | * ./bin/syntax \ 5 | * --grammar examples/balanced-parens.slr1 \ 6 | * --table \ 7 | * --parse '((()))' 8 | * 9 | * Result: 10 | * 11 | * Parsing mode: SLR1. 12 | * 13 | * Grammar: 14 | * 15 | * 0. S' -> S 16 | * ----------- 17 | * 1. S -> '(' S ')' 18 | * 2. | ε 19 | * 20 | * SLR(1) parsing table: 21 | * 22 | * ┌───┬─────┬─────┬─────┬───┐ 23 | * │ │ '(' │ ')' │ $ │ S │ 24 | * ├───┼─────┼─────┼─────┼───┤ 25 | * │ 0 │ s1 │ r2 │ r2 │ 3 │ 26 | * ├───┼─────┼─────┼─────┼───┤ 27 | * │ 1 │ s1 │ r2 │ r2 │ 2 │ 28 | * ├───┼─────┼─────┼─────┼───┤ 29 | * │ 2 │ │ s4 │ │ │ 30 | * ├───┼─────┼─────┼─────┼───┤ 31 | * │ 3 │ │ │ acc │ │ 32 | * ├───┼─────┼─────┼─────┼───┤ 33 | * │ 4 │ │ r1 │ r1 │ │ 34 | * └───┴─────┴─────┴─────┴───┘ 35 | * 36 | * Parsing: ((())) 37 | * 38 | * Accepted. 
39 | */ 40 | 41 | %% 42 | 43 | S -> '(' S ')' 44 | | /* epsilon */ 45 | ; -------------------------------------------------------------------------------- /examples/s-expression.g: -------------------------------------------------------------------------------- 1 | /** 2 | * How to run: 3 | * 4 | * ./bin/syntax \ 5 | * --grammar examples/s-expression.g \ 6 | * --mode slr1 \ 7 | * --parse '(+ 1 15)' 8 | * 9 | * > ['+', 1, 15] 10 | * 11 | * See also recursive descent version for this grammar: 12 | * https://gist.github.com/DmitrySoshnikov/2a434dda67019a4a7c37 13 | */ 14 | 15 | { 16 | "lex": { 17 | "rules": [ 18 | ["\\s+", "/* skip whitespace */"], 19 | ["\\d+", "return 'NUMBER';"], 20 | ["[a-zA-Z\\-\\+\\*\\?\\=/]+\\d*", "return 'SYMBOL';"], 21 | ["\\(", "return '(';"], 22 | ["\\)", "return ')';"], 23 | ] 24 | }, 25 | 26 | "bnf": { 27 | "s-exp": [["atom", "return $$ = $1;"], 28 | ["list", "return $$ = $1;"]], 29 | 30 | "list": [["( list-entries )", "$$ = $2;"]], 31 | 32 | "list-entries": [["s-exp list-entries", "$2.unshift($1); $$ = $2;"], 33 | ["ε", "$$ = [];"]], 34 | 35 | "atom": [["NUMBER", "$$ = Number(yytext);"], 36 | ["SYMBOL", "$$ = yytext;"]] 37 | } 38 | } -------------------------------------------------------------------------------- /src/grammar/__tests__/calc.g: -------------------------------------------------------------------------------- 1 | /** 2 | * An LR(1) grammar with precedence, and assocs, in JS format. 
3 | */ 4 | 5 | { 6 | "lex": { 7 | "startConditions": { 8 | "comment": 1, // exclusive 9 | }, 10 | 11 | "rules": [ 12 | [["*"], "\\s+", "/*skip whitespace*/"], 13 | ["\\d+", "return 'NUMBER'"], 14 | ["\\(", "return '('"], 15 | ["\\)", "return ')'"], 16 | ["\\+", "return '+'"], 17 | ["\\*", "return '*'"], 18 | 19 | ["\\/\\*", "this.pushState('comment');"], 20 | [["comment"], "\\*+\\/", "this.popState();"], 21 | [["comment"], "\\d+", "return 'NUMBER_IN_COMMENT'"], 22 | ], 23 | }, 24 | 25 | "operators": [ 26 | ["left", "+", "-"], 27 | ["left", "*", "/"], 28 | ], 29 | 30 | "bnf": { 31 | "E": [["E + E", "$$ = $1 + $3"], 32 | ["E * E", "$$ = $1 * $3"], 33 | ["E - E", "$$ = $1 - $3"], 34 | ["E / E", "$$ = $1 / $3"], 35 | ["NUMBER", "$$ = Number($1)"], 36 | ["( E )", "$$ = $2"]], 37 | }, 38 | 39 | "moduleInclude": ` 40 | (() => "module include code")(); 41 | ` 42 | } -------------------------------------------------------------------------------- /examples/calc.ll1: -------------------------------------------------------------------------------- 1 | /** 2 | * Left-factored LL(1) calculator grammar. 
3 | * 4 | * ./bin/syntax -g examples/calc.ll1 --table --parse 'id + id * id' -w 5 | * 6 | * Corresponding parsing table: 7 | * 8 | * ┌────┬─────┬─────┬──────┬─────┬─────┬───┐ 9 | * │ │ "+" │ "*" │ "id" │ "(" │ ")" │ $ │ 10 | * ├────┼─────┼─────┼──────┼─────┼─────┼───┤ 11 | * │ E │ │ │ 1 │ 1 │ │ │ 12 | * ├────┼─────┼─────┼──────┼─────┼─────┼───┤ 13 | * │ E' │ 2 │ │ │ │ 3 │ 3 │ 14 | * ├────┼─────┼─────┼──────┼─────┼─────┼───┤ 15 | * │ T │ │ │ 4 │ 4 │ │ │ 16 | * ├────┼─────┼─────┼──────┼─────┼─────┼───┤ 17 | * │ T' │ 6 │ 5 │ │ │ 6 │ 6 │ 18 | * ├────┼─────┼─────┼──────┼─────┼─────┼───┤ 19 | * │ F │ │ │ 7 │ 8 │ │ │ 20 | * └────┴─────┴─────┴──────┴─────┴─────┴───┘ 21 | */ 22 | 23 | %% 24 | 25 | E 26 | : T E' 27 | ; 28 | 29 | E' 30 | : "+" T E' 31 | | /* epsilon */ 32 | ; 33 | 34 | T 35 | : F T' 36 | ; 37 | 38 | T' 39 | : "*" F T' 40 | | /* epsilon */ 41 | ; 42 | 43 | F 44 | : "id" 45 | | "(" E ")" 46 | ; -------------------------------------------------------------------------------- /examples/module-include.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Module includes. 3 | * 4 | * The "moduleInclude" prologue code allows including an arbitrary code at the 5 | * beginning of the generated parser file. As an example, it can be the code 6 | * to require modules for corresponding AST nodes, or direct AST nodes 7 | * definitions. 8 | */ 9 | 10 | %{ 11 | // Define AST node classes. 12 | class Node {} 13 | 14 | /* JS comments work here */ 15 | class Binary extends Node {} 16 | class Primary extends Node {} 17 | 18 | // Can define callbacks for parse events here, 19 | // attaching to `yyparse` object. 
20 | 21 | yyparse.onParseBegin = (string) => { 22 | console.log('Parsing:', string); 23 | }; 24 | 25 | yyparse.onParseEnd = (value) => { 26 | console.log('Parsed:', value); 27 | }; 28 | 29 | %} 30 | 31 | %% 32 | 33 | E 34 | : E '+' T { $$ = new Binary($1, $3, '+') } 35 | | T { $$ = $1 } 36 | ; 37 | 38 | T 39 | : T '*' F { $$ = new Binary($1, $3, '*') } 40 | | F { $$ = $1 } 41 | ; 42 | 43 | F 44 | : 'id' { $$ = new Primary($1) } 45 | | '(' E ')' { $$ = $2 } 46 | ; -------------------------------------------------------------------------------- /src/syntax.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | // To require local modules from root. 7 | global.ROOT = __dirname + '/'; 8 | 9 | // Tokenizer. 10 | export {default as Tokenizer} from './tokenizer'; 11 | 12 | // Grammar classes. 13 | export {default as Grammar} from './grammar/grammar'; 14 | export {default as GrammarSymbol} from './grammar/grammar-symbol'; 15 | export {default as LexRule} from './grammar/lex-rule'; 16 | export {default as Production} from './grammar/production'; 17 | 18 | // Sets generator. 19 | export {default as SetsGenerator} from './sets-generator'; 20 | 21 | // LR parsing. 22 | export {default as CanonicalCollection} from './lr/canonical-collection'; 23 | export {default as State} from './lr/state'; 24 | export {default as LRItem} from './lr/lr-item'; 25 | export {default as LRParser} from './lr/lr-parser'; 26 | export {default as LRParserGenerator} from './lr/lr-parser-generator-default'; 27 | export {default as LRParsingTable} from './lr/lr-parsing-table'; 28 | 29 | // LL parsing. 
30 | export {default as LLParsingTable} from './ll/ll-parsing-table'; 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Dmitry Soshnikov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/ll/__tests__/ll-parsing-table-test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import Grammar from '../../grammar/grammar'; 7 | import {MODES as GRAMMAR_MODE} from '../../grammar/grammar-mode'; 8 | import LLParsingTable from '../ll-parsing-table'; 9 | import LLParser from '../ll-parser'; 10 | 11 | describe('ll-parsing-table', () => { 12 | it('ll1-grammar-1', () => { 13 | const grammarFile = __dirname + '/grammar1.bnf'; 14 | const expectedTable = { 15 | S: { 16 | "'a'": '1', 17 | $: '1', 18 | }, 19 | A: { 20 | "'a'": '2', 21 | $: '3', 22 | }, 23 | }; 24 | 25 | const grammarBySLR = Grammar.fromGrammarFile(grammarFile, { 26 | mode: GRAMMAR_MODE.LL1, 27 | }); 28 | expect(new LLParsingTable({grammar: grammarBySLR}).get()).toEqual( 29 | expectedTable 30 | ); 31 | expect(new LLParser({grammar: grammarBySLR}).parse('a')).toEqual({ 32 | status: 'accept', 33 | semanticValue: true, 34 | }); 35 | expect(new LLParser({grammar: grammarBySLR}).parse('')).toEqual({ 36 | status: 'accept', 37 | semanticValue: true, 38 | }); 39 | }); 40 | }); 41 | -------------------------------------------------------------------------------- /examples/calculator-assoc.g: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Precedence and associativity rules: 4 | 5 | - If the token's precedence is higher, the choice is to shift: 6 | 7 | E -> E + E • 8 | E -> E • * E (choose to shift on `*` since its precedence is higher than of `+`) 9 | 10 | - If the rule's precedence is higher, the choice is to reduce: 11 | 12 | E -> E * E • (choose to reduce since precedence of the production is higher than of `+`) 13 | E -> E • + E 14 | 15 | - If they have equal precedence, the choice is made based on the associativity of that precedence 
level: 16 | 17 | E -> E * E • (choose to reduce since precedence is the same `*` is left-associative) 18 | E -> E • * E 19 | 20 | This case we want `id * id * id` to be left-associative, i.e. 21 | `(id * id) * id`, not right-associative, that would be `id * (id * id)`. 22 | 23 | */ 24 | 25 | { 26 | "lex": { 27 | "rules": [ 28 | ["id", "return 'id'"], 29 | ["\\*", "return '*'"], 30 | ["\\+", "return '+'"] 31 | ] 32 | }, 33 | 34 | "operators": [ 35 | ["left", "+"], 36 | ["left", "*"] 37 | ], 38 | 39 | "bnf": { 40 | "E": [ 41 | "E + E", 42 | "E * E", 43 | "id" 44 | ] 45 | } 46 | } -------------------------------------------------------------------------------- /examples/calc.rs.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in Rust. 3 | * 4 | * ./bin/syntax -g examples/calc.rs.g -m lalr1 -o lib.rs 5 | * 6 | * use syntax::Parser; 7 | * 8 | * let parser = Parser::new(); 9 | * 10 | * println!("{:?}", parser.parse("2 + 2 * 2")); // 6 11 | * println!("{:?}", parser.parse("(2 + 2) * 2")); // 8 12 | */ 13 | 14 | { 15 | "lex": { 16 | "rules": [ 17 | ["\\s+", '/* skip whitespace */ ""'], 18 | ["\\d+", '"NUMBER"'], 19 | ["\\*", '"*"'], 20 | ["\\+", '"+"'], 21 | ["\\(", '"("'], 22 | ["\\)", '")"'], 23 | ] 24 | }, 25 | 26 | "operators": [ 27 | ["left", "+"], 28 | ["left", "*"], 29 | ], 30 | 31 | "moduleInclude": ` 32 | 33 | type TResult = i32; 34 | 35 | fn on_parse_begin(_parser: &mut Parser, string: &str) { 36 | println!("on_parse_begin: {:?}", string); 37 | } 38 | 39 | fn on_parse_end(_parser: &mut Parser, parsed: &TResult) { 40 | println!("on_parse_end: {:?}", parsed); 41 | } 42 | 43 | `, 44 | 45 | "bnf": { 46 | "E": [ 47 | ["E + E", "|$1: i32, $3: i32| -> i32; $$ = $1 + $3"], 48 | ["E * E", "|$1: i32, $3: i32| -> i32; $$ = $1 * $3"], 49 | ["NUMBER", "|| -> i32; $$ = yytext.parse::().unwrap()"], 50 | ["( E )", "$$ = $2"], 51 | ], 52 | }, 53 | } 
-------------------------------------------------------------------------------- /src/plugins/python/ll/ll-parser-generator-py.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LLParserGeneratorDefault = require(ROOT + 'll/ll-parser-generator-default').default; 7 | const PyParserGeneratorTrait = require('../py-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | 11 | /** 12 | * Generic Python template for LL(1) parser. 13 | */ 14 | const PY_LL_PARSER_TEMPLATE = fs.readFileSync( 15 | `${__dirname}/../templates/ll.template.py`, 16 | 'utf-8' 17 | ); 18 | 19 | /** 20 | * LL parser generator for Python. 21 | */ 22 | export default class LLParserGeneratorPy extends LLParserGeneratorDefault { 23 | 24 | /** 25 | * Instance constructor. 26 | */ 27 | constructor({ 28 | grammar, 29 | outputFile, 30 | options = {}, 31 | }) { 32 | super({grammar, outputFile, options}) 33 | .setTemplate(PY_LL_PARSER_TEMPLATE); 34 | 35 | this._lexHandlers = []; 36 | this._productionHandlers = []; 37 | 38 | // Trait provides methods for lex and production handlers. 39 | Object.assign(this, PyParserGeneratorTrait); 40 | } 41 | 42 | /** 43 | * Generates parser code. 
44 | */ 45 | generateParserData() { 46 | super.generateParserData(); 47 | this.generateLexHandlers(); 48 | this.generateProductionHandlers(); 49 | } 50 | }; 51 | -------------------------------------------------------------------------------- /src/plugins/python/lr/lr-parser-generator-py.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 7 | const PyParserGeneratorTrait = require('../py-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | 11 | /** 12 | * Generic Python template for all LR parsers. 13 | */ 14 | const PY_LR_PARSER_TEMPLATE = fs.readFileSync( 15 | `${__dirname}/../templates/lr.template.py`, 16 | 'utf-8', 17 | ); 18 | 19 | /** 20 | * LR parser generator for Python. 21 | */ 22 | export default class LRParserGeneratorPy extends LRParserGeneratorDefault { 23 | 24 | /** 25 | * Instance constructor. 26 | */ 27 | constructor({ 28 | grammar, 29 | outputFile, 30 | options = {}, 31 | }) { 32 | super({grammar, outputFile, options}) 33 | .setTemplate(PY_LR_PARSER_TEMPLATE); 34 | 35 | this._lexHandlers = []; 36 | this._productionHandlers = []; 37 | 38 | // Trait provides methods for lex and production handlers. 39 | Object.assign(this, PyParserGeneratorTrait); 40 | } 41 | 42 | /** 43 | * Generates parser code. 
44 | */ 45 | generateParserData() { 46 | super.generateParserData(); 47 | this.generateLexHandlers(); 48 | this.generateProductionHandlers(); 49 | } 50 | }; 51 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "syntax-cli", 3 | "version": "0.1.27", 4 | "description": "Syntactic analysis toolkit, language agnostic parsers generator.", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/DmitrySoshnikov/syntax.git" 8 | }, 9 | "bugs": "https://github.com/DmitrySoshnikov/syntax/issues", 10 | "main": "index.js", 11 | "scripts": { 12 | "build": "node scripts/build.js", 13 | "watch": "node scripts/build.js --watch", 14 | "test": "jest", 15 | "prepublish": "npm run build && npm test", 16 | "eslint": "eslint src/ && eslint bin/syntax" 17 | }, 18 | "bin": { 19 | "syntax-cli": "./bin/syntax" 20 | }, 21 | "keywords": [ 22 | "parser", 23 | "LL(1)", 24 | "LR(1)", 25 | "LALR(1)", 26 | "SLR(1)", 27 | "generator", 28 | "JavaScript", 29 | "Python", 30 | "PHP", 31 | "Ruby", 32 | "C#" 33 | ], 34 | "author": "Dmitry Soshnikov", 35 | "license": "MIT", 36 | "dependencies": { 37 | "nomnom": "^1.8.1", 38 | "cli-table3": "^0.5.0", 39 | "colors": "^1.1.2" 40 | }, 41 | "devDependencies": { 42 | "@babel/cli": "^7.23.4", 43 | "@babel/preset-env": "^7.23.7", 44 | "@babel/plugin-transform-object-rest-spread": "7.23.4", 45 | "shelljs": "^0.8.5", 46 | "jest-cli": "^29.3.1", 47 | "eslint": "^8.28.0", 48 | "prettier": "^1.11.1" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/plugins/php/ll/ll-parser-generator-php.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LLParserGeneratorDefault = require(ROOT + 
'll/ll-parser-generator-default').default; 7 | const PHPParserGeneratorTrait = require('../php-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | 12 | /** 13 | * Generic PHP template for LL(1) parser. 14 | */ 15 | const PHP_LL_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/ll.template.php`, 17 | 'utf-8' 18 | ); 19 | 20 | /** 21 | * LL parser generator for PHP. 22 | */ 23 | export default class LLParserGeneratorPHP extends LLParserGeneratorDefault { 24 | 25 | /** 26 | * Instance constructor. 27 | */ 28 | constructor({ 29 | grammar, 30 | outputFile, 31 | options = {}, 32 | }) { 33 | super({grammar, outputFile, options}) 34 | .setTemplate(PHP_LL_PARSER_TEMPLATE); 35 | 36 | this._lexHandlers = []; 37 | this._productionHandlers = []; 38 | 39 | this._parserClassName = path.basename( 40 | outputFile, 41 | path.extname(outputFile), 42 | ); 43 | 44 | // Trait provides methods for lex and production handlers. 45 | Object.assign(this, PHPParserGeneratorTrait); 46 | } 47 | 48 | /** 49 | * Generates parser code. 50 | */ 51 | generateParserData() { 52 | super.generateParserData(); 53 | this.generateLexHandlers(); 54 | this.generateProductionHandlers(); 55 | this.generateParserClassName(this._parserClassName); 56 | } 57 | }; 58 | -------------------------------------------------------------------------------- /examples/calc.java.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in Java. 
3 | * 4 | * ./bin/syntax -g examples/calc.java.g -m lalr1 -o CalcParser.java 5 | * 6 | * import com.syntax.*; 7 | * 8 | * CalcParser parser = new CalcParser(); 9 | * 10 | * System.out.println(parser.parse("2 + 2 * 2")); // 6 11 | * System.out.println(parser.parse("(2 + 2) * 2")); // 8 12 | */ 13 | 14 | { 15 | "lex": { 16 | "rules": [ 17 | ["\\s+", '/* skip whitespace */ return null'], 18 | ["\\d+", 'return "NUMBER"'], 19 | ["\\*", 'return "*"'], 20 | ["\\+", 'return "+"'], 21 | ["\\(", 'return "("'], 22 | ["\\)", 'return ")"'], 23 | ] 24 | }, 25 | 26 | "operators": [ 27 | ["left", "+"], 28 | ["left", "*"], 29 | ], 30 | 31 | "moduleInclude": ` 32 | 33 | /** 34 | * The ParserEvents class allows subscribing to 35 | * different parsing events. 36 | */ 37 | class ParserEvents { 38 | public static void init() { 39 | System.out.println("Parser is created."); 40 | } 41 | 42 | public static void onParseBegin(String str) { 43 | System.out.println("Parsing is started: " + str); 44 | } 45 | 46 | public static void onParseEnd(Object result) { 47 | System.out.println("Parsing is completed: " + result); 48 | } 49 | } 50 | 51 | `, 52 | 53 | "bnf": { 54 | "E": [ 55 | ["E + E", "$$ = (Integer)$1 + (Integer)$3"], 56 | ["E * E", "$$ = (Integer)$1 * (Integer)$3"], 57 | ["NUMBER", "$$ = Integer.valueOf(yytext)"], 58 | ["( E )", "$$ = $2"], 59 | ], 60 | }, 61 | } -------------------------------------------------------------------------------- /src/plugins/php/lr/lr-parser-generator-php.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 7 | const PHPParserGeneratorTrait = require('../php-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | 12 | /** 13 | * Generic PHP template for all LR parsers.
14 | */ 15 | const PHP_LR_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/lr.template.php`, 17 | 'utf-8', 18 | ); 19 | 20 | /** 21 | * LR parser generator for PHP. 22 | */ 23 | export default class LRParserGeneratorPHP extends LRParserGeneratorDefault { 24 | 25 | /** 26 | * Instance constructor. 27 | */ 28 | constructor({ 29 | grammar, 30 | outputFile, 31 | options = {}, 32 | }) { 33 | super({grammar, outputFile, options}) 34 | .setTemplate(PHP_LR_PARSER_TEMPLATE); 35 | 36 | this._lexHandlers = []; 37 | this._productionHandlers = []; 38 | 39 | this._parserClassName = path.basename( 40 | outputFile, 41 | path.extname(outputFile), 42 | ); 43 | 44 | // Trait provides methods for lex and production handlers. 45 | Object.assign(this, PHPParserGeneratorTrait); 46 | } 47 | 48 | /** 49 | * Generates parser code. 50 | */ 51 | generateParserData() { 52 | super.generateParserData(); 53 | this.generateLexHandlers(); 54 | this.generateProductionHandlers(); 55 | this.generateParserClassName(this._parserClassName); 56 | } 57 | }; 58 | -------------------------------------------------------------------------------- /src/plugins/ruby/ll/ll-parser-generator-ruby.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LLParserGeneratorDefault = require(ROOT + 'll/ll-parser-generator-default').default; 7 | const RubyParserGeneratorTrait = require('../ruby-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | 12 | /** 13 | * Generic Ruby template for LL(1) parser. 14 | */ 15 | const RUBY_LL_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/ll.template.rb`, 17 | 'utf-8' 18 | ); 19 | 20 | /** 21 | * LL parser generator for Ruby. 22 | */ 23 | export default class LLParserGeneratorRuby extends LLParserGeneratorDefault { 24 | 25 | /** 26 | * Instance constructor. 
27 | */ 28 | constructor({ 29 | grammar, 30 | outputFile, 31 | options = {}, 32 | }) { 33 | super({grammar, outputFile, options}) 34 | .setTemplate(RUBY_LL_PARSER_TEMPLATE); 35 | 36 | this._lexHandlers = []; 37 | this._productionHandlers = []; 38 | 39 | this._parserClassName = path.basename( 40 | outputFile, 41 | path.extname(outputFile), 42 | ); 43 | 44 | // Trait provides methods for lex and production handlers. 45 | Object.assign(this, RubyParserGeneratorTrait); 46 | } 47 | 48 | /** 49 | * Generates parser code. 50 | */ 51 | generateParserData() { 52 | super.generateParserData(); 53 | this.generateLexHandlers(); 54 | this.generateProductionHandlers(); 55 | this.generateParserClassName(this._parserClassName); 56 | } 57 | }; 58 | -------------------------------------------------------------------------------- /src/plugins/ruby/lr/lr-parser-generator-ruby.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 7 | const RubyParserGeneratorTrait = require('../ruby-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | 12 | /** 13 | * Generic Ruby template for all LR parsers. 14 | */ 15 | const RUBY_LR_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/lr.template.rb`, 17 | 'utf-8', 18 | ); 19 | 20 | /** 21 | * LR parser generator for Ruby. 22 | */ 23 | export default class LRParserGeneratorRuby extends LRParserGeneratorDefault { 24 | 25 | /** 26 | * Instance constructor.
27 | */ 28 | constructor({ 29 | grammar, 30 | outputFile, 31 | options = {}, 32 | }) { 33 | super({grammar, outputFile, options}) 34 | .setTemplate(RUBY_LR_PARSER_TEMPLATE); 35 | 36 | this._lexHandlers = []; 37 | this._productionHandlers = []; 38 | 39 | this._parserClassName = path.basename( 40 | outputFile, 41 | path.extname(outputFile), 42 | ); 43 | 44 | // Trait provides methods for lex and production handlers. 45 | Object.assign(this, RubyParserGeneratorTrait); 46 | } 47 | 48 | /** 49 | * Generates parser code. 50 | */ 51 | generateParserData() { 52 | super.generateParserData(); 53 | this.generateLexHandlers(); 54 | this.generateProductionHandlers(); 55 | this.generateParserClassName(this._parserClassName); 56 | } 57 | }; 58 | -------------------------------------------------------------------------------- /src/plugins/csharp/lr/lr-parser-generator-csharp.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 7 | const CSharpParserGeneratorTrait = require('../csharp-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | 12 | /** 13 | * Generic C# template for all LR parsers. 14 | */ 15 | const CSHARP_LR_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/lr.template.cs`, 17 | 'utf-8', 18 | ); 19 | 20 | /** 21 | * LR parser generator for C#. 22 | */ 23 | export default class LRParserGeneratorCSharp extends LRParserGeneratorDefault { 24 | 25 | /** 26 | * Instance constructor.
27 | */ 28 | constructor({ 29 | grammar, 30 | outputFile, 31 | options = {}, 32 | }) { 33 | super({grammar, outputFile, options}) 34 | .setTemplate(CSHARP_LR_PARSER_TEMPLATE); 35 | 36 | this._lexHandlers = []; 37 | this._productionHandlers = []; 38 | 39 | this._parserClassName = path.basename( 40 | outputFile, 41 | path.extname(outputFile), 42 | ); 43 | 44 | // Trait provides methods for lex and production handlers. 45 | Object.assign(this, CSharpParserGeneratorTrait); 46 | } 47 | 48 | /** 49 | * Generates parser code. 50 | */ 51 | generateParserData() { 52 | super.generateParserData(); 53 | this.generateLexHandlers(); 54 | this.generateProductionHandlers(); 55 | this.generateParserClassName(this._parserClassName); 56 | } 57 | }; 58 | -------------------------------------------------------------------------------- /examples/calc.cpp.ast.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in C++. 3 | * 4 | * ./bin/syntax -g examples/calc.cpp.g -m lalr1 -o CalcParser.h 5 | * 6 | * #include "CalcParser.h" 7 | * 8 | * CalcParser parser; 9 | * 10 | * std::cout << parser.parse("2 + 2 * 2"); // 6 11 | */ 12 | 13 | %lex 14 | 15 | %% 16 | 17 | \s+ %empty 18 | 19 | \d+ NUMBER 20 | 21 | /lex 22 | 23 | %{ 24 | 25 | #include 26 | #include 27 | 28 | /** 29 | * Base class for AST nodes. 30 | */ 31 | class Node {}; 32 | 33 | /** 34 | * Binary expressions. 35 | */ 36 | class BinaryExpression : public Node { 37 | public: 38 | BinaryExpression(std::string op, Node* left, Node* right) 39 | : op(op), left(left), right(right) {} 40 | 41 | std::string op; 42 | Node* left; 43 | Node* right; 44 | }; 45 | 46 | /** 47 | * AST node for numbers. 48 | */ 49 | class NumericLiteral : public Node { 50 | public: 51 | NumericLiteral(int value): value(value) {} 52 | int value; 53 | }; 54 | 55 | // Type of the parsing value. 
56 | using Value = Node*; 57 | 58 | 59 | // On parser begin hook: 60 | void onParseBegin(const std::string& str) { 61 | std::cout << "Parsing: " << str << "\n"; 62 | } 63 | 64 | // On parser end hook: 65 | void onParseEnd(Node* result) { 66 | std::cout << "Result: " << result << "\n"; 67 | } 68 | 69 | 70 | %} 71 | 72 | 73 | %left '+' 74 | %left '*' 75 | 76 | %% 77 | 78 | E 79 | : E '+' E 80 | { $$ = new BinaryExpression($2, $1, $3) } 81 | 82 | | E '*' E 83 | { $$ = new BinaryExpression($2, $1, $3) } 84 | 85 | | '(' E ')' { $$ = $2 } 86 | 87 | | NUMBER 88 | { $$ = new NumericLiteral(std::stoi($1)) } 89 | ; -------------------------------------------------------------------------------- /examples/calc-eval.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Example: 3 | * 4 | * ./bin/syntax -g examples/calc-eval.g -m slr1 -p '2 + 2 * 2' 5 | * 6 | * > 6 7 | * 8 | * ./bin/syntax -g examples/calc-eval.g -m slr1 -p '(2 + 2) * 2' 9 | * 10 | * > 8 11 | */ 12 | 13 | { 14 | "lex": { 15 | "rules": [ 16 | ["\\s+", "/*skip whitespace*/"], 17 | ["[0-9]+(?:\\.[0-9]+)?\\b", "return 'NUMBER'"], 18 | ["\\+", "return '+'"], 19 | ["\\*", "return '*'"], 20 | ["-", "return '-'"], 21 | ["\\/", "return '/'"], 22 | ["\\(", "return '('"], 23 | ["\\)", "return ')'"], 24 | ["\\^", "return '^'"], 25 | ["!", "return '!'"], 26 | ["%", "return '%'"], 27 | ["PI\\b", "return 'PI'"], 28 | ["E\\b", "return 'E'"], 29 | ] 30 | }, 31 | 32 | "operators": [ 33 | ["left", "+", "-"], 34 | ["left", "*", "/"], 35 | ["left", "^"], 36 | ["right", "!"], 37 | ["right", "%"], 38 | ["left", "UMINUS"], 39 | ], 40 | 41 | "bnf": { 42 | "e": [["e + e", "$$ = $1 + $3"], 43 | ["e - e", "$$ = $1 - $3"], 44 | ["e * e", "$$ = $1 * $3"], 45 | ["e / e", "$$ = $1 / $3"], 46 | ["e ^ e", "$$ = Math.pow($1, $3)"], 47 | ["e !", "$$ = (function _factorial(n) {if(n===0) return 1; return _factorial(n-1) * n})($1)"], 48 | ["e %", "$$ = $1 / 100"], 49 | ["- e", "$$ = -$2", {"prec": 
"UMINUS"}], 50 | ["( e )", "$$ = $2"], 51 | ["NUMBER", "$$ = Number(yytext)"], 52 | ["E", "$$ = Math.E"], 53 | ["PI", "$$ = Math.PI"]], 54 | } 55 | } -------------------------------------------------------------------------------- /src/plugins/java/lr/lr-parser-generator-java.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 7 | const JavaParserGeneratorTrait = require('../java-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | 12 | /** 13 | * Generic Java template for all LR parsers. 14 | */ 15 | const JAVA_LR_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/lr.template.java`, 17 | 'utf-8', 18 | ); 19 | 20 | /** 21 | * LR parser generator for Java. 22 | */ 23 | export default class LRParserGeneratorJava extends LRParserGeneratorDefault { 24 | 25 | /** 26 | * Instance constructor. 27 | */ 28 | constructor({ 29 | grammar, 30 | outputFile, 31 | options = {}, 32 | }) { 33 | super({grammar, outputFile, options}) 34 | .setTemplate(JAVA_LR_PARSER_TEMPLATE); 35 | 36 | this._lexHandlers = []; 37 | this._productionHandlers = []; 38 | 39 | this._parserClassName = path.basename( 40 | outputFile, 41 | path.extname(outputFile), 42 | ); 43 | 44 | // Trait provides methods for lex and production handlers. 45 | Object.assign(this, JavaParserGeneratorTrait); 46 | } 47 | 48 | /** 49 | * Generates parser code. 50 | */ 51 | generateParserData() { 52 | this.generateParserClassName(this._parserClassName); 53 | 54 | // Lexical grammar. 55 | this.generateTokenizer(); 56 | 57 | // Syntactic grammar. 58 | this.generateProductions(); 59 | 60 | // Tables. 
61 | this.generateTokensTable(); 62 | this.generateParseTable(); 63 | 64 | this.generateLexHandlers(); 65 | this.generateProductionHandlers(); 66 | 67 | this.generateModuleInclude(); 68 | } 69 | }; 70 | -------------------------------------------------------------------------------- /examples/lambda-calculus.g: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * 4 | * ./bin/syntax -g examples/lambda-calculus.g -m slr1 -p '(λx. x) (λy. y)' 5 | * 6 | * Parsing mode: SLR(1). 7 | * 8 | * Parsing: (λx. x) (λy. y) 9 | * 10 | * ✓ Accepted 11 | * 12 | * Parsed value: { type: 'Application', 13 | * lhs: 14 | * { type: 'Abstraction', 15 | * param: { type: 'Identifier', value: 'x' }, 16 | * body: { type: 'Identifier', value: 'x' } }, 17 | * rhs: 18 | * { type: 'Abstraction', 19 | * param: { type: 'Identifier', value: 'y' }, 20 | * body: { type: 'Identifier', value: 'y' } } } 21 | * 22 | * 23 | * To generate a parser: 24 | * 25 | * ./bin/syntax -g examples/lambda-calculus.g -m slr1 -o lc-parser.js 26 | * 27 | * In Node: 28 | * 29 | * require('lc-parser.js').parse('(λx. x) (λy. 
y)'); 30 | * 31 | */ 32 | 33 | { 34 | "lex": { 35 | "rules": [ 36 | ["\\s+", "/* skip whitespace */"], 37 | ["\\.", "return 'DOT';"], 38 | ["[a-z][a-zA-Z]*", "return 'LCID';"], 39 | ["λ", "return 'LAMBDA';"], 40 | ["\\(", "return 'LPAREN';"], 41 | ["\\)", "return 'RPAREN';"], 42 | ] 43 | }, 44 | 45 | "bnf": { 46 | "Term": [["Application", "$$ = $1;"], 47 | ["LAMBDA Lcid DOT Term", "$$ = {type: 'Abstraction', param: $2, body: $4};"]], 48 | 49 | "Application": [["Application Atom", "$$ = {type: 'Application', lhs: $1, rhs: $2};"], 50 | ["Atom", "$$ = $1;"]], 51 | 52 | "Atom": [["LPAREN Term RPAREN", "$$ = $2;"], 53 | ["Lcid", "$$ = $1;"]], 54 | 55 | "Lcid": [["LCID", "$$ = {type: 'Identifier', value: $1};"]] 56 | } 57 | } -------------------------------------------------------------------------------- /examples/calc-loc.jl.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Captures location info. Julia version. 3 | * 4 | * In order to capture locations, pass the `--loc` option. 5 | * 6 | * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc. 7 | * Named accessors are available as well: @foo, @bar. 8 | * 9 | * A location is a vector of structure: 10 | * 11 | * [ 12 | * startOffset, 13 | * endOffset, 14 | * startLine, 15 | * endLine, 16 | * startColumn, 17 | * endColumn, 18 | * ] 19 | * 20 | * The resulting location is in the @$. It is calculated automatically from 21 | * first and last symbol on a RHS handle, and it also can be overridden 22 | * manually.
23 | * 24 | * ./bin/syntax -g examples/calc-loc.jl.bnf -m slr1 -o ~/CalcLoc.jl --loc 25 | * 26 | */ 27 | 28 | %lex 29 | 30 | %% 31 | 32 | \s+ # ignore whitespace 33 | \d+ return "NUMBER" 34 | 35 | /lex 36 | 37 | %{ 38 | 39 | function calcloc(s, e) 40 | return [s.startoffset, e.endoffset, s.startline, e.endline, s.startcolumn, e.endcolumn] 41 | end 42 | 43 | function singleloc(t) 44 | return [t.startoffset, t.endoffset, t.startline, t.endline, t.startcolumn, t.endcolumn] 45 | end 46 | 47 | function numericliteral(value, loc) 48 | return [value, loc] 49 | end 50 | 51 | function binaryexpression(op, left, right, loc) 52 | return [op, left, right, loc] 53 | end 54 | 55 | %} 56 | 57 | %left '+' 58 | %left '*' 59 | 60 | %% 61 | 62 | exp 63 | : exp '+' exp 64 | /* Explicitly calculate location */ 65 | { $$ = binaryexpression("+", $1, $3, calcloc(@1, @3)) } 66 | 67 | | exp '*' exp 68 | /* Use default result location: @$ */ 69 | { $$ = binaryexpression("*", $1, $3, @$) } 70 | 71 | | '(' exp ')' 72 | { $$ = $2 } 73 | 74 | | number 75 | /* Named args and position */ 76 | { $$ = numericliteral($number, singleloc(@number)) } 77 | ; 78 | 79 | number 80 | : NUMBER { $$ = tryparse(Int, parserdata.yytext) } 81 | ; -------------------------------------------------------------------------------- /examples/bnf.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * BNF grammar grammar. 3 | * 4 | * A BNF parser generated from it can parse its own grammar.
5 | * 6 | * Example: 7 | * 8 | * ./bin/syntax -g examples/bnf.g -f examples/bnf.bnf -m slr1 9 | */ 10 | 11 | %% 12 | 13 | Spec 14 | : OptModInc '%%' ProductionList 15 | { return $$ = {bnf: $3, moduleInclude: $1 }; } 16 | ; 17 | 18 | OptModInc 19 | : MODULE_INCLUDE 20 | { $$ = $1; } 21 | | 22 | ; 23 | 24 | ProductionList 25 | : ProductionList Production 26 | { $$ = $1; $$[$2[0]] = $2[1]; } 27 | 28 | | Production 29 | { $$ = {}; $$[$1[0]] = $1[1]; } 30 | ; 31 | 32 | Production 33 | : LHS ':' HandleList ';' 34 | { $$ = [$1, $3]; } 35 | ; 36 | 37 | LHS 38 | : ID 39 | { $$ = yytext; } 40 | ; 41 | 42 | HandleList 43 | : HandleList '|' HandleAction 44 | { $$ = $1; $$.push($3); } 45 | 46 | | HandleAction 47 | { $$ = [$1]; } 48 | ; 49 | 50 | HandleAction 51 | : Handle Action 52 | { $$ = [$1, $2]; } 53 | ; 54 | 55 | Handle 56 | : Entries 57 | { $$ = $1; } 58 | | 59 | { $$ = ''; } 60 | ; 61 | 62 | 63 | Entries 64 | : Entries Entry 65 | { $$ = $1 + ' ' + $2; } 66 | 67 | | Entry 68 | { $$ = $1; } 69 | ; 70 | 71 | Entry 72 | : ID 73 | { $$ = yytext; } 74 | 75 | | STRING 76 | { $$ = yytext; } 77 | ; 78 | 79 | Action 80 | : CODE 81 | { $$ = yytext; } 82 | | 83 | { $$ = null; } 84 | ; -------------------------------------------------------------------------------- /src/lr/__tests__/lr-parser-generator-test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import Grammar from '../../grammar/grammar'; 7 | import {MODES as GRAMMAR_MODE} from '../../grammar/grammar-mode'; 8 | 9 | import LRParserGeneratorDefault from '../lr-parser-generator-default'; 10 | 11 | import path from 'path'; 12 | import os from 'os'; 13 | 14 | function createParser(grammar, options) { 15 | const outputFile = path.resolve(os.tmpdir(), '.syntax-parser.js'); 16 | 17 | return new LRParserGeneratorDefault({ 18 | grammar, 19 | outputFile, 20 | options, 21 | }).generate(); 22 | } 23 
| 24 | const grammar = Grammar.fromGrammarFile( 25 | __dirname + '/../../grammar/__tests__/calc.g', 26 | { 27 | mode: GRAMMAR_MODE.LALR1, 28 | captureLocations: true, 29 | } 30 | ); 31 | 32 | describe('LR parser generator', () => { 33 | 34 | it('parse options', () => { 35 | 36 | const options = { 37 | captureLocations: true, 38 | }; 39 | 40 | const parser = createParser(grammar, options); 41 | 42 | // Global options. 43 | expect(parser.getOptions()).toEqual(options); 44 | 45 | const overrideOptions = { 46 | captureLocations: false, 47 | 'x-flag': true, 48 | }; 49 | 50 | const parsingString = '2 + 2'; 51 | 52 | // // Setup on parse begin hook. 53 | parser.onParseBegin = (string, tokenizer, options) => { 54 | 55 | expect(string).toBe(parsingString); 56 | 57 | expect(options).toEqual(overrideOptions); 58 | expect(parser.getOptions()).toEqual(overrideOptions); 59 | 60 | if (options['x-flag']) { 61 | tokenizer.pushState('x-flag'); 62 | } 63 | 64 | expect(tokenizer.getCurrentState()).toBe('x-flag'); 65 | tokenizer.popState(); 66 | }; 67 | 68 | parser.parse(parsingString, overrideOptions); 69 | 70 | // Check the global options are restored. 71 | expect(parser.getOptions()).toEqual(options); 72 | }); 73 | 74 | }); -------------------------------------------------------------------------------- /src/plugins/julia/lr/lr-parser-generator-julia.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | /** 7 | * Implementation notes. 8 | * 9 | * 1. Extend `LRParserGeneratorDefault` 10 | * 2. Implement `generateParserData()` 11 | * 3. Implement all specific to the target language 12 | * functionality in the trait file. 
13 | */ 14 | 15 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 16 | const JuliaParserGeneratorTrait = require('../julia-parser-generator-trait'); 17 | 18 | import fs from 'fs'; 19 | import path from 'path'; 20 | 21 | /** 22 | * Generic template for all LR parsers in the Julia language. 23 | */ 24 | const JL_LR_PARSER_TEMPLATE = fs.readFileSync( 25 | `${__dirname}/../templates/lr.template.jl`, 26 | 'utf-8', 27 | ); 28 | 29 | /** 30 | * LR parser generator for Julia language. 31 | */ 32 | export default class LRParserGeneratorJulia extends LRParserGeneratorDefault { 33 | 34 | /** 35 | * Instance constructor. 36 | */ 37 | constructor({ 38 | grammar, 39 | outputFile, 40 | options = {}, 41 | }) { 42 | super({grammar, outputFile, options}) 43 | .setTemplate(JL_LR_PARSER_TEMPLATE); 44 | this._lexHandlers = []; 45 | this._productionHandlers = []; 46 | 47 | /** 48 | * Actual class name of your parser. Here we infer from the output filename. 49 | */ 50 | this._parserClassName = path.basename( 51 | outputFile, 52 | path.extname(outputFile), 53 | ); 54 | 55 | /** 56 | * The trait provides methods for lex and production handlers, as well 57 | * as some very specific code generation for the target language. 58 | */ 59 | Object.assign(this, JuliaParserGeneratorTrait); 60 | } 61 | 62 | /** 63 | * Generates parser code.
64 | */ 65 | generateParserData() { 66 | super.generateParserData(); 67 | this.generateLexHandlers(); 68 | this.generateProductionHandlers(); 69 | } 70 | }; 71 | -------------------------------------------------------------------------------- /src/plugins/cpp/lr/lr-parser-generator-cpp.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 7 | const CppParserGeneratorTrait = require('../cpp-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | 12 | /** 13 | * Generic C++ template for all LR parsers. 14 | */ 15 | const CPP_LR_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/lr.template.h`, 17 | 'utf-8', 18 | ); 19 | 20 | /** 21 | * LR parser generator for C++. 22 | */ 23 | export default class LRParserGeneratorCpp extends LRParserGeneratorDefault { 24 | 25 | /** 26 | * Instance constructor. 27 | */ 28 | constructor({ 29 | grammar, 30 | outputFile, 31 | options = {}, 32 | }) { 33 | super({grammar, outputFile, options}) 34 | .setTemplate(CPP_LR_PARSER_TEMPLATE); 35 | 36 | this._lexHandlers = []; 37 | this._productionHandlers = []; 38 | this._tokenTypes = []; 39 | this._terminalsMap = {}; 40 | this._terminalsIndexMap = {}; 41 | 42 | this._parserClassName = path.basename( 43 | outputFile, 44 | path.extname(outputFile), 45 | ); 46 | 47 | // Trait provides methods for lex and production handlers. 48 | Object.assign(this, CppParserGeneratorTrait); 49 | } 50 | 51 | /** 52 | * Generates parser code. 
53 | */ 54 | generateParserData() { 55 | this.generateNamespace(); 56 | this.generateModuleInclude(); 57 | this.generateCaptureLocations(); 58 | this.generateBuiltInTokenizer(); 59 | this.generateTokenTypes(); 60 | this.generateTokensTable(); 61 | this.generateLexRules(); 62 | this.generateLexRulesByStartConditions(); 63 | this.generateLexHandlers(); 64 | this.generateProductions(); 65 | this.generateParseTable(); 66 | this.generateProductionHandlers(); 67 | this.generateParserClassName(this._parserClassName); 68 | this.generateParsedResult(); 69 | } 70 | }; 71 | -------------------------------------------------------------------------------- /examples/on-token.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Calculates parens in RegExp style. 3 | * 4 | * /(((a)b)c)(d)(e)/ 5 | * 6 | * Here "a" is 3, "b" - 2, "c" - 1, "d" - 4, and "e" is 5. 7 | * 8 | * Test: 9 | * 10 | * ./bin/syntax -g examples/on-token.bnf -m lalr1 -p '(((a)b)c)(d)(e)' 11 | */ 12 | 13 | %lex 14 | 15 | %% 16 | 17 | \w+ return 'NAME' 18 | '(' return 'L_PAREN' 19 | ')' return 'R_PAREN' 20 | 21 | /lex 22 | 23 | %{ 24 | 25 | /** 26 | * Lower group boundary: 27 | * 28 | * /(((a)b)c)(d)(e)/ 29 | * 30 | * The first paren in (((a)b)c) has lower bound 0, but when 31 | * we reach the (d), it already 4. 32 | */ 33 | let currentLower = 0; 34 | 35 | /** 36 | * Group number to assign to a group. 37 | */ 38 | let currentGroup = 0; 39 | 40 | /** 41 | * Total number of groups.
42 | */ 43 | let totalGroups = 0; 44 | 45 | yyparse.onParseBegin = (_string) => { 46 | currentGroup = 0; 47 | totalGroups = 0; 48 | }; 49 | 50 | yyparse.onShift = token => { 51 | if (token.type === 'L_PAREN') { 52 | currentGroup++; 53 | totalGroups++; 54 | } 55 | return token; 56 | }; 57 | 58 | %} 59 | 60 | %% 61 | 62 | Program 63 | : Items 64 | { 65 | $$ = { 66 | type: 'Program', 67 | items: $1, 68 | } 69 | } 70 | ; 71 | 72 | Items 73 | : Item 74 | { $$ = [$1] } 75 | 76 | | Items Item 77 | { $$ = $1; $1.push($2) } 78 | ; 79 | 80 | Item 81 | : Group 82 | | Name 83 | ; 84 | 85 | Name 86 | : NAME 87 | { 88 | $$ = { 89 | type: 'Name', 90 | value: $1, 91 | } 92 | } 93 | ; 94 | 95 | Group 96 | : L_PAREN Items R_PAREN 97 | { 98 | $$ = { 99 | type: 'Group', 100 | number: currentGroup, 101 | items: $2, 102 | }; 103 | 104 | // Go up. 105 | currentGroup--; 106 | 107 | // We reached the top level, reset the current group: 108 | if (currentGroup === currentLower) { 109 | currentGroup = totalGroups; 110 | currentLower = totalGroups; 111 | } 112 | } 113 | ; 114 | 115 | -------------------------------------------------------------------------------- /examples/calc-loc.py.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Captures location info. Python version. 3 | * 4 | * In order to capture locations, pass the `--loc` option. 5 | * 6 | * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc. 7 | * A named accessors are available as well: @foo, @bar. 8 | * 9 | * A location is an object of structure: 10 | * 11 | * { 12 | * start_offset, 13 | * end_offset, 14 | * start_line, 15 | * end_line, 16 | * start_column, 17 | * end_colum, 18 | * } 19 | * 20 | * The resulting location is in the @$. It is calculated automatically from 21 | * first and last symbol on a RHS handle, and it also can be overridden 22 | * manually. 
23 | * 24 | * ./bin/syntax -g examples/calc-loc.py.bnf -m slr1 -o ~/Parser.py --loc 25 | * 26 | */ 27 | 28 | %lex 29 | 30 | %% 31 | 32 | \s+ return "" # skip whitespace 33 | \d+ return "NUMBER" 34 | 35 | /lex 36 | 37 | %{ 38 | 39 | class BinaryExpression(object): 40 | def __init__(self, op, left, right, loc): 41 | self.op = op 42 | self.left = left 43 | self.right = right 44 | self.loc = loc 45 | 46 | def Loc(s, e): 47 | return { 48 | 'start_offset': s['start_offset'], 49 | 'end_offset': e['end_offset'], 50 | 'start_line': s['start_line'], 51 | 'end_line': e['end_line'], 52 | 'start_column': s['start_column'], 53 | 'end_column': e['end_column'], 54 | } 55 | 56 | class NumericLiteral(object): 57 | def __init__(self, value, loc): 58 | self.value = value 59 | self.loc = loc 60 | 61 | def on_parse_end(value): 62 | print(value.loc) 63 | 64 | %} 65 | 66 | %left '+' 67 | %left '*' 68 | 69 | %% 70 | 71 | exp 72 | : exp '+' exp 73 | /* Explicitly calculate location */ 74 | { $$ = BinaryExpression('+', $1, $3, Loc(@1, @3)) } 75 | 76 | | exp '*' exp 77 | /* Use default result location: @$ */ 78 | { $$ = BinaryExpression('*', $1, $3, @$) } 79 | 80 | | '(' exp ')' 81 | { $$ = $2 } 82 | 83 | | number 84 | /* Named args and position */ 85 | { $$ = NumericLiteral($number, @number) } 86 | ; 87 | 88 | number 89 | : NUMBER { $$ = int(yytext) } 90 | ; 91 | -------------------------------------------------------------------------------- /src/grammar/grammar-mode.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | export const MODES = { 7 | LR0: 'LR0', 8 | SLR1: 'SLR1', 9 | LALR1: 'LALR1', 10 | LALR1_BY_SLR1: 'LALR1_BY_SLR1', 11 | LALR1_BY_CLR1: 'LALR1_BY_CLR1', 12 | LALR1_EXTENDED: 'LALR1_EXTENDED', 13 | CLR1: 'CLR1', 14 | LL1: 'LL1', 15 | }; 16 | 17 | /** 18 | * Grammar/parser mode. 
19 | */ 20 | export default class GrammarMode { 21 | 22 | constructor(mode = MODES.LR0) { 23 | mode = mode.toUpperCase(); 24 | 25 | if (!MODES.hasOwnProperty(mode)) { 26 | throw new TypeError( 27 | `\n"${mode}" is not a valid parsing mode. ` + 28 | `Valid modes are: ${Object.keys(MODES).join(', ')}.\n` 29 | ); 30 | } 31 | 32 | this._mode = mode; 33 | } 34 | 35 | getRaw() { 36 | return this._mode; 37 | } 38 | 39 | isLL() { 40 | return this._isMode(MODES.LL1); 41 | } 42 | 43 | isLR() { 44 | return !this.isLL(); 45 | } 46 | 47 | usesLookaheadSet() { 48 | return this.isLALR1ByCLR1() || this.isCLR1(); 49 | } 50 | 51 | isLR0() { 52 | return this._isMode(MODES.LR0); 53 | } 54 | 55 | isSLR1() { 56 | return this._isMode(MODES.SLR1); 57 | } 58 | 59 | isLALR1() { 60 | // Default algorithm for LALR(1) is "LALR(1) by SLR(1)". 61 | return this.isLALR1BySLR1() || this._isMode(MODES.LALR1); 62 | } 63 | 64 | isLALR1BySLR1() { 65 | return this._isMode(MODES.LALR1_BY_SLR1); 66 | } 67 | 68 | isLALR1ByCLR1() { 69 | return this._isMode(MODES.LALR1_BY_CLR1); 70 | } 71 | 72 | isLALR1Extended() { 73 | // Special grammar mode, where productions are built from 74 | // the LR(0) automation in the "LALR(1) by SLR(1)" algorithm. 75 | return this._isMode(MODES.LALR1_EXTENDED); 76 | } 77 | 78 | isCLR1() { 79 | return this._isMode(MODES.CLR1); 80 | } 81 | 82 | _isMode(mode) { 83 | return this._mode === mode; 84 | } 85 | 86 | /** 87 | * Returns string representation of a mode. 88 | * LR0 -> LR(0) 89 | */ 90 | toString() { 91 | return `${this._mode.slice(0, -1)}(${this._mode[this._mode.length - 1]})`; 92 | } 93 | } -------------------------------------------------------------------------------- /examples/calc-loc.php.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Captures location info. PHP version. 3 | * 4 | * In order to capture locations, pass the `--loc` option. 5 | * 6 | * Locations in handlers are accessible via `@` notation, e.g. 
@1, @2, etc. 7 | * A named accessors are available as well: @foo, @bar. 8 | * 9 | * A location is an object of structure: 10 | * 11 | * { 12 | * startOffset, 13 | * endOffset, 14 | * startLine, 15 | * endLine, 16 | * startColumn, 17 | * endColum, 18 | * } 19 | * 20 | * The resulting location is in the @$. It is calculated automatically from 21 | * first and last symbol on a RHS handle, and it also can be overridden 22 | * manually. 23 | * 24 | * ./bin/syntax -g examples/calc-loc.php.bnf -m slr1 -o ~/Parser.php --loc 25 | * 26 | */ 27 | 28 | %lex 29 | 30 | %% 31 | 32 | \s+ /* skip whitespace */ return "" 33 | \d+ return "NUMBER" 34 | 35 | /lex 36 | 37 | %{ 38 | 39 | class BinaryExpression { 40 | public function __construct($op, $left, $right, $loc) { 41 | $this->op = $op; 42 | $this->left = $left; 43 | $this->right = $right; 44 | $this->loc = $loc; 45 | } 46 | } 47 | 48 | function Loc($s, $e) { 49 | // Same as default result location. 50 | return array( 51 | 'startOffset' => $s['startOffset'], 52 | 'endOffset' => $e['endOffset'], 53 | 'startLine' => $s['startLine'], 54 | 'endLine' => $e['endLine'], 55 | 'startColumn' => $s['startColumn'], 56 | 'endColumn' => $e['endColumn'], 57 | ); 58 | } 59 | 60 | class NumericLiteral { 61 | public function __construct($value, $loc) { 62 | $this->value = $value; 63 | $this->loc = $loc; 64 | } 65 | } 66 | 67 | %} 68 | 69 | %left '+' 70 | %left '*' 71 | 72 | %% 73 | 74 | exp 75 | : exp '+' exp 76 | /* Explicitly calculate location */ 77 | { $$ = new BinaryExpression('+', $1, $3, Loc(@1, @3)) } 78 | 79 | | exp '*' exp 80 | /* Use default result location: @$ */ 81 | { $$ = new BinaryExpression('*', $1, $3, @$) } 82 | 83 | | '(' exp ')' 84 | { $$ = $2 } 85 | 86 | | number 87 | /* Named args and position */ 88 | { $$ = new NumericLiteral($number, @number) } 89 | ; 90 | 91 | number 92 | : NUMBER { $$ = intval(yytext) } 93 | ; 94 | -------------------------------------------------------------------------------- 
/src/plugins/rust/lr/lr-parser-generator-rust.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 7 | const RustParserGeneratorTrait = require('../rust-parser-generator-trait'); 8 | 9 | import fs from 'fs'; 10 | 11 | /** 12 | * Generic Rust template for all LR parsers. 13 | */ 14 | const RUST_LR_PARSER_TEMPLATE = fs.readFileSync( 15 | `${__dirname}/../templates/lr.template.rs`, 16 | 'utf-8', 17 | ); 18 | 19 | /** 20 | * LR parser generator for Rust. 21 | */ 22 | export default class LRParserGeneratorRust extends LRParserGeneratorDefault { 23 | 24 | /** 25 | * Instance constructor. 26 | */ 27 | constructor({ 28 | grammar, 29 | outputFile, 30 | options = {}, 31 | }) { 32 | super({grammar, outputFile, options}) 33 | .setTemplate(RUST_LR_PARSER_TEMPLATE); 34 | 35 | this._lexHandlers = []; 36 | this._productionHandlers = []; 37 | 38 | /** 39 | * Stores all used types of the arguments, and return values. 40 | * This is used to generate `SV` (stack value) enum. 41 | * Init to `Token` type which is always stored on the stack. 42 | * 43 | * enum SV { 44 | * _0(Token), 45 | * _1(...), 46 | * } 47 | */ 48 | this._allTypes = { 49 | Token: 0, 50 | }; 51 | 52 | // Autoinc index in SV. 53 | this._allTypesIndex = 1; 54 | 55 | // Trait provides methods for lex and production handlers. 56 | Object.assign(this, RustParserGeneratorTrait); 57 | } 58 | 59 | /** 60 | * Generates parser code. 61 | */ 62 | generateParserData() { 63 | // Lexical grammar. 64 | this.generateTokenizer(); 65 | 66 | // Syntactic grammar. 67 | this.generateProductions(); 68 | 69 | // Tables. 
70 | this.generateTokensTable(); 71 | this.generateParseTable(); 72 | 73 | this.generateLexHandlers(); 74 | this.generateProductionHandlers(); 75 | this.generateStackValueEnum(); 76 | 77 | // The module include which should include at least 78 | // result type: type TResult = <...>; 79 | this.generateModuleInclude(); 80 | } 81 | }; 82 | -------------------------------------------------------------------------------- /examples/boolean.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Boolean expressions. 3 | * 4 | * ./bin/syntax -g examples/boolean.bnf -m slr1 -p '5 == 5' true 5 | * 6 | * ... -p '5 > 2 and 5 < 10' true 7 | * -p 'x or y' x 8 | * -p '5 == 5 or 2 < 1' true 9 | * -p '5 == 5 and 2 < 1' false 10 | * -p 'true and false' false 11 | * -p 'false or true' true 12 | * -p 'true' true 13 | * -p 'false' false 14 | * -p 'not true' false 15 | * -p 'not false' true 16 | * -p 'not not true' true 17 | */ 18 | 19 | { 20 | "lex": { 21 | "rules": [ 22 | ["\\s+", "/* skip whitespace */"], 23 | 24 | ["or", "return 'or'"], 25 | ["and", "return 'and'"], 26 | ["not", "return 'not'"], 27 | 28 | ["true", "return 'true'"], 29 | ["false", "return 'false'"], 30 | 31 | ["<=", "return '<='"], 32 | [">=", "return '>='"], 33 | ["<", "return '<'"], 34 | [">", "return '>'"], 35 | ["==", "return '=='"], 36 | 37 | ["\\(", "return '('"], 38 | ["\\)", "return ')'"], 39 | 40 | ["\\d+", "return 'NUMBER'"], 41 | ["[a-zA-Z0-9]", "return 'ID'"], 42 | ] 43 | }, 44 | 45 | "bnf": { 46 | "Or": [["Or or And", "$$ = $1 || $3"], 47 | ["And", "$$ = $1"]], 48 | 49 | "And": [["And and Compare", "$$ = $1 && $3"], 50 | ["Compare", "$$ = $1"]], 51 | 52 | "Compare": [["Value <= Value", "$$ = $1 <= $3"], 53 | ["Value >= Value", "$$ = $1 >= $3"], 54 | ["Value < Value", "$$ = $1 < $3"], 55 | ["Value > Value", "$$ = $1 > $3"], 56 | ["Value == Value", "$$ = $1 == $3"], 57 | ["Value", "$$ = $1"]], 58 | 59 | "Value": [["not Value", "$$ = !$2"], 60 | ["Primary", "$$ = 
$1"]], 61 | 62 | "Primary": [["NUMBER", "$$ = Number($1)"], 63 | ["ID", "$$ = $1"], 64 | ["true", "$$ = true"], 65 | ["false", "$$ = false"], 66 | ["( Or )", "$$ = $2"]], 67 | } 68 | } -------------------------------------------------------------------------------- /examples/calc-loc.rb.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Captures location info. Ruby version. 3 | * 4 | * In order to capture locations, pass the `--loc` option. 5 | * 6 | * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc. 7 | * A named accessors are available as well: @foo, @bar. 8 | * 9 | * A location is an object of structure: 10 | * 11 | * { 12 | * :start_offset, 13 | * :end_offset, 14 | * :start_line, 15 | * :end_line, 16 | * :start_column, 17 | * :end_colum, 18 | * } 19 | * 20 | * The resulting location is in the @$. It is calculated automatically from 21 | * first and last symbol on a RHS handle, and it also can be overridden 22 | * manually. 
23 | * 24 | * ./bin/syntax -g examples/calc-loc.rb.bnf -m slr1 -o ~/Parser.rb -w --loc 25 | * 26 | */ 27 | 28 | %lex 29 | 30 | %% 31 | 32 | \s+ return "" # skip whitespace 33 | \d+ return "NUMBER" 34 | 35 | /lex 36 | 37 | %{ 38 | 39 | class BinaryExpression 40 | attr_reader :loc, :left, :right, :op 41 | 42 | def initialize(op, left, right, loc) 43 | @op = op 44 | @left = left 45 | @right = right 46 | @loc = loc 47 | end 48 | end 49 | 50 | def Loc(s, e) 51 | return { 52 | :start_offset => s[:start_offset], 53 | :end_offset => e[:end_offset], 54 | :start_line => s[:start_line], 55 | :end_line => e[:end_line], 56 | :start_column => s[:start_column], 57 | :end_column => e[:end_column], 58 | } 59 | end 60 | 61 | class NumericLiteral 62 | attr_reader :loc, :value 63 | 64 | def initialize(value, loc) 65 | @value = value 66 | @loc = loc 67 | end 68 | end 69 | 70 | YYParse.on_parse_end {|value| 71 | print value.loc 72 | } 73 | 74 | %} 75 | 76 | %left '+' 77 | %left '*' 78 | 79 | %% 80 | 81 | exp 82 | : exp '+' exp 83 | /* Explicitly calculate location */ 84 | { $$ = BinaryExpression.new('+', $1, $3, Loc(@1, @3)) } 85 | 86 | | exp '*' exp 87 | /* Use default result location: @$ */ 88 | { $$ = BinaryExpression.new('*', $1, $3, @$) } 89 | 90 | | '(' exp ')' 91 | { $$ = $2 } 92 | 93 | | number 94 | /* Named args and position */ 95 | { $$ = NumericLiteral.new($number, @number) } 96 | ; 97 | 98 | number 99 | : NUMBER { $$ = yytext.to_i } 100 | ; 101 | -------------------------------------------------------------------------------- /examples/calc-ast.rs.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated parser in Rust. 
3 | * 4 | * ./bin/syntax -g examples/calc-ast.rs.g -m lalr1 -o lib.rs 5 | * 6 | * use syntax::Parser; 7 | * 8 | * let parser = Parser::new(); 9 | * 10 | * let ast = parser.parse("2 + 2 * 2"); 11 | */ 12 | 13 | %lex 14 | 15 | %% 16 | 17 | \s+ /* skip whitespace */ return ""; 18 | 19 | \d+ return "NUMBER"; 20 | 21 | "+" return "+"; 22 | "*" return "*"; 23 | 24 | "(" return "("; 25 | ")" return ")"; 26 | 27 | /lex 28 | 29 | %left + 30 | %left * 31 | 32 | %{ 33 | 34 | /** 35 | * Recursive generic `Node` enum structure. 36 | */ 37 | #[derive(Debug)] 38 | pub enum Node { 39 | 40 | Literal(i32), 41 | 42 | Binary { 43 | op: &'static str, 44 | left: Box, 45 | right: Box, 46 | }, 47 | } 48 | 49 | /** 50 | * Final result type returned from `parse` method call. 51 | */ 52 | pub type TResult = Node; 53 | 54 | /** 55 | * Hook executed on parse begin. 56 | */ 57 | fn on_parse_begin(_parser: &mut Parser, string: &str) { 58 | println!("Parsing: {:?}", string); 59 | } 60 | 61 | /** 62 | * Hook executed on parse end. 63 | */ 64 | fn on_parse_end(_parser: &mut Parser, result: &TResult) { 65 | println!("Parsed: {:?}", result); 66 | } 67 | 68 | %} 69 | 70 | %% 71 | 72 | Expr 73 | : Expr + Expr { 74 | 75 | // Types of used args ($1, $2, ...), and return type: 76 | |$1: Node; $3: Node| -> Node; 77 | 78 | $$ = Node::Binary { 79 | op: "+", 80 | left: Box::new($1), 81 | right: Box::new($3), 82 | } 83 | } 84 | 85 | | Expr * Expr { 86 | 87 | |$1: Node; $3: Node| -> Node; 88 | 89 | $$ = Node::Binary { 90 | op: "*", 91 | left: Box::new($1), 92 | right: Box::new($3), 93 | } 94 | } 95 | 96 | | ( Expr ) { 97 | 98 | // No need to define argument types, since we don't do any 99 | // operations here, and just propagate $2 further. 
100 | 101 | $$ = $2; 102 | 103 | } 104 | 105 | | NUMBER { 106 | 107 | || -> Node; 108 | 109 | let n = yytext.parse::().unwrap(); 110 | 111 | $$ = Node::Literal(n); 112 | 113 | }; 114 | 115 | -------------------------------------------------------------------------------- /examples/hdl.g: -------------------------------------------------------------------------------- 1 | /** 2 | * HDL (Hardware description language) syntactic grammar. 3 | * 4 | * How to run: 5 | * 6 | * ./bin/syntax -g examples/hdl.g -m lalr1 -f examples/and.hdl 7 | */ 8 | 9 | %lex 10 | 11 | %% 12 | 13 | \/\/.* /* skip comments */ 14 | \/\*(.|\s)*?\*\/ /* skip comments */ 15 | 16 | \s+ /* skip whitespace */ 17 | 18 | CHIP return 'CHIP' 19 | IN return 'IN' 20 | OUT return 'OUT' 21 | PARTS return 'PARTS' 22 | 23 | \w+ return 'ID' 24 | 25 | /lex 26 | 27 | %{ 28 | 29 | /** 30 | * List of inputs for this chip. 31 | */ 32 | let inputs = []; 33 | 34 | /** 35 | * List of outputs for this chip. 36 | */ 37 | let outputs = []; 38 | 39 | /** 40 | * Actual definitions.
41 | */ 42 | let parts = []; 43 | 44 | %} 45 | 46 | %% 47 | 48 | Chip 49 | : CHIP Name '{' Sections '}' { 50 | $$ = { 51 | type: 'Chip', 52 | name: $2, 53 | inputs, 54 | outputs, 55 | parts, 56 | }; 57 | } 58 | ; 59 | 60 | Sections 61 | : Section Section Section 62 | ; 63 | 64 | Section 65 | : Inputs 66 | | Outputs 67 | | Parts 68 | ; 69 | 70 | Inputs 71 | : IN Names ';' { 72 | inputs.push(...$2); 73 | } 74 | ; 75 | 76 | Outputs 77 | : OUT Names ';' { 78 | outputs.push(...$2); 79 | } 80 | ; 81 | 82 | Parts 83 | : PARTS ':' ChipCalls { 84 | parts.push(...$3); 85 | } 86 | ; 87 | 88 | Names 89 | : Name 90 | { $$ = [$1]; } 91 | 92 | | Names ',' Name 93 | { $1.push($3); $$ = $1; } 94 | ; 95 | 96 | Name 97 | : ID 98 | | CHIP 99 | | IN 100 | | OUT 101 | | PARTS 102 | ; 103 | 104 | ChipCalls 105 | : ChipCall 106 | { $$ = [$1] } 107 | 108 | | ChipCalls ChipCall 109 | { $1.push($2); $$ = $1 } 110 | ; 111 | 112 | ChipCall 113 | : ID '(' ArgsList ')' ';' { 114 | $$ = { 115 | type: 'ChipCall', 116 | name: $1, 117 | arguments: $3, 118 | } 119 | } 120 | ; 121 | 122 | ArgsList 123 | : Arg 124 | { $$ = [$1] } 125 | 126 | | ArgsList ',' Arg 127 | { $1.push($3); $$ = $1 } 128 | ; 129 | 130 | Arg 131 | : ID '=' ID { 132 | $$ = { 133 | type: 'Argument', 134 | name: $1, 135 | value: $3, 136 | } 137 | } 138 | ; -------------------------------------------------------------------------------- /examples/grammar.clr1: -------------------------------------------------------------------------------- 1 | /** 2 | * This grammar cannot be parsed by LR(0), by SLR(1), or 3 | * by LALR(1): each of them results in "reduce-reduce" conflicts. 4 | * 5 | * 6 | * LR(0) mode, see `r3/r4` conflicts: 7 | * 8 | * ./bin/syntax --grammar examples/grammar.clr1 --mode lr0 --table 9 | * 10 | * ┌───┬───────┬───────┬───────┬───┬───┬───┐ 11 | * │ │ 'a' │ 'b' │ $ │ S │ A │ B │ 12 | * ├───┼───────┼───────┼───────┼───┼───┼───┤ 13 | * │ 0 │ r3/r4 │ r3/r4 │ r3/r4 │ 7 │ 1 │ 2 │ 14 | * 15 | * .... .....
.... 16 | * 17 | * │ 9 │ r2 │ r2 │ r2 │ │ │ │ 18 | * └───┴───────┴───────┴───────┴───┴───┴───┘ 19 | * 20 | * 21 | * SLR(1) mode, see one `r3/r4` conflict less, but still conflicts: 22 | * 23 | * ./bin/syntax --grammar examples/grammar.clr1 --mode slr1 --table 24 | * 25 | * ┌───┬───────┬───────┬───────┬───┬───┬───┐ 26 | * │ │ 'a' │ 'b' │ $ │ S │ A │ B │ 27 | * ├───┼───────┼───────┼───────┼───┼───┼───┤ 28 | * │ 0 │ r3/r4 │ r3/r4 │ │ 7 │ 1 │ 2 │ 29 | * 30 | * .... ..... .... 31 | * 32 | * │ 9 │ r2 │ r2 │ r2 │ │ │ │ 33 | * └───┴───────┴───────┴───────┴───┴───┴───┘ 34 | * 35 | * CLR(1) mode, no conflicts: 36 | * 37 | * ./bin/syntax --grammar examples/grammar.clr1 --mode clr1 --table 38 | * 39 | * ┌───┬─────┬─────┬─────┬───┬───┬───┐ 40 | * │ │ 'a' │ 'b' │ $ │ S │ A │ B │ 41 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 42 | * │ 0 │ r3 │ r4 │ │ 7 │ 1 │ 2 │ 43 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 44 | * │ 1 │ s3 │ │ │ │ │ │ 45 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 46 | * │ 2 │ │ s5 │ │ │ │ │ 47 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 48 | * │ 3 │ │ r3 │ │ │ 4 │ │ 49 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 50 | * │ 4 │ │ s8 │ │ │ │ │ 51 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 52 | * │ 5 │ r4 │ │ │ │ │ 6 │ 53 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 54 | * │ 6 │ s9 │ │ │ │ │ │ 55 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 56 | * │ 7 │ │ │ acc │ │ │ │ 57 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 58 | * │ 8 │ │ │ r1 │ │ │ │ 59 | * ├───┼─────┼─────┼─────┼───┼───┼───┤ 60 | * │ 9 │ │ │ r2 │ │ │ │ 61 | * └───┴─────┴─────┴─────┴───┴───┴───┘ 62 | */ 63 | 64 | %% 65 | 66 | S -> A 'a' A 'b' 67 | | B 'b' B 'a' 68 | ; 69 | 70 | A -> /*epsilon*/ ; 71 | B -> /*epsilon*/ ; 72 | -------------------------------------------------------------------------------- /src/grammar/__tests__/grammar-mode-test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import 
GrammarMode from '../grammar-mode'; 7 | import {MODES} from '../grammar-mode'; 8 | 9 | describe('grammar-mode', () => { 10 | 11 | it('LR', () => { 12 | // Default LR0 13 | let mode = new GrammarMode(); 14 | expect(mode.getRaw()).toBe(MODES.LR0); 15 | expect(mode.isLR()).toBe(true); 16 | expect(mode.isLR0()).toBe(true); 17 | expect(mode.toString()).toBe('LR(0)'); 18 | 19 | mode = new GrammarMode(MODES.LR0); 20 | expect(mode.getRaw()).toBe(MODES.LR0); 21 | expect(mode.isLR()).toBe(true); 22 | expect(mode.isLR0()).toBe(true); 23 | expect(mode.toString()).toBe('LR(0)'); 24 | 25 | mode = new GrammarMode(MODES.SLR1); 26 | expect(mode.getRaw()).toBe(MODES.SLR1); 27 | expect(mode.isLR()).toBe(true); 28 | expect(mode.isSLR1()).toBe(true); 29 | expect(mode.toString()).toBe('SLR(1)'); 30 | 31 | mode = new GrammarMode(MODES.LALR1); 32 | expect(mode.getRaw()).toBe(MODES.LALR1); 33 | expect(mode.isLR()).toBe(true); 34 | expect(mode.isLALR1()).toBe(true); 35 | expect(mode.toString()).toBe('LALR(1)'); 36 | 37 | mode = new GrammarMode(MODES.CLR1); 38 | expect(mode.getRaw()).toBe(MODES.CLR1); 39 | expect(mode.isLR()).toBe(true); 40 | expect(mode.isCLR1()).toBe(true); 41 | expect(mode.toString()).toBe('CLR(1)'); 42 | }); 43 | 44 | it('LL', () => { 45 | const mode = new GrammarMode(MODES.LL1); 46 | expect(mode.getRaw()).toBe(MODES.LL1); 47 | expect(mode.isLR()).toBe(false); 48 | expect(mode.isLL()).toBe(true); 49 | expect(mode.toString()).toBe('LL(1)'); 50 | }); 51 | 52 | it('lookahead set', () => { 53 | let mode = new GrammarMode(MODES.LL1); 54 | expect(mode.usesLookaheadSet()).toBe(false); 55 | 56 | mode = new GrammarMode(MODES.LR0); 57 | expect(mode.usesLookaheadSet()).toBe(false); 58 | 59 | mode = new GrammarMode(MODES.SLR1); 60 | expect(mode.usesLookaheadSet()).toBe(false); 61 | 62 | mode = new GrammarMode(MODES.LALR1_BY_CLR1); 63 | expect(mode.usesLookaheadSet()).toBe(true); 64 | 65 | mode = new GrammarMode(MODES.LALR1_BY_SLR1); 66 | expect(mode.usesLookaheadSet()).toBe(false); 
67 | 68 | mode = new GrammarMode(MODES.LALR1); 69 | expect(mode.usesLookaheadSet()).toBe(false); 70 | 71 | mode = new GrammarMode(MODES.CLR1); 72 | expect(mode.usesLookaheadSet()).toBe(true); 73 | }); 74 | 75 | }); -------------------------------------------------------------------------------- /src/plugins/python/templates/ll.template.py: -------------------------------------------------------------------------------- 1 | ## 2 | # LL(1) parser generated by the Syntax tool. 3 | # 4 | # https://www.npmjs.com/package/syntax-cli 5 | # 6 | # npm install -g syntax-cli 7 | # 8 | # syntax-cli --help 9 | # 10 | # To regenerate run: 11 | # 12 | # syntax-cli \ 13 | # --grammar ~/path-to-grammar-file \ 14 | # --mode LL1 \ 15 | # --output ~/parsermodule.py 16 | ## 17 | 18 | yytext = '' 19 | yyleng = 0 20 | __ = None 21 | 22 | EOF = '$' 23 | 24 | def on_parse_begin(string): 25 | pass 26 | 27 | def on_parse_end(parsed): 28 | pass 29 | 30 | {{{MODULE_INCLUDE}}} 31 | 32 | {{{PRODUCTION_HANDLERS}}} 33 | 34 | ps = {{{PRODUCTIONS}}} 35 | tks = {{{TOKENS}}} 36 | tbl = {{{TABLE}}} 37 | 38 | s = None 39 | 40 | {{{TOKENIZER}}} 41 | 42 | def set_tokenizer(custom_tokenizer): 43 | global _tokenizer 44 | _tokenizer = custom_tokenizer 45 | 46 | def get_tokenizer(): 47 | return _tokenizer 48 | 49 | def parse(string): 50 | global s 51 | on_parse_begin(string) 52 | 53 | if _tokenizer is None: 54 | raise Exception('_tokenizer instance wasn\'t specified.') 55 | 56 | _tokenizer.init_string(string) 57 | 58 | s = [EOF, {{{START}}}] 59 | 60 | t = _tokenizer.get_next_token() 61 | to = None 62 | tt = None 63 | 64 | while True: 65 | to = s.pop() 66 | tt = tks[t['type']] 67 | 68 | if (to == tt): 69 | t = _tokenizer.get_next_token() 70 | continue 71 | 72 | der(to, t, tt) 73 | 74 | if not _tokenizer.has_more_tokens() and len(s) <= 1: 75 | break 76 | 77 | while len(s) != 1: 78 | der(s.pop(), t, tt) 79 | 80 | 81 | if s[0] != EOF or t['type'] != EOF: 82 | _parse_error('stack is not empty: ' + str(s) + ', ' + 
str(t['value'])) 83 | 84 | return True 85 | 86 | def der(to, t, tt): 87 | npn = tbl[to][tt] 88 | 89 | if npn is None: 90 | _unexpected_token(t) 91 | 92 | s.extend(ps[int(npn)][0]) 93 | 94 | def _unexpected_token(token): 95 | if token['type'] == EOF: 96 | _unexpected_end_of_input() 97 | 98 | _tokenizer.throw_unexpected_token( 99 | token['value'], 100 | token['start_line'], 101 | token['start_column'] 102 | ) 103 | 104 | def _unexpected_end_of_input(): 105 | _parse_error('Unexpected end of input.') 106 | 107 | def _parse_error(message): 108 | raise Exception('SyntaxError: ' + str(message)) 109 | -------------------------------------------------------------------------------- /examples/module-include.rb.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Module includes. Ruby version. 3 | * 4 | * The "moduleInclude" directive allows including an arbitrary code at the 5 | * beginning of the generated parser file. As an example, can be the code 6 | * to require modules for corresponding AST nodes, or direct AST nodes 7 | * definitions. 8 | * 9 | * The code may define callbacks for several parse events, in particular 10 | * `on_parse_begin`, and `on_parse_end`: 11 | * 12 | * YYParse.on_parse_begin {|string| 13 | * puts 'Parsing: ' + string 14 | * } 15 | * 16 | * ./bin/syntax -g ./examples/module-include.rb.g -m slr1 -o './CalcParser.rb' 17 | * 18 | * require '/CalcParser.rb' 19 | * 20 | * puts CalcParser.parse('2 + 2 * 2') 21 | * 22 | * Custom hook on parse begin. Parsing: 2 + 2 * 2 23 | * Custom hook on parse end. Parsed: # 24 | */ 25 | 26 | { 27 | "lex": { 28 | "rules": [ 29 | ["\\s+", "# skip whitespace"], 30 | ["\\d+", "return 'NUMBER'"], 31 | ["\\*", "return '*'"], 32 | ["\\+", "return '+'"], 33 | ["\\(", "return '('"], 34 | ["\\)", "return ')'"], 35 | ] 36 | }, 37 | 38 | "moduleInclude": ` 39 | # Can be "require" statments, or direct declarations. 
40 | 41 | class Node 42 | attr_accessor :type 43 | 44 | def initialize(type) 45 | @type = type 46 | end 47 | end 48 | 49 | class BinaryExpression < Node 50 | attr_accessor :left 51 | attr_accessor :right 52 | attr_accessor :op 53 | 54 | def initialize(left, right, op) 55 | super('Binary') 56 | @@left = left 57 | @right = right 58 | @op = op 59 | end 60 | end 61 | 62 | class PrimaryExpression < Node 63 | attr_accessor :value 64 | 65 | def initialize(value) 66 | super('Primary') 67 | @value = value.to_i 68 | end 69 | end 70 | 71 | # Standard hook on parse beging, and end: 72 | 73 | YYParse.on_parse_begin {|string| 74 | puts 'Custom hook on parse begin. Parsing: ' + string 75 | } 76 | 77 | YYParse.on_parse_end {|value| 78 | puts 'Custom hook on parse end. Parsed: ' + value.inspect 79 | } 80 | `, 81 | 82 | "operators": [ 83 | ["left", "+"], 84 | ["left", "*"], 85 | ], 86 | 87 | "bnf": { 88 | "E": [ 89 | ["E + E", "$$ = BinaryExpression.new($1, $3, $2)"], 90 | ["E * E", "$$ = BinaryExpression.new($1, $3, $2)"], 91 | ["NUMBER", "$$ = PrimaryExpression.new($1)"], 92 | ["( E )", "$$ = $2"], 93 | ], 94 | }, 95 | } -------------------------------------------------------------------------------- /src/plugins/example/ll/ll-parser-generator-example.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | /** 7 | * Implementation notes. 8 | * 9 | * 1. Extend `LLParserGeneratorDefault` 10 | * 2. Implement `generateParserData()` 11 | * 3. Implement all specific to the target language 12 | * functionality in the trait file. 13 | */ 14 | 15 | /** 16 | * Base class to extend. 17 | */ 18 | const LLParserGeneratorDefault = require(ROOT + 'll/ll-parser-generator-default').default; 19 | 20 | /** 21 | * A trait file usually implements some very specific to a target language 22 | * constructs, and transformations. 
23 | */ 24 | const ExampleParserGeneratorTrait = require('../example-parser-generator-trait'); 25 | 26 | import fs from 'fs'; 27 | import path from 'path'; 28 | 29 | /** 30 | * Generic template for all LL parsers in the Example language. 31 | */ 32 | const EXAMPLE_LL_PARSER_TEMPLATE = fs.readFileSync( 33 | `${__dirname}/../templates/ll.template.example`, 34 | 'utf-8', 35 | ); 36 | 37 | /** 38 | * LL parser generator for Example language. 39 | */ 40 | export default class LLParserGeneratorExample extends LLParserGeneratorDefault { 41 | 42 | /** 43 | * Instance constructor. 44 | */ 45 | constructor({ 46 | grammar, 47 | outputFile, 48 | options = {}, 49 | }) { 50 | super({grammar, outputFile, options}) 51 | .setTemplate(EXAMPLE_LL_PARSER_TEMPLATE); 52 | 53 | /** 54 | * Contains the lexical rule handlers: _lexRule1, _lexRule2, etc. 55 | * It's populated by the trait file. 56 | */ 57 | this._lexHandlers = []; 58 | 59 | /** 60 | * Contains production handlers: _handler1, _handler2, etc. 61 | * It's populated by the trait file. 62 | */ 63 | this._productionHandlers = []; 64 | 65 | /** 66 | * Actual class name of your parser. Here we infer from the output filename. 67 | */ 68 | this._parserClassName = path.basename( 69 | outputFile, 70 | path.extname(outputFile), 71 | ); 72 | 73 | /** 74 | * The trait provides methods for lex and production handlers, as well 75 | * as some very specific code generation for the target language. 76 | */ 77 | Object.assign(this, ExampleParserGeneratorTrait); 78 | } 79 | 80 | /** 81 | * Generates parser code. 
82 | */ 83 | generateParserData() { 84 | super.generateParserData(); 85 | this.generateLexHandlers(); 86 | this.generateProductionHandlers(); 87 | this.generateParserClassName(this._parserClassName); 88 | } 89 | }; 90 | -------------------------------------------------------------------------------- /src/plugins/example/lr/lr-parser-generator-example.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | /** 7 | * Implementation notes. 8 | * 9 | * 1. Extend `LRParserGeneratorDefault` 10 | * 2. Implement `generateParserData()` 11 | * 3. Implement all specific to the target language 12 | * functionality in the trait file. 13 | */ 14 | 15 | /** 16 | * Base class to extend. 17 | */ 18 | const LRParserGeneratorDefault = require(ROOT + 'lr/lr-parser-generator-default').default; 19 | 20 | /** 21 | * A trait file usually implements some very specific to a target language 22 | * constructs, and transformations. 23 | */ 24 | const ExampleParserGeneratorTrait = require('../example-parser-generator-trait'); 25 | 26 | import fs from 'fs'; 27 | import path from 'path'; 28 | 29 | /** 30 | * Generic template for all LR parsers in the Example language. 31 | */ 32 | const EXAMPLE_LR_PARSER_TEMPLATE = fs.readFileSync( 33 | `${__dirname}/../templates/lr.template.example`, 34 | 'utf-8', 35 | ); 36 | 37 | /** 38 | * LR parser generator for Example language. 39 | */ 40 | export default class LRParserGeneratorExample extends LRParserGeneratorDefault { 41 | 42 | /** 43 | * Instance constructor. 44 | */ 45 | constructor({ 46 | grammar, 47 | outputFile, 48 | options = {}, 49 | }) { 50 | super({grammar, outputFile, options}) 51 | .setTemplate(EXAMPLE_LR_PARSER_TEMPLATE); 52 | 53 | /** 54 | * Contains the lexical rule handlers: _lexRule1, _lexRule2, etc. 55 | * It's populated by the trait file. 
56 | */ 57 | this._lexHandlers = []; 58 | 59 | /** 60 | * Contains production handlers: _handler1, _handler2, etc. 61 | * It's populated by the trait file. 62 | */ 63 | this._productionHandlers = []; 64 | 65 | /** 66 | * Actual class name of your parser. Here we infer from the output filename. 67 | */ 68 | this._parserClassName = path.basename( 69 | outputFile, 70 | path.extname(outputFile), 71 | ); 72 | 73 | /** 74 | * The trait provides methods for lex and production handlers, as well 75 | * as some very specific code generation for the target language. 76 | */ 77 | Object.assign(this, ExampleParserGeneratorTrait); 78 | } 79 | 80 | /** 81 | * Generates parser code. 82 | */ 83 | generateParserData() { 84 | super.generateParserData(); 85 | this.generateLexHandlers(); 86 | this.generateProductionHandlers(); 87 | this.generateParserClassName(this._parserClassName); 88 | } 89 | }; 90 | -------------------------------------------------------------------------------- /src/templates/ll.template.js: -------------------------------------------------------------------------------- 1 | /** 2 | * LL(1) parser generated by the Syntax tool. 
3 | * 4 | * https://www.npmjs.com/package/syntax-cli 5 | * 6 | * npm install -g syntax-cli 7 | * 8 | * syntax-cli --help 9 | * 10 | * To regenerate run: 11 | * 12 | * syntax-cli \ 13 | * --grammar ~/path-to-grammar-file \ 14 | * --mode LL1 \ 15 | * --output ~/path-to-output-parser-file.js 16 | */ 17 | 18 | 'use strict'; 19 | 20 | let yytext; 21 | let yyleng; 22 | let __; 23 | 24 | const EOF = '$'; 25 | 26 | const ps = {{{PRODUCTIONS}}}; 27 | const tks = {{{TOKENS}}}; 28 | const tbl = {{{TABLE}}}; 29 | 30 | const s = []; 31 | 32 | let tokenizer; 33 | {{{TOKENIZER}}} 34 | 35 | const yyparse = { 36 | parse(string) { 37 | yyparse.onParseBegin(string); 38 | 39 | if (!tokenizer) { 40 | throw new Error(`Tokenizer instance wasn't specified.`); 41 | } 42 | 43 | tokenizer.initString(string); 44 | 45 | s.length = 0; 46 | s.push(EOF, {{{START}}}); 47 | 48 | let t = tokenizer.getNextToken(); 49 | let to = null; 50 | let tt = null; 51 | 52 | do { 53 | to = s.pop(); 54 | tt = tks[t.type]; 55 | 56 | if (to === tt) { 57 | t = tokenizer.getNextToken(); 58 | continue; 59 | } 60 | 61 | der(to, t, tt); 62 | } while (tokenizer.hasMoreTokens() || s.length > 1); 63 | 64 | while (s.length !== 1) { 65 | der(s.pop(), t, tt); 66 | } 67 | 68 | if (s[0] !== EOF || t.type !== EOF) { 69 | parseError(`stack is not empty: ${s}, ${t.value}`); 70 | } 71 | 72 | return true; 73 | }, 74 | 75 | setTokenizer(customTokenizer) { 76 | tokenizer = customTokenizer; 77 | return yyparse; 78 | }, 79 | 80 | getTokenizer() { 81 | return tokenizer; 82 | }, 83 | 84 | onParseBegin(string) {}, 85 | onParseEnd(parsed) {}, 86 | }; 87 | 88 | {{{MODULE_INCLUDE}}} 89 | 90 | function der(to, t, tt) { 91 | let npn = tbl[to][tt]; 92 | if (!npn) { 93 | unexpectedToken(t); 94 | } 95 | s.push(...ps[npn][0]); 96 | } 97 | 98 | function unexpectedToken(token) { 99 | if (token.type === EOF) { 100 | unexpectedEndOfInput(); 101 | } 102 | 103 | tokenizer.throwUnexpectedToken( 104 | token.value, 105 | token.startLine, 106 | 
token.startColumn 107 | ); 108 | } 109 | 110 | function unexpectedEndOfInput() { 111 | parseError(`Unexpected end of input.`); 112 | } 113 | 114 | function parseError(message) { 115 | throw new SyntaxError(message); 116 | } 117 | 118 | module.exports = yyparse; -------------------------------------------------------------------------------- /examples/json.grammar.js: -------------------------------------------------------------------------------- 1 | /** 2 | * How to run: 3 | * 4 | * Basic example: 5 | * 6 | * ./bin/syntax \ 7 | * --grammar examples/json.grammar.js \ 8 | * --mode slr1 \ 9 | * --parse '{"x": 10}' 10 | * 11 | * Parsing table, canonical collection or LR items, etc: 12 | * 13 | * ./bin/syntax \ 14 | * --grammar examples/json.grammar.js \ 15 | * --mode slr1 \ 16 | * --collection \ 17 | * --table \ 18 | * --parse '{"x": 10, "y": {"z": [1, 2, 3]}}' 19 | */ 20 | 21 | { 22 | "_info": "Based on: https://github.com/zaach/jison/blob/master/examples/json.js", 23 | 24 | "lex": { 25 | "macros": { 26 | "digit": "[0-9]", 27 | "esc": "\\\\", 28 | "int": "-?(?:[0-9]|[1-9][0-9]+)", 29 | "exp": "(?:[eE][-+]?[0-9]+)", 30 | "frac": "(?:\\.[0-9]+)" 31 | }, 32 | "rules": [ 33 | ["\\s+", "/* skip whitespace */"], 34 | ["{int}{frac}?{exp}?\\b", "return 'NUMBER';"], 35 | ["\"(?:{esc}[\"bfnrt/{esc}]|{esc}u[a-fA-F0-9]{4}|[^\"{esc}])*\"", "return 'STRING';"], 36 | ["\\{", "return '{'"], 37 | ["\\}", "return '}'"], 38 | ["\\[", "return '['"], 39 | ["\\]", "return ']'"], 40 | [",", "return ','"], 41 | [":", "return ':'"], 42 | ["true\\b", "return 'TRUE'"], 43 | ["false\\b", "return 'FALSE'"], 44 | ["null\\b", "return 'NULL'"] 45 | ] 46 | }, 47 | 48 | "tokens": "STRING NUMBER { } [ ] , : TRUE FALSE NULL", 49 | "start": "JSONText", 50 | 51 | "bnf": { 52 | "JSONText": [ "JSONValue" ], 53 | 54 | "JSONString": [ "STRING" ], 55 | 56 | "JSONNullLiteral": [ "NULL" ], 57 | 58 | "JSONNumber": [ "NUMBER" ], 59 | 60 | "JSONBooleanLiteral": [ "TRUE", "FALSE" ], 61 | 62 | "JSONValue": [ 
"JSONNullLiteral", 63 | "JSONBooleanLiteral", 64 | "JSONString", 65 | "JSONNumber", 66 | "JSONObject", 67 | "JSONArray" ], 68 | 69 | "JSONObject": [ "{ }", 70 | "{ JSONMemberList }" ], 71 | 72 | "JSONMember": [ "JSONString : JSONValue" ], 73 | 74 | "JSONMemberList": [ "JSONMember", 75 | "JSONMemberList , JSONMember" ], 76 | 77 | "JSONArray": [ "[ ]", 78 | "[ JSONElementList ]" ], 79 | 80 | "JSONElementList": [ "JSONValue", 81 | "JSONElementList , JSONValue" ] 82 | } 83 | } -------------------------------------------------------------------------------- /examples/module-include.g.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Module includes. 3 | * 4 | * The "moduleInclude" directive allows including an arbitrary code at the 5 | * beginning of the generated parser file. As an example, can be the code 6 | * to require modules for corresponding AST nodes, or direct AST nodes 7 | * definitions. 8 | * 9 | * The code may define callbacks for several parse events, attaching them 10 | * to the `yyparse` object. 
For example: 11 | * 12 | * yyparse.onParseBegin = (string) => { 13 | * console.log('Parsing:', string); 14 | * }; 15 | * 16 | * ./bin/syntax -g ./examples/module-include.g.js -m slr1 -o './parser.js' 17 | * 18 | * > require('./parser').parse('2 + 2 * 2'); 19 | * 20 | * BinaryExpression { 21 | * type: 'Binary', 22 | * left: PrimaryExpression { type: 'Primary', value: '2' }, 23 | * right: BinaryExpression { 24 | * type: 'Binary', 25 | * left: PrimaryExpression { type: 'Primary', value: '2' }, 26 | * right: PrimaryExpression { type: 'Primary', value: '2' }, 27 | * op: '*', 28 | * }, 29 | * op: '+', 30 | * } 31 | */ 32 | 33 | { 34 | "lex": { 35 | "rules": [ 36 | ["\\s+", "/* skip whitespace */"], 37 | ["\\d+", "return 'NUMBER'"], 38 | ["\\*", "return '*'"], 39 | ["\\+", "return '+'"], 40 | ["\\(", "return '('"], 41 | ["\\)", "return ')'"], 42 | ] 43 | }, 44 | 45 | "moduleInclude": ` 46 | // Can be "require" statments, or direct declarations. 47 | 48 | class Node { 49 | constructor(type) { 50 | this.type = type; 51 | } 52 | } 53 | 54 | class BinaryExpression extends Node { 55 | constructor(left, right, op) { 56 | super('Binary'); 57 | this.left = left; 58 | this.right = right; 59 | this.op = op; 60 | } 61 | } 62 | 63 | class PrimaryExpression extends Node { 64 | constructor(value) { 65 | super('Primary'); 66 | this.value = value; 67 | } 68 | } 69 | 70 | yyparse.onParseBegin = (string) => { 71 | console.log('Custom hook on parse begin. Parsing:', string, '\\n'); 72 | }; 73 | 74 | yyparse.onParseEnd = (value) => { 75 | console.log('Custom hook on parse end. 
Parsed:\\n\\n', value, '\\n'); 76 | }; 77 | 78 | `, 79 | 80 | "operators": [ 81 | ["left", "+"], 82 | ["left", "*"], 83 | ], 84 | 85 | "bnf": { 86 | "E": [ 87 | ["E + E", "$$ = new BinaryExpression($1, $3, $2)"], 88 | ["E * E", "$$ = new BinaryExpression($1, $3, $2)"], 89 | ["NUMBER", "$$ = new PrimaryExpression($1)"], 90 | ["( E )", "$$ = $2"], 91 | ], 92 | }, 93 | } -------------------------------------------------------------------------------- /examples/json.ast.js: -------------------------------------------------------------------------------- 1 | /** 2 | * How to run: 3 | * 4 | * ./bin/syntax \ 5 | * --grammar examples/json.ast.js \ 6 | * --mode slr1 \ 7 | * --parse '{"x": 10, "y": {"z": [1, 2, true]}}' 8 | */ 9 | 10 | { 11 | "lex": { 12 | "macros": { 13 | "digit": "[0-9]", 14 | "esc": "\\\\", 15 | "int": "-?(?:[0-9]|[1-9][0-9]+)", 16 | "exp": "(?:[eE][-+]?[0-9]+)", 17 | "frac": "(?:\\.[0-9]+)" 18 | }, 19 | "rules": [ 20 | ["\\s+", "/* skip whitespace */"], 21 | ["{int}{frac}?{exp}?\\b", "return 'NUMBER';"], 22 | ["\"(?:{esc}[\"bfnrt/{esc}]|{esc}u[a-fA-F0-9]{4}|[^\"{esc}])*\"", "yytext = yytext.substr(1,yyleng-2); return 'STRING';"], 23 | ["\\{", "return '{'"], 24 | ["\\}", "return '}'"], 25 | ["\\[", "return '['"], 26 | ["\\]", "return ']'"], 27 | [",", "return ','"], 28 | [":", "return ':'"], 29 | ["true\\b", "return 'TRUE'"], 30 | ["false\\b", "return 'FALSE'"], 31 | ["null\\b", "return 'NULL'"] 32 | ] 33 | }, 34 | 35 | "tokens": "STRING NUMBER { } [ ] , : TRUE FALSE NULL", 36 | "start": "JSONText", 37 | 38 | "bnf": { 39 | "JSONString": [[ "STRING", "$$ = yytext;" ]], 40 | 41 | "JSONNumber": [[ "NUMBER", "$$ = Number(yytext);" ]], 42 | 43 | "JSONNullLiteral": [[ "NULL", "$$ = null;" ]], 44 | 45 | "JSONBooleanLiteral": [[ "TRUE", "$$ = true;" ], 46 | [ "FALSE", "$$ = false;" ]], 47 | 48 | 49 | "JSONText": [[ "JSONValue", "return $$ = $1;" ]], 50 | 51 | "JSONValue": [[ "JSONNullLiteral", "$$ = $1;" ], 52 | [ "JSONBooleanLiteral", "$$ = $1;" ], 53 | [ 
"JSONString", "$$ = $1;" ], 54 | [ "JSONNumber", "$$ = $1;" ], 55 | [ "JSONObject", "$$ = $1;" ], 56 | [ "JSONArray", "$$ = $1;" ]], 57 | 58 | "JSONObject": [[ "{ }", "$$ = {};" ], 59 | [ "{ JSONMemberList }", "$$ = $2;" ]], 60 | 61 | "JSONMember": [[ "JSONString : JSONValue", "$$ = [$1, $3];" ]], 62 | 63 | "JSONMemberList": [[ "JSONMember", "$$ = {}; $$[$1[0]] = $1[1];" ], 64 | [ "JSONMemberList , JSONMember", "$$ = $1; $1[$3[0]] = $3[1];" ]], 65 | 66 | "JSONArray": [[ "[ ]", "$$ = [];" ], 67 | [ "[ JSONElementList ]", "$$ = $2;" ]], 68 | 69 | "JSONElementList": [[ "JSONValue", "$$ = [$1];" ], 70 | [ "JSONElementList , JSONValue", "$$ = $1; $1.push($3);" ]] 71 | } 72 | } -------------------------------------------------------------------------------- /src/grammar/__tests__/grammar-symbol-test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import GrammarSymbol from '../grammar-symbol'; 7 | import {EOF, EPSILON} from '../../special-symbols'; 8 | 9 | describe('grammar-symbol', () => { 10 | it('singleton', () => { 11 | expect(GrammarSymbol.get('A')).toBe(GrammarSymbol.get('A')); 12 | }); 13 | 14 | it('instance', () => { 15 | expect(new GrammarSymbol('A')).not.toBe(new GrammarSymbol('A')); 16 | }); 17 | 18 | it('terminal', () => { 19 | expect(new GrammarSymbol(`"a"`).isTerminal()).toBe(true); 20 | expect(new GrammarSymbol(`'a'`).isTerminal()).toBe(true); 21 | expect(new GrammarSymbol(`A`).isTerminal()).toBe(false); 22 | }); 23 | 24 | it('terminal value', () => { 25 | expect(new GrammarSymbol(`"a"`).getTerminalValue()).toBe('a'); 26 | }); 27 | 28 | it('quoted terminal', () => { 29 | expect(new GrammarSymbol(`"a"`).quotedTerminal()).toBe(`'"a"'`); 30 | expect(new GrammarSymbol(`'a'`).quotedTerminal()).toBe(`"'a'"`); 31 | }); 32 | 33 | it('non-terminal', () => { 34 | expect(new GrammarSymbol(`A`).isNonTerminal()).toBe(true); 35 | 
expect(new GrammarSymbol(`"a"`).isNonTerminal()).toBe(false); 36 | expect(new GrammarSymbol(`'a'`).isNonTerminal()).toBe(false); 37 | }); 38 | 39 | it('raw symbol', () => { 40 | expect(new GrammarSymbol(`A`).getSymbol()).toBe('A'); 41 | expect(new GrammarSymbol(`"a"`).getSymbol()).toBe(`"a"`); 42 | expect(new GrammarSymbol(`'a'`).getSymbol()).toBe(`'a'`); 43 | }); 44 | 45 | it('raw symbol', () => { 46 | expect(new GrammarSymbol(`A`).getSymbol()).toBe('A'); 47 | expect(new GrammarSymbol(`"a"`).getSymbol()).toBe(`"a"`); 48 | expect(new GrammarSymbol(`'a'`).getSymbol()).toBe(`'a'`); 49 | }); 50 | 51 | it('compare symbol', () => { 52 | expect(new GrammarSymbol(`A`).isSymbol('A')).toBe(true); 53 | expect(new GrammarSymbol(`A`).isSymbol('B')).toBe(false); 54 | expect(new GrammarSymbol(`A`).isSymbol(`'a'`)).toBe(false); 55 | expect(new GrammarSymbol(`"a"`).isSymbol(`"a"`)).toBe(true); 56 | expect(new GrammarSymbol(`'a'`).isSymbol(`'a'`)).toBe(true); 57 | expect(new GrammarSymbol(`'a'`).isSymbol(`'b'`)).toBe(false); 58 | expect(new GrammarSymbol(`'a'`).isSymbol(`"b"`)).toBe(false); 59 | expect(new GrammarSymbol(`'a'`).isSymbol('A')).toBe(false); 60 | }); 61 | 62 | it('special symbols', () => { 63 | // EOF. 64 | expect(new GrammarSymbol(EOF).isEOF()).toBe(true); 65 | expect(GrammarSymbol.isEOF(EOF)).toBe(true); 66 | 67 | // Epsilon. 
68 | expect(new GrammarSymbol(EPSILON).isEpsilon()).toBe(true); 69 | expect(GrammarSymbol.isEpsilon(EPSILON)).toBe(true); 70 | }); 71 | }); 72 | -------------------------------------------------------------------------------- /src/lr/lr-parser-generator-default.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import BaseParserGenerator from '../base-parser-generator'; 7 | import CanonicalCollection from './canonical-collection'; 8 | import LRParsingTable from './lr-parsing-table'; 9 | 10 | import fs from 'fs'; 11 | 12 | /** 13 | * Generic JS template for all LR parsers. 14 | */ 15 | const LR_PARSER_TEMPLATE = fs.readFileSync( 16 | `${__dirname}/../templates/lr.template.js`, 17 | 'utf-8' 18 | ); 19 | 20 | /** 21 | * LR parser generator. Creates a parser module for a given grammar, and 22 | * saves it to the `outputFile`. 23 | */ 24 | export default class LRParserGeneratorDefault extends BaseParserGenerator { 25 | /** 26 | * Instance constructor. 27 | */ 28 | constructor({grammar, outputFile, options = {}}) { 29 | if (!grammar.getMode().isLR()) { 30 | throw new Error(`LR parser generator: LR grammar is expected.`); 31 | } 32 | 33 | const table = new LRParsingTable({ 34 | canonicalCollection: new CanonicalCollection({grammar}), 35 | grammar, 36 | resolveConflicts: options.resolveConflicts, 37 | }); 38 | 39 | super({grammar, outputFile, options}) 40 | .setTable(table) 41 | .setTemplate(LR_PARSER_TEMPLATE); 42 | } 43 | 44 | /** 45 | * Default format in the [ ] array notation. 
46 | */ 47 | generateProductionsData() { 48 | return this.generateRawProductionsData().map(data => `[${data}]`); 49 | } 50 | 51 | /** 52 | * Format of the production is: 53 | * [Non-terminal index, RHS.length, semanticAction] 54 | */ 55 | generateRawProductionsData() { 56 | return this.getGrammar() 57 | .getProductions() 58 | .map(production => { 59 | let LHS = production 60 | .getLHS() 61 | .getSymbol() 62 | .replace(/'/g, "\\'"); 63 | let RHSLength = production.isEpsilon() ? 0 : production.getRHS().length; 64 | let semanticAction = this.buildSemanticAction(production); 65 | 66 | let result = [this.getEncodedNonTerminal(LHS), RHSLength]; 67 | 68 | if (semanticAction) { 69 | result.push(semanticAction); 70 | } 71 | 72 | return result; 73 | }); 74 | } 75 | 76 | /** 77 | * Actual parsing table. 78 | */ 79 | generateParseTableData() { 80 | let originalTable = this._table.get(); 81 | let table = []; 82 | 83 | for (let state in originalTable) { 84 | let row = {}; 85 | let originalRow = originalTable[state]; 86 | 87 | for (let symbol in originalRow) { 88 | let entry = originalRow[symbol]; 89 | row[this.getEncodedSymbol(symbol)] = entry; 90 | } 91 | 92 | table[state] = row; 93 | } 94 | 95 | return table; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /examples/s-expression.cpp.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * S-expression parser. 
3 | * 4 | * ./bin/syntax -g parser/s-expression.cpp.bnf -m LALR1 -o ~/SExpressionParser.h 5 | * 6 | * Examples: 7 | * 8 | * Atom: 42, foo, bar, "Hello World" 9 | * 10 | * List: (), (+ 5 x), (print "hello") 11 | * 12 | * Usage: 13 | * 14 | * string exp{R"( (var x (+ "hello" 10)) )"}; 15 | * 16 | * SExpressionParser parser; 17 | * 18 | * auto ast = parser.parse(exp); 19 | * 20 | * cout << "("; 21 | * cout << ast->list[0]->string << " "; // var 22 | * cout << ast->list[1]->string << " ("; // x ( 23 | * cout << ast->list[2]->list[0]->string << " "; // + 24 | * cout << ast->list[2]->list[1]->string << " "; // "hello" 25 | * cout << ast->list[2]->list[2]->number << "))\n"; // 10 )) 26 | * 27 | * Result: (var x (+ "hello" 10)) 28 | */ 29 | 30 | // ----------------------------------------------- 31 | // Lexical grammar (tokens): 32 | 33 | %lex 34 | 35 | %% 36 | 37 | \s+ %empty 38 | 39 | \"[^\"]*\" STRING 40 | 41 | \d+ NUMBER 42 | 43 | [\w\-+*=<>/]+ SYMBOL 44 | 45 | /lex 46 | 47 | // ----------------------------------------------- 48 | // Syntactic grammar (BNF): 49 | 50 | %{ 51 | 52 | #include <string> 53 | #include <vector> 54 | 55 | /** 56 | * Expression type. 57 | */ 58 | enum class ExpType { 59 | Number, 60 | String, 61 | Symbol, 62 | List, 63 | }; 64 | 65 | /** 66 | * Expression. 
67 | */ 68 | class Exp { 69 | public: 70 | ExpType type; 71 | 72 | int number; 73 | std::string string; 74 | std::vector<std::shared_ptr<Exp>> list; 75 | 76 | // Numbers: 77 | Exp(int number) : type(ExpType::Number), number(number) {} 78 | 79 | // Strings, Symbols: 80 | Exp(std::string& strVal) { 81 | if (strVal[0] == '"') { 82 | type = ExpType::String; 83 | string = strVal.substr(1, strVal.size() - 2); 84 | } else { 85 | type = ExpType::Symbol; 86 | string = strVal; 87 | } 88 | } 89 | 90 | // Lists: 91 | Exp(std::vector<std::shared_ptr<Exp>> list) : type(ExpType::List), list(list) {} 92 | 93 | // List init: 94 | Exp(const std::initializer_list<std::shared_ptr<Exp>> &list) : type(ExpType::List), list(list) {} 95 | 96 | ~Exp() {} 97 | }; 98 | 99 | using Value = std::shared_ptr<Exp>; 100 | 101 | %} 102 | 103 | %% 104 | 105 | Exp 106 | : Atom 107 | | List 108 | ; 109 | 110 | Atom 111 | : NUMBER { $$ = std::make_shared<Exp>(std::stoi($1)) } 112 | | STRING { $$ = std::make_shared<Exp>($1) } 113 | | SYMBOL { $$ = std::make_shared<Exp>($1) } 114 | ; 115 | 116 | List 117 | : '(' ListEntries ')' { $$ = $2 } 118 | ; 119 | 120 | ListEntries 121 | : ListEntries Exp { $1->list.push_back($2); $$ = $1 } 122 | | %empty { $$ = std::make_shared<Exp>(std::vector<std::shared_ptr<Exp>>{}) } 123 | ; 124 | -------------------------------------------------------------------------------- /examples/test.lang: -------------------------------------------------------------------------------- 1 | /** 2 | * Example Lang file. 3 | * 4 | * BNF grammar is in: examples/lang.bnf 5 | * Lex grammar is in: examples/lang.lex 6 | */ 7 | 8 | // Variable declaration. 9 | let x = 10, y = PI * 2; 10 | 11 | /** 12 | * Function declaration. 13 | */ 14 | fn square(x) { 15 | return x * x; 16 | } 17 | 18 | /** 19 | * Lambda function. 20 | */ 21 | let square = (x) -> { 22 | return x * x; 23 | }; 24 | 25 | /** 26 | * Function call. 27 | */ 28 | square(2); 29 | 30 | /** 31 | * do-while loop. 32 | */ 33 | do { 34 | foo += x; 35 | x -= 1; 36 | } while (x > 0); 37 | 38 | /** 39 | * while-loop. 
40 | */ 41 | while (x < 10) { 42 | foo -= x; 43 | x += 1; 44 | } 45 | 46 | /** 47 | * for-loop. 48 | */ 49 | for (let i = 0; i < 10; i += 1) { 50 | foo += i; 51 | } 52 | 53 | /** 54 | * break/continue. 55 | */ 56 | for (;;) { 57 | x += 1; 58 | if (x < 10) { 59 | continue; 60 | } 61 | x = 0; 62 | break; 63 | } 64 | 65 | /** 66 | * for-prop loop. 67 | */ 68 | for (let prop : data) { 69 | print(prop); 70 | } 71 | 72 | /** 73 | * for-prop loop. 74 | */ 75 | for (v : data); 76 | 77 | /** 78 | * Array literals. 79 | */ 80 | let data = [1, 2, 3]; 81 | 82 | /** 83 | * Object literals. 84 | */ 85 | let config = { 86 | ip: "127.0.0.1", 87 | port: 3306, 88 | user: { 89 | name: "John Doe", 90 | alias: "jdoe", 91 | }, 92 | 0: index, 93 | "value scores": [1, 2], 94 | }; 95 | 96 | /** 97 | * Simple block. 98 | */ 99 | { 100 | let x = 10; 101 | let y = 0.5; 102 | 103 | fn Point(x, y) { 104 | return { 105 | "x": x, 106 | "y": y, 107 | }; 108 | } 109 | } 110 | 111 | /** 112 | * Object literal. 113 | */ 114 | ({ 115 | "x": x, 116 | "y": y, 117 | }); 118 | 119 | /** 120 | * Two nested empty blocks. 121 | */ 122 | 123 | { 124 | { 125 | 126 | } 127 | } 128 | 129 | /** 130 | * Keywords as property names (object followed by keyword `if`). 131 | */ 132 | ({ 133 | if: 10, 134 | while: 20, 135 | }); 136 | 137 | /** 138 | * Block followed by keyword `if`. 139 | */ 140 | { 141 | if (x) { 142 | print(1); 143 | } 144 | } 145 | 146 | /** 147 | * Member expressions. 148 | */ 149 | 150 | foo.bar = 10; 151 | foo["bar"] = 20; 152 | foo["bar"].baz = 30; 153 | 154 | root.action(10); 155 | 156 | /** 157 | * Class declaration. 
158 | */ 159 | 160 | class Point { 161 | fn constructor(x, y) { 162 | this.x = x; 163 | this.y = y; 164 | } 165 | 166 | fn getX() { 167 | return this.x; 168 | } 169 | 170 | fn getY() { 171 | return this.y; 172 | } 173 | } 174 | 175 | class Point3D extends Point { 176 | fn constructor(x, y, z) { 177 | super(x, y); 178 | this.z = z; 179 | } 180 | 181 | fn getZ() { 182 | return this.z; 183 | } 184 | } 185 | 186 | new Point.Other(); 187 | 188 | let p = new Point3D(10, 20, 30); 189 | 190 | 191 | -------------------------------------------------------------------------------- /examples/calc-loc.cs.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Captures location info. C# version. 3 | * 4 | * In order to capture locations, pass the `--loc` option. 5 | * 6 | * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc. 7 | * Named accessors are available as well: @foo, @bar. 8 | * 9 | * A location is an object of structure: 10 | * 11 | * { 12 | * startOffset, 13 | * endOffset, 14 | * startLine, 15 | * endLine, 16 | * startColumn, 17 | * endColumn, 18 | * } 19 | * 20 | * The resulting location is in the @$. It is calculated automatically from 21 | * first and last symbol on a RHS handle, and it also can be overridden 22 | * manually. 
23 | * 24 | * ./bin/syntax -g examples/calc-loc.cs.bnf -m slr1 -o ~/Parser.cs --loc 25 | * 26 | */ 27 | 28 | %lex 29 | 30 | %% 31 | 32 | \s+ /* skip whitespace */ return ""; 33 | \d+ return "NUMBER"; 34 | 35 | /lex 36 | 37 | %{ 38 | 39 | namespace SyntaxParser { 40 | 41 | public class BinaryExpression 42 | { 43 | public object Left; 44 | public object Right; 45 | public string Op; 46 | public dynamic Loc; 47 | 48 | public BinaryExpression(dynamic op, object left, object right, dynamic loc) 49 | { 50 | Op = op.ToString(); 51 | Left = left; 52 | Right = right; 53 | Loc = loc; 54 | } 55 | } 56 | 57 | public class Loc 58 | { 59 | public Loc() {} 60 | 61 | public int StartOffset; 62 | public int EndOffset; 63 | public int StartLine; 64 | public int EndLine; 65 | public int StartColumn; 66 | public int EndColumn; 67 | 68 | // Same as default result location. 69 | public static Loc capture(dynamic start, dynamic end) 70 | { 71 | return new Loc() 72 | { 73 | StartOffset = start.StartOffset, 74 | EndOffset = end.EndOffset, 75 | StartLine = start.StartLine, 76 | EndLine = end.EndLine, 77 | StartColumn = start.StartColumn, 78 | EndColumn = end.EndColumn 79 | }; 80 | } 81 | } 82 | 83 | public class NumericLiteral 84 | { 85 | public int Value; 86 | public dynamic Loc; 87 | 88 | public NumericLiteral(dynamic value, dynamic loc) 89 | { 90 | Value = Convert.ToInt32(value); 91 | Loc = loc; 92 | } 93 | } 94 | 95 | } 96 | 97 | %} 98 | 99 | %left '+' 100 | %left '*' 101 | 102 | %% 103 | 104 | exp 105 | : exp '+' exp 106 | /* Explicitly calculate location */ 107 | { $$ = new BinaryExpression("+", $1, $3, Loc.capture(@1, @3)) } 108 | 109 | | exp '*' exp 110 | /* Use default result location: @$ */ 111 | { $$ = new BinaryExpression("*", $1, $3, @$) } 112 | 113 | | '(' exp ')' 114 | { $$ = $2 } 115 | 116 | | number 117 | /* Named args and position */ 118 | { $$ = new NumericLiteral($number, @number) } 119 | ; 120 | 121 | number 122 | : NUMBER { $$ = Convert.ToInt32(yytext) } 123 | ; 124 
| -------------------------------------------------------------------------------- /src/__tests__/rust-calc/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "bitflags" 5 | version = "1.0.4" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | 8 | [[package]] 9 | name = "calc-bin" 10 | version = "0.1.0" 11 | dependencies = [ 12 | "calc-syntax 0.1.0", 13 | ] 14 | 15 | [[package]] 16 | name = "calc-syntax" 17 | version = "0.1.0" 18 | dependencies = [ 19 | "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 20 | "onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 21 | ] 22 | 23 | [[package]] 24 | name = "cc" 25 | version = "1.0.36" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | 28 | [[package]] 29 | name = "lazy_static" 30 | version = "1.3.0" 31 | source = "registry+https://github.com/rust-lang/crates.io-index" 32 | 33 | [[package]] 34 | name = "libc" 35 | version = "0.2.54" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | 38 | [[package]] 39 | name = "onig" 40 | version = "4.3.2" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | dependencies = [ 43 | "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 44 | "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 45 | "libc 0.2.54 (registry+https://github.com/rust-lang/crates.io-index)", 46 | "onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 47 | ] 48 | 49 | [[package]] 50 | name = "onig_sys" 51 | version = "69.1.0" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | dependencies = [ 54 | "cc 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)", 55 | "pkg-config 0.3.14 
(registry+https://github.com/rust-lang/crates.io-index)", 56 | ] 57 | 58 | [[package]] 59 | name = "pkg-config" 60 | version = "0.3.14" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | 63 | [metadata] 64 | "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" 65 | "checksum cc 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a0c56216487bb80eec9c4516337b2588a4f2a2290d72a1416d930e4dcdb0c90d" 66 | "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" 67 | "checksum libc 0.2.54 (registry+https://github.com/rust-lang/crates.io-index)" = "c6785aa7dd976f5fbf3b71cfd9cd49d7f783c1ff565a858d71031c6c313aa5c6" 68 | "checksum onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a646989adad8a19f49be2090374712931c3a59835cb5277b4530f48b417f26e7" 69 | "checksum onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388410bf5fa341f10e58e6db3975f4bea1ac30247dd79d37a9e5ced3cb4cc3b0" 70 | "checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" 71 | -------------------------------------------------------------------------------- /src/__tests__/rust-plugin-test.js: -------------------------------------------------------------------------------- 1 | import * as shelljs from 'shelljs'; 2 | import path from 'path'; 3 | 4 | // rust-calc test package uses Rust 2018, minimum version for that rust edition is 1.31 5 | // https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html 6 | const rustVersionRequired = [1, 31]; 7 | 8 | const whichCargo = shelljs.which('cargo'); 9 | const whichMake = shelljs.which('make'); 10 | const rustInstalled = whichCargo && whichCargo.code === 0; 11 | const makeInstalled = whichMake && 
/**
 * Checks whether version `aNumbers` is greater than or equal to `bNumbers`.
 *
 * Both versions are arrays of numeric components, most significant first,
 * e.g. `[1, 31, 0]`. A component missing from the shorter array is treated
 * as `0`, so `[1, 31]` and `[1, 31, 0]` compare as equal.
 *
 * @param {number[]} aNumbers - Candidate version (e.g. the installed one).
 * @param {number[]} bNumbers - Version to compare against (e.g. the minimum).
 * @returns {boolean} `true` when `aNumbers >= bNumbers`; `false` otherwise,
 *   and also `false` when either argument is missing or when `aNumbers`
 *   is empty (an empty array means the version could not be detected).
 */
function semverGte(aNumbers, bNumbers) {
  if (!aNumbers || !bNumbers) {
    return false;
  }

  // An undetected version must never satisfy a requirement.
  if (aNumbers.length === 0) {
    return false;
  }

  const maxLen = Math.max(aNumbers.length, bNumbers.length);

  for (let i = 0; i < maxLen; i++) {
    // Missing components count as 0. Comparing against `undefined` would make
    // both `>` and `<` false and wrongly report the versions as equal.
    const aNum = aNumbers[i] || 0;
    const bNum = bNumbers[i] || 0;

    if (aNum > bNum) {
      return true;
    }
    if (aNum < bNum) {
      return false;
    }
  }

  // Every component matched.
  return true;
}
| xdescribe('rust plugin mock', () => { 85 | it('noop', () => { 86 | console.warn( 87 | `make and rust toolchain version ${rustVersionRequired.join( 88 | '.' 89 | )} or greater are not installed.`, 90 | `Tests for rust plugin will be skipped.` 91 | ); 92 | }); 93 | }); 94 | } 95 | -------------------------------------------------------------------------------- /examples/cnf.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Conjunctive normal form parser. 3 | * 4 | * https://en.wikipedia.org/wiki/Conjunctive_normal_form 5 | * 6 | * ./bin/syntax -g examples/cnf.g -m slr1 -p '(A v B v ¬ C) ^ (D v E)' 7 | * 8 | 9 | * Parsing mode: SLR(1). 10 | * 11 | * Parsing: (A v B v ¬ C) ^ (D v E) 12 | * 13 | * ✓ Accepted 14 | * 15 | * Parsed value: 16 | * 17 | * { 18 | * "type": "Conjunction", 19 | * "disjunctions": [ 20 | * { 21 | * "type": "Disjunction", 22 | * "variables": [ 23 | * { 24 | * "type": "Variable", 25 | * "value": "A" 26 | * }, 27 | * { 28 | * "type": "Variable", 29 | * "value": "B" 30 | * }, 31 | * { 32 | * "type": "Negation", 33 | * "variable": { 34 | * "type": "Variable", 35 | * "value": "C" 36 | * } 37 | * } 38 | * ] 39 | * }, 40 | * { 41 | * "type": "Disjunction", 42 | * "variables": [ 43 | * { 44 | * "type": "Variable", 45 | * "value": "D" 46 | * }, 47 | * { 48 | * "type": "Variable", 49 | * "value": "E" 50 | * } 51 | * ] 52 | * } 53 | * ] 54 | * } 55 | * 56 | * To generate a parser: 57 | * 58 | * ./bin/syntax -g examples/cnf.g -m slr1 -o cnf-parser.js 59 | * 60 | * In Node: 61 | * 62 | * require('cnf-parser.js').parse('(A v B v ¬ C) ^ (D v E)'); 63 | * 64 | */ 65 | 66 | { 67 | "lex": { 68 | "rules": [ 69 | ["\\s+", "/* skip whitespace */"], 70 | ["v", "return 'OR';"], 71 | ["\\^", "return 'AND';"], 72 | ["¬", "return 'NOT';"], 73 | ["[a-zA-Z]+", "return 'ID';"], 74 | ["\\(", "return 'LPAREN';"], 75 | ["\\)", "return 'RPAREN';"], 76 | ] 77 | }, 78 | 79 | "bnf": { 80 | "Conjunction": [["Conjunction AND 
/**
 * Default (JavaScript) LL(1) parser generator.
 *
 * Builds the LL parsing table for the grammar and fills the LL parser
 * template with the encoded productions, the table and the start symbol.
 */
export default class LLParserGeneratorDefault extends BaseParserGenerator {
  /**
   * @param {Object} config
   * @param {Object} config.grammar - grammar, must be in an LL mode
   * @param {string} config.outputFile - where the parser module is written
   * @param {Object} [config.options]
   * @throws {Error} when the grammar mode is not LL
   */
  constructor({grammar, outputFile, options = {}}) {
    const isLLGrammar = grammar.getMode().isLL();
    if (!isLLGrammar) {
      throw new Error(`LL parser generator: LL(1) grammar is expected.`);
    }

    super({grammar, outputFile, options});

    this.setTable(new LLParsingTable({grammar})).setTemplate(
      LL_PARSER_TEMPLATE
    );
  }

  /**
   * Generates the common parser data, then appends the start symbol.
   */
  generateParserData() {
    super.generateParserData();
    this._generateStartSymbol();
  }

  /**
   * Returns one entry per production in the form `[reversedEncodedRHS]`.
   *
   * The RHS is stored reversed, so it can be pushed onto the parsing stack
   * as is at derivation. Epsilon productions get an empty RHS. A `[-1]`
   * placeholder is put at index 0 to make production numbers 1-based.
   * Semantic actions are not part of the LL production data.
   */
  generateRawProductionsData() {
    const encodeRHSSymbol = rhsSymbol =>
      this.getEncodedSymbol(rhsSymbol.getSymbol()).toString();

    const encodedProductions = this.getGrammar()
      .getProductions()
      .map(production => [
        production.isEpsilon()
          ? []
          : production
              .getRHS()
              .map(rhsSymbol => encodeRHSSymbol(rhsSymbol))
              .reverse(),
      ]);

    return [[-1], ...encodedProductions];
  }

  /**
   * Productions data with every entry serialized to a JSON string.
   */
  generateProductionsData() {
    const rawProductions = this.generateRawProductionsData();
    return rawProductions.map(rawProduction => JSON.stringify(rawProduction));
  }

  /**
   * Re-keys the parsing table: non-terminals (rows) and symbols (columns)
   * are replaced with their encoded values, entries are kept untouched.
   */
  generateParseTableData() {
    const sourceTable = this._table.get();
    const encodedTable = {};

    for (const nonTerminal in sourceTable) {
      const sourceRow = sourceTable[nonTerminal];
      const encodedRow = {};

      for (const symbol in sourceRow) {
        const entry = sourceRow[symbol];
        encodedRow[this.getEncodedSymbol(symbol)] = entry;
      }

      encodedTable[this.getEncodedNonTerminal(nonTerminal)] = encodedRow;
    }

    return encodedTable;
  }

  /**
   * Writes the encoded start symbol (as a quoted literal) to the `START`
   * template slot.
   */
  _generateStartSymbol() {
    const grammarStart = this.getGrammar().getStartSymbol();
    const encodedStart = this.getEncodedNonTerminal(grammarStart);
    this.writeData('START', `'${encodedStart}'`);
  }
}
8 | * 9 | * The code may define callbacks for several parse events, in particular 10 | * `on_parse_begin`, and `on_parse_end`: 11 | * 12 | * def on_parse_begin(string): 13 | * print('Parsing:', string) 14 | * 15 | * ./bin/syntax -g ./examples/module-include.py.g -m slr1 -o './parser.py' 16 | * 17 | * >>> import parser 18 | * >>> parser.parse('2 + 2 * 2') 19 | * 20 | * ('Custom hook on parse begin. Parsing:', '2 + 2 * 2') 21 | * ('Custom hook on parse end. Parsed:', ) 22 | * 23 | */ 24 | 25 | { 26 | "lex": { 27 | "rules": [ 28 | ["\\s+", "# skip whitespace"], 29 | ["\\d+", "return 'NUMBER'"], 30 | ["\\*", "return '*'"], 31 | ["\\+", "return '+'"], 32 | ["\\(", "return '('"], 33 | ["\\)", "return ')'"], 34 | ] 35 | }, 36 | 37 | "moduleInclude": ` 38 | # Can be "require" statments, or direct declarations. 39 | 40 | class Node(object): 41 | def __init__(self, type): 42 | self.type = type 43 | 44 | class BinaryExpression(Node): 45 | def __init__(self, left, right, op): 46 | super(BinaryExpression, self).__init__('Binary') 47 | self.left = left 48 | self.right = right 49 | self.op = op 50 | 51 | class PrimaryExpression(Node): 52 | def __init__(self, value): 53 | super(PrimaryExpression, self).__init__('Primary') 54 | self.value = int(value) 55 | 56 | # Standard hook on parse beging, and end: 57 | 58 | _string = None 59 | 60 | def on_parse_begin(string): 61 | global _string 62 | _string = string 63 | print('Custom hook on parse begin. Parsing:', string) 64 | 65 | def on_parse_end(value): 66 | print('Custom hook on parse end. 
console.info(colors.bold('Building...\n'));

// Need to checkout to working copies of the generated parser if
// they got corrupted with current changes. The parsers are regenerated
// further in the build steps.
exec(`git checkout "src/generated/lex-parser.gen.js"`);
exec(`git checkout "src/generated/bnf-parser.gen.js"`);

// ----------------------------------------------------------
// 1. Git hooks.

console.info(colors.bold('[1/6] Installing Git hooks...\n'));

// Setup pre-commit hook.
// The previous hook is removed with `rm -f` rather than the external
// `unlink` command: it stays silent when the hook doesn't exist yet
// (e.g. on the first build), and doesn't depend on a platform binary.
console.info('  - pre-commit: .git/hooks/pre-commit');
rm('-f', '.git/hooks/pre-commit');
chmod('+x', './scripts/git-pre-commit');
ln('-s', '../../scripts/git-pre-commit', '.git/hooks/pre-commit');

// Setup pre-push hook.
console.info('  - pre-push:   .git/hooks/pre-push\n');
rm('-f', '.git/hooks/pre-push');
chmod('+x', './scripts/git-pre-push');
ln('-s', '../../scripts/git-pre-push', '.git/hooks/pre-push');

// ----------------------------------------------------------
// 2. Templates

console.info(colors.bold('[2/6] Installing templates...\n'));

// Always start from a clean `dist` directory.
rm('-rf', 'dist');
mkdir('dist');
mkdir('dist/templates');

const templates = ls('src/templates').map(template => '  - ' + template);
console.info(templates.join('\n'));

cp('-r', 'src/templates/*', 'dist/templates/');

// ----------------------------------------------------------
// 3. Plugins

console.info(colors.bold('\n[3/6] Installing plugins...\n'));
const plugins = ls('src/plugins/').filter(file => file !== 'README.md');

// Only the templates are copied here; the plugins' JS code
// is transpiled together with the rest of `src/` at step 4.
plugins.forEach(plugin => {
  console.info('  - ' + plugin);
  mkdir('-p', `dist/plugins/${plugin}/templates`);
  cp(
    '-r',
    `src/plugins/${plugin}/templates/*`,
    `dist/plugins/${plugin}/templates/`
  );
});

// ----------------------------------------------------------
// 4. Transpiling JS code

// An optional first CLI argument is passed through to Babel.
console.info(colors.bold('\n[4/6] Transpiling JS code...\n'));
exec(
  `"node_modules/.bin/babel" ${process.argv[2] ||
    ''} src/ --out-dir dist/ --ignore "**/templates/*" --ignore "**/__tests__/*"`
);

// ----------------------------------------------------------
// 5. Rebuilding LEX parser

console.info(colors.bold('\n[5/6] Rebuilding LEX parser...'));
exec(
  `node "./bin/syntax" -g src/generated/lex.bnf -l src/generated/lex.lex -m lalr1 -o src/generated/lex-parser.gen.js`
);
exec(
  `"node_modules/.bin/babel" src/generated/lex-parser.gen.js -o dist/generated/lex-parser.gen.js`
);

// ----------------------------------------------------------
// 6. Rebuilding BNF parser

console.info(colors.bold('\n[6/6] Rebuilding BNF parser...'));
exec(
  `node "./bin/syntax" -g src/generated/bnf.g -m lalr1 -o src/generated/bnf-parser.gen.js`
);
exec(
  `"node_modules/.bin/babel" src/generated/bnf-parser.gen.js -o dist/generated/bnf-parser.gen.js`
);

console.info(colors.bold('All done.\n'));
/**
 * The code in the module include section is included "as is".
 * It can contain the `ParserEvents` class, which defines parse event handlers.
 */
76 | 77 | class Node { 78 | public String type; 79 | } 80 | 81 | class BinaryNode extends Node { 82 | public Node left; 83 | public Node right; 84 | public String operator; 85 | 86 | public BinaryNode(Object left, Object op, Object right) { 87 | this.type = "BinaryNode"; 88 | this.left = (Node)left; 89 | this.right = (Node)right; 90 | this.operator = (String)op; 91 | } 92 | } 93 | 94 | class LiteralNode extends Node { 95 | public Integer value; 96 | 97 | public LiteralNode(Integer value) { 98 | this.type = "LiteralNode"; 99 | this.value = value; 100 | } 101 | } 102 | 103 | 104 | %} 105 | 106 | // ----------------------------------------------------------------------------- 107 | // Syntactic grammar (BNF) 108 | 109 | %% 110 | 111 | Expression 112 | 113 | : Expression '+' Expression 114 | { 115 | $$ = new BinaryNode($1, $2, $3) 116 | } 117 | 118 | | Expression '*' Expression 119 | { 120 | $$ = new BinaryNode($1, $2, $3) 121 | } 122 | 123 | | NUMBER 124 | { 125 | $$ = new LiteralNode(Integer.valueOf(yytext)) 126 | } 127 | 128 | | '(' Expression ')' 129 | { 130 | $$ = $2 131 | } 132 | ; 133 | -------------------------------------------------------------------------------- /examples/lexer-start-conditions.py.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Start conditions of lex rules. Tokenizer states. Python version. 3 | * 4 | * Tokenizer rules may provide start conditions. Such rules are executed 5 | * only when lexer enters the state corresponding to the names of the 6 | * start conditions. 7 | * 8 | * Start conditions can be inclusive (%s, 0), and exclusive (%x, 1). 9 | * Inclusive conditions also include rules without any start conditions. 10 | * Exclusive conditions do not include other rules when the parser enter 11 | * this state. The rules with `*` condition are always included. 
 * In the grammar below we have a `comment` tokenizer state, which allows us
 * to skip all the comment characters, but still to count number of lines.
##
# Base class of the generated LL(1) parser.
#
# All state is kept in class variables and all methods are class-level,
# so the parser is used as `ParserClass.parse(string)`. The `{{{...}}}`
# placeholders are substituted by the parser generator.
class YYParse
  # Productions data, parser tokens map and parsing table
  # (all filled in by the generator).
  @@ps = {{{PRODUCTIONS}}}
  @@tks = {{{TOKENS}}}
  @@tbl = {{{TABLE}}}

  # Parsing stack; re-initialized on every `parse` call.
  @@s = []

  # Value slot, written via the `__=` setter below.
  @@__ = nil

  # User hooks, registered via `on_parse_begin` / `on_parse_end`.
  @@callbacks = {
    :on_parse_begin => nil,
    :on_parse_end => nil
  }

  # Special "end of input" symbol/token type.
  EOF = '$'

  # Matched text and its length.
  @@yytext = ''
  @@yyleng = 0

  def self.__=(__)
    @@__ = __
  end

  def self.yytext=(yytext)
    @@yytext = yytext
  end

  def self.yytext
    @@yytext
  end

  def self.yyleng=(yyleng)
    @@yyleng = yyleng
  end

  def self.yyleng
    @@yyleng
  end

  # Tokenizer instance; must be set (via `tokenizer=`) before `parse` is called.
  @@tokenizer = nil

  {{{PRODUCTION_HANDLERS}}}

  def self.tokenizer=(tokenizer)
    @@tokenizer = tokenizer
  end

  def self.tokenizer
    @@tokenizer
  end

  # Registers a block which is called with the source string
  # at the beginning of `parse`.
  def self.on_parse_begin(&callback)
    @@callbacks[:on_parse_begin] = callback
  end

  # Registers a block for the parse end event.
  # NOTE(review): the callback is only stored; nothing in this class
  # invokes `@@callbacks[:on_parse_end]` -- confirm whether it is expected
  # to be called from `parse`.
  def self.on_parse_end(&callback)
    @@callbacks[:on_parse_end] = callback
  end

  # Parses a string using the table-driven, stack-based LL(1) algorithm.
  #
  # Returns `true` on success (no parsed value is produced). Raises if the
  # tokenizer was not set, or on a parse error.
  def self.parse(string)
    if (@@callbacks[:on_parse_begin])
      @@callbacks[:on_parse_begin].call(string)
    end

    tokenizer = self.tokenizer

    if not tokenizer
      raise "Tokenizer instance wasn't specified."
    end

    tokenizer.init_string(string)

    # Initial stack: EOF marker at the bottom, the start symbol on top.
    @@s = [self::EOF, {{{START}}}]

    # `t` is the current (lookahead) token.
    t = tokenizer.get_next_token
    # NOTE(review): `st` is assigned but never read in this method.
    st = nil

    # `to` -- symbol popped from the top of the stack,
    # `tt` -- the current token type mapped through `@@tks`.
    to = nil
    tt = nil

    loop do
      to = @@s.pop
      tt = @@tks[t[:type]]

      # Top of the stack equals the current token: consume the token.
      if to == tt
        t = tokenizer.get_next_token
        next
      end

      # Otherwise derive: replace the popped symbol with a production RHS.
      self.der(to, t, tt)

      if not tokenizer.has_more_tokens and @@s.length <= 1
        break
      end
    end

    # NOTE(review): the stack *length* is passed here where `der` expects
    # a stack symbol (cf. `@@s.pop` in the main loop) -- confirm the intent.
    while @@s.length != 1
      self.der(@@s.length, t, tt)
    end

    # Success requires only EOF left on the stack, and EOF as current token.
    if @@s[0] != self::EOF || t[:type] != self::EOF
      self.parse_error('stack is not empty');
    end

    return true;
  end

  # Derivation step: looks up the production number in the parsing table by
  # the symbol `to` and the token type `tt`, and pushes the stored RHS of that
  # production onto the stack. A missing table entry is reported as an
  # unexpected token `t`.
  def self.der(to, t, tt)
    npn = @@tbl[to.to_s][tt.to_s]
    if not npn
      self.unexpected_token(t)
    end
    @@s.push(*@@ps[npn.to_i][0])
  end

  # Reports an unexpected token. EOF is reported as an unexpected end of
  # input (which raises); other tokens are delegated to the tokenizer
  # together with their value and start position.
  def self.unexpected_token(token)
    if token[:type] == self::EOF
      self.unexpected_end_of_input()
    end

    self.tokenizer.throw_unexpected_token(
      token[:value],
      token[:start_line],
      token[:start_column]
    )
  end

  def self.unexpected_end_of_input
    self.parse_error('Unexpected end of input.')
  end

  # Raises a `RuntimeError` with the 'Parse error: ' prefix.
  def self.parse_error(message)
    raise 'Parse error: ' + message
  end
end
LexGrammar from '../lex-grammar'; 7 | import {EOF} from '../../special-symbols'; 8 | 9 | const lexGrammarData = require(__dirname + '/calc.lex'); 10 | const lexGrammar = new LexGrammar(lexGrammarData); 11 | 12 | const rulesToIndices = rules => { 13 | return rules.map(rule => lexGrammar.getRuleIndex(rule)); 14 | }; 15 | 16 | const startConditions = { 17 | INITIAL: 0, 18 | comment: 1, 19 | }; 20 | 21 | const lexRulesByStartConditions = { 22 | INITIAL: [0, 1, 2, 3, 4, 5, 6, 7, 8], 23 | comment: [0, 1, 9, 10, 11], 24 | }; 25 | 26 | describe('lex-grammar', () => { 27 | 28 | it('rules', () => { 29 | const rulesData = lexGrammar.getRules().map(rule => rule.toData()); 30 | expect(rulesData).toEqual(lexGrammarData.rules); 31 | expect(rulesData).toEqual(lexGrammar.getOriginalRules()); 32 | }); 33 | 34 | it('rule by index', () => { 35 | const firstRule = lexGrammar.getRuleByIndex(0); 36 | expect(firstRule).toBe(lexGrammar.getRules()[0]); 37 | }); 38 | 39 | it('index of a rule', () => { 40 | const firstRule = lexGrammar.getRuleByIndex(0); 41 | expect(lexGrammar.getRuleIndex(firstRule)).toBe(0); 42 | }); 43 | 44 | it('start conditions', () => { 45 | expect(lexGrammar.getStartConditions()).toEqual(startConditions); 46 | }); 47 | 48 | it('macros', () => { 49 | expect(lexGrammar.getMacros()).toEqual(lexGrammarData.macros); 50 | }); 51 | 52 | it('expanded macro', () => { 53 | const rule3 = lexGrammar.getRuleByIndex(3); 54 | const id = lexGrammarData.macros.id; 55 | 56 | expect(rule3.getMatcher().source).toEqual(`^${id}+`); 57 | expect(rule3.getOriginalMatcher()).toEqual(`${id}+`); 58 | expect(rule3.getRawMatcher()).toEqual(`^${id}+`); 59 | 60 | // Macro in a rule with a start condition. 61 | const rule11 = lexGrammar.getRuleByIndex(11); 62 | expect(rule11.getOriginalMatcher()).toEqual(`${id}+`); 63 | 64 | // Standard macro. 
    // <<EOF>> -> $
8 | * 9 | * The code may define callbacks for several parse events, in particular 10 | * `onParseBegin`, and `onParseEnd`, attaching to `yyparse`: 11 | * 12 | * yyparse.onParseBegin = (string code) => 13 | * { 14 | * Console.WriteLine("Parsing: " + code); 15 | * }; 16 | * 17 | * ./bin/syntax -g ./examples/module-include.cs.g -m slr1 -o './CalcParser.cs' 18 | * 19 | * using SyntaxParser; 20 | * 21 | * var parser = new CalcParser(); 22 | * 23 | * Console.WriteLine(parser.parse("2 + 2 * 2")); 24 | * 25 | * > Custom hook on parse begin. Parsing: 2 + 2 * 2 26 | * > Custom hook on parse end. Parsed: SyntaxParser.BinaryExpression 27 | * > SyntaxParser.BinaryExpression 28 | */ 29 | 30 | { 31 | "lex": { 32 | "rules": [ 33 | ["\\s+", '/* skip whitespace */ return null'], 34 | ["\\d+", 'return "NUMBER"'], 35 | ["\\*", 'return "*"'], 36 | ["\\+", 'return "+"'], 37 | ["\\(", 'return "("'], 38 | ["\\)", 'return ")"'], 39 | ] 40 | }, 41 | 42 | "moduleInclude": ` 43 | // Can be "using" statments, or direct declarations. 44 | 45 | namespace SyntaxParser 46 | { 47 | 48 | public class Node 49 | { 50 | public string Type; 51 | 52 | public Node(string type) 53 | { 54 | Type = type; 55 | } 56 | } 57 | 58 | public class BinaryExpression : Node 59 | { 60 | public object Left; 61 | public object Right; 62 | public string Op; 63 | 64 | public BinaryExpression(object left, object right, string op): base("Binary") 65 | { 66 | Left = left; 67 | Right = right; 68 | Op = op; 69 | } 70 | } 71 | 72 | public class PrimaryExpression : Node 73 | { 74 | public int Value; 75 | 76 | public PrimaryExpression(string value) : base("Primary") 77 | { 78 | Value = Convert.ToInt32(value); 79 | } 80 | } 81 | 82 | // Setup of the parser hooks is done via Init.run(); 83 | public class Init 84 | { 85 | public static void run() 86 | { 87 | // Standard hook on parse beging, and end: 88 | 89 | yyparse.onParseBegin = (string code) => 90 | { 91 | Console.WriteLine("Custom hook on parse begin. 
Parsing: " + code); 92 | }; 93 | 94 | yyparse.onParseEnd = (object parsed) => 95 | { 96 | Console.WriteLine("Custom hook on parse end. Parsed: " + parsed); 97 | }; 98 | } 99 | } 100 | } 101 | `, 102 | 103 | "operators": [ 104 | ["left", "+"], 105 | ["left", "*"], 106 | ], 107 | 108 | "bnf": { 109 | "E": [ 110 | ["E + E", "$$ = new BinaryExpression($1, $3, $2)"], 111 | ["E * E", "$$ = new BinaryExpression($1, $3, $2)"], 112 | ["NUMBER", "$$ = new PrimaryExpression($1)"], 113 | ["( E )", "$$ = $2"], 114 | ], 115 | }, 116 | } -------------------------------------------------------------------------------- /examples/lexer-start-conditions.rb.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Start conditions of lex rules. Tokenizer states. Ruby version. 3 | * 4 | * Tokenizer rules may provide start conditions. Such rules are executed 5 | * only when lexer enters the state corresponding to the names of the 6 | * start conditions. 7 | * 8 | * Start conditions can be inclusive (%s, 0), and exclusive (%x, 1). 9 | * Inclusive conditions also include rules without any start conditions. 10 | * Exclusive conditions do not include other rules when the parser enter 11 | * this state. The rules with `*` condition are always included. 12 | * 13 | * https://gist.github.com/DmitrySoshnikov/f5e2583b37e8f758c789cea9dcdf238a 14 | * 15 | * When a grammar is defined in the JSON format, the start conditions are 16 | * specified as: 17 | * 18 | * "startConditions": { 19 | * "name": 1, // exclusive 20 | * "other": 0, // inclusive 21 | * } 22 | * 23 | * And a rule itself may specify a list of start conditions as the 24 | * first element: 25 | * 26 | * // This lex-rule is applied only when parser enters `name` state. 27 | * 28 | * [["name"], "\w+", "return 'NAME'"] 29 | * 30 | * At the beginning a lexer is in the `INITIAL` state. A new state is 31 | * entered either using `this.pushState(name)` or `this.begin(name)`. 
To 32 | * exit a state, use `this.popState()`. 33 | * 34 | * In the grammar below we have a `comment` tokenizer state, which allows us 35 | * to skip all the comment characters, but still to count number of lines. 36 | * 37 | * ./bin/syntax -g examples/lexer-start-conditions.rb.g -m slr1 -f ~/test.txt 38 | */ 39 | 40 | // Example of ~/test.txt 41 | // 42 | // 1. 43 | // 2. /* Hello world 44 | // 3. privet 45 | // 4. 46 | // 5. OK **/ 47 | // 6. 48 | // 7. Main 49 | // 8. 50 | // 51 | // Number of lines: 8 52 | 53 | { 54 | "moduleInclude": ` 55 | $lines = 1 56 | 57 | YYParse.on_parse_end {|_value| 58 | puts 'Number of lines: ' + $lines.to_s 59 | } 60 | 61 | `, 62 | 63 | "lex": { 64 | "startConditions": { 65 | "comment": 1, // exclusive 66 | }, 67 | 68 | "rules": [ 69 | 70 | // On `/*` we enter the comment state: 71 | 72 | ["\\/\\*", ` 73 | push_state('comment') 74 | return nil # skip comments 75 | `], 76 | 77 | // On `*/` being in `comment` state we return to the initial state: 78 | 79 | [["comment"], "\\*+\\/", ` 80 | pop_state() 81 | return nil # skip comments` 82 | ], 83 | 84 | // Being inside the `comment` state, skip all chars, except new lines 85 | // which we count. 86 | 87 | [["comment"], "[^*\\n]+", "# skip comments"], 88 | [["comment"], "\\*+[^*\\/\\n]*", "# skip comments"], 89 | 90 | // Count lines in comments.
91 | [["comment"], "\\n", ` 92 | $lines += 1 93 | return nil # skip new lines in comments 94 | `], 95 | 96 | // In INITIAL state, count line numbers as well: 97 | ["\\n", ` 98 | $lines += 1 99 | return nil # skip new lines`], 100 | 101 | [["*"], " +", "# skip spaces in any state "], 102 | 103 | // Main program consisting only of one word "Main" 104 | ["Main", "return 'MAIN'"], 105 | ], 106 | }, 107 | 108 | "bnf": { 109 | "Program": ["MAIN"], 110 | } 111 | } -------------------------------------------------------------------------------- /examples/module-include.php.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Module includes. PHP version. 3 | * 4 | * The "moduleInclude" directive allows including an arbitrary code at the 5 | * beginning of the generated parser file. As an example, can be the code 6 | * to require modules for corresponding AST nodes, or direct AST nodes 7 | * definitions. 8 | * 9 | * The code may define callbacks for several parse events, in particular 10 | * `onParseBegin`, and `onParseEnd`, attaching to `yyparse`: 11 | * 12 | * yyparse::setOnParseBegin(function($string) { 13 | * var_dump('Parsing:', $string); 14 | * }); 15 | * 16 | * ./bin/syntax -g ./examples/module-include.php.g -m slr1 -o './Parser.php' 17 | * 18 | * 30 | * string(6) "Binary" 31 | * ["left"]=> 32 | * object(PrimaryExpression)#4 (2) { 33 | * ["type"]=> 34 | * string(7) "Primary" 35 | * ["value"]=> 36 | * int(2) 37 | * } 38 | * ["right"]=> 39 | * object(BinaryExpression)#7 (4) { 40 | * ["type"]=> 41 | * string(6) "Binary" 42 | * ["left"]=> 43 | * object(PrimaryExpression)#5 (2) { 44 | * ["type"]=> 45 | * string(7) "Primary" 46 | * ["value"]=> 47 | * int(2) 48 | * } 49 | * ["right"]=> 50 | * object(PrimaryExpression)#6 (2) { 51 | * ["type"]=> 52 | * string(7) "Primary" 53 | * ["value"]=> 54 | * int(2) 55 | * } 56 | * ["op"]=> 57 | * string(1) "*" 58 | * } 59 | * ["op"]=> 60 | * string(1) "+" 61 | * } 62 | */ 63 | 64 | { 65 | 
"lex": { 66 | "rules": [ 67 | ["\\s+", "/* skip whitespace */"], 68 | ["\\d+", "return 'NUMBER'"], 69 | ["\\*", "return '*'"], 70 | ["\\+", "return '+'"], 71 | ["\\(", "return '('"], 72 | ["\\)", "return ')'"], 73 | ] 74 | }, 75 | 76 | "moduleInclude": ` 77 | // Can be "require" statments, or direct declarations. 78 | 79 | class Node { 80 | public function __construct($type) { 81 | $this->type = $type; 82 | } 83 | } 84 | 85 | class BinaryExpression extends Node { 86 | public function __construct($left, $right, $op) { 87 | parent::__construct('Binary'); 88 | $this->left = $left; 89 | $this->right = $right; 90 | $this->op = $op; 91 | } 92 | } 93 | 94 | class PrimaryExpression extends Node { 95 | public function __construct($value) { 96 | parent::__construct('Primary'); 97 | $this->value = intval($value); 98 | } 99 | } 100 | 101 | // Standard hook on parse beging, and end: 102 | 103 | yyparse::setOnParseBegin(function($string) { 104 | var_dump('Custom hook on parse begin. Parsing:', $string); 105 | }); 106 | 107 | yyparse::setOnParseEnd(function($value) { 108 | var_dump('Custom hook on parse end. Parsed:', $value); 109 | }); 110 | `, 111 | 112 | "operators": [ 113 | ["left", "+"], 114 | ["left", "*"], 115 | ], 116 | 117 | "bnf": { 118 | "E": [ 119 | ["E + E", "$$ = new BinaryExpression($1, $3, $2)"], 120 | ["E * E", "$$ = new BinaryExpression($1, $3, $2)"], 121 | ["NUMBER", "$$ = new PrimaryExpression($1)"], 122 | ["( E )", "$$ = $2"], 123 | ], 124 | }, 125 | } -------------------------------------------------------------------------------- /examples/lexer-start-conditions.g.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Start conditions of lex rules. Tokenizer states. 3 | * 4 | * Tokenizer rules may provide start conditions. Such rules are executed 5 | * only when lexer enters the state corresponding to the names of the 6 | * start conditions. 
7 | * 8 | * Start conditions can be inclusive (%s, 0), and exclusive (%x, 1). 9 | * Inclusive conditions also include rules without any start conditions. 10 | * Exclusive conditions do not include other rules when the parser enters 11 | * this state. The rules with `*` condition are always included. 12 | * 13 | * https://gist.github.com/DmitrySoshnikov/f5e2583b37e8f758c789cea9dcdf238a 14 | * 15 | * When a grammar is defined in the JSON format, the start conditions are 16 | * specified as: 17 | * 18 | * "startConditions": { 19 | * "name": 1, // exclusive 20 | * "other": 0, // inclusive 21 | * } 22 | * 23 | * And a rule itself may specify a list of start conditions as the 24 | * first element: 25 | * 26 | * // This lex-rule is applied only when parser enters `name` state. 27 | * 28 | * [["name"], "\w+", "return 'NAME'"] 29 | * 30 | * At the beginning a lexer is in the `INITIAL` state. A new state is 31 | * entered either using `this.pushState(name)` or `this.begin(name)`. To 32 | * exit a state, use `this.popState()`. 33 | * 34 | * In the grammar below we have a `comment` tokenizer state, which allows us 35 | * to skip all the comment characters, but still to count number of lines. 36 | * 37 | * ./bin/syntax -g examples/lexer-start-conditions.g.js -m slr1 -f ~/test.js 38 | */ 39 | 40 | // Example of ~/test.js 41 | // 42 | // 1. 43 | // 2. /* Hello world 44 | // 3. privet 45 | // 4. 46 | // 5. OK **/ 47 | // 6. 48 | // 7. Main 49 | // 8. 50 | // 51 | // Number of lines: 8 52 | 53 | { 54 | "moduleInclude": ` 55 | let lines = 1; 56 | 57 | yyparse.onParseBegin = (string) => { 58 | // Print the string with line numbers. 59 | 60 | let code = string 61 | .split('\\n') 62 | .map((s, line) => (line + 1) + '.
' + s) 63 | .join('\\n'); 64 | 65 | console.log(code + '\\n'); 66 | }; 67 | 68 | yyparse.onParseEnd = () => { 69 | console.log('Number of lines: ' + lines + '\\n'); 70 | }; 71 | `, 72 | 73 | "lex": { 74 | "startConditions": { 75 | "comment": 1, // exclusive 76 | }, 77 | 78 | "rules": [ 79 | 80 | // On `/*` we enter the comment state: 81 | 82 | ["\\/\\*", "this.pushState('comment'); /* skip comments */"], 83 | 84 | // On `*/` being in `comment` state we return to the initial state: 85 | 86 | [["comment"], "\\*+\\/", "this.popState(); /* skip comments */"], 87 | 88 | // Being inside the `comment` state, skip all chars, except new lines 89 | // which we count. 90 | 91 | [["comment"], "[^*\\n]+", "/* skip comments */"], 92 | [["comment"], "\\*+[^*/\\n]*", "/* skip comments */"], 93 | 94 | // Count lines in comments. 95 | [["comment"], "\\n", "lines++; /* skip new lines in comments */"], 96 | 97 | // In INITIAL state, count line numbers as well: 98 | ["\\n", "lines++ /* skip new lines */"], 99 | 100 | [["*"], " +", "/* skip spaces in any state */"], 101 | 102 | // Main program consisting only of one word "Main" 103 | ["Main", "return 'MAIN'"], 104 | ], 105 | }, 106 | 107 | "bnf": { 108 | "Program": ["MAIN"], 109 | } 110 | } -------------------------------------------------------------------------------- /src/plugins/php/templates/ll.template.php: -------------------------------------------------------------------------------- 1 | initString($string); 74 | 75 | $s = &self::$s; 76 | $s = [self::EOF, {{{START}}}]; 77 | 78 | $tks = &self::$tks; 79 | $tbl = &self::$tbl; 80 | $ps = &self::$ps; 81 | 82 | $t = $tokenizer->getNextToken(); 83 | $st = null; 84 | 85 | $to = null; 86 | $tt = null; 87 | 88 | do { 89 | $to = array_pop($s); 90 | $tt = $tks[$t['type']]; 91 | 92 | if ($to === $tt) { 93 | $t = $tokenizer->getNextToken(); 94 | continue; 95 | } 96 | 97 | self::der($to, $t, $tt); 98 | } while ($tokenizer->hasMoreTokens() || count($s) > 1); 99 | 100 | while (count($s) !== 1) 
{ 101 | self::der(array_pop($s), $t, $tt); 102 | } 103 | 104 | if ($s[0] !== self::EOF || $t['type'] !== self::EOF) { 105 | self::parseError('stack is not empty'); 106 | } 107 | 108 | return true; 109 | } 110 | 111 | private static function der($to, $t, $tt) { 112 | $npn = self::$tbl[$to][$tt]; 113 | if (!$npn) { 114 | self::unexpectedToken($t); 115 | } 116 | self::$s = array_merge(self::$s, self::$ps[intval($npn)][0]); 117 | } 118 | 119 | private static function unexpectedToken($token) { 120 | if ($token['type'] === self::EOF) { 121 | self::unexpectedEndOfInput(); /* static method: an unqualified call would look up a global function */ 122 | } 123 | 124 | self::getTokenizer()->throwUnexpectedToken( 125 | $token['value'], 126 | $token['startLine'], 127 | $token['startColumn'] 128 | ); 129 | } 130 | 131 | private static function unexpectedEndOfInput() { 132 | self::parseError('Unexpected end of input.'); 133 | } 134 | 135 | private static function parseError($message) { 136 | throw new \Exception('SyntaxError: '.$message); 137 | } 138 | } 139 | 140 | {{{TOKENIZER}}} 141 | 142 | class {{{PARSER_CLASS_NAME}}} extends yyparse {} 143 | -------------------------------------------------------------------------------- /examples/calc-loc.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Captures location info. 3 | * 4 | * In order to capture locations, pass the `--loc` option. 5 | * 6 | * Locations in handlers are accessible via `@` notation, e.g. @1, @2, etc. 7 | * Named accessors are available as well: @foo, @bar. 8 | * 9 | * A location is an object of structure: 10 | * 11 | * { 12 | * startOffset, 13 | * endOffset, 14 | * startLine, 15 | * endLine, 16 | * startColumn, 17 | * endColumn, 18 | * } 19 | * 20 | * The resulting location is in the @$. It is calculated automatically from 21 | * first and last symbol on a RHS handle, and it also can be overridden 22 | * manually.
23 | * 24 | * ./bin/syntax -g examples/calc-loc.bnf -m slr1 -p '15 + 10 * 3' -w --loc 25 | * 26 | * Parsed nodes with captured locations: 27 | * 28 | * { 29 | * "type": "BinaryExpression", 30 | * "op": "+", 31 | * "left": { 32 | * "type": "NumericLiteral", 33 | * "value": 15, 34 | * "loc": { 35 | * "startOffset": 0, 36 | * "endOffset": 2, 37 | * "startLine": 1, 38 | * "endLine": 1, 39 | * "startColumn": 0, 40 | * "endColumn": 2 41 | * } 42 | * }, 43 | * "right": { 44 | * "type": "BinaryExpression", 45 | * "op": "*", 46 | * "left": { 47 | * "type": "NumericLiteral", 48 | * "value": 10, 49 | * "loc": { 50 | * "startOffset": 5, 51 | * "endOffset": 7, 52 | * "startLine": 1, 53 | * "endLine": 1, 54 | * "startColumn": 5, 55 | * "endColumn": 7 56 | * } 57 | * }, 58 | * "right": { 59 | * "type": "NumericLiteral", 60 | * "value": 3, 61 | * "loc": { 62 | * "startOffset": 10, 63 | * "endOffset": 11, 64 | * "startLine": 1, 65 | * "endLine": 1, 66 | * "startColumn": 10, 67 | * "endColumn": 11 68 | * } 69 | * }, 70 | * "loc": { 71 | * "startOffset": 5, 72 | * "endOffset": 11, 73 | * "startLine": 1, 74 | * "endLine": 1, 75 | * "startColumn": 5, 76 | * "endColumn": 11 77 | * } 78 | * }, 79 | * "loc": { 80 | * "startOffset": 0, 81 | * "endOffset": 11, 82 | * "startLine": 1, 83 | * "endLine": 1, 84 | * "startColumn": 0, 85 | * "endColumn": 11 86 | * } 87 | * } 88 | */ 89 | 90 | %lex 91 | 92 | %% 93 | 94 | \s+ return '' /* skip whitespace */ 95 | \d+ return 'NUMBER' 96 | 97 | /lex 98 | 99 | %{ 100 | 101 | function BinaryExpression(op, left, right, loc) { 102 | return { 103 | type: 'BinaryExpression', 104 | op, 105 | left, 106 | right, 107 | loc, 108 | }; 109 | } 110 | 111 | function Loc(s, e) { 112 | // Same as default result location.
113 | return { 114 | startOffset: s.startOffset, 115 | endOffset: e.endOffset, 116 | startLine: s.startLine, 117 | endLine: e.endLine, 118 | startColumn: s.startColumn, 119 | endColumn: e.endColumn, 120 | }; 121 | } 122 | 123 | function NumericLiteral(value, loc) { 124 | return { 125 | type: 'NumericLiteral', 126 | value, 127 | loc, 128 | }; 129 | } 130 | 131 | %} 132 | 133 | %left '+' 134 | %left '*' 135 | 136 | %% 137 | 138 | exp 139 | : exp '+' exp 140 | /* Explicitly calculate location */ 141 | { $$ = BinaryExpression('+', $1, $3, Loc(@1, @3)) } 142 | 143 | | exp '*' exp 144 | /* Use default result location: @$ */ 145 | { $$ = BinaryExpression('*', $1, $3, @$) } 146 | 147 | | '(' exp ')' 148 | { $$ = $2 } 149 | 150 | | number 151 | /* Named args and position */ 152 | { $$ = NumericLiteral($number, @number) } 153 | ; 154 | 155 | number 156 | : NUMBER { $$ = Number(yytext) } 157 | ; -------------------------------------------------------------------------------- /src/lr/__tests__/lr-parsing-table-test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import Grammar from '../../grammar/grammar'; 7 | import {MODES as GRAMMAR_MODE} from '../../grammar/grammar-mode'; 8 | import LRParsingTable from '../lr-parsing-table'; 9 | 10 | describe('lr-parsing-table', () => { 11 | it('lalr1-grammar-1', () => { 12 | const grammarString = ` 13 | %% 14 | 15 | Start 16 | : OptPrefix1 SUFFIX1 17 | | OptPrefix2 SUFFIX2 18 | ; 19 | 20 | OptPrefix1 21 | : PREFIX1 22 | | /* empty */ 23 | ; 24 | 25 | OptPrefix2 26 | : PREFIX2 27 | | /* empty */ 28 | ; 29 | `; 30 | 31 | const expectedTable = { 32 | '0': { 33 | Start: 1, 34 | OptPrefix1: 2, 35 | OptPrefix2: 3, 36 | PREFIX1: 's4', 37 | SUFFIX1: 'r4', 38 | PREFIX2: 's5', 39 | SUFFIX2: 'r6', 40 | }, 41 | '1': {$: 'acc'}, 42 | '2': {SUFFIX1: 's6'}, 43 | '3': {SUFFIX2: 's7'}, 44 | '4': {SUFFIX1: 'r3'}, 45 | 
'5': {SUFFIX2: 'r5'}, 46 | '6': {$: 'r1'}, 47 | '7': {$: 'r2'}, 48 | }; 49 | 50 | const grammarBySLR = Grammar.fromString(grammarString, { 51 | mode: GRAMMAR_MODE.LALR1_BY_SLR1, 52 | }); 53 | expect(new LRParsingTable({grammar: grammarBySLR}).get()).toEqual( 54 | expectedTable 55 | ); 56 | 57 | const grammarByCLR = Grammar.fromString(grammarString, { 58 | mode: GRAMMAR_MODE.LALR1_BY_CLR1, 59 | }); 60 | expect(new LRParsingTable({grammar: grammarByCLR}).get()).toEqual( 61 | expectedTable 62 | ); 63 | }); 64 | 65 | it('lalr1-grammar-2', () => { 66 | const grammarString = ` 67 | %% 68 | 69 | S 70 | : A 'a' A 'b' 71 | | B 'b' B 'a' 72 | ; 73 | 74 | A : /*epsilon*/ ; 75 | B : /*epsilon*/ ; 76 | `; 77 | 78 | const expectedTable = { 79 | '0': {S: 1, A: 2, B: 3, "'a'": 'r3', "'b'": 'r4'}, 80 | '1': {$: 'acc'}, 81 | '2': {"'a'": 's4'}, 82 | '3': {"'b'": 's7'}, 83 | '4': {A: 5, "'b'": 'r3'}, 84 | '5': {"'b'": 's6'}, 85 | '6': {$: 'r1'}, 86 | '7': {B: 8, "'a'": 'r4'}, 87 | '8': {"'a'": 's9'}, 88 | '9': {$: 'r2'}, 89 | }; 90 | 91 | const grammarBySLR = Grammar.fromString(grammarString, { 92 | mode: GRAMMAR_MODE.LALR1_BY_SLR1, 93 | }); 94 | expect(new LRParsingTable({grammar: grammarBySLR}).get()).toEqual( 95 | expectedTable 96 | ); 97 | 98 | const grammarByCLR = Grammar.fromString(grammarString, { 99 | mode: GRAMMAR_MODE.LALR1_BY_CLR1, 100 | }); 101 | expect(new LRParsingTable({grammar: grammarByCLR}).get()).toEqual( 102 | expectedTable 103 | ); 104 | }); 105 | 106 | it('lalr1-grammar-3', () => { 107 | const grammarString = ` 108 | %% 109 | 110 | Stmt 111 | : Type ID ';' 112 | | Expr ';' 113 | ; 114 | 115 | Type 116 | : ID 117 | ; 118 | 119 | Expr 120 | : ID 121 | ; 122 | `; 123 | 124 | const expectedTable = { 125 | '0': {Stmt: 1, Type: 2, Expr: 3, ID: 's4'}, 126 | '1': {$: 'acc'}, 127 | '2': {ID: 's5'}, 128 | '3': {"';'": 's7'}, 129 | '4': {ID: 'r3', "';'": 'r4'}, 130 | '5': {"';'": 's6'}, 131 | '6': {$: 'r1'}, 132 | '7': {$: 'r2'}, 133 | }; 134 | 135 | const grammarBySLR = 
Grammar.fromString(grammarString, { 136 | mode: GRAMMAR_MODE.LALR1_BY_SLR1, 137 | }); 138 | expect(new LRParsingTable({grammar: grammarBySLR}).get()).toEqual( 139 | expectedTable 140 | ); 141 | 142 | const grammarByCLR = Grammar.fromString(grammarString, { 143 | mode: GRAMMAR_MODE.LALR1_BY_CLR1, 144 | }); 145 | expect(new LRParsingTable({grammar: grammarByCLR}).get()).toEqual( 146 | expectedTable 147 | ); 148 | }); 149 | }); 150 | -------------------------------------------------------------------------------- /examples/indent.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Handling nested blocks based on indentation (similar to Python). 3 | * 4 | * In this example we handle nested lists based on indentation (YAML-style): 5 | * 6 | * Example `~/test.list`: 7 | * 8 | * - one 9 | * - two 10 | * - three 11 | * - four 12 | * - five 13 | * - six 14 | * - seven 15 | * - eight 16 | * - nine 17 | * - ten 18 | * 19 | * Handling blocks based on indentation doesn't differ much from handling blocks 20 | * based on { } or []. In this case we have a recursive `List` production, which 21 | * consists of Entry items, separated by the `SEPARATOR` token. 22 | * 23 | * The `SEPARATOR` handles indentation (indent/dedent), tracking current level 24 | * of indentation, and current nested list where entries are added. 25 | * 26 | * ./bin/syntax -g ~/indent.g -m slr1 -f ~/test.list 27 | * 28 | * Parsed value: 29 | * 30 | * [ 31 | * "one", 32 | * "two", 33 | * [ 34 | * "three", 35 | * "four", 36 | * [ 37 | * "five", 38 | * "six" 39 | * ], 40 | * "seven", 41 | * "eight" 42 | * ], 43 | * "nine", 44 | * "ten" 45 | * ] 46 | * 47 | * See also `examples/indent-explicit.g.js` for explicit INDENT, and DEDENT 48 | * tokens handling. 49 | */ 50 | 51 | { 52 | lex: { 53 | rules: [ 54 | [`[a-zA-Z0-9_]+`, `return 'IDENTIFIER'`], 55 | 56 | // ------------------------------------------------ 57 | // Indent/Dedent. 
58 | 59 | [`\\n( *)`, ` 60 | 61 | yytext = yytext.slice(1); // strip leading NL 62 | matchedIndent = yytext.length; 63 | 64 | return 'SEPARATOR'; 65 | `], 66 | 67 | [`\\s+`, `/* skip whitespace */`], 68 | [`\\-`, `return '-'`], 69 | ], 70 | }, 71 | 72 | moduleInclude: ` 73 | 74 | /** 75 | * Matched during tokenization indentation level 76 | * (step ahead from the "currentIndent"). 77 | */ 78 | let matchedIndent = 0; 79 | 80 | /** 81 | * Current level of indentation. 82 | */ 83 | let currentIndent = 0; 84 | 85 | /** 86 | * Current list where we add entries. 87 | */ 88 | let currentList = []; 89 | 90 | /** 91 | * Keeps track of the indentation levels to check 92 | * correct level on dedent. 93 | */ 94 | const indentStack = []; 95 | indentStack.push(currentIndent); 96 | 97 | /** 98 | * Same as "indentStack" but to track nested lists. 99 | */ 100 | const listsStack = []; 101 | listsStack.push(currentList); 102 | `, 103 | 104 | bnf: { 105 | Program: [[`List`, `$$ = currentList`]], 106 | 107 | List: [[`Entry`, `currentList.push($1)`], 108 | [`List SEPARATOR Entry`, ` 109 | 110 | // 1. We're on the same nested level, just push the entry 111 | // to the current list. 112 | 113 | if (currentIndent === matchedIndent) { 114 | currentList.push($3); 115 | } 116 | 117 | // 2. Dedent. Pop the current list from the stack, pushing 118 | // as a child to the previous. 119 | 120 | else if (currentIndent > matchedIndent) { 121 | currentList.push($3); 122 | 123 | const poppsedList = listsStack.pop(); 124 | currentList = listsStack[listsStack.length - 1]; 125 | currentList.push(poppsedList); 126 | 127 | indentStack.pop(); 128 | currentIndent = indentStack[indentStack.length - 1]; 129 | } 130 | 131 | // 3. Indent. Allocate a new list for entries and push 132 | // onto the stack. 
133 | 134 | else { 135 | currentList.push($3); 136 | 137 | currentIndent = matchedIndent; 138 | currentList = []; 139 | 140 | listsStack.push(currentList); 141 | indentStack.push(currentIndent); 142 | } 143 | 144 | `]], 145 | 146 | Entry: [[`- IDENTIFIER`, `$$ = $2`]], 147 | }, 148 | } -------------------------------------------------------------------------------- /src/grammar/grammar-symbol.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import {EOF, EPSILON} from '../special-symbols'; 7 | 8 | /** 9 | * Symbols are stored in the registry, and retrieved from it 10 | * if the symbol was already created. 11 | */ 12 | const registry = {}; 13 | 14 | /** 15 | * Class encapsulates operations with one 16 | * grammar symbol (terminal or non-terminal) 17 | */ 18 | export default class GrammarSymbol { 19 | constructor(symbol) { 20 | this._symbol = symbol; 21 | } 22 | 23 | /** 24 | * Terminals in our grammar are quoted, 25 | * "a", " ", "var", etc. 26 | */ 27 | isTerminal() { 28 | const first = this._symbol[0]; 29 | const last = this._symbol[this._symbol.length - 1]; 30 | 31 | return (first === '"' && last === '"') || (first === "'" && last === "'"); 32 | } 33 | 34 | /** 35 | * Returns original symbol from an extended name. 
1X3 => X 36 | */ 37 | getOrignialSymbol() { 38 | if (!this._originalSymbol) { 39 | this._originalSymbol = this._symbol 40 | .replace(/^\d+\|/, '') 41 | .replace(/\|(?:\d+|\$)$/, ''); 42 | } 43 | return this._originalSymbol; 44 | } 45 | 46 | /** 47 | * Returns start context (in extended LALR 1X3 => 1) 48 | */ 49 | getStartContext() { 50 | if (!this._startContext) { 51 | this._startContext = Number(this._symbol.match(/^(\d+)\|/)[1]); 52 | } 53 | return this._startContext; 54 | } 55 | 56 | /** 57 | * Returns start context (in extended LALR 1X3 => 1) 58 | */ 59 | getEndContext() { 60 | if (!this._endContext) { 61 | this._endContext = Number(this._symbol.match(/\|(\d+)$/)[1]); 62 | } 63 | return this._endContext; 64 | } 65 | 66 | /** 67 | * Returns a symbol from the registry, or creates one. 68 | */ 69 | static get(symbol) { 70 | if (!registry.hasOwnProperty(symbol)) { 71 | registry[symbol] = new GrammarSymbol(symbol); 72 | } 73 | return registry[symbol]; 74 | } 75 | 76 | /** 77 | * Returns raw terminal value (between quotes) 78 | */ 79 | getTerminalValue() { 80 | this._checkTerminal(); 81 | return this._symbol.slice(1, this._symbol.length - 1); 82 | } 83 | 84 | /** 85 | * Returns a terminal quoted into single or double-quotes, 86 | * depending on which quotes it's already wrapped itself. 87 | */ 88 | quotedTerminal() { 89 | this._checkTerminal(); 90 | let isSingleQuoted = this._symbol[0] === "'"; 91 | 92 | let leftQuote = isSingleQuoted ? `"'` : `'"`; 93 | let rightQuote = isSingleQuoted ? `'"` : `"'`; 94 | 95 | return `${leftQuote}${this.getTerminalValue()}${rightQuote}`; 96 | } 97 | 98 | /** 99 | * Checks whether a symbol is a non-terminal. 100 | */ 101 | isNonTerminal() { 102 | return !this.isTerminal(); 103 | } 104 | 105 | /** 106 | * Checks whether a symbol is Epsilon (instance method). 107 | */ 108 | isEpsilon() { 109 | return GrammarSymbol.isEpsilon(this._symbol); 110 | } 111 | 112 | /** 113 | * Checks whether a symbol is an end of file (instance method). 
114 | */ 115 | isEOF() { 116 | return this._symbol === EOF; 117 | } 118 | 119 | /** 120 | * Checks whether a symbol is Epsilon (static method). 121 | */ 122 | static isEpsilon(symbol) { 123 | return symbol.includes(EPSILON); 124 | } 125 | 126 | /** 127 | * Checks whether a symbol is EOF (static method). 128 | */ 129 | static isEOF(symbol) { 130 | return symbol === EOF; 131 | } 132 | 133 | /** 134 | * Returns raw symbol. 135 | */ 136 | getSymbol() { 137 | return this._symbol; 138 | } 139 | 140 | /** 141 | * Checks whether the symbol equals to the passed one. 142 | */ 143 | isSymbol(symbol) { 144 | return this.getSymbol() === symbol; 145 | } 146 | 147 | _checkTerminal() { 148 | if (!this.isTerminal()) { 149 | throw new TypeError(`Symbol ${this._symbol} is not terminal.`); 150 | } 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /examples/lang.lex: -------------------------------------------------------------------------------- 1 | /** 2 | * Lang: Lexical Grammar. 3 | * 4 | * BNF grammar is in: examples/lang.bnf 5 | * Test source code is in: examples/test.lang 6 | * 7 | * How to run: 8 | * 9 | * ./bin/syntax \ 10 | * --grammar examples/lang.bnf \ 11 | * --lex examples/lang.lex \ 12 | * --mode lalr1 \ 13 | * --file examples/test.lang 14 | */ 15 | { 16 | macros: { 17 | id: `[a-zA-Z0-9_]`, 18 | }, 19 | 20 | rules: [ 21 | ["\\/\\/.*", `/* skip comments */`], 22 | ["\/\\*[\\s\\S]*?\\*\/", `/* skip comments */`], 23 | 24 | [`\\s+`, `/* skip whitespace */`], 25 | 26 | // ------------------------------------------------ 27 | // Keywords. 
28 | 29 | [`\\blet\\b`, `return 'LET'`], 30 | [`\\bif\\b`, `return 'IF'`], 31 | [`\\belse\\b`, `return 'ELSE'`], 32 | [`\\btrue\\b`, `return 'TRUE'`], 33 | [`\\bfalse\\b`, `return 'FALSE'`], 34 | [`\\bnull\\b`, `return 'NULL'`], 35 | [`\\breturn\\b`, `return 'RETURN'`], 36 | [`\\bfn\\b`, `return 'FN'`], 37 | [`\\bdo\\b`, `return 'DO'`], 38 | [`\\bwhile\\b`, `return 'WHILE'`], 39 | [`\\bfor\\b`, `return 'FOR'`], 40 | [`\\bbreak\\b`, `return 'BREAK'`], 41 | [`\\bcontinue\\b`, `return 'CONTINUE'`], 42 | [`\\bclass\\b`, `return 'CLASS'`], 43 | [`\\bextends\\b`, `return 'EXTENDS'`], 44 | [`\\bnew\\b`, `return 'NEW'`], 45 | [`\\bsuper\\b`, `return 'SUPER'`], 46 | [`\\bthis\\b`, `return 'THIS'`], 47 | 48 | // ------------------------------------------------ 49 | // Symbols. 50 | 51 | [`\\->`, `return 'ARROW'`], 52 | 53 | [`\\(`, `return 'LPAREN'`], 54 | [`\\)`, `return 'RPAREN'`], 55 | 56 | [`\\{`, `return 'LCURLY'`], 57 | [`\\}`, `return 'RCURLY'`], 58 | 59 | [`\\[`, `return 'LBRACKET'`], 60 | [`\\]`, `return 'RBRACKET'`], 61 | 62 | [`:`, `return 'COLON'`], 63 | [`;`, `return 'SEMICOLON'`], 64 | [`,`, `return 'COMMA'`], 65 | 66 | [`\\.`, `return 'DOT'`], 67 | 68 | // ------------------------------------------------ 69 | // Logical operators: &&, || 70 | 71 | [`\\|\\|`, `return 'LOGICAL_OR'`], 72 | [`&&`, `return 'LOGICAL_AND'`], 73 | 74 | // ------------------------------------------------ 75 | // Assignment operators: =, *=, /=, +=, -=, 76 | 77 | [`=`, `return 'SIMPLE_ASSIGN'`], 78 | [`(\\*|\\/|\\+|\\-)=`, `return 'COMPLEX_ASSIGN'`], 79 | 80 | // ------------------------------------------------ 81 | // Numbers. 
82 | 83 | [`(\\d+(\\.\\d+)?)`, `return 'NUMBER'`], 84 | 85 | // ------------------------------------------------ 86 | // Equality operators: ==, != 87 | 88 | [`(=|!)=`, `return 'EQUALITY_OPERATOR'`], 89 | 90 | // ------------------------------------------------ 91 | // Math operators: +, -, *, / 92 | 93 | [`(\\+|\\-)`, `return 'ADDITIVE_OPERATOR'`], 94 | [`(\\*|\\/)`, `return 'MULTIPLICATIVE_OPERATOR'`], 95 | 96 | // ------------------------------------------------ 97 | // Relational operators: >, >=, <, <= 98 | 99 | [`(>|<)=?`, `return 'RELATIONAL_OPERATOR'`], 100 | 101 | // ------------------------------------------------ 102 | // Strings. 103 | 104 | [`"[^"]*"`, `yytext = yytext.slice(1, -1); return 'STRING';`], 105 | [`'[^']*'`, `yytext = yytext.slice(1, -1); return 'CHAR';`], 106 | 107 | [`{id}+`, `return 'IDENTIFIER'`], 108 | ], 109 | } -------------------------------------------------------------------------------- /src/__tests__/code-unit-test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import CodeUnit from '../code-unit'; 7 | /* Sandbox object whose bindings (yytext, yyleng, __, ...) the tests below inspect. */ 8 | const environment = CodeUnit.getSandbox(); 9 | /* Test double for a grammar symbol; implements only getSymbol(). */ 10 | function MockSymbol(symbol) { 11 | return { 12 | getSymbol() { 13 | return symbol; 14 | }, 15 | }; 16 | } 17 | /* Test double for a production: getRHS() wraps each RHS name in a MockSymbol; the handler text and epsilon flag are returned as given. */ 18 | function MockProduction(RHS, handler = '$$ = $1 + $3', isEpsilon = false) { 19 | return { 20 | getRHS() { 21 | return RHS.map(symbol => MockSymbol(symbol)); 22 | }, 23 | 24 | getRawSemanticAction() { 25 | return handler; 26 | }, 27 | 28 | isEpsilon() { 29 | return isEpsilon; 30 | }, 31 | }; 32 | } 33 | /* Sample location object passed for the location arguments of handlers created with captureLocations. */ 34 | const defaultLoc = { 35 | startOffset: 1, 36 | endOffset: 2, 37 | startLine: 1, 38 | endLine: 1, 39 | startColumn: 1, 40 | endColumn: 2, 41 | }; 42 | 43 | describe('code-unit', () => { 44 | it('default bindings', () => { 45 | expect(environment.yytext).toBe(''); 46 | expect(environment.yyleng).toBe(0); 47 | 
expect(environment.yy).toEqual({}); 48 | 49 | expect(environment.yyparse).not.toBe(null); 50 | expect(typeof environment.yyparse.onParseBegin).toBe('function'); 51 | expect(typeof environment.yyparse.onParseEnd).toBe('function'); 52 | 53 | expect(environment.__).toBe(null); 54 | expect(typeof environment.require).toBe('function'); 55 | }); 56 | 57 | it('create handler', () => { 58 | const handler = CodeUnit.createHandler('$1, $2', '$$ = $1 + $2'); 59 | expect(typeof handler).toBe('function'); 60 | /* Calling the handler leaves the `$$` result in the sandbox binding `__`. */ 61 | handler(1, 2); 62 | expect(environment.__).toBe(3); 63 | }); 64 | 65 | it('shared sandbox', () => { 66 | expect(environment).toBe(CodeUnit.getSandbox()); 67 | }); 68 | 69 | it('eval', () => { 70 | CodeUnit.eval('$$ = 2 * 5'); 71 | expect(environment.__).toBe(10); 72 | }); 73 | 74 | it('production action parameters', () => { 75 | let production = MockProduction(['additive', 'PLUS', 'multiplicative']); 76 | /* One `_N` parameter per RHS symbol; `_Nloc` parameters are appended when captureLocations is set. */ 77 | expect(CodeUnit.createProductionParams({production})).toBe('_1, _2, _3'); 78 | 79 | expect( 80 | CodeUnit.createProductionParams({ 81 | production, 82 | captureLocations: true, 83 | }) 84 | ).toBe('_1, _2, _3, _1loc, _2loc, _3loc'); 85 | }); 86 | 87 | it('production handler', () => { 88 | const production = MockProduction(['additive', 'PLUS', 'multiplicative']); 89 | let handler = CodeUnit.createProductionHandler({production}); 90 | 91 | expect(handler.toString()).toContain( 92 | '(' + '_1, _2, _3' + ') { __ = _1 + _3 }' 93 | ); 94 | 95 | handler(1, '+', 2); 96 | expect(environment.__).toBe(3); 97 | 98 | handler = CodeUnit.createProductionHandler({ 99 | production, 100 | captureLocations: true, 101 | }); 102 | 103 | expect(handler.toString()).toContain( 104 | '(' + 105 | '_1, _2, _3, _1loc, _2loc, _3loc' + 106 | ') { __loc = yyloc(_1loc, _3loc);__ = _1 + _3 }' 107 | ); 108 | 109 | handler(1, '+', 2, defaultLoc, defaultLoc, defaultLoc); 110 | expect(environment.__).toBe(3); 111 | }); 112 | 113 | it('epsilon production loc', () => { 114 | const production = 
MockProduction([], '', /* isEpsilon */ true); 115 | 116 | let handler = CodeUnit.createProductionHandler({ 117 | production, 118 | captureLocations: true, 119 | }); 120 | 121 | expect(handler.toString()).toContain('(' + '' + ') { __loc = null; }'); 122 | }); 123 | 124 | it('yyloc', () => { 125 | const yyloc = environment.yyloc; 126 | 127 | const $1loc = { 128 | startOffset: 0, 129 | endOffset: 2, 130 | startLine: 1, 131 | endLine: 1, 132 | startColumn: 0, 133 | endColumn: 2, 134 | }; 135 | 136 | const $2loc = { 137 | startOffset: 6, 138 | endOffset: 8, 139 | startLine: 1, 140 | endLine: 1, 141 | startColumn: 6, 142 | endColumn: 8, 143 | }; 144 | /* Expected merge: starts where $1loc starts and ends where $2loc ends. */ 145 | const $$loc = { 146 | startOffset: 0, 147 | endOffset: 8, 148 | startLine: 1, 149 | endLine: 1, 150 | startColumn: 0, 151 | endColumn: 8, 152 | }; 153 | 154 | expect(yyloc($1loc, $2loc)).toEqual($$loc); 155 | 156 | // Epsilon loc (null) 157 | expect(yyloc(null, $2loc)).toEqual($2loc); 158 | expect(yyloc($1loc, null)).toEqual($1loc); 159 | }); 160 | 161 | it('set bindings', () => { 162 | CodeUnit.setBindings({ 163 | yytext: 'Hi!', 164 | yyleng: 3, 165 | }); 166 | 167 | expect(environment.yytext).toBe('Hi!'); 168 | expect(environment.yyleng).toBe(3); 169 | }); 170 | }); 171 | -------------------------------------------------------------------------------- /examples/parser-lexer-communication.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Change lexer state from parser. 3 | * 4 | * Note: a tokenizer can be accessed in a semantic action as `yy.lexer`, 5 | * or `yy.tokenizer`. 6 | * 7 | * The grammar below solves the problem of parsing { } in statement position as 8 | * a "BlockStatement", and in the expression position as an "ObjectLiteral". 9 | * 10 | * Note: there are several other techniques for solving this: lookahead 11 | * restriction productions, or cover grammar. 
12 | * 13 | * Example in the statement position: 14 | * 15 | * ./bin/syntax -g examples/parser-lexer-communication.g -m lalr1 -p '{ 1; 2; }' 16 | * 17 | * ✓ Accepted 18 | * 19 | * Parsed value: 20 | * 21 | * { 22 | * "type": "Program", 23 | * "body": [ 24 | * { 25 | * "type": "BlockStatement", 26 | * "body": [ 27 | * "1", 28 | * "2" 29 | * ] 30 | * } 31 | * ] 32 | * } 33 | * 34 | * Two empty blocks: 35 | * 36 | * ./bin/syntax -g examples/parser-lexer-communication.g -m lalr1 -p '{{}}' 37 | * 38 | * Exaple in the expression position: 39 | * 40 | * ./bin/syntax -g examples/parser-lexer-communication.g -m lalr1 -p '({ 1, 2 });' 41 | * 42 | * ✓ Accepted 43 | * 44 | * Parsed value: 45 | * 46 | * { 47 | * "type": "Program", 48 | * "body": [ 49 | * { 50 | * "type": "ObjectLiteral", 51 | * "properties": [ 52 | * "1", 53 | * "2" 54 | * ] 55 | * } 56 | * ] 57 | * } 58 | */ 59 | 60 | { 61 | // -------------------------------------------------- 62 | // Lexical grammar. 63 | 64 | lex: { 65 | 66 | // Lexer states. 67 | startConditions: { 68 | expression: 0, 69 | }, 70 | 71 | rules: [ 72 | [`\\s+`, `/* skip whitespace */`], 73 | 74 | // { and } in the expression position yield different token types: 75 | 76 | [['expression'], `\\{`, `return '%{'`], 77 | [['expression'], `\\}`, `return '}%'`], 78 | 79 | // { and } in the statement position yield default token types: 80 | 81 | [`\\{`, `return '{'`], 82 | [`\\}`, `return '}'`], 83 | 84 | [`\\d+`, `return 'NUMBER'`], 85 | 86 | [`;`, `return ';'`], 87 | [`,`, `return ','`], 88 | 89 | [`\\(`, `return '('`], 90 | [`\\)`, `return ')'`], 91 | ], 92 | }, 93 | 94 | // -------------------------------------------------- 95 | // Syntactic grammar. 
96 | 97 | bnf: { 98 | Program: [[`StatmentList`, `$$ = {type: 'Program', body: $1}`]], 99 | 100 | StatmentList: [[`Statment`, `$$ = [$1]`], 101 | [`StatmentList Statment`, `$$ = $1; $1.push($2)`]], 102 | 103 | Statment: [[`BlockStatement`, `$$ = $1`], 104 | [`ExpressionStatement`, `$$ = $1`]], 105 | 106 | BlockStatement: [[`{ OptStatmentList }`, `$$ = {type: 'BlockStatement', body: $2}`]], 107 | 108 | OptStatmentList: [[`StatmentList`, `$$ = $1`], 109 | [`ε`, `$$ = null`]], 110 | 111 | ExpressionStatement: [[`Expression ;`, `$$ = $1`]], 112 | 113 | Expression: [[`expressionBegin ExpressionNode expressionEnd`, 114 | `$$ = $2`]], 115 | 116 | // Special "activation productions". They activate needed lexer state, 117 | // so the later can yield different token types for the same chars. 118 | 119 | expressionBegin: [[`ε`, `yy.lexer.pushState('expression')`]], 120 | expressionEnd: [[`ε`, `yy.lexer.popState()`]], 121 | 122 | ExpressionNode: [[`NumericLiteral`, `$$ = $1`], 123 | [`ObjectLiteral`, `$$ = $1`], 124 | [`( Expression )`, `$$ = $2`]], 125 | 126 | NumericLiteral: [[`NUMBER`, `$$ = $1`]], 127 | 128 | ObjectLiteral: [[`%{ OptPropertyList }%`, `$$ = {type: 'ObjectLiteral', properties: $2}`]], 129 | 130 | OptPropertyList: [[`PropertyList`, `$$ = $1`], 131 | [`ε`, `$$ = null`]], 132 | 133 | PropertyList: [[`Property`, `$$ = [$1]`], 134 | [`PropertyList , Property`, `$$ = $1; $1.push($3)`]], 135 | 136 | Property: [`NumericLiteral`, `$$ = $1`], 137 | } 138 | } -------------------------------------------------------------------------------- /src/lr/__tests__/state-test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2015-present Dmitry Soshnikov 4 | */ 5 | 6 | import Grammar from '../../grammar/grammar'; 7 | import LRItem from '../lr-item'; 8 | import SetsGenerator from '../../sets-generator'; 9 | import State from '../state'; 10 | import {MODES as GRAMMAR_MODE} from 
'../../grammar/grammar-mode'; 11 | import CanonicalCollection from '../canonical-collection'; 12 | 13 | const grammar = Grammar.fromGrammarFile( 14 | __dirname + '/../../grammar/__tests__/calc.g', 15 | { 16 | mode: GRAMMAR_MODE.LALR1, 17 | } 18 | ); 19 | /* Builds a lookahead set: an object with each given symbol as a key mapped to true. */ 20 | function laSet(arraySet) { 21 | const set = {}; 22 | arraySet.forEach(symbol => (set[symbol] = true)); 23 | return set; 24 | } 25 | 26 | const canonicalCollection = new CanonicalCollection({grammar}); 27 | const setsGenerator = new SetsGenerator({grammar}); 28 | 29 | const defaultLookaheadSet = laSet(['$', '/', '-', '*', '+']); 30 | 31 | // E -> E • + E 32 | const kernelItem1 = new LRItem( 33 | /* production */ grammar.getProduction(1), 34 | /* dotPosition */ 1, 35 | grammar, 36 | canonicalCollection, 37 | setsGenerator, 38 | /* lookaheadSet */ defaultLookaheadSet 39 | ); 40 | 41 | // E -> E • * E 42 | const kernelItem2 = new LRItem( 43 | /* production */ grammar.getProduction(2), 44 | /* dotPosition */ 1, 45 | grammar, 46 | canonicalCollection, 47 | setsGenerator, 48 | /* lookaheadSet */ defaultLookaheadSet 49 | ); 50 | 51 | const kernelItems = [kernelItem1, kernelItem2]; 52 | 53 | const state = new State(kernelItems, grammar, canonicalCollection); 54 | /* Extra item added through addItem(), not among the kernel items passed to the constructor. */ 55 | const otherItem = new LRItem( 56 | /* production */ grammar.getProduction(3), 57 | /* dotPosition */ 1, 58 | grammar, 59 | canonicalCollection, 60 | setsGenerator, 61 | /* lookaheadSet */ defaultLookaheadSet 62 | ); 63 | 64 | state.addItem(otherItem); 65 | 66 | const items = kernelItems.concat(otherItem); 67 | 68 | // $accept -> E • 69 | const acceptItem = new LRItem( 70 | /* production */ grammar.getAugmentedProduction(), 71 | /* dotPosition */ 1, 72 | grammar, 73 | canonicalCollection, 74 | setsGenerator, 75 | /* lookaheadSet */ laSet(['$']) 76 | ); 77 | 78 | const acceptItems = [acceptItem]; 79 | 80 | const acceptState = new State( 81 | /* kernelItems */ acceptItems, 82 | grammar, 83 | canonicalCollection 84 | ); 85 | 86 | // E -> E * E • 87 | const 
finalItem = new LRItem( 88 | /* production */ grammar.getProduction(2), 89 | /* dotPosition */ 3, 90 | grammar, 91 | canonicalCollection, 92 | setsGenerator, 93 | /* lookaheadSet */ defaultLookaheadSet 94 | ); 95 | 96 | const finalState = new State( 97 | /* kernelItems */ [finalItem], 98 | grammar, 99 | canonicalCollection 100 | ); 101 | /* Maps items to their keys (item.getKey()) so item lists can be compared with toEqual. */ 102 | function toKeys(items) { 103 | return items.map(item => item.getKey()); 104 | } 105 | 106 | describe('state', () => { 107 | it('kernal items', () => { 108 | expect(state.getKernelItems()).toBe(kernelItems); 109 | expect(acceptState.getKernelItems()).toBe(acceptItems); 110 | }); 111 | 112 | it('items', () => { 113 | expect(toKeys(state.getItems())).toEqual(toKeys(items)); 114 | expect(toKeys(acceptState.getItems())).toEqual(toKeys(acceptItems)); 115 | }); 116 | 117 | it('is kernel item', () => { 118 | expect(state.isKernelItem(kernelItem1)).toBe(true); 119 | expect(state.isKernelItem(kernelItem2)).toBe(true); 120 | /* Same production and dot position as kernelItem2, but a separate LRItem instance: expected not to be a kernel item. */ 121 | const otherItem = new LRItem( 122 | /* production */ grammar.getProduction(2), 123 | /* dotPosition */ 1, 124 | grammar, 125 | canonicalCollection, 126 | setsGenerator, 127 | /* lookaheadSet */ defaultLookaheadSet 128 | ); 129 | 130 | expect(state.isKernelItem(otherItem)).toBe(false); 131 | expect(acceptState.isKernelItem(acceptItem)).toBe(true); 132 | }); 133 | 134 | it('number', () => { 135 | expect(state.getNumber()).toBe(null); 136 | expect(acceptState.getNumber()).toBe(null); 137 | 138 | state.setNumber(1); 139 | acceptState.setNumber(2); 140 | 141 | expect(state.getNumber()).toBe(1); 142 | expect(acceptState.getNumber()).toBe(2); 143 | }); 144 | 145 | it('is final', () => { 146 | expect(finalState.isFinal()).toBe(true); 147 | expect(state.isFinal()).toBe(false); 148 | expect(acceptState.isFinal()).toBe(true); 149 | }); 150 | 151 | it('is accept', () => { 152 | expect(finalState.isAccept()).toBe(false); 153 | expect(state.isAccept()).toBe(false); 154 | expect(acceptState.isAccept()).toBe(true); 155 | }); 
156 | 157 | it('reduce items', () => { 158 | expect(finalState.getReduceItems().length).toBe(1); 159 | expect(finalState.getReduceItems()[0]).toBe(finalItem); 160 | 161 | expect(state.getReduceItems().length).toBe(0); 162 | expect(acceptState.getReduceItems().length).toBe(0); 163 | }); 164 | }); 165 | -------------------------------------------------------------------------------- /examples/parser-lexer-communication.php.g: -------------------------------------------------------------------------------- 1 | /** 2 | * Change lexer state from parser. PHP version. 3 | * 4 | * Note: a tokenizer can be accessed in a semantic action as `yy::$lexer`, 5 | * or `yy::$tokenizer`. 6 | * 7 | * The grammar below solves the problem of parsing { } in statement position as 8 | * a "BlockStatement", and in the expression position as an "ObjectLiteral". 9 | * 10 | * Note: there are several other techniques for solving this: lookahead 11 | * restriction productions, or cover grammar. 12 | * 13 | * Example in the statement position: 14 | * 15 | * ./bin/syntax -g examples/parser-lexer-communication.php.g -m lalr1 -o ~/Parser.php 16 | * 17 | * Parser::parse('{ 1; 2; }'); 18 | * 19 | * array(2) { 20 | * ["type"]=> 21 | * string(7) "Program" 22 | * ["body"]=> 23 | * array(1) { 24 | * [0]=> 25 | * array(2) { 26 | * ["type"]=> 27 | * string(14) "BlockStatement" 28 | * ["body"]=> 29 | * array(2) { 30 | * [0]=> 31 | * string(1) "1" 32 | * [1]=> 33 | * string(1) "2" 34 | * } 35 | * } 36 | * } 37 | * } 38 | * 39 | * Example in the expression position: 40 | * 41 | * ./bin/syntax -g examples/parser-lexer-communication.php.g -m lalr1 -o ~/Parser.php 42 | * 43 | * Parser::parse('({ 1, 2 });'); 44 | * 45 | * array(2) { 46 | * ["type"]=> 47 | * string(7) "Program" 48 | * ["body"]=> 49 | * array(1) { 50 | * [0]=> 51 | * array(2) { 52 | * ["type"]=> 53 | * string(13) "ObjectLiteral" 54 | * ["properties"]=> 55 | * array(2) { 56 | * [0]=> 57 | * string(1) "1" 58 | * [1]=> 59 | * string(1) "2" 60 | * } 61 
| * } 62 | * } 63 | * } 64 | */ 65 | 66 | { 67 | // -------------------------------------------------- 68 | // Lexical grammar. 69 | 70 | lex: { 71 | 72 | // Lexer states. 73 | startConditions: { 74 | expression: 0, 75 | }, 76 | 77 | rules: [ 78 | [`\\s+`, `/* skip whitespace */`], 79 | 80 | // { and } in the expression position yield different token types: 81 | 82 | [['expression'], `\\{`, `return '%{'`], 83 | [['expression'], `\\}`, `return '}%'`], 84 | 85 | // { and } in the statement position yield default token types: 86 | 87 | [`\\{`, `return '{'`], 88 | [`\\}`, `return '}'`], 89 | 90 | [`\\d+`, `return 'NUMBER'`], 91 | 92 | [`;`, `return ';'`], 93 | [`,`, `return ','`], 94 | 95 | [`\\(`, `return '('`], 96 | [`\\)`, `return ')'`], 97 | ], 98 | }, 99 | 100 | // -------------------------------------------------- 101 | // Syntactic grammar. 102 | 103 | bnf: { 104 | Program: [[`StatmentList`, `$$ = array('type' => 'Program', 'body' => $1)`]], 105 | 106 | StatmentList: [[`Statment`, `$$ = [$1]`], 107 | [`StatmentList Statment`, `array_push($1, $2); $$ = $1;`]], 108 | 109 | Statment: [[`BlockStatement`, `$$ = $1`], 110 | [`ExpressionStatement`, `$$ = $1`]], 111 | 112 | BlockStatement: [[`{ OptStatmentList }`, `$$ = array('type' => 'BlockStatement', 'body' => $2)`]], 113 | 114 | OptStatmentList: [[`StatmentList`, `$$ = $1`], 115 | [`ε`, `$$ = null`]], 116 | 117 | ExpressionStatement: [[`Expression ;`, `$$ = $1`]], 118 | 119 | Expression: [[`expressionBegin ExpressionNode expressionEnd`, 120 | `$$ = $2`]], 121 | 122 | // Special "activation productions". They activate needed lexer state, 123 | // so the later can yield different token types for the same chars. 
124 | 125 | expressionBegin: [[`ε`, `yy::$lexer->pushState('expression');`]], 126 | expressionEnd: [[`ε`, `yy::$lexer->popState();`]], 127 | 128 | ExpressionNode: [[`NumericLiteral`, `$$ = $1`], 129 | [`ObjectLiteral`, `$$ = $1`], 130 | [`( Expression )`, `$$ = $2`]], 131 | 132 | NumericLiteral: [[`NUMBER`, `$$ = $1`]], 133 | 134 | ObjectLiteral: [[`%{ OptPropertyList }%`, `$$ = array('type' => 'ObjectLiteral', 'properties' => $2)`]], 135 | 136 | OptPropertyList: [[`PropertyList`, `$$ = $1`], 137 | [`ε`, `$$ = null`]], 138 | 139 | PropertyList: [[`Property`, `$$ = [$1]`], 140 | [`PropertyList , Property`, `array_push($1, $3); $$ = $1;`]], 141 | 142 | Property: [`NumericLiteral`, `$$ = $1`], 143 | } 144 | } --------------------------------------------------------------------------------