├── .gitattributes ├── .github └── workflows │ └── npm.yaml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── assets ├── calc.png ├── computed.png ├── example1.png ├── no_eof_symb.png ├── sum.png └── with_eof_symb.png ├── demos ├── calculator │ ├── README.md │ ├── cli.ts │ ├── eval.ts │ ├── grammar.peg │ └── tsconfig.json ├── calculator_with_computed_properties │ ├── README.md │ ├── cli.ts │ ├── grammar.peg │ └── tsconfig.json ├── json_parser │ ├── README.md │ ├── grammar.peg │ ├── test.ts │ └── tsconfig.json └── lookahead │ ├── README.md │ ├── grammar.peg │ ├── test.ts │ └── tsconfig.json ├── eslint.config.mjs ├── gen-tests.sh ├── package-lock.json ├── package.json ├── src ├── checks.ts ├── cli.ts ├── gen.ts ├── leftrec.ts ├── meta.ts ├── metagrammar.peg ├── rules.ts ├── template.ts ├── test │ ├── alias_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── bounded_matches_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── calc_leftrec_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── calc_test │ │ ├── flags.txt │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── case_insensitive_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── checks.test.ts │ ├── crlf_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── eof_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── gen.test.ts │ ├── id_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── indirect_leftrec_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── kleene_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── leftrec.test.ts │ ├── memo_test │ │ ├── flags.txt │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── memo_time_test │ │ ├── flags.txt │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── multiline_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── muse │ │ ├── grammar.peg │ │ ├── 
parser.ts │ │ └── test.test.ts │ ├── pos_test │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ ├── regex_flags_test │ │ ├── flags.txt │ │ ├── grammar.peg │ │ ├── parser.ts │ │ └── test.test.ts │ └── setanta_test │ │ ├── README.md │ │ ├── flags.txt │ │ ├── grammar.peg │ │ ├── parser.ts │ │ ├── run.test.ts │ │ └── test.test.ts ├── types.ts └── util.ts └── tsconfig.json /.gitattributes: -------------------------------------------------------------------------------- 1 | package-lock.json binary 2 | -------------------------------------------------------------------------------- /.github/workflows/npm.yaml: -------------------------------------------------------------------------------- 1 | name: Node CI/CD 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | cd: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | - name: Use Node.js 22.x 12 | uses: actions/setup-node@v1 13 | with: 14 | node-version: '22.x' 15 | registry-url: 'https://registry.npmjs.org' 16 | - run: npm ci 17 | - run: npm run build 18 | - run: npm test 19 | - run: npm publish 20 | if: ${{ startsWith(github.ref, 'refs/tags/v') }} 21 | env: 22 | NODE_AUTH_TOKEN: ${{ secrets.NPM_SECRET }} 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | /tsbuild/ 3 | *.swp 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 3.3.2 - 2023/11/09 4 | 5 | ### Fixed 6 | 7 | - Bug where rules with single match, no named types but with computed properties would generate 8 | invalid typescript. 9 | - Dependency updates. 10 | 11 | ## 3.3.1 - 2023/30/11 12 | 13 | - Minor dependency version updates. 
14 | 15 | ## 3.3.0 - 2023/07/06 16 | 17 | ### Added 18 | 19 | - Support for optional output file. If not provided just output parser to stdout. 20 | - `+` operator return type now specifies non-empty arrays (#49). 21 | - Grammars now are checked for any Typescript keywords before parser generation, to avoid 22 | generating invalid TypeScript (#44). 23 | - Added support for regex modifiers including case insensitivity and multiline mode (#48). 24 | - Added support for specifying a specific number of matches (#45). 25 | 26 | ### Fixed 27 | 28 | - Added annotations to avoid errors on `--noUnusedLocals`. 29 | 30 | ## 3.2.3 - 2023/06/17 31 | 32 | - Fix non-terminating nested Kleene star (#46). 33 | - Update build dependencies. 34 | 35 | ## 3.2.2 - 2023/03/24 36 | 37 | - Update major versions of dependencies. 38 | 39 | ## 3.2.1 - 2022/04/18 40 | 41 | ### Added 42 | 43 | - Added `--include-grammar-comment` to support removing the autogenerated comment at the beginning 44 | of the parser file with the input grammar. (#40). 45 | 46 | ## 3.2.0 - 2021/09/22 47 | 48 | ### Added 49 | 50 | - Added `--regex-flags` flag to support user specified regex flags. Can be used to enable unicode 51 | property matchers (#27). 52 | 53 | ### Fixed 54 | 55 | - Support for [CRLF endlines](https://en.wikipedia.org/wiki/Carriage_return#Computers) added. (#28) 56 | 57 | ## 3.1.0 - 2021/02/10 58 | 59 | ### Added 60 | 61 | - Support for memoisation (#8). 62 | 63 | ## 3.0.1 - 2021/01/05 64 | 65 | ### Added 66 | 67 | - Added support for left recursion (#17). 68 | 69 | ### Changed 70 | 71 | - Add `$` symbol for matching EOF. 72 | - Parsers no longer fail if EOF is not met (unless specified with `$`). 73 | - `ParseResult` objects now return a list of `SyntaxErr`s. 74 | - `SyntaxErr` objects now return more detailed `MatchAttempts` rather than just strings. 75 | 76 | ### Removed 77 | 78 | - Removed useless expected rules computation (never worked properly). 
79 | 80 | ## 3.0.0 81 | 82 | Unpublished due to CD failures, can't be undone. All 3.0.0 changes are in 3.0.1 83 | 84 | ## 2.1.0 - 2020/09/29 85 | 86 | ### Added 87 | 88 | - Added static checks for banned match names 89 | - Added static checks for undefined rules (#13). 90 | 91 | ### Fixed 92 | 93 | - Fixed template string support in code blocks 94 | - Reduced size of npm package (#14). 95 | 96 | ## 2.0.1 - 2020/09/24 97 | 98 | ### Changed 99 | 100 | - Updated README with latest docs + screenshots 101 | 102 | ## 2.0.0 - 2020/09/23 103 | 104 | ### Added 105 | 106 | - Special error is raised when EOF is not reached during parse. 107 | - Numbers can now be used in names of IDs (not in first position). 108 | - Added `num-enums` flag to specify numeric kind enums. 109 | - Support for C-style comments (#2) 110 | - Add `--version` and `--help` CLI flags 111 | 112 | ### Changed 113 | 114 | - No longer need to escape braces ({}) in code sections in computed properties. (#12) 115 | - Type declarations for computed properties properly support whitespace. (#4) 116 | - `mark` parser method is now public. 117 | - Enums are now string valued by default. (#3) 118 | 119 | ### Fixed 120 | 121 | - Bump lodash dependency from 4.17.15 to 4.17.19 122 | - Name collision avoidance has been avoided. 123 | - Type expressions no longer require whitespace in computed properties. 124 | 125 | ## 1.3.2 - 2020/07/11 126 | 127 | ### Fixed 128 | 129 | - Add workaround for Safari/iOS/WebKit issues. See https://bugs.webkit.org/show_bug.cgi?id=214181 130 | for details on bug. 131 | 132 | ## 1.3.1 - 2020/04/13 133 | 134 | ### Fixed 135 | 136 | - Update typo in README 137 | - Fix off-by-one error in position tracking 138 | 139 | ## 1.3.0 - 2020/03/30 140 | 141 | ### Added 142 | 143 | - Documentation update. 144 | - Compile time regex correctness check. 145 | - Export parse function, don't always require explicit `Parser` instantiation. 
146 | - Add test() function to `Parser`, only checks if grammar matches, doesn't return AST. 147 | - Add special `@` rule for storing parser position on AST. 148 | - Add lots of tests. 149 | 150 | ### Fixed 151 | 152 | - CLI usage messages 153 | -------------------------------------------------------------------------------- /assets/calc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EoinDavey/tsPEG/bbd255334e0a1582a53e52cf62bc00d393b1b766/assets/calc.png -------------------------------------------------------------------------------- /assets/computed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EoinDavey/tsPEG/bbd255334e0a1582a53e52cf62bc00d393b1b766/assets/computed.png -------------------------------------------------------------------------------- /assets/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EoinDavey/tsPEG/bbd255334e0a1582a53e52cf62bc00d393b1b766/assets/example1.png -------------------------------------------------------------------------------- /assets/no_eof_symb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EoinDavey/tsPEG/bbd255334e0a1582a53e52cf62bc00d393b1b766/assets/no_eof_symb.png -------------------------------------------------------------------------------- /assets/sum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EoinDavey/tsPEG/bbd255334e0a1582a53e52cf62bc00d393b1b766/assets/sum.png -------------------------------------------------------------------------------- /assets/with_eof_symb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EoinDavey/tsPEG/bbd255334e0a1582a53e52cf62bc00d393b1b766/assets/with_eof_symb.png -------------------------------------------------------------------------------- /demos/calculator/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Example implementation of a CLI app which takes an expression 4 | like '(10+2) / 4' and outputs the answer. 5 | 6 | - `grammar.peg`: The tsPEG grammar specification. 7 | - `eval.ts`: Functions for evaluating the parse tree. 8 | - `cli.ts`: CLI wrapper. 9 | 10 | # Running 11 | 12 | - Run `tspeg grammar.peg parser.ts` to generate the parser. 13 | - `tsc -p .` to compile the project. 14 | - `node jsbuild/cli.js` to execute. 15 | -------------------------------------------------------------------------------- /demos/calculator/cli.ts: -------------------------------------------------------------------------------- 1 | import { evaluate } from './eval'; 2 | import { parse } from './parser'; 3 | 4 | import * as readline from 'readline'; 5 | 6 | const rl = readline.createInterface({ 7 | input: process.stdin, 8 | output: process.stdout, 9 | }); 10 | 11 | rl.question('', (s) => { 12 | const tree = parse(s); 13 | if(tree.errs.length > 0 || tree.ast === null) { 14 | for(const err of tree.errs){ 15 | console.error(err.toString()); 16 | } 17 | } else { 18 | console.log(evaluate(tree.ast)); 19 | } 20 | rl.close(); 21 | }); 22 | -------------------------------------------------------------------------------- /demos/calculator/eval.ts: -------------------------------------------------------------------------------- 1 | import * as Parser from './parser'; 2 | 3 | export function evaluate(input : Parser.EXPR) : number { 4 | return calcSum(input.s); 5 | } 6 | 7 | function calcInt(at : Parser.INT) : number { 8 | return parseInt(at.val); 9 | } 10 | 11 | function calcAtom(at : Parser.ATOM) : number { 12 | if(at.kind === Parser.ASTKinds.ATOM_1) 13 | return 
calcInt(at.val); 14 | return calcSum(at.val); 15 | } 16 | 17 | function calcFac(at : Parser.FAC) : number { 18 | return at.tail.reduce((x, y) => { 19 | if(y.op === '*') 20 | return x * calcAtom(y.sm); 21 | return x / calcAtom(y.sm); 22 | }, calcAtom(at.head)); 23 | } 24 | 25 | function calcSum(at : Parser.SUM) : number { 26 | return at.tail.reduce((x, y) => { 27 | if(y.op === '+') 28 | return x + calcFac(y.sm); 29 | return x - calcFac(y.sm); 30 | }, calcFac(at.head)); 31 | } 32 | -------------------------------------------------------------------------------- /demos/calculator/grammar.peg: -------------------------------------------------------------------------------- 1 | // A grammar for parsing integer calculator expressions which supports order of operations and 2 | // grouping with parentheses (brackets). 3 | // e.g. "10 + 5*2", "(20 - 15/3) - 2". 4 | 5 | // We use a $ marker to require that the match goes the whole way to the end of the input. 6 | EXPR := s=SUM $ 7 | 8 | // An expression at the top level is a sequence of summands A + B + ... + C (some of the +'s can be 9 | // -'s instead) where each summand might be a product of terms 10 | SUM := head=FAC tail={ op='\+|-' sm=FAC }* 11 | 12 | // A product is a sequence of terms A * B * .... * C (some of the *'s can be /'s instead) 13 | FAC := head=ATOM tail={ op='\*|/' sm=ATOM }* 14 | 15 | // Each term is either an integer or some new expression wrapped in parentheses. 16 | // We also eat up any whitespace at this point. 17 | ATOM := _ val=INT _ | _ '\(' val=SUM '\)' _ 18 | INT := val='-?[0-9]+' 19 | 20 | // This rule matches any whitespace. 
21 | _ := '\s*' 22 | -------------------------------------------------------------------------------- /demos/calculator/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2016", 4 | "module": "nodenext", 5 | "strict": true, 6 | "outDir": "jsbuild" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /demos/calculator_with_computed_properties/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This demo is functionally the same as the `calculator` but uses computed properties 4 | to compute the expression's value rather than separate evaluation functions. All of 5 | the computation logic is included in `grammar.peg`. 6 | 7 | # Running 8 | 9 | - Run `tspeg grammar.peg parser.ts` to generate the parser. 10 | - `tsc -p .` to compile the project. 11 | - `node jsbuild/cli.js` to execute. 12 | -------------------------------------------------------------------------------- /demos/calculator_with_computed_properties/cli.ts: -------------------------------------------------------------------------------- 1 | import { parse } from './parser'; 2 | 3 | import * as readline from 'readline'; 4 | 5 | const rl = readline.createInterface({ 6 | input: process.stdin, 7 | output: process.stdout, 8 | }); 9 | 10 | rl.question('', (s) => { 11 | const tree = parse(s); 12 | if(tree.errs.length > 0 || tree.ast === null) { 13 | for(const err of tree.errs){ 14 | console.error(err.toString()); 15 | } 16 | } else { 17 | console.log(tree.ast.value); 18 | } 19 | rl.close(); 20 | }); 21 | -------------------------------------------------------------------------------- /demos/calculator_with_computed_properties/grammar.peg: -------------------------------------------------------------------------------- 1 | // A grammar for parsing integer calculator expressions which 2 | // supports order of operations
and grouping with parentheses (brackets). 3 | // e.g. "10 + 5*2", "(20 - 15/3) - 2". 4 | 5 | // We use a $ marker to require that the match goes the whole way to the end of the input. 6 | EXPR := s=SUM $ 7 | .value = number { return s.value; } 8 | 9 | // An expression at the top level is a sequence of summands 10 | // A + B + ... + C (some of the + can be - instead) 11 | // where each summand might be a product of terms 12 | SUM := head=FAC tail={ op='\+|-' sm=FAC }* 13 | .value = number { 14 | return this.tail.reduce( 15 | (x, y) => y.op === '+' ? x + y.sm.value : x - y.sm.value, 16 | this.head.value 17 | ); 18 | } 19 | 20 | // A product is a sequence of terms 21 | // A * B * .... * C (some of the * can be / instead) 22 | FAC := head=ATOM tail={ op='\*|/' sm=ATOM }* 23 | .value = number { 24 | return this.tail.reduce( 25 | (x, y) => y.op === '*' ? x * y.sm.value : x / y.sm.value, 26 | this.head.value 27 | ); 28 | } 29 | 30 | // Each term is either an integer or some new expression wrapped 31 | // in parentheses. 32 | // We also eat up any whitespace at this point. 
33 | ATOM := _ val=INT _ 34 | .value = number { return this.val.value; } 35 | | _ '\(' val=SUM '\)' _ 36 | .value = number { return this.val.value; } 37 | 38 | INT := val='-?[0-9]+' 39 | .value = number { return parseInt(this.val); } 40 | 41 | // This rule matches any whitespace 42 | _ := '\s*' 43 | -------------------------------------------------------------------------------- /demos/calculator_with_computed_properties/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2016", 4 | "module": "nodenext", 5 | "strict": true, 6 | "outDir": "jsbuild" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /demos/json_parser/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This is an implementation of a JSON parser based on the 4 | [RFC 4627](https://www.ietf.org/rfc/rfc4627.txt) specification of JSON. It is almost 5 | a direct implementation of the grammar specified there so the RFC can be used to understand 6 | the grammar file. 7 | 8 | This grammar uses lots of tsPEG features including computed properties, the header 9 | and regex modifiers. 10 | 11 | ## Running 12 | 13 | You can generate the parser and run the test with: 14 | 1. `tspeg grammar.peg parser.ts`. 15 | 2. `tsc -p .`. 16 | 2. `node jsbuild/test.js`. 17 | -------------------------------------------------------------------------------- /demos/json_parser/grammar.peg: -------------------------------------------------------------------------------- 1 | --- 2 | // A recursive type declaration of a JSON value. 
3 | type value = boolean | null | string | number | value[] | {[key: string]: value} 4 | --- 5 | 6 | JSON_text := OBJECT | ARRAY 7 | 8 | VALUE := OBJECT | ARRAY | NUMBER | STRING 9 | | s='true|false' .value = boolean { return s==='true';} 10 | | s='null' .value = null { return null; } 11 | 12 | OBJECT := BEGIN_OBJECT body={h=MEMBER t={VALUE_SEPARATOR v=MEMBER}*}? END_OBJECT 13 | .value = {[index: string]: value} { 14 | return this.body === null ? {} 15 | : Object.fromEntries([[this.body.h.s.value, this.body.h.v.value]].concat( 16 | this.body.t.map(m => [m.v.s.value, m.v.v.value]))); 17 | } 18 | BEGIN_OBJECT := _ '{' _ 19 | END_OBJECT := _ '}' _ 20 | VALUE_SEPARATOR := _ ',' _ 21 | MEMBER := s=STRING NAME_SEPARATOR v=VALUE 22 | NAME_SEPARATOR := _ ':' _ 23 | 24 | ARRAY := BEGIN_ARRAY body={h=VALUE t={VALUE_SEPARATOR v=VALUE}*}? END_ARRAY 25 | .value = value[] { 26 | return this.body === null ? [] : [this.body.h.value].concat(this.body.t.map(x=>x.v.value)); 27 | } 28 | BEGIN_ARRAY := _ '\[' _ 29 | END_ARRAY := _ ']' _ 30 | 31 | NUMBER := m='-'? i=INT f=FRAC? e=EXP? 32 | // We cheat slightly and use Javascripts parseFloat to handle converting the number string 33 | // to a float. 34 | .value = number { return parseFloat((this.m??'') + this.i + (this.f??'') + (this.e??'')); } 35 | INT := '0|[1-9][0-9]*' 36 | FRAC := '\.[0-9]+' 37 | EXP := '[eE][+-]?[0-9]+' 38 | 39 | STRING := '"' s=CHARS '"' 40 | // We need to interpret any escaped characters which we do with this function. 
41 | .value = string { 42 | let out = ""; 43 | const esc = { 44 | '"': '"', '\\': '\\', '/': '/', 'b': '\b', 45 | 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', 46 | } as {[key: string]: string}; 47 | for (let i = 0; i < s.length; ++i) { 48 | if (s[i] !== "\\") { 49 | out += s[i]; 50 | continue; 51 | } 52 | out += esc[s[i+1]]; 53 | ++i; 54 | } 55 | return out; 56 | } 57 | 58 | CHARS := '([^"\\\u0000-\u001F]|\\["\\/bfnrt])*'u 59 | 60 | // Whitespace 61 | _ := '\s*' 62 | -------------------------------------------------------------------------------- /demos/json_parser/test.ts: -------------------------------------------------------------------------------- 1 | import { parse, Parser } from './parser'; 2 | 3 | const obj = { 4 | 'number': 12.57e-1, 5 | 'boolean': true, 6 | 'string': "astring", 7 | 'array': [1,2,3], 8 | 'obj': {'a': 1, 'b': 2, 'c': 3}, 9 | 'nested array:': [[1,2,3], [4,5,6]], 10 | 'nested objects': {'a': {'b': {'c': 1}}} 11 | }; 12 | 13 | const json = JSON.stringify(obj); 14 | if(json !== JSON.stringify(parse(JSON.stringify(obj)).ast!.value)){ 15 | console.error('JSON parsing test failed'); 16 | process.exit(1); 17 | } 18 | -------------------------------------------------------------------------------- /demos/json_parser/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2016", 4 | "module": "nodenext", 5 | "strict": true, 6 | "outDir": "jsbuild" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /demos/lookahead/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This example uses the & operator to match **but not consume** input, allowing us to match a context sensitive language. 4 | 5 | This parser matches any sequence like 'aaaabbbbcccc'' where the number of a's, b's and c's are 6 | all equal. 7 | e.g. 
We want to match 'aabbcc', 'aaabbbccc' but not 'aaaaabbc' or 'aaabbb'. 8 | This set of strings is not a context-free language 9 | (See [Pumping Lemma](https://en.wikipedia.org/wiki/Pumping_lemma_for_context-free_languages)) 10 | but we can still define a parser for it by using the lookahead operator &. 11 | 12 | # Running 13 | - `tspeg grammar.peg parser.ts` to generate the parser. 14 | - `tsc -p .` to compile. 15 | - `node jsbuild/test.js` to run tests. 16 | -------------------------------------------------------------------------------- /demos/lookahead/grammar.peg: -------------------------------------------------------------------------------- 1 | // This grammar matches any sequence like 'aaaabbbbcccc'' where the number of a's, b's and c's are 2 | // all equal. 3 | // e.g. We want to match 'aabbcc', 'aaabbbccc' but not 'aaaaabbc' or 'aaabbb'. 4 | // This set of strings is not a context-free language 5 | // (See https://en.wikipedia.org/wiki/Pumping_lemma_for_context-free_languages) but we can still 6 | // define a parser for it by using the lookahead operator &. 7 | 8 | // Our strategy is to first use the 'A' rule which matches sequences aaabbb where there are 9 | // the same number of a's and b's. We also check that there is at least one 'c' after the a's and 10 | // b's. The trick is then that we apply the lookahead operator & to this match so that 11 | // after we successfully ensure there are the same number of a's and b's, we return back to the 12 | // start of the input. 13 | // Next we skip over all the a's (and require there are not 0 of them) and check that there are 14 | // the same number of b's as c's. 15 | // If we successfully reach the end of the input then we must have the same number of a's, b's 16 | // and c's as required. 17 | S := &{A 'c'} 'a'+ B $ 18 | 19 | // This rule matches any sequence 'a..ab..b' with the same number of a's and b's. 20 | A := 'a' A? 
'b' 21 | // This rule matches any sequence 'b..bc..c' with the same number of b's and c's. 22 | B := 'b' B? 'c' 23 | -------------------------------------------------------------------------------- /demos/lookahead/test.ts: -------------------------------------------------------------------------------- 1 | import { parse, SyntaxErr } from './parser'; 2 | 3 | function failWithErrs(errs: SyntaxErr[]) { // Print every syntax error, then exit with status 1. 4 | console.error('Failed'); 5 | for(const err of errs){ 6 | console.error(err.toString()); 7 | } 8 | process.exit(1); 9 | } 10 | 11 | function failWithString(s: string) { // Print a failure message, then exit with status 1. 12 | console.error('Failed', s); 13 | process.exit(1); 14 | } 15 | 16 | console.log('First check the happy path:'); 17 | const t1 = parse('abc'); 18 | if(t1.errs.length > 0){ 19 | failWithErrs(t1.errs); 20 | } 21 | const t2 = parse('aabbcc'); 22 | if(t2.errs.length > 0){ 23 | failWithErrs(t2.errs); 24 | } 25 | const t3 = parse('aaabbbccc'); 26 | if(t3.errs.length > 0){ 27 | failWithErrs(t3.errs); 28 | } 29 | console.log('Happy path succeeded.'); 30 | console.log('Now check unhappy path'); 31 | if(parse('ab').errs.length === 0){ 32 | failWithString('"ab" Shouldn\'t succeed'); 33 | } 34 | if(parse('bc').errs.length === 0){ 35 | failWithString('"bc" Shouldn\'t succeed'); 36 | } 37 | if(parse('aabc').errs.length === 0){ 38 | failWithString('"aabc" Shouldn\'t succeed'); 39 | } 40 | if(parse('aabbc').errs.length === 0){ 41 | failWithString('"aabbc" Shouldn\'t succeed'); 42 | } 43 | if(parse('aabbccc').errs.length === 0){ 44 | failWithString('"aabbccc" Shouldn\'t succeed'); 45 | } 46 | console.log('Unhappy path succeeded.'); 47 | -------------------------------------------------------------------------------- /demos/lookahead/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2016", 4 | "module": "nodenext", 5 | "strict": true, 6 | "outDir": "jsbuild" 7 | } 8 | } 9 |
-------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import tslint from "typescript-eslint"; 2 | import jslint from "@eslint/js"; 3 | 4 | export default [ 5 | { 6 | ignores: ["**/node_modules", "**/tsbuild", "src/**/parser.ts", "src/meta.ts"], 7 | }, 8 | jslint.configs.recommended, 9 | ...tslint.configs.strict, 10 | { 11 | rules: { 12 | eqeqeq: ["error", "always"], 13 | "no-promise-executor-return": "error", 14 | "no-template-curly-in-string": "error", 15 | 16 | "no-else-return": ["error", { 17 | allowElseIf: false, 18 | }], 19 | 20 | "no-eval": "error", 21 | "no-implied-eval": "error", 22 | "no-loop-func": "error", 23 | "no-useless-concat": "error", 24 | "no-shadow": "error", 25 | 26 | "brace-style": ["warn", "1tbs", { 27 | allowSingleLine: true, 28 | }], 29 | 30 | "comma-spacing": "warn", 31 | "key-spacing": "warn", 32 | semi: ["error", "always"], 33 | "space-infix-ops": "warn", 34 | 35 | "sort-imports": ["warn", { 36 | ignoreCase: false, 37 | ignoreMemberSort: false, 38 | ignoreDeclarationSort: true, 39 | memberSyntaxSortOrder: ["none", "single", "multiple", "all"], 40 | allowSeparatedGroups: false, 41 | }], 42 | 43 | "arrow-spacing": "warn", 44 | "comma-dangle": ["warn", "always-multiline"], 45 | "no-var": "error", 46 | "prefer-const": "warn", 47 | }, 48 | }, 49 | { 50 | files: ["**/*.test.ts"], 51 | rules: { 52 | "@typescript-eslint/no-non-null-assertion": "off", 53 | }, 54 | }, 55 | ]; 56 | -------------------------------------------------------------------------------- /gen-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | for dir in src/test/*; do 6 | if ! [[ -d ${dir} ]]; then 7 | continue 8 | fi 9 | if ! 
[[ -f ${dir}/grammar.peg ]]; then 10 | continue 11 | fi 12 | if [[ -e ${dir}/flags.txt ]]; then 13 | cat ${dir}/flags.txt | xargs -I {} node ./tsbuild/cli.js {} ${dir}/grammar.peg ${dir}/parser.ts 14 | else 15 | node ./tsbuild/cli.js ${dir}/grammar.peg ${dir}/parser.ts 16 | fi 17 | done; 18 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tspeg", 3 | "version": "3.3.2", 4 | "homepage": "https://github.com/EoinDavey/tsPEG", 5 | "description": "TypeScript parser generator", 6 | "author": "Eoin Davey ", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/EoinDavey/tsPEG.git" 10 | }, 11 | "main": "index.js", 12 | "scripts": { 13 | "gen-tests": "./gen-tests.sh", 14 | "build": "tsc && npm run gen && tsc", 15 | "gen": "cp src/meta.ts /tmp/meta.ts && node ./tsbuild/cli.js src/metagrammar.peg src/meta.ts", 16 | "test": "./gen-tests.sh && jest && npm run lint-all -- --max-warnings=0", 17 | "clean": "rm -rf tsbuild", 18 | "lint": "eslint", 19 | "lint-all": "eslint src/" 20 | }, 21 | "license": "MPL-2.0", 22 | "files": [ 23 | "tsbuild/*.js" 24 | ], 25 | "devDependencies": { 26 | "@eslint/js": "^9.14.0", 27 | "@types/jest": "^29.5.0", 28 | "@types/node": "^22.9.0", 29 | "@types/yargs": "^17.0.3", 30 | "eslint": "^9.14.0", 31 | "jest": "^29.5.0", 32 | "setanta": "^0.10.0", 33 | "ts-jest": "^29.0.5", 34 | "typescript": "^5.0.2", 35 | "typescript-eslint": "^8.13.0" 36 | }, 37 | "bin": { 38 | "tspeg": "tsbuild/cli.js" 39 | }, 40 | "jest": { 41 | "preset": "ts-jest", 42 | "testPathIgnorePatterns": [ 43 | "/node_modules/", 44 | "^.+\\.js$", 45 | "/demos/" 46 | ] 47 | }, 48 | "dependencies": { 49 | "yargs": "^17.1.1" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/checks.ts: -------------------------------------------------------------------------------- 1 | import 
{ ASTKinds, PosInfo } from "./meta"; 2 | import { Grammar, altNames } from "./util"; 3 | 4 | // TODO Support returning multiple CheckErrors 5 | 6 | export class CheckError extends Error { // Failure reported by a static grammar check; message includes the position when one is given 7 | constructor(public s: string, public pos?: PosInfo) { 8 | super(s); 9 | this.name = "CheckError"; 10 | this.message = pos 11 | ? `Error at line ${pos.line}:${pos.offset}: ${s}` 12 | : `Error: ${s}`; 13 | } 14 | } 15 | 16 | export interface Checker { 17 | Check(g: Grammar, input: string): CheckError | null; // null means the check passed 18 | } 19 | 20 | const bannedNames: Set<string> = new Set(['kind']); 21 | export const BannedNamesChecker: Checker = { // Rejects named matches whose name is in bannedNames 22 | Check: (g: Grammar): CheckError | null => { 23 | for(const ruledef of g) { 24 | for(const alt of ruledef.rule) { 25 | for(const matchspec of alt.matches) { 26 | if(!matchspec.named) 27 | continue; 28 | if(bannedNames.has(matchspec.named.name)) 29 | return new CheckError(`'${matchspec.named.name}' is not` + 30 | ' an allowed match name', matchspec.named.start); 31 | } 32 | } 33 | } 34 | return null; 35 | }, 36 | }; 37 | 38 | // Check that all referenced rule names exist 39 | export const RulesExistChecker: Checker = { 40 | Check: (g: Grammar): CheckError | null => { 41 | const ruleNames: Set<string> = new Set(); 42 | for(const ruledef of g) 43 | ruleNames.add(ruledef.name); 44 | for(const ruledef of g) { 45 | for(const alt of ruledef.rule) { 46 | for(const match of alt.matches) { 47 | if(match.rule.kind === ASTKinds.SPECIAL) 48 | continue; 49 | const at = match.rule.pre.at; 50 | if(at.kind !== ASTKinds.ATOM_1) 51 | continue; 52 | if(!ruleNames.has(at.name)) 53 | return new CheckError(`Rule '${at.name}' is not defined`, at.start); 54 | } 55 | } 56 | } 57 | return null; 58 | }, 59 | }; 60 | 61 | // get the correct rule collision name error, based on the 62 | // name of the rule, if the rule is called `RULE_N`, then 63 | // we know that this is a collision of rule names and alternative names 64 | // (It is possible that this could be triggered falsely, but they would 65 | //
have to declare >= 2 rules, both called `RULE_N`, for same N, 66 | // which I don't worry about 67 | function ruleCollisionNameErr(ruleName: string) : CheckError { 68 | const match = ruleName.match(/^(.*)_([0-9]+)$/); // group 2 captures the whole numeric suffix, not just its last digit 69 | if(match === null) 70 | return new CheckError(`Rule already defined: "${ruleName}"`); 71 | const baseRule = match[1]; 72 | const index = match[2]; 73 | return new CheckError(`Rule "${baseRule}" declared with >= ${index} alternatives and rule "${ruleName}" should not both be declared`); 74 | } 75 | 76 | export const NoRuleNameCollisionChecker: Checker = { 77 | Check: (g: Grammar): CheckError | null => { 78 | const seen: Set<string> = new Set(); 79 | for(const ruledef of g) { 80 | if(seen.has(ruledef.name)) 81 | return ruleCollisionNameErr(ruledef.name); 82 | 83 | // Stop after adding ruledef.name if === 1 alternative 84 | // as altNames(ruledef) will only contain ruledef.name 85 | seen.add(ruledef.name); 86 | if(ruledef.rule.length === 1) 87 | continue; 88 | for(const name of altNames(ruledef)) { 89 | if(seen.has(name)) 90 | return ruleCollisionNameErr(ruledef.name); 91 | seen.add(name); 92 | } 93 | } 94 | return null; 95 | }, 96 | }; 97 | 98 | const keywords: string[] = [ 99 | "break", "case", "catch", "class", "const", "continue", "debugger", 100 | "default", "delete", "do", "else", "enum", "export", "extends", 101 | "false", "finally", "for", "function", "if", "import", "in", 102 | "instanceof", "new", "null", "return", "super", "switch", "this", 103 | "throw", "true", "try", "typeof", "var", "void", "while", "with", 104 | "as", "implements", "interface", "let", "package", "private", 105 | "protected", "public", "static", "yield", 106 | "any", "boolean", "constructor", "declare", "get", "module", 107 | "require", "number", "set", "string", "symbol", "type", "from", "of", 108 | "object", 109 | ]; 110 | export const NoKeywords: Checker = { 111 | Check: (g: Grammar): CheckError | null => { 112 | for(const ruledef of g){ 113 |
// Default command: read the grammar file, build the parser and write it either to
// output_file or, when no output file is given, to stdout.
// `<grammar>` is the required positional that backs the `grammar` declaration below.
// On any failure the process exit code is set to 1 and the error is printed to stderr.
yargs.command("$0 <grammar> [output_file]", "Build parser from grammar",
    _yargs => {
        _yargs.positional('grammar', {
            describe: 'Grammar input file',
            type: 'string',
        }).positional('output_file', {
            describe: 'Output file: If provided write the generated parser to this file, otherwise output to stdout.',
            type: 'string',
        });
        return _yargs.options({
            "num-enums": {
                type: "boolean",
                default: false,
                desc: "Use numeric enums for AST kinds",
            },
            "enable-memo": {
                type: "boolean",
                default: false,
                desc: "Enable memoisation, get better performance for increased memory usage",
            },
            "include-grammar-comment": {
                type: "boolean",
                default: true,
                desc: "Include the input grammar as a comment at the start of the parser file.",
            },
            "regex-flags": {
                type: "string",
                default: "",
                desc: "Additional regex flags to be supplied to regex literals. e.g. " +
                    "--regex-flags=u will enable unicode support",
            },
        });
    },
    argv => {
        const grammarFile = argv.grammar as string;
        const outputFile = argv.output_file as string | undefined;
        const regexFlags = argv["regex-flags"];
        const includeGrammar = argv["include-grammar-comment"];
        try {
            // Validate flags before doing any work so bad input fails fast
            validateRegexFlags(regexFlags);
            const inGram = fs.readFileSync(grammarFile, { encoding: "utf8" });
            validateIncludeGrammarFlag(includeGrammar, inGram);
            const parser = buildParser(inGram, argv["num-enums"], argv["enable-memo"], regexFlags, includeGrammar);
            if(outputFile !== undefined) {
                fs.writeFileSync(outputFile, parser);
            } else {
                process.stdout.write(parser);
            }
        } catch(err) {
            process.exitCode = 1;
            if(err instanceof CheckError) {
                console.error(err.message);
            } else if(err instanceof SyntaxErrs) {
                // Report on stderr like every other failure: stdout is where the
                // generated parser goes, so diagnostics must not be mixed into it.
                for(const se of err.errs)
                    console.error(se.toString());
            } else {
                console.error(err);
            }
        }
    })
    .strict()
    .scriptName("tspeg")
    .help()
    .parse();
// matchIsNullableInCtx returns if a given `MATCH` is nullable (can succeed without
// consuming input) within the given context of atoms currently considered nullable.
// Throws a CheckError if the match negates an atom that is nullable in the context.
function matchIsNullableInCtx(match: MATCH, nullableAtoms: Set<ATOM>): boolean {
    // SPECIAL matches are always treated as nullable
    if(match.kind === ASTKinds.SPECIAL)
        return true;
    // match is a POSTOP

    // `?` and `*` both accept zero repetitions
    if(match.op?.kind === ASTKinds.POSTOP_$0_1 && (match.op.op === "?" || match.op.op === "*"))
        return true;
    const preop = match.pre;
    // Negations of nullables are invalid grammar expressions
    if(preop.op === "!" && nullableAtoms.has(preop.at))
        throw new CheckError("Cannot negate a nullable expression", preop.start);
    // A range whose lower bound is 0 (e.g. `[0,3]`) accepts zero repetitions just
    // like `*`, so it is nullable as well. Checked after the negation test above so
    // that the existing invalid-negation error is still raised for ranged matches.
    if(match.op?.kind === ASTKinds.RANGESPEC && match.op.lb === 0)
        return true;
    // Always nullable, doesn't match anything
    if(preop.op !== null)
        return true;
    if(nullableAtoms.has(preop.at))
        return true;
    return false;
}
// nullableAtomSet computes the set of nullable atoms of a grammar. It starts from an
// empty context and keeps running the update step over every rule until a full pass
// adds nothing new, i.e. a fixed point has been reached.
export function nullableAtomSet(gram: Grammar): Set<ATOM> {
    // Simple repeated sweeps; not efficient, but that is not a concern here
    const ctx: Set<ATOM> = new Set();
    let sizeBeforePass: number;
    do {
        sizeBeforePass = ctx.size;
        gram.forEach(def => updateNullableAtomsInRule(def.rule, gram, ctx));
    } while(ctx.size !== sizeBeforePass);
    return ctx;
}
// transitiveClosure extends the graph in place (Floyd Warshall style) so that each
// node's edge set also contains every node reachable through intermediate nodes.
// Only names that are keys of the graph are ever added. The same Map is returned.
function transitiveClosure(grph: Map<string, Set<string>>): Map<string, Set<string>> {
    const nodeNames = Array.from(grph.keys());
    grph.forEach((viaTargets, via) => {
        grph.forEach(sourceTargets => {
            // Only sources that already reach `via` can be extended through it
            if(!sourceTargets.has(via))
                return;
            for(const dest of nodeNames) {
                if(viaTargets.has(dest))
                    sourceTargets.add(dest);
            }
        });
    });
    return grph;
}
// disjointCycleSets groups left recursion cycles into disjoint sets using a
// union-find structure: two cycles land in the same set when they share a rule name,
// directly or through a chain of other cycles. Splitting the cycles this way breaks
// the marking problem into independent subproblems where possible.
export function disjointCycleSets(cycles: string[][]): string[][][] {
    // Parent pointers, keyed by cycle array identity
    const parent: Map<string[], string[]> = new Map();

    // Returns the representative of `node`, pointing every cycle on the walked
    // path (the representative included) directly at it.
    const find = (node: string[]): string[] => {
        const walked: string[][] = [];
        let cur = node;
        for(;;) {
            walked.push(cur);
            const next = parent.get(cur) ?? cur;
            if(next === cur)
                break;
            cur = next;
        }
        for(const visited of walked)
            parent.set(visited, cur);
        return cur;
    };

    const union = (a: string[], b: string[]) => {
        const rootA = find(a);
        const rootB = find(b);
        parent.set(rootA, rootB);
    };

    // Pair every cycle with a set of its names so overlap checks are cheap
    const tagged = cycles.map(cyc => ({ cyc, names: new Set(cyc) }));
    for(const left of tagged) {
        for(const right of tagged) {
            if(left.cyc.some(name => right.names.has(name)))
                union(left.cyc, right.cyc);
        }
    }

    // Each cycle that is its own parent heads one output set
    const sets: string[][][] = [];
    for(const rep of cycles) {
        if(parent.get(rep) === rep)
            sets.push(cycles.filter(cyc => find(cyc) === rep));
    }
    return sets;
}
// getRulesToMarkForBoundedRecursion returns the names of the rules that should be
// computed with bounded recursion memoisation, which is how left recursion is
// supported. The scheme only works when exactly one rule of every left recursion
// cycle is marked, so for each disjoint set of cycles this brute forces subsets of
// the involved rules until one satisfies that constraint.
// Throws a CheckError when a set involves more than 18 rules. If no subset satisfies
// the constraint for a set, no rule of that set is marked.
export function getRulesToMarkForBoundedRecursion(g: Grammar): Set<string> {
    const marked: Set<string> = new Set();

    const cycleSets = disjointCycleSets(leftRecCycles(g, nullableAtomSet(g)));

    // Every disjoint set of cycles is an independent subproblem
    for(const cycleSet of cycleSets) {
        // Distinct rule names appearing anywhere in this set of cycles
        const ruleNames = [...new Set(cycleSet.reduce((acc, cyc) => acc.concat(cyc)))];
        const count = ruleNames.length;
        // Keep the search brute-forceable: 2^18 == 262144 candidate subsets at most
        if(count > 18)
            throw new CheckError("Left recursion is too complex to solve");

        const limit = 1 << count;
        for(let mask = 0; mask < limit; ++mask) {
            // Bit i of mask decides whether ruleNames[i] is in the candidate subset
            const chosen = new Set(ruleNames.filter((_, bit) => (mask & (1 << bit)) !== 0));

            // Valid only if each cycle contains exactly one chosen rule
            const exactlyOnePerCycle = cycleSet.every(cyc => cyc.filter(r => chosen.has(r)).length === 1);
            if(exactlyOnePerCycle) {
                chosen.forEach(r => marked.add(r));
                break;
            }
        }
    }
    return marked;
}
rules=RULEDEF+ $ 4 | HDR := '---' content='((?!---)(.|\r\n|\n))*' '---' 5 | RULEDEF := _ namestart=@ name=NAME nameend=@ _ ':=' _ rule=RULE _ 6 | RULE := head=ALT tail={_ '\|' _ alt=ALT }* 7 | .list = ALT[] { return [this.head, ...this.tail.map((x) => x.alt)]; } 8 | ALT := matches=MATCHSPEC+ attrs=ATTR* 9 | MATCHSPEC := _ named={start=@ name=NAME _ '=' _}? rule=MATCH // TODO rename to match 10 | MATCH := SPECIAL | POSTOP 11 | SPECIAL := op='@' 12 | POSTOP := pre=PREOP op={ op='\+|\*|\?' | RANGESPEC }? 13 | .optional = boolean { return this.op?.kind === ASTKinds.POSTOP_$0_1 && this.op.op === '?';} 14 | PREOP := start=@ op='\&|!'? at=ATOM 15 | // Negative lookahead is used here to allow no requirement for semicolons 16 | // to denote end of rule definition 17 | ATOM := start=@ name=NAME !'\s*:=' 18 | | match=STRLIT 19 | | '{' _ sub=RULE _ '}' 20 | .name = string | null { return null; } 21 | | EOF 22 | EOF := symb='\$' 23 | ATTR := _ '\.' name=NAME _ '=' _ type=TS_TYPE _ code=CODE_SECTION 24 | NAME := '[a-zA-Z_][a-zA-Z0-9_]*' 25 | STRLIT := start=@ '\'' val='([^\'\\]|(\\.))*' '\'' mods='[mius]*' 26 | RANGESPEC := '\[\s*' a='[0-9]+' u={',\s*' b='[0-9]+'? }? '\s*\]' 27 | .a_val = number { return parseInt(this.a); } 28 | .b_val = number | null { return this.u !== null && this.u.b !== null ? parseInt(this.u.b) : null; } 29 | .lb = number { return this.a_val; } 30 | .ub = number { return this.b_val ?? (this.u !== null ? -1 : this.lb); } 31 | 32 | // Whitespace definition includes traditional whitespace 33 | // and // comments. 34 | _ := '(?:\s|(?:\/\/.*(?:\r\n|\n|$)))*' 35 | 36 | // Grammar to match TypeScript type defs 37 | 38 | TS_TYPE := _ start=@ { TS_FUNCTION | TS_CONSTRUCTOR | TS_EXPR } end=@ 39 | 40 | TS_EXPR := _ TS_PRIM {_ '[&|]' TS_PRIM }* 41 | TS_PRIM := { 42 | '\(' _ TS_TYPE _ '\)' 43 | | TS_TYPE_QUERY 44 | | TS_TYPE_REF 45 | | TS_PROPERTY_NAME 46 | | '\{' {_ TS_TYPE_MEMBER {_ '[;,]' _ TS_TYPE_MEMBER }* _ '[;,]?' }? 
_ '\}' 47 | | '\[' _ { _ TS_TYPE {_ ',' _ TS_TYPE}* }? _ '\]' 48 | } '\[\]'* // Optional trailing []s for array type 49 | 50 | TS_TYPE_REF := _ NAME {'\.' NAME}* {_ TS_GENERIC_ARGS}? 51 | TS_TYPE_QUERY := _ 'typeof' &_ _ NAME {'\.' NAME}* 52 | 53 | TS_FUNCTION := _ TS_GENERIC_PARAMS? _ '\(' _ TS_PARAM_LIST? _ '\)' _ '=>' _ TS_TYPE 54 | TS_CONSTRUCTOR := _ 'new' _ TS_FUNCTION 55 | 56 | TS_GENERIC_PARAMS := _ '<' _ {TS_GENERIC_PARAM {_ ',' _ TS_GENERIC_PARAM}* }? _ '>' 57 | TS_GENERIC_PARAM := _ NAME {_ 'extends' _ TS_TYPE}? 58 | TS_GENERIC_ARGS := _ '<' _ {TS_TYPE {_ ',' _ TS_TYPE}* }? _ '>' 59 | 60 | TS_PARAM_LIST := _ TS_REQUIRED_PARAMS {_ ',' _ TS_OPTIONAL_PARAMS}? {_ ',' _ TS_REST_PARAM}? 61 | | _ TS_OPTIONAL_PARAMS {_ ',' _ TS_REST_PARAM}? 62 | | _ TS_REST_PARAM 63 | 64 | TS_REQUIRED_PARAMS := _ TS_REQUIRED_PARAM {_ ',' _ TS_REQUIRED_PARAM}* 65 | TS_REQUIRED_PARAM := _ NAME _ ':' _ TS_TYPE 66 | 67 | TS_OPTIONAL_PARAMS := _ TS_OPTIONAL_PARAM _ {',' _ TS_OPTIONAL_PARAM}* 68 | TS_OPTIONAL_PARAM := _ NAME '\?' _ ':' _ TS_TYPE 69 | 70 | TS_REST_PARAM := _ '\.\.\.' _ NAME _ ':' _ TS_TYPE 71 | 72 | TS_TYPE_MEMBER := TS_PROPERTY_NAME '\??' _ ':' _ TS_TYPE 73 | | _ TS_GENERIC_PARAMS? _ '\(' _ TS_PARAM_LIST? _ '\)' _ ':' _ TS_TYPE 74 | | _ 'new' &_ TS_GENERIC_PARAMS? _ '\(' _ TS_PARAM_LIST? _ '\)' _ ':' _ TS_TYPE 75 | | _ '\[' _ NAME _ ':' _ NAME _ '\]' _ ':' _ TS_TYPE 76 | | _ NAME '\??' _ TS_GENERIC_PARAMS? _ '\(' _ TS_PARAM_LIST? _ '\)' _ ':' _ TS_TYPE 77 | 78 | TS_PROPERTY_NAME := NAME | TS_STRING | TS_NUM 79 | TS_STRING := '"' val='([^"\\]|(\\.))*' '"' 80 | | '\'' val='([^\'\\]|(\\.))*' '\'' 81 | | '`' val='([^`\\]|(\\.))*' '`' 82 | TS_NUM := '-?[0-9]+(?:\.[0-9]+)?' 83 | 84 | // Grammar to match code section without escaped braces 85 | // Logic is based off braces can only appear without matching brace in strings. 86 | 87 | CODE_SECTION := _ '\{' start=@ CODE_REC? 
// atomRule returns the source text of the parser expression that matches the given atom:
// a call to the named rule's match function, the EOF matcher, a regexAccept call for a
// string/regex literal, or a call to the match function of a named subrule.
// Throws (via assertValidRegex) if a regex literal does not compile.
export function atomRule(at: ATOM): string {
    if (at.kind === ASTKinds.ATOM_1)
        return `this.match${at.name}($$dpth + 1, $$cr)`;
    if(at.kind === ASTKinds.EOF)
        return 'this.match$EOF($$cr)';
    if (at.kind === ASTKinds.ATOM_2) {
        // Ensure the regex is valid; pass the literal's start position so the
        // resulting error points at the offending literal, as leftrec.ts does.
        const mtch = at.match;
        assertValidRegex(mtch.val, mtch.start);
        // Wrapped in a non-capture group; regexAccept strips it again for error reporting
        const reg = "(?:" + mtch.val + ")";
        return `this.regexAccept(String.raw\`${escapeBackticks(reg)}\`, "${mtch.mods}", $$dpth + 1, $$cr)`;
    }
    // Remaining case is a subrule, whose name is only set once extractRules has run
    const subname = at.name;
    if (subname)
        return `this.match${subname}($$dpth + 1, $$cr)`;
    return "ERR";
}
[ 22 | "* INPUT GRAMMAR:", 23 | ...opts.inputStr.split(/\r?\n/) 24 | .filter(x => x !== "") 25 | .map(x => "* " + x), 26 | ] 27 | : [], 28 | "*/", 29 | ...opts.header, 30 | "type Nullable = T | null;", 31 | "type $$RuleType = () => Nullable;", 32 | "export interface ASTNodeIntf {", 33 | [ 34 | "kind: ASTKinds;", 35 | ], 36 | "}", 37 | ...opts.kinds, 38 | ...opts.ruleClasses, 39 | "export class Parser {", 40 | [ 41 | "private readonly input: string;", 42 | "private pos: PosInfo;", 43 | "private negating: boolean = false;", 44 | "private memoSafe: boolean = true;", 45 | "constructor(input: string) {", 46 | [ 47 | "this.pos = {overallPos: 0, line: 1, offset: 0};", 48 | "this.input = input;", 49 | ], 50 | "}", 51 | "public reset(pos: PosInfo) {", 52 | [ 53 | "this.pos = pos;", 54 | ], 55 | "}", 56 | "public finished(): boolean {", 57 | [ 58 | "return this.pos.overallPos === this.input.length;", 59 | ], 60 | "}", 61 | ...opts.memoClearFn, 62 | ...opts.memos, 63 | ...opts.ruleParseFns, 64 | "public mark(): PosInfo {", 65 | [ 66 | "return this.pos;", 67 | ], 68 | "}", 69 | "// @ts-ignore: loopPlus may not be called", 70 | "private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> {", 71 | [ 72 | "return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>;", 73 | ], 74 | "}", 75 | "private loop(func: $$RuleType, lb: number, ub: number): Nullable {", 76 | [ 77 | "const mrk = this.mark();", 78 | "const res: T[] = [];", 79 | "while (ub === -1 || res.length < ub) {", 80 | [ 81 | "const preMrk = this.mark();", 82 | "const t = func();", 83 | "if (t === null || this.pos.overallPos === preMrk.overallPos) {", 84 | [ 85 | "break;", 86 | ], 87 | "}", 88 | "res.push(t);", 89 | ], 90 | "}", 91 | "if (res.length >= lb) {", 92 | [ 93 | "return res;", 94 | ], 95 | "}", 96 | "this.reset(mrk);", 97 | "return null;", 98 | ], 99 | "}", 100 | "private run($$dpth: number, fn: $$RuleType): Nullable {", 101 | [ 102 | "const mrk = this.mark();", 103 | "const res = fn()", 104 | "if (res !== null)", 105 
| [ 106 | "return res;", 107 | ], 108 | "this.reset(mrk);", 109 | "return null;", 110 | ], 111 | "}", 112 | "// @ts-ignore: choice may not be called", 113 | "private choice(fns: Array<$$RuleType>): Nullable {", 114 | [ 115 | "for (const f of fns) {", 116 | [ 117 | "const res = f();", 118 | "if (res !== null) {", 119 | [ 120 | "return res;", 121 | ], 122 | "}", 123 | ], 124 | "}", 125 | "return null;", 126 | ], 127 | "}", 128 | "private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable {", 129 | [ 130 | "return this.run(dpth,", 131 | [ 132 | "() => {", 133 | [ 134 | `const reg = new RegExp(match, "y${opts.regexFlags}" + mods);`, 135 | "const mrk = this.mark();", 136 | "reg.lastIndex = mrk.overallPos;", 137 | "const res = this.tryConsume(reg);", 138 | "if(cr) {", 139 | [ 140 | "cr.record(mrk, res, {", 141 | [ 142 | "kind: \"RegexMatch\",", 143 | "// We substring from 3 to len - 1 to strip off the", 144 | "// non-capture group syntax added as a WebKit workaround", 145 | "literal: match.substring(3, match.length - 1),", 146 | "negated: this.negating,", 147 | ], 148 | "});", 149 | ], 150 | "}", 151 | "return res;", 152 | ], 153 | "});", 154 | ], 155 | ], 156 | "}", 157 | "private tryConsume(reg: RegExp): Nullable {", 158 | [ 159 | "const res = reg.exec(this.input);", 160 | "if (res) {", 161 | [ 162 | "let lineJmp = 0;", 163 | "let lind = -1;", 164 | "for (let i = 0; i < res[0].length; ++i) {", 165 | [ 166 | "if (res[0][i] === \"\\n\") {", 167 | [ 168 | "++lineJmp;", 169 | "lind = i;", 170 | ], 171 | "}", 172 | ], 173 | "}", 174 | "this.pos = {", 175 | [ 176 | "overallPos: reg.lastIndex,", 177 | "line: this.pos.line + lineJmp,", 178 | "offset: lind === -1 ? 
this.pos.offset + res[0].length : (res[0].length - lind - 1)", 179 | ], 180 | "};", 181 | "return res[0];", 182 | ], 183 | "}", 184 | "return null;", 185 | ], 186 | "}", 187 | "// @ts-ignore: noConsume may not be called", 188 | "private noConsume(fn: $$RuleType): Nullable {", 189 | [ 190 | "const mrk = this.mark();", 191 | "const res = fn();", 192 | "this.reset(mrk);", 193 | "return res;", 194 | ], 195 | "}", 196 | "// @ts-ignore: negate may not be called", 197 | "private negate(fn: $$RuleType): Nullable {", 198 | [ 199 | "const mrk = this.mark();", 200 | "const oneg = this.negating;", 201 | "this.negating = !oneg;", 202 | "const res = fn();", 203 | "this.negating = oneg;", 204 | "this.reset(mrk);", 205 | "return res === null ? true : null;", 206 | ], 207 | "}", 208 | "// @ts-ignore: Memoise may not be used", 209 | "private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable {", 210 | [ 211 | "const $scope$pos = this.mark();", 212 | "const $scope$memoRes = memo.get($scope$pos.overallPos);", 213 | "if(this.memoSafe && $scope$memoRes !== undefined) {", 214 | "this.reset($scope$memoRes[1]);", 215 | "return $scope$memoRes[0];", 216 | "}", 217 | "const $scope$result = rule();", 218 | "if(this.memoSafe)", 219 | "memo.set($scope$pos.overallPos, [$scope$result, this.mark()]);", 220 | "return $scope$result;", 221 | ], 222 | "}", 223 | ...(opts.usesEOF 224 | ? ["private match$EOF(et?: ErrorTracker): Nullable<{kind: ASTKinds.$EOF}> {", 225 | [ 226 | "const res: {kind: ASTKinds.$EOF} | null = this.finished() ? 
{ kind: ASTKinds.$EOF } : null;", 227 | "if(et)", 228 | [ 229 | "et.record(this.mark(), res, { kind: \"EOF\", negated: this.negating });", 230 | ], 231 | "return res;", 232 | ], 233 | "}", 234 | ] 235 | : []), 236 | ], 237 | "}", 238 | 239 | "export function parse(s: string): ParseResult {", 240 | [ 241 | "const p = new Parser(s);", 242 | "return p.parse();", 243 | ], 244 | "}", 245 | 246 | ...opts.parseResult, 247 | 248 | "export interface PosInfo {", 249 | [ 250 | "readonly overallPos: number;", 251 | "readonly line: number;", 252 | "readonly offset: number;", 253 | ], 254 | "}", 255 | "export interface RegexMatch {", 256 | [ 257 | "readonly kind: \"RegexMatch\";", 258 | "readonly negated: boolean;", 259 | "readonly literal: string;", 260 | ], 261 | "}", 262 | "export type EOFMatch = { kind: \"EOF\"; negated: boolean };", 263 | "export type MatchAttempt = RegexMatch | EOFMatch;", 264 | "export class SyntaxErr {", 265 | [ 266 | "public pos: PosInfo;", 267 | "public expmatches: MatchAttempt[];", 268 | "constructor(pos: PosInfo, expmatches: MatchAttempt[]) {", 269 | [ 270 | "this.pos = pos;", 271 | "this.expmatches = [...expmatches];", 272 | ], 273 | "}", 274 | "public toString(): string {", 275 | [ 276 | // eslint-disable-next-line no-template-curly-in-string 277 | "return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === \"EOF\" ? \" EOF\" : ` ${x.negated ? 
'not ': ''}'${x.literal}'`)}`;", 278 | ], 279 | "}", 280 | ], 281 | "}", 282 | "class ErrorTracker {", 283 | [ 284 | "private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1};", 285 | "private regexset: Set = new Set();", 286 | "private pmatches: MatchAttempt[] = [];", 287 | "public record(pos: PosInfo, result: any, att: MatchAttempt) {", 288 | [ 289 | "if ((result === null) === att.negated)", 290 | [ 291 | "return;", 292 | ], 293 | "if (pos.overallPos > this.mxpos.overallPos) {", 294 | [ 295 | "this.mxpos = pos;", 296 | "this.pmatches = [];", 297 | "this.regexset.clear()", 298 | ], 299 | "}", 300 | "if (this.mxpos.overallPos === pos.overallPos) {", 301 | [ 302 | "if(att.kind === \"RegexMatch\") {", 303 | [ 304 | "if(!this.regexset.has(att.literal))", 305 | [ 306 | "this.pmatches.push(att);", 307 | ], 308 | "this.regexset.add(att.literal);", 309 | ], 310 | "} else {", 311 | [ 312 | "this.pmatches.push(att);", 313 | ], 314 | "}", 315 | ], 316 | "}", 317 | ], 318 | "}", 319 | "public getErr(): SyntaxErr | null {", 320 | [ 321 | "if (this.mxpos.overallPos !== -1)", 322 | [ 323 | "return new SyntaxErr(this.mxpos, this.pmatches);", 324 | ], 325 | "return null;", 326 | ], 327 | "}", 328 | ], 329 | "}", 330 | ]; 331 | } 332 | -------------------------------------------------------------------------------- /src/test/alias_test/grammar.peg: -------------------------------------------------------------------------------- 1 | A := 'match something' 2 | .value = string { return 'test'; } 3 | B := A 4 | .value = number { return 1; } 5 | -------------------------------------------------------------------------------- /src/test/alias_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * A := 'match something' 4 | * .value = string { return 'test'; } 5 | * B := A 6 | * .value = number { return 1; } 7 | */ 8 | type Nullable = T | null; 9 | type 
$$RuleType = () => Nullable; 10 | export interface ASTNodeIntf { 11 | kind: ASTKinds; 12 | } 13 | export enum ASTKinds { 14 | A = "A", 15 | B = "B", 16 | } 17 | export class A { 18 | public kind: ASTKinds.A = ASTKinds.A; 19 | public value: string; 20 | constructor(){ 21 | this.value = ((): string => { 22 | return 'test'; 23 | })(); 24 | } 25 | } 26 | export class B { 27 | public kind: ASTKinds.B = ASTKinds.B; 28 | public value: number; 29 | constructor(){ 30 | this.value = ((): number => { 31 | return 1; 32 | })(); 33 | } 34 | } 35 | export class Parser { 36 | private readonly input: string; 37 | private pos: PosInfo; 38 | private negating: boolean = false; 39 | private memoSafe: boolean = true; 40 | constructor(input: string) { 41 | this.pos = {overallPos: 0, line: 1, offset: 0}; 42 | this.input = input; 43 | } 44 | public reset(pos: PosInfo) { 45 | this.pos = pos; 46 | } 47 | public finished(): boolean { 48 | return this.pos.overallPos === this.input.length; 49 | } 50 | public clearMemos(): void { 51 | } 52 | public matchA($$dpth: number, $$cr?: ErrorTracker): Nullable { 53 | return this.run($$dpth, 54 | () => { 55 | let $$res: Nullable = null; 56 | if (true 57 | && this.regexAccept(String.raw`(?:match something)`, "", $$dpth + 1, $$cr) !== null 58 | ) { 59 | $$res = new A(); 60 | } 61 | return $$res; 62 | }); 63 | } 64 | public matchB($$dpth: number, $$cr?: ErrorTracker): Nullable { 65 | return this.run($$dpth, 66 | () => { 67 | let $$res: Nullable = null; 68 | if (true 69 | && this.matchA($$dpth + 1, $$cr) !== null 70 | ) { 71 | $$res = new B(); 72 | } 73 | return $$res; 74 | }); 75 | } 76 | public test(): boolean { 77 | const mrk = this.mark(); 78 | const res = this.matchA(0); 79 | const ans = res !== null; 80 | this.reset(mrk); 81 | return ans; 82 | } 83 | public parse(): ParseResult { 84 | const mrk = this.mark(); 85 | const res = this.matchA(0); 86 | if (res) 87 | return {ast: res, errs: []}; 88 | this.reset(mrk); 89 | const rec = new ErrorTracker(); 90 | 
this.clearMemos(); 91 | this.matchA(0, rec); 92 | const err = rec.getErr() 93 | return {ast: res, errs: err !== null ? [err] : []} 94 | } 95 | public mark(): PosInfo { 96 | return this.pos; 97 | } 98 | // @ts-ignore: loopPlus may not be called 99 | private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> { 100 | return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>; 101 | } 102 | private loop(func: $$RuleType, lb: number, ub: number): Nullable { 103 | const mrk = this.mark(); 104 | const res: T[] = []; 105 | while (ub === -1 || res.length < ub) { 106 | const preMrk = this.mark(); 107 | const t = func(); 108 | if (t === null || this.pos.overallPos === preMrk.overallPos) { 109 | break; 110 | } 111 | res.push(t); 112 | } 113 | if (res.length >= lb) { 114 | return res; 115 | } 116 | this.reset(mrk); 117 | return null; 118 | } 119 | private run($$dpth: number, fn: $$RuleType): Nullable { 120 | const mrk = this.mark(); 121 | const res = fn() 122 | if (res !== null) 123 | return res; 124 | this.reset(mrk); 125 | return null; 126 | } 127 | // @ts-ignore: choice may not be called 128 | private choice(fns: Array<$$RuleType>): Nullable { 129 | for (const f of fns) { 130 | const res = f(); 131 | if (res !== null) { 132 | return res; 133 | } 134 | } 135 | return null; 136 | } 137 | private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable { 138 | return this.run(dpth, 139 | () => { 140 | const reg = new RegExp(match, "y" + mods); 141 | const mrk = this.mark(); 142 | reg.lastIndex = mrk.overallPos; 143 | const res = this.tryConsume(reg); 144 | if(cr) { 145 | cr.record(mrk, res, { 146 | kind: "RegexMatch", 147 | // We substring from 3 to len - 1 to strip off the 148 | // non-capture group syntax added as a WebKit workaround 149 | literal: match.substring(3, match.length - 1), 150 | negated: this.negating, 151 | }); 152 | } 153 | return res; 154 | }); 155 | } 156 | private tryConsume(reg: RegExp): Nullable { 157 | const res = 
reg.exec(this.input); 158 | if (res) { 159 | let lineJmp = 0; 160 | let lind = -1; 161 | for (let i = 0; i < res[0].length; ++i) { 162 | if (res[0][i] === "\n") { 163 | ++lineJmp; 164 | lind = i; 165 | } 166 | } 167 | this.pos = { 168 | overallPos: reg.lastIndex, 169 | line: this.pos.line + lineJmp, 170 | offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1) 171 | }; 172 | return res[0]; 173 | } 174 | return null; 175 | } 176 | // @ts-ignore: noConsume may not be called 177 | private noConsume(fn: $$RuleType): Nullable { 178 | const mrk = this.mark(); 179 | const res = fn(); 180 | this.reset(mrk); 181 | return res; 182 | } 183 | // @ts-ignore: negate may not be called 184 | private negate(fn: $$RuleType): Nullable { 185 | const mrk = this.mark(); 186 | const oneg = this.negating; 187 | this.negating = !oneg; 188 | const res = fn(); 189 | this.negating = oneg; 190 | this.reset(mrk); 191 | return res === null ? true : null; 192 | } 193 | // @ts-ignore: Memoise may not be used 194 | private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable { 195 | const $scope$pos = this.mark(); 196 | const $scope$memoRes = memo.get($scope$pos.overallPos); 197 | if(this.memoSafe && $scope$memoRes !== undefined) { 198 | this.reset($scope$memoRes[1]); 199 | return $scope$memoRes[0]; 200 | } 201 | const $scope$result = rule(); 202 | if(this.memoSafe) 203 | memo.set($scope$pos.overallPos, [$scope$result, this.mark()]); 204 | return $scope$result; 205 | } 206 | } 207 | export function parse(s: string): ParseResult { 208 | const p = new Parser(s); 209 | return p.parse(); 210 | } 211 | export interface ParseResult { 212 | ast: Nullable; 213 | errs: SyntaxErr[]; 214 | } 215 | export interface PosInfo { 216 | readonly overallPos: number; 217 | readonly line: number; 218 | readonly offset: number; 219 | } 220 | export interface RegexMatch { 221 | readonly kind: "RegexMatch"; 222 | readonly negated: boolean; 223 | readonly literal: string; 224 | } 225 | 
export type EOFMatch = { kind: "EOF"; negated: boolean }; 226 | export type MatchAttempt = RegexMatch | EOFMatch; 227 | export class SyntaxErr { 228 | public pos: PosInfo; 229 | public expmatches: MatchAttempt[]; 230 | constructor(pos: PosInfo, expmatches: MatchAttempt[]) { 231 | this.pos = pos; 232 | this.expmatches = [...expmatches]; 233 | } 234 | public toString(): string { 235 | return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 'not ': ''}'${x.literal}'`)}`; 236 | } 237 | } 238 | class ErrorTracker { 239 | private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1}; 240 | private regexset: Set = new Set(); 241 | private pmatches: MatchAttempt[] = []; 242 | public record(pos: PosInfo, result: any, att: MatchAttempt) { 243 | if ((result === null) === att.negated) 244 | return; 245 | if (pos.overallPos > this.mxpos.overallPos) { 246 | this.mxpos = pos; 247 | this.pmatches = []; 248 | this.regexset.clear() 249 | } 250 | if (this.mxpos.overallPos === pos.overallPos) { 251 | if(att.kind === "RegexMatch") { 252 | if(!this.regexset.has(att.literal)) 253 | this.pmatches.push(att); 254 | this.regexset.add(att.literal); 255 | } else { 256 | this.pmatches.push(att); 257 | } 258 | } 259 | } 260 | public getErr(): SyntaxErr | null { 261 | if (this.mxpos.overallPos !== -1) 262 | return new SyntaxErr(this.mxpos, this.pmatches); 263 | return null; 264 | } 265 | } -------------------------------------------------------------------------------- /src/test/alias_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from './parser'; 2 | test(`dummy test`, () => { 3 | parse('a'); 4 | }); 5 | -------------------------------------------------------------------------------- /src/test/bounded_matches_test/grammar.peg: -------------------------------------------------------------------------------- 1 | S := { 'a'[2] | 
'b'[3,] | 'c'[ 3, 5 ] } $ 2 | -------------------------------------------------------------------------------- /src/test/bounded_matches_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * S := { 'a'[2] | 'b'[3,] | 'c'[ 3, 5 ] } $ 4 | */ 5 | type Nullable = T | null; 6 | type $$RuleType = () => Nullable; 7 | export interface ASTNodeIntf { 8 | kind: ASTKinds; 9 | } 10 | export enum ASTKinds { 11 | S = "S", 12 | S_$0_1 = "S_$0_1", 13 | S_$0_2 = "S_$0_2", 14 | S_$0_3 = "S_$0_3", 15 | $EOF = "$EOF", 16 | } 17 | export interface S { 18 | kind: ASTKinds.S; 19 | } 20 | export type S_$0 = S_$0_1 | S_$0_2 | S_$0_3; 21 | export type S_$0_1 = string[]; 22 | export type S_$0_2 = string[]; 23 | export type S_$0_3 = string[]; 24 | export class Parser { 25 | private readonly input: string; 26 | private pos: PosInfo; 27 | private negating: boolean = false; 28 | private memoSafe: boolean = true; 29 | constructor(input: string) { 30 | this.pos = {overallPos: 0, line: 1, offset: 0}; 31 | this.input = input; 32 | } 33 | public reset(pos: PosInfo) { 34 | this.pos = pos; 35 | } 36 | public finished(): boolean { 37 | return this.pos.overallPos === this.input.length; 38 | } 39 | public clearMemos(): void { 40 | } 41 | public matchS($$dpth: number, $$cr?: ErrorTracker): Nullable { 42 | return this.run($$dpth, 43 | () => { 44 | let $$res: Nullable = null; 45 | if (true 46 | && this.matchS_$0($$dpth + 1, $$cr) !== null 47 | && this.match$EOF($$cr) !== null 48 | ) { 49 | $$res = {kind: ASTKinds.S, }; 50 | } 51 | return $$res; 52 | }); 53 | } 54 | public matchS_$0($$dpth: number, $$cr?: ErrorTracker): Nullable { 55 | return this.choice([ 56 | () => this.matchS_$0_1($$dpth + 1, $$cr), 57 | () => this.matchS_$0_2($$dpth + 1, $$cr), 58 | () => this.matchS_$0_3($$dpth + 1, $$cr), 59 | ]); 60 | } 61 | public matchS_$0_1($$dpth: number, $$cr?: ErrorTracker): Nullable { 62 | 
return this.loop(() => this.regexAccept(String.raw`(?:a)`, "", $$dpth + 1, $$cr), 2, 2); 63 | } 64 | public matchS_$0_2($$dpth: number, $$cr?: ErrorTracker): Nullable { 65 | return this.loop(() => this.regexAccept(String.raw`(?:b)`, "", $$dpth + 1, $$cr), 3, -1); 66 | } 67 | public matchS_$0_3($$dpth: number, $$cr?: ErrorTracker): Nullable { 68 | return this.loop(() => this.regexAccept(String.raw`(?:c)`, "", $$dpth + 1, $$cr), 3, 5); 69 | } 70 | public test(): boolean { 71 | const mrk = this.mark(); 72 | const res = this.matchS(0); 73 | const ans = res !== null; 74 | this.reset(mrk); 75 | return ans; 76 | } 77 | public parse(): ParseResult { 78 | const mrk = this.mark(); 79 | const res = this.matchS(0); 80 | if (res) 81 | return {ast: res, errs: []}; 82 | this.reset(mrk); 83 | const rec = new ErrorTracker(); 84 | this.clearMemos(); 85 | this.matchS(0, rec); 86 | const err = rec.getErr() 87 | return {ast: res, errs: err !== null ? [err] : []} 88 | } 89 | public mark(): PosInfo { 90 | return this.pos; 91 | } 92 | // @ts-ignore: loopPlus may not be called 93 | private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> { 94 | return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>; 95 | } 96 | private loop(func: $$RuleType, lb: number, ub: number): Nullable { 97 | const mrk = this.mark(); 98 | const res: T[] = []; 99 | while (ub === -1 || res.length < ub) { 100 | const preMrk = this.mark(); 101 | const t = func(); 102 | if (t === null || this.pos.overallPos === preMrk.overallPos) { 103 | break; 104 | } 105 | res.push(t); 106 | } 107 | if (res.length >= lb) { 108 | return res; 109 | } 110 | this.reset(mrk); 111 | return null; 112 | } 113 | private run($$dpth: number, fn: $$RuleType): Nullable { 114 | const mrk = this.mark(); 115 | const res = fn() 116 | if (res !== null) 117 | return res; 118 | this.reset(mrk); 119 | return null; 120 | } 121 | // @ts-ignore: choice may not be called 122 | private choice(fns: Array<$$RuleType>): Nullable { 123 | for (const f of fns) { 124 
| const res = f(); 125 | if (res !== null) { 126 | return res; 127 | } 128 | } 129 | return null; 130 | } 131 | private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable { 132 | return this.run(dpth, 133 | () => { 134 | const reg = new RegExp(match, "y" + mods); 135 | const mrk = this.mark(); 136 | reg.lastIndex = mrk.overallPos; 137 | const res = this.tryConsume(reg); 138 | if(cr) { 139 | cr.record(mrk, res, { 140 | kind: "RegexMatch", 141 | // We substring from 3 to len - 1 to strip off the 142 | // non-capture group syntax added as a WebKit workaround 143 | literal: match.substring(3, match.length - 1), 144 | negated: this.negating, 145 | }); 146 | } 147 | return res; 148 | }); 149 | } 150 | private tryConsume(reg: RegExp): Nullable { 151 | const res = reg.exec(this.input); 152 | if (res) { 153 | let lineJmp = 0; 154 | let lind = -1; 155 | for (let i = 0; i < res[0].length; ++i) { 156 | if (res[0][i] === "\n") { 157 | ++lineJmp; 158 | lind = i; 159 | } 160 | } 161 | this.pos = { 162 | overallPos: reg.lastIndex, 163 | line: this.pos.line + lineJmp, 164 | offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1) 165 | }; 166 | return res[0]; 167 | } 168 | return null; 169 | } 170 | // @ts-ignore: noConsume may not be called 171 | private noConsume(fn: $$RuleType): Nullable { 172 | const mrk = this.mark(); 173 | const res = fn(); 174 | this.reset(mrk); 175 | return res; 176 | } 177 | // @ts-ignore: negate may not be called 178 | private negate(fn: $$RuleType): Nullable { 179 | const mrk = this.mark(); 180 | const oneg = this.negating; 181 | this.negating = !oneg; 182 | const res = fn(); 183 | this.negating = oneg; 184 | this.reset(mrk); 185 | return res === null ? 
true : null; 186 | } 187 | // @ts-ignore: Memoise may not be used 188 | private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable { 189 | const $scope$pos = this.mark(); 190 | const $scope$memoRes = memo.get($scope$pos.overallPos); 191 | if(this.memoSafe && $scope$memoRes !== undefined) { 192 | this.reset($scope$memoRes[1]); 193 | return $scope$memoRes[0]; 194 | } 195 | const $scope$result = rule(); 196 | if(this.memoSafe) 197 | memo.set($scope$pos.overallPos, [$scope$result, this.mark()]); 198 | return $scope$result; 199 | } 200 | private match$EOF(et?: ErrorTracker): Nullable<{kind: ASTKinds.$EOF}> { 201 | const res: {kind: ASTKinds.$EOF} | null = this.finished() ? { kind: ASTKinds.$EOF } : null; 202 | if(et) 203 | et.record(this.mark(), res, { kind: "EOF", negated: this.negating }); 204 | return res; 205 | } 206 | } 207 | export function parse(s: string): ParseResult { 208 | const p = new Parser(s); 209 | return p.parse(); 210 | } 211 | export interface ParseResult { 212 | ast: Nullable; 213 | errs: SyntaxErr[]; 214 | } 215 | export interface PosInfo { 216 | readonly overallPos: number; 217 | readonly line: number; 218 | readonly offset: number; 219 | } 220 | export interface RegexMatch { 221 | readonly kind: "RegexMatch"; 222 | readonly negated: boolean; 223 | readonly literal: string; 224 | } 225 | export type EOFMatch = { kind: "EOF"; negated: boolean }; 226 | export type MatchAttempt = RegexMatch | EOFMatch; 227 | export class SyntaxErr { 228 | public pos: PosInfo; 229 | public expmatches: MatchAttempt[]; 230 | constructor(pos: PosInfo, expmatches: MatchAttempt[]) { 231 | this.pos = pos; 232 | this.expmatches = [...expmatches]; 233 | } 234 | public toString(): string { 235 | return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 
'not ': ''}'${x.literal}'`)}`; 236 | } 237 | } 238 | class ErrorTracker { 239 | private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1}; 240 | private regexset: Set = new Set(); 241 | private pmatches: MatchAttempt[] = []; 242 | public record(pos: PosInfo, result: any, att: MatchAttempt) { 243 | if ((result === null) === att.negated) 244 | return; 245 | if (pos.overallPos > this.mxpos.overallPos) { 246 | this.mxpos = pos; 247 | this.pmatches = []; 248 | this.regexset.clear() 249 | } 250 | if (this.mxpos.overallPos === pos.overallPos) { 251 | if(att.kind === "RegexMatch") { 252 | if(!this.regexset.has(att.literal)) 253 | this.pmatches.push(att); 254 | this.regexset.add(att.literal); 255 | } else { 256 | this.pmatches.push(att); 257 | } 258 | } 259 | } 260 | public getErr(): SyntaxErr | null { 261 | if (this.mxpos.overallPos !== -1) 262 | return new SyntaxErr(this.mxpos, this.pmatches); 263 | return null; 264 | } 265 | } -------------------------------------------------------------------------------- /src/test/bounded_matches_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | describe("Fixed Repetition", () => { 4 | test("Too Short", () => { 5 | const res = parse("a"); 6 | expect(res.ast).toBeNull(); 7 | expect(res.errs).not.toHaveLength(0); 8 | }); 9 | test("Too Long", () => { 10 | const res = parse("aaa"); 11 | expect(res.ast).toBeNull(); 12 | expect(res.errs).not.toHaveLength(0); 13 | }); 14 | test("Exact Length", () => { 15 | const res = parse("aa"); 16 | expect(res.ast).not.toBeNull(); 17 | expect(res.errs).toHaveLength(0); 18 | }); 19 | }); 20 | 21 | describe("Only Lower Bound", () => { 22 | test("Too Short", () => { 23 | const res = parse("bb"); 24 | expect(res.ast).toBeNull(); 25 | expect(res.errs).not.toHaveLength(0); 26 | }); 27 | test("Long is Ok", () => { 28 | const res = parse("bbbbbb"); 29 | expect(res.ast).not.toBeNull(); 30 | 
expect(res.errs).toHaveLength(0); 31 | }); 32 | test("Exact Length", () => { 33 | const res = parse("bbb"); 34 | expect(res.ast).not.toBeNull(); 35 | expect(res.errs).toHaveLength(0); 36 | }); 37 | }); 38 | 39 | describe("Lower and Upper Bound", () => { 40 | test("Too Short", () => { 41 | const res = parse("cc"); 42 | expect(res.ast).toBeNull(); 43 | expect(res.errs).not.toHaveLength(0); 44 | }); 45 | test("Too Long", () => { 46 | const res = parse("cccccc"); 47 | expect(res.ast).toBeNull(); 48 | expect(res.errs).not.toHaveLength(0); 49 | }); 50 | test("Exact Lower Bound", () => { 51 | const res = parse("ccc"); 52 | expect(res.ast).not.toBeNull(); 53 | expect(res.errs).toHaveLength(0); 54 | }); 55 | test("Exact Upper Bound", () => { 56 | const res = parse("ccccc"); 57 | expect(res.ast).not.toBeNull(); 58 | expect(res.errs).toHaveLength(0); 59 | }); 60 | test("Between Bounds", () => { 61 | const res = parse("cccc"); 62 | expect(res.ast).not.toBeNull(); 63 | expect(res.errs).toHaveLength(0); 64 | }); 65 | }); 66 | -------------------------------------------------------------------------------- /src/test/calc_leftrec_test/grammar.peg: -------------------------------------------------------------------------------- 1 | // Left recursion test 2 | 3 | SUM := l=SUM op='\+|-' r=FAC 4 | .value = number { 5 | return this.op === "+" ? l.value + r.value : l.value - r.value; 6 | } 7 | | FAC 8 | FAC := l=FAC op='\*|/' r=ATOM 9 | .value = number { 10 | return this.op === "*" ? 
l.value * r.value : l.value / r.value; 11 | } 12 | | ATOM 13 | ATOM := _ val=INT _ 14 | .value = number { return this.val.value; } 15 | | _ '\(' val=SUM '\)' _ 16 | .value = number { return this.val.value; } 17 | INT := val='[0-9]+' 18 | .value = number { return parseInt(this.val); } 19 | _ := '\s*' 20 | -------------------------------------------------------------------------------- /src/test/calc_leftrec_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | describe("test calculator", () => { 4 | interface TestCase { inp: string, exp: number } 5 | const tcs: TestCase[] = [ 6 | { inp: "1+2", exp: 3 }, 7 | { inp: " 1 + 2 ", exp: 3 }, 8 | { inp: "1 * 2 - 3", exp: -1 }, 9 | { inp: "2 * (2 - 3)", exp: -2 }, 10 | { inp: "54 * 30 - 4098 * 17 + 34 * 4", exp: -67910 }, 11 | ]; 12 | for (const tc of tcs) { 13 | test(`inp: ${tc.inp}`, () => { 14 | const res = parse(tc.inp); 15 | expect(res.errs).toEqual([]); 16 | expect(res.ast).not.toBeNull(); 17 | const ast = res.ast!; 18 | expect(ast.value).toEqual(tc.exp); 19 | }); 20 | } 21 | }); 22 | -------------------------------------------------------------------------------- /src/test/calc_test/flags.txt: -------------------------------------------------------------------------------- 1 | --num-enums 2 | -------------------------------------------------------------------------------- /src/test/calc_test/grammar.peg: -------------------------------------------------------------------------------- 1 | SUM := head=FAC tail={ op='\+|-' sm=FAC }* 2 | .value = number { 3 | return this.tail.reduce((x, y) => { 4 | return y.op === "+" ? x + y.sm.value : x - y.sm.value; 5 | }, this.head.value); 6 | } 7 | FAC := head=ATOM tail={ op='\*|/' sm=ATOM }* 8 | .value = number { 9 | return this.tail.reduce((x, y) => { 10 | return y.op === "*" ? 
x * y.sm.value : x / y.sm.value; 11 | }, this.head.value); 12 | } 13 | ATOM := _ val=INT _ 14 | .value = number { return this.val.value; } 15 | | _ '\(' val=SUM '\)' _ 16 | .value = number { return this.val.value; } 17 | INT := val='[0-9]+' 18 | .value = number { return parseInt(this.val); } 19 | _ := '\s*' 20 | -------------------------------------------------------------------------------- /src/test/calc_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | describe("test calculator", () => { 4 | interface TestCase { inp: string, exp: number } 5 | const tcs: TestCase[] = [ 6 | { inp: "1+2", exp: 3 }, 7 | { inp: " 1 + 2 ", exp: 3 }, 8 | { inp: "1 * 2 - 3", exp: -1 }, 9 | { inp: "2 * (2 - 3)", exp: -2 }, 10 | { inp: "54 * 30 - 4098 * 17 + 34 * 4", exp: -67910 }, 11 | ]; 12 | for (const tc of tcs) { 13 | test(`inp: ${tc.inp}`, () => { 14 | const res = parse(tc.inp); 15 | expect(res.errs).toEqual([]); 16 | expect(res.ast).not.toBeNull(); 17 | const ast = res.ast!; 18 | expect(ast.value).toEqual(tc.exp); 19 | }); 20 | } 21 | }); 22 | -------------------------------------------------------------------------------- /src/test/case_insensitive_test/grammar.peg: -------------------------------------------------------------------------------- 1 | start := 'SELECT'i '\s*' 'FROM'i $ 2 | -------------------------------------------------------------------------------- /src/test/case_insensitive_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * start := 'SELECT'i '\s*' 'FROM'i $ 4 | */ 5 | type Nullable = T | null; 6 | type $$RuleType = () => Nullable; 7 | export interface ASTNodeIntf { 8 | kind: ASTKinds; 9 | } 10 | export enum ASTKinds { 11 | start = "start", 12 | $EOF = "$EOF", 13 | } 14 | export interface start { 15 | kind: ASTKinds.start; 16 | } 17 | export 
class Parser { 18 | private readonly input: string; 19 | private pos: PosInfo; 20 | private negating: boolean = false; 21 | private memoSafe: boolean = true; 22 | constructor(input: string) { 23 | this.pos = {overallPos: 0, line: 1, offset: 0}; 24 | this.input = input; 25 | } 26 | public reset(pos: PosInfo) { 27 | this.pos = pos; 28 | } 29 | public finished(): boolean { 30 | return this.pos.overallPos === this.input.length; 31 | } 32 | public clearMemos(): void { 33 | } 34 | public matchstart($$dpth: number, $$cr?: ErrorTracker): Nullable { 35 | return this.run($$dpth, 36 | () => { 37 | let $$res: Nullable = null; 38 | if (true 39 | && this.regexAccept(String.raw`(?:SELECT)`, "i", $$dpth + 1, $$cr) !== null 40 | && this.regexAccept(String.raw`(?:\s*)`, "", $$dpth + 1, $$cr) !== null 41 | && this.regexAccept(String.raw`(?:FROM)`, "i", $$dpth + 1, $$cr) !== null 42 | && this.match$EOF($$cr) !== null 43 | ) { 44 | $$res = {kind: ASTKinds.start, }; 45 | } 46 | return $$res; 47 | }); 48 | } 49 | public test(): boolean { 50 | const mrk = this.mark(); 51 | const res = this.matchstart(0); 52 | const ans = res !== null; 53 | this.reset(mrk); 54 | return ans; 55 | } 56 | public parse(): ParseResult { 57 | const mrk = this.mark(); 58 | const res = this.matchstart(0); 59 | if (res) 60 | return {ast: res, errs: []}; 61 | this.reset(mrk); 62 | const rec = new ErrorTracker(); 63 | this.clearMemos(); 64 | this.matchstart(0, rec); 65 | const err = rec.getErr() 66 | return {ast: res, errs: err !== null ? 
[err] : []} 67 | } 68 | public mark(): PosInfo { 69 | return this.pos; 70 | } 71 | // @ts-ignore: loopPlus may not be called 72 | private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> { 73 | return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>; 74 | } 75 | private loop(func: $$RuleType, lb: number, ub: number): Nullable { 76 | const mrk = this.mark(); 77 | const res: T[] = []; 78 | while (ub === -1 || res.length < ub) { 79 | const preMrk = this.mark(); 80 | const t = func(); 81 | if (t === null || this.pos.overallPos === preMrk.overallPos) { 82 | break; 83 | } 84 | res.push(t); 85 | } 86 | if (res.length >= lb) { 87 | return res; 88 | } 89 | this.reset(mrk); 90 | return null; 91 | } 92 | private run($$dpth: number, fn: $$RuleType): Nullable { 93 | const mrk = this.mark(); 94 | const res = fn() 95 | if (res !== null) 96 | return res; 97 | this.reset(mrk); 98 | return null; 99 | } 100 | // @ts-ignore: choice may not be called 101 | private choice(fns: Array<$$RuleType>): Nullable { 102 | for (const f of fns) { 103 | const res = f(); 104 | if (res !== null) { 105 | return res; 106 | } 107 | } 108 | return null; 109 | } 110 | private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable { 111 | return this.run(dpth, 112 | () => { 113 | const reg = new RegExp(match, "y" + mods); 114 | const mrk = this.mark(); 115 | reg.lastIndex = mrk.overallPos; 116 | const res = this.tryConsume(reg); 117 | if(cr) { 118 | cr.record(mrk, res, { 119 | kind: "RegexMatch", 120 | // We substring from 3 to len - 1 to strip off the 121 | // non-capture group syntax added as a WebKit workaround 122 | literal: match.substring(3, match.length - 1), 123 | negated: this.negating, 124 | }); 125 | } 126 | return res; 127 | }); 128 | } 129 | private tryConsume(reg: RegExp): Nullable { 130 | const res = reg.exec(this.input); 131 | if (res) { 132 | let lineJmp = 0; 133 | let lind = -1; 134 | for (let i = 0; i < res[0].length; ++i) { 135 | if (res[0][i] === "\n") { 
136 | ++lineJmp; 137 | lind = i; 138 | } 139 | } 140 | this.pos = { 141 | overallPos: reg.lastIndex, 142 | line: this.pos.line + lineJmp, 143 | offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1) 144 | }; 145 | return res[0]; 146 | } 147 | return null; 148 | } 149 | // @ts-ignore: noConsume may not be called 150 | private noConsume(fn: $$RuleType): Nullable { 151 | const mrk = this.mark(); 152 | const res = fn(); 153 | this.reset(mrk); 154 | return res; 155 | } 156 | // @ts-ignore: negate may not be called 157 | private negate(fn: $$RuleType): Nullable { 158 | const mrk = this.mark(); 159 | const oneg = this.negating; 160 | this.negating = !oneg; 161 | const res = fn(); 162 | this.negating = oneg; 163 | this.reset(mrk); 164 | return res === null ? true : null; 165 | } 166 | // @ts-ignore: Memoise may not be used 167 | private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable { 168 | const $scope$pos = this.mark(); 169 | const $scope$memoRes = memo.get($scope$pos.overallPos); 170 | if(this.memoSafe && $scope$memoRes !== undefined) { 171 | this.reset($scope$memoRes[1]); 172 | return $scope$memoRes[0]; 173 | } 174 | const $scope$result = rule(); 175 | if(this.memoSafe) 176 | memo.set($scope$pos.overallPos, [$scope$result, this.mark()]); 177 | return $scope$result; 178 | } 179 | private match$EOF(et?: ErrorTracker): Nullable<{kind: ASTKinds.$EOF}> { 180 | const res: {kind: ASTKinds.$EOF} | null = this.finished() ? 
{ kind: ASTKinds.$EOF } : null; 181 | if(et) 182 | et.record(this.mark(), res, { kind: "EOF", negated: this.negating }); 183 | return res; 184 | } 185 | } 186 | export function parse(s: string): ParseResult { 187 | const p = new Parser(s); 188 | return p.parse(); 189 | } 190 | export interface ParseResult { 191 | ast: Nullable; 192 | errs: SyntaxErr[]; 193 | } 194 | export interface PosInfo { 195 | readonly overallPos: number; 196 | readonly line: number; 197 | readonly offset: number; 198 | } 199 | export interface RegexMatch { 200 | readonly kind: "RegexMatch"; 201 | readonly negated: boolean; 202 | readonly literal: string; 203 | } 204 | export type EOFMatch = { kind: "EOF"; negated: boolean }; 205 | export type MatchAttempt = RegexMatch | EOFMatch; 206 | export class SyntaxErr { 207 | public pos: PosInfo; 208 | public expmatches: MatchAttempt[]; 209 | constructor(pos: PosInfo, expmatches: MatchAttempt[]) { 210 | this.pos = pos; 211 | this.expmatches = [...expmatches]; 212 | } 213 | public toString(): string { 214 | return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 
'not ': ''}'${x.literal}'`)}`; 215 | } 216 | } 217 | class ErrorTracker { 218 | private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1}; 219 | private regexset: Set = new Set(); 220 | private pmatches: MatchAttempt[] = []; 221 | public record(pos: PosInfo, result: any, att: MatchAttempt) { 222 | if ((result === null) === att.negated) 223 | return; 224 | if (pos.overallPos > this.mxpos.overallPos) { 225 | this.mxpos = pos; 226 | this.pmatches = []; 227 | this.regexset.clear() 228 | } 229 | if (this.mxpos.overallPos === pos.overallPos) { 230 | if(att.kind === "RegexMatch") { 231 | if(!this.regexset.has(att.literal)) 232 | this.pmatches.push(att); 233 | this.regexset.add(att.literal); 234 | } else { 235 | this.pmatches.push(att); 236 | } 237 | } 238 | } 239 | public getErr(): SyntaxErr | null { 240 | if (this.mxpos.overallPos !== -1) 241 | return new SyntaxErr(this.mxpos, this.pmatches); 242 | return null; 243 | } 244 | } -------------------------------------------------------------------------------- /src/test/case_insensitive_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | test(`Case Test 1`, () => { 4 | const res = parse("SELECT from"); 5 | expect(res.errs).toEqual([]); 6 | expect(res.ast).not.toBeNull(); 7 | }); 8 | 9 | test(`Case Test 2`, () => { 10 | const res = parse("SeLeCT fRoM"); 11 | expect(res.errs).toEqual([]); 12 | expect(res.ast).not.toBeNull(); 13 | }); 14 | -------------------------------------------------------------------------------- /src/test/checks.test.ts: -------------------------------------------------------------------------------- 1 | import { buildParser } from "../gen"; 2 | 3 | test("Check banned names", () => { 4 | const inp = "rule := kind='hello world'"; 5 | expect(() => buildParser(inp, false, false, "")) 6 | .toThrow("'kind' is not an allowed match name"); 7 | }); 8 | 9 | test("Check rules exist checker", () => { 10 | const inp = 
"rule := a=rule2"; 11 | expect(() => buildParser(inp, false, false, "")) 12 | .toThrow("Rule 'rule2' is not defined"); 13 | }); 14 | 15 | test("Check rule collision checker for alternatives", () => { 16 | const inp = `rule := 'a' | 'b' 17 | rule_1 := 'c'`; 18 | expect(() => buildParser(inp, false, false, "")) 19 | .toThrow('Rule "rule" declared with >= 1 alternatives and rule "rule_1" should not both be declared'); 20 | }); 21 | 22 | test("Check rule collision checker for double definition", () => { 23 | const inp = `rule := 'a' | 'b' 24 | rule := 'a' | 'b'`; 25 | expect(() => buildParser(inp, false, false, "")) 26 | .toThrow('Rule already defined: "rule"'); 27 | }); 28 | 29 | test("Check keywords aren't allowed as name", () => { 30 | const inp = `break := 'a' | 'b'`; 31 | expect(() => buildParser(inp, false, false, "")) 32 | .toThrow('Rule name "break" is a reserved Typescript keyword'); 33 | }); 34 | -------------------------------------------------------------------------------- /src/test/crlf_test/grammar.peg: -------------------------------------------------------------------------------- 1 | --- 2 | // CRLF header 3 | --- 4 | 5 | // Comment ending in CRLF 6 | SUM := head=FAC tail={ op='\+|-' sm=FAC }* 7 | .value = number { 8 | return this.tail.reduce((x, y) => { 9 | return y.op === "+" ? x + y.sm.value : x - y.sm.value; 10 | }, this.head.value); 11 | } 12 | FAC := head=ATOM tail={ op='\*|/' sm=ATOM }* 13 | .value = number { 14 | return this.tail.reduce((x, y) => { 15 | return y.op === "*" ? 
x * y.sm.value : x / y.sm.value; 16 | }, this.head.value); 17 | } 18 | ATOM := _ val=INT _ 19 | .value = number { return this.val.value; } 20 | | _ '\(' val=SUM '\)' _ 21 | .value = number { return this.val.value; } 22 | INT := val='[0-9]+' 23 | .value = number { return parseInt(this.val); } 24 | _ := '\s*' 25 | -------------------------------------------------------------------------------- /src/test/crlf_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | describe("test calculator", () => { 4 | interface TestCase { inp: string, exp: number } 5 | const tcs: TestCase[] = [ 6 | { inp: "1+2", exp: 3 }, 7 | { inp: " 1 + 2 ", exp: 3 }, 8 | { inp: "1 * 2 - 3", exp: -1 }, 9 | { inp: "2 * (2 - 3)", exp: -2 }, 10 | { inp: "54 * 30 - 4098 * 17 + 34 * 4", exp: -67910 }, 11 | ]; 12 | for (const tc of tcs) { 13 | test(`inp: ${tc.inp}`, () => { 14 | const res = parse(tc.inp); 15 | expect(res.errs).toEqual([]); 16 | expect(res.ast).not.toBeNull(); 17 | const ast = res.ast!; 18 | expect(ast.value).toEqual(tc.exp); 19 | }); 20 | } 21 | }); 22 | -------------------------------------------------------------------------------- /src/test/eof_test/grammar.peg: -------------------------------------------------------------------------------- 1 | // Simple finite grammar 2 | 3 | RULE := 'abcde' $ 4 | -------------------------------------------------------------------------------- /src/test/eof_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * // Simple finite grammar 4 | * RULE := 'abcde' $ 5 | */ 6 | type Nullable = T | null; 7 | type $$RuleType = () => Nullable; 8 | export interface ASTNodeIntf { 9 | kind: ASTKinds; 10 | } 11 | export enum ASTKinds { 12 | RULE = "RULE", 13 | $EOF = "$EOF", 14 | } 15 | export interface RULE { 16 | kind: ASTKinds.RULE; 17 | } 18 | export 
class Parser {
    // Backtracking PEG parser for the grammar `RULE := 'abcde' $`.
    // NOTE: the `export` keyword for this class sits at the end of the preceding digest line.

    /** Text being parsed; never reassigned after construction. */
    private readonly input: string;
    /** Current cursor. It is replaced wholesale (never mutated) on every advance or rewind. */
    private pos: PosInfo;
    /** True while the operand of `negate` is being evaluated; copied into recorded match attempts. */
    private negating: boolean = false;
    /** When false, `memoise` neither reads nor writes its cache. */
    private memoSafe: boolean = true;

    constructor(input: string) {
        this.pos = {overallPos: 0, line: 1, offset: 0};
        this.input = input;
    }
    /** Rewinds (or advances) the cursor to a position previously obtained from `mark`. */
    public reset(pos: PosInfo) {
        this.pos = pos;
    }
    /** True when the cursor sits exactly at the end of the input. */
    public finished(): boolean {
        return this.pos.overallPos === this.input.length;
    }
    /** This grammar has no memoised rules, so there is nothing to clear. */
    public clearMemos(): void {
    }
    /**
     * Matches `RULE := 'abcde' $`: the literal `abcde` followed by end of input.
     * @param $$dpth nesting depth, forwarded to the helpers
     * @param $$cr optional tracker that records failed terminal matches
     * @returns a `RULE` node, or null (with the cursor rewound) on failure
     */
    public matchRULE($$dpth: number, $$cr?: ErrorTracker): Nullable<RULE> {
        return this.run<RULE>($$dpth,
            () => {
                let $$res: Nullable<RULE> = null;
                if (true
                    && this.regexAccept(String.raw`(?:abcde)`, "", $$dpth + 1, $$cr) !== null
                    && this.match$EOF($$cr) !== null
                ) {
                    $$res = {kind: ASTKinds.RULE, };
                }
                return $$res;
            });
    }
    /** Reports whether the start rule matches at the current position, without moving the cursor. */
    public test(): boolean {
        const mrk = this.mark();
        const res = this.matchRULE(0);
        const ans = res !== null;
        this.reset(mrk);
        return ans;
    }
    /**
     * Parses from the current position with the start rule.
     * On failure the input is parsed a second time with an `ErrorTracker` attached so that the
     * furthest failing position and the terminals expected there can be reported.
     */
    public parse(): ParseResult {
        const mrk = this.mark();
        const res = this.matchRULE(0);
        // Explicit null check (as in `test`) so a falsy-but-valid result is never treated as failure.
        if (res !== null)
            return {ast: res, errs: []};
        this.reset(mrk);
        const rec = new ErrorTracker();
        this.clearMemos();
        this.matchRULE(0, rec);
        const err = rec.getErr();
        return {ast: res, errs: err !== null ? [err] : []};
    }
    /** Returns the current position so that it can later be restored with `reset`. */
    public mark(): PosInfo {
        return this.pos;
    }
    /** One-or-more repetition: `loop` with a lower bound of 1 and no upper bound. */
    // @ts-ignore: loopPlus may not be called
    private loopPlus<T>(func: $$RuleType<T>): Nullable<[T, ...T[]]> {
        return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>;
    }
    /**
     * Applies `func` repeatedly and collects its results.
     * Iteration stops when `func` fails, when an iteration consumes no input (that result is
     * discarded, which prevents an endless loop), or once `ub` results exist (`-1` = unbounded).
     * @returns the results when at least `lb` were collected; otherwise rewinds and returns null
     */
    private loop<T>(func: $$RuleType<T>, lb: number, ub: number): Nullable<T[]> {
        const mrk = this.mark();
        const res: T[] = [];
        while (ub === -1 || res.length < ub) {
            const preMrk = this.mark();
            const t = func();
            if (t === null || this.pos.overallPos === preMrk.overallPos) {
                break;
            }
            res.push(t);
        }
        if (res.length >= lb) {
            return res;
        }
        this.reset(mrk);
        return null;
    }
    /** Runs `fn`; if it fails (returns null) the cursor is restored to where it was before the call. */
    private run<T>($$dpth: number, fn: $$RuleType<T>): Nullable<T> {
        const mrk = this.mark();
        const res = fn();
        if (res !== null)
            return res;
        this.reset(mrk);
        return null;
    }
    /** Ordered choice: returns the first non-null result among `fns`, or null if all fail. */
    // @ts-ignore: choice may not be called
    private choice<T>(fns: Array<$$RuleType<T>>): Nullable<T> {
        for (const f of fns) {
            const res = f();
            if (res !== null) {
                return res;
            }
        }
        return null;
    }
    /**
     * Tries to match the regex source `match` exactly at the cursor (sticky flag `y` plus `mods`).
     * When a tracker is supplied, the attempt is reported to it together with its outcome.
     * @returns the matched text, or null when there is no match at the cursor
     */
    private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable<string> {
        return this.run<string>(dpth,
            () => {
                const reg = new RegExp(match, "y" + mods);
                const mrk = this.mark();
                reg.lastIndex = mrk.overallPos;
                const res = this.tryConsume(reg);
                if(cr) {
                    cr.record(mrk, res, {
                        kind: "RegexMatch",
                        // We substring from 3 to len - 1 to strip off the
                        // non-capture group syntax added as a WebKit workaround
                        literal: match.substring(3, match.length - 1),
                        negated: this.negating,
                    });
                }
                return res;
            });
    }
    /**
     * Executes `reg` against the input and, on success, advances the cursor past the match,
     * updating the line number and the column offset from the newlines inside the matched text.
     */
    private tryConsume(reg: RegExp): Nullable<string> {
        const res = reg.exec(this.input);
        if (res) {
            let lineJmp = 0;
            // Index of the last "\n" inside the match, or -1 when the match stays on one line.
            let lind = -1;
            for (let i = 0; i < res[0].length; ++i) {
                if (res[0][i] === "\n") {
                    ++lineJmp;
                    lind = i;
                }
            }
            this.pos = {
                overallPos: reg.lastIndex,
                line: this.pos.line + lineJmp,
                offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1)
            };
            return res[0];
        }
        return null;
    }
    /** Lookahead: evaluates `fn` and always restores the cursor, returning `fn`'s result. */
    // @ts-ignore: noConsume may not be called
    private noConsume<T>(fn: $$RuleType<T>): Nullable<T> {
        const mrk = this.mark();
        const res = fn();
        this.reset(mrk);
        return res;
    }
    /**
     * Negative lookahead: succeeds (returns true) exactly when `fn` fails; never consumes input.
     * The `negating` flag is flipped for the duration of the call and restored afterwards.
     */
    // @ts-ignore: negate may not be called
    private negate<T>(fn: $$RuleType<T>): Nullable<boolean> {
        const mrk = this.mark();
        const oneg = this.negating;
        this.negating = !oneg;
        const res = fn();
        this.negating = oneg;
        this.reset(mrk);
        return res === null ? true : null;
    }
    /**
     * Caches the outcome of `rule` per start position in `memo` (result plus end position).
     * The cache is bypassed entirely, for both reads and writes, while `memoSafe` is false.
     */
    // @ts-ignore: Memoise may not be used
    private memoise<K>(rule: $$RuleType<K>, memo: Map<number, [Nullable<K>, PosInfo]>): Nullable<K> {
        const $scope$pos = this.mark();
        const $scope$memoRes = memo.get($scope$pos.overallPos);
        if(this.memoSafe && $scope$memoRes !== undefined) {
            this.reset($scope$memoRes[1]);
            return $scope$memoRes[0];
        }
        const $scope$result = rule();
        if(this.memoSafe)
            memo.set($scope$pos.overallPos, [$scope$result, this.mark()]);
        return $scope$result;
    }
    /** Matches end of input (`$`); reports the attempt to the tracker when one is supplied. */
    private match$EOF(et?: ErrorTracker): Nullable<{kind: ASTKinds.$EOF}> {
        const res: {kind: ASTKinds.$EOF} | null = this.finished() ? { kind: ASTKinds.$EOF } : null;
        if(et)
            et.record(this.mark(), res, { kind: "EOF", negated: this.negating });
        return res;
    }
}
/** Convenience entry point: parses `s` with a fresh `Parser`. */
export function parse(s: string): ParseResult {
    const p = new Parser(s);
    return p.parse();
}
/** Outcome of `Parser.parse`: the AST on success, otherwise null plus at most one syntax error. */
export interface ParseResult {
    ast: Nullable<RULE>;
    errs: SyntaxErr[];
}
/** Immutable cursor: absolute index into the input plus line number and column offset. */
export interface PosInfo {
    readonly overallPos: number;
    readonly line: number;
    readonly offset: number;
}
/** A recorded attempt to match a regex terminal; `literal` is the regex source as written. */
export interface RegexMatch {
    readonly kind: "RegexMatch";
    readonly negated: boolean;
    readonly literal: string;
}
export type EOFMatch = { kind: "EOF"; negated: boolean };
export type MatchAttempt = RegexMatch | EOFMatch;
/** A syntax error: where parsing got stuck and which terminals were expected there. */
export class SyntaxErr {
    public pos: PosInfo;
    public expmatches: MatchAttempt[];
    constructor(pos: PosInfo, expmatches: MatchAttempt[]) {
        this.pos = pos;
        // Copy so later changes to the caller's array do not affect this error.
        this.expmatches = [...expmatches];
    }
    public toString(): string {
        return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 'not ': ''}'${x.literal}'`)}`;
    }
}
/** Accumulates unexpected terminal outcomes, keeping only those at the furthest input position. */
class ErrorTracker {
    private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1};
    /** Regex literals already stored for the current furthest position (used to de-duplicate). */
    private regexset: Set<string> = new Set();
    private pmatches: MatchAttempt[] = [];
    /**
     * Records a terminal match attempt made at `pos`.
     * Attempts whose outcome was the wanted one are ignored, i.e. a success outside a negation
     * or a failure inside one. A strictly further position discards everything recorded so far.
     */
    public record(pos: PosInfo, result: unknown, att: MatchAttempt) {
        if ((result === null) === att.negated)
            return;
        if (pos.overallPos > this.mxpos.overallPos) {
            this.mxpos = pos;
            this.pmatches = [];
            this.regexset.clear();
        }
        if (this.mxpos.overallPos === pos.overallPos) {
            if(att.kind === "RegexMatch") {
                if(!this.regexset.has(att.literal))
                    this.pmatches.push(att);
                this.regexset.add(att.literal);
            } else {
                this.pmatches.push(att);
            }
        }
    }
    /** Returns the collected error, or null when nothing was ever recorded. */
    public getErr(): SyntaxErr | null {
        if (this.mxpos.overallPos !== -1)
            return new SyntaxErr(this.mxpos, this.pmatches);
        return null;
    }
}
// --------------------------------------------------------------------------------
// /src/test/eof_test/test.test.ts:
// --------------------------------------------------------------------------------
// 1 | import { parse } from "./parser";
// 2 |
// 3 | test("Verify $EOF error returns", () => {
// 4 |     const res = parse("abcdefghi");
// 5 |     expect(res.errs).toHaveLength(1);
// 6 |     expect(res.errs[0].expmatches.map(x => x.kind)).toEqual(["EOF"]);
// 7 | });
// 8 |
// --------------------------------------------------------------------------------
// /src/test/id_test/grammar.peg:
// --------------------------------------------------------------------------------
// 1 | // Test for allowable id names
// 2 |
// 3 | lowercase := 'a'
// 4 | UPPERCASE := 'b'
// 5 | _start_hypen_ := 'c'
// 6 | numbers1ab234 := 'd'
// 7 |
// 8 | // Check for namespace collision
// 9 |
// 10 | rule := rule=rule .a = number { return 0; }
// 11 | rule2 := res='a'
// 12 | rule3 := cr='b'
// 13 |
// --------------------------------------------------------------------------------
// /src/test/id_test/parser.ts:
// --------------------------------------------------------------------------------
/* AutoGenerated Code, changes may be overwritten
* INPUT GRAMMAR:
* // Test for allowable id names
* lowercase := 'a'
* UPPERCASE := 'b'
* _start_hypen_ := 'c'
* numbers1ab234 := 'd'
* // Check for namespace collision
* rule := rule=rule .a = number { return 0; }
* rule2 := res='a'
* rule3 := cr='b'
*/
/** A value of type `T`, or null to signal "no match". */
type Nullable<T> = T | null;
/** A parsing step: returns the matched value or null on failure. */
type $$RuleType<T> = () => Nullable<T>;
export interface ASTNodeIntf {
    kind: ASTKinds;
}
/** Discriminator values for the AST node kinds of this grammar. */
export enum ASTKinds {
    lowercase = "lowercase",
    UPPERCASE = "UPPERCASE",
    _start_hypen_ = "_start_hypen_",
    numbers1ab234 = "numbers1ab234",
    rule = "rule",
    rule2 = "rule2",
    rule3 = "rule3",
}
export type lowercase = string;
export type UPPERCASE = string;
export type _start_hypen_ = string;
export type numbers1ab234 = string;
/** AST node for `rule := rule=rule .a = number { return 0; }` with its computed property `a`. */
export class rule {
    public kind: ASTKinds.rule = ASTKinds.rule;
    public rule: rule;
    public a: number;
    constructor(rule: rule){
        this.rule = rule;
        // Computed property: the grammar's code block is evaluated once, at construction.
        this.a = ((): number => {
            return 0;
        })();
    }
}
export interface rule2 {
    kind: ASTKinds.rule2;
    res: string;
}
export interface rule3 {
    kind: ASTKinds.rule3;
    cr: string;
}
/** Backtracking PEG parser for the grammar above; the start rule is `lowercase`. */
export class Parser {
    /** Text being parsed; never reassigned after construction. */
    private readonly input: string;
    /** Current cursor. It is replaced wholesale (never mutated) on every advance or rewind. */
    private pos: PosInfo;
    /** True while the operand of `negate` is being evaluated; copied into recorded match attempts. */
    private negating: boolean = false;
    /** When false, `memoise` neither reads nor writes its cache (see `matchrule`). */
    private memoSafe: boolean = true;

    constructor(input: string) {
        this.pos = {overallPos: 0, line: 1, offset: 0};
        this.input = input;
    }
    /** Rewinds (or advances) the cursor to a position previously obtained from `mark`. */
    public reset(pos: PosInfo) {
        this.pos = pos;
    }
    /** True when the cursor sits exactly at the end of the input. */
    public finished(): boolean {
        return this.pos.overallPos === this.input.length;
    }
    /** Drops every cached result of the left-recursive rule `rule`. */
    public clearMemos(): void {
        this.$scope$rule$memo.clear();
    }
    /** Per-start-position cache for `matchrule`: result and the position where it ended. */
    protected $scope$rule$memo: Map<number, [Nullable<rule>, PosInfo]> = new Map();
    /** Matches `lowercase := 'a'`. */
    public matchlowercase($$dpth: number, $$cr?: ErrorTracker): Nullable<lowercase> {
        return this.regexAccept(String.raw`(?:a)`, "", $$dpth + 1, $$cr);
    }
    /** Matches `UPPERCASE := 'b'`. */
    public matchUPPERCASE($$dpth: number, $$cr?: ErrorTracker): Nullable<UPPERCASE> {
        return this.regexAccept(String.raw`(?:b)`, "", $$dpth + 1, $$cr);
    }
    /** Matches `_start_hypen_ := 'c'`. */
    public match_start_hypen_($$dpth: number, $$cr?: ErrorTracker): Nullable<_start_hypen_> {
        return this.regexAccept(String.raw`(?:c)`, "", $$dpth + 1, $$cr);
    }
    /** Matches `numbers1ab234 := 'd'`. */
    public matchnumbers1ab234($$dpth: number, $$cr?: ErrorTracker): Nullable<numbers1ab234> {
        return this.regexAccept(String.raw`(?:d)`, "", $$dpth + 1, $$cr);
    }
    /**
     * Matches the left-recursive rule `rule := rule=rule`.
     * The cache is first seeded with a failure at the start position so the recursive call
     * terminates. The body is then re-evaluated from the start position for as long as each
     * round ends strictly further than the previous best. `memoSafe` is switched off during
     * this growth phase and restored afterwards.
     */
    public matchrule($$dpth: number, $$cr?: ErrorTracker): Nullable<rule> {
        const fn = () => {
            return this.run<rule>($$dpth,
                () => {
                    let $scope$rule: Nullable<rule>;
                    let $$res: Nullable<rule> = null;
                    if (true
                        && ($scope$rule = this.matchrule($$dpth + 1, $$cr)) !== null
                    ) {
                        $$res = new rule($scope$rule);
                    }
                    return $$res;
                });
        };
        const $scope$pos = this.mark();
        const memo = this.$scope$rule$memo.get($scope$pos.overallPos);
        if(memo !== undefined) {
            this.reset(memo[1]);
            return memo[0];
        }
        const $scope$oldMemoSafe = this.memoSafe;
        this.memoSafe = false;
        this.$scope$rule$memo.set($scope$pos.overallPos, [null, $scope$pos]);
        let lastRes: Nullable<rule> = null;
        let lastPos: PosInfo = $scope$pos;
        for(;;) {
            this.reset($scope$pos);
            const res = fn();
            const end = this.mark();
            // No further progress: keep the previous best result.
            if(end.overallPos <= lastPos.overallPos)
                break;
            lastRes = res;
            lastPos = end;
            this.$scope$rule$memo.set($scope$pos.overallPos, [lastRes, lastPos]);
        }
        this.reset(lastPos);
        this.memoSafe = $scope$oldMemoSafe;
        return lastRes;
    }
    /** Matches `rule2 := res='a'`, capturing the matched text as `res`. */
    public matchrule2($$dpth: number, $$cr?: ErrorTracker): Nullable<rule2> {
        return this.run<rule2>($$dpth,
            () => {
                let $scope$res: Nullable<string>;
                let $$res: Nullable<rule2> = null;
                if (true
                    && ($scope$res = this.regexAccept(String.raw`(?:a)`, "", $$dpth + 1, $$cr)) !== null
                ) {
                    $$res = {kind: ASTKinds.rule2, res: $scope$res};
                }
                return $$res;
            });
    }
    /** Matches `rule3 := cr='b'`, capturing the matched text as `cr`. */
    public matchrule3($$dpth: number, $$cr?: ErrorTracker): Nullable<rule3> {
        return this.run<rule3>($$dpth,
            () => {
                let $scope$cr: Nullable<string>;
                let $$res: Nullable<rule3> = null;
                if (true
                    && ($scope$cr = this.regexAccept(String.raw`(?:b)`, "", $$dpth + 1, $$cr)) !== null
                ) {
                    $$res = {kind: ASTKinds.rule3, cr: $scope$cr};
                }
                return $$res;
            });
    }
    /** Reports whether the start rule matches at the current position, without moving the cursor. */
    public test(): boolean {
        const mrk = this.mark();
        const res = this.matchlowercase(0);
        const ans = res !== null;
        this.reset(mrk);
        return ans;
    }
    /**
     * Parses from the current position with the start rule.
     * On failure the input is parsed a second time with an `ErrorTracker` attached so that the
     * furthest failing position and the terminals expected there can be reported.
     */
    public parse(): ParseResult {
        const mrk = this.mark();
        const res = this.matchlowercase(0);
        // Explicit null check (as in `test`): the start rule yields a string, and a truthiness
        // test would misreport a successful empty match as a failure.
        if (res !== null)
            return {ast: res, errs: []};
        this.reset(mrk);
        const rec = new ErrorTracker();
        this.clearMemos();
        this.matchlowercase(0, rec);
        const err = rec.getErr();
        return {ast: res, errs: err !== null ? [err] : []};
    }
    /** Returns the current position so that it can later be restored with `reset`. */
    public mark(): PosInfo {
        return this.pos;
    }
    /** One-or-more repetition: `loop` with a lower bound of 1 and no upper bound. */
    // @ts-ignore: loopPlus may not be called
    private loopPlus<T>(func: $$RuleType<T>): Nullable<[T, ...T[]]> {
        return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>;
    }
    /**
     * Applies `func` repeatedly and collects its results.
     * Iteration stops when `func` fails, when an iteration consumes no input (that result is
     * discarded, which prevents an endless loop), or once `ub` results exist (`-1` = unbounded).
     * @returns the results when at least `lb` were collected; otherwise rewinds and returns null
     */
    private loop<T>(func: $$RuleType<T>, lb: number, ub: number): Nullable<T[]> {
        const mrk = this.mark();
        const res: T[] = [];
        while (ub === -1 || res.length < ub) {
            const preMrk = this.mark();
            const t = func();
            if (t === null || this.pos.overallPos === preMrk.overallPos) {
                break;
            }
            res.push(t);
        }
        if (res.length >= lb) {
            return res;
        }
        this.reset(mrk);
        return null;
    }
    /** Runs `fn`; if it fails (returns null) the cursor is restored to where it was before the call. */
    private run<T>($$dpth: number, fn: $$RuleType<T>): Nullable<T> {
        const mrk = this.mark();
        const res = fn();
        if (res !== null)
            return res;
        this.reset(mrk);
        return null;
    }
    /** Ordered choice: returns the first non-null result among `fns`, or null if all fail. */
    // @ts-ignore: choice may not be called
    private choice<T>(fns: Array<$$RuleType<T>>): Nullable<T> {
        for (const f of fns) {
            const res = f();
            if (res !== null) {
                return res;
            }
        }
        return null;
    }
    /**
     * Tries to match the regex source `match` exactly at the cursor (sticky flag `y` plus `mods`).
     * When a tracker is supplied, the attempt is reported to it together with its outcome.
     * @returns the matched text, or null when there is no match at the cursor
     */
    private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable<string> {
        return this.run<string>(dpth,
            () => {
                const reg = new RegExp(match, "y" + mods);
                const mrk = this.mark();
                reg.lastIndex = mrk.overallPos;
                const res = this.tryConsume(reg);
                if(cr) {
                    cr.record(mrk, res, {
                        kind: "RegexMatch",
                        // We substring from 3 to len - 1 to strip off the
                        // non-capture group syntax added as a WebKit workaround
                        literal: match.substring(3, match.length - 1),
                        negated: this.negating,
                    });
                }
                return res;
            });
    }
    /**
     * Executes `reg` against the input and, on success, advances the cursor past the match,
     * updating the line number and the column offset from the newlines inside the matched text.
     */
    private tryConsume(reg: RegExp): Nullable<string> {
        const res = reg.exec(this.input);
        if (res) {
            let lineJmp = 0;
            // Index of the last "\n" inside the match, or -1 when the match stays on one line.
            let lind = -1;
            for (let i = 0; i < res[0].length; ++i) {
                if (res[0][i] === "\n") {
                    ++lineJmp;
                    lind = i;
                }
            }
            this.pos = {
                overallPos: reg.lastIndex,
                line: this.pos.line + lineJmp,
                offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1)
            };
            return res[0];
        }
        return null;
    }
    /** Lookahead: evaluates `fn` and always restores the cursor, returning `fn`'s result. */
    // @ts-ignore: noConsume may not be called
    private noConsume<T>(fn: $$RuleType<T>): Nullable<T> {
        const mrk = this.mark();
        const res = fn();
        this.reset(mrk);
        return res;
    }
    /**
     * Negative lookahead: succeeds (returns true) exactly when `fn` fails; never consumes input.
     * The `negating` flag is flipped for the duration of the call and restored afterwards.
     */
    // @ts-ignore: negate may not be called
    private negate<T>(fn: $$RuleType<T>): Nullable<boolean> {
        const mrk = this.mark();
        const oneg = this.negating;
        this.negating = !oneg;
        const res = fn();
        this.negating = oneg;
        this.reset(mrk);
        return res === null ? true : null;
    }
    /**
     * Caches the outcome of `rule` per start position in `memo` (result plus end position).
     * The cache is bypassed entirely, for both reads and writes, while `memoSafe` is false.
     */
    // @ts-ignore: Memoise may not be used
    private memoise<K>(rule: $$RuleType<K>, memo: Map<number, [Nullable<K>, PosInfo]>): Nullable<K> {
        const $scope$pos = this.mark();
        const $scope$memoRes = memo.get($scope$pos.overallPos);
        if(this.memoSafe && $scope$memoRes !== undefined) {
            this.reset($scope$memoRes[1]);
            return $scope$memoRes[0];
        }
        const $scope$result = rule();
        if(this.memoSafe)
            memo.set($scope$pos.overallPos, [$scope$result, this.mark()]);
        return $scope$result;
    }
}
/** Convenience entry point: parses `s` with a fresh `Parser`. */
export function parse(s: string): ParseResult {
    const p = new Parser(s);
    return p.parse();
}
/** Outcome of `Parser.parse`: the AST on success, otherwise null plus at most one syntax error. */
export interface ParseResult {
    ast: Nullable<lowercase>;
    errs: SyntaxErr[];
}
/** Immutable cursor: absolute index into the input plus line number and column offset. */
export interface PosInfo {
    readonly overallPos: number;
    readonly line: number;
    readonly offset: number;
}
/** A recorded attempt to match a regex terminal; `literal` is the regex source as written. */
export interface RegexMatch {
    readonly kind: "RegexMatch";
    readonly negated: boolean;
    readonly literal: string;
}
export type EOFMatch = { kind: "EOF"; negated: boolean };
export type MatchAttempt = RegexMatch | EOFMatch;
/** A syntax error: where parsing got stuck and which terminals were expected there. */
export class SyntaxErr {
    public pos: PosInfo;
    public expmatches: MatchAttempt[];
    constructor(pos: PosInfo, expmatches: MatchAttempt[]) {
        this.pos = pos;
        // Copy so later changes to the caller's array do not affect this error.
        this.expmatches = [...expmatches];
    }
    public toString(): string {
        return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 'not ': ''}'${x.literal}'`)}`;
    }
}
/** Accumulates unexpected terminal outcomes, keeping only those at the furthest input position. */
class ErrorTracker {
    private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1};
    /** Regex literals already stored for the current furthest position (used to de-duplicate). */
    private regexset: Set<string> = new Set();
    private pmatches: MatchAttempt[] = [];
    /**
     * Records a terminal match attempt made at `pos`.
     * Attempts whose outcome was the wanted one are ignored, i.e. a success outside a negation
     * or a failure inside one. A strictly further position discards everything recorded so far.
     */
    public record(pos: PosInfo, result: unknown, att: MatchAttempt) {
        if ((result === null) === att.negated)
            return;
        if (pos.overallPos > this.mxpos.overallPos) {
            this.mxpos = pos;
            this.pmatches = [];
            this.regexset.clear();
        }
        if (this.mxpos.overallPos === pos.overallPos) {
            if(att.kind === "RegexMatch") {
                if(!this.regexset.has(att.literal))
                    this.pmatches.push(att);
                this.regexset.add(att.literal);
            } else {
                this.pmatches.push(att);
            }
        }
    }
    /** Returns the collected error, or null when nothing was ever recorded. */
    public getErr(): SyntaxErr | null {
        if (this.mxpos.overallPos !== -1)
            return new SyntaxErr(this.mxpos, this.pmatches);
        return null;
    }
}
// --------------------------------------------------------------------------------
// /src/test/id_test/test.test.ts:
// --------------------------------------------------------------------------------
// 1 | import { parse } from './parser';
// 2 | test(`dummy test`, () => {
// 3 |     parse('a');
// 4 | });
// 5 |
// --------------------------------------------------------------------------------
// /src/test/indirect_leftrec_test/grammar.peg:
// --------------------------------------------------------------------------------
// 1 | // Indirect left recursion test
// 2 |
// 3 | A := B B | 'a'
// 4 | B := A | 'b'
// 5 |
// --------------------------------------------------------------------------------
// /src/test/indirect_leftrec_test/parser.ts:
// --------------------------------------------------------------------------------
/* AutoGenerated Code, changes may be overwritten
* INPUT GRAMMAR:
* // Indirect left recursion test
* A := B B | 'a'
* B := A | 'b'
*/
/** A value of type `T`, or null to signal "no match". */
type Nullable<T> = T | null;
/** A parsing step: returns the matched value or null on failure. */
type $$RuleType<T> = () => Nullable<T>;
export interface ASTNodeIntf {
    kind: ASTKinds;
}
/** Discriminator values for the AST node kinds of this grammar. */
export enum ASTKinds {
    A_1 = "A_1",
    A_2 = "A_2",
    B_1 = "B_1",
    B_2 = "B_2",
}
export type A = A_1 | A_2;
export interface A_1 {
    kind: ASTKinds.A_1;
}
export type A_2 = string;
export type B = B_1 | B_2;
export type B_1 = A;
export type B_2 = string;
/** Backtracking PEG parser for the grammar above; the start rule is `A`. */
export class Parser {
    /** Text being parsed; never reassigned after construction. */
    private readonly input: string;
    /** Current cursor. It is replaced wholesale (never mutated) on every advance or rewind. */
    private pos: PosInfo;
    /** True while the operand of `negate` is being evaluated; copied into recorded match attempts. */
    private negating: boolean = false;
    /** When false, `memoise` neither reads nor writes its cache (see `matchA`). */
    private memoSafe: boolean = true;

    constructor(input: string) {
        this.pos = {overallPos: 0, line: 1, offset: 0};
        this.input = input;
    }
    /** Rewinds (or advances) the cursor to a position previously obtained from `mark`. */
    public reset(pos: PosInfo) {
        this.pos = pos;
    }
    /** True when the cursor sits exactly at the end of the input. */
    public finished(): boolean {
        return this.pos.overallPos === this.input.length;
    }
    /** Drops every cached result of the left-recursive rule `A`. */
    public clearMemos(): void {
        this.$scope$A$memo.clear();
    }
    /** Per-start-position cache for `matchA`: result and the position where it ended. */
    protected $scope$A$memo: Map<number, [Nullable<A>, PosInfo]> = new Map();
    /**
     * Matches `A := B B | 'a'`, which is left-recursive through `B := A | 'b'`.
     * The cache is first seeded with a failure at the start position so the recursion through
     * `B` terminates. The alternatives are then re-evaluated from the start position for as
     * long as each round ends strictly further than the previous best. `memoSafe` is switched
     * off during this growth phase and restored afterwards.
     */
    public matchA($$dpth: number, $$cr?: ErrorTracker): Nullable<A> {
        const fn = () => {
            return this.choice<A>([
                () => this.matchA_1($$dpth + 1, $$cr),
                () => this.matchA_2($$dpth + 1, $$cr),
            ]);
        };
        const $scope$pos = this.mark();
        const memo = this.$scope$A$memo.get($scope$pos.overallPos);
        if(memo !== undefined) {
            this.reset(memo[1]);
            return memo[0];
        }
        const $scope$oldMemoSafe = this.memoSafe;
        this.memoSafe = false;
        this.$scope$A$memo.set($scope$pos.overallPos, [null, $scope$pos]);
        let lastRes: Nullable<A> = null;
        let lastPos: PosInfo = $scope$pos;
        for(;;) {
            this.reset($scope$pos);
            const res = fn();
            const end = this.mark();
            // No further progress: keep the previous best result.
            if(end.overallPos <= lastPos.overallPos)
                break;
            lastRes = res;
            lastPos = end;
            this.$scope$A$memo.set($scope$pos.overallPos, [lastRes, lastPos]);
        }
        this.reset(lastPos);
        this.memoSafe = $scope$oldMemoSafe;
        return lastRes;
    }
    /** First alternative of `A`: two consecutive `B`s. */
    public matchA_1($$dpth: number, $$cr?: ErrorTracker): Nullable<A_1> {
        return this.run<A_1>($$dpth,
            () => {
                let $$res: Nullable<A_1> = null;
                if (true
                    && this.matchB($$dpth + 1, $$cr) !== null
                    && this.matchB($$dpth + 1, $$cr) !== null
                ) {
                    $$res = {kind: ASTKinds.A_1, };
                }
                return $$res;
            });
    }
    /** Second alternative of `A`: the literal `a`. */
    public matchA_2($$dpth: number, $$cr?: ErrorTracker): Nullable<A_2> {
        return this.regexAccept(String.raw`(?:a)`, "", $$dpth + 1, $$cr);
    }
    /** Matches `B := A | 'b'` as an ordered choice. */
    public matchB($$dpth: number, $$cr?: ErrorTracker): Nullable<B> {
        return this.choice<B>([
            () => this.matchB_1($$dpth + 1, $$cr),
            () => this.matchB_2($$dpth + 1, $$cr),
        ]);
    }
    /** First alternative of `B`: an `A`. */
    public matchB_1($$dpth: number, $$cr?: ErrorTracker): Nullable<B_1> {
        return this.matchA($$dpth + 1, $$cr);
    }
    /** Second alternative of `B`: the literal `b`. */
    public matchB_2($$dpth: number, $$cr?: ErrorTracker): Nullable<B_2> {
        return this.regexAccept(String.raw`(?:b)`, "", $$dpth + 1, $$cr);
    }
    /** Reports whether the start rule matches at the current position, without moving the cursor. */
    public test(): boolean {
        const mrk = this.mark();
        const res = this.matchA(0);
        const ans = res !== null;
        this.reset(mrk);
        return ans;
    }
    /**
     * Parses from the current position with the start rule.
     * On failure the input is parsed a second time with an `ErrorTracker` attached so that the
     * furthest failing position and the terminals expected there can be reported.
     */
    public parse(): ParseResult {
        const mrk = this.mark();
        const res = this.matchA(0);
        // Explicit null check (as in `test`): `A` may be a plain string, and a truthiness
        // test would misreport a successful empty match as a failure.
        if (res !== null)
            return {ast: res, errs: []};
        this.reset(mrk);
        const rec = new ErrorTracker();
        this.clearMemos();
        this.matchA(0, rec);
        const err = rec.getErr();
        return {ast: res, errs: err !== null ? [err] : []};
    }
    /** Returns the current position so that it can later be restored with `reset`. */
    public mark(): PosInfo {
        return this.pos;
    }
    /** One-or-more repetition: `loop` with a lower bound of 1 and no upper bound. */
    // @ts-ignore: loopPlus may not be called
    private loopPlus<T>(func: $$RuleType<T>): Nullable<[T, ...T[]]> {
        return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>;
    }
    /**
     * Applies `func` repeatedly and collects its results.
     * Iteration stops when `func` fails, when an iteration consumes no input (that result is
     * discarded, which prevents an endless loop), or once `ub` results exist (`-1` = unbounded).
     * @returns the results when at least `lb` were collected; otherwise rewinds and returns null
     */
    private loop<T>(func: $$RuleType<T>, lb: number, ub: number): Nullable<T[]> {
        const mrk = this.mark();
        const res: T[] = [];
        while (ub === -1 || res.length < ub) {
            const preMrk = this.mark();
            const t = func();
            if (t === null || this.pos.overallPos === preMrk.overallPos) {
                break;
            }
            res.push(t);
        }
        if (res.length >= lb) {
            return res;
        }
        this.reset(mrk);
        return null;
    }
    /** Runs `fn`; if it fails (returns null) the cursor is restored to where it was before the call. */
    private run<T>($$dpth: number, fn: $$RuleType<T>): Nullable<T> {
        const mrk = this.mark();
        const res = fn();
        if (res !== null)
            return res;
        this.reset(mrk);
        return null;
    }
    /** Ordered choice: returns the first non-null result among `fns`, or null if all fail. */
    // @ts-ignore: choice may not be called
    private choice<T>(fns: Array<$$RuleType<T>>): Nullable<T> {
        for (const f of fns) {
            const res = f();
            if (res !== null) {
                return res;
            }
        }
        return null;
    }
    /**
     * Tries to match the regex source `match` exactly at the cursor (sticky flag `y` plus `mods`).
     * When a tracker is supplied, the attempt is reported to it together with its outcome.
     * @returns the matched text, or null when there is no match at the cursor
     */
    private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable<string> {
        return this.run<string>(dpth,
            () => {
                const reg = new RegExp(match, "y" + mods);
                const mrk = this.mark();
                reg.lastIndex = mrk.overallPos;
                const res = this.tryConsume(reg);
                if(cr) {
                    cr.record(mrk, res, {
                        kind: "RegexMatch",
                        // We substring from 3 to len - 1 to strip off the
                        // non-capture group syntax added as a WebKit workaround
                        literal: match.substring(3, match.length - 1),
                        negated: this.negating,
                    });
                }
                return res;
            });
    }
    /**
     * Executes `reg` against the input and, on success, advances the cursor past the match,
     * updating the line number and the column offset from the newlines inside the matched text.
     */
    private tryConsume(reg: RegExp): Nullable<string> {
        const res = reg.exec(this.input);
        if (res) {
            let lineJmp = 0;
            // Index of the last "\n" inside the match, or -1 when the match stays on one line.
            let lind = -1;
            for (let i = 0; i < res[0].length; ++i) {
                if (res[0][i] === "\n") {
                    ++lineJmp;
                    lind = i;
                }
            }
            this.pos = {
                overallPos: reg.lastIndex,
                line: this.pos.line + lineJmp,
                offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1)
            };
            return res[0];
        }
        return null;
    }
    /** Lookahead: evaluates `fn` and always restores the cursor, returning `fn`'s result. */
    // @ts-ignore: noConsume may not be called
    private noConsume<T>(fn: $$RuleType<T>): Nullable<T> {
        const mrk = this.mark();
        const res = fn();
        this.reset(mrk);
        return res;
    }
    /**
     * Negative lookahead: succeeds (returns true) exactly when `fn` fails; never consumes input.
     * The `negating` flag is flipped for the duration of the call and restored afterwards.
     */
    // @ts-ignore: negate may not be called
    private negate<T>(fn: $$RuleType<T>): Nullable<boolean> {
        const mrk = this.mark();
        const oneg = this.negating;
        this.negating = !oneg;
        const res = fn();
        this.negating = oneg;
        this.reset(mrk);
        return res === null ? true : null;
    }
    /**
     * Caches the outcome of `rule` per start position in `memo` (result plus end position).
     * The cache is bypassed entirely, for both reads and writes, while `memoSafe` is false.
     */
    // @ts-ignore: Memoise may not be used
    private memoise<K>(rule: $$RuleType<K>, memo: Map<number, [Nullable<K>, PosInfo]>): Nullable<K> {
        const $scope$pos = this.mark();
        const $scope$memoRes = memo.get($scope$pos.overallPos);
        if(this.memoSafe && $scope$memoRes !== undefined) {
            this.reset($scope$memoRes[1]);
            return $scope$memoRes[0];
        }
        const $scope$result = rule();
        if(this.memoSafe)
            memo.set($scope$pos.overallPos, [$scope$result, this.mark()]);
        return $scope$result;
    }
}
/** Convenience entry point: parses `s` with a fresh `Parser`. */
export function parse(s: string): ParseResult {
    const p = new Parser(s);
    return p.parse();
}
/** Outcome of `Parser.parse`: the AST on success, otherwise null plus at most one syntax error. */
export interface ParseResult {
    ast: Nullable<A>;
    errs: SyntaxErr[];
}
/** Immutable cursor: absolute index into the input plus line number and column offset. */
export interface PosInfo {
    readonly overallPos: number;
    readonly line: number;
    readonly offset: number;
}
/** A recorded attempt to match a regex terminal; `literal` is the regex source as written. */
export interface RegexMatch {
    readonly kind: "RegexMatch";
    readonly negated: boolean;
    readonly literal: string;
}
export type EOFMatch = { kind: "EOF"; negated: boolean };
export type MatchAttempt = RegexMatch | EOFMatch;
/** A syntax error: where parsing got stuck and which terminals were expected there. */
export class SyntaxErr {
    public pos: PosInfo;
    public expmatches: MatchAttempt[];
    constructor(pos: PosInfo, expmatches: MatchAttempt[]) {
        this.pos = pos;
        // Copy so later changes to the caller's array do not affect this error.
        this.expmatches = [...expmatches];
    }
    public toString(): string {
        return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 'not ': ''}'${x.literal}'`)}`;
    }
}
/** Accumulates unexpected terminal outcomes, keeping only those at the furthest input position. */
class ErrorTracker {
    private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1};
    /** Regex literals already stored for the current furthest position (used to de-duplicate). */
    private regexset: Set<string> = new Set();
    private pmatches: MatchAttempt[] = [];
    /**
     * Records a terminal match attempt made at `pos`.
     * Attempts whose outcome was the wanted one are ignored, i.e. a success outside a negation
     * or a failure inside one. A strictly further position discards everything recorded so far.
     */
    public record(pos: PosInfo, result: unknown, att: MatchAttempt) {
        if ((result === null) === att.negated)
            return;
        if (pos.overallPos > this.mxpos.overallPos) {
            this.mxpos = pos;
            this.pmatches = [];
            this.regexset.clear();
        }
        if (this.mxpos.overallPos === pos.overallPos) {
            if(att.kind === "RegexMatch") {
                if(!this.regexset.has(att.literal))
                    this.pmatches.push(att);
                this.regexset.add(att.literal);
            } else {
                this.pmatches.push(att);
            }
        }
    }
    /** Returns the collected error, or null when nothing was ever recorded. */
    public getErr(): SyntaxErr | null {
        if (this.mxpos.overallPos !== -1)
            return new SyntaxErr(this.mxpos, this.pmatches);
        return null;
    }
}
// --------------------------------------------------------------------------------
// /src/test/indirect_leftrec_test/test.test.ts:
// --------------------------------------------------------------------------------
// 1 | import { parse } from "./parser";
// 2 |
// 3 | test.each([
// 4 |     "a",
// 5 |     "aaaaaaaa",
// 6 |     "bb",
// 7 |     "bab",
// 8 | ])('%p', inp => {
// 9 |     const res = parse(inp);
// 10 |     expect(res.errs).toEqual([]);
// 11 |     expect(res.ast).not.toBeNull();
// 12 | });
// 13 |
// --------------------------------------------------------------------------------
// /src/test/kleene_test/grammar.peg:
// --------------------------------------------------------------------------------
// 1 |
spec := { 'a'* 'b'* }* -------------------------------------------------------------------------------- /src/test/kleene_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * spec := { 'a'* 'b'* }* 4 | */ 5 | type Nullable = T | null; 6 | type $$RuleType = () => Nullable; 7 | export interface ASTNodeIntf { 8 | kind: ASTKinds; 9 | } 10 | export enum ASTKinds { 11 | spec = "spec", 12 | spec_$0 = "spec_$0", 13 | } 14 | export type spec = spec_$0[]; 15 | export interface spec_$0 { 16 | kind: ASTKinds.spec_$0; 17 | } 18 | export class Parser { 19 | private readonly input: string; 20 | private pos: PosInfo; 21 | private negating: boolean = false; 22 | private memoSafe: boolean = true; 23 | constructor(input: string) { 24 | this.pos = {overallPos: 0, line: 1, offset: 0}; 25 | this.input = input; 26 | } 27 | public reset(pos: PosInfo) { 28 | this.pos = pos; 29 | } 30 | public finished(): boolean { 31 | return this.pos.overallPos === this.input.length; 32 | } 33 | public clearMemos(): void { 34 | } 35 | public matchspec($$dpth: number, $$cr?: ErrorTracker): Nullable { 36 | return this.loop(() => this.matchspec_$0($$dpth + 1, $$cr), 0, -1); 37 | } 38 | public matchspec_$0($$dpth: number, $$cr?: ErrorTracker): Nullable { 39 | return this.run($$dpth, 40 | () => { 41 | let $$res: Nullable = null; 42 | if (true 43 | && this.loop(() => this.regexAccept(String.raw`(?:a)`, "", $$dpth + 1, $$cr), 0, -1) !== null 44 | && this.loop(() => this.regexAccept(String.raw`(?:b)`, "", $$dpth + 1, $$cr), 0, -1) !== null 45 | ) { 46 | $$res = {kind: ASTKinds.spec_$0, }; 47 | } 48 | return $$res; 49 | }); 50 | } 51 | public test(): boolean { 52 | const mrk = this.mark(); 53 | const res = this.matchspec(0); 54 | const ans = res !== null; 55 | this.reset(mrk); 56 | return ans; 57 | } 58 | public parse(): ParseResult { 59 | const mrk = this.mark(); 60 | const res = 
this.matchspec(0); 61 | if (res) 62 | return {ast: res, errs: []}; 63 | this.reset(mrk); 64 | const rec = new ErrorTracker(); 65 | this.clearMemos(); 66 | this.matchspec(0, rec); 67 | const err = rec.getErr() 68 | return {ast: res, errs: err !== null ? [err] : []} 69 | } 70 | public mark(): PosInfo { 71 | return this.pos; 72 | } 73 | // @ts-ignore: loopPlus may not be called 74 | private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> { 75 | return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>; 76 | } 77 | private loop(func: $$RuleType, lb: number, ub: number): Nullable { 78 | const mrk = this.mark(); 79 | const res: T[] = []; 80 | while (ub === -1 || res.length < ub) { 81 | const preMrk = this.mark(); 82 | const t = func(); 83 | if (t === null || this.pos.overallPos === preMrk.overallPos) { 84 | break; 85 | } 86 | res.push(t); 87 | } 88 | if (res.length >= lb) { 89 | return res; 90 | } 91 | this.reset(mrk); 92 | return null; 93 | } 94 | private run($$dpth: number, fn: $$RuleType): Nullable { 95 | const mrk = this.mark(); 96 | const res = fn() 97 | if (res !== null) 98 | return res; 99 | this.reset(mrk); 100 | return null; 101 | } 102 | // @ts-ignore: choice may not be called 103 | private choice(fns: Array<$$RuleType>): Nullable { 104 | for (const f of fns) { 105 | const res = f(); 106 | if (res !== null) { 107 | return res; 108 | } 109 | } 110 | return null; 111 | } 112 | private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable { 113 | return this.run(dpth, 114 | () => { 115 | const reg = new RegExp(match, "y" + mods); 116 | const mrk = this.mark(); 117 | reg.lastIndex = mrk.overallPos; 118 | const res = this.tryConsume(reg); 119 | if(cr) { 120 | cr.record(mrk, res, { 121 | kind: "RegexMatch", 122 | // We substring from 3 to len - 1 to strip off the 123 | // non-capture group syntax added as a WebKit workaround 124 | literal: match.substring(3, match.length - 1), 125 | negated: this.negating, 126 | }); 127 | } 128 | return 
res; 129 | }); 130 | } 131 | private tryConsume(reg: RegExp): Nullable { 132 | const res = reg.exec(this.input); 133 | if (res) { 134 | let lineJmp = 0; 135 | let lind = -1; 136 | for (let i = 0; i < res[0].length; ++i) { 137 | if (res[0][i] === "\n") { 138 | ++lineJmp; 139 | lind = i; 140 | } 141 | } 142 | this.pos = { 143 | overallPos: reg.lastIndex, 144 | line: this.pos.line + lineJmp, 145 | offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1) 146 | }; 147 | return res[0]; 148 | } 149 | return null; 150 | } 151 | // @ts-ignore: noConsume may not be called 152 | private noConsume(fn: $$RuleType): Nullable { 153 | const mrk = this.mark(); 154 | const res = fn(); 155 | this.reset(mrk); 156 | return res; 157 | } 158 | // @ts-ignore: negate may not be called 159 | private negate(fn: $$RuleType): Nullable { 160 | const mrk = this.mark(); 161 | const oneg = this.negating; 162 | this.negating = !oneg; 163 | const res = fn(); 164 | this.negating = oneg; 165 | this.reset(mrk); 166 | return res === null ? 
true : null; 167 | } 168 | // @ts-ignore: Memoise may not be used 169 | private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable { 170 | const $scope$pos = this.mark(); 171 | const $scope$memoRes = memo.get($scope$pos.overallPos); 172 | if(this.memoSafe && $scope$memoRes !== undefined) { 173 | this.reset($scope$memoRes[1]); 174 | return $scope$memoRes[0]; 175 | } 176 | const $scope$result = rule(); 177 | if(this.memoSafe) 178 | memo.set($scope$pos.overallPos, [$scope$result, this.mark()]); 179 | return $scope$result; 180 | } 181 | } 182 | export function parse(s: string): ParseResult { 183 | const p = new Parser(s); 184 | return p.parse(); 185 | } 186 | export interface ParseResult { 187 | ast: Nullable; 188 | errs: SyntaxErr[]; 189 | } 190 | export interface PosInfo { 191 | readonly overallPos: number; 192 | readonly line: number; 193 | readonly offset: number; 194 | } 195 | export interface RegexMatch { 196 | readonly kind: "RegexMatch"; 197 | readonly negated: boolean; 198 | readonly literal: string; 199 | } 200 | export type EOFMatch = { kind: "EOF"; negated: boolean }; 201 | export type MatchAttempt = RegexMatch | EOFMatch; 202 | export class SyntaxErr { 203 | public pos: PosInfo; 204 | public expmatches: MatchAttempt[]; 205 | constructor(pos: PosInfo, expmatches: MatchAttempt[]) { 206 | this.pos = pos; 207 | this.expmatches = [...expmatches]; 208 | } 209 | public toString(): string { 210 | return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 
'not ': ''}'${x.literal}'`)}`; 211 | } 212 | } 213 | class ErrorTracker { 214 | private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1}; 215 | private regexset: Set = new Set(); 216 | private pmatches: MatchAttempt[] = []; 217 | public record(pos: PosInfo, result: any, att: MatchAttempt) { 218 | if ((result === null) === att.negated) 219 | return; 220 | if (pos.overallPos > this.mxpos.overallPos) { 221 | this.mxpos = pos; 222 | this.pmatches = []; 223 | this.regexset.clear() 224 | } 225 | if (this.mxpos.overallPos === pos.overallPos) { 226 | if(att.kind === "RegexMatch") { 227 | if(!this.regexset.has(att.literal)) 228 | this.pmatches.push(att); 229 | this.regexset.add(att.literal); 230 | } else { 231 | this.pmatches.push(att); 232 | } 233 | } 234 | } 235 | public getErr(): SyntaxErr | null { 236 | if (this.mxpos.overallPos !== -1) 237 | return new SyntaxErr(this.mxpos, this.pmatches); 238 | return null; 239 | } 240 | } -------------------------------------------------------------------------------- /src/test/kleene_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | describe('nested kleene stars', () => { 4 | 5 | test('will terminate', () => { 6 | expect(parse('aaa').errs).toEqual([]); 7 | expect(parse('').errs).toEqual([]); 8 | }); 9 | }); 10 | -------------------------------------------------------------------------------- /src/test/leftrec.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "../meta"; 2 | import { Generator } from "../gen"; 3 | import { getRuleFromGram } from "../util"; 4 | import { disjointCycleSets, getRulesToMarkForBoundedRecursion, leftRecCycles, leftRecRules, nullableAtomSet, ruleIsNullableInCtx } from "../leftrec"; 5 | 6 | describe("test left recursion detection", () => { 7 | const tcs: {inp: string, hasLeftRec: boolean, cycles: string[][]}[] = [ 8 | { // no left recursion 
simple 9 | inp: "test := 'test'", 10 | hasLeftRec: false, 11 | cycles: [], 12 | }, 13 | { // no left recursion, does recurse 14 | inp: "test := 'test' test", 15 | hasLeftRec: false, 16 | cycles: [], 17 | }, 18 | { // direct left recursion in first alt 19 | inp: "test := test", 20 | hasLeftRec: true, 21 | cycles: [["test"]], 22 | }, 23 | { // direct left recursion not first alt 24 | inp: "test := not_test | { 'not_test' | test | not_test }", 25 | hasLeftRec: true, 26 | cycles: [["test"]], 27 | }, 28 | { // direct left recursion multiple match sequence 29 | inp: "test := test 'test'", 30 | hasLeftRec: true, 31 | cycles: [["test"]], 32 | }, 33 | { // indirect left recursion 34 | inp: ` 35 | test := other 36 | other := test`, 37 | hasLeftRec: true, 38 | cycles: [["test", "other"]], 39 | }, 40 | { // indirect left recursion, other rules also recurse 41 | inp: ` 42 | test := other 43 | other := other | test`, 44 | hasLeftRec: true, 45 | cycles: [["other"], ["test", "other"]], 46 | }, 47 | { // Nullable prefix, left recurses 48 | inp: "test := 'a?' test", 49 | hasLeftRec: true, 50 | cycles: [["test"]], 51 | }, 52 | { // Nullable prefix, doesn't recurse 53 | inp: "test := 'a?' 'b'", 54 | hasLeftRec: false, 55 | cycles: [], 56 | }, 57 | { // Long nullable prefix, recurses 58 | inp: ` 59 | test := nullme nullme 'a*' nonnull? indirect 60 | nullme := '(optional)?' '(also optional)?' 
61 | nonnull := 'not optional' 62 | indirect := nullme test`, 63 | hasLeftRec: true, 64 | cycles: [["test", "indirect"]], 65 | }, 66 | { // Multi-stage indirect left recursion 67 | inp: ` 68 | test := b 69 | b := { c } 70 | c := d 71 | d := e 72 | e := { f } 73 | f := g 74 | g := { h } 75 | h := test`, 76 | hasLeftRec: true, 77 | cycles: [["test", "b", "c", "d", "e", "f", "g", "h"]], 78 | }, 79 | ]; 80 | for(const tc of tcs) { 81 | test(`inp: ${tc.inp}`, () => { 82 | const res = parse(tc.inp); 83 | expect(res.errs).toEqual([]); 84 | expect(res.ast).not.toBeNull(); 85 | const g = new Generator(tc.inp); 86 | const leftRecs = leftRecRules(g.unexpandedGram); 87 | expect(leftRecs.has("test")).toEqual(tc.hasLeftRec); 88 | 89 | const atoms = nullableAtomSet(g.unexpandedGram); 90 | const cycles = leftRecCycles(g.unexpandedGram, atoms); 91 | expect(cycles.sort()).toEqual(tc.cycles.sort()); 92 | 93 | // Ensure exactly one rule per cycle is marked 94 | const marked = getRulesToMarkForBoundedRecursion(g.unexpandedGram); 95 | for(const cyc of cycles) { 96 | const cnt = cyc.filter(x => marked.has(x)).length; 97 | expect(cnt).toEqual(1); 98 | } 99 | }); 100 | } 101 | }); 102 | 103 | test("test nullable rule detection", () => { 104 | const tcs: {inp: string, nullableRules: string[]}[] = [ 105 | { 106 | inp: "test := ''", 107 | nullableRules: ["test"], 108 | }, 109 | { 110 | inp: "test := 'a?'", 111 | nullableRules: ["test"], 112 | }, 113 | { 114 | inp: "test := 'a'", 115 | nullableRules: [], 116 | }, 117 | { 118 | inp: "test := 'a'?", 119 | nullableRules: ["test"], 120 | }, 121 | { 122 | inp: "test := 'a'*", 123 | nullableRules: ["test"], 124 | }, 125 | { 126 | inp: ` 127 | a := 'a*' 128 | b := 'b*' 129 | c := a | b 130 | d := a b c`, 131 | nullableRules: ["a", "b", "c", "d"], 132 | }, 133 | { 134 | inp: ` 135 | a := 'a*' 136 | b := a | 'b' 137 | c := 'c'`, 138 | nullableRules: ["a", "b"], 139 | }, 140 | { 141 | inp: ` 142 | a := a | ''`, 143 | nullableRules: ["a"], 144 | }, 145 | ]; 
146 | for(const tc of tcs) { 147 | const res = parse(tc.inp); 148 | expect(res.errs).toEqual([]); 149 | expect(res.ast).not.toBeNull(); 150 | const gram = new Generator(tc.inp).unexpandedGram; 151 | const atoms = nullableAtomSet(gram); 152 | for(const rule of tc.nullableRules) 153 | expect(ruleIsNullableInCtx(getRuleFromGram(gram, rule)!.rule, atoms)).toEqual(true); 154 | } 155 | }); 156 | 157 | test("test disjointCycleSets", () => { 158 | const tcs: {cycles: string[][], sets: string[][][]}[] = [ 159 | { 160 | cycles: [["a", "b"], ["b", "c"], ["c", "d"]], 161 | sets: [[["a", "b"], ["b", "c"], ["c", "d"]]], 162 | }, 163 | { 164 | cycles: [["a", "b"], ["c", "d"], ["e", "f"]], 165 | sets: [[["a", "b"]], [["c", "d"]], [["e", "f"]]], 166 | }, 167 | { 168 | cycles: [["a", "b", "c"], ["b", "d"], ["e", "f"]], 169 | sets: [[["a", "b", "c"], ["b", "d"]], [["e", "f"]]], 170 | }, 171 | { 172 | cycles: [["a", "b", "c"], ["b", "c"], ["b", "d"], ["e", "f"], ["e"]], 173 | sets: [[["a", "b", "c"], ["b", "c"], ["b", "d"]], [["e", "f"], ["e"]]], 174 | }, 175 | ]; 176 | for(const tc of tcs) { 177 | const sets = disjointCycleSets(tc.cycles); 178 | expect(sets.sort()).toEqual(tc.sets.sort()); 179 | } 180 | }); 181 | -------------------------------------------------------------------------------- /src/test/memo_test/flags.txt: -------------------------------------------------------------------------------- 1 | --enable-memo=true 2 | -------------------------------------------------------------------------------- /src/test/memo_test/grammar.peg: -------------------------------------------------------------------------------- 1 | S := expr=E0 _ $ 2 | .value = number { return this.expr.value; } 3 | 4 | E0 := a=E1 _ op='\+|-' b=E0 5 | .value=number { 6 | return this.op === "+" 7 | ? this.a.value + this.b.value 8 | : this.a.value - this.b.value; 9 | } 10 | | E1 11 | 12 | E1 := a=ATOM _ op='\*|\/' b=E1 13 | .value=number { 14 | return this.op === "*" 15 | ? 
this.a.value * this.b.value 16 | : this.a.value / this.b.value; 17 | } 18 | | ATOM 19 | 20 | ATOM := _ val=INT 21 | .value=number { return this.val.value; } 22 | | _ '\(' val=E0 _ '\)' 23 | .value=number { return this.val.value; } 24 | INT := val='[0-9]+' 25 | .value=number { return parseInt(this.val); } 26 | _ := '\s*' 27 | -------------------------------------------------------------------------------- /src/test/memo_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { Parser, PosInfo } from './parser'; 2 | 3 | // ExposedParser exposes the memo tables as public for testing. 4 | class ExposedParser extends Parser { 5 | public getSmemo(): Map { 6 | return this.$scope$S$memo; 7 | } 8 | public getE0memo(): Map { 9 | return this.$scope$E0$memo; 10 | } 11 | public getE1memo(): Map { 12 | return this.$scope$E1$memo; 13 | } 14 | public getATOMmemo(): Map { 15 | return this.$scope$ATOM$memo; 16 | } 17 | public getINTmemo(): Map { 18 | return this.$scope$INT$memo; 19 | } 20 | public get_memo(): Map { 21 | return this.$scope$_$memo; 22 | } 23 | } 24 | 25 | // This is a bad test: it is entirely based on testing implementation 26 | // details, and is easily broken if the implementation changes. Oh well. 27 | 28 | test.each([ 29 | "100", 30 | " 100 ", 31 | "50 * 2", 32 | "50 + 50", 33 | "100 + 50 + (100 - 50 * 3)", 34 | ])('test parse + memo works: %s', input => { 35 | // Strategy here is to parse input, make a copy of all memo tables 36 | // then for each entry in a memo table, clear the memo in the parser, 37 | // try the parse from scratch from the start position, and verify the 38 | // result is the same as the cached value. 39 | 40 | const prsr = new ExposedParser(input); 41 | // using match instead of parse to avoid memo reset. 42 | const res = prsr.matchS(0); 43 | expect(res).not.toBeNull(); 44 | expect(res!.value).toEqual(100); 45 | 46 | // Memo tables are read through ExposedParser's public getters. 
47 | const memosAndFns: [[number, [unknown, PosInfo]][], () => unknown][] = [ 48 | [[...prsr.getSmemo()], () => prsr.matchS(0)], 49 | [[...prsr.getE0memo()], () => prsr.matchE0(0)], 50 | [[...prsr.getE1memo()], () => prsr.matchE1(0)], 51 | [[...prsr.getATOMmemo()], () => prsr.matchATOM(0)], 52 | [[...prsr.getINTmemo()], () => prsr.matchINT(0)], 53 | [[...prsr.get_memo()], () => prsr.match_(0)], 54 | ]; 55 | 56 | for(const [ls, fn] of memosAndFns) { 57 | for(const [start, [exp, end]] of ls) { 58 | prsr.clearMemos(); 59 | 60 | // Abuse of implementation to reset the position; the line and 61 | // offset values will be wrong. 62 | prsr.reset({ overallPos: start, line: 1, offset: 0}); 63 | 64 | const got = fn(); 65 | expect(got).toEqual(exp); 66 | // We only check overallPos because of the above abuse 67 | // of implementation. overallPos is the source of truth. 68 | expect(prsr.mark().overallPos).toEqual(end.overallPos); 69 | } 70 | } 71 | }); 72 | -------------------------------------------------------------------------------- /src/test/memo_time_test/flags.txt: -------------------------------------------------------------------------------- 1 | --enable-memo=true 2 | -------------------------------------------------------------------------------- /src/test/memo_time_test/grammar.peg: -------------------------------------------------------------------------------- 1 | E0 := a=E1 op='a|b' b=E0 .value=number { return this.a.value + this.b.value } 2 | | E1 3 | 4 | E1 := a=E2 op='c|d' b=E1 .value=number { return this.a.value + this.b.value } 5 | | E2 6 | 7 | E2 := a=E3 op='e|f' b=E2 .value=number { return this.a.value + this.b.value } 8 | | E3 9 | 10 | E3 := a=E4 op='g|h' b=E3 .value=number { return this.a.value + this.b.value } 11 | | E4 12 | 13 | E4 := a=ATOM op='i|j' b=E4 .value=number { return this.a.value + this.b.value } 14 | | ATOM 15 | 16 | ATOM := val=INT 17 | .value=number { return this.val.value; } 18 | | '\(' val=E0 '\)' 19 | .value=number { return this.val.value; } 20 | 
INT := val='[0-9]+' 21 | .value=number { return parseInt(this.val); } 22 | -------------------------------------------------------------------------------- /src/test/memo_time_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | // With memo working, this parse is trivial and completes instantly. 4 | // E(time) < 5ms 5 | // If memo isn't working, it takes exponentially longer for each bracket 6 | // E(time) > 200ms. 7 | // We fail if the time taken is >= 50ms 8 | test("parse completes in time", () => { 9 | const start = new Date(); 10 | const res = parse('((100))'); 11 | expect(res.errs.length).toEqual(0); 12 | expect(res.ast).not.toBeNull(); 13 | expect(res.ast!.value).toEqual(100); 14 | const end = new Date(); 15 | expect(end.getTime() - start.getTime()).toBeLessThan(50); 16 | }); 17 | -------------------------------------------------------------------------------- /src/test/multiline_test/grammar.peg: -------------------------------------------------------------------------------- 1 | start := '^line 1$'m '.'s '^line 2$'m 2 | -------------------------------------------------------------------------------- /src/test/multiline_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * start := '^line 1$'m '.'s '^line 2$'m 4 | */ 5 | type Nullable = T | null; 6 | type $$RuleType = () => Nullable; 7 | export interface ASTNodeIntf { 8 | kind: ASTKinds; 9 | } 10 | export enum ASTKinds { 11 | start = "start", 12 | } 13 | export interface start { 14 | kind: ASTKinds.start; 15 | } 16 | export class Parser { 17 | private readonly input: string; 18 | private pos: PosInfo; 19 | private negating: boolean = false; 20 | private memoSafe: boolean = true; 21 | constructor(input: string) { 22 | this.pos = {overallPos: 0, line: 1, offset: 0}; 23 | this.input = input; 24 | } 
25 | public reset(pos: PosInfo) { 26 | this.pos = pos; 27 | } 28 | public finished(): boolean { 29 | return this.pos.overallPos === this.input.length; 30 | } 31 | public clearMemos(): void { 32 | } 33 | public matchstart($$dpth: number, $$cr?: ErrorTracker): Nullable { 34 | return this.run($$dpth, 35 | () => { 36 | let $$res: Nullable = null; 37 | if (true 38 | && this.regexAccept(String.raw`(?:^line 1$)`, "m", $$dpth + 1, $$cr) !== null 39 | && this.regexAccept(String.raw`(?:.)`, "s", $$dpth + 1, $$cr) !== null 40 | && this.regexAccept(String.raw`(?:^line 2$)`, "m", $$dpth + 1, $$cr) !== null 41 | ) { 42 | $$res = {kind: ASTKinds.start, }; 43 | } 44 | return $$res; 45 | }); 46 | } 47 | public test(): boolean { 48 | const mrk = this.mark(); 49 | const res = this.matchstart(0); 50 | const ans = res !== null; 51 | this.reset(mrk); 52 | return ans; 53 | } 54 | public parse(): ParseResult { 55 | const mrk = this.mark(); 56 | const res = this.matchstart(0); 57 | if (res) 58 | return {ast: res, errs: []}; 59 | this.reset(mrk); 60 | const rec = new ErrorTracker(); 61 | this.clearMemos(); 62 | this.matchstart(0, rec); 63 | const err = rec.getErr() 64 | return {ast: res, errs: err !== null ? 
[err] : []} 65 | } 66 | public mark(): PosInfo { 67 | return this.pos; 68 | } 69 | // @ts-ignore: loopPlus may not be called 70 | private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> { 71 | return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>; 72 | } 73 | private loop(func: $$RuleType, lb: number, ub: number): Nullable { 74 | const mrk = this.mark(); 75 | const res: T[] = []; 76 | while (ub === -1 || res.length < ub) { 77 | const preMrk = this.mark(); 78 | const t = func(); 79 | if (t === null || this.pos.overallPos === preMrk.overallPos) { 80 | break; 81 | } 82 | res.push(t); 83 | } 84 | if (res.length >= lb) { 85 | return res; 86 | } 87 | this.reset(mrk); 88 | return null; 89 | } 90 | private run($$dpth: number, fn: $$RuleType): Nullable { 91 | const mrk = this.mark(); 92 | const res = fn() 93 | if (res !== null) 94 | return res; 95 | this.reset(mrk); 96 | return null; 97 | } 98 | // @ts-ignore: choice may not be called 99 | private choice(fns: Array<$$RuleType>): Nullable { 100 | for (const f of fns) { 101 | const res = f(); 102 | if (res !== null) { 103 | return res; 104 | } 105 | } 106 | return null; 107 | } 108 | private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable { 109 | return this.run(dpth, 110 | () => { 111 | const reg = new RegExp(match, "y" + mods); 112 | const mrk = this.mark(); 113 | reg.lastIndex = mrk.overallPos; 114 | const res = this.tryConsume(reg); 115 | if(cr) { 116 | cr.record(mrk, res, { 117 | kind: "RegexMatch", 118 | // We substring from 3 to len - 1 to strip off the 119 | // non-capture group syntax added as a WebKit workaround 120 | literal: match.substring(3, match.length - 1), 121 | negated: this.negating, 122 | }); 123 | } 124 | return res; 125 | }); 126 | } 127 | private tryConsume(reg: RegExp): Nullable { 128 | const res = reg.exec(this.input); 129 | if (res) { 130 | let lineJmp = 0; 131 | let lind = -1; 132 | for (let i = 0; i < res[0].length; ++i) { 133 | if (res[0][i] === "\n") { 134 
| ++lineJmp; 135 | lind = i; 136 | } 137 | } 138 | this.pos = { 139 | overallPos: reg.lastIndex, 140 | line: this.pos.line + lineJmp, 141 | offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1) 142 | }; 143 | return res[0]; 144 | } 145 | return null; 146 | } 147 | // @ts-ignore: noConsume may not be called 148 | private noConsume(fn: $$RuleType): Nullable { 149 | const mrk = this.mark(); 150 | const res = fn(); 151 | this.reset(mrk); 152 | return res; 153 | } 154 | // @ts-ignore: negate may not be called 155 | private negate(fn: $$RuleType): Nullable { 156 | const mrk = this.mark(); 157 | const oneg = this.negating; 158 | this.negating = !oneg; 159 | const res = fn(); 160 | this.negating = oneg; 161 | this.reset(mrk); 162 | return res === null ? true : null; 163 | } 164 | // @ts-ignore: Memoise may not be used 165 | private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable { 166 | const $scope$pos = this.mark(); 167 | const $scope$memoRes = memo.get($scope$pos.overallPos); 168 | if(this.memoSafe && $scope$memoRes !== undefined) { 169 | this.reset($scope$memoRes[1]); 170 | return $scope$memoRes[0]; 171 | } 172 | const $scope$result = rule(); 173 | if(this.memoSafe) 174 | memo.set($scope$pos.overallPos, [$scope$result, this.mark()]); 175 | return $scope$result; 176 | } 177 | } 178 | export function parse(s: string): ParseResult { 179 | const p = new Parser(s); 180 | return p.parse(); 181 | } 182 | export interface ParseResult { 183 | ast: Nullable; 184 | errs: SyntaxErr[]; 185 | } 186 | export interface PosInfo { 187 | readonly overallPos: number; 188 | readonly line: number; 189 | readonly offset: number; 190 | } 191 | export interface RegexMatch { 192 | readonly kind: "RegexMatch"; 193 | readonly negated: boolean; 194 | readonly literal: string; 195 | } 196 | export type EOFMatch = { kind: "EOF"; negated: boolean }; 197 | export type MatchAttempt = RegexMatch | EOFMatch; 198 | export class SyntaxErr { 199 | public pos: PosInfo; 
200 | public expmatches: MatchAttempt[]; 201 | constructor(pos: PosInfo, expmatches: MatchAttempt[]) { 202 | this.pos = pos; 203 | this.expmatches = [...expmatches]; 204 | } 205 | public toString(): string { 206 | return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 'not ': ''}'${x.literal}'`)}`; 207 | } 208 | } 209 | class ErrorTracker { 210 | private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1}; 211 | private regexset: Set = new Set(); 212 | private pmatches: MatchAttempt[] = []; 213 | public record(pos: PosInfo, result: any, att: MatchAttempt) { 214 | if ((result === null) === att.negated) 215 | return; 216 | if (pos.overallPos > this.mxpos.overallPos) { 217 | this.mxpos = pos; 218 | this.pmatches = []; 219 | this.regexset.clear() 220 | } 221 | if (this.mxpos.overallPos === pos.overallPos) { 222 | if(att.kind === "RegexMatch") { 223 | if(!this.regexset.has(att.literal)) 224 | this.pmatches.push(att); 225 | this.regexset.add(att.literal); 226 | } else { 227 | this.pmatches.push(att); 228 | } 229 | } 230 | } 231 | public getErr(): SyntaxErr | null { 232 | if (this.mxpos.overallPos !== -1) 233 | return new SyntaxErr(this.mxpos, this.pmatches); 234 | return null; 235 | } 236 | } -------------------------------------------------------------------------------- /src/test/multiline_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | test(`Multiline test: Positive`, () => { 4 | const res = parse(`line 1 5 | line 2`); 6 | expect(res.errs).toEqual([]); 7 | expect(res.ast).not.toBeNull(); 8 | }); 9 | 10 | test(`Multiline test: Negative`, () => { 11 | const res = parse("line 1 line 2"); 12 | expect(res.ast).toBeNull(); 13 | expect(res.errs).not.toHaveLength(0); 14 | }); 15 | -------------------------------------------------------------------------------- 
/src/test/muse/grammar.peg: -------------------------------------------------------------------------------- 1 | Program := Melody* _ 2 | Melody := _ 'melody' _ KID {_ KID}* _ 'start' 3 | Stmt* 4 | _ 'end' 5 | Stmt := KeyStmt | AssignStmt | ForStmt | IfStmt 6 | KeyStmt := _ Funcs _ Expr 7 | AssignStmt := _ KID _ '=' _ Expr 8 | ForStmt := _ 'for' _ KID _ 'from' _ Expr _ 'to' _ Expr 9 | Stmt* 10 | _ 'end' 11 | IfStmt := _ 'if' _ Expr _ 'then' 12 | Stmt* 13 | { _ 'end' | _ 'else' Stmt* _ 'end' } 14 | FuncExpr := Expr { _ Expr ' '* !'\n' }* 15 | Expr := Eq 16 | Eq := _ Comp { _ '==' Comp }* 17 | Comp := _ Sum { _ Compare _ Sum }* 18 | Sum := _ Product { _ PlusMinus _ Product }* 19 | Product := _ Atom { _ MulDiv _ Atom }* 20 | Atom := NoteLit | KID | INT | '\(' FuncExpr _ '\)' 21 | PlusMinus := '\+' | '-' 22 | MulDiv := '\*' | '\/' | '%' 23 | Compare := '<=' | '>=' | '<' | '>' 24 | NoteLit := '[A-G][#b]?\d' 25 | 26 | INT := '[0-9]+' 27 | 28 | Keyword := 'start' | 'end' | 'for' | 'else' | 'if' | 'then' 29 | Funcs := 'play' | 'wait' 30 | 31 | KID := !Keyword ID 32 | 33 | ID := '[a-zA-Z_]+' 34 | 35 | _ := '\s'* 36 | -------------------------------------------------------------------------------- /src/test/muse/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | // Just ensure they all parse 4 | 5 | describe("test parser", () => { 6 | interface TestCase { inp: string } 7 | const tcs: TestCase[] = [ 8 | { 9 | inp: `melody main start 10 | play (four cmaj) 11 | play (four gmaj) 12 | play (four dmaj) 13 | play (four emin) 14 | end 15 | 16 | melody cmaj start 17 | play (overlay (Piano C4) (Piano E4) (Piano G4)) 18 | end 19 | 20 | melody gmaj start 21 | play (overlay (Piano G4) (Piano B4) (Piano D4)) 22 | end 23 | 24 | melody dmaj start 25 | play (overlay (Piano D4) (Piano Gb4) (Piano A4)) 26 | end 27 | 28 | melody emin start 29 | play (overlay (Piano E4) (Piano G4) (Piano B4)) 30 | end 31 | 
32 | melody four x start 33 | play (repeat x 4) 34 | end`, 35 | }, 36 | { 37 | inp: `melody main start 38 | play (overlay progression drumBeat) 39 | end 40 | 41 | melody progression start 42 | play (four cmaj) 43 | play (four gmaj) 44 | play (four dmaj) 45 | play (four emin) 46 | end 47 | 48 | melody drumBeat start 49 | for i from 1 to 16 50 | if i % 4 == 0 then 51 | play Kick 52 | else if i % 2 == 0 then 53 | play Snare 54 | else 55 | wait 1 56 | end end 57 | end 58 | end 59 | 60 | melody four x start 61 | play (repeat x 4) 62 | end 63 | 64 | melody cmaj start 65 | play (overlay (Piano C4) (Piano E4) (Piano G4)) 66 | end 67 | 68 | melody gmaj start 69 | play (overlay (Piano G4) (Piano B4) (Piano D4)) 70 | end 71 | 72 | melody dmaj start 73 | play (overlay (Piano D4) (Piano Gb4) (Piano A4)) 74 | end 75 | 76 | melody emin start 77 | play (overlay (Piano E4) (Piano G4) (Piano B4)) 78 | end`, 79 | }, 80 | { 81 | inp: `melody a start 82 | play (fib 9) 83 | end 84 | 85 | melody fib n start 86 | a = 0 87 | b = 1 88 | for i from 1 to n 89 | play (Sine 220 + 20 * a) 90 | b = a + b 91 | a = b - a 92 | end 93 | end`, 94 | }, 95 | { 96 | inp: `melody a start 97 | play (fib 9) 98 | end 99 | 100 | melody fib n start 101 | a = 0 102 | b = 1 103 | for i from 1 to n 104 | play (Sine 220 + 20 * a) 105 | b = a + b 106 | a = b - a 107 | end 108 | end`, 109 | }, 110 | { 111 | inp: `melody A start 112 | play (Fib 10 0 1) 113 | end 114 | 115 | melody Fib n a b start 116 | if n then 117 | play (Sine 220 + a) 118 | play (Fib (n-1) (a+b) a) 119 | end 120 | end`, 121 | }, 122 | { 123 | inp: `melody a start 124 | play (overlay b c d) 125 | end 126 | 127 | melody b start 128 | play (Piano C4) 129 | play (Piano G4) 130 | play (Piano D4) 131 | end 132 | 133 | melody c start 134 | play (Piano E4) 135 | play (Piano B4) 136 | play (Piano Gb4) 137 | end 138 | 139 | melody d start 140 | play (Piano G4) 141 | play (Piano D4) 142 | play (Piano A4) 143 | end`, 144 | }, 145 | ]; 146 | for (let i = 0; i 
< tcs.length; ++i) { 147 | const tc = tcs[i]; 148 | test(`test ${i}`, () => { 149 | const res = parse(tc.inp); 150 | expect(res.errs).toEqual([]); 151 | expect(res.ast).not.toBeNull(); 152 | }); 153 | } 154 | }); 155 | -------------------------------------------------------------------------------- /src/test/pos_test/grammar.peg: -------------------------------------------------------------------------------- 1 | --- 2 | // This grammar matches balanced parentheses 3 | // Allowing for whitespace 4 | --- 5 | 6 | EXPR := _ strt=@ '\(' left=EXPR? '\)' end=@ right=EXPR? _ 7 | _ := '\s*' 8 | -------------------------------------------------------------------------------- /src/test/pos_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * --- 4 | * // This grammar matches balanced parentheses 5 | * // Allowing for whitespace 6 | * --- 7 | * EXPR := _ strt=@ '\(' left=EXPR? '\)' end=@ right=EXPR? 
_ 8 | * _ := '\s*' 9 | */ 10 | 11 | // This grammar matches balanced parentheses 12 | // Allowing for whitespace 13 | 14 | type Nullable = T | null; 15 | type $$RuleType = () => Nullable; 16 | export interface ASTNodeIntf { 17 | kind: ASTKinds; 18 | } 19 | export enum ASTKinds { 20 | EXPR = "EXPR", 21 | _ = "_", 22 | } 23 | export interface EXPR { 24 | kind: ASTKinds.EXPR; 25 | strt: PosInfo; 26 | left: Nullable; 27 | end: PosInfo; 28 | right: Nullable; 29 | } 30 | export type _ = string; 31 | export class Parser { 32 | private readonly input: string; 33 | private pos: PosInfo; 34 | private negating: boolean = false; 35 | private memoSafe: boolean = true; 36 | constructor(input: string) { 37 | this.pos = {overallPos: 0, line: 1, offset: 0}; 38 | this.input = input; 39 | } 40 | public reset(pos: PosInfo) { 41 | this.pos = pos; 42 | } 43 | public finished(): boolean { 44 | return this.pos.overallPos === this.input.length; 45 | } 46 | public clearMemos(): void { 47 | } 48 | public matchEXPR($$dpth: number, $$cr?: ErrorTracker): Nullable { 49 | return this.run($$dpth, 50 | () => { 51 | let $scope$strt: Nullable; 52 | let $scope$left: Nullable>; 53 | let $scope$end: Nullable; 54 | let $scope$right: Nullable>; 55 | let $$res: Nullable = null; 56 | if (true 57 | && this.match_($$dpth + 1, $$cr) !== null 58 | && ($scope$strt = this.mark()) !== null 59 | && this.regexAccept(String.raw`(?:\()`, "", $$dpth + 1, $$cr) !== null 60 | && (($scope$left = this.matchEXPR($$dpth + 1, $$cr)) || true) 61 | && this.regexAccept(String.raw`(?:\))`, "", $$dpth + 1, $$cr) !== null 62 | && ($scope$end = this.mark()) !== null 63 | && (($scope$right = this.matchEXPR($$dpth + 1, $$cr)) || true) 64 | && this.match_($$dpth + 1, $$cr) !== null 65 | ) { 66 | $$res = {kind: ASTKinds.EXPR, strt: $scope$strt, left: $scope$left, end: $scope$end, right: $scope$right}; 67 | } 68 | return $$res; 69 | }); 70 | } 71 | public match_($$dpth: number, $$cr?: ErrorTracker): Nullable<_> { 72 | return 
this.regexAccept(String.raw`(?:\s*)`, "", $$dpth + 1, $$cr); 73 | } 74 | public test(): boolean { 75 | const mrk = this.mark(); 76 | const res = this.matchEXPR(0); 77 | const ans = res !== null; 78 | this.reset(mrk); 79 | return ans; 80 | } 81 | public parse(): ParseResult { 82 | const mrk = this.mark(); 83 | const res = this.matchEXPR(0); 84 | if (res) 85 | return {ast: res, errs: []}; 86 | this.reset(mrk); 87 | const rec = new ErrorTracker(); 88 | this.clearMemos(); 89 | this.matchEXPR(0, rec); 90 | const err = rec.getErr() 91 | return {ast: res, errs: err !== null ? [err] : []} 92 | } 93 | public mark(): PosInfo { 94 | return this.pos; 95 | } 96 | // @ts-ignore: loopPlus may not be called 97 | private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> { 98 | return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>; 99 | } 100 | private loop(func: $$RuleType, lb: number, ub: number): Nullable { 101 | const mrk = this.mark(); 102 | const res: T[] = []; 103 | while (ub === -1 || res.length < ub) { 104 | const preMrk = this.mark(); 105 | const t = func(); 106 | if (t === null || this.pos.overallPos === preMrk.overallPos) { 107 | break; 108 | } 109 | res.push(t); 110 | } 111 | if (res.length >= lb) { 112 | return res; 113 | } 114 | this.reset(mrk); 115 | return null; 116 | } 117 | private run($$dpth: number, fn: $$RuleType): Nullable { 118 | const mrk = this.mark(); 119 | const res = fn() 120 | if (res !== null) 121 | return res; 122 | this.reset(mrk); 123 | return null; 124 | } 125 | // @ts-ignore: choice may not be called 126 | private choice(fns: Array<$$RuleType>): Nullable { 127 | for (const f of fns) { 128 | const res = f(); 129 | if (res !== null) { 130 | return res; 131 | } 132 | } 133 | return null; 134 | } 135 | private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable { 136 | return this.run(dpth, 137 | () => { 138 | const reg = new RegExp(match, "y" + mods); 139 | const mrk = this.mark(); 140 | reg.lastIndex = 
mrk.overallPos; 141 | const res = this.tryConsume(reg); 142 | if(cr) { 143 | cr.record(mrk, res, { 144 | kind: "RegexMatch", 145 | // We substring from 3 to len - 1 to strip off the 146 | // non-capture group syntax added as a WebKit workaround 147 | literal: match.substring(3, match.length - 1), 148 | negated: this.negating, 149 | }); 150 | } 151 | return res; 152 | }); 153 | } 154 | private tryConsume(reg: RegExp): Nullable { 155 | const res = reg.exec(this.input); 156 | if (res) { 157 | let lineJmp = 0; 158 | let lind = -1; 159 | for (let i = 0; i < res[0].length; ++i) { 160 | if (res[0][i] === "\n") { 161 | ++lineJmp; 162 | lind = i; 163 | } 164 | } 165 | this.pos = { 166 | overallPos: reg.lastIndex, 167 | line: this.pos.line + lineJmp, 168 | offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1) 169 | }; 170 | return res[0]; 171 | } 172 | return null; 173 | } 174 | // @ts-ignore: noConsume may not be called 175 | private noConsume(fn: $$RuleType): Nullable { 176 | const mrk = this.mark(); 177 | const res = fn(); 178 | this.reset(mrk); 179 | return res; 180 | } 181 | // @ts-ignore: negate may not be called 182 | private negate(fn: $$RuleType): Nullable { 183 | const mrk = this.mark(); 184 | const oneg = this.negating; 185 | this.negating = !oneg; 186 | const res = fn(); 187 | this.negating = oneg; 188 | this.reset(mrk); 189 | return res === null ? 
true : null; 190 | } 191 | // @ts-ignore: Memoise may not be used 192 | private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable { 193 | const $scope$pos = this.mark(); 194 | const $scope$memoRes = memo.get($scope$pos.overallPos); 195 | if(this.memoSafe && $scope$memoRes !== undefined) { 196 | this.reset($scope$memoRes[1]); 197 | return $scope$memoRes[0]; 198 | } 199 | const $scope$result = rule(); 200 | if(this.memoSafe) 201 | memo.set($scope$pos.overallPos, [$scope$result, this.mark()]); 202 | return $scope$result; 203 | } 204 | } 205 | export function parse(s: string): ParseResult { 206 | const p = new Parser(s); 207 | return p.parse(); 208 | } 209 | export interface ParseResult { 210 | ast: Nullable; 211 | errs: SyntaxErr[]; 212 | } 213 | export interface PosInfo { 214 | readonly overallPos: number; 215 | readonly line: number; 216 | readonly offset: number; 217 | } 218 | export interface RegexMatch { 219 | readonly kind: "RegexMatch"; 220 | readonly negated: boolean; 221 | readonly literal: string; 222 | } 223 | export type EOFMatch = { kind: "EOF"; negated: boolean }; 224 | export type MatchAttempt = RegexMatch | EOFMatch; 225 | export class SyntaxErr { 226 | public pos: PosInfo; 227 | public expmatches: MatchAttempt[]; 228 | constructor(pos: PosInfo, expmatches: MatchAttempt[]) { 229 | this.pos = pos; 230 | this.expmatches = [...expmatches]; 231 | } 232 | public toString(): string { 233 | return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 
'not ': ''}'${x.literal}'`)}`; 234 | } 235 | } 236 | class ErrorTracker { 237 | private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1}; 238 | private regexset: Set = new Set(); 239 | private pmatches: MatchAttempt[] = []; 240 | public record(pos: PosInfo, result: any, att: MatchAttempt) { 241 | if ((result === null) === att.negated) 242 | return; 243 | if (pos.overallPos > this.mxpos.overallPos) { 244 | this.mxpos = pos; 245 | this.pmatches = []; 246 | this.regexset.clear() 247 | } 248 | if (this.mxpos.overallPos === pos.overallPos) { 249 | if(att.kind === "RegexMatch") { 250 | if(!this.regexset.has(att.literal)) 251 | this.pmatches.push(att); 252 | this.regexset.add(att.literal); 253 | } else { 254 | this.pmatches.push(att); 255 | } 256 | } 257 | } 258 | public getErr(): SyntaxErr | null { 259 | if (this.mxpos.overallPos !== -1) 260 | return new SyntaxErr(this.mxpos, this.pmatches); 261 | return null; 262 | } 263 | } -------------------------------------------------------------------------------- /src/test/pos_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { EXPR, PosInfo, parse } from "./parser"; 2 | 3 | function comp(a: number, b: number): number { 4 | if(a < b) 5 | return -1; 6 | if(a === b) 7 | return 0; 8 | return 1; 9 | } 10 | function posSort(a: PosInfo, b: PosInfo): number { 11 | if(a.overallPos !== b.overallPos) 12 | return comp(a.overallPos, b.overallPos); 13 | if(a.line !== b.line) 14 | return comp(a.line, b.line); 15 | return comp(a.offset, b.offset); 16 | } 17 | 18 | function traverse(e: EXPR): [PosInfo[], PosInfo[]] { 19 | const ret = [[e.strt], [e.end]] as [PosInfo[], PosInfo[]]; 20 | if (e.left) { 21 | const t = traverse(e.left); 22 | ret[0] = ret[0].concat(t[0]); 23 | ret[1] = ret[1].concat(t[1]); 24 | } 25 | if (e.right) { 26 | const t = traverse(e.right); 27 | ret[0] = ret[0].concat(t[0]); 28 | ret[1] = ret[1].concat(t[1]); 29 | } 30 | return ret; 31 | } 32 | 33 | 
describe("test positions", () => { 34 | interface TestCase { inp: string; starts: PosInfo[]; ends: PosInfo[] } 35 | const tcs: TestCase[] = [ 36 | { 37 | inp: "()", 38 | starts: [ 39 | { 40 | overallPos: 0, 41 | line: 1, 42 | offset: 0, 43 | }, 44 | ], 45 | ends: [ 46 | { 47 | overallPos: 2, 48 | line: 1, 49 | offset: 2, 50 | }, 51 | ], 52 | }, 53 | { 54 | inp: "()()()", 55 | starts: [ 56 | { 57 | overallPos: 0, 58 | line: 1, 59 | offset: 0, 60 | }, 61 | { 62 | overallPos: 2, 63 | line: 1, 64 | offset: 2, 65 | }, 66 | { 67 | overallPos: 4, 68 | line: 1, 69 | offset: 4, 70 | }, 71 | ], 72 | ends: [ 73 | { 74 | overallPos: 2, 75 | line: 1, 76 | offset: 2, 77 | }, 78 | { 79 | overallPos: 4, 80 | line: 1, 81 | offset: 4, 82 | }, 83 | { 84 | overallPos: 6, 85 | line: 1, 86 | offset: 6, 87 | }, 88 | ], 89 | }, 90 | { 91 | inp: "(()())", 92 | starts: [ 93 | { 94 | overallPos: 0, 95 | line: 1, 96 | offset: 0, 97 | }, 98 | { 99 | overallPos: 1, 100 | line: 1, 101 | offset: 1, 102 | }, 103 | { 104 | overallPos: 3, 105 | line: 1, 106 | offset: 3, 107 | }, 108 | ], 109 | ends: [ 110 | { 111 | overallPos: 3, 112 | line: 1, 113 | offset: 3, 114 | }, 115 | { 116 | overallPos: 5, 117 | line: 1, 118 | offset: 5, 119 | }, 120 | { 121 | overallPos: 6, 122 | line: 1, 123 | offset: 6, 124 | }, 125 | ], 126 | }, 127 | { 128 | inp: ` 129 | () 130 | 131 | ()`, 132 | starts: [ 133 | { 134 | overallPos: 1, 135 | line: 2, 136 | offset: 0, 137 | }, 138 | { 139 | overallPos: 6, 140 | line: 4, 141 | offset: 1, 142 | }, 143 | ], 144 | ends: [ 145 | { 146 | overallPos: 3, 147 | line: 2, 148 | offset: 2, 149 | }, 150 | { 151 | overallPos: 8, 152 | line: 4, 153 | offset: 3, 154 | }, 155 | ], 156 | }, 157 | ]; 158 | for (const tc of tcs) { 159 | test(`inp: ${tc.inp}`, () => { 160 | const res = parse(tc.inp); 161 | expect(res.errs).toEqual([]); 162 | expect(res.ast).not.toBeNull(); 163 | 164 | const [starts, ends] = traverse(res.ast!); 165 | 166 | 
expect(tc.starts.sort(posSort)).toEqual(starts.sort(posSort)); 167 | expect(tc.ends.sort(posSort)).toEqual(ends.sort(posSort)); 168 | }); 169 | } 170 | }); 171 | -------------------------------------------------------------------------------- /src/test/regex_flags_test/flags.txt: -------------------------------------------------------------------------------- 1 | --regex-flags=u 2 | -------------------------------------------------------------------------------- /src/test/regex_flags_test/grammar.peg: -------------------------------------------------------------------------------- 1 | test := '\p{Alpha}+' 2 | -------------------------------------------------------------------------------- /src/test/regex_flags_test/parser.ts: -------------------------------------------------------------------------------- 1 | /* AutoGenerated Code, changes may be overwritten 2 | * INPUT GRAMMAR: 3 | * test := '\p{Alpha}+' 4 | */ 5 | type Nullable = T | null; 6 | type $$RuleType = () => Nullable; 7 | export interface ASTNodeIntf { 8 | kind: ASTKinds; 9 | } 10 | export enum ASTKinds { 11 | test = "test", 12 | } 13 | export type test = string; 14 | export class Parser { 15 | private readonly input: string; 16 | private pos: PosInfo; 17 | private negating: boolean = false; 18 | private memoSafe: boolean = true; 19 | constructor(input: string) { 20 | this.pos = {overallPos: 0, line: 1, offset: 0}; 21 | this.input = input; 22 | } 23 | public reset(pos: PosInfo) { 24 | this.pos = pos; 25 | } 26 | public finished(): boolean { 27 | return this.pos.overallPos === this.input.length; 28 | } 29 | public clearMemos(): void { 30 | } 31 | public matchtest($$dpth: number, $$cr?: ErrorTracker): Nullable { 32 | return this.regexAccept(String.raw`(?:\p{Alpha}+)`, "", $$dpth + 1, $$cr); 33 | } 34 | public test(): boolean { 35 | const mrk = this.mark(); 36 | const res = this.matchtest(0); 37 | const ans = res !== null; 38 | this.reset(mrk); 39 | return ans; 40 | } 41 | public parse(): ParseResult { 42 | 
const mrk = this.mark(); 43 | const res = this.matchtest(0); 44 | if (res) 45 | return {ast: res, errs: []}; 46 | this.reset(mrk); 47 | const rec = new ErrorTracker(); 48 | this.clearMemos(); 49 | this.matchtest(0, rec); 50 | const err = rec.getErr() 51 | return {ast: res, errs: err !== null ? [err] : []} 52 | } 53 | public mark(): PosInfo { 54 | return this.pos; 55 | } 56 | // @ts-ignore: loopPlus may not be called 57 | private loopPlus(func: $$RuleType): Nullable<[T, ...T[]]> { 58 | return this.loop(func, 1, -1) as Nullable<[T, ...T[]]>; 59 | } 60 | private loop(func: $$RuleType, lb: number, ub: number): Nullable { 61 | const mrk = this.mark(); 62 | const res: T[] = []; 63 | while (ub === -1 || res.length < ub) { 64 | const preMrk = this.mark(); 65 | const t = func(); 66 | if (t === null || this.pos.overallPos === preMrk.overallPos) { 67 | break; 68 | } 69 | res.push(t); 70 | } 71 | if (res.length >= lb) { 72 | return res; 73 | } 74 | this.reset(mrk); 75 | return null; 76 | } 77 | private run($$dpth: number, fn: $$RuleType): Nullable { 78 | const mrk = this.mark(); 79 | const res = fn() 80 | if (res !== null) 81 | return res; 82 | this.reset(mrk); 83 | return null; 84 | } 85 | // @ts-ignore: choice may not be called 86 | private choice(fns: Array<$$RuleType>): Nullable { 87 | for (const f of fns) { 88 | const res = f(); 89 | if (res !== null) { 90 | return res; 91 | } 92 | } 93 | return null; 94 | } 95 | private regexAccept(match: string, mods: string, dpth: number, cr?: ErrorTracker): Nullable { 96 | return this.run(dpth, 97 | () => { 98 | const reg = new RegExp(match, "yu" + mods); 99 | const mrk = this.mark(); 100 | reg.lastIndex = mrk.overallPos; 101 | const res = this.tryConsume(reg); 102 | if(cr) { 103 | cr.record(mrk, res, { 104 | kind: "RegexMatch", 105 | // We substring from 3 to len - 1 to strip off the 106 | // non-capture group syntax added as a WebKit workaround 107 | literal: match.substring(3, match.length - 1), 108 | negated: this.negating, 109 | 
}); 110 | } 111 | return res; 112 | }); 113 | } 114 | private tryConsume(reg: RegExp): Nullable { 115 | const res = reg.exec(this.input); 116 | if (res) { 117 | let lineJmp = 0; 118 | let lind = -1; 119 | for (let i = 0; i < res[0].length; ++i) { 120 | if (res[0][i] === "\n") { 121 | ++lineJmp; 122 | lind = i; 123 | } 124 | } 125 | this.pos = { 126 | overallPos: reg.lastIndex, 127 | line: this.pos.line + lineJmp, 128 | offset: lind === -1 ? this.pos.offset + res[0].length : (res[0].length - lind - 1) 129 | }; 130 | return res[0]; 131 | } 132 | return null; 133 | } 134 | // @ts-ignore: noConsume may not be called 135 | private noConsume(fn: $$RuleType): Nullable { 136 | const mrk = this.mark(); 137 | const res = fn(); 138 | this.reset(mrk); 139 | return res; 140 | } 141 | // @ts-ignore: negate may not be called 142 | private negate(fn: $$RuleType): Nullable { 143 | const mrk = this.mark(); 144 | const oneg = this.negating; 145 | this.negating = !oneg; 146 | const res = fn(); 147 | this.negating = oneg; 148 | this.reset(mrk); 149 | return res === null ? 
true : null; 150 | } 151 | // @ts-ignore: Memoise may not be used 152 | private memoise(rule: $$RuleType, memo: Map, PosInfo]>): Nullable { 153 | const $scope$pos = this.mark(); 154 | const $scope$memoRes = memo.get($scope$pos.overallPos); 155 | if(this.memoSafe && $scope$memoRes !== undefined) { 156 | this.reset($scope$memoRes[1]); 157 | return $scope$memoRes[0]; 158 | } 159 | const $scope$result = rule(); 160 | if(this.memoSafe) 161 | memo.set($scope$pos.overallPos, [$scope$result, this.mark()]); 162 | return $scope$result; 163 | } 164 | } 165 | export function parse(s: string): ParseResult { 166 | const p = new Parser(s); 167 | return p.parse(); 168 | } 169 | export interface ParseResult { 170 | ast: Nullable; 171 | errs: SyntaxErr[]; 172 | } 173 | export interface PosInfo { 174 | readonly overallPos: number; 175 | readonly line: number; 176 | readonly offset: number; 177 | } 178 | export interface RegexMatch { 179 | readonly kind: "RegexMatch"; 180 | readonly negated: boolean; 181 | readonly literal: string; 182 | } 183 | export type EOFMatch = { kind: "EOF"; negated: boolean }; 184 | export type MatchAttempt = RegexMatch | EOFMatch; 185 | export class SyntaxErr { 186 | public pos: PosInfo; 187 | public expmatches: MatchAttempt[]; 188 | constructor(pos: PosInfo, expmatches: MatchAttempt[]) { 189 | this.pos = pos; 190 | this.expmatches = [...expmatches]; 191 | } 192 | public toString(): string { 193 | return `Syntax Error at line ${this.pos.line}:${this.pos.offset}. Expected one of ${this.expmatches.map(x => x.kind === "EOF" ? " EOF" : ` ${x.negated ? 
'not ': ''}'${x.literal}'`)}`; 194 | } 195 | } 196 | class ErrorTracker { 197 | private mxpos: PosInfo = {overallPos: -1, line: -1, offset: -1}; 198 | private regexset: Set = new Set(); 199 | private pmatches: MatchAttempt[] = []; 200 | public record(pos: PosInfo, result: any, att: MatchAttempt) { 201 | if ((result === null) === att.negated) 202 | return; 203 | if (pos.overallPos > this.mxpos.overallPos) { 204 | this.mxpos = pos; 205 | this.pmatches = []; 206 | this.regexset.clear() 207 | } 208 | if (this.mxpos.overallPos === pos.overallPos) { 209 | if(att.kind === "RegexMatch") { 210 | if(!this.regexset.has(att.literal)) 211 | this.pmatches.push(att); 212 | this.regexset.add(att.literal); 213 | } else { 214 | this.pmatches.push(att); 215 | } 216 | } 217 | } 218 | public getErr(): SyntaxErr | null { 219 | if (this.mxpos.overallPos !== -1) 220 | return new SyntaxErr(this.mxpos, this.pmatches); 221 | return null; 222 | } 223 | } -------------------------------------------------------------------------------- /src/test/regex_flags_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from './parser'; 2 | 3 | test.each([ 4 | "hello", 5 | "goodbye", 6 | "OkIeDokEy", 7 | ])('test alpha unicode property: %s', input => { 8 | const res = parse(input); 9 | expect(res.errs).toHaveLength(0); 10 | expect(res.ast).not.toBeNull(); 11 | }); 12 | -------------------------------------------------------------------------------- /src/test/setanta_test/README.md: -------------------------------------------------------------------------------- 1 | These tests were pulled from github.com/EoinDavey/Setanta, which uses tsPEG for its parser 2 | -------------------------------------------------------------------------------- /src/test/setanta_test/flags.txt: -------------------------------------------------------------------------------- 1 | --enable-memo=false 2 | 
-------------------------------------------------------------------------------- /src/test/setanta_test/grammar.peg: -------------------------------------------------------------------------------- 1 | --- 2 | import { Context } from "setanta/node_build/ctx"; 3 | import { PossibleResolution, Value } from "setanta/node_build/values"; 4 | import { orBinOp, orQuickBinOp, andBinOp, andQuickBinOp, 5 | binOpEvalFn, binOpQuickEvalFn } from "setanta/node_build/binops"; 6 | import { objLookupsEval, postfixArgsEval, csArgsEval, prefEval, EvalFn } from "setanta/node_build/evals"; 7 | import { qEvalToEval } from "setanta/node_build/evals"; 8 | import * as Quick from "setanta/node_build/quickevals"; 9 | import { ASTVisitor } from "setanta/node_build/visitor"; 10 | type Acceptor = (visitor: ASTVisitor) => T; 11 | --- 12 | Program := stmts=AsgnStmt* _ 13 | AsgnStmt := IfStmt 14 | | BlockStmt 15 | | NuairStmt 16 | | LeStmt 17 | | CCStmt 18 | | BrisStmt 19 | | CtlchStmt 20 | | GniomhStmt 21 | | ToradhStmt 22 | | AssgnStmt 23 | | DefnStmt 24 | | Expr 25 | NonAsgnStmt := IfStmt 26 | | NuairStmt 27 | | LeStmt 28 | | CCStmt 29 | | BrisStmt 30 | | ToradhStmt 31 | | BlockStmt 32 | | AssgnStmt 33 | | Expr 34 | IfStmt := _ 'm[áa]' &gap expr=Expr &gap stmt=NonAsgnStmt elsebranch={_ 'n[oó]' &gap stmt=NonAsgnStmt}? 35 | .accept = Acceptor { return (v: ASTVisitor) => v.visitIfStmt(this); } 36 | BlockStmt := _ '{' blk=AsgnStmt* _ '}' 37 | .accept = Acceptor { return (v: ASTVisitor) => v.visitBlockStmt(this); } 38 | NuairStmt := _ 'nuair-a' expr=Expr &gap stmt=NonAsgnStmt 39 | .accept = Acceptor { return (v: ASTVisitor) => v.visitNuairStmt(this); } 40 | LeStmt := _ 'le' &gap id=ID _ 'idir' _ '\(' strt=Expr _ ',' end=Expr step={_ ',' step=Expr}? 
_ '\)' stmt=NonAsgnStmt 41 | .accept = Acceptor { return (v: ASTVisitor) => v.visitLeStmt(this); } 42 | DefnStmt := _ idstart=@ id=ID idend=@ _ ':=' _ expr=Expr 43 | .accept = Acceptor { return (v: ASTVisitor) => v.visitDefnStmt(this); } 44 | AssgnStmt := _ lstart=@ lhs=Postfix lend=@ _ op=AsgnOp _ expr=Expr 45 | .accept = Acceptor { return (v: ASTVisitor) => v.visitAssgnStmt(this); } 46 | GniomhStmt := _ 'gn[íi]omh' &gap id=ID _ '\(' args=CSIDs? _ '\)' _ '{' 47 | stmts=AsgnStmt* 48 | _ '}' 49 | .accept = Acceptor { return (v: ASTVisitor) => v.visitGniomhStmt(this); } 50 | CtlchStmt := _ 'creatlach' &gap id=ID tuis={_ 'ó' &gap parentstart=@ id=ID parentend=@}? _ '{' 51 | gniomhs=GniomhStmt* 52 | _ '}' 53 | .accept = Acceptor { return (v: ASTVisitor) => v.visitCtlchStmt(this); } 54 | BrisStmt := _ 'bris' 55 | CCStmt := _ 'chun-cinn' 56 | ToradhStmt := _ 'toradh' &gap exp=Expr? 57 | .accept = Acceptor { return (v: ASTVisitor) => v.visitToradhStmt(this); } 58 | Expr := And 59 | And := start=@ head=Or tail={_ '\&' trm=Or}* end=@ 60 | .evalfn = EvalFn { return andBinOp(this); } 61 | .qeval = Quick.MaybeEv { return andQuickBinOp(this); } 62 | .accept = Acceptor { return (v: ASTVisitor) => v.visitAnd(this); } 63 | Or := start=@ head=Eq tail={_ '\|' trm=Eq}* end=@ 64 | .evalfn = EvalFn { return orBinOp(this) } 65 | .qeval = Quick.MaybeEv { return orQuickBinOp(this); } 66 | .accept = Acceptor { return (v: ASTVisitor) => v.visitOr(this); } 67 | Eq := start=@ head=Comp tail={_ op='[!=]=' trm=Comp}* end=@ 68 | .evalfn = EvalFn { return binOpEvalFn(this) } 69 | .qeval = Quick.MaybeEv { return binOpQuickEvalFn(this); } 70 | .accept = Acceptor { return (v: ASTVisitor) => v.visitEq(this); } 71 | Comp := start=@ head=Sum tail={_ op=Compare trm=Sum}* end=@ 72 | .evalfn = EvalFn { return binOpEvalFn(this) } 73 | .qeval = Quick.MaybeEv { return binOpQuickEvalFn(this); } 74 | .accept = Acceptor { return (v: ASTVisitor) => v.visitComp(this); } 75 | Sum := start=@ head=Product tail={_ 
op=PlusMinus trm=Product}* end=@ 76 | .evalfn = EvalFn { return binOpEvalFn(this) } 77 | .qeval = Quick.MaybeEv { return binOpQuickEvalFn(this); } 78 | .accept = Acceptor { return (v: ASTVisitor) => v.visitSum(this); } 79 | Product := start=@ head=Prefix tail={_ op=MulDiv trm=Prefix}* end=@ 80 | .evalfn = EvalFn { return binOpEvalFn(this); } 81 | .qeval = Quick.MaybeEv { return binOpQuickEvalFn(this); } 82 | .accept = Acceptor { return (v: ASTVisitor) => v.visitProduct(this); } 83 | Prefix := _ start=@ op='-|!'? pf=Postfix end=@ 84 | .evalfn = EvalFn { return prefEval(this); } 85 | .qeval = Quick.MaybeEv { return Quick.qPrefEval(this); } 86 | Postfix := start=@ at=ObjLookups ops=PostOp* end=@ 87 | .evalfn = EvalFn { return postfixArgsEval(this); } 88 | .qeval = Quick.MaybeEv { return Quick.qPostfixArgsEval(this); } 89 | .accept = Acceptor { return (v: ASTVisitor) => v.visitPostfix(this); } 90 | ObjLookups := start=@ attrs={id=ID '@' !wspace}* root=Atom end=@ 91 | .evalfn = EvalFn { return objLookupsEval(this); } 92 | .qeval = Quick.MaybeEv { return Quick.qObjLookupsEval(this); } 93 | .accept = Acceptor { return (v: ASTVisitor) => v.visitObjLookups(this); } 94 | PostOp := '\(' args=CSArgs? _ '\)' | '\[' expr=Expr _ '\]' 95 | Atom := _ '\(' trm=Expr _ '\)' 96 | .evalfn = EvalFn { return (env: Context) => this.trm.evalfn(env); } 97 | .qeval = Quick.MaybeEv { 98 | const childF = this.trm.qeval; 99 | return childF === null ? null : childF.bind(this.trm); 100 | } 101 | .accept = Acceptor { return (v: ASTVisitor) => v.visitExpr(this.trm); } 102 | | ID 103 | | Teacs 104 | | Int 105 | | Bool 106 | | Neamhni 107 | | ListLit 108 | | GniomhExpr 109 | GniomhExpr := _ 'gn[íi]omh' _ '\(' args=CSIDs? 
_ '\)' _ '{' 110 | stmts=AsgnStmt* 111 | _ '}' 112 | .evalfn = EvalFn { return qEvalToEval(Quick.qGníomhEval(this)); } 113 | .qeval = Quick.EvalFn { return Quick.qGníomhEval(this); } 114 | .accept = Acceptor { return (v: ASTVisitor) => v.visitGniomhExpr(this); } 115 | ListLit := _ '\[' els=CSArgs? _ '\]' 116 | .evalfn = EvalFn { 117 | return (env: Context) => this.els ? this.els.evalfn(env) : Promise.resolve([]); 118 | } 119 | .qeval = Quick.MaybeEv { return Quick.qListLitEval(this); } 120 | .accept = Acceptor { return (v: ASTVisitor) => v.visitListLit(this); } 121 | CSArgs := start=@ head=Expr tail={_ ',' exp=Expr}* end=@ 122 | .evalfn = (env:Context) => Promise { return csArgsEval(this); } 123 | .qeval = ((env:Context) => Value[]) | null { return Quick.qCSArgsEval(this); } 124 | .exprs = Expr[] { return [this.head].concat(this.tail.map((x) => x.exp)); } 125 | CSIDs := head=ID tail={_ ',' id=ID}* 126 | .ids = ID[] { return [this.head].concat(this.tail.map((x) => x.id)); } 127 | ID := _ !{Keyword gap} start=@ id='[a-zA-Z_áéíóúÁÉÍÓÚ][a-zA-Z_áéíóúÁÉÍÓÚ0-9]*' end=@ 128 | .evalfn = EvalFn { return qEvalToEval(Quick.qIdEval(this)); } 129 | .qeval = Quick.EvalFn { return Quick.qIdEval(this); } 130 | .accept = Acceptor { return (v: ASTVisitor) => v.visitID(this); } 131 | .depth = PossibleResolution { return {resolved: false}; } 132 | Bool := _ bool='f[ií]or|br[eé]ag' 133 | .evalfn = EvalFn { return qEvalToEval(Quick.qBoolEval(this.bool)); } 134 | .qeval = Quick.EvalFn { return Quick.qBoolEval(this.bool); } 135 | Neamhni := _ 'neamhn[ií]' 136 | .evalfn = EvalFn { return () => Promise.resolve(null); } 137 | .qeval = Quick.EvalFn { return () => null; } 138 | Int := _ int='-?[0-9]+(?:\.[0-9]+)?' 
139 | .evalfn = EvalFn { return qEvalToEval(Quick.qIntEval(this.int)); } 140 | .qeval = Quick.EvalFn { return Quick.qIntEval(this.int); } 141 | Teacs := _ lit={ start=@ '\'' val='([^\'\\]|\\.)*' '\'' end=@ | 142 | start=@ '"' val='([^"\\]|\\.)*' '"' end=@ } 143 | .evalfn = EvalFn { return qEvalToEval(Quick.qTéacsEval(this.lit.val, this.lit.start, this.lit.end)); } 144 | .qeval = Quick.EvalFn { return Quick.qTéacsEval(this.lit.val, this.lit.start, this.lit.end); } 145 | _ := wspace* 146 | wspace := '(?:\s|>--(?:(?!--<).)*(--<|\n|$))' 147 | gap := wspace | '[^a-zA-Z0-9áéíóúÁÉÍÓÚ]' | '$' 148 | PlusMinus := '\+|-' 149 | AsgnOp := '=|\+=|\*=|-=|%=|\/=|\/\/=' 150 | MulDiv := '\*|\/\/|%|\/' 151 | Compare := '<=|>=|<|>' 152 | Keyword := 'm[áa]' | 'n[oó]' | 'nuair-a' | 'f[ií]or|br[eé]ag' 153 | | 'gn[ií]omh' | 'chun-cinn' | 'neamhn[ií]' | 'toradh' | 'creatlach' 154 | -------------------------------------------------------------------------------- /src/test/setanta_test/run.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | import { Interpreter } from "setanta/node_build/i10r"; 4 | 5 | test("Run simple program", async () => { 6 | // Calculate 21 fibonacci numbers 7 | const fibProg = `fib := [0, 1] 8 | le i idir (0, 20) 9 | fib += [fib[fad@fib - 2] + fib[fad@fib - 1]] 10 | res := fib[fad@fib - 1] 11 | `; 12 | const res = parse(fibProg); 13 | expect(res.errs).toEqual([]); 14 | expect(res.ast).not.toBeNull(); 15 | const i = new Interpreter(); 16 | 17 | await i.interpret(res.ast!); 18 | 19 | // Expect result to be 10946, the fibonacci number 20 | expect(i.global.env.getGlobalValDirect("res")).toEqual(10946); 21 | }); 22 | 23 | -------------------------------------------------------------------------------- /src/test/setanta_test/test.test.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "./parser"; 2 | 3 | // Just ensure they all parse 
4 | 5 | describe("test calculator", () => { 6 | interface TestCase { inp: string } 7 | const tcs: TestCase[] = [ 8 | { 9 | inp: `masses := [] 10 | nuair-a fíor { 11 | l := léigh_líne() 12 | má !l 13 | bris 14 | masses = masses + [go_uimh(l)] 15 | } 16 | 17 | gníomh cuida() { 18 | sm := 0 19 | le i idir (0, fad(masses)) { 20 | mass := masses[i] 21 | sm = sm + (mass - mass%3)/3 - 2 22 | } 23 | toradh sm 24 | } 25 | 26 | gníomh cuidb() { 27 | sm := 0 28 | le i idir (0, fad(masses)) { 29 | mass := masses[i] 30 | mass = (mass - mass%3)/3 - 2 31 | nuair-a mass > 0 { 32 | sm = sm + mass 33 | mass = (mass - mass%3)/3 - 2 34 | } 35 | } 36 | toradh sm 37 | } 38 | 39 | scríobh('Cuid A', cuida()) 40 | scríobh('Cuid B', cuidb())`, 41 | }, 42 | { 43 | inp: `mns := [ 44 | [-7,17,-11], 45 | [9, 12, 5], 46 | [-9, 0, -4], 47 | [4, 6, 0] 48 | ] 49 | 50 | vs := [ 51 | [0,0,0], 52 | [0,0,0], 53 | [0,0,0], 54 | [0,0,0] 55 | ] 56 | 57 | gníomh ad(x, y) { 58 | n := [] 59 | le i idir (0, fad(y)) 60 | n = n + [x[i] + y[i]] 61 | toradh n 62 | } 63 | 64 | gníomh cmp(x, y) { 65 | má x == y 66 | toradh 0 67 | má x > y 68 | toradh -1 69 | toradh 1 70 | } 71 | 72 | gníomh abs(x) { 73 | má x > 0 74 | toradh x 75 | toradh -1 * x 76 | } 77 | 78 | gníomh gcd(a, b) { 79 | nuair-a b != 0 { 80 | t := a % b 81 | a = b 82 | b = t 83 | } 84 | toradh a 85 | } 86 | 87 | le t idir (0, 1000) { 88 | le a idir (0, 4){ 89 | le b idir (0, a) { 90 | le i idir (0, 3) { 91 | df := cmp(mns[a][i], mns[b][i]) 92 | vs[a][i] = vs[a][i] + df 93 | vs[b][i] = vs[b][i] - df 94 | } 95 | } 96 | } 97 | le k idir (0, 4) 98 | mns[k] = ad(mns[k], vs[k]) 99 | } 100 | 101 | sm := 0 102 | le m idir (0, 4) { 103 | vsm := 0 104 | psm := 0 105 | le i idir(0, 3) { 106 | vsm = vsm + abs(vs[m][i]) 107 | psm = psm + abs(mns[m][i]) 108 | } 109 | sm = sm + vsm*psm 110 | } 111 | scríobh(sm)`, 112 | }, 113 | { 114 | inp: ` 115 | línte := [] 116 | nuair-a fíor { 117 | líne := léigh_líne() 118 | má !líne 119 | bris 120 | línte = línte + [líne] 121 
| } 122 | >-- Aimsigh eochair i liosta --< 123 | gníomh ams(ls, k){ 124 | le i idir (0, fad(ls)) 125 | má ls[i] == k 126 | toradh i 127 | } 128 | 129 | grph := [] 130 | le i idir (0, fad(línte)) 131 | grph = grph + [roinn(athchuir(athchuir(línte[i], ',', ''), '=> ', ''), ' ')] 132 | 133 | >-- Eochracha --< 134 | ecrcha := [] 135 | gníomh cuir_e(k){ 136 | le i idir (0, fad(ecrcha)) 137 | má ecrcha[i] == k 138 | bris 139 | ecrcha = ecrcha + [k] 140 | } 141 | cuir_e('ORE') 142 | le i idir(0, fad(grph)) 143 | cuir_e(grph[i][fad(grph[i])-1]) 144 | K := fad(ecrcha) 145 | 146 | méad := [0]*K 147 | adjLs := [] 148 | le i idir (0, K) 149 | adjLs = adjLs + [[]] 150 | 151 | le i idir (0, fad(grph)){ 152 | g := grph[i] 153 | ga := ams(ecrcha, g[fad(g)-1]) 154 | gb := go_uimh(g[fad(g)-2]) 155 | méad[ga] = gb 156 | le j idir(0, fad(g)) { 157 | má 2*j + 1 >= fad(g) - 2 158 | bris 159 | a := g[2*j] 160 | b := g[2*j+1] 161 | adjLs[ga] = adjLs[ga] + [[go_uimh(a), ams(ecrcha, b)]] 162 | } 163 | } 164 | 165 | feicthe := [breag]*K 166 | ord := [] 167 | gníomh siul(u) { 168 | feicthe[u] = fíor 169 | ls := adjLs[u] 170 | le i idir (0, fad(ls)){ 171 | v := ls[i][1] 172 | má !feicthe[v] 173 | siul(v) 174 | } 175 | ord = ord + [u] 176 | } 177 | 178 | siul(ams(ecrcha, 'FUEL')) 179 | 180 | gníomh idiv(a, b) { 181 | toradh (a - (a % b))/b 182 | } 183 | 184 | gníomh reitigh(tgt) { 185 | reqs := [0]*K 186 | reqs[ams(ecrcha, 'FUEL')] = tgt 187 | ol := ams(ecrcha, 'ORE') 188 | le i idir (0, fad(ord)) { 189 | ind := fad(ord) - i - 1 190 | x := ord[ind] 191 | r := reqs[x] 192 | má x == ol 193 | toradh r 194 | m := méad[x] 195 | tms := idiv(r+m-1, m) 196 | le j idir (0, fad(adjLs[x])) 197 | reqs[adjLs[x][j][1]] = reqs[adjLs[x][j][1]] + tms * adjLs[x][j][0] 198 | } 199 | } 200 | 201 | TR := 1000000000000 202 | L := 0 203 | R := TR 204 | 205 | nuair-a L < R { 206 | md := idiv(L+R+1,2) 207 | v := reitigh(md) 208 | má v > TR 209 | R = md -1 210 | nó 211 | L = md 212 | } 213 | 214 | scríobh('Cuid A') 215 
| scríobh(reitigh(1)) 216 | scríobh('Cuid B') 217 | scríobh(L)`, 218 | }, 219 | { 220 | inp: ` 221 | mvs := [] 222 | 223 | nuair-a fíor { 224 | líne := léigh_líne() 225 | má !líne 226 | bris 227 | mvs = mvs + [roinn(líne, ' ')] 228 | } 229 | 230 | gníomh iol(x, y, N) { 231 | a := x[0][0] b := x[0][1] 232 | c := x[1][0] d := x[1][1] 233 | e := y[0][0] f := y[0][1] 234 | g := y[1][0] h := y[1][1] 235 | toradh [ 236 | [(a * e + b * g) % N, (a * f + b * h) % N], 237 | [(c * e + d * g) % N, (c * f + d * h) % N] 238 | ] 239 | } 240 | 241 | gníomh modPow(x, id, iolfn, n, N) { 242 | ans := [[1, 0], [0, 1]] 243 | nuair-a n { 244 | má n % 2 == 1 245 | ans = iol(ans, x, N) 246 | x = iol(x, x, N) 247 | n = (n - (n % 2))/2 248 | } 249 | toradh ans 250 | } 251 | 252 | gníomh ab(N) { 253 | a := 1 254 | b := 0 255 | le i idir (0, fad(mvs)) { 256 | mv := mvs[i] 257 | má mv[0] == 'cut' { 258 | b = (b - go_uimh(mv[1])) % N 259 | } nó má mv[1] == 'with' { 260 | n := go_uimh(mv[3]) 261 | a = (n*a) % N 262 | b = (n*b) % N 263 | } nó { 264 | a = -a % N 265 | b = (- b - 1) % N 266 | } 267 | } 268 | toradh [a, b] 269 | } 270 | 271 | p := ab(10007) 272 | scríobh('Cuid 1', (p[0]*2019 + p[1])%10007)`, 273 | }, 274 | { 275 | inp: ` 276 | gníomh fac(x) { 277 | má x <= 1 278 | toradh 1 279 | toradh x * fac(x - 1) 280 | } 281 | 282 | scríobh(fac(10))`, 283 | }, 284 | { 285 | inp: `s := 'test' 286 | 287 | gníomh copy(arr) { 288 | nua := [fíor]*fad(arr) 289 | le i idir (0, fad(arr)) 290 | nua[i] = arr[i] 291 | toradh nua 292 | } 293 | 294 | gníomh gen(ind, used, st) { 295 | má ind == fad(s) 296 | scríobh(st) 297 | le i idir (0, fad(s)) { 298 | má used[i] 299 | chun-cinn 300 | nused := copy(used) 301 | nused[i] = fíor 302 | gen(ind + 1, nused, st + s[i]) 303 | } 304 | } 305 | 306 | gen(0, [breag]*fad(s), '')`, 307 | }, 308 | { 309 | inp: ` 310 | >-- Comhair na uimhreacha phríomha 311 | gníomh príómha(x) { 312 | má x <= 2 313 | toradh x == 2 314 | le i idir(2, x) { 315 | má i*i > x >-- Is feidir linn 
stad anseo --< 316 | bris 317 | má x % i == 0 318 | toradh breag 319 | } 320 | toradh fíor 321 | } 322 | 323 | le i idir (2, 100) { 324 | má príómha(i) 325 | scríobh(i) 326 | }`, 327 | }, 328 | ]; 329 | for (let i = 0; i < tcs.length; ++i) { 330 | const tc = tcs[i]; 331 | test(`inp: ${tc.inp}`, () => { 332 | const res = parse(tc.inp); 333 | expect(res.errs).toEqual([]); 334 | expect(res.ast).not.toBeNull(); 335 | }); 336 | } 337 | }); 338 | 339 | /* TODO re-enable after Setanta update 340 | test("Expect simple syntax error", () => { 341 | const prog = `x := [1, 2`; 342 | const res = parse(prog); 343 | expect(res.err).not.toBeNull(); 344 | const expmatches = res.err!.expmatches; 345 | const regs: string[] = []; 346 | for(const match of expmatches) { 347 | expect(match.kind).toEqual("RegexMatch"); 348 | regs.push((match as RegexMatch).literal); 349 | } 350 | expect(regs).toContain("\\]"); 351 | expect(regs).toContain(","); 352 | }); 353 | */ 354 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | import { ASTKinds, ATOM, MATCH, PREOP } from "./meta"; 2 | 3 | export function matchType(expr: MATCH): string { 4 | // Check if special rule 5 | if (expr.kind === ASTKinds.SPECIAL) 6 | return "PosInfo"; 7 | if (expr.op === null) 8 | return preType(expr.pre); 9 | if (expr.op.kind === ASTKinds.RANGESPEC){ 10 | return `${preType(expr.pre)}[]`; 11 | } 12 | if (expr.op.op === "?") 13 | return `Nullable<${preType(expr.pre)}>`; 14 | if (expr.op.op === '+') 15 | return `[${preType(expr.pre)}, ...${preType(expr.pre)}[]]`; 16 | return `${preType(expr.pre)}[]`; 17 | } 18 | 19 | export function preType(expr: PREOP): string { 20 | if (expr.op && expr.op === "!") { // Negation types return null if matched, true otherwise 21 | return "boolean"; 22 | } 23 | return atomType(expr.at); 24 | } 25 | 26 | export function atomType(at: ATOM): string { 27 | if (at.kind 
=== ASTKinds.ATOM_1) 28 | return at.name; 29 | if (at.kind === ASTKinds.ATOM_2) 30 | return "string"; 31 | if(at.kind === ASTKinds.EOF) 32 | return '{kind: ASTKinds.$EOF}'; 33 | const subname = at.name; 34 | if (subname) 35 | return subname; 36 | throw new Error("Unknown subrule"); 37 | } 38 | -------------------------------------------------------------------------------- /src/util.ts: -------------------------------------------------------------------------------- 1 | import { ALT, ASTKinds, PosInfo } from "./meta"; 2 | import { CheckError } from "./checks"; 3 | 4 | export type Rule = ALT[]; 5 | export type Grammar = Ruledef[]; 6 | export interface Ruledef { 7 | name: string; 8 | rule: Rule; 9 | // pos is possibly undefined as subrules don't have 10 | // a well defined definition location 11 | pos?: PosInfo; 12 | } 13 | 14 | export type Block = Array<string | Block> 15 | 16 | export function indentBlock(blk: string[]): string[] { 17 | return blk.filter((x) => x).map((x) => " " + x); 18 | } 19 | 20 | export function altNames(rd: Ruledef): string[] { 21 | if(rd.rule.length === 1) 22 | return [rd.name]; 23 | return rd.rule.map((_, i) => `${rd.name}_${i + 1}`); 24 | } 25 | 26 | export function writeBlock(blk: Block): string[] { 27 | const res: string[] = []; 28 | for (const x of blk) { 29 | if (typeof x === "string") { 30 | res.push(x); 31 | continue; 32 | } 33 | const sub = indentBlock(writeBlock(x)); 34 | res.push(...sub); 35 | } 36 | return res; 37 | } 38 | 39 | export function unescapeSeqs(s: string): string { 40 | let out = ""; 41 | for (let i = 0; i < s.length; ++i) { 42 | if (s[i] !== "\\") { 43 | out += s[i]; 44 | continue; 45 | } 46 | if (s[i + 1] === "{" || s[i + 1] === "}" || s[i + 1] === "\\") { 47 | out += s[i + 1]; 48 | } else { 49 | throw new Error(`Unknown escape code \\${s[i + 1]}`); 50 | } 51 | ++i; 52 | } 53 | return out; 54 | } 55 | 56 | // escapeBackticks replaces backticks in strings with escaped backticks 57 | // for use in inserting into a call to
// String.raw``
/**
 * Escapes every backtick in `s` by prefixing it with a backslash, so the
 * result can be placed between the backticks of a template literal.
 *
 * A string pattern passed to `String.prototype.replace` only substitutes the
 * first occurrence, so a global regex is used to reach all of them.
 *
 * @param s - Text that may contain backticks.
 * @returns `s` with each backtick preceded by a single backslash.
 */
export function escapeBackticks(s: string): string {
    return s.replace(/`/g, "\\`");
}

/**
 * Looks up a rule definition by name.
 *
 * @param gram - Grammar (list of rule definitions) to search.
 * @param name - Rule name to match exactly.
 * @returns The first definition whose `name` equals `name`, or `null` when
 *          no definition matches.
 */
export function getRuleFromGram(gram: Grammar, name: string): Ruledef | null {
    for (const rule of gram) {
        if (rule.name === name)
            return rule;
    }
    return null;
}

/**
 * Verifies that `s` can be compiled by the `RegExp` constructor.
 *
 * @param s - Regex source text.
 * @param start - Optional position that is forwarded to the `CheckError`.
 * @throws CheckError when `new RegExp(s)` throws; the message embeds both
 *         the offending source and the underlying error.
 */
export function assertValidRegex(s: string, start?: PosInfo): void {
    try {
        new RegExp(s);
    } catch (err) {
        throw new CheckError(`Couldn't compile regex '${s}': ${err}`, start);
    }
}

/**
 * Reports whether any match of any alternative of any rule in `gram` has an
 * atom of kind `ASTKinds.EOF`.
 *
 * @param gram - Grammar to scan.
 * @returns `true` as soon as one EOF atom is found, otherwise `false`.
 */
export function usesEOF(gram: Grammar): boolean {
    for (const rd of gram) {
        for (const alt of rd.rule) {
            for (const matchspec of alt.matches) {
                const match = matchspec.rule;
                // Matches of kind SPECIAL are skipped before `pre.at` is read.
                if (match.kind === ASTKinds.SPECIAL)
                    continue;
                const at = match.pre.at;
                if (at.kind === ASTKinds.EOF)
                    return true;
            }
        }
    }
    return false;
}

/**
 * Concatenates a list of blocks into one block. Only the outer list is
 * flattened; nested blocks inside each element are left in place.
 *
 * @param ls - Blocks to join, in order.
 * @returns A new block containing the elements of every block in `ls`.
 */
export function flattenBlock(ls: Block[]): Block {
    return ([] as Block).concat(...ls);
}

/*
Remainder of the original line (not part of util.ts), kept verbatim:
96 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2016", 4 | "module": "nodenext", 5 | "outDir": "./tsbuild", 6 | "rootDir": "./src", 7 | "strict": true, 8 | "noUnusedLocals": true 9 | }, 10 | "exclude": [ 11 | "demos" 12 | ] 13 | } 14 | --------------------------------------------------------------------------------
*/