├── .editorconfig ├── .github └── workflows │ └── on-pull-request.yml ├── .gitignore ├── .vscode └── launch.json ├── Changelog.md ├── LICENSE ├── README.md ├── examples ├── arithmetics.pegjs ├── bad-examples-1.pegjs ├── bar.ts ├── bulkOpening.pegjs ├── css.pegjs ├── javascript.pegjs ├── json.pegjs ├── minimal-with-dep.pegjs ├── minimal.pegjs ├── snake-case-rules.pegjs └── st.pegjs ├── output └── .eslintrc ├── package-lock.json ├── package.json ├── src ├── .eslintrc ├── cli.ts ├── libs │ ├── get-unique-name.ts │ ├── helpers.ts │ ├── list-rules.ts │ ├── prune-circular-references.ts │ ├── snake-to-camel.ts │ └── type-extractor.ts ├── passes │ ├── constants.ts │ └── generate-ts.ts ├── tspegjs.ts └── types.ts ├── test ├── README ├── arithmetics.test.ts ├── cli.test.ts ├── computed-types.test.ts ├── generate.test.ts ├── genoptions1.json ├── genoptions2.json └── type-extractor.test.ts ├── tsconfig.json ├── tslint.json └── vite.config.ts /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | indent_style = space 9 | indent_size = 2 10 | 11 | # Set default charset 12 | [*.{js,ts}] 13 | charset = utf-8 14 | end_of_line = lf 15 | insert_final_newline = true 16 | indent_style = space 17 | indent_size = 2 18 | -------------------------------------------------------------------------------- /.github/workflows/on-pull-request.yml: -------------------------------------------------------------------------------- 1 | name: Node.js CI 2 | 3 | on: 4 | push: 5 | branches: ['master'] 6 | pull_request: 7 | branches: ['master'] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | env: 13 | CI: false 14 | 15 | strategy: 16 | matrix: 17 | node-version: [18.x] 18 | # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ 19 | 20 | steps: 21 | - uses: 
actions/checkout@v3 22 | with: 23 | submodules: recursive 24 | - name: Use Node.js ${{ matrix.node-version }} 25 | uses: actions/setup-node@v3 26 | with: 27 | node-version: ${{ matrix.node-version }} 28 | cache: 'npm' 29 | - run: npm ci 30 | 31 | - name: 'Lint' 32 | run: npm run lint 33 | 34 | - name: 'Production Build' 35 | run: npm run build 36 | 37 | - name: 'Generation Tests' 38 | run: npm run test 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | node_modules/ 3 | output/*.js 4 | output/*.ts 5 | dist 6 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Test pass params to plugin", 9 | "cwd": "${workspaceFolder}/test", 10 | "program": "${workspaceFolder}/test/pass-params-to-plugin.js", 11 | "request": "launch", 12 | "skipFiles": ["<node_internals>/**"], 13 | "type": "node" 14 | }, 15 | { 16 | "type": "node", 17 | "request": "launch", 18 | "name": "Debug Current Test File", 19 | "autoAttachChildProcesses": true, 20 | "skipFiles": ["<node_internals>/**", "**/node_modules/**"], 21 | "program": "${workspaceRoot}/node_modules/vitest/vitest.mjs", 22 | "args": ["run", "${relativeFile}"], 23 | "smartStep": true 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Version 4.2.1 4 | 5 | - Added [#113](https://github.com/metadevpro/ts-pegjs/pull/113) Added missing type exports for: 
`TsPegjsParserBuildOptions` and `TsPegjsOptions` by @ppodds 6 | 7 | ## Version 4.2.0 8 | 9 | - Added [#111](https://github.com/metadevpro/ts-pegjs/pull/112) Emit all type aliases as default by @ppodds 10 | 11 | ## Version 4.1.1 12 | 13 | - Fix [#109](https://github.com/metadevpro/ts-pegjs/pull/109) Add shebang to bash scripts in vite by @siefkenj 14 | 15 | ## Version 4.1.0 16 | 17 | - Fix [#103](https://github.com/metadevpro/ts-pegjs/pull/103) Fix exception handler export: `PeggySyntaxError` & `DefaultTracer` 18 | - Fix [#104](https://github.com/metadevpro/ts-pegjs/pull/104) Rename grammarSource->source 19 | - Feature [#105](https://github.com/metadevpro/ts-pegjs/pull/105) Add ability to specify `--dependency` style deps by @siefkenj 20 | - Upgrade to `rimraf` 5.0 adding `--glob` 21 | 22 | ## Version 4.0.0 23 | 24 | - [#97](https://github.com/metadevpro/ts-pegjs/pull/97) Ported project to TS. Thanks to @siefkenj 25 | - [#98](https://github.com/metadevpro/ts-pegjs/pull/98) Use of `ts-morph` to infer the type for the rules. Thanks to @siefkenj 26 | 27 | ## Version 3.2.0 28 | 29 | - Added [PR #94](https://github.com/metadevpro/ts-pegjs/pull/93) Converted to vite build and esm. Thanks to @siefkenj 30 | 31 | ## Version 3.1.0 32 | 33 | - Added [PR #91](https://github.com/metadevpro/ts-pegjs/pull/91) Convert new peggy --dependencies or --dependency into TypeScript imports. Thanks to @M00TSDNS 34 | 35 | ## Version 3.0.0 36 | 37 | **Breaking changes!** 38 | 39 | - _Breaking change:_ Default for errorName changes from `SyntaxError` to `PeggySyntaxError` from [#86](https://github.com/metadevpro/ts-pegjs/pull/86). Reason: better alignment with Peggy. Allow users to override `SyntaxError` native type. 40 | - _Breaking change:_ Exported interfaces no longer have the `I` prefix from [#75](https://github.com/metadevpro/ts-pegjs/issues/75). Reason: follow TypeScript conventions for interfaces with no prefix. 
41 | |**Interface**|**Renamed to**| 42 | |---|---| 43 | |`IParseOptions`|`ParseOptions`| 44 | |`ICached`|`Cached`| 45 | |`ITraceEvent`|`TraceEvent`| 46 | |`IEndExpectation`|`EndExpectation`| 47 | |`IOtherExpectation`|`OtherExpectation`| 48 | |`IAnyExpectation`|`AnyExpectation`| 49 | |`IClassExpectation`|`ClassExpectation`| 50 | |`IClassParts`|`ClassParts`| 51 | |`ILiteralExpectation`|`LiteralExpectation`| 52 | |`IFileRange`|`FileRange`| 53 | |`IFilePosition`|`FilePosition`| 54 | 55 | ## Version 2.2.1 56 | 57 | - Fix [#84](https://github.com/metadevpro/ts-pegjs/issues/84) Same convention as peggy. Make `grammarSource` optional. 58 | 59 | ## Version 2.2.0 60 | 61 | - Added support for option `errorName`[#86](https://github.com/metadevpro/ts-pegjs/issues/86) Thanks to @iccicci. Needed after a breaking change in peggy. 62 | 63 | ## Version 2.1.0 64 | 65 | - Add support for the **pluck** operator. Thanks to @hildjj. [#66](https://github.com/metadevpro/ts-pegjs/issues/66) 66 | 67 | ## Version 2.0.2 68 | 69 | - Fix breaking change on peggy-to-plugin integration to receive peggy options. [#79](https://github.com/metadevpro/ts-pegjs/issues/79) 70 | 71 | ## Version 2.0.1 72 | 73 | - Added support for peggy 2.0.1 74 | - Fix [#78](https://github.com/metadevpro/ts-pegjs/issues/78) 75 | - Fix missing helper function 76 | 77 | ## Version 1.2.2 78 | 79 | - Remove prod. dependency for peggy [#65](https://github.com/metadevpro/ts-pegjs/issues/65) 80 | - Added more tests. 81 | - Update libs. 82 | 83 | ## Version 1.2.1 84 | 85 | - Fix src Type for IntelliJ [#70](https://github.com/metadevpro/ts-pegjs/issues/70). 86 | 87 | ## Version 1.2.0 88 | 89 | - Add SyntaxError.format method [#67](https://github.com/metadevpro/ts-pegjs/issues/67). 90 | - Now requires output to be ES6 at minimum (string.repeat needed). 91 | 92 | ## Version 1.1.1 93 | 94 | 2021.07.02 95 | 96 | - Add missing file to package.json [#64](https://github.com/metadevpro/ts-pegjs/pull/64). 
97 | 98 | ## Version 1.1.0 99 | 100 | 2021.07.02 101 | 102 | - Small changes for peggy [#61](https://github.com/metadevpro/ts-pegjs/pull/61). 103 | - Fixed peggy dependency version [#62](https://github.com/metadevpro/ts-pegjs/pull/62). 104 | - Added top level initializer block [#63](https://github.com/metadevpro/ts-pegjs/pull/63). 105 | - Update libs to latest versions. 106 | 107 | ## Version 1.0.0 108 | 109 | 2021.05.26 110 | 111 | - Major change: [#59](https://github.com/metadevpro/ts-pegjs/issues/59). Changed dependency from [pegjs](https://github.com/pegjs/pegjs) (unmaintained) in favour of [peggy](https://github.com/peggyjs/peggy) (a sensible maintained successor). 112 | 113 | ## Version 0.3.1 114 | 115 | 2021.02.09 116 | 117 | - Fix [#53](https://github.com/metadevpro/ts-pegjs/issues/53). Errata in `--allowed-start-rules`. 118 | 119 | ## Version 0.3.0 120 | 121 | 2020.12.31 122 | 123 | - Added ESLint for checking TS. 124 | - Removed support for TSLint rules. Removed options: `noTslint` and `tslintIgnores` 125 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017-2021 Pedro J. Molina 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TS PEG.js 2 | 3 | TS PEG.js is a TS code generation plugin for [peggy](https://www.npmjs.com/package/peggy). 4 | 5 | [![Build Status](https://travis-ci.org/metadevpro/ts-pegjs.svg?branch=master)](https://travis-ci.org/metadevpro/ts-pegjs) 6 | [![Known Vulnerabilities](https://snyk.io/test/github/metadevpro/ts-pegjs/badge.svg)](https://snyk.io/test/github/metadevpro/ts-pegjs) 7 | [![npm version](https://badge.fury.io/js/ts-pegjs.svg)](http://badge.fury.io/js/ts-pegjs) 8 | 9 | [![NPM](https://nodei.co/npm/ts-pegjs.png?downloads=true&downloadRank=true&stars=true)](https://nodei.co/npm/ts-pegjs/) 10 | 11 | ## Requirements 12 | 13 | - [peggy](https://www.npmjs.com/package/peggy) (previous versions use: [pegjs](https://pegjs.org)) 14 | 15 | ## Installation 16 | 17 | ### Node.js 18 | 19 | Installs ts-pegjs + peggy 20 | 21 | $ npm install ts-pegjs 22 | 23 | ## Usage 24 | 25 | ### Generating a Parser from JS code 26 | 27 | In Node.js, require both the peggy parser generator and the ts-pegjs plugin: 28 | 29 | ```typescript 30 | var peggy = require('peggy'); 31 | var tspegjs = require('ts-pegjs'); 32 | ``` 33 | 34 | To generate a TS parser, pass the ts-pegjs plugin and your grammar to `peggy.generate`: 35 | 36 | ```typescript 37 | var parser = peggy.generate("start = ('a' / 'b')+", { 38 | output: 'source', 39 | 
format: 'commonjs', 40 | plugins: [tspegjs], 41 | tspegjs: { 42 | customHeader: "// import lib\nimport { Lib } from 'mylib';" 43 | } 44 | }); 45 | ``` 46 | 47 | The method will return source code of generated parser as a string. 48 | 49 | Supported options of `pegjs.generate`: 50 | 51 | - `cache` — if `true`, makes the parser cache results, avoiding exponential 52 | parsing time in pathological cases but making the parser slower (default: 53 | `false`). This is strongly recommended for big grammars 54 | (like javascript.pegjs or css.pegjs in example folder) 55 | - `allowedStartRules` — rules the parser will be allowed to start parsing from 56 | (default: the first rule in the grammar) 57 | 58 | ### Plugin options 59 | 60 | **Note:** Options in CLI mode are written in POSIX (long names as kebab-case) convention e.g. `--custom-header` but with camelcase on JavaScript e.g. `customHeader`. 61 | 62 | - `customHeader` — A string or an array of strings which are a valid TS code to be injected on the header of the output file. E.g. provides a convenient place for adding library imports. 63 | - `customHeaderFile` — A header file to include. 64 | - `errorName` — The name of the exported internal error class to override. The default value from version 3.0.0 is `PeggySyntaxError`. Previous one was `SyntaxError`. 65 | - `returnTypes` — An object containing rule names as keys and a valid TS return type as string. 66 | - `skipTypeComputation` — Boolean. If `true`, `ts-pegjs` will not try to use TS to infer types based on your grammar rules. 67 | - `onlyGenerateGrammarTypes` — Boolean. If `true`, only types for your grammar rules (and no parser) will be generated. Cannot be used with `skipTypeComputation`. 68 | - `doNotCamelCaseTypes` — Boolean. By default type names for grammar rules are converted to CamelCase. If `true`, this conversion is not done and type names will match the casing of your grammar rules. 
69 | 70 | ### Generating a Parser from CLI 71 | 72 | Sample usage: 73 | 74 | ``` 75 | peggy --plugin ./src/tspegjs -o examples/arithmetics.ts --cache examples/arithmetics.pegjs 76 | ``` 77 | 78 | (Note `./src/tspegjs` is the path to `tspegjs.ts` in the project. If you installed ts-pegjs using npm, it should probably be `./node_modules/ts-pegjs/src/tspegjs`.) 79 | 80 | It will generate the parser in the TS flavour. 81 | 82 | If you need to pass specific plugin options you can use the option `--extra-options-file` provided by peggy and pass it a filename (e.g. pegconfig.json) containing specific options like the following JSON sample: 83 | 84 | ``` 85 | peggy --plugin ./src/tspegjs --extra-options-file pegconfig.json -o examples/arithmetics.ts --cache examples/arithmetics.pegjs 86 | ``` 87 | 88 | ```json 89 | { 90 | "tspegjs": { 91 | "customHeader": "// import lib\nimport { Lib } from 'mylib';" 92 | }, 93 | "returnTypes": { 94 | "Integer": "number", 95 | "Expression": "number" 96 | } 97 | } 98 | ``` 99 | > For rules not listed in `returnTypes` object `any` type is declared by default. 100 | 101 | > Make sure to pass any additional CLI options, like `--extra-options-file` before the parameter `-o` as these will otherwise be treated as arguments to that one. 102 | 103 | ## Using the Parser 104 | 105 | 1. Save parser generated by `peggy.generate` to a file or use the one generated from the CLI tool. 106 | 107 | 2. In client TS code: 108 | 109 | ```typescript 110 | import { PeggySyntaxError, parse } from './arithmetics'; 111 | 112 | try { 113 | const sampleOutput = parse('my sample...'); 114 | } catch (ex) { 115 | // Handle parsing error (narrow with: ex instanceof PeggySyntaxError) 116 | // [...] 117 | } 118 | ``` 119 | 120 | ## Changelog 121 | 122 | [Changelog](./Changelog.md). 
123 | 124 | ## Acknowledgments 125 | 126 | Thanks to: 127 | 128 | - [David Majda](https://github.com/dmajda) for creating pegjs 129 | - [Elantcev Mikhail](https://github.com/Nordth) for providing the pegjs PHP plugin, inspiration on this one. 130 | 131 | ## License 132 | 133 | [The MIT License (MIT)](http://opensource.org/licenses/MIT) 134 | 135 | --- 136 | 137 | (c) 2017-2023, [Pedro J. Molina](https://github.com/pjmolina) at [metadev.pro](https://metadev.pro) 138 | -------------------------------------------------------------------------------- /examples/arithmetics.pegjs: -------------------------------------------------------------------------------- 1 | // Simple Arithmetics Grammar 2 | // ========================== 3 | // 4 | // Accepts expressions like "2 * (3 + 4)" and computes their value. 5 | 6 | Expression 7 | = head:Term tail:(_ ("+" / "-") _ Term)* { 8 | return tail.reduce(function(result: number, element: any[]) { 9 | if (element[1] === "+") { return result + element[3]; } 10 | if (element[1] === "-") { return result - element[3]; } 11 | }, head); 12 | } 13 | 14 | Term 15 | = head:Factor tail:(_ ("*" / "/") _ Factor)* { 16 | return tail.reduce(function(result: number, element: any[]) { 17 | if (element[1] === "*") { return result * element[3]; } 18 | if (element[1] === "/") { return result / element[3]; } 19 | }, head); 20 | } 21 | 22 | Factor 23 | = "(" _ @Expression _ ")" 24 | / Integer 25 | 26 | Integer "integer" 27 | = _ [0-9]+ { return parseInt(text(), 10); } 28 | 29 | _ "whitespace" 30 | = [ \t\n\r]* 31 | -------------------------------------------------------------------------------- /examples/bad-examples-1.pegjs: -------------------------------------------------------------------------------- 1 | // Example grammar with some unicode rules. These can cause errors if they 2 | // are not escaped properly. 
3 | WhiteSpace "whitespace" 4 | = "\t" 5 | / "\v" 6 | / "\f" 7 | / " " 8 | / "\u00A0" 9 | / "\uFEFF" 10 | / Zs 11 | 12 | LineTerminator 13 | = [\n\r\u2028\u2029] 14 | 15 | LineTerminatorSequence "end of line" 16 | = "\n" 17 | / "\r\n" 18 | / "\r" 19 | / "\u2028" 20 | / "\u2029" 21 | 22 | Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] 23 | -------------------------------------------------------------------------------- /examples/bar.ts: -------------------------------------------------------------------------------- 1 | // This file exists as a dependency of minimal-with-dep.pegjs 2 | const foo = 'I AM THE CONST FOO'; 3 | export default foo; 4 | -------------------------------------------------------------------------------- /examples/bulkOpening.pegjs: -------------------------------------------------------------------------------- 1 | // Sample submited by Ahryman40k 2 | 3 | BulkOpening "bulk_opening" = BulkStartKey _ clientId:Integer _ date:Date _ hour:Hour _ bulkId:Integer _ financialYear:Integer _ period:Integer _ appVersion:StrangeKey { 4 | const d = new Date(date); 5 | const h = new Date(hour); 6 | 7 | d.setHours( h.getHours() ) 8 | d.setMinutes( h.getMinutes() ) 9 | d.setSeconds( h.getSeconds() ) 10 | 11 | return { 12 | type: 'BulkHeader', 13 | clientId, 14 | bulkId, 15 | date: d, 16 | financialYear, 17 | period, 18 | appVersion 19 | } 20 | } 21 | 22 | BulkStartKey = "0000" 23 | 24 | Date "date" = $([0-9]+) { 25 | const date = text(); 26 | return Date.parse(date.slice(0, 4) + "-" + date.slice(4, 6) + "-" + date.slice(6, 8)); 27 | } 28 | 29 | Hour "hour" = $([0-9]+) { 30 | const h = text(); 31 | return new Date().setHours( +h.slice(0,2), +h.slice(2,4), +h.slice(4,6)); 32 | } 33 | 34 | StrangeKey "id" = ([0-9][0-9].[0-9][0-9].[0-9][0-9].[0-9][0-9]) { 35 | return text(); 36 | } 37 | 38 | Integer "integer" = [0-9]+ { 39 | return parseInt(text(), 10); 40 | } 41 | 42 | _ "whitespace" = [ \t\n\r]* 
-------------------------------------------------------------------------------- /examples/css.pegjs: -------------------------------------------------------------------------------- 1 | // CSS Grammar 2 | // =========== 3 | // 4 | // Based on grammar from CSS 2.1 specification [1] (including the errata [2]). 5 | // Generated parser builds a syntax tree composed of nested JavaScript objects, 6 | // vaguely inspired by CSS DOM [3]. The CSS DOM itself wasn't used as it is not 7 | // expressive enough (e.g. selectors are reflected as text, not structured 8 | // objects) and somewhat cumbersome. 9 | // 10 | // Limitations: 11 | // 12 | // * Many errors which should be recovered from according to the specification 13 | // (e.g. malformed declarations or unexpected end of stylesheet) are fatal. 14 | // This is a result of straightforward rewrite of the CSS grammar to PEG.js. 15 | // 16 | // [1] http://www.w3.org/TR/2011/REC-CSS2-20110607 17 | // [2] http://www.w3.org/Style/css2-updates/REC-CSS2-20110607-errata.html 18 | // [3] http://www.w3.org/TR/DOM-Level-2-Style/css.html 19 | 20 | { 21 | /* Slot "index" of an optional group, or null when the group is absent (falsy). */ 22 | function extractOptional(optional, index) { 23 | if (!optional) { return null; } 24 | return optional[index]; 25 | } 26 | 27 | /* Slot "index" of every entry in "list", in the original order. */ 28 | function extractList(list, index) { 29 | return list.map((entry) => entry[index]); 30 | } 31 | 32 | /* "head" followed by slot "index" of each "tail" entry, with null items dropped. */ 33 | function buildList(head, tail, index) { 34 | const rest = extractList(tail, index); 35 | return [head].concat(rest).filter((item) => item !== null); 36 | } 37 | 38 | /* Folds "tail" ([operator, term] pairs) onto "head" as left-nested "Expression" nodes. */ 39 | function buildExpression(head, tail) { 40 | let tree = head; 41 | tail.forEach((pair) => { tree = { type: "Expression", operator: pair[0], left: tree, right: pair[1] }; }); 42 | return tree; 43 | } 44 | } 45 | 46 | start 47 | = stylesheet:stylesheet comment* { return stylesheet; } 48 | 49 | // ----- G.1 Grammar ----- 50 | 51 | stylesheet 52 | = charset:(CHARSET_SYM STRING ";")? 
(S / CDO / CDC)* 53 | imports:(import (CDO S* / CDC S*)*)* 54 | rules:((ruleset / media / page) (CDO S* / CDC S*)*)* 55 | { 56 | return { 57 | type: "StyleSheet", 58 | charset: extractOptional(charset, 1), 59 | imports: extractList(imports, 0), 60 | rules: extractList(rules, 0) 61 | }; 62 | } 63 | 64 | import 65 | = IMPORT_SYM S* href:(STRING / URI) S* media:media_list? ";" S* { 66 | return { 67 | type: "ImportRule", 68 | href: href, 69 | media: media !== null ? media : [] 70 | }; 71 | } 72 | 73 | media 74 | = MEDIA_SYM S* media:media_list "{" S* rules:ruleset* "}" S* { 75 | return { 76 | type: "MediaRule", 77 | media: media, 78 | rules: rules 79 | }; 80 | } 81 | 82 | media_list 83 | = head:medium tail:("," S* medium)* { return buildList(head, tail, 2); } 84 | 85 | medium 86 | = name:IDENT S* { return name; } 87 | 88 | page 89 | = PAGE_SYM S* selector:pseudo_page? 90 | "{" S* 91 | declarationsHead:declaration? 92 | declarationsTail:(";" S* declaration?)* 93 | "}" S* 94 | { 95 | return { 96 | type: "PageRule", 97 | selector: selector, 98 | declarations: buildList(declarationsHead, declarationsTail, 2) 99 | }; 100 | } 101 | 102 | pseudo_page 103 | = ":" value:IDENT S* { return { type: "PseudoSelector", value: value }; } 104 | 105 | operator 106 | = "/" S* { return "/"; } 107 | / "," S* { return ","; } 108 | 109 | combinator 110 | = "+" S* { return "+"; } 111 | / ">" S* { return ">"; } 112 | 113 | property 114 | = name:IDENT S* { return name; } 115 | 116 | ruleset 117 | = selectorsHead:selector 118 | selectorsTail:("," S* selector)* 119 | "{" S* 120 | declarationsHead:declaration? 
121 | declarationsTail:(";" S* declaration?)* 122 | "}" S* 123 | { 124 | return { 125 | type: "RuleSet", 126 | selectors: buildList(selectorsHead, selectorsTail, 2), 127 | declarations: buildList(declarationsHead, declarationsTail, 2) 128 | }; 129 | } 130 | 131 | selector 132 | = left:simple_selector S* combinator:combinator right:selector { 133 | return { 134 | type: "Selector", 135 | combinator: combinator, 136 | left: left, 137 | right: right 138 | }; 139 | } 140 | / left:simple_selector S+ right:selector { 141 | return { 142 | type: "Selector", 143 | combinator: " ", 144 | left: left, 145 | right: right 146 | }; 147 | } 148 | / selector:simple_selector S* { return selector; } 149 | 150 | simple_selector 151 | = element:element_name qualifiers:(id / class / attrib / pseudo)* { 152 | return { 153 | type: "SimpleSelector", 154 | element: element, 155 | qualifiers: qualifiers 156 | }; 157 | } 158 | / qualifiers:(id / class / attrib / pseudo)+ { 159 | return { 160 | type: "SimpleSelector", 161 | element: "*", 162 | qualifiers: qualifiers 163 | }; 164 | } 165 | 166 | id 167 | = id:HASH { return { type: "IDSelector", id: id }; } 168 | 169 | class 170 | = "." cl:IDENT { return { type: "ClassSelector", class: cl }; } 171 | 172 | element_name 173 | = IDENT 174 | / "*" 175 | 176 | attrib 177 | = "[" S* 178 | attribute:IDENT S* 179 | operatorAndValue:(("=" / INCLUDES / DASHMATCH) S* (IDENT / STRING) S*)? 180 | "]" 181 | { 182 | return { 183 | type: "AttributeSelector", 184 | attribute: attribute, 185 | operator: extractOptional(operatorAndValue, 0), 186 | value: extractOptional(operatorAndValue, 2) 187 | }; 188 | } 189 | 190 | pseudo 191 | = ":" 192 | value:( 193 | name:FUNCTION S* params:(IDENT S*)? ")" { 194 | return { 195 | type: "Function", 196 | name: name, 197 | params: params !== null ? 
[params[0]] : [] 198 | }; 199 | } 200 | / IDENT 201 | ) 202 | { return { type: "PseudoSelector", value: value }; } 203 | 204 | declaration 205 | = name:property ':' S* value:expr prio:prio? { 206 | return { 207 | type: "Declaration", 208 | name: name, 209 | value: value, 210 | important: prio !== null 211 | }; 212 | } 213 | 214 | prio 215 | = IMPORTANT_SYM S* 216 | 217 | expr 218 | = head:term tail:(operator? term)* { return buildExpression(head, tail); } 219 | 220 | term 221 | = quantity:(PERCENTAGE / LENGTH / EMS / EXS / ANGLE / TIME / FREQ / NUMBER) 222 | S* 223 | { 224 | return { 225 | type: "Quantity", 226 | value: quantity.value, 227 | unit: quantity.unit 228 | }; 229 | } 230 | / value:STRING S* { return { type: "String", value: value }; } 231 | / value:URI S* { return { type: "URI", value: value }; } 232 | / function 233 | / hexcolor 234 | / value:IDENT S* { return { type: "Ident", value: value }; } 235 | 236 | function 237 | = name:FUNCTION S* params:expr ")" S* { 238 | return { type: "Function", name: name, params: params }; 239 | } 240 | 241 | hexcolor 242 | = value:HASH S* { return { type: "Hexcolor", value: value }; } 243 | 244 | // ----- G.2 Lexical scanner ----- 245 | 246 | // Macros 247 | 248 | h 249 | = [0-9a-f]i 250 | 251 | nonascii 252 | = [\x80-\uFFFF] 253 | 254 | unicode 255 | = "\\" digits:$(h h? h? h? h? h?) ("\r\n" / [ \t\r\n\f])? 
{ 256 | return String.fromCharCode(parseInt(digits, 16)); 257 | } 258 | 259 | escape 260 | = unicode 261 | / "\\" ch:[^\r\n\f0-9a-f]i { return ch; } 262 | 263 | nmstart 264 | = [_a-z]i 265 | / nonascii 266 | / escape 267 | 268 | nmchar 269 | = [_a-z0-9-]i 270 | / nonascii 271 | / escape 272 | 273 | string1 274 | = '"' chars:([^\n\r\f\\"] / "\\" nl:nl { return ""; } / escape)* '"' { 275 | return chars.join(""); 276 | } 277 | 278 | string2 279 | = "'" chars:([^\n\r\f\\'] / "\\" nl:nl { return ""; } / escape)* "'" { 280 | return chars.join(""); 281 | } 282 | 283 | comment 284 | = "/*" [^*]* "*"+ ([^/*] [^*]* "*"+)* "/" 285 | 286 | ident 287 | = prefix:$"-"? start:nmstart chars:nmchar* { 288 | return prefix + start + chars.join(""); 289 | } 290 | 291 | name 292 | = chars:nmchar+ { return chars.join(""); } 293 | 294 | num 295 | = [+-]? ([0-9]* "." [0-9]+ / [0-9]+) ("e" [+-]? [0-9]+)? { 296 | return parseFloat(text()); 297 | } 298 | 299 | string 300 | = string1 301 | / string2 302 | 303 | url 304 | = chars:([!#$%&*-\[\]-~] / nonascii / escape)* { return chars.join(""); } 305 | 306 | s 307 | = [ \t\r\n\f]+ 308 | 309 | w 310 | = s? 311 | 312 | nl 313 | = "\n" 314 | / "\r\n" 315 | / "\r" 316 | / "\f" 317 | 318 | A = "a"i / "\\" "0"? "0"? "0"? "0"? [\x41\x61] ("\r\n" / [ \t\r\n\f])? { return "a"; } 319 | C = "c"i / "\\" "0"? "0"? "0"? "0"? [\x43\x63] ("\r\n" / [ \t\r\n\f])? { return "c"; } 320 | D = "d"i / "\\" "0"? "0"? "0"? "0"? [\x44\x64] ("\r\n" / [ \t\r\n\f])? { return "d"; } 321 | E = "e"i / "\\" "0"? "0"? "0"? "0"? [\x45\x65] ("\r\n" / [ \t\r\n\f])? { return "e"; } 322 | G = "g"i / "\\" "0"? "0"? "0"? "0"? [\x47\x67] ("\r\n" / [ \t\r\n\f])? / "\\g"i { return "g"; } 323 | H = "h"i / "\\" "0"? "0"? "0"? "0"? [\x48\x68] ("\r\n" / [ \t\r\n\f])? / "\\h"i { return "h"; } 324 | I = "i"i / "\\" "0"? "0"? "0"? "0"? [\x49\x69] ("\r\n" / [ \t\r\n\f])? / "\\i"i { return "i"; } 325 | K = "k"i / "\\" "0"? "0"? "0"? "0"? [\x4b\x6b] ("\r\n" / [ \t\r\n\f])? 
/ "\\k"i { return "k"; } 326 | L = "l"i / "\\" "0"? "0"? "0"? "0"? [\x4c\x6c] ("\r\n" / [ \t\r\n\f])? / "\\l"i { return "l"; } 327 | M = "m"i / "\\" "0"? "0"? "0"? "0"? [\x4d\x6d] ("\r\n" / [ \t\r\n\f])? / "\\m"i { return "m"; } 328 | N = "n"i / "\\" "0"? "0"? "0"? "0"? [\x4e\x6e] ("\r\n" / [ \t\r\n\f])? / "\\n"i { return "n"; } 329 | O = "o"i / "\\" "0"? "0"? "0"? "0"? [\x4f\x6f] ("\r\n" / [ \t\r\n\f])? / "\\o"i { return "o"; } 330 | P = "p"i / "\\" "0"? "0"? "0"? "0"? [\x50\x70] ("\r\n" / [ \t\r\n\f])? / "\\p"i { return "p"; } 331 | R = "r"i / "\\" "0"? "0"? "0"? "0"? [\x52\x72] ("\r\n" / [ \t\r\n\f])? / "\\r"i { return "r"; } 332 | S_ = "s"i / "\\" "0"? "0"? "0"? "0"? [\x53\x73] ("\r\n" / [ \t\r\n\f])? / "\\s"i { return "s"; } 333 | T = "t"i / "\\" "0"? "0"? "0"? "0"? [\x54\x74] ("\r\n" / [ \t\r\n\f])? / "\\t"i { return "t"; } 334 | U = "u"i / "\\" "0"? "0"? "0"? "0"? [\x55\x75] ("\r\n" / [ \t\r\n\f])? / "\\u"i { return "u"; } 335 | X = "x"i / "\\" "0"? "0"? "0"? "0"? [\x58\x78] ("\r\n" / [ \t\r\n\f])? / "\\x"i { return "x"; } 336 | Z = "z"i / "\\" "0"? "0"? "0"? "0"? [\x5a\x7a] ("\r\n" / [ \t\r\n\f])? / "\\z"i { return "z"; } 337 | 338 | // Tokens 339 | 340 | S "whitespace" 341 | = comment* s 342 | // CDO and CDC match the HTML comment open/close delimiters; both are referenced by the stylesheet rule. 343 | CDO "<!--" 344 | = comment* "<!--" 345 | 346 | CDC "-->" 347 | = comment* "-->" 348 | 349 | INCLUDES "~=" 350 | = comment* "~=" 351 | 352 | DASHMATCH "|=" 353 | = comment* "|=" 354 | 355 | STRING "string" 356 | = comment* string1:string { return string1; } 357 | 358 | IDENT "identifier" 359 | = comment* ident:ident { return ident; } 360 | 361 | HASH "hash" 362 | = comment* "#" name:name { return "#" + name; } 363 | 364 | IMPORT_SYM "@import" 365 | = comment* "@" I M P O R T 366 | 367 | PAGE_SYM "@page" 368 | = comment* "@" P A G E 369 | 370 | MEDIA_SYM "@media" 371 | = comment* "@" M E D I A 372 | 373 | CHARSET_SYM "@charset" 374 | = comment* "@charset " 375 | 376 | // We use |s| instead of |w| here to avoid infinite recursion. 377 | IMPORTANT_SYM "!important" 378 | = comment* "!" 
(s / comment)* I M P O R T A N T 379 | 380 | EMS "length" 381 | = comment* value:num E M { return { value: value, unit: "em" }; } 382 | 383 | EXS "length" 384 | = comment* value:num E X { return { value: value, unit: "ex" }; } 385 | 386 | LENGTH "length" 387 | = comment* value:num P X { return { value: value, unit: "px" }; } 388 | / comment* value:num C M { return { value: value, unit: "cm" }; } 389 | / comment* value:num M M { return { value: value, unit: "mm" }; } 390 | / comment* value:num I N { return { value: value, unit: "in" }; } 391 | / comment* value:num P T { return { value: value, unit: "pt" }; } 392 | / comment* value:num P C { return { value: value, unit: "pc" }; } 393 | 394 | ANGLE "angle" 395 | = comment* value:num D E G { return { value: value, unit: "deg" }; } 396 | / comment* value:num R A D { return { value: value, unit: "rad" }; } 397 | / comment* value:num G R A D { return { value: value, unit: "grad" }; } 398 | 399 | TIME "time" 400 | = comment* value:num M S_ { return { value: value, unit: "ms" }; } 401 | / comment* value:num S_ { return { value: value, unit: "s" }; } 402 | // Each alternative reports the lowercase name of the unit it matched ("hz" or "khz"). 403 | FREQ "frequency" 404 | = comment* value:num H Z { return { value: value, unit: "hz" }; } 405 | / comment* value:num K H Z { return { value: value, unit: "khz" }; } 406 | 407 | PERCENTAGE "percentage" 408 | = comment* value:num "%" { return { value: value, unit: "%" }; } 409 | 410 | NUMBER "number" 411 | = comment* value:num { return { value: value, unit: null }; } 412 | 413 | URI "uri" 414 | = comment* U R L "("i w url:string w ")" { return url; } 415 | / comment* U R L "("i w url:url w ")" { return url; } 416 | 417 | FUNCTION "function" 418 | = comment* name:ident "(" { return name; } 419 | -------------------------------------------------------------------------------- /examples/javascript.pegjs: -------------------------------------------------------------------------------- 1 | // JavaScript Grammar 2 | // ================== 3 | // 4 | // Based on grammar from 
ECMA-262, 5.1 Edition [1]. Generated parser builds a 5 | // syntax tree compatible with the ESTree spec [2]. 6 | // 7 | // Limitations: 8 | // 9 | // * Non-BMP characters are completely ignored to avoid surrogate pair 10 | // handling. 11 | // 12 | // * One can create identifiers containing illegal characters using Unicode 13 | // escape sequences. For example, "abcd\u0020efgh" is not a valid 14 | // identifier, but it is accepted by the parser. 15 | // 16 | // * Strict mode is not recognized. This means that within strict mode code, 17 | // "implements", "interface", "let", "package", "private", "protected", 18 | // "public", "static" and "yield" can be used as names. Many other 19 | // restrictions and exceptions from Annex C are also not applied. 20 | // 21 | // All the limitations could be resolved, but the costs would likely outweigh 22 | // the benefits. 23 | // 24 | // Many thanks to inimino [3] for his grammar [4] which helped me to solve some 25 | // problems (such as automatic semicolon insertion) and also served to double 26 | // check that I converted the original grammar correctly. 27 | // 28 | // [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm 29 | // [2] https://github.com/estree/estree 30 | // [3] http://inimino.org/~inimino/blog/ 31 | // [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg 32 | /* Global initializer: a constant and small helpers used by the rule actions further down. */ 33 | {{ /* Maps a node type to the property that receives the preceding expression when CallExpression folds its tail. */ 34 | const TYPES_TO_PROPERTY_NAMES = { 35 | CallExpression: "callee", 36 | MemberExpression: "object", 37 | }; 38 | /* Returns an array holding `count` copies of `value`. */ 39 | function filledArray(count, value) { 40 | return Array.apply(null, new Array(count)) 41 | .map(function() { return value; }); 42 | } 43 | /* Returns optional[index], or null when `optional` is falsy (e.g. an optional group that did not match). */ 44 | function extractOptional(optional, index) { 45 | return optional ? optional[index] : null; 46 | } 47 | /* Maps null to an empty array; any other value is returned unchanged. */ 48 | function optionalList(value) { 49 | return value !== null ?
value : []; 50 | } 51 | }} 52 | /* Second initializer block: list and expression-tree builders used by the rule actions. */ 53 | { /* extractList: picks element[index] out of every entry of `list`. */ 54 | function extractList(list, index) { 55 | return list.map(function(element) { return element[index]; }); 56 | } 57 | /* buildList: `head` followed by the `index`-th item of every entry in `tail`. */ 58 | function buildList(head, tail, index) { 59 | return [head].concat(extractList(tail, index)); 60 | } 61 | /* Left fold of `tail` onto `head`: each tail entry supplies the operator at [1] and the right operand at [3]. */ 62 | function buildBinaryExpression(head, tail) { 63 | return tail.reduce(function(result, element) { 64 | return { 65 | type: "BinaryExpression", 66 | operator: element[1], 67 | left: result, 68 | right: element[3] 69 | }; 70 | }, head); 71 | } 72 | /* Same fold as buildBinaryExpression, but the nodes are typed "LogicalExpression". */ 73 | function buildLogicalExpression(head, tail) { 74 | return tail.reduce(function(result, element) { 75 | return { 76 | type: "LogicalExpression", 77 | operator: element[1], 78 | left: result, 79 | right: element[3] 80 | }; 81 | }, head); 82 | } 83 | } 84 | /* Entry rule: a Program surrounded by optional skipped input. */ 85 | Start 86 | = __ program:Program __ { return program; } 87 | 88 | // ----- A.1 Lexical Grammar ----- 89 | 90 | SourceCharacter 91 | = . 92 | 93 | WhiteSpace "whitespace" 94 | = "\t" 95 | / "\v" 96 | / "\f" 97 | / " " 98 | / "\u00A0" 99 | / "\uFEFF" 100 | / Zs 101 | 102 | LineTerminator 103 | = [\n\r\u2028\u2029] 104 | 105 | LineTerminatorSequence "end of line" 106 | = "\n" 107 | / "\r\n" 108 | / "\r" 109 | / "\u2028" 110 | / "\u2029" 111 | 112 | Comment "comment" 113 | = MultiLineComment 114 | / SingleLineComment 115 | 116 | MultiLineComment 117 | = "/*" (!"*/" SourceCharacter)* "*/" 118 | 119 | MultiLineCommentNoLineTerminator 120 | = "/*" (!("*/" / LineTerminator) SourceCharacter)* "*/" 121 | 122 | SingleLineComment 123 | = "//" (!LineTerminator SourceCharacter)* 124 | /* An Identifier is any IdentifierName that is not a ReservedWord. */ 125 | Identifier 126 | = !ReservedWord name:IdentifierName { return name; } 127 | 128 | IdentifierName "identifier" 129 | = head:IdentifierStart tail:IdentifierPart* { 130 | return { 131 | type: "Identifier", 132 | name: head + tail.join("") 133 | }; 134 | } 135 | 136 | IdentifierStart 137 | = UnicodeLetter 138 | / "$" 139 | / "_" 140 | / "\\" sequence:UnicodeEscapeSequence { return sequence; } 141 | 142 | IdentifierPart 143 | =
IdentifierStart 144 | / UnicodeCombiningMark 145 | / UnicodeDigit 146 | / UnicodeConnectorPunctuation 147 | / "\u200C" 148 | / "\u200D" 149 | /* The Unicode* rules below are thin aliases over the general-category character classes (Lu, Ll, Mn, Nd, Pc, ...). */ 150 | UnicodeLetter 151 | = Lu 152 | / Ll 153 | / Lt 154 | / Lm 155 | / Lo 156 | / Nl 157 | 158 | UnicodeCombiningMark 159 | = Mn 160 | / Mc 161 | 162 | UnicodeDigit 163 | = Nd 164 | 165 | UnicodeConnectorPunctuation 166 | = Pc 167 | /* Everything the Identifier rule refuses: keywords, future reserved words, and the null and boolean literals. */ 168 | ReservedWord 169 | = Keyword 170 | / FutureReservedWord 171 | / NullLiteral 172 | / BooleanLiteral 173 | 174 | Keyword 175 | = BreakToken 176 | / CaseToken 177 | / CatchToken 178 | / ContinueToken 179 | / DebuggerToken 180 | / DefaultToken 181 | / DeleteToken 182 | / DoToken 183 | / ElseToken 184 | / FinallyToken 185 | / ForToken 186 | / FunctionToken 187 | / IfToken 188 | / InstanceofToken 189 | / InToken 190 | / NewToken 191 | / ReturnToken 192 | / SwitchToken 193 | / ThisToken 194 | / ThrowToken 195 | / TryToken 196 | / TypeofToken 197 | / VarToken 198 | / VoidToken 199 | / WhileToken 200 | / WithToken 201 | 202 | FutureReservedWord 203 | = ClassToken 204 | / ConstToken 205 | / EnumToken 206 | / ExportToken 207 | / ExtendsToken 208 | / ImportToken 209 | / SuperToken 210 | 211 | Literal 212 | = NullLiteral 213 | / BooleanLiteral 214 | / NumericLiteral 215 | / StringLiteral 216 | / RegularExpressionLiteral 217 | /* The null and boolean literal actions return { type: "Literal", value } nodes carrying the JavaScript value itself. */ 218 | NullLiteral 219 | = NullToken { return { type: "Literal", value: null }; } 220 | 221 | BooleanLiteral 222 | = TrueToken { return { type: "Literal", value: true }; } 223 | / FalseToken { return { type: "Literal", value: false }; } 224 | 225 | // The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official 226 | // grammar, it comes from text in section 7.8.3. 227 | NumericLiteral "number" 228 | = literal:HexIntegerLiteral !(IdentifierStart / DecimalDigit) { 229 | return literal; 230 | } 231 | / literal:DecimalLiteral !(IdentifierStart / DecimalDigit) { 232 | return literal; 233 | } 234 | 235 | DecimalLiteral 236 | = DecimalIntegerLiteral "."
DecimalDigit* ExponentPart? { 237 | return { type: "Literal", value: parseFloat(text()) }; 238 | } 239 | / "." DecimalDigit+ ExponentPart? { 240 | return { type: "Literal", value: parseFloat(text()) }; 241 | } 242 | / DecimalIntegerLiteral ExponentPart? { 243 | return { type: "Literal", value: parseFloat(text()) }; 244 | } 245 | /* The integer part is either a lone "0" or starts with a non-zero digit. */ 246 | DecimalIntegerLiteral 247 | = "0" 248 | / NonZeroDigit DecimalDigit* 249 | 250 | DecimalDigit 251 | = [0-9] 252 | 253 | NonZeroDigit 254 | = [1-9] 255 | 256 | ExponentPart 257 | = ExponentIndicator SignedInteger 258 | 259 | ExponentIndicator 260 | = "e"i 261 | 262 | SignedInteger 263 | = [+-]? DecimalDigit+ 264 | /* Hex literal: the digits after the case-insensitive "0x" prefix are converted with parseInt(..., 16). */ 265 | HexIntegerLiteral 266 | = "0x"i digits:$HexDigit+ { 267 | return { type: "Literal", value: parseInt(digits, 16) }; 268 | } 269 | 270 | HexDigit 271 | = [0-9a-f]i 272 | /* The literal's value is the concatenation of the decoded characters between the quotes. */ 273 | StringLiteral "string" 274 | = '"' chars:DoubleStringCharacter* '"' { 275 | return { type: "Literal", value: chars.join("") }; 276 | } 277 | / "'" chars:SingleStringCharacter* "'" { 278 | return { type: "Literal", value: chars.join("") }; 279 | } 280 | 281 | DoubleStringCharacter 282 | = !('"' / "\\" / LineTerminator) SourceCharacter { return text(); } 283 | / "\\" sequence:EscapeSequence { return sequence; } 284 | / LineContinuation 285 | 286 | SingleStringCharacter 287 | = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); } 288 | / "\\" sequence:EscapeSequence { return sequence; } 289 | / LineContinuation 290 | /* A backslash followed by a line terminator sequence contributes the empty string to the value. */ 291 | LineContinuation 292 | = "\\" LineTerminatorSequence { return ""; } 293 | 294 | EscapeSequence 295 | = CharacterEscapeSequence 296 | / "0" !DecimalDigit { return "\0"; } 297 | / HexEscapeSequence 298 | / UnicodeEscapeSequence 299 | 300 | CharacterEscapeSequence 301 | = SingleEscapeCharacter 302 | / NonEscapeCharacter 303 | 304 | SingleEscapeCharacter 305 | = "'" 306 | / '"' 307 | / "\\" 308 | / "b" { return "\b"; } 309 | / "f" { return "\f"; } 310 | / "n" { return "\n"; } 311 | / "r" { return "\r"; } 312 | / "t" { return
"\t"; } 313 | / "v" { return "\v"; } 314 | /* Any character that is neither an EscapeCharacter nor a line terminator stands for itself after a backslash. */ 315 | NonEscapeCharacter 316 | = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); } 317 | 318 | EscapeCharacter 319 | = SingleEscapeCharacter 320 | / DecimalDigit 321 | / "x" 322 | / "u" 323 | /* "x" plus two hex digits, decoded with String.fromCharCode. */ 324 | HexEscapeSequence 325 | = "x" digits:$(HexDigit HexDigit) { 326 | return String.fromCharCode(parseInt(digits, 16)); 327 | } 328 | /* "u" plus four hex digits, decoded with String.fromCharCode. */ 329 | UnicodeEscapeSequence 330 | = "u" digits:$(HexDigit HexDigit HexDigit HexDigit) { 331 | return String.fromCharCode(parseInt(digits, 16)); 332 | } 333 | /* Builds a RegExp from the matched pattern and flags; if the constructor throws, the exception message is passed to error(). */ 334 | RegularExpressionLiteral "regular expression" 335 | = "/" pattern:$RegularExpressionBody "/" flags:$RegularExpressionFlags { 336 | let value; 337 | 338 | try { 339 | value = new RegExp(pattern, flags); 340 | } catch (e) { 341 | error(e.message); 342 | } 343 | 344 | return { type: "Literal", value: value }; 345 | } 346 | 347 | RegularExpressionBody 348 | = RegularExpressionFirstChar RegularExpressionChar* 349 | /* Same alternatives as RegularExpressionChar, except that the first character may not be an asterisk either. */ 350 | RegularExpressionFirstChar 351 | = ![*\\/[] RegularExpressionNonTerminator 352 | / RegularExpressionBackslashSequence 353 | / RegularExpressionClass 354 | 355 | RegularExpressionChar 356 | = ![\\/[] RegularExpressionNonTerminator 357 | / RegularExpressionBackslashSequence 358 | / RegularExpressionClass 359 | 360 | RegularExpressionBackslashSequence 361 | = "\\" RegularExpressionNonTerminator 362 | 363 | RegularExpressionNonTerminator 364 | = !LineTerminator SourceCharacter 365 | 366 | RegularExpressionClass 367 | = "[" RegularExpressionClassChar* "]" 368 | 369 | RegularExpressionClassChar 370 | = ![\]\\] RegularExpressionNonTerminator 371 | / RegularExpressionBackslashSequence 372 | 373 | RegularExpressionFlags 374 | = IdentifierPart* 375 | 376 | // Unicode Character Categories 377 | // 378 | // Extracted from the following Unicode Character Database file: 379 | // 380 | // http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt 381 | // 382 | // Unix magic used: 383 | // 384 | // grep
"; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters 385 | // cut -f1 -d " " | # Extract code points 386 | // grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters 387 | // sed -e 's/\.\./-/' | # Adjust formatting 388 | // sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting 389 | // tr -d '\n' # Join lines 390 | // 391 | // ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one 392 | // at the time of writing. 393 | // 394 | // Non-BMP characters are completely ignored to avoid surrogate pair handling 395 | // (detecting surrogate pairs isn't possible with a simple character class and 396 | // other methods would degrade performance). I don't consider it a big deal as 397 | // even parsers in JavaScript engines of common browsers seem to ignore them. 398 | 399 | // Letter, Lowercase 400 | Ll = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03
F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0561-\u0587\u13F8-\u13FD\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7
\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7B5\uA7B7\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A] 401 | 402 | // Letter, Modifier 403 | Lm = [\u02B0-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0374\u037A\u0559\u0640\u06E5-\u06E6\u07F4-\u07F5\u07FA\u081A\u0824\u0828\u0971\u0E46\u0EC6\u10FC\u17D7\u1843\u1AA7\u1C78-\u1C7D\u1D2C-\u1D6A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\u2D6F\u2E2F\u3005\u3031-\u3035\u303B\u309D-\u309E\u30FC-\u30FE\uA015\uA4F8-\uA4FD\uA60C\uA67F\uA69C-\uA69D\uA717-\uA71F\uA770\uA788\uA7F8-\uA7F9\uA9CF\uA9E6\uAA70\uAADD\uAAF3-\uAAF4\uAB5C-\uAB5F\uFF70\uFF9E-\uFF9F] 404 | 405 | // Letter, Other 406 | Lo = 
[\u00AA\u00BA\u01BB\u01C0-\u01C3\u0294\u05D0-\u05EA\u05F0-\u05F2\u0620-\u063F\u0641-\u064A\u066E-\u066F\u0671-\u06D3\u06D5\u06EE-\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u0800-\u0815\u0840-\u0858\u08A0-\u08B4\u0904-\u0939\u093D\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0-\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0-\u0CE1\u0CF1-\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065-\u1066\u106E-\u1070\u1075-\u1081\u108E\u10D0-\u10FA\u10FD-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17DC\u1820-\u1842\u1844-\u1877\u1880-\u18A8\u18AA\u18B0-\u18F5\u190
0-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE-\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C77\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5-\u1CF6\u2135-\u2138\u2D30-\u2D67\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u3006\u303C\u3041-\u3096\u309F\u30A1-\u30FA\u30FF\u3105-\u312D\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FD5\uA000-\uA014\uA016-\uA48C\uA4D0-\uA4F7\uA500-\uA60B\uA610-\uA61F\uA62A-\uA62B\uA66E\uA6A0-\uA6E5\uA78F\uA7F7\uA7FB-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9E0-\uA9E4\uA9E7-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA6F\uAA71-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5-\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADC\uAAE0-\uAAEA\uAAF2\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF66-\uFF6F\uFF71-\uFF9D\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC] 407 | 408 | // Letter, Titlecase 409 | Lt = [\u01C5\u01C8\u01CB\u01F2\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FBC\u1FCC\u1FFC] 410 | 411 | // Letter, Uppercase 412 | Lu = 
[\u0041-\u005A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A-\u023B\u023D-\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9-\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0-\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28
\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E-\u213F\u2145\u2183\u2C00-\u2C2E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D-\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AD\uA7B0-\uA7B4\uA7B6\uFF21-\uFF3A] 413 | 414 | // Mark, Spacing Combining 415 | Mc = 
[\u0903\u093B\u093E-\u0940\u0949-\u094C\u094E-\u094F\u0982-\u0983\u09BE-\u09C0\u09C7-\u09C8\u09CB-\u09CC\u09D7\u0A03\u0A3E-\u0A40\u0A83\u0ABE-\u0AC0\u0AC9\u0ACB-\u0ACC\u0B02-\u0B03\u0B3E\u0B40\u0B47-\u0B48\u0B4B-\u0B4C\u0B57\u0BBE-\u0BBF\u0BC1-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD7\u0C01-\u0C03\u0C41-\u0C44\u0C82-\u0C83\u0CBE\u0CC0-\u0CC4\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D40\u0D46-\u0D48\u0D4A-\u0D4C\u0D57\u0D82-\u0D83\u0DCF-\u0DD1\u0DD8-\u0DDF\u0DF2-\u0DF3\u0F3E-\u0F3F\u0F7F\u102B-\u102C\u1031\u1038\u103B-\u103C\u1056-\u1057\u1062-\u1064\u1067-\u106D\u1083-\u1084\u1087-\u108C\u108F\u109A-\u109C\u17B6\u17BE-\u17C5\u17C7-\u17C8\u1923-\u1926\u1929-\u192B\u1930-\u1931\u1933-\u1938\u1A19-\u1A1A\u1A55\u1A57\u1A61\u1A63-\u1A64\u1A6D-\u1A72\u1B04\u1B35\u1B3B\u1B3D-\u1B41\u1B43-\u1B44\u1B82\u1BA1\u1BA6-\u1BA7\u1BAA\u1BE7\u1BEA-\u1BEC\u1BEE\u1BF2-\u1BF3\u1C24-\u1C2B\u1C34-\u1C35\u1CE1\u1CF2-\u1CF3\u302E-\u302F\uA823-\uA824\uA827\uA880-\uA881\uA8B4-\uA8C3\uA952-\uA953\uA983\uA9B4-\uA9B5\uA9BA-\uA9BB\uA9BD-\uA9C0\uAA2F-\uAA30\uAA33-\uAA34\uAA4D\uAA7B\uAA7D\uAAEB\uAAEE-\uAAEF\uAAF5\uABE3-\uABE4\uABE6-\uABE7\uABE9-\uABEA\uABEC] 416 | 417 | // Mark, Nonspacing 418 | Mn = 
[\u0300-\u036F\u0483-\u0487\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08E3-\u0902\u093A\u093C\u0941-\u0948\u094D\u0951-\u0957\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u0A01-\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A51\u0A70-\u0A71\u0A75\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0AE2-\u0AE3\u0B01\u0B3C\u0B3F\u0B41-\u0B44\u0B4D\u0B56\u0B62-\u0B63\u0B82\u0BC0\u0BCD\u0C00\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C62-\u0C63\u0C81\u0CBC\u0CBF\u0CC6\u0CCC-\u0CCD\u0CE2-\u0CE3\u0D01\u0D41-\u0D44\u0D4D\u0D62-\u0D63\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032-\u1037\u1039-\u103A\u103D-\u103E\u1058-\u1059\u105E-\u1060\u1071-\u1074\u1082\u1085-\u1086\u108D\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17B4-\u17B5\u17B7-\u17BD\u17C6\u17C9-\u17D3\u17DD\u180B-\u180D\u18A9\u1920-\u1922\u1927-\u1928\u1932\u1939-\u193B\u1A17-\u1A18\u1A1B\u1A56\u1A58-\u1A5E\u1A60\u1A62\u1A65-\u1A6C\u1A73-\u1A7C\u1A7F\u1AB0-\u1ABD\u1B00-\u1B03\u1B34\u1B36-\u1B3A\u1B3C\u1B42\u1B6B-\u1B73\u1B80-\u1B81\u1BA2-\u1BA5\u1BA8-\u1BA9\u1BAB-\u1BAD\u1BE6\u1BE8-\u1BE9\u1BED\u1BEF-\u1BF1\u1C2C-\u1C33\u1C36-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE0\u1CE2-\u1CE8\u1CED\u1CF4\u1CF8-\u1CF9\u1DC0-\u1DF5\u1DFC-\u1DFF\u20D0-\u20DC\u20E1\u20E5-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302D\u3099-\u309A\uA66F\uA674-\uA67D\uA69E-\uA69F\uA6F0-\uA6F1\uA802\uA806\uA80B\uA825-\uA826\uA8C4\uA8E0-\uA8F1\uA926-\uA92D\uA947-\uA951\uA980-\uA982\uA9B3\uA9B6-\uA9B9\uA9BC\uA9E5\uAA29-\uAA2E\uAA31-\uAA32\uAA35-\uAA36\uAA43\uAA4C\uAA7C\uAAB0\uAAB2-\uAAB4\uAAB7-\uAAB8\uAABE-\uAABF\uAAC1\uAAEC-
\uAAED\uAAF6\uABE5\uABE8\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F] 419 | 420 | // Number, Decimal Digit 421 | Nd = [\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19] 422 | 423 | // Number, Letter 424 | Nl = [\u16EE-\u16F0\u2160-\u2182\u2185-\u2188\u3007\u3021-\u3029\u3038-\u303A\uA6E6-\uA6EF] 425 | 426 | // Punctuation, Connector 427 | Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F] 428 | 429 | // Separator, Space 430 | Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] 431 | 432 | // Tokens 433 | 434 | BreakToken = "break" !IdentifierPart 435 | CaseToken = "case" !IdentifierPart 436 | CatchToken = "catch" !IdentifierPart 437 | ClassToken = "class" !IdentifierPart 438 | ConstToken = "const" !IdentifierPart 439 | ContinueToken = "continue" !IdentifierPart 440 | DebuggerToken = "debugger" !IdentifierPart 441 | DefaultToken = "default" !IdentifierPart 442 | DeleteToken = "delete" !IdentifierPart 443 | DoToken = "do" !IdentifierPart 444 | ElseToken = "else" !IdentifierPart 445 | EnumToken = "enum" !IdentifierPart 446 | ExportToken = "export" !IdentifierPart 447 | ExtendsToken = "extends" !IdentifierPart 448 | FalseToken = "false" !IdentifierPart 449 | FinallyToken = "finally" !IdentifierPart 450 | ForToken = "for" !IdentifierPart 451 | FunctionToken = "function" !IdentifierPart 452 | GetToken = "get" !IdentifierPart 453 | IfToken = "if" !IdentifierPart 454 | ImportToken = "import" !IdentifierPart 455 | InstanceofToken = "instanceof" !IdentifierPart 456 | InToken = "in" !IdentifierPart 457 | NewToken = "new" !IdentifierPart 
458 | NullToken = "null" !IdentifierPart 459 | ReturnToken = "return" !IdentifierPart 460 | SetToken = "set" !IdentifierPart 461 | SuperToken = "super" !IdentifierPart 462 | SwitchToken = "switch" !IdentifierPart 463 | ThisToken = "this" !IdentifierPart 464 | ThrowToken = "throw" !IdentifierPart 465 | TrueToken = "true" !IdentifierPart 466 | TryToken = "try" !IdentifierPart 467 | TypeofToken = "typeof" !IdentifierPart 468 | VarToken = "var" !IdentifierPart 469 | VoidToken = "void" !IdentifierPart 470 | WhileToken = "while" !IdentifierPart 471 | WithToken = "with" !IdentifierPart 472 | /* Each token above ends with !IdentifierPart, so a keyword never matches as the prefix of a longer identifier. */ 473 | // Skipped 474 | /* __ skips any run of whitespace, line terminators and comments. */ 475 | __ 476 | = (WhiteSpace / LineTerminatorSequence / Comment)* 477 | /* _ is built only from WhiteSpace and MultiLineCommentNoLineTerminator, so it never skips past a line terminator. */ 478 | _ 479 | = (WhiteSpace / MultiLineCommentNoLineTerminator)* 480 | 481 | // Automatic Semicolon Insertion 482 | /* A statement ends at an explicit ";", at a line terminator, before a "}" (checked with & and not consumed), or at end of input. */ 483 | EOS 484 | = __ ";" 485 | / _ SingleLineComment? LineTerminatorSequence 486 | / _ &"}" 487 | / __ EOF 488 | 489 | EOF 490 | = !. 491 | 492 | // ----- A.2 Number Conversions ----- 493 | 494 | // Irrelevant. 495 | 496 | // ----- A.3 Expressions ----- 497 | /* A parenthesised expression returns the inner expression node directly; no wrapper node is created. */ 498 | PrimaryExpression 499 | = ThisToken { return { type: "ThisExpression" }; } 500 | / Identifier 501 | / Literal 502 | / ArrayLiteral 503 | / ObjectLiteral 504 | / "(" __ expression:Expression __ ")" { return expression; } 505 | /* Three shapes: only elisions (or empty), an element list, and an element list followed by a comma and optional trailing elisions. */ 506 | ArrayLiteral 507 | = "[" __ elision:(Elision __)? "]" { 508 | return { 509 | type: "ArrayExpression", 510 | elements: optionalList(extractOptional(elision, 0)) 511 | }; 512 | } 513 | / "[" __ elements:ElementList __ "]" { 514 | return { 515 | type: "ArrayExpression", 516 | elements: elements 517 | }; 518 | } 519 | / "[" __ elements:ElementList __ "," __ elision:(Elision __)? "]" { 520 | return { 521 | type: "ArrayExpression", 522 | elements: elements.concat(optionalList(extractOptional(elision, 0))) 523 | }; 524 | } 525 | 526 | ElementList 527 | = head:( 528 | elision:(Elision __)?
element:AssignmentExpression { 529 | return optionalList(extractOptional(elision, 0)).concat(element); 530 | } 531 | ) 532 | tail:( 533 | __ "," __ elision:(Elision __)? element:AssignmentExpression { 534 | return optionalList(extractOptional(elision, 0)).concat(element); 535 | } 536 | )* 537 | { return Array.prototype.concat.apply(head, tail); } 538 | /* One leading comma plus each further comma becomes one null entry: n commas give an array of n nulls. */ 539 | Elision 540 | = "," commas:(__ ",")* { return filledArray(commas.length + 1, null); } 541 | /* Three forms: empty braces, a property list, and a property list with a trailing comma. */ 542 | ObjectLiteral 543 | = "{" __ "}" { return { type: "ObjectExpression", properties: [] }; } 544 | / "{" __ properties:PropertyNameAndValueList __ "}" { 545 | return { type: "ObjectExpression", properties: properties }; 546 | } 547 | / "{" __ properties:PropertyNameAndValueList __ "," __ "}" { 548 | return { type: "ObjectExpression", properties: properties }; 549 | } 550 | PropertyNameAndValueList 551 | = head:PropertyAssignment tail:(__ "," __ PropertyAssignment)* { 552 | return buildList(head, tail, 3); 553 | } 554 | /* Plain "key: value", getter, or setter; the resulting Property node has kind "init", "get" or "set" respectively. */ 555 | PropertyAssignment 556 | = key:PropertyName __ ":" __ value:AssignmentExpression { 557 | return { type: "Property", key: key, value: value, kind: "init" }; 558 | } 559 | / GetToken __ key:PropertyName __ 560 | "(" __ ")" __ 561 | "{" __ body:FunctionBody __ "}" 562 | { 563 | return { 564 | type: "Property", 565 | key: key, 566 | value: { 567 | type: "FunctionExpression", 568 | id: null, 569 | params: [], 570 | body: body 571 | }, 572 | kind: "get" 573 | }; 574 | } 575 | / SetToken __ key:PropertyName __ 576 | "(" __ params:PropertySetParameterList __ ")" __ 577 | "{" __ body:FunctionBody __ "}" 578 | { 579 | return { 580 | type: "Property", 581 | key: key, 582 | value: { 583 | type: "FunctionExpression", 584 | id: null, 585 | params: params, 586 | body: body 587 | }, 588 | kind: "set" 589 | }; 590 | } 591 | 592 | PropertyName 593 | = IdentifierName 594 | / StringLiteral 595 | / NumericLiteral 596 | /* A setter takes exactly one identifier, returned as a one-element array. */ 597 | PropertySetParameterList 598 | = id:Identifier { return [id]; } 599 | 600 | MemberExpression
601 | = head:( 602 | PrimaryExpression 603 | / FunctionExpression 604 | / NewToken __ callee:MemberExpression __ args:Arguments { 605 | return { type: "NewExpression", callee: callee, arguments: args }; 606 | } 607 | ) 608 | tail:( 609 | __ "[" __ property:Expression __ "]" { 610 | return { property: property, computed: true }; 611 | } 612 | / __ "." __ property:IdentifierName { 613 | return { property: property, computed: false }; 614 | } 615 | )* 616 | { 617 | return tail.reduce(function(result, element) { 618 | return { 619 | type: "MemberExpression", 620 | object: result, 621 | property: element.property, 622 | computed: element.computed 623 | }; 624 | }, head); 625 | } 626 | /* "new" without an argument list yields a NewExpression whose arguments array is empty. */ 627 | NewExpression 628 | = MemberExpression 629 | / NewToken __ callee:NewExpression { 630 | return { type: "NewExpression", callee: callee, arguments: [] }; 631 | } 632 | /* Tail nodes are created without their callee/object; the final fold stores the expression built so far under the property named by TYPES_TO_PROPERTY_NAMES. */ 633 | CallExpression 634 | = head:( 635 | callee:MemberExpression __ args:Arguments { 636 | return { type: "CallExpression", callee: callee, arguments: args }; 637 | } 638 | ) 639 | tail:( 640 | __ args:Arguments { 641 | return { type: "CallExpression", arguments: args }; 642 | } 643 | / __ "[" __ property:Expression __ "]" { 644 | return { 645 | type: "MemberExpression", 646 | property: property, 647 | computed: true 648 | }; 649 | } 650 | / __ "." __ property:IdentifierName { 651 | return { 652 | type: "MemberExpression", 653 | property: property, 654 | computed: false 655 | }; 656 | } 657 | )* 658 | { 659 | return tail.reduce(function(result, element) { 660 | element[TYPES_TO_PROPERTY_NAMES[element.type]] = result; 661 | 662 | return element; 663 | }, head); 664 | } 665 | /* Returns the argument list as an array; an empty pair of parentheses gives []. */ 666 | Arguments 667 | = "(" __ args:(ArgumentList __)?
")" { 668 | return optionalList(extractOptional(args, 0)); 669 | } 670 | 671 | ArgumentList 672 | = head:AssignmentExpression tail:(__ "," __ AssignmentExpression)* { 673 | return buildList(head, tail, 3); 674 | } 675 | 676 | LeftHandSideExpression 677 | = CallExpression 678 | / NewExpression 679 | /* The operand and a postfix operator are separated by _ rather than __, so no line terminator may sit between them. */ 680 | PostfixExpression 681 | = argument:LeftHandSideExpression _ operator:PostfixOperator { 682 | return { 683 | type: "UpdateExpression", 684 | operator: operator, 685 | argument: argument, 686 | prefix: false 687 | }; 688 | } 689 | / LeftHandSideExpression 690 | 691 | PostfixOperator 692 | = "++" 693 | / "--" 694 | /* Prefix "++" and "--" produce UpdateExpression nodes; every other prefix operator produces a UnaryExpression. */ 695 | UnaryExpression 696 | = PostfixExpression 697 | / operator:UnaryOperator __ argument:UnaryExpression { 698 | const type = (operator === "++" || operator === "--") 699 | ? "UpdateExpression" 700 | : "UnaryExpression"; 701 | 702 | return { 703 | type: type, 704 | operator: operator, 705 | argument: argument, 706 | prefix: true 707 | }; 708 | } 709 | /* $ returns the matched text, so keyword operators come back as plain strings; "+" and "-" refuse to match when followed by "=". */ 710 | UnaryOperator 711 | = $DeleteToken 712 | / $VoidToken 713 | / $TypeofToken 714 | / "++" 715 | / "--" 716 | / $("+" !"=") 717 | / $("-" !"=") 718 | / "~" 719 | / "!"
720 | 721 | MultiplicativeExpression 722 | = head:UnaryExpression 723 | tail:(__ MultiplicativeOperator __ UnaryExpression)* 724 | { return buildBinaryExpression(head, tail); } 725 | 726 | MultiplicativeOperator 727 | = $("*" !"=") 728 | / $("/" !"=") 729 | / $("%" !"=") 730 | 731 | AdditiveExpression 732 | = head:MultiplicativeExpression 733 | tail:(__ AdditiveOperator __ MultiplicativeExpression)* 734 | { return buildBinaryExpression(head, tail); } 735 | 736 | AdditiveOperator 737 | = $("+" ![+=]) 738 | / $("-" ![-=]) 739 | 740 | ShiftExpression 741 | = head:AdditiveExpression 742 | tail:(__ ShiftOperator __ AdditiveExpression)* 743 | { return buildBinaryExpression(head, tail); } 744 | 745 | ShiftOperator 746 | = $("<<" !"=") 747 | / $(">>>" !"=") 748 | / $(">>" !"=") 749 | 750 | RelationalExpression 751 | = head:ShiftExpression 752 | tail:(__ RelationalOperator __ ShiftExpression)* 753 | { return buildBinaryExpression(head, tail); } 754 | 755 | RelationalOperator 756 | = "<=" 757 | / ">=" 758 | / $("<" !"<") 759 | / $(">" !">") 760 | / $InstanceofToken 761 | / $InToken 762 | 763 | RelationalExpressionNoIn 764 | = head:ShiftExpression 765 | tail:(__ RelationalOperatorNoIn __ ShiftExpression)* 766 | { return buildBinaryExpression(head, tail); } 767 | 768 | RelationalOperatorNoIn 769 | = "<=" 770 | / ">=" 771 | / $("<" !"<") 772 | / $(">" !">") 773 | / $InstanceofToken 774 | 775 | EqualityExpression 776 | = head:RelationalExpression 777 | tail:(__ EqualityOperator __ RelationalExpression)* 778 | { return buildBinaryExpression(head, tail); } 779 | 780 | EqualityExpressionNoIn 781 | = head:RelationalExpressionNoIn 782 | tail:(__ EqualityOperator __ RelationalExpressionNoIn)* 783 | { return buildBinaryExpression(head, tail); } 784 | 785 | EqualityOperator 786 | = "===" 787 | / "!==" 788 | / "==" 789 | / "!=" 790 | 791 | BitwiseANDExpression 792 | = head:EqualityExpression 793 | tail:(__ BitwiseANDOperator __ EqualityExpression)* 794 | { return 
buildBinaryExpression(head, tail); } 795 | 796 | BitwiseANDExpressionNoIn 797 | = head:EqualityExpressionNoIn 798 | tail:(__ BitwiseANDOperator __ EqualityExpressionNoIn)* 799 | { return buildBinaryExpression(head, tail); } 800 | 801 | BitwiseANDOperator 802 | = $("&" ![&=]) 803 | 804 | BitwiseXORExpression 805 | = head:BitwiseANDExpression 806 | tail:(__ BitwiseXOROperator __ BitwiseANDExpression)* 807 | { return buildBinaryExpression(head, tail); } 808 | 809 | BitwiseXORExpressionNoIn 810 | = head:BitwiseANDExpressionNoIn 811 | tail:(__ BitwiseXOROperator __ BitwiseANDExpressionNoIn)* 812 | { return buildBinaryExpression(head, tail); } 813 | 814 | BitwiseXOROperator 815 | = $("^" !"=") 816 | 817 | BitwiseORExpression 818 | = head:BitwiseXORExpression 819 | tail:(__ BitwiseOROperator __ BitwiseXORExpression)* 820 | { return buildBinaryExpression(head, tail); } 821 | 822 | BitwiseORExpressionNoIn 823 | = head:BitwiseXORExpressionNoIn 824 | tail:(__ BitwiseOROperator __ BitwiseXORExpressionNoIn)* 825 | { return buildBinaryExpression(head, tail); } 826 | 827 | BitwiseOROperator 828 | = $("|" ![|=]) 829 | 830 | LogicalANDExpression 831 | = head:BitwiseORExpression 832 | tail:(__ LogicalANDOperator __ BitwiseORExpression)* 833 | { return buildLogicalExpression(head, tail); } 834 | 835 | LogicalANDExpressionNoIn 836 | = head:BitwiseORExpressionNoIn 837 | tail:(__ LogicalANDOperator __ BitwiseORExpressionNoIn)* 838 | { return buildLogicalExpression(head, tail); } 839 | 840 | LogicalANDOperator 841 | = "&&" 842 | 843 | LogicalORExpression 844 | = head:LogicalANDExpression 845 | tail:(__ LogicalOROperator __ LogicalANDExpression)* 846 | { return buildLogicalExpression(head, tail); } 847 | 848 | LogicalORExpressionNoIn 849 | = head:LogicalANDExpressionNoIn 850 | tail:(__ LogicalOROperator __ LogicalANDExpressionNoIn)* 851 | { return buildLogicalExpression(head, tail); } 852 | 853 | LogicalOROperator 854 | = "||" 855 | 856 | ConditionalExpression 857 | = 
test:LogicalORExpression __ 858 | "?" __ consequent:AssignmentExpression __ 859 | ":" __ alternate:AssignmentExpression 860 | { 861 | return { 862 | type: "ConditionalExpression", 863 | test: test, 864 | consequent: consequent, 865 | alternate: alternate 866 | }; 867 | } 868 | / LogicalORExpression 869 | 870 | ConditionalExpressionNoIn 871 | = test:LogicalORExpressionNoIn __ 872 | "?" __ consequent:AssignmentExpression __ 873 | ":" __ alternate:AssignmentExpressionNoIn 874 | { 875 | return { 876 | type: "ConditionalExpression", 877 | test: test, 878 | consequent: consequent, 879 | alternate: alternate 880 | }; 881 | } 882 | / LogicalORExpressionNoIn 883 | 884 | AssignmentExpression 885 | = left:LeftHandSideExpression __ 886 | "=" !"=" __ 887 | right:AssignmentExpression 888 | { 889 | return { 890 | type: "AssignmentExpression", 891 | operator: "=", 892 | left: left, 893 | right: right 894 | }; 895 | } 896 | / left:LeftHandSideExpression __ 897 | operator:AssignmentOperator __ 898 | right:AssignmentExpression 899 | { 900 | return { 901 | type: "AssignmentExpression", 902 | operator: operator, 903 | left: left, 904 | right: right 905 | }; 906 | } 907 | / ConditionalExpression 908 | 909 | AssignmentExpressionNoIn 910 | = left:LeftHandSideExpression __ 911 | "=" !"=" __ 912 | right:AssignmentExpressionNoIn 913 | { 914 | return { 915 | type: "AssignmentExpression", 916 | operator: "=", 917 | left: left, 918 | right: right 919 | }; 920 | } 921 | / left:LeftHandSideExpression __ 922 | operator:AssignmentOperator __ 923 | right:AssignmentExpressionNoIn 924 | { 925 | return { 926 | type: "AssignmentExpression", 927 | operator: operator, 928 | left: left, 929 | right: right 930 | }; 931 | } 932 | / ConditionalExpressionNoIn 933 | 934 | AssignmentOperator 935 | = "*=" 936 | / "/=" 937 | / "%=" 938 | / "+=" 939 | / "-=" 940 | / "<<=" 941 | / ">>=" 942 | / ">>>=" 943 | / "&=" 944 | / "^=" 945 | / "|=" 946 | 947 | Expression 948 | = head:AssignmentExpression tail:(__ "," __ 
AssignmentExpression)* { 949 | return tail.length > 0 950 | ? { type: "SequenceExpression", expressions: buildList(head, tail, 3) } 951 | : head; 952 | } 953 | 954 | ExpressionNoIn 955 | = head:AssignmentExpressionNoIn tail:(__ "," __ AssignmentExpressionNoIn)* { 956 | return tail.length > 0 957 | ? { type: "SequenceExpression", expressions: buildList(head, tail, 3) } 958 | : head; 959 | } 960 | 961 | // ----- A.4 Statements ----- 962 | 963 | Statement 964 | = Block 965 | / VariableStatement 966 | / EmptyStatement 967 | / ExpressionStatement 968 | / IfStatement 969 | / IterationStatement 970 | / ContinueStatement 971 | / BreakStatement 972 | / ReturnStatement 973 | / WithStatement 974 | / LabelledStatement 975 | / SwitchStatement 976 | / ThrowStatement 977 | / TryStatement 978 | / DebuggerStatement 979 | 980 | Block 981 | = "{" __ body:(StatementList __)? "}" { 982 | return { 983 | type: "BlockStatement", 984 | body: optionalList(extractOptional(body, 0)) 985 | }; 986 | } 987 | 988 | StatementList 989 | = head:Statement tail:(__ Statement)* { return buildList(head, tail, 1); } 990 | 991 | VariableStatement 992 | = VarToken __ declarations:VariableDeclarationList EOS { 993 | return { 994 | type: "VariableDeclaration", 995 | declarations: declarations, 996 | kind: "var" 997 | }; 998 | } 999 | 1000 | VariableDeclarationList 1001 | = head:VariableDeclaration tail:(__ "," __ VariableDeclaration)* { 1002 | return buildList(head, tail, 3); 1003 | } 1004 | 1005 | VariableDeclarationListNoIn 1006 | = head:VariableDeclarationNoIn tail:(__ "," __ VariableDeclarationNoIn)* { 1007 | return buildList(head, tail, 3); 1008 | } 1009 | 1010 | VariableDeclaration 1011 | = id:Identifier init:(__ Initialiser)? { 1012 | return { 1013 | type: "VariableDeclarator", 1014 | id: id, 1015 | init: extractOptional(init, 1) 1016 | }; 1017 | } 1018 | 1019 | VariableDeclarationNoIn 1020 | = id:Identifier init:(__ InitialiserNoIn)? 
{ 1021 | return { 1022 | type: "VariableDeclarator", 1023 | id: id, 1024 | init: extractOptional(init, 1) 1025 | }; 1026 | } 1027 | 1028 | Initialiser 1029 | = "=" !"=" __ expression:AssignmentExpression { return expression; } 1030 | 1031 | InitialiserNoIn 1032 | = "=" !"=" __ expression:AssignmentExpressionNoIn { return expression; } 1033 | 1034 | EmptyStatement 1035 | = ";" { return { type: "EmptyStatement" }; } 1036 | 1037 | ExpressionStatement 1038 | = !("{" / FunctionToken) expression:Expression EOS { 1039 | return { 1040 | type: "ExpressionStatement", 1041 | expression: expression 1042 | }; 1043 | } 1044 | 1045 | IfStatement 1046 | = IfToken __ "(" __ test:Expression __ ")" __ 1047 | consequent:Statement __ 1048 | ElseToken __ 1049 | alternate:Statement 1050 | { 1051 | return { 1052 | type: "IfStatement", 1053 | test: test, 1054 | consequent: consequent, 1055 | alternate: alternate 1056 | }; 1057 | } 1058 | / IfToken __ "(" __ test:Expression __ ")" __ 1059 | consequent:Statement { 1060 | return { 1061 | type: "IfStatement", 1062 | test: test, 1063 | consequent: consequent, 1064 | alternate: null 1065 | }; 1066 | } 1067 | 1068 | IterationStatement 1069 | = DoToken __ 1070 | body:Statement __ 1071 | WhileToken __ "(" __ test:Expression __ ")" EOS 1072 | { return { type: "DoWhileStatement", body: body, test: test }; } 1073 | / WhileToken __ "(" __ test:Expression __ ")" __ 1074 | body:Statement 1075 | { return { type: "WhileStatement", test: test, body: body }; } 1076 | / ForToken __ 1077 | "(" __ 1078 | init:(ExpressionNoIn __)? ";" __ 1079 | test:(Expression __)? ";" __ 1080 | update:(Expression __)? 
1081 | ")" __ 1082 | body:Statement 1083 | { 1084 | return { 1085 | type: "ForStatement", 1086 | init: extractOptional(init, 0), 1087 | test: extractOptional(test, 0), 1088 | update: extractOptional(update, 0), 1089 | body: body 1090 | }; 1091 | } 1092 | / ForToken __ 1093 | "(" __ 1094 | VarToken __ declarations:VariableDeclarationListNoIn __ ";" __ 1095 | test:(Expression __)? ";" __ 1096 | update:(Expression __)? 1097 | ")" __ 1098 | body:Statement 1099 | { 1100 | return { 1101 | type: "ForStatement", 1102 | init: { 1103 | type: "VariableDeclaration", 1104 | declarations: declarations, 1105 | kind: "var" 1106 | }, 1107 | test: extractOptional(test, 0), 1108 | update: extractOptional(update, 0), 1109 | body: body 1110 | }; 1111 | } 1112 | / ForToken __ 1113 | "(" __ 1114 | left:LeftHandSideExpression __ 1115 | InToken __ 1116 | right:Expression __ 1117 | ")" __ 1118 | body:Statement 1119 | { 1120 | return { 1121 | type: "ForInStatement", 1122 | left: left, 1123 | right: right, 1124 | body: body 1125 | }; 1126 | } 1127 | / ForToken __ 1128 | "(" __ 1129 | VarToken __ declarations:VariableDeclarationListNoIn __ 1130 | InToken __ 1131 | right:Expression __ 1132 | ")" __ 1133 | body:Statement 1134 | { 1135 | return { 1136 | type: "ForInStatement", 1137 | left: { 1138 | type: "VariableDeclaration", 1139 | declarations: declarations, 1140 | kind: "var" 1141 | }, 1142 | right: right, 1143 | body: body 1144 | }; 1145 | } 1146 | 1147 | ContinueStatement 1148 | = ContinueToken EOS { 1149 | return { type: "ContinueStatement", label: null }; 1150 | } 1151 | / ContinueToken _ label:Identifier EOS { 1152 | return { type: "ContinueStatement", label: label }; 1153 | } 1154 | 1155 | BreakStatement 1156 | = BreakToken EOS { 1157 | return { type: "BreakStatement", label: null }; 1158 | } 1159 | / BreakToken _ label:Identifier EOS { 1160 | return { type: "BreakStatement", label: label }; 1161 | } 1162 | 1163 | ReturnStatement 1164 | = ReturnToken EOS { 1165 | return { type: 
"ReturnStatement", argument: null }; 1166 | } 1167 | / ReturnToken _ argument:Expression EOS { 1168 | return { type: "ReturnStatement", argument: argument }; 1169 | } 1170 | 1171 | WithStatement 1172 | = WithToken __ "(" __ object:Expression __ ")" __ 1173 | body:Statement 1174 | { return { type: "WithStatement", object: object, body: body }; } 1175 | 1176 | SwitchStatement 1177 | = SwitchToken __ "(" __ discriminant:Expression __ ")" __ 1178 | cases:CaseBlock 1179 | { 1180 | return { 1181 | type: "SwitchStatement", 1182 | discriminant: discriminant, 1183 | cases: cases 1184 | }; 1185 | } 1186 | 1187 | CaseBlock 1188 | = "{" __ clauses:(CaseClauses __)? "}" { 1189 | return optionalList(extractOptional(clauses, 0)); 1190 | } 1191 | / "{" __ 1192 | before:(CaseClauses __)? 1193 | default1:DefaultClause __ 1194 | after:(CaseClauses __)? "}" 1195 | { 1196 | return optionalList(extractOptional(before, 0)) 1197 | .concat(default1) 1198 | .concat(optionalList(extractOptional(after, 0))); 1199 | } 1200 | 1201 | CaseClauses 1202 | = head:CaseClause tail:(__ CaseClause)* { return buildList(head, tail, 1); } 1203 | 1204 | CaseClause 1205 | = CaseToken __ test:Expression __ ":" consequent:(__ StatementList)? { 1206 | return { 1207 | type: "SwitchCase", 1208 | test: test, 1209 | consequent: optionalList(extractOptional(consequent, 1)) 1210 | }; 1211 | } 1212 | 1213 | DefaultClause 1214 | = DefaultToken __ ":" consequent:(__ StatementList)? 
{ 1215 | return { 1216 | type: "SwitchCase", 1217 | test: null, 1218 | consequent: optionalList(extractOptional(consequent, 1)) 1219 | }; 1220 | } 1221 | 1222 | LabelledStatement 1223 | = label:Identifier __ ":" __ body:Statement { 1224 | return { type: "LabeledStatement", label: label, body: body }; 1225 | } 1226 | 1227 | ThrowStatement 1228 | = ThrowToken _ argument:Expression EOS { 1229 | return { type: "ThrowStatement", argument: argument }; 1230 | } 1231 | 1232 | TryStatement 1233 | = TryToken __ block:Block __ handler:Catch __ finalizer:Finally { 1234 | return { 1235 | type: "TryStatement", 1236 | block: block, 1237 | handler: handler, 1238 | finalizer: finalizer 1239 | }; 1240 | } 1241 | / TryToken __ block:Block __ handler:Catch { 1242 | return { 1243 | type: "TryStatement", 1244 | block: block, 1245 | handler: handler, 1246 | finalizer: null 1247 | }; 1248 | } 1249 | / TryToken __ block:Block __ finalizer:Finally { 1250 | return { 1251 | type: "TryStatement", 1252 | block: block, 1253 | handler: null, 1254 | finalizer: finalizer 1255 | }; 1256 | } 1257 | 1258 | Catch 1259 | = CatchToken __ "(" __ param:Identifier __ ")" __ body:Block { 1260 | return { 1261 | type: "CatchClause", 1262 | param: param, 1263 | body: body 1264 | }; 1265 | } 1266 | 1267 | Finally 1268 | = FinallyToken __ block:Block { return block; } 1269 | 1270 | DebuggerStatement 1271 | = DebuggerToken EOS { return { type: "DebuggerStatement" }; } 1272 | 1273 | // ----- A.5 Functions and Programs ----- 1274 | 1275 | FunctionDeclaration 1276 | = FunctionToken __ id:Identifier __ 1277 | "(" __ params:(FormalParameterList __)? ")" __ 1278 | "{" __ body:FunctionBody __ "}" 1279 | { 1280 | return { 1281 | type: "FunctionDeclaration", 1282 | id: id, 1283 | params: optionalList(extractOptional(params, 0)), 1284 | body: body 1285 | }; 1286 | } 1287 | 1288 | FunctionExpression 1289 | = FunctionToken __ id:(Identifier __)? 1290 | "(" __ params:(FormalParameterList __)? 
")" __ 1291 | "{" __ body:FunctionBody __ "}" 1292 | { 1293 | return { 1294 | type: "FunctionExpression", 1295 | id: extractOptional(id, 0), 1296 | params: optionalList(extractOptional(params, 0)), 1297 | body: body 1298 | }; 1299 | } 1300 | 1301 | FormalParameterList 1302 | = head:Identifier tail:(__ "," __ Identifier)* { 1303 | return buildList(head, tail, 3); 1304 | } 1305 | 1306 | FunctionBody 1307 | = body:SourceElements? { 1308 | return { 1309 | type: "BlockStatement", 1310 | body: optionalList(body) 1311 | }; 1312 | } 1313 | 1314 | Program 1315 | = body:SourceElements? { 1316 | return { 1317 | type: "Program", 1318 | body: optionalList(body) 1319 | }; 1320 | } 1321 | 1322 | SourceElements 1323 | = head:SourceElement tail:(__ SourceElement)* { 1324 | return buildList(head, tail, 1); 1325 | } 1326 | 1327 | SourceElement 1328 | = Statement 1329 | / FunctionDeclaration 1330 | 1331 | // ----- A.6 Universal Resource Identifier Character Classes ----- 1332 | 1333 | // Irrelevant. 1334 | 1335 | // ----- A.7 Regular Expressions ----- 1336 | 1337 | // Irrelevant. 1338 | 1339 | // ----- A.8 JSON ----- 1340 | 1341 | // Irrelevant. 1342 | -------------------------------------------------------------------------------- /examples/json.pegjs: -------------------------------------------------------------------------------- 1 | // JSON Grammar 2 | // ============ 3 | // 4 | // Based on the grammar from RFC 7159 [1]. 5 | // 6 | // Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the 7 | // JSON website [4] (somewhat informally). The RFC seems the most authoritative 8 | // source, which is confirmed e.g. by [5]. 9 | // 10 | // [1] http://tools.ietf.org/html/rfc7159 11 | // [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm 12 | // [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm 13 | // [4] http://json.org/ 14 | // [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON 15 | 16 | // ----- 2. 
JSON Grammar ----- 17 | 18 | JSON_text 19 | = ws value:value ws { return value; } 20 | 21 | begin_array = ws "[" ws 22 | begin_object = ws "{" ws 23 | end_array = ws "]" ws 24 | end_object = ws "}" ws 25 | name_separator = ws ":" ws 26 | value_separator = ws "," ws 27 | 28 | ws "whitespace" = [ \t\n\r]* 29 | 30 | // ----- 3. Values ----- 31 | 32 | value 33 | = false 34 | / null 35 | / true 36 | / object 37 | / array 38 | / number 39 | / string 40 | 41 | false = "false" { return false; } 42 | null = "null" { return null; } 43 | true = "true" { return true; } 44 | 45 | // ----- 4. Objects ----- 46 | 47 | object 48 | = begin_object 49 | members:( 50 | head:member 51 | tail:(value_separator m:member { return m; })* 52 | { 53 | const result = {}; 54 | 55 | [head].concat(tail).forEach(function(element) { 56 | result[element.name] = element.value; 57 | }); 58 | 59 | return result; 60 | } 61 | )? 62 | end_object 63 | { return members !== null ? members : {}; } 64 | 65 | member 66 | = name:string name_separator value:value { 67 | return { name: name, value: value }; 68 | } 69 | 70 | // ----- 5. Arrays ----- 71 | 72 | array 73 | = begin_array 74 | values:( 75 | head:value 76 | tail:(value_separator v:value { return v; })* 77 | { return [head].concat(tail); } 78 | )? 79 | end_array 80 | { return values !== null ? values : []; } 81 | 82 | // ----- 6. Numbers ----- 83 | 84 | number "number" 85 | = minus? int frac? exp? { return parseFloat(text()); } 86 | 87 | decimal_point 88 | = "." 89 | 90 | digit1_9 91 | = [1-9] 92 | 93 | e 94 | = [eE] 95 | 96 | exp 97 | = e (minus / plus)? DIGIT+ 98 | 99 | frac 100 | = decimal_point DIGIT+ 101 | 102 | int 103 | = zero / (digit1_9 DIGIT*) 104 | 105 | minus 106 | = "-" 107 | 108 | plus 109 | = "+" 110 | 111 | zero 112 | = "0" 113 | 114 | // ----- 7. 
Strings ----- 115 | 116 | string "string" 117 | = quotation_mark chars:char* quotation_mark { return chars.join(""); } 118 | 119 | char 120 | = unescaped 121 | / escape 122 | sequence:( 123 | '"' 124 | / "\\" 125 | / "/" 126 | / "b" { return "\b"; } 127 | / "f" { return "\f"; } 128 | / "n" { return "\n"; } 129 | / "r" { return "\r"; } 130 | / "t" { return "\t"; } 131 | / "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) { 132 | return String.fromCharCode(parseInt(digits, 16)); 133 | } 134 | ) 135 | { return sequence; } 136 | 137 | escape 138 | = "\\" 139 | 140 | quotation_mark 141 | = '"' 142 | 143 | unescaped 144 | = [^\0-\x1F\x22\x5C] 145 | 146 | // ----- Core ABNF Rules ----- 147 | 148 | // See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). 149 | DIGIT = [0-9] 150 | HEXDIG = [0-9a-f]i 151 | -------------------------------------------------------------------------------- /examples/minimal-with-dep.pegjs: -------------------------------------------------------------------------------- 1 | START 2 | = "a" { return foo } -------------------------------------------------------------------------------- /examples/minimal.pegjs: -------------------------------------------------------------------------------- 1 | START 2 | = "a" 3 | / "b" -------------------------------------------------------------------------------- /examples/snake-case-rules.pegjs: -------------------------------------------------------------------------------- 1 | // Example grammar with rules written in snake_case to test type renaming. 2 | start = . / other_rule 3 | other_rule = . 4 | -------------------------------------------------------------------------------- /examples/st.pegjs: -------------------------------------------------------------------------------- 1 | /* 2 | [The "BSD licence"] 3 | Copyright (c) 2015, John Snyders 4 | Copyright (c) 2017, Pedro J. Molina (adaptation for TypeScript) 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions 9 | are met: 10 | 1. Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 3. The name of the author may not be used to endorse or promote products 16 | derived from this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | /* 30 | * stGrammar.pegjs 31 | * This is the grammar for StringTemplate including group files, template files, and raw templates 32 | * current command to compile this is: 33 | * pegjs --allowed-start-rules groupFile,templateFile,templateFileRaw,templateAndEOF stGrammar.pegjs 34 | */ 35 | 36 | { 37 | // tslint:disable:indent 38 | // tslint:disable:align 39 | 40 | const VALID_DELIMITERS = "#$%^&*<>"; 41 | 42 | let delimiterStartChar = "<"; 43 | let delimiterStopChar = ">"; 44 | const curGroup = options.group; 45 | let curDict: any = null; 46 | let outside = true; // tell if we are inside or outside a template: outside<inside>outside 47 | let subtemplateDepth = 0; // handle nesting of subtemplates: { ... {...} ...} 48 | let inConditional = false; 49 | let verbose = false; 50 | const ignoreNewLines = options.ignoreNewLines || false; 51 | let formalArgsHasOptional = false; 52 | const lineOffset = options.lineOffset || 0; 53 | 54 | let logger = function(message: string) { 55 | // tslint:disable-next-line:no-console 56 | console.log(message); 57 | }; 58 | 59 | function verboseLog(message: string) { 60 | if (verbose) { 61 | logger(message); 62 | } 63 | } 64 | 65 | // /** 66 | // * The parse() function was renamed peg$parse() in pegjs 0.9.0 . 67 | // */ 68 | // function parse(arg1, args2, args3, args4, arg5) { 69 | // return peg$parse(arg1, args2, args3, args4, arg5); 70 | // } 71 | 72 | /** 73 | * This function exists in pegjs 0.8.0 but is missing in pegjs 0.9.0 .
81 | */ 82 | function column() { 83 | return peg$computePosDetails(peg$savedPos).column; 84 | } 85 | 86 | function getLocation() { 87 | return { 88 | line: line() + lineOffset, 89 | column: column() 90 | }; 91 | } 92 | 93 | function makeList(first: any, rest: any) { 94 | let list; 95 | if (first && rest) { 96 | list = [first].concat(rest); 97 | } else if (first) { 98 | list = [first]; 99 | } else if (rest) { 100 | list = rest; 101 | } else { 102 | list = []; 103 | } 104 | return list; 105 | } 106 | 107 | function parseTemplate(template: any) { 108 | let ignoreNewLines2: boolean; 109 | let lineOffset2 = line() - 1; 110 | 111 | ignoreNewLines2 = false; 112 | if (template.ignoreNewLines) { 113 | ignoreNewLines2 = true; 114 | template = template.string; 115 | } 116 | if (template.charAt(0) === "\n" || (template.charAt(0) === "\r" && template.charAt(1) === "\n")) { 117 | lineOffset2 += 1; 118 | } 119 | template = template.replace(/^\r?\n/, ""); // remove a single leading new line if any 120 | template = template.replace(/\r?\n$/, ""); // remove a single trailing new line if any 121 | 122 | outside = true; // just in case, make sure always start parsing a template on the outside 123 | try { 124 | return parse(template, { 125 | startRule: "templateAndEOF", 126 | group: curGroup, 127 | nested: true, 128 | verbose: verbose, 129 | lineOffset: lineOffset2, 130 | ignoreNewLines: ignoreNewLines2, 131 | delimiterStartChar: delimiterStartChar, 132 | delimiterStopChar: delimiterStopChar 133 | }); 134 | } catch (ex) { 135 | if (ex instanceof PeggySyntaxError) { 136 | (ex as any).line += lineOffset2; 137 | } 138 | throw ex; 139 | } 140 | } 141 | 142 | delimiterStartChar = options.delimiterStartChar || "<"; 143 | delimiterStopChar = options.delimiterStopChar || ">"; 144 | verbose = options.verbose || false; 145 | if (options.logger) { 146 | logger = options.logger; 147 | } 148 | 149 | if (!options.nested) { 150 | verboseLog("Default delimiters: " + delimiterStartChar + ", " + 
delimiterStopChar); 151 | } 152 | } 153 | 154 | /* 155 | * GROUP 156 | */ 157 | 158 | /* 159 | * ENTRY POINT: groupFile 160 | * This entry point is for a .stg file. 161 | * 162 | * There should be at least one definition, but this is not enforced 163 | */ 164 | groupFile 165 | = __ delimiters? __ import* __ def* __ EOF { 166 | return curGroup; 167 | } 168 | 169 | /* 170 | * Match: 171 | * import "<file>" 172 | */ 173 | import 174 | = __ "import" __ file:STRING __ { 175 | curGroup.addImports(file.value); 176 | return null; 177 | } 178 | 179 | /* 180 | * Match: 181 | * delimiters "<char>", "<char>" 182 | * <char> must be a valid delimiter. 183 | */ 184 | delimiters 185 | = "delimiters" __ s:STRING __ "," __ e:STRING { 186 | const start = s.value; 187 | const stop = e.value; 188 | if (start.length !== 1 || stop.length !== 1) { 189 | error("Delimiter value must be exactly one character."); 190 | } 191 | if (VALID_DELIMITERS.indexOf(start) < 0 || VALID_DELIMITERS.indexOf(stop) < 0) { 192 | error("Invalid delimiter character."); 193 | } 194 | delimiterStartChar = s.value.charAt(0); 195 | delimiterStopChar = e.value.charAt(0); 196 | verboseLog("Delimiters: " + delimiterStartChar + ", " + delimiterStopChar); 197 | return null; 198 | } 199 | 200 | /* 201 | * Match a dictionary or template definition 202 | */ 203 | def 204 | = __ dictDef __ { return null; } 205 | / __ templateDef __ { return null; } 206 | 207 | /* 208 | * Template and region definitions and template aliases 209 | * Match: 210 | * <name>(<args>) ::= << <template> >> // multi-line template 211 | * <name>(<args>) ::= <% <template> %> // multi-line template new lines not significant 212 | * <name>(<args>) ::= "<template>" // single line template 213 | * <alias> ::= <target> 214 | * @<enclosing>.<name>() ::= << <template> >> 215 | * @<enclosing>.<name>() ::= <% <template> %> 216 | * @<enclosing>.<name>() ::= "<template>" 217 | */ 218 | templateDef 219 | = def:( "@" enclosing:ID "."
n:ID "(" __ ")" { 220 | // todo region stuff 221 | return { 222 | name: n.value, 223 | enclosingTemplate: enclosing.value 224 | }; 225 | } 226 | / n:ID __ "(" __ args:formalArgs __ ")" { 227 | return { 228 | name: n.value, 229 | args: args 230 | }; 231 | } 232 | ) 233 | __ "::=" __ 234 | template:( 235 | s:STRING { return s.value; } 236 | / s:BIGSTRING { return s.value; } 237 | / s:BIGSTRING_NO_NL { return s.value; } 238 | /* In pegjs 0.9.0 an action as first element of an alternative is not allowed. */ 239 | / &{ return true; } { error("Missing template."); } 240 | ) { 241 | if (def.enclosingTemplate) { 242 | verboseLog("Region definition: " + def.enclosingTemplate + "." + def.name); 243 | def.template = parseTemplate(template).value; 244 | curGroup.addRegion(def); 245 | } else { 246 | verboseLog("Template definition: " + def.name); 247 | def.template = parseTemplate(template).value; 248 | curGroup.addTemplate(def); 249 | } 250 | return null; 251 | } 252 | / alias:ID __ '::=' __ target:ID { 253 | verboseLog("Template alias: " + alias.value + " > " + target.value); 254 | curGroup.addTemplateAlias(alias.value, target.value); 255 | return null; 256 | } 257 | 258 | formalArgs 259 | = &{ formalArgsHasOptional = false; return true; } first:formalArg rest:( __ "," __ e:formalArg { return e; } )* { 260 | return makeList(first, rest); 261 | } 262 | /* In pegjs 0.9.0 an action as first element of an alternative is not allowed. */ 263 | / &{ return true; } { return []; } 264 | 265 | formalArg 266 | = name:ID defaultValue:( __ '=' __ 267 | v:( STRING 268 | / anonymousTemplate 269 | / TRUE 270 | / FALSE 271 | / EMPTY_LIST ) { 272 | formalArgsHasOptional = true; 273 | return v; 274 | } 275 | )? 
{ 276 | let ret; 277 | 278 | if (formalArgsHasOptional && defaultValue === null) { 279 | error("Required argument after optional not allowed."); 280 | } 281 | ret = { 282 | type: "FORMAL_ARG", 283 | loc: getLocation(), 284 | name: name.value, 285 | defaultValue: undefined 286 | }; 287 | if (defaultValue) { 288 | ret.defaultValue = defaultValue; 289 | } 290 | return ret; 291 | } 292 | 293 | dictDef 294 | = (__ id:ID __ '::=' { curDict = { name: id.value, map: {}, default: null }; }) dict { 295 | verboseLog("Dictionary definition: " + curDict.name); 296 | curGroup.addDictionary(curDict); 297 | curDict = null; 298 | return null; 299 | } 300 | 301 | dict 302 | = __ "[" __ dictPairs "]" __ 303 | 304 | dictPairs 305 | = __ keyValuePair (__ "," __ keyValuePair)* (__ "," __ defaultValuePair)? 306 | / __ def:defaultValuePair 307 | 308 | defaultValuePair 309 | = "default" __ ":" __ v:keyValue __ { curDict.default = v; } 310 | 311 | keyValuePair 312 | = k:STRING __ ':' __ v:keyValue __ { curDict.map[k.value] = v; } 313 | 314 | keyValue 315 | = v:BIGSTRING { 316 | return { 317 | type: "ANON_TEMPLATE", 318 | loc: getLocation(), 319 | value: parseTemplate(v.value).value 320 | }; 321 | } 322 | / v:BIGSTRING_NO_NL { 323 | return { 324 | type: "ANON_TEMPLATE", 325 | loc: getLocation(), 326 | value: parseTemplate(v.value).value 327 | }; 328 | } 329 | / STRING 330 | / anonymousTemplate 331 | / TRUE 332 | / FALSE 333 | / "key" { return { type: "DICT_KEY_VALUE", loc: getLocation(), value: null }; } 334 | / EMPTY_LIST 335 | 336 | 337 | /* 338 | * Anonymous template 339 | * Match: 340 | * {<template>} 341 | */ 342 | anonymousTemplate "anonymous template" 343 | = "{" &{ subtemplateDepth += 1; return true; } 344 | t:template "}" { 345 | subtemplateDepth -= 1; // xxx is this subtemplate depth stuff needed?
346 | return { 347 | type: "ANON_TEMPLATE", 348 | loc: getLocation(), 349 | value: t.value 350 | }; 351 | } 352 | 353 | 354 | /* 355 | * TEMPLATE FILE 356 | */ 357 | /* 358 | * ENTRY POINT: templateFile 359 | * This entry point is for a non-raw .st file 360 | * Testing with Java reference implementation shows that region and alias definitions are not useful in .st files 361 | * even though they are allowed while parsing. 362 | */ 363 | templateFile 364 | = __ name:ID "(" __ args:formalArgs __ ")" 365 | __ "::=" __ 366 | template:( 367 | s:STRING { return s.value; } 368 | / s:BIGSTRING { return s.value; } 369 | / s:BIGSTRING_NO_NL { return s.value; } 370 | /* In pegjs 0.9.0 an action as first element of an alternative is not allowed. */ 371 | / &{ return true; } { error("Missing template."); } 372 | ) __ { 373 | if (name.value !== curGroup.fileName) { 374 | error("Template name must match filename."); 375 | } 376 | verboseLog("Template definition: " + name.value); 377 | curGroup.addTemplate({ 378 | name: name.value, 379 | args: args, 380 | template: parseTemplate(template).value 381 | }); 382 | return curGroup; 383 | } 384 | 385 | /* 386 | * RAW TEMPLATE 387 | */ 388 | 389 | /* 390 | * ENTRY POINT: templateAndEOF 391 | * This entry point is used internally to parse the body of a template definition 392 | */ 393 | templateAndEOF 394 | = t:template EOF { 395 | return t; 396 | } 397 | 398 | /* 399 | * ENTRY POINT: templateFileRaw 400 | * This entry point is for raw .st files 401 | */ 402 | templateFileRaw 403 | = t:template EOF { 404 | curGroup.addTemplate({ 405 | name: curGroup.fileName, 406 | args: null, // xxx is this OK? 407 | template: t.value 408 | }); 409 | return curGroup; 410 | } 411 | 412 | /* xxx the !(...) used to include / "}" why was that? */ 413 | template 414 | = e:(!(INDENT? START_CHAR "elseif" / INDENT? START_CHAR "else" / INDENT? 
START_CHAR "endif" ) i:element { return i; })* { return { 415 | type: "TEMPLATE", 416 | value: e || null // xxx should this be null or text token with empty string value 417 | }; 418 | } 419 | 420 | /* 421 | * Match any of the elements of an expression 422 | */ 423 | element 424 | = &{ return column() === 1; } INDENT? ST_COMMENT NEWLINE { return null; } // a comment optionally preceded by indent 425 | // and immediately followed by a new line 426 | // is ignored including the newline 427 | / i:INDENT se:singleElement { 428 | if (ignoreNewLines) { 429 | return se; 430 | } else { 431 | return { 432 | type: "INDENTED_EXPR", 433 | loc: getLocation(), 434 | indent: i.value, 435 | value: se 436 | }; 437 | } 438 | } 439 | / &{ outside = true; return true; } se:singleElement { 440 | return se; 441 | } 442 | / &{ outside = true; return true; } ce:compoundElement { 443 | return ce; 444 | } 445 | 446 | 447 | singleElement 448 | = TEXT 449 | / n:NEWLINE { 450 | if (ignoreNewLines) { 451 | return null; 452 | } else { 453 | return n; 454 | } 455 | } 456 | / ST_COMMENT { return null; } 457 | / exprTag 458 | 459 | compoundElement 460 | = ifstat 461 | / region 462 | 463 | exprTag 464 | = START __ e:expr opts:( ';' __ o:exprOptions { return o; } )? __ STOP { 465 | // tslint:disable:indent 466 | const ret = { 467 | type: "EXPR", 468 | loc: getLocation(), 469 | expr: e, 470 | options: null 471 | }; 472 | if (opts) { 473 | ret.options = opts; 474 | } 475 | return ret; 476 | } 477 | 478 | // xxx todo region stuff 479 | region 480 | = INDENT? START '@' ID STOP template INDENT? START '@end' STOP 481 | 482 | /*xxx // kill \n for <@end> on line by itself if multi-line embedded region 483 | ({$region.start.getLine()!=input.LT(1).getLine()}?=> NEWLINE)? 484 | -> {indent!=null}? 485 | ^(INDENTED_EXPR $i ^(REGION[$x] ID template?)) 486 | -> ^(REGION[$x] ID template?) 
*/ 487 | 488 | 489 | /* 490 | * Anonymous sub template 491 | * Match: 492 | * {|} 493 | * {} 494 | * 495 | * ignore final INDENT before } as it's not part of outer indent 496 | */ 497 | subtemplate 498 | = "{" &{ subtemplateDepth += 1; return true; } args:( __ a:formalArgsNoDefault __ "|" __ { return a; })? 499 | t:template INDENT? __ "}" { 500 | subtemplateDepth -= 1; 501 | outside = false; 502 | return { 503 | type: "SUBTEMPLATE", 504 | loc: getLocation(), 505 | args: args, 506 | template: t.value 507 | }; 508 | } 509 | 510 | formalArgsNoDefault 511 | = first:ID 512 | rest:( __ "," __ a:ID { return { 513 | type: "FORMAL_ARG", 514 | loc: getLocation(), 515 | name: a.value 516 | }; 517 | })* { 518 | return makeList({type: "FORMAL_ARG", loc: getLocation(), name: first.value}, rest); 519 | } 520 | 521 | ifstat 522 | = i:INDENT? START "if" __ "(" __ 523 | &{ inConditional = true; return true; } c1:conditional &{ inConditional = false; return true; } 524 | __ ")" STOP /*xxx{ if (input.LA(1)!=NEWLINE) indent=$i; } */ 525 | t:template 526 | ei:( !(INDENT? START_CHAR "else" STOP_CHAR / INDENT? START_CHAR "endif" STOP_CHAR) 527 | INDENT? START "elseif" __ "(" __ 528 | &{ inConditional = true; return true; } c:conditional &{ inConditional = false; return true; } 529 | __ ")" STOP t1:template { 530 | return { 531 | type: "ELSEIF", 532 | loc: getLocation(), 533 | condition: c, 534 | template: t1.value 535 | }; 536 | })* 537 | e:( !(INDENT? START_CHAR "endif" STOP_CHAR) 538 | INDENT? START "else" STOP t2:template { 539 | return { 540 | type: "ELSE", 541 | loc: getLocation(), 542 | template: t2.value 543 | }; 544 | })? 545 | INDENT? START "endif" STOP { 546 | return { 547 | type: "IF", 548 | loc: getLocation(), 549 | condition: c1, 550 | template: t.value, 551 | elseifPart: ei, 552 | elsePart: e 553 | }; 554 | } 555 | /*xxx // kill \n for on line by itself if multi-line IF 556 | ({$ifstat.start.getLine()!=input.LT(1).getLine()}?=> NEWLINE)? 557 | -> {indent!=null}? 
558 | ^(INDENTED_EXPR $i ^('if' $c1 $t1? ^('elseif' $c2 $t2)* ^('else' $t3?)?)) 559 | -> ^('if' $c1 $t1? ^('elseif' $c2 $t2)* ^('else' $t3?)?) */ 560 | 561 | conditional 562 | = l:andConditional __ "||" __ r:conditional { 563 | return { 564 | type: "OR", 565 | loc: getLocation(), 566 | left: l, 567 | right: r 568 | }; 569 | } 570 | / andConditional 571 | 572 | andConditional 573 | = l:notConditional __ "&&" __ r:andConditional { 574 | return { 575 | type: "AND", 576 | loc: getLocation(), 577 | left: l, 578 | right: r 579 | }; 580 | } 581 | / notConditional 582 | 583 | notConditional 584 | = "!" __ n:notConditional { 585 | return { 586 | type: "NOT", 587 | loc: getLocation(), 588 | value: n 589 | }; 590 | } 591 | / e:memberExpr 592 | 593 | 594 | exprOptions 595 | = first:option rest:( __ "," __ o:option { return o; } )* { 596 | return makeList(first, rest); 597 | } 598 | 599 | option 600 | = name:ID val:( __ "=" __ e:exprNoComma { return e; } )? { 601 | const optionName = name.value; 602 | let value; 603 | if (!curGroup.isValidOption(optionName)) { 604 | error("No such option " + optionName + "."); 605 | } 606 | 607 | value = val || curGroup.defaultOptionValue(optionName); 608 | if (value === null) { 609 | error("Value required for option " + optionName + "."); 610 | } 611 | return { 612 | name: optionName, 613 | value: value 614 | }; 615 | } 616 | 617 | exprNoComma 618 | = me:memberExpr ref:( ':' tr:mapTemplateRef { return tr; } )? 
{ 619 | if (ref) { 620 | return { 621 | type: "MAP", 622 | loc: getLocation(), 623 | expr: me, 624 | using: ref 625 | }; 626 | } else { 627 | return me; 628 | } 629 | } 630 | 631 | expr "expression" 632 | = mapExpr 633 | 634 | /*xxx 635 | // xxx comment from ST 636 | // more complicated than necessary to avoid backtracking, which ruins 637 | // error handling 638 | mapExpr 639 | = first:memberExpr ( ("," rest:memberExpr)+ ":" mapTemplateRef { 640 | return { 641 | type: "ZIP", 642 | loc: getLocation(), 643 | value: "xxx" // ^(ELEMENTS memberExpr+) mapTemplateRef 644 | } 645 | }) 646 | / { return first; } 647 | ) 648 | ( /// xxx { if ($x!=null) $x.clear(); } // don't keep queueing x; new list for each iteration 649 | ":" x:mapTemplateRef ({$c==null}?=> "," xs:mapTemplateRef )* { 650 | return { 651 | type 652 | }; 653 | } 654 | //xxx -> ^(MAP[$col] $mapExpr $x+) 655 | )* 656 | */ 657 | 658 | /* 659 | * Match a member expression, optionally zipped with further comma separated member expressions and applied to one template reference (ZIP node), followed by zero or more ':' applications of one or more comma separated template references (MAP node). Without a zip or map part the member expression is returned as is. 660 | */ 661 | mapExpr 662 | = m1:memberExpr zip:( mn:( __ "," __ m:memberExpr { return m; } )+ __ ":" __ tr:mapTemplateRef { return [ mn, tr ]; } )? 
663 | maps:( __ ":" __ first:mapTemplateRef rest:( __ "," __ r:mapTemplateRef { return r; } )* { return makeList(first, rest); } )* { 664 | let res = m1; 665 | if (zip) { 666 | res = { 667 | type: "ZIP", 668 | loc: getLocation(), 669 | expr: makeList(m1, zip[0]), 670 | using: zip[1] 671 | }; 672 | } 673 | if (maps.length > 0) { 674 | res = { 675 | type: "MAP", 676 | loc: getLocation(), 677 | expr: res, 678 | using: maps 679 | }; 680 | // need to handle the implicit first argument 681 | // xxx deal with array of arrays here 682 | for (const expr2 of maps[0]) { 683 | if (expr2.type === "INCLUDE") { 684 | expr2.args.splice(0, 0, { 685 | type: "STRING", 686 | loc: getLocation(), 687 | value: "" 688 | }); 689 | } 690 | } 691 | } 692 | return res; 693 | } 694 | 695 | /* 696 | * Match: 697 | * expr:template(args) apply template to expr 698 | * expr:{arg | ...} apply subtemplate to expr 699 | * expr:(e)(args) convert e to a string template name and apply to expr 700 | */ 701 | mapTemplateRef 702 | = i:ID '(' a:args ')' { 703 | return { 704 | type: "INCLUDE", 705 | loc: getLocation(), 706 | templateName: i.value, 707 | args: a.value, 708 | }; 709 | } 710 | / subtemplate 711 | / '(' mapExpr ')' '(' argExprList? ')' // xxx -> ^(INCLUDE_IND mapExpr argExprList?) 712 | 713 | /* 714 | * Match: 715 | * . // value of property of object attribute 716 | * .. // any number of levels of property references 717 | * .() // indirect property reference. value of expr is name of property of object attribute 718 | * .().() // any number of levels allowed 719 | * ..(). // can mix direct and indirect property references 720 | * 721 | * xxx it seems strange that member references are allowed on anything other than an attribute 722 | * what does true.myProp mean? 723 | * or template(arg1, arg2).prop2 724 | */ 725 | memberExpr 726 | = e:includeExpr 727 | props:( '.' prop:ID { 728 | return { 729 | type: "PROP", 730 | loc: getLocation(), 731 | property: prop.value 732 | }; 733 | } 734 | / '.' 
'(' e1:mapExpr ')' { 735 | return { 736 | type: "PROP_IND", 737 | loc: getLocation(), 738 | property: e1 739 | }; 740 | } 741 | )* { 742 | if (props.length > 0) { 743 | return { 744 | type: "MEMBER_EXPR", 745 | loc: getLocation(), 746 | object: e, 747 | properties: props // xxx is this being an array a problem? 748 | }; 749 | } else { 750 | return e; 751 | } 752 | } 753 | /* 754 | * Handle template includes as well as functions because the syntax is the same 755 | * Match: 756 | * () // func is one of the built in functions: first, length, strlen, last, rest, reverse, trunc, strip, trim 757 | * super.() 758 | * () 759 | * xxx 760 | * Or primary 761 | */ 762 | includeExpr 763 | = i:ID &{ return curGroup.isFunction(i.value); } __ '(' __ e:expr? __ ')' { 764 | return { 765 | type: "FUNCTION", 766 | loc: getLocation(), 767 | name: i.value, 768 | arg: e 769 | }; 770 | } 771 | / "super." i:ID '(' a:args ')' { // xxx todo region stuff 772 | return { 773 | type: "INCLUDE_SUPER", 774 | loc: getLocation(), 775 | name: i.value, 776 | args: a 777 | }; 778 | } 779 | / i:ID '(' a:args ')' { 780 | return { 781 | type: "INCLUDE", 782 | loc: getLocation(), 783 | templateName: i.value, 784 | args: a.value, 785 | argsNamed: !!a.named, 786 | argsPassThrough: !!a.passThrough 787 | }; 788 | } 789 | // xxx todo region stuff 790 | //xxx | '@' 'super' '.' 
ID '(' rp=')' -> ^(INCLUDE_SUPER_REGION ID) 791 | //xxx | '@' ID '(' rp=')' -> ^(INCLUDE_REGION ID) 792 | / primary 793 | 794 | /* 795 | * Match: 796 | * true 797 | * false 798 | * 799 | * 800 | * 801 | * 802 | * if currently parsing a condition 803 | * ( ) 804 | * else 805 | * () 806 | * ()() 807 | */ 808 | primary 809 | = TRUE 810 | / FALSE 811 | / i:ID { return { 812 | type: "ATTRIBUTE", 813 | loc: getLocation(), 814 | name: i.value 815 | }; 816 | } 817 | / s:STRING { return s; } 818 | / subtemplate 819 | / list 820 | / &{ return inConditional; } "(" c:conditional ")" { return c; } 821 | / &{ return !inConditional; } "(" e:expr ")" a:( "(" a:argExprList? ")" { return a; } )? { 822 | if (a) { 823 | return { 824 | type: "INCLUDE_IND", 825 | loc: getLocation(), 826 | expr: e, 827 | args: a.value 828 | }; 829 | } else { 830 | return { 831 | type: "TO_STR", 832 | loc: getLocation(), 833 | expr: e 834 | }; 835 | } 836 | } 837 | 838 | args 839 | = first:namedArg rest:( __ "," __ a:namedArg { return a; } )* passThrough:( __ "," __ pt:'...' { return true; })? { 840 | const ret = { 841 | type: "ARGS", 842 | value: makeList(first, rest), 843 | named: true, 844 | passThrough: !!passThrough 845 | }; 846 | return ret; 847 | } 848 | / '...' { 849 | return { 850 | type: "ARGS", 851 | value: [], 852 | passThrough: true 853 | }; 854 | } 855 | / argExprList 856 | 857 | argExprList 858 | = first:arg rest:( __ "," __ a:arg { return a; } )* { 859 | return { 860 | type: "ARGS", 861 | value: makeList(first, rest) 862 | }; 863 | } 864 | /* In pegjs 0.9.0 an action as first element of an alternative is not allowed. */ 865 | / &{ return true; } { 866 | return { 867 | type: "ARGS", 868 | value: [] 869 | }; 870 | } 871 | 872 | arg 873 | = exprNoComma 874 | 875 | namedArg 876 | = i:ID __ "=" __ v:arg { 877 | v.argName = i.value; 878 | return v; 879 | } 880 | 881 | /* 882 | * Match: 883 | * [ * ] 884 | */ 885 | list 886 | = "[" __ first:listElement? 
rest:( __ "," __ i:listElement { return i; } )* __ "]" { 887 | return { 888 | type: "LIST", 889 | loc: getLocation(), 890 | value: makeList(first, rest) 891 | }; 892 | } 893 | 894 | listElement 895 | = exprNoComma 896 | /* In pegjs 0.9.0 an action as first element of an alternative is not allowed. */ 897 | / &{ return true; } { return null; } 898 | 899 | /* 900 | * lexical terminals 901 | */ 902 | 903 | WS_CHAR 904 | = " " 905 | / "\t" 906 | 907 | EOL "end of line" 908 | = "\n" 909 | / "\r\n" 910 | / "\r" 911 | 912 | COMMENT 913 | = "/*" (!"*/" .)* "*/" 914 | 915 | LINE_COMMENT 916 | = "//" (!EOL .)* 917 | 918 | __ "white space" 919 | = (WS_CHAR / EOL / COMMENT / LINE_COMMENT )* 920 | 921 | /* 922 | * xxx when defining a template "/" is not allowed but in a template when referencing a template it is. 923 | */ 924 | ID "identifier" 925 | = !(RESERVED) [a-zA-Z_/] [a-zA-Z_/0-9]* { 926 | return { 927 | type: "ID", 928 | value: text() 929 | }; 930 | } 931 | 932 | /* 933 | * According to the doc these are all "reserved words" but the Java ST parser seems to allow some in some contexts 934 | * true, false, import, default, key, group, implements, first, last, rest, trunc, strip, trim, length, strlen, reverse, if, else, elseif, endif, delimiters 935 | */ 936 | RESERVED 937 | = "true" 938 | / "false" 939 | / "if" 940 | / "else " 941 | / "elseif " 942 | / "endif " 943 | / "super" 944 | /*xxx / "import" should be able to have a template by this name */ 945 | /*xxx / "default" should be able to have a property by this name */ 946 | /* / "key" xxx */ 947 | / "group" 948 | / "delimiters" 949 | // This is old v3 keyword so allow it 950 | // / "implements" 951 | // The functions need to be included as identifiers because they are tested to be functions later 952 | 953 | TRUE 954 | = "true" { return { type: "BOOLEAN", loc: getLocation(), value: true }; } 955 | 956 | FALSE 957 | = "false" { return { type: "BOOLEAN", loc: getLocation(), value: false }; } 958 | 959 | EMPTY_LIST "empty 
list" 960 | = '[' __ ']' { return { type: "EMPTY_LIST", loc: getLocation(), value: null }; } 961 | 962 | STRING "string" 963 | = '"' chars:STRING_CHAR* '"' { 964 | return { type: "STRING", loc: getLocation(), value: chars.join("") }; 965 | } 966 | /* These conditions were in STRING_CHAR in the grammar for pegjs 0.8.0, 967 | * however pegjs 0.9.0 complains, correctly, about a possible infinite loop. 968 | */ 969 | / '"' chars:STRING_CHAR* EOL { error("Unterminated string."); } 970 | / '"' chars:STRING_CHAR* EOF { error("Unterminated string."); } 971 | 972 | STRING_CHAR 973 | = !('"' / "\\" / "\r" / "\n") . { return text(); } 974 | / "\\" sequence:ESCAPE_CHAR { return sequence; } 975 | 976 | ESCAPE_CHAR 977 | = "n" { return "\n"; } 978 | / "r" { return "\r"; } 979 | / "t" { return "\t"; } 980 | / . { return text(); } 981 | 982 | /** Match <<...>> but also allow <<..>> so we can have tag on end. 983 | Escapes: >\> means >> inside of <<...>>. 984 | Escapes: \>> means >> inside of <<...>> unless at end like <<...\>>>>. 985 | In that case, use <%..>>%> instead. 986 | */ 987 | BIGSTRING "big string" 988 | = "<<" chars:BIGSTRING_CHAR* ">>" { 989 | return { 990 | type: "BIGSTRING", 991 | value: chars.join("") 992 | }; 993 | } 994 | /* This condition was in BIGSTRING_CHAR in the grammar for pegjs 0.8.0, 995 | * however pegjs 0.9.0 complains, correctly, about a possible infinite loop. 996 | */ 997 | / "<<" chars:BIGSTRING_CHAR* EOF { error("Unterminated big string."); } 998 | 999 | BIGSTRING_CHAR 1000 | = !(">>" / "\\>>" / ">\\>") . 
{ return text(); } 1001 | / "\\>>" { return ">>"; } 1002 | / ">\\>" { return ">>"; } 1003 | 1004 | // same as BIGSTRING but means ignore newlines later 1005 | BIGSTRING_NO_NL "big string" 1006 | = "<%" (!"%>" .)* "%>" { 1007 | const txt = text(); 1008 | return { 1009 | type: "BIGSTRING_NO_NL", 1010 | value: { // xxx 1011 | ignoreNewLines: true, 1012 | // %\> is the escape to avoid end of string 1013 | string: txt.substring(2, txt.length - 2).replace(/\%\\>/g, "%>") 1014 | } 1015 | }; 1016 | } 1017 | / "<%" .* EOF { error("Unterminated big string."); } 1018 | 1019 | EOF "end of file" 1020 | = !. 1021 | 1022 | /* 1023 | * OUTSIDE 1024 | */ 1025 | 1026 | INDENT 1027 | = &{ return outside && column() === 1; } WS_CHAR+ { 1028 | return { 1029 | type: "INDENT", 1030 | value: text() 1031 | }; 1032 | } 1033 | 1034 | START 1035 | = &{ return outside; } !( START_CHAR "!") START_CHAR { 1036 | outside = false; 1037 | return { type: "START" }; 1038 | } 1039 | /* 1040 | * Character that starts an expression. This is configurable. Typically < or $ 1041 | */ 1042 | START_CHAR 1043 | = &{ return (input.charAt(peg$currPos) === delimiterStartChar); } . 1044 | 1045 | /* 1046 | * 1047 | */ 1048 | ST_COMMENT 1049 | = &{ return outside; } START_CHAR "!" (!("!" STOP_CHAR) .)* "!" STOP_CHAR { 1050 | return { type: "ST_COMMENT" }; 1051 | } 1052 | /* 1053 | * Any text outside an expression except for new lines 1054 | * text returned as is except for escapes 1055 | */ 1056 | TEXT 1057 | = &{ return outside; } chars:TEXT_CHAR+ { 1058 | return { 1059 | type: "TEXT", 1060 | loc: getLocation(), 1061 | value: chars.join("") // can't use text() unless it fixes up escapes 1062 | }; 1063 | } 1064 | 1065 | /* 1066 | * \< -> < 1067 | * <\\> ([ \t])*(\r|\r\n|\n) -> // ignores new line 1068 | * don't match end of line, $, or } (if in a sub template) 1069 | * otherwise: . -> . 
1070 | */ 1071 | TEXT_CHAR 1072 | = !(EOL / START_CHAR / "\\" START_CHAR / "\\\\" / "\\}" / &{ return subtemplateDepth > 0; } "}") . { 1073 | return text(); 1074 | } 1075 | / "\\" START_CHAR { return delimiterStartChar; } 1076 | / "\\\\" { return "\\"; } 1077 | / "\\}" { return String.fromCharCode(125); } /* pegjs doesn't like "}" in the action */ 1078 | / "\\" { return "\\"; } 1079 | / START_CHAR !("\\\\") e:ESCAPE STOP_CHAR { return e; } 1080 | / START_CHAR "\\\\" STOP_CHAR WS_CHAR* EOL { return ""; } 1081 | 1082 | /* 1083 | * <\ >, <\n>, <\t> -> space, new line, tab 1084 | * <\uhhhh> -> Unicode character (hhhh is a hex number) 1085 | */ 1086 | ESCAPE 1087 | = "\\" ch:( "u" HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT { return String.fromCharCode(parseInt(text().substr(1), 16)); } 1088 | / "n" { return "\n"; } 1089 | / "t" { return "\t"; } 1090 | / " " { return " "; } 1091 | / . { 1092 | error("Invalid escape character '" + text() + "'."); 1093 | } 1094 | ) { return ch; } 1095 | 1096 | HEX_DIGIT 1097 | = [0-9a-fA-F] 1098 | 1099 | NEWLINE 1100 | = &{ return outside; } EOL { 1101 | return { 1102 | type: "NEWLINE", 1103 | loc: getLocation(), 1104 | value: text() 1105 | }; 1106 | } 1107 | 1108 | /* 1109 | * INSIDE 1110 | */ 1111 | STOP "stop delimiter" 1112 | = !{ return outside; } STOP_CHAR { 1113 | outside = true; 1114 | return { type: "STOP" }; 1115 | } 1116 | /* 1117 | * Character that stops an expression. This is configurable. Typically > or $ 1118 | */ 1119 | STOP_CHAR 1120 | = &{ return (input.charAt(peg$currPos) === delimiterStopChar); } . 
1121 | -------------------------------------------------------------------------------- /output/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "extends": [ 4 | "plugin:@typescript-eslint/recommended" 5 | ], 6 | "parserOptions": { 7 | "ecmaVersion": 2018, 8 | "sourceType": "module" 9 | }, 10 | "rules": { 11 | // Rule customizations 12 | // "@typescript-eslint/explicit-function-return-type": "off", 13 | "prefer-const": 0, 14 | "prefer-spread": 0, 15 | "@typescript-eslint/no-explicit-any": 0, 16 | "@typescript-eslint/no-unused-vars": 0, 17 | "@typescript-eslint/no-this-alias": 0, 18 | "@typescript-eslint/explicit-module-boundary-types": 0, 19 | "@typescript-eslint/no-empty-interface": 0, 20 | "@typescript-eslint/ban-types": 0 21 | } 22 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ts-pegjs", 3 | "version": "4.2.1", 4 | "description": "TS target for peggy parser generator", 5 | "author": "Pedro J. 
Molina", 6 | "license": "MIT", 7 | "bugs": { 8 | "url": "https://github.com/metadevpro/ts-pegjs/issues" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/metadevpro/ts-pegjs.git" 13 | }, 14 | "keywords": [ 15 | "peggy", 16 | "pegjs", 17 | "plugin", 18 | "ts", 19 | "typescript", 20 | "parser", 21 | "generator" 22 | ], 23 | "bin": { 24 | "tspegjs": "dist/cli.mjs" 25 | }, 26 | "exports": { 27 | "import": "./dist/tspegjs.mjs", 28 | "require": "./dist/tspegjs.js" 29 | }, 30 | "types": "./dist/tspegjs.d.ts", 31 | "files": [ 32 | "LICENSE", 33 | "package.json", 34 | "dist/**/*" 35 | ], 36 | "scripts": { 37 | "clean": "rimraf --glob output/*.ts output/*.js", 38 | "pretest": "npm run clean", 39 | "build": "vite build", 40 | "test": "vite build && vitest run", 41 | "lint": "eslint src/ --ext .ts", 42 | "release": "npm test && npm publish" 43 | }, 44 | "peerDependencies": { 45 | "peggy": "^3.0.2" 46 | }, 47 | "devDependencies": { 48 | "@types/node": "^20.2.5", 49 | "@types/prettier": "^2.7.3", 50 | "@typescript-eslint/eslint-plugin": "^5.59.8", 51 | "@typescript-eslint/parser": "^5.59.8", 52 | "eslint": "^8.42.0", 53 | "eslint-config-prettier": "^8.8.0", 54 | "eslint-plugin-prettier": "^4.2.1", 55 | "rimraf": "^5.0.1", 56 | "rollup-plugin-add-shebang": "^0.3.1", 57 | "typescript": "^5.1.3", 58 | "vite": "^4.3.9", 59 | "vite-plugin-dts": "^2.3.0", 60 | "vite-tsconfig-paths": "^4.2.0", 61 | "vitest": "^0.31.4" 62 | }, 63 | "dependencies": { 64 | "prettier": "^2.8.8", 65 | "ts-morph": "^24.0.0" 66 | }, 67 | "prettier": { 68 | "tabWidth": 2, 69 | "semi": true, 70 | "printWidth": 100, 71 | "singleQuote": true, 72 | "trailingComma": "none" 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "plugins": [ 4 | "@typescript-eslint" 5 | ], 6 | "parserOptions": { 
7 | "sourceType": "module" 8 | }, 9 | "env": { 10 | "browser": true, 11 | "es6": true, 12 | "node": true 13 | }, 14 | "extends": [ 15 | "eslint:recommended" 16 | ], 17 | "rules": { 18 | "linebreak-style": [ 19 | "off", 20 | "unix" 21 | ], 22 | "quotes": 0, 23 | "semi": [ 24 | "error", 25 | "always" 26 | ], 27 | "no-unused-vars": 0, 28 | "@typescript-eslint/no-unused-vars": [ 29 | "warn", 30 | { 31 | "argsIgnorePattern": "^_", 32 | "varsIgnorePattern": "^_", 33 | "caughtErrorsIgnorePattern": "^_" 34 | } 35 | ] 36 | } 37 | } -------------------------------------------------------------------------------- /src/cli.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import * as fs from 'node:fs'; 4 | import * as process from 'node:process'; 5 | import peggy, { ParserBuildOptions } from 'peggy'; 6 | import tspegjs from './tspegjs'; 7 | import { version } from '../package.json'; 8 | import { TsPegjsParserBuildOptions } from './types'; 9 | 10 | const generate = peggy.generate; 11 | 12 | let args = process.argv; 13 | args.shift(); 14 | args.shift(); 15 | 16 | const needHelp = args.find((a) => a === '-h'); 17 | 18 | if (args.length === 0 || needHelp) { 19 | showHelp(); 20 | process.exit(0); 21 | } 22 | 23 | const inFile = args[args.length - 1]; 24 | let outFile = inFile.replace(/\.(pegjs|peggy)$/, '') + '.ts'; /* strip only a trailing grammar extension and always append .ts, so the default output can never be the input file itself */ 25 | args.forEach((arg, index) => { 26 | if (arg === '-o') { 27 | outFile = args[index + 1]; 28 | } 29 | }); 30 | let allowedStartRules = null; 31 | let customHeaderFile = null; 32 | let customHeader = null; 33 | 34 | args.forEach((arg, index) => { 35 | if (arg === '--allowed-start-rules') { 36 | allowedStartRules = (args[index + 1] || '').split(','); 37 | } 38 | if (arg === '--custom-header') { 39 | customHeader = args[index + 1]; 40 | } 41 | if (arg === '--custom-header-file') { 42 | customHeaderFile = args[index + 1]; 43 | } 44 | }); 45 | 46 | const trace = args.find((a) => a === '--trace') ? 
true : false; 47 | const cache = args.find((a) => a === '--cache') ? true : false; 48 | 49 | function showHelp() { 50 | /* eslint-disable no-console */ 51 | console.log('tspegjs v.' + version + ' TS target for pegjs'); 52 | console.log('Usage:'); 53 | console.log( 54 | /* only flags that the argument parsing of this script actually reads are listed */ ' tspegjs [-o outFile.ts] [--allowed-start-rules ] [--trace] [--cache] [--custom-header
] [--custom-header-file ] ' 55 | ); 56 | } 57 | 58 | function generateParser( 59 | input_file: string, 60 | output_file: string, 61 | trace: boolean, 62 | cache: boolean, 63 | allowedStartRules: null | string[], 64 | customHeader?: null | string, 65 | customHeaderFile?: null | string 66 | ) { 67 | fs.readFile(input_file, function (err, data) { 68 | if (err) throw err; 69 | 70 | if (customHeaderFile && !customHeader) { 71 | customHeader = fs.readFileSync(customHeaderFile).toString(); 72 | } 73 | 74 | const opts: TsPegjsParserBuildOptions = { 75 | output: 'source', 76 | trace: trace, 77 | cache: cache, 78 | plugins: [tspegjs], 79 | tspegjs: { 80 | customHeader 81 | } 82 | }; 83 | if (allowedStartRules) { 84 | opts.allowedStartRules = allowedStartRules; 85 | } 86 | 87 | // We must cast `opts` as a workaround for https://github.com/peggyjs/peggy/issues/403 88 | // Remove when issue fixed 89 | let parser = generate(data.toString(), opts as ParserBuildOptions); 90 | fs.writeFileSync(output_file, parser.toString()); 91 | }); 92 | } 93 | 94 | generateParser(inFile, outFile, trace, cache, allowedStartRules, customHeader, customHeaderFile); 95 | -------------------------------------------------------------------------------- /src/libs/get-unique-name.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Produces a name based off `nameProposal` that is guaranteed to not be a key in `nameMap`. 3 | */ 4 | export function getUniqueName( 5 | nameProposal: string, 6 | nameMap: Map, 7 | excludeReservedWords?: boolean 8 | ): string { 9 | const origProposal = nameProposal; 10 | let i = 1; 11 | while (nameMap.has(nameProposal) || (excludeReservedWords && RESERVED_WORDS.has(nameProposal))) { 12 | nameProposal = `${origProposal}_${i}`; 13 | i++; 14 | } 15 | return nameProposal; 16 | } 17 | 18 | /** 19 | * Checks if `name` is a Javascript or Typescript reserved word. 
20 | */ 21 | export function isKeyword(name: string): boolean { 22 | return RESERVED_WORDS.has(name); 23 | } 24 | 25 | const RESERVED_WORDS = new Set([ 26 | // Javascript 27 | 'abstract', 28 | 'boolean', 29 | 'break', 30 | 'byte', 31 | 'case', 32 | 'catch', 33 | 'char', 34 | 'class', 35 | 'continue', 36 | 'const', 37 | 'debugger', 38 | 'default', 39 | 'delete', 40 | 'do', 41 | 'double', 42 | 'else', 43 | 'enum', 44 | 'export', 45 | 'extends', 46 | 'false', 47 | 'final', 48 | 'finally', 49 | 'float', 50 | 'for', 51 | 'function', 52 | 'goto', 53 | 'if', 54 | 'implements', 55 | 'import', 56 | 'in', 57 | 'instanceof', 58 | 'int', 59 | 'interface', 60 | 'long', 61 | 'native', 62 | 'new', 63 | 'null', 64 | 'package', 65 | 'private', 66 | 'protected', 67 | 'public', 68 | 'return', 69 | 'short', 70 | 'static', 71 | 'super', 72 | 'switch', 73 | 'synchronized', 74 | 'this', 75 | 'throw', 76 | 'throws', 77 | 'transient', 78 | 'true', 79 | 'try', 80 | 'typeof', 81 | 'var', 82 | 'void', 83 | 'while', 84 | 'with', 85 | 'alert', 86 | 'arguments', 87 | 'Array', 88 | 'blur', 89 | 'Boolean', 90 | 'callee', 91 | 'caller', 92 | 'captureEvents', 93 | 'clearInterval', 94 | 'clearTimeout', 95 | 'close', 96 | 'closed', 97 | 'confirm', 98 | 'constructor', 99 | 'Date', 100 | 'defaultStatus', 101 | 'document', 102 | 'escape', 103 | 'eval', 104 | 'find', 105 | 'focus', 106 | 'frames', 107 | 'Function', 108 | 'history', 109 | 'home', 110 | 'Infinity', 111 | 'innerHeight', 112 | 'innerWidth', 113 | 'isFinite', 114 | 'isNaN', 115 | 'java', 116 | 'length', 117 | 'location', 118 | 'locationbar', 119 | 'Map', 120 | 'Math', 121 | 'menubar', 122 | 'moveBy', 123 | 'name', 124 | 'NaN', 125 | 'netscape', 126 | 'Number', 127 | 'Object', 128 | 'open', 129 | 'opener', 130 | 'outerHeight', 131 | 'outerWidth', 132 | 'Packages', 133 | 'pageXOffset', 134 | 'pageYOffset', 135 | 'parent', 136 | 'parseFloat', 137 | 'parseInt', 138 | 'personalbar', 139 | 'print', 140 | 'prompt', 141 | 'prototype', 142 | 'RegExp', 
143 | 'releaseEvents', 144 | 'resizeBy', 145 | 'resizeTo', 146 | 'routeEvent', 147 | 'scroll', 148 | 'scrollbars', 149 | 'scrollBy', 150 | 'scrollTo', 151 | 'self', 152 | 'Set', 153 | 'setInterval', 154 | 'setTimeout', 155 | 'status', 156 | 'statusbar', 157 | 'stop', 158 | 'String', 159 | 'toolbar', 160 | 'top', 161 | 'toString', 162 | 'unescape', 163 | 'unwatch', 164 | 'valueOf', 165 | 'watch', 166 | 'window', 167 | 168 | // Typescript 169 | 'as', 170 | 'implements', 171 | 'interface', 172 | 'let', 173 | 'package', 174 | 'private', 175 | 'protected', 176 | 'public', 177 | 'static', 178 | 'yield', 179 | 'any', 180 | 'boolean', 181 | 'constructor', 182 | 'declare', 183 | 'get', 184 | 'module', 185 | 'require', 186 | 'number', 187 | 'set', 188 | 'string', 189 | 'symbol', 190 | 'type', 191 | 'from', 192 | 'of', 193 | 'Awaited', 194 | 'Partial', 195 | 'Required', 196 | 'Readonly', 197 | 'Record', 198 | 'Pick', 199 | 'Omit', 200 | 'Exclude', 201 | 'Extract', 202 | 'NonNullable', 203 | 'Parameters', 204 | 'ConstructorParameters', 205 | 'ReturnType', 206 | 'InstanceType', 207 | 'ThisParameterType', 208 | 'OmitThisParameter', 209 | 'ThisType', 210 | 'Intrinsic String Manipulation Types', 211 | 'Uppercase', 212 | 'Lowercase', 213 | 'Capitalize', 214 | 'Uncapitalize', 215 | 'number', 216 | 'string', 217 | 'boolean', 218 | 'object', 219 | 'enum', 220 | 'void', 221 | 'null', 222 | 'undefined', 223 | 'any', 224 | 'never', 225 | 'Array', 226 | 'tuple', 227 | 'Number', 228 | 'String', 229 | 'Boolean', 230 | 'Object', 231 | 'Function' 232 | ]); 233 | -------------------------------------------------------------------------------- /src/libs/helpers.ts: -------------------------------------------------------------------------------- 1 | import { 2 | ArrayLiteralExpression, 3 | ArrowFunction, 4 | FunctionDeclaration, 5 | FunctionExpression, 6 | Node, 7 | NumericLiteral, 8 | ObjectLiteralExpression, 9 | StringLiteral, 10 | ts, 11 | TypeNode 12 | } from 'ts-morph'; 13 | 14 | /** 15 | * 
Wraps an expression in an `as const` declaration. E.g., `{foo: 1}` -> `{foo: 1} as const` 16 | */ 17 | export function wrapNodeInAsConstDeclaration(node: Node) { 18 | node.transform((t) => 19 | t.factory.createAsExpression( 20 | node.compilerNode, 21 | t.factory.createTypeReferenceNode(t.factory.createIdentifier('const')) 22 | ) 23 | ); 24 | } 25 | 26 | /** 27 | * Returns the nearest enclosing function-like node (e.g., a function declaration or arrow function), or `undefined` when `node` is not inside one. `getFirstAncestor` is used so that a node whose direct parent is the function is handled as well. 28 | */ 29 | export function getEnclosingFunction(node: Node) { 30 | return node.getFirstAncestor(isFunctionLike) as 31 | | FunctionDeclaration 32 | | FunctionExpression 33 | | ArrowFunction 34 | | undefined; 35 | } 36 | 37 | /** Returns whether a node is function-like */ 38 | function isFunctionLike( 39 | node: Node 40 | ): node is FunctionDeclaration | FunctionExpression | ArrowFunction { 41 | return ( 42 | node.isKind(ts.SyntaxKind.FunctionDeclaration) || 43 | node.isKind(ts.SyntaxKind.FunctionExpression) || 44 | node.isKind(ts.SyntaxKind.ArrowFunction) 45 | ); 46 | } 47 | 48 | /** 49 | * Replace `typeNode` with an array of the same type. 50 | * 51 | * **Warning**: this function invalidates previous references to the type. 52 | * (E.g., if you obtained a reference via `typeDeclaration.getType()`, you must do so again, 53 | * because the old reference will be stale.) 54 | */ 55 | export function makeTypeAnArray(typeNode: TypeNode) { 56 | return typeNode.transform((traversal) => 57 | traversal.factory.createArrayTypeNode(typeNode.compilerNode) 58 | ); 59 | } 60 | 61 | /** 62 | * Replace `typeNode` with a union type including null. 63 | * 64 | * **Warning**: this function invalidates previous references to the type. 65 | * (E.g., if you obtained a reference via `typeDeclaration.getType()`, you must do so again, 66 | * because the old reference will be stale.) 
*/
export function unionWithNull(typeNode: TypeNode) {
  return typeNode.transform((traversal) => {
    const nullType = traversal.factory.createLiteralTypeNode(traversal.factory.createNull());
    return traversal.factory.createUnionTypeNode([typeNode.compilerNode, nullType]);
  });
}

/**
 * Type guard: is `node` an object, string, array, or numeric literal?
 * E.g. `[5,6]` or `{a: 7}` or `"foo"`. A missing node is never a literal.
 */
export function isLiteral(
  node: Node | undefined
): node is ObjectLiteralExpression | StringLiteral | ArrayLiteralExpression | NumericLiteral {
  if (!node) {
    return false;
  }
  const kind = node.getKind();
  return (
    kind === ts.SyntaxKind.ObjectLiteralExpression ||
    kind === ts.SyntaxKind.StringLiteral ||
    kind === ts.SyntaxKind.ArrayLiteralExpression ||
    kind === ts.SyntaxKind.NumericLiteral
  );
}

/**
 * Joins `subtypes` into a union type (`A | B | ...`), dropping duplicate
 * entries while keeping first-seen order.
 */
export function formatUnionType(subtypes: string[]): string {
  return [...new Set(subtypes)].join(' | ');
}

/**
 * Safely "stringify" a string so that it works in both Nodejs and
 * the browser. The result is the JSON form of `str` in which every
 * character from U+007F upward is written as a `\uXXXX` escape.
 */
export function escapedString(str: string): string {
  const toUnicodeEscape = (chr: string) => '\\u' + chr.charCodeAt(0).toString(16).padStart(4, '0');
  return JSON.stringify(str).replace(/[\u007F-\uFFFF]/g, toUnicodeEscape);
}

// --------------------------------------------------------------------------------
// /src/libs/list-rules.ts:
// --------------------------------------------------------------------------------
import type { ast } from 'peggy';
type Grammar = ast.Grammar;

/**
 * Gets a list of all named rules from the grammar.
*/
export function listRuleNames(grammar: Grammar): string[] {
  const names: string[] = [];
  for (const rule of grammar.rules) {
    names.push(rule.name);
  }
  return names;
}

// --------------------------------------------------------------------------------
// /src/libs/prune-circular-references.ts:
// --------------------------------------------------------------------------------
import { Diagnostic, SourceFile, ts, TypeNode, TypeReferenceNode } from 'ts-morph';

/** Diagnostic code that Typescript reports for circular type references. */
const CIRCULAR_REFERENCE_CODE = 2456;

/** Collects the circular-reference diagnostics currently reported for `file`. */
function getCircularReferenceDiagnostics(file: SourceFile): Diagnostic[] {
  return file.getPreEmitDiagnostics().filter((d) => d.getCode() === CIRCULAR_REFERENCE_CODE);
}

/**
 * Auto-generated types may contain circular references, like
 * ```typescript
 * type A = "a" | B
 * type B = "b" | A
 * ```
 * These circular references likely do not exist in the grammar (since it could produce
 * a parser with an infinite loop). They were likely created when inferring types for a grammar
 * like
 * ```
 * A = "a" / B
 * B = "b" / "(" A ")"
 * ```
 *
 * This function attempts to detect and remove these circular references. If some
 * remain after the bounded number of passes, a warning is logged.
 * @param file
 */
export function pruneCircularReferences(file: SourceFile) {
  let circular = getCircularReferenceDiagnostics(file);
  if (circular.length === 0) {
    return;
  }

  // Circular references must be a loop of size at least two, so we can bound how many
  // times we must check for this error
  const maxPasses = circular.length / 2;
  for (let pass = 0; pass < maxPasses; pass++) {
    if (pass > 0) {
      circular = getCircularReferenceDiagnostics(file);
    }
    if (circular.length === 0) {
      return;
    }
    const [first, ...others] = circular.map((d) => getInfoFromDiagnostic(d, file));

    // Eliminate references to the first reported type from every other reported type.
    // We force the chain to be broken by inserting the `void` type.
    // Since this should occur in a union type anyways, there should be no effect.
    others
      .flatMap((typeInfo) => typeInfo.unionedIdentifiers)
      .filter((typeRef) => typeRef.refName === first.name)
      .forEach((typeRef) => typeRef.node.replaceWithText('void'));
  }

  const remaining = getCircularReferenceDiagnostics(file);
  if (remaining.length > 0) {
    console.warn(
      `Tried, but failed to eliminate circular references in generated types. The following errors remain: ${remaining
        .map((d) => d.getMessageText())
        .join('; ')}`
    );
  }
}

/**
 * Resolves a circular-reference diagnostic to the name of the offending type alias
 * and the list of type references that appear in its (union) body.
 *
 * Throws if the diagnostic cannot be mapped to a type alias whose body is a union type.
 */
function getInfoFromDiagnostic(d: Diagnostic, file: SourceFile) {
  const start = d.getStart();
  if (start == null) {
    throw new Error(`Diagnostic has no start position`);
  }
  const node = file.getDescendantAtPos(start);
  if (node == null) {
    throw new Error(`Cannot find node at position ${start}`);
  }

  const declaration = node.getParentOrThrow();
  if (!declaration.isKind(ts.SyntaxKind.TypeAliasDeclaration)) {
    throw new Error(
      `Parent of node is of kind "${declaration.getKindName()}", not "TypeAliasDeclaration"`
    );
  }

  const typeBodyNode = declaration.getTypeNodeOrThrow();
  if (!typeBodyNode.isKind(ts.SyntaxKind.UnionType)) {
    throw new Error(
      `Can only remove recursive type definitions on union types not "${typeBodyNode.getKindName()}"`
    );
  }

  const unionedIdentifiers = listUnionedIdentifiers(typeBodyNode);
  return { name: node.getText(), unionedIdentifiers };
}

/**
 * Lists every type reference reachable from `node` by descending only through
 * union types and parenthesized types. Any other kind of node contributes nothing.
 */
function listUnionedIdentifiers(node: TypeNode): { refName: string; node: TypeReferenceNode }[] {
  if (node.isKind(ts.SyntaxKind.TypeReference)) {
    return [{ refName: node.getText(), node }];
  }
  if (node.isKind(ts.SyntaxKind.ParenthesizedType)) {
    return listUnionedIdentifiers(node.getTypeNode());
  }
  if (node.isKind(ts.SyntaxKind.UnionType)) {
    return node.getTypeNodes().flatMap((member) => listUnionedIdentifiers(member));
  }
  return [];
}
// --------------------------------------------------------------------------------
// /src/libs/snake-to-camel.ts:
// --------------------------------------------------------------------------------
/**
 * Convert from snake case to camel case, but preserve leading underscores.
 * Each underscore-separated segment has its first character upper-cased and
 * the segments are concatenated (e.g. `_foo_bar` becomes `_FooBar`).
 */
export function snakeToCamel(str: string): string {
  // Leading underscores are copied through verbatim
  let prefixLength = 0;
  while (prefixLength < str.length && str[prefixLength] === '_') {
    prefixLength++;
  }
  const prefix = str.slice(0, prefixLength);
  const segments = str.slice(prefixLength).split('_');
  return prefix + segments.map((segment) => capitalize(segment)).join('');
}

/** Upper-cases the first character of `str`; the empty string is returned unchanged. */
function capitalize(str: string): string {
  return str.length === 0 ? str : str.charAt(0).toUpperCase() + str.slice(1);
}

// --------------------------------------------------------------------------------
// /src/libs/type-extractor.ts:
// --------------------------------------------------------------------------------
import type { ast } from 'peggy';
import * as peggy from 'peggy';
import * as prettierPluginTypescript from 'prettier/parser-typescript';
import prettier from 'prettier/standalone';
import { Project, ScriptTarget, ts } from 'ts-morph';
import { getUniqueName, isKeyword } from './get-unique-name';
import {
  escapedString,
  formatUnionType,
  getEnclosingFunction,
  isLiteral,
  wrapNodeInAsConstDeclaration
} from './helpers';
import { pruneCircularReferences } from './prune-circular-references';
import { snakeToCamel } from './snake-to-camel';

type Grammar = ast.Grammar;
type Expression = ast.Expression;
type Rule = ast.Rule;
type ActionExpression = ast.Action;
type Named = ast.Named;

/**
 * Header string that is inserted at the top of all autogenerated types.
*/
const TYPES_HEADER = `// These types were autogenerated by ts-pegjs
`;

/**
 * Source code that is inserted before processing with typescript to help typescript guess certain types.
 *
 * These declarations mirror the helper functions that Peggy makes available inside
 * actions. Note that `offset()` yields the numeric start offset of the match (it is
 * `location().start.offset`), not a position object.
 */
const SOURCE_HEADER = `
// Peggy has built-in globals that we want to be always declared.
declare const options: {};
declare function text(): string;
declare function location(): { source: string | undefined; start: { offset: number; line: number; column: number }; end: { offset: number; line: number; column: number } };
declare function offset(): number;
declare function range(): {source: string | undefined, start: number, end: number};

// We need an export, otherwise typescript will insist that "location" refers to "window.location"
export {};
`;

/**
 * Generate a _probably_ unique name based on the number `i`.
 * Calling this function with the same number produces the same name.
 *
 * The purpose of this function is to create unique names for type template parameters
 * to avoid collisions with other (possibly defined) type names.
 */
function uniqueTypeParam(i: number) {
  return `__T_${i}`;
}

type TypeExtractorOptions = {
  /**
   * Autogenerated types may be marked with the `readonly` keyword, as this
   * keyword is sometimes inserted during type generation. Setting this flag
   * causes the `readonly` keyword to be removed after processing.
   */
  removeReadonlyKeyword?: boolean;
  /**
   * Whether to force type names to be camel case. If `false`,
   * type names will be named the same as the rules from the Peggy grammar.
   */
  camelCaseTypeNames?: boolean;
};

/**
 * Object that handles type creation and extraction from
 * a Peggy Grammar. By default, type names are created from
 * Peggy grammar rules and converted to CamelCase.
*
 * Example usage
 * ```
 * const typeExtractor = new TypeExtractor(peggyGrammar);
 * const fullTypescriptTypes = typeExtractor.getTypes();
 * const specificTypeForGrammarRule = typeExtractor.typeCache.get("RuleName");
 * ```
 *
 * **Note**: constructing a `TypeExtractor` renames the rules (and rule references)
 * of the grammar object it is given, in place, to their type names.
 */
export class TypeExtractor {
  grammar: Grammar;
  sourceHeader = SOURCE_HEADER;
  project = new Project({
    compilerOptions: {
      allowJs: true,
      target: ScriptTarget.ESNext,
      strict: true
    },
    skipAddingFilesFromTsConfig: true,
    skipFileDependencyResolution: true,
    useInMemoryFileSystem: true
  });
  /**
   * Maps original rule names to generated type names. For rules whose name was
   * changed, the reverse entry (type name -> original rule name) is stored as well.
   */
  nameMap: Map<string, string> = new Map();
  /** Formatted `type X = ...` source for each rule, filled in by `getTypes()`. */
  typeCache: Map<string, string> = new Map();
  options: TypeExtractorOptions = {
    removeReadonlyKeyword: true,
    camelCaseTypeNames: true
  };
  /** Formats source with Prettier; on failure a warning is logged and the input is returned as-is. */
  formatter = (str: string) => {
    try {
      return prettier.format(str, {
        parser: 'typescript',
        plugins: [prettierPluginTypescript]
      });
    } catch (e) {
      console.warn('Encountered error when formatting types with Prettier', e);
    }
    return str;
  };

  /**
   * @param grammar - A Peggy grammar AST, or grammar source text (which is parsed with Peggy).
   * @param options - Overrides for the default `TypeExtractorOptions`.
   */
  constructor(grammar: Grammar | string, options?: TypeExtractorOptions) {
    if (typeof grammar === 'string') {
      grammar = peggy.generate(grammar, { output: 'ast' });
    }
    this.grammar = grammar;
    Object.assign(this.options, options || {});
    this.#initSourceHeader();
    this.#initNameMap();
    this.#renameGrammarRules();
  }

  /**
   * Create typescript source code for the types in the grammar.
   *
   * @param typeOverrides - An object whose keys are rule names and values are types. These will override any computed type. They can be full typescript expressions (e.g. `Foo | Bar`).
   * @returns The formatted source text of all exported type aliases.
   */
  getTypes(options?: { typeOverrides?: Record<string, string> }) {
    const { typeOverrides } = options || {};

    const file = this.project.createSourceFile('__types__.ts', TYPES_HEADER, { overwrite: true });

    const ensureCached = (rule: { name: string; type: string }): { name: string; type: string } => {
      // Save the type in case we want to retrieve individual rules later (e.g., for testing)
      let typeCacheString = `type ${rule.name} = ${rule.type}`;
      try {
        typeCacheString = this.formatter(typeCacheString).trim();
        if (typeCacheString.endsWith(';')) {
          typeCacheString = typeCacheString.slice(0, typeCacheString.length - 1);
        }
      } catch {
        // Purposely empty catch: `formatter` is a replaceable field and may throw;
        // in that case the unformatted declaration is cached instead.
      }
      this.typeCache.set(rule.name, typeCacheString);
      this.typeCache.set(this.nameMap.get(rule.name) || 'UNKNOWN', typeCacheString);
      return rule;
    };

    // Compute (and cache) every declaration before touching the source file.
    const declarations = this.grammar.rules.map((rule) => {
      if (typeOverrides?.[rule.name]) {
        return ensureCached({
          name: rule.name,
          type: typeOverrides[rule.name]
        });
      }
      let type = this.getTypeForExpression(rule.expression);
      if (this.options.removeReadonlyKeyword) {
        type = type.replace(/readonly\s/g, '');
      }

      return ensureCached({
        name: rule.name,
        type
      });
    });

    // XXX: For some reason adding all types at once with `file.addTypeAliases()` fails
    // while adding the types one-by-one succeeds...
    for (const declaration of declarations) {
      file.addTypeAlias(declaration).setIsExported(true);
    }

    pruneCircularReferences(file);

    return this.formatter(file.getFullText());
  }

  /**
   * Rename all grammar rules and references
   * as specified in the name map. The grammar is modified in place.
   */
  #renameGrammarRules() {
    const nameMap = this.nameMap;
    function rename(node: Grammar | Rule | Expression | Named) {
      const type = node.type;
      switch (type) {
        case 'named':
          rename(node.expression);
          break;
        case 'grammar':
          node.rules.forEach(rename);
          break;
        case 'rule':
          node.name = nameMap.get(node.name) || 'RENAME_ERROR';
          rename(node.expression);
          break;
        case 'rule_ref':
          node.name = nameMap.get(node.name) || 'RENAME_ERROR';
          break;
        case 'sequence':
          node.elements.forEach(rename);
          break;
        case 'action':
          rename(node.expression);
          break;
        case 'choice':
          node.alternatives.forEach(rename);
          break;
        case 'group':
        case 'labeled':
        case 'one_or_more':
        case 'optional':
        case 'simple_and':
        case 'simple_not':
        case 'zero_or_more':
        case 'text':
          rename(node.expression);
          break;
        case 'any':
        case 'semantic_and':
        case 'semantic_not':
        case 'literal':
        case 'class':
          // Leaf nodes: nothing to rename
          break;
        case 'repeated':
          rename(node.expression);
          if (node.delimiter) {
            rename(node.delimiter);
          }
          break;
        default: {
          // Exhaustiveness check: a compile error here means a node type is unhandled
          const _unused: never = type;
          console.warn('Did not handle renaming of Peggy node with type', type);
        }
      }
    }
    rename(this.grammar);
  }

  /**
   * Create a map of Peggy grammar rules to their type names.
   * Depending on the options, this will convert names to CamelCase.
   */
  #initNameMap() {
    const newNames: [string, string][] = this.options.camelCaseTypeNames
      ? this.grammar.rules.map((rule) => [rule.name, snakeToCamel(rule.name)])
      : this.grammar.rules.map((rule) => [rule.name, rule.name]);
    // Make sure no generated names clash with Javascript/Typescript keywords
    const existingNames: Map<string, string> = new Map(newNames.map((n) => [n[1], n[0]]));
    for (let i = 0; i < newNames.length; i++) {
      const [oldName, newName] = newNames[i];
      if (isKeyword(newName)) {
        const nonClashingName = getUniqueName(newName, existingNames, true);
        existingNames.set(nonClashingName, newName);
        newNames[i] = [oldName, nonClashingName];
      }
    }

    // Take care not to clobber names that are already in CamelCase. They get priority.
    newNames.filter(([a, b]) => a === b).forEach(([a, b]) => this.nameMap.set(a, b));
    for (const [oldName, newName] of newNames) {
      if (this.nameMap.get(oldName) === newName) {
        // Name is already here. No need to add it twice.
        continue;
      }
      // If we made it here, we haven't put our new name into the list yet.
      // We must first check that is unique. Because we changed snake case to
      // CamelCase, we may have introduced name collisions.
      const nameProposal = getUniqueName(newName, this.nameMap);
      this.nameMap.set(oldName, nameProposal);
      this.nameMap.set(nameProposal, oldName);
    }
  }

  /**
   * Add the global initializer and the initializer code to
   * the header (the parts between `{...}` at the start of a grammar)
   */
  #initSourceHeader() {
    if (this.grammar.topLevelInitializer?.code) {
      // Insert extra semicolons in case the code boundaries were ambiguous
      this.sourceHeader +=
        '\n;// Global Initializer\n' + this.grammar.topLevelInitializer.code + '\n;\n';
    }
    if (this.grammar.initializer?.code) {
      // Insert extra semicolons in case the code boundaries were ambiguous
      this.sourceHeader += '\n;// Initializer\n' + this.grammar.initializer.code + '\n;\n';
    }
  }

  /**
   * Returns the best-guess type for an Expression node in the grammar.
   *
   * For example, a rule with definition `Foo = [a-z]` would be type `string`.
   * A rule with definition `Foo = Bar / Baz` would be type `Bar | Baz`.
   */
  getTypeForExpression(expr: Expression | Named): string {
    const type = expr.type;
    switch (type) {
      case 'named':
        return this.getTypeForExpression(expr.expression);
      // For each of these, we cannot get a narrower type.
      // any == `.` matches any character
      // class == `[char-range]`
      // text == `$(concatenated strings)`
      case 'any':
      case 'class':
      case 'text':
        return 'string';
      case 'literal':
        if (expr.ignoreCase) {
          // A case-insensitive literal can match several spellings
          return 'string';
        }
        return escapedString(expr.value);
      case 'rule_ref':
        return expr.name;
      case 'optional':
        return `(${this.getTypeForExpression(expr.expression)}) | null`;
      case 'zero_or_more':
      case 'one_or_more':
      case 'repeated':
        return `(${this.getTypeForExpression(expr.expression)})[]`;
      case 'choice':
        return formatUnionType(expr.alternatives.map((e) => `(${this.getTypeForExpression(e)})`));
      case 'sequence': {
        // If a sequence has pluck operators (`@`), its value is made of the plucked
        // items only: a single plucked item is returned directly, several plucked
        // items are returned as an array. Otherwise, the value is an array of all items.
        const pickedElements = expr.elements.filter((e) => e.type === 'labeled' && e.pick);
        if (pickedElements.length === 1) {
          return this.getTypeForExpression(pickedElements[0]);
        }
        const tupleElements = pickedElements.length > 1 ? pickedElements : expr.elements;
        return `[ ${tupleElements.map((e) => this.getTypeForExpression(e)).join(' , ')} ]`;
      }
      case 'simple_and':
      case 'simple_not':
      case 'semantic_and':
      case 'semantic_not':
        return 'undefined';
      case 'group':
        return this.getTypeForExpression(expr.expression);
      case 'labeled':
        return this.getTypeForExpression(expr.expression);
      case 'action':
        return this._getTypeForAction(expr);
    }
    // Exhaustiveness check: a compile error here means a node type is unhandled
    const unknownType: never = type;
    console.warn('Peggy node of type', unknownType, 'is currently not processed');
    return 'unknown';
  }

  /**
   * Infers the type produced by an action by placing its code in the body of a
   * temporary generic function (one type parameter per label, constrained to the
   * label's type) and asking Typescript for the function's return type. The type
   * parameters are then substituted back with the label types.
   */
  _getTypeForAction(action: ActionExpression): string {
    const file = this.project.createSourceFile('__temp__.ts', this.sourceHeader, {
      overwrite: true
    });

    const expressions =
      action.expression.type === 'sequence' ? action.expression.elements : [action.expression];

    const labelNames = expressions.flatMap((e) => {
      if (e.type === 'labeled') {
        return [
          {
            name: e.label ?? 'UNKNOWN_LABEL',
            type: this.getTypeForExpression(e.expression)
          }
        ];
      }
      return [];
    });

    const func = file.addFunction({
      name: 'tmpFunc',
      statements: action.code,
      parameters: labelNames.map((l, i) => ({
        name: l.name,
        type: uniqueTypeParam(i)
      })),
      typeParameters: labelNames.map((l, i) => ({
        name: uniqueTypeParam(i),
        constraint: l.type
      }))
    });
    // Returned literals are wrapped in `as const` so that the narrowest type is inferred
    func
      .getBodyOrThrow()
      .getChildrenOfKind(ts.SyntaxKind.ReturnStatement)
      .forEach((r) => {
        const parent = getEnclosingFunction(r);
        if (!parent || parent.getStart() !== func.getStart()) {
          // We found a return statement that was nested in a subfunction...
          return;
        }
        const returnExpression = r.getExpression();
        if (isLiteral(returnExpression)) {
          wrapNodeInAsConstDeclaration(returnExpression);
        }
      });

    const returnType = func.getReturnType();
    // Now that we have the return type with generic parameters,
    // we replace those generic parameters explicitly.
    const finalType = file.addTypeAlias({
      name: 'tmpType',
      typeParameters: labelNames.map((l, i) => ({
        name: uniqueTypeParam(i)
      })),
      type: returnType.getText(func, ts.TypeFormatFlags.NoTruncation)
    });
    let finalTypeNode = finalType.getTypeNodeOrThrow();

    // A way to convert from generic params to the corresponding type.
    const paramsToType = new Map(labelNames.map((l, i) => [uniqueTypeParam(i), l.type]));

    // Substitute in the types of any type parameters
    if (finalTypeNode.isKind(ts.SyntaxKind.TypeReference)) {
      // If the type node consists solely of a type reference, we need to treat it differently,
      // since it has no descendants.
      const name = finalTypeNode.getTypeName();
      const identifier = name.getText();
      if (paramsToType.has(identifier)) {
        finalType.setType(paramsToType.get(identifier) || `ERROR`);
      }
    } else {
      finalTypeNode.forEachDescendant((c) => {
        if (c.isKind(ts.SyntaxKind.TypeReference)) {
          const name = c.getTypeName();
          const identifier = name.getText();
          if (paramsToType.has(identifier)) {
            // Parenthesize so that substituted unions keep their grouping
            c.replaceWithText(`(${paramsToType.get(identifier)})`);
          }
        }
      });
    }

    // Re-fetch the node: the substitutions above may have invalidated the old reference
    finalTypeNode = finalType.getTypeNodeOrThrow();
    return finalTypeNode.getText();
  }
}

// --------------------------------------------------------------------------------
// /src/passes/constants.ts:
// --------------------------------------------------------------------------------
/**
 * Types that are common to any generated parser.
*/
export const COMMON_TYPES_STR = `
export interface FilePosition {
  offset: number;
  line: number;
  column: number;
}

export interface FileRange {
  start: FilePosition;
  end: FilePosition;
  source: string;
}

export interface LiteralExpectation {
  type: "literal";
  text: string;
  ignoreCase: boolean;
}

export interface ClassParts extends Array<string | ClassParts> {}

export interface ClassExpectation {
  type: "class";
  parts: ClassParts;
  inverted: boolean;
  ignoreCase: boolean;
}

export interface AnyExpectation {
  type: "any";
}

export interface EndExpectation {
  type: "end";
}

export interface OtherExpectation {
  type: "other";
  description: string;
}

export type Expectation = LiteralExpectation | ClassExpectation | AnyExpectation | EndExpectation | OtherExpectation;

declare class _PeggySyntaxError extends Error {
  public static buildMessage(expected: Expectation[], found: string | null): string;
  public message: string;
  public expected: Expectation[];
  public found: string | null;
  public location: FileRange;
  public name: string;
  constructor(message: string, expected: Expectation[], found: string | null, location: FileRange);
  format(sources: {
    source?: any;
    text: string;
  }[]): string;
}

export interface TraceEvent {
  type: string;
  rule: string;
  result?: any;
  location: FileRange;
}

declare class _DefaultTracer {
  private indentLevel: number;
  public trace(event: TraceEvent): void;
}
\n`;

// --------------------------------------------------------------------------------
// /src/passes/generate-ts.ts:
// --------------------------------------------------------------------------------
import type { Config, ast } from 'peggy';
import { escapedString } from '../libs/helpers';
import { TypeExtractor } from '../libs/type-extractor';
import { TsPegjsParserBuildOptions } from '../types';
import { COMMON_TYPES_STR } from './constants';

// The types for `SourceNode` are currently incorrect; override them with correct types.
type SourceNode = NonNullable<ast.Grammar['code']> & { children: (SourceNode | string)[] };

/**
 * Peggy `generate` pass that turns the Javascript produced by Peggy into a
 * Typescript module: the generated parser is wrapped and re-exported with types,
 * and (unless disabled) computed types for the grammar rules are appended.
 *
 * When `options.tspegjs.onlyGenerateGrammarTypes` is set, the generated code is
 * replaced by the custom header followed by the computed types only.
 *
 * Throws if Peggy produced no code, if `errorName` fails the basic identifier
 * check, or if no default start rule can be determined.
 */
export const generateParser: Config['passes']['generate'][number] = (
  ast,
  options: TsPegjsParserBuildOptions,
  _session
) => {
  const code = ast.code;
  if (!code) {
    throw new Error(
      `tspegjs requires peggy to generate source Javascript source code before continuing, but something went wrong and no generated source code was found`
    );
  }

  let computedTypes = '';
  const typeExtractor = new TypeExtractor(ast, {
    camelCaseTypeNames: !options.tspegjs?.doNotCamelCaseTypes
  });
  if (!options.tspegjs?.skipTypeComputation || options.tspegjs?.onlyGenerateGrammarTypes) {
    computedTypes = typeExtractor.getTypes({
      typeOverrides: options.returnTypes
    });
  }

  if (options.tspegjs?.onlyGenerateGrammarTypes) {
    // Discard the parser itself; emit only the header and the types
    code.children.length = 0;
    code.add(options.tspegjs.customHeader || '');
    if (!(options.tspegjs.customHeader || '').endsWith('\n')) {
      code.add('\n');
    }
    code.add(computedTypes);
    return;
  }

  // We are using a mix of Typescript and Peggy-generated Javascript in this file.
  // We don't want Typescript to complain if a user configures options like `strict`,
  // There is no option to apply `@ts-ignore` to a block of code ( https://github.com/Microsoft/TypeScript/issues/19573 )
  // so instead we take an ugly approach: insert `@ts-ignore` comments before every line of source.
  //
  // An alternative is to add a // @ts-nocheck to the whole file, but that means the types that we
  // generate also won't be checked.
  annotateWithTsIgnore(code);

  const SourceNode = code.constructor as any;
  const rootNode: SourceNode = new SourceNode();

  // Store everything that Peggy generated for us so that we can manipulate the code.
  const destructuredParser: SourceNode = new SourceNode();
  rootNode.add(destructuredParser);
  destructuredParser.add(code);

  // Set a new rootNode that we control
  ast.code = rootNode;

  if (options.dependencies) {
    const dependencyImports = Object.entries(options.dependencies).map(
      ([varName, importPath]) => `import ${varName} from ${escapedString(importPath)};`
    );
    rootNode.prepend('\n' + dependencyImports.join('\n') + '\n');
  }

  // Custom import statements should come near the top, if there are any
  if (options.tspegjs?.customHeader) {
    rootNode.prepend(options.tspegjs.customHeader + '\n\n');
  }

  // eslint in this repo is configured to disable @ts-ignore directives; we disable it.
  rootNode.prepend('/* eslint-disable */\n\n');

  // destructure what's been generated by Peggy so that we can re-export it.
  destructuredParser.prepend(
    `const peggyParser: {parse: any, SyntaxError: any, DefaultTracer?: any} = `
  );

  // These types are always the same
  rootNode.add(COMMON_TYPES_STR);

  const errorName = options.tspegjs?.errorName || 'PeggySyntaxError';
  // Very basic test to make sure no horrible identifier has been passed in
  if (errorName !== JSON.stringify(errorName).slice(1, errorName.length + 1)) {
    throw new Error(
      `The errorName ${JSON.stringify(errorName)} is not a valid Javascript identifier`
    );
  }

  rootNode.add(`peggyParser.SyntaxError.prototype.name = ${JSON.stringify(errorName)};\n`);

  const defaultStartRule = (options.allowedStartRules || [])[0] || ast.rules[0]?.name;
  if (!defaultStartRule) {
    throw new Error(`Something went wrong...Could not determine the default start rule.`);
  }

  // Generate an explicit type listing all the start rules
  // that are allowed by the parser.
  let startRuleType = 'string';
  if (options.allowedStartRules) {
    startRuleType = options.allowedStartRules.map((x) => JSON.stringify(x)).join(' | ');
  }
  // Without computed types there is nothing to infer from, so `parse` returns `any`
  const parseFunctionType = computedTypes
    ? createParseFunctionType(options.allowedStartRules || [], typeExtractor)
    : `export type ParseFunction = (input: string, options?: ParseOptions) => any`;
  rootNode.add(`
export interface ParseOptions {
  filename?: string;
  startRule?: ${startRuleType};
  tracer?: any;
  [key: string]: any;
}
${parseFunctionType}
export const parse: ParseFunction = peggyParser.parse;
`);
  rootNode.add([
    `\nexport const ${errorName} = peggyParser.SyntaxError as typeof _PeggySyntaxError;\n`,
    `\nexport type ${errorName} = _PeggySyntaxError;\n`
  ]);
  if (options.trace) {
    rootNode.add([
      `\nexport const DefaultTracer = peggyParser.DefaultTracer as typeof _DefaultTracer;\n`,
      `\nexport type DefaultTracer = _DefaultTracer;\n`
    ]);
  }

  if (computedTypes) {
    rootNode.add('\n');
    rootNode.add(computedTypes);
  }
};

/**
 * Add `// @ts-ignore` before every line in `code`.
 *
 * The children of `code` are rebuilt in place: string children that carry content
 * get a `// @ts-ignore` line inserted before them, and nested nodes are processed
 * recursively.
 */
function annotateWithTsIgnore(code: SourceNode) {
  if (!code.children || code.children.length === 0) {
    return;
  }
  const children = [...code.children];
  code.children.length = 0;
  for (const child of children) {
    if (typeof child === 'string') {
      if (tsIgnoreShouldApply(child)) {
        code.children.push('// @ts-ignore\n');
      }
      code.children.push(child);
    } else if (typeof child === 'object' && child.children) {
      annotateWithTsIgnore(child);
      code.children.push(child);
    }
  }
}

/**
 * Determine if a line has content.
160 | */ 161 | function tsIgnoreShouldApply(line: string): boolean { 162 | line = line.trim(); 163 | if (!line || line.startsWith('//')) { 164 | return false; 165 | } 166 | // Pure punctuation doesn't need a @ts-ignore 167 | if (!line.match(/[a-zA-Z]/)) { 168 | return false; 169 | } 170 | return true; 171 | } 172 | 173 | /** 174 | * Create a type signature for the `parse` function that will infer the return type based on the value of 175 | * `options.startRule`. 176 | */ 177 | function createParseFunctionType( 178 | allowedStartRules: string[], 179 | typeExtractor: TypeExtractor 180 | ): string { 181 | const defaultStartRule = typeExtractor.nameMap.get(allowedStartRules[0]); 182 | if (!defaultStartRule) { 183 | throw new Error('Cannot determine the default starting rule.'); 184 | } 185 | 186 | let startRuleChain = 187 | allowedStartRules 188 | .map( 189 | (rule) => `StartRule extends ${JSON.stringify(rule)} ? ${typeExtractor.nameMap.get(rule)} :` 190 | ) 191 | .join('\n ') + ` ${defaultStartRule}`; 192 | 193 | return `export type ParseFunction = ( 194 | input: string, 195 | options?: Options 196 | ) => Options extends { startRule: infer StartRule } ? 
197 | ${startRuleChain} 198 | : ${defaultStartRule};`; 199 | } 200 | -------------------------------------------------------------------------------- /src/tspegjs.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from 'peggy'; 2 | import { generateParser } from './passes/generate-ts'; 3 | import { TsPegjsParserBuildOptions } from './types'; 4 | 5 | export * from "./types"; 6 | 7 | export default { 8 | use(config: Config, options: TsPegjsParserBuildOptions) { 9 | // We depend on the code generated being an IIF 10 | (options as any).format = 'bare'; 11 | 12 | config.passes.generate.push(generateParser); 13 | 14 | if (!options.tspegjs) { 15 | options.tspegjs = {}; 16 | } 17 | if (options.tspegjs.customHeader === undefined) { 18 | options.tspegjs.customHeader = null; 19 | } 20 | } 21 | }; 22 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | import type { SourceOutputs, BuildOptionsBase } from 'peggy'; 2 | 3 | export type TsPegjsOptions = { 4 | /** 5 | * Items inserted at the top of generated grammar. This can be used for import statements, etc. 6 | */ 7 | customHeader?: null | string; 8 | /** 9 | * The base name for the supplementary files generated by ts-pegjs. 10 | */ 11 | baseName?: string; 12 | errorName?: string | 'PeggySyntaxError'; 13 | /** 14 | * If `true`, do not attempt to have Typescript guess the type of each rule 15 | * by using the return value of each action. This will result in `unknown` being 16 | * returned as the parsed AST's type. 
// Included as a workaround for https://github.com/peggyjs/peggy/issues/403
interface _ParserBuildOptions extends BuildOptionsBase {
  output?: SourceOutputs;
}

/**
 * Peggy build options extended with the keys read by ts-pegjs.
 */
export type TsPegjsParserBuildOptions = _ParserBuildOptions & {
  /** Plugin-specific options. */
  tspegjs?: TsPegjsOptions;
  /** Maps a grammar rule name to the TypeScript type it returns (e.g. `{ Integer: 'number' }`). */
  returnTypes?: Record<string, string>;
  /** Maps a variable name to the module it is imported from (e.g. `{ foo: './bar' }`). */
  dependencies?: Record<string, string>;
};
9 | -------------------------------------------------------------------------------- /test/arithmetics.test.ts: -------------------------------------------------------------------------------- 1 | import { exec as execNode } from 'node:child_process'; 2 | import { existsSync } from 'node:fs'; 3 | import * as fs from 'node:fs/promises'; 4 | import * as path from 'node:path'; 5 | import { fileURLToPath } from 'node:url'; 6 | import { promisify } from 'node:util'; 7 | import peggy from 'peggy'; 8 | import { describe, expect, it } from 'vitest'; 9 | 10 | // Local imports 11 | import tspegjs from '../src/tspegjs'; 12 | 13 | const exec = promisify(execNode); 14 | 15 | const EXAMPLES_DIR = fileURLToPath(new URL('../examples', import.meta.url)); 16 | const OUTPUT_DIR = fileURLToPath(new URL('../output', import.meta.url)); 17 | 18 | describe('Can generate parser for `arithmetics.pegjs` with custom return type', () => { 19 | const sampleGrammarName = 'arithmetics.pegjs'; 20 | const grammarFile = path.join(EXAMPLES_DIR, sampleGrammarName); 21 | const ext = path.extname(sampleGrammarName); 22 | const outBaseName = path.join(OUTPUT_DIR, sampleGrammarName).slice(0, -ext.length) + '-typed'; 23 | const outTsName = outBaseName + '.ts'; 24 | const outJsName = outBaseName + '.js'; 25 | it(`Can generate parser for \`${sampleGrammarName}\``, async () => { 26 | await generateParser(grammarFile, outTsName, `// Arithmetic`, { 27 | Integer: 'number', 28 | Expression: 'number', 29 | Term: 'number', 30 | Factor: 'number' 31 | }); 32 | }); 33 | it.concurrent(`Generated \`ts\` file passes eslint check`, async () => { 34 | const { stdout, stderr } = await exec(`eslint "${outTsName}"`); 35 | if (stderr) { 36 | throw new Error(stderr); 37 | } 38 | }); 39 | 40 | it.concurrent(`Generated \`ts\` file contains custom header`, async () => { 41 | const source = await fs.readFile(outTsName, { encoding: 'utf-8' }); 42 | expect(source.match(/\/\/ Arithmetic/)).toBeTruthy(); 43 | }); 44 | 45 | it(`Can 
compile \`ts\` file to \`js\``, async () => { 46 | const { stdout, stderr } = await exec( 47 | `tsc --target es6 --module commonjs --declaration "${outTsName}"` 48 | ); 49 | if (stderr) { 50 | throw new Error(stderr); 51 | } 52 | }); 53 | 54 | it(`Can parse arithmetics`, async () => { 55 | const arithmetics = await import(outJsName); 56 | // Run a test where we successfully parse 57 | const goodSource = { 58 | grammarSource: 'someFile.txt', 59 | text: '3 * 4' 60 | }; 61 | expect(arithmetics.parse(goodSource.text, { grammarSource: goodSource.grammarSource })).toBe( 62 | 12 63 | ); 64 | 65 | // Run a test where parsing is unsuccessful 66 | const badSource = { 67 | grammarSource: 'someFile.txt', 68 | text: '3 ** 4' 69 | }; 70 | expect(() => { 71 | try { 72 | arithmetics.parse(badSource.text, { grammarSource: badSource.grammarSource }); 73 | } catch (e: any) { 74 | throw new Error(e.format([badSource])); 75 | } 76 | }).toThrow( 77 | `Error: Expected "(", integer, or whitespace but "*" found. 
/**
 * Generate a TypeScript parser from a grammar file and write it to disk.
 *
 * @param inFile Path of the grammar to read.
 * @param outFile Path the generated source is written to.
 * @param customHeader Text forwarded to the plugin as `tspegjs.customHeader`.
 * @param returnTypes Map from rule name to the TypeScript type it returns.
 * @throws Error if `inFile` does not exist.
 */
async function generateParser(
  inFile: string,
  outFile: string,
  customHeader = '// customHeader a\n// customHeader b',
  returnTypes: Record<string, string> = {}
) {
  if (!existsSync(inFile)) {
    throw new Error(`File "${inFile}" doesn't exist. Cannot proceed`);
  }

  const source = await fs.readFile(inFile, { encoding: 'utf-8' });
  const parser = peggy.generate(source, {
    // @ts-ignore
    output: 'source',
    //trace: true,
    cache: true,
    plugins: [tspegjs],
    // The Peggy types do not allow extending the config when a plugin is added, so we have to disable ts temporarily
    // @ts-ignore-next-line
    tspegjs: {
      customHeader
    },
    returnTypes
  });
  await fs.writeFile(outFile, parser, { encoding: 'utf-8' });
}
/**
 * Guard used before each CLI test: throws unless the built CLI file is present on disk.
 */
function ensureCliIsBuilt() {
  const cliIsPresent = existsSync(CLI_PATH);
  if (cliIsPresent) {
    return;
  }
  const message = `File "${CLI_PATH}" doesn't exist. You must run \`npm run build\` before executing this test.`;
  throw new Error(message);
}
52 | } 53 | { 54 | const sampleGrammarName = 'minimal.pegjs'; 55 | const grammarFile = path.join(EXAMPLES_DIR, sampleGrammarName); 56 | it(`Can generate types for \`${sampleGrammarName}\``, async () => { 57 | expect( 58 | await generateParser(grammarFile, `// Arithmetic`, { 59 | START: 'string' 60 | }) 61 | ).toMatchInlineSnapshot(` 62 | "// Arithmetic 63 | // These types were autogenerated by ts-pegjs 64 | export type START = string; 65 | " 66 | `); 67 | expect(await generateParser(grammarFile, `// Arithmetic`, {})).toMatchInlineSnapshot(` 68 | "// Arithmetic 69 | // These types were autogenerated by ts-pegjs 70 | export type START = \\"a\\" | \\"b\\"; 71 | " 72 | `); 73 | }); 74 | } 75 | { 76 | const sampleGrammarName = 'snake-case-rules.pegjs'; 77 | const grammarFile = path.join(EXAMPLES_DIR, sampleGrammarName); 78 | it(`Can generate types for \`${sampleGrammarName}\``, async () => { 79 | expect(await generateParser(grammarFile, '', {}, { doNotCamelCaseTypes: true })) 80 | .toMatchInlineSnapshot(` 81 | " 82 | // These types were autogenerated by ts-pegjs 83 | export type start = string | other_rule; 84 | export type other_rule = string; 85 | " 86 | `); 87 | expect(await generateParser(grammarFile, '', {}, { doNotCamelCaseTypes: false })) 88 | .toMatchInlineSnapshot(` 89 | " 90 | // These types were autogenerated by ts-pegjs 91 | export type Start = string | OtherRule; 92 | export type OtherRule = string; 93 | " 94 | `); 95 | // Default behavior is to CamelCase type names. 
/**
 * Generate only the grammar types for a grammar file and return them as source text.
 *
 * @param inFile Path of the grammar to read.
 * @param customHeader Text forwarded to the plugin as `tspegjs.customHeader`.
 * @param returnTypes Map from rule name to the TypeScript type it returns.
 * @param additionalOptions Extra plugin options; spread last, so they override the defaults set here.
 * @returns Whatever `peggy.generate` produces for `output: 'source'`.
 * @throws Error if `inFile` does not exist.
 */
async function generateParser(
  inFile: string,
  customHeader = '// customHeader a\n// customHeader b',
  returnTypes: Record<string, string> = {},
  additionalOptions: TsPegjsOptions = {}
) {
  if (!existsSync(inFile)) {
    throw new Error(`File "${inFile}" doesn't exist. Cannot proceed`);
  }

  const source = await fs.readFile(inFile, { encoding: 'utf-8' });
  const parser = peggy.generate(source, {
    // @ts-ignore
    output: 'source',
    //trace: true,
    cache: true,
    plugins: [tspegjs],
    // The Peggy types do not allow extending the config when a plugin is added, so we have to disable ts temporarily
    // @ts-ignore-next-line
    tspegjs: {
      customHeader,
      onlyGenerateGrammarTypes: true,
      ...additionalOptions
    },
    returnTypes
  });
  return parser;
}
35 | 36 | it(`Can generate parser for \`${sampleGrammarName}\``, async () => { 37 | await generateParser(grammarFile, outTsName); 38 | }); 39 | 40 | it.concurrent(`Generated \`ts\` file passes eslint check`, async () => { 41 | const { stdout, stderr } = await exec(`eslint "${outTsName}"`); 42 | if (stderr) { 43 | throw new Error(stderr); 44 | } 45 | }); 46 | 47 | it.concurrent(`Generated \`ts\` file contains custom header`, async () => { 48 | const source = await fs.readFile(outTsName, { encoding: 'utf-8' }); 49 | expect(source.match(/\/\/ customHeader a/)).toBeTruthy(); 50 | expect(source.match(/\/\/ customHeader b/)).toBeTruthy(); 51 | }); 52 | 53 | it.concurrent(`Can compile \`ts\` file to \`js\``, async () => { 54 | const { stdout, stderr } = await exec( 55 | `tsc --target es6 --module commonjs --declaration "${outTsName}"` 56 | ); 57 | if (stderr) { 58 | throw new Error(stderr); 59 | } 60 | }); 61 | }); 62 | } 63 | 64 | describe('Can generate parser for `minimal.pegjs` with custom return type', () => { 65 | const sampleGrammarName = 'minimal.pegjs'; 66 | const grammarFile = path.join(EXAMPLES_DIR, sampleGrammarName); 67 | const ext = path.extname(sampleGrammarName); 68 | const outBaseName = path.join(OUTPUT_DIR, sampleGrammarName).slice(0, -ext.length) + '-typed'; 69 | const outTsName = outBaseName + '.ts'; 70 | it(`Can generate parser for \`${sampleGrammarName}\``, async () => { 71 | await generateParser(grammarFile, outTsName, `// Minimal`, { START: 'string' }); 72 | }); 73 | it.concurrent(`Generated \`ts\` file passes eslint check`, async () => { 74 | const { stdout, stderr } = await exec(`eslint "${outTsName}"`); 75 | if (stderr) { 76 | throw new Error(stderr); 77 | } 78 | }); 79 | it.concurrent(`Generated \`ts\` file contains custom header`, async () => { 80 | const source = await fs.readFile(outTsName, { encoding: 'utf-8' }); 81 | expect(source.match(/\/\/ Minimal/)).toBeTruthy(); 82 | }); 83 | 84 | it.concurrent(`Can compile \`ts\` file to \`js\``, 
/**
 * Generate a TypeScript parser (with tracing and caching enabled) and write it to disk.
 *
 * @param inFile Path of the grammar to read.
 * @param outFile Path the generated source is written to.
 * @param customHeader Text forwarded to the plugin as `tspegjs.customHeader`.
 * @param returnTypes Map from rule name to the TypeScript type it returns.
 * @throws Error if `inFile` does not exist.
 */
async function generateParser(
  inFile: string,
  outFile: string,
  customHeader = '// customHeader a\n// customHeader b',
  returnTypes: Record<string, string> = {}
) {
  if (!existsSync(inFile)) {
    throw new Error(`File "${inFile}" doesn't exist. Cannot proceed`);
  }

  const source = await fs.readFile(inFile, { encoding: 'utf-8' });
  const parser = peggy.generate(source, {
    output: 'source',
    trace: true,
    cache: true,
    plugins: [tspegjs],
    // The Peggy types do not allow extending the config when a plugin is added, so we have to disable ts temporarily
    // @ts-ignore-next-line
    tspegjs: {
      customHeader
    },
    returnTypes
  });
  await fs.writeFile(outFile, parser, { encoding: 'utf-8' });
}
// Route every console.log argument through util.inspect (depth 10, colors on)
// so nested structures are printed in full while debugging tests.
const origLog = console.log;
console.log = (...args) => {
  const rendered = args.map((value) => util.inspect(value, false, 10, true));
  origLog(...rendered);
};
{ return ['A', a]; } 61 | `; 62 | const SIMPLE_GRAMMAR4 = ` 63 | expression 64 | = head:term tail:(_ ("+" / "-") _ @term)* { return {type: "add", contents: [head].concat(tail)} } 65 | 66 | term 67 | = head:factor tail:(_ ("*" / "/") _ @factor)* { return {type: "mul", contents: [head].concat(tail)} } 68 | 69 | factor 70 | = "(" _ expr:expression _ ")" {return expr} 71 | / integer 72 | 73 | integer "integer" 74 | = _ [0-9]+ { return parseInt(text(), 10); } 75 | 76 | _ "whitespace" 77 | = [ \\t\\n\\r]* 78 | `; 79 | 80 | describe('Basic Type Extraction', () => { 81 | it('can parse a minimal grammar', () => { 82 | parse(`start="a"`); 83 | }); 84 | it('can parse a simple grammar', () => { 85 | parse(SIMPLE_GRAMMAR); 86 | }); 87 | it('can list grammar rules', () => { 88 | const grammar = parse(SIMPLE_GRAMMAR); 89 | expect(listRuleNames(grammar)).toEqual(['expression', 'term', 'factor', 'integer', '_']); 90 | }); 91 | it('can generate types for AB grammar', () => { 92 | const typeExtractor = new TypeExtractor(SIMPLE_GRAMMAR2); 93 | typeExtractor.getTypes(); 94 | expect(typeExtractor.typeCache.get('A')).toEqual(`type A = { x: "a"; y: B | null }`); 95 | expect(typeExtractor.typeCache.get('B')).toEqual(`type B = { x: "b"; y: A | null }`); 96 | }); 97 | it('can generate types for AB grammar 2', () => { 98 | const typeExtractor = new TypeExtractor(SIMPLE_GRAMMAR3); 99 | typeExtractor.getTypes(); 100 | expect(typeExtractor.typeCache.get('A')).toEqual(`type A = ["a", B | null]`); 101 | expect(typeExtractor.typeCache.get('B')).toEqual(`type B = ["b", A | null]`); 102 | }); 103 | it('can generate types without renaming rules', () => { 104 | const typeExtractor = new TypeExtractor(SIMPLE_GRAMMAR2, { 105 | camelCaseTypeNames: false 106 | }); 107 | typeExtractor.getTypes(); 108 | expect(typeExtractor.typeCache.get('a')).toEqual(`type a = { x: "a"; y: b | null }`); 109 | expect(typeExtractor.typeCache.get('b')).toEqual(`type b = { x: "b"; y: a | null }`); 110 | }); 111 | it('can 
generate types without name clash', () => { 112 | const typeExtractor = new TypeExtractor(SIMPLE_GRAMMAR3B); 113 | typeExtractor.getTypes(); 114 | expect(typeExtractor.typeCache.get('a')).toEqual(`type A_1 = ["a", A | null]`); 115 | expect(typeExtractor.typeCache.get('A')).toEqual(`type A = ["A", A_1 | null]`); 116 | }); 117 | it('type of pluck operator `@` computed correctly', () => { 118 | const typeExtractor = new TypeExtractor(`Start = "a" @"b"`); 119 | typeExtractor.getTypes(); 120 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = "b"`); 121 | }); 122 | it('can create type from simple return type', () => { 123 | const typeExtractor = new TypeExtractor(`Start = x:"a" { return x; }`); 124 | typeExtractor.getTypes(); 125 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = "a"`); 126 | }); 127 | it('can identify a returned string as a type literal', () => { 128 | const typeExtractor = new TypeExtractor(`Start = "a" { return "a"; }`); 129 | typeExtractor.getTypes(); 130 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = "a"`); 131 | }); 132 | it('can identify a returned number as a type literal', () => { 133 | const typeExtractor = new TypeExtractor(`Start = "a" { return 7; }`); 134 | typeExtractor.getTypes(); 135 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = 7`); 136 | }); 137 | it('returns the correct type for location()', () => { 138 | const typeExtractor = new TypeExtractor(`Start = "a" { return location(); }`); 139 | typeExtractor.getTypes(); 140 | expect(typeExtractor.typeCache.get('Start')).toEqual( 141 | `type Start = { 142 | source: string | undefined; 143 | start: { offset: number; line: number; column: number }; 144 | end: { offset: number; line: number; column: number }; 145 | }` 146 | ); 147 | }); 148 | it('optional rules return possibly null', () => { 149 | const typeExtractor = new TypeExtractor(`Start = "a"?`); 150 | typeExtractor.getTypes(); 151 | 
expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = "a" | null`); 152 | }); 153 | it('start rule is exported', () => { 154 | const typeExtractor = new TypeExtractor(`Start = "a"\nEnd = "b"`); 155 | const types = typeExtractor.getTypes(); 156 | expect(types).toEqual( 157 | `// These types were autogenerated by ts-pegjs\nexport type Start = "a";\nexport type End = "b";\n` 158 | ); 159 | }); 160 | it('duplicate rules are removed', () => { 161 | const typeExtractor = new TypeExtractor(`Start = "a" / "a"`); 162 | typeExtractor.getTypes(); 163 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = "a"`); 164 | }); 165 | it('semantic predicates return undefined', () => { 166 | const typeExtractor = new TypeExtractor(`Start = "a" & "a"`); 167 | typeExtractor.getTypes(); 168 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = ["a", undefined]`); 169 | }); 170 | it('handles repetition operator', () => { 171 | const typeExtractor = new TypeExtractor(`Start = "a"|4|`); 172 | typeExtractor.getTypes(); 173 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = "a"[]`); 174 | }); 175 | it('renames rules referenced by repetition operator', () => { 176 | const typeExtractor = new TypeExtractor(`Start = b|4|\nb="a"`); 177 | typeExtractor.getTypes(); 178 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = B[]`); 179 | }); 180 | it('can override generated type', () => { 181 | const typeExtractor = new TypeExtractor(`Start = "a"`); 182 | typeExtractor.getTypes({ typeOverrides: { Start: 'string' } }); 183 | expect(typeExtractor.typeCache.get('Start')).toEqual(`type Start = string`); 184 | }); 185 | it('can detect a self-referencing type', () => { 186 | const typeExtractor = new TypeExtractor(`Start = "a" / B\nB = "b" / "(" @Start ")"`); 187 | const types = typeExtractor.getTypes(); 188 | expect(types).toMatchInlineSnapshot(` 189 | "// These types were autogenerated by ts-pegjs 190 | export type Start = \\"a\\" 
// Checks for the `snakeToCamel` helper.
describe('Util tests', () => {
  it('Can convert snake to camel case', () => {
    // The first letter is upper-cased even when there is no underscore.
    expect(snakeToCamel('foo')).toEqual('Foo');
    expect(snakeToCamel('foo_bar')).toEqual('FooBar');
    // Consecutive underscores behave like a single separator.
    expect(snakeToCamel('foo__bar')).toEqual('FooBar');
    // A leading underscore is kept in the result.
    expect(snakeToCamel('_foo_bar')).toEqual('_FooBar');
    // Already-capitalized words are left capitalized.
    expect(snakeToCamel('_Foo_Bar')).toEqual('_FooBar');
  });
});
/**
 * The main configuration for Vite/Vitest.
 *
 * Builds `src/cli.ts` and `src/tspegjs.ts` as a library in both ES and CommonJS
 * formats into `dist`, and configures the vitest runner.
 */
export default defineConfig({
  plugins: [
    viteTsconfigPaths(),
    // Prepend a node shebang to the emitted CLI bundles.
    rollupPluginShebang({ include: ['**/cli.js', '**/cli.mjs'], shebang: '#!/usr/bin/env node' }),
    // `src/cli.ts` is excluded from the dts plugin.
    dts({
      exclude: ["./src/cli.ts"]
    }),
  ],
  base: './',
  build: {
    outDir: 'dist',
    minify: false,
    lib: { entry: ['./src/cli.ts', './src/tspegjs.ts'], formats: ['es', 'cjs'] },
    // These modules are left as external imports instead of being bundled.
    rollupOptions: { external: [/^node:/, 'peggy', 'ts-morph', /^prettier/] }
  },
  // Test globals are enabled; testTimeout is 25000.
  test: { globals: true, testTimeout: 25000 }
});