├── .eslintrc.js ├── .github └── workflows │ └── on-pull-request.yml ├── .gitignore ├── CHANGELOG ├── LICENSE ├── README.md ├── package-lock.json ├── package.json ├── src ├── grammars │ └── pegjs.peggy ├── libs │ ├── parser.ts │ └── printer.ts ├── prettier-plugin-pegjs.ts ├── standalone.ts ├── tests │ ├── __snapshots__ │ │ └── printer.test.js.snap │ ├── endtoend.test.js │ ├── error.test.js │ ├── grammars │ │ ├── arithmetic.pegjs │ │ ├── css.pegjs │ │ ├── javascript.pegjs │ │ ├── json.pegjs │ │ ├── latex.pegjs │ │ └── pegjs-modified.pegjs │ ├── parser.test.js │ └── printer.test.js └── types │ ├── index.ts │ └── peggy-parser.ts ├── tsconfig.json └── vite.config.ts /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "browser": true, 4 | "es6": true 5 | }, 6 | "extends": "eslint:recommended", 7 | "globals": { 8 | "Atomics": "readonly", 9 | "SharedArrayBuffer": "readonly" 10 | }, 11 | "parserOptions": { 12 | "ecmaVersion": 2018, 13 | "sourceType": "module" 14 | }, 15 | "rules": { 16 | } 17 | }; -------------------------------------------------------------------------------- /.github/workflows/on-pull-request.yml: -------------------------------------------------------------------------------- 1 | name: Node.js CI 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | branches: ["master"] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | env: 13 | CI: false 14 | 15 | strategy: 16 | matrix: 17 | node-version: [18.x] 18 | # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | with: 23 | submodules: recursive 24 | - name: Use Node.js ${{ matrix.node-version }} 25 | uses: actions/setup-node@v3 26 | with: 27 | node-version: ${{ matrix.node-version }} 28 | cache: "npm" 29 | - run: npm ci 30 | 31 | - name: "Production Build" 32 | run: npm run build 33 | 34 | - name: "Tests" 35 | run: npm run 
test 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # webpack build 13 | /build 14 | 15 | # Runtime data 16 | pids 17 | *.pid 18 | *.seed 19 | *.pid.lock 20 | 21 | # Directory for instrumented libs generated by jscoverage/JSCover 22 | lib-cov 23 | 24 | # Coverage directory used by tools like istanbul 25 | coverage 26 | *.lcov 27 | 28 | # nyc test coverage 29 | .nyc_output 30 | 31 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 32 | .grunt 33 | 34 | # Bower dependency directory (https://bower.io/) 35 | bower_components 36 | 37 | # node-waf configuration 38 | .lock-wscript 39 | 40 | # Compiled binary addons (https://nodejs.org/api/addons.html) 41 | build/Release 42 | 43 | # Dependency directories 44 | node_modules/ 45 | jspm_packages/ 46 | 47 | # TypeScript v1 declaration files 48 | typings/ 49 | 50 | # TypeScript cache 51 | *.tsbuildinfo 52 | 53 | # Optional npm cache directory 54 | .npm 55 | 56 | # Optional eslint cache 57 | .eslintcache 58 | 59 | # Microbundle cache 60 | .rpt2_cache/ 61 | .rts2_cache_cjs/ 62 | .rts2_cache_es/ 63 | .rts2_cache_umd/ 64 | 65 | # Optional REPL history 66 | .node_repl_history 67 | 68 | # Output of 'npm pack' 69 | *.tgz 70 | 71 | # Yarn Integrity file 72 | .yarn-integrity 73 | 74 | # dotenv environment variables file 75 | .env 76 | .env.test 77 | 78 | # parcel-bundler cache (https://parceljs.org/) 79 | .cache 80 | 81 | # Next.js build output 82 | .next 83 | 84 | # Nuxt.js build / generate output 85 | .nuxt 86 | dist 87 | 88 | # Gatsby files 89 | .cache/ 90 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 91 | 
# https://nextjs.org/blog/next-9-1#public-directory-support 92 | # public 93 | 94 | # vuepress build output 95 | .vuepress/dist 96 | 97 | # Serverless directories 98 | .serverless/ 99 | 100 | # FuseBox cache 101 | .fusebox/ 102 | 103 | # DynamoDB Local files 104 | .dynamodb/ 105 | 106 | # TernJS port file 107 | .tern-port 108 | 109 | experiments 110 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 2.0.3 2 | - Fixes to package.json to make importing easier in esm/cjs environments 3 | 4 | 2.0.0 5 | - Switch to Prettier version 3 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jason Siefken 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # prettier-plugin-pegjs 2 | 3 | A prettier plugin for formatting Pegjs grammars. You can try it out online in the [playground](https://siefkenj.github.io/prettier-pegjs-playground/) 4 | 5 | ## Intro 6 | 7 | Prettier is an opinionated code formatter. It enforces a consistent style by parsing your code and re-printing it with its own rules that take the maximum line length into account, wrapping code when necessary. 8 | 9 | This plugin adds support for the Pegjs language to Prettier. 10 | 11 | ### Input 12 | 13 | ``` 14 | Expression = head:Term tail:(_("+"/"-")_ Term) * { 15 | return tail.reduce(function(result, element) {if (element[1] === "+") { return result + element[3]; } 16 | if (element[1] === "-") { return result - element[3]; } 17 | }, head)} 18 | ``` 19 | 20 | ### Output 21 | 22 | ``` 23 | Expression 24 | = head:Term tail:(_ ("+" / "-") _ Term)* { 25 | return tail.reduce(function (result, element) { 26 | if (element[1] === "+") { 27 | return result + element[3]; 28 | } 29 | if (element[1] === "-") { 30 | return result - element[3]; 31 | } 32 | }, head); 33 | } 34 | ``` 35 | 36 | ## Install 37 | 38 | yarn: 39 | 40 | ```bash 41 | yarn add --dev prettier prettier-plugin-pegjs 42 | # or globally 43 | yarn global add prettier prettier-plugin-pegjs 44 | ``` 45 | 46 | npm: 47 | 48 | ```bash 49 | npm install --save-dev prettier prettier-plugin-pegjs 50 | # or globally 51 | npm install --global prettier prettier-plugin-pegjs 52 | ``` 53 | 54 | ### Notes on global installation 55 | 56 | As of v3, Prettier no longer automatically searches for plugins, so you must specify plugins 57 | with the `--plugin` option. You can try `--plugin=prettier-plugin-pegjs`. 
If that doesn't work, 58 | specify `--plugin=path/to/node_modules/prettier-plugin-pegjs/dist/prettier-plugin-pegjs.js` 59 | 60 | ## Use 61 | 62 | ### With Node.js 63 | 64 | If you installed prettier as a local dependency, you can add prettier as a 65 | script in your `package.json`, 66 | 67 | ```json 68 | { 69 | "scripts": { 70 | "prettier": "prettier" 71 | } 72 | } 73 | ``` 74 | 75 | and then run it via 76 | 77 | ```bash 78 | yarn run prettier path/to/grammar.pegjs --write 79 | # or 80 | npm run prettier path/to/grammar.pegjs --write 81 | ``` 82 | 83 | If you installed globally, run 84 | 85 | ```bash 86 | prettier path/to/grammar.pegjs --write 87 | ``` 88 | 89 | ### In the Browser 90 | 91 | This package exposes a `standalone.ts` that wraps prettier and exports a 92 | `printPrettier` function that can be called as 93 | 94 | ```js 95 | printPrettier(YOUR_CODE, { 96 | // example option 97 | tabWidth: 2, 98 | }); 99 | ``` 100 | 101 | ## Options 102 | 103 | The standard Prettier options (such as `tabWidth`) can be used. Additionally, 104 | you may set `actionParser` to specify how the code inside a Pegjs `action` is 105 | printed. `actionParser` can be the parser from any valid Prettier plugin. 106 | It defaults to `"babel-ts"` for JavaScript and TypeScript, but it could be set 107 | to a different parser if your actions are written in a different language/dialect. 108 | 109 | ## Development 110 | 111 | To make a production build, run 112 | 113 | ``` 114 | npm run build 115 | ``` 116 | 117 | To develop, run 118 | 119 | ``` 120 | npm run watch 121 | ``` 122 | 123 | You can then execute Prettier with 124 | 125 | ``` 126 | prettier --plugin-search-dir=./ ... 127 | ``` 128 | 129 | or (required on Prettier 3+, which removed `--plugin-search-dir`) 130 | 131 | ``` 132 | prettier --plugin=./dist/prettier-plugin-pegjs.js ... 133 | ``` 134 | 135 | and the Pegjs plugin will load from the current directory. 
136 | 137 | ### Code structure 138 | 139 | `prettier-plugin-pegjs` uses a Pegjs grammar (located in `grammars/`) 140 | to parse Pegjs grammars! This grammar is slightly modified from Pegjs's 141 | official grammar to include delimiters and strings as AST nodes. 142 | For example, the `=` in `Rule = a / b` is assigned an AST node. 143 | This is so that `prettier-plugin-pegjs` can use Prettier's automatic 144 | comment placement algorithm, which searches through the AST and places comments 145 | based on an AST node's `start` and `end` position. 146 | 147 | `prettier-plugin-pegjs` uses Vite to dynamically compile imported 148 | Pegjs grammars, so they can be used _like_ native ES6 imports, though 149 | of course they are not. 150 | 151 | The plugin is organized as follows: 152 | 153 | - `prettier-plugin-pegjs.ts` This file exports the objects required of a 154 | Prettier plugin. 155 | - `standalone.ts` This file wraps the Prettier parser and pre-loads 156 | `prettier-plugin-pegjs` as a plugin. 157 | - `grammars/pegjs.peggy` The Pegjs grammar that parses Pegjs/Peggy grammars. 158 | - `libs/parser.ts` The parser which loads a Pegjs-created parser and creates 159 | an AST from a string. 160 | - `libs/printer.ts` Printers take an AST and produce a Doc (the intermediate 161 | format that Prettier uses). This is where most of the details of the plugin lie. 
162 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "prettier-plugin-pegjs", 3 | "description": "A Prettier plugin for formatting Pegjs and Peggy-js grammars", 4 | "version": "2.0.3", 5 | "type": "module", 6 | "author": { 7 | "name": "Jason Siefken", 8 | "email": "siefkenj@gmail.com" 9 | }, 10 | "keywords": [ 11 | "pegjs", 12 | "parser", 13 | "prettier" 14 | ], 15 | "main": "dist/prettier-plugin-pegjs.js", 16 | "files": [ 17 | "dist/**/*.js", 18 | "dist/**/*.ts", 19 | "dist/**/*.js.map", 20 | "dist/**/*.cjs", 21 | "dist/**/*.cts", 22 | "dist/**/*.cjs.map" 23 | ], 24 | "exports": { 25 | ".": { 26 | "import": "./dist/prettier-plugin-pegjs.js", 27 | "require": "./dist/prettier-plugin-pegjs.cjs" 28 | }, 29 | "./standalone": { 30 | "import": "./dist/standalone.js", 31 | "require": "./dist/standalone.cjs" 32 | } 33 | }, 34 | "scripts": { 35 | "test": "vitest", 36 | "watch": "vite build --watch", 37 | "build": "vite build" 38 | }, 39 | "dependencies": { 40 | "prettier": "^3.0.2" 41 | }, 42 | "devDependencies": { 43 | "@types/node": "^20.5.6", 44 | "@types/prettier": "^3.0.0", 45 | "eslint": "^8.48.0", 46 | "jsdom": "^22.1.0", 47 | "mkdirp": "3.0.1", 48 | "peggy": "^3.0.2", 49 | "resolve-cwd": "^3.0.0", 50 | "vite": "^4.4.9", 51 | "vite-plugin-dts": "^3.5.2", 52 | "vitest": "^0.34.3" 53 | }, 54 | "repository": { 55 | "type": "git", 56 | "url": "git+https://github.com/siefkenj/prettier-plugin-pegjs.git" 57 | }, 58 | "license": "MIT", 59 | "bugs": { 60 | "url": "https://github.com/siefkenj/prettier-plugin-pegjs/issues" 61 | }, 62 | "homepage": "https://github.com/siefkenj/prettier-plugin-pegjs#readme", 63 | "engines": { 64 | "node": ">= 16.0" 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/grammars/pegjs.peggy: 
-------------------------------------------------------------------------------- 1 | 2 | // PEG.js Grammar 3 | // ============== 4 | // 5 | // PEG.js grammar syntax is designed to be simple, expressive, and similar to 6 | // JavaScript where possible. This means that many rules, especially in the 7 | // lexical part, are based on the grammar from ECMA-262, 5.1 Edition [1]. Some 8 | // are directly taken or adapted from the JavaScript example grammar (see 9 | // examples/javascript.pegjs). 10 | // 11 | // Limitations: 12 | // 13 | // * Non-BMP characters are completely ignored to avoid surrogate pair 14 | // handling. 15 | // 16 | // * One can create identifiers containing illegal characters using Unicode 17 | // escape sequences. For example, "abcd\u0020efgh" is not a valid 18 | // identifier, but it is accepted by the parser. 19 | // 20 | // Both limitations could be resolved, but the costs would likely outweigh 21 | // the benefits. 22 | // 23 | // [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm 24 | 25 | { 26 | // Used as a shorthand property name for `LabeledExpression` 27 | const pick = true; 28 | 29 | // Used by `LabelIdentifier` to disallow the use of certain words as labels 30 | const RESERVED_WORDS = {}; 31 | 32 | // Populate `RESERVED_WORDS` using the optional option `reservedWords` 33 | const reservedWords = options.reservedWords || [ 34 | // Keyword 35 | "break", 36 | "case", 37 | "catch", 38 | "continue", 39 | "debugger", 40 | "default", 41 | "delete", 42 | "do", 43 | "else", 44 | "finally", 45 | "for", 46 | "function", 47 | "if", 48 | "in", 49 | "instanceof", 50 | "new", 51 | "return", 52 | "switch", 53 | "this", 54 | "throw", 55 | "try", 56 | "typeof", 57 | "var", 58 | "void", 59 | "while", 60 | "with", 61 | 62 | // FutureReservedWord 63 | "class", 64 | "const", 65 | "enum", 66 | "export", 67 | "extends", 68 | "implements", 69 | "import", 70 | "interface", 71 | "let", 72 | "package", 73 | "private", 74 | "protected", 75 | 
"public", 76 | "static", 77 | "super", 78 | "yield", 79 | 80 | // Literal 81 | "false", 82 | "null", 83 | "true", 84 | ]; 85 | if (Array.isArray(reservedWords)) 86 | reservedWords.forEach((word) => { 87 | RESERVED_WORDS[word] = true; 88 | }); 89 | 90 | // Helper to construct a new AST Node 91 | function createNode(type, details) { 92 | const ret = { 93 | type, 94 | loc: location(), 95 | }; 96 | if (details != null) { 97 | Object.assign(ret, details); 98 | } 99 | return ret; 100 | } 101 | 102 | // Used by `addComment` to store comments for the Grammar AST 103 | const comments = []; 104 | 105 | // Helper that collects all the comments to pass to the Grammar AST 106 | function addComment(text, multiline) { 107 | const loc = location(); 108 | 109 | // If there is a node already stored with our starting location, 110 | // we are being processed a second time, so just return it 111 | if (comments.find((c) => c.loc.start.offset === loc.start.offset)) { 112 | return text; 113 | } 114 | 115 | comments.push({ 116 | type: "comment", 117 | value: text, 118 | multiline: multiline, 119 | loc, 120 | }); 121 | 122 | return text; 123 | } 124 | 125 | } 126 | 127 | // ---- Syntactic Grammar ----- 128 | 129 | Grammar 130 | = __ 131 | ginitializer:(a:GInitializer __ { return a; })? 132 | initializer:(a:Initializer __ { return a; })? 
133 | rules:(a:Rule __ { return a; })+ { 134 | return createNode("grammar", { 135 | ginitializer, 136 | initializer, 137 | rules, 138 | comments, 139 | }); 140 | } 141 | 142 | GInitializer 143 | = code:( 144 | "{" code:CodeBlock "}" { 145 | return createNode("ginitializer", { code }); 146 | } 147 | ) 148 | EOS { return code; } 149 | 150 | Initializer 151 | = code:(code:CodeBlock { return createNode("initializer", { code }); }) 152 | EOS { return code; } 153 | 154 | Rule 155 | = name:Identifier 156 | __ 157 | displayName:( 158 | a:( 159 | a:StringLiteral { 160 | return createNode("stringliteral", { value: a }); 161 | } 162 | ) 163 | __ { return a; } 164 | )? 165 | delimiter:("=" { return createNode("delimiter", { value: "=" }); }) 166 | __ 167 | expression:Expression 168 | EOS { 169 | return createNode("rule", { 170 | name, 171 | displayName, 172 | expression, 173 | delimiter, 174 | }); 175 | } 176 | 177 | Expression = ChoiceExpression 178 | 179 | ChoiceExpression 180 | = head:ActionExpression 181 | tail:( 182 | __ 183 | delimiter:( 184 | "/" { return createNode("delimiter", { value: "/" }); } 185 | ) 186 | __ 187 | a:ActionExpression { return [a, delimiter]; } 188 | )* { 189 | if (tail.length === 0) { 190 | return head; 191 | } 192 | 193 | return createNode("choice", { 194 | alternatives: [head].concat(tail.map((x) => x[0])), 195 | delimiters: tail.map((x) => x[1]), 196 | }); 197 | } 198 | 199 | ActionExpression 200 | = expression:SequenceExpression code:(__ a:CodeBlock { return a; })? 
{ 201 | if (code === null) return expression; 202 | 203 | return createNode("action", { expression, code }); 204 | } 205 | 206 | SequenceExpression 207 | = head:LabeledExpression tail:(__ a:LabeledExpression { return a; })* { 208 | let elements = [head]; 209 | 210 | if (tail.length === 0) { 211 | if (head.type !== "labeled" || !head.pick) return head; 212 | } else { 213 | elements = elements.concat(tail); 214 | } 215 | 216 | return createNode("sequence", { elements }); 217 | } 218 | 219 | LabeledExpression 220 | = "@" label:LabelIdentifier? __ expression:PrefixedExpression { 221 | return createNode("labeled", { pick, label, expression }); 222 | } 223 | / label:LabelIdentifier __ expression:PrefixedExpression { 224 | return createNode("labeled", { label, expression }); 225 | } 226 | / PrefixedExpression 227 | 228 | LabelIdentifier 229 | = name:Identifier __ ":" { 230 | if (RESERVED_WORDS[name] !== true) return name; 231 | 232 | error(`Label can't be a reserved word "${name}".`, location()); 233 | } 234 | 235 | PrefixedExpression 236 | = operator:PrefixedOperator __ expression:SuffixedExpression { 237 | return createNode(operator, { expression }); 238 | } 239 | / SuffixedExpression 240 | 241 | PrefixedOperator 242 | = "$" { return "text"; } 243 | / "&" { return "simple_and"; } 244 | / "!" { return "simple_not"; } 245 | 246 | SuffixedExpression 247 | = expression:PrimaryExpression __ operator:SuffixedOperator { 248 | return createNode(operator, { expression }); 249 | } 250 | / RepeatedExpression 251 | / PrimaryExpression 252 | 253 | SuffixedOperator 254 | = "?" { return "optional"; } 255 | / "*" { return "zero_or_more"; } 256 | / "+" { return "one_or_more"; } 257 | 258 | // Added in Pegg v3 259 | RepeatedExpression 260 | = expression:PrimaryExpression 261 | __ 262 | "|" 263 | __ 264 | boundaries:Boundaries 265 | __ 266 | delimiter:("," __ @Expression __)? 
267 | "|" { 268 | return createNode("repeated", { 269 | min: boundaries[0], 270 | max: boundaries[1], 271 | expression, 272 | delimiter, 273 | }); 274 | } 275 | 276 | Boundaries 277 | = min:Boundary? __ ".." __ max:Boundary? { 278 | return [ 279 | min !== null ? min : createNode("constant", { value: 0 }), 280 | max !== null ? max : createNode("constant", { value: null }), 281 | ]; 282 | } 283 | / exact:Boundary { return [null, exact]; } 284 | 285 | Boundary 286 | = value:Integer { return createNode("constant", { value }); } 287 | / value:Identifier { return createNode("variable", { value }); } 288 | / value:CodeBlock { 289 | return createNode("function", { 290 | value: value, 291 | }); 292 | } 293 | 294 | PrimaryExpression 295 | = LiteralMatcher 296 | / CharacterClassMatcher 297 | / AnyMatcher 298 | / RuleReferenceExpression 299 | / SemanticPredicateExpression 300 | / "(" __ e:Expression __ ")" { 301 | // The purpose of the "group" AST node is just to isolate label scope. We 302 | // don't need to put it around nodes that can't contain any labels or 303 | // nodes that already isolate label scope themselves. 304 | if (e.type !== "labeled" && e.type !== "sequence") return e; 305 | 306 | // This leaves us with "labeled" and "sequence". 307 | return createNode("group", { expression: e }); 308 | } 309 | 310 | RuleReferenceExpression 311 | = name:Identifier !(__ (StringLiteral __)? "=") { 312 | return createNode("rule_ref", { name }); 313 | } 314 | 315 | SemanticPredicateExpression 316 | = operator:SemanticPredicateOperator __ code:CodeBlock { 317 | return createNode(operator, { code }); 318 | } 319 | 320 | SemanticPredicateOperator 321 | = "&" { return "semantic_and"; } 322 | / "!" { return "semantic_not"; } 323 | 324 | // ---- Lexical Grammar ----- 325 | 326 | SourceCharacter = . 
327 | 328 | WhiteSpace "whitespace" 329 | = "\t" 330 | / "\v" 331 | / "\f" 332 | / " " 333 | / "\u00A0" 334 | / "\uFEFF" 335 | / Zs 336 | 337 | LineTerminator = [\n\r\u2028\u2029] 338 | 339 | LineTerminatorSequence "end of line" 340 | = "\n" 341 | / "\r\n" 342 | / "\r" 343 | / "\u2028" 344 | / "\u2029" 345 | 346 | Comment "comment" 347 | = a:MultiLineComment { 348 | return createNode("comment", { block: true, text: a }); 349 | } 350 | / a:SingleLineComment { 351 | return createNode("comment", { block: true, text: a }); 352 | } 353 | 354 | MultiLineComment 355 | = "/*" comment:$(!"*/" SourceCharacter)* "*/" { 356 | return addComment(comment, true); 357 | } 358 | 359 | MultiLineCommentNoLineTerminator 360 | = "/*" comment:$(!("*/" / LineTerminator) SourceCharacter)* "*/" { 361 | return addComment(comment, true); 362 | } 363 | 364 | SingleLineComment 365 | = "//" comment:$(!LineTerminator SourceCharacter)* { 366 | return addComment(comment, false); 367 | } 368 | 369 | Identifier "identifier" 370 | = head:IdentifierStart tail:IdentifierPart* { return head + tail.join(""); } 371 | 372 | IdentifierStart 373 | = UnicodeLetter 374 | / "$" 375 | / "_" 376 | / "\\" a:UnicodeEscapeSequence { return a; } 377 | 378 | IdentifierPart 379 | = IdentifierStart 380 | / UnicodeCombiningMark 381 | / UnicodeDigit 382 | / UnicodeConnectorPunctuation 383 | / "\u200C" 384 | / "\u200D" 385 | 386 | UnicodeLetter 387 | = Lu 388 | / Ll 389 | / Lt 390 | / Lm 391 | / Lo 392 | / Nl 393 | 394 | UnicodeCombiningMark 395 | = Mn 396 | / Mc 397 | 398 | UnicodeDigit = Nd 399 | 400 | UnicodeConnectorPunctuation = Pc 401 | 402 | LiteralMatcher "literal" 403 | = value:StringLiteral ignoreCase:"i"? 
{ 404 | return createNode("literal", { 405 | value: value, 406 | ignoreCase: ignoreCase !== null, 407 | }); 408 | } 409 | 410 | StringLiteral "string" 411 | = "\"" chars:DoubleStringCharacter* "\"" { return chars.join(""); } 412 | / "'" chars:SingleStringCharacter* "'" { return chars.join(""); } 413 | 414 | DoubleStringCharacter 415 | = !("\"" / "\\" / LineTerminator) a:SourceCharacter { return a; } 416 | / "\\" a:EscapeSequence { return "\\" + a; } 417 | / LineContinuation 418 | 419 | SingleStringCharacter 420 | = !("'" / "\\" / LineTerminator) a:SourceCharacter { return a; } 421 | / "\\" a:EscapeSequence { return "\\" + a; } 422 | / LineContinuation 423 | 424 | CharacterClassMatcher "character class" 425 | = "[" inverted:"^"? parts:CharacterPart* "]" ignoreCase:"i"? { 426 | return createNode("class", { 427 | parts: parts.filter((part) => part !== ""), 428 | inverted: inverted !== null, 429 | ignoreCase: ignoreCase !== null, 430 | }); 431 | } 432 | 433 | CharacterPart 434 | = ClassCharacterRange 435 | / ClassCharacter 436 | 437 | ClassCharacterRange 438 | = begin:ClassCharacter "-" end:ClassCharacter { 439 | if (begin.charCodeAt(0) > end.charCodeAt(0)) 440 | error("Invalid character range: " + text() + "."); 441 | 442 | return [begin, end]; 443 | } 444 | 445 | ClassCharacter 446 | = !("]" / "\\" / LineTerminator) a:SourceCharacter { return a; } 447 | / "\\" a:EscapeSequence { return "\\" + a; } 448 | / LineContinuation 449 | 450 | LineContinuation = "\\" LineTerminatorSequence { return ""; } 451 | 452 | EscapeSequence 453 | = CharacterEscapeSequence 454 | / "0" !DecimalDigit { return "0"; } 455 | / HexEscapeSequence 456 | / UnicodeEscapeSequence 457 | 458 | CharacterEscapeSequence 459 | = SingleEscapeCharacter 460 | / NonEscapeCharacter 461 | 462 | SingleEscapeCharacter 463 | = "'" 464 | / "\"" 465 | / "\\" 466 | / "b" 467 | / "f" 468 | / "n" 469 | / "r" 470 | / "t" 471 | / "v" 472 | 473 | NonEscapeCharacter 474 | = !(EscapeCharacter / LineTerminator) 
a:SourceCharacter { return a; } 475 | 476 | EscapeCharacter 477 | = SingleEscapeCharacter 478 | / DecimalDigit 479 | / "x" 480 | / "u" 481 | 482 | HexEscapeSequence = "x" digits:$(HexDigit HexDigit) { return "x" + digits; } 483 | 484 | UnicodeEscapeSequence 485 | = "u" digits:$(HexDigit HexDigit HexDigit HexDigit) { return "u" + digits; } 486 | 487 | DecimalDigit = [0-9] 488 | 489 | HexDigit = [0-9a-f]i 490 | 491 | AnyMatcher = "." { return createNode("any"); } 492 | 493 | CodeBlock "code block" 494 | = "{" a:Code "}" { return a; } 495 | / "{" { error("Unbalanced brace."); } 496 | 497 | Code = $((![{}] SourceCharacter)+ / "{" Code "}")* 498 | 499 | Integer = digits:$DecimalDigit+ { return parseInt(digits, 10); } 500 | 501 | // Unicode Character Categories 502 | // 503 | // Extracted from the following Unicode Character Database file: 504 | // 505 | // http://www.unicode.org/Public/11.0.0/ucd/extracted/DerivedGeneralCategory.txt 506 | // 507 | // Unix magic used: 508 | // 509 | // grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters 510 | // cut -f1 -d " " | # Extract code points 511 | // grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters 512 | // sed -e 's/\.\./-/' | # Adjust formatting 513 | // sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting 514 | // tr -d '\n' # Join lines 515 | // 516 | // ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one 517 | // at the time of writing. 518 | // 519 | // Non-BMP characters are completely ignored to avoid surrogate pair handling 520 | // (detecting surrogate pairs isn't possible with a simple character class and 521 | // other methods would degrade performance). I don't consider it a big deal as 522 | // even parsers in JavaScript engines of common browsers seem to ignore them. 
523 | 524 | // Letter, Lowercase 525 | Ll 526 | = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0560-\u0588\u10D0-\u10FA\u10FD-\u10FF\u13F8-\u13FD\u1C80-\u1C88\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u
1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\u
A793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A] 527 | 528 | // Letter, Modifier 529 | Lm 530 | = [\u02B0-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0374\u037A\u0559\u0640\u06E5-\u06E6\u07F4-\u07F5\u07FA\u081A\u0824\u0828\u0971\u0E46\u0EC6\u10FC\u17D7\u1843\u1AA7\u1C78-\u1C7D\u1D2C-\u1D6A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\u2D6F\u2E2F\u3005\u3031-\u3035\u303B\u309D-\u309E\u30FC-\u30FE\uA015\uA4F8-\uA4FD\uA60C\uA67F\uA69C-\uA69D\uA717-\uA71F\uA770\uA788\uA7F8-\uA7F9\uA9CF\uA9E6\uAA70\uAADD\uAAF3-\uAAF4\uAB5C-\uAB5F\uFF70\uFF9E-\uFF9F] 531 | 532 | // Letter, Other 533 | Lo 534 | = [\u00AA\u00BA\u01BB\u01C0-\u01C3\u0294\u05D0-\u05EA\u05EF-\u05F2\u0620-\u063F\u0641-\u064A\u066E-\u066F\u0671-\u06D3\u06D5\u06EE-\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u0800-\u0815\u0840-\u0858\u0860-\u086A\u08A0-\u08B4\u08B6-\u08BD\u0904-\u0939\u093D\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u09FC\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0-\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60-\u0C61\u0C80\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0-\u0CE1\u0CF1-\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D54-\u0D56\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0
DC6\u0E01-\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065-\u1066\u106E-\u1070\u1075-\u1081\u108E\u1100-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17DC\u1820-\u1842\u1844-\u1878\u1880-\u1884\u1887-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE-\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C77\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5-\u1CF6\u2135-\u2138\u2D30-\u2D67\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u3006\u303C\u3041-\u3096\u309F\u30A1-\u30FA\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FEF\uA000-\uA014\uA016-\uA48C\uA4D0-\uA4F7\uA500-\uA60B\uA610-\uA61F\uA62A-\uA62B\uA66E\uA6A0-\uA6E5\uA78F\uA7F7\uA7FB-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD-\uA8FE\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9E0-\uA9E4\uA9E7-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA6F\uAA71-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5-\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADC\uAAE0-\uAAEA\uAAF2\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uF
BD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF66-\uFF6F\uFF71-\uFF9D\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC] 535 | 536 | // Letter, Titlecase 537 | Lt 538 | = [\u01C5\u01C8\u01CB\u01F2\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FBC\u1FCC\u1FFC] 539 | 540 | // Letter, Uppercase 541 | Lu 542 | = [\u0041-\u005A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A-\u023B\u023D-\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9-\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0-\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F
4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1C90-\u1CBA\u1CBD-\u1CBF\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E-\u213F\u2145\u2183\u2C00-\u2C2E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA74
0\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D-\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\uFF21-\uFF3A] 543 | 544 | // Mark, Spacing Combining 545 | Mc 546 | = [\u0903\u093B\u093E-\u0940\u0949-\u094C\u094E-\u094F\u0982-\u0983\u09BE-\u09C0\u09C7-\u09C8\u09CB-\u09CC\u09D7\u0A03\u0A3E-\u0A40\u0A83\u0ABE-\u0AC0\u0AC9\u0ACB-\u0ACC\u0B02-\u0B03\u0B3E\u0B40\u0B47-\u0B48\u0B4B-\u0B4C\u0B57\u0BBE-\u0BBF\u0BC1-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD7\u0C01-\u0C03\u0C41-\u0C44\u0C82-\u0C83\u0CBE\u0CC0-\u0CC4\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D40\u0D46-\u0D48\u0D4A-\u0D4C\u0D57\u0D82-\u0D83\u0DCF-\u0DD1\u0DD8-\u0DDF\u0DF2-\u0DF3\u0F3E-\u0F3F\u0F7F\u102B-\u102C\u1031\u1038\u103B-\u103C\u1056-\u1057\u1062-\u1064\u1067-\u106D\u1083-\u1084\u1087-\u108C\u108F\u109A-\u109C\u17B6\u17BE-\u17C5\u17C7-\u17C8\u1923-\u1926\u1929-\u192B\u1930-\u1931\u1933-\u1938\u1A19-\u1A1A\u1A55\u1A57\u1A61\u1A63-\u1A64\u1A6D-\u1A72\u1B04\u1B35\u1B3B\u1B3D-\u1B41\u1B43-\u1B44\u1B82\u1BA1\u1BA6-\u1BA7\u1BAA\u1BE7\u1BEA-\u1BEC\u1BEE\u1BF2-\u1BF3\u1C24-\u1C2B\u1C34-\u1C35\u1CE1\u1CF2-\u1CF3\u1CF7\u302E-\u302F\uA823-\uA824\uA827\uA880-\uA881\uA8B4-\uA8C3\uA952-\uA953\uA983\uA9B4-\uA9B5\uA9BA-\uA9BB\uA9BD-\uA9C0\uAA2F-\uAA30\uAA33-\uAA34\uAA4D\uAA7B\uAA7D\uAAEB\uAAEE-\uAAEF\uAAF5\uABE3-\uABE4\uABE6-\uABE7\uABE9-\uABEA\uABEC] 547 | 548 | // Mark, Nonspacing 549 | Mn 550 | = 
[\u0300-\u036F\u0483-\u0487\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D3-\u08E1\u08E3-\u0902\u093A\u093C\u0941-\u0948\u094D\u0951-\u0957\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u09FE\u0A01-\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A51\u0A70-\u0A71\u0A75\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0AE2-\u0AE3\u0AFA-\u0AFF\u0B01\u0B3C\u0B3F\u0B41-\u0B44\u0B4D\u0B56\u0B62-\u0B63\u0B82\u0BC0\u0BCD\u0C00\u0C04\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C62-\u0C63\u0C81\u0CBC\u0CBF\u0CC6\u0CCC-\u0CCD\u0CE2-\u0CE3\u0D00-\u0D01\u0D3B-\u0D3C\u0D41-\u0D44\u0D4D\u0D62-\u0D63\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032-\u1037\u1039-\u103A\u103D-\u103E\u1058-\u1059\u105E-\u1060\u1071-\u1074\u1082\u1085-\u1086\u108D\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17B4-\u17B5\u17B7-\u17BD\u17C6\u17C9-\u17D3\u17DD\u180B-\u180D\u1885-\u1886\u18A9\u1920-\u1922\u1927-\u1928\u1932\u1939-\u193B\u1A17-\u1A18\u1A1B\u1A56\u1A58-\u1A5E\u1A60\u1A62\u1A65-\u1A6C\u1A73-\u1A7C\u1A7F\u1AB0-\u1ABD\u1B00-\u1B03\u1B34\u1B36-\u1B3A\u1B3C\u1B42\u1B6B-\u1B73\u1B80-\u1B81\u1BA2-\u1BA5\u1BA8-\u1BA9\u1BAB-\u1BAD\u1BE6\u1BE8-\u1BE9\u1BED\u1BEF-\u1BF1\u1C2C-\u1C33\u1C36-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE0\u1CE2-\u1CE8\u1CED\u1CF4\u1CF8-\u1CF9\u1DC0-\u1DF9\u1DFB-\u1DFF\u20D0-\u20DC\u20E1\u20E5-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302D\u3099-\u309A\uA66F\uA674-\uA67D\uA69E-\uA69F\uA6F0-\uA6F1\uA802\uA806\uA80B\uA825-\uA826\uA8C4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA951\uA980-\uA982\uA9B3\uA9B6-\uA9B9\uA9BC\uA9E5\uAA29-\uAA2E\uAA31-\uAA3
2\uAA35-\uAA36\uAA43\uAA4C\uAA7C\uAAB0\uAAB2-\uAAB4\uAAB7-\uAAB8\uAABE-\uAABF\uAAC1\uAAEC-\uAAED\uAAF6\uABE5\uABE8\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F] 551 | 552 | // Number, Decimal Digit 553 | Nd 554 | = [\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19] 555 | 556 | // Number, Letter 557 | Nl 558 | = [\u16EE-\u16F0\u2160-\u2182\u2185-\u2188\u3007\u3021-\u3029\u3038-\u303A\uA6E6-\uA6EF] 559 | 560 | // Punctuation, Connector 561 | Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F] 562 | 563 | // Separator, Space 564 | Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] 565 | 566 | // Skipped 567 | 568 | __ = (WhiteSpace / LineTerminatorSequence / Comment)* 569 | 570 | _ = (WhiteSpace / MultiLineCommentNoLineTerminator)* 571 | 572 | // Automatic Semicolon Insertion 573 | 574 | EOS 575 | = (__ ";")+ 576 | / _ SingleLineComment? LineTerminatorSequence 577 | / __ EOF 578 | 579 | EOF = !. 580 | -------------------------------------------------------------------------------- /src/libs/parser.ts: -------------------------------------------------------------------------------- 1 | // This file needs to be here because typescript does not know how to use babel's transpiler 2 | // to directly load Pegjs grammars. 
/**
 * Wrap an already-printed document in parentheses.
 *
 * The result is a single `group` made of `(`, the indented contents
 * (preceded by a `softline`), another `softline`, and `)`.
 *
 * @param doc - the printed contents to place between the parentheses
 * @returns the parenthesised document
 */
function wrapInParenGroup(doc: Doc): Doc {
    const indentedContents = indent([softline, doc]);
    const parts: Doc[] = ["(", indentedContents, softline, ")"];
    return group(parts);
}
/**
 * Decide whether the printed form of `node.expression` has to be placed
 * in parentheses, either because it is required or because it spares the
 * reader from having to remember operator precedence.
 *
 * The rules are checked in order; the first one that matches wins.
 *
 * @param node - the parent node whose `expression` child is being printed
 * @returns `true` if the child should be parenthesised, `false` otherwise
 *          (including when the node has no `expression` at all)
 */
function nodeExpressionNeedsWrapping(node: AstNode) {
    if (!("expression" in node) || !node.expression) {
        return false;
    }

    const child = node.expression;
    const childIsSuffix = isSuffixOperator(child);

    if (childIsSuffix) {
        // `&foo?` is printed as `&(foo?)`. The `$` prefix is the exception:
        // `$foo*` is considered clear enough without parentheses.
        if (isPrefixOperator(node) && node.type !== "text") {
            return true;
        }
        // A suffix operator parenthesises its own argument when needed, so a
        // label in front of it does not need a second pair.
        if (node.type === "labeled") {
            return false;
        }
    }

    // A suffix operator applied to another prefix/suffix operator must be
    // parenthesised (e.g. `($"x"+)?`); otherwise two suffix operators could
    // end up next to each other.
    if (isSuffixOperator(node) && (childIsSuffix || isPrefixOperator(child))) {
        return true;
    }

    // `a:Rule {return a}` must stay as is and not turn into
    // `(a:Rule) {return a}`.
    if (node.type === "action" && child.type === "labeled") {
        return false;
    }

    // Every remaining `choice`, `labeled` or `action` child gets parentheses.
    const alwaysWrappedChildTypes = ["choice", "labeled", "action"];
    return alwaysWrappedChildTypes.includes(child.type);
}
117 | export const printPegjsAst: PrinterPrint = (path, options, print) => { 118 | const node = path.node; 119 | if (!node) { 120 | console.warn("Got `undefined` node while printing"); 121 | return ""; 122 | } 123 | 124 | const type = node.type; 125 | switch (type) { 126 | case "grammar": { 127 | // This is the root node of a Pegjs grammar 128 | // A `hardline` is inserted at the end so that any trailing comments 129 | // are printed 130 | const body: Doc[] = [ 131 | join([hardline, hardline], path.map(print, "rules")), 132 | hardline, 133 | ]; 134 | 135 | if (node.initializer) { 136 | body.unshift( 137 | path.call(print, "initializer"), 138 | hardline, 139 | hardline, 140 | ); 141 | } 142 | 143 | if (node.ginitializer) { 144 | body.unshift( 145 | path.call(print, "ginitializer"), 146 | hardline, 147 | hardline, 148 | ); 149 | } 150 | 151 | return body; 152 | } 153 | case "rule": { 154 | const lhs: Doc[] = [node.name]; 155 | if (node.displayName) { 156 | lhs.push(" ", path.call(print, "displayName")); 157 | } 158 | 159 | const rhs = [ 160 | line, 161 | path.call(print, "delimiter"), 162 | " ", 163 | path.call(print, "expression"), 164 | ]; 165 | return group(lhs.concat(indent(rhs))); 166 | } 167 | case "rule_ref": 168 | return node.name; 169 | 170 | // This is a string a quoted string. E.g., literally `"abc"` 171 | case "stringliteral": 172 | return util.makeString(node.value, '"'); 173 | 174 | case "delimiter": 175 | return node.value; 176 | 177 | case "any": 178 | return "."; 179 | 180 | case "choice": { 181 | const rhs = path.map(print, "alternatives"); 182 | if (rhs.length === 0) { 183 | return ""; 184 | } 185 | // Delimiters (i.e., "/") are theoretically all the same, 186 | // but they may have comments surrounding them. To preserve these 187 | // comments, we actually print them. 
188 | const delimiters = path.map(print, "delimiters"); 189 | 190 | const body = [rhs[0]]; 191 | for (let i = 0; i < delimiters.length; i++) { 192 | body.push(line, delimiters[i], " ", rhs[i + 1]); 193 | } 194 | 195 | const parent = path.getParentNode(); 196 | if (parent && parent.type === "rule") { 197 | // Rules are the top-level objects of a grammar. If we are the child 198 | // of a rule, we want to line-break no matter what. 199 | body.push(breakParent); 200 | } 201 | 202 | return body; 203 | } 204 | case "literal": 205 | if (node.ignoreCase) { 206 | return [util.makeString(node.value, '"'), "i"]; 207 | } 208 | return util.makeString(node.value, '"'); 209 | 210 | case "group": 211 | return wrapInParenGroup(path.call(print, "expression")); 212 | 213 | case "sequence": { 214 | let body = path.map(print, "elements"); 215 | // Any `action` or `choice` that appears in a sequence needs to 216 | // be wrapped in parens. 217 | body = body.map((printed, i) => { 218 | const child = node.elements[i]; 219 | if (["action", "choice"].includes(child.type)) { 220 | return wrapInParenGroup(printed); 221 | } 222 | return printed; 223 | }); 224 | return group(indent(join(line, body))); 225 | } 226 | 227 | case "labeled": { 228 | const label = node.label; 229 | let rhs = path.call(print, "expression"); 230 | if (nodeExpressionNeedsWrapping(node)) { 231 | rhs = wrapInParenGroup(rhs); 232 | } 233 | let lhs = []; 234 | if (node.pick) { 235 | lhs.push("@"); 236 | } 237 | if (label) { 238 | lhs.push(label, ":"); 239 | } 240 | return [...lhs, rhs]; 241 | } 242 | // suffix operators 243 | case "optional": 244 | case "zero_or_more": 245 | case "one_or_more": { 246 | const suffix = SUFFIX_MAP[node.type]; 247 | const body = path.call(print, "expression"); 248 | if (nodeExpressionNeedsWrapping(node)) { 249 | return [wrapInParenGroup(body), suffix]; 250 | } 251 | return [body, suffix]; 252 | } 253 | // prefix operators 254 | case "text": 255 | case "simple_and": 256 | case "simple_not": { 
257 | const prefix = PREFIX_MAP[node.type]; 258 | if (nodeExpressionNeedsWrapping(node)) { 259 | return [ 260 | prefix, 261 | wrapInParenGroup(path.call(print, "expression")), 262 | ]; 263 | } 264 | return [prefix, path.call(print, "expression")]; 265 | } 266 | // Things in square brackets (e.g. `[a-zUVW]`) 267 | case "class": { 268 | const prefix = node.inverted ? "^" : ""; 269 | const suffix = node.ignoreCase ? "i" : ""; 270 | const lhs = node.parts.map((part) => { 271 | if (Array.isArray(part)) { 272 | return part.join("-"); 273 | } 274 | return part; 275 | }); 276 | 277 | return ["[", prefix, ...lhs, "]", suffix]; 278 | } 279 | case "repeated": { 280 | let body = path.call(print, "expression"); 281 | if (nodeExpressionNeedsWrapping(node)) { 282 | body = wrapInParenGroup(body); 283 | } 284 | let min = node.min != null ? path.call(print, "min") : ""; 285 | if (min === "0") { 286 | // A minimum value of zero is the same as not listing an explicit minimum at all. 287 | min = ""; 288 | } 289 | const max = node.max != null ? path.call(print, "max") : ""; 290 | let range: Doc[] = [min, "..", max]; 291 | if (node.min == null) { 292 | range = [max]; 293 | } 294 | if (node.min == null && node.max == null) { 295 | range = [".."]; 296 | } 297 | let delim: Doc[] = []; 298 | if (node.delimiter) { 299 | delim.push(",", " ", path.call(print, "delimiter")); 300 | } 301 | return [body, "|", ...range, ...delim, "|"]; 302 | } 303 | case "constant": 304 | case "variable": 305 | return node.value != null ? String(node.value) : ""; 306 | 307 | case "function": 308 | case "initializer": 309 | case "ginitializer": 310 | case "action": 311 | case "comment": 312 | case "semantic_and": 313 | case "semantic_not": 314 | console.warn( 315 | `Encountered node of type "${type}"; this type of node should have been processed by its parent. 
/**
 * Prints the nodes that carry embedded code blocks (actions, semantic
 * predicates, initializers and `function` nodes).
 *
 * Prettier calls this for every node. The returned async callback resolves
 * to `undefined` for nodes that have no code block (see `hasCodeBlock`), in
 * which case Prettier falls back to the regular Pegjs printer. Otherwise the
 * embedded code is formatted through `textToDoc` with the parser named by
 * the `actionParser` option (default `"babel-ts"`).
 *
 * @param path - path to the node currently being printed
 * @param options - Prettier options; `actionParser` is read from here
 * @returns an async callback producing the printed Doc, or `undefined`
 */
export const embed: PrinterEmbed = (path: AstPath, options) => {
    return async (textToDoc, print) => {
        const node = path.node;
        if (!hasCodeBlock(node)) {
            // Resolving to `undefined` tells Prettier to use the default
            // printer (in this case, the Pegjs printer).
            return undefined;
        }

        /**
         * Format code, and wrap it in `{ }` or `{{ }}`.
         * If the code cannot be formatted, a warning is logged and the code
         * is emitted verbatim between the braces.
         *
         * @param {string} code - text of the embedded code to format.
         * @param {boolean} double - `true` for double braces, `false` for single braces
         */
        async function wrapCode(code: string, double = false): Promise<Doc> {
            const parser = (options as any).actionParser || "babel-ts";
            const open = double ? "{{" : "{";
            const close = double ? "}}" : "}";
            try {
                // By default, prettier will add a hardline at the end of a parsed document.
                // We don't want this hardline in embedded code.
                const formatted = utils.stripTrailingHardline(
                    await textToDoc(code, { parser }),
                );
                return group([open, indent([line, formatted]), line, close]);
            } catch (e: unknown) {
                // Anything can be thrown, not just `Error` instances; avoid
                // reading `.message` from a value that may not have one.
                const reason = e instanceof Error ? e.message : String(e);
                console.warn(
                    `Could not process the following code with the '${parser}' parser, so leaving unformatted. Code:`,
                    JSON.stringify(code),
                    `Error message:`,
                    reason,
                );
                return [open, code, close];
            }
        }

        switch (node.type) {
            case "action": {
                const printed = path.call(print, "expression");
                const expression = nodeExpressionNeedsWrapping(node)
                    ? wrapInParenGroup(printed)
                    : printed;
                return [expression, indent([" ", await wrapCode(node.code)])];
            }
            case "semantic_and":
            case "semantic_not": {
                const prefix = SEMANTIC_SUFFIX_MAP[node.type];
                return [prefix, indent([" ", await wrapCode(node.code)])];
            }
            case "function":
                return wrapCode(node.value);
            case "initializer":
                return wrapCode(node.code);
            case "ginitializer":
                // Global initializers are delimited by double braces.
                return wrapCode(node.code, true);
            default:
                return undefined;
        }
    };
};
/**
 * Prettier hook for comments that sit on a line of their own (only
 * whitespace before/after). It may re-assign the comment to another node.
 *
 * @returns {boolean} `true` if the comment was handled, `false` to pass through to Prettier's default handler
 */
const handleOwnLineComment: PrinterOwnLine = (
    comment,
    text,
    options,
    ast,
    isLastComment,
): boolean => {
    const wantsDebugOutput =
        (options as any).debugComments && comment.value.includes("debug");
    if (wantsDebugOutput) {
        console.log("handleOwnLineComment", comment);
    }

    const enclosing = comment.enclosingNode;
    if (!enclosing) {
        return false;
    }

    switch (enclosing.type) {
        case "choice": {
            // An own-line comment inside a `choice` most likely describes one
            // of the alternatives, so it is attached to the end of the node
            // that comes before it.
            const previous = comment.precedingNode;
            if (previous) {
                util.addTrailingComment(previous, comment);
                return true;
            }
            return false;
        }
        case "rule": {
            // A comment placed before the `=` of a rule, e.g.
            //    abc
            //      // my comment
            //      = some rule
            const next = comment.followingNode;
            if (next && next.type === "delimiter") {
                util.addLeadingComment(next, comment);
                return true;
            }
            return false;
        }
        default:
            return false;
    }
};
/**
 * Prettier hook for comments at the end of a line (code precedes the
 * comment on the same line and nothing follows it). It may re-assign the
 * comment to another node.
 *
 * @returns {boolean} `true` if the comment was handled, `false` to pass through to Prettier's default handler
 */
const handleEndOfLineComment: PrinterEndOfLine = (
    comment,
    text,
    options,
    ast,
    isLastComment,
): boolean => {
    const wantsDebugOutput =
        (options as any).debugComments && comment.value.includes("debug");
    if (wantsDebugOutput) {
        console.log("handleEndOfLineComment", comment);
    }

    const enclosing = comment.enclosingNode;
    if (!enclosing) {
        return false;
    }
    if (enclosing.type !== "choice" && enclosing.type !== "rule") {
        return false;
    }
    // Only single-line comments are re-assigned here.
    if (comment.multiline) {
        return false;
    }

    const previous = comment.precedingNode;
    if (previous && previous.type === "delimiter") {
        // A single-line comment directly after a delimiter is printed
        // *before* that delimiter instead.
        util.addLeadingComment(previous, comment);
        return true;
    }

    const next = comment.followingNode;
    if (next && next.type === "stringliteral") {
        // Reformat rules like
        //    a //some comment
        //      "label" = y
        // to
        //    a "label" // some comment
        //      = y
        util.addTrailingComment(next, comment);
        return true;
    }

    return false;
};
/**
 * Prettier hook for comments that are neither `ownLine` nor `endOfLine`.
 * No comment is re-assigned here; the hook only offers optional debug
 * logging and then defers to Prettier.
 *
 * @returns {boolean} always `false`, so Prettier's default handler is used
 */
const handleRemainingComment: PrinterRemaining = (
    comment,
    text,
    options,
    ast,
    isLastComment,
): boolean => {
    const wantsDebugOutput =
        (options as any).debugComments && comment.value.includes("debug");
    if (wantsDebugOutput) {
        console.log("handleRemainingComment", comment);
    }
    return false;
};
/**
 * Format `source` code using Prettier to format/render
 * the code.
 *
 * The Pegjs, babel and estree plugins are always loaded; any plugins passed
 * in `options.plugins` are kept in front of them. The caller's `options`
 * object (including its `plugins` array) is not modified.
 *
 * @export
 * @param [source=""] - code to be formatted
 * @param [options={}] - Prettier options object (you can set `printWidth` here)
 * @returns formatted code
 */
async function printPrettier(
    source = "",
    options: Options & { actionParser?: string } = {},
) {
    // Build a fresh array instead of pushing onto `options.plugins`:
    // pushing would mutate the caller's array, so calling this function
    // repeatedly with the same options object would keep appending
    // duplicate plugin entries.
    const plugins: NonNullable<Options["plugins"]> = [
        ...(options.plugins || []),
        prettierPluginPegjs,
        babelPlugin,
        estreePlugin,
    ];

    return await Prettier.format(source, {
        printWidth: 80,
        useTabs: false,
        tabWidth: 2,
        ...options,
        // These two always win over whatever the caller passed in.
        parser: "pegjs-parser",
        plugins,
    });
}
10 | " 11 | `; 12 | 13 | exports[`Printer > Print repetition rules 1`] = ` 14 | "start = \\"a\\"|..| 15 | " 16 | `; 17 | 18 | exports[`Printer > Print repetition rules 2`] = ` 19 | "start = \\"a\\"|2..| 20 | " 21 | `; 22 | 23 | exports[`Printer > Print repetition rules 3`] = ` 24 | "start = \\"a\\"|2..5| 25 | " 26 | `; 27 | 28 | exports[`Printer > Print repetition rules 4`] = ` 29 | "start = \\"a\\"|..5| 30 | " 31 | `; 32 | 33 | exports[`Printer > Print repetition rules 5`] = ` 34 | "start = \\"a\\"|2..5, \\"x\\"| 35 | " 36 | `; 37 | 38 | exports[`Printer > Print repetition rules 6`] = ` 39 | "start = \\"a\\"|5| 40 | " 41 | `; 42 | 43 | exports[`Printer > Print repetition rules 7`] = ` 44 | "start = x:\\"a\\"|{ return parseInt(x, 10); }| 45 | " 46 | `; 47 | 48 | exports[`Printer > Print repetition rules 8`] = ` 49 | "start = x:\\"a\\"|{ return parseInt(x, 10); }..7| 50 | " 51 | `; 52 | 53 | exports[`Printer > Print repetition with suffix operator 1`] = ` 54 | "start = (\\"a\\"|..|)? 55 | " 56 | `; 57 | 58 | exports[`Printer > Print repetition with suffix operator 2`] = ` 59 | "start = (\\"a\\"|..|)|..| 60 | " 61 | `; 62 | 63 | exports[`Printer > Print repetition with suffix operator 3`] = ` 64 | "start = (\\"a\\" \\"b\\")|..| 65 | " 66 | `; 67 | 68 | exports[`Printer > Print typescript action 1`] = ` 69 | "start = \\"a\\" { return 5 as any; } 70 | " 71 | `; 72 | 73 | exports[`Printer > Prints grammars with actions 1`] = ` 74 | "start = (a:\\"a\\") & { return a === \\"a\\"; } 75 | " 76 | `; 77 | 78 | exports[`Printer > Prints grammars with actions 2`] = ` 79 | "start = (a:\\"a\\")? 
& { return a === \\"a\\"; } 80 | " 81 | `; 82 | 83 | exports[`Printer > Prints grammars with actions 3`] = ` 84 | "start = (a:\\"a\\")* & { return a === \\"a\\"; } 85 | " 86 | `; 87 | 88 | exports[`Printer > Prints grammars with actions 4`] = ` 89 | "start = (a:\\"a\\")+ & { return a === \\"a\\"; } 90 | " 91 | `; 92 | 93 | exports[`Printer > Prints grammars with actions 5`] = ` 94 | "start = $(a:\\"a\\") & { return a === \\"a\\"; } 95 | " 96 | `; 97 | 98 | exports[`Printer > Prints grammars with actions 6`] = ` 99 | "start = &(a:\\"a\\") \\"a\\" & { return a === \\"a\\"; } 100 | " 101 | `; 102 | 103 | exports[`Printer > Prints grammars with actions 7`] = ` 104 | "start = !(a:\\"a\\") \\"b\\" & { return a === \\"a\\"; } 105 | " 106 | `; 107 | 108 | exports[`Printer > Prints grammars with actions 8`] = ` 109 | "start = b:(a:\\"a\\") & { return a === \\"a\\"; } 110 | " 111 | `; 112 | 113 | exports[`Printer > Prints grammars with actions 9`] = ` 114 | "start = (\\"a\\" b:\\"b\\" \\"c\\") & { return b === \\"b\\"; } 115 | " 116 | `; 117 | 118 | exports[`Printer > Prints grammars with actions 10`] = ` 119 | "start = (a:\\"a\\" { return a; }) & { return a === \\"a\\"; } 120 | " 121 | `; 122 | 123 | exports[`Printer > Prints grammars with actions 11`] = ` 124 | "start = (\\"a\\" / b:\\"b\\" / \\"c\\") & { return b === \\"b\\"; } 125 | " 126 | `; 127 | 128 | exports[`Printer > Prints grammars with actions 12`] = ` 129 | "start = (a:\\"a\\") ! { return a !== \\"a\\"; } 130 | " 131 | `; 132 | 133 | exports[`Printer > Prints grammars with actions 13`] = ` 134 | "start = (a:\\"a\\")? ! { return a !== \\"a\\"; } 135 | " 136 | `; 137 | 138 | exports[`Printer > Prints grammars with actions 14`] = ` 139 | "start = (a:\\"a\\")* ! { return a !== \\"a\\"; } 140 | " 141 | `; 142 | 143 | exports[`Printer > Prints grammars with actions 15`] = ` 144 | "start = (a:\\"a\\")+ ! 
{ return a !== \\"a\\"; } 145 | " 146 | `; 147 | 148 | exports[`Printer > Prints grammars with actions 16`] = ` 149 | "start = $(a:\\"a\\") ! { return a !== \\"a\\"; } 150 | " 151 | `; 152 | 153 | exports[`Printer > Prints grammars with actions 17`] = ` 154 | "start = &(a:\\"a\\") \\"a\\" ! { return a !== \\"a\\"; } 155 | " 156 | `; 157 | 158 | exports[`Printer > Prints grammars with actions 18`] = ` 159 | "start = !(a:\\"a\\") \\"b\\" ! { return a !== \\"a\\"; } 160 | " 161 | `; 162 | 163 | exports[`Printer > Prints grammars with actions 19`] = ` 164 | "start = b:(a:\\"a\\") ! { return a !== \\"a\\"; } 165 | " 166 | `; 167 | 168 | exports[`Printer > Prints grammars with actions 20`] = ` 169 | "start = (\\"a\\" b:\\"b\\" \\"c\\") ! { return b !== \\"b\\"; } 170 | " 171 | `; 172 | 173 | exports[`Printer > Prints grammars with actions 21`] = ` 174 | "start = (a:\\"a\\" { return a; }) ! { return a !== \\"a\\"; } 175 | " 176 | `; 177 | 178 | exports[`Printer > Prints grammars with actions 22`] = ` 179 | "start = (\\"a\\" / b:\\"b\\" / \\"c\\") ! { return b !== \\"b\\"; } 180 | " 181 | `; 182 | 183 | exports[`Printer > Prints grammars with actions 23`] = ` 184 | "start = (a:\\"a\\") { return a; } 185 | " 186 | `; 187 | 188 | exports[`Printer > Prints grammars with actions 24`] = ` 189 | "start = (a:\\"a\\")? 
{ return a; } 190 | " 191 | `; 192 | 193 | exports[`Printer > Prints grammars with actions 25`] = ` 194 | "start = (a:\\"a\\")* { return a; } 195 | " 196 | `; 197 | 198 | exports[`Printer > Prints grammars with actions 26`] = ` 199 | "start = (a:\\"a\\")+ { return a; } 200 | " 201 | `; 202 | 203 | exports[`Printer > Prints grammars with actions 27`] = ` 204 | "start = $(a:\\"a\\") { return a; } 205 | " 206 | `; 207 | 208 | exports[`Printer > Prints grammars with actions 28`] = ` 209 | "start = &(a:\\"a\\") \\"a\\" { return a; } 210 | " 211 | `; 212 | 213 | exports[`Printer > Prints grammars with actions 29`] = ` 214 | "start = !(a:\\"a\\") \\"b\\" { return a; } 215 | " 216 | `; 217 | 218 | exports[`Printer > Prints grammars with actions 30`] = ` 219 | "start = b:(a:\\"a\\") { return a; } 220 | " 221 | `; 222 | 223 | exports[`Printer > Prints grammars with actions 31`] = ` 224 | "start = (\\"a\\" b:\\"b\\" \\"c\\") { return b; } 225 | " 226 | `; 227 | 228 | exports[`Printer > Prints grammars with actions 32`] = ` 229 | "start = (a:\\"a\\" { return a; }) { return a; } 230 | " 231 | `; 232 | 233 | exports[`Printer > Prints grammars with actions 33`] = ` 234 | "start = (\\"a\\" / b:\\"b\\" / \\"c\\") { return b; } 235 | " 236 | `; 237 | 238 | exports[`Printer > Prints grammars with comments 1`] = ` 239 | "start 240 | // a comment 241 | = a 242 | / b 243 | " 244 | `; 245 | 246 | exports[`Printer > Prints grammars with comments 2`] = ` 247 | "start 248 | = a // a comment 249 | / b 250 | " 251 | `; 252 | 253 | exports[`Printer > Prints grammars with comments 3`] = ` 254 | "start 255 | = a 256 | // a comment 257 | / b 258 | " 259 | `; 260 | 261 | exports[`Printer > Prints grammars with comments 4`] = ` 262 | "start 263 | = a 264 | / b 265 | // a comment 266 | " 267 | `; 268 | 269 | exports[`Printer > Prints grammars with comments 5`] = ` 270 | "// a comment 271 | start 272 | = a 273 | / b 274 | " 275 | `; 276 | 277 | exports[`Printer > Prints grammars with comments 6`] = ` 
278 | "start 279 | /*inline comment*/ = a 280 | / b 281 | " 282 | `; 283 | 284 | exports[`Printer > Prints grammars with comments 7`] = ` 285 | "start 286 | = a 287 | / /*inline comment*/ b 288 | " 289 | `; 290 | 291 | exports[`Printer > Prints grammars with comments 8`] = ` 292 | "start 293 | = a 294 | / x /*inline comment*/ b 295 | " 296 | `; 297 | 298 | exports[`Printer > Prints grammars with comments 9`] = ` 299 | "start /*inline comment*/ \\"Start Label\\" 300 | = a 301 | / b 302 | " 303 | `; 304 | 305 | exports[`Printer > Prints grammars with initializer 1`] = ` 306 | "{ console.log(\\"initializing\\"); } 307 | 308 | Rule 309 | = a 310 | / b 311 | / c 312 | " 313 | `; 314 | 315 | exports[`Printer > Prints grammars with initializer 2`] = ` 316 | "{ console.log(\\"initializing\\"); } 317 | 318 | Rule 319 | = a 320 | / b 321 | / c 322 | " 323 | `; 324 | 325 | exports[`Printer > Prints grammars with initializer 3`] = ` 326 | "{{ console.log(\\"initializing global\\"); }} 327 | 328 | Rule 329 | = a 330 | / b 331 | / c 332 | " 333 | `; 334 | 335 | exports[`Printer > Prints grammars with initializer 4`] = ` 336 | "{{ console.log(\\"initializing global\\"); }} 337 | 338 | Rule 339 | = a 340 | / b 341 | / c 342 | " 343 | `; 344 | 345 | exports[`Printer > Prints grammars with initializer 5`] = ` 346 | "{{ console.log(\\"initializing global\\"); }} 347 | 348 | { console.log(\\"initializing local\\"); } 349 | 350 | Rule 351 | = a 352 | / b 353 | / c 354 | " 355 | `; 356 | 357 | exports[`Printer > Prints grammars with initializer 6`] = ` 358 | "{{ console.log(\\"initializing global\\"); }} 359 | 360 | { console.log(\\"initializing local\\"); } 361 | 362 | Rule 363 | = a 364 | / b 365 | / c 366 | " 367 | `; 368 | 369 | exports[`Printer > Prints grammars without actions 1`] = ` 370 | "Rule 371 | = a 372 | / b 373 | / c 374 | " 375 | `; 376 | 377 | exports[`Printer > Prints grammars without actions 2`] = ` 378 | "Rule = a 379 | " 380 | `; 381 | 382 | exports[`Printer > 
describe("End to end", () => {
  //
  // We compile a grammar with Pegjs, then pretty print it and compile it
  // again and make sure the results match. Since the pretty printing shouldn't
  // change the AST, the results should always match.
  //
  // Dynamically generate tests for each of the listed files.
  for (const file of [
    "arithmetic.pegjs",
    "pegjs-modified.pegjs",
    "javascript.pegjs",
    "latex.pegjs",
    "json.pegjs",
    "css.pegjs",
  ]) {
    it(`Doesn't change the grammar of ${file}`, async () => {
      const originalGrammar = (
        await fs.readFile(path.join(__dirname, "./grammars/", file))
      ).toString();
      const prettyGrammar = await printPrettier(originalGrammar);

      let originalParser = peggy.generate(originalGrammar, {
        output: "source",
      });
      let prettyParser = peggy.generate(prettyGrammar, {
        output: "source",
      });

      // We run the parsers through prettier again to normalize the output.
      // `Prettier.format` returns a promise, so it has to be awaited:
      // without `await` the assertion below compared two Promise objects
      // instead of the generated parser sources.
      originalParser = await Prettier.format(originalParser, {
        parser: "babel",
      });
      prettyParser = await Prettier.format(prettyParser, { parser: "babel" });

      expect(originalParser).toEqual(prettyParser);
    });
    it(`Doesn't change formatting of ${file}`, async () => {
      const originalGrammar = (
        await fs.readFile(path.join(__dirname, "./grammars/", file))
      ).toString();
      const prettyGrammar = await printPrettier(originalGrammar);

      // The fixture files are stored already formatted, so formatting them
      // again must be a no-op.
      expect(prettyGrammar).toEqual(originalGrammar);
    });
  }
});
it("Invalid javascript is left unformatted", async () => {
  const source = 'a \n = "a" { const 7}';
  const origWarn = console.warn;
  let warnings = 0;
  // Mock `console.warn` so that its output doesn't clutter up the test output
  global.console.warn = () => {
    ++warnings;
  };
  try {
    // An `async` function never throws synchronously (it returns a rejected
    // promise instead), so `expect(async () => ...).not.toThrow()` could not
    // fail. Assert on the promise itself.
    await expect(printPrettier(source)).resolves.toBeDefined();
    expect(await printPrettier(source)).toEqual('a = "a" { const 7}\n');
    // The grammar is formatted twice above, as in the original test, so the
    // expected count is unchanged.
    // NOTE(review): presumably one warning is emitted per formatting run -
    // confirm against the plugin's `embed` implementation.
    expect(warnings).toEqual(2);
  } finally {
    // Restore the real `console.warn` even when an assertion above fails, so
    // that later tests are not left with the mock installed.
    global.console.warn = origWarn;
  }
});
5 | 6 | Expression 7 | = head:Term tail:(_ ("+" / "-") _ Term)* { 8 | return tail.reduce(function (result, element) { 9 | if (element[1] === "+") { 10 | return result + element[3]; 11 | } 12 | if (element[1] === "-") { 13 | return result - element[3]; 14 | } 15 | }, head); 16 | } 17 | 18 | Term 19 | = head:Factor tail:(_ ("*" / "/") _ Factor)* { 20 | return tail.reduce(function (result, element) { 21 | if (element[1] === "*") { 22 | return result * element[3]; 23 | } 24 | if (element[1] === "/") { 25 | return result / element[3]; 26 | } 27 | }, head); 28 | } 29 | 30 | Factor 31 | = "(" _ expr:Expression _ ")" { return expr; } 32 | / Integer 33 | 34 | Integer "integer" = _ [0-9]+ { return parseInt(text(), 10); } 35 | 36 | _ "whitespace" = [ \t\n\r]* 37 | -------------------------------------------------------------------------------- /src/tests/grammars/css.pegjs: -------------------------------------------------------------------------------- 1 | // CSS Grammar 2 | // =========== 3 | // 4 | // Based on grammar from CSS 2.1 specification [1] (including the errata [2]). 5 | // Generated parser builds a syntax tree composed of nested JavaScript objects, 6 | // vaguely inspired by CSS DOM [3]. The CSS DOM itself wasn't used as it is not 7 | // expressive enough (e.g. selectors are reflected as text, not structured 8 | // objects) and somewhat cumbersome. 9 | // 10 | // Limitations: 11 | // 12 | // * Many errors which should be recovered from according to the specification 13 | // (e.g. malformed declarations or unexpected end of stylesheet) are fatal. 14 | // This is a result of straightforward rewrite of the CSS grammar to PEG.js. 15 | // 16 | // [1] http://www.w3.org/TR/2011/REC-CSS2-20110607 17 | // [2] http://www.w3.org/Style/css2-updates/REC-CSS2-20110607-errata.html 18 | // [3] http://www.w3.org/TR/DOM-Level-2-Style/css.html 19 | 20 | { 21 | function extractOptional(optional, index) { 22 | return optional ? 
optional[index] : null; 23 | } 24 | 25 | function extractList(list, index) { 26 | return list.map(function (element) { 27 | return element[index]; 28 | }); 29 | } 30 | 31 | function buildList(head, tail, index) { 32 | return [head].concat(extractList(tail, index)).filter(function (element) { 33 | return element !== null; 34 | }); 35 | } 36 | 37 | function buildExpression(head, tail) { 38 | return tail.reduce(function (result, element) { 39 | return { 40 | type: "Expression", 41 | operator: element[0], 42 | left: result, 43 | right: element[1], 44 | }; 45 | }, head); 46 | } 47 | } 48 | 49 | start = stylesheet:stylesheet comment* { return stylesheet; } 50 | 51 | // ----- G.1 Grammar ----- 52 | 53 | stylesheet 54 | = charset:(CHARSET_SYM STRING ";")? 55 | (S / CDO / CDC)* 56 | imports:(import (CDO S* / CDC S*)*)* 57 | rules:((ruleset / media / page) (CDO S* / CDC S*)*)* { 58 | return { 59 | type: "StyleSheet", 60 | charset: extractOptional(charset, 1), 61 | imports: extractList(imports, 0), 62 | rules: extractList(rules, 0), 63 | }; 64 | } 65 | 66 | import 67 | = IMPORT_SYM S* href:(STRING / URI) S* media:media_list? ";" S* { 68 | return { 69 | type: "ImportRule", 70 | href: href, 71 | media: media !== null ? media : [], 72 | }; 73 | } 74 | 75 | media 76 | = MEDIA_SYM S* media:media_list "{" S* rules:ruleset* "}" S* { 77 | return { 78 | type: "MediaRule", 79 | media: media, 80 | rules: rules, 81 | }; 82 | } 83 | 84 | media_list 85 | = head:medium tail:("," S* medium)* { return buildList(head, tail, 2); } 86 | 87 | medium = name:IDENT S* { return name; } 88 | 89 | page 90 | = PAGE_SYM 91 | S* 92 | selector:pseudo_page? 93 | "{" 94 | S* 95 | declarationsHead:declaration? 
96 | declarationsTail:(";" S* declaration?)* 97 | "}" 98 | S* { 99 | return { 100 | type: "PageRule", 101 | selector: selector, 102 | declarations: buildList(declarationsHead, declarationsTail, 2), 103 | }; 104 | } 105 | 106 | pseudo_page 107 | = ":" value:IDENT S* { return { type: "PseudoSelector", value: value }; } 108 | 109 | operator 110 | = "/" S* { return "/"; } 111 | / "," S* { return ","; } 112 | 113 | combinator 114 | = "+" S* { return "+"; } 115 | / ">" S* { return ">"; } 116 | 117 | property = name:IDENT S* { return name; } 118 | 119 | ruleset 120 | = selectorsHead:selector 121 | selectorsTail:("," S* selector)* 122 | "{" 123 | S* 124 | declarationsHead:declaration? 125 | declarationsTail:(";" S* declaration?)* 126 | "}" 127 | S* { 128 | return { 129 | type: "RuleSet", 130 | selectors: buildList(selectorsHead, selectorsTail, 2), 131 | declarations: buildList(declarationsHead, declarationsTail, 2), 132 | }; 133 | } 134 | 135 | selector 136 | = left:simple_selector S* combinator:combinator right:selector { 137 | return { 138 | type: "Selector", 139 | combinator: combinator, 140 | left: left, 141 | right: right, 142 | }; 143 | } 144 | / left:simple_selector S+ right:selector { 145 | return { 146 | type: "Selector", 147 | combinator: " ", 148 | left: left, 149 | right: right, 150 | }; 151 | } 152 | / selector:simple_selector S* { return selector; } 153 | 154 | simple_selector 155 | = element:element_name qualifiers:(id / class / attrib / pseudo)* { 156 | return { 157 | type: "SimpleSelector", 158 | element: element, 159 | qualifiers: qualifiers, 160 | }; 161 | } 162 | / qualifiers:(id / class / attrib / pseudo)+ { 163 | return { 164 | type: "SimpleSelector", 165 | element: "*", 166 | qualifiers: qualifiers, 167 | }; 168 | } 169 | 170 | id = id:HASH { return { type: "IDSelector", id: id }; } 171 | 172 | class = "." 
class_:IDENT { return { type: "ClassSelector", class: class_ }; } 173 | 174 | element_name 175 | = IDENT 176 | / "*" 177 | 178 | attrib 179 | = "[" 180 | S* 181 | attribute:IDENT 182 | S* 183 | operatorAndValue:(("=" / INCLUDES / DASHMATCH) S* (IDENT / STRING) S*)? 184 | "]" { 185 | return { 186 | type: "AttributeSelector", 187 | attribute: attribute, 188 | operator: extractOptional(operatorAndValue, 0), 189 | value: extractOptional(operatorAndValue, 2), 190 | }; 191 | } 192 | 193 | pseudo 194 | = ":" 195 | value:( 196 | name:FUNCTION S* params:(IDENT S*)? ")" { 197 | return { 198 | type: "Function", 199 | name: name, 200 | params: params !== null ? [params[0]] : [], 201 | }; 202 | } 203 | / IDENT 204 | ) { return { type: "PseudoSelector", value: value }; } 205 | 206 | declaration 207 | = name:property ":" S* value:expr prio:prio? { 208 | return { 209 | type: "Declaration", 210 | name: name, 211 | value: value, 212 | important: prio !== null, 213 | }; 214 | } 215 | 216 | prio = IMPORTANT_SYM S* 217 | 218 | expr = head:term tail:(operator? term)* { return buildExpression(head, tail); } 219 | 220 | term 221 | = quantity:(PERCENTAGE / LENGTH / EMS / EXS / ANGLE / TIME / FREQ / NUMBER) 222 | S* { 223 | return { 224 | type: "Quantity", 225 | value: quantity.value, 226 | unit: quantity.unit, 227 | }; 228 | } 229 | / value:STRING S* { return { type: "String", value: value }; } 230 | / value:URI S* { return { type: "URI", value: value }; } 231 | / function 232 | / hexcolor 233 | / value:IDENT S* { return { type: "Ident", value: value }; } 234 | 235 | function 236 | = name:FUNCTION S* params:expr ")" S* { 237 | return { type: "Function", name: name, params: params }; 238 | } 239 | 240 | hexcolor = value:HASH S* { return { type: "Hexcolor", value: value }; } 241 | 242 | // ----- G.2 Lexical scanner ----- 243 | 244 | // Macros 245 | 246 | h = [0-9a-f]i 247 | 248 | nonascii = [\x80-\uFFFF] 249 | 250 | unicode 251 | = "\\" digits:$(h h? h? h? h? h?) ("\r\n" / [ \t\r\n\f])? 
{ 252 | return String.fromCharCode(parseInt(digits, 16)); 253 | } 254 | 255 | escape 256 | = unicode 257 | / "\\" ch:[^\r\n\f0-9a-f]i { return ch; } 258 | 259 | nmstart 260 | = [_a-z]i 261 | / nonascii 262 | / escape 263 | 264 | nmchar 265 | = [_a-z0-9-]i 266 | / nonascii 267 | / escape 268 | 269 | string1 270 | = "\"" chars:([^\n\r\f\\"] / "\\" nl:nl { return ""; } / escape)* "\"" { 271 | return chars.join(""); 272 | } 273 | 274 | string2 275 | = "'" chars:([^\n\r\f\\'] / "\\" nl:nl { return ""; } / escape)* "'" { 276 | return chars.join(""); 277 | } 278 | 279 | comment = "/*" [^*]* "*"+ ([^/*] [^*]* "*"+)* "/" 280 | 281 | ident 282 | = prefix:$"-"? start:nmstart chars:nmchar* { 283 | return prefix + start + chars.join(""); 284 | } 285 | 286 | name = chars:nmchar+ { return chars.join(""); } 287 | 288 | num 289 | = [+-]? ([0-9]* "." [0-9]+ / [0-9]+) ("e" [+-]? [0-9]+)? { 290 | return parseFloat(text()); 291 | } 292 | 293 | string 294 | = string1 295 | / string2 296 | 297 | url = chars:([!#$%&*-\[\]-~] / nonascii / escape)* { return chars.join(""); } 298 | 299 | s = [ \t\r\n\f]+ 300 | 301 | w = s? 302 | 303 | nl 304 | = "\n" 305 | / "\r\n" 306 | / "\r" 307 | / "\f" 308 | 309 | A 310 | = "a"i 311 | / "\\" "0"? "0"? "0"? "0"? [\x41\x61] ("\r\n" / [ \t\r\n\f])? { return "a"; } 312 | 313 | C 314 | = "c"i 315 | / "\\" "0"? "0"? "0"? "0"? [\x43\x63] ("\r\n" / [ \t\r\n\f])? { return "c"; } 316 | 317 | D 318 | = "d"i 319 | / "\\" "0"? "0"? "0"? "0"? [\x44\x64] ("\r\n" / [ \t\r\n\f])? { return "d"; } 320 | 321 | E 322 | = "e"i 323 | / "\\" "0"? "0"? "0"? "0"? [\x45\x65] ("\r\n" / [ \t\r\n\f])? { return "e"; } 324 | 325 | G 326 | = "g"i 327 | / "\\" "0"? "0"? "0"? "0"? [\x47\x67] ("\r\n" / [ \t\r\n\f])? 328 | / "\\g"i { return "g"; } 329 | 330 | H 331 | = "h"i 332 | / "\\" "0"? "0"? "0"? "0"? [\x48\x68] ("\r\n" / [ \t\r\n\f])? 333 | / "\\h"i { return "h"; } 334 | 335 | I 336 | = "i"i 337 | / "\\" "0"? "0"? "0"? "0"? [\x49\x69] ("\r\n" / [ \t\r\n\f])? 
338 | / "\\i"i { return "i"; } 339 | 340 | K 341 | = "k"i 342 | / "\\" "0"? "0"? "0"? "0"? [\x4b\x6b] ("\r\n" / [ \t\r\n\f])? 343 | / "\\k"i { return "k"; } 344 | 345 | L 346 | = "l"i 347 | / "\\" "0"? "0"? "0"? "0"? [\x4c\x6c] ("\r\n" / [ \t\r\n\f])? 348 | / "\\l"i { return "l"; } 349 | 350 | M 351 | = "m"i 352 | / "\\" "0"? "0"? "0"? "0"? [\x4d\x6d] ("\r\n" / [ \t\r\n\f])? 353 | / "\\m"i { return "m"; } 354 | 355 | N 356 | = "n"i 357 | / "\\" "0"? "0"? "0"? "0"? [\x4e\x6e] ("\r\n" / [ \t\r\n\f])? 358 | / "\\n"i { return "n"; } 359 | 360 | O 361 | = "o"i 362 | / "\\" "0"? "0"? "0"? "0"? [\x4f\x6f] ("\r\n" / [ \t\r\n\f])? 363 | / "\\o"i { return "o"; } 364 | 365 | P 366 | = "p"i 367 | / "\\" "0"? "0"? "0"? "0"? [\x50\x70] ("\r\n" / [ \t\r\n\f])? 368 | / "\\p"i { return "p"; } 369 | 370 | R 371 | = "r"i 372 | / "\\" "0"? "0"? "0"? "0"? [\x52\x72] ("\r\n" / [ \t\r\n\f])? 373 | / "\\r"i { return "r"; } 374 | 375 | S_ 376 | = "s"i 377 | / "\\" "0"? "0"? "0"? "0"? [\x53\x73] ("\r\n" / [ \t\r\n\f])? 378 | / "\\s"i { return "s"; } 379 | 380 | T 381 | = "t"i 382 | / "\\" "0"? "0"? "0"? "0"? [\x54\x74] ("\r\n" / [ \t\r\n\f])? 383 | / "\\t"i { return "t"; } 384 | 385 | U 386 | = "u"i 387 | / "\\" "0"? "0"? "0"? "0"? [\x55\x75] ("\r\n" / [ \t\r\n\f])? 388 | / "\\u"i { return "u"; } 389 | 390 | X 391 | = "x"i 392 | / "\\" "0"? "0"? "0"? "0"? [\x58\x78] ("\r\n" / [ \t\r\n\f])? 393 | / "\\x"i { return "x"; } 394 | 395 | Z 396 | = "z"i 397 | / "\\" "0"? "0"? "0"? "0"? [\x5a\x7a] ("\r\n" / [ \t\r\n\f])? 
// Tokens

S "whitespace" = comment* s

CDO "<!--" = comment* "<!--"

CDC "-->" = comment* "-->"

INCLUDES "~=" = comment* "~="

DASHMATCH "|=" = comment* "|="

STRING "string" = comment* string:string { return string; }

IDENT "identifier" = comment* ident:ident { return ident; }

HASH "hash" = comment* "#" name:name { return "#" + name; }

IMPORT_SYM "@import" = comment* "@" I M P O R T

PAGE_SYM "@page" = comment* "@" P A G E

MEDIA_SYM "@media" = comment* "@" M E D I A

CHARSET_SYM "@charset" = comment* "@charset "

// We use |s| instead of |w| here to avoid infinite recursion.
IMPORTANT_SYM "!important" = comment* "!" (s / comment)* I M P O R T A N T

EMS "length" = comment* value:num E M { return { value: value, unit: "em" }; }

EXS "length" = comment* value:num E X { return { value: value, unit: "ex" }; }

LENGTH "length"
  = comment* value:num P X { return { value: value, unit: "px" }; }
  / comment* value:num C M { return { value: value, unit: "cm" }; }
  / comment* value:num M M { return { value: value, unit: "mm" }; }
  / comment* value:num I N { return { value: value, unit: "in" }; }
  / comment* value:num P T { return { value: value, unit: "pt" }; }
  / comment* value:num P C { return { value: value, unit: "pc" }; }

ANGLE "angle"
  = comment* value:num D E G { return { value: value, unit: "deg" }; }
  / comment* value:num R A D { return { value: value, unit: "rad" }; }
  / comment* value:num G R A D { return { value: value, unit: "grad" }; }

TIME "time"
  = comment* value:num M S_ { return { value: value, unit: "ms" }; }
  / comment* value:num S_ { return { value: value, unit: "s" }; }

// Units are reported in lower case. The second alternative matches the three
// letters K H Z, so its unit is "khz".
FREQ "frequency"
  = comment* value:num H Z { return { value: value, unit: "hz" }; }
  / comment* value:num K H Z { return { value: value, unit: "khz" }; }

PERCENTAGE "percentage"
  = comment* value:num "%" { return { value: value, unit: "%" }; }

NUMBER "number" = comment* value:num { return { value: value, unit: null }; }

URI "uri"
  = comment* U R L "("i w url:string w ")" { return url; }
  / comment* U R L "("i w url:url w ")" { return url; }

FUNCTION "function" = comment* name:ident "(" { return name; }
27 | // 28 | // [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm 29 | // [2] https://github.com/estree/estree 30 | // [3] http://inimino.org/~inimino/blog/ 31 | // [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg 32 | 33 | { 34 | var TYPES_TO_PROPERTY_NAMES = { 35 | CallExpression: "callee", 36 | MemberExpression: "object", 37 | }; 38 | 39 | function filledArray(count, value) { 40 | return Array.apply(null, new Array(count)).map(function () { 41 | return value; 42 | }); 43 | } 44 | 45 | function extractOptional(optional, index) { 46 | return optional ? optional[index] : null; 47 | } 48 | 49 | function extractList(list, index) { 50 | return list.map(function (element) { 51 | return element[index]; 52 | }); 53 | } 54 | 55 | function buildList(head, tail, index) { 56 | return [head].concat(extractList(tail, index)); 57 | } 58 | 59 | function buildBinaryExpression(head, tail) { 60 | return tail.reduce(function (result, element) { 61 | return { 62 | type: "BinaryExpression", 63 | operator: element[1], 64 | left: result, 65 | right: element[3], 66 | }; 67 | }, head); 68 | } 69 | 70 | function buildLogicalExpression(head, tail) { 71 | return tail.reduce(function (result, element) { 72 | return { 73 | type: "LogicalExpression", 74 | operator: element[1], 75 | left: result, 76 | right: element[3], 77 | }; 78 | }, head); 79 | } 80 | 81 | function optionalList(value) { 82 | return value !== null ? value : []; 83 | } 84 | } 85 | 86 | Start = __ program:Program __ { return program; } 87 | 88 | // ----- A.1 Lexical Grammar ----- 89 | 90 | SourceCharacter = . 
// Whitespace: the individual characters listed here plus any character matched
// by the Zs ("Separator, Space") category rule.
WhiteSpace "whitespace"
  = "\t"
  / "\v"
  / "\f"
  / " "
  / "\u00A0"
  / "\uFEFF"
  / Zs

// A single line-terminating character.
LineTerminator = [\n\r\u2028\u2029]

// A complete line break. "\r\n" is listed before "\r" so that a CRLF pair is
// consumed as one unit (choices are tried in order).
LineTerminatorSequence "end of line"
  = "\n"
  / "\r\n"
  / "\r"
  / "\u2028"
  / "\u2029"

Comment "comment"
  = MultiLineComment
  / SingleLineComment

// "/*", then every character up to (not including) the first "*/", then "*/".
MultiLineComment = "/*" (!"*/" SourceCharacter)* "*/"

// Same as MultiLineComment, but the comment text may not contain a
// LineTerminator.
MultiLineCommentNoLineTerminator
  = "/*" (!("*/" / LineTerminator) SourceCharacter)* "*/"

// "//" followed by everything up to, but not including, the next
// LineTerminator.
SingleLineComment = "//" (!LineTerminator SourceCharacter)*

// An IdentifierName at a position where no ReservedWord matches.
Identifier = !ReservedWord name:IdentifierName { return name; }

// Produces an { type: "Identifier", name } node; `name` is the concatenation
// of the start character and all following part characters.
IdentifierName "identifier"
  = head:IdentifierStart tail:IdentifierPart* {
      return {
        type: "Identifier",
        name: head + tail.join(""),
      };
    }

// The last alternative accepts a backslash followed by a UnicodeEscapeSequence
// and yields the character that sequence returns.
IdentifierStart
  = UnicodeLetter
  / "$"
  / "_"
  / "\\" sequence:UnicodeEscapeSequence { return sequence; }

IdentifierPart
  = IdentifierStart
  / UnicodeCombiningMark
  / UnicodeDigit
  / UnicodeConnectorPunctuation
  / "\u200C"
  / "\u200D"

// The following four rules group the Unicode category rules (Lu, Ll, ...)
// defined later in this file.
UnicodeLetter
  = Lu
  / Ll
  / Lt
  / Lm
  / Lo
  / Nl

UnicodeCombiningMark
  = Mn
  / Mc

UnicodeDigit = Nd

UnicodeConnectorPunctuation = Pc

// Everything that Identifier refuses to accept.
ReservedWord
  = Keyword
  / FutureReservedWord
  / NullLiteral
  / BooleanLiteral

Keyword
  = BreakToken
  / CaseToken
  / CatchToken
  / ContinueToken
  / DebuggerToken
  / DefaultToken
  / DeleteToken
  / DoToken
  / ElseToken
  / FinallyToken
  / ForToken
  / FunctionToken
  / IfToken
  / InstanceofToken
  / InToken
  / NewToken
  / ReturnToken
  / SwitchToken
  / ThisToken
  / ThrowToken
  / TryToken
  / TypeofToken
  / VarToken
  / VoidToken
  / WhileToken
  / WithToken

FutureReservedWord
  = ClassToken
  / ConstToken
  / EnumToken
  / ExportToken
  / ExtendsToken
  / ImportToken
  / SuperToken

// Every literal rule below produces a { type: "Literal", value } node.
Literal
  = NullLiteral
  / BooleanLiteral
  / NumericLiteral
  / StringLiteral
  / RegularExpressionLiteral

NullLiteral = NullToken { return { type: "Literal", value: null }; }

BooleanLiteral
  = TrueToken { return { type: "Literal", value: true }; }
  / FalseToken { return { type: "Literal", value: false }; }

// The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official
// grammar, it comes from text in section 7.8.3.
//
// Both actions return the node built by the sub-rule unchanged; the
// "/* literal N */" markers inside them are comments only.
NumericLiteral "number"
  = literal:HexIntegerLiteral !(IdentifierStart / DecimalDigit) {
      return /* literal 1 */ literal;
    }
  / literal:DecimalLiteral !(IdentifierStart / DecimalDigit) {
      return /* literal 2 */ literal;
    }

// Three spellings: digits with a "." (fraction digits optional), a leading "."
// with at least one fraction digit, and a plain integer. In every case the
// value is parseFloat applied to the whole matched text.
DecimalLiteral
  = DecimalIntegerLiteral "." DecimalDigit* ExponentPart? {
      return { type: "Literal", value: parseFloat(text()) };
    }
  / "." DecimalDigit+ ExponentPart? {
      return { type: "Literal", value: parseFloat(text()) };
    }
  / DecimalIntegerLiteral ExponentPart? {
      return { type: "Literal", value: parseFloat(text()) };
    }

// Either a lone "0" or a digit sequence that does not start with "0".
DecimalIntegerLiteral
  = "0"
  / NonZeroDigit DecimalDigit*

DecimalDigit = [0-9]

NonZeroDigit = [1-9]

ExponentPart = ExponentIndicator SignedInteger

// The trailing "i" makes the literal match case-insensitively ("e" or "E").
ExponentIndicator = "e"i

SignedInteger = [+-]? DecimalDigit+

// "0x" / "0X" followed by hex digits; `$` captures the digits as text, which
// is then parsed in base 16.
HexIntegerLiteral
  = "0x"i digits:$HexDigit+ {
      return { type: "Literal", value: parseInt(digits, 16) };
    }

HexDigit = [0-9a-f]i

// Double- or single-quoted string. The value is the concatenation of what each
// character rule returns, i.e. escape sequences are already decoded.
StringLiteral "string"
  = "\"" chars:DoubleStringCharacter* "\"" {
      return { type: "Literal", value: chars.join("") };
    }
  / "'" chars:SingleStringCharacter* "'" {
      return { type: "Literal", value: chars.join("") };
    }

// Any character except the closing quote, a backslash or a line terminator;
// or a backslash escape; or a line continuation.
DoubleStringCharacter
  = !("\"" / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

SingleStringCharacter
  = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

// A backslash directly followed by a line break contributes nothing to the
// string value.
LineContinuation = "\\" LineTerminatorSequence { return ""; }

// What may follow the backslash inside a string. "0" counts as the NUL escape
// only when no further decimal digit follows.
EscapeSequence
  = CharacterEscapeSequence
  / "0" !DecimalDigit { return "\0"; }
  / HexEscapeSequence
  / UnicodeEscapeSequence

CharacterEscapeSequence
  = SingleEscapeCharacter
  / NonEscapeCharacter

// Quotes and the backslash stand for themselves (no action, so the matched
// text is the result); the letters map to the corresponding control character.
SingleEscapeCharacter
  = "'"
  / "\""
  / "\\"
  / "b" { return "\b"; }
  / "f" { return "\f"; }
  / "n" { return "\n"; }
  / "r" { return "\r"; }
  / "t" { return "\t"; }
  / "v" { return "\v"; }

// Any other character that is neither an EscapeCharacter nor a line
// terminator; it stands for itself.
NonEscapeCharacter
  = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }

EscapeCharacter
  = SingleEscapeCharacter
  / DecimalDigit
  / "x"
  / "u"

// "x" plus exactly two hex digits, decoded with String.fromCharCode.
HexEscapeSequence
  = "x" digits:$(HexDigit HexDigit) {
      return String.fromCharCode(parseInt(digits, 16));
    }

// "u" plus exactly four hex digits, decoded with String.fromCharCode.
UnicodeEscapeSequence
  = "u" digits:$(HexDigit HexDigit HexDigit HexDigit) {
      return String.fromCharCode(parseInt(digits, 16));
    }

// The body and flags are captured as text and handed to the RegExp
// constructor. If construction throws, the message is reported through the
// parser's error() function.
RegularExpressionLiteral "regular expression"
  = "/" pattern:$RegularExpressionBody "/" flags:$RegularExpressionFlags {
      var value;

      try {
        value = new RegExp(pattern, flags);
      } catch (e) {
        error(e.message);
      }

      return { type: "Literal", value: value };
    }

RegularExpressionBody = RegularExpressionFirstChar RegularExpressionChar*

// Like RegularExpressionChar, but "*" is excluded as well, so a body cannot
// begin with "*".
RegularExpressionFirstChar
  = ![*\\/[] RegularExpressionNonTerminator
  / RegularExpressionBackslashSequence
  / RegularExpressionClass

// A plain character (not "\", "/" or "["), a backslash sequence, or a
// bracketed class.
RegularExpressionChar
  = ![\\/[] RegularExpressionNonTerminator
  / RegularExpressionBackslashSequence
  / RegularExpressionClass

RegularExpressionBackslashSequence = "\\" RegularExpressionNonTerminator

RegularExpressionNonTerminator = !LineTerminator SourceCharacter

// "[" ... "]"; because "/" is not excluded inside the class, it may appear
// there without ending the literal.
RegularExpressionClass = "[" RegularExpressionClassChar* "]"

RegularExpressionClassChar
  = ![\]\\] RegularExpressionNonTerminator
  / RegularExpressionBackslashSequence

// Flags are not validated here; any run of IdentifierPart characters is
// accepted and passed to the RegExp constructor by RegularExpressionLiteral.
RegularExpressionFlags = IdentifierPart*

// Unicode Character Categories
//
// Extracted from the following Unicode Character Database file:
//
//   http://www.unicode.org/Public/11.0.0/ucd/extracted/DerivedGeneralCategory.txt
//
// Unix magic used:
//
//   grep "; $CATEGORY" DerivedGeneralCategory.txt |   # Filter characters
//     cut -f1 -d " " |                                # Extract code points
//     grep -v '[0-9a-fA-F]\{5\}' |                    # Exclude non-BMP characters
//     sed -e 's/\.\./-/' |                            # Adjust formatting
//     sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' |       # Adjust formatting
//     tr -d '\n'                                      # Join lines
//
// ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one
// at the time of writing.
//
// NOTE(review): the data file referenced above is the Unicode 11.0.0 one, so
// the "8.0.0" in the previous sentence looks stale -- confirm which version
// the tables below were generated from.
373 | // 374 | // Non-BMP characters are completely ignored to avoid surrogate pair handling 375 | // (detecting surrogate pairs isn't possible with a simple character class and 376 | // other methods would degrade performance). I don't consider it a big deal as 377 | // even parsers in JavaScript engines of common browsers seem to ignore them. 378 | 379 | // Letter, Lowercase 380 | Ll 381 | = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F
1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0560-\u0588\u10D0-\u10FA\u10FD-\u10FF\u13F8-\u13FD\u1C80-\u1C88\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68
F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A] 382 | 383 | // Letter, Modifier 384 | Lm 385 | = [\u02B0-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0374\u037A\u0559\u0640\u06E5-\u06E6\u07F4-\u07F5\u07FA\u081A\u0824\u0828\u0971\u0E46\u0EC6\u10FC\u17D7\u1843\u1AA7\u1C78-\u1C7D\u1D2C-\u1D6A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\u2D6F\u2E2F\u3005\u3031-\u3035\u303B\u309D-\u309E\u30FC-\u30FE\uA015\uA4F8-\uA4FD\uA60C\uA67F\uA69C-\uA69D\uA717-\uA71F\uA770\uA788\uA7F8-\uA7F9\uA9CF\uA9E6\uAA70\uAADD\uAAF3-\uAAF4\uAB5C-\uAB5F\uFF70\uFF9E-\uFF9F] 386 | 387 | // Letter, Other 388 | Lo 389 | = 
[\u00AA\u00BA\u01BB\u01C0-\u01C3\u0294\u05D0-\u05EA\u05EF-\u05F2\u0620-\u063F\u0641-\u064A\u066E-\u066F\u0671-\u06D3\u06D5\u06EE-\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u0800-\u0815\u0840-\u0858\u0860-\u086A\u08A0-\u08B4\u08B6-\u08BD\u0904-\u0939\u093D\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u09FC\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0-\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60-\u0C61\u0C80\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0-\u0CE1\u0CF1-\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D54-\u0D56\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065-\u1066\u106E-\u1070\u1075-\u1081\u108E\u1100-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17DC\u1820-\u1842\u1844-\u187
8\u1880-\u1884\u1887-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE-\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C77\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5-\u1CF6\u2135-\u2138\u2D30-\u2D67\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u3006\u303C\u3041-\u3096\u309F\u30A1-\u30FA\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FEF\uA000-\uA014\uA016-\uA48C\uA4D0-\uA4F7\uA500-\uA60B\uA610-\uA61F\uA62A-\uA62B\uA66E\uA6A0-\uA6E5\uA78F\uA7F7\uA7FB-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD-\uA8FE\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9E0-\uA9E4\uA9E7-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA6F\uAA71-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5-\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADC\uAAE0-\uAAEA\uAAF2\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF66-\uFF6F\uFF71-\uFF9D\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC] 390 | 391 | // Letter, Titlecase 392 | Lt 393 | = [\u01C5\u01C8\u01CB\u01F2\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FBC\u1FCC\u1FFC] 394 | 395 | // Letter, Uppercase 396 | Lu 397 | = 
[\u0041-\u005A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A-\u023B\u023D-\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9-\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0-\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1C90-\u1CBA\u1CBD-\u1CBF\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E
20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E-\u213F\u2145\u2183\u2C00-\u2C2E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D-\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\uFF21-\uFF3A] 398 | 399 | // Mark, 
Spacing Combining 400 | Mc 401 | = [\u0903\u093B\u093E-\u0940\u0949-\u094C\u094E-\u094F\u0982-\u0983\u09BE-\u09C0\u09C7-\u09C8\u09CB-\u09CC\u09D7\u0A03\u0A3E-\u0A40\u0A83\u0ABE-\u0AC0\u0AC9\u0ACB-\u0ACC\u0B02-\u0B03\u0B3E\u0B40\u0B47-\u0B48\u0B4B-\u0B4C\u0B57\u0BBE-\u0BBF\u0BC1-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD7\u0C01-\u0C03\u0C41-\u0C44\u0C82-\u0C83\u0CBE\u0CC0-\u0CC4\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D40\u0D46-\u0D48\u0D4A-\u0D4C\u0D57\u0D82-\u0D83\u0DCF-\u0DD1\u0DD8-\u0DDF\u0DF2-\u0DF3\u0F3E-\u0F3F\u0F7F\u102B-\u102C\u1031\u1038\u103B-\u103C\u1056-\u1057\u1062-\u1064\u1067-\u106D\u1083-\u1084\u1087-\u108C\u108F\u109A-\u109C\u17B6\u17BE-\u17C5\u17C7-\u17C8\u1923-\u1926\u1929-\u192B\u1930-\u1931\u1933-\u1938\u1A19-\u1A1A\u1A55\u1A57\u1A61\u1A63-\u1A64\u1A6D-\u1A72\u1B04\u1B35\u1B3B\u1B3D-\u1B41\u1B43-\u1B44\u1B82\u1BA1\u1BA6-\u1BA7\u1BAA\u1BE7\u1BEA-\u1BEC\u1BEE\u1BF2-\u1BF3\u1C24-\u1C2B\u1C34-\u1C35\u1CE1\u1CF2-\u1CF3\u1CF7\u302E-\u302F\uA823-\uA824\uA827\uA880-\uA881\uA8B4-\uA8C3\uA952-\uA953\uA983\uA9B4-\uA9B5\uA9BA-\uA9BB\uA9BD-\uA9C0\uAA2F-\uAA30\uAA33-\uAA34\uAA4D\uAA7B\uAA7D\uAAEB\uAAEE-\uAAEF\uAAF5\uABE3-\uABE4\uABE6-\uABE7\uABE9-\uABEA\uABEC] 402 | 403 | // Mark, Nonspacing 404 | Mn 405 | = 
[\u0300-\u036F\u0483-\u0487\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D3-\u08E1\u08E3-\u0902\u093A\u093C\u0941-\u0948\u094D\u0951-\u0957\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u09FE\u0A01-\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A51\u0A70-\u0A71\u0A75\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0AE2-\u0AE3\u0AFA-\u0AFF\u0B01\u0B3C\u0B3F\u0B41-\u0B44\u0B4D\u0B56\u0B62-\u0B63\u0B82\u0BC0\u0BCD\u0C00\u0C04\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C62-\u0C63\u0C81\u0CBC\u0CBF\u0CC6\u0CCC-\u0CCD\u0CE2-\u0CE3\u0D00-\u0D01\u0D3B-\u0D3C\u0D41-\u0D44\u0D4D\u0D62-\u0D63\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032-\u1037\u1039-\u103A\u103D-\u103E\u1058-\u1059\u105E-\u1060\u1071-\u1074\u1082\u1085-\u1086\u108D\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17B4-\u17B5\u17B7-\u17BD\u17C6\u17C9-\u17D3\u17DD\u180B-\u180D\u1885-\u1886\u18A9\u1920-\u1922\u1927-\u1928\u1932\u1939-\u193B\u1A17-\u1A18\u1A1B\u1A56\u1A58-\u1A5E\u1A60\u1A62\u1A65-\u1A6C\u1A73-\u1A7C\u1A7F\u1AB0-\u1ABD\u1B00-\u1B03\u1B34\u1B36-\u1B3A\u1B3C\u1B42\u1B6B-\u1B73\u1B80-\u1B81\u1BA2-\u1BA5\u1BA8-\u1BA9\u1BAB-\u1BAD\u1BE6\u1BE8-\u1BE9\u1BED\u1BEF-\u1BF1\u1C2C-\u1C33\u1C36-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE0\u1CE2-\u1CE8\u1CED\u1CF4\u1CF8-\u1CF9\u1DC0-\u1DF9\u1DFB-\u1DFF\u20D0-\u20DC\u20E1\u20E5-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302D\u3099-\u309A\uA66F\uA674-\uA67D\uA69E-\uA69F\uA6F0-\uA6F1\uA802\uA806\uA80B\uA825-\uA826\uA8C4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA951\uA980-\uA982\uA9B3\uA9B6-\uA9B9\uA9BC\uA9E5\uAA29-\uAA2E\uAA31-\uAA3
2\uAA35-\uAA36\uAA43\uAA4C\uAA7C\uAAB0\uAAB2-\uAAB4\uAAB7-\uAAB8\uAABE-\uAABF\uAAC1\uAAEC-\uAAED\uAAF6\uABE5\uABE8\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F] 406 | 407 | // Number, Decimal Digit 408 | Nd 409 | = [\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19] 410 | 411 | // Number, Letter 412 | Nl 413 | = [\u16EE-\u16F0\u2160-\u2182\u2185-\u2188\u3007\u3021-\u3029\u3038-\u303A\uA6E6-\uA6EF] 414 | 415 | // Punctuation, Connector 416 | Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F] 417 | 418 | // Separator, Space 419 | Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] 420 | 421 | // Tokens 422 | 423 | BreakToken = "break" !IdentifierPart 424 | 425 | CaseToken = "case" !IdentifierPart 426 | 427 | CatchToken = "catch" !IdentifierPart 428 | 429 | ClassToken = "class" !IdentifierPart 430 | 431 | ConstToken = "const" !IdentifierPart 432 | 433 | ContinueToken = "continue" !IdentifierPart 434 | 435 | DebuggerToken = "debugger" !IdentifierPart 436 | 437 | DefaultToken = "default" !IdentifierPart 438 | 439 | DeleteToken = "delete" !IdentifierPart 440 | 441 | DoToken = "do" !IdentifierPart 442 | 443 | ElseToken = "else" !IdentifierPart 444 | 445 | EnumToken = "enum" !IdentifierPart 446 | 447 | ExportToken = "export" !IdentifierPart 448 | 449 | ExtendsToken = "extends" !IdentifierPart 450 | 451 | FalseToken = "false" !IdentifierPart 452 | 453 | FinallyToken = "finally" !IdentifierPart 454 | 455 | ForToken = "for" !IdentifierPart 456 | 457 | FunctionToken = "function" !IdentifierPart 458 | 459 | GetToken = "get" !IdentifierPart 
// Keyword tokens (continued). Every token is the keyword text followed by
// "!IdentifierPart", so a keyword is never matched as the prefix of a longer
// identifier.

IfToken = "if" !IdentifierPart

ImportToken = "import" !IdentifierPart

InstanceofToken = "instanceof" !IdentifierPart

InToken = "in" !IdentifierPart

NewToken = "new" !IdentifierPart

NullToken = "null" !IdentifierPart

ReturnToken = "return" !IdentifierPart

SetToken = "set" !IdentifierPart

SuperToken = "super" !IdentifierPart

SwitchToken = "switch" !IdentifierPart

ThisToken = "this" !IdentifierPart

ThrowToken = "throw" !IdentifierPart

TrueToken = "true" !IdentifierPart

TryToken = "try" !IdentifierPart

TypeofToken = "typeof" !IdentifierPart

VarToken = "var" !IdentifierPart

VoidToken = "void" !IdentifierPart

WhileToken = "while" !IdentifierPart

WithToken = "with" !IdentifierPart

// Skipped
//
// "__" skips any mix of whitespace, line breaks and comments. "_" skips only
// whitespace and block comments that contain no line terminator, i.e. it
// never moves past a line break.

__ = (WhiteSpace / LineTerminatorSequence / Comment)*

_ = (WhiteSpace / MultiLineCommentNoLineTerminator)*

// Automatic Semicolon Insertion
//
// End of statement: an explicit ";", a line break (optionally preceded by a
// "//" comment), a following "}" (looked at with "&", not consumed), or the
// end of the input.

EOS
  = __ ";"
  / _ SingleLineComment? LineTerminatorSequence
  / _ &"}"
  / __ EOF

// Succeeds only where no further character can be read.
EOF = !.

// ----- A.2 Number Conversions -----

// Irrelevant.

// ----- A.3 Expressions -----

// "this" becomes a ThisExpression node. A parenthesized expression yields the
// inner expression itself; no node is created for the parentheses.
PrimaryExpression
  = ThisToken { return { type: "ThisExpression" }; }
  / Identifier
  / Literal
  / ArrayLiteral
  / ObjectLiteral
  / "(" __ expression:Expression __ ")" { return expression; }

// Three shapes: only (optional) elisions, an element list, and an element list
// followed by "," and optional elisions. Elisions appear as null entries in
// `elements` (see Elision); a missing elision contributes an empty list.
ArrayLiteral
  = "[" __ elision:(Elision __)? "]" {
      return {
        type: "ArrayExpression",
        elements: optionalList(extractOptional(elision, 0)),
      };
    }
  / "[" __ elements:ElementList __ "]" {
      return {
        type: "ArrayExpression",
        elements: elements,
      };
    }
  / "[" __ elements:ElementList __ "," __ elision:(Elision __)? "]" {
      return {
        type: "ArrayExpression",
        elements: elements.concat(optionalList(extractOptional(elision, 0))),
      };
    }

// `head` and every entry of `tail` are arrays of the form
// [null, ..., null, element] (leading elisions, then the element). The final
// action concatenates all of them into one flat array.
ElementList
  = head:(
      elision:(Elision __)? element:AssignmentExpression {
          return optionalList(extractOptional(elision, 0)).concat(element);
        }
    )
    tail:(
      __ "," __ elision:(Elision __)? element:AssignmentExpression {
          return optionalList(extractOptional(elision, 0)).concat(element);
        }
    )* { return Array.prototype.concat.apply(head, tail); }

// One or more commas; returns an array with one null per comma.
Elision = "," commas:(__ ",")* { return filledArray(commas.length + 1, null); }

// Empty object, property list, or property list with a trailing comma (the
// trailing comma does not change the result).
ObjectLiteral
  = "{" __ "}" { return { type: "ObjectExpression", properties: [] }; }
  / "{" __ properties:PropertyNameAndValueList __ "}" {
      return { type: "ObjectExpression", properties: properties };
    }
  / "{" __ properties:PropertyNameAndValueList __ "," __ "}" {
      return { type: "ObjectExpression", properties: properties };
    }

// Comma-separated properties. Index 3 selects the PropertyAssignment out of
// each "__ , __ PropertyAssignment" tail sequence.
PropertyNameAndValueList
  = head:PropertyAssignment tail:(__ "," __ PropertyAssignment)* {
      return buildList(head, tail, 3);
    }

// Produces a "Property" node whose `kind` is "init" (key: value), "get"
// (getter without parameters) or "set" (setter with exactly one parameter).
// Getters and setters wrap their body in an anonymous FunctionExpression.
PropertyAssignment
  = key:PropertyName __ ":" __ value:AssignmentExpression {
      return { type: "Property", key: key, value: value, kind: "init" };
    }
  / GetToken
    __
    key:PropertyName
    __
    "("
    __
    ")"
    __
    "{"
    __
    body:FunctionBody
    __
    "}" {
      return {
        type: "Property",
        key: key,
        value: {
          type: "FunctionExpression",
          id: null,
          params: [],
          body: body,
        },
        kind: "get",
      };
    }
  / SetToken
    __
    key:PropertyName
    __
    "("
    __
    params:PropertySetParameterList
    __
    ")"
    __
    "{"
    __
    body:FunctionBody
    __
    "}" {
      return {
        type: "Property",
        key: key,
        value: {
          type: "FunctionExpression",
          id: null,
          params: params,
          body: body,
        },
        kind: "set",
      };
    }

// IdentifierName (not Identifier) is used, so reserved words are accepted as
// property names.
PropertyName
  = IdentifierName
  / StringLiteral
  / NumericLiteral

// A single identifier, returned as a one-element array.
PropertySetParameterList = id:Identifier { return [id]; }

// A head expression followed by any number of "[expr]" / ".name" accesses.
// The accesses are folded left to right into nested MemberExpression nodes.
MemberExpression
  = head:(
      PrimaryExpression
      / FunctionExpression
      / NewToken __ callee:MemberExpression __ args:Arguments {
          return { type: "NewExpression", callee: callee, arguments: args };
        }
    )
    tail:(
      __ "[" __ property:Expression __ "]" {
          return { property: property, computed: true };
        }
      / __ "." __ property:IdentifierName {
          return { property: property, computed: false };
        }
    )* {
      return tail.reduce(function (result, element) {
        return {
          type: "MemberExpression",
          object: result,
          property: element.property,
          computed: element.computed,
        };
      }, head);
    }

// The second alternative is "new" without an argument list; its `arguments`
// array is empty.
NewExpression
  = MemberExpression
  / NewToken __ callee:NewExpression {
      return { type: "NewExpression", callee: callee, arguments: [] };
    }

// A call followed by any number of further calls or member accesses. Each
// tail element is a partial node that still lacks its callee/object; the final
// action fills that property (looked up by node type in
// TYPES_TO_PROPERTY_NAMES) with the expression built so far. Note that the
// tail element itself is mutated and returned.
CallExpression
  = head:(
      callee:MemberExpression __ args:Arguments {
          return { type: "CallExpression", callee: callee, arguments: args };
        }
    )
    tail:(
      __ args:Arguments { return { type: "CallExpression", arguments: args }; }
      / __ "[" __ property:Expression __ "]" {
          return {
            type: "MemberExpression",
            property: property,
            computed: true,
          };
        }
      / __ "." __ property:IdentifierName {
          return {
            type: "MemberExpression",
            property: property,
            computed: false,
          };
        }
    )* {
      return tail.reduce(function (result, element) {
        element[TYPES_TO_PROPERTY_NAMES[element.type]] = result;

        return element;
      }, head);
    }

// Parenthesized argument list; returns an empty array when there are no
// arguments.
Arguments
  = "(" __ args:(ArgumentList __)? ")" {
      return optionalList(extractOptional(args, 0));
    }

ArgumentList
  = head:AssignmentExpression tail:(__ "," __ AssignmentExpression)* {
      return buildList(head, tail, 3);
    }

// CallExpression is tried first; NewExpression (which includes plain
// MemberExpression) is the fallback.
LeftHandSideExpression
  = CallExpression
  / NewExpression

// "_" (not "__") separates the operand from "++" / "--", so the operator must
// follow without an intervening line break.
PostfixExpression
  = argument:LeftHandSideExpression _ operator:PostfixOperator {
      return {
        type: "UpdateExpression",
        operator: operator,
        argument: argument,
        prefix: false,
      };
    }
  / LeftHandSideExpression

PostfixOperator
  = "++"
  / "--"

// Prefix operators. "++" and "--" yield an UpdateExpression, every other
// operator a UnaryExpression; both carry prefix: true.
UnaryExpression
  = PostfixExpression
  / operator:UnaryOperator __ argument:UnaryExpression {
      var type =
        operator === "++" || operator === "--"
          ? "UpdateExpression"
          : "UnaryExpression";

      return {
        type: type,
        operator: operator,
        argument: argument,
        prefix: true,
      };
    }

// `$` returns the matched text instead of the match structure. "++" / "--"
// are listed before "+" / "-", and the single "+" / "-" must not be followed
// by "=".
UnaryOperator
  = $DeleteToken
  / $VoidToken
  / $TypeofToken
  / "++"
  / "--"
  / $("+" !"=")
  / $("-" !"=")
  / "~"
  / "!"
// Binary operator levels, from the tightest-binding one (multiplicative) to the
// loosest (logical OR). Every level parses one operand of the next-tighter
// level followed by any number of "__ operator __ operand" sequences and folds
// them left to right with buildBinaryExpression / buildLogicalExpression from
// the initializer, which read the operator at index 1 and the right operand at
// index 3 of each sequence.
//
// The "...NoIn" rules mirror their counterparts but never accept the "in"
// operator (see RelationalOperatorNoIn).
// NOTE(review): presumably these exist for "for (... in ...)" headers, as in
// the ECMAScript NoIn productions -- their users are outside this section.

MultiplicativeExpression
  = head:UnaryExpression tail:(__ MultiplicativeOperator __ UnaryExpression)* {
      return buildBinaryExpression(head, tail);
    }

// The !"=" lookahead keeps "*=", "/=" and "%=" from being read as a binary
// operator.
MultiplicativeOperator
  = $("*" !"=")
  / $("/" !"=")
  / $("%" !"=")

AdditiveExpression
  = head:MultiplicativeExpression
    tail:(__ AdditiveOperator __ MultiplicativeExpression)* {
      return buildBinaryExpression(head, tail);
    }

// "+" must not be followed by "+" or "=", "-" not by "-" or "=".
AdditiveOperator
  = $("+" ![+=])
  / $("-" ![-=])

ShiftExpression
  = head:AdditiveExpression tail:(__ ShiftOperator __ AdditiveExpression)* {
      return buildBinaryExpression(head, tail);
    }

// ">>>" is tried before ">>" because choices are ordered; none of the
// operators may be followed by "=".
ShiftOperator
  = $("<<" !"=")
  / $(">>>" !"=")
  / $(">>" !"=")

RelationalExpression
  = head:ShiftExpression tail:(__ RelationalOperator __ ShiftExpression)* {
      return buildBinaryExpression(head, tail);
    }

// A single "<" / ">" must not be followed by the same character, which keeps
// the shift operators out. `$` on the keyword tokens yields the keyword text.
RelationalOperator
  = "<="
  / ">="
  / $("<" !"<")
  / $(">" !">")
  / $InstanceofToken
  / $InToken

RelationalExpressionNoIn
  = head:ShiftExpression tail:(__ RelationalOperatorNoIn __ ShiftExpression)* {
      return buildBinaryExpression(head, tail);
    }

// Same as RelationalOperator without the "in" alternative.
RelationalOperatorNoIn
  = "<="
  / ">="
  / $("<" !"<")
  / $(">" !">")
  / $InstanceofToken

EqualityExpression
  = head:RelationalExpression
    tail:(__ EqualityOperator __ RelationalExpression)* {
      return buildBinaryExpression(head, tail);
    }

EqualityExpressionNoIn
  = head:RelationalExpressionNoIn
    tail:(__ EqualityOperator __ RelationalExpressionNoIn)* {
      return buildBinaryExpression(head, tail);
    }

// The three-character operators are listed before their two-character
// prefixes so that the longer spelling wins.
EqualityOperator
  = "==="
  / "!=="
  / "=="
  / "!="

BitwiseANDExpression
  = head:EqualityExpression
    tail:(__ BitwiseANDOperator __ EqualityExpression)* {
      return buildBinaryExpression(head, tail);
    }

BitwiseANDExpressionNoIn
  = head:EqualityExpressionNoIn
    tail:(__ BitwiseANDOperator __ EqualityExpressionNoIn)* {
      return buildBinaryExpression(head, tail);
    }

// A single "&" that is followed by neither "&" nor "=".
BitwiseANDOperator = $("&" ![&=])

BitwiseXORExpression
  = head:BitwiseANDExpression
    tail:(__ BitwiseXOROperator __ BitwiseANDExpression)* {
      return buildBinaryExpression(head, tail);
    }

BitwiseXORExpressionNoIn
  = head:BitwiseANDExpressionNoIn
    tail:(__ BitwiseXOROperator __ BitwiseANDExpressionNoIn)* {
      return buildBinaryExpression(head, tail);
    }

// "^" that is not followed by "=".
BitwiseXOROperator = $("^" !"=")

BitwiseORExpression
  = head:BitwiseXORExpression
    tail:(__ BitwiseOROperator __ BitwiseXORExpression)* {
      return buildBinaryExpression(head, tail);
    }

BitwiseORExpressionNoIn
  = head:BitwiseXORExpressionNoIn
    tail:(__ BitwiseOROperator __ BitwiseXORExpressionNoIn)* {
      return buildBinaryExpression(head, tail);
    }

// A single "|" that is followed by neither "|" nor "=".
BitwiseOROperator = $("|" ![|=])

// "&&" and "||" chains are built with buildLogicalExpression, i.e. they become
// LogicalExpression rather than BinaryExpression nodes.
LogicalANDExpression
  = head:BitwiseORExpression
    tail:(__ LogicalANDOperator __ BitwiseORExpression)* {
      return buildLogicalExpression(head, tail);
    }

LogicalANDExpressionNoIn
  = head:BitwiseORExpressionNoIn
    tail:(__ LogicalANDOperator __ BitwiseORExpressionNoIn)* {
      return buildLogicalExpression(head, tail);
    }

LogicalANDOperator = "&&"

LogicalORExpression
  = head:LogicalANDExpression
    tail:(__ LogicalOROperator __ LogicalANDExpression)* {
      return buildLogicalExpression(head, tail);
    }

LogicalORExpressionNoIn
  = head:LogicalANDExpressionNoIn
    tail:(__ LogicalOROperator __ LogicalANDExpressionNoIn)* {
      return buildLogicalExpression(head, tail);
    }

LogicalOROperator = "||"

ConditionalExpression /* Ternary form; when the "?" ... ":" sequence does not match, the second alternative yields a bare LogicalORExpression. */ 903 | = test:LogicalORExpression 904 | __ 905 | "?" 906 | __ 907 | consequent:AssignmentExpression 908 | __ 909 | ":" 910 | __ 911 | alternate:AssignmentExpression { 912 | return { 913 | type: "ConditionalExpression", 914 | test: test, 915 | consequent: consequent, 916 | alternate: alternate, 917 | }; 918 | } 919 | / LogicalORExpression 920 | /* NoIn variant: test and alternate use the NoIn rules; the consequent is the unrestricted AssignmentExpression. */ 921 | ConditionalExpressionNoIn 922 | = test:LogicalORExpressionNoIn 923 | __ 924 | "?" 925 | __ 926 | consequent:AssignmentExpression 927 | __ 928 | ":" 929 | __ 930 | alternate:AssignmentExpressionNoIn { 931 | return { 932 | type: "ConditionalExpression", 933 | test: test, 934 | consequent: consequent, 935 | alternate: alternate, 936 | }; 937 | } 938 | / LogicalORExpressionNoIn 939 | /* Right-recursive assignment. Plain "=" is guarded by !"=" so the start of "==" is not taken as an assignment; compound operators come from AssignmentOperator; anything else falls through to ConditionalExpression. */ 940 | AssignmentExpression 941 | = left:LeftHandSideExpression __ "=" !"=" __ right:AssignmentExpression { 942 | return { 943 | type: "AssignmentExpression", 944 | operator: "=", 945 | left: left, 946 | right: right, 947 | }; 948 | } 949 | / left:LeftHandSideExpression 950 | __ 951 | operator:AssignmentOperator 952 | __ 953 | right:AssignmentExpression { 954 | return { 955 | type: "AssignmentExpression", 956 | operator: operator, 957 | left: left, 958 | right: right, 959 | }; 960 | } 961 | / ConditionalExpression 962 | /* NoIn variant of AssignmentExpression: right-hand sides and the fallback use the NoIn rules. */ 963 | AssignmentExpressionNoIn 964 | = left:LeftHandSideExpression __ "=" !"=" __ right:AssignmentExpressionNoIn { 965 | return { 966 | type: "AssignmentExpression", 967 | operator: "=", 968 | left: left, 969 | right: right, 970 | }; 971 | } 972 | / left:LeftHandSideExpression 973 | __ 974 | operator:AssignmentOperator 975 | __ 976 | right:AssignmentExpressionNoIn { 977 | return { 978 | type: "AssignmentExpression", 979 | operator: operator, 980 | left: left, 981 | right: right, 982 | }; 983 | } 984 | / ConditionalExpressionNoIn 985 | 986 | AssignmentOperator 987 | = "*=" 988 | / "/=" 989 | / "%=" 990 | / "+=" 991 | / "-=" 992 | / "<<=" 993 | / ">>=" 994 | / ">>>=" 995 | / "&=" 996 | / "^=" 997 | / "|=" 998 | 
999 | Expression 1000 | = head:AssignmentExpression tail:(__ "," __ AssignmentExpression)* { 1001 | return tail.length > 0 1002 | ? { type: "SequenceExpression", expressions: buildList(head, tail, 3) } 1003 | : head; 1004 | } 1005 | 1006 | ExpressionNoIn 1007 | = head:AssignmentExpressionNoIn tail:(__ "," __ AssignmentExpressionNoIn)* { 1008 | return tail.length > 0 1009 | ? { type: "SequenceExpression", expressions: buildList(head, tail, 3) } 1010 | : head; 1011 | } 1012 | 1013 | // ----- A.4 Statements ----- 1014 | 1015 | Statement 1016 | = Block 1017 | / VariableStatement 1018 | / EmptyStatement 1019 | / ExpressionStatement 1020 | / IfStatement 1021 | / IterationStatement 1022 | / ContinueStatement 1023 | / BreakStatement 1024 | / ReturnStatement 1025 | / WithStatement 1026 | / LabelledStatement 1027 | / SwitchStatement 1028 | / ThrowStatement 1029 | / TryStatement 1030 | / DebuggerStatement 1031 | 1032 | Block 1033 | = "{" __ body:(StatementList __)? "}" { 1034 | return { 1035 | type: "BlockStatement", 1036 | body: optionalList(extractOptional(body, 0)), 1037 | }; 1038 | } 1039 | 1040 | StatementList 1041 | = head:Statement tail:(__ Statement)* { return buildList(head, tail, 1); } 1042 | 1043 | VariableStatement 1044 | = VarToken __ declarations:VariableDeclarationList EOS { 1045 | return { 1046 | type: "VariableDeclaration", 1047 | declarations: declarations, 1048 | kind: "var", 1049 | }; 1050 | } 1051 | 1052 | VariableDeclarationList 1053 | = head:VariableDeclaration tail:(__ "," __ VariableDeclaration)* { 1054 | return buildList(head, tail, 3); 1055 | } 1056 | 1057 | VariableDeclarationListNoIn 1058 | = head:VariableDeclarationNoIn tail:(__ "," __ VariableDeclarationNoIn)* { 1059 | return buildList(head, tail, 3); 1060 | } 1061 | 1062 | VariableDeclaration 1063 | = id:Identifier init:(__ Initialiser)? 
{ 1064 | return { 1065 | type: "VariableDeclarator", 1066 | id: id, 1067 | init: extractOptional(init, 1), 1068 | }; 1069 | } 1070 | 1071 | VariableDeclarationNoIn 1072 | = id:Identifier init:(__ InitialiserNoIn)? { 1073 | return { 1074 | type: "VariableDeclarator", 1075 | id: id, 1076 | init: extractOptional(init, 1), 1077 | }; 1078 | } 1079 | 1080 | Initialiser = "=" !"=" __ expression:AssignmentExpression { return expression; } 1081 | 1082 | InitialiserNoIn 1083 | = "=" !"=" __ expression:AssignmentExpressionNoIn { return expression; } 1084 | 1085 | EmptyStatement = ";" { return { type: "EmptyStatement" }; } 1086 | 1087 | ExpressionStatement 1088 | = !("{" / FunctionToken) expression:Expression EOS { 1089 | return { 1090 | type: "ExpressionStatement", 1091 | expression: expression, 1092 | }; 1093 | } 1094 | 1095 | IfStatement 1096 | = IfToken 1097 | __ 1098 | "(" 1099 | __ 1100 | test:Expression 1101 | __ 1102 | ")" 1103 | __ 1104 | consequent:Statement 1105 | __ 1106 | ElseToken 1107 | __ 1108 | alternate:Statement { 1109 | return { 1110 | type: "IfStatement", 1111 | test: test, 1112 | consequent: consequent, 1113 | alternate: alternate, 1114 | }; 1115 | } 1116 | / IfToken __ "(" __ test:Expression __ ")" __ consequent:Statement { 1117 | return { 1118 | type: "IfStatement", 1119 | test: test, 1120 | consequent: consequent, 1121 | alternate: null, 1122 | }; 1123 | } 1124 | 1125 | IterationStatement 1126 | = DoToken 1127 | __ 1128 | body:Statement 1129 | __ 1130 | WhileToken 1131 | __ 1132 | "(" 1133 | __ 1134 | test:Expression 1135 | __ 1136 | ")" 1137 | EOS { return { type: "DoWhileStatement", body: body, test: test }; } 1138 | / WhileToken __ "(" __ test:Expression __ ")" __ body:Statement { 1139 | return { type: "WhileStatement", test: test, body: body }; 1140 | } 1141 | / ForToken 1142 | __ 1143 | "(" 1144 | __ 1145 | init:(ExpressionNoIn __)? 1146 | ";" 1147 | __ 1148 | test:(Expression __)? 1149 | ";" 1150 | __ 1151 | update:(Expression __)? 
1152 | ")" 1153 | __ 1154 | body:Statement { 1155 | return { 1156 | type: "ForStatement", 1157 | init: extractOptional(init, 0), 1158 | test: extractOptional(test, 0), 1159 | update: extractOptional(update, 0), 1160 | body: body, 1161 | }; 1162 | } 1163 | / ForToken 1164 | __ 1165 | "(" 1166 | __ 1167 | VarToken 1168 | __ 1169 | declarations:VariableDeclarationListNoIn 1170 | __ 1171 | ";" 1172 | __ 1173 | test:(Expression __)? 1174 | ";" 1175 | __ 1176 | update:(Expression __)? 1177 | ")" 1178 | __ 1179 | body:Statement { 1180 | return { 1181 | type: "ForStatement", 1182 | init: { 1183 | type: "VariableDeclaration", 1184 | declarations: declarations, 1185 | kind: "var", 1186 | }, 1187 | test: extractOptional(test, 0), 1188 | update: extractOptional(update, 0), 1189 | body: body, 1190 | }; 1191 | } 1192 | / ForToken 1193 | __ 1194 | "(" 1195 | __ 1196 | left:LeftHandSideExpression 1197 | __ 1198 | InToken 1199 | __ 1200 | right:Expression 1201 | __ 1202 | ")" 1203 | __ 1204 | body:Statement { 1205 | return { 1206 | type: "ForInStatement", 1207 | left: left, 1208 | right: right, 1209 | body: body, 1210 | }; 1211 | } 1212 | / ForToken 1213 | __ 1214 | "(" 1215 | __ 1216 | VarToken 1217 | __ 1218 | declarations:VariableDeclarationListNoIn 1219 | __ 1220 | InToken 1221 | __ 1222 | right:Expression 1223 | __ 1224 | ")" 1225 | __ 1226 | body:Statement { 1227 | return { 1228 | type: "ForInStatement", 1229 | left: { 1230 | type: "VariableDeclaration", 1231 | declarations: declarations, 1232 | kind: "var", 1233 | }, 1234 | right: right, 1235 | body: body, 1236 | }; 1237 | } 1238 | 1239 | ContinueStatement 1240 | = ContinueToken EOS { return { type: "ContinueStatement", label: null }; } 1241 | / ContinueToken _ label:Identifier EOS { 1242 | return { type: "ContinueStatement", label: label }; 1243 | } 1244 | 1245 | BreakStatement 1246 | = BreakToken EOS { return { type: "BreakStatement", label: null }; } 1247 | / BreakToken _ label:Identifier EOS { 1248 | return { type: 
"BreakStatement", label: label }; 1249 | } 1250 | 1251 | ReturnStatement 1252 | = ReturnToken EOS { return { type: "ReturnStatement", argument: null }; } 1253 | / ReturnToken _ argument:Expression EOS { 1254 | return { type: "ReturnStatement", argument: argument }; 1255 | } 1256 | 1257 | WithStatement 1258 | = WithToken __ "(" __ object:Expression __ ")" __ body:Statement { 1259 | return { type: "WithStatement", object: object, body: body }; 1260 | } 1261 | 1262 | SwitchStatement 1263 | = SwitchToken __ "(" __ discriminant:Expression __ ")" __ cases:CaseBlock { 1264 | return { 1265 | type: "SwitchStatement", 1266 | discriminant: discriminant, 1267 | cases: cases, 1268 | }; 1269 | } 1270 | 1271 | CaseBlock 1272 | = "{" __ clauses:(CaseClauses __)? "}" { 1273 | return optionalList(extractOptional(clauses, 0)); 1274 | } 1275 | / "{" 1276 | __ 1277 | before:(CaseClauses __)? 1278 | default_:DefaultClause 1279 | __ 1280 | after:(CaseClauses __)? 1281 | "}" { 1282 | return optionalList(extractOptional(before, 0)) 1283 | .concat(default_) 1284 | .concat(optionalList(extractOptional(after, 0))); 1285 | } 1286 | 1287 | CaseClauses 1288 | = head:CaseClause tail:(__ CaseClause)* { return buildList(head, tail, 1); } 1289 | 1290 | CaseClause 1291 | = CaseToken __ test:Expression __ ":" consequent:(__ StatementList)? { 1292 | return { 1293 | type: "SwitchCase", 1294 | test: test, 1295 | consequent: optionalList(extractOptional(consequent, 1)), 1296 | }; 1297 | } 1298 | 1299 | DefaultClause 1300 | = DefaultToken __ ":" consequent:(__ StatementList)? 
{ 1301 | return { 1302 | type: "SwitchCase", 1303 | test: null, 1304 | consequent: optionalList(extractOptional(consequent, 1)), 1305 | }; 1306 | } 1307 | 1308 | LabelledStatement 1309 | = label:Identifier __ ":" __ body:Statement { 1310 | return { type: "LabeledStatement", label: label, body: body }; 1311 | } 1312 | 1313 | ThrowStatement 1314 | = ThrowToken _ argument:Expression EOS { 1315 | return { type: "ThrowStatement", argument: argument }; 1316 | } 1317 | 1318 | TryStatement 1319 | = TryToken __ block:Block __ handler:Catch __ finalizer:Finally { 1320 | return { 1321 | type: "TryStatement", 1322 | block: block, 1323 | handler: handler, 1324 | finalizer: finalizer, 1325 | }; 1326 | } 1327 | / TryToken __ block:Block __ handler:Catch { 1328 | return { 1329 | type: "TryStatement", 1330 | block: block, 1331 | handler: handler, 1332 | finalizer: null, 1333 | }; 1334 | } 1335 | / TryToken __ block:Block __ finalizer:Finally { 1336 | return { 1337 | type: "TryStatement", 1338 | block: block, 1339 | handler: null, 1340 | finalizer: finalizer, 1341 | }; 1342 | } 1343 | 1344 | Catch 1345 | = CatchToken __ "(" __ param:Identifier __ ")" __ body:Block { 1346 | return { 1347 | type: "CatchClause", 1348 | param: param, 1349 | body: body, 1350 | }; 1351 | } 1352 | 1353 | Finally = FinallyToken __ block:Block { return block; } 1354 | 1355 | DebuggerStatement = DebuggerToken EOS { return { type: "DebuggerStatement" }; } 1356 | 1357 | // ----- A.5 Functions and Programs ----- 1358 | 1359 | FunctionDeclaration 1360 | = FunctionToken 1361 | __ 1362 | id:Identifier 1363 | __ 1364 | "(" 1365 | __ 1366 | params:(FormalParameterList __)? 1367 | ")" 1368 | __ 1369 | "{" 1370 | __ 1371 | body:FunctionBody 1372 | __ 1373 | "}" { 1374 | return { 1375 | type: "FunctionDeclaration", 1376 | id: id, 1377 | params: optionalList(extractOptional(params, 0)), 1378 | body: body, 1379 | }; 1380 | } 1381 | 1382 | FunctionExpression 1383 | = FunctionToken 1384 | __ 1385 | id:(Identifier __)? 
1386 | "(" 1387 | __ 1388 | params:(FormalParameterList __)? 1389 | ")" 1390 | __ 1391 | "{" 1392 | __ 1393 | body:FunctionBody 1394 | __ 1395 | "}" { 1396 | return { 1397 | type: "FunctionExpression", 1398 | id: extractOptional(id, 0), 1399 | params: optionalList(extractOptional(params, 0)), 1400 | body: body, 1401 | }; 1402 | } 1403 | 1404 | FormalParameterList 1405 | = head:Identifier tail:(__ "," __ Identifier)* { 1406 | return buildList(head, tail, 3); 1407 | } 1408 | 1409 | FunctionBody 1410 | = body:SourceElements? { 1411 | return { 1412 | type: "BlockStatement", 1413 | body: optionalList(body), 1414 | }; 1415 | } 1416 | 1417 | Program 1418 | = body:SourceElements? { 1419 | return { 1420 | type: "Program", 1421 | body: optionalList(body), 1422 | }; 1423 | } 1424 | 1425 | SourceElements 1426 | = head:SourceElement tail:(__ SourceElement)* { 1427 | return buildList(head, tail, 1); 1428 | } 1429 | 1430 | SourceElement 1431 | = Statement 1432 | / FunctionDeclaration 1433 | 1434 | // ----- A.6 Universal Resource Identifier Character Classes ----- 1435 | 1436 | // Irrelevant. 1437 | 1438 | // ----- A.7 Regular Expressions ----- 1439 | 1440 | // Irrelevant. 1441 | 1442 | // ----- A.8 JSON ----- 1443 | 1444 | // Irrelevant. 1445 | -------------------------------------------------------------------------------- /src/tests/grammars/json.pegjs: -------------------------------------------------------------------------------- 1 | // JSON Grammar 2 | // ============ 3 | // 4 | // Based on the grammar from RFC 7159 [1]. 5 | // 6 | // Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the 7 | // JSON website [4] (somewhat informally). The RFC seems the most authoritative 8 | // source, which is confirmed e.g. by [5]. 
9 | // 10 | // [1] http://tools.ietf.org/html/rfc7159 11 | // [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm 12 | // [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm 13 | // [4] http://json.org/ 14 | // [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON 15 | 16 | // ----- 2. JSON Grammar ----- 17 | 18 | JSON_text = ws value:value ws { return value; } 19 | 20 | begin_array = ws "[" ws 21 | 22 | begin_object = ws "{" ws 23 | 24 | end_array = ws "]" ws 25 | 26 | end_object = ws "}" ws 27 | 28 | name_separator = ws ":" ws 29 | 30 | value_separator = ws "," ws 31 | 32 | ws "whitespace" = [ \t\n\r]* 33 | 34 | // ----- 3. Values ----- 35 | 36 | value 37 | = false 38 | / null 39 | / true 40 | / object 41 | / array 42 | / number 43 | / string 44 | 45 | false = "false" { return false; } 46 | 47 | null = "null" { return null; } 48 | 49 | true = "true" { return true; } 50 | 51 | // ----- 4. Objects ----- 52 | 53 | object 54 | = begin_object 55 | members:( 56 | head:member tail:(value_separator m:member { return m; })* { 57 | var result = {}; 58 | 59 | [head].concat(tail).forEach(function (element) { 60 | result[element.name] = element.value; 61 | }); 62 | 63 | return result; 64 | } 65 | )? 66 | end_object { return members !== null ? members : {}; } 67 | 68 | member 69 | = name:string name_separator value:value { 70 | return { name: name, value: value }; 71 | } 72 | 73 | // ----- 5. Arrays ----- 74 | 75 | array 76 | = begin_array 77 | values:( 78 | head:value tail:(value_separator v:value { return v; })* { 79 | return [head].concat(tail); 80 | } 81 | )? 82 | end_array { return values !== null ? values : []; } 83 | 84 | // ----- 6. Numbers ----- 85 | 86 | number "number" = minus? int frac? exp? { return parseFloat(text()); } 87 | 88 | decimal_point = "." 89 | 90 | digit1_9 = [1-9] 91 | 92 | e = [eE] 93 | 94 | exp = e (minus / plus)? 
DIGIT+ 95 | 96 | frac = decimal_point DIGIT+ 97 | 98 | int 99 | = zero 100 | / (digit1_9 DIGIT*) 101 | 102 | minus = "-" 103 | 104 | plus = "+" 105 | 106 | zero = "0" 107 | 108 | // ----- 7. Strings ----- 109 | 110 | string "string" 111 | = quotation_mark chars:char* quotation_mark { return chars.join(""); } 112 | 113 | char 114 | = unescaped 115 | / escape 116 | sequence:( 117 | "\"" 118 | / "\\" 119 | / "/" 120 | / "b" { return "\b"; } 121 | / "f" { return "\f"; } 122 | / "n" { return "\n"; } 123 | / "r" { return "\r"; } 124 | / "t" { return "\t"; } 125 | / "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) { 126 | return String.fromCharCode(parseInt(digits, 16)); 127 | } 128 | ) { return sequence; } 129 | 130 | escape = "\\" 131 | 132 | quotation_mark = "\"" 133 | 134 | unescaped = [^\0-\x1F\x22\x5C] 135 | 136 | // ----- Core ABNF Rules ----- 137 | 138 | // See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). 139 | DIGIT = [0-9] 140 | 141 | HEXDIG = [0-9a-f]i 142 | -------------------------------------------------------------------------------- /src/tests/grammars/latex.pegjs: -------------------------------------------------------------------------------- 1 | { 2 | function compare_env(g1, g2) { 3 | return g1.content.join("") == g2.content.join(""); 4 | } 5 | } 6 | 7 | document "document" = token* 8 | 9 | token "token" 10 | = special_macro 11 | / macro 12 | / full_comment 13 | / group 14 | / math_shift eq:(!math_shift t:math_token { return t; })+ math_shift { 15 | return { type: "inlinemath", content: eq }; 16 | } 17 | / alignment_tab 18 | / parbreak 19 | / macro_parameter 20 | / superscript 21 | / subscript 22 | / ignore 23 | / number 24 | / whitespace 25 | / punctuation 26 | / x:(!nonchar_token x:. 
{ return x; })+ { return x.join(""); } 27 | 28 | parbreak "parbreak" = sp* nl sp* nl+ sp* { return { type: "parbreak" }; } 29 | 30 | math_token "math token" 31 | = special_macro 32 | / macro 33 | / x:full_comment { return x; } 34 | / whitespace* x:group whitespace* { return x; } 35 | / whitespace* x:alignment_tab whitespace* { return x; } 36 | / whitespace* x:macro_parameter whitespace* { return x; } 37 | / whitespace* superscript whitespace* x:math_token { 38 | return { type: "superscript", content: x }; 39 | } 40 | / whitespace* subscript whitespace* x:math_token { 41 | return { type: "subscript", content: x }; 42 | } 43 | / ignore 44 | / whitespace 45 | / . 46 | 47 | args_token "args token" 48 | = special_macro 49 | / macro 50 | / full_comment 51 | / group 52 | / math_shift eq:(!math_shift t:math_token { return t; })+ math_shift { 53 | return { type: "inlinemath", content: eq }; 54 | } 55 | / alignment_tab 56 | / sp* nl sp* nl+ sp* { return { type: "parbreak" }; } 57 | / macro_parameter 58 | / superscript 59 | / subscript 60 | / ignore 61 | / number 62 | / whitespace 63 | / punctuation 64 | / x:(!(nonchar_token / "," / "]") x:. { return x; })+ { return x.join(""); } 65 | 66 | nonchar_token "nonchar token" 67 | = escape 68 | / "%" 69 | / begin_group 70 | / end_group 71 | / math_shift 72 | / alignment_tab 73 | / nl 74 | / macro_parameter 75 | / superscript 76 | / subscript 77 | / ignore 78 | / sp 79 | / punctuation 80 | / EOF 81 | 82 | whitespace "whitespace" 83 | = (nl sp* / sp+ nl !comment sp* !nl / sp+) { return { type: "whitespace" }; } 84 | 85 | number "number" 86 | = a:num+ "." b:num+ { return a.join("") + "." + b.join(""); } 87 | / "." b:num+ { return "." + b.join(""); } 88 | / a:num+ "." { return a.join("") + "."; } 89 | 90 | special_macro "special macro" // for the special macros like \[ \] and \begin{} \end{} etc. 91 | // \verb|xxx| and \verb*|xxx| 92 | = escape 93 | env:("verb*" / "verb") 94 | e:. 95 | x:(!(end:. & { return end == e; }) x:. 
{ return x; })* 96 | (end:. & { return end == e; }) { 97 | return { type: "verb", env: env, escape: e, content: x.join("") }; 98 | } 99 | // verbatim environment 100 | / verbatim_environment 101 | //display math with \[...\] 102 | / begin_display_math 103 | x:(!end_display_math x:math_token { return x; })* 104 | end_display_math { return { type: "displaymath", content: x }; } 105 | //inline math with \(...\) 106 | / begin_inline_math 107 | x:(!end_inline_math x:math_token { return x; })* 108 | end_inline_math { return { type: "inlinemath", content: x }; } 109 | //display math with $$ $$ 110 | / math_shift 111 | math_shift 112 | x:(!(math_shift math_shift) x:math_token { return x; })* 113 | math_shift 114 | math_shift { return { type: "displaymath", content: x }; } 115 | / math_environment 116 | / environment 117 | 118 | verbatim_environment "verbatim environment" 119 | = begin_env 120 | begin_group 121 | env:verbatim_env_name 122 | end_group 123 | body:( 124 | !( 125 | end_env 126 | end_env:group 127 | & { return compare_env({ content: [env] }, end_env); } 128 | ) 129 | x:. { return x; } 130 | )* 131 | end_env 132 | begin_group 133 | verbatim_env_name 134 | end_group { 135 | return { 136 | type: "verbatim", 137 | env: env, 138 | content: body.join(""), 139 | }; 140 | } 141 | 142 | verbatim_env_name 143 | // standard verbatim environments. `verbatim*` must be listed first 144 | = "verbatim*" 145 | / "verbatim" 146 | // comment environment provided by \usepackage{verbatim} 147 | / "comment" 148 | // lstlisting environment provided by \usepackage{listings} 149 | / "lstlisting" 150 | 151 | macro "macro" 152 | = m:(escape n:char+ { return n.join(""); } / escape n:. 
{ return n; }) { 153 | return { type: "macro", content: m }; 154 | } 155 | 156 | group "group" 157 | = begin_group x:(!end_group c:token { return c; })* end_group { 158 | return { type: "group", content: x }; 159 | } 160 | 161 | argument_list "argument list" 162 | = whitespace* "[" body:(!"]" x:("," / args_token) { return x; })* "]" { 163 | return { type: "argument", content: body, openMark: "[", closeMark: "]" }; 164 | } 165 | 166 | environment "environment" 167 | = begin_env 168 | env:group 169 | args:argument_list? 170 | env_comment:env_comment? 171 | body:( 172 | !(end_env end_env:group & { return compare_env(env, end_env); }) x:token { 173 | return x; 174 | } 175 | )* 176 | end_env 177 | group { 178 | return { 179 | type: "environment", 180 | env: env.content, 181 | args: args, 182 | content: env_comment ? [env_comment, ...body] : body, 183 | }; 184 | } 185 | 186 | math_environment "math environment" 187 | = begin_env 188 | begin_group 189 | env:math_env_name 190 | end_group 191 | env_comment:env_comment? 192 | body:( 193 | !( 194 | end_env 195 | end_env:group 196 | & { return compare_env({ content: [env] }, end_env); } 197 | ) 198 | x:math_token { return x; } 199 | )* 200 | end_env 201 | begin_group 202 | math_env_name 203 | end_group { 204 | return { 205 | type: "mathenv", 206 | env: env, 207 | content: env_comment ? [env_comment, ...body] : body, 208 | }; 209 | } 210 | 211 | math_group "math group" // group that assumes you're in math mode. If you use "\text{}" this isn't a good idea.... 
212 | = begin_group x:(!end_group c:math_token { return c; })* end_group { 213 | return { type: "group", content: x }; 214 | } 215 | 216 | full_comment "full comment" // comment that detects whether it is at the end of a line or on a new line 217 | = start_of_line x:comment { 218 | return { type: "comment", content: x, sameline: false }; 219 | } 220 | / leading_sp x:comment { 221 | return { 222 | type: "comment", 223 | content: x, 224 | sameline: false, 225 | leadingWhitespace: true, 226 | }; 227 | } 228 | / sp* nl leading_sp? x:comment_and_parbreak { 229 | return { 230 | type: "comment", 231 | content: x, 232 | sameline: false, 233 | suffixParbreak: true, 234 | }; 235 | } 236 | / sp* nl leading_sp? x:comment { 237 | return { type: "comment", content: x, sameline: false }; 238 | } 239 | / x:comment_and_parbreak { 240 | return { 241 | type: "comment", 242 | content: x, 243 | sameline: true, 244 | suffixParbreak: true, 245 | }; 246 | } 247 | / x:comment { return { type: "comment", content: x, sameline: true }; } 248 | 249 | env_comment "environment comment" 250 | = sp:sp* comment:comment { 251 | return { 252 | type: "comment", 253 | content: comment, 254 | sameline: true, 255 | leadingWhitespace: sp.length > 0, 256 | }; 257 | } 258 | 259 | begin_display_math = escape "[" 260 | 261 | end_display_math = escape "]" 262 | 263 | begin_inline_math = escape "(" 264 | 265 | end_inline_math = escape ")" 266 | 267 | begin_env = escape "begin" 268 | 269 | end_env = escape "end" 270 | 271 | math_env_name 272 | = "equation*" 273 | / "equation" 274 | / "alignat*" // the "alignat" variants must precede "align": PEG choice is ordered and "align" is a prefix of "alignat", so listing "align" first made them unreachable 275 | / "alignat" 276 | / "align*" 277 | / "align" 278 | / "gather*" 279 | / "gather" 280 | / "multline*" 281 | / "multline" 282 | / "flalign*" 283 | / "flalign" 284 | / "split" 285 | / "math" 286 | / "displaymath" 287 | 288 | escape "escape" 289 | = "\\" // catcode 0 290 | 291 | begin_group 292 | = "{" // catcode 1 293 | 294 | end_group 295 | = "}" // catcode 2 296 | 297 | math_shift 298 | = "$" // catcode 3 299 | 300 | 
alignment_tab 301 | = "&" // catcode 4 302 | 303 | nl "newline" 304 | = "\r\n" // catcode 5 (windows); tried first because PEG choice is ordered and a bare "\r" would otherwise consume half of a CRLF 305 | / "\r" // (classic mac) 306 | / "\n" // (linux, os x) 307 | 308 | macro_parameter 309 | = "#" // catcode 6 310 | 311 | superscript 312 | = "^" // catcode 7 313 | 314 | subscript 315 | = "_" // catcode 8 316 | 317 | ignore 318 | = "\0" // catcode 9 319 | 320 | sp "whitespace" 321 | = [ \t]+ { return " "; } // catcode 10 322 | 323 | char "letter" 324 | = c:[a-zA-Z] // catcode 11 325 | 326 | num "digit" 327 | = n:[0-9] // catcode 12 (other) 328 | 329 | punctuation "punctuation" 330 | = p:[.,;:\-\*/()!?=+<>\[\]] // catcode 12 331 | 332 | // catcode 14, including the newline 333 | comment_start = "%" 334 | 335 | comment_and_parbreak 336 | = comment_start c:(!nl c:. { return c; })* &parbreak { return c.join(""); } // parbreaks following a comment are preserved 337 | 338 | comment "comment" 339 | // A comment normally consumes the next newline and all leading whitespace. 340 | // The exception is if the next line consists solely of a comment. In that case, 341 | // consume the newline but leave the whitespace (`full_comment` will eat the 342 | // leading whitespace) 343 | = comment_start 344 | c:(!nl c:. { return c; })* 345 | (nl sp* !comment_start / nl / EOF) { return c.join(""); } // if a comment is not followed by a parbreak, the newline is consumed 346 | 347 | // Whitespace at the start of a line only 348 | leading_sp = start_of_line sp+ { return " "; } 349 | 350 | start_of_line 351 | = & { 352 | var loc = location(); 353 | return loc.start.column === 1; 354 | } 355 | 356 | EOF = !. 357 | -------------------------------------------------------------------------------- /src/tests/grammars/pegjs-modified.pegjs: -------------------------------------------------------------------------------- 1 | // PEG.js Grammar 2 | // ============== 3 | // 4 | // PEG.js grammar syntax is designed to be simple, expressive, and similar to 5 | // JavaScript where possible. 
This means that many rules, especially in the 6 | // lexical part, are based on the grammar from ECMA-262, 5.1 Edition [1]. Some 7 | // are directly taken or adapted from the JavaScript example grammar (see 8 | // examples/javascript.pegjs). 9 | // 10 | // Limitations: 11 | // 12 | // * Non-BMP characters are completely ignored to avoid surrogate pair 13 | // handling. 14 | // 15 | // * One can create identifiers containing illegal characters using Unicode 16 | // escape sequences. For example, "abcd\u0020efgh" is not a valid 17 | // identifier, but it is accepted by the parser. 18 | // 19 | // Both limitations could be resolved, but the costs would likely outweigh 20 | // the benefits. 21 | // 22 | // [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm 23 | 24 | { 25 | // Used as a shorthand property name for `LabeledExpression` 26 | const pick = true; 27 | 28 | // Used by `LabelIdentifier` to disallow the use of certain words as labels 29 | const RESERVED_WORDS = {}; 30 | 31 | // Populate `RESERVED_WORDS` using the optional option `reservedWords` 32 | const reservedWords = options.reservedWords || ["if", "in"]; 33 | if (Array.isArray(reservedWords)) 34 | reservedWords.forEach((word) => { 35 | RESERVED_WORDS[word] = true; 36 | }); 37 | 38 | // Helper to construct a new AST Node 39 | function createNode(type, details) { 40 | const ret = { 41 | type, 42 | loc: location(), 43 | }; 44 | if (details != null) { 45 | Object.assign(ret, details); 46 | } 47 | return ret; 48 | } 49 | 50 | // Used by `addComment` to store comments for the Grammar AST 51 | const comments = []; 52 | options.extractComments = true; 53 | 54 | // Helper that collects all the comments to pass to the Grammar AST 55 | function addComment(text, multiline) { 56 | //console.log("found comment", text) 57 | if (options.extractComments) { 58 | const loc = location(); 59 | 60 | // If there is a node already stored with our starting location, 61 | // we are being processed a second 
time, so just return it 62 | if (comments.find((c) => c.loc.start.offset === loc.start.offset)) { 63 | return text; 64 | } 65 | 66 | comments.push({ 67 | type: "comment", 68 | value: text, 69 | multiline: multiline, 70 | loc, 71 | }); 72 | } 73 | 74 | return text; 75 | } 76 | 77 | } 78 | 79 | // ---- Syntactic Grammar ----- 80 | 81 | Grammar 82 | = __ 83 | initializer:(a:Initializer { return a; })? 84 | __? 85 | rules:(a:Rule __ { return a; })+ { 86 | return createNode("grammar", { 87 | initializer, 88 | rules, 89 | comments, 90 | location: location(), 91 | }); 92 | } 93 | 94 | Initializer 95 | = code:(code:CodeBlock { return createNode("initializer", { code }); }) EOS { 96 | return code; 97 | } 98 | 99 | //Rule 100 | // = name:Identifier __ displayName:(a:(a:StringLiteral {return createNode("stringliteral", {value: a})}) __ {return a})? 101 | // delimiter:("=" {return createNode("delimiter", {value: "="})}) __ expression:Expression EOS { 102 | // return createNode( "rule", { name, displayName, expression, delimiter } ); 103 | // } 104 | Rule 105 | = name:Identifier 106 | __ 107 | displayName:( 108 | a:(a:StringLiteral { return createNode("stringliteral", { value: a }); }) 109 | __ { return a; } 110 | )? 
111 | delimiter:("=" { return createNode("delimiter", { value: "=" }); }) 112 | __ 113 | expression:Expression 114 | EOS { 115 | return createNode("rule", { 116 | name, 117 | displayName, 118 | expression, 119 | delimiter, 120 | }); 121 | } 122 | 123 | Expression = ChoiceExpression 124 | 125 | ChoiceExpression 126 | = head:ActionExpression 127 | tail:( 128 | __ 129 | delimiter:("/" { return createNode("delimiter", { value: "/" }); }) 130 | __ 131 | a:ActionExpression { return [a, delimiter]; } 132 | )* { 133 | if (tail.length === 0) { 134 | return head; 135 | } 136 | 137 | return createNode("choice", { 138 | alternatives: [head].concat(tail.map((x) => x[0])), 139 | delimiters: tail.map((x) => x[1]), 140 | }); 141 | } 142 | 143 | ActionExpression 144 | = expression:SequenceExpression code:(__ a:CodeBlock { return a; })? { 145 | if (code === null) return expression; 146 | 147 | return createNode("action", { expression, code }); 148 | } 149 | 150 | SequenceExpression 151 | = head:LabeledExpression tail:(__ a:LabeledExpression { return a; })* { 152 | let elements = [head]; 153 | 154 | if (tail.length === 0) { 155 | if (head.type !== "labeled" || !head.pick) return head; 156 | } else { 157 | elements = elements.concat(tail); 158 | } 159 | 160 | return createNode("sequence", { elements }); 161 | } 162 | 163 | LabeledExpression 164 | = "@" label:LabelIdentifier? 
__ expression:PrefixedExpression { 165 | /* The "@" (pick) alternative has no label named `pick`, so the flag is set explicitly; SequenceExpression checks head.pick. `label` is null when "@" is used without a label. */ return createNode("labeled", { pick: true, label, expression }); 166 | } 167 | / label:LabelIdentifier __ expression:PrefixedExpression { 168 | return createNode("labeled", { label, expression }); 169 | } 170 | / PrefixedExpression 171 | 172 | LabelIdentifier 173 | = name:Identifier __ ":" { 174 | if (RESERVED_WORDS[name] !== true) return name; 175 | 176 | error(`Label can't be a reserved word "${name}".`, location()); 177 | } 178 | 179 | PrefixedExpression 180 | = operator:PrefixedOperator __ expression:SuffixedExpression { 181 | return createNode(operator, { expression }); 182 | } 183 | / SuffixedExpression 184 | 185 | PrefixedOperator 186 | = "$" { return "text"; } 187 | / "&" { return "simple_and"; } 188 | / "!" { return "simple_not"; } 189 | 190 | SuffixedExpression 191 | = expression:PrimaryExpression __ operator:SuffixedOperator { 192 | return createNode(operator, { expression }); 193 | } 194 | / PrimaryExpression 195 | 196 | SuffixedOperator 197 | = "?" { return "optional"; } 198 | / "*" { return "zero_or_more"; } 199 | / "+" { return "one_or_more"; } 200 | 201 | PrimaryExpression 202 | = LiteralMatcher 203 | / CharacterClassMatcher 204 | / AnyMatcher 205 | / RuleReferenceExpression 206 | / SemanticPredicateExpression 207 | / "(" __ e:Expression __ ")" { 208 | // The purpose of the "group" AST node is just to isolate label scope. We 209 | // don't need to put it around nodes that can't contain any labels or 210 | // nodes that already isolate label scope themselves. 211 | if (e.type !== "labeled" && e.type !== "sequence") return e; 212 | 213 | // This leaves us with "labeled" and "sequence". 214 | return createNode("group", { expression: e }); 215 | } 216 | 217 | RuleReferenceExpression 218 | = name:Identifier !(__ (StringLiteral __)? 
"=") { 219 | return createNode("rule_ref", { name }); 220 | } 221 | 222 | SemanticPredicateExpression 223 | = operator:SemanticPredicateOperator __ code:CodeBlock { 224 | return createNode(operator, { code }); 225 | } 226 | 227 | SemanticPredicateOperator 228 | = "&" { return "semantic_and"; } 229 | / "!" { return "semantic_not"; } 230 | 231 | // ---- Lexical Grammar ----- 232 | 233 | SourceCharacter = . 234 | 235 | WhiteSpace "whitespace" 236 | = "\t" 237 | / "\v" 238 | / "\f" 239 | / " " 240 | / "\u00A0" 241 | / "\uFEFF" 242 | / Zs 243 | 244 | LineTerminator = [\n\r\u2028\u2029] 245 | 246 | LineTerminatorSequence "end of line" 247 | = "\n" 248 | / "\r\n" 249 | / "\r" 250 | / "\u2028" 251 | / "\u2029" 252 | 253 | Comment "comment" 254 | = a:MultiLineComment { 255 | return createNode("comment", { block: true, text: a }); 256 | } 257 | / a:SingleLineComment { 258 | /* A "//" comment is not a block comment; this mirrors addComment(comment, false) in SingleLineComment. */ return createNode("comment", { block: false, text: a }); 259 | } 260 | 261 | MultiLineComment 262 | = "/*" comment:$(!"*/" SourceCharacter)* "*/" { 263 | return addComment(comment, true); 264 | } 265 | 266 | MultiLineCommentNoLineTerminator 267 | = "/*" comment:$(!("*/" / LineTerminator) SourceCharacter)* "*/" { 268 | return addComment(comment, true); 269 | } 270 | 271 | SingleLineComment 272 | = "//" comment:$(!LineTerminator SourceCharacter)* { 273 | return addComment(comment, false); 274 | } 275 | 276 | Identifier "identifier" 277 | = head:IdentifierStart tail:IdentifierPart* { return head + tail.join(""); } 278 | 279 | IdentifierStart 280 | = UnicodeLetter 281 | / "$" 282 | / "_" 283 | / "\\" a:UnicodeEscapeSequence { return a; } 284 | 285 | IdentifierPart 286 | = IdentifierStart 287 | / UnicodeCombiningMark 288 | / UnicodeDigit 289 | / UnicodeConnectorPunctuation 290 | / "\u200C" 291 | / "\u200D" 292 | 293 | UnicodeLetter 294 | = Lu 295 | / Ll 296 | / Lt 297 | / Lm 298 | / Lo 299 | / Nl 300 | 301 | UnicodeCombiningMark 302 | = Mn 303 | / Mc 304 | 305 | UnicodeDigit = Nd 306 | 307 | 
UnicodeConnectorPunctuation = Pc 308 | 309 | LiteralMatcher "literal" 310 | = value:StringLiteral ignoreCase:"i"? { 311 | return createNode("literal", { 312 | value: value, 313 | ignoreCase: ignoreCase !== null, 314 | }); 315 | } 316 | 317 | StringLiteral "string" 318 | = "\"" chars:DoubleStringCharacter* "\"" { return chars.join(""); } 319 | / "'" chars:SingleStringCharacter* "'" { return chars.join(""); } 320 | 321 | DoubleStringCharacter 322 | = !("\"" / "\\" / LineTerminator) a:SourceCharacter { return a; } 323 | / "\\" a:EscapeSequence { return "\\" + a; } 324 | / LineContinuation 325 | 326 | SingleStringCharacter 327 | = !("'" / "\\" / LineTerminator) a:SourceCharacter { return a; } 328 | / "\\" a:EscapeSequence { return "\\" + a; } 329 | / LineContinuation 330 | 331 | CharacterClassMatcher "character class" 332 | = "[" inverted:"^"? parts:CharacterPart* "]" ignoreCase:"i"? { 333 | return createNode("class", { 334 | parts: parts.filter((part) => part !== ""), 335 | inverted: inverted !== null, 336 | ignoreCase: ignoreCase !== null, 337 | }); 338 | } 339 | 340 | CharacterPart 341 | = ClassCharacterRange 342 | / ClassCharacter 343 | 344 | ClassCharacterRange 345 | = begin:ClassCharacter "-" end:ClassCharacter { 346 | if (begin.charCodeAt(0) > end.charCodeAt(0)) 347 | error("Invalid character range: " + text() + "."); 348 | 349 | return [begin, end]; 350 | } 351 | 352 | ClassCharacter 353 | = !("]" / "\\" / LineTerminator) a:SourceCharacter { return a; } 354 | // This line needs to be changed from the actual `pegjs.pegjs` file 355 | // to produce consistent parser output. 
See https://github.com/pegjs/pegjs/issues/650 356 | / "\\" a:EscapeSequence { return /*SPECIAL COMMENT*/ "\\" + a; } 357 | / LineContinuation 358 | 359 | /* A backslash followed by a line terminator contributes nothing to the value. */ LineContinuation = "\\" LineTerminatorSequence { return ""; } 360 | 361 | EscapeSequence 362 | = CharacterEscapeSequence 363 | / /* "0" is accepted only when no decimal digit follows it */ "0" !DecimalDigit { return "0"; } 364 | / HexEscapeSequence 365 | / UnicodeEscapeSequence 366 | 367 | CharacterEscapeSequence 368 | = SingleEscapeCharacter 369 | / NonEscapeCharacter 370 | 371 | SingleEscapeCharacter 372 | = "'" 373 | / "\"" 374 | / "\\" 375 | / "b" 376 | / "f" 377 | / "n" 378 | / "r" 379 | / "t" 380 | / "v" 381 | 382 | NonEscapeCharacter 383 | = !(EscapeCharacter / LineTerminator) a:SourceCharacter { return a; } 384 | 385 | EscapeCharacter 386 | = SingleEscapeCharacter 387 | / DecimalDigit 388 | / "x" 389 | / "u" 390 | 391 | /* Hex and unicode escapes keep their "x"/"u" prefix; the leading backslash is added by the calling rule. */ HexEscapeSequence = "x" digits:$(HexDigit HexDigit) { return "x" + digits; } 392 | 393 | UnicodeEscapeSequence 394 | = "u" digits:$(HexDigit HexDigit HexDigit HexDigit) { return "u" + digits; } 395 | 396 | DecimalDigit = [0-9] 397 | 398 | HexDigit = [0-9a-f]i 399 | 400 | AnyMatcher = "." 
{ return createNode("any"); } 401 | 402 | /* A "{ ... }" code block; yields the inner text without the outer braces. */ CodeBlock "code block" 403 | = "{" a:Code "}" { return a; } 404 | / /* tried only when the balanced form above did not match */ "{" { error("Unbalanced brace."); } 405 | 406 | /* Brace-balanced text (nested "{ }" pairs allowed), captured verbatim by $. */ Code = $((![{}] SourceCharacter)+ / "{" Code "}")* 407 | 408 | // Unicode Character Categories 409 | // 410 | // Extracted from the following Unicode Character Database file: 411 | // 412 | // http://www.unicode.org/Public/11.0.0/ucd/extracted/DerivedGeneralCategory.txt 413 | // 414 | // Unix magic used: 415 | // 416 | // grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters 417 | // cut -f1 -d " " | # Extract code points 418 | // grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters 419 | // sed -e 's/\.\./-/' | # Adjust formatting 420 | // sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting 421 | // tr -d '\n' # Join lines 422 | // 423 | // ECMA-262 allows using Unicode 3.0 or later; the tables below were extracted 424 | // from version 11.0.0 (see the URL above). 425 | // 426 | // Non-BMP characters are completely ignored to avoid surrogate pair handling 427 | // (detecting surrogate pairs isn't possible with a simple character class and 428 | // other methods would degrade performance). I don't consider it a big deal as 429 | // even parsers in JavaScript engines of common browsers seem to ignore them. 
430 | 431 | // Letter, Lowercase 432 | Ll 433 | = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0560-\u0588\u10D0-\u10FA\u10FD-\u10FF\u13F8-\u13FD\u1C80-\u1C88\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u
1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\u
A793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A] 434 | 435 | // Letter, Modifier 436 | Lm 437 | = [\u02B0-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0374\u037A\u0559\u0640\u06E5-\u06E6\u07F4-\u07F5\u07FA\u081A\u0824\u0828\u0971\u0E46\u0EC6\u10FC\u17D7\u1843\u1AA7\u1C78-\u1C7D\u1D2C-\u1D6A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\u2D6F\u2E2F\u3005\u3031-\u3035\u303B\u309D-\u309E\u30FC-\u30FE\uA015\uA4F8-\uA4FD\uA60C\uA67F\uA69C-\uA69D\uA717-\uA71F\uA770\uA788\uA7F8-\uA7F9\uA9CF\uA9E6\uAA70\uAADD\uAAF3-\uAAF4\uAB5C-\uAB5F\uFF70\uFF9E-\uFF9F] 438 | 439 | // Letter, Other 440 | Lo 441 | = [\u00AA\u00BA\u01BB\u01C0-\u01C3\u0294\u05D0-\u05EA\u05EF-\u05F2\u0620-\u063F\u0641-\u064A\u066E-\u066F\u0671-\u06D3\u06D5\u06EE-\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u0800-\u0815\u0840-\u0858\u0860-\u086A\u08A0-\u08B4\u08B6-\u08BD\u0904-\u0939\u093D\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u09FC\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0-\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60-\u0C61\u0C80\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0-\u0CE1\u0CF1-\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D54-\u0D56\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0
DC6\u0E01-\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065-\u1066\u106E-\u1070\u1075-\u1081\u108E\u1100-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17DC\u1820-\u1842\u1844-\u1878\u1880-\u1884\u1887-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE-\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C77\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5-\u1CF6\u2135-\u2138\u2D30-\u2D67\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u3006\u303C\u3041-\u3096\u309F\u30A1-\u30FA\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FEF\uA000-\uA014\uA016-\uA48C\uA4D0-\uA4F7\uA500-\uA60B\uA610-\uA61F\uA62A-\uA62B\uA66E\uA6A0-\uA6E5\uA78F\uA7F7\uA7FB-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD-\uA8FE\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9E0-\uA9E4\uA9E7-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA6F\uAA71-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5-\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADC\uAAE0-\uAAEA\uAAF2\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uF
BD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF66-\uFF6F\uFF71-\uFF9D\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC] 442 | 443 | // Letter, Titlecase 444 | Lt 445 | = [\u01C5\u01C8\u01CB\u01F2\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FBC\u1FCC\u1FFC] 446 | 447 | // Letter, Uppercase 448 | Lu 449 | = [\u0041-\u005A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A-\u023B\u023D-\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9-\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0-\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F
4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1C90-\u1CBA\u1CBD-\u1CBF\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E-\u213F\u2145\u2183\u2C00-\u2C2E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA74
0\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D-\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\uFF21-\uFF3A] 450 | 451 | // Mark, Spacing Combining 452 | Mc 453 | = [\u0903\u093B\u093E-\u0940\u0949-\u094C\u094E-\u094F\u0982-\u0983\u09BE-\u09C0\u09C7-\u09C8\u09CB-\u09CC\u09D7\u0A03\u0A3E-\u0A40\u0A83\u0ABE-\u0AC0\u0AC9\u0ACB-\u0ACC\u0B02-\u0B03\u0B3E\u0B40\u0B47-\u0B48\u0B4B-\u0B4C\u0B57\u0BBE-\u0BBF\u0BC1-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD7\u0C01-\u0C03\u0C41-\u0C44\u0C82-\u0C83\u0CBE\u0CC0-\u0CC4\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D40\u0D46-\u0D48\u0D4A-\u0D4C\u0D57\u0D82-\u0D83\u0DCF-\u0DD1\u0DD8-\u0DDF\u0DF2-\u0DF3\u0F3E-\u0F3F\u0F7F\u102B-\u102C\u1031\u1038\u103B-\u103C\u1056-\u1057\u1062-\u1064\u1067-\u106D\u1083-\u1084\u1087-\u108C\u108F\u109A-\u109C\u17B6\u17BE-\u17C5\u17C7-\u17C8\u1923-\u1926\u1929-\u192B\u1930-\u1931\u1933-\u1938\u1A19-\u1A1A\u1A55\u1A57\u1A61\u1A63-\u1A64\u1A6D-\u1A72\u1B04\u1B35\u1B3B\u1B3D-\u1B41\u1B43-\u1B44\u1B82\u1BA1\u1BA6-\u1BA7\u1BAA\u1BE7\u1BEA-\u1BEC\u1BEE\u1BF2-\u1BF3\u1C24-\u1C2B\u1C34-\u1C35\u1CE1\u1CF2-\u1CF3\u1CF7\u302E-\u302F\uA823-\uA824\uA827\uA880-\uA881\uA8B4-\uA8C3\uA952-\uA953\uA983\uA9B4-\uA9B5\uA9BA-\uA9BB\uA9BD-\uA9C0\uAA2F-\uAA30\uAA33-\uAA34\uAA4D\uAA7B\uAA7D\uAAEB\uAAEE-\uAAEF\uAAF5\uABE3-\uABE4\uABE6-\uABE7\uABE9-\uABEA\uABEC] 454 | 455 | // Mark, Nonspacing 456 | Mn 457 | = 
[\u0300-\u036F\u0483-\u0487\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D3-\u08E1\u08E3-\u0902\u093A\u093C\u0941-\u0948\u094D\u0951-\u0957\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u09FE\u0A01-\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A51\u0A70-\u0A71\u0A75\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0AE2-\u0AE3\u0AFA-\u0AFF\u0B01\u0B3C\u0B3F\u0B41-\u0B44\u0B4D\u0B56\u0B62-\u0B63\u0B82\u0BC0\u0BCD\u0C00\u0C04\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C62-\u0C63\u0C81\u0CBC\u0CBF\u0CC6\u0CCC-\u0CCD\u0CE2-\u0CE3\u0D00-\u0D01\u0D3B-\u0D3C\u0D41-\u0D44\u0D4D\u0D62-\u0D63\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032-\u1037\u1039-\u103A\u103D-\u103E\u1058-\u1059\u105E-\u1060\u1071-\u1074\u1082\u1085-\u1086\u108D\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17B4-\u17B5\u17B7-\u17BD\u17C6\u17C9-\u17D3\u17DD\u180B-\u180D\u1885-\u1886\u18A9\u1920-\u1922\u1927-\u1928\u1932\u1939-\u193B\u1A17-\u1A18\u1A1B\u1A56\u1A58-\u1A5E\u1A60\u1A62\u1A65-\u1A6C\u1A73-\u1A7C\u1A7F\u1AB0-\u1ABD\u1B00-\u1B03\u1B34\u1B36-\u1B3A\u1B3C\u1B42\u1B6B-\u1B73\u1B80-\u1B81\u1BA2-\u1BA5\u1BA8-\u1BA9\u1BAB-\u1BAD\u1BE6\u1BE8-\u1BE9\u1BED\u1BEF-\u1BF1\u1C2C-\u1C33\u1C36-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE0\u1CE2-\u1CE8\u1CED\u1CF4\u1CF8-\u1CF9\u1DC0-\u1DF9\u1DFB-\u1DFF\u20D0-\u20DC\u20E1\u20E5-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302D\u3099-\u309A\uA66F\uA674-\uA67D\uA69E-\uA69F\uA6F0-\uA6F1\uA802\uA806\uA80B\uA825-\uA826\uA8C4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA951\uA980-\uA982\uA9B3\uA9B6-\uA9B9\uA9BC\uA9E5\uAA29-\uAA2E\uAA31-\uAA3
2\uAA35-\uAA36\uAA43\uAA4C\uAA7C\uAAB0\uAAB2-\uAAB4\uAAB7-\uAAB8\uAABE-\uAABF\uAAC1\uAAEC-\uAAED\uAAF6\uABE5\uABE8\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F] 458 | 459 | // Number, Decimal Digit 460 | Nd 461 | = [\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19] 462 | 463 | // Number, Letter 464 | Nl 465 | = [\u16EE-\u16F0\u2160-\u2182\u2185-\u2188\u3007\u3021-\u3029\u3038-\u303A\uA6E6-\uA6EF] 466 | 467 | // Punctuation, Connector 468 | Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F] 469 | 470 | // Separator, Space 471 | Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] 472 | 473 | // Skipped 474 | 475 | __ = (WhiteSpace / LineTerminatorSequence / Comment)* 476 | 477 | _ = (WhiteSpace / MultiLineCommentNoLineTerminator)* 478 | 479 | // Automatic Semicolon Insertion 480 | 481 | EOS 482 | = __ ";" 483 | / _ SingleLineComment? LineTerminatorSequence 484 | / __ EOF 485 | 486 | EOF = !. 
487 | -------------------------------------------------------------------------------- /src/tests/parser.test.js: -------------------------------------------------------------------------------- 1 | import util from "util"; 2 | 3 | import * as pegjsParser from "../libs/parser"; 4 | 5 | /* eslint-env jest */ 6 | 7 | // Make console.log pretty-print by default 8 | export const origLog = console.log; 9 | console.log = (...args) => { 10 | origLog(...args.map((x) => util.inspect(x, false, 10, true))); 11 | }; 12 | 13 | describe("Basic parse", () => { 14 | it("Parses trivial grammar", () => { 15 | pegjsParser.parse('a = "a"'); 16 | pegjsParser.parse('a \n = "a"'); 17 | pegjsParser.parse('a = \n"a"'); 18 | pegjsParser.parse('a = "a" / "b"'); 19 | }); 20 | 21 | it("Parses simple grammars from pegjs test suite", () => { 22 | const grammars = [ 23 | "start = (a:'a') &{ return a === 'a'; }", 24 | "start = (a:'a')? &{ return a === 'a'; }", 25 | "start = (a:'a')* &{ return a === 'a'; }", 26 | "start = (a:'a')+ &{ return a === 'a'; }", 27 | "start = $(a:'a') &{ return a === 'a'; }", 28 | "start = &(a:'a') 'a' &{ return a === 'a'; }", 29 | "start = !(a:'a') 'b' &{ return a === 'a'; }", 30 | "start = b:(a:'a') &{ return a === 'a'; }", 31 | "start = ('a' b:'b' 'c') &{ return b === 'b'; }", 32 | "start = (a:'a' { return a; }) &{ return a === 'a'; }", 33 | "start = ('a' / b:'b' / 'c') &{ return b === 'b'; }", 34 | "start = (a:'a') !{ return a !== 'a'; }", 35 | "start = (a:'a')? 
!{ return a !== 'a'; }", 36 | "start = (a:'a')* !{ return a !== 'a'; }", 37 | "start = (a:'a')+ !{ return a !== 'a'; }", 38 | "start = $(a:'a') !{ return a !== 'a'; }", 39 | "start = &(a:'a') 'a' !{ return a !== 'a'; }", 40 | "start = !(a:'a') 'b' !{ return a !== 'a'; }", 41 | "start = b:(a:'a') !{ return a !== 'a'; }", 42 | "start = ('a' b:'b' 'c') !{ return b !== 'b'; }", 43 | "start = (a:'a' { return a; }) !{ return a !== 'a'; }", 44 | "start = ('a' / b:'b' / 'c') !{ return b !== 'b'; }", 45 | "start = (a:'a') { return a; }", 46 | "start = (a:'a')? { return a; }", 47 | "start = (a:'a')* { return a; }", 48 | "start = (a:'a')+ { return a; }", 49 | "start = $(a:'a') { return a; }", 50 | "start = &(a:'a') 'a' { return a; }", 51 | "start = !(a:'a') 'b' { return a; }", 52 | "start = b:(a:'a') { return a; }", 53 | "start = ('a' b:'b' 'c') { return b; }", 54 | "start = (a:'a' { return a; }) { return a; }", 55 | "start = ('a' / b:'b' / 'c') { return b; }", 56 | // Typescript action 57 | "start = ('a' / b:'b' / 'c') { return b as string; }", 58 | ]; 59 | 60 | for (const grammar of grammars) { 61 | pegjsParser.parse(grammar); 62 | } 63 | }); 64 | }); 65 | -------------------------------------------------------------------------------- /src/tests/printer.test.js: -------------------------------------------------------------------------------- 1 | import util from "util"; 2 | 3 | import { printPrettier } from "../standalone"; 4 | 5 | /* eslint-env jest */ 6 | 7 | // Make console.log pretty-print by default 8 | export const origLog = console.log; 9 | console.log = (...args) => { 10 | origLog(...args.map((x) => util.inspect(x, false, 10, true))); 11 | }; 12 | 13 | describe("Printer", () => { 14 | it("Prints grammars without actions", async () => { 15 | const sources = [ 16 | "Rule = a/b/c", 17 | "Rule = a", 18 | "Rule = [a-zA-Z]", 19 | "Rule = a *", 20 | "Rule = (a/b)?", 21 | "Rule = (a/b/c) ?", 22 | "Rule = (a/(b/c)+) ?", 23 | "Rule = $(a/(b/c)+) ?", 24 | "Rule = $(a/(b/c)+) 
?\n OtherRule= Rule & 'q'", 25 | ]; 26 | 27 | for (const src of sources) { 28 | const formatted = await printPrettier(src, { printWidth: 80 }); 29 | expect(formatted).toMatchSnapshot(); 30 | } 31 | }); 32 | it("Prints grammars with actions", async () => { 33 | const sources = [ 34 | "start = (a:'a') &{ return a === 'a'; }", 35 | "start = (a:'a')? &{ return a === 'a'; }", 36 | "start = (a:'a')* &{ return a === 'a'; }", 37 | "start = (a:'a')+ &{ return a === 'a'; }", 38 | "start = $(a:'a') &{ return a === 'a'; }", 39 | "start = &(a:'a') 'a' &{ return a === 'a'; }", 40 | "start = !(a:'a') 'b' &{ return a === 'a'; }", 41 | "start = b:(a:'a') &{ return a === 'a'; }", 42 | "start = ('a' b:'b' 'c') &{ return b === 'b'; }", 43 | "start = (a:'a' { return a; }) &{ return a === 'a'; }", 44 | "start = ('a' / b:'b' / 'c') &{ return b === 'b'; }", 45 | "start = (a:'a') !{ return a !== 'a'; }", 46 | "start = (a:'a')? !{ return a !== 'a'; }", 47 | "start = (a:'a')* !{ return a !== 'a'; }", 48 | "start = (a:'a')+ !{ return a !== 'a'; }", 49 | "start = $(a:'a') !{ return a !== 'a'; }", 50 | "start = &(a:'a') 'a' !{ return a !== 'a'; }", 51 | "start = !(a:'a') 'b' !{ return a !== 'a'; }", 52 | "start = b:(a:'a') !{ return a !== 'a'; }", 53 | "start = ('a' b:'b' 'c') !{ return b !== 'b'; }", 54 | "start = (a:'a' { return a; }) !{ return a !== 'a'; }", 55 | "start = ('a' / b:'b' / 'c') !{ return b !== 'b'; }", 56 | "start = (a:'a') { return a; }", 57 | "start = (a:'a')? 
{ return a; }", 58 | "start = (a:'a')* { return a; }", 59 | "start = (a:'a')+ { return a; }", 60 | "start = $(a:'a') { return a; }", 61 | "start = &(a:'a') 'a' { return a; }", 62 | "start = !(a:'a') 'b' { return a; }", 63 | "start = b:(a:'a') { return a; }", 64 | "start = ('a' b:'b' 'c') { return b; }", 65 | "start = (a:'a' { return a; }) { return a; }", 66 | "start = ('a' / b:'b' / 'c') { return b; }", 67 | ]; 68 | 69 | for (const src of sources) { 70 | const formatted = await printPrettier(src, { printWidth: 80 }); 71 | expect(formatted).toMatchSnapshot(); 72 | } 73 | }); 74 | it("Prints grammars with initializer", async () => { 75 | const sources = [ 76 | "{console.log('initializing')}; Rule = a/b/c", 77 | "{console.log('initializing')}\n\n Rule = a/b/c", 78 | "{{console.log('initializing global')}}\n\n Rule = a/b/c", 79 | "{{console.log('initializing global')}}; Rule = a/b/c", 80 | "{{console.log('initializing global')}}\n\n {console.log('initializing local')}\n\n Rule = a/b/c", 81 | "{{console.log('initializing global')}}; {console.log('initializing local')}; Rule = a/b/c", 82 | ]; 83 | 84 | for (const src of sources) { 85 | const formatted = await printPrettier(src, { printWidth: 80 }); 86 | expect(formatted).toMatchSnapshot(); 87 | } 88 | }); 89 | it("Prints grammars with comments", async () => { 90 | const sources = [ 91 | "start = // a comment\n a / b", 92 | "start = a // a comment\n / b", 93 | "start = a \n// a comment\n / b", 94 | "start = a / \n// a comment\n b", 95 | "// a comment\n start = a / b", 96 | "start /*inline comment*/= a / b", 97 | "start = a / /*inline comment*/ b", 98 | "start = a / x /*inline comment*/ b", 99 | 'start /*inline comment*/ "Start Label"= a / b', 100 | ]; 101 | 102 | for (const src of sources) { 103 | const formatted = await printPrettier(src, { printWidth: 80 }); 104 | expect(formatted).toMatchSnapshot(); 105 | } 106 | }); 107 | it("Issue 18 nested optional concat", async () => { 108 | const sources = ['start = ($"x"+)?', 
'start = $("x"+)?']; 109 | 110 | for (const src of sources) { 111 | const formatted = await printPrettier(src, { printWidth: 80 }); 112 | expect(formatted).toMatchSnapshot(); 113 | } 114 | }); 115 | it("Print typescript action", async () => { 116 | const sources = ["start = 'a' {return 5 as any}"]; 117 | 118 | for (const src of sources) { 119 | const formatted = await printPrettier(src, { printWidth: 80 }); 120 | expect(formatted).toMatchSnapshot(); 121 | } 122 | }); 123 | it("Print repetition rules", async () => { 124 | const sources = [ 125 | "start = 'a'| .. |", 126 | "start = 'a'|2 ..|", 127 | "start = 'a'|2 .. 5|", 128 | "start = 'a'| .. 5|", 129 | "start = 'a'|2 .. 5,'x'|", 130 | "start = 'a'| 5 |", 131 | "start = x:'a'| {return parseInt(x , 10)} |", 132 | "start = x:'a'| {return parseInt(x , 10)} .. 7 |", 133 | ]; 134 | 135 | for (const src of sources) { 136 | const formatted = await printPrettier(src, { printWidth: 80 }); 137 | expect(formatted).toMatchSnapshot(); 138 | } 139 | }); 140 | it("Print repetition with suffix operator", async () => { 141 | const sources = [ 142 | "start = ('a'| .. |)?", 143 | "start = ('a'| .. |)|..|", 144 | "start = ('a' 'b')| .. 
|", 145 | ]; 146 | 147 | for (const src of sources) { 148 | const formatted = await printPrettier(src, { printWidth: 80 }); 149 | expect(formatted).toMatchSnapshot(); 150 | } 151 | }); 152 | }); 153 | -------------------------------------------------------------------------------- /src/types/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./peggy-parser"; 2 | -------------------------------------------------------------------------------- /src/types/peggy-parser.ts: -------------------------------------------------------------------------------- 1 | /** Common shape of every AST node: `type` is the discriminating tag (supplied as the type argument T) and `loc` is the node's source location. */ interface Node<T> { 2 | type: T; 3 | loc: Location; 4 | } 5 | 6 | /** Union of the node kinds listed below. */ export type AstNode = 7 | | Grammar 8 | | Comment 9 | | Initializer 10 | | GInitializer 11 | | Rule 12 | | StringLiteral 13 | | Literal 14 | | Delimiter 15 | | Expression 16 | | Variable 17 | | Constant 18 | | FunctionNode; 19 | 20 | /** Source span: optional source name plus start/end positions, each with offset, line and column. */ export type Location = { 21 | source?: string; 22 | start: { offset: number; line: number; column: number }; 23 | end: { offset: number; line: number; column: number }; 24 | }; 25 | 26 | /** Root node: two optional initializers, the rules, and the comments. Each initializer field is typed with the node interface whose `type` tag has the same name as the field. */ export interface Grammar extends Node<"grammar"> { 27 | ginitializer: GInitializer | undefined; 28 | initializer: Initializer | undefined; 29 | rules: Rule[]; 30 | comments: Comment[]; 31 | } 32 | 33 | /** Comment node: its text and whether it is a multiline comment. */ export interface Comment extends Node<"comment"> { 34 | value: string; 35 | multiline: boolean; 36 | } 37 | 38 | /** Initializer node tagged "initializer"; `code` holds its source text. */ export interface Initializer extends Node<"initializer"> { 39 | code: string; 40 | } 41 | /** Initializer node tagged "ginitializer"; `code` holds its source text. */ export interface GInitializer extends Node<"ginitializer"> { 42 | code: string; 43 | } 44 | 45 | /** Rule node: name, display name, the delimiter node, and the rule's expression. */ export interface Rule extends Node<"rule"> { 46 | name: Identifier; 47 | displayName: StringLiteral; 48 | delimiter: Delimiter; 49 | expression: Expression; 50 | } 51 | 52 | type Identifier = string; 53 | 54 | export interface StringLiteral extends Node<"stringliteral"> { 55 | value: string; 56 | } 57 | export interface Literal extends Node<"literal"> { 58 | value: string; 59 | ignoreCase: 
boolean; 60 | } 61 | /** Delimiter node; `value` is either "=" or "/". */ export interface Delimiter extends Node<"delimiter"> { 62 | value: "=" | "/"; 63 | } 64 | 65 | /** Union of every expression node kind declared in this file. */ export type Expression = 66 | | ChoiceExpression 67 | | ActionExpression 68 | | SequenceExpression 69 | | LabeledExpression 70 | | PrefixedExpression 71 | | SuffixedExpression 72 | | PrimaryExpression; 73 | 74 | /** "choice" node: its alternatives plus a list of Delimiter nodes (presumably the "/" separators between alternatives; confirm against the grammar). */ export interface ChoiceExpression extends Node<"choice"> { 75 | alternatives: ActionExpression[]; 76 | delimiters: Delimiter[]; 77 | } 78 | 79 | /** "labeled" node: an expression together with its label; `pick` is optional (NOTE(review): presumably marks a plucked label; confirm against the grammar). */ export interface LabeledExpression extends Node<"labeled"> { 80 | label: Identifier; 81 | expression: Expression; 82 | pick?: boolean; 83 | } 84 | 85 | /** Node wrapping one expression, tagged "text", "simple_and" or "simple_not". */ export interface PrefixedExpression 86 | extends Node<"text" | "simple_and" | "simple_not"> { 87 | expression: Expression; 88 | } 89 | /** Node wrapping one expression, tagged "optional", "zero_or_more" or "one_or_more". */ export interface SuffixedExpression 90 | extends Node<"optional" | "zero_or_more" | "one_or_more"> { 91 | expression: Expression; 92 | } 93 | 94 | /** Union of the primary expression nodes; Node<"any"> carries no fields beyond `type` and `loc`. */ type PrimaryExpression = 95 | | Literal 96 | | CharacterClass 97 | | Node<"any"> 98 | | RuleReference 99 | | SemanticPredicate 100 | | Group 101 | | RepeatedExpression; 102 | 103 | /** "repeated" node: `min` and `max` may each be a Constant, Variable, FunctionNode or null; `delimiter` is an expression or null. */ export interface RepeatedExpression extends Node<"repeated"> { 104 | min: Constant | Variable | FunctionNode | null; 105 | max: Constant | Variable | FunctionNode | null; 106 | expression: PrimaryExpression; 107 | delimiter: Expression | null; 108 | } 109 | 110 | /** "class" node: its parts plus `inverted` and `ignoreCase` flags. */ export interface CharacterClass extends Node<"class"> { 111 | parts: string[]; 112 | inverted: boolean; 113 | ignoreCase: boolean; 114 | } 115 | 116 | /** "rule_ref" node: the name of the referenced rule. */ export interface RuleReference extends Node<"rule_ref"> { 117 | name: Identifier; 118 | } 119 | 120 | /** Node tagged "semantic_and" or "semantic_not"; `code` holds its source text. */ export interface SemanticPredicate 121 | extends Node<"semantic_and" | "semantic_not"> { 122 | code: string; 123 | } 124 | 125 | /** "group" node wrapping a single expression. */ export interface Group extends Node<"group"> { 126 | expression: Expression; 127 | } 128 | 129 | /** "action" node: an expression together with the source text of its code block. */ export interface ActionExpression extends Node<"action"> { 130 | expression: SequenceExpression | Expression; 131 | code: string; 132 | } 133 | 134 | export interface 
SequenceExpression extends Node<"sequence"> { 135 | elements: Expression[]; 136 | } 137 | 138 | /** "variable" node holding an identifier; one of the kinds accepted for RepeatedExpression `min`/`max`. */ export interface Variable extends Node<"variable"> { 139 | value: Identifier; 140 | } 141 | /** "constant" node holding a number or null; one of the kinds accepted for RepeatedExpression `min`/`max`. */ export interface Constant extends Node<"constant"> { 142 | value: number | null; 143 | } 144 | /** "function" node holding a string (presumably the source of a code block; confirm against the grammar); one of the kinds accepted for RepeatedExpression `min`/`max`. */ export interface FunctionNode extends Node<"function"> { 145 | value: string; 146 | } 147 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": ["**/*.test.ts", "**/*.stub.ts", "node_modules", "**/tests/"], 3 | // Having more success with listing entry points explicitly rather than specifying a directory 4 | // "include": ["src"], 5 | "files": ["./src/standalone.ts", "./src/prettier-plugin-pegjs.ts"], 6 | "compilerOptions": { 7 | // We will use esbuild to make the actual js files. 8 | // We don't use tsc because it will not map imports for us so if we write 9 | // `import foo from "./bar"` it does not get translated to `import foo from "./bar.js"` 10 | // which means it cannot be imported via node. 
11 | "emitDeclarationOnly": true, 12 | "declaration": true, 13 | "outDir": "./dist", 14 | "rootDir": "./src", 15 | "noImplicitAny": true, 16 | "strictNullChecks": true, 17 | "module": "ES2020", 18 | "target": "ES2020", 19 | "jsx": "react", 20 | "allowJs": true, 21 | "allowSyntheticDefaultImports": true, 22 | "esModuleInterop": true, 23 | "strict": true, 24 | "moduleResolution": "node", 25 | "resolveJsonModule": true, 26 | "isolatedModules": true 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /vite.config.ts: -------------------------------------------------------------------------------- 1 | import { PluginOption } from "vite"; 2 | import { defineConfig } from "vitest/config"; 3 | import peggy from "peggy"; 4 | import vts from "vite-plugin-dts" 5 | 6 | export default defineConfig({ 7 | plugins: [peggyTransformer(), vts()], 8 | build: { 9 | // We clear the outDir before building in the `npm run build` step 10 | // and then populate it with typescript files, so avoid clearing here. 11 | emptyOutDir: false, 12 | outDir: "./dist", 13 | lib: { 14 | entry: { 15 | "prettier-plugin-pegjs": "./src/prettier-plugin-pegjs.ts", 16 | standalone: "./src/standalone.ts", 17 | }, 18 | formats: ["es", "cjs"], 19 | }, 20 | rollupOptions: { 21 | output: { 22 | exports: "named", 23 | manualChunks: {}, 24 | }, 25 | }, 26 | sourcemap: true, 27 | }, 28 | 29 | test: { 30 | globals: true, 31 | }, 32 | }); 33 | 34 | /** Returns a plugin whose transform hook compiles grammar files: ids that do not end in ".peggy" or ".pegjs" are skipped (the hook returns undefined); otherwise the file text is passed to peggy.generate with output "source" and format "es", and the generated parser source is returned as the module code. */ function peggyTransformer(): PluginOption { 35 | return { 36 | name: "rollup-plugin-peggy", 37 | transform(code, id, options) { 38 | if (!id.match(/\.(peggy|pegjs)$/)) { 39 | return; 40 | } 41 | const parserSource = peggy.generate(code, { 42 | output: "source", 43 | format: "es", 44 | }); 45 | return { code: parserSource }; 46 | }, 47 | }; 48 | } 49 | --------------------------------------------------------------------------------