├── .gitignore ├── .idea └── .gitignore ├── .npmignore ├── LICENSE ├── README.md ├── __tests__ ├── __snapshots__ │ └── language_parsing_test.ts.snap ├── broken_langage_test.ts ├── data │ ├── broken_language.ts │ ├── json_language │ ├── json_sample.json │ ├── sample_language.ts │ └── tmp │ │ └── .gitkeep ├── json_test.ts ├── language_parsing_test.ts ├── lexer │ ├── __snapshots__ │ │ ├── controller_test.ts.snap │ │ └── lexer_test.ts.snap │ ├── controller_test.ts │ └── lexer_test.ts ├── parser │ └── parser_test.ts ├── parsergenerator │ ├── closureitem_test.ts │ ├── closureset_test.ts │ ├── firstset_test.ts │ ├── nullableset_test.ts │ ├── symboldiscriminator_test.ts │ └── syntaxdb_test.ts ├── precompiler │ └── precompiler_test.ts └── tsconfig.json ├── gulpfile.js ├── json_language ├── language ├── package-lock.json ├── package.json ├── src ├── def │ ├── index.ts │ ├── language.ts │ ├── parsingtable.ts │ └── token.ts ├── index.ts ├── lexer │ ├── index.ts │ ├── lexcontroller.ts │ └── lexer.ts ├── parser │ ├── ast.ts │ ├── factory.ts │ ├── index.ts │ └── parser.ts ├── parsergenerator │ ├── closureitem.ts │ ├── closureset.ts │ ├── dfagenerator.ts │ ├── firstset.ts │ ├── grammardb.ts │ ├── index.ts │ ├── nullableset.ts │ ├── parsergenerator.ts │ └── symboldiscriminator.ts ├── precompiler │ ├── index.ts │ ├── precompiler.ts │ └── ruleparser.ts └── sample.ts ├── tsconfig.json └── tslint.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | coverage/ 4 | docs/ 5 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | .idea/ 3 | node_modules/ 4 | __tests__/ 5 | coverage/ 6 | docs/ 7 | .gitignore 8 | .npmignore 9 | gulpfile.js 10 | package-lock.json 11 | tsconfig.json 12 | tslint.json 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Tatamo 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LavRia 2 | TypeScript LALR(1) Parser Generator 3 | 4 | ## Installation 5 | ``` 6 | $ mkdir pg 7 | $ cd pg 8 | $ npm init 9 | $ npm install lavriapg --save 10 | ``` 11 | 12 | ### Examples 13 | ``` 14 | $ echo 1+1 | node ./node_modules/lavriapg/dist/sample.js 15 | ``` 16 | 17 | [language](/language) and [json_language](/json_language) are sample language definitions. 18 | `language_parser` parses the language definition file to generate a parser. 19 | 20 | Run the following code to see how it works: 21 | ```TypeScript 22 | // TypeScript 23 | import {language_parser, ParserGenerator} from "lavriapg"; 24 | import {readFileSync} from "fs"; 25 | 26 | const input = `{ 27 | "foo": 123.45, 28 | "bar": [ true, false, null ], 29 | "baz": { 30 | "nested": "hello" 31 | }, 32 | "x": "/1{}" 33 | }`; 34 | 35 | const language = language_parser.parse(readFileSync("./node_modules/lavriapg/json_language", "utf8") as string); 36 | const parser = new ParserGenerator(language).getParser(); 37 | console.log(JSON.stringify(parser.parse(input), undefined, 2)); 38 | ``` 39 | 40 | ```JavaScript 41 | // JavaScript (CommonJS) 42 | const pg = require("lavriapg"); 43 | const fs = require("fs"); 44 | const input = `{ 45 | "foo": 123.45, 46 | "bar": [ true, false, null ], 47 | "baz": { 48 | "nested": "hello" 49 | }, 50 | "x": "/1{}" 51 | }`; 52 | const language = pg.language_parser.parse(fs.readFileSync("./node_modules/lavriapg/json_language", "utf8")); 53 | const parser = new pg.ParserGenerator(language).getParser(); 54 | console.log(JSON.stringify(parser.parse(input), undefined, 2)); 55 | ``` 56 | 57 | ## Usage 58 | [language](/language) is its own language definition: 59 | ```TypeScript 60 | // TypeScript 61 | const input = readFileSync("./node_modules/lavriapg/language", "utf8") as string; 62 | const language = language_parser.parse(input); 63 | 64 | const replacer = (key: string, value: any) => { 65 | if (typeof value === "function") return value.toString(); 66 | if (value instanceof RegExp) return value.toString(); 67 | return value; 68 | }; 69 | 70 | console.log(JSON.stringify(language, replacer, 2)); 71 | 72 | const parser = new ParserGenerator(language).getParser(); 73 | console.log(JSON.stringify(parser.parse(input), replacer, 2)); 74 | ``` 75 | 76 | `language_parser` parses [language](/language) to generate a language definition. 77 | The parser generated from that definition behaves just like `language_parser`. 
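A language definition can also be written inline. The definition format is plain text: lexer rules come first (a token name followed by a string or regular-expression pattern; a leading `!` is used for patterns whose matches are skipped, compare `__tests__/data/json_language`), then grammar rules, with `$` marking the start symbol. Below is a minimal sketch built around the calculator definition that ships with the test data (`test_calc_language_raw_string` in `__tests__/data/sample_language.ts`); the input `"(1+2)*3"` is only illustrative, and since this definition declares no rule callbacks the printed result is whatever the default reduce behavior produces.
```TypeScript
// TypeScript — a minimal sketch; the definition text is copied from
// test_calc_language_raw_string in __tests__/data/sample_language.ts.
import {language_parser, ParserGenerator} from "lavriapg";

const calc_definition = `DIGITS /[1-9][0-9]*/
PLUS "+"
ASTERISK "*"
LPAREN "("
RPAREN ")"
!ENDLINE /(\\r\\n|\\r|\\n)+/
!WHITESPACE /[ \\f\\t\\v\\u00a0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\ufeff]+/
INVALID /./

$EXP : EXP PLUS TERM | TERM;
TERM : TERM ASTERISK ATOM | ATOM;
ATOM : DIGITS | LPAREN EXP RPAREN;
`;

// Same flow as the examples above: parse the definition, then generate a parser from it.
const calc_language = language_parser.parse(calc_definition);
const calc_parser = new ParserGenerator(calc_language).getParser();
// "(1+2)*3" is an illustrative input; no callbacks are attached, so this prints
// the default parse result rather than an evaluated number.
console.log(JSON.stringify(calc_parser.parse("(1+2)*3"), undefined, 2));
```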
78 | -------------------------------------------------------------------------------- /__tests__/__snapshots__/language_parsing_test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`syntax functions test #extend 1`] = ` 4 | Array [ 5 | Object { 6 | "token": "A", 7 | "value": "a", 8 | }, 9 | Object { 10 | "token": "B", 11 | "value": "b", 12 | }, 13 | Object { 14 | "token": "C", 15 | "value": "c", 16 | }, 17 | Object { 18 | "token": Symbol(EOF), 19 | "value": "", 20 | }, 21 | ] 22 | `; 23 | 24 | exports[`syntax functions test #extend 2`] = ` 25 | Object { 26 | "grammar": Object { 27 | "rules": Array [ 28 | Object { 29 | "ltoken": "S", 30 | "pattern": Array [ 31 | "A", 32 | "B", 33 | "C", 34 | ], 35 | }, 36 | ], 37 | "start_symbol": "S", 38 | }, 39 | "lex": Object { 40 | "rules": Array [ 41 | Object { 42 | "pattern": /a/, 43 | "states": Array [ 44 | "state3", 45 | ], 46 | "token": "A", 47 | }, 48 | Object { 49 | "pattern": /b/, 50 | "states": Array [ 51 | "state2", 52 | ], 53 | "token": "B", 54 | }, 55 | Object { 56 | "pattern": /c/, 57 | "states": Array [ 58 | "default", 59 | ], 60 | "token": "C", 61 | }, 62 | ], 63 | "start_state": "state3", 64 | "states": Array [ 65 | Object { 66 | "inheritance": "default", 67 | "label": "state1", 68 | }, 69 | Object { 70 | "inheritance": "default", 71 | "label": "state2", 72 | }, 73 | Object { 74 | "inheritance": "state2", 75 | "label": "state3", 76 | }, 77 | ], 78 | }, 79 | } 80 | `; 81 | 82 | exports[`syntax functions test #start 1`] = ` 83 | Object { 84 | "grammar": Object { 85 | "rules": Array [ 86 | Object { 87 | "ltoken": "S", 88 | "pattern": Array [ 89 | "A", 90 | "B", 91 | ], 92 | }, 93 | ], 94 | "start_symbol": "S", 95 | }, 96 | "lex": Object { 97 | "rules": Array [ 98 | Object { 99 | "pattern": /a/, 100 | "states": Array [ 101 | "default", 102 | ], 103 | "token": "A", 104 | }, 105 | Object { 106 | "pattern": /a/, 107 | "states": Array [ 108 | "state1", 109 | "state2", 110 | ], 111 | "token": "A2", 112 | }, 113 | Object { 114 | "pattern": /b/, 115 | "token": "B", 116 | }, 117 | ], 118 | "start_state": "state1", 119 | }, 120 | } 121 | `; 122 | 123 | exports[`syntax functions test callback delimiters 1`] = ` 124 | Object { 125 | "grammar": Object { 126 | "rules": Array [ 127 | Object { 128 | "callback": [Function], 129 | "ltoken": "S", 130 | "pattern": Array [ 131 | "T", 132 | ], 133 | }, 134 | Object { 135 | "callback": [Function], 136 | "ltoken": "T", 137 | "pattern": Array [ 138 | "E", 139 | ], 140 | }, 141 | Object { 142 | "callback": [Function], 143 | "ltoken": "E", 144 | "pattern": Array [], 145 | }, 146 | Object { 147 | "ltoken": "E", 148 | "pattern": Array [ 149 | "A", 150 | ], 151 | }, 152 | ], 153 | "start_symbol": "S", 154 | }, 155 | "lex": Object { 156 | "rules": Array [ 157 | Object { 158 | "callback": [Function], 159 | "pattern": "a", 160 | "token": "A", 161 | }, 162 | ], 163 | }, 164 | } 165 | `; 166 | 167 | exports[`syntax functions test callback delimiters 2`] = ` 168 | Array [ 169 | "function anonymous(children,token,lexer 170 | ) { 171 | const s = {}; 172 | }", 173 | "function anonymous(children,token,lexer 174 | ) { 175 | const t = \\"}%, }}%, }}%%, }%%%, }}%%%\\"; 176 | }", 177 | "function anonymous(children,token,lexer 178 | ) { 179 | const e = \\"}%\\"+\\"}}\\"; 180 | }", 181 | undefined, 182 | ] 183 | `; 184 | 185 | exports[`syntax functions test callbacks 1`] = ` 186 | Object { 187 | "grammar": Object { 188 | "rules": 
Array [ 189 | Object { 190 | "callback": [Function], 191 | "ltoken": "S", 192 | "pattern": Array [ 193 | "T", 194 | ], 195 | }, 196 | Object { 197 | "callback": [Function], 198 | "ltoken": "T", 199 | "pattern": Array [ 200 | "A", 201 | ], 202 | }, 203 | Object { 204 | "callback": [Function], 205 | "ltoken": "T", 206 | "pattern": Array [ 207 | "E", 208 | ], 209 | }, 210 | Object { 211 | "callback": [Function], 212 | "ltoken": "T", 213 | "pattern": Array [], 214 | }, 215 | Object { 216 | "callback": [Function], 217 | "ltoken": "E", 218 | "pattern": Array [], 219 | }, 220 | Object { 221 | "ltoken": "E", 222 | "pattern": Array [ 223 | "B", 224 | ], 225 | }, 226 | ], 227 | "start_symbol": "S", 228 | }, 229 | "lex": Object { 230 | "rules": Array [ 231 | Object { 232 | "callback": [Function], 233 | "pattern": "a", 234 | "token": "A", 235 | }, 236 | Object { 237 | "callback": [Function], 238 | "pattern": /b/, 239 | "token": "B", 240 | }, 241 | ], 242 | }, 243 | } 244 | `; 245 | 246 | exports[`syntax functions test callbacks 2`] = ` 247 | Array [ 248 | "function anonymous(children,token,lexer 249 | ) { 250 | callback_of_S(); 251 | }", 252 | "function anonymous(children,token,lexer 253 | ) { 254 | callback_of_T_1(); 255 | }", 256 | "function anonymous(children,token,lexer 257 | ) { 258 | callback_of_T_2(); 259 | }", 260 | "function anonymous(children,token,lexer 261 | ) { 262 | callback_of_T_3(); 263 | }", 264 | "function anonymous(children,token,lexer 265 | ) { 266 | callback_of_E(); 267 | }", 268 | undefined, 269 | ] 270 | `; 271 | 272 | exports[`syntax functions test ex-callbacks 1`] = ` 273 | Object { 274 | "grammar": Object { 275 | "begin_callback": [Function], 276 | "default_callback": [Function], 277 | "end_callback": [Function], 278 | "rules": Array [ 279 | Object { 280 | "ltoken": "S", 281 | "pattern": Array [ 282 | "A", 283 | ], 284 | }, 285 | ], 286 | "start_symbol": "S", 287 | }, 288 | "lex": Object { 289 | "begin_callback": [Function], 290 | "default_callback": [Function], 291 | "end_callback": [Function], 292 | "rules": Array [ 293 | Object { 294 | "pattern": "a", 295 | "token": "A", 296 | }, 297 | ], 298 | }, 299 | } 300 | `; 301 | 302 | exports[`syntax functions test ex-callbacks 2`] = ` 303 | "function anonymous(value,token,lex 304 | ) { 305 | lex_begin_callback(); 306 | }" 307 | `; 308 | 309 | exports[`syntax functions test ex-callbacks 3`] = ` 310 | "function anonymous(value,token,lex 311 | ) { 312 | lex_default_callback(); 313 | }" 314 | `; 315 | 316 | exports[`syntax functions test ex-callbacks 4`] = ` 317 | "function anonymous(value,token,lex 318 | ) { 319 | lex_end_callback(); 320 | }" 321 | `; 322 | 323 | exports[`syntax functions test ex-callbacks 5`] = ` 324 | "function anonymous(children,token,lexer 325 | ) { 326 | grammar_begin_callback(); 327 | }" 328 | `; 329 | 330 | exports[`syntax functions test ex-callbacks 6`] = ` 331 | "function anonymous(children,token,lexer 332 | ) { 333 | grammar_default_callback(); 334 | }" 335 | `; 336 | 337 | exports[`syntax functions test ex-callbacks 7`] = ` 338 | "function anonymous(children,token,lexer 339 | ) { 340 | grammar_end_callback(); 341 | }" 342 | `; 343 | 344 | exports[`syntax functions test lex-state 1`] = ` 345 | Object { 346 | "grammar": Object { 347 | "rules": Array [ 348 | Object { 349 | "ltoken": "S", 350 | "pattern": Array [ 351 | "A", 352 | "B2", 353 | "C", 354 | ], 355 | }, 356 | ], 357 | "start_symbol": "S", 358 | }, 359 | "lex": Object { 360 | "rules": Array [ 361 | Object { 362 | "pattern": /a/, 363 | "token": "A", 
364 | }, 365 | Object { 366 | "pattern": /b/, 367 | "states": Array [ 368 | "state1", 369 | "state2", 370 | ], 371 | "token": "B", 372 | }, 373 | Object { 374 | "pattern": /b/, 375 | "states": Array [ 376 | "default", 377 | ], 378 | "token": "B2", 379 | }, 380 | Object { 381 | "pattern": /c/, 382 | "token": "C", 383 | }, 384 | ], 385 | }, 386 | } 387 | `; 388 | -------------------------------------------------------------------------------- /__tests__/broken_langage_test.ts: -------------------------------------------------------------------------------- 1 | import {ParserGenerator} from "../src/parsergenerator/parsergenerator"; 2 | import {test_broken_language} from "./data/broken_language"; 3 | 4 | describe("Calculator test with broken language", () => { 5 | // TODO: find a way to detect that the parser is broken (other than console output) 6 | const pg = new ParserGenerator(test_broken_language); 7 | const parser = pg.getParser(); 8 | test("parsing table is broken", () => { 9 | expect(pg.isConflicted()).toBe(true); 10 | expect(pg.getTableType()).toBe("CONFLICTED"); 11 | }); 12 | test('"1+1" equals 2', () => { 13 | expect(parser.parse("1+1")).toBe(2); 14 | }); 15 | test('"( 1+1 )*3 + ( (1+1) * (1+2*3+4) )\\n" equals 28 (expected to fail)', () => { 16 | expect(parser.parse("( 1+1 )*3 + ( (1+1) * (1+2*3+4) )\n")).not.toBe(28); 17 | }); 18 | }); 19 | -------------------------------------------------------------------------------- /__tests__/data/broken_language.ts: -------------------------------------------------------------------------------- 1 | import {Language, LexDefinition, GrammarDefinition} from "../../src/def/language"; 2 | 3 | export const test_broken_grammar: GrammarDefinition = { 4 | rules: [ 5 | { 6 | ltoken: "EXP", 7 | pattern: ["EXP", "PLUS", "EXP"], 8 | callback: (c) => c[0] + c[2] 9 | }, 10 | { 11 | ltoken: "EXP", 12 | pattern: ["TERM"], 13 | callback: (c) => c[0] 14 | }, 15 | { 16 | ltoken: "TERM", 17 | pattern: ["TERM", "ASTERISK", "ATOM"], 18 | callback: (c) => c[0] * c[2] 19 | }, 20 | { 21 | ltoken: "TERM", 22 | pattern: ["ATOM"], 23 | callback: (c) => c[0] 24 | }, 25 | { 26 | ltoken: "ATOM", 27 | pattern: ["DIGITS"], 28 | callback: (c) => +c[0] 29 | }, 30 | { 31 | ltoken: "ATOM", 32 | pattern: ["LPAREN", "EXP", "RPAREN"], 33 | callback: (c) => c[1] 34 | } 35 | ], 36 | start_symbol: "EXP" 37 | }; 38 | 39 | export const test_broken_lex: LexDefinition = { 40 | rules: [ 41 | {token: "DIGITS", pattern: /[1-9][0-9]*/}, 42 | {token: "PLUS", pattern: "+"}, 43 | {token: "ASTERISK", pattern: "*"}, 44 | {token: "LPAREN", pattern: "("}, 45 | {token: "RPAREN", pattern: ")"}, 46 | {token: null, pattern: /(\r\n|\r|\n)+/}, 47 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 48 | {token: "INVALID", pattern: /./} 49 | ] 50 | }; 51 | 52 | export const test_broken_language: Language = { 53 | lex: test_broken_lex, 54 | grammar: test_broken_grammar 55 | }; 56 | -------------------------------------------------------------------------------- /__tests__/data/json_language: -------------------------------------------------------------------------------- 1 | true "true" 2 | false "false" 3 | null "null" 4 | lbrace "{" 5 | rbrace "}" 6 | lbracket "[" 7 | rbracket "]" 8 | colon ":" 9 | comma "," 10 | digit1_9 /[1-9]/ 11 | digit0 /0/ 12 | minus "-" 13 | period "." 14 | string /".*?"/ 15 | ! /(\r\n|\r|\n)+/ 16 | ! 
/[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/ 17 | invalid /./ 18 | 19 | digit : digit1_9 | digit0; 20 | digits : digit | digit digits; 21 | int : digit | digit1_9 digits | 22 | minus digit | minus digit1_9 digits; 23 | frac : period digits; 24 | number : int | int frac; 25 | 26 | object : lbrace rbrace | lbrace members rbrace; 27 | members : pair | pair comma members; 28 | pair : string colon value; 29 | array : lbracket rbracket | lbracket elements rbracket; 30 | elements : value | value comma elements; 31 | $value : string | number | object | array | true | false | null; 32 | -------------------------------------------------------------------------------- /__tests__/data/json_sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "meta": { 3 | "name": "Freeciv2.5 Classic(English)", 4 | "language": "en", 5 | "ruleset": "classic", 6 | "freeciv_version": "2.5" 7 | }, 8 | "units": [ 9 | { 10 | "id": "warriors", 11 | "label": "Warriors", 12 | "label_detail": "Warriors", 13 | "phonetics": [ 14 | "warriors" 15 | ], 16 | "class": "land", 17 | "flags": [], 18 | "hp": 10, 19 | "attack": 1, 20 | "defence": 1, 21 | "firepower": 1 22 | }, 23 | { 24 | "id": "phalanx", 25 | "label": "Phalanx", 26 | "label_detail": "Phalanx", 27 | "phonetics": [ 28 | "phalanx" 29 | ], 30 | "class": "land", 31 | "flags": [], 32 | "hp": 10, 33 | "attack": 1, 34 | "defence": 2, 35 | "firepower": 1 36 | }, 37 | { 38 | "id": "pikemen", 39 | "label": "Pikemen", 40 | "label_detail": "Pikemen", 41 | "phonetics": [ 42 | "pikemen" 43 | ], 44 | "class": "land", 45 | "flags": [ 46 | "pikemen" 47 | ], 48 | "hp": 10, 49 | "attack": 1, 50 | "defence": 2, 51 | "firepower": 1 52 | }, 53 | { 54 | "id": "archers", 55 | "label": "Archers", 56 | "label_detail": "Archers", 57 | "phonetics": [ 58 | "archers" 59 | ], 60 | "class": "land", 61 | "flags": [], 62 | "hp": 10, 63 | "attack": 3, 64 | "defence": 2, 65 | "firepower": 1 66 | }, 67 | { 68 | "id": "legion", 69 | "label": "Legion", 70 | "label_detail": "Legion", 71 | "phonetics": [ 72 | "legion" 73 | ], 74 | "class": "land", 75 | "flags": [], 76 | "hp": 10, 77 | "attack": 4, 78 | "defence": 2, 79 | "firepower": 1 80 | }, 81 | { 82 | "id": "musketeers", 83 | "label": "Musketeers", 84 | "label_detail": "Musketeers", 85 | "phonetics": [ 86 | "musketeers" 87 | ], 88 | "class": "land", 89 | "flags": [], 90 | "hp": 20, 91 | "attack": 3, 92 | "defence": 3, 93 | "firepower": 1 94 | }, 95 | { 96 | "id": "riflemen", 97 | "label": "Riflemen", 98 | "label_detail": "Riflemen", 99 | "phonetics": [ 100 | "riflemen" 101 | ], 102 | "class": "land", 103 | "flags": [], 104 | "hp": 20, 105 | "attack": 5, 106 | "defence": 4, 107 | "firepower": 1 108 | }, 109 | { 110 | "id": "alpine-troops", 111 | "label": "Alpine Troops", 112 | "label_detail": "Alpine Troops", 113 | "phonetics": [ 114 | "alpine troops" 115 | ], 116 | "class": "land", 117 | "flags": [], 118 | "hp": 20, 119 | "attack": 5, 120 | "defence": 5, 121 | "firepower": 1 122 | }, 123 | { 124 | "id": "mech-inf", 125 | "label": "Mech. Inf.", 126 | "label_detail": "Mech. Inf.", 127 | "phonetics": [ 128 | "mech. inf." 
129 | ], 130 | "class": "land", 131 | "flags": [], 132 | "hp": 30, 133 | "attack": 6, 134 | "defence": 6, 135 | "firepower": 1 136 | }, 137 | { 138 | "id": "horsemen", 139 | "label": "Horsemen", 140 | "label_detail": "Horsemen", 141 | "phonetics": [ 142 | "horsemen" 143 | ], 144 | "class": "land", 145 | "flags": [ 146 | "mounted" 147 | ], 148 | "hp": 10, 149 | "attack": 2, 150 | "defence": 1, 151 | "firepower": 1 152 | }, 153 | { 154 | "id": "chariot", 155 | "label": "Chariot", 156 | "label_detail": "Chariot", 157 | "phonetics": [ 158 | "chariot" 159 | ], 160 | "class": "land", 161 | "flags": [ 162 | "mounted" 163 | ], 164 | "hp": 10, 165 | "attack": 3, 166 | "defence": 1, 167 | "firepower": 1 168 | }, 169 | { 170 | "id": "knights", 171 | "label": "Knights", 172 | "label_detail": "Knights", 173 | "phonetics": [ 174 | "knights" 175 | ], 176 | "class": "land", 177 | "flags": [ 178 | "mounted" 179 | ], 180 | "hp": 10, 181 | "attack": 4, 182 | "defence": 2, 183 | "firepower": 1 184 | }, 185 | { 186 | "id": "dragoons", 187 | "label": "Dragoons", 188 | "label_detail": "Dragoons", 189 | "phonetics": [ 190 | "dragoons" 191 | ], 192 | "class": "land", 193 | "flags": [ 194 | "mounted" 195 | ], 196 | "hp": 20, 197 | "attack": 5, 198 | "defence": 2, 199 | "firepower": 1 200 | }, 201 | { 202 | "id": "cavalry", 203 | "label": "Cavalry", 204 | "label_detail": "Cavalry", 205 | "phonetics": [ 206 | "cavalry" 207 | ], 208 | "class": "land", 209 | "flags": [], 210 | "hp": 20, 211 | "attack": 8, 212 | "defence": 3, 213 | "firepower": 1 214 | }, 215 | { 216 | "id": "armor", 217 | "label": "Armor", 218 | "label_detail": "Armor", 219 | "phonetics": [ 220 | "armor" 221 | ], 222 | "class": "land", 223 | "flags": [], 224 | "hp": 30, 225 | "attack": 10, 226 | "defence": 5, 227 | "firepower": 1 228 | }, 229 | { 230 | "id": "catapult", 231 | "label": "Catapult", 232 | "label_detail": "Catapult", 233 | "phonetics": [ 234 | "catapult" 235 | ], 236 | "class": "land", 237 | "flags": [], 238 | "hp": 10, 239 | "attack": 6, 240 | "defence": 1, 241 | "firepower": 1 242 | }, 243 | { 244 | "id": "cannon", 245 | "label": "Cannon", 246 | "label_detail": "Cannon", 247 | "phonetics": [ 248 | "cannon" 249 | ], 250 | "class": "land", 251 | "flags": [], 252 | "hp": 20, 253 | "attack": 8, 254 | "defence": 1, 255 | "firepower": 1 256 | }, 257 | { 258 | "id": "artillery", 259 | "label": "Artillery", 260 | "label_detail": "Artillery", 261 | "phonetics": [ 262 | "artillery" 263 | ], 264 | "class": "land", 265 | "flags": [], 266 | "hp": 20, 267 | "attack": 10, 268 | "defence": 1, 269 | "firepower": 2 270 | }, 271 | { 272 | "id": "howitzer", 273 | "label": "Howitzer", 274 | "label_detail": "Howitzer", 275 | "phonetics": [ 276 | "howitzer" 277 | ], 278 | "class": "land", 279 | "flags": [ 280 | "igwall" 281 | ], 282 | "hp": 30, 283 | "attack": 12, 284 | "defence": 2, 285 | "firepower": 2 286 | }, 287 | { 288 | "id": "partisan", 289 | "label": "Partisan", 290 | "label_detail": "Partisan", 291 | "phonetics": [ 292 | "partisan" 293 | ], 294 | "class": "land", 295 | "flags": [], 296 | "hp": 20, 297 | "attack": 4, 298 | "defence": 4, 299 | "firepower": 1 300 | }, 301 | { 302 | "id": "marines", 303 | "label": "Marines", 304 | "label_detail": "Marines", 305 | "phonetics": [ 306 | "marines" 307 | ], 308 | "class": "land", 309 | "flags": [], 310 | "hp": 20, 311 | "attack": 8, 312 | "defence": 5, 313 | "firepower": 1 314 | }, 315 | { 316 | "id": "paratroopers", 317 | "label": "Paratroopers", 318 | "label_detail": "Paratroopers", 319 | "phonetics": [ 320 
| "paratroopers" 321 | ], 322 | "class": "land", 323 | "flags": [], 324 | "hp": 20, 325 | "attack": 6, 326 | "defence": 4, 327 | "firepower": 1 328 | }, 329 | { 330 | "id": "trireme", 331 | "label": "Trireme", 332 | "label_detail": "Trireme", 333 | "phonetics": [ 334 | "trireme" 335 | ], 336 | "class": "trireme", 337 | "flags": [], 338 | "hp": 10, 339 | "attack": 1, 340 | "defence": 1, 341 | "firepower": 1 342 | }, 343 | { 344 | "id": "caravel", 345 | "label": "Caravel", 346 | "label_detail": "Caravel", 347 | "phonetics": [ 348 | "caravel" 349 | ], 350 | "class": "sea", 351 | "flags": [], 352 | "hp": 10, 353 | "attack": 2, 354 | "defence": 1, 355 | "firepower": 1 356 | }, 357 | { 358 | "id": "galleon", 359 | "label": "Galleon", 360 | "label_detail": "Galleon", 361 | "phonetics": [ 362 | "galleon" 363 | ], 364 | "class": "sea", 365 | "flags": [], 366 | "hp": 20, 367 | "attack": 0, 368 | "defence": 2, 369 | "firepower": 1 370 | }, 371 | { 372 | "id": "transport", 373 | "label": "Transport", 374 | "label_detail": "Transport", 375 | "phonetics": [ 376 | "transport" 377 | ], 378 | "class": "sea", 379 | "flags": [], 380 | "hp": 30, 381 | "attack": 0, 382 | "defence": 3, 383 | "firepower": 1 384 | }, 385 | { 386 | "id": "frigate", 387 | "label": "Frigate", 388 | "label_detail": "Frigate", 389 | "phonetics": [ 390 | "frigate" 391 | ], 392 | "class": "sea", 393 | "flags": [], 394 | "hp": 20, 395 | "attack": 4, 396 | "defence": 2, 397 | "firepower": 1 398 | }, 399 | { 400 | "id": "ironclad", 401 | "label": "Ironclad", 402 | "label_detail": "Ironclad", 403 | "phonetics": [ 404 | "ironclad" 405 | ], 406 | "class": "sea", 407 | "flags": [], 408 | "hp": 30, 409 | "attack": 4, 410 | "defence": 4, 411 | "firepower": 1 412 | }, 413 | { 414 | "id": "destroyer", 415 | "label": "Destroyer", 416 | "label_detail": "Destroyer", 417 | "phonetics": [ 418 | "destroyer" 419 | ], 420 | "class": "sea", 421 | "flags": [], 422 | "hp": 30, 423 | "attack": 4, 424 | "defence": 4, 425 | "firepower": 1 426 | }, 427 | { 428 | "id": "cruiser", 429 | "label": "Cruiser", 430 | "label_detail": "Cruiser", 431 | "phonetics": [ 432 | "cruiser" 433 | ], 434 | "class": "sea", 435 | "flags": [], 436 | "hp": 30, 437 | "attack": 6, 438 | "defence": 6, 439 | "firepower": 2 440 | }, 441 | { 442 | "id": "aegis-cruiser", 443 | "label": "AEGIS Cruiser", 444 | "label_detail": "AEGIS Cruiser", 445 | "phonetics": [ 446 | "aegis cruiser" 447 | ], 448 | "class": "sea", 449 | "flags": [ 450 | "aegis" 451 | ], 452 | "hp": 30, 453 | "attack": 8, 454 | "defence": 8, 455 | "firepower": 2 456 | }, 457 | { 458 | "id": "submarine", 459 | "label": "Submarine", 460 | "label_detail": "Submarine", 461 | "phonetics": [ 462 | "submarine" 463 | ], 464 | "class": "sea", 465 | "flags": [], 466 | "hp": 30, 467 | "attack": 12, 468 | "defence": 2, 469 | "firepower": 2 470 | }, 471 | { 472 | "id": "battleship", 473 | "label": "Battleship", 474 | "label_detail": "Battleship", 475 | "phonetics": [ 476 | "battleship" 477 | ], 478 | "class": "sea", 479 | "flags": [], 480 | "hp": 40, 481 | "attack": 12, 482 | "defence": 12, 483 | "firepower": 2 484 | }, 485 | { 486 | "id": "carrier", 487 | "label": "Carrier", 488 | "label_detail": "Carrier", 489 | "phonetics": [ 490 | "carrier" 491 | ], 492 | "class": "sea", 493 | "flags": [], 494 | "hp": 40, 495 | "attack": 1, 496 | "defence": 9, 497 | "firepower": 2 498 | }, 499 | { 500 | "id": "fighter", 501 | "label": "Fighter", 502 | "label_detail": "Fighter", 503 | "phonetics": [ 504 | "fighter" 505 | ], 506 | "class": "air", 507 | 
"flags": [ 508 | "fighter" 509 | ], 510 | "hp": 20, 511 | "attack": 4, 512 | "defence": 3, 513 | "firepower": 2 514 | }, 515 | { 516 | "id": "bomber", 517 | "label": "Bomber", 518 | "label_detail": "Bomber", 519 | "phonetics": [ 520 | "bomber" 521 | ], 522 | "class": "air", 523 | "flags": [], 524 | "hp": 20, 525 | "attack": 12, 526 | "defence": 1, 527 | "firepower": 2 528 | }, 529 | { 530 | "id": "awacs", 531 | "label": "AWACS", 532 | "label_detail": "AWACS", 533 | "phonetics": [ 534 | "awacs" 535 | ], 536 | "class": "air", 537 | "flags": [], 538 | "hp": 20, 539 | "attack": 0, 540 | "defence": 1, 541 | "firepower": 1 542 | }, 543 | { 544 | "id": "cruise-missile", 545 | "label": "Cruise Missile", 546 | "label_detail": "Cruise Missile", 547 | "phonetics": [ 548 | "cruise missile" 549 | ], 550 | "class": "missile", 551 | "flags": [], 552 | "hp": 10, 553 | "attack": 18, 554 | "defence": 0, 555 | "firepower": 3 556 | }, 557 | { 558 | "id": "nuclear", 559 | "label": "Nuclear", 560 | "label_detail": "Nuclear", 561 | "phonetics": [ 562 | "nuclear" 563 | ], 564 | "class": "missile", 565 | "flags": [], 566 | "hp": 10, 567 | "attack": 99, 568 | "defence": 0, 569 | "firepower": 1 570 | }, 571 | { 572 | "id": "helicopter", 573 | "label": "Helicopter", 574 | "label_detail": "Helicopter", 575 | "phonetics": [ 576 | "helicopter" 577 | ], 578 | "class": "helicopter", 579 | "flags": [], 580 | "hp": 20, 581 | "attack": 10, 582 | "defence": 3, 583 | "firepower": 2 584 | }, 585 | { 586 | "id": "stealth-fighter", 587 | "label": "Stealth Fighter", 588 | "label_detail": "Stealth Fighter", 589 | "phonetics": [ 590 | "stealth fighter" 591 | ], 592 | "class": "air", 593 | "flags": [ 594 | "fighter" 595 | ], 596 | "hp": 20, 597 | "attack": 8, 598 | "defence": 4, 599 | "firepower": 2 600 | }, 601 | { 602 | "id": "stealth-bomber", 603 | "label": "Stealth Bomber", 604 | "label_detail": "Stealth Bomber", 605 | "phonetics": [ 606 | "stealth bomber" 607 | ], 608 | "class": "air", 609 | "flags": [], 610 | "hp": 20, 611 | "attack": 18, 612 | "defence": 5, 613 | "firepower": 2 614 | }, 615 | { 616 | "id": "engineers", 617 | "label": "Engineers", 618 | "label_detail": "Engineers", 619 | "phonetics": [ 620 | "engineers" 621 | ], 622 | "class": "land", 623 | "flags": [], 624 | "hp": 20, 625 | "attack": 0, 626 | "defence": 2, 627 | "firepower": 1 628 | }, 629 | { 630 | "id": "leader", 631 | "label": "Leader", 632 | "label_detail": "Leader", 633 | "phonetics": [ 634 | "leader" 635 | ], 636 | "class": "land", 637 | "flags": [], 638 | "hp": 20, 639 | "attack": 0, 640 | "defence": 2, 641 | "firepower": 1 642 | } 643 | ], 644 | "unitclass": [ 645 | { 646 | "id": "land", 647 | "label": "Land" 648 | }, 649 | { 650 | "id": "sea", 651 | "label": "Sea" 652 | }, 653 | { 654 | "id": "trireme", 655 | "label": "Trireme" 656 | }, 657 | { 658 | "id": "air", 659 | "label": "Air" 660 | }, 661 | { 662 | "id": "helicopter", 663 | "label": "Helicopter" 664 | }, 665 | { 666 | "id": "missile", 667 | "label": "Missile" 668 | } 669 | ], 670 | "veteranlevel": [ 671 | { 672 | "level": 1, 673 | "id": "recruit", 674 | "label": "Recruit(100%)", 675 | "value": 100, 676 | "chance_for_promotion": 50 677 | }, 678 | { 679 | "level": 2, 680 | "id": "veteran", 681 | "label": "Veteran(150%)", 682 | "value": 150, 683 | "chance_for_promotion": 33 684 | }, 685 | { 686 | "level": 3, 687 | "id": "hardened", 688 | "label": "Hardened(175%)", 689 | "value": 175, 690 | "chance_for_promotion": 20 691 | }, 692 | { 693 | "level": 4, 694 | "id": "elite", 695 | "label": 
"Elite(200%)", 696 | "value": 200, 697 | "chance_for_promotion": 0 698 | } 699 | ], 700 | "terrains": [ 701 | { 702 | "id": "plains-grassland", 703 | "label": "Plains,Glassland(100%)", 704 | "value": 100 705 | }, 706 | { 707 | "id": "forest", 708 | "label": "Forest(150%)", 709 | "value": 150 710 | }, 711 | { 712 | "id": "hills", 713 | "label": "Hills(200%)", 714 | "value": 200 715 | }, 716 | { 717 | "id": "mountains", 718 | "label": "Mountains(300%)", 719 | "value": 300 720 | }, 721 | { 722 | "id": "ocean", 723 | "label": "Ocean,Lake(100%)", 724 | "value": 100 725 | }, 726 | { 727 | "id": "glacier", 728 | "label": "Glacier(100%)", 729 | "value": 100 730 | }, 731 | { 732 | "id": "tundra", 733 | "label": "Tundra(100%)", 734 | "value": 100 735 | }, 736 | { 737 | "id": "desert", 738 | "label": "Desert(100%)", 739 | "value": 100 740 | }, 741 | { 742 | "id": "swamp", 743 | "label": "Swamp(150%)", 744 | "value": 150 745 | }, 746 | { 747 | "id": "jungle", 748 | "label": "Jungle(150%)", 749 | "value": 150 750 | } 751 | ], 752 | "flags": { 753 | "basic": [ 754 | { 755 | "id": "in-city", 756 | "label": "In city(Land Unit:150%)", 757 | "description": "if defender is land unit: 150%" 758 | }, 759 | { 760 | "id": "defender-fortified", 761 | "label": "Fortified(150%)", 762 | "description": "land unit only can be gain 150%" 763 | } 764 | ], 765 | "structure": [ 766 | { 767 | "id": "city-walls", 768 | "label": "City walls (against land units or helicopter (except howitzer))(300%)" 769 | }, 770 | { 771 | "id": "city-coastal-defense", 772 | "label": "Coastal defense (against ships)(200%)" 773 | }, 774 | { 775 | "id": "city-sam-battery", 776 | "label": "SAM battery (against aircrafts (except helicopter))(200%)" 777 | }, 778 | { 779 | "id": "city-sdi-defense", 780 | "label": "SDI defense (against missiles)(200%)" 781 | } 782 | ], 783 | "roads": [ 784 | { 785 | "id": "river", 786 | "label": "River(150%)", 787 | "description": "land unit only can be gain 150%" 788 | } 789 | ], 790 | "bases": [ 791 | { 792 | "id": "in-fortress", 793 | "label": "Fortress(200%)", 794 | "description": "land unit only can be gain 200%" 795 | } 796 | ], 797 | "ex": [] 798 | }, 799 | "adjustments": [ 800 | { 801 | "id": "attacker-vereran", 802 | "label": "Veteran level (attacker)", 803 | "condition": [ 804 | "true" 805 | ], 806 | "effect": [ 807 | { 808 | "type": "attacker-strength-multiply", 809 | "value": "attacker-veteran()" 810 | } 811 | ] 812 | }, 813 | { 814 | "id": "defender-vereran", 815 | "label": "Veteran level (defender)", 816 | "condition": [ 817 | "true" 818 | ], 819 | "effect": [ 820 | { 821 | "type": "defender-strength-multiply", 822 | "value": "defender-veteran()" 823 | } 824 | ] 825 | }, 826 | { 827 | "id": "terrain", 828 | "label": "Terrain", 829 | "condition": [ 830 | "defender-class(land)" 831 | ], 832 | "effect": [ 833 | { 834 | "type": "defender-strength-multiply", 835 | "value": "terrain()" 836 | } 837 | ] 838 | }, 839 | { 840 | "id": "terrain-river", 841 | "label": "Terrain (river)", 842 | "condition": [ 843 | "defender-class(land)", 844 | "flag(river)", 845 | [ 846 | "NOT", 847 | [ 848 | [ 849 | "NOT", 850 | "flag(in-city)" 851 | ], 852 | "flag(in-fortress)" 853 | ] 854 | ] 855 | ], 856 | "effect": [ 857 | { 858 | "type": "defender-strength-multiply", 859 | "value": "150" 860 | } 861 | ] 862 | }, 863 | { 864 | "id": "land-unit-in-fortress", 865 | "label": "Terrain (fortress)", 866 | "condition": [ 867 | "defender-class(land)", 868 | [ 869 | "NOT", 870 | "flag(in-city)" 871 | ], 872 | "flag(in-fortress)", 873 | 
[ 874 | "NOT", 875 | "flag(river)" 876 | ] 877 | ], 878 | "effect": [ 879 | { 880 | "type": "defender-strength-multiply", 881 | "value": "200" 882 | } 883 | ] 884 | }, 885 | { 886 | "id": "in-fortress-and-river", 887 | "label": "Terrain (both river and fortress)", 888 | "condition": [ 889 | "defender-class(land)", 890 | [ 891 | "NOT", 892 | "flag(in-city)" 893 | ], 894 | "flag(in-fortress)", 895 | "flag(river)" 896 | ], 897 | "effect": [ 898 | { 899 | "type": "defender-strength-multiply", 900 | "value": "250" 901 | } 902 | ] 903 | }, 904 | { 905 | "id": "land-unit-fortified-or-in-city", 906 | "label": "Land unit fortified or in city", 907 | "condition": [ 908 | "defender-class(land)", 909 | [ 910 | "OR", 911 | "flag(defender-fortified)", 912 | "flag(in-city)" 913 | ] 914 | ], 915 | "effect": [ 916 | { 917 | "type": "defender-strength-multiply", 918 | "value": "150" 919 | } 920 | ] 921 | }, 922 | { 923 | "id": "city-walls", 924 | "label": "City walls", 925 | "condition": [ 926 | "flag(in-city)", 927 | "flag(city-walls)", 928 | [ 929 | "OR", 930 | "attacker-class(land)", 931 | "attacker-class(helicopter)" 932 | ], 933 | [ 934 | "NOT", 935 | "attacker-flag(igwall)" 936 | ] 937 | ], 938 | "effect": [ 939 | { 940 | "type": "defender-strength-multiply", 941 | "value": "300" 942 | } 943 | ] 944 | }, 945 | { 946 | "id": "city-coastal-defense", 947 | "label": "coastal defense", 948 | "condition": [ 949 | "flag(in-city)", 950 | "flag(city-coastal-defense)", 951 | "attacker-class(sea)" 952 | ], 953 | "effect": [ 954 | { 955 | "type": "defender-strength-multiply", 956 | "value": "200" 957 | } 958 | ] 959 | }, 960 | { 961 | "id": "city-sam-battery", 962 | "label": "SAM battery", 963 | "condition": [ 964 | "flag(in-city)", 965 | "flag(city-sam-battery)", 966 | "attacker-class(air)" 967 | ], 968 | "effect": [ 969 | { 970 | "type": "defender-strength-multiply", 971 | "value": "200" 972 | } 973 | ] 974 | }, 975 | { 976 | "id": "city-sdi-defense", 977 | "label": "SDI defense", 978 | "condition": [ 979 | "flag(in-city)", 980 | "flag(city-sdi-defense)", 981 | "attacker-class(missile)" 982 | ], 983 | "effect": [ 984 | { 985 | "type": "defender-strength-multiply", 986 | "value": "200" 987 | } 988 | ] 989 | }, 990 | { 991 | "id": "pikemen-attacked-by-mounted", 992 | "label": "Pikemen attacker by a mounted unit", 993 | "condition": [ 994 | "defender-flag(pikemen)", 995 | "attacker-flag(mounted)" 996 | ], 997 | "effect": [ 998 | { 999 | "type": "defender-strength-multiply", 1000 | "value": "200" 1001 | } 1002 | ] 1003 | }, 1004 | { 1005 | "id": "aegis-cruiser-attacked-by-aircraft-missile-or-helicopter", 1006 | "label": "AEGIS Cruiser attacker by an aircraft, missile or helicopter", 1007 | "condition": [ 1008 | "defender-flag(aegis)", 1009 | [ 1010 | "OR", 1011 | "attacker-class(air)", 1012 | "attacker-class(missile)", 1013 | "attacker-class(helicopter)" 1014 | ] 1015 | ], 1016 | "effect": [ 1017 | { 1018 | "type": "defender-strength-multiply", 1019 | "value": "500" 1020 | } 1021 | ] 1022 | }, 1023 | { 1024 | "id": "fighter-attacks-helicopter", 1025 | "label": "Fighter attacks a helicopter", 1026 | "condition": [ 1027 | "attacker-flag(fighter)", 1028 | "defender-class(helicopter)" 1029 | ], 1030 | "effect": [ 1031 | { 1032 | "type": "defender-strength-multiply", 1033 | "value": "50" 1034 | }, 1035 | { 1036 | "type": "defender-firepower-set", 1037 | "value": "1" 1038 | } 1039 | ] 1040 | }, 1041 | { 1042 | "id": "ship-in-city-attacked", 1043 | "label": "Ship in city attacked", 1044 | "condition": [ 1045 | [ 1046 | 
"OR", 1047 | "defender-class(sea)", 1048 | "defender-class(trireme)" 1049 | ], 1050 | "flag(in-city)" 1051 | ], 1052 | "effect": [ 1053 | { 1054 | "type": "attacker-firepower-multiply", 1055 | "value": "200" 1056 | }, 1057 | { 1058 | "type": "defender-firepower-set", 1059 | "value": "1" 1060 | } 1061 | ] 1062 | }, 1063 | { 1064 | "id": "ship-attacks-land-unit", 1065 | "label": "Ship attacks land unit", 1066 | "condition": [ 1067 | "attacker-class(sea)", 1068 | "defender-class(land)" 1069 | ], 1070 | "effect": [ 1071 | { 1072 | "type": "attacker-firepower-set", 1073 | "value": "1" 1074 | }, 1075 | { 1076 | "type": "defender-firepower-set", 1077 | "value": "1" 1078 | } 1079 | ] 1080 | } 1081 | ] 1082 | } 1083 | -------------------------------------------------------------------------------- /__tests__/data/sample_language.ts: -------------------------------------------------------------------------------- 1 | import {Language, LexDefinition, GrammarDefinition, DEFAULT_LEX_STATE, LexCallback} from "../../src/def/language"; 2 | 3 | export const test_sample_grammar: GrammarDefinition = { 4 | rules: [ 5 | { 6 | ltoken: "S", 7 | pattern: ["E"] 8 | }, 9 | { 10 | ltoken: "E", 11 | pattern: ["LIST", "SEMICOLON"] 12 | }, 13 | { 14 | ltoken: "E", 15 | pattern: ["HOGE"] 16 | }, 17 | { 18 | ltoken: "LIST", 19 | pattern: ["T"] 20 | }, 21 | { 22 | ltoken: "LIST", 23 | pattern: ["LIST", "SEPARATE", "T"] 24 | }, 25 | { 26 | ltoken: "T", 27 | pattern: ["ATOM"] 28 | }, 29 | { 30 | ltoken: "T", 31 | pattern: [] 32 | }, 33 | { 34 | ltoken: "HOGE", 35 | pattern: ["ID"] 36 | } 37 | ], 38 | start_symbol: "S" 39 | }; 40 | 41 | export const test_sample_lex: LexDefinition = { 42 | rules: [ 43 | {token: "ATOM", pattern: "x"}, 44 | {token: "ID", pattern: /[a-zA-Z_][a-zA-Z0-9_]*/}, 45 | {token: "SEMICOLON", pattern: ";"}, 46 | {token: "SEPARATE", pattern: "|"}, 47 | {token: null, pattern: /(\r\n|\r|\n)+/}, 48 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 49 | {token: "INVALID", pattern: /./} 50 | ] 51 | }; 52 | 53 | export const test_sample_language: Language = { 54 | lex: test_sample_lex, 55 | grammar: test_sample_grammar 56 | }; 57 | 58 | export const test_empty_language: Language = { 59 | lex: {rules: []}, 60 | grammar: {rules: [{ltoken: "S", pattern: []}], start_symbol: "S"} 61 | }; 62 | 63 | export const test_calc_grammar: GrammarDefinition = { 64 | rules: [ 65 | { 66 | ltoken: "EXP", 67 | pattern: ["EXP", "PLUS", "TERM"], 68 | callback: (c) => c[0] + c[2] 69 | }, 70 | { 71 | ltoken: "EXP", 72 | pattern: ["TERM"], 73 | callback: (c) => c[0] 74 | }, 75 | { 76 | ltoken: "TERM", 77 | pattern: ["TERM", "ASTERISK", "ATOM"], 78 | callback: (c) => c[0] * c[2] 79 | }, 80 | { 81 | ltoken: "TERM", 82 | pattern: ["ATOM"], 83 | callback: (c) => c[0] 84 | }, 85 | { 86 | ltoken: "ATOM", 87 | pattern: ["DIGITS"], 88 | callback: (c) => +(c[0]) 89 | }, 90 | { 91 | ltoken: "ATOM", 92 | pattern: ["LPAREN", "EXP", "RPAREN"], 93 | callback: (c) => c[1] 94 | } 95 | ], 96 | start_symbol: "EXP" 97 | }; 98 | 99 | export const test_calc_lex: LexDefinition = { 100 | rules: [ 101 | {token: "DIGITS", pattern: /[1-9][0-9]*/}, 102 | {token: "PLUS", pattern: "+"}, 103 | {token: "ASTERISK", pattern: "*"}, 104 | {token: "LPAREN", pattern: "("}, 105 | {token: "RPAREN", pattern: ")"}, 106 | {token: null, pattern: /(\r\n|\r|\n)+/}, 107 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 108 | {token: "INVALID", pattern: /./} 109 | ] 110 | }; 111 | 112 | export 
const test_calc_language: Language = { 113 | lex: test_calc_lex, 114 | grammar: test_calc_grammar 115 | }; 116 | 117 | export const test_calc_language_raw_string = `DIGITS /[1-9][0-9]*/ 118 | PLUS "+" 119 | ASTERISK "*" 120 | LPAREN "(" 121 | RPAREN ")" 122 | !ENDLINE /(\\r\\n|\\r|\\n)+/ 123 | !WHITESPACE /[ \\f\\t\\v\\u00a0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\ufeff]+/ 124 | INVALID /./ 125 | 126 | $EXP : EXP PLUS TERM | TERM; 127 | TERM : TERM ASTERISK ATOM | ATOM; 128 | ATOM : DIGITS | LPAREN EXP RPAREN; 129 | `; 130 | 131 | export const test_lexstate_lex: LexDefinition = { 132 | rules: [ 133 | {token: "NUMBER", pattern: /0|[1-9][0-9]*/, states: ["in-parenthesis"]}, 134 | {token: "ID", pattern: /[a-zA-Z_][a-zA-Z0-9_]*/}, 135 | {token: "ASTERISK", pattern: "*", states: ["super-in-parenthesis"]}, 136 | {token: "PLUS", pattern: "+", states: [DEFAULT_LEX_STATE, "in-parenthesis"]}, 137 | {token: "DOLLAR", pattern: "$", states: ["in-braces"]}, 138 | { 139 | token: "LPAREN", pattern: "(", 140 | callback: (token, value, lex) => { 141 | lex.callState("in-parenthesis"); 142 | } 143 | }, 144 | { 145 | token: "RPAREN", pattern: ")", states: ["in-parenthesis"], 146 | callback: (token, value, lex) => { 147 | lex.returnState(); 148 | } 149 | }, 150 | { 151 | token: "LBRACE", pattern: "{", 152 | callback: (token, value, lex) => { 153 | lex.callState("in-braces"); 154 | } 155 | }, 156 | { 157 | token: "RBRACE", pattern: "}", states: ["in-braces"], 158 | callback: (token, value, lex) => { 159 | lex.returnState(); 160 | } 161 | }, 162 | {token: null, pattern: /(\r\n|\r|\n)+/}, 163 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 164 | {token: "INVALID", pattern: /./, states: [DEFAULT_LEX_STATE, "in-parenthesis"]} 165 | ], 166 | states: [ 167 | {label: "super-in-parenthesis"}, 168 | {label: "in-parenthesis", inheritance: "super-in-parenthesis"}, 169 | {label: "in-braces", inheritance: DEFAULT_LEX_STATE} 170 | ] 171 | }; 172 | 173 | export const test_lexstate_language: Language = { 174 | lex: test_lexstate_lex, 175 | grammar: {rules: [{ltoken: "S", pattern: []}], start_symbol: "S"} 176 | }; 177 | 178 | export const test_dynamic_lexrules_lex: LexDefinition = { 179 | rules: [ 180 | { 181 | token: "LNEST", pattern: /%+{/, 182 | callback: ((): LexCallback => { 183 | let i = 0; 184 | return (value, token, lex) => { 185 | const label = i.toString(); 186 | // lex.setState({label, is_exclusive: false}); 187 | lex.callState(label); 188 | lex.addRule(label, { 189 | token: "RNEST", pattern: `}${"%".repeat(value.length - 1)}`, states: [label], 190 | callback: (v, t, l) => { 191 | l.returnState(); 192 | l.removeRule(label); 193 | l.removeRule(`${label}-invalid`); 194 | // lex.removeState(label); 195 | } 196 | }); 197 | lex.addRule(`${label}-invalid`, { 198 | token: "INVALID", pattern: /./, states: [label] 199 | }); 200 | i++; 201 | }; 202 | })() 203 | }, 204 | {token: "INVALID", pattern: /./} 205 | ] 206 | }; 207 | 208 | export const test_dynamic_lexrules_language: Language = { 209 | lex: test_dynamic_lexrules_lex, 210 | grammar: {rules: [{ltoken: "S", pattern: []}], start_symbol: "S"} 211 | }; 212 | -------------------------------------------------------------------------------- /__tests__/data/tmp/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tatamo/lavriapg/534e2ab5922238b146adaae0c41720582c11ff25/__tests__/data/tmp/.gitkeep 
-------------------------------------------------------------------------------- /__tests__/json_test.ts: -------------------------------------------------------------------------------- 1 | import {language_parser} from "../src/precompiler/index"; 2 | import {ParserGenerator} from "../src/parsergenerator/index"; 3 | import {ParserFactory} from "../src/parser/index"; 4 | 5 | const input = require("fs").readFileSync("__tests__/data/json_language", "utf8"); 6 | describe("json parse test", () => { 7 | const json_lang = language_parser.parse(input); 8 | const pg = new ParserGenerator(json_lang); 9 | const parser = ParserFactory.create(json_lang, pg.getParsingTable()); 10 | 11 | test("no conflict found", () => { 12 | expect(pg.getTableType()).toBe("LALR1"); 13 | }); 14 | 15 | test("no error occurred in parsing", () => { 16 | const json_input = require("fs").readFileSync("__tests__/data/json_sample.json", "utf8"); 17 | expect(() => parser.parse(json_input)).not.toThrow(); 18 | }); 19 | }); 20 | -------------------------------------------------------------------------------- /__tests__/language_parsing_test.ts: -------------------------------------------------------------------------------- 1 | import {language_language, language_parser} from "../src/precompiler/ruleparser"; 2 | import {ParserGenerator} from "../src/parsergenerator/parsergenerator"; 3 | import {Language} from "../src"; 4 | import {Lexer} from "../src/lexer/lexer"; 5 | import {SYMBOL_EOF} from "../src/def/token"; 6 | 7 | describe("language parsing test", () => { 8 | const input = require("fs").readFileSync("language", "utf8"); 9 | const removeCallback = (language: Language): Language => { 10 | const lex = {...language.lex}; 11 | lex.rules = lex.rules.map(({token, pattern, states}) => ({token, pattern, states})); 12 | const grammar = {...language.grammar}; 13 | grammar.rules = grammar.rules.map(({ltoken, pattern}) => ({ltoken, pattern})); 14 | return {lex, grammar}; 15 | }; 16 | 17 | const language_language_without_callback = removeCallback(language_language); 18 | const pg = new ParserGenerator(language_language); 19 | test("valid parser", () => { 20 | expect(pg.isConflicted()).toBeFalsy(); 21 | }); 22 | const parser = pg.getParser(); // expected to be identical to language_parser 23 | test("parsing language file", () => { 24 | expect(removeCallback(parser.parse(input))).toEqual(language_language_without_callback); 25 | }); 26 | // we want to generate a parser by reading the language file itself 27 | test("language_parser", () => { 28 | expect(removeCallback(language_parser.parse(input))).toEqual(language_language_without_callback); 29 | }); 30 | }); 31 | 32 | describe("syntax functions test", () => { 33 | const pg = new ParserGenerator(language_language); 34 | const parser = pg.getParser(); 35 | test("lex-state", () => { 36 | const input = ` 37 | A /a/ 38 | B /b/ 39 | B2 /b/ 40 | C /c/ 41 | $S : A B2 C; 42 | `; 43 | expect(new Lexer(parser.parse(input)).exec("b")).toEqual([{token: "B2", value: "b"}, {token: SYMBOL_EOF, value: ""}]); 44 | expect(parser.parse(input)).toMatchSnapshot(); 45 | }); 46 | test("#start", () => { 47 | // when multiple #start directives are present, the bottom-most one is adopted 48 | // TODO: decide whether to make this an explicit part of the spec or to disallow multiple #start directives 49 | const input = ` 50 | #start 51 | #start 52 | 53 | A /a/ 54 | A2 /a/ 55 | B /b/ 56 | $S : A B; 57 | `; 58 | expect(new Lexer(parser.parse(input)).exec("a")).toEqual([{token: "A2", value: "a"}, {token: SYMBOL_EOF, value: ""}]); 59 | expect(() => new Lexer(parser.parse(input)).exec("b")).toThrow(); 60 | expect(parser.parse(input)).toMatchSnapshot(); 61 | }); 62 | test("#extend", 
() => { 63 | const input = ` 64 | #start 65 | #extend 66 | #extend 67 | 68 | A /a/ 69 | B /b/ 70 | C /c/ 71 | $S : A B C; 72 | `; 73 | expect(new Lexer(parser.parse(input)).exec("abc")).toMatchSnapshot(); 74 | expect(parser.parse(input)).toMatchSnapshot(); 75 | }); 76 | test("callbacks", () => { 77 | const input = ` 78 | A "a" { callback_of_A(); } 79 | B /b/ { callback_of_B(); } 80 | 81 | $S : T { callback_of_S(); }; 82 | T : A { callback_of_T_1(); } | E { callback_of_T_2(); } | { callback_of_T_3(); }; 83 | E : { callback_of_E(); } | B; 84 | `; 85 | const result = parser.parse(input); 86 | expect(result).toMatchSnapshot(); 87 | // @ts-ignore 88 | expect(result.grammar.rules.map((rule) => "callback" in rule ? rule.callback.toString() : undefined)).toMatchSnapshot(); 89 | }); 90 | test("ex-callbacks", () => { 91 | const input = ` 92 | #lex_default { lex_default_callback(); } 93 | #lex_end { lex_end_callback(); } 94 | #lex_begin { lex_begin_callback(); } 95 | A "a" 96 | 97 | #begin { grammar_begin_callback(); } 98 | #end { grammar_end_callback(); } 99 | #default { grammar_default_callback(); } 100 | $S : A; 101 | `; 102 | const result = parser.parse(input); 103 | expect(result).toMatchSnapshot(); 104 | expect(result.lex.begin_callback.toString()).toMatchSnapshot(); 105 | expect(result.lex.default_callback.toString()).toMatchSnapshot(); 106 | expect(result.lex.end_callback.toString()).toMatchSnapshot(); 107 | expect(result.grammar.begin_callback.toString()).toMatchSnapshot(); 108 | expect(result.grammar.default_callback.toString()).toMatchSnapshot(); 109 | expect(result.grammar.end_callback.toString()).toMatchSnapshot(); 110 | }); 111 | test("callback delimiters", () => { 112 | const input = ` 113 | A "a" {{ if(1+1===3){ foo(); } }} 114 | 115 | $S : T %{ const s = {}; }%; 116 | T : E %%{ const t = "}%, }}%, }}%%, }%%%, }}%%%"; }%%; 117 | E : { const e = "}%"+"}}"; } | A; 118 | `; 119 | const result = parser.parse(input); 120 | expect(result).toMatchSnapshot(); 121 | // @ts-ignore 122 | expect(result.grammar.rules.map((rule) => "callback" in rule ? 
rule.callback.toString() : undefined)).toMatchSnapshot(); 123 | }); 124 | }); 125 | -------------------------------------------------------------------------------- /__tests__/lexer/__snapshots__/controller_test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`begin/end callbacks test using variables 1`] = ` 4 | Array [ 5 | Object { 6 | "token": "A", 7 | "value": "0", 8 | }, 9 | Object { 10 | "token": "A", 11 | "value": "1", 12 | }, 13 | Object { 14 | "token": "A", 15 | "value": "2", 16 | }, 17 | Object { 18 | "token": "A", 19 | "value": "3", 20 | }, 21 | Object { 22 | "token": Symbol(EOF), 23 | "value": "", 24 | }, 25 | ] 26 | `; 27 | 28 | exports[`begin/end callbacks test using variables 2`] = ` 29 | Array [ 30 | Object { 31 | "token": "A", 32 | "value": "0", 33 | }, 34 | Object { 35 | "token": "A", 36 | "value": "1", 37 | }, 38 | Object { 39 | "token": "A", 40 | "value": "2", 41 | }, 42 | Object { 43 | "token": "A", 44 | "value": "3", 45 | }, 46 | Object { 47 | "token": "A", 48 | "value": "4", 49 | }, 50 | Object { 51 | "token": Symbol(EOF), 52 | "value": "", 53 | }, 54 | ] 55 | `; 56 | 57 | exports[`dynamic lex rules test adding and removing rules 1`] = ` 58 | Array [ 59 | Object { 60 | "token": "LNEST", 61 | "value": "%%{", 62 | }, 63 | Object { 64 | "token": "INVALID", 65 | "value": "}", 66 | }, 67 | Object { 68 | "token": "INVALID", 69 | "value": "%", 70 | }, 71 | Object { 72 | "token": "RNEST", 73 | "value": "}%%", 74 | }, 75 | Object { 76 | "token": "INVALID", 77 | "value": "}", 78 | }, 79 | Object { 80 | "token": "INVALID", 81 | "value": "%", 82 | }, 83 | Object { 84 | "token": "INVALID", 85 | "value": "%", 86 | }, 87 | Object { 88 | "token": Symbol(EOF), 89 | "value": "", 90 | }, 91 | ] 92 | `; 93 | 94 | exports[`lex state test exclusive state 1`] = ` 95 | Array [ 96 | Object { 97 | "token": "LPAREN", 98 | "value": "(", 99 | }, 100 | Object { 101 | "token": "PLUS", 102 | "value": "+", 103 | }, 104 | Object { 105 | "token": "INVALID", 106 | "value": "a", 107 | }, 108 | Object { 109 | "token": "INVALID", 110 | "value": "{", 111 | }, 112 | Object { 113 | "token": "ASTERISK", 114 | "value": "*", 115 | }, 116 | Object { 117 | "token": "RPAREN", 118 | "value": ")", 119 | }, 120 | Object { 121 | "token": Symbol(EOF), 122 | "value": "", 123 | }, 124 | ] 125 | `; 126 | 127 | exports[`lex state test nested states 1`] = ` 128 | Array [ 129 | Object { 130 | "token": "INVALID", 131 | "value": "$", 132 | }, 133 | Object { 134 | "token": "LBRACE", 135 | "value": "{", 136 | }, 137 | Object { 138 | "token": "DOLLAR", 139 | "value": "$", 140 | }, 141 | Object { 142 | "token": "LPAREN", 143 | "value": "(", 144 | }, 145 | Object { 146 | "token": "INVALID", 147 | "value": "$", 148 | }, 149 | Object { 150 | "token": "NUMBER", 151 | "value": "123", 152 | }, 153 | Object { 154 | "token": "RPAREN", 155 | "value": ")", 156 | }, 157 | Object { 158 | "token": "DOLLAR", 159 | "value": "$", 160 | }, 161 | Object { 162 | "token": "RBRACE", 163 | "value": "}", 164 | }, 165 | Object { 166 | "token": Symbol(EOF), 167 | "value": "", 168 | }, 169 | ] 170 | `; 171 | 172 | exports[`lex state test non-exclusive state 1`] = ` 173 | Array [ 174 | Object { 175 | "token": "INVALID", 176 | "value": "$", 177 | }, 178 | Object { 179 | "token": "LBRACE", 180 | "value": "{", 181 | }, 182 | Object { 183 | "token": "DOLLAR", 184 | "value": "$", 185 | }, 186 | Object { 187 | "token": "PLUS", 188 | "value": "+", 189 | }, 190 | 
Object { 191 | "token": "ID", 192 | "value": "a", 193 | }, 194 | Object { 195 | "token": "INVALID", 196 | "value": "*", 197 | }, 198 | Object { 199 | "token": "RBRACE", 200 | "value": "}", 201 | }, 202 | Object { 203 | "token": Symbol(EOF), 204 | "value": "", 205 | }, 206 | ] 207 | `; 208 | 209 | exports[`lex state test reset state after process 1`] = ` 210 | Array [ 211 | Object { 212 | "token": "LBRACE", 213 | "value": "{", 214 | }, 215 | Object { 216 | "token": "LPAREN", 217 | "value": "(", 218 | }, 219 | Object { 220 | "token": Symbol(EOF), 221 | "value": "", 222 | }, 223 | ] 224 | `; 225 | 226 | exports[`lex state test reset state after process 2`] = ` 227 | Array [ 228 | Object { 229 | "token": "INVALID", 230 | "value": ")", 231 | }, 232 | Object { 233 | "token": "INVALID", 234 | "value": "}", 235 | }, 236 | Object { 237 | "token": Symbol(EOF), 238 | "value": "", 239 | }, 240 | ] 241 | `; 242 | -------------------------------------------------------------------------------- /__tests__/lexer/__snapshots__/lexer_test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`Lexer test exec valid input 1`] = ` 4 | Array [ 5 | Object { 6 | "token": "ID", 7 | "value": "xabc", 8 | }, 9 | Object { 10 | "token": "SEMICOLON", 11 | "value": ";", 12 | }, 13 | Object { 14 | "token": "ATOM", 15 | "value": "x", 16 | }, 17 | Object { 18 | "token": "SEPARATE", 19 | "value": "|", 20 | }, 21 | Object { 22 | "token": "INVALID", 23 | "value": "&", 24 | }, 25 | Object { 26 | "token": "INVALID", 27 | "value": "0", 28 | }, 29 | Object { 30 | "token": "ID", 31 | "value": "ax", 32 | }, 33 | Object { 34 | "token": "ATOM", 35 | "value": "x", 36 | }, 37 | Object { 38 | "token": "ID", 39 | "value": "z", 40 | }, 41 | Object { 42 | "token": "SEMICOLON", 43 | "value": ";", 44 | }, 45 | Object { 46 | "token": Symbol(EOF), 47 | "value": "", 48 | }, 49 | ] 50 | `; 51 | 52 | exports[`Lexer test longest match 1`] = ` 53 | Array [ 54 | Object { 55 | "token": "PM", 56 | "value": "+-", 57 | }, 58 | Object { 59 | "token": "PMA", 60 | "value": "+-*", 61 | }, 62 | Object { 63 | "token": "ABCD", 64 | "value": "abcd", 65 | }, 66 | Object { 67 | "token": Symbol(EOF), 68 | "value": "", 69 | }, 70 | ] 71 | `; 72 | 73 | exports[`Lexer test regexp flags 1`] = ` 74 | Array [ 75 | Object { 76 | "token": "I", 77 | "value": "abc", 78 | }, 79 | Object { 80 | "token": "M", 81 | "value": "x 82 | yz", 83 | }, 84 | Object { 85 | "token": "U", 86 | "value": "def", 87 | }, 88 | Object { 89 | "token": "G", 90 | "value": "pqr", 91 | }, 92 | Object { 93 | "token": "A", 94 | "value": "a 95 | c", 96 | }, 97 | Object { 98 | "token": Symbol(EOF), 99 | "value": "", 100 | }, 101 | ] 102 | `; 103 | 104 | exports[`Lexer test rule priority 1`] = ` 105 | Array [ 106 | Object { 107 | "token": "PM", 108 | "value": "+-", 109 | }, 110 | Object { 111 | "token": "PMA", 112 | "value": "+-*", 113 | }, 114 | Object { 115 | "token": "ABCD2", 116 | "value": "abcd", 117 | }, 118 | Object { 119 | "token": "XYZ", 120 | "value": "xyz", 121 | }, 122 | Object { 123 | "token": "W", 124 | "value": "w", 125 | }, 126 | Object { 127 | "token": Symbol(EOF), 128 | "value": "", 129 | }, 130 | ] 131 | `; 132 | 133 | exports[`Lexer test skip string pattern if the following is \\w 1`] = ` 134 | Array [ 135 | Object { 136 | "token": "REGEXP", 137 | "value": "abc", 138 | }, 139 | Object { 140 | "token": "XYZ", 141 | "value": "xyz", 142 | }, 143 | Object { 144 | "token": "ASTERISK", 145 | 
"value": "*", 146 | }, 147 | Object { 148 | "token": "STR", 149 | "value": "abc", 150 | }, 151 | Object { 152 | "token": "ASTERISK", 153 | "value": "*", 154 | }, 155 | Object { 156 | "token": "XYZ", 157 | "value": "xyz", 158 | }, 159 | Object { 160 | "token": "ASTERISK", 161 | "value": "*", 162 | }, 163 | Object { 164 | "token": "REGEXP", 165 | "value": "abc", 166 | }, 167 | Object { 168 | "token": "STR", 169 | "value": "abc", 170 | }, 171 | Object { 172 | "token": Symbol(EOF), 173 | "value": "", 174 | }, 175 | ] 176 | `; 177 | -------------------------------------------------------------------------------- /__tests__/lexer/controller_test.ts: -------------------------------------------------------------------------------- 1 | import {test_dynamic_lexrules_language, test_lexstate_language} from "../data/sample_language"; 2 | import {SYMBOL_EOF} from "../../src/def/token"; 3 | import {Lexer} from "../../src/lexer/lexer"; 4 | 5 | describe("lex state test", () => { 6 | test("nested states", () => { 7 | const lexer = new Lexer(test_lexstate_language); 8 | expect(lexer.exec("${$($123)$}")).toMatchSnapshot(); 9 | }); 10 | test("rule of non-default state", () => { 11 | const lexer = new Lexer(test_lexstate_language); 12 | expect(lexer.exec("123")).not.toEqual([ 13 | {token: "NUMBER", value: "123"}, 14 | {token: SYMBOL_EOF, value: ""} 15 | ]); 16 | }); 17 | test("reset state after process", () => { 18 | const lexer = new Lexer(test_lexstate_language); 19 | expect(lexer.exec("{(")).toMatchSnapshot(); 20 | expect(lexer.exec(")}")).toMatchSnapshot(); 21 | }); 22 | test("exclusive state", () => { 23 | const lexer = new Lexer(test_lexstate_language); 24 | expect(lexer.exec("(+a{*)")).toMatchSnapshot(); 25 | }); 26 | test("non-exclusive state", () => { 27 | const lexer = new Lexer(test_lexstate_language); 28 | expect(lexer.exec("${$+a*}")).toMatchSnapshot(); 29 | }); 30 | }); 31 | 32 | describe("dynamic lex rules test", () => { 33 | test("adding and removing rules", () => { 34 | const lexer = new Lexer(test_dynamic_lexrules_language); 35 | expect(lexer.exec("%%{}%}%%}%%")).toMatchSnapshot(); 36 | }); 37 | }); 38 | 39 | describe("begin/end callbacks test", () => { 40 | test("using variables", () => { 41 | let counter = 0; 42 | const lexer = new Lexer({ 43 | grammar: {rules: [], start_symbol: ""}, lex: { 44 | rules: [{token: "A", pattern: /a/, callback: () => ["A", (counter++).toString()]}], 45 | begin_callback: () => { 46 | counter = 0; 47 | } 48 | } 49 | }); 50 | expect(lexer.exec("aaaa")).toMatchSnapshot(); 51 | expect(lexer.exec("aaaaa")).toMatchSnapshot(); 52 | }); 53 | }); 54 | -------------------------------------------------------------------------------- /__tests__/lexer/lexer_test.ts: -------------------------------------------------------------------------------- 1 | import {Lexer} from "../../src/lexer/lexer"; 2 | import {test_empty_language, test_sample_language} from "../data/sample_language"; 3 | import {SYMBOL_EOF} from "../../src/def/token"; 4 | import {Language} from "../../src/def/language"; 5 | 6 | describe("Lexer test", () => { 7 | const empty_lang: Language = {lex: {rules: []}, grammar: {rules: [], start_symbol: ""}}; 8 | test("exec valid input", () => { 9 | const lexer = new Lexer(test_sample_language); 10 | expect(lexer.exec("xabc;x|&0ax x z;")).toMatchSnapshot(); 11 | }); 12 | test("exec invalid input", () => { 13 | const lexer = new Lexer(test_empty_language); 14 | expect(() => { 15 | lexer.exec("xabc;x|&0ax x z;"); 16 | }).toThrow(/no pattern matched/); 17 | }); 18 | test("exec 
no length input", () => { 19 | const lexer = new Lexer(test_sample_language); 20 | expect(lexer.exec("")).toEqual([ 21 | {token: SYMBOL_EOF, value: ""} 22 | ]); 23 | const lexer2 = new Lexer(test_empty_language); 24 | expect(lexer2.exec("")).toEqual([ 25 | {token: SYMBOL_EOF, value: ""} 26 | ]); 27 | }); 28 | test("regexp flags", () => { 29 | const lexer = new Lexer({ 30 | grammar: {rules: [], start_symbol: ""}, lex: { 31 | rules: [ 32 | {token: "I", pattern: /AbC/i}, 33 | {token: "M", pattern: /x\nyz/m}, 34 | {token: "U", pattern: /\u{64}\u{65}\u{66}/u}, 35 | {token: "G", pattern: /pqr/g}, 36 | {token: "A", pattern: /\u{61}\nC/imugy} 37 | ] 38 | } 39 | }); 40 | expect(lexer.exec("abcx\nyzdefpqra\nc")).toMatchSnapshot(); 41 | }); 42 | test("skip string pattern if the following is \\w", () => { 43 | const lexer = new Lexer({ 44 | grammar: {rules: [], start_symbol: ""}, lex: { 45 | rules: [ 46 | {token: "STR", pattern: "abc"}, 47 | {token: "REGEXP", pattern: /abc/}, 48 | {token: "ASTERISK", pattern: "*"}, 49 | {token: "XYZ", pattern: "xyz"} 50 | ] 51 | } 52 | }); 53 | expect(lexer.exec("abcxyz*abc*xyz*abcabc")).toMatchSnapshot(); 54 | }); 55 | test("rule priority", () => { 56 | const lexer = new Lexer({ 57 | grammar: {rules: [], start_symbol: ""}, lex: { 58 | rules: [ 59 | {token: "PM", pattern: "+-"}, 60 | {token: "PMA", pattern: "+-*"}, 61 | {token: "ASTERISK", pattern: "*", priority: 1}, 62 | {token: "ABC", pattern: /abc/}, 63 | {token: "ABCD", pattern: /abcd/}, 64 | {token: "ABCD2", pattern: /abcd/, priority: 2}, 65 | {token: "D", pattern: /d/}, 66 | {token: "XYZ", pattern: /xyz/}, 67 | {token: "XYZW", pattern: /xyzw/, priority: -1}, 68 | {token: "W", pattern: /w/}, 69 | {token: null, pattern: " "} 70 | ] 71 | } 72 | }); 73 | expect(lexer.exec(" +-+-*abcd xyzw")).toMatchSnapshot(); 74 | }); 75 | test("longest match", () => { 76 | const lexer = new Lexer({ 77 | grammar: {rules: [], start_symbol: ""}, lex: { 78 | rules: [ 79 | {token: "PM", pattern: "+-"}, 80 | {token: "PMA", pattern: "+-*"}, 81 | {token: "ASTERISK", pattern: "*"}, 82 | {token: "ABC", pattern: /abc/}, 83 | {token: "ABCD", pattern: /abcd/}, 84 | {token: "ABCD2", pattern: /abcd/}, 85 | {token: "D", pattern: /d/}, 86 | {token: null, pattern: " "} 87 | ] 88 | } 89 | }); 90 | expect(lexer.exec(" +-+-*abcd ")).toMatchSnapshot(); 91 | }); 92 | test("callbacks", () => { 93 | // 引数として与えられるLexControllerを使用した詳細なテストはcontroller_test.tsで 94 | const lexer = new Lexer({ 95 | grammar: {rules: [], start_symbol: ""}, lex: { 96 | rules: [ 97 | {token: "A", pattern: /a/}, 98 | {token: "B", pattern: /b/, callback: (value, token) => token}, 99 | {token: "C", pattern: /c/, callback: (value, token) => ({token, value: "2"})}, 100 | { 101 | token: "D", pattern: /d/, 102 | callback: () => { 103 | return; 104 | } 105 | }, 106 | {token: "E", pattern: /e/, callback: () => null}, 107 | {token: null, pattern: " "} 108 | ], 109 | default_callback: (value, token) => { 110 | return [token, "1"]; 111 | } 112 | } 113 | }); 114 | expect(lexer.exec("abc de")).toEqual([ 115 | {token: "A", value: "1"}, 116 | {token: "B", value: "b"}, 117 | {token: "C", value: "2"}, 118 | {token: "D", value: "d"}, 119 | {token: SYMBOL_EOF, value: ""} 120 | ]); 121 | }); 122 | }); 123 | -------------------------------------------------------------------------------- /__tests__/parser/parser_test.ts: -------------------------------------------------------------------------------- 1 | import {Parser} from "../../src/parser/parser"; 2 | import {ParserFactory} from 
"../../src/parser/factory"; 3 | import {ParserGenerator} from "../../src/parsergenerator/parsergenerator"; 4 | import {test_calc_language} from "../data/sample_language"; 5 | 6 | describe("parser test", () => { 7 | const parsingtable = new ParserGenerator(test_calc_language).getParsingTable(); 8 | const parser = ParserFactory.create(test_calc_language, new ParserGenerator(test_calc_language).getParsingTable()); 9 | test("parser factory", () => { 10 | expect(ParserFactory.create(test_calc_language, parsingtable)).toBeInstanceOf(Parser); 11 | }); 12 | test("custom callback in grammar", () => { 13 | expect(parser.parse("2*(3+4)")).toBe(14); 14 | }); 15 | /* 16 | test("getting calc language ast", () => { 17 | expect(parser.parse("1+1")).toEqual({ 18 | type: "EXP", value: null, children: 19 | [ 20 | {type: "EXP", value: null, children: [{type: "TERM", value: null, children: [{type: "ATOM", value: null, children: [{type: "DIGITS", value: "1", children: []}]}]}]}, 21 | {type: "PLUS", value: "+", children: []}, 22 | {type: "TERM", value: null, children: [{type: "ATOM", value: null, children: [{type: "DIGITS", value: "1", children: []}]}]} 23 | ] 24 | }); 25 | }); 26 | */ 27 | test("invalid input", () => { 28 | expect(parser.parse("1zzz")).toEqual("1"); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/closureitem_test.ts: -------------------------------------------------------------------------------- 1 | import {ClosureItem} from "../../src/parsergenerator/closureitem"; 2 | import {test_sample_language} from "../data/sample_language"; 3 | import {GrammarDB} from "../../src/index"; 4 | import {SYMBOL_EOF} from "../../src/def/token"; 5 | 6 | describe("ClosureItem test", () => { 7 | const grammardb = new GrammarDB(test_sample_language); 8 | describe("{S' -> . 
S [$]}", () => { 9 | const ci = new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]); 10 | test("getter", () => { 11 | expect(ci.rule_id).toBe(-1); 12 | expect(ci.dot_index).toBe(0); 13 | expect(ci.lookaheads).toEqual([SYMBOL_EOF]); 14 | }); 15 | test("ClosureItem Hash", () => { 16 | const id_eof = grammardb.getTokenId(SYMBOL_EOF); 17 | expect(ci.getLR0Hash()).toBe("-1,0"); 18 | expect(ci.getLR1Hash()).toBe(`-1,0,[${id_eof}]`); 19 | }); 20 | describe("ClosureItem equality", () => { 21 | test("compare itself", () => { 22 | expect(ci.isSameLR0(ci)).toBeTruthy(); 23 | expect(ci.isSameLR1(ci)).toBeTruthy(); 24 | }); 25 | test("same ClosureItem", () => { 26 | const ci2 = new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]); 27 | expect(ci.isSameLR0(ci2)).toBeTruthy(); 28 | expect(ci.isSameLR1(ci2)).toBeTruthy(); 29 | }); 30 | test("not same ClosureItem", () => { 31 | const ci2 = new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]); 32 | expect(ci.isSameLR0(ci2)).toBeFalsy(); 33 | expect(ci.isSameLR1(ci2)).toBeFalsy(); 34 | }); 35 | test("not same lookahead item", () => { 36 | const ci2 = new ClosureItem(grammardb, -1, 0, ["ID"]); 37 | expect(ci.isSameLR0(ci2)).toBeTruthy(); 38 | expect(ci.isSameLR1(ci2)).toBeFalsy(); 39 | }); 40 | }); 41 | test("invalid lookahead item", () => { 42 | expect(()=>new ClosureItem(grammardb, -1, 0, ["X"])).toThrow(/invalid token/); 43 | }); 44 | }); 45 | describe("invalid ClosureItem", () => { 46 | test("invalid grammar id", () => { 47 | expect(()=>new ClosureItem(grammardb, -2, 0, [SYMBOL_EOF])).toThrow(); 48 | }); 49 | test("invalid dot position", () => { 50 | expect(()=>new ClosureItem(grammardb, -1, -1, [SYMBOL_EOF])).toThrow(); 51 | }); 52 | }); 53 | }); 54 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/closureset_test.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDB} from "../../src/parsergenerator/grammardb"; 2 | import {test_empty_language, test_sample_language} from "../data/sample_language"; 3 | import {ClosureItem} from "../../src/parsergenerator/closureitem"; 4 | import {SYMBOL_EOF} from "../../src/def/token"; 5 | import {ClosureSet} from "../../src/parsergenerator/closureset"; 6 | 7 | describe("ClosureSet test", () => { 8 | describe("Closure{S' -> . S [$]}", () => { 9 | const grammardb = new GrammarDB(test_sample_language); 10 | const cs = new ClosureSet(grammardb, [new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF])]); 11 | /* 12 | S' -> . S [$] 13 | S -> . E [$] 14 | E -> . LIST SEMICOLON [$] 15 | E -> . HOGE [$] 16 | LIST -> . T [SEMICOLON SEPARATE] 17 | LIST > . LIST SEPARATE T [SEMICOLON SEPARATE] 18 | T -> . ATOM [SEMICOLON SEPARATE] 19 | T -> . [SEMICOLON SEPARATE] 20 | HOGE -> . 
ID [$] 21 | */ 22 | const expanded = [ 23 | new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]), 24 | new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]), 25 | new ClosureItem(grammardb, 1, 0, [SYMBOL_EOF]), 26 | new ClosureItem(grammardb, 2, 0, [SYMBOL_EOF]), 27 | new ClosureItem(grammardb, 3, 0, ["SEMICOLON", "SEPARATE"]), 28 | new ClosureItem(grammardb, 4, 0, ["SEPARATE", "SEMICOLON"]), // test changing lookaheads order 29 | new ClosureItem(grammardb, 5, 0, ["SEMICOLON", "SEPARATE"]), 30 | new ClosureItem(grammardb, 6, 0, ["SEMICOLON", "SEPARATE"]), 31 | new ClosureItem(grammardb, 7, 0, [SYMBOL_EOF]) 32 | ]; 33 | const expanded_shuffled = [ 34 | new ClosureItem(grammardb, 5, 0, ["SEMICOLON", "SEPARATE"]), 35 | new ClosureItem(grammardb, 2, 0, [SYMBOL_EOF]), 36 | new ClosureItem(grammardb, 1, 0, [SYMBOL_EOF]), 37 | new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]), 38 | new ClosureItem(grammardb, 4, 0, ["SEPARATE", "SEMICOLON"]), 39 | new ClosureItem(grammardb, 7, 0, [SYMBOL_EOF]), 40 | new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]), 41 | new ClosureItem(grammardb, 3, 0, ["SEMICOLON", "SEPARATE"]), 42 | new ClosureItem(grammardb, 6, 0, ["SEPARATE", "SEMICOLON"]) 43 | ]; 44 | test("ClosureSet size", () => { 45 | expect(cs.size).toBe(9); 46 | }); 47 | test("ClosureSet array", () => { 48 | expect(cs.getArray()).toEqual(expect.arrayContaining(expanded)); 49 | }); 50 | describe("ClosureSet equality", () => { 51 | test("compare itself", () => { 52 | expect(cs.isSameLR0(cs)).toBeTruthy(); 53 | expect(cs.isSameLR1(cs)).toBeTruthy(); 54 | }); 55 | test("compare closureset that is given expanded items to constructor", () => { 56 | expect(cs.isSameLR0(new ClosureSet(grammardb, expanded_shuffled))).toBeTruthy(); 57 | expect(cs.isSameLR1(new ClosureSet(grammardb, expanded_shuffled))).toBeTruthy(); 58 | }); 59 | }); 60 | test("ClosureSet#include", () => { 61 | for (const ci of expanded) { 62 | expect(cs.includes(ci)).toBeTruthy(); 63 | } 64 | }); 65 | test("ClosureSet#include invalid inputs", () => { 66 | expect(()=>cs.includes(new ClosureItem(grammardb, 0, 1, [SYMBOL_EOF]))).not.toThrow(); 67 | expect(()=>cs.includes(new ClosureItem(grammardb, 0, 2, [SYMBOL_EOF]))).toThrow(/out of range/); 68 | expect(()=>cs.includes(new ClosureItem(grammardb, 0, -1, [SYMBOL_EOF]))).toThrow(/out of range/); 69 | expect(()=>cs.includes(new ClosureItem(grammardb, -2, 0, [SYMBOL_EOF]))).toThrow(/invalid grammar id/); 70 | expect(()=>cs.includes(new ClosureItem(grammardb, -8, 0, [SYMBOL_EOF]))).toThrow(/invalid grammar id/); 71 | }); 72 | describe("invalid ClosureSet", () => { 73 | test("invalid grammar id", () => { 74 | expect(()=>new ClosureSet(grammardb, [new ClosureItem(grammardb, -2, 0, [SYMBOL_EOF])])).toThrow(/invalid grammar id/); 75 | }); 76 | test("invalid dot position", () => { 77 | expect(()=>new ClosureSet(grammardb, [new ClosureItem(grammardb, 0, -1, [SYMBOL_EOF])])).toThrow(/out of range/); 78 | }); 79 | }); 80 | }); 81 | describe("empty grammar", () => { 82 | const grammardb = new GrammarDB(test_empty_language); 83 | const cs = new ClosureSet(grammardb, [new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF])]); 84 | const expanded = [ 85 | new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]), 86 | new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]) 87 | ]; 88 | test("ClosureSet size", () => { 89 | expect(cs.size).toBe(2); 90 | }); 91 | test("ClosureSet array", () => { 92 | expect(cs.getArray()).toEqual(expect.arrayContaining(expanded)); 93 | }); 94 | test("ClosureSet#include", () => { 95 | for (const ci of expanded) { 96 | 
expect(cs.includes(ci)).toBeTruthy(); 97 | } 98 | }); 99 | }); 100 | }); 101 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/firstset_test.ts: -------------------------------------------------------------------------------- 1 | import {FirstSet} from "../../src/parsergenerator/firstset"; 2 | import {test_empty_language, test_sample_grammar} from "../data/sample_language"; 3 | import {SymbolDiscriminator} from "../../src/parsergenerator/symboldiscriminator"; 4 | 5 | describe("FirstSet test", () => { 6 | const first = new FirstSet(test_sample_grammar, new SymbolDiscriminator(test_sample_grammar)); 7 | describe("valid one terminal and nonterminal symbol", () => { 8 | test("First(S) is {SEMICOLON, SEPARATE, ATOM, ID}", () => { 9 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 10 | expect(first.get("S")).toContain(symbol); 11 | } 12 | expect(first.get("S").size).toBe(4); 13 | }); 14 | test("First(E) is {SEMICOLON, SEPARATE, ATOM, ID}", () => { 15 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 16 | expect(first.get("E")).toContain(symbol); 17 | } 18 | expect(first.get("E").size).toBe(4); 19 | }); 20 | test("First([E]) is {SEMICOLON, SEPARATE, ATOM, ID}", () => { 21 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 22 | expect(first.get(["E"])).toContain(symbol); 23 | } 24 | expect(first.get(["E"]).size).toBe(4); 25 | }); 26 | test("First(LIST) is {SEPARATE, ATOM}", () => { 27 | for (const symbol of ["SEPARATE", "ATOM"]) { 28 | expect(first.get("LIST")).toContain(symbol); 29 | } 30 | expect(first.get("LIST").size).toBe(2); 31 | }); 32 | test("First(T) is {ATOM}", () => { 33 | expect(first.get("T")).toContain("ATOM"); 34 | expect(first.get("T").size).toBe(1); 35 | }); 36 | test("First(HOGE) is {ID}", () => { 37 | expect(first.get("HOGE")).toContain("ID"); 38 | expect(first.get("HOGE").size).toBe(1); 39 | }); 40 | test("First(ID) is {ID}", () => { 41 | expect(first.get("ID")).toContain("ID"); 42 | expect(first.get("ID").size).toBe(1); 43 | }); 44 | }); 45 | describe("valid word (multiple terminal or nonterminal symbols)", () => { 46 | test("First(LIST ID) is {SEPARATE ATOM ID}", () => { 47 | for (const symbol of ["SEPARATE", "ATOM", "ID"]) { 48 | expect(first.get(["LIST", "ID"])).toContain(symbol); 49 | } 50 | expect(first.get(["LIST", "ID"]).size).toBe(3); 51 | }); 52 | test("First(HOGE HOGE) is {ID}", () => { 53 | expect(first.get(["HOGE", "HOGE"])).toContain("ID"); 54 | expect(first.get(["HOGE", "HOGE"]).size).toBe(1); 55 | }); 56 | }); 57 | describe("invalid input (contains neither terminal nor nonterminal symbols)", () => { 58 | test("First(FOO) throws error", () => { 59 | expect(() => first.get("FOO")).toThrow(/invalid token/); 60 | }); 61 | test("First(INVALID) throws error", () => { 62 | expect(() => first.get("INVALID")).toThrow(/invalid token/); 63 | }); 64 | test("First(INVALID INVALID) throws error", () => { 65 | expect(() => first.get(["INVALID", "INVALID"])).toThrow(/invalid token/); 66 | }); 67 | test("First(INVALID S) throws error", () => { 68 | expect(() => first.get(["INVALID", "S"])).toThrow(/invalid token/); 69 | }); 70 | test("First(S INVALID) throws error", () => { 71 | expect(() => first.get(["S", "INVALID"])).toThrow(/invalid token/); 72 | }); 73 | }); 74 | }); 75 | 76 | describe("FirstSet test(empty language)", () => { 77 | const first = new FirstSet(test_empty_language.grammar, new SymbolDiscriminator(test_empty_language.grammar)); 78 | test("First(S) is {}", 
() => { 79 | expect(first.get("S").size).toBe(0); 80 | }); 81 | }); 82 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/nullableset_test.ts: -------------------------------------------------------------------------------- 1 | import {NullableSet} from "../../src/parsergenerator/nullableset"; 2 | import {test_sample_grammar} from "../data/sample_language"; 3 | 4 | describe("NullableSet test", () => { 5 | const nulls = new NullableSet(test_sample_grammar); 6 | test("T is Nullable", () => { 7 | expect(nulls.isNullable("T")).toBeTruthy(); 8 | }); 9 | test("LIST is Nullable", () => { 10 | expect(nulls.isNullable("LIST")).toBeTruthy(); 11 | }); 12 | test("HOGE is not Nullable", () => { 13 | expect(nulls.isNullable("HOGE")).toBeFalsy(); 14 | }); 15 | test("E is not Nullable", () => { 16 | expect(nulls.isNullable("E")).toBeFalsy(); 17 | }); 18 | test("S is not Nullable", () => { 19 | expect(nulls.isNullable("S")).toBeFalsy(); 20 | }); 21 | }); 22 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/symboldiscriminator_test.ts: -------------------------------------------------------------------------------- 1 | import {test_calc_grammar, test_empty_language, test_sample_grammar} from "../data/sample_language"; 2 | import {SymbolDiscriminator} from "../../src/parsergenerator/symboldiscriminator"; 3 | import {Token} from "../../src/def/token"; 4 | 5 | describe("SymbolDiscriminator test", () => { 6 | describe("test sample language", () => { 7 | const symbols = new SymbolDiscriminator(test_sample_grammar); 8 | test("S is Nonterminal", () => { 9 | expect(symbols.isNonterminalSymbol("S")).toBeTruthy(); 10 | expect(symbols.isTerminalSymbol("S")).toBeFalsy(); 11 | }); 12 | test("E is Nonterminal", () => { 13 | expect(symbols.isNonterminalSymbol("E")).toBeTruthy(); 14 | expect(symbols.isTerminalSymbol("E")).toBeFalsy(); 15 | }); 16 | test("LIST is Nonterminal", () => { 17 | expect(symbols.isNonterminalSymbol("LIST")).toBeTruthy(); 18 | expect(symbols.isTerminalSymbol("LIST")).toBeFalsy(); 19 | }); 20 | test("T is Nonterminal", () => { 21 | expect(symbols.isNonterminalSymbol("T")).toBeTruthy(); 22 | expect(symbols.isTerminalSymbol("T")).toBeFalsy(); 23 | }); 24 | test("HOGE is Nonterminal", () => { 25 | expect(symbols.isNonterminalSymbol("HOGE")).toBeTruthy(); 26 | expect(symbols.isTerminalSymbol("HOGE")).toBeFalsy(); 27 | }); 28 | test("SEMICOLON is Terminal", () => { 29 | expect(symbols.isNonterminalSymbol("SEMICOLON")).toBeFalsy(); 30 | expect(symbols.isTerminalSymbol("SEMICOLON")).toBeTruthy(); 31 | }); 32 | test("SEPARATE is Terminal", () => { 33 | expect(symbols.isNonterminalSymbol("SEPARATE")).toBeFalsy(); 34 | expect(symbols.isTerminalSymbol("SEPARATE")).toBeTruthy(); 35 | }); 36 | test("ATOM is Terminal", () => { 37 | expect(symbols.isNonterminalSymbol("ATOM")).toBeFalsy(); 38 | expect(symbols.isTerminalSymbol("ATOM")).toBeTruthy(); 39 | }); 40 | test("ID is Terminal", () => { 41 | expect(symbols.isNonterminalSymbol("ID")).toBeFalsy(); 42 | expect(symbols.isTerminalSymbol("ID")).toBeTruthy(); 43 | }); 44 | test("INVALID (not appear in grammar) is neither Nonterminal nor Terminal", () => { 45 | expect(symbols.isNonterminalSymbol("INVALID")).toBeFalsy(); 46 | expect(symbols.isTerminalSymbol("INVALID")).toBeFalsy(); 47 | }); 48 | test("Check nonterminal symbols set", () => { 49 | const nt: Set = symbols.getNonterminalSymbols(); 50 | for (const symbol of ["S", "E", "LIST", "T", "HOGE"]) 
{ 51 | expect(nt).toContain(symbol); 52 | } 53 | expect(nt.size).toBe(5); 54 | }); 55 | test("Check terminal symbols set", () => { 56 | const t: Set = symbols.getTerminalSymbols(); 57 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 58 | expect(t).toContain(symbol); 59 | } 60 | expect(t.size).toBe(4); 61 | }); 62 | }); 63 | describe("test sample language", () => { 64 | const symbols = new SymbolDiscriminator(test_calc_grammar); 65 | test("Check nonterminal symbols set", () => { 66 | const nt: Set = symbols.getNonterminalSymbols(); 67 | for (const symbol of ["EXP", "TERM", "ATOM"]) { 68 | expect(nt).toContain(symbol); 69 | } 70 | expect(nt.size).toBe(3); 71 | }); 72 | test("Check terminal symbols set", () => { 73 | const t: Set = symbols.getTerminalSymbols(); 74 | for (const symbol of ["PLUS", "ASTERISK", "DIGITS", "LPAREN", "RPAREN"]) { 75 | expect(t).toContain(symbol); 76 | } 77 | expect(t.size).toBe(5); 78 | }); 79 | }); 80 | describe("test empty language", () => { 81 | const symbols = new SymbolDiscriminator(test_empty_language.grammar); 82 | test("Check nonterminal symbols set", () => { 83 | const nt: Set = symbols.getNonterminalSymbols(); 84 | expect(nt).toContain("S"); 85 | expect(nt.size).toBe(1); 86 | }); 87 | test("Check terminal symbols set", () => { 88 | const t: Set = symbols.getTerminalSymbols(); 89 | expect(t.size).toBe(0); 90 | }); 91 | }); 92 | }); 93 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/syntaxdb_test.ts: -------------------------------------------------------------------------------- 1 | import {test_sample_language} from "../data/sample_language"; 2 | import {GrammarDB} from "../../src/parsergenerator/grammardb"; 3 | import {SYMBOL_SYNTAX} from "../../src/def/token"; 4 | 5 | describe("GrammarDB test", () => { 6 | const grammardb = new GrammarDB(test_sample_language); 7 | 8 | describe("findRules test", () => { 9 | test("get rules of E", () => { 10 | expect(grammardb.findRules("E")).toEqual([ 11 | {id: 1, rule: {ltoken: "E", pattern: ["LIST", "SEMICOLON"]}}, 12 | {id: 2, rule: {ltoken: "E", pattern: ["HOGE"]}} 13 | ]); 14 | }); 15 | test("get a rule of HOGE", () => { 16 | expect(grammardb.findRules("HOGE")).toEqual([ 17 | {id: 7, rule: {ltoken: "HOGE", pattern: ["ID"]}} 18 | ]); 19 | }); 20 | }); 21 | describe("getRuleById test", () => { 22 | test("rule of grammar 1 is: E -> LIST SEMICOLON", () => { 23 | expect(grammardb.getRuleById(1)).toEqual({ltoken: "E", pattern: ["LIST", "SEMICOLON"]}); 24 | }); 25 | test("rule of grammar -1 is: S' -> S", () => { 26 | expect(grammardb.getRuleById(-1)).toEqual({ltoken: SYMBOL_SYNTAX, pattern: ["S"]}); 27 | }); 28 | test("throw error by calling rule of grammar -2", () => { 29 | expect(() => grammardb.getRuleById(-2)).toThrow(/out of range/); 30 | }); 31 | test("no error occurs in rule of grammar 7", () => { 32 | expect(() => grammardb.getRuleById(7)).not.toThrow(); 33 | }); 34 | test("throw error by calling rule of grammar 8", () => { 35 | expect(() => grammardb.getRuleById(8)).toThrow(/out of range/); 36 | }); 37 | }); 38 | }); 39 | 40 | -------------------------------------------------------------------------------- /__tests__/precompiler/precompiler_test.ts: -------------------------------------------------------------------------------- 1 | import {PreCompiler} from "../../src/precompiler/precompiler"; 2 | import {test_calc_language_raw_string} from "../data/sample_language"; 3 | import * as fs from "fs"; 4 | 5 | describe("precompiler test", () 
=> { 6 | const precompiler = new PreCompiler("../../../src"); 7 | const source = precompiler.exec(test_calc_language_raw_string); 8 | fs.writeFileSync("./__tests__/data/tmp/precompiler_result.ts", source); 9 | const p = require("../data/tmp/precompiler_result.ts"); 10 | test("parse \"1+1\" by using compiled parser", () => { 11 | expect(() => p.parser.parse("1+1")).not.toThrow(); 12 | }); 13 | // まだ定義ファイルにアクションを定義できないのでむり 14 | /* 15 | test("parse \"1+1\" equals to 2 by using compiled parser and custom callback controller", () => { 16 | expect(p.parser.parse("1+1")).toBe(2); 17 | }); 18 | */ 19 | fs.unlinkSync("./__tests__/data/tmp/precompiler_result.ts"); 20 | }); 21 | -------------------------------------------------------------------------------- /__tests__/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "strictNullChecks": true, 4 | "noImplicitAny": true, 5 | "target": "es6", 6 | "module": "CommonJS", 7 | "outDir": "dist/", 8 | "declaration": true, 9 | "noEmitOnError": true 10 | } 11 | } -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | const gulp = require("gulp"); 3 | const merge2 = require("merge2"); 4 | const sourcemaps = require("gulp-sourcemaps"); 5 | const typescript = require("gulp-typescript"); 6 | 7 | const tslint = require("gulp-tslint"); 8 | 9 | // use tsconfig.json 10 | const tsProject = typescript.createProject("tsconfig.json"); 11 | gulp.task("tsc", () => { 12 | let tsr = gulp.src("src/**/*.ts") 13 | .pipe(tsProject()); 14 | return merge2([ 15 | tsr.dts.pipe(gulp.dest("dist/")), 16 | tsr.js.pipe(sourcemaps.write()).pipe(gulp.dest("dist")) 17 | ]); 18 | }); 19 | 20 | gulp.task("watch-tsc", gulp.task("tsc"), () => { 21 | gulp.watch("src/**/*.ts", gulp.task("tsc")); 22 | }); 23 | 24 | gulp.task("tslint", () => { 25 | return gulp.src("src/**/*.ts") 26 | .pipe(tslint({ 27 | formatter: "verbose" 28 | })) 29 | .pipe(tslint.report()); 30 | }); 31 | 32 | gulp.task("watch-tslint", gulp.task("tslint"), () => { 33 | gulp.watch("src/**/*.ts", gulp.task("tslint")); 34 | }); 35 | 36 | gulp.task("default", gulp.series("tsc", "tslint")); 37 | gulp.task("watch", gulp.series("watch-tsc", "watch-tslint")); 38 | -------------------------------------------------------------------------------- /json_language: -------------------------------------------------------------------------------- 1 | true "true" 2 | false "false" 3 | null "null" 4 | lbrace "{" 5 | rbrace "}" 6 | lbracket "[" 7 | rbracket "]" 8 | colon ":" 9 | comma "," 10 | digit1_9 /[1-9]/ 11 | digit0 /0/ 12 | minus "-" 13 | period "." 14 | string /".*?"/ %{ return {token: "string", value: value.slice(1, -1)}; }% 15 | ! /(\r\n|\r|\n)+/ 16 | ! 
/[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/ 17 | invalid /./ 18 | 19 | #default { return children[0] } 20 | 21 | digit : digit1_9 | digit0; 22 | digits : digit | 23 | digit digits { return children[0] + children[1] }; 24 | int : digit | 25 | digit1_9 digits { return children[0] + children[1] } | 26 | minus digit { return children[0] + children[1] } | 27 | minus digit1_9 digits { return children[0] + children[1] + children[2] }; 28 | frac : period digits { return children[0] + children[1] }; 29 | number : int { return +children[0] } | 30 | int frac { return +(children[0] + children[1]) }; 31 | 32 | object : lbrace rbrace %{ return {} }% | 33 | lbrace members rbrace %{ return children[1] }%; 34 | members : pair { return [children[0]] } | 35 | pair comma members { return [children[0], ...children[2]] }; 36 | pair : string colon value %{ return {key: children[0], ...children[2]} }%; 37 | array : lbracket rbracket { return [] } | 38 | lbracket elements rbracket { return children[1] }; 39 | elements : value { return [children[0]] } | 40 | value comma elements { return [children[0], ...children[2]] }; 41 | $value : 42 | string %{ return {type: "string", value: children[0]} }% | 43 | number %{ return {type: "number", value: children[0]} }% | 44 | object %{ return {type: "object", value: children[0]} }% | 45 | array %{ return {type: "array", value: children[0]} }% | 46 | true %{ return {type: "boolean", value: true} }% | 47 | false %{ return {type: "boolean", value: false} }% | 48 | null %{ return {type: "null", value: null} }%; 49 | -------------------------------------------------------------------------------- /language: -------------------------------------------------------------------------------- 1 | EXCLAMATION "!" 2 | VBAR "|" 3 | DOLLAR "$" 4 | COLON ":" 5 | SEMICOLON ";" 6 | LT "<" 7 | GT ">" 8 | COMMA "," 9 | LEX_BEGIN "#lex_begin" 10 | LEX_END "#lex_end" 11 | LEX_DEFAULT "#lex_default" 12 | START "#start" 13 | EXTEND "#extend" 14 | BEGIN "#begin" 15 | END "#end" 16 | DEFAULT "#default" 17 | LABEL /[a-zA-Z_][a-zA-Z0-9_]*/ 18 | REGEXP /\/.*\/[gimuy]*/ { 19 | const match = /\/(.*)\/([gimuy]*)/.exec(value); 20 | return ["REGEXP", new RegExp(match[1], match[2])]; 21 | } 22 | STRING /".*"/ { return ["STRING", value.slice(1, -1)] } 23 | STRING /'.*'/ { return ["STRING", value.slice(1, -1)] } 24 | START_BLOCK /%*{+/ %%{ 25 | const match = /(%*)({+)/.exec(value); 26 | const end_delimiter = "}".repeat(match[2].length) + match[1]; 27 | lex.callState("callback"); 28 | lex.addRule("body_block", { 29 | token: "BODY_BLOCK", 30 | pattern: new RegExp(`(?:.|\\s)*?(? 
{ 38 | lex.returnState(); 39 | lex.removeRule("body_block"); 40 | lex.removeRule("end_block"); 41 | } 42 | }); 43 | }%% 44 | !ENDLINE /(\r\n|\r|\n)+/ 45 | !WHITESPACE /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/ 46 | INVALID /./ 47 | 48 | 49 | $LANGUAGE 50 | : LEX_OPTIONS LEX EX_CALLBACKS GRAMMAR %{ 51 | let start_symbol = children[3].start_symbol; 52 | // 開始記号の指定がない場合、最初の規則に設定 53 | if (start_symbol === null) { 54 | if (children[3].sect.length > 0) start_symbol = children[3].sect[0].ltoken; 55 | else start_symbol = ""; 56 | } 57 | const lex = {rules: children[1]}; 58 | if (children[0].callbacks !== undefined) { 59 | for (const callback of children[0].callbacks) { 60 | switch (callback.type) { 61 | case "#lex_begin": 62 | lex.begin_callback = callback.callback; 63 | break; 64 | case "#lex_end": 65 | lex.end_callback = callback.callback; 66 | break; 67 | case "#lex_default": 68 | lex.default_callback = callback.callback; 69 | break; 70 | } 71 | } 72 | } 73 | if (children[0].start_state !== undefined) { 74 | lex.start_state = children[0].start_state; 75 | } 76 | if (children[0].states.length > 0) { 77 | lex.states = children[0].states; 78 | } 79 | const grammar = {rules: children[3].grammar, start_symbol}; 80 | if (children[2] !== undefined) { 81 | for (const callback of children[2]) { 82 | switch (callback.type) { 83 | case "#begin": 84 | grammar.begin_callback = callback.callback; 85 | break; 86 | case "#end": 87 | grammar.end_callback = callback.callback; 88 | break; 89 | case "#default": 90 | grammar.default_callback = callback.callback; 91 | break; 92 | } 93 | } 94 | } 95 | return {lex, grammar}; 96 | }% 97 | | LEX_OPTIONS LEX GRAMMAR %{ 98 | let start_symbol = children[2].start_symbol; 99 | // 開始記号の指定がない場合、最初の規則に設定 100 | if (start_symbol === null) { 101 | if (children[2].sect.length > 0) start_symbol = children[2].sect[0].ltoken; 102 | else start_symbol = ""; 103 | } 104 | const lex = {rules: children[1]}; 105 | if (children[0].callbacks !== undefined) { 106 | for (const callback of children[0].callbacks) { 107 | switch (callback.type) { 108 | case "#lex_begin": 109 | lex.begin_callback = callback.callback; 110 | break; 111 | case "#lex_end": 112 | lex.end_callback = callback.callback; 113 | break; 114 | case "#lex_default": 115 | lex.default_callback = callback.callback; 116 | break; 117 | } 118 | } 119 | } 120 | if (children[0].start_state !== undefined) { 121 | lex.start_state = children[0].start_state; 122 | } 123 | if (children[0].states.length > 0) { 124 | lex.states = children[0].states; 125 | } 126 | return {lex, grammar: {rules: children[2].grammar, start_symbol: start_symbol}}; 127 | }%; 128 | 129 | LEX_OPTIONS 130 | : OPTIONAL_LEX_EX_CALLBACKS LEX_STATES %{ 131 | const states = []; 132 | const states_set = new Set(); 133 | for (const inherit of children[1].inheritance) { 134 | for (const sub_state of inherit.sub) { 135 | if (states_set.has(inherit.sub)) { 136 | // 既に登録されている場合、一つのstateが複数のstateを継承することはできない 137 | continue; 138 | } 139 | states.push({label: sub_state, inheritance: inherit.base}); 140 | states_set.add(sub_state); 141 | } 142 | } 143 | return {callbacks: children[0], start_state: children[1].start_state, states}; 144 | }%; 145 | LEX_STATES 146 | : LEX_STATES LEXSTATE_DEFINITIONS %{ 147 | if (children[1].type === "#start") { 148 | children[0].start_state = children[1].value; 149 | } 150 | else if (children[1].type === "#extend") { 151 | children[0].inheritance.push(children[1].value); 152 | } 153 | return children[0]; 154 | }% 155 | | %{ 
return {start_state: undefined, inheritance: []}; }%; 156 | LEXSTATE_DEFINITIONS 157 | : STARTSTATE %{ return {type: "#start", value: children[0]}; }% 158 | | STATE_EXTEND %{ return {type: "#extend", value: children[0]}; }%; 159 | STARTSTATE : START LEXSTATE { return children[0]; }; 160 | STATE_EXTEND : EXTEND MULTIPLE_LEXSTATE LEXSTATE %{ return {sub: children[1], base: children[2]}; }%; 161 | 162 | OPTIONAL_LEX_EX_CALLBACKS : LEX_EX_CALLBACKS | ; 163 | LEX_EX_CALLBACKS 164 | : LEX_EX_CALLBACKS LEX_EX_CALLBACK { return children[0].concat([children[1]]); } 165 | | LEX_EX_CALLBACK { return [children[0]]; } ; 166 | LEX_EX_CALLBACK : LEX_EX_CALLBACK_LABEL BLOCK %{ return {type: children[0], callback: makeLexCallback(children[1])}; }%; 167 | LEX_EX_CALLBACK_LABEL : LEX_BEGIN | LEX_END | LEX_DEFAULT; 168 | 169 | LEX 170 | : LEX LEXSECT { return children[0].concat([children[1]]); } 171 | | LEXSECT { return [children[0]]; }; 172 | LEXSECT 173 | : MULTIPLE_LEXSTATE LEXLABEL LEXDEF LEXCALLBACK %{ 174 | return children[3] === undefined ? 175 | {token: children[1], pattern: children[2], states: children[0]} : 176 | {token: children[1], pattern: children[2], states: children[0], callback: new Function("value", "token", "lex", children[3])}; 177 | }% 178 | | LEXLABEL LEXDEF LEXCALLBACK %{ 179 | return children[2] === undefined ? 180 | {token: children[0], pattern: children[1]} : 181 | {token: children[0], pattern: children[1], callback: new Function("value", "token", "lex", children[2])}; 182 | }%; 183 | LEXLABEL 184 | : LABEL 185 | | EXCLAMATION { return null; } 186 | | EXCLAMATION LABEL {return null; }; 187 | LEXDEF : STRING | REGEXP; 188 | 189 | MULTIPLE_LEXSTATE : LT LEXSTATE_LIST GT { return children[1]; }; 190 | LEXSTATE_LIST 191 | : LABEL COMMA LEXSTATE_LIST { return [children[0]].concat(children[2]); } 192 | | LABEL { return [children[0]]; }; 193 | LEXSTATE : LT LABEL GT { return children[1]; }; 194 | LEXCALLBACK : BLOCK | ; 195 | 196 | EX_CALLBACKS 197 | : EX_CALLBACKS EX_CALLBACK { return children[0].concat([children[1]]); } 198 | | EX_CALLBACK { return [children[0]]; }; 199 | EX_CALLBACK : EX_CALLBACK_LABEL BLOCK %{ return {type: children[0], callback: new Function("children", "token", "lexer", children[1])}; }%; 200 | EX_CALLBACK_LABEL : BEGIN | END | DEFAULT; 201 | 202 | GRAMMAR : RULES; 203 | RULES 204 | : SECT RULES %{ 205 | let start_symbol = children[1].start_symbol; 206 | if (children[0].start_symbol !== null) { 207 | start_symbol = children[0].start_symbol; 208 | } 209 | return { 210 | start_symbol, 211 | grammar: children[0].sect.concat(children[1].grammar) 212 | }; 213 | }% 214 | | SECT %{ 215 | let start_symbol = null; 216 | if (children[0].start_symbol !== null) { 217 | start_symbol = children[0].start_symbol; 218 | } 219 | return { 220 | start_symbol, 221 | grammar: children[0].sect 222 | }; 223 | }%; 224 | SECT : SECTLABEL COLON DEF SEMICOLON %{ 225 | const result = []; 226 | for (const def of children[2]) { 227 | result.push({ltoken: children[0].label, ...def}); 228 | } 229 | return {start_symbol: children[0].start_symbol, sect: result}; 230 | }%; 231 | SECTLABEL 232 | : LABEL %{ return {start_symbol: null, label: children[0]}; }% 233 | | DOLLAR LABEL %{ return {start_symbol: children[1], label: children[1]}; }%; 234 | DEF 235 | : PATTERN CALLBACK VBAR DEF %{ return [children[1] === null ? 
{pattern: children[0]} : {pattern: children[0], callback: new Function("children", "token", "lexer", children[1])}].concat(children[3]); }% 236 | | PATTERN CALLBACK %{ return [children[1] === null ? {pattern: children[0]} : {pattern: children[0], callback: new Function("children", "token", "lexer", children[1])}]; }%; 237 | PATTERN 238 | : SYMBOLLIST 239 | | { return []; }; 240 | SYMBOLLIST 241 | : LABEL SYMBOLLIST { return [children[0]].concat(children[1]); } 242 | | LABEL { return [children[0]]; }; 243 | CALLBACK : BLOCK | { return null; }; 244 | 245 | BLOCK : START_BLOCK BODY_BLOCK END_BLOCK { return children[1]; }; 246 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lavriapg", 3 | "version": "0.4.4", 4 | "description": "lalr(1) parser generator written in TypeScript", 5 | "keywords": [ 6 | "parser", 7 | "generator", 8 | "lexer", 9 | "parsing", 10 | "compiler" 11 | ], 12 | "main": "dist/index.js", 13 | "types": "dist/index.d.ts", 14 | "engines": { 15 | "node": ">= 9.2.0" 16 | }, 17 | "scripts": { 18 | "test": "jest", 19 | "test-w": "jest --watchAll", 20 | "build": "gulp", 21 | "watch": "gulp watch", 22 | "build-doc": "typedoc --mode file --out docs/ --tsconfig tsconfig.json ./src" 23 | }, 24 | "repository": { 25 | "type": "git", 26 | "url": "https://github.com/Tatamo/lavriapg" 27 | }, 28 | "author": "Tatamo", 29 | "license": "BSD-3-Clause", 30 | "devDependencies": { 31 | "@types/jest": "^22.2.2", 32 | "@types/node": "^9.6.1", 33 | "gulp": "^4.0.2", 34 | "gulp-sourcemaps": "^2.6.4", 35 | "gulp-tslint": "^8.1.3", 36 | "gulp-typescript": "^4.0.2", 37 | "jest": "^22.4.3", 38 | "merge2": "^1.2.1", 39 | "ts-jest": "^22.4.2", 40 | "tslint": "^5.9.1", 41 | "typedoc": "^0.11.1", 42 | "typescript": "^2.8.1" 43 | }, 44 | "jest": { 45 | "transform": { 46 | "^.+\\.tsx?$": "/node_modules/ts-jest/preprocessor.js" 47 | }, 48 | "testRegex": "(/__tests__/(?!data/).*|\\.(test|spec))\\.(tsx?|jsx?)$", 49 | "moduleFileExtensions": [ 50 | "ts", 51 | "js", 52 | "json" 53 | ], 54 | "watchPathIgnorePatterns": [ 55 | "/__tests__/data/tmp/" 56 | ], 57 | "coveragePathIgnorePatterns": [ 58 | "/__tests__/data/", 59 | "/dist/" 60 | ], 61 | "globals": { 62 | "ts-jest": { 63 | "tsConfigFile": "__tests__/tsconfig.json" 64 | } 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/def/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./language"; 2 | export * from "./parsingtable"; 3 | export * from "./token"; 4 | -------------------------------------------------------------------------------- /src/def/language.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "./token"; 2 | import {ILexer} from "../lexer/lexer"; 3 | import {LexController} from "../lexer/lexcontroller"; 4 | 5 | /** 6 | * 字句解析器の状態を区別するためのラベル型 7 | */ 8 | export type LexStateLabel = string; 9 | 10 | /** 11 | * デフォルトの字句解析器の状態 12 | */ 13 | export const DEFAULT_LEX_STATE = "default"; 14 | 15 | /** 16 | * 字句解析器に与える状態 17 | */ 18 | export interface LexState { 19 | label: LexStateLabel; 20 | inheritance?: LexStateLabel; 21 | } 22 | 23 | /** 24 | * 字句規則マッチ時に呼び出されるコールバック 25 | */ 26 | export type LexCallback = (value: string, token: string | null, lex: LexController) => [string | null, any] | { token: string | null, value: any } | string | null | 
void; 27 | 28 | /** 29 | * 単一の字句ルール 30 | */ 31 | // TODO: tokenはlabelに名称変更してもよい? 32 | export interface LexRule { 33 | token: Token | null; 34 | pattern: string | RegExp; 35 | states?: Array; 36 | is_disabled?: boolean; 37 | priority?: number; 38 | callback?: LexCallback; 39 | } 40 | 41 | /** 42 | * 字句規則 43 | */ 44 | export interface LexDefinition { 45 | rules: Array; 46 | states?: Array; 47 | start_state?: LexStateLabel; 48 | default_callback?: LexCallback; 49 | begin_callback?: (lex: LexController) => void; 50 | end_callback?: (lex: LexController) => void; 51 | } 52 | 53 | /** 54 | * 構文のreduce時に呼び出されるコールバック 55 | */ 56 | export type GrammarCallback = (children: Array, token: string, lexer: ILexer) => any; 57 | 58 | /** 59 | * 単一の構文ルール 60 | */ 61 | export interface GrammarRule { 62 | ltoken: Token; 63 | pattern: Array; 64 | callback?: GrammarCallback; 65 | } 66 | 67 | /** 68 | * 構文規則 69 | */ 70 | export interface GrammarDefinition { 71 | rules: Array; 72 | start_symbol: Token; 73 | default_callback?: GrammarCallback; 74 | // TODO: 存在はするが呼び出さないのを修正 75 | begin_callback?: () => void; 76 | end_callback?: () => void; 77 | } 78 | 79 | /** 80 | * 言語定義 81 | */ 82 | export interface Language { 83 | lex: LexDefinition; 84 | grammar: GrammarDefinition; 85 | } 86 | -------------------------------------------------------------------------------- /src/def/parsingtable.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "./token"; 2 | 3 | /** 4 | * Shiftオペレーション 5 | */ 6 | export type ShiftOperation = { type: "shift", to: number }; 7 | 8 | /** 9 | * Reduceオペレーション 10 | */ 11 | export type ReduceOperation = { type: "reduce", grammar_id: number }; 12 | 13 | /** 14 | * Shift/Reduceコンフリクト 15 | */ 16 | export type ConflictedOperation = { type: "conflict", shift_to: Array, reduce_grammar: Array }; 17 | 18 | /** 19 | * Acceptオペレーション 20 | */ 21 | export type AcceptOperation = { type: "accept" }; 22 | 23 | /** 24 | * Gotoオペレーション 25 | */ 26 | export type GotoOperation = { type: "goto", to: number }; 27 | 28 | /** 29 | * 構文解析器の実行する命令群 30 | */ 31 | export type ParsingOperation = ShiftOperation | ReduceOperation | ConflictedOperation | AcceptOperation | GotoOperation; 32 | 33 | /** 34 | * 構文解析表 35 | */ 36 | export type ParsingTable = Array>; 37 | -------------------------------------------------------------------------------- /src/def/token.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * トークン名 3 | */ 4 | export type Token = string | symbol; 5 | 6 | /** 7 | * トークン化された入力 8 | * トークン名と、字句規則にマッチした元々の入力 9 | */ 10 | export type TokenizedInput = { token: Token, value: T }; 11 | 12 | /** 13 | * 入力の終端を表す終端記号名 14 | * @type {symbol} 15 | */ 16 | export const SYMBOL_EOF: Token = Symbol("EOF"); 17 | 18 | /** 19 | * `S' -> S $` (Sは開始記号)となるような非終端記号S'を表す非終端記号名 20 | * @type {symbol} 21 | */ 22 | export const SYMBOL_SYNTAX: Token = Symbol("S'"); 23 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./def"; 2 | export * from "./lexer"; 3 | export * from "./parser"; 4 | export * from "./parsergenerator"; 5 | export * from "./precompiler"; 6 | -------------------------------------------------------------------------------- /src/lexer/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./lexer"; 2 | 
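
The type definitions above (`LexRule`, `LexDefinition`, `GrammarRule`, `GrammarDefinition`, `Language`) can also be written out by hand rather than produced from a definition file. The sketch below is illustrative only — a hypothetical calculator language invented for this example, not a sample bundled with the package — showing one way such an object could be assembled and handed to `ParserGenerator`/`ParserFactory` (as the tests in `__tests__/parser/parser_test.ts` do). Rules without a `callback` fall back to returning `children[0]`, and a lex rule with `token: null` discards its match.

```TypeScript
// Illustrative sketch only: a hand-written Language value for a tiny calculator.
// All token names and grammar rules here are invented for the example.
import {Language, ParserFactory, ParserGenerator} from "lavriapg";

const calc: Language = {
	lex: {
		rules: [
			{token: "DIGITS", pattern: /[1-9][0-9]*|0/},
			{token: "PLUS", pattern: "+"},
			{token: "ASTERISK", pattern: "*"},
			{token: "LPAREN", pattern: "("},
			{token: "RPAREN", pattern: ")"},
			{token: null, pattern: /\s+/} // token: null -> the matched text is skipped
		]
	},
	grammar: {
		start_symbol: "EXP",
		rules: [
			{ltoken: "EXP", pattern: ["EXP", "PLUS", "TERM"], callback: (c) => c[0] + c[2]},
			{ltoken: "EXP", pattern: ["TERM"]}, // no callback: children[0] is passed through
			{ltoken: "TERM", pattern: ["TERM", "ASTERISK", "ATOM"], callback: (c) => c[0] * c[2]},
			{ltoken: "TERM", pattern: ["ATOM"]},
			{ltoken: "ATOM", pattern: ["DIGITS"], callback: (c) => Number(c[0])},
			{ltoken: "ATOM", pattern: ["LPAREN", "EXP", "RPAREN"], callback: (c) => c[1]}
		]
	}
};

const table = new ParserGenerator(calc).getParsingTable();
const parser = ParserFactory.create(calc, table);
console.log(parser.parse("2 * (3 + 4)")); // 14
```

This is the same shape of object that `language_parser` produces when it reads a definition file such as [language](/language) or [json_language](/json_language); writing it by hand is mainly useful for small embedded grammars and for tests like the ones in `__tests__/lexer/lexer_test.ts`.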
-------------------------------------------------------------------------------- /src/lexer/lexcontroller.ts: -------------------------------------------------------------------------------- 1 | import {DEFAULT_LEX_STATE, Language, LexCallback, LexDefinition, LexRule, LexState, LexStateLabel} from "../def/language"; 2 | 3 | export type LexRuleLabel = string; 4 | 5 | /** 6 | * 字句解析器の状態と字句ルールの紐付けと管理を行うクラス 7 | */ 8 | class LexRuleManager { 9 | // private states: { states: Map, index: Map>, inheritance: Map }; 10 | private states: Map }>; 11 | private rules: { rules: Array, labels: Map }; 12 | // 各ルールに一意なidを割り当てるためのカウンタ 13 | private id_counter: number; 14 | // ルールの削除によって割り当てがなくなったid 15 | private free_ids: Array; 16 | constructor(language: Language) { 17 | const lex = language.lex; 18 | this.id_counter = 0; 19 | this.free_ids = []; 20 | 21 | // initialize lex states map 22 | this.states = new Map(); 23 | // もしlexの定義内にデフォルト状態の記述があっても上書きされるだけなので問題ない 24 | this.setState({label: DEFAULT_LEX_STATE}); 25 | if (lex.states !== undefined) { 26 | for (const state of lex.states) { 27 | this.setState(state); 28 | } 29 | } 30 | 31 | // initialize lex rules 32 | this.rules = {rules: [], labels: new Map()}; 33 | 34 | for (const rule of lex.rules.map((r) => LexRuleManager.formatLexRule(r))) { 35 | this.rules.rules[this.id_counter] = rule; 36 | // 状態ごとにインデックスを張る 37 | for (const state of rule.states!) { 38 | // TODO: statesに入れる 39 | if (!this.states.has(state)) { 40 | this.setState(state); 41 | } 42 | this.states.get(state)!.index.add(this.id_counter); 43 | } 44 | this.id_counter += 1; 45 | } 46 | } 47 | /** 48 | * 新しい状態を追加する 既に存在している場合は上書きするが、状態に登録されたルールは維持される 49 | * @param {LexStateLabel} label 新しい状態の名前 名前以外のプロパティは初期値が用いられる 50 | * @returns {boolean} 追加に成功したかどうか 継承関係が循環していた場合、追加は行われずfalseが返る 51 | */ 52 | setState(label: LexStateLabel): boolean; 53 | /** 54 | * 新しい状態を追加する 既に存在している場合は上書きするが、状態に登録されたルールは維持される 55 | * @param {LexState} state 新しい状態 56 | * @returns {boolean} 追加に成功したかどうか 継承関係が循環していた場合、追加は行われずfalseが返る 57 | */ 58 | setState(state: LexState): boolean; 59 | setState(s: LexStateLabel | LexState): boolean { 60 | let state: LexState; 61 | if (typeof s === "object") { 62 | state = s; 63 | } 64 | else { 65 | state = {label: s}; 66 | } 67 | state = LexRuleManager.formatLexState(state); 68 | // ループチェック 69 | const isLooped = (state: LexState): boolean => { 70 | if (state.inheritance !== undefined) { 71 | let flg_loop = false; 72 | let parent = this.states.get(state.inheritance); 73 | while (parent !== undefined && parent.state.inheritance !== undefined) { 74 | // 状態を追加するたびにチェックするので、自身にたどりつかないことを調べればよい 75 | if (parent.state.inheritance === state.label) { 76 | flg_loop = true; 77 | break; 78 | } 79 | parent = this.states.get(parent.state.inheritance); 80 | } 81 | if (flg_loop) return true; 82 | } 83 | return false; 84 | }; 85 | // 循環継承が存在する場合は追加できない 86 | if (isLooped(state)) return false; 87 | if (this.states.has(state.label)) { 88 | // 既に追加済みの場合はindexをそのまま維持する 89 | this.states.get(state.label)!.state = state; 90 | } 91 | else { 92 | this.states.set(state.label, {state, index: new Set()}); 93 | } 94 | return true; 95 | } 96 | // TODO: パフォーマンス改善 97 | /** 98 | * 与えられた状態に登録されている字句ルールの一覧をイテレータとして返す 99 | * @param {LexStateLabel} label 字句ルールを取得する状態の名前 100 | * @returns {IterableIterator} 字句ルールが得られるイテレータ 101 | */ 102 | getRulesItr(label: LexStateLabel): IterableIterator { 103 | // そんな状態はない 104 | if (!this.states.has(label)) return [][Symbol.iterator](); 105 | 106 | // 継承を加味 107 | let result: Array = []; 108 | let s = 
this.states.get(label); 109 | while (s !== undefined) { 110 | result = result.concat([...s.index]); 111 | if (s.state.inheritance === undefined) break; 112 | s = this.states.get(s.state.inheritance); 113 | } 114 | // 暫定的処置 115 | result.sort((a: number, b: number) => a - b); 116 | 117 | return (function* (self, itr) { 118 | for (const id of itr) { 119 | if (self.rules.rules[id] !== undefined) yield self.rules.rules[id]!; 120 | } 121 | })(this, new Set(result)[Symbol.iterator]()); 122 | } 123 | // TODO べつにlabelを省略可能にしてもいいのでは 124 | /** 125 | * 新しい字句ルールを名前をつけて追加する 既に存在している場合は上書きする 126 | * @param {LexRuleLabel} label 新しいルールの名前 127 | * @param {LexRule} rule 新しく追加するルール 128 | */ 129 | setRule(label: LexRuleLabel, rule: LexRule): void { 130 | // 同名の既存ルールを破棄 131 | this.removeRule(label); 132 | 133 | const formatted_rule = LexRuleManager.formatLexRule(rule); 134 | 135 | const id = this.free_ids.length > 0 ? this.free_ids.pop()! : this.id_counter++; 136 | this.rules.rules[id] = formatted_rule; 137 | this.rules.labels.set(label, id); 138 | for (const state of formatted_rule.states!) { 139 | if (!this.states.has(state)) this.setState(state); 140 | this.states.get(state)!.index.add(id); 141 | } 142 | } 143 | /** 144 | * 名前がついた字句ルールを指定して削除する 145 | * @param {LexRuleLabel} label 削除するルールの名前 146 | * @returns {LexRule | undefined} 削除したルール 該当するものがない場合はundefined 147 | */ 148 | removeRule(label: LexRuleLabel): LexRule | undefined { 149 | if (!this.rules.labels.has(label)) { 150 | return undefined; 151 | } 152 | const id = this.rules.labels.get(label)!; 153 | this.rules.labels.delete(label); 154 | const rule = this.rules.rules[id]; 155 | if (rule === undefined) return undefined; 156 | 157 | for (const state of rule.states!) { 158 | if (this.states.has(state)) { 159 | this.states.get(state)!.index.delete(id); 160 | } 161 | } 162 | this.rules.rules[id] = undefined; 163 | this.free_ids.push(id); 164 | return rule; 165 | } 166 | /** 167 | * 未定義プロパティに初期値を割り当てるなど、扱いやすい形に整形した新しい状態を生成する 168 | * @param {LexState} state もともとの状態 169 | * @returns {LexState} 整形された新しい状態 170 | */ 171 | static formatLexState(state: LexState): LexState { 172 | // clone state 173 | return {...state}; 174 | } 175 | /** 176 | * 未定義プロパティに初期値を割り当てるなど、扱いやすい形に整形した新しい字句ルールを生成する 177 | * @param {LexRule} rule もともとの字句ルール 178 | * @returns {LexRule} 整形された新しい字句ルール 179 | */ 180 | static formatLexRule(rule: LexRule): LexRule { 181 | // clone rule 182 | const result: LexRule = {...rule}; 183 | if (result.is_disabled === undefined) result.is_disabled = false; 184 | // 状態指定を省略された場合はデフォルト状態のみとする 185 | if (result.states === undefined) result.states = [DEFAULT_LEX_STATE]; 186 | // 正規表現を字句解析に適した形に整形 187 | if (result.pattern instanceof RegExp) { 188 | result.pattern = LexRuleManager.formatRegExp(result.pattern); 189 | } 190 | return result; 191 | } 192 | /** 193 | * 字句解析時に必要なフラグを追加し、不要なフラグを取り除いた新しい正規表現オブジェクトを生成する 194 | * @param {RegExp} pattern もともとの正規表現 195 | * @returns {RegExp} 整形された新しい正規表現 196 | */ 197 | private static formatRegExp(pattern: RegExp): RegExp { 198 | // フラグを整形する 199 | let flags: string = ""; 200 | // gフラグは邪魔なので取り除く 201 | // i,m,uフラグがあれば維持する 202 | if (pattern.ignoreCase) { 203 | flags += "i"; 204 | } 205 | if (pattern.multiline) { 206 | flags += "m"; 207 | } 208 | if (pattern.unicode) { 209 | flags += "u"; 210 | } 211 | // yフラグは必ずつける 212 | flags += "y"; 213 | return new RegExp(pattern, flags); 214 | } 215 | } 216 | 217 | /** 218 | * 解析中の字句解析器の状態を操作するクラス 219 | */ 220 | export class LexController { 221 | private _lex: LexDefinition; 222 | private 
_current_state: LexStateLabel; 223 | private _state_stack: Array; 224 | private _rules: LexRuleManager; 225 | constructor(language: Language) { 226 | this._lex = language.lex; 227 | this._current_state = language.lex.start_state !== undefined ? language.lex.start_state : DEFAULT_LEX_STATE; 228 | this._state_stack = []; 229 | this._rules = new LexRuleManager(language); 230 | } 231 | /** 232 | * 個別にコールバックが設定されていない規則に対して適用するデフォルトコールバックを得る 233 | * @returns {LexCallback | undefined} デフォルトコールバック 定義されていない場合はundefined 234 | */ 235 | get defaultCallback(): LexCallback | undefined { 236 | return this._lex.default_callback; 237 | } 238 | /** 239 | * 字句解析開始時のコールバックを呼び出す 240 | */ 241 | onBegin(): void { 242 | if (this._lex.begin_callback !== undefined) this._lex.begin_callback(this); 243 | } 244 | /** 245 | * 字句解析終了時のコールバックを呼び出す 246 | */ 247 | onEnd(): void { 248 | if (this._lex.end_callback !== undefined) this._lex.end_callback(this); 249 | } 250 | /** 251 | * 現在の状態で適用可能な字句ルールをイテレータとして返す 252 | * @returns {IterableIterator} 字句ルールを得ることができるイテレータ 253 | */ 254 | getRulesItr(): IterableIterator { 255 | return this._rules.getRulesItr(this._current_state); 256 | } 257 | /** 258 | * 新しい字句ルールを名前をつけて追加する 259 | * @param {string} label ルールの区別のために与える名前 260 | * @param {LexRule} rule 追加する字句ルール 261 | */ 262 | addRule(label: string, rule: LexRule): void { 263 | this._rules.setRule(label, rule); 264 | } 265 | /** 266 | * 既存の字句ルールを削除する 267 | * @param {string} label 削除するルールの名前 268 | * @returns {LexRule | undefined} 削除したルール 該当するものがない場合はundefined 269 | */ 270 | removeRule(label: string): LexRule | undefined { 271 | return this._rules.removeRule(label); 272 | } 273 | /** 274 | * 現在の字句解析器の状態名を得る 275 | * @returns {LexStateLabel} 現在の状態名 276 | */ 277 | getCurrentState(): LexStateLabel { 278 | return this._current_state; 279 | } 280 | /** 281 | * 字句解析機の解析状態を別の状態に変更する 282 | * @param {LexStateLabel} label 新しい状態の名前 283 | */ 284 | jumpState(label: LexStateLabel): void { 285 | this._current_state = label; 286 | } 287 | /** 288 | * 現在の状態をスタックに積んでから別の状態に変更する 289 | * @param {LexStateLabel} label 新しい状態の名前 290 | */ 291 | callState(label: LexStateLabel): void { 292 | this._state_stack.push(this._current_state); 293 | this._current_state = label; 294 | } 295 | /** 296 | * スタックから1つ取り出し、その状態に変更する 297 | * スタックが空の場合は状態を変更しない 298 | * @returns {LexStateLabel | undefined} 変更した状態の名前 スタックが空の場合はundefined 299 | */ 300 | returnState(): LexStateLabel | undefined { 301 | const pop = this._state_stack.pop(); 302 | if (pop !== undefined) this._current_state = pop; 303 | return pop; 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /src/lexer/lexer.ts: -------------------------------------------------------------------------------- 1 | import {Language, LexRule} from "../def/language"; 2 | import {SYMBOL_EOF, TokenizedInput} from "../def/token"; 3 | import {LexController} from "./lexcontroller"; 4 | 5 | /** 6 | * 字句解析器用のinterface 7 | * 8 | * TODO: 要改善 9 | */ 10 | export interface ILexer { 11 | exec(input: string): Array; 12 | } 13 | 14 | /** 15 | * 字句解析器 16 | * 入力を受け取ってトークン化する 17 | */ 18 | export class Lexer implements ILexer { 19 | constructor(private language: Language) { 20 | // do nothing 21 | } 22 | /** 23 | * 入力を受け取って字句解析を行う 24 | * @param {string} input 入力文字列 25 | * @returns {Array} 字句規則によって分割されたトークン列 26 | */ 27 | exec(input: string): Array { 28 | const result: Array = []; 29 | let next_index = 0; 30 | const controller = new LexController(this.language); 31 | controller.onBegin(); 32 | while 
(next_index < input.length) { 33 | // 念の為undefined対策 34 | // const current_rules = this.rules.has(controller.getCurrentState()) ? this.rules.get(controller.getCurrentState())! : []; 35 | const current_rules = controller.getRulesItr(); 36 | const {rule, matched} = Lexer.match(current_rules, input, next_index); 37 | if (rule === null) { 38 | // マッチする規則がなかった 39 | throw new Error("no pattern matched"); 40 | } 41 | else { 42 | let token = rule.token; 43 | let value = matched; 44 | // コールバック呼び出し 45 | if (typeof rule.token !== "symbol" && (rule.callback !== undefined || controller.defaultCallback !== undefined)) { 46 | const callback_result = rule.callback !== undefined ? rule.callback(matched, rule.token, controller) : controller.defaultCallback!(matched, rule.token, controller); 47 | if (callback_result === null) { 48 | token = null; 49 | } 50 | else if (typeof callback_result === "string") { 51 | token = callback_result; 52 | } 53 | else if (Array.isArray(callback_result)) { 54 | token = callback_result[0]; 55 | value = callback_result[1]; 56 | } 57 | else if (callback_result !== undefined) { 58 | token = callback_result.token; 59 | value = callback_result.value; 60 | } 61 | // callback_result === undefinedなら何もしない 62 | } 63 | // tokenがnullなら処理を飛ばす 64 | if (token !== null) { 65 | result.push({token: token, value: value}); 66 | } 67 | // 読む位置を進める 68 | next_index += matched.length; 69 | } 70 | } 71 | result.push({token: SYMBOL_EOF, value: ""}); 72 | controller.onEnd(); 73 | return result; 74 | } 75 | private static match(rules: Iterable, input: string, next_index: number): { rule: LexRule | null, matched: string } { 76 | let result_matched: string = ""; 77 | let result_rule: LexRule | null = null; 78 | let result_priority: number | null = null; 79 | for (const rule of rules) { 80 | let match = ""; 81 | if (typeof rule.pattern === "string") { 82 | const tmp_next_index = next_index + rule.pattern.length; 83 | if (input.substring(next_index, tmp_next_index) != rule.pattern) continue; // マッチしない 84 | // マッチした文字列の末尾が\wで、その直後の文字が\wの場合はスキップ 85 | if (tmp_next_index < input.length && /\w/.test(rule.pattern.substring(0, 1)) && /\w/.test(input[tmp_next_index])) continue; 86 | match = rule.pattern; 87 | } 88 | else { 89 | // pattern: RegExp 90 | rule.pattern.lastIndex = next_index; 91 | const m = rule.pattern.exec(input); 92 | if (m === null) continue; // マッチ失敗 93 | match = m[0]; 94 | } 95 | // 同じ優先度の場合、最長マッチまたは出現順(match_priorityで設定) 96 | const priority = rule.priority !== undefined ? 
rule.priority : 0; 97 | if (result_priority === null || 98 | priority > result_priority || 99 | priority === result_priority && match.length > result_matched.length) { 100 | result_matched = match; 101 | result_rule = rule; 102 | result_priority = priority; 103 | } 104 | } 105 | return {rule: result_rule, matched: result_matched}; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/parser/ast.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "../def/token"; 2 | 3 | /** 4 | * AST 5 | */ 6 | export interface ASTNode { 7 | type: Token; 8 | value: string | null; 9 | children: Array; 10 | } 11 | -------------------------------------------------------------------------------- /src/parser/factory.ts: -------------------------------------------------------------------------------- 1 | import {Language} from "../def/language"; 2 | import {ParsingTable} from "../def/parsingtable"; 3 | import {Lexer} from "../lexer/lexer"; 4 | import {Parser} from "./parser"; 5 | 6 | /** 7 | * Parserを生成するためのファクトリクラス 8 | */ 9 | export class ParserFactory { 10 | /** 11 | * 言語 12 | * @param {Language} language 解析する言語情報 13 | * @param {ParsingTable} parsing_table 構文解析表 14 | * @returns {Parser} 生成されたパーサ 15 | */ 16 | public static create(language: Language, parsing_table: ParsingTable): Parser { 17 | const lexer = new Lexer(language); 18 | return new Parser(lexer, language.grammar, parsing_table); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/parser/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./ast"; 2 | export * from "./factory"; 3 | export * from "./parser"; 4 | -------------------------------------------------------------------------------- /src/parser/parser.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {ParsingTable} from "../def/parsingtable"; 3 | import {Token, TokenizedInput} from "../def/token"; 4 | import {ILexer} from "../lexer/lexer"; 5 | 6 | /** 7 | * 構文解析器 8 | */ 9 | export class Parser { 10 | /** 11 | * @param {ILexer} lexer 字句解析の際に使用する字句解析器 12 | * @param {GrammarDefinition} grammar 解析する構文定義 13 | * @param {ParsingTable} parsingtable 解析する構文解析表 14 | */ 15 | constructor(private lexer: ILexer, private grammar: GrammarDefinition, private parsingtable: ParsingTable) { 16 | } 17 | 18 | /** 19 | * 構文解析を実行する 20 | * @param {string} input 解析する入力文字列 21 | * @returns {any} 解析結果(返る結果はコントローラによって異なる) 22 | */ 23 | public parse(input: string): any { 24 | return this._parse(this.lexer.exec(input)); 25 | } 26 | 27 | // parsingtableはconflictを含む以外は正しさが保証されているものと仮定する 28 | // inputsは正しくないトークンが与えられる可能性を含む 29 | // TODO: 詳細な例外処理、エラー検知 30 | private _parse(inputs: Array): any { 31 | let read_index: number = 0; // 次に読むべき入力記号のインデックス 32 | const inputs_length: number = inputs.length; 33 | const state_stack: Array = [0]; // 現在読んでいる構文解析表の状態番号を置くスタック 34 | const result_stack: Array = []; // 解析中のASTノードを置くスタック 35 | let flg_error: boolean = false; 36 | 37 | // 構文解析する 38 | while (read_index < inputs_length) { 39 | let token: Token = inputs[read_index].token; 40 | let state: number = state_stack[state_stack.length - 1]; 41 | if (!this.parsingtable[state].has(token)) { 42 | // 未定義 43 | console.error("parse failed: unexpected token:", token); 44 | flg_error = true; 45 | break; 46 | } 47 | let action = 
this.parsingtable[state].get(token)!; 48 | if (action.type == "shift") { 49 | // shiftオペレーション 50 | // 次の状態をスタックに追加 51 | state_stack.push(action.to); 52 | 53 | result_stack.push(inputs[read_index].value); 54 | 55 | // 入力を一つ消費 56 | read_index += 1; 57 | } 58 | else if (action.type == "reduce") { 59 | // reduceオペレーション 60 | const grammar_rule = this.grammar.rules[action.grammar_id]; 61 | const rnum = grammar_rule.pattern.length; 62 | // 対応する規則の右辺の記号の数だけスタックからポップする 63 | for (let i = 0; i < rnum; i++) state_stack.pop(); 64 | 65 | // rnumが0でないなら、右辺の記号の数だけスタックからポップする 66 | const children = []; 67 | for (let i = 0; i < rnum; i++) children[rnum - 1 - i] = result_stack.pop(); 68 | 69 | if (typeof grammar_rule.ltoken !== "symbol" && grammar_rule.callback !== undefined) { 70 | result_stack.push(grammar_rule.callback(children, grammar_rule.ltoken, this.lexer)); 71 | } 72 | else if (typeof grammar_rule.ltoken !== "symbol" && this.grammar.default_callback !== undefined) { 73 | result_stack.push(this.grammar.default_callback(children, grammar_rule.ltoken, this.lexer)); 74 | } 75 | else { 76 | result_stack.push(children[0]); 77 | } 78 | 79 | // このままgotoオペレーションを行う 80 | state = state_stack[state_stack.length - 1]; 81 | token = grammar_rule.ltoken; 82 | if (!this.parsingtable[state].has(token)) { 83 | // 未定義 84 | console.error("parse failed: unexpected token:", token); 85 | flg_error = true; 86 | break; 87 | } 88 | action = this.parsingtable[state].get(token)!; 89 | if (action.type != "goto") { 90 | // gotoアクションでなければおかしい 91 | console.error("parse failed: goto operation expected after reduce operation"); 92 | flg_error = true; 93 | break; 94 | } 95 | state_stack.push(action.to); 96 | } 97 | else if (action.type == "accept") { 98 | // 構文解析完了 99 | break; 100 | } 101 | else if (action.type == "conflict") { 102 | console.error("conflict found:"); 103 | console.error("current state " + state + ":", this.parsingtable[state]); 104 | console.error("shift:", action.shift_to, ",reduce:", action.reduce_grammar); 105 | action.shift_to.forEach((to: number) => { 106 | console.error("shift to " + to.toString() + ":", this.parsingtable[to]); 107 | }); 108 | action.reduce_grammar.forEach((grammar_id: number) => { 109 | console.error("reduce grammar " + grammar_id.toString() + ":", this.parsingtable[grammar_id]); 110 | }); 111 | console.error("parser cannot parse conflicted grammar"); 112 | flg_error = true; 113 | break; 114 | } 115 | } 116 | if (flg_error) { 117 | console.error("parse failed."); 118 | } 119 | if (result_stack.length != 1) { 120 | console.error("failed to construct tree."); 121 | } 122 | return result_stack[0]; 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/parsergenerator/closureitem.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "../def/token"; 2 | import {GrammarDB} from "./grammardb"; 3 | 4 | /** 5 | * 単一のLRアイテムであり、`S -> A . 
B [$]` のようなアイテムの規則id・ドットの位置・先読み記号の集合の情報を持つ 6 | * 7 | * [[GrammarDB]]から与えられるトークンIDをもとにして、LR(0)およびLR(1)アイテムとしてのハッシュ値を生成することができる 8 | * 9 | * Immutableであるべきオブジェクトであるため、インスタンス生成後は内部状態が変化することはないと仮定される 10 | */ 11 | export class ClosureItem { 12 | private _lr0_hash: string; 13 | private _lr1_hash: string; 14 | /** 15 | * @param {GrammarDB} grammardb 使用する構文の情報 16 | * @param {number} _rule_id 構文のid 17 | * @param {number} _dot_index ドットの位置 18 | * @param {Array} _lookaheads 先読み記号の集合 19 | */ 20 | constructor(private grammardb: GrammarDB, private _rule_id: number, private _dot_index: number, private _lookaheads: Array) { 21 | // 有効な値かどうか調べる 22 | if (!this.grammardb.hasRuleId(this._rule_id)) { 23 | throw new Error("invalid grammar id"); 24 | } 25 | if (this._dot_index < 0 || this._dot_index > this.grammardb.getRuleById(this._rule_id).pattern.length) { 26 | throw new Error("dot index out of range"); 27 | } 28 | if (this._lookaheads.length == 0) { 29 | // 必要か? 30 | throw new Error("one or more lookahead symbols needed"); 31 | } 32 | this.sortLA(); 33 | this.updateHash(); 34 | } 35 | /** 36 | * 自身の規則idを返す 37 | * 38 | * 規則idはルールの定義順に0,1,2,...と割り振られる 39 | * @returns {number} 40 | */ 41 | get rule_id(): number { 42 | return this._rule_id; 43 | } 44 | /** 45 | * 現在の読み込み位置を意味するドットの位置を返す 46 | * @returns {number} 47 | */ 48 | get dot_index(): number { 49 | return this._dot_index; 50 | } 51 | /** 52 | * LR(1)先読み記号の集合を配列として返す 53 | * 54 | * 配列のコピーではなく参照が返されるので、結果しとて得られた配列に変更を加えてはならない 55 | * @returns {Array} 56 | */ 57 | get lookaheads(): Array { 58 | return this._lookaheads; 59 | } 60 | /** 61 | * 先読み記号の配列を、[[GrammarDB]]によって割り振られるトークンid順にソートする 62 | */ 63 | private sortLA() { 64 | this.lookaheads.sort((t1: Token, t2: Token) => { 65 | return this.grammardb.getTokenId(t1) - this.grammardb.getTokenId(t2); 66 | }); 67 | } 68 | /** 69 | * ハッシュ文字列を生成する 70 | */ 71 | private updateHash() { 72 | this._lr0_hash = this.rule_id.toString() + "," + this.dot_index.toString(); 73 | let la_hash = "["; 74 | for (let i = 0; i < this.lookaheads.length; i++) { 75 | la_hash += this.grammardb.getTokenId(this.lookaheads[i]).toString(); 76 | if (i != this.lookaheads.length - 1) la_hash += ","; 77 | } 78 | la_hash += "]"; 79 | this._lr1_hash = this._lr0_hash + "," + la_hash; 80 | } 81 | /** 82 | * 先読み部分を除いたLR(0)アイテムとしてのハッシュ文字列を得る 83 | * @returns {string} 84 | */ 85 | public getLR0Hash(): string { 86 | return this._lr0_hash; 87 | } 88 | /** 89 | * 先読み部分を含めたLR(1)アイテムとしてのハッシュ文字列を得る 90 | * @returns {string} 91 | */ 92 | public getLR1Hash(): string { 93 | return this._lr1_hash; 94 | } 95 | /** 96 | * LR(0)ハッシュの一致を調べる 97 | * @param {ClosureItem} c 比較対象のLRアイテム 98 | * @returns {boolean} 99 | */ 100 | public isSameLR0(c: ClosureItem): boolean { 101 | return this.getLR0Hash() == c.getLR0Hash(); 102 | } 103 | /** 104 | * LR(1)ハッシュの一致を調べる 105 | * @param {ClosureItem} c 比較対象のLRアイテム 106 | * @returns {boolean} 107 | */ 108 | public isSameLR1(c: ClosureItem): boolean { 109 | return this.getLR1Hash() == c.getLR1Hash(); 110 | } 111 | /** 112 | * LR0部分を維持しながらLR1先読み記号ごとにClosureItemを分割し、先読み記号の数が1のClosureItemの集合を生成する 113 | */ 114 | public separateByLookAheads(): Array { 115 | // this.lookaheadsの要素数が1未満の状況は存在しない 116 | const result = []; 117 | for (const la of this.lookaheads) { 118 | result.push(new ClosureItem(this.grammardb, this.rule_id, this.dot_index, [la])); 119 | } 120 | return result; 121 | } 122 | /** 123 | * LR0部分が同じ2つのClosureItemについて、先読み部分を統合した新しいClosureItemを生成する 124 | * 125 | * 異なるLR(0)アイテムであった場合、nullを返す 126 | * @param {ClosureItem} c マージ対象のLRアイテム 127 | 
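	 * For example (illustrative, using the item notation from the class comment above): merging `S -> A . B [x]`
	 * with `S -> A . B [y]` yields a new item whose lookahead set is {x, y}, while merging it with
	 * `S -> A B . [x]` returns null because their LR(0) parts differ.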
* @returns {ClosureItem | null} 先読み部分がマージされた新しいLRアイテム 128 | */ 129 | public merge(c: ClosureItem): ClosureItem | null { 130 | // LR0部分が違っている場合はnullを返す 131 | if (!this.isSameLR0(c)) return null; 132 | // LR1部分まで同じ場合は自身を返す 133 | if (this.isSameLR1(c)) return this; 134 | // 双方のlookaheads配列はソート済みであると仮定できる 135 | let i1 = 0; 136 | let i2 = 0; 137 | const new_la = []; 138 | // 2つのLA配列をマージして新しい配列を生成する 139 | while (i1 < this.lookaheads.length || i2 < c.lookaheads.length) { 140 | if (i1 == this.lookaheads.length) { 141 | new_la.push(c.lookaheads[i2++]); 142 | } 143 | else if (i2 == c.lookaheads.length) { 144 | new_la.push(this.lookaheads[i1++]); 145 | } 146 | else if (this.lookaheads[i1] == c.lookaheads[i2]) { 147 | new_la.push(this.lookaheads[i1++]); 148 | i2++; 149 | } 150 | else if (this.grammardb.getTokenId(this.lookaheads[i1]) < this.grammardb.getTokenId(c.lookaheads[i2])) { 151 | new_la.push(this.lookaheads[i1++]); 152 | } 153 | else { 154 | new_la.push(c.lookaheads[i2++]); 155 | } 156 | } 157 | return new ClosureItem(this.grammardb, this.rule_id, this.dot_index, new_la); 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/parsergenerator/closureset.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "../def/token"; 2 | import {ClosureItem} from "./closureitem"; 3 | import {GrammarDB} from "./grammardb"; 4 | 5 | /** 6 | * 複数のLRアイテムを保持するアイテム集合であり、インスタンス生成時に自身をクロージャー展開する 7 | * 8 | * [[GrammarDB]]から与えられるトークンIDをもとにして、LR(0)およびLR(1)アイテム集合としてのハッシュ値を生成することができる 9 | * 10 | * Immutableであるべきオブジェクトであるため、インスタンス生成後は内部状態が変化することはないと仮定される 11 | */ 12 | export class ClosureSet { 13 | // インスタンス生成後に内部状態が変化することはないものとする 14 | private _lr0_hash: string; 15 | private _lr1_hash: string; 16 | /** 17 | * @param {GrammarDB} grammardb 使用する構文の情報 18 | * @param {Array} closureset 19 | */ 20 | constructor(private grammardb: GrammarDB, private closureset: Array) { 21 | this.expandClosure(); 22 | this.sort(); 23 | this.updateHash(); 24 | } 25 | /** 26 | * 自身が保持する複数の[[ClosureItem]]は、常にLR(1)ハッシュによってソートされた状態に保たれているようにする 27 | */ 28 | private sort() { 29 | this.closureset.sort((i1: ClosureItem, i2: ClosureItem) => { 30 | if (i1.getLR1Hash() < i2.getLR1Hash()) return -1; 31 | else if (i1.getLR1Hash() > i2.getLR1Hash()) return 1; 32 | return 0; 33 | }); 34 | } 35 | /** 36 | * 保持しているLRアイテムの数 37 | */ 38 | get size() { 39 | return this.closureset.length; 40 | } 41 | /** 42 | * 保持している[[ClosureItem]]の配列を得る 43 | * @param {boolean} prevent_copy trueを与えると配列をコピーせず返す 44 | * 45 | * 得られた配列に変更が加えられないと保証される場合に用いる 46 | * @returns {Array} 47 | */ 48 | public getArray(prevent_copy: boolean = false): Array { 49 | if (prevent_copy) return this.closureset; 50 | // デフォルトではコピーして返す(パフォーマンスは少し落ちる) 51 | return this.closureset.concat(); 52 | } 53 | /** 54 | * LRアイテムが集合に含まれているかどうかを調べる 55 | * 56 | * @param {ClosureItem} item 57 | * @returns {boolean} 58 | */ 59 | public includes(item: ClosureItem): boolean { 60 | // 二分探索を用いて高速に探索する 61 | let min = 0; 62 | let max = this.closureset.length - 1; 63 | while (min <= max) { 64 | const mid = min + Math.floor((max - min) / 2); 65 | if (item.getLR1Hash() < this.closureset[mid].getLR1Hash()) { 66 | max = mid - 1; 67 | } 68 | else if (item.getLR1Hash() > this.closureset[mid].getLR1Hash()) { 69 | min = mid + 1; 70 | } 71 | else { 72 | // itemとclosureset[mid]が等しい 73 | return true; 74 | } 75 | } 76 | return false; 77 | } 78 | /** 79 | * LR(0)ハッシュの一致を調べる 80 | * @param {ClosureSet} cs 比較対象のアイテム集合 81 | * @returns 
{boolean} 82 | */ 83 | public isSameLR0(cs: ClosureSet): boolean { 84 | return this.getLR0Hash() == cs.getLR0Hash(); 85 | } 86 | /** 87 | * LR(1)ハッシュの一致を調べる 88 | * @param {ClosureSet} cs 比較対象のアイテム集合 89 | * @returns {boolean} 90 | */ 91 | public isSameLR1(cs: ClosureSet): boolean { 92 | return this.getLR1Hash() == cs.getLR1Hash(); 93 | } 94 | /** 95 | * ハッシュ文字列を生成する 96 | */ 97 | private updateHash() { 98 | let lr0_hash = ""; 99 | let lr1_hash = ""; 100 | for (let i = 0; i < this.closureset.length; i++) { 101 | lr0_hash += this.closureset[i].getLR0Hash(); 102 | lr1_hash += this.closureset[i].getLR1Hash(); 103 | if (i != this.closureset.length - 1) { 104 | lr0_hash += "|"; 105 | lr1_hash += "|"; 106 | } 107 | } 108 | this._lr0_hash = lr0_hash; 109 | this._lr1_hash = lr1_hash; 110 | } 111 | /** 112 | * LR(0)アイテム集合としてのハッシュ文字列を得る 113 | * @returns {string} 114 | */ 115 | public getLR0Hash() { 116 | return this._lr0_hash; 117 | } 118 | /** 119 | * LR(1)アイテム集合としてのハッシュ文字列を得る 120 | * @returns {string} 121 | */ 122 | public getLR1Hash() { 123 | return this._lr1_hash; 124 | } 125 | /** 126 | * LR(0)部分が同じ2つのClosureSetについて、先読み部分を統合した新しいClosureSetを生成する 127 | * 128 | * 異なるLR(0)アイテム集合であった場合、nullを返す 129 | * @param {ClosureSet} cs マージ対象のアイテム集合 130 | * @returns {ClosureSet | null} 先読み部分がマージされた新しいアイテム集合 131 | */ 132 | public mergeLA(cs: ClosureSet): ClosureSet | null { 133 | // LR0部分が違っている場合はnullを返す 134 | if (!this.isSameLR0(cs)) return null; 135 | // LR1部分まで同じ場合は自身を返す 136 | if (this.isSameLR1(cs)) return this; 137 | const a1 = this.getArray(); 138 | const a2 = cs.getArray(); 139 | const new_set: Array = []; 140 | // 2つの配列においてLR部分は順序を含めて等しい 141 | for (let i = 0; i < a1.length; i++) { 142 | const new_item = a1[i].merge(a2[i]); 143 | if (new_item != null) new_set.push(new_item); 144 | } 145 | return new ClosureSet(this.grammardb, new_set); 146 | } 147 | 148 | /** 149 | * クロージャー展開を行う 150 | * 151 | * TODO: リファクタリング 152 | */ 153 | private expandClosure() { 154 | // 展開処理中はClosureItemのlookaheadsの要素数を常に1に保つこととする 155 | // 初期化 156 | const set: Array = []; 157 | // ClosureItemをlookaheadsごとに分解する 158 | for (const ci of this.closureset) { 159 | set.push(...ci.separateByLookAheads()); 160 | } 161 | this.closureset = set; 162 | this.sort(); 163 | 164 | // 変更がなくなるまで繰り返す 165 | let index = 0; 166 | while (index < this.closureset.length) { 167 | const ci = this.closureset[index++]; 168 | const pattern = this.grammardb.getRuleById(ci.rule_id).pattern; 169 | 170 | if (ci.dot_index == pattern.length) continue; // .が末尾にある場合はスキップ 171 | const follow = pattern[ci.dot_index]; 172 | if (!this.grammardb.symbols.isNonterminalSymbol(follow)) continue; // .の次の記号が非終端記号でないならばスキップ 173 | 174 | // クロージャー展開を行う 175 | 176 | // 先読み記号を導出 177 | // ci.lookaheadsは要素数1のため、0番目のインデックスのみを参照すればよい 178 | const lookaheads = [...this.grammardb.first.get(pattern.slice(ci.dot_index + 1).concat(ci.lookaheads[0])).values()]; 179 | lookaheads.sort((t1: Token, t2: Token) => { 180 | return this.grammardb.getTokenId(t1) - this.grammardb.getTokenId(t2); 181 | }); 182 | 183 | // symbolを左辺にもつ全ての規則を、先読み記号を付与して追加 184 | const rules = this.grammardb.findRules(follow); 185 | for (const {id} of rules) { 186 | for (const la of lookaheads) { 187 | const new_ci = new ClosureItem(this.grammardb, id, 0, [la]); 188 | // 重複がなければ新しいアイテムを追加する 189 | let flg_duplicated = false; 190 | for (const existing_item of this.closureset) { 191 | if (new_ci.isSameLR1(existing_item)) { 192 | flg_duplicated = true; 193 | break; 194 | } 195 | } 196 | if (!flg_duplicated) { 197 | 
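					// (Illustrative trace, assuming the arithmetic grammar from src/sample.ts with start symbol EXP:
					// starting from the kernel item "S' -> . EXP [$]", this loop first adds "EXP -> . EXP PLUS TERM [$]"
					// and "EXP -> . TERM [$]"; re-scanning those adds the same EXP items again with lookahead [PLUS],
					// since First("PLUS TERM $") = {PLUS}, and then the TERM and ATOM rules with lookaheads drawn from
					// {$, PLUS, ASTERISK}. Each such item is pushed here exactly once; items that differ only in their
					// lookaheads are combined by the merge step at the end of this method.)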
this.closureset.push(new_ci); 198 | } 199 | } 200 | } 201 | } 202 | this.sort(); 203 | 204 | // ClosureItemの先読み部分をマージする 205 | const tmp = this.closureset; 206 | this.closureset = []; 207 | let merged_lookaheads = []; 208 | for (let i = 0; i < tmp.length; i++) { 209 | merged_lookaheads.push(tmp[i].lookaheads[0]); 210 | if (i == tmp.length - 1 || !tmp[i].isSameLR0(tmp[i + 1])) { 211 | this.closureset.push(new ClosureItem(this.grammardb, tmp[i].rule_id, tmp[i].dot_index, merged_lookaheads)); 212 | merged_lookaheads = []; 213 | } 214 | } 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /src/parsergenerator/dfagenerator.ts: -------------------------------------------------------------------------------- 1 | import {SYMBOL_EOF, Token} from "../def/token"; 2 | import {ClosureItem} from "./closureitem"; 3 | import {ClosureSet} from "./closureset"; 4 | import {GrammarDB} from "./grammardb"; 5 | 6 | export type DFAEdge = Map; 7 | export type DFANode = { closure: ClosureSet, edge: DFAEdge }; 8 | export type DFA = Array; 9 | 10 | /** 11 | * 構文規則からLR(1)DFAおよびLALR(1)DFAを生成する 12 | */ 13 | export class DFAGenerator { 14 | private lr_dfa: DFA; 15 | private lalr_dfa: DFA; 16 | /** 17 | * @param {GrammarDB} grammardb 構文規則 18 | */ 19 | constructor(private grammardb: GrammarDB) { 20 | this.generateDFA(); 21 | this.mergeLA(); 22 | } 23 | /** 24 | * LR(1)DFAを得る 25 | * @returns {DFA} 26 | */ 27 | public getLR1DFA(): DFA { 28 | return this.lr_dfa; 29 | } 30 | /** 31 | * LALR(1)DFAを得る 32 | * @returns {DFA} 33 | */ 34 | public getLALR1DFA(): DFA { 35 | return this.lalr_dfa; 36 | } 37 | /** 38 | * DFAの生成 39 | */ 40 | private generateDFA(): void { 41 | const initial_item: ClosureItem = new ClosureItem(this.grammardb, -1, 0, [SYMBOL_EOF]); 42 | const initial_set: ClosureSet = new ClosureSet(this.grammardb, [initial_item]); 43 | const dfa: DFA = [{closure: initial_set, edge: new Map()}]; 44 | 45 | // 変更がなくなるまでループ 46 | let flg_changed = true; 47 | let i = 0; 48 | while (flg_changed) { 49 | flg_changed = false; 50 | while (i < dfa.length) { 51 | const closure = dfa[i].closure; 52 | const edge = dfa[i].edge; 53 | const new_sets: Map = this.generateNewClosureSets(closure); 54 | 55 | // 与えられたDFANodeと全く同じDFANodeがある場合、そのindexを返す 56 | // 見つからなければ-1を返す 57 | const getIndexOfDuplicatedNode = (dfa: DFA, new_node: DFANode): number => { 58 | let index = -1; 59 | for (const [i, node] of dfa.entries()) { 60 | if (new_node.closure.isSameLR1(node.closure)) { 61 | index = i; 62 | break; 63 | } 64 | } 65 | return index; 66 | }; 67 | // 新しいノードを生成する 68 | for (const [edge_label, cs] of new_sets) { 69 | const new_node: DFANode = {closure: cs, edge: new Map()}; 70 | // 既存のNodeのなかに同一のClosureSetを持つものがないか調べる 71 | const duplicated_index = getIndexOfDuplicatedNode(dfa, new_node); 72 | let index_to; 73 | if (duplicated_index == -1) { 74 | // 既存の状態と重複しない 75 | dfa.push(new_node); 76 | index_to = dfa.length - 1; 77 | flg_changed = true; 78 | } 79 | else { 80 | // 既存の状態と規則が重複する 81 | // 新しいノードの追加は行わず、重複する既存ノードに対して辺を張る 82 | index_to = duplicated_index; 83 | } 84 | // 辺を追加する 85 | if (!edge.has(edge_label)) { 86 | edge.set(edge_label, index_to); 87 | // 新しい辺が追加された 88 | flg_changed = true; 89 | // DFAを更新 90 | dfa[i] = {closure, edge}; 91 | } 92 | } 93 | i++; 94 | } 95 | i = 0; 96 | } 97 | this.lr_dfa = dfa; 98 | } 99 | /** 100 | * LR(1)オートマトンの先読み部分をマージして、LALR(1)オートマトンを作る 101 | */ 102 | private mergeLA(): void { 103 | if (this.lalr_dfa !== undefined || this.lr_dfa === undefined) return; 104 | const base: 
Array = this.lr_dfa.slice(); // nullを許容する 105 | const merge_to: Map = new Map(); // マージ先への対応関係を保持する 106 | 107 | for (let i = 0; i < base.length; i++) { 108 | if (base[i] === null) continue; 109 | for (let ii = i + 1; ii < base.length; ii++) { 110 | if (base[ii] === null) continue; 111 | // LR(0)アイテムセット部分が重複 112 | if (base[i]!.closure.isSameLR0(base[ii]!.closure)) { 113 | // ii番目の先読み部分をi番目にマージする 114 | // インデックス番号の大きい方が削除される 115 | // 辺情報は、削除された要素の持つ辺の対象もいずれマージされて消えるため操作しなくてよい 116 | 117 | // 更新 118 | // Nodeに変更をかけるとLR(1)DFAの中身まで変化してしまうため新しいオブジェクトを生成する 119 | base[i] = {closure: base[i]!.closure.mergeLA(base[ii]!.closure)!, edge: base[i]!.edge}; 120 | // ii番目を削除 121 | base[ii] = null; 122 | // マージ元->マージ先への対応関係を保持 123 | merge_to.set(ii, i); 124 | } 125 | } 126 | } 127 | // 削除した部分を配列から抜き取る 128 | const prev_length = base.length; // ノードをマージする前のノード総数 129 | const fix = new Array(prev_length); // (元のindex->現在のindex)の対応表を作る 130 | let d = 0; // ずれ 131 | // nullで埋めた部分を消すことによるindexの変化 132 | for (let i = 0; i < prev_length; i++) { 133 | if (base[i] === null) d += 1; // ノードが削除されていた場合、以降のインデックスを1つずらす 134 | else fix[i] = i - d; 135 | } 136 | // 配列からnull埋めした部分を削除したものを作る 137 | const shortened: Array = []; 138 | for (const node of base) { 139 | if (node !== null) shortened.push(node); 140 | } 141 | // fixのうち、ノードが削除された部分を正しい対応で埋める 142 | for (const [from, to] of merge_to) { 143 | let index = to; 144 | while (merge_to.has(index)) index = merge_to.get(index)!; 145 | if (index !== to) merge_to.set(to, index); // 対応表を更新しておく 146 | fix[from] = fix[index]; // toを繰り返し辿っているので未定義部分へのアクセスは発生しない 147 | } 148 | 149 | const result: DFA = []; 150 | // インデックスの対応表をもとに辺情報を書き換える 151 | for (const node of shortened) { 152 | const new_edge = new Map(); 153 | for (const [token, node_index] of node.edge) { 154 | new_edge.set(token, fix[node_index]); 155 | } 156 | result.push({closure: node.closure, edge: new_edge}); 157 | } 158 | this.lalr_dfa = result; 159 | } 160 | /** 161 | * 既存のClosureSetから新しい規則を生成し、対応する記号ごとにまとめる 162 | * @param closureset 163 | */ 164 | private generateNewClosureSets(closureset: ClosureSet): Map { 165 | const tmp: Map> = new Map>(); 166 | // 規則から新しい規則を生成し、対応する記号ごとにまとめる 167 | for (const {rule_id, dot_index, lookaheads} of closureset.getArray()) { 168 | const pattern = this.grammardb.getRuleById(rule_id).pattern; 169 | if (dot_index == pattern.length) continue; // .が末尾にある場合はスキップ 170 | const new_ci = new ClosureItem(this.grammardb, rule_id, dot_index + 1, lookaheads); 171 | const edge_label: Token = pattern[dot_index]; 172 | 173 | let items: Array; 174 | if (tmp.has(edge_label)) { 175 | // 既に同じ記号が登録されている 176 | items = tmp.get(edge_label)!; 177 | } 178 | else { 179 | // 同じ記号が登録されていない 180 | items = []; 181 | } 182 | items.push(new_ci); 183 | tmp.set(edge_label, items); 184 | } 185 | // ClosureItemの配列からClosureSetに変換 186 | const result: Map = new Map(); 187 | for (const [edge_label, items] of tmp) { 188 | result.set(edge_label, new ClosureSet(this.grammardb, items)); 189 | } 190 | return result; 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/parsergenerator/firstset.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {SYMBOL_EOF, Token} from "../def/token"; 3 | import {NullableSet} from "./nullableset"; 4 | import {SymbolDiscriminator} from "./symboldiscriminator"; 5 | 6 | type Constraint = Array<{ superset: Token, subset: Token }>; 7 | 8 | /** 9 | * First集合 10 | */ 
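// Worked example (a sketch only, reusing the arithmetic grammar defined in src/sample.ts;
// the token names EXP/TERM/ATOM/PLUS/ASTERISK/DIGITS/LPAREN are taken from there):
//   EXP  -> EXP PLUS TERM | TERM
//   TERM -> TERM ASTERISK ATOM | ATOM
//   ATOM -> DIGITS | LPAREN EXP RPAREN
// Every derivation of ATOM, TERM and EXP starts with DIGITS or LPAREN, so
//   First(ATOM) = First(TERM) = First(EXP) = {DIGITS, LPAREN}
// and for a symbol sequence First(["PLUS", "TERM"]) = {PLUS}, because PLUS is not nullable.
// Intended use, matching the constructor below:
//   const symbols = new SymbolDiscriminator(grammar);
//   const first = new FirstSet(grammar, symbols);
//   first.get("EXP");            // Set {"DIGITS", "LPAREN"}
//   first.get(["PLUS", "TERM"]); // Set {"PLUS"}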
11 | export class FirstSet { 12 | private first_map: Map>; 13 | private nulls: NullableSet; 14 | 15 | /** 16 | * @param {GrammarDefinition} grammar 構文規則 17 | * @param {SymbolDiscriminator} symbols 終端/非終端記号の判別に用いる分類器 18 | */ 19 | constructor(private grammar: GrammarDefinition, private symbols: SymbolDiscriminator) { 20 | this.first_map = new Map>(); 21 | this.nulls = new NullableSet(this.grammar); 22 | this.generateFirst(); 23 | } 24 | 25 | /** 26 | * First集合を生成する 27 | */ 28 | private generateFirst() { 29 | // Firstを導出 30 | const first_result: Map> = new Map>(); 31 | // 初期化 32 | // FIRST($) = {$} だけ手動で追加 33 | first_result.set(SYMBOL_EOF, new Set([SYMBOL_EOF])); 34 | // 終端記号Xに対してFirst(X)=X 35 | const terminal_symbols = this.symbols.getTerminalSymbols(); 36 | terminal_symbols.forEach((value: Token) => { 37 | first_result.set(value, new Set([value])); 38 | }); 39 | // 非終端記号はFirst(Y)=∅で初期化 40 | const nonterminal_symbols = this.symbols.getNonterminalSymbols(); 41 | nonterminal_symbols.forEach((value: Token) => { 42 | first_result.set(value, new Set()); 43 | }); 44 | 45 | // 包含についての制約を生成 46 | const constraint: Constraint = []; 47 | for (const rule of this.grammar.rules) { 48 | const sup: Token = rule.ltoken; 49 | // 右辺の左から順に、non-nullableな記号が現れるまで制約に追加 50 | // 最初のnon-nullableな記号は制約に含める 51 | for (const sub of rule.pattern) { 52 | if (sup != sub) { 53 | constraint.push({superset: sup, subset: sub}); 54 | } 55 | if (!this.nulls.isNullable(sub)) { 56 | break; 57 | } 58 | } 59 | } 60 | 61 | // 制約解消 62 | let flg_changed = true; 63 | while (flg_changed) { 64 | flg_changed = false; 65 | for (const pair of constraint) { 66 | const sup: Token = pair.superset; 67 | const sub: Token = pair.subset; 68 | const superset: Set = first_result.get(sup)!; 69 | const subset: Set = first_result.get(sub)!; 70 | subset.forEach((token: Token) => { 71 | // subset内の要素がsupersetに含まれていない 72 | if (!superset.has(token)) { 73 | // subset内の要素をsupersetに入れる 74 | superset.add(token); 75 | flg_changed = true; 76 | } 77 | }); 78 | // First集合を更新 79 | first_result.set(sup, superset); 80 | } 81 | } 82 | this.first_map = first_result; 83 | } 84 | 85 | /** 86 | * 記号または記号列を与えて、その記号から最初に導かれうる非終端記号の集合を返す 87 | * @param {Token | Token[]} arg 88 | * @returns {Set} 89 | */ 90 | public get(arg: Token | Token[]): Set { 91 | // 単一の記号の場合 92 | if (!Array.isArray(arg)) { 93 | if (!this.first_map.has(arg)) { 94 | throw new Error(`invalid token found: ${arg}`); 95 | } 96 | return this.first_map.get(arg)!; 97 | } 98 | // 記号列の場合 99 | const tokens: Token[] = arg; 100 | 101 | // 不正な記号を発見 102 | for (const token of tokens) { 103 | if (!this.first_map.has(token)) { 104 | throw new Error(`invalid token found: ${token}`); 105 | } 106 | } 107 | const result: Set = new Set(); 108 | for (const token of tokens) { 109 | const add = this.first_map.get(token)!; // トークン列の先頭から順にFirst集合を取得 110 | // 追加 111 | add.forEach((t: Token) => { 112 | if (!result.has(t)) { 113 | result.add(t); 114 | } 115 | }); 116 | if (!this.nulls.isNullable(token)) { 117 | // 現在のトークン ∉ Nulls ならばここでストップ 118 | break; 119 | } 120 | } 121 | return result; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/parsergenerator/grammardb.ts: -------------------------------------------------------------------------------- 1 | import {Language, GrammarDefinition, GrammarRule} from "../def/language"; 2 | import {SYMBOL_EOF, SYMBOL_SYNTAX, Token} from "../def/token"; 3 | import {FirstSet} from "./firstset"; 4 | import {SymbolDiscriminator} from 
"./symboldiscriminator"; 5 | 6 | /** 7 | * 言語定義から得られる、構文規則に関する情報を管理するクラス 8 | */ 9 | export class GrammarDB { 10 | private grammar: GrammarDefinition; 11 | private _start_symbol: Token; 12 | private _first: FirstSet; 13 | private _symbols: SymbolDiscriminator; 14 | private tokenmap: Map; 15 | private tokenid_counter: number; 16 | private rulemap: Map>; 17 | 18 | constructor(language: Language) { 19 | this.grammar = language.grammar; 20 | this._start_symbol = language.grammar.start_symbol; 21 | this._symbols = new SymbolDiscriminator(this.grammar); 22 | this._first = new FirstSet(this.grammar, this.symbols); 23 | 24 | this.initTokenMap(); 25 | this.initDefMap(); 26 | } 27 | 28 | /** 29 | * それぞれの記号にidを割り振り、Token->numberの対応を生成 30 | */ 31 | private initTokenMap() { 32 | this.tokenid_counter = 0; 33 | this.tokenmap = new Map(); 34 | 35 | // 入力の終端$の登録 36 | this.tokenmap.set(SYMBOL_EOF, this.tokenid_counter++); 37 | // 仮の開始記号S'の登録 38 | this.tokenmap.set(SYMBOL_SYNTAX, this.tokenid_counter++); 39 | 40 | // 左辺値の登録 41 | for (const rule of this.grammar.rules) { 42 | const ltoken = rule.ltoken; 43 | // 構文規則の左辺に現れる記号は非終端記号 44 | if (!this.tokenmap.has(ltoken)) { 45 | this.tokenmap.set(ltoken, this.tokenid_counter++); 46 | } 47 | } 48 | // 右辺値の登録 49 | for (const rule of this.grammar.rules) { 50 | for (const symbol of rule.pattern) { 51 | if (!this.tokenmap.has(symbol)) { 52 | // 非終端記号でない(=左辺値に現れない)場合、終端記号である 53 | this.tokenmap.set(symbol, this.tokenid_counter++); 54 | } 55 | } 56 | } 57 | } 58 | 59 | /** 60 | * ある記号を左辺とするような構文ルールとそのidの対応を生成 61 | */ 62 | private initDefMap() { 63 | this.rulemap = new Map>(); 64 | for (let i = 0; i < this.grammar.rules.length; i++) { 65 | let tmp: Array<{ id: number, rule: GrammarRule }>; 66 | if (this.rulemap.has(this.grammar.rules[i].ltoken)) { 67 | tmp = this.rulemap.get(this.grammar.rules[i].ltoken)!; 68 | } 69 | else { 70 | tmp = []; 71 | } 72 | tmp.push({id: i, rule: this.grammar.rules[i]}); 73 | this.rulemap.set(this.grammar.rules[i].ltoken, tmp); 74 | } 75 | } 76 | 77 | /** 78 | * 開始記号を得る 79 | */ 80 | get start_symbol(): Token { 81 | return this._start_symbol; 82 | } 83 | /** 84 | * First集合を得る 85 | * @returns {FirstSet} 86 | */ 87 | get first(): FirstSet { 88 | return this._first; 89 | } 90 | /** 91 | * 終端/非終端記号分類器を得る 92 | * @returns {SymbolDiscriminator} 93 | */ 94 | get symbols(): SymbolDiscriminator { 95 | return this._symbols; 96 | } 97 | /** 98 | * 構文規則がいくつあるかを返す ただし-1番の規則は含めない 99 | */ 100 | get rule_size(): number { 101 | return this.grammar.rules.length; 102 | } 103 | 104 | /** 105 | * 与えられたidの規則が存在するかどうかを調べる 106 | * @param {number} id 107 | * @returns {boolean} 108 | */ 109 | public hasRuleId(id: number): boolean { 110 | return id >= -1 && id < this.rule_size; 111 | } 112 | /** 113 | * 非終端記号xに対し、それが左辺として対応する定義を得る 114 | * 115 | * 対応する定義が存在しない場合は空の配列を返す 116 | * @param x 117 | */ 118 | public findRules(x: Token): Array<{ id: number, rule: GrammarRule }> { 119 | if (this.rulemap.has(x)) { 120 | return this.rulemap.get(x)!; 121 | } 122 | return []; 123 | } 124 | /** 125 | * 規則idに対応した規則を返す 126 | * 127 | * -1が与えられた時は S' -> S $の規則を返す 128 | * @param id 129 | */ 130 | public getRuleById(id: number): GrammarRule { 131 | if (id == -1) { 132 | return {ltoken: SYMBOL_SYNTAX, pattern: [this.start_symbol]}; 133 | // return {ltoken: SYMBOL_SYNTAX, pattern: [this.start_symbol, SYMBOL_EOF]}; 134 | } 135 | else if (id >= 0 && id < this.grammar.rules.length) return this.grammar.rules[id]; 136 | throw new Error("grammar id out of range"); 137 | } 138 | /** 139 | * 
[[Token]]を与えると一意なidを返す 140 | * @param {Token} token 141 | * @returns {number} 142 | */ 143 | public getTokenId(token: Token): number { 144 | if (!this.tokenmap.has(token)) { 145 | // this.tokenmap.set(token, this.tokenid_counter++); 146 | // return -1; 147 | throw new Error(`invalid token ${token}`); 148 | } 149 | return this.tokenmap.get(token)!; 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/parsergenerator/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./closureset"; 2 | export * from "./closureitem"; 3 | export * from "./dfagenerator"; 4 | export * from "./firstset"; 5 | export * from "./nullableset"; 6 | export * from "./parsergenerator"; 7 | export * from "./symboldiscriminator"; 8 | export * from "./grammardb"; 9 | -------------------------------------------------------------------------------- /src/parsergenerator/nullableset.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {Token} from "../def/token"; 3 | 4 | /** 5 | * ある非終端記号から空列が導かれうるかどうかを判定する 6 | */ 7 | export class NullableSet { 8 | private nulls: Set; 9 | /** 10 | * @param {GrammarDefinition} grammar 構文規則 11 | */ 12 | constructor(private grammar: GrammarDefinition) { 13 | this.generateNulls(); 14 | } 15 | private generateNulls() { 16 | // 制約条件を導出するために、 17 | // 空列になりうる記号の集合nullsを導出 18 | this.nulls = new Set(); 19 | for (const rule of this.grammar.rules) { 20 | // 右辺の記号の数が0の規則を持つ記号は空列になりうる 21 | if (rule.pattern.length == 0) { 22 | this.nulls.add(rule.ltoken); 23 | } 24 | } 25 | 26 | // 変更が起きなくなるまでループする 27 | let flg_changed: boolean = true; 28 | while (flg_changed) { 29 | flg_changed = false; 30 | for (const rule of this.grammar.rules) { 31 | // 既にnullsに含まれていればスキップ 32 | if (this.isNullable(rule.ltoken)) continue; 33 | 34 | let flg_nulls = true; 35 | // 右辺に含まれる記号がすべてnullableの場合はその左辺はnullable 36 | for (const token of rule.pattern) { 37 | if (!this.isNullable(token)) { 38 | // 一つでもnullableでない記号があるならnon-nullable 39 | flg_nulls = false; 40 | break; 41 | } 42 | } 43 | if (flg_nulls) { 44 | flg_changed = true; 45 | this.nulls.add(rule.ltoken); 46 | } 47 | } 48 | } 49 | } 50 | /** 51 | * 与えられた[[Token]]がNullableかどうかを調べる 52 | * @param {Token} token 53 | * @returns {boolean} 54 | */ 55 | public isNullable(token: Token) { 56 | return this.nulls.has(token); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/parsergenerator/parsergenerator.ts: -------------------------------------------------------------------------------- 1 | import {Language} from "../def/language"; 2 | import {ParsingTable, AcceptOperation, ConflictedOperation, GotoOperation, ParsingOperation, ReduceOperation, ShiftOperation} from "../def/parsingtable"; 3 | import {SYMBOL_EOF, Token} from "../def/token"; 4 | import {ParserFactory} from "../parser/factory"; 5 | import {Parser} from "../parser/parser"; 6 | import {DFA, DFAGenerator} from "./dfagenerator"; 7 | import {GrammarDB} from "./grammardb"; 8 | 9 | /** 10 | * 言語定義から構文解析表および構文解析器を生成するパーサジェネレータ 11 | */ 12 | export class ParserGenerator { 13 | private parsing_table: ParsingTable; 14 | private table_type: "LR1" | "LALR1" | "CONFLICTED"; 15 | private grammardb: GrammarDB; 16 | private dfa_generator: DFAGenerator; 17 | 18 | /** 19 | * @param {Language} language 言語定義 20 | */ 21 | constructor(private language: Language) { 22 | this.grammardb = new 
GrammarDB(this.language); 23 | this.dfa_generator = new DFAGenerator(this.grammardb); 24 | this.init(); 25 | } 26 | 27 | /** 28 | * 構文解析表の生成 29 | */ 30 | private init() { 31 | const lalr_result = this.generateParsingTable(this.dfa_generator.getLALR1DFA()); 32 | if (lalr_result.success) { 33 | this.parsing_table = lalr_result.table; 34 | this.table_type = "LALR1"; 35 | return; 36 | } 37 | // LALR(1)構文解析表の生成に失敗 38 | // LR(1)構文解析表の生成を試みる 39 | console.error("LALR parsing conflict found. use LR(1) table."); 40 | const lr_result = this.generateParsingTable(this.dfa_generator.getLR1DFA()); 41 | this.parsing_table = lr_result.table; 42 | this.table_type = "LR1"; 43 | if (!lr_result.success) { 44 | // LR(1)構文解析表の生成に失敗 45 | this.table_type = "CONFLICTED"; 46 | console.error("LR(1) parsing conflict found. use LR(1) conflicted table."); 47 | } 48 | } 49 | 50 | /** 51 | * 構文解析器を得る 52 | * @returns {Parser} 53 | */ 54 | public getParser(): Parser { 55 | return ParserFactory.create(this.language, this.parsing_table); 56 | } 57 | 58 | /** 59 | * 構文解析表を得る 60 | * @returns {ParsingTable} 61 | */ 62 | public getParsingTable(): ParsingTable { 63 | return this.parsing_table; 64 | } 65 | 66 | /** 67 | * 生成された構文解析表に衝突が発生しているかどうかを調べる 68 | * @returns {boolean} 69 | */ 70 | public isConflicted(): boolean { 71 | return this.table_type === "CONFLICTED"; 72 | } 73 | 74 | /** 75 | * 構文解析表の種類を得る 76 | * 77 | * パーサジェネレータはまずLALR(1)構文解析表を生成し、LALR(1)構文解析表にコンフリクトを検知した場合はLR(1)構文解析表を使用する 78 | * @returns {"LR1" | "LALR1" | "CONFLICTED"} 79 | */ 80 | public getTableType(): "LR1" | "LALR1" | "CONFLICTED" { 81 | return this.table_type; 82 | } 83 | 84 | /** 85 | * DFAから構文解析表を構築する 86 | * @param {DFA} dfa 87 | */ 88 | private generateParsingTable(dfa: DFA): { table: ParsingTable, success: boolean } { 89 | const parsing_table: ParsingTable = []; 90 | let flg_conflicted = false; 91 | 92 | for (const node of dfa) { 93 | const table_row = new Map(); 94 | // 辺をもとにshiftとgotoオペレーションを追加 95 | for (const [label, to] of node.edge) { 96 | if (this.grammardb.symbols.isTerminalSymbol(label)) { 97 | // ラベルが終端記号の場合 98 | // shiftオペレーションを追加 99 | const operation: ShiftOperation = {type: "shift", to}; 100 | table_row.set(label, operation); 101 | } 102 | else if (this.grammardb.symbols.isNonterminalSymbol(label)) { 103 | // ラベルが非終端記号の場合 104 | // gotoオペレーションを追加 105 | const operation: GotoOperation = {type: "goto", to}; 106 | table_row.set(label, operation); 107 | } 108 | } 109 | 110 | // Closureをもとにacceptとreduceオペレーションを追加していく 111 | for (const item of node.closure.getArray()) { 112 | // 規則末尾が.でないならスキップ 113 | // if(item.pattern.getRuleById(item.pattern.size-1) != SYMBOL_DOT) return; 114 | if (item.dot_index != this.grammardb.getRuleById(item.rule_id).pattern.length) continue; 115 | if (item.rule_id == -1) { 116 | // acceptオペレーション 117 | // この規則を読み終わると解析終了 118 | // $をラベルにacceptオペレーションを追加 119 | const operation: AcceptOperation = {type: "accept"}; 120 | table_row.set(SYMBOL_EOF, operation); 121 | continue; 122 | } 123 | for (const label of item.lookaheads) { 124 | const operation: ReduceOperation = {type: "reduce", grammar_id: item.rule_id}; 125 | // 既に同じ記号でオペレーションが登録されていないか確認 126 | 127 | if (table_row.has(label)) { 128 | // コンフリクトが発生 129 | flg_conflicted = true; // 構文解析に失敗 130 | const existing_operation = table_row.get(label)!; // 上で.has(label)のチェックを行っているためnon-nullable 131 | const conflicted_operation: ConflictedOperation = {type: "conflict", shift_to: [], reduce_grammar: []}; 132 | if (existing_operation.type == "shift") { 133 | // shift/reduce コンフリクト 134 | 
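						// (Illustrative note: an ambiguous rule such as "E -> E PLUS E" produces exactly this situation —
						// with "E PLUS E" on the stack and another PLUS as lookahead, the table could either shift the
						// PLUS or reduce by the rule. Both candidates are recorded below instead of silently choosing
						// one, so that generateParsingTable() can flag the resulting table as conflicted.)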
conflicted_operation.shift_to = [existing_operation.to]; 135 | conflicted_operation.reduce_grammar = [operation.grammar_id]; 136 | } 137 | else if (existing_operation.type == "reduce") { 138 | // reduce/reduce コンフリクト 139 | conflicted_operation.shift_to = []; 140 | conflicted_operation.reduce_grammar = [existing_operation.grammar_id, operation.grammar_id]; 141 | } 142 | else if (existing_operation.type == "conflict") { 143 | // もっとやばい衝突 144 | conflicted_operation.shift_to = existing_operation.shift_to; 145 | conflicted_operation.reduce_grammar = existing_operation.reduce_grammar.concat([operation.grammar_id]); 146 | } 147 | // とりあえず衝突したオペレーションを登録しておく 148 | table_row.set(label, conflicted_operation); 149 | } 150 | else { 151 | // 衝突しないのでreduceオペレーションを追加 152 | table_row.set(label, operation); 153 | } 154 | } 155 | } 156 | parsing_table.push(table_row); 157 | } 158 | return {table: parsing_table, success: !flg_conflicted}; 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/parsergenerator/symboldiscriminator.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {Token} from "../def/token"; 3 | 4 | /** 5 | * 終端/非終端記号の判別を行う 6 | */ 7 | export class SymbolDiscriminator { 8 | private terminal_symbols: Set; 9 | private nonterminal_symbols: Set; 10 | constructor(grammar: GrammarDefinition) { 11 | this.terminal_symbols = new Set(); 12 | this.nonterminal_symbols = new Set(); 13 | 14 | // 左辺値の登録 15 | for (const rule of grammar.rules) { 16 | const symbol = rule.ltoken; 17 | // 構文規則の左辺に現れる記号は非終端記号 18 | this.nonterminal_symbols.add(symbol); 19 | } 20 | // 右辺値の登録 21 | for (const rule of grammar.rules) { 22 | for (const symbol of rule.pattern) { 23 | if (!this.nonterminal_symbols.has(symbol)) { 24 | // 非終端記号でない(=左辺値に現れない)場合、終端記号である 25 | this.terminal_symbols.add(symbol); 26 | } 27 | } 28 | } 29 | } 30 | /** 31 | * 終端記号の集合をSetで得る 32 | * @param {boolean} prevent_copy trueを与えるとSetをコピーせず返す 33 | * 結果に変更が加えられないと保証される場合に用いる 34 | * @returns {Set} 35 | */ 36 | public getTerminalSymbols(prevent_copy: boolean = false): Set { 37 | if (prevent_copy) return this.terminal_symbols; 38 | // コピーを返す 39 | return new Set(this.terminal_symbols); 40 | } 41 | /** 42 | * 非終端記号の集合をSetで得る 43 | * @param {boolean} prevent_copy trueを与えるとSetをコピーせず返す 44 | * 結果に変更が加えられないと保証される場合に用いる 45 | * @returns {Set} 46 | */ 47 | public getNonterminalSymbols(prevent_copy: boolean = false): Set { 48 | if (prevent_copy) return this.nonterminal_symbols; 49 | // コピーを返す 50 | return new Set(this.nonterminal_symbols); 51 | } 52 | /** 53 | * 与えられた記号が終端記号かどうかを調べる 54 | * @param {Token} symbol 55 | * @returns {boolean} 56 | */ 57 | public isTerminalSymbol(symbol: Token): boolean { 58 | return this.terminal_symbols.has(symbol); 59 | } 60 | /** 61 | * 与えられた記号が非終端記号かどうかを調べる 62 | * @param {Token} symbol 63 | * @returns {boolean} 64 | */ 65 | public isNonterminalSymbol(symbol: Token): boolean { 66 | return this.nonterminal_symbols.has(symbol); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/precompiler/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./precompiler"; 2 | export * from "./ruleparser"; 3 | -------------------------------------------------------------------------------- /src/precompiler/precompiler.ts: -------------------------------------------------------------------------------- 1 | 
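// A minimal usage sketch for the PreCompiler defined below (the output file name and the
// assumption that PreCompiler is re-exported from the package root are illustrative, not part of this file):
//   import {readFileSync, writeFileSync} from "fs";
//   import {PreCompiler} from "lavriapg";
//   const source = new PreCompiler("lavriapg").exec(readFileSync("./language", "utf8"));
//   writeFileSync("./precompiled_parser.ts", source);
// The generated module exports `language`, `parsing_table` and a ready-made `parser`,
// so the parsing table does not have to be rebuilt at runtime.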
import {Language} from "../def/language"; 2 | import {SYMBOL_EOF, Token} from "../def/token"; 3 | import {ParserGenerator} from "../parsergenerator/parsergenerator"; 4 | import {language_parser} from "./ruleparser"; 5 | import {ParsingOperation} from "../def/parsingtable"; 6 | 7 | /** 8 | * 予め構文解析器を生成しておいて利用するためのソースコードを生成する 9 | */ 10 | export class PreCompiler { 11 | /** 12 | * @param import_path パーサジェネレータをimportするためのディレクトリパス 13 | */ 14 | constructor(private import_path: string = "lavriapg") { 15 | if (import_path[import_path.length - 1] != "/") this.import_path += "/"; 16 | } 17 | /** 18 | * 構文ファイルを受け取り、それを処理できるパーサを構築するためのソースコードを返す 19 | * @param {string} input 言語定義文法によって記述された、解析対象となる言語 20 | * @returns {string} 生成されたパーサのソースコード 21 | */ 22 | public exec(input: string): string { 23 | const language: Language = language_parser.parse(input); 24 | const parsing_table = new ParserGenerator(language).getParsingTable(); 25 | let result = ""; 26 | 27 | result += `import {Token, SYMBOL_EOF} from "${this.import_path}def/token"; 28 | import {Language} from "${this.import_path}def/language"; 29 | import {ParsingOperation, ParsingTable} from "${this.import_path}def/parsingtable"; 30 | import {Parser} from "${this.import_path}parser/parser"; 31 | import {ParserFactory} from "${this.import_path}parser/factory"; 32 | 33 | export const language: Language = { 34 | lex: { 35 | rules: [ 36 | ${language.lex.rules.map(({token, pattern}) => { 37 | return `\t\t\t{token: ${token === null ? "null" : `"${token}"`}, ` + 38 | `pattern: ${pattern instanceof RegExp ? pattern : `"${pattern}"`}}`; 39 | }).join(",\n")} 40 | ] 41 | }, 42 | grammar: { 43 | rules: [ 44 | ${language.grammar.rules.map(({ltoken, pattern}) => `\t\t\t{ 45 | ltoken: "${ltoken as string}", 46 | pattern: [${pattern.map((t) => `"${t as string}"`).join(", ")}] 47 | }`).join(",\n")} 48 | ], 49 | start_symbol: "${language.grammar.start_symbol as string}" 50 | } 51 | }; 52 | 53 | export const parsing_table: ParsingTable = [ 54 | ${parsing_table.map((row: Map) => `\tnew Map([ 55 | ${(() => { 56 | let line = ""; 57 | for (const [key, value] of row) { 58 | line += `\t\t[${key === SYMBOL_EOF ? 
"SYMBOL_EOF" : `"${key as string}"`}, ${JSON.stringify(value)}],\n`; 59 | } 60 | return line.slice(0, -2); 61 | })()}`).join("\n\t]),\n")} 62 | ]) 63 | ]; 64 | 65 | export const parser: Parser = ParserFactory.create(language, parsing_table); 66 | `; 67 | return result; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/precompiler/ruleparser.ts: -------------------------------------------------------------------------------- 1 | import {LexDefinition, Language, GrammarDefinition, LexStateLabel, LexState, LexCallback, GrammarCallback} from "../def/language"; 2 | import {ParsingOperation, ParsingTable} from "../def/parsingtable"; 3 | import {SYMBOL_EOF, Token} from "../def/token"; 4 | import {Parser} from "../parser/parser"; 5 | import {ParserGenerator} from "../parsergenerator/parsergenerator"; 6 | 7 | const lex: LexDefinition = { 8 | rules: [ 9 | {token: "EXCLAMATION", pattern: "!"}, 10 | {token: "VBAR", pattern: "|"}, 11 | {token: "DOLLAR", pattern: "$"}, 12 | {token: "COLON", pattern: ":"}, 13 | {token: "SEMICOLON", pattern: ";"}, 14 | {token: "LT", pattern: "<"}, 15 | {token: "GT", pattern: ">"}, 16 | {token: "COMMA", pattern: ","}, 17 | {token: "LEX_BEGIN", pattern: "#lex_begin"}, 18 | {token: "LEX_END", pattern: "#lex_end"}, 19 | {token: "LEX_DEFAULT", pattern: "#lex_default"}, 20 | {token: "START", pattern: "#start"}, 21 | {token: "EXTEND", pattern: "#extend"}, 22 | {token: "BEGIN", pattern: "#begin"}, 23 | {token: "END", pattern: "#end"}, 24 | {token: "DEFAULT", pattern: "#default"}, 25 | {token: "LABEL", pattern: /[a-zA-Z_][a-zA-Z0-9_]*/}, 26 | { 27 | token: "REGEXP", pattern: /\/.*\/[gimuy]*/, 28 | callback: (v) => { 29 | const tmp = v.split("/"); 30 | const flags = tmp[tmp.length - 1]; 31 | const p = v.slice(1, -1 - flags.length); 32 | return ["REGEXP", new RegExp(p, flags)]; 33 | } 34 | }, 35 | {token: "STRING", pattern: /".*"/, callback: (v) => ["STRING", v.slice(1, -1)]}, 36 | {token: "STRING", pattern: /'.*'/, callback: (v) => ["STRING", v.slice(1, -1)]}, 37 | { 38 | token: "START_BLOCK", pattern: /%*{+/, 39 | callback: (value, token, lex) => { 40 | const match = /(%*)({+)/.exec(value)!; 41 | const end_delimiter = "}".repeat(match[2].length) + match[1]!; 42 | lex.callState("callback"); 43 | lex.addRule("body_block", {token: "BODY_BLOCK", pattern: new RegExp(`(?:.|\\s)*?(? 
{ 47 | lex.returnState(); 48 | lex.removeRule("body_block"); 49 | lex.removeRule("end_block"); 50 | } 51 | }); 52 | } 53 | }, 54 | {token: null, pattern: /(\r\n|\r|\n)+/}, 55 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 56 | {token: "INVALID", pattern: /./} 57 | ] 58 | }; 59 | 60 | const makeLexCallback = (body: string): LexCallback => { 61 | return new Function("value", "token", "lex", body) as LexCallback; 62 | }; 63 | 64 | const makeGrammarCallback = (body: string): GrammarCallback => { 65 | return new Function("children", "token", "lexer", body) as GrammarCallback; 66 | }; 67 | 68 | const grammar: GrammarDefinition = { 69 | rules: [ 70 | { 71 | ltoken: "LANGUAGE", 72 | pattern: ["LEX_OPTIONS", "LEX", "EX_CALLBACKS", "GRAMMAR"], 73 | callback: (c) => { 74 | let start_symbol = c[3].start_symbol; 75 | // 開始記号の指定がない場合、最初の規則に設定] 76 | if (start_symbol === null) { 77 | if (c[3].sect.length > 0) start_symbol = c[3].sect[0].ltoken; 78 | else start_symbol = ""; 79 | } 80 | const lex: LexDefinition = {rules: c[1]}; 81 | if (c[0].callbacks !== undefined) { 82 | for (const callback of c[0].callbacks) { 83 | switch (callback.type) { 84 | case "#lex_begin": 85 | lex.begin_callback = callback.callback; 86 | break; 87 | case "#lex_end": 88 | lex.end_callback = callback.callback; 89 | break; 90 | case "#lex_default": 91 | lex.default_callback = callback.callback; 92 | break; 93 | } 94 | } 95 | } 96 | if (c[0].start_state !== undefined) { 97 | lex.start_state = c[0].start_state; 98 | } 99 | if (c[0].states.length > 0) { 100 | lex.states = c[0].states; 101 | } 102 | const grammar: GrammarDefinition = {rules: c[3].grammar, start_symbol}; 103 | if (c[2] !== undefined) { 104 | for (const callback of c[2]) { 105 | switch (callback.type) { 106 | case "#begin": 107 | grammar.begin_callback = callback.callback; 108 | break; 109 | case "#end": 110 | grammar.end_callback = callback.callback; 111 | break; 112 | case "#default": 113 | grammar.default_callback = callback.callback; 114 | break; 115 | } 116 | } 117 | } 118 | return {lex, grammar}; 119 | } 120 | }, 121 | { 122 | ltoken: "LANGUAGE", 123 | pattern: ["LEX_OPTIONS", "LEX", "GRAMMAR"], 124 | callback: (c) => { 125 | let start_symbol = c[2].start_symbol; 126 | // 開始記号の指定がない場合、最初の規則に設定] 127 | if (start_symbol === null) { 128 | if (c[2].sect.length > 0) start_symbol = c[2].sect[0].ltoken; 129 | else start_symbol = ""; 130 | } 131 | const lex: LexDefinition = {rules: c[1]}; 132 | if (c[0].callbacks !== undefined) { 133 | for (const callback of c[0].callbacks) { 134 | switch (callback.type) { 135 | case "#lex_begin": 136 | lex.begin_callback = callback.callback; 137 | break; 138 | case "#lex_end": 139 | lex.end_callback = callback.callback; 140 | break; 141 | case "#lex_default": 142 | lex.default_callback = callback.callback; 143 | break; 144 | } 145 | } 146 | } 147 | if (c[0].start_state !== undefined) { 148 | lex.start_state = c[0].start_state; 149 | } 150 | if (c[0].states.length > 0) { 151 | lex.states = c[0].states; 152 | } 153 | return {lex, grammar: {rules: c[2].grammar, start_symbol: start_symbol}}; 154 | } 155 | }, 156 | { 157 | ltoken: "LEX_OPTIONS", 158 | pattern: ["OPTIONAL_LEX_EX_CALLBACKS", "LEX_STATES"], 159 | callback: (c) => { 160 | const states: Array = []; 161 | const states_set = new Set(); 162 | for (const inherit of c[1].inheritance) { 163 | for (const sub_state of inherit.sub) { 164 | if (states_set.has(inherit.sub)) { 165 | // 既に登録されている場合、一つのstateが複数のstateを継承することはできない 166 | continue; 167 | 
} 168 | states.push({label: sub_state, inheritance: inherit.base}); 169 | states_set.add(sub_state); 170 | } 171 | } 172 | return {callbacks: c[0], start_state: c[1].start_state, states}; 173 | } 174 | }, 175 | { 176 | ltoken: "LEX_STATES", 177 | pattern: ["LEX_STATES", "LEXSTATE_DEFINITIONS"], 178 | callback: ([c1, c2]) => { 179 | if (c2.type === "#start") { 180 | c1.start_state = c2.value; 181 | } 182 | else if (c2.type === "#extend") { 183 | c1.inheritance.push(c2.value); 184 | } 185 | return c1; 186 | } 187 | }, 188 | { 189 | ltoken: "LEX_STATES", 190 | pattern: [], 191 | callback: () => ({start_state: undefined, inheritance: []}) 192 | }, 193 | { 194 | ltoken: "LEXSTATE_DEFINITIONS", 195 | pattern: ["STARTSTATE"], 196 | callback: ([c]) => ({type: "#start", value: c}) 197 | }, 198 | { 199 | ltoken: "LEXSTATE_DEFINITIONS", 200 | pattern: ["STATE_EXTEND"], 201 | callback: ([c]) => ({type: "#extend", value: c}) 202 | }, 203 | { 204 | ltoken: "STARTSTATE", 205 | pattern: ["START", "LEXSTATE"], 206 | callback: (c) => c[1] 207 | }, 208 | { 209 | ltoken: "STATE_EXTEND", 210 | pattern: ["EXTEND", "MULTIPLE_LEXSTATE", "LEXSTATE"], 211 | callback: (c) => ({sub: c[1], base: c[2]}) 212 | }, 213 | { 214 | ltoken: "OPTIONAL_LEX_EX_CALLBACKS", 215 | pattern: ["LEX_EX_CALLBACKS"] 216 | }, 217 | { 218 | ltoken: "OPTIONAL_LEX_EX_CALLBACKS", 219 | pattern: [] 220 | }, 221 | { 222 | ltoken: "LEX_EX_CALLBACKS", 223 | pattern: ["LEX_EX_CALLBACKS", "LEX_EX_CALLBACK"], 224 | callback: (c) => c[0].concat([c[1]]) 225 | }, 226 | { 227 | ltoken: "LEX_EX_CALLBACKS", 228 | pattern: ["LEX_EX_CALLBACK"], 229 | callback: (c) => [c[0]] 230 | }, 231 | { 232 | ltoken: "LEX_EX_CALLBACK", 233 | pattern: ["LEX_EX_CALLBACK_LABEL", "BLOCK"], 234 | callback: (c) => ({type: c[0], callback: makeLexCallback(c[1])}) 235 | }, 236 | { 237 | ltoken: "LEX_EX_CALLBACK_LABEL", 238 | pattern: ["LEX_BEGIN"] 239 | }, 240 | { 241 | ltoken: "LEX_EX_CALLBACK_LABEL", 242 | pattern: ["LEX_END"] 243 | }, 244 | { 245 | ltoken: "LEX_EX_CALLBACK_LABEL", 246 | pattern: ["LEX_DEFAULT"] 247 | }, 248 | { 249 | ltoken: "LEX", 250 | pattern: ["LEX", "LEXSECT"], 251 | callback: (c) => c[0].concat([c[1]]) 252 | }, 253 | { 254 | ltoken: "LEX", 255 | pattern: ["LEXSECT"], 256 | callback: (c) => [c[0]] 257 | }, 258 | { 259 | ltoken: "LEXSECT", 260 | pattern: ["MULTIPLE_LEXSTATE", "LEXLABEL", "LEXDEF", "LEXCALLBACK"], 261 | callback: (c) => (c[3] === undefined ? {token: c[1], pattern: c[2], states: c[0]} : {token: c[1], pattern: c[2], states: c[0], callback: makeLexCallback(c[3])}) 262 | }, 263 | { 264 | ltoken: "LEXSECT", 265 | pattern: ["LEXLABEL", "LEXDEF", "LEXCALLBACK"], 266 | callback: (c) => (c[2] === undefined ? 
{token: c[0], pattern: c[1]} : {token: c[0], pattern: c[1], callback: makeLexCallback(c[2])}) 267 | }, 268 | { 269 | ltoken: "LEXLABEL", 270 | pattern: ["LABEL"] 271 | }, 272 | { 273 | ltoken: "LEXLABEL", 274 | pattern: ["EXCLAMATION"], 275 | callback: () => null 276 | }, 277 | { 278 | ltoken: "LEXLABEL", 279 | pattern: ["EXCLAMATION", "LABEL"], 280 | callback: () => null 281 | }, 282 | { 283 | ltoken: "LEXDEF", 284 | pattern: ["STRING"] 285 | }, 286 | { 287 | ltoken: "LEXDEF", 288 | pattern: ["REGEXP"] 289 | }, 290 | { 291 | ltoken: "MULTIPLE_LEXSTATE", 292 | pattern: ["LT", "LEXSTATE_LIST", "GT"], 293 | callback: (c) => c[1] 294 | }, 295 | { 296 | ltoken: "LEXSTATE_LIST", 297 | pattern: ["LABEL", "COMMA", "LEXSTATE_LIST"], 298 | callback: (c) => [c[0], ...c[2]] 299 | }, 300 | { 301 | ltoken: "LEXSTATE_LIST", 302 | pattern: ["LABEL"], 303 | callback: (c) => [c[0]] 304 | }, 305 | { 306 | ltoken: "LEXSTATE", 307 | pattern: ["LT", "LABEL", "GT"], 308 | callback: (c) => c[1] 309 | }, 310 | { 311 | ltoken: "LEXCALLBACK", 312 | pattern: ["BLOCK"] 313 | }, 314 | { 315 | ltoken: "LEXCALLBACK", 316 | pattern: [] 317 | }, 318 | { 319 | ltoken: "EX_CALLBACKS", 320 | pattern: ["EX_CALLBACKS", "EX_CALLBACK"], 321 | callback: (c) => c[0].concat([c[1]]) 322 | }, 323 | { 324 | ltoken: "EX_CALLBACKS", 325 | pattern: ["EX_CALLBACK"], 326 | callback: (c) => [c[0]] 327 | }, 328 | { 329 | ltoken: "EX_CALLBACK", 330 | pattern: ["EX_CALLBACK_LABEL", "BLOCK"], 331 | callback: (c) => ({type: c[0], callback: makeGrammarCallback(c[1])}) 332 | }, 333 | { 334 | ltoken: "EX_CALLBACK_LABEL", 335 | pattern: ["BEGIN"] 336 | }, 337 | { 338 | ltoken: "EX_CALLBACK_LABEL", 339 | pattern: ["END"] 340 | }, 341 | { 342 | ltoken: "EX_CALLBACK_LABEL", 343 | pattern: ["DEFAULT"] 344 | }, 345 | { 346 | ltoken: "GRAMMAR", 347 | pattern: ["RULES"] 348 | }, 349 | { 350 | ltoken: "RULES", 351 | pattern: ["SECT", "RULES"], 352 | callback: (c) => { 353 | let start_symbol = c[1].start_symbol; 354 | if (c[0].start_symbol !== null) { 355 | start_symbol = c[0].start_symbol; 356 | } 357 | return { 358 | start_symbol, 359 | grammar: c[0].sect.concat(c[1].grammar) 360 | }; 361 | } 362 | }, 363 | { 364 | ltoken: "RULES", 365 | pattern: ["SECT"], 366 | callback: (c) => { 367 | let start_symbol = null; 368 | if (c[0].start_symbol !== null) { 369 | start_symbol = c[0].start_symbol; 370 | } 371 | return { 372 | start_symbol, 373 | grammar: c[0].sect 374 | }; 375 | } 376 | }, 377 | { 378 | ltoken: "SECT", 379 | pattern: ["SECTLABEL", "COLON", "DEF", "SEMICOLON"], 380 | callback: (c) => { 381 | const result = []; 382 | for (const def of c[2]) { 383 | result.push({ltoken: c[0].label, ...def}); 384 | } 385 | return {start_symbol: c[0].start_symbol, sect: result}; 386 | } 387 | }, 388 | { 389 | ltoken: "SECTLABEL", 390 | pattern: ["LABEL"], 391 | callback: (c) => ({start_symbol: null, label: c[0]}) 392 | }, 393 | { 394 | ltoken: "SECTLABEL", 395 | pattern: ["DOLLAR", "LABEL"], 396 | callback: (c) => ({start_symbol: c[1], label: c[1]}) 397 | }, 398 | { 399 | ltoken: "DEF", 400 | pattern: ["PATTERN", "CALLBACK", "VBAR", "DEF"], 401 | callback: (c) => [c[1] === null ? {pattern: c[0]} : {pattern: c[0], callback: makeGrammarCallback(c[1])}].concat(c[3]) 402 | }, 403 | { 404 | ltoken: "DEF", 405 | pattern: ["PATTERN", "CALLBACK"], 406 | callback: (c) => [c[1] === null ? 
{pattern: c[0]} : {pattern: c[0], callback: makeGrammarCallback(c[1])}] 407 | }, 408 | { 409 | ltoken: "PATTERN", 410 | pattern: ["SYMBOLLIST"] 411 | }, 412 | { 413 | ltoken: "PATTERN", 414 | pattern: [], 415 | callback: () => [] 416 | }, 417 | { 418 | ltoken: "SYMBOLLIST", 419 | pattern: ["LABEL", "SYMBOLLIST"], 420 | callback: (c) => [c[0]].concat(c[1]) 421 | }, 422 | { 423 | ltoken: "SYMBOLLIST", 424 | pattern: ["LABEL"], 425 | callback: (c) => [c[0]] 426 | }, 427 | { 428 | ltoken: "CALLBACK", 429 | pattern: ["BLOCK"] 430 | }, 431 | { 432 | ltoken: "CALLBACK", 433 | pattern: [], 434 | callback: () => null 435 | }, 436 | { 437 | ltoken: "BLOCK", 438 | pattern: ["START_BLOCK", "BODY_BLOCK", "END_BLOCK"], 439 | callback: (c) => c[1] 440 | } 441 | ], start_symbol: "LANGUAGE" 442 | }; 443 | 444 | /** 445 | * 言語定義文法の言語定義 446 | * @type Language 447 | */ 448 | export const language_language: Language = {lex: lex, grammar: grammar}; 449 | 450 | // 予めParsingTableを用意しておくことで高速化 451 | /** 452 | * 言語定義文法の言語定義、の構文解析表 453 | * @type ParsingTable 454 | */ 455 | export const language_parsing_table: ParsingTable = [ 456 | new Map([ 457 | ["LANGUAGE", {type: "goto", to: 1}], 458 | ["LEX", {type: "goto", to: 2}], 459 | ["LEXSECT", {type: "goto", to: 3}], 460 | ["LEXLABEL", {type: "goto", to: 4}], 461 | ["LABEL", {type: "shift", to: 5}], 462 | ["EXCLAMATION", {type: "shift", to: 6}]]), 463 | new Map([ 464 | [SYMBOL_EOF, {type: "accept"}]]), 465 | new Map([ 466 | ["GRAMMAR", {type: "goto", to: 7}], 467 | ["LEXSECT", {type: "goto", to: 8}], 468 | ["SECT", {type: "goto", to: 9}], 469 | ["SECTLABEL", {type: "goto", to: 10}], 470 | ["LABEL", {type: "shift", to: 11}], 471 | ["DOLLAR", {type: "shift", to: 12}], 472 | ["LEXLABEL", {type: "goto", to: 4}], 473 | ["EXCLAMATION", {type: "shift", to: 6}]]), 474 | new Map([ 475 | ["LABEL", {type: "reduce", grammar_id: 2}], 476 | ["DOLLAR", {type: "reduce", grammar_id: 2}], 477 | ["EXCLAMATION", {type: "reduce", grammar_id: 2}]]), 478 | new Map([ 479 | ["LEXDEF", {type: "goto", to: 13}], 480 | ["STRING", {type: "shift", to: 14}], 481 | ["REGEXP", {type: "shift", to: 15}]]), 482 | new Map([ 483 | ["STRING", {type: "reduce", grammar_id: 4}], 484 | ["REGEXP", {type: "reduce", grammar_id: 4}]]), 485 | new Map([ 486 | ["LABEL", {type: "shift", to: 16}], 487 | ["STRING", {type: "reduce", grammar_id: 5}], 488 | ["REGEXP", {type: "reduce", grammar_id: 5}]]), 489 | new Map([ 490 | [SYMBOL_EOF, {type: "reduce", grammar_id: 0}]]), 491 | new Map([ 492 | ["LABEL", {type: "reduce", grammar_id: 1}], 493 | ["DOLLAR", {type: "reduce", grammar_id: 1}], 494 | ["EXCLAMATION", {type: "reduce", grammar_id: 1}]]), 495 | new Map([ 496 | ["SECT", {type: "goto", to: 9}], 497 | ["SECTLABEL", {type: "goto", to: 10}], 498 | ["LABEL", {type: "shift", to: 17}], 499 | ["DOLLAR", {type: "shift", to: 12}], 500 | ["GRAMMAR", {type: "goto", to: 18}], 501 | [SYMBOL_EOF, {type: "reduce", grammar_id: 10}]]), 502 | new Map([ 503 | ["COLON", {type: "shift", to: 19}]]), 504 | new Map([ 505 | ["COLON", {type: "reduce", grammar_id: 12}], 506 | ["STRING", {type: "reduce", grammar_id: 4}], 507 | ["REGEXP", {type: "reduce", grammar_id: 4}]]), 508 | new Map([ 509 | ["LABEL", {type: "shift", to: 20}]]), 510 | new Map([ 511 | ["LABEL", {type: "reduce", grammar_id: 3}], 512 | ["DOLLAR", {type: "reduce", grammar_id: 3}], 513 | ["EXCLAMATION", {type: "reduce", grammar_id: 3}]]), 514 | new Map([ 515 | ["LABEL", {type: "reduce", grammar_id: 7}], 516 | ["DOLLAR", {type: "reduce", grammar_id: 7}], 517 | ["EXCLAMATION", {type: 
"reduce", grammar_id: 7}]]), 518 | new Map([ 519 | ["LABEL", {type: "reduce", grammar_id: 8}], 520 | ["DOLLAR", {type: "reduce", grammar_id: 8}], 521 | ["EXCLAMATION", {type: "reduce", grammar_id: 8}]]), 522 | new Map([ 523 | ["STRING", {type: "reduce", grammar_id: 6}], 524 | ["REGEXP", {type: "reduce", grammar_id: 6}]]), 525 | new Map([ 526 | ["COLON", {type: "reduce", grammar_id: 12}]]), 527 | new Map([ 528 | [SYMBOL_EOF, {type: "reduce", grammar_id: 9}]]), 529 | new Map([ 530 | ["DEF", {type: "goto", to: 21}], 531 | ["PATTERN", {type: "goto", to: 22}], 532 | ["SYMBOLLIST", {type: "goto", to: 23}], 533 | ["LABEL", {type: "shift", to: 24}], 534 | ["SEMICOLON", {type: "reduce", grammar_id: 17}], 535 | ["VBAR", {type: "reduce", grammar_id: 17}]]), 536 | new Map([ 537 | ["COLON", {type: "reduce", grammar_id: 13}]]), 538 | new Map([ 539 | ["SEMICOLON", {type: "shift", to: 25}]]), 540 | new Map([ 541 | ["VBAR", {type: "shift", to: 26}], 542 | ["SEMICOLON", {type: "reduce", grammar_id: 15}]]), 543 | new Map([ 544 | ["SEMICOLON", {type: "reduce", grammar_id: 16}], 545 | ["VBAR", {type: "reduce", grammar_id: 16}]]), 546 | new Map([ 547 | ["LABEL", {type: "shift", to: 24}], 548 | ["SYMBOLLIST", {type: "goto", to: 27}], 549 | ["SEMICOLON", {type: "reduce", grammar_id: 19}], 550 | ["VBAR", {type: "reduce", grammar_id: 19}]]), 551 | new Map([ 552 | [SYMBOL_EOF, {type: "reduce", grammar_id: 11}], 553 | ["LABEL", {type: "reduce", grammar_id: 11}], 554 | ["DOLLAR", {type: "reduce", grammar_id: 11}]]), 555 | new Map([ 556 | ["PATTERN", {type: "goto", to: 22}], 557 | ["DEF", {type: "goto", to: 28}], 558 | ["SYMBOLLIST", {type: "goto", to: 23}], 559 | ["LABEL", {type: "shift", to: 24}], 560 | ["SEMICOLON", {type: "reduce", grammar_id: 17}], 561 | ["VBAR", {type: "reduce", grammar_id: 17}]]), 562 | new Map([ 563 | ["SEMICOLON", {type: "reduce", grammar_id: 18}], 564 | ["VBAR", {type: "reduce", grammar_id: 18}]]), 565 | new Map([ 566 | ["SEMICOLON", {type: "reduce", grammar_id: 14}]]) 567 | ]; 568 | 569 | /** 570 | * 言語定義ファイルを読み込むための構文解析器 571 | * @type {Parser} 572 | */ 573 | 574 | // language_parsing_tableの用意がまだなので直接生成する 575 | // export const language_parser: Parser = ParserFactory.create(language_language, language_parsing_table); 576 | export const language_parser: Parser = new ParserGenerator(language_language).getParser(); 577 | -------------------------------------------------------------------------------- /src/sample.ts: -------------------------------------------------------------------------------- 1 | import {readFileSync} from "fs"; 2 | import {ParserGenerator} from "./parsergenerator/parsergenerator"; 3 | import {Language, LexDefinition, GrammarDefinition} from "./def/language"; 4 | import {ParserFactory} from "./parser/factory"; 5 | 6 | const input = readFileSync("/dev/stdin", "utf8"); 7 | 8 | const grammar: GrammarDefinition = { 9 | rules: [ 10 | { 11 | ltoken: "EXP", 12 | pattern: ["EXP", "PLUS", "TERM"], 13 | callback: (c) => c[0] + c[2] 14 | }, 15 | { 16 | ltoken: "EXP", 17 | pattern: ["TERM"] 18 | }, 19 | { 20 | ltoken: "TERM", 21 | pattern: ["TERM", "ASTERISK", "ATOM"], 22 | callback: (c) => c[0] * c[2] 23 | }, 24 | { 25 | ltoken: "TERM", 26 | pattern: ["ATOM"] 27 | }, 28 | { 29 | ltoken: "ATOM", 30 | pattern: ["DIGITS"], 31 | callback: (c) => +c[0] 32 | }, 33 | { 34 | ltoken: "ATOM", 35 | pattern: ["LPAREN", "EXP", "RPAREN"], 36 | callback: (c) => c[1] 37 | } 38 | ], 39 | start_symbol: "EXP" 40 | }; 41 | const lex: LexDefinition = { 42 | rules: [ 43 | {token: "DIGITS", pattern: 
/[1-9][0-9]*/}, 44 | {token: "PLUS", pattern: "+"}, 45 | {token: "ASTERISK", pattern: "*"}, 46 | {token: "LPAREN", pattern: "("}, 47 | {token: "RPAREN", pattern: ")"}, 48 | {token: null, pattern: /(\r\n|\r|\n)+/}, 49 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 50 | {token: "INVALID", pattern: /./} 51 | ] 52 | }; 53 | const language: Language = { 54 | lex: lex, 55 | grammar: grammar 56 | }; 57 | 58 | console.time("process"); 59 | console.log(JSON.stringify(ParserFactory.create(language, new ParserGenerator(language).getParsingTable()).parse(input), undefined, 2)); 60 | console.timeEnd("process"); 61 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "strictNullChecks": true, 4 | "noImplicitAny": true, 5 | "target": "es6", 6 | "module": "CommonJS", 7 | "outDir": "dist/", 8 | "declaration": true, 9 | "noEmitOnError": true 10 | }, 11 | "include": [ 12 | "src/**/*.ts" 13 | ], 14 | "exclude": [ 15 | "node_modules", 16 | "dist" 17 | ] 18 | } -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaultSeverity": "error", 3 | "rules": { 4 | "adjacent-overload-signatures": true, 5 | "align": [true, "parameters", "statements"], 6 | "array-type": false, 7 | "arrow-parens": true, 8 | "arrow-return-shorthand": true, 9 | "ban-types": [ 10 | true, 11 | ["Object", "Avoid using the `Object` type. Did you mean `object`?"], 12 | ["Function", "Avoid using the `Function` type. Prefer a specific function type, like `() => void`."], 13 | ["Boolean", "Avoid using the `Boolean` type. Did you mean `boolean`?"], 14 | ["Number", "Avoid using the `Number` type. Did you mean `number`?"], 15 | ["String", "Avoid using the `String` type. Did you mean `string`?"], 16 | ["Symbol", "Avoid using the `Symbol` type. 
Did you mean `symbol`?"] 17 | ], 18 | "callable-types": true, 19 | "class-name": true, 20 | "comment-format": [true, "check-space"], 21 | "curly": [true, "ignore-same-line"], 22 | "eofline": true, 23 | "forin": true, 24 | "import-spacing": true, 25 | "indent": [true, "tabs"], 26 | "interface-name": false, 27 | "interface-over-type-literal": false, 28 | "jsdoc-format": true, 29 | "label-position": true, 30 | "max-classes-per-file": false, 31 | "max-line-length": false, 32 | "member-access": false, 33 | "member-ordering": false, 34 | "new-parens": true, 35 | "no-angle-bracket-type-assertion": true, 36 | "no-arg": true, 37 | "no-consecutive-blank-lines": [true, 2], 38 | "no-console": false, 39 | "no-duplicate-variable": true, 40 | "no-empty": true, 41 | "no-eval": true, 42 | "no-internal-module": true, 43 | "no-trailing-whitespace": true, 44 | "no-var-keyword": true, 45 | "object-literal-shorthand": false, 46 | "object-literal-sort-keys": false, 47 | "one-line": [true, "check-whitespace", "check-open-brace"], 48 | "one-variable-per-declaration": true, 49 | "ordered-imports": false, 50 | "prefer-const": true, 51 | "prefer-for-of": false, 52 | "quotemark": [true, "double", "avoid-escape"], 53 | "semicolon": [true, "always"], 54 | "trailing-comma": [ 55 | true, { 56 | "multiline": "never", 57 | "singleline": "never" 58 | } 59 | ], 60 | "triple-equals": false, 61 | "use-isnan": true, 62 | "variable-name": [true, "ban-keywords"], 63 | "whitespace": [ 64 | true, 65 | "check-branch", 66 | "check-decl", 67 | "check-separator", 68 | "check-type" 69 | ] 70 | } 71 | } --------------------------------------------------------------------------------
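
The sources above define both `language_language` and a precomputed `language_parsing_table`, and the commented-out line next to `language_parser` hints that `ParserFactory.create` can consume that table directly instead of rebuilding it with `ParserGenerator` at every startup (the same pattern `src/sample.ts` uses with a freshly generated table). The following is a minimal, hypothetical sketch, not a file in this repository; it assumes the two exports live in `src/precompiler/ruleparser.ts` (the file listed just before `src/sample.ts` in the tree) and that the script sits next to `src/sample.ts` so the relative imports resolve.

```TypeScript
// Hypothetical sketch (not part of the repository): reuse the precomputed
// parsing table instead of running ParserGenerator at startup.
// Import paths assume this file lives next to src/sample.ts and that
// language_language / language_parsing_table are exported from
// src/precompiler/ruleparser.ts, as shown above.
import {readFileSync} from "fs";
import {ParserFactory} from "./parser/factory";
import {language_language, language_parsing_table} from "./precompiler/ruleparser";

// Build the parser directly from the precompiled table; no LALR(1) table
// construction happens at runtime.
const parser = ParserFactory.create(language_language, language_parsing_table);

// Parse a language definition file read from stdin, like src/sample.ts does.
const input = readFileSync("/dev/stdin", "utf8");
console.log(JSON.stringify(parser.parse(input), undefined, 2));
```

Trading a larger source file for a skipped table-generation step is the speed-up mentioned in the comment above `language_parsing_table`; the bundled `language_parser` still takes the `ParserGenerator` path.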