├── .gitignore ├── .idea └── .gitignore ├── .npmignore ├── LICENSE ├── README.md ├── __tests__ ├── __snapshots__ │ └── language_parsing_test.ts.snap ├── broken_langage_test.ts ├── data │ ├── broken_language.ts │ ├── json_language │ ├── json_sample.json │ ├── sample_language.ts │ └── tmp │ │ └── .gitkeep ├── json_test.ts ├── language_parsing_test.ts ├── lexer │ ├── __snapshots__ │ │ ├── controller_test.ts.snap │ │ └── lexer_test.ts.snap │ ├── controller_test.ts │ └── lexer_test.ts ├── parser │ └── parser_test.ts ├── parsergenerator │ ├── closureitem_test.ts │ ├── closureset_test.ts │ ├── firstset_test.ts │ ├── nullableset_test.ts │ ├── symboldiscriminator_test.ts │ └── syntaxdb_test.ts ├── precompiler │ └── precompiler_test.ts └── tsconfig.json ├── gulpfile.js ├── json_language ├── language ├── package-lock.json ├── package.json ├── src ├── def │ ├── index.ts │ ├── language.ts │ ├── parsingtable.ts │ └── token.ts ├── index.ts ├── lexer │ ├── index.ts │ ├── lexcontroller.ts │ └── lexer.ts ├── parser │ ├── ast.ts │ ├── factory.ts │ ├── index.ts │ └── parser.ts ├── parsergenerator │ ├── closureitem.ts │ ├── closureset.ts │ ├── dfagenerator.ts │ ├── firstset.ts │ ├── grammardb.ts │ ├── index.ts │ ├── nullableset.ts │ ├── parsergenerator.ts │ └── symboldiscriminator.ts ├── precompiler │ ├── index.ts │ ├── precompiler.ts │ └── ruleparser.ts └── sample.ts ├── tsconfig.json └── tslint.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | coverage/ 4 | docs/ 5 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | .idea/ 3 | node_modules/ 4 | __tests__/ 5 | coverage/ 6 | docs/ 7 | .gitignore 8 | .npmignore 9 | gulpfile.js 10 | package-lock.json 11 | tsconfig.json 12 | tslint.json 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Tatamo 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LavRia 2 | TypeScript LALR(1) Parser Generator 3 | 4 | ## Installation 5 | ``` 6 | $ mkdir pg 7 | $ cd pg 8 | $ npm init 9 | $ npm install lavriapg --save 10 | ``` 11 | 12 | ### Examples 13 | ``` 14 | $ echo 1+1 | node ./node_modules/lavriapg/dist/sample.js 15 | ``` 16 | 17 | [language](/language) and [json_language](/json_language) are sample language definitions. 18 | `language_parser` parses the language definition file to generate a parser. 19 | 20 | Run the following code to see how it works: 21 | ```TypeScript 22 | // TypeScript 23 | import {language_parser, ParserGenerator} from "lavriapg"; 24 | import {readFileSync} from "fs"; 25 | 26 | const input = `{ 27 | "foo": 123.45, 28 | "bar": [ true, false, null ], 29 | "baz": { 30 | "nested": "hello" 31 | }, 32 | "x": "/1{}" 33 | }`; 34 | 35 | const language = language_parser.parse(readFileSync("./node_modules/lavriapg/json_language", "utf8") as string); 36 | const parser = new ParserGenerator(language).getParser(); 37 | console.log(JSON.stringify(parser.parse(input), undefined, 2)); 38 | ``` 39 | 40 | ```JavaScript 41 | // JavaScript (CommonJS) 42 | const pg = require("lavriapg"); 43 | const fs = require("fs"); 44 | const input = `{ 45 | "foo": 123.45, 46 | "bar": [ true, false, null ], 47 | "baz": { 48 | "nested": "hello" 49 | }, 50 | "x": "/1{}" 51 | }`; 52 | const language = pg.language_parser.parse(fs.readFileSync("./node_modules/lavriapg/json_language", "utf8")); 53 | const parser = new pg.ParserGenerator(language).getParser(); 54 | console.log(JSON.stringify(parser.parse(input), undefined, 2)); 55 | ``` 56 | 57 | ## Usage 58 | [language](/language) is its own language definition: 59 | ```TypeScript 60 | // TypeScript 61 | const input = readFileSync("./node_modules/lavriapg/language", "utf8") as string; 62 | const language = language_parser.parse(input); 63 | 64 | const replacer = (key: string, value: any) => { 65 | if (typeof value === "function") return value.toString(); 66 | if (value instanceof RegExp) return value.toString(); 67 | return value; 68 | }; 69 | 70 | console.log(JSON.stringify(language, replacer, 2)); 71 | 72 | const parser = new ParserGenerator(language).getParser(); 73 | console.log(JSON.stringify(parser.parse(input), replacer, 2)); 74 | ``` 75 | 76 | `language_parser` parses [language](/language) to generate a language definition. 77 | The parser generated from that definition behaves just like `language_parser`. 
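A language definition can also be written inline. The definition format is plain text: lexer rules come first (a token name followed by a string or regular-expression pattern; a leading `!` is used for patterns whose matches are skipped, compare `__tests__/data/json_language`), then grammar rules, with `$` marking the start symbol. Below is a minimal sketch built around the calculator definition that ships with the test data (`test_calc_language_raw_string` in `__tests__/data/sample_language.ts`); the input `"(1+2)*3"` is only illustrative, and since this definition declares no rule callbacks the printed result is whatever the default reduce behavior produces.
```TypeScript
// TypeScript — a minimal sketch; the definition text is copied from
// test_calc_language_raw_string in __tests__/data/sample_language.ts.
import {language_parser, ParserGenerator} from "lavriapg";

const calc_definition = `DIGITS /[1-9][0-9]*/
PLUS "+"
ASTERISK "*"
LPAREN "("
RPAREN ")"
!ENDLINE /(\\r\\n|\\r|\\n)+/
!WHITESPACE /[ \\f\\t\\v\\u00a0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\ufeff]+/
INVALID /./

$EXP : EXP PLUS TERM | TERM;
TERM : TERM ASTERISK ATOM | ATOM;
ATOM : DIGITS | LPAREN EXP RPAREN;
`;

// Same flow as the examples above: parse the definition, then generate a parser from it.
const calc_language = language_parser.parse(calc_definition);
const calc_parser = new ParserGenerator(calc_language).getParser();
// "(1+2)*3" is an illustrative input; no callbacks are attached, so this prints
// the default parse result rather than an evaluated number.
console.log(JSON.stringify(calc_parser.parse("(1+2)*3"), undefined, 2));
```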
78 | -------------------------------------------------------------------------------- /__tests__/__snapshots__/language_parsing_test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`syntax functions test #extend 1`] = ` 4 | Array [ 5 | Object { 6 | "token": "A", 7 | "value": "a", 8 | }, 9 | Object { 10 | "token": "B", 11 | "value": "b", 12 | }, 13 | Object { 14 | "token": "C", 15 | "value": "c", 16 | }, 17 | Object { 18 | "token": Symbol(EOF), 19 | "value": "", 20 | }, 21 | ] 22 | `; 23 | 24 | exports[`syntax functions test #extend 2`] = ` 25 | Object { 26 | "grammar": Object { 27 | "rules": Array [ 28 | Object { 29 | "ltoken": "S", 30 | "pattern": Array [ 31 | "A", 32 | "B", 33 | "C", 34 | ], 35 | }, 36 | ], 37 | "start_symbol": "S", 38 | }, 39 | "lex": Object { 40 | "rules": Array [ 41 | Object { 42 | "pattern": /a/, 43 | "states": Array [ 44 | "state3", 45 | ], 46 | "token": "A", 47 | }, 48 | Object { 49 | "pattern": /b/, 50 | "states": Array [ 51 | "state2", 52 | ], 53 | "token": "B", 54 | }, 55 | Object { 56 | "pattern": /c/, 57 | "states": Array [ 58 | "default", 59 | ], 60 | "token": "C", 61 | }, 62 | ], 63 | "start_state": "state3", 64 | "states": Array [ 65 | Object { 66 | "inheritance": "default", 67 | "label": "state1", 68 | }, 69 | Object { 70 | "inheritance": "default", 71 | "label": "state2", 72 | }, 73 | Object { 74 | "inheritance": "state2", 75 | "label": "state3", 76 | }, 77 | ], 78 | }, 79 | } 80 | `; 81 | 82 | exports[`syntax functions test #start 1`] = ` 83 | Object { 84 | "grammar": Object { 85 | "rules": Array [ 86 | Object { 87 | "ltoken": "S", 88 | "pattern": Array [ 89 | "A", 90 | "B", 91 | ], 92 | }, 93 | ], 94 | "start_symbol": "S", 95 | }, 96 | "lex": Object { 97 | "rules": Array [ 98 | Object { 99 | "pattern": /a/, 100 | "states": Array [ 101 | "default", 102 | ], 103 | "token": "A", 104 | }, 105 | Object { 106 | "pattern": /a/, 107 | "states": Array [ 108 | "state1", 109 | "state2", 110 | ], 111 | "token": "A2", 112 | }, 113 | Object { 114 | "pattern": /b/, 115 | "token": "B", 116 | }, 117 | ], 118 | "start_state": "state1", 119 | }, 120 | } 121 | `; 122 | 123 | exports[`syntax functions test callback delimiters 1`] = ` 124 | Object { 125 | "grammar": Object { 126 | "rules": Array [ 127 | Object { 128 | "callback": [Function], 129 | "ltoken": "S", 130 | "pattern": Array [ 131 | "T", 132 | ], 133 | }, 134 | Object { 135 | "callback": [Function], 136 | "ltoken": "T", 137 | "pattern": Array [ 138 | "E", 139 | ], 140 | }, 141 | Object { 142 | "callback": [Function], 143 | "ltoken": "E", 144 | "pattern": Array [], 145 | }, 146 | Object { 147 | "ltoken": "E", 148 | "pattern": Array [ 149 | "A", 150 | ], 151 | }, 152 | ], 153 | "start_symbol": "S", 154 | }, 155 | "lex": Object { 156 | "rules": Array [ 157 | Object { 158 | "callback": [Function], 159 | "pattern": "a", 160 | "token": "A", 161 | }, 162 | ], 163 | }, 164 | } 165 | `; 166 | 167 | exports[`syntax functions test callback delimiters 2`] = ` 168 | Array [ 169 | "function anonymous(children,token,lexer 170 | ) { 171 | const s = {}; 172 | }", 173 | "function anonymous(children,token,lexer 174 | ) { 175 | const t = \\"}%, }}%, }}%%, }%%%, }}%%%\\"; 176 | }", 177 | "function anonymous(children,token,lexer 178 | ) { 179 | const e = \\"}%\\"+\\"}}\\"; 180 | }", 181 | undefined, 182 | ] 183 | `; 184 | 185 | exports[`syntax functions test callbacks 1`] = ` 186 | Object { 187 | "grammar": Object { 188 | "rules": 
Array [ 189 | Object { 190 | "callback": [Function], 191 | "ltoken": "S", 192 | "pattern": Array [ 193 | "T", 194 | ], 195 | }, 196 | Object { 197 | "callback": [Function], 198 | "ltoken": "T", 199 | "pattern": Array [ 200 | "A", 201 | ], 202 | }, 203 | Object { 204 | "callback": [Function], 205 | "ltoken": "T", 206 | "pattern": Array [ 207 | "E", 208 | ], 209 | }, 210 | Object { 211 | "callback": [Function], 212 | "ltoken": "T", 213 | "pattern": Array [], 214 | }, 215 | Object { 216 | "callback": [Function], 217 | "ltoken": "E", 218 | "pattern": Array [], 219 | }, 220 | Object { 221 | "ltoken": "E", 222 | "pattern": Array [ 223 | "B", 224 | ], 225 | }, 226 | ], 227 | "start_symbol": "S", 228 | }, 229 | "lex": Object { 230 | "rules": Array [ 231 | Object { 232 | "callback": [Function], 233 | "pattern": "a", 234 | "token": "A", 235 | }, 236 | Object { 237 | "callback": [Function], 238 | "pattern": /b/, 239 | "token": "B", 240 | }, 241 | ], 242 | }, 243 | } 244 | `; 245 | 246 | exports[`syntax functions test callbacks 2`] = ` 247 | Array [ 248 | "function anonymous(children,token,lexer 249 | ) { 250 | callback_of_S(); 251 | }", 252 | "function anonymous(children,token,lexer 253 | ) { 254 | callback_of_T_1(); 255 | }", 256 | "function anonymous(children,token,lexer 257 | ) { 258 | callback_of_T_2(); 259 | }", 260 | "function anonymous(children,token,lexer 261 | ) { 262 | callback_of_T_3(); 263 | }", 264 | "function anonymous(children,token,lexer 265 | ) { 266 | callback_of_E(); 267 | }", 268 | undefined, 269 | ] 270 | `; 271 | 272 | exports[`syntax functions test ex-callbacks 1`] = ` 273 | Object { 274 | "grammar": Object { 275 | "begin_callback": [Function], 276 | "default_callback": [Function], 277 | "end_callback": [Function], 278 | "rules": Array [ 279 | Object { 280 | "ltoken": "S", 281 | "pattern": Array [ 282 | "A", 283 | ], 284 | }, 285 | ], 286 | "start_symbol": "S", 287 | }, 288 | "lex": Object { 289 | "begin_callback": [Function], 290 | "default_callback": [Function], 291 | "end_callback": [Function], 292 | "rules": Array [ 293 | Object { 294 | "pattern": "a", 295 | "token": "A", 296 | }, 297 | ], 298 | }, 299 | } 300 | `; 301 | 302 | exports[`syntax functions test ex-callbacks 2`] = ` 303 | "function anonymous(value,token,lex 304 | ) { 305 | lex_begin_callback(); 306 | }" 307 | `; 308 | 309 | exports[`syntax functions test ex-callbacks 3`] = ` 310 | "function anonymous(value,token,lex 311 | ) { 312 | lex_default_callback(); 313 | }" 314 | `; 315 | 316 | exports[`syntax functions test ex-callbacks 4`] = ` 317 | "function anonymous(value,token,lex 318 | ) { 319 | lex_end_callback(); 320 | }" 321 | `; 322 | 323 | exports[`syntax functions test ex-callbacks 5`] = ` 324 | "function anonymous(children,token,lexer 325 | ) { 326 | grammar_begin_callback(); 327 | }" 328 | `; 329 | 330 | exports[`syntax functions test ex-callbacks 6`] = ` 331 | "function anonymous(children,token,lexer 332 | ) { 333 | grammar_default_callback(); 334 | }" 335 | `; 336 | 337 | exports[`syntax functions test ex-callbacks 7`] = ` 338 | "function anonymous(children,token,lexer 339 | ) { 340 | grammar_end_callback(); 341 | }" 342 | `; 343 | 344 | exports[`syntax functions test lex-state 1`] = ` 345 | Object { 346 | "grammar": Object { 347 | "rules": Array [ 348 | Object { 349 | "ltoken": "S", 350 | "pattern": Array [ 351 | "A", 352 | "B2", 353 | "C", 354 | ], 355 | }, 356 | ], 357 | "start_symbol": "S", 358 | }, 359 | "lex": Object { 360 | "rules": Array [ 361 | Object { 362 | "pattern": /a/, 363 | "token": "A", 
364 | }, 365 | Object { 366 | "pattern": /b/, 367 | "states": Array [ 368 | "state1", 369 | "state2", 370 | ], 371 | "token": "B", 372 | }, 373 | Object { 374 | "pattern": /b/, 375 | "states": Array [ 376 | "default", 377 | ], 378 | "token": "B2", 379 | }, 380 | Object { 381 | "pattern": /c/, 382 | "token": "C", 383 | }, 384 | ], 385 | }, 386 | } 387 | `; 388 | -------------------------------------------------------------------------------- /__tests__/broken_langage_test.ts: -------------------------------------------------------------------------------- 1 | import {ParserGenerator} from "../src/parsergenerator/parsergenerator"; 2 | import {test_broken_language} from "./data/broken_language"; 3 | 4 | describe("Calculator test with broken language", () => { 5 | // TODO: find a way to detect that the parser is broken (other than console output) 6 | const pg = new ParserGenerator(test_broken_language); 7 | const parser = pg.getParser(); 8 | test("parsing table is broken", () => { 9 | expect(pg.isConflicted()).toBe(true); 10 | expect(pg.getTableType()).toBe("CONFLICTED"); 11 | }); 12 | test('"1+1" equals 2', () => { 13 | expect(parser.parse("1+1")).toBe(2); 14 | }); 15 | test('"( 1+1 )*3 + ( (1+1) * (1+2*3+4) )\\n" equals 28 (expected to fail)', () => { 16 | expect(parser.parse("( 1+1 )*3 + ( (1+1) * (1+2*3+4) )\n")).not.toBe(28); 17 | }); 18 | }); 19 | -------------------------------------------------------------------------------- /__tests__/data/broken_language.ts: -------------------------------------------------------------------------------- 1 | import {Language, LexDefinition, GrammarDefinition} from "../../src/def/language"; 2 | 3 | export const test_broken_grammar: GrammarDefinition = { 4 | rules: [ 5 | { 6 | ltoken: "EXP", 7 | pattern: ["EXP", "PLUS", "EXP"], 8 | callback: (c) => c[0] + c[2] 9 | }, 10 | { 11 | ltoken: "EXP", 12 | pattern: ["TERM"], 13 | callback: (c) => c[0] 14 | }, 15 | { 16 | ltoken: "TERM", 17 | pattern: ["TERM", "ASTERISK", "ATOM"], 18 | callback: (c) => c[0] * c[2] 19 | }, 20 | { 21 | ltoken: "TERM", 22 | pattern: ["ATOM"], 23 | callback: (c) => c[0] 24 | }, 25 | { 26 | ltoken: "ATOM", 27 | pattern: ["DIGITS"], 28 | callback: (c) => +c[0] 29 | }, 30 | { 31 | ltoken: "ATOM", 32 | pattern: ["LPAREN", "EXP", "RPAREN"], 33 | callback: (c) => c[1] 34 | } 35 | ], 36 | start_symbol: "EXP" 37 | }; 38 | 39 | export const test_broken_lex: LexDefinition = { 40 | rules: [ 41 | {token: "DIGITS", pattern: /[1-9][0-9]*/}, 42 | {token: "PLUS", pattern: "+"}, 43 | {token: "ASTERISK", pattern: "*"}, 44 | {token: "LPAREN", pattern: "("}, 45 | {token: "RPAREN", pattern: ")"}, 46 | {token: null, pattern: /(\r\n|\r|\n)+/}, 47 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 48 | {token: "INVALID", pattern: /./} 49 | ] 50 | }; 51 | 52 | export const test_broken_language: Language = { 53 | lex: test_broken_lex, 54 | grammar: test_broken_grammar 55 | }; 56 | -------------------------------------------------------------------------------- /__tests__/data/json_language: -------------------------------------------------------------------------------- 1 | true "true" 2 | false "false" 3 | null "null" 4 | lbrace "{" 5 | rbrace "}" 6 | lbracket "[" 7 | rbracket "]" 8 | colon ":" 9 | comma "," 10 | digit1_9 /[1-9]/ 11 | digit0 /0/ 12 | minus "-" 13 | period "." 14 | string /".*?"/ 15 | ! /(\r\n|\r|\n)+/ 16 | ! 
/[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/ 17 | invalid /./ 18 | 19 | digit : digit1_9 | digit0; 20 | digits : digit | digit digits; 21 | int : digit | digit1_9 digits | 22 | minus digit | minus digit1_9 digits; 23 | frac : period digits; 24 | number : int | int frac; 25 | 26 | object : lbrace rbrace | lbrace members rbrace; 27 | members : pair | pair comma members; 28 | pair : string colon value; 29 | array : lbracket rbracket | lbracket elements rbracket; 30 | elements : value | value comma elements; 31 | $value : string | number | object | array | true | false | null; 32 | -------------------------------------------------------------------------------- /__tests__/data/json_sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "meta": { 3 | "name": "Freeciv2.5 Classic(English)", 4 | "language": "en", 5 | "ruleset": "classic", 6 | "freeciv_version": "2.5" 7 | }, 8 | "units": [ 9 | { 10 | "id": "warriors", 11 | "label": "Warriors", 12 | "label_detail": "Warriors", 13 | "phonetics": [ 14 | "warriors" 15 | ], 16 | "class": "land", 17 | "flags": [], 18 | "hp": 10, 19 | "attack": 1, 20 | "defence": 1, 21 | "firepower": 1 22 | }, 23 | { 24 | "id": "phalanx", 25 | "label": "Phalanx", 26 | "label_detail": "Phalanx", 27 | "phonetics": [ 28 | "phalanx" 29 | ], 30 | "class": "land", 31 | "flags": [], 32 | "hp": 10, 33 | "attack": 1, 34 | "defence": 2, 35 | "firepower": 1 36 | }, 37 | { 38 | "id": "pikemen", 39 | "label": "Pikemen", 40 | "label_detail": "Pikemen", 41 | "phonetics": [ 42 | "pikemen" 43 | ], 44 | "class": "land", 45 | "flags": [ 46 | "pikemen" 47 | ], 48 | "hp": 10, 49 | "attack": 1, 50 | "defence": 2, 51 | "firepower": 1 52 | }, 53 | { 54 | "id": "archers", 55 | "label": "Archers", 56 | "label_detail": "Archers", 57 | "phonetics": [ 58 | "archers" 59 | ], 60 | "class": "land", 61 | "flags": [], 62 | "hp": 10, 63 | "attack": 3, 64 | "defence": 2, 65 | "firepower": 1 66 | }, 67 | { 68 | "id": "legion", 69 | "label": "Legion", 70 | "label_detail": "Legion", 71 | "phonetics": [ 72 | "legion" 73 | ], 74 | "class": "land", 75 | "flags": [], 76 | "hp": 10, 77 | "attack": 4, 78 | "defence": 2, 79 | "firepower": 1 80 | }, 81 | { 82 | "id": "musketeers", 83 | "label": "Musketeers", 84 | "label_detail": "Musketeers", 85 | "phonetics": [ 86 | "musketeers" 87 | ], 88 | "class": "land", 89 | "flags": [], 90 | "hp": 20, 91 | "attack": 3, 92 | "defence": 3, 93 | "firepower": 1 94 | }, 95 | { 96 | "id": "riflemen", 97 | "label": "Riflemen", 98 | "label_detail": "Riflemen", 99 | "phonetics": [ 100 | "riflemen" 101 | ], 102 | "class": "land", 103 | "flags": [], 104 | "hp": 20, 105 | "attack": 5, 106 | "defence": 4, 107 | "firepower": 1 108 | }, 109 | { 110 | "id": "alpine-troops", 111 | "label": "Alpine Troops", 112 | "label_detail": "Alpine Troops", 113 | "phonetics": [ 114 | "alpine troops" 115 | ], 116 | "class": "land", 117 | "flags": [], 118 | "hp": 20, 119 | "attack": 5, 120 | "defence": 5, 121 | "firepower": 1 122 | }, 123 | { 124 | "id": "mech-inf", 125 | "label": "Mech. Inf.", 126 | "label_detail": "Mech. Inf.", 127 | "phonetics": [ 128 | "mech. inf." 
129 | ], 130 | "class": "land", 131 | "flags": [], 132 | "hp": 30, 133 | "attack": 6, 134 | "defence": 6, 135 | "firepower": 1 136 | }, 137 | { 138 | "id": "horsemen", 139 | "label": "Horsemen", 140 | "label_detail": "Horsemen", 141 | "phonetics": [ 142 | "horsemen" 143 | ], 144 | "class": "land", 145 | "flags": [ 146 | "mounted" 147 | ], 148 | "hp": 10, 149 | "attack": 2, 150 | "defence": 1, 151 | "firepower": 1 152 | }, 153 | { 154 | "id": "chariot", 155 | "label": "Chariot", 156 | "label_detail": "Chariot", 157 | "phonetics": [ 158 | "chariot" 159 | ], 160 | "class": "land", 161 | "flags": [ 162 | "mounted" 163 | ], 164 | "hp": 10, 165 | "attack": 3, 166 | "defence": 1, 167 | "firepower": 1 168 | }, 169 | { 170 | "id": "knights", 171 | "label": "Knights", 172 | "label_detail": "Knights", 173 | "phonetics": [ 174 | "knights" 175 | ], 176 | "class": "land", 177 | "flags": [ 178 | "mounted" 179 | ], 180 | "hp": 10, 181 | "attack": 4, 182 | "defence": 2, 183 | "firepower": 1 184 | }, 185 | { 186 | "id": "dragoons", 187 | "label": "Dragoons", 188 | "label_detail": "Dragoons", 189 | "phonetics": [ 190 | "dragoons" 191 | ], 192 | "class": "land", 193 | "flags": [ 194 | "mounted" 195 | ], 196 | "hp": 20, 197 | "attack": 5, 198 | "defence": 2, 199 | "firepower": 1 200 | }, 201 | { 202 | "id": "cavalry", 203 | "label": "Cavalry", 204 | "label_detail": "Cavalry", 205 | "phonetics": [ 206 | "cavalry" 207 | ], 208 | "class": "land", 209 | "flags": [], 210 | "hp": 20, 211 | "attack": 8, 212 | "defence": 3, 213 | "firepower": 1 214 | }, 215 | { 216 | "id": "armor", 217 | "label": "Armor", 218 | "label_detail": "Armor", 219 | "phonetics": [ 220 | "armor" 221 | ], 222 | "class": "land", 223 | "flags": [], 224 | "hp": 30, 225 | "attack": 10, 226 | "defence": 5, 227 | "firepower": 1 228 | }, 229 | { 230 | "id": "catapult", 231 | "label": "Catapult", 232 | "label_detail": "Catapult", 233 | "phonetics": [ 234 | "catapult" 235 | ], 236 | "class": "land", 237 | "flags": [], 238 | "hp": 10, 239 | "attack": 6, 240 | "defence": 1, 241 | "firepower": 1 242 | }, 243 | { 244 | "id": "cannon", 245 | "label": "Cannon", 246 | "label_detail": "Cannon", 247 | "phonetics": [ 248 | "cannon" 249 | ], 250 | "class": "land", 251 | "flags": [], 252 | "hp": 20, 253 | "attack": 8, 254 | "defence": 1, 255 | "firepower": 1 256 | }, 257 | { 258 | "id": "artillery", 259 | "label": "Artillery", 260 | "label_detail": "Artillery", 261 | "phonetics": [ 262 | "artillery" 263 | ], 264 | "class": "land", 265 | "flags": [], 266 | "hp": 20, 267 | "attack": 10, 268 | "defence": 1, 269 | "firepower": 2 270 | }, 271 | { 272 | "id": "howitzer", 273 | "label": "Howitzer", 274 | "label_detail": "Howitzer", 275 | "phonetics": [ 276 | "howitzer" 277 | ], 278 | "class": "land", 279 | "flags": [ 280 | "igwall" 281 | ], 282 | "hp": 30, 283 | "attack": 12, 284 | "defence": 2, 285 | "firepower": 2 286 | }, 287 | { 288 | "id": "partisan", 289 | "label": "Partisan", 290 | "label_detail": "Partisan", 291 | "phonetics": [ 292 | "partisan" 293 | ], 294 | "class": "land", 295 | "flags": [], 296 | "hp": 20, 297 | "attack": 4, 298 | "defence": 4, 299 | "firepower": 1 300 | }, 301 | { 302 | "id": "marines", 303 | "label": "Marines", 304 | "label_detail": "Marines", 305 | "phonetics": [ 306 | "marines" 307 | ], 308 | "class": "land", 309 | "flags": [], 310 | "hp": 20, 311 | "attack": 8, 312 | "defence": 5, 313 | "firepower": 1 314 | }, 315 | { 316 | "id": "paratroopers", 317 | "label": "Paratroopers", 318 | "label_detail": "Paratroopers", 319 | "phonetics": [ 320 
| "paratroopers" 321 | ], 322 | "class": "land", 323 | "flags": [], 324 | "hp": 20, 325 | "attack": 6, 326 | "defence": 4, 327 | "firepower": 1 328 | }, 329 | { 330 | "id": "trireme", 331 | "label": "Trireme", 332 | "label_detail": "Trireme", 333 | "phonetics": [ 334 | "trireme" 335 | ], 336 | "class": "trireme", 337 | "flags": [], 338 | "hp": 10, 339 | "attack": 1, 340 | "defence": 1, 341 | "firepower": 1 342 | }, 343 | { 344 | "id": "caravel", 345 | "label": "Caravel", 346 | "label_detail": "Caravel", 347 | "phonetics": [ 348 | "caravel" 349 | ], 350 | "class": "sea", 351 | "flags": [], 352 | "hp": 10, 353 | "attack": 2, 354 | "defence": 1, 355 | "firepower": 1 356 | }, 357 | { 358 | "id": "galleon", 359 | "label": "Galleon", 360 | "label_detail": "Galleon", 361 | "phonetics": [ 362 | "galleon" 363 | ], 364 | "class": "sea", 365 | "flags": [], 366 | "hp": 20, 367 | "attack": 0, 368 | "defence": 2, 369 | "firepower": 1 370 | }, 371 | { 372 | "id": "transport", 373 | "label": "Transport", 374 | "label_detail": "Transport", 375 | "phonetics": [ 376 | "transport" 377 | ], 378 | "class": "sea", 379 | "flags": [], 380 | "hp": 30, 381 | "attack": 0, 382 | "defence": 3, 383 | "firepower": 1 384 | }, 385 | { 386 | "id": "frigate", 387 | "label": "Frigate", 388 | "label_detail": "Frigate", 389 | "phonetics": [ 390 | "frigate" 391 | ], 392 | "class": "sea", 393 | "flags": [], 394 | "hp": 20, 395 | "attack": 4, 396 | "defence": 2, 397 | "firepower": 1 398 | }, 399 | { 400 | "id": "ironclad", 401 | "label": "Ironclad", 402 | "label_detail": "Ironclad", 403 | "phonetics": [ 404 | "ironclad" 405 | ], 406 | "class": "sea", 407 | "flags": [], 408 | "hp": 30, 409 | "attack": 4, 410 | "defence": 4, 411 | "firepower": 1 412 | }, 413 | { 414 | "id": "destroyer", 415 | "label": "Destroyer", 416 | "label_detail": "Destroyer", 417 | "phonetics": [ 418 | "destroyer" 419 | ], 420 | "class": "sea", 421 | "flags": [], 422 | "hp": 30, 423 | "attack": 4, 424 | "defence": 4, 425 | "firepower": 1 426 | }, 427 | { 428 | "id": "cruiser", 429 | "label": "Cruiser", 430 | "label_detail": "Cruiser", 431 | "phonetics": [ 432 | "cruiser" 433 | ], 434 | "class": "sea", 435 | "flags": [], 436 | "hp": 30, 437 | "attack": 6, 438 | "defence": 6, 439 | "firepower": 2 440 | }, 441 | { 442 | "id": "aegis-cruiser", 443 | "label": "AEGIS Cruiser", 444 | "label_detail": "AEGIS Cruiser", 445 | "phonetics": [ 446 | "aegis cruiser" 447 | ], 448 | "class": "sea", 449 | "flags": [ 450 | "aegis" 451 | ], 452 | "hp": 30, 453 | "attack": 8, 454 | "defence": 8, 455 | "firepower": 2 456 | }, 457 | { 458 | "id": "submarine", 459 | "label": "Submarine", 460 | "label_detail": "Submarine", 461 | "phonetics": [ 462 | "submarine" 463 | ], 464 | "class": "sea", 465 | "flags": [], 466 | "hp": 30, 467 | "attack": 12, 468 | "defence": 2, 469 | "firepower": 2 470 | }, 471 | { 472 | "id": "battleship", 473 | "label": "Battleship", 474 | "label_detail": "Battleship", 475 | "phonetics": [ 476 | "battleship" 477 | ], 478 | "class": "sea", 479 | "flags": [], 480 | "hp": 40, 481 | "attack": 12, 482 | "defence": 12, 483 | "firepower": 2 484 | }, 485 | { 486 | "id": "carrier", 487 | "label": "Carrier", 488 | "label_detail": "Carrier", 489 | "phonetics": [ 490 | "carrier" 491 | ], 492 | "class": "sea", 493 | "flags": [], 494 | "hp": 40, 495 | "attack": 1, 496 | "defence": 9, 497 | "firepower": 2 498 | }, 499 | { 500 | "id": "fighter", 501 | "label": "Fighter", 502 | "label_detail": "Fighter", 503 | "phonetics": [ 504 | "fighter" 505 | ], 506 | "class": "air", 507 | 
"flags": [ 508 | "fighter" 509 | ], 510 | "hp": 20, 511 | "attack": 4, 512 | "defence": 3, 513 | "firepower": 2 514 | }, 515 | { 516 | "id": "bomber", 517 | "label": "Bomber", 518 | "label_detail": "Bomber", 519 | "phonetics": [ 520 | "bomber" 521 | ], 522 | "class": "air", 523 | "flags": [], 524 | "hp": 20, 525 | "attack": 12, 526 | "defence": 1, 527 | "firepower": 2 528 | }, 529 | { 530 | "id": "awacs", 531 | "label": "AWACS", 532 | "label_detail": "AWACS", 533 | "phonetics": [ 534 | "awacs" 535 | ], 536 | "class": "air", 537 | "flags": [], 538 | "hp": 20, 539 | "attack": 0, 540 | "defence": 1, 541 | "firepower": 1 542 | }, 543 | { 544 | "id": "cruise-missile", 545 | "label": "Cruise Missile", 546 | "label_detail": "Cruise Missile", 547 | "phonetics": [ 548 | "cruise missile" 549 | ], 550 | "class": "missile", 551 | "flags": [], 552 | "hp": 10, 553 | "attack": 18, 554 | "defence": 0, 555 | "firepower": 3 556 | }, 557 | { 558 | "id": "nuclear", 559 | "label": "Nuclear", 560 | "label_detail": "Nuclear", 561 | "phonetics": [ 562 | "nuclear" 563 | ], 564 | "class": "missile", 565 | "flags": [], 566 | "hp": 10, 567 | "attack": 99, 568 | "defence": 0, 569 | "firepower": 1 570 | }, 571 | { 572 | "id": "helicopter", 573 | "label": "Helicopter", 574 | "label_detail": "Helicopter", 575 | "phonetics": [ 576 | "helicopter" 577 | ], 578 | "class": "helicopter", 579 | "flags": [], 580 | "hp": 20, 581 | "attack": 10, 582 | "defence": 3, 583 | "firepower": 2 584 | }, 585 | { 586 | "id": "stealth-fighter", 587 | "label": "Stealth Fighter", 588 | "label_detail": "Stealth Fighter", 589 | "phonetics": [ 590 | "stealth fighter" 591 | ], 592 | "class": "air", 593 | "flags": [ 594 | "fighter" 595 | ], 596 | "hp": 20, 597 | "attack": 8, 598 | "defence": 4, 599 | "firepower": 2 600 | }, 601 | { 602 | "id": "stealth-bomber", 603 | "label": "Stealth Bomber", 604 | "label_detail": "Stealth Bomber", 605 | "phonetics": [ 606 | "stealth bomber" 607 | ], 608 | "class": "air", 609 | "flags": [], 610 | "hp": 20, 611 | "attack": 18, 612 | "defence": 5, 613 | "firepower": 2 614 | }, 615 | { 616 | "id": "engineers", 617 | "label": "Engineers", 618 | "label_detail": "Engineers", 619 | "phonetics": [ 620 | "engineers" 621 | ], 622 | "class": "land", 623 | "flags": [], 624 | "hp": 20, 625 | "attack": 0, 626 | "defence": 2, 627 | "firepower": 1 628 | }, 629 | { 630 | "id": "leader", 631 | "label": "Leader", 632 | "label_detail": "Leader", 633 | "phonetics": [ 634 | "leader" 635 | ], 636 | "class": "land", 637 | "flags": [], 638 | "hp": 20, 639 | "attack": 0, 640 | "defence": 2, 641 | "firepower": 1 642 | } 643 | ], 644 | "unitclass": [ 645 | { 646 | "id": "land", 647 | "label": "Land" 648 | }, 649 | { 650 | "id": "sea", 651 | "label": "Sea" 652 | }, 653 | { 654 | "id": "trireme", 655 | "label": "Trireme" 656 | }, 657 | { 658 | "id": "air", 659 | "label": "Air" 660 | }, 661 | { 662 | "id": "helicopter", 663 | "label": "Helicopter" 664 | }, 665 | { 666 | "id": "missile", 667 | "label": "Missile" 668 | } 669 | ], 670 | "veteranlevel": [ 671 | { 672 | "level": 1, 673 | "id": "recruit", 674 | "label": "Recruit(100%)", 675 | "value": 100, 676 | "chance_for_promotion": 50 677 | }, 678 | { 679 | "level": 2, 680 | "id": "veteran", 681 | "label": "Veteran(150%)", 682 | "value": 150, 683 | "chance_for_promotion": 33 684 | }, 685 | { 686 | "level": 3, 687 | "id": "hardened", 688 | "label": "Hardened(175%)", 689 | "value": 175, 690 | "chance_for_promotion": 20 691 | }, 692 | { 693 | "level": 4, 694 | "id": "elite", 695 | "label": 
"Elite(200%)", 696 | "value": 200, 697 | "chance_for_promotion": 0 698 | } 699 | ], 700 | "terrains": [ 701 | { 702 | "id": "plains-grassland", 703 | "label": "Plains,Glassland(100%)", 704 | "value": 100 705 | }, 706 | { 707 | "id": "forest", 708 | "label": "Forest(150%)", 709 | "value": 150 710 | }, 711 | { 712 | "id": "hills", 713 | "label": "Hills(200%)", 714 | "value": 200 715 | }, 716 | { 717 | "id": "mountains", 718 | "label": "Mountains(300%)", 719 | "value": 300 720 | }, 721 | { 722 | "id": "ocean", 723 | "label": "Ocean,Lake(100%)", 724 | "value": 100 725 | }, 726 | { 727 | "id": "glacier", 728 | "label": "Glacier(100%)", 729 | "value": 100 730 | }, 731 | { 732 | "id": "tundra", 733 | "label": "Tundra(100%)", 734 | "value": 100 735 | }, 736 | { 737 | "id": "desert", 738 | "label": "Desert(100%)", 739 | "value": 100 740 | }, 741 | { 742 | "id": "swamp", 743 | "label": "Swamp(150%)", 744 | "value": 150 745 | }, 746 | { 747 | "id": "jungle", 748 | "label": "Jungle(150%)", 749 | "value": 150 750 | } 751 | ], 752 | "flags": { 753 | "basic": [ 754 | { 755 | "id": "in-city", 756 | "label": "In city(Land Unit:150%)", 757 | "description": "if defender is land unit: 150%" 758 | }, 759 | { 760 | "id": "defender-fortified", 761 | "label": "Fortified(150%)", 762 | "description": "land unit only can be gain 150%" 763 | } 764 | ], 765 | "structure": [ 766 | { 767 | "id": "city-walls", 768 | "label": "City walls (against land units or helicopter (except howitzer))(300%)" 769 | }, 770 | { 771 | "id": "city-coastal-defense", 772 | "label": "Coastal defense (against ships)(200%)" 773 | }, 774 | { 775 | "id": "city-sam-battery", 776 | "label": "SAM battery (against aircrafts (except helicopter))(200%)" 777 | }, 778 | { 779 | "id": "city-sdi-defense", 780 | "label": "SDI defense (against missiles)(200%)" 781 | } 782 | ], 783 | "roads": [ 784 | { 785 | "id": "river", 786 | "label": "River(150%)", 787 | "description": "land unit only can be gain 150%" 788 | } 789 | ], 790 | "bases": [ 791 | { 792 | "id": "in-fortress", 793 | "label": "Fortress(200%)", 794 | "description": "land unit only can be gain 200%" 795 | } 796 | ], 797 | "ex": [] 798 | }, 799 | "adjustments": [ 800 | { 801 | "id": "attacker-vereran", 802 | "label": "Veteran level (attacker)", 803 | "condition": [ 804 | "true" 805 | ], 806 | "effect": [ 807 | { 808 | "type": "attacker-strength-multiply", 809 | "value": "attacker-veteran()" 810 | } 811 | ] 812 | }, 813 | { 814 | "id": "defender-vereran", 815 | "label": "Veteran level (defender)", 816 | "condition": [ 817 | "true" 818 | ], 819 | "effect": [ 820 | { 821 | "type": "defender-strength-multiply", 822 | "value": "defender-veteran()" 823 | } 824 | ] 825 | }, 826 | { 827 | "id": "terrain", 828 | "label": "Terrain", 829 | "condition": [ 830 | "defender-class(land)" 831 | ], 832 | "effect": [ 833 | { 834 | "type": "defender-strength-multiply", 835 | "value": "terrain()" 836 | } 837 | ] 838 | }, 839 | { 840 | "id": "terrain-river", 841 | "label": "Terrain (river)", 842 | "condition": [ 843 | "defender-class(land)", 844 | "flag(river)", 845 | [ 846 | "NOT", 847 | [ 848 | [ 849 | "NOT", 850 | "flag(in-city)" 851 | ], 852 | "flag(in-fortress)" 853 | ] 854 | ] 855 | ], 856 | "effect": [ 857 | { 858 | "type": "defender-strength-multiply", 859 | "value": "150" 860 | } 861 | ] 862 | }, 863 | { 864 | "id": "land-unit-in-fortress", 865 | "label": "Terrain (fortress)", 866 | "condition": [ 867 | "defender-class(land)", 868 | [ 869 | "NOT", 870 | "flag(in-city)" 871 | ], 872 | "flag(in-fortress)", 873 | 
[ 874 | "NOT", 875 | "flag(river)" 876 | ] 877 | ], 878 | "effect": [ 879 | { 880 | "type": "defender-strength-multiply", 881 | "value": "200" 882 | } 883 | ] 884 | }, 885 | { 886 | "id": "in-fortress-and-river", 887 | "label": "Terrain (both river and fortress)", 888 | "condition": [ 889 | "defender-class(land)", 890 | [ 891 | "NOT", 892 | "flag(in-city)" 893 | ], 894 | "flag(in-fortress)", 895 | "flag(river)" 896 | ], 897 | "effect": [ 898 | { 899 | "type": "defender-strength-multiply", 900 | "value": "250" 901 | } 902 | ] 903 | }, 904 | { 905 | "id": "land-unit-fortified-or-in-city", 906 | "label": "Land unit fortified or in city", 907 | "condition": [ 908 | "defender-class(land)", 909 | [ 910 | "OR", 911 | "flag(defender-fortified)", 912 | "flag(in-city)" 913 | ] 914 | ], 915 | "effect": [ 916 | { 917 | "type": "defender-strength-multiply", 918 | "value": "150" 919 | } 920 | ] 921 | }, 922 | { 923 | "id": "city-walls", 924 | "label": "City walls", 925 | "condition": [ 926 | "flag(in-city)", 927 | "flag(city-walls)", 928 | [ 929 | "OR", 930 | "attacker-class(land)", 931 | "attacker-class(helicopter)" 932 | ], 933 | [ 934 | "NOT", 935 | "attacker-flag(igwall)" 936 | ] 937 | ], 938 | "effect": [ 939 | { 940 | "type": "defender-strength-multiply", 941 | "value": "300" 942 | } 943 | ] 944 | }, 945 | { 946 | "id": "city-coastal-defense", 947 | "label": "coastal defense", 948 | "condition": [ 949 | "flag(in-city)", 950 | "flag(city-coastal-defense)", 951 | "attacker-class(sea)" 952 | ], 953 | "effect": [ 954 | { 955 | "type": "defender-strength-multiply", 956 | "value": "200" 957 | } 958 | ] 959 | }, 960 | { 961 | "id": "city-sam-battery", 962 | "label": "SAM battery", 963 | "condition": [ 964 | "flag(in-city)", 965 | "flag(city-sam-battery)", 966 | "attacker-class(air)" 967 | ], 968 | "effect": [ 969 | { 970 | "type": "defender-strength-multiply", 971 | "value": "200" 972 | } 973 | ] 974 | }, 975 | { 976 | "id": "city-sdi-defense", 977 | "label": "SDI defense", 978 | "condition": [ 979 | "flag(in-city)", 980 | "flag(city-sdi-defense)", 981 | "attacker-class(missile)" 982 | ], 983 | "effect": [ 984 | { 985 | "type": "defender-strength-multiply", 986 | "value": "200" 987 | } 988 | ] 989 | }, 990 | { 991 | "id": "pikemen-attacked-by-mounted", 992 | "label": "Pikemen attacker by a mounted unit", 993 | "condition": [ 994 | "defender-flag(pikemen)", 995 | "attacker-flag(mounted)" 996 | ], 997 | "effect": [ 998 | { 999 | "type": "defender-strength-multiply", 1000 | "value": "200" 1001 | } 1002 | ] 1003 | }, 1004 | { 1005 | "id": "aegis-cruiser-attacked-by-aircraft-missile-or-helicopter", 1006 | "label": "AEGIS Cruiser attacker by an aircraft, missile or helicopter", 1007 | "condition": [ 1008 | "defender-flag(aegis)", 1009 | [ 1010 | "OR", 1011 | "attacker-class(air)", 1012 | "attacker-class(missile)", 1013 | "attacker-class(helicopter)" 1014 | ] 1015 | ], 1016 | "effect": [ 1017 | { 1018 | "type": "defender-strength-multiply", 1019 | "value": "500" 1020 | } 1021 | ] 1022 | }, 1023 | { 1024 | "id": "fighter-attacks-helicopter", 1025 | "label": "Fighter attacks a helicopter", 1026 | "condition": [ 1027 | "attacker-flag(fighter)", 1028 | "defender-class(helicopter)" 1029 | ], 1030 | "effect": [ 1031 | { 1032 | "type": "defender-strength-multiply", 1033 | "value": "50" 1034 | }, 1035 | { 1036 | "type": "defender-firepower-set", 1037 | "value": "1" 1038 | } 1039 | ] 1040 | }, 1041 | { 1042 | "id": "ship-in-city-attacked", 1043 | "label": "Ship in city attacked", 1044 | "condition": [ 1045 | [ 1046 | 
"OR", 1047 | "defender-class(sea)", 1048 | "defender-class(trireme)" 1049 | ], 1050 | "flag(in-city)" 1051 | ], 1052 | "effect": [ 1053 | { 1054 | "type": "attacker-firepower-multiply", 1055 | "value": "200" 1056 | }, 1057 | { 1058 | "type": "defender-firepower-set", 1059 | "value": "1" 1060 | } 1061 | ] 1062 | }, 1063 | { 1064 | "id": "ship-attacks-land-unit", 1065 | "label": "Ship attacks land unit", 1066 | "condition": [ 1067 | "attacker-class(sea)", 1068 | "defender-class(land)" 1069 | ], 1070 | "effect": [ 1071 | { 1072 | "type": "attacker-firepower-set", 1073 | "value": "1" 1074 | }, 1075 | { 1076 | "type": "defender-firepower-set", 1077 | "value": "1" 1078 | } 1079 | ] 1080 | } 1081 | ] 1082 | } 1083 | -------------------------------------------------------------------------------- /__tests__/data/sample_language.ts: -------------------------------------------------------------------------------- 1 | import {Language, LexDefinition, GrammarDefinition, DEFAULT_LEX_STATE, LexCallback} from "../../src/def/language"; 2 | 3 | export const test_sample_grammar: GrammarDefinition = { 4 | rules: [ 5 | { 6 | ltoken: "S", 7 | pattern: ["E"] 8 | }, 9 | { 10 | ltoken: "E", 11 | pattern: ["LIST", "SEMICOLON"] 12 | }, 13 | { 14 | ltoken: "E", 15 | pattern: ["HOGE"] 16 | }, 17 | { 18 | ltoken: "LIST", 19 | pattern: ["T"] 20 | }, 21 | { 22 | ltoken: "LIST", 23 | pattern: ["LIST", "SEPARATE", "T"] 24 | }, 25 | { 26 | ltoken: "T", 27 | pattern: ["ATOM"] 28 | }, 29 | { 30 | ltoken: "T", 31 | pattern: [] 32 | }, 33 | { 34 | ltoken: "HOGE", 35 | pattern: ["ID"] 36 | } 37 | ], 38 | start_symbol: "S" 39 | }; 40 | 41 | export const test_sample_lex: LexDefinition = { 42 | rules: [ 43 | {token: "ATOM", pattern: "x"}, 44 | {token: "ID", pattern: /[a-zA-Z_][a-zA-Z0-9_]*/}, 45 | {token: "SEMICOLON", pattern: ";"}, 46 | {token: "SEPARATE", pattern: "|"}, 47 | {token: null, pattern: /(\r\n|\r|\n)+/}, 48 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 49 | {token: "INVALID", pattern: /./} 50 | ] 51 | }; 52 | 53 | export const test_sample_language: Language = { 54 | lex: test_sample_lex, 55 | grammar: test_sample_grammar 56 | }; 57 | 58 | export const test_empty_language: Language = { 59 | lex: {rules: []}, 60 | grammar: {rules: [{ltoken: "S", pattern: []}], start_symbol: "S"} 61 | }; 62 | 63 | export const test_calc_grammar: GrammarDefinition = { 64 | rules: [ 65 | { 66 | ltoken: "EXP", 67 | pattern: ["EXP", "PLUS", "TERM"], 68 | callback: (c) => c[0] + c[2] 69 | }, 70 | { 71 | ltoken: "EXP", 72 | pattern: ["TERM"], 73 | callback: (c) => c[0] 74 | }, 75 | { 76 | ltoken: "TERM", 77 | pattern: ["TERM", "ASTERISK", "ATOM"], 78 | callback: (c) => c[0] * c[2] 79 | }, 80 | { 81 | ltoken: "TERM", 82 | pattern: ["ATOM"], 83 | callback: (c) => c[0] 84 | }, 85 | { 86 | ltoken: "ATOM", 87 | pattern: ["DIGITS"], 88 | callback: (c) => +(c[0]) 89 | }, 90 | { 91 | ltoken: "ATOM", 92 | pattern: ["LPAREN", "EXP", "RPAREN"], 93 | callback: (c) => c[1] 94 | } 95 | ], 96 | start_symbol: "EXP" 97 | }; 98 | 99 | export const test_calc_lex: LexDefinition = { 100 | rules: [ 101 | {token: "DIGITS", pattern: /[1-9][0-9]*/}, 102 | {token: "PLUS", pattern: "+"}, 103 | {token: "ASTERISK", pattern: "*"}, 104 | {token: "LPAREN", pattern: "("}, 105 | {token: "RPAREN", pattern: ")"}, 106 | {token: null, pattern: /(\r\n|\r|\n)+/}, 107 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 108 | {token: "INVALID", pattern: /./} 109 | ] 110 | }; 111 | 112 | export 
const test_calc_language: Language = { 113 | lex: test_calc_lex, 114 | grammar: test_calc_grammar 115 | }; 116 | 117 | export const test_calc_language_raw_string = `DIGITS /[1-9][0-9]*/ 118 | PLUS "+" 119 | ASTERISK "*" 120 | LPAREN "(" 121 | RPAREN ")" 122 | !ENDLINE /(\\r\\n|\\r|\\n)+/ 123 | !WHITESPACE /[ \\f\\t\\v\\u00a0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\ufeff]+/ 124 | INVALID /./ 125 | 126 | $EXP : EXP PLUS TERM | TERM; 127 | TERM : TERM ASTERISK ATOM | ATOM; 128 | ATOM : DIGITS | LPAREN EXP RPAREN; 129 | `; 130 | 131 | export const test_lexstate_lex: LexDefinition = { 132 | rules: [ 133 | {token: "NUMBER", pattern: /0|[1-9][0-9]*/, states: ["in-parenthesis"]}, 134 | {token: "ID", pattern: /[a-zA-Z_][a-zA-Z0-9_]*/}, 135 | {token: "ASTERISK", pattern: "*", states: ["super-in-parenthesis"]}, 136 | {token: "PLUS", pattern: "+", states: [DEFAULT_LEX_STATE, "in-parenthesis"]}, 137 | {token: "DOLLAR", pattern: "$", states: ["in-braces"]}, 138 | { 139 | token: "LPAREN", pattern: "(", 140 | callback: (token, value, lex) => { 141 | lex.callState("in-parenthesis"); 142 | } 143 | }, 144 | { 145 | token: "RPAREN", pattern: ")", states: ["in-parenthesis"], 146 | callback: (token, value, lex) => { 147 | lex.returnState(); 148 | } 149 | }, 150 | { 151 | token: "LBRACE", pattern: "{", 152 | callback: (token, value, lex) => { 153 | lex.callState("in-braces"); 154 | } 155 | }, 156 | { 157 | token: "RBRACE", pattern: "}", states: ["in-braces"], 158 | callback: (token, value, lex) => { 159 | lex.returnState(); 160 | } 161 | }, 162 | {token: null, pattern: /(\r\n|\r|\n)+/}, 163 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 164 | {token: "INVALID", pattern: /./, states: [DEFAULT_LEX_STATE, "in-parenthesis"]} 165 | ], 166 | states: [ 167 | {label: "super-in-parenthesis"}, 168 | {label: "in-parenthesis", inheritance: "super-in-parenthesis"}, 169 | {label: "in-braces", inheritance: DEFAULT_LEX_STATE} 170 | ] 171 | }; 172 | 173 | export const test_lexstate_language: Language = { 174 | lex: test_lexstate_lex, 175 | grammar: {rules: [{ltoken: "S", pattern: []}], start_symbol: "S"} 176 | }; 177 | 178 | export const test_dynamic_lexrules_lex: LexDefinition = { 179 | rules: [ 180 | { 181 | token: "LNEST", pattern: /%+{/, 182 | callback: ((): LexCallback => { 183 | let i = 0; 184 | return (value, token, lex) => { 185 | const label = i.toString(); 186 | // lex.setState({label, is_exclusive: false}); 187 | lex.callState(label); 188 | lex.addRule(label, { 189 | token: "RNEST", pattern: `}${"%".repeat(value.length - 1)}`, states: [label], 190 | callback: (v, t, l) => { 191 | l.returnState(); 192 | l.removeRule(label); 193 | l.removeRule(`${label}-invalid`); 194 | // lex.removeState(label); 195 | } 196 | }); 197 | lex.addRule(`${label}-invalid`, { 198 | token: "INVALID", pattern: /./, states: [label] 199 | }); 200 | i++; 201 | }; 202 | })() 203 | }, 204 | {token: "INVALID", pattern: /./} 205 | ] 206 | }; 207 | 208 | export const test_dynamic_lexrules_language: Language = { 209 | lex: test_dynamic_lexrules_lex, 210 | grammar: {rules: [{ltoken: "S", pattern: []}], start_symbol: "S"} 211 | }; 212 | -------------------------------------------------------------------------------- /__tests__/data/tmp/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tatamo/lavriapg/534e2ab5922238b146adaae0c41720582c11ff25/__tests__/data/tmp/.gitkeep 
-------------------------------------------------------------------------------- /__tests__/json_test.ts: -------------------------------------------------------------------------------- 1 | import {language_parser} from "../src/precompiler/index"; 2 | import {ParserGenerator} from "../src/parsergenerator/index"; 3 | import {ParserFactory} from "../src/parser/index"; 4 | 5 | const input = require("fs").readFileSync("__tests__/data/json_language", "utf8"); 6 | describe("json parse test", () => { 7 | const json_lang = language_parser.parse(input); 8 | const pg = new ParserGenerator(json_lang); 9 | const parser = ParserFactory.create(json_lang, pg.getParsingTable()); 10 | 11 | test("no conflict found", () => { 12 | expect(pg.getTableType()).toBe("LALR1"); 13 | }); 14 | 15 | test("no error occurred in parsing", () => { 16 | const json_input = require("fs").readFileSync("__tests__/data/json_sample.json", "utf8"); 17 | expect(() => parser.parse(json_input)).not.toThrow(); 18 | }); 19 | }); 20 | -------------------------------------------------------------------------------- /__tests__/language_parsing_test.ts: -------------------------------------------------------------------------------- 1 | import {language_language, language_parser} from "../src/precompiler/ruleparser"; 2 | import {ParserGenerator} from "../src/parsergenerator/parsergenerator"; 3 | import {Language} from "../src"; 4 | import {Lexer} from "../src/lexer/lexer"; 5 | import {SYMBOL_EOF} from "../src/def/token"; 6 | 7 | describe("language parsing test", () => { 8 | const input = require("fs").readFileSync("language", "utf8"); 9 | const removeCallback = (language: Language): Language => { 10 | const lex = {...language.lex}; 11 | lex.rules = lex.rules.map(({token, pattern, states}) => ({token, pattern, states})); 12 | const grammar = {...language.grammar}; 13 | grammar.rules = grammar.rules.map(({ltoken, pattern}) => ({ltoken, pattern})); 14 | return {lex, grammar}; 15 | }; 16 | 17 | const language_language_without_callback = removeCallback(language_language); 18 | const pg = new ParserGenerator(language_language); 19 | test("valid parser", () => { 20 | expect(pg.isConflicted()).toBeFalsy(); 21 | }); 22 | const parser = pg.getParser(); // expected to be identical to language_parser 23 | test("parsing language file", () => { 24 | expect(removeCallback(parser.parse(input))).toEqual(language_language_without_callback); 25 | }); 26 | // we want to generate a parser by reading the language file itself 27 | test("language_parser", () => { 28 | expect(removeCallback(language_parser.parse(input))).toEqual(language_language_without_callback); 29 | }); 30 | }); 31 | 32 | describe("syntax functions test", () => { 33 | const pg = new ParserGenerator(language_language); 34 | const parser = pg.getParser(); 35 | test("lex-state", () => { 36 | const input = ` 37 | A /a/ 38 | B /b/ 39 | B2 /b/ 40 | C /c/ 41 | $S : A B2 C; 42 | `; 43 | expect(new Lexer(parser.parse(input)).exec("b")).toEqual([{token: "B2", value: "b"}, {token: SYMBOL_EOF, value: ""}]); 44 | expect(parser.parse(input)).toMatchSnapshot(); 45 | }); 46 | test("#start", () => { 47 | // when multiple #start directives are present, the bottom-most one is adopted 48 | // TODO: decide whether to make this an explicit part of the spec or to disallow multiple #start directives 49 | const input = ` 50 | #start 51 | #start 52 | 53 | A /a/ 54 | A2 /a/ 55 | B /b/ 56 | $S : A B; 57 | `; 58 | expect(new Lexer(parser.parse(input)).exec("a")).toEqual([{token: "A2", value: "a"}, {token: SYMBOL_EOF, value: ""}]); 59 | expect(() => new Lexer(parser.parse(input)).exec("b")).toThrow(); 60 | expect(parser.parse(input)).toMatchSnapshot(); 61 | }); 62 | test("#extend", 
() => { 63 | const input = ` 64 | #start 65 | #extend 66 | #extend 67 | 68 | A /a/ 69 | B /b/ 70 | C /c/ 71 | $S : A B C; 72 | `; 73 | expect(new Lexer(parser.parse(input)).exec("abc")).toMatchSnapshot(); 74 | expect(parser.parse(input)).toMatchSnapshot(); 75 | }); 76 | test("callbacks", () => { 77 | const input = ` 78 | A "a" { callback_of_A(); } 79 | B /b/ { callback_of_B(); } 80 | 81 | $S : T { callback_of_S(); }; 82 | T : A { callback_of_T_1(); } | E { callback_of_T_2(); } | { callback_of_T_3(); }; 83 | E : { callback_of_E(); } | B; 84 | `; 85 | const result = parser.parse(input); 86 | expect(result).toMatchSnapshot(); 87 | // @ts-ignore 88 | expect(result.grammar.rules.map((rule) => "callback" in rule ? rule.callback.toString() : undefined)).toMatchSnapshot(); 89 | }); 90 | test("ex-callbacks", () => { 91 | const input = ` 92 | #lex_default { lex_default_callback(); } 93 | #lex_end { lex_end_callback(); } 94 | #lex_begin { lex_begin_callback(); } 95 | A "a" 96 | 97 | #begin { grammar_begin_callback(); } 98 | #end { grammar_end_callback(); } 99 | #default { grammar_default_callback(); } 100 | $S : A; 101 | `; 102 | const result = parser.parse(input); 103 | expect(result).toMatchSnapshot(); 104 | expect(result.lex.begin_callback.toString()).toMatchSnapshot(); 105 | expect(result.lex.default_callback.toString()).toMatchSnapshot(); 106 | expect(result.lex.end_callback.toString()).toMatchSnapshot(); 107 | expect(result.grammar.begin_callback.toString()).toMatchSnapshot(); 108 | expect(result.grammar.default_callback.toString()).toMatchSnapshot(); 109 | expect(result.grammar.end_callback.toString()).toMatchSnapshot(); 110 | }); 111 | test("callback delimiters", () => { 112 | const input = ` 113 | A "a" {{ if(1+1===3){ foo(); } }} 114 | 115 | $S : T %{ const s = {}; }%; 116 | T : E %%{ const t = "}%, }}%, }}%%, }%%%, }}%%%"; }%%; 117 | E : { const e = "}%"+"}}"; } | A; 118 | `; 119 | const result = parser.parse(input); 120 | expect(result).toMatchSnapshot(); 121 | // @ts-ignore 122 | expect(result.grammar.rules.map((rule) => "callback" in rule ? 
rule.callback.toString() : undefined)).toMatchSnapshot(); 123 | }); 124 | }); 125 | -------------------------------------------------------------------------------- /__tests__/lexer/__snapshots__/controller_test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`begin/end callbacks test using variables 1`] = ` 4 | Array [ 5 | Object { 6 | "token": "A", 7 | "value": "0", 8 | }, 9 | Object { 10 | "token": "A", 11 | "value": "1", 12 | }, 13 | Object { 14 | "token": "A", 15 | "value": "2", 16 | }, 17 | Object { 18 | "token": "A", 19 | "value": "3", 20 | }, 21 | Object { 22 | "token": Symbol(EOF), 23 | "value": "", 24 | }, 25 | ] 26 | `; 27 | 28 | exports[`begin/end callbacks test using variables 2`] = ` 29 | Array [ 30 | Object { 31 | "token": "A", 32 | "value": "0", 33 | }, 34 | Object { 35 | "token": "A", 36 | "value": "1", 37 | }, 38 | Object { 39 | "token": "A", 40 | "value": "2", 41 | }, 42 | Object { 43 | "token": "A", 44 | "value": "3", 45 | }, 46 | Object { 47 | "token": "A", 48 | "value": "4", 49 | }, 50 | Object { 51 | "token": Symbol(EOF), 52 | "value": "", 53 | }, 54 | ] 55 | `; 56 | 57 | exports[`dynamic lex rules test adding and removing rules 1`] = ` 58 | Array [ 59 | Object { 60 | "token": "LNEST", 61 | "value": "%%{", 62 | }, 63 | Object { 64 | "token": "INVALID", 65 | "value": "}", 66 | }, 67 | Object { 68 | "token": "INVALID", 69 | "value": "%", 70 | }, 71 | Object { 72 | "token": "RNEST", 73 | "value": "}%%", 74 | }, 75 | Object { 76 | "token": "INVALID", 77 | "value": "}", 78 | }, 79 | Object { 80 | "token": "INVALID", 81 | "value": "%", 82 | }, 83 | Object { 84 | "token": "INVALID", 85 | "value": "%", 86 | }, 87 | Object { 88 | "token": Symbol(EOF), 89 | "value": "", 90 | }, 91 | ] 92 | `; 93 | 94 | exports[`lex state test exclusive state 1`] = ` 95 | Array [ 96 | Object { 97 | "token": "LPAREN", 98 | "value": "(", 99 | }, 100 | Object { 101 | "token": "PLUS", 102 | "value": "+", 103 | }, 104 | Object { 105 | "token": "INVALID", 106 | "value": "a", 107 | }, 108 | Object { 109 | "token": "INVALID", 110 | "value": "{", 111 | }, 112 | Object { 113 | "token": "ASTERISK", 114 | "value": "*", 115 | }, 116 | Object { 117 | "token": "RPAREN", 118 | "value": ")", 119 | }, 120 | Object { 121 | "token": Symbol(EOF), 122 | "value": "", 123 | }, 124 | ] 125 | `; 126 | 127 | exports[`lex state test nested states 1`] = ` 128 | Array [ 129 | Object { 130 | "token": "INVALID", 131 | "value": "$", 132 | }, 133 | Object { 134 | "token": "LBRACE", 135 | "value": "{", 136 | }, 137 | Object { 138 | "token": "DOLLAR", 139 | "value": "$", 140 | }, 141 | Object { 142 | "token": "LPAREN", 143 | "value": "(", 144 | }, 145 | Object { 146 | "token": "INVALID", 147 | "value": "$", 148 | }, 149 | Object { 150 | "token": "NUMBER", 151 | "value": "123", 152 | }, 153 | Object { 154 | "token": "RPAREN", 155 | "value": ")", 156 | }, 157 | Object { 158 | "token": "DOLLAR", 159 | "value": "$", 160 | }, 161 | Object { 162 | "token": "RBRACE", 163 | "value": "}", 164 | }, 165 | Object { 166 | "token": Symbol(EOF), 167 | "value": "", 168 | }, 169 | ] 170 | `; 171 | 172 | exports[`lex state test non-exclusive state 1`] = ` 173 | Array [ 174 | Object { 175 | "token": "INVALID", 176 | "value": "$", 177 | }, 178 | Object { 179 | "token": "LBRACE", 180 | "value": "{", 181 | }, 182 | Object { 183 | "token": "DOLLAR", 184 | "value": "$", 185 | }, 186 | Object { 187 | "token": "PLUS", 188 | "value": "+", 189 | }, 190 | 
Object { 191 | "token": "ID", 192 | "value": "a", 193 | }, 194 | Object { 195 | "token": "INVALID", 196 | "value": "*", 197 | }, 198 | Object { 199 | "token": "RBRACE", 200 | "value": "}", 201 | }, 202 | Object { 203 | "token": Symbol(EOF), 204 | "value": "", 205 | }, 206 | ] 207 | `; 208 | 209 | exports[`lex state test reset state after process 1`] = ` 210 | Array [ 211 | Object { 212 | "token": "LBRACE", 213 | "value": "{", 214 | }, 215 | Object { 216 | "token": "LPAREN", 217 | "value": "(", 218 | }, 219 | Object { 220 | "token": Symbol(EOF), 221 | "value": "", 222 | }, 223 | ] 224 | `; 225 | 226 | exports[`lex state test reset state after process 2`] = ` 227 | Array [ 228 | Object { 229 | "token": "INVALID", 230 | "value": ")", 231 | }, 232 | Object { 233 | "token": "INVALID", 234 | "value": "}", 235 | }, 236 | Object { 237 | "token": Symbol(EOF), 238 | "value": "", 239 | }, 240 | ] 241 | `; 242 | -------------------------------------------------------------------------------- /__tests__/lexer/__snapshots__/lexer_test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`Lexer test exec valid input 1`] = ` 4 | Array [ 5 | Object { 6 | "token": "ID", 7 | "value": "xabc", 8 | }, 9 | Object { 10 | "token": "SEMICOLON", 11 | "value": ";", 12 | }, 13 | Object { 14 | "token": "ATOM", 15 | "value": "x", 16 | }, 17 | Object { 18 | "token": "SEPARATE", 19 | "value": "|", 20 | }, 21 | Object { 22 | "token": "INVALID", 23 | "value": "&", 24 | }, 25 | Object { 26 | "token": "INVALID", 27 | "value": "0", 28 | }, 29 | Object { 30 | "token": "ID", 31 | "value": "ax", 32 | }, 33 | Object { 34 | "token": "ATOM", 35 | "value": "x", 36 | }, 37 | Object { 38 | "token": "ID", 39 | "value": "z", 40 | }, 41 | Object { 42 | "token": "SEMICOLON", 43 | "value": ";", 44 | }, 45 | Object { 46 | "token": Symbol(EOF), 47 | "value": "", 48 | }, 49 | ] 50 | `; 51 | 52 | exports[`Lexer test longest match 1`] = ` 53 | Array [ 54 | Object { 55 | "token": "PM", 56 | "value": "+-", 57 | }, 58 | Object { 59 | "token": "PMA", 60 | "value": "+-*", 61 | }, 62 | Object { 63 | "token": "ABCD", 64 | "value": "abcd", 65 | }, 66 | Object { 67 | "token": Symbol(EOF), 68 | "value": "", 69 | }, 70 | ] 71 | `; 72 | 73 | exports[`Lexer test regexp flags 1`] = ` 74 | Array [ 75 | Object { 76 | "token": "I", 77 | "value": "abc", 78 | }, 79 | Object { 80 | "token": "M", 81 | "value": "x 82 | yz", 83 | }, 84 | Object { 85 | "token": "U", 86 | "value": "def", 87 | }, 88 | Object { 89 | "token": "G", 90 | "value": "pqr", 91 | }, 92 | Object { 93 | "token": "A", 94 | "value": "a 95 | c", 96 | }, 97 | Object { 98 | "token": Symbol(EOF), 99 | "value": "", 100 | }, 101 | ] 102 | `; 103 | 104 | exports[`Lexer test rule priority 1`] = ` 105 | Array [ 106 | Object { 107 | "token": "PM", 108 | "value": "+-", 109 | }, 110 | Object { 111 | "token": "PMA", 112 | "value": "+-*", 113 | }, 114 | Object { 115 | "token": "ABCD2", 116 | "value": "abcd", 117 | }, 118 | Object { 119 | "token": "XYZ", 120 | "value": "xyz", 121 | }, 122 | Object { 123 | "token": "W", 124 | "value": "w", 125 | }, 126 | Object { 127 | "token": Symbol(EOF), 128 | "value": "", 129 | }, 130 | ] 131 | `; 132 | 133 | exports[`Lexer test skip string pattern if the following is \\w 1`] = ` 134 | Array [ 135 | Object { 136 | "token": "REGEXP", 137 | "value": "abc", 138 | }, 139 | Object { 140 | "token": "XYZ", 141 | "value": "xyz", 142 | }, 143 | Object { 144 | "token": "ASTERISK", 145 | 
"value": "*", 146 | }, 147 | Object { 148 | "token": "STR", 149 | "value": "abc", 150 | }, 151 | Object { 152 | "token": "ASTERISK", 153 | "value": "*", 154 | }, 155 | Object { 156 | "token": "XYZ", 157 | "value": "xyz", 158 | }, 159 | Object { 160 | "token": "ASTERISK", 161 | "value": "*", 162 | }, 163 | Object { 164 | "token": "REGEXP", 165 | "value": "abc", 166 | }, 167 | Object { 168 | "token": "STR", 169 | "value": "abc", 170 | }, 171 | Object { 172 | "token": Symbol(EOF), 173 | "value": "", 174 | }, 175 | ] 176 | `; 177 | -------------------------------------------------------------------------------- /__tests__/lexer/controller_test.ts: -------------------------------------------------------------------------------- 1 | import {test_dynamic_lexrules_language, test_lexstate_language} from "../data/sample_language"; 2 | import {SYMBOL_EOF} from "../../src/def/token"; 3 | import {Lexer} from "../../src/lexer/lexer"; 4 | 5 | describe("lex state test", () => { 6 | test("nested states", () => { 7 | const lexer = new Lexer(test_lexstate_language); 8 | expect(lexer.exec("${$($123)$}")).toMatchSnapshot(); 9 | }); 10 | test("rule of non-default state", () => { 11 | const lexer = new Lexer(test_lexstate_language); 12 | expect(lexer.exec("123")).not.toEqual([ 13 | {token: "NUMBER", value: "123"}, 14 | {token: SYMBOL_EOF, value: ""} 15 | ]); 16 | }); 17 | test("reset state after process", () => { 18 | const lexer = new Lexer(test_lexstate_language); 19 | expect(lexer.exec("{(")).toMatchSnapshot(); 20 | expect(lexer.exec(")}")).toMatchSnapshot(); 21 | }); 22 | test("exclusive state", () => { 23 | const lexer = new Lexer(test_lexstate_language); 24 | expect(lexer.exec("(+a{*)")).toMatchSnapshot(); 25 | }); 26 | test("non-exclusive state", () => { 27 | const lexer = new Lexer(test_lexstate_language); 28 | expect(lexer.exec("${$+a*}")).toMatchSnapshot(); 29 | }); 30 | }); 31 | 32 | describe("dynamic lex rules test", () => { 33 | test("adding and removing rules", () => { 34 | const lexer = new Lexer(test_dynamic_lexrules_language); 35 | expect(lexer.exec("%%{}%}%%}%%")).toMatchSnapshot(); 36 | }); 37 | }); 38 | 39 | describe("begin/end callbacks test", () => { 40 | test("using variables", () => { 41 | let counter = 0; 42 | const lexer = new Lexer({ 43 | grammar: {rules: [], start_symbol: ""}, lex: { 44 | rules: [{token: "A", pattern: /a/, callback: () => ["A", (counter++).toString()]}], 45 | begin_callback: () => { 46 | counter = 0; 47 | } 48 | } 49 | }); 50 | expect(lexer.exec("aaaa")).toMatchSnapshot(); 51 | expect(lexer.exec("aaaaa")).toMatchSnapshot(); 52 | }); 53 | }); 54 | -------------------------------------------------------------------------------- /__tests__/lexer/lexer_test.ts: -------------------------------------------------------------------------------- 1 | import {Lexer} from "../../src/lexer/lexer"; 2 | import {test_empty_language, test_sample_language} from "../data/sample_language"; 3 | import {SYMBOL_EOF} from "../../src/def/token"; 4 | import {Language} from "../../src/def/language"; 5 | 6 | describe("Lexer test", () => { 7 | const empty_lang: Language = {lex: {rules: []}, grammar: {rules: [], start_symbol: ""}}; 8 | test("exec valid input", () => { 9 | const lexer = new Lexer(test_sample_language); 10 | expect(lexer.exec("xabc;x|&0ax x z;")).toMatchSnapshot(); 11 | }); 12 | test("exec invalid input", () => { 13 | const lexer = new Lexer(test_empty_language); 14 | expect(() => { 15 | lexer.exec("xabc;x|&0ax x z;"); 16 | }).toThrow(/no pattern matched/); 17 | }); 18 | test("exec 
no length input", () => { 19 | const lexer = new Lexer(test_sample_language); 20 | expect(lexer.exec("")).toEqual([ 21 | {token: SYMBOL_EOF, value: ""} 22 | ]); 23 | const lexer2 = new Lexer(test_empty_language); 24 | expect(lexer2.exec("")).toEqual([ 25 | {token: SYMBOL_EOF, value: ""} 26 | ]); 27 | }); 28 | test("regexp flags", () => { 29 | const lexer = new Lexer({ 30 | grammar: {rules: [], start_symbol: ""}, lex: { 31 | rules: [ 32 | {token: "I", pattern: /AbC/i}, 33 | {token: "M", pattern: /x\nyz/m}, 34 | {token: "U", pattern: /\u{64}\u{65}\u{66}/u}, 35 | {token: "G", pattern: /pqr/g}, 36 | {token: "A", pattern: /\u{61}\nC/imugy} 37 | ] 38 | } 39 | }); 40 | expect(lexer.exec("abcx\nyzdefpqra\nc")).toMatchSnapshot(); 41 | }); 42 | test("skip string pattern if the following is \\w", () => { 43 | const lexer = new Lexer({ 44 | grammar: {rules: [], start_symbol: ""}, lex: { 45 | rules: [ 46 | {token: "STR", pattern: "abc"}, 47 | {token: "REGEXP", pattern: /abc/}, 48 | {token: "ASTERISK", pattern: "*"}, 49 | {token: "XYZ", pattern: "xyz"} 50 | ] 51 | } 52 | }); 53 | expect(lexer.exec("abcxyz*abc*xyz*abcabc")).toMatchSnapshot(); 54 | }); 55 | test("rule priority", () => { 56 | const lexer = new Lexer({ 57 | grammar: {rules: [], start_symbol: ""}, lex: { 58 | rules: [ 59 | {token: "PM", pattern: "+-"}, 60 | {token: "PMA", pattern: "+-*"}, 61 | {token: "ASTERISK", pattern: "*", priority: 1}, 62 | {token: "ABC", pattern: /abc/}, 63 | {token: "ABCD", pattern: /abcd/}, 64 | {token: "ABCD2", pattern: /abcd/, priority: 2}, 65 | {token: "D", pattern: /d/}, 66 | {token: "XYZ", pattern: /xyz/}, 67 | {token: "XYZW", pattern: /xyzw/, priority: -1}, 68 | {token: "W", pattern: /w/}, 69 | {token: null, pattern: " "} 70 | ] 71 | } 72 | }); 73 | expect(lexer.exec(" +-+-*abcd xyzw")).toMatchSnapshot(); 74 | }); 75 | test("longest match", () => { 76 | const lexer = new Lexer({ 77 | grammar: {rules: [], start_symbol: ""}, lex: { 78 | rules: [ 79 | {token: "PM", pattern: "+-"}, 80 | {token: "PMA", pattern: "+-*"}, 81 | {token: "ASTERISK", pattern: "*"}, 82 | {token: "ABC", pattern: /abc/}, 83 | {token: "ABCD", pattern: /abcd/}, 84 | {token: "ABCD2", pattern: /abcd/}, 85 | {token: "D", pattern: /d/}, 86 | {token: null, pattern: " "} 87 | ] 88 | } 89 | }); 90 | expect(lexer.exec(" +-+-*abcd ")).toMatchSnapshot(); 91 | }); 92 | test("callbacks", () => { 93 | // 引数として与えられるLexControllerを使用した詳細なテストはcontroller_test.tsで 94 | const lexer = new Lexer({ 95 | grammar: {rules: [], start_symbol: ""}, lex: { 96 | rules: [ 97 | {token: "A", pattern: /a/}, 98 | {token: "B", pattern: /b/, callback: (value, token) => token}, 99 | {token: "C", pattern: /c/, callback: (value, token) => ({token, value: "2"})}, 100 | { 101 | token: "D", pattern: /d/, 102 | callback: () => { 103 | return; 104 | } 105 | }, 106 | {token: "E", pattern: /e/, callback: () => null}, 107 | {token: null, pattern: " "} 108 | ], 109 | default_callback: (value, token) => { 110 | return [token, "1"]; 111 | } 112 | } 113 | }); 114 | expect(lexer.exec("abc de")).toEqual([ 115 | {token: "A", value: "1"}, 116 | {token: "B", value: "b"}, 117 | {token: "C", value: "2"}, 118 | {token: "D", value: "d"}, 119 | {token: SYMBOL_EOF, value: ""} 120 | ]); 121 | }); 122 | }); 123 | -------------------------------------------------------------------------------- /__tests__/parser/parser_test.ts: -------------------------------------------------------------------------------- 1 | import {Parser} from "../../src/parser/parser"; 2 | import {ParserFactory} from 
"../../src/parser/factory"; 3 | import {ParserGenerator} from "../../src/parsergenerator/parsergenerator"; 4 | import {test_calc_language} from "../data/sample_language"; 5 | 6 | describe("parser test", () => { 7 | const parsingtable = new ParserGenerator(test_calc_language).getParsingTable(); 8 | const parser = ParserFactory.create(test_calc_language, new ParserGenerator(test_calc_language).getParsingTable()); 9 | test("parser factory", () => { 10 | expect(ParserFactory.create(test_calc_language, parsingtable)).toBeInstanceOf(Parser); 11 | }); 12 | test("custom callback in grammar", () => { 13 | expect(parser.parse("2*(3+4)")).toBe(14); 14 | }); 15 | /* 16 | test("getting calc language ast", () => { 17 | expect(parser.parse("1+1")).toEqual({ 18 | type: "EXP", value: null, children: 19 | [ 20 | {type: "EXP", value: null, children: [{type: "TERM", value: null, children: [{type: "ATOM", value: null, children: [{type: "DIGITS", value: "1", children: []}]}]}]}, 21 | {type: "PLUS", value: "+", children: []}, 22 | {type: "TERM", value: null, children: [{type: "ATOM", value: null, children: [{type: "DIGITS", value: "1", children: []}]}]} 23 | ] 24 | }); 25 | }); 26 | */ 27 | test("invalid input", () => { 28 | expect(parser.parse("1zzz")).toEqual("1"); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/closureitem_test.ts: -------------------------------------------------------------------------------- 1 | import {ClosureItem} from "../../src/parsergenerator/closureitem"; 2 | import {test_sample_language} from "../data/sample_language"; 3 | import {GrammarDB} from "../../src/index"; 4 | import {SYMBOL_EOF} from "../../src/def/token"; 5 | 6 | describe("ClosureItem test", () => { 7 | const grammardb = new GrammarDB(test_sample_language); 8 | describe("{S' -> . 
S [$]}", () => { 9 | const ci = new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]); 10 | test("getter", () => { 11 | expect(ci.rule_id).toBe(-1); 12 | expect(ci.dot_index).toBe(0); 13 | expect(ci.lookaheads).toEqual([SYMBOL_EOF]); 14 | }); 15 | test("ClosureItem Hash", () => { 16 | const id_eof = grammardb.getTokenId(SYMBOL_EOF); 17 | expect(ci.getLR0Hash()).toBe("-1,0"); 18 | expect(ci.getLR1Hash()).toBe(`-1,0,[${id_eof}]`); 19 | }); 20 | describe("ClosureItem equality", () => { 21 | test("compare itself", () => { 22 | expect(ci.isSameLR0(ci)).toBeTruthy(); 23 | expect(ci.isSameLR1(ci)).toBeTruthy(); 24 | }); 25 | test("same ClosureItem", () => { 26 | const ci2 = new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]); 27 | expect(ci.isSameLR0(ci2)).toBeTruthy(); 28 | expect(ci.isSameLR1(ci2)).toBeTruthy(); 29 | }); 30 | test("not same ClosureItem", () => { 31 | const ci2 = new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]); 32 | expect(ci.isSameLR0(ci2)).toBeFalsy(); 33 | expect(ci.isSameLR1(ci2)).toBeFalsy(); 34 | }); 35 | test("not same lookahead item", () => { 36 | const ci2 = new ClosureItem(grammardb, -1, 0, ["ID"]); 37 | expect(ci.isSameLR0(ci2)).toBeTruthy(); 38 | expect(ci.isSameLR1(ci2)).toBeFalsy(); 39 | }); 40 | }); 41 | test("invalid lookahead item", () => { 42 | expect(()=>new ClosureItem(grammardb, -1, 0, ["X"])).toThrow(/invalid token/); 43 | }); 44 | }); 45 | describe("invalid ClosureItem", () => { 46 | test("invalid grammar id", () => { 47 | expect(()=>new ClosureItem(grammardb, -2, 0, [SYMBOL_EOF])).toThrow(); 48 | }); 49 | test("invalid dot position", () => { 50 | expect(()=>new ClosureItem(grammardb, -1, -1, [SYMBOL_EOF])).toThrow(); 51 | }); 52 | }); 53 | }); 54 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/closureset_test.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDB} from "../../src/parsergenerator/grammardb"; 2 | import {test_empty_language, test_sample_language} from "../data/sample_language"; 3 | import {ClosureItem} from "../../src/parsergenerator/closureitem"; 4 | import {SYMBOL_EOF} from "../../src/def/token"; 5 | import {ClosureSet} from "../../src/parsergenerator/closureset"; 6 | 7 | describe("ClosureSet test", () => { 8 | describe("Closure{S' -> . S [$]}", () => { 9 | const grammardb = new GrammarDB(test_sample_language); 10 | const cs = new ClosureSet(grammardb, [new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF])]); 11 | /* 12 | S' -> . S [$] 13 | S -> . E [$] 14 | E -> . LIST SEMICOLON [$] 15 | E -> . HOGE [$] 16 | LIST -> . T [SEMICOLON SEPARATE] 17 | LIST > . LIST SEPARATE T [SEMICOLON SEPARATE] 18 | T -> . ATOM [SEMICOLON SEPARATE] 19 | T -> . [SEMICOLON SEPARATE] 20 | HOGE -> . 
ID [$] 21 | */ 22 | const expanded = [ 23 | new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]), 24 | new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]), 25 | new ClosureItem(grammardb, 1, 0, [SYMBOL_EOF]), 26 | new ClosureItem(grammardb, 2, 0, [SYMBOL_EOF]), 27 | new ClosureItem(grammardb, 3, 0, ["SEMICOLON", "SEPARATE"]), 28 | new ClosureItem(grammardb, 4, 0, ["SEPARATE", "SEMICOLON"]), // test changing lookaheads order 29 | new ClosureItem(grammardb, 5, 0, ["SEMICOLON", "SEPARATE"]), 30 | new ClosureItem(grammardb, 6, 0, ["SEMICOLON", "SEPARATE"]), 31 | new ClosureItem(grammardb, 7, 0, [SYMBOL_EOF]) 32 | ]; 33 | const expanded_shuffled = [ 34 | new ClosureItem(grammardb, 5, 0, ["SEMICOLON", "SEPARATE"]), 35 | new ClosureItem(grammardb, 2, 0, [SYMBOL_EOF]), 36 | new ClosureItem(grammardb, 1, 0, [SYMBOL_EOF]), 37 | new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]), 38 | new ClosureItem(grammardb, 4, 0, ["SEPARATE", "SEMICOLON"]), 39 | new ClosureItem(grammardb, 7, 0, [SYMBOL_EOF]), 40 | new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]), 41 | new ClosureItem(grammardb, 3, 0, ["SEMICOLON", "SEPARATE"]), 42 | new ClosureItem(grammardb, 6, 0, ["SEPARATE", "SEMICOLON"]) 43 | ]; 44 | test("ClosureSet size", () => { 45 | expect(cs.size).toBe(9); 46 | }); 47 | test("ClosureSet array", () => { 48 | expect(cs.getArray()).toEqual(expect.arrayContaining(expanded)); 49 | }); 50 | describe("ClosureSet equality", () => { 51 | test("compare itself", () => { 52 | expect(cs.isSameLR0(cs)).toBeTruthy(); 53 | expect(cs.isSameLR1(cs)).toBeTruthy(); 54 | }); 55 | test("compare closureset that is given expanded items to constructor", () => { 56 | expect(cs.isSameLR0(new ClosureSet(grammardb, expanded_shuffled))).toBeTruthy(); 57 | expect(cs.isSameLR1(new ClosureSet(grammardb, expanded_shuffled))).toBeTruthy(); 58 | }); 59 | }); 60 | test("ClosureSet#include", () => { 61 | for (const ci of expanded) { 62 | expect(cs.includes(ci)).toBeTruthy(); 63 | } 64 | }); 65 | test("ClosureSet#include invalid inputs", () => { 66 | expect(()=>cs.includes(new ClosureItem(grammardb, 0, 1, [SYMBOL_EOF]))).not.toThrow(); 67 | expect(()=>cs.includes(new ClosureItem(grammardb, 0, 2, [SYMBOL_EOF]))).toThrow(/out of range/); 68 | expect(()=>cs.includes(new ClosureItem(grammardb, 0, -1, [SYMBOL_EOF]))).toThrow(/out of range/); 69 | expect(()=>cs.includes(new ClosureItem(grammardb, -2, 0, [SYMBOL_EOF]))).toThrow(/invalid grammar id/); 70 | expect(()=>cs.includes(new ClosureItem(grammardb, -8, 0, [SYMBOL_EOF]))).toThrow(/invalid grammar id/); 71 | }); 72 | describe("invalid ClosureSet", () => { 73 | test("invalid grammar id", () => { 74 | expect(()=>new ClosureSet(grammardb, [new ClosureItem(grammardb, -2, 0, [SYMBOL_EOF])])).toThrow(/invalid grammar id/); 75 | }); 76 | test("invalid dot position", () => { 77 | expect(()=>new ClosureSet(grammardb, [new ClosureItem(grammardb, 0, -1, [SYMBOL_EOF])])).toThrow(/out of range/); 78 | }); 79 | }); 80 | }); 81 | describe("empty grammar", () => { 82 | const grammardb = new GrammarDB(test_empty_language); 83 | const cs = new ClosureSet(grammardb, [new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF])]); 84 | const expanded = [ 85 | new ClosureItem(grammardb, -1, 0, [SYMBOL_EOF]), 86 | new ClosureItem(grammardb, 0, 0, [SYMBOL_EOF]) 87 | ]; 88 | test("ClosureSet size", () => { 89 | expect(cs.size).toBe(2); 90 | }); 91 | test("ClosureSet array", () => { 92 | expect(cs.getArray()).toEqual(expect.arrayContaining(expanded)); 93 | }); 94 | test("ClosureSet#include", () => { 95 | for (const ci of expanded) { 96 | 
expect(cs.includes(ci)).toBeTruthy(); 97 | } 98 | }); 99 | }); 100 | }); 101 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/firstset_test.ts: -------------------------------------------------------------------------------- 1 | import {FirstSet} from "../../src/parsergenerator/firstset"; 2 | import {test_empty_language, test_sample_grammar} from "../data/sample_language"; 3 | import {SymbolDiscriminator} from "../../src/parsergenerator/symboldiscriminator"; 4 | 5 | describe("FirstSet test", () => { 6 | const first = new FirstSet(test_sample_grammar, new SymbolDiscriminator(test_sample_grammar)); 7 | describe("valid one terminal and nonterminal symbol", () => { 8 | test("First(S) is {SEMICOLON, SEPARATE, ATOM, ID}", () => { 9 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 10 | expect(first.get("S")).toContain(symbol); 11 | } 12 | expect(first.get("S").size).toBe(4); 13 | }); 14 | test("First(E) is {SEMICOLON, SEPARATE, ATOM, ID}", () => { 15 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 16 | expect(first.get("E")).toContain(symbol); 17 | } 18 | expect(first.get("E").size).toBe(4); 19 | }); 20 | test("First([E]) is {SEMICOLON, SEPARATE, ATOM, ID}", () => { 21 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 22 | expect(first.get(["E"])).toContain(symbol); 23 | } 24 | expect(first.get(["E"]).size).toBe(4); 25 | }); 26 | test("First(LIST) is {SEPARATE, ATOM}", () => { 27 | for (const symbol of ["SEPARATE", "ATOM"]) { 28 | expect(first.get("LIST")).toContain(symbol); 29 | } 30 | expect(first.get("LIST").size).toBe(2); 31 | }); 32 | test("First(T) is {ATOM}", () => { 33 | expect(first.get("T")).toContain("ATOM"); 34 | expect(first.get("T").size).toBe(1); 35 | }); 36 | test("First(HOGE) is {ID}", () => { 37 | expect(first.get("HOGE")).toContain("ID"); 38 | expect(first.get("HOGE").size).toBe(1); 39 | }); 40 | test("First(ID) is {ID}", () => { 41 | expect(first.get("ID")).toContain("ID"); 42 | expect(first.get("ID").size).toBe(1); 43 | }); 44 | }); 45 | describe("valid word (multiple terminal or nonterminal symbols)", () => { 46 | test("First(LIST ID) is {SEPARATE ATOM ID}", () => { 47 | for (const symbol of ["SEPARATE", "ATOM", "ID"]) { 48 | expect(first.get(["LIST", "ID"])).toContain(symbol); 49 | } 50 | expect(first.get(["LIST", "ID"]).size).toBe(3); 51 | }); 52 | test("First(HOGE HOGE) is {ID}", () => { 53 | expect(first.get(["HOGE", "HOGE"])).toContain("ID"); 54 | expect(first.get(["HOGE", "HOGE"]).size).toBe(1); 55 | }); 56 | }); 57 | describe("invalid input (contains neither terminal nor nonterminal symbols)", () => { 58 | test("First(FOO) throws error", () => { 59 | expect(() => first.get("FOO")).toThrow(/invalid token/); 60 | }); 61 | test("First(INVALID) throws error", () => { 62 | expect(() => first.get("INVALID")).toThrow(/invalid token/); 63 | }); 64 | test("First(INVALID INVALID) throws error", () => { 65 | expect(() => first.get(["INVALID", "INVALID"])).toThrow(/invalid token/); 66 | }); 67 | test("First(INVALID S) throws error", () => { 68 | expect(() => first.get(["INVALID", "S"])).toThrow(/invalid token/); 69 | }); 70 | test("First(S INVALID) throws error", () => { 71 | expect(() => first.get(["S", "INVALID"])).toThrow(/invalid token/); 72 | }); 73 | }); 74 | }); 75 | 76 | describe("FirstSet test(empty language)", () => { 77 | const first = new FirstSet(test_empty_language.grammar, new SymbolDiscriminator(test_empty_language.grammar)); 78 | test("First(S) is {}", 
() => { 79 | expect(first.get("S").size).toBe(0); 80 | }); 81 | }); 82 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/nullableset_test.ts: -------------------------------------------------------------------------------- 1 | import {NullableSet} from "../../src/parsergenerator/nullableset"; 2 | import {test_sample_grammar} from "../data/sample_language"; 3 | 4 | describe("NullableSet test", () => { 5 | const nulls = new NullableSet(test_sample_grammar); 6 | test("T is Nullable", () => { 7 | expect(nulls.isNullable("T")).toBeTruthy(); 8 | }); 9 | test("LIST is Nullable", () => { 10 | expect(nulls.isNullable("LIST")).toBeTruthy(); 11 | }); 12 | test("HOGE is not Nullable", () => { 13 | expect(nulls.isNullable("HOGE")).toBeFalsy(); 14 | }); 15 | test("E is not Nullable", () => { 16 | expect(nulls.isNullable("E")).toBeFalsy(); 17 | }); 18 | test("S is not Nullable", () => { 19 | expect(nulls.isNullable("S")).toBeFalsy(); 20 | }); 21 | }); 22 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/symboldiscriminator_test.ts: -------------------------------------------------------------------------------- 1 | import {test_calc_grammar, test_empty_language, test_sample_grammar} from "../data/sample_language"; 2 | import {SymbolDiscriminator} from "../../src/parsergenerator/symboldiscriminator"; 3 | import {Token} from "../../src/def/token"; 4 | 5 | describe("SymbolDiscriminator test", () => { 6 | describe("test sample language", () => { 7 | const symbols = new SymbolDiscriminator(test_sample_grammar); 8 | test("S is Nonterminal", () => { 9 | expect(symbols.isNonterminalSymbol("S")).toBeTruthy(); 10 | expect(symbols.isTerminalSymbol("S")).toBeFalsy(); 11 | }); 12 | test("E is Nonterminal", () => { 13 | expect(symbols.isNonterminalSymbol("E")).toBeTruthy(); 14 | expect(symbols.isTerminalSymbol("E")).toBeFalsy(); 15 | }); 16 | test("LIST is Nonterminal", () => { 17 | expect(symbols.isNonterminalSymbol("LIST")).toBeTruthy(); 18 | expect(symbols.isTerminalSymbol("LIST")).toBeFalsy(); 19 | }); 20 | test("T is Nonterminal", () => { 21 | expect(symbols.isNonterminalSymbol("T")).toBeTruthy(); 22 | expect(symbols.isTerminalSymbol("T")).toBeFalsy(); 23 | }); 24 | test("HOGE is Nonterminal", () => { 25 | expect(symbols.isNonterminalSymbol("HOGE")).toBeTruthy(); 26 | expect(symbols.isTerminalSymbol("HOGE")).toBeFalsy(); 27 | }); 28 | test("SEMICOLON is Terminal", () => { 29 | expect(symbols.isNonterminalSymbol("SEMICOLON")).toBeFalsy(); 30 | expect(symbols.isTerminalSymbol("SEMICOLON")).toBeTruthy(); 31 | }); 32 | test("SEPARATE is Terminal", () => { 33 | expect(symbols.isNonterminalSymbol("SEPARATE")).toBeFalsy(); 34 | expect(symbols.isTerminalSymbol("SEPARATE")).toBeTruthy(); 35 | }); 36 | test("ATOM is Terminal", () => { 37 | expect(symbols.isNonterminalSymbol("ATOM")).toBeFalsy(); 38 | expect(symbols.isTerminalSymbol("ATOM")).toBeTruthy(); 39 | }); 40 | test("ID is Terminal", () => { 41 | expect(symbols.isNonterminalSymbol("ID")).toBeFalsy(); 42 | expect(symbols.isTerminalSymbol("ID")).toBeTruthy(); 43 | }); 44 | test("INVALID (not appear in grammar) is neither Nonterminal nor Terminal", () => { 45 | expect(symbols.isNonterminalSymbol("INVALID")).toBeFalsy(); 46 | expect(symbols.isTerminalSymbol("INVALID")).toBeFalsy(); 47 | }); 48 | test("Check nonterminal symbols set", () => { 49 | const nt: Set = symbols.getNonterminalSymbols(); 50 | for (const symbol of ["S", "E", "LIST", "T", "HOGE"]) 
{ 51 | expect(nt).toContain(symbol); 52 | } 53 | expect(nt.size).toBe(5); 54 | }); 55 | test("Check terminal symbols set", () => { 56 | const t: Set = symbols.getTerminalSymbols(); 57 | for (const symbol of ["SEMICOLON", "SEPARATE", "ATOM", "ID"]) { 58 | expect(t).toContain(symbol); 59 | } 60 | expect(t.size).toBe(4); 61 | }); 62 | }); 63 | describe("test sample language", () => { 64 | const symbols = new SymbolDiscriminator(test_calc_grammar); 65 | test("Check nonterminal symbols set", () => { 66 | const nt: Set = symbols.getNonterminalSymbols(); 67 | for (const symbol of ["EXP", "TERM", "ATOM"]) { 68 | expect(nt).toContain(symbol); 69 | } 70 | expect(nt.size).toBe(3); 71 | }); 72 | test("Check terminal symbols set", () => { 73 | const t: Set = symbols.getTerminalSymbols(); 74 | for (const symbol of ["PLUS", "ASTERISK", "DIGITS", "LPAREN", "RPAREN"]) { 75 | expect(t).toContain(symbol); 76 | } 77 | expect(t.size).toBe(5); 78 | }); 79 | }); 80 | describe("test empty language", () => { 81 | const symbols = new SymbolDiscriminator(test_empty_language.grammar); 82 | test("Check nonterminal symbols set", () => { 83 | const nt: Set = symbols.getNonterminalSymbols(); 84 | expect(nt).toContain("S"); 85 | expect(nt.size).toBe(1); 86 | }); 87 | test("Check terminal symbols set", () => { 88 | const t: Set = symbols.getTerminalSymbols(); 89 | expect(t.size).toBe(0); 90 | }); 91 | }); 92 | }); 93 | -------------------------------------------------------------------------------- /__tests__/parsergenerator/syntaxdb_test.ts: -------------------------------------------------------------------------------- 1 | import {test_sample_language} from "../data/sample_language"; 2 | import {GrammarDB} from "../../src/parsergenerator/grammardb"; 3 | import {SYMBOL_SYNTAX} from "../../src/def/token"; 4 | 5 | describe("GrammarDB test", () => { 6 | const grammardb = new GrammarDB(test_sample_language); 7 | 8 | describe("findRules test", () => { 9 | test("get rules of E", () => { 10 | expect(grammardb.findRules("E")).toEqual([ 11 | {id: 1, rule: {ltoken: "E", pattern: ["LIST", "SEMICOLON"]}}, 12 | {id: 2, rule: {ltoken: "E", pattern: ["HOGE"]}} 13 | ]); 14 | }); 15 | test("get a rule of HOGE", () => { 16 | expect(grammardb.findRules("HOGE")).toEqual([ 17 | {id: 7, rule: {ltoken: "HOGE", pattern: ["ID"]}} 18 | ]); 19 | }); 20 | }); 21 | describe("getRuleById test", () => { 22 | test("rule of grammar 1 is: E -> LIST SEMICOLON", () => { 23 | expect(grammardb.getRuleById(1)).toEqual({ltoken: "E", pattern: ["LIST", "SEMICOLON"]}); 24 | }); 25 | test("rule of grammar -1 is: S' -> S", () => { 26 | expect(grammardb.getRuleById(-1)).toEqual({ltoken: SYMBOL_SYNTAX, pattern: ["S"]}); 27 | }); 28 | test("throw error by calling rule of grammar -2", () => { 29 | expect(() => grammardb.getRuleById(-2)).toThrow(/out of range/); 30 | }); 31 | test("no error occurs in rule of grammar 7", () => { 32 | expect(() => grammardb.getRuleById(7)).not.toThrow(); 33 | }); 34 | test("throw error by calling rule of grammar 8", () => { 35 | expect(() => grammardb.getRuleById(8)).toThrow(/out of range/); 36 | }); 37 | }); 38 | }); 39 | 40 | -------------------------------------------------------------------------------- /__tests__/precompiler/precompiler_test.ts: -------------------------------------------------------------------------------- 1 | import {PreCompiler} from "../../src/precompiler/precompiler"; 2 | import {test_calc_language_raw_string} from "../data/sample_language"; 3 | import * as fs from "fs"; 4 | 5 | describe("precompiler test", () 
=> { 6 | const precompiler = new PreCompiler("../../../src"); 7 | const source = precompiler.exec(test_calc_language_raw_string); 8 | fs.writeFileSync("./__tests__/data/tmp/precompiler_result.ts", source); 9 | const p = require("../data/tmp/precompiler_result.ts"); 10 | test("parse \"1+1\" by using compiled parser", () => { 11 | expect(() => p.parser.parse("1+1")).not.toThrow(); 12 | }); 13 | // まだ定義ファイルにアクションを定義できないのでむり 14 | /* 15 | test("parse \"1+1\" equals to 2 by using compiled parser and custom callback controller", () => { 16 | expect(p.parser.parse("1+1")).toBe(2); 17 | }); 18 | */ 19 | fs.unlinkSync("./__tests__/data/tmp/precompiler_result.ts"); 20 | }); 21 | -------------------------------------------------------------------------------- /__tests__/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "strictNullChecks": true, 4 | "noImplicitAny": true, 5 | "target": "es6", 6 | "module": "CommonJS", 7 | "outDir": "dist/", 8 | "declaration": true, 9 | "noEmitOnError": true 10 | } 11 | } -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | const gulp = require("gulp"); 3 | const merge2 = require("merge2"); 4 | const sourcemaps = require("gulp-sourcemaps"); 5 | const typescript = require("gulp-typescript"); 6 | 7 | const tslint = require("gulp-tslint"); 8 | 9 | // use tsconfig.json 10 | const tsProject = typescript.createProject("tsconfig.json"); 11 | gulp.task("tsc", () => { 12 | let tsr = gulp.src("src/**/*.ts") 13 | .pipe(tsProject()); 14 | return merge2([ 15 | tsr.dts.pipe(gulp.dest("dist/")), 16 | tsr.js.pipe(sourcemaps.write()).pipe(gulp.dest("dist")) 17 | ]); 18 | }); 19 | 20 | gulp.task("watch-tsc", gulp.task("tsc"), () => { 21 | gulp.watch("src/**/*.ts", gulp.task("tsc")); 22 | }); 23 | 24 | gulp.task("tslint", () => { 25 | return gulp.src("src/**/*.ts") 26 | .pipe(tslint({ 27 | formatter: "verbose" 28 | })) 29 | .pipe(tslint.report()); 30 | }); 31 | 32 | gulp.task("watch-tslint", gulp.task("tslint"), () => { 33 | gulp.watch("src/**/*.ts", gulp.task("tslint")); 34 | }); 35 | 36 | gulp.task("default", gulp.series("tsc", "tslint")); 37 | gulp.task("watch", gulp.series("watch-tsc", "watch-tslint")); 38 | -------------------------------------------------------------------------------- /json_language: -------------------------------------------------------------------------------- 1 | true "true" 2 | false "false" 3 | null "null" 4 | lbrace "{" 5 | rbrace "}" 6 | lbracket "[" 7 | rbracket "]" 8 | colon ":" 9 | comma "," 10 | digit1_9 /[1-9]/ 11 | digit0 /0/ 12 | minus "-" 13 | period "." 14 | string /".*?"/ %{ return {token: "string", value: value.slice(1, -1)}; }% 15 | ! /(\r\n|\r|\n)+/ 16 | ! 
/[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/ 17 | invalid /./ 18 | 19 | #default { return children[0] } 20 | 21 | digit : digit1_9 | digit0; 22 | digits : digit | 23 | digit digits { return children[0] + children[1] }; 24 | int : digit | 25 | digit1_9 digits { return children[0] + children[1] } | 26 | minus digit { return children[0] + children[1] } | 27 | minus digit1_9 digits { return children[0] + children[1] + children[2] }; 28 | frac : period digits { return children[0] + children[1] }; 29 | number : int { return +children[0] } | 30 | int frac { return +(children[0] + children[1]) }; 31 | 32 | object : lbrace rbrace %{ return {} }% | 33 | lbrace members rbrace %{ return children[1] }%; 34 | members : pair { return [children[0]] } | 35 | pair comma members { return [children[0], ...children[2]] }; 36 | pair : string colon value %{ return {key: children[0], ...children[2]} }%; 37 | array : lbracket rbracket { return [] } | 38 | lbracket elements rbracket { return children[1] }; 39 | elements : value { return [children[0]] } | 40 | value comma elements { return [children[0], ...children[2]] }; 41 | $value : 42 | string %{ return {type: "string", value: children[0]} }% | 43 | number %{ return {type: "number", value: children[0]} }% | 44 | object %{ return {type: "object", value: children[0]} }% | 45 | array %{ return {type: "array", value: children[0]} }% | 46 | true %{ return {type: "boolean", value: true} }% | 47 | false %{ return {type: "boolean", value: false} }% | 48 | null %{ return {type: "null", value: null} }%; 49 | -------------------------------------------------------------------------------- /language: -------------------------------------------------------------------------------- 1 | EXCLAMATION "!" 2 | VBAR "|" 3 | DOLLAR "$" 4 | COLON ":" 5 | SEMICOLON ";" 6 | LT "<" 7 | GT ">" 8 | COMMA "," 9 | LEX_BEGIN "#lex_begin" 10 | LEX_END "#lex_end" 11 | LEX_DEFAULT "#lex_default" 12 | START "#start" 13 | EXTEND "#extend" 14 | BEGIN "#begin" 15 | END "#end" 16 | DEFAULT "#default" 17 | LABEL /[a-zA-Z_][a-zA-Z0-9_]*/ 18 | REGEXP /\/.*\/[gimuy]*/ { 19 | const match = /\/(.*)\/([gimuy]*)/.exec(value); 20 | return ["REGEXP", new RegExp(match[1], match[2])]; 21 | } 22 | STRING /".*"/ { return ["STRING", value.slice(1, -1)] } 23 | STRING /'.*'/ { return ["STRING", value.slice(1, -1)] } 24 | START_BLOCK /%*{+/ %%{ 25 | const match = /(%*)({+)/.exec(value); 26 | const end_delimiter = "}".repeat(match[2].length) + match[1]; 27 | lex.callState("callback"); 28 | lex.addRule("body_block", { 29 | token: "BODY_BLOCK", 30 | pattern: new RegExp(`(?:.|\\s)*?(? 
{ 38 | lex.returnState(); 39 | lex.removeRule("body_block"); 40 | lex.removeRule("end_block"); 41 | } 42 | }); 43 | }%% 44 | !ENDLINE /(\r\n|\r|\n)+/ 45 | !WHITESPACE /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/ 46 | INVALID /./ 47 | 48 | 49 | $LANGUAGE 50 | : LEX_OPTIONS LEX EX_CALLBACKS GRAMMAR %{ 51 | let start_symbol = children[3].start_symbol; 52 | // 開始記号の指定がない場合、最初の規則に設定 53 | if (start_symbol === null) { 54 | if (children[3].sect.length > 0) start_symbol = children[3].sect[0].ltoken; 55 | else start_symbol = ""; 56 | } 57 | const lex = {rules: children[1]}; 58 | if (children[0].callbacks !== undefined) { 59 | for (const callback of children[0].callbacks) { 60 | switch (callback.type) { 61 | case "#lex_begin": 62 | lex.begin_callback = callback.callback; 63 | break; 64 | case "#lex_end": 65 | lex.end_callback = callback.callback; 66 | break; 67 | case "#lex_default": 68 | lex.default_callback = callback.callback; 69 | break; 70 | } 71 | } 72 | } 73 | if (children[0].start_state !== undefined) { 74 | lex.start_state = children[0].start_state; 75 | } 76 | if (children[0].states.length > 0) { 77 | lex.states = children[0].states; 78 | } 79 | const grammar = {rules: children[3].grammar, start_symbol}; 80 | if (children[2] !== undefined) { 81 | for (const callback of children[2]) { 82 | switch (callback.type) { 83 | case "#begin": 84 | grammar.begin_callback = callback.callback; 85 | break; 86 | case "#end": 87 | grammar.end_callback = callback.callback; 88 | break; 89 | case "#default": 90 | grammar.default_callback = callback.callback; 91 | break; 92 | } 93 | } 94 | } 95 | return {lex, grammar}; 96 | }% 97 | | LEX_OPTIONS LEX GRAMMAR %{ 98 | let start_symbol = children[2].start_symbol; 99 | // 開始記号の指定がない場合、最初の規則に設定 100 | if (start_symbol === null) { 101 | if (children[2].sect.length > 0) start_symbol = children[2].sect[0].ltoken; 102 | else start_symbol = ""; 103 | } 104 | const lex = {rules: children[1]}; 105 | if (children[0].callbacks !== undefined) { 106 | for (const callback of children[0].callbacks) { 107 | switch (callback.type) { 108 | case "#lex_begin": 109 | lex.begin_callback = callback.callback; 110 | break; 111 | case "#lex_end": 112 | lex.end_callback = callback.callback; 113 | break; 114 | case "#lex_default": 115 | lex.default_callback = callback.callback; 116 | break; 117 | } 118 | } 119 | } 120 | if (children[0].start_state !== undefined) { 121 | lex.start_state = children[0].start_state; 122 | } 123 | if (children[0].states.length > 0) { 124 | lex.states = children[0].states; 125 | } 126 | return {lex, grammar: {rules: children[2].grammar, start_symbol: start_symbol}}; 127 | }%; 128 | 129 | LEX_OPTIONS 130 | : OPTIONAL_LEX_EX_CALLBACKS LEX_STATES %{ 131 | const states = []; 132 | const states_set = new Set(); 133 | for (const inherit of children[1].inheritance) { 134 | for (const sub_state of inherit.sub) { 135 | if (states_set.has(inherit.sub)) { 136 | // 既に登録されている場合、一つのstateが複数のstateを継承することはできない 137 | continue; 138 | } 139 | states.push({label: sub_state, inheritance: inherit.base}); 140 | states_set.add(sub_state); 141 | } 142 | } 143 | return {callbacks: children[0], start_state: children[1].start_state, states}; 144 | }%; 145 | LEX_STATES 146 | : LEX_STATES LEXSTATE_DEFINITIONS %{ 147 | if (children[1].type === "#start") { 148 | children[0].start_state = children[1].value; 149 | } 150 | else if (children[1].type === "#extend") { 151 | children[0].inheritance.push(children[1].value); 152 | } 153 | return children[0]; 154 | }% 155 | | %{ 
return {start_state: undefined, inheritance: []}; }%; 156 | LEXSTATE_DEFINITIONS 157 | : STARTSTATE %{ return {type: "#start", value: children[0]}; }% 158 | | STATE_EXTEND %{ return {type: "#extend", value: children[0]}; }%; 159 | STARTSTATE : START LEXSTATE { return children[0]; }; 160 | STATE_EXTEND : EXTEND MULTIPLE_LEXSTATE LEXSTATE %{ return {sub: children[1], base: children[2]}; }%; 161 | 162 | OPTIONAL_LEX_EX_CALLBACKS : LEX_EX_CALLBACKS | ; 163 | LEX_EX_CALLBACKS 164 | : LEX_EX_CALLBACKS LEX_EX_CALLBACK { return children[0].concat([children[1]]); } 165 | | LEX_EX_CALLBACK { return [children[0]]; } ; 166 | LEX_EX_CALLBACK : LEX_EX_CALLBACK_LABEL BLOCK %{ return {type: children[0], callback: makeLexCallback(children[1])}; }%; 167 | LEX_EX_CALLBACK_LABEL : LEX_BEGIN | LEX_END | LEX_DEFAULT; 168 | 169 | LEX 170 | : LEX LEXSECT { return children[0].concat([children[1]]); } 171 | | LEXSECT { return [children[0]]; }; 172 | LEXSECT 173 | : MULTIPLE_LEXSTATE LEXLABEL LEXDEF LEXCALLBACK %{ 174 | return children[3] === undefined ? 175 | {token: children[1], pattern: children[2], states: children[0]} : 176 | {token: children[1], pattern: children[2], states: children[0], callback: new Function("value", "token", "lex", children[3])}; 177 | }% 178 | | LEXLABEL LEXDEF LEXCALLBACK %{ 179 | return children[2] === undefined ? 180 | {token: children[0], pattern: children[1]} : 181 | {token: children[0], pattern: children[1], callback: new Function("value", "token", "lex", children[2])}; 182 | }%; 183 | LEXLABEL 184 | : LABEL 185 | | EXCLAMATION { return null; } 186 | | EXCLAMATION LABEL {return null; }; 187 | LEXDEF : STRING | REGEXP; 188 | 189 | MULTIPLE_LEXSTATE : LT LEXSTATE_LIST GT { return children[1]; }; 190 | LEXSTATE_LIST 191 | : LABEL COMMA LEXSTATE_LIST { return [children[0]].concat(children[2]); } 192 | | LABEL { return [children[0]]; }; 193 | LEXSTATE : LT LABEL GT { return children[1]; }; 194 | LEXCALLBACK : BLOCK | ; 195 | 196 | EX_CALLBACKS 197 | : EX_CALLBACKS EX_CALLBACK { return children[0].concat([children[1]]); } 198 | | EX_CALLBACK { return [children[0]]; }; 199 | EX_CALLBACK : EX_CALLBACK_LABEL BLOCK %{ return {type: children[0], callback: new Function("children", "token", "lexer", children[1])}; }%; 200 | EX_CALLBACK_LABEL : BEGIN | END | DEFAULT; 201 | 202 | GRAMMAR : RULES; 203 | RULES 204 | : SECT RULES %{ 205 | let start_symbol = children[1].start_symbol; 206 | if (children[0].start_symbol !== null) { 207 | start_symbol = children[0].start_symbol; 208 | } 209 | return { 210 | start_symbol, 211 | grammar: children[0].sect.concat(children[1].grammar) 212 | }; 213 | }% 214 | | SECT %{ 215 | let start_symbol = null; 216 | if (children[0].start_symbol !== null) { 217 | start_symbol = children[0].start_symbol; 218 | } 219 | return { 220 | start_symbol, 221 | grammar: children[0].sect 222 | }; 223 | }%; 224 | SECT : SECTLABEL COLON DEF SEMICOLON %{ 225 | const result = []; 226 | for (const def of children[2]) { 227 | result.push({ltoken: children[0].label, ...def}); 228 | } 229 | return {start_symbol: children[0].start_symbol, sect: result}; 230 | }%; 231 | SECTLABEL 232 | : LABEL %{ return {start_symbol: null, label: children[0]}; }% 233 | | DOLLAR LABEL %{ return {start_symbol: children[1], label: children[1]}; }%; 234 | DEF 235 | : PATTERN CALLBACK VBAR DEF %{ return [children[1] === null ? 
{pattern: children[0]} : {pattern: children[0], callback: new Function("children", "token", "lexer", children[1])}].concat(children[3]); }% 236 | | PATTERN CALLBACK %{ return [children[1] === null ? {pattern: children[0]} : {pattern: children[0], callback: new Function("children", "token", "lexer", children[1])}]; }%; 237 | PATTERN 238 | : SYMBOLLIST 239 | | { return []; }; 240 | SYMBOLLIST 241 | : LABEL SYMBOLLIST { return [children[0]].concat(children[1]); } 242 | | LABEL { return [children[0]]; }; 243 | CALLBACK : BLOCK | { return null; }; 244 | 245 | BLOCK : START_BLOCK BODY_BLOCK END_BLOCK { return children[1]; }; 246 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lavriapg", 3 | "version": "0.4.4", 4 | "description": "lalr(1) parser generator written in TypeScript", 5 | "keywords": [ 6 | "parser", 7 | "generator", 8 | "lexer", 9 | "parsing", 10 | "compiler" 11 | ], 12 | "main": "dist/index.js", 13 | "types": "dist/index.d.ts", 14 | "engines": { 15 | "node": ">= 9.2.0" 16 | }, 17 | "scripts": { 18 | "test": "jest", 19 | "test-w": "jest --watchAll", 20 | "build": "gulp", 21 | "watch": "gulp watch", 22 | "build-doc": "typedoc --mode file --out docs/ --tsconfig tsconfig.json ./src" 23 | }, 24 | "repository": { 25 | "type": "git", 26 | "url": "https://github.com/Tatamo/lavriapg" 27 | }, 28 | "author": "Tatamo", 29 | "license": "BSD-3-Clause", 30 | "devDependencies": { 31 | "@types/jest": "^22.2.2", 32 | "@types/node": "^9.6.1", 33 | "gulp": "^4.0.2", 34 | "gulp-sourcemaps": "^2.6.4", 35 | "gulp-tslint": "^8.1.3", 36 | "gulp-typescript": "^4.0.2", 37 | "jest": "^22.4.3", 38 | "merge2": "^1.2.1", 39 | "ts-jest": "^22.4.2", 40 | "tslint": "^5.9.1", 41 | "typedoc": "^0.11.1", 42 | "typescript": "^2.8.1" 43 | }, 44 | "jest": { 45 | "transform": { 46 | "^.+\\.tsx?$": "/node_modules/ts-jest/preprocessor.js" 47 | }, 48 | "testRegex": "(/__tests__/(?!data/).*|\\.(test|spec))\\.(tsx?|jsx?)$", 49 | "moduleFileExtensions": [ 50 | "ts", 51 | "js", 52 | "json" 53 | ], 54 | "watchPathIgnorePatterns": [ 55 | "/__tests__/data/tmp/" 56 | ], 57 | "coveragePathIgnorePatterns": [ 58 | "/__tests__/data/", 59 | "/dist/" 60 | ], 61 | "globals": { 62 | "ts-jest": { 63 | "tsConfigFile": "__tests__/tsconfig.json" 64 | } 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/def/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./language"; 2 | export * from "./parsingtable"; 3 | export * from "./token"; 4 | -------------------------------------------------------------------------------- /src/def/language.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "./token"; 2 | import {ILexer} from "../lexer/lexer"; 3 | import {LexController} from "../lexer/lexcontroller"; 4 | 5 | /** 6 | * 字句解析器の状態を区別するためのラベル型 7 | */ 8 | export type LexStateLabel = string; 9 | 10 | /** 11 | * デフォルトの字句解析器の状態 12 | */ 13 | export const DEFAULT_LEX_STATE = "default"; 14 | 15 | /** 16 | * 字句解析器に与える状態 17 | */ 18 | export interface LexState { 19 | label: LexStateLabel; 20 | inheritance?: LexStateLabel; 21 | } 22 | 23 | /** 24 | * 字句規則マッチ時に呼び出されるコールバック 25 | */ 26 | export type LexCallback = (value: string, token: string | null, lex: LexController) => [string | null, any] | { token: string | null, value: any } | string | null | 
void; 27 | 28 | /** 29 | * 単一の字句ルール 30 | */ 31 | // TODO: tokenはlabelに名称変更してもよい? 32 | export interface LexRule { 33 | token: Token | null; 34 | pattern: string | RegExp; 35 | states?: Array; 36 | is_disabled?: boolean; 37 | priority?: number; 38 | callback?: LexCallback; 39 | } 40 | 41 | /** 42 | * 字句規則 43 | */ 44 | export interface LexDefinition { 45 | rules: Array; 46 | states?: Array; 47 | start_state?: LexStateLabel; 48 | default_callback?: LexCallback; 49 | begin_callback?: (lex: LexController) => void; 50 | end_callback?: (lex: LexController) => void; 51 | } 52 | 53 | /** 54 | * 構文のreduce時に呼び出されるコールバック 55 | */ 56 | export type GrammarCallback = (children: Array, token: string, lexer: ILexer) => any; 57 | 58 | /** 59 | * 単一の構文ルール 60 | */ 61 | export interface GrammarRule { 62 | ltoken: Token; 63 | pattern: Array; 64 | callback?: GrammarCallback; 65 | } 66 | 67 | /** 68 | * 構文規則 69 | */ 70 | export interface GrammarDefinition { 71 | rules: Array; 72 | start_symbol: Token; 73 | default_callback?: GrammarCallback; 74 | // TODO: 存在はするが呼び出さないのを修正 75 | begin_callback?: () => void; 76 | end_callback?: () => void; 77 | } 78 | 79 | /** 80 | * 言語定義 81 | */ 82 | export interface Language { 83 | lex: LexDefinition; 84 | grammar: GrammarDefinition; 85 | } 86 | -------------------------------------------------------------------------------- /src/def/parsingtable.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "./token"; 2 | 3 | /** 4 | * Shiftオペレーション 5 | */ 6 | export type ShiftOperation = { type: "shift", to: number }; 7 | 8 | /** 9 | * Reduceオペレーション 10 | */ 11 | export type ReduceOperation = { type: "reduce", grammar_id: number }; 12 | 13 | /** 14 | * Shift/Reduceコンフリクト 15 | */ 16 | export type ConflictedOperation = { type: "conflict", shift_to: Array, reduce_grammar: Array }; 17 | 18 | /** 19 | * Acceptオペレーション 20 | */ 21 | export type AcceptOperation = { type: "accept" }; 22 | 23 | /** 24 | * Gotoオペレーション 25 | */ 26 | export type GotoOperation = { type: "goto", to: number }; 27 | 28 | /** 29 | * 構文解析器の実行する命令群 30 | */ 31 | export type ParsingOperation = ShiftOperation | ReduceOperation | ConflictedOperation | AcceptOperation | GotoOperation; 32 | 33 | /** 34 | * 構文解析表 35 | */ 36 | export type ParsingTable = Array>; 37 | -------------------------------------------------------------------------------- /src/def/token.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * トークン名 3 | */ 4 | export type Token = string | symbol; 5 | 6 | /** 7 | * トークン化された入力 8 | * トークン名と、字句規則にマッチした元々の入力 9 | */ 10 | export type TokenizedInput = { token: Token, value: T }; 11 | 12 | /** 13 | * 入力の終端を表す終端記号名 14 | * @type {symbol} 15 | */ 16 | export const SYMBOL_EOF: Token = Symbol("EOF"); 17 | 18 | /** 19 | * `S' -> S $` (Sは開始記号)となるような非終端記号S'を表す非終端記号名 20 | * @type {symbol} 21 | */ 22 | export const SYMBOL_SYNTAX: Token = Symbol("S'"); 23 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./def"; 2 | export * from "./lexer"; 3 | export * from "./parser"; 4 | export * from "./parsergenerator"; 5 | export * from "./precompiler"; 6 | -------------------------------------------------------------------------------- /src/lexer/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./lexer"; 2 | 
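
The type definitions above (`LexRule`, `LexDefinition`, `GrammarRule`, `GrammarDefinition`, `Language`) can also be written out by hand rather than produced from a definition file. The sketch below is illustrative only — a hypothetical calculator language invented for this example, not a sample bundled with the package — showing one way such an object could be assembled and handed to `ParserGenerator`/`ParserFactory` (as the tests in `__tests__/parser/parser_test.ts` do). Rules without a `callback` fall back to returning `children[0]`, and a lex rule with `token: null` discards its match.

```TypeScript
// Illustrative sketch only: a hand-written Language value for a tiny calculator.
// All token names and grammar rules here are invented for the example.
import {Language, ParserFactory, ParserGenerator} from "lavriapg";

const calc: Language = {
	lex: {
		rules: [
			{token: "DIGITS", pattern: /[1-9][0-9]*|0/},
			{token: "PLUS", pattern: "+"},
			{token: "ASTERISK", pattern: "*"},
			{token: "LPAREN", pattern: "("},
			{token: "RPAREN", pattern: ")"},
			{token: null, pattern: /\s+/} // token: null -> the matched text is skipped
		]
	},
	grammar: {
		start_symbol: "EXP",
		rules: [
			{ltoken: "EXP", pattern: ["EXP", "PLUS", "TERM"], callback: (c) => c[0] + c[2]},
			{ltoken: "EXP", pattern: ["TERM"]}, // no callback: children[0] is passed through
			{ltoken: "TERM", pattern: ["TERM", "ASTERISK", "ATOM"], callback: (c) => c[0] * c[2]},
			{ltoken: "TERM", pattern: ["ATOM"]},
			{ltoken: "ATOM", pattern: ["DIGITS"], callback: (c) => Number(c[0])},
			{ltoken: "ATOM", pattern: ["LPAREN", "EXP", "RPAREN"], callback: (c) => c[1]}
		]
	}
};

const table = new ParserGenerator(calc).getParsingTable();
const parser = ParserFactory.create(calc, table);
console.log(parser.parse("2 * (3 + 4)")); // 14
```

This is the same shape of object that `language_parser` produces when it reads a definition file such as [language](/language) or [json_language](/json_language); writing it by hand is mainly useful for small embedded grammars and for tests like the ones in `__tests__/lexer/lexer_test.ts`.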
-------------------------------------------------------------------------------- /src/lexer/lexcontroller.ts: -------------------------------------------------------------------------------- 1 | import {DEFAULT_LEX_STATE, Language, LexCallback, LexDefinition, LexRule, LexState, LexStateLabel} from "../def/language"; 2 | 3 | export type LexRuleLabel = string; 4 | 5 | /** 6 | * 字句解析器の状態と字句ルールの紐付けと管理を行うクラス 7 | */ 8 | class LexRuleManager { 9 | // private states: { states: Map, index: Map>, inheritance: Map }; 10 | private states: Map }>; 11 | private rules: { rules: Array, labels: Map }; 12 | // 各ルールに一意なidを割り当てるためのカウンタ 13 | private id_counter: number; 14 | // ルールの削除によって割り当てがなくなったid 15 | private free_ids: Array; 16 | constructor(language: Language) { 17 | const lex = language.lex; 18 | this.id_counter = 0; 19 | this.free_ids = []; 20 | 21 | // initialize lex states map 22 | this.states = new Map(); 23 | // もしlexの定義内にデフォルト状態の記述があっても上書きされるだけなので問題ない 24 | this.setState({label: DEFAULT_LEX_STATE}); 25 | if (lex.states !== undefined) { 26 | for (const state of lex.states) { 27 | this.setState(state); 28 | } 29 | } 30 | 31 | // initialize lex rules 32 | this.rules = {rules: [], labels: new Map()}; 33 | 34 | for (const rule of lex.rules.map((r) => LexRuleManager.formatLexRule(r))) { 35 | this.rules.rules[this.id_counter] = rule; 36 | // 状態ごとにインデックスを張る 37 | for (const state of rule.states!) { 38 | // TODO: statesに入れる 39 | if (!this.states.has(state)) { 40 | this.setState(state); 41 | } 42 | this.states.get(state)!.index.add(this.id_counter); 43 | } 44 | this.id_counter += 1; 45 | } 46 | } 47 | /** 48 | * 新しい状態を追加する 既に存在している場合は上書きするが、状態に登録されたルールは維持される 49 | * @param {LexStateLabel} label 新しい状態の名前 名前以外のプロパティは初期値が用いられる 50 | * @returns {boolean} 追加に成功したかどうか 継承関係が循環していた場合、追加は行われずfalseが返る 51 | */ 52 | setState(label: LexStateLabel): boolean; 53 | /** 54 | * 新しい状態を追加する 既に存在している場合は上書きするが、状態に登録されたルールは維持される 55 | * @param {LexState} state 新しい状態 56 | * @returns {boolean} 追加に成功したかどうか 継承関係が循環していた場合、追加は行われずfalseが返る 57 | */ 58 | setState(state: LexState): boolean; 59 | setState(s: LexStateLabel | LexState): boolean { 60 | let state: LexState; 61 | if (typeof s === "object") { 62 | state = s; 63 | } 64 | else { 65 | state = {label: s}; 66 | } 67 | state = LexRuleManager.formatLexState(state); 68 | // ループチェック 69 | const isLooped = (state: LexState): boolean => { 70 | if (state.inheritance !== undefined) { 71 | let flg_loop = false; 72 | let parent = this.states.get(state.inheritance); 73 | while (parent !== undefined && parent.state.inheritance !== undefined) { 74 | // 状態を追加するたびにチェックするので、自身にたどりつかないことを調べればよい 75 | if (parent.state.inheritance === state.label) { 76 | flg_loop = true; 77 | break; 78 | } 79 | parent = this.states.get(parent.state.inheritance); 80 | } 81 | if (flg_loop) return true; 82 | } 83 | return false; 84 | }; 85 | // 循環継承が存在する場合は追加できない 86 | if (isLooped(state)) return false; 87 | if (this.states.has(state.label)) { 88 | // 既に追加済みの場合はindexをそのまま維持する 89 | this.states.get(state.label)!.state = state; 90 | } 91 | else { 92 | this.states.set(state.label, {state, index: new Set()}); 93 | } 94 | return true; 95 | } 96 | // TODO: パフォーマンス改善 97 | /** 98 | * 与えられた状態に登録されている字句ルールの一覧をイテレータとして返す 99 | * @param {LexStateLabel} label 字句ルールを取得する状態の名前 100 | * @returns {IterableIterator} 字句ルールが得られるイテレータ 101 | */ 102 | getRulesItr(label: LexStateLabel): IterableIterator { 103 | // そんな状態はない 104 | if (!this.states.has(label)) return [][Symbol.iterator](); 105 | 106 | // 継承を加味 107 | let result: Array = []; 108 | let s = 
this.states.get(label); 109 | while (s !== undefined) { 110 | result = result.concat([...s.index]); 111 | if (s.state.inheritance === undefined) break; 112 | s = this.states.get(s.state.inheritance); 113 | } 114 | // 暫定的処置 115 | result.sort((a: number, b: number) => a - b); 116 | 117 | return (function* (self, itr) { 118 | for (const id of itr) { 119 | if (self.rules.rules[id] !== undefined) yield self.rules.rules[id]!; 120 | } 121 | })(this, new Set(result)[Symbol.iterator]()); 122 | } 123 | // TODO べつにlabelを省略可能にしてもいいのでは 124 | /** 125 | * 新しい字句ルールを名前をつけて追加する 既に存在している場合は上書きする 126 | * @param {LexRuleLabel} label 新しいルールの名前 127 | * @param {LexRule} rule 新しく追加するルール 128 | */ 129 | setRule(label: LexRuleLabel, rule: LexRule): void { 130 | // 同名の既存ルールを破棄 131 | this.removeRule(label); 132 | 133 | const formatted_rule = LexRuleManager.formatLexRule(rule); 134 | 135 | const id = this.free_ids.length > 0 ? this.free_ids.pop()! : this.id_counter++; 136 | this.rules.rules[id] = formatted_rule; 137 | this.rules.labels.set(label, id); 138 | for (const state of formatted_rule.states!) { 139 | if (!this.states.has(state)) this.setState(state); 140 | this.states.get(state)!.index.add(id); 141 | } 142 | } 143 | /** 144 | * 名前がついた字句ルールを指定して削除する 145 | * @param {LexRuleLabel} label 削除するルールの名前 146 | * @returns {LexRule | undefined} 削除したルール 該当するものがない場合はundefined 147 | */ 148 | removeRule(label: LexRuleLabel): LexRule | undefined { 149 | if (!this.rules.labels.has(label)) { 150 | return undefined; 151 | } 152 | const id = this.rules.labels.get(label)!; 153 | this.rules.labels.delete(label); 154 | const rule = this.rules.rules[id]; 155 | if (rule === undefined) return undefined; 156 | 157 | for (const state of rule.states!) { 158 | if (this.states.has(state)) { 159 | this.states.get(state)!.index.delete(id); 160 | } 161 | } 162 | this.rules.rules[id] = undefined; 163 | this.free_ids.push(id); 164 | return rule; 165 | } 166 | /** 167 | * 未定義プロパティに初期値を割り当てるなど、扱いやすい形に整形した新しい状態を生成する 168 | * @param {LexState} state もともとの状態 169 | * @returns {LexState} 整形された新しい状態 170 | */ 171 | static formatLexState(state: LexState): LexState { 172 | // clone state 173 | return {...state}; 174 | } 175 | /** 176 | * 未定義プロパティに初期値を割り当てるなど、扱いやすい形に整形した新しい字句ルールを生成する 177 | * @param {LexRule} rule もともとの字句ルール 178 | * @returns {LexRule} 整形された新しい字句ルール 179 | */ 180 | static formatLexRule(rule: LexRule): LexRule { 181 | // clone rule 182 | const result: LexRule = {...rule}; 183 | if (result.is_disabled === undefined) result.is_disabled = false; 184 | // 状態指定を省略された場合はデフォルト状態のみとする 185 | if (result.states === undefined) result.states = [DEFAULT_LEX_STATE]; 186 | // 正規表現を字句解析に適した形に整形 187 | if (result.pattern instanceof RegExp) { 188 | result.pattern = LexRuleManager.formatRegExp(result.pattern); 189 | } 190 | return result; 191 | } 192 | /** 193 | * 字句解析時に必要なフラグを追加し、不要なフラグを取り除いた新しい正規表現オブジェクトを生成する 194 | * @param {RegExp} pattern もともとの正規表現 195 | * @returns {RegExp} 整形された新しい正規表現 196 | */ 197 | private static formatRegExp(pattern: RegExp): RegExp { 198 | // フラグを整形する 199 | let flags: string = ""; 200 | // gフラグは邪魔なので取り除く 201 | // i,m,uフラグがあれば維持する 202 | if (pattern.ignoreCase) { 203 | flags += "i"; 204 | } 205 | if (pattern.multiline) { 206 | flags += "m"; 207 | } 208 | if (pattern.unicode) { 209 | flags += "u"; 210 | } 211 | // yフラグは必ずつける 212 | flags += "y"; 213 | return new RegExp(pattern, flags); 214 | } 215 | } 216 | 217 | /** 218 | * 解析中の字句解析器の状態を操作するクラス 219 | */ 220 | export class LexController { 221 | private _lex: LexDefinition; 222 | private 
_current_state: LexStateLabel; 223 | private _state_stack: Array; 224 | private _rules: LexRuleManager; 225 | constructor(language: Language) { 226 | this._lex = language.lex; 227 | this._current_state = language.lex.start_state !== undefined ? language.lex.start_state : DEFAULT_LEX_STATE; 228 | this._state_stack = []; 229 | this._rules = new LexRuleManager(language); 230 | } 231 | /** 232 | * 個別にコールバックが設定されていない規則に対して適用するデフォルトコールバックを得る 233 | * @returns {LexCallback | undefined} デフォルトコールバック 定義されていない場合はundefined 234 | */ 235 | get defaultCallback(): LexCallback | undefined { 236 | return this._lex.default_callback; 237 | } 238 | /** 239 | * 字句解析開始時のコールバックを呼び出す 240 | */ 241 | onBegin(): void { 242 | if (this._lex.begin_callback !== undefined) this._lex.begin_callback(this); 243 | } 244 | /** 245 | * 字句解析終了時のコールバックを呼び出す 246 | */ 247 | onEnd(): void { 248 | if (this._lex.end_callback !== undefined) this._lex.end_callback(this); 249 | } 250 | /** 251 | * 現在の状態で適用可能な字句ルールをイテレータとして返す 252 | * @returns {IterableIterator} 字句ルールを得ることができるイテレータ 253 | */ 254 | getRulesItr(): IterableIterator { 255 | return this._rules.getRulesItr(this._current_state); 256 | } 257 | /** 258 | * 新しい字句ルールを名前をつけて追加する 259 | * @param {string} label ルールの区別のために与える名前 260 | * @param {LexRule} rule 追加する字句ルール 261 | */ 262 | addRule(label: string, rule: LexRule): void { 263 | this._rules.setRule(label, rule); 264 | } 265 | /** 266 | * 既存の字句ルールを削除する 267 | * @param {string} label 削除するルールの名前 268 | * @returns {LexRule | undefined} 削除したルール 該当するものがない場合はundefined 269 | */ 270 | removeRule(label: string): LexRule | undefined { 271 | return this._rules.removeRule(label); 272 | } 273 | /** 274 | * 現在の字句解析器の状態名を得る 275 | * @returns {LexStateLabel} 現在の状態名 276 | */ 277 | getCurrentState(): LexStateLabel { 278 | return this._current_state; 279 | } 280 | /** 281 | * 字句解析機の解析状態を別の状態に変更する 282 | * @param {LexStateLabel} label 新しい状態の名前 283 | */ 284 | jumpState(label: LexStateLabel): void { 285 | this._current_state = label; 286 | } 287 | /** 288 | * 現在の状態をスタックに積んでから別の状態に変更する 289 | * @param {LexStateLabel} label 新しい状態の名前 290 | */ 291 | callState(label: LexStateLabel): void { 292 | this._state_stack.push(this._current_state); 293 | this._current_state = label; 294 | } 295 | /** 296 | * スタックから1つ取り出し、その状態に変更する 297 | * スタックが空の場合は状態を変更しない 298 | * @returns {LexStateLabel | undefined} 変更した状態の名前 スタックが空の場合はundefined 299 | */ 300 | returnState(): LexStateLabel | undefined { 301 | const pop = this._state_stack.pop(); 302 | if (pop !== undefined) this._current_state = pop; 303 | return pop; 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /src/lexer/lexer.ts: -------------------------------------------------------------------------------- 1 | import {Language, LexRule} from "../def/language"; 2 | import {SYMBOL_EOF, TokenizedInput} from "../def/token"; 3 | import {LexController} from "./lexcontroller"; 4 | 5 | /** 6 | * 字句解析器用のinterface 7 | * 8 | * TODO: 要改善 9 | */ 10 | export interface ILexer { 11 | exec(input: string): Array; 12 | } 13 | 14 | /** 15 | * 字句解析器 16 | * 入力を受け取ってトークン化する 17 | */ 18 | export class Lexer implements ILexer { 19 | constructor(private language: Language) { 20 | // do nothing 21 | } 22 | /** 23 | * 入力を受け取って字句解析を行う 24 | * @param {string} input 入力文字列 25 | * @returns {Array} 字句規則によって分割されたトークン列 26 | */ 27 | exec(input: string): Array { 28 | const result: Array = []; 29 | let next_index = 0; 30 | const controller = new LexController(this.language); 31 | controller.onBegin(); 32 | while 
(next_index < input.length) { 33 | // 念の為undefined対策 34 | // const current_rules = this.rules.has(controller.getCurrentState()) ? this.rules.get(controller.getCurrentState())! : []; 35 | const current_rules = controller.getRulesItr(); 36 | const {rule, matched} = Lexer.match(current_rules, input, next_index); 37 | if (rule === null) { 38 | // マッチする規則がなかった 39 | throw new Error("no pattern matched"); 40 | } 41 | else { 42 | let token = rule.token; 43 | let value = matched; 44 | // コールバック呼び出し 45 | if (typeof rule.token !== "symbol" && (rule.callback !== undefined || controller.defaultCallback !== undefined)) { 46 | const callback_result = rule.callback !== undefined ? rule.callback(matched, rule.token, controller) : controller.defaultCallback!(matched, rule.token, controller); 47 | if (callback_result === null) { 48 | token = null; 49 | } 50 | else if (typeof callback_result === "string") { 51 | token = callback_result; 52 | } 53 | else if (Array.isArray(callback_result)) { 54 | token = callback_result[0]; 55 | value = callback_result[1]; 56 | } 57 | else if (callback_result !== undefined) { 58 | token = callback_result.token; 59 | value = callback_result.value; 60 | } 61 | // callback_result === undefinedなら何もしない 62 | } 63 | // tokenがnullなら処理を飛ばす 64 | if (token !== null) { 65 | result.push({token: token, value: value}); 66 | } 67 | // 読む位置を進める 68 | next_index += matched.length; 69 | } 70 | } 71 | result.push({token: SYMBOL_EOF, value: ""}); 72 | controller.onEnd(); 73 | return result; 74 | } 75 | private static match(rules: Iterable, input: string, next_index: number): { rule: LexRule | null, matched: string } { 76 | let result_matched: string = ""; 77 | let result_rule: LexRule | null = null; 78 | let result_priority: number | null = null; 79 | for (const rule of rules) { 80 | let match = ""; 81 | if (typeof rule.pattern === "string") { 82 | const tmp_next_index = next_index + rule.pattern.length; 83 | if (input.substring(next_index, tmp_next_index) != rule.pattern) continue; // マッチしない 84 | // マッチした文字列の末尾が\wで、その直後の文字が\wの場合はスキップ 85 | if (tmp_next_index < input.length && /\w/.test(rule.pattern.substring(0, 1)) && /\w/.test(input[tmp_next_index])) continue; 86 | match = rule.pattern; 87 | } 88 | else { 89 | // pattern: RegExp 90 | rule.pattern.lastIndex = next_index; 91 | const m = rule.pattern.exec(input); 92 | if (m === null) continue; // マッチ失敗 93 | match = m[0]; 94 | } 95 | // 同じ優先度の場合、最長マッチまたは出現順(match_priorityで設定) 96 | const priority = rule.priority !== undefined ? 
rule.priority : 0; 97 | if (result_priority === null || 98 | priority > result_priority || 99 | priority === result_priority && match.length > result_matched.length) { 100 | result_matched = match; 101 | result_rule = rule; 102 | result_priority = priority; 103 | } 104 | } 105 | return {rule: result_rule, matched: result_matched}; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/parser/ast.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "../def/token"; 2 | 3 | /** 4 | * AST 5 | */ 6 | export interface ASTNode { 7 | type: Token; 8 | value: string | null; 9 | children: Array; 10 | } 11 | -------------------------------------------------------------------------------- /src/parser/factory.ts: -------------------------------------------------------------------------------- 1 | import {Language} from "../def/language"; 2 | import {ParsingTable} from "../def/parsingtable"; 3 | import {Lexer} from "../lexer/lexer"; 4 | import {Parser} from "./parser"; 5 | 6 | /** 7 | * Parserを生成するためのファクトリクラス 8 | */ 9 | export class ParserFactory { 10 | /** 11 | * 言語 12 | * @param {Language} language 解析する言語情報 13 | * @param {ParsingTable} parsing_table 構文解析表 14 | * @returns {Parser} 生成されたパーサ 15 | */ 16 | public static create(language: Language, parsing_table: ParsingTable): Parser { 17 | const lexer = new Lexer(language); 18 | return new Parser(lexer, language.grammar, parsing_table); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/parser/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./ast"; 2 | export * from "./factory"; 3 | export * from "./parser"; 4 | -------------------------------------------------------------------------------- /src/parser/parser.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {ParsingTable} from "../def/parsingtable"; 3 | import {Token, TokenizedInput} from "../def/token"; 4 | import {ILexer} from "../lexer/lexer"; 5 | 6 | /** 7 | * 構文解析器 8 | */ 9 | export class Parser { 10 | /** 11 | * @param {ILexer} lexer 字句解析の際に使用する字句解析器 12 | * @param {GrammarDefinition} grammar 解析する構文定義 13 | * @param {ParsingTable} parsingtable 解析する構文解析表 14 | */ 15 | constructor(private lexer: ILexer, private grammar: GrammarDefinition, private parsingtable: ParsingTable) { 16 | } 17 | 18 | /** 19 | * 構文解析を実行する 20 | * @param {string} input 解析する入力文字列 21 | * @returns {any} 解析結果(返る結果はコントローラによって異なる) 22 | */ 23 | public parse(input: string): any { 24 | return this._parse(this.lexer.exec(input)); 25 | } 26 | 27 | // parsingtableはconflictを含む以外は正しさが保証されているものと仮定する 28 | // inputsは正しくないトークンが与えられる可能性を含む 29 | // TODO: 詳細な例外処理、エラー検知 30 | private _parse(inputs: Array): any { 31 | let read_index: number = 0; // 次に読むべき入力記号のインデックス 32 | const inputs_length: number = inputs.length; 33 | const state_stack: Array = [0]; // 現在読んでいる構文解析表の状態番号を置くスタック 34 | const result_stack: Array = []; // 解析中のASTノードを置くスタック 35 | let flg_error: boolean = false; 36 | 37 | // 構文解析する 38 | while (read_index < inputs_length) { 39 | let token: Token = inputs[read_index].token; 40 | let state: number = state_stack[state_stack.length - 1]; 41 | if (!this.parsingtable[state].has(token)) { 42 | // 未定義 43 | console.error("parse failed: unexpected token:", token); 44 | flg_error = true; 45 | break; 46 | } 47 | let action = 
this.parsingtable[state].get(token)!; 48 | if (action.type == "shift") { 49 | // shiftオペレーション 50 | // 次の状態をスタックに追加 51 | state_stack.push(action.to); 52 | 53 | result_stack.push(inputs[read_index].value); 54 | 55 | // 入力を一つ消費 56 | read_index += 1; 57 | } 58 | else if (action.type == "reduce") { 59 | // reduceオペレーション 60 | const grammar_rule = this.grammar.rules[action.grammar_id]; 61 | const rnum = grammar_rule.pattern.length; 62 | // 対応する規則の右辺の記号の数だけスタックからポップする 63 | for (let i = 0; i < rnum; i++) state_stack.pop(); 64 | 65 | // rnumが0でないなら、右辺の記号の数だけスタックからポップする 66 | const children = []; 67 | for (let i = 0; i < rnum; i++) children[rnum - 1 - i] = result_stack.pop(); 68 | 69 | if (typeof grammar_rule.ltoken !== "symbol" && grammar_rule.callback !== undefined) { 70 | result_stack.push(grammar_rule.callback(children, grammar_rule.ltoken, this.lexer)); 71 | } 72 | else if (typeof grammar_rule.ltoken !== "symbol" && this.grammar.default_callback !== undefined) { 73 | result_stack.push(this.grammar.default_callback(children, grammar_rule.ltoken, this.lexer)); 74 | } 75 | else { 76 | result_stack.push(children[0]); 77 | } 78 | 79 | // このままgotoオペレーションを行う 80 | state = state_stack[state_stack.length - 1]; 81 | token = grammar_rule.ltoken; 82 | if (!this.parsingtable[state].has(token)) { 83 | // 未定義 84 | console.error("parse failed: unexpected token:", token); 85 | flg_error = true; 86 | break; 87 | } 88 | action = this.parsingtable[state].get(token)!; 89 | if (action.type != "goto") { 90 | // gotoアクションでなければおかしい 91 | console.error("parse failed: goto operation expected after reduce operation"); 92 | flg_error = true; 93 | break; 94 | } 95 | state_stack.push(action.to); 96 | } 97 | else if (action.type == "accept") { 98 | // 構文解析完了 99 | break; 100 | } 101 | else if (action.type == "conflict") { 102 | console.error("conflict found:"); 103 | console.error("current state " + state + ":", this.parsingtable[state]); 104 | console.error("shift:", action.shift_to, ",reduce:", action.reduce_grammar); 105 | action.shift_to.forEach((to: number) => { 106 | console.error("shift to " + to.toString() + ":", this.parsingtable[to]); 107 | }); 108 | action.reduce_grammar.forEach((grammar_id: number) => { 109 | console.error("reduce grammar " + grammar_id.toString() + ":", this.parsingtable[grammar_id]); 110 | }); 111 | console.error("parser cannot parse conflicted grammar"); 112 | flg_error = true; 113 | break; 114 | } 115 | } 116 | if (flg_error) { 117 | console.error("parse failed."); 118 | } 119 | if (result_stack.length != 1) { 120 | console.error("failed to construct tree."); 121 | } 122 | return result_stack[0]; 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/parsergenerator/closureitem.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "../def/token"; 2 | import {GrammarDB} from "./grammardb"; 3 | 4 | /** 5 | * 単一のLRアイテムであり、`S -> A . 
B [$]` のようなアイテムの規則id・ドットの位置・先読み記号の集合の情報を持つ 6 | * 7 | * [[GrammarDB]]から与えられるトークンIDをもとにして、LR(0)およびLR(1)アイテムとしてのハッシュ値を生成することができる 8 | * 9 | * Immutableであるべきオブジェクトであるため、インスタンス生成後は内部状態が変化することはないと仮定される 10 | */ 11 | export class ClosureItem { 12 | private _lr0_hash: string; 13 | private _lr1_hash: string; 14 | /** 15 | * @param {GrammarDB} grammardb 使用する構文の情報 16 | * @param {number} _rule_id 構文のid 17 | * @param {number} _dot_index ドットの位置 18 | * @param {Array} _lookaheads 先読み記号の集合 19 | */ 20 | constructor(private grammardb: GrammarDB, private _rule_id: number, private _dot_index: number, private _lookaheads: Array) { 21 | // 有効な値かどうか調べる 22 | if (!this.grammardb.hasRuleId(this._rule_id)) { 23 | throw new Error("invalid grammar id"); 24 | } 25 | if (this._dot_index < 0 || this._dot_index > this.grammardb.getRuleById(this._rule_id).pattern.length) { 26 | throw new Error("dot index out of range"); 27 | } 28 | if (this._lookaheads.length == 0) { 29 | // 必要か? 30 | throw new Error("one or more lookahead symbols needed"); 31 | } 32 | this.sortLA(); 33 | this.updateHash(); 34 | } 35 | /** 36 | * 自身の規則idを返す 37 | * 38 | * 規則idはルールの定義順に0,1,2,...と割り振られる 39 | * @returns {number} 40 | */ 41 | get rule_id(): number { 42 | return this._rule_id; 43 | } 44 | /** 45 | * 現在の読み込み位置を意味するドットの位置を返す 46 | * @returns {number} 47 | */ 48 | get dot_index(): number { 49 | return this._dot_index; 50 | } 51 | /** 52 | * LR(1)先読み記号の集合を配列として返す 53 | * 54 | * 配列のコピーではなく参照が返されるので、結果しとて得られた配列に変更を加えてはならない 55 | * @returns {Array} 56 | */ 57 | get lookaheads(): Array { 58 | return this._lookaheads; 59 | } 60 | /** 61 | * 先読み記号の配列を、[[GrammarDB]]によって割り振られるトークンid順にソートする 62 | */ 63 | private sortLA() { 64 | this.lookaheads.sort((t1: Token, t2: Token) => { 65 | return this.grammardb.getTokenId(t1) - this.grammardb.getTokenId(t2); 66 | }); 67 | } 68 | /** 69 | * ハッシュ文字列を生成する 70 | */ 71 | private updateHash() { 72 | this._lr0_hash = this.rule_id.toString() + "," + this.dot_index.toString(); 73 | let la_hash = "["; 74 | for (let i = 0; i < this.lookaheads.length; i++) { 75 | la_hash += this.grammardb.getTokenId(this.lookaheads[i]).toString(); 76 | if (i != this.lookaheads.length - 1) la_hash += ","; 77 | } 78 | la_hash += "]"; 79 | this._lr1_hash = this._lr0_hash + "," + la_hash; 80 | } 81 | /** 82 | * 先読み部分を除いたLR(0)アイテムとしてのハッシュ文字列を得る 83 | * @returns {string} 84 | */ 85 | public getLR0Hash(): string { 86 | return this._lr0_hash; 87 | } 88 | /** 89 | * 先読み部分を含めたLR(1)アイテムとしてのハッシュ文字列を得る 90 | * @returns {string} 91 | */ 92 | public getLR1Hash(): string { 93 | return this._lr1_hash; 94 | } 95 | /** 96 | * LR(0)ハッシュの一致を調べる 97 | * @param {ClosureItem} c 比較対象のLRアイテム 98 | * @returns {boolean} 99 | */ 100 | public isSameLR0(c: ClosureItem): boolean { 101 | return this.getLR0Hash() == c.getLR0Hash(); 102 | } 103 | /** 104 | * LR(1)ハッシュの一致を調べる 105 | * @param {ClosureItem} c 比較対象のLRアイテム 106 | * @returns {boolean} 107 | */ 108 | public isSameLR1(c: ClosureItem): boolean { 109 | return this.getLR1Hash() == c.getLR1Hash(); 110 | } 111 | /** 112 | * LR0部分を維持しながらLR1先読み記号ごとにClosureItemを分割し、先読み記号の数が1のClosureItemの集合を生成する 113 | */ 114 | public separateByLookAheads(): Array { 115 | // this.lookaheadsの要素数が1未満の状況は存在しない 116 | const result = []; 117 | for (const la of this.lookaheads) { 118 | result.push(new ClosureItem(this.grammardb, this.rule_id, this.dot_index, [la])); 119 | } 120 | return result; 121 | } 122 | /** 123 | * LR0部分が同じ2つのClosureItemについて、先読み部分を統合した新しいClosureItemを生成する 124 | * 125 | * 異なるLR(0)アイテムであった場合、nullを返す 126 | * @param {ClosureItem} c マージ対象のLRアイテム 127 | 
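	 * For example (illustrative, using the item notation from the class comment above): merging `S -> A . B [x]`
	 * with `S -> A . B [y]` yields a new item whose lookahead set is {x, y}, while merging it with
	 * `S -> A B . [x]` returns null because their LR(0) parts differ.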
* @returns {ClosureItem | null} 先読み部分がマージされた新しいLRアイテム 128 | */ 129 | public merge(c: ClosureItem): ClosureItem | null { 130 | // LR0部分が違っている場合はnullを返す 131 | if (!this.isSameLR0(c)) return null; 132 | // LR1部分まで同じ場合は自身を返す 133 | if (this.isSameLR1(c)) return this; 134 | // 双方のlookaheads配列はソート済みであると仮定できる 135 | let i1 = 0; 136 | let i2 = 0; 137 | const new_la = []; 138 | // 2つのLA配列をマージして新しい配列を生成する 139 | while (i1 < this.lookaheads.length || i2 < c.lookaheads.length) { 140 | if (i1 == this.lookaheads.length) { 141 | new_la.push(c.lookaheads[i2++]); 142 | } 143 | else if (i2 == c.lookaheads.length) { 144 | new_la.push(this.lookaheads[i1++]); 145 | } 146 | else if (this.lookaheads[i1] == c.lookaheads[i2]) { 147 | new_la.push(this.lookaheads[i1++]); 148 | i2++; 149 | } 150 | else if (this.grammardb.getTokenId(this.lookaheads[i1]) < this.grammardb.getTokenId(c.lookaheads[i2])) { 151 | new_la.push(this.lookaheads[i1++]); 152 | } 153 | else { 154 | new_la.push(c.lookaheads[i2++]); 155 | } 156 | } 157 | return new ClosureItem(this.grammardb, this.rule_id, this.dot_index, new_la); 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/parsergenerator/closureset.ts: -------------------------------------------------------------------------------- 1 | import {Token} from "../def/token"; 2 | import {ClosureItem} from "./closureitem"; 3 | import {GrammarDB} from "./grammardb"; 4 | 5 | /** 6 | * 複数のLRアイテムを保持するアイテム集合であり、インスタンス生成時に自身をクロージャー展開する 7 | * 8 | * [[GrammarDB]]から与えられるトークンIDをもとにして、LR(0)およびLR(1)アイテム集合としてのハッシュ値を生成することができる 9 | * 10 | * Immutableであるべきオブジェクトであるため、インスタンス生成後は内部状態が変化することはないと仮定される 11 | */ 12 | export class ClosureSet { 13 | // インスタンス生成後に内部状態が変化することはないものとする 14 | private _lr0_hash: string; 15 | private _lr1_hash: string; 16 | /** 17 | * @param {GrammarDB} grammardb 使用する構文の情報 18 | * @param {Array} closureset 19 | */ 20 | constructor(private grammardb: GrammarDB, private closureset: Array) { 21 | this.expandClosure(); 22 | this.sort(); 23 | this.updateHash(); 24 | } 25 | /** 26 | * 自身が保持する複数の[[ClosureItem]]は、常にLR(1)ハッシュによってソートされた状態に保たれているようにする 27 | */ 28 | private sort() { 29 | this.closureset.sort((i1: ClosureItem, i2: ClosureItem) => { 30 | if (i1.getLR1Hash() < i2.getLR1Hash()) return -1; 31 | else if (i1.getLR1Hash() > i2.getLR1Hash()) return 1; 32 | return 0; 33 | }); 34 | } 35 | /** 36 | * 保持しているLRアイテムの数 37 | */ 38 | get size() { 39 | return this.closureset.length; 40 | } 41 | /** 42 | * 保持している[[ClosureItem]]の配列を得る 43 | * @param {boolean} prevent_copy trueを与えると配列をコピーせず返す 44 | * 45 | * 得られた配列に変更が加えられないと保証される場合に用いる 46 | * @returns {Array} 47 | */ 48 | public getArray(prevent_copy: boolean = false): Array { 49 | if (prevent_copy) return this.closureset; 50 | // デフォルトではコピーして返す(パフォーマンスは少し落ちる) 51 | return this.closureset.concat(); 52 | } 53 | /** 54 | * LRアイテムが集合に含まれているかどうかを調べる 55 | * 56 | * @param {ClosureItem} item 57 | * @returns {boolean} 58 | */ 59 | public includes(item: ClosureItem): boolean { 60 | // 二分探索を用いて高速に探索する 61 | let min = 0; 62 | let max = this.closureset.length - 1; 63 | while (min <= max) { 64 | const mid = min + Math.floor((max - min) / 2); 65 | if (item.getLR1Hash() < this.closureset[mid].getLR1Hash()) { 66 | max = mid - 1; 67 | } 68 | else if (item.getLR1Hash() > this.closureset[mid].getLR1Hash()) { 69 | min = mid + 1; 70 | } 71 | else { 72 | // itemとclosureset[mid]が等しい 73 | return true; 74 | } 75 | } 76 | return false; 77 | } 78 | /** 79 | * LR(0)ハッシュの一致を調べる 80 | * @param {ClosureSet} cs 比較対象のアイテム集合 81 | * @returns 
{boolean} 82 | */ 83 | public isSameLR0(cs: ClosureSet): boolean { 84 | return this.getLR0Hash() == cs.getLR0Hash(); 85 | } 86 | /** 87 | * LR(1)ハッシュの一致を調べる 88 | * @param {ClosureSet} cs 比較対象のアイテム集合 89 | * @returns {boolean} 90 | */ 91 | public isSameLR1(cs: ClosureSet): boolean { 92 | return this.getLR1Hash() == cs.getLR1Hash(); 93 | } 94 | /** 95 | * ハッシュ文字列を生成する 96 | */ 97 | private updateHash() { 98 | let lr0_hash = ""; 99 | let lr1_hash = ""; 100 | for (let i = 0; i < this.closureset.length; i++) { 101 | lr0_hash += this.closureset[i].getLR0Hash(); 102 | lr1_hash += this.closureset[i].getLR1Hash(); 103 | if (i != this.closureset.length - 1) { 104 | lr0_hash += "|"; 105 | lr1_hash += "|"; 106 | } 107 | } 108 | this._lr0_hash = lr0_hash; 109 | this._lr1_hash = lr1_hash; 110 | } 111 | /** 112 | * LR(0)アイテム集合としてのハッシュ文字列を得る 113 | * @returns {string} 114 | */ 115 | public getLR0Hash() { 116 | return this._lr0_hash; 117 | } 118 | /** 119 | * LR(1)アイテム集合としてのハッシュ文字列を得る 120 | * @returns {string} 121 | */ 122 | public getLR1Hash() { 123 | return this._lr1_hash; 124 | } 125 | /** 126 | * LR(0)部分が同じ2つのClosureSetについて、先読み部分を統合した新しいClosureSetを生成する 127 | * 128 | * 異なるLR(0)アイテム集合であった場合、nullを返す 129 | * @param {ClosureSet} cs マージ対象のアイテム集合 130 | * @returns {ClosureSet | null} 先読み部分がマージされた新しいアイテム集合 131 | */ 132 | public mergeLA(cs: ClosureSet): ClosureSet | null { 133 | // LR0部分が違っている場合はnullを返す 134 | if (!this.isSameLR0(cs)) return null; 135 | // LR1部分まで同じ場合は自身を返す 136 | if (this.isSameLR1(cs)) return this; 137 | const a1 = this.getArray(); 138 | const a2 = cs.getArray(); 139 | const new_set: Array = []; 140 | // 2つの配列においてLR部分は順序を含めて等しい 141 | for (let i = 0; i < a1.length; i++) { 142 | const new_item = a1[i].merge(a2[i]); 143 | if (new_item != null) new_set.push(new_item); 144 | } 145 | return new ClosureSet(this.grammardb, new_set); 146 | } 147 | 148 | /** 149 | * クロージャー展開を行う 150 | * 151 | * TODO: リファクタリング 152 | */ 153 | private expandClosure() { 154 | // 展開処理中はClosureItemのlookaheadsの要素数を常に1に保つこととする 155 | // 初期化 156 | const set: Array = []; 157 | // ClosureItemをlookaheadsごとに分解する 158 | for (const ci of this.closureset) { 159 | set.push(...ci.separateByLookAheads()); 160 | } 161 | this.closureset = set; 162 | this.sort(); 163 | 164 | // 変更がなくなるまで繰り返す 165 | let index = 0; 166 | while (index < this.closureset.length) { 167 | const ci = this.closureset[index++]; 168 | const pattern = this.grammardb.getRuleById(ci.rule_id).pattern; 169 | 170 | if (ci.dot_index == pattern.length) continue; // .が末尾にある場合はスキップ 171 | const follow = pattern[ci.dot_index]; 172 | if (!this.grammardb.symbols.isNonterminalSymbol(follow)) continue; // .の次の記号が非終端記号でないならばスキップ 173 | 174 | // クロージャー展開を行う 175 | 176 | // 先読み記号を導出 177 | // ci.lookaheadsは要素数1のため、0番目のインデックスのみを参照すればよい 178 | const lookaheads = [...this.grammardb.first.get(pattern.slice(ci.dot_index + 1).concat(ci.lookaheads[0])).values()]; 179 | lookaheads.sort((t1: Token, t2: Token) => { 180 | return this.grammardb.getTokenId(t1) - this.grammardb.getTokenId(t2); 181 | }); 182 | 183 | // symbolを左辺にもつ全ての規則を、先読み記号を付与して追加 184 | const rules = this.grammardb.findRules(follow); 185 | for (const {id} of rules) { 186 | for (const la of lookaheads) { 187 | const new_ci = new ClosureItem(this.grammardb, id, 0, [la]); 188 | // 重複がなければ新しいアイテムを追加する 189 | let flg_duplicated = false; 190 | for (const existing_item of this.closureset) { 191 | if (new_ci.isSameLR1(existing_item)) { 192 | flg_duplicated = true; 193 | break; 194 | } 195 | } 196 | if (!flg_duplicated) { 197 | 
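					// (Illustrative trace, assuming the arithmetic grammar from src/sample.ts with start symbol EXP:
					// starting from the kernel item "S' -> . EXP [$]", this loop first adds "EXP -> . EXP PLUS TERM [$]"
					// and "EXP -> . TERM [$]"; re-scanning those adds the same EXP items again with lookahead [PLUS],
					// since First("PLUS TERM $") = {PLUS}, and then the TERM and ATOM rules with lookaheads drawn from
					// {$, PLUS, ASTERISK}. Each such item is pushed here exactly once; items that differ only in their
					// lookaheads are combined by the merge step at the end of this method.)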
this.closureset.push(new_ci); 198 | } 199 | } 200 | } 201 | } 202 | this.sort(); 203 | 204 | // ClosureItemの先読み部分をマージする 205 | const tmp = this.closureset; 206 | this.closureset = []; 207 | let merged_lookaheads = []; 208 | for (let i = 0; i < tmp.length; i++) { 209 | merged_lookaheads.push(tmp[i].lookaheads[0]); 210 | if (i == tmp.length - 1 || !tmp[i].isSameLR0(tmp[i + 1])) { 211 | this.closureset.push(new ClosureItem(this.grammardb, tmp[i].rule_id, tmp[i].dot_index, merged_lookaheads)); 212 | merged_lookaheads = []; 213 | } 214 | } 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /src/parsergenerator/dfagenerator.ts: -------------------------------------------------------------------------------- 1 | import {SYMBOL_EOF, Token} from "../def/token"; 2 | import {ClosureItem} from "./closureitem"; 3 | import {ClosureSet} from "./closureset"; 4 | import {GrammarDB} from "./grammardb"; 5 | 6 | export type DFAEdge = Map; 7 | export type DFANode = { closure: ClosureSet, edge: DFAEdge }; 8 | export type DFA = Array; 9 | 10 | /** 11 | * 構文規則からLR(1)DFAおよびLALR(1)DFAを生成する 12 | */ 13 | export class DFAGenerator { 14 | private lr_dfa: DFA; 15 | private lalr_dfa: DFA; 16 | /** 17 | * @param {GrammarDB} grammardb 構文規則 18 | */ 19 | constructor(private grammardb: GrammarDB) { 20 | this.generateDFA(); 21 | this.mergeLA(); 22 | } 23 | /** 24 | * LR(1)DFAを得る 25 | * @returns {DFA} 26 | */ 27 | public getLR1DFA(): DFA { 28 | return this.lr_dfa; 29 | } 30 | /** 31 | * LALR(1)DFAを得る 32 | * @returns {DFA} 33 | */ 34 | public getLALR1DFA(): DFA { 35 | return this.lalr_dfa; 36 | } 37 | /** 38 | * DFAの生成 39 | */ 40 | private generateDFA(): void { 41 | const initial_item: ClosureItem = new ClosureItem(this.grammardb, -1, 0, [SYMBOL_EOF]); 42 | const initial_set: ClosureSet = new ClosureSet(this.grammardb, [initial_item]); 43 | const dfa: DFA = [{closure: initial_set, edge: new Map()}]; 44 | 45 | // 変更がなくなるまでループ 46 | let flg_changed = true; 47 | let i = 0; 48 | while (flg_changed) { 49 | flg_changed = false; 50 | while (i < dfa.length) { 51 | const closure = dfa[i].closure; 52 | const edge = dfa[i].edge; 53 | const new_sets: Map = this.generateNewClosureSets(closure); 54 | 55 | // 与えられたDFANodeと全く同じDFANodeがある場合、そのindexを返す 56 | // 見つからなければ-1を返す 57 | const getIndexOfDuplicatedNode = (dfa: DFA, new_node: DFANode): number => { 58 | let index = -1; 59 | for (const [i, node] of dfa.entries()) { 60 | if (new_node.closure.isSameLR1(node.closure)) { 61 | index = i; 62 | break; 63 | } 64 | } 65 | return index; 66 | }; 67 | // 新しいノードを生成する 68 | for (const [edge_label, cs] of new_sets) { 69 | const new_node: DFANode = {closure: cs, edge: new Map()}; 70 | // 既存のNodeのなかに同一のClosureSetを持つものがないか調べる 71 | const duplicated_index = getIndexOfDuplicatedNode(dfa, new_node); 72 | let index_to; 73 | if (duplicated_index == -1) { 74 | // 既存の状態と重複しない 75 | dfa.push(new_node); 76 | index_to = dfa.length - 1; 77 | flg_changed = true; 78 | } 79 | else { 80 | // 既存の状態と規則が重複する 81 | // 新しいノードの追加は行わず、重複する既存ノードに対して辺を張る 82 | index_to = duplicated_index; 83 | } 84 | // 辺を追加する 85 | if (!edge.has(edge_label)) { 86 | edge.set(edge_label, index_to); 87 | // 新しい辺が追加された 88 | flg_changed = true; 89 | // DFAを更新 90 | dfa[i] = {closure, edge}; 91 | } 92 | } 93 | i++; 94 | } 95 | i = 0; 96 | } 97 | this.lr_dfa = dfa; 98 | } 99 | /** 100 | * LR(1)オートマトンの先読み部分をマージして、LALR(1)オートマトンを作る 101 | */ 102 | private mergeLA(): void { 103 | if (this.lalr_dfa !== undefined || this.lr_dfa === undefined) return; 104 | const base: 
Array = this.lr_dfa.slice(); // nullを許容する 105 | const merge_to: Map = new Map(); // マージ先への対応関係を保持する 106 | 107 | for (let i = 0; i < base.length; i++) { 108 | if (base[i] === null) continue; 109 | for (let ii = i + 1; ii < base.length; ii++) { 110 | if (base[ii] === null) continue; 111 | // LR(0)アイテムセット部分が重複 112 | if (base[i]!.closure.isSameLR0(base[ii]!.closure)) { 113 | // ii番目の先読み部分をi番目にマージする 114 | // インデックス番号の大きい方が削除される 115 | // 辺情報は、削除された要素の持つ辺の対象もいずれマージされて消えるため操作しなくてよい 116 | 117 | // 更新 118 | // Nodeに変更をかけるとLR(1)DFAの中身まで変化してしまうため新しいオブジェクトを生成する 119 | base[i] = {closure: base[i]!.closure.mergeLA(base[ii]!.closure)!, edge: base[i]!.edge}; 120 | // ii番目を削除 121 | base[ii] = null; 122 | // マージ元->マージ先への対応関係を保持 123 | merge_to.set(ii, i); 124 | } 125 | } 126 | } 127 | // 削除した部分を配列から抜き取る 128 | const prev_length = base.length; // ノードをマージする前のノード総数 129 | const fix = new Array(prev_length); // (元のindex->現在のindex)の対応表を作る 130 | let d = 0; // ずれ 131 | // nullで埋めた部分を消すことによるindexの変化 132 | for (let i = 0; i < prev_length; i++) { 133 | if (base[i] === null) d += 1; // ノードが削除されていた場合、以降のインデックスを1つずらす 134 | else fix[i] = i - d; 135 | } 136 | // 配列からnull埋めした部分を削除したものを作る 137 | const shortened: Array = []; 138 | for (const node of base) { 139 | if (node !== null) shortened.push(node); 140 | } 141 | // fixのうち、ノードが削除された部分を正しい対応で埋める 142 | for (const [from, to] of merge_to) { 143 | let index = to; 144 | while (merge_to.has(index)) index = merge_to.get(index)!; 145 | if (index !== to) merge_to.set(to, index); // 対応表を更新しておく 146 | fix[from] = fix[index]; // toを繰り返し辿っているので未定義部分へのアクセスは発生しない 147 | } 148 | 149 | const result: DFA = []; 150 | // インデックスの対応表をもとに辺情報を書き換える 151 | for (const node of shortened) { 152 | const new_edge = new Map(); 153 | for (const [token, node_index] of node.edge) { 154 | new_edge.set(token, fix[node_index]); 155 | } 156 | result.push({closure: node.closure, edge: new_edge}); 157 | } 158 | this.lalr_dfa = result; 159 | } 160 | /** 161 | * 既存のClosureSetから新しい規則を生成し、対応する記号ごとにまとめる 162 | * @param closureset 163 | */ 164 | private generateNewClosureSets(closureset: ClosureSet): Map { 165 | const tmp: Map> = new Map>(); 166 | // 規則から新しい規則を生成し、対応する記号ごとにまとめる 167 | for (const {rule_id, dot_index, lookaheads} of closureset.getArray()) { 168 | const pattern = this.grammardb.getRuleById(rule_id).pattern; 169 | if (dot_index == pattern.length) continue; // .が末尾にある場合はスキップ 170 | const new_ci = new ClosureItem(this.grammardb, rule_id, dot_index + 1, lookaheads); 171 | const edge_label: Token = pattern[dot_index]; 172 | 173 | let items: Array; 174 | if (tmp.has(edge_label)) { 175 | // 既に同じ記号が登録されている 176 | items = tmp.get(edge_label)!; 177 | } 178 | else { 179 | // 同じ記号が登録されていない 180 | items = []; 181 | } 182 | items.push(new_ci); 183 | tmp.set(edge_label, items); 184 | } 185 | // ClosureItemの配列からClosureSetに変換 186 | const result: Map = new Map(); 187 | for (const [edge_label, items] of tmp) { 188 | result.set(edge_label, new ClosureSet(this.grammardb, items)); 189 | } 190 | return result; 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/parsergenerator/firstset.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {SYMBOL_EOF, Token} from "../def/token"; 3 | import {NullableSet} from "./nullableset"; 4 | import {SymbolDiscriminator} from "./symboldiscriminator"; 5 | 6 | type Constraint = Array<{ superset: Token, subset: Token }>; 7 | 8 | /** 9 | * First集合 10 | */ 
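// Worked example (a sketch only, reusing the arithmetic grammar defined in src/sample.ts;
// the token names EXP/TERM/ATOM/PLUS/ASTERISK/DIGITS/LPAREN are taken from there):
//   EXP  -> EXP PLUS TERM | TERM
//   TERM -> TERM ASTERISK ATOM | ATOM
//   ATOM -> DIGITS | LPAREN EXP RPAREN
// Every derivation of ATOM, TERM and EXP starts with DIGITS or LPAREN, so
//   First(ATOM) = First(TERM) = First(EXP) = {DIGITS, LPAREN}
// and for a symbol sequence First(["PLUS", "TERM"]) = {PLUS}, because PLUS is not nullable.
// Intended use, matching the constructor below:
//   const symbols = new SymbolDiscriminator(grammar);
//   const first = new FirstSet(grammar, symbols);
//   first.get("EXP");            // Set {"DIGITS", "LPAREN"}
//   first.get(["PLUS", "TERM"]); // Set {"PLUS"}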
11 | export class FirstSet { 12 | private first_map: Map>; 13 | private nulls: NullableSet; 14 | 15 | /** 16 | * @param {GrammarDefinition} grammar 構文規則 17 | * @param {SymbolDiscriminator} symbols 終端/非終端記号の判別に用いる分類器 18 | */ 19 | constructor(private grammar: GrammarDefinition, private symbols: SymbolDiscriminator) { 20 | this.first_map = new Map>(); 21 | this.nulls = new NullableSet(this.grammar); 22 | this.generateFirst(); 23 | } 24 | 25 | /** 26 | * First集合を生成する 27 | */ 28 | private generateFirst() { 29 | // Firstを導出 30 | const first_result: Map> = new Map>(); 31 | // 初期化 32 | // FIRST($) = {$} だけ手動で追加 33 | first_result.set(SYMBOL_EOF, new Set([SYMBOL_EOF])); 34 | // 終端記号Xに対してFirst(X)=X 35 | const terminal_symbols = this.symbols.getTerminalSymbols(); 36 | terminal_symbols.forEach((value: Token) => { 37 | first_result.set(value, new Set([value])); 38 | }); 39 | // 非終端記号はFirst(Y)=∅で初期化 40 | const nonterminal_symbols = this.symbols.getNonterminalSymbols(); 41 | nonterminal_symbols.forEach((value: Token) => { 42 | first_result.set(value, new Set()); 43 | }); 44 | 45 | // 包含についての制約を生成 46 | const constraint: Constraint = []; 47 | for (const rule of this.grammar.rules) { 48 | const sup: Token = rule.ltoken; 49 | // 右辺の左から順に、non-nullableな記号が現れるまで制約に追加 50 | // 最初のnon-nullableな記号は制約に含める 51 | for (const sub of rule.pattern) { 52 | if (sup != sub) { 53 | constraint.push({superset: sup, subset: sub}); 54 | } 55 | if (!this.nulls.isNullable(sub)) { 56 | break; 57 | } 58 | } 59 | } 60 | 61 | // 制約解消 62 | let flg_changed = true; 63 | while (flg_changed) { 64 | flg_changed = false; 65 | for (const pair of constraint) { 66 | const sup: Token = pair.superset; 67 | const sub: Token = pair.subset; 68 | const superset: Set = first_result.get(sup)!; 69 | const subset: Set = first_result.get(sub)!; 70 | subset.forEach((token: Token) => { 71 | // subset内の要素がsupersetに含まれていない 72 | if (!superset.has(token)) { 73 | // subset内の要素をsupersetに入れる 74 | superset.add(token); 75 | flg_changed = true; 76 | } 77 | }); 78 | // First集合を更新 79 | first_result.set(sup, superset); 80 | } 81 | } 82 | this.first_map = first_result; 83 | } 84 | 85 | /** 86 | * 記号または記号列を与えて、その記号から最初に導かれうる非終端記号の集合を返す 87 | * @param {Token | Token[]} arg 88 | * @returns {Set} 89 | */ 90 | public get(arg: Token | Token[]): Set { 91 | // 単一の記号の場合 92 | if (!Array.isArray(arg)) { 93 | if (!this.first_map.has(arg)) { 94 | throw new Error(`invalid token found: ${arg}`); 95 | } 96 | return this.first_map.get(arg)!; 97 | } 98 | // 記号列の場合 99 | const tokens: Token[] = arg; 100 | 101 | // 不正な記号を発見 102 | for (const token of tokens) { 103 | if (!this.first_map.has(token)) { 104 | throw new Error(`invalid token found: ${token}`); 105 | } 106 | } 107 | const result: Set = new Set(); 108 | for (const token of tokens) { 109 | const add = this.first_map.get(token)!; // トークン列の先頭から順にFirst集合を取得 110 | // 追加 111 | add.forEach((t: Token) => { 112 | if (!result.has(t)) { 113 | result.add(t); 114 | } 115 | }); 116 | if (!this.nulls.isNullable(token)) { 117 | // 現在のトークン ∉ Nulls ならばここでストップ 118 | break; 119 | } 120 | } 121 | return result; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/parsergenerator/grammardb.ts: -------------------------------------------------------------------------------- 1 | import {Language, GrammarDefinition, GrammarRule} from "../def/language"; 2 | import {SYMBOL_EOF, SYMBOL_SYNTAX, Token} from "../def/token"; 3 | import {FirstSet} from "./firstset"; 4 | import {SymbolDiscriminator} from 
"./symboldiscriminator"; 5 | 6 | /** 7 | * 言語定義から得られる、構文規則に関する情報を管理するクラス 8 | */ 9 | export class GrammarDB { 10 | private grammar: GrammarDefinition; 11 | private _start_symbol: Token; 12 | private _first: FirstSet; 13 | private _symbols: SymbolDiscriminator; 14 | private tokenmap: Map; 15 | private tokenid_counter: number; 16 | private rulemap: Map>; 17 | 18 | constructor(language: Language) { 19 | this.grammar = language.grammar; 20 | this._start_symbol = language.grammar.start_symbol; 21 | this._symbols = new SymbolDiscriminator(this.grammar); 22 | this._first = new FirstSet(this.grammar, this.symbols); 23 | 24 | this.initTokenMap(); 25 | this.initDefMap(); 26 | } 27 | 28 | /** 29 | * それぞれの記号にidを割り振り、Token->numberの対応を生成 30 | */ 31 | private initTokenMap() { 32 | this.tokenid_counter = 0; 33 | this.tokenmap = new Map(); 34 | 35 | // 入力の終端$の登録 36 | this.tokenmap.set(SYMBOL_EOF, this.tokenid_counter++); 37 | // 仮の開始記号S'の登録 38 | this.tokenmap.set(SYMBOL_SYNTAX, this.tokenid_counter++); 39 | 40 | // 左辺値の登録 41 | for (const rule of this.grammar.rules) { 42 | const ltoken = rule.ltoken; 43 | // 構文規則の左辺に現れる記号は非終端記号 44 | if (!this.tokenmap.has(ltoken)) { 45 | this.tokenmap.set(ltoken, this.tokenid_counter++); 46 | } 47 | } 48 | // 右辺値の登録 49 | for (const rule of this.grammar.rules) { 50 | for (const symbol of rule.pattern) { 51 | if (!this.tokenmap.has(symbol)) { 52 | // 非終端記号でない(=左辺値に現れない)場合、終端記号である 53 | this.tokenmap.set(symbol, this.tokenid_counter++); 54 | } 55 | } 56 | } 57 | } 58 | 59 | /** 60 | * ある記号を左辺とするような構文ルールとそのidの対応を生成 61 | */ 62 | private initDefMap() { 63 | this.rulemap = new Map>(); 64 | for (let i = 0; i < this.grammar.rules.length; i++) { 65 | let tmp: Array<{ id: number, rule: GrammarRule }>; 66 | if (this.rulemap.has(this.grammar.rules[i].ltoken)) { 67 | tmp = this.rulemap.get(this.grammar.rules[i].ltoken)!; 68 | } 69 | else { 70 | tmp = []; 71 | } 72 | tmp.push({id: i, rule: this.grammar.rules[i]}); 73 | this.rulemap.set(this.grammar.rules[i].ltoken, tmp); 74 | } 75 | } 76 | 77 | /** 78 | * 開始記号を得る 79 | */ 80 | get start_symbol(): Token { 81 | return this._start_symbol; 82 | } 83 | /** 84 | * First集合を得る 85 | * @returns {FirstSet} 86 | */ 87 | get first(): FirstSet { 88 | return this._first; 89 | } 90 | /** 91 | * 終端/非終端記号分類器を得る 92 | * @returns {SymbolDiscriminator} 93 | */ 94 | get symbols(): SymbolDiscriminator { 95 | return this._symbols; 96 | } 97 | /** 98 | * 構文規則がいくつあるかを返す ただし-1番の規則は含めない 99 | */ 100 | get rule_size(): number { 101 | return this.grammar.rules.length; 102 | } 103 | 104 | /** 105 | * 与えられたidの規則が存在するかどうかを調べる 106 | * @param {number} id 107 | * @returns {boolean} 108 | */ 109 | public hasRuleId(id: number): boolean { 110 | return id >= -1 && id < this.rule_size; 111 | } 112 | /** 113 | * 非終端記号xに対し、それが左辺として対応する定義を得る 114 | * 115 | * 対応する定義が存在しない場合は空の配列を返す 116 | * @param x 117 | */ 118 | public findRules(x: Token): Array<{ id: number, rule: GrammarRule }> { 119 | if (this.rulemap.has(x)) { 120 | return this.rulemap.get(x)!; 121 | } 122 | return []; 123 | } 124 | /** 125 | * 規則idに対応した規則を返す 126 | * 127 | * -1が与えられた時は S' -> S $の規則を返す 128 | * @param id 129 | */ 130 | public getRuleById(id: number): GrammarRule { 131 | if (id == -1) { 132 | return {ltoken: SYMBOL_SYNTAX, pattern: [this.start_symbol]}; 133 | // return {ltoken: SYMBOL_SYNTAX, pattern: [this.start_symbol, SYMBOL_EOF]}; 134 | } 135 | else if (id >= 0 && id < this.grammar.rules.length) return this.grammar.rules[id]; 136 | throw new Error("grammar id out of range"); 137 | } 138 | /** 139 | * 
[[Token]]を与えると一意なidを返す 140 | * @param {Token} token 141 | * @returns {number} 142 | */ 143 | public getTokenId(token: Token): number { 144 | if (!this.tokenmap.has(token)) { 145 | // this.tokenmap.set(token, this.tokenid_counter++); 146 | // return -1; 147 | throw new Error(`invalid token ${token}`); 148 | } 149 | return this.tokenmap.get(token)!; 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/parsergenerator/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./closureset"; 2 | export * from "./closureitem"; 3 | export * from "./dfagenerator"; 4 | export * from "./firstset"; 5 | export * from "./nullableset"; 6 | export * from "./parsergenerator"; 7 | export * from "./symboldiscriminator"; 8 | export * from "./grammardb"; 9 | -------------------------------------------------------------------------------- /src/parsergenerator/nullableset.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {Token} from "../def/token"; 3 | 4 | /** 5 | * ある非終端記号から空列が導かれうるかどうかを判定する 6 | */ 7 | export class NullableSet { 8 | private nulls: Set; 9 | /** 10 | * @param {GrammarDefinition} grammar 構文規則 11 | */ 12 | constructor(private grammar: GrammarDefinition) { 13 | this.generateNulls(); 14 | } 15 | private generateNulls() { 16 | // 制約条件を導出するために、 17 | // 空列になりうる記号の集合nullsを導出 18 | this.nulls = new Set(); 19 | for (const rule of this.grammar.rules) { 20 | // 右辺の記号の数が0の規則を持つ記号は空列になりうる 21 | if (rule.pattern.length == 0) { 22 | this.nulls.add(rule.ltoken); 23 | } 24 | } 25 | 26 | // 変更が起きなくなるまでループする 27 | let flg_changed: boolean = true; 28 | while (flg_changed) { 29 | flg_changed = false; 30 | for (const rule of this.grammar.rules) { 31 | // 既にnullsに含まれていればスキップ 32 | if (this.isNullable(rule.ltoken)) continue; 33 | 34 | let flg_nulls = true; 35 | // 右辺に含まれる記号がすべてnullableの場合はその左辺はnullable 36 | for (const token of rule.pattern) { 37 | if (!this.isNullable(token)) { 38 | // 一つでもnullableでない記号があるならnon-nullable 39 | flg_nulls = false; 40 | break; 41 | } 42 | } 43 | if (flg_nulls) { 44 | flg_changed = true; 45 | this.nulls.add(rule.ltoken); 46 | } 47 | } 48 | } 49 | } 50 | /** 51 | * 与えられた[[Token]]がNullableかどうかを調べる 52 | * @param {Token} token 53 | * @returns {boolean} 54 | */ 55 | public isNullable(token: Token) { 56 | return this.nulls.has(token); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/parsergenerator/parsergenerator.ts: -------------------------------------------------------------------------------- 1 | import {Language} from "../def/language"; 2 | import {ParsingTable, AcceptOperation, ConflictedOperation, GotoOperation, ParsingOperation, ReduceOperation, ShiftOperation} from "../def/parsingtable"; 3 | import {SYMBOL_EOF, Token} from "../def/token"; 4 | import {ParserFactory} from "../parser/factory"; 5 | import {Parser} from "../parser/parser"; 6 | import {DFA, DFAGenerator} from "./dfagenerator"; 7 | import {GrammarDB} from "./grammardb"; 8 | 9 | /** 10 | * 言語定義から構文解析表および構文解析器を生成するパーサジェネレータ 11 | */ 12 | export class ParserGenerator { 13 | private parsing_table: ParsingTable; 14 | private table_type: "LR1" | "LALR1" | "CONFLICTED"; 15 | private grammardb: GrammarDB; 16 | private dfa_generator: DFAGenerator; 17 | 18 | /** 19 | * @param {Language} language 言語定義 20 | */ 21 | constructor(private language: Language) { 22 | this.grammardb = new 
GrammarDB(this.language); 23 | this.dfa_generator = new DFAGenerator(this.grammardb); 24 | this.init(); 25 | } 26 | 27 | /** 28 | * 構文解析表の生成 29 | */ 30 | private init() { 31 | const lalr_result = this.generateParsingTable(this.dfa_generator.getLALR1DFA()); 32 | if (lalr_result.success) { 33 | this.parsing_table = lalr_result.table; 34 | this.table_type = "LALR1"; 35 | return; 36 | } 37 | // LALR(1)構文解析表の生成に失敗 38 | // LR(1)構文解析表の生成を試みる 39 | console.error("LALR parsing conflict found. use LR(1) table."); 40 | const lr_result = this.generateParsingTable(this.dfa_generator.getLR1DFA()); 41 | this.parsing_table = lr_result.table; 42 | this.table_type = "LR1"; 43 | if (!lr_result.success) { 44 | // LR(1)構文解析表の生成に失敗 45 | this.table_type = "CONFLICTED"; 46 | console.error("LR(1) parsing conflict found. use LR(1) conflicted table."); 47 | } 48 | } 49 | 50 | /** 51 | * 構文解析器を得る 52 | * @returns {Parser} 53 | */ 54 | public getParser(): Parser { 55 | return ParserFactory.create(this.language, this.parsing_table); 56 | } 57 | 58 | /** 59 | * 構文解析表を得る 60 | * @returns {ParsingTable} 61 | */ 62 | public getParsingTable(): ParsingTable { 63 | return this.parsing_table; 64 | } 65 | 66 | /** 67 | * 生成された構文解析表に衝突が発生しているかどうかを調べる 68 | * @returns {boolean} 69 | */ 70 | public isConflicted(): boolean { 71 | return this.table_type === "CONFLICTED"; 72 | } 73 | 74 | /** 75 | * 構文解析表の種類を得る 76 | * 77 | * パーサジェネレータはまずLALR(1)構文解析表を生成し、LALR(1)構文解析表にコンフリクトを検知した場合はLR(1)構文解析表を使用する 78 | * @returns {"LR1" | "LALR1" | "CONFLICTED"} 79 | */ 80 | public getTableType(): "LR1" | "LALR1" | "CONFLICTED" { 81 | return this.table_type; 82 | } 83 | 84 | /** 85 | * DFAから構文解析表を構築する 86 | * @param {DFA} dfa 87 | */ 88 | private generateParsingTable(dfa: DFA): { table: ParsingTable, success: boolean } { 89 | const parsing_table: ParsingTable = []; 90 | let flg_conflicted = false; 91 | 92 | for (const node of dfa) { 93 | const table_row = new Map(); 94 | // 辺をもとにshiftとgotoオペレーションを追加 95 | for (const [label, to] of node.edge) { 96 | if (this.grammardb.symbols.isTerminalSymbol(label)) { 97 | // ラベルが終端記号の場合 98 | // shiftオペレーションを追加 99 | const operation: ShiftOperation = {type: "shift", to}; 100 | table_row.set(label, operation); 101 | } 102 | else if (this.grammardb.symbols.isNonterminalSymbol(label)) { 103 | // ラベルが非終端記号の場合 104 | // gotoオペレーションを追加 105 | const operation: GotoOperation = {type: "goto", to}; 106 | table_row.set(label, operation); 107 | } 108 | } 109 | 110 | // Closureをもとにacceptとreduceオペレーションを追加していく 111 | for (const item of node.closure.getArray()) { 112 | // 規則末尾が.でないならスキップ 113 | // if(item.pattern.getRuleById(item.pattern.size-1) != SYMBOL_DOT) return; 114 | if (item.dot_index != this.grammardb.getRuleById(item.rule_id).pattern.length) continue; 115 | if (item.rule_id == -1) { 116 | // acceptオペレーション 117 | // この規則を読み終わると解析終了 118 | // $をラベルにacceptオペレーションを追加 119 | const operation: AcceptOperation = {type: "accept"}; 120 | table_row.set(SYMBOL_EOF, operation); 121 | continue; 122 | } 123 | for (const label of item.lookaheads) { 124 | const operation: ReduceOperation = {type: "reduce", grammar_id: item.rule_id}; 125 | // 既に同じ記号でオペレーションが登録されていないか確認 126 | 127 | if (table_row.has(label)) { 128 | // コンフリクトが発生 129 | flg_conflicted = true; // 構文解析に失敗 130 | const existing_operation = table_row.get(label)!; // 上で.has(label)のチェックを行っているためnon-nullable 131 | const conflicted_operation: ConflictedOperation = {type: "conflict", shift_to: [], reduce_grammar: []}; 132 | if (existing_operation.type == "shift") { 133 | // shift/reduce コンフリクト 134 | 
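						// (Illustrative note: an ambiguous rule such as "E -> E PLUS E" produces exactly this situation —
						// with "E PLUS E" on the stack and another PLUS as lookahead, the table could either shift the
						// PLUS or reduce by the rule. Both candidates are recorded below instead of silently choosing
						// one, so that generateParsingTable() can flag the resulting table as conflicted.)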
conflicted_operation.shift_to = [existing_operation.to]; 135 | conflicted_operation.reduce_grammar = [operation.grammar_id]; 136 | } 137 | else if (existing_operation.type == "reduce") { 138 | // reduce/reduce コンフリクト 139 | conflicted_operation.shift_to = []; 140 | conflicted_operation.reduce_grammar = [existing_operation.grammar_id, operation.grammar_id]; 141 | } 142 | else if (existing_operation.type == "conflict") { 143 | // もっとやばい衝突 144 | conflicted_operation.shift_to = existing_operation.shift_to; 145 | conflicted_operation.reduce_grammar = existing_operation.reduce_grammar.concat([operation.grammar_id]); 146 | } 147 | // とりあえず衝突したオペレーションを登録しておく 148 | table_row.set(label, conflicted_operation); 149 | } 150 | else { 151 | // 衝突しないのでreduceオペレーションを追加 152 | table_row.set(label, operation); 153 | } 154 | } 155 | } 156 | parsing_table.push(table_row); 157 | } 158 | return {table: parsing_table, success: !flg_conflicted}; 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/parsergenerator/symboldiscriminator.ts: -------------------------------------------------------------------------------- 1 | import {GrammarDefinition} from "../def/language"; 2 | import {Token} from "../def/token"; 3 | 4 | /** 5 | * 終端/非終端記号の判別を行う 6 | */ 7 | export class SymbolDiscriminator { 8 | private terminal_symbols: Set; 9 | private nonterminal_symbols: Set; 10 | constructor(grammar: GrammarDefinition) { 11 | this.terminal_symbols = new Set(); 12 | this.nonterminal_symbols = new Set(); 13 | 14 | // 左辺値の登録 15 | for (const rule of grammar.rules) { 16 | const symbol = rule.ltoken; 17 | // 構文規則の左辺に現れる記号は非終端記号 18 | this.nonterminal_symbols.add(symbol); 19 | } 20 | // 右辺値の登録 21 | for (const rule of grammar.rules) { 22 | for (const symbol of rule.pattern) { 23 | if (!this.nonterminal_symbols.has(symbol)) { 24 | // 非終端記号でない(=左辺値に現れない)場合、終端記号である 25 | this.terminal_symbols.add(symbol); 26 | } 27 | } 28 | } 29 | } 30 | /** 31 | * 終端記号の集合をSetで得る 32 | * @param {boolean} prevent_copy trueを与えるとSetをコピーせず返す 33 | * 結果に変更が加えられないと保証される場合に用いる 34 | * @returns {Set} 35 | */ 36 | public getTerminalSymbols(prevent_copy: boolean = false): Set { 37 | if (prevent_copy) return this.terminal_symbols; 38 | // コピーを返す 39 | return new Set(this.terminal_symbols); 40 | } 41 | /** 42 | * 非終端記号の集合をSetで得る 43 | * @param {boolean} prevent_copy trueを与えるとSetをコピーせず返す 44 | * 結果に変更が加えられないと保証される場合に用いる 45 | * @returns {Set} 46 | */ 47 | public getNonterminalSymbols(prevent_copy: boolean = false): Set { 48 | if (prevent_copy) return this.nonterminal_symbols; 49 | // コピーを返す 50 | return new Set(this.nonterminal_symbols); 51 | } 52 | /** 53 | * 与えられた記号が終端記号かどうかを調べる 54 | * @param {Token} symbol 55 | * @returns {boolean} 56 | */ 57 | public isTerminalSymbol(symbol: Token): boolean { 58 | return this.terminal_symbols.has(symbol); 59 | } 60 | /** 61 | * 与えられた記号が非終端記号かどうかを調べる 62 | * @param {Token} symbol 63 | * @returns {boolean} 64 | */ 65 | public isNonterminalSymbol(symbol: Token): boolean { 66 | return this.nonterminal_symbols.has(symbol); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/precompiler/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./precompiler"; 2 | export * from "./ruleparser"; 3 | -------------------------------------------------------------------------------- /src/precompiler/precompiler.ts: -------------------------------------------------------------------------------- 1 | 
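// A minimal usage sketch for the PreCompiler defined below (the output file name and the
// assumption that PreCompiler is re-exported from the package root are illustrative, not part of this file):
//   import {readFileSync, writeFileSync} from "fs";
//   import {PreCompiler} from "lavriapg";
//   const source = new PreCompiler("lavriapg").exec(readFileSync("./language", "utf8"));
//   writeFileSync("./precompiled_parser.ts", source);
// The generated module exports `language`, `parsing_table` and a ready-made `parser`,
// so the parsing table does not have to be rebuilt at runtime.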
import {Language} from "../def/language"; 2 | import {SYMBOL_EOF, Token} from "../def/token"; 3 | import {ParserGenerator} from "../parsergenerator/parsergenerator"; 4 | import {language_parser} from "./ruleparser"; 5 | import {ParsingOperation} from "../def/parsingtable"; 6 | 7 | /** 8 | * 予め構文解析器を生成しておいて利用するためのソースコードを生成する 9 | */ 10 | export class PreCompiler { 11 | /** 12 | * @param import_path パーサジェネレータをimportするためのディレクトリパス 13 | */ 14 | constructor(private import_path: string = "lavriapg") { 15 | if (import_path[import_path.length - 1] != "/") this.import_path += "/"; 16 | } 17 | /** 18 | * 構文ファイルを受け取り、それを処理できるパーサを構築するためのソースコードを返す 19 | * @param {string} input 言語定義文法によって記述された、解析対象となる言語 20 | * @returns {string} 生成されたパーサのソースコード 21 | */ 22 | public exec(input: string): string { 23 | const language: Language = language_parser.parse(input); 24 | const parsing_table = new ParserGenerator(language).getParsingTable(); 25 | let result = ""; 26 | 27 | result += `import {Token, SYMBOL_EOF} from "${this.import_path}def/token"; 28 | import {Language} from "${this.import_path}def/language"; 29 | import {ParsingOperation, ParsingTable} from "${this.import_path}def/parsingtable"; 30 | import {Parser} from "${this.import_path}parser/parser"; 31 | import {ParserFactory} from "${this.import_path}parser/factory"; 32 | 33 | export const language: Language = { 34 | lex: { 35 | rules: [ 36 | ${language.lex.rules.map(({token, pattern}) => { 37 | return `\t\t\t{token: ${token === null ? "null" : `"${token}"`}, ` + 38 | `pattern: ${pattern instanceof RegExp ? pattern : `"${pattern}"`}}`; 39 | }).join(",\n")} 40 | ] 41 | }, 42 | grammar: { 43 | rules: [ 44 | ${language.grammar.rules.map(({ltoken, pattern}) => `\t\t\t{ 45 | ltoken: "${ltoken as string}", 46 | pattern: [${pattern.map((t) => `"${t as string}"`).join(", ")}] 47 | }`).join(",\n")} 48 | ], 49 | start_symbol: "${language.grammar.start_symbol as string}" 50 | } 51 | }; 52 | 53 | export const parsing_table: ParsingTable = [ 54 | ${parsing_table.map((row: Map) => `\tnew Map([ 55 | ${(() => { 56 | let line = ""; 57 | for (const [key, value] of row) { 58 | line += `\t\t[${key === SYMBOL_EOF ? 
"SYMBOL_EOF" : `"${key as string}"`}, ${JSON.stringify(value)}],\n`; 59 | } 60 | return line.slice(0, -2); 61 | })()}`).join("\n\t]),\n")} 62 | ]) 63 | ]; 64 | 65 | export const parser: Parser = ParserFactory.create(language, parsing_table); 66 | `; 67 | return result; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/precompiler/ruleparser.ts: -------------------------------------------------------------------------------- 1 | import {LexDefinition, Language, GrammarDefinition, LexStateLabel, LexState, LexCallback, GrammarCallback} from "../def/language"; 2 | import {ParsingOperation, ParsingTable} from "../def/parsingtable"; 3 | import {SYMBOL_EOF, Token} from "../def/token"; 4 | import {Parser} from "../parser/parser"; 5 | import {ParserGenerator} from "../parsergenerator/parsergenerator"; 6 | 7 | const lex: LexDefinition = { 8 | rules: [ 9 | {token: "EXCLAMATION", pattern: "!"}, 10 | {token: "VBAR", pattern: "|"}, 11 | {token: "DOLLAR", pattern: "$"}, 12 | {token: "COLON", pattern: ":"}, 13 | {token: "SEMICOLON", pattern: ";"}, 14 | {token: "LT", pattern: "<"}, 15 | {token: "GT", pattern: ">"}, 16 | {token: "COMMA", pattern: ","}, 17 | {token: "LEX_BEGIN", pattern: "#lex_begin"}, 18 | {token: "LEX_END", pattern: "#lex_end"}, 19 | {token: "LEX_DEFAULT", pattern: "#lex_default"}, 20 | {token: "START", pattern: "#start"}, 21 | {token: "EXTEND", pattern: "#extend"}, 22 | {token: "BEGIN", pattern: "#begin"}, 23 | {token: "END", pattern: "#end"}, 24 | {token: "DEFAULT", pattern: "#default"}, 25 | {token: "LABEL", pattern: /[a-zA-Z_][a-zA-Z0-9_]*/}, 26 | { 27 | token: "REGEXP", pattern: /\/.*\/[gimuy]*/, 28 | callback: (v) => { 29 | const tmp = v.split("/"); 30 | const flags = tmp[tmp.length - 1]; 31 | const p = v.slice(1, -1 - flags.length); 32 | return ["REGEXP", new RegExp(p, flags)]; 33 | } 34 | }, 35 | {token: "STRING", pattern: /".*"/, callback: (v) => ["STRING", v.slice(1, -1)]}, 36 | {token: "STRING", pattern: /'.*'/, callback: (v) => ["STRING", v.slice(1, -1)]}, 37 | { 38 | token: "START_BLOCK", pattern: /%*{+/, 39 | callback: (value, token, lex) => { 40 | const match = /(%*)({+)/.exec(value)!; 41 | const end_delimiter = "}".repeat(match[2].length) + match[1]!; 42 | lex.callState("callback"); 43 | lex.addRule("body_block", {token: "BODY_BLOCK", pattern: new RegExp(`(?:.|\\s)*?(? 
{ 47 | lex.returnState(); 48 | lex.removeRule("body_block"); 49 | lex.removeRule("end_block"); 50 | } 51 | }); 52 | } 53 | }, 54 | {token: null, pattern: /(\r\n|\r|\n)+/}, 55 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 56 | {token: "INVALID", pattern: /./} 57 | ] 58 | }; 59 | 60 | const makeLexCallback = (body: string): LexCallback => { 61 | return new Function("value", "token", "lex", body) as LexCallback; 62 | }; 63 | 64 | const makeGrammarCallback = (body: string): GrammarCallback => { 65 | return new Function("children", "token", "lexer", body) as GrammarCallback; 66 | }; 67 | 68 | const grammar: GrammarDefinition = { 69 | rules: [ 70 | { 71 | ltoken: "LANGUAGE", 72 | pattern: ["LEX_OPTIONS", "LEX", "EX_CALLBACKS", "GRAMMAR"], 73 | callback: (c) => { 74 | let start_symbol = c[3].start_symbol; 75 | // 開始記号の指定がない場合、最初の規則に設定] 76 | if (start_symbol === null) { 77 | if (c[3].sect.length > 0) start_symbol = c[3].sect[0].ltoken; 78 | else start_symbol = ""; 79 | } 80 | const lex: LexDefinition = {rules: c[1]}; 81 | if (c[0].callbacks !== undefined) { 82 | for (const callback of c[0].callbacks) { 83 | switch (callback.type) { 84 | case "#lex_begin": 85 | lex.begin_callback = callback.callback; 86 | break; 87 | case "#lex_end": 88 | lex.end_callback = callback.callback; 89 | break; 90 | case "#lex_default": 91 | lex.default_callback = callback.callback; 92 | break; 93 | } 94 | } 95 | } 96 | if (c[0].start_state !== undefined) { 97 | lex.start_state = c[0].start_state; 98 | } 99 | if (c[0].states.length > 0) { 100 | lex.states = c[0].states; 101 | } 102 | const grammar: GrammarDefinition = {rules: c[3].grammar, start_symbol}; 103 | if (c[2] !== undefined) { 104 | for (const callback of c[2]) { 105 | switch (callback.type) { 106 | case "#begin": 107 | grammar.begin_callback = callback.callback; 108 | break; 109 | case "#end": 110 | grammar.end_callback = callback.callback; 111 | break; 112 | case "#default": 113 | grammar.default_callback = callback.callback; 114 | break; 115 | } 116 | } 117 | } 118 | return {lex, grammar}; 119 | } 120 | }, 121 | { 122 | ltoken: "LANGUAGE", 123 | pattern: ["LEX_OPTIONS", "LEX", "GRAMMAR"], 124 | callback: (c) => { 125 | let start_symbol = c[2].start_symbol; 126 | // 開始記号の指定がない場合、最初の規則に設定] 127 | if (start_symbol === null) { 128 | if (c[2].sect.length > 0) start_symbol = c[2].sect[0].ltoken; 129 | else start_symbol = ""; 130 | } 131 | const lex: LexDefinition = {rules: c[1]}; 132 | if (c[0].callbacks !== undefined) { 133 | for (const callback of c[0].callbacks) { 134 | switch (callback.type) { 135 | case "#lex_begin": 136 | lex.begin_callback = callback.callback; 137 | break; 138 | case "#lex_end": 139 | lex.end_callback = callback.callback; 140 | break; 141 | case "#lex_default": 142 | lex.default_callback = callback.callback; 143 | break; 144 | } 145 | } 146 | } 147 | if (c[0].start_state !== undefined) { 148 | lex.start_state = c[0].start_state; 149 | } 150 | if (c[0].states.length > 0) { 151 | lex.states = c[0].states; 152 | } 153 | return {lex, grammar: {rules: c[2].grammar, start_symbol: start_symbol}}; 154 | } 155 | }, 156 | { 157 | ltoken: "LEX_OPTIONS", 158 | pattern: ["OPTIONAL_LEX_EX_CALLBACKS", "LEX_STATES"], 159 | callback: (c) => { 160 | const states: Array = []; 161 | const states_set = new Set(); 162 | for (const inherit of c[1].inheritance) { 163 | for (const sub_state of inherit.sub) { 164 | if (states_set.has(inherit.sub)) { 165 | // 既に登録されている場合、一つのstateが複数のstateを継承することはできない 166 | continue; 167 | 
} 168 | states.push({label: sub_state, inheritance: inherit.base}); 169 | states_set.add(sub_state); 170 | } 171 | } 172 | return {callbacks: c[0], start_state: c[1].start_state, states}; 173 | } 174 | }, 175 | { 176 | ltoken: "LEX_STATES", 177 | pattern: ["LEX_STATES", "LEXSTATE_DEFINITIONS"], 178 | callback: ([c1, c2]) => { 179 | if (c2.type === "#start") { 180 | c1.start_state = c2.value; 181 | } 182 | else if (c2.type === "#extend") { 183 | c1.inheritance.push(c2.value); 184 | } 185 | return c1; 186 | } 187 | }, 188 | { 189 | ltoken: "LEX_STATES", 190 | pattern: [], 191 | callback: () => ({start_state: undefined, inheritance: []}) 192 | }, 193 | { 194 | ltoken: "LEXSTATE_DEFINITIONS", 195 | pattern: ["STARTSTATE"], 196 | callback: ([c]) => ({type: "#start", value: c}) 197 | }, 198 | { 199 | ltoken: "LEXSTATE_DEFINITIONS", 200 | pattern: ["STATE_EXTEND"], 201 | callback: ([c]) => ({type: "#extend", value: c}) 202 | }, 203 | { 204 | ltoken: "STARTSTATE", 205 | pattern: ["START", "LEXSTATE"], 206 | callback: (c) => c[1] 207 | }, 208 | { 209 | ltoken: "STATE_EXTEND", 210 | pattern: ["EXTEND", "MULTIPLE_LEXSTATE", "LEXSTATE"], 211 | callback: (c) => ({sub: c[1], base: c[2]}) 212 | }, 213 | { 214 | ltoken: "OPTIONAL_LEX_EX_CALLBACKS", 215 | pattern: ["LEX_EX_CALLBACKS"] 216 | }, 217 | { 218 | ltoken: "OPTIONAL_LEX_EX_CALLBACKS", 219 | pattern: [] 220 | }, 221 | { 222 | ltoken: "LEX_EX_CALLBACKS", 223 | pattern: ["LEX_EX_CALLBACKS", "LEX_EX_CALLBACK"], 224 | callback: (c) => c[0].concat([c[1]]) 225 | }, 226 | { 227 | ltoken: "LEX_EX_CALLBACKS", 228 | pattern: ["LEX_EX_CALLBACK"], 229 | callback: (c) => [c[0]] 230 | }, 231 | { 232 | ltoken: "LEX_EX_CALLBACK", 233 | pattern: ["LEX_EX_CALLBACK_LABEL", "BLOCK"], 234 | callback: (c) => ({type: c[0], callback: makeLexCallback(c[1])}) 235 | }, 236 | { 237 | ltoken: "LEX_EX_CALLBACK_LABEL", 238 | pattern: ["LEX_BEGIN"] 239 | }, 240 | { 241 | ltoken: "LEX_EX_CALLBACK_LABEL", 242 | pattern: ["LEX_END"] 243 | }, 244 | { 245 | ltoken: "LEX_EX_CALLBACK_LABEL", 246 | pattern: ["LEX_DEFAULT"] 247 | }, 248 | { 249 | ltoken: "LEX", 250 | pattern: ["LEX", "LEXSECT"], 251 | callback: (c) => c[0].concat([c[1]]) 252 | }, 253 | { 254 | ltoken: "LEX", 255 | pattern: ["LEXSECT"], 256 | callback: (c) => [c[0]] 257 | }, 258 | { 259 | ltoken: "LEXSECT", 260 | pattern: ["MULTIPLE_LEXSTATE", "LEXLABEL", "LEXDEF", "LEXCALLBACK"], 261 | callback: (c) => (c[3] === undefined ? {token: c[1], pattern: c[2], states: c[0]} : {token: c[1], pattern: c[2], states: c[0], callback: makeLexCallback(c[3])}) 262 | }, 263 | { 264 | ltoken: "LEXSECT", 265 | pattern: ["LEXLABEL", "LEXDEF", "LEXCALLBACK"], 266 | callback: (c) => (c[2] === undefined ? 
{token: c[0], pattern: c[1]} : {token: c[0], pattern: c[1], callback: makeLexCallback(c[2])}) 267 | }, 268 | { 269 | ltoken: "LEXLABEL", 270 | pattern: ["LABEL"] 271 | }, 272 | { 273 | ltoken: "LEXLABEL", 274 | pattern: ["EXCLAMATION"], 275 | callback: () => null 276 | }, 277 | { 278 | ltoken: "LEXLABEL", 279 | pattern: ["EXCLAMATION", "LABEL"], 280 | callback: () => null 281 | }, 282 | { 283 | ltoken: "LEXDEF", 284 | pattern: ["STRING"] 285 | }, 286 | { 287 | ltoken: "LEXDEF", 288 | pattern: ["REGEXP"] 289 | }, 290 | { 291 | ltoken: "MULTIPLE_LEXSTATE", 292 | pattern: ["LT", "LEXSTATE_LIST", "GT"], 293 | callback: (c) => c[1] 294 | }, 295 | { 296 | ltoken: "LEXSTATE_LIST", 297 | pattern: ["LABEL", "COMMA", "LEXSTATE_LIST"], 298 | callback: (c) => [c[0], ...c[2]] 299 | }, 300 | { 301 | ltoken: "LEXSTATE_LIST", 302 | pattern: ["LABEL"], 303 | callback: (c) => [c[0]] 304 | }, 305 | { 306 | ltoken: "LEXSTATE", 307 | pattern: ["LT", "LABEL", "GT"], 308 | callback: (c) => c[1] 309 | }, 310 | { 311 | ltoken: "LEXCALLBACK", 312 | pattern: ["BLOCK"] 313 | }, 314 | { 315 | ltoken: "LEXCALLBACK", 316 | pattern: [] 317 | }, 318 | { 319 | ltoken: "EX_CALLBACKS", 320 | pattern: ["EX_CALLBACKS", "EX_CALLBACK"], 321 | callback: (c) => c[0].concat([c[1]]) 322 | }, 323 | { 324 | ltoken: "EX_CALLBACKS", 325 | pattern: ["EX_CALLBACK"], 326 | callback: (c) => [c[0]] 327 | }, 328 | { 329 | ltoken: "EX_CALLBACK", 330 | pattern: ["EX_CALLBACK_LABEL", "BLOCK"], 331 | callback: (c) => ({type: c[0], callback: makeGrammarCallback(c[1])}) 332 | }, 333 | { 334 | ltoken: "EX_CALLBACK_LABEL", 335 | pattern: ["BEGIN"] 336 | }, 337 | { 338 | ltoken: "EX_CALLBACK_LABEL", 339 | pattern: ["END"] 340 | }, 341 | { 342 | ltoken: "EX_CALLBACK_LABEL", 343 | pattern: ["DEFAULT"] 344 | }, 345 | { 346 | ltoken: "GRAMMAR", 347 | pattern: ["RULES"] 348 | }, 349 | { 350 | ltoken: "RULES", 351 | pattern: ["SECT", "RULES"], 352 | callback: (c) => { 353 | let start_symbol = c[1].start_symbol; 354 | if (c[0].start_symbol !== null) { 355 | start_symbol = c[0].start_symbol; 356 | } 357 | return { 358 | start_symbol, 359 | grammar: c[0].sect.concat(c[1].grammar) 360 | }; 361 | } 362 | }, 363 | { 364 | ltoken: "RULES", 365 | pattern: ["SECT"], 366 | callback: (c) => { 367 | let start_symbol = null; 368 | if (c[0].start_symbol !== null) { 369 | start_symbol = c[0].start_symbol; 370 | } 371 | return { 372 | start_symbol, 373 | grammar: c[0].sect 374 | }; 375 | } 376 | }, 377 | { 378 | ltoken: "SECT", 379 | pattern: ["SECTLABEL", "COLON", "DEF", "SEMICOLON"], 380 | callback: (c) => { 381 | const result = []; 382 | for (const def of c[2]) { 383 | result.push({ltoken: c[0].label, ...def}); 384 | } 385 | return {start_symbol: c[0].start_symbol, sect: result}; 386 | } 387 | }, 388 | { 389 | ltoken: "SECTLABEL", 390 | pattern: ["LABEL"], 391 | callback: (c) => ({start_symbol: null, label: c[0]}) 392 | }, 393 | { 394 | ltoken: "SECTLABEL", 395 | pattern: ["DOLLAR", "LABEL"], 396 | callback: (c) => ({start_symbol: c[1], label: c[1]}) 397 | }, 398 | { 399 | ltoken: "DEF", 400 | pattern: ["PATTERN", "CALLBACK", "VBAR", "DEF"], 401 | callback: (c) => [c[1] === null ? {pattern: c[0]} : {pattern: c[0], callback: makeGrammarCallback(c[1])}].concat(c[3]) 402 | }, 403 | { 404 | ltoken: "DEF", 405 | pattern: ["PATTERN", "CALLBACK"], 406 | callback: (c) => [c[1] === null ? 
{pattern: c[0]} : {pattern: c[0], callback: makeGrammarCallback(c[1])}] 407 | }, 408 | { 409 | ltoken: "PATTERN", 410 | pattern: ["SYMBOLLIST"] 411 | }, 412 | { 413 | ltoken: "PATTERN", 414 | pattern: [], 415 | callback: () => [] 416 | }, 417 | { 418 | ltoken: "SYMBOLLIST", 419 | pattern: ["LABEL", "SYMBOLLIST"], 420 | callback: (c) => [c[0]].concat(c[1]) 421 | }, 422 | { 423 | ltoken: "SYMBOLLIST", 424 | pattern: ["LABEL"], 425 | callback: (c) => [c[0]] 426 | }, 427 | { 428 | ltoken: "CALLBACK", 429 | pattern: ["BLOCK"] 430 | }, 431 | { 432 | ltoken: "CALLBACK", 433 | pattern: [], 434 | callback: () => null 435 | }, 436 | { 437 | ltoken: "BLOCK", 438 | pattern: ["START_BLOCK", "BODY_BLOCK", "END_BLOCK"], 439 | callback: (c) => c[1] 440 | } 441 | ], start_symbol: "LANGUAGE" 442 | }; 443 | 444 | /** 445 | * 言語定義文法の言語定義 446 | * @type Language 447 | */ 448 | export const language_language: Language = {lex: lex, grammar: grammar}; 449 | 450 | // 予めParsingTableを用意しておくことで高速化 451 | /** 452 | * 言語定義文法の言語定義、の構文解析表 453 | * @type ParsingTable 454 | */ 455 | export const language_parsing_table: ParsingTable = [ 456 | new Map([ 457 | ["LANGUAGE", {type: "goto", to: 1}], 458 | ["LEX", {type: "goto", to: 2}], 459 | ["LEXSECT", {type: "goto", to: 3}], 460 | ["LEXLABEL", {type: "goto", to: 4}], 461 | ["LABEL", {type: "shift", to: 5}], 462 | ["EXCLAMATION", {type: "shift", to: 6}]]), 463 | new Map([ 464 | [SYMBOL_EOF, {type: "accept"}]]), 465 | new Map([ 466 | ["GRAMMAR", {type: "goto", to: 7}], 467 | ["LEXSECT", {type: "goto", to: 8}], 468 | ["SECT", {type: "goto", to: 9}], 469 | ["SECTLABEL", {type: "goto", to: 10}], 470 | ["LABEL", {type: "shift", to: 11}], 471 | ["DOLLAR", {type: "shift", to: 12}], 472 | ["LEXLABEL", {type: "goto", to: 4}], 473 | ["EXCLAMATION", {type: "shift", to: 6}]]), 474 | new Map([ 475 | ["LABEL", {type: "reduce", grammar_id: 2}], 476 | ["DOLLAR", {type: "reduce", grammar_id: 2}], 477 | ["EXCLAMATION", {type: "reduce", grammar_id: 2}]]), 478 | new Map([ 479 | ["LEXDEF", {type: "goto", to: 13}], 480 | ["STRING", {type: "shift", to: 14}], 481 | ["REGEXP", {type: "shift", to: 15}]]), 482 | new Map([ 483 | ["STRING", {type: "reduce", grammar_id: 4}], 484 | ["REGEXP", {type: "reduce", grammar_id: 4}]]), 485 | new Map([ 486 | ["LABEL", {type: "shift", to: 16}], 487 | ["STRING", {type: "reduce", grammar_id: 5}], 488 | ["REGEXP", {type: "reduce", grammar_id: 5}]]), 489 | new Map([ 490 | [SYMBOL_EOF, {type: "reduce", grammar_id: 0}]]), 491 | new Map([ 492 | ["LABEL", {type: "reduce", grammar_id: 1}], 493 | ["DOLLAR", {type: "reduce", grammar_id: 1}], 494 | ["EXCLAMATION", {type: "reduce", grammar_id: 1}]]), 495 | new Map([ 496 | ["SECT", {type: "goto", to: 9}], 497 | ["SECTLABEL", {type: "goto", to: 10}], 498 | ["LABEL", {type: "shift", to: 17}], 499 | ["DOLLAR", {type: "shift", to: 12}], 500 | ["GRAMMAR", {type: "goto", to: 18}], 501 | [SYMBOL_EOF, {type: "reduce", grammar_id: 10}]]), 502 | new Map([ 503 | ["COLON", {type: "shift", to: 19}]]), 504 | new Map([ 505 | ["COLON", {type: "reduce", grammar_id: 12}], 506 | ["STRING", {type: "reduce", grammar_id: 4}], 507 | ["REGEXP", {type: "reduce", grammar_id: 4}]]), 508 | new Map([ 509 | ["LABEL", {type: "shift", to: 20}]]), 510 | new Map([ 511 | ["LABEL", {type: "reduce", grammar_id: 3}], 512 | ["DOLLAR", {type: "reduce", grammar_id: 3}], 513 | ["EXCLAMATION", {type: "reduce", grammar_id: 3}]]), 514 | new Map([ 515 | ["LABEL", {type: "reduce", grammar_id: 7}], 516 | ["DOLLAR", {type: "reduce", grammar_id: 7}], 517 | ["EXCLAMATION", {type: 
"reduce", grammar_id: 7}]]), 518 | new Map([ 519 | ["LABEL", {type: "reduce", grammar_id: 8}], 520 | ["DOLLAR", {type: "reduce", grammar_id: 8}], 521 | ["EXCLAMATION", {type: "reduce", grammar_id: 8}]]), 522 | new Map([ 523 | ["STRING", {type: "reduce", grammar_id: 6}], 524 | ["REGEXP", {type: "reduce", grammar_id: 6}]]), 525 | new Map([ 526 | ["COLON", {type: "reduce", grammar_id: 12}]]), 527 | new Map([ 528 | [SYMBOL_EOF, {type: "reduce", grammar_id: 9}]]), 529 | new Map([ 530 | ["DEF", {type: "goto", to: 21}], 531 | ["PATTERN", {type: "goto", to: 22}], 532 | ["SYMBOLLIST", {type: "goto", to: 23}], 533 | ["LABEL", {type: "shift", to: 24}], 534 | ["SEMICOLON", {type: "reduce", grammar_id: 17}], 535 | ["VBAR", {type: "reduce", grammar_id: 17}]]), 536 | new Map([ 537 | ["COLON", {type: "reduce", grammar_id: 13}]]), 538 | new Map([ 539 | ["SEMICOLON", {type: "shift", to: 25}]]), 540 | new Map([ 541 | ["VBAR", {type: "shift", to: 26}], 542 | ["SEMICOLON", {type: "reduce", grammar_id: 15}]]), 543 | new Map([ 544 | ["SEMICOLON", {type: "reduce", grammar_id: 16}], 545 | ["VBAR", {type: "reduce", grammar_id: 16}]]), 546 | new Map([ 547 | ["LABEL", {type: "shift", to: 24}], 548 | ["SYMBOLLIST", {type: "goto", to: 27}], 549 | ["SEMICOLON", {type: "reduce", grammar_id: 19}], 550 | ["VBAR", {type: "reduce", grammar_id: 19}]]), 551 | new Map([ 552 | [SYMBOL_EOF, {type: "reduce", grammar_id: 11}], 553 | ["LABEL", {type: "reduce", grammar_id: 11}], 554 | ["DOLLAR", {type: "reduce", grammar_id: 11}]]), 555 | new Map([ 556 | ["PATTERN", {type: "goto", to: 22}], 557 | ["DEF", {type: "goto", to: 28}], 558 | ["SYMBOLLIST", {type: "goto", to: 23}], 559 | ["LABEL", {type: "shift", to: 24}], 560 | ["SEMICOLON", {type: "reduce", grammar_id: 17}], 561 | ["VBAR", {type: "reduce", grammar_id: 17}]]), 562 | new Map([ 563 | ["SEMICOLON", {type: "reduce", grammar_id: 18}], 564 | ["VBAR", {type: "reduce", grammar_id: 18}]]), 565 | new Map([ 566 | ["SEMICOLON", {type: "reduce", grammar_id: 14}]]) 567 | ]; 568 | 569 | /** 570 | * 言語定義ファイルを読み込むための構文解析器 571 | * @type {Parser} 572 | */ 573 | 574 | // language_parsing_tableの用意がまだなので直接生成する 575 | // export const language_parser: Parser = ParserFactory.create(language_language, language_parsing_table); 576 | export const language_parser: Parser = new ParserGenerator(language_language).getParser(); 577 | -------------------------------------------------------------------------------- /src/sample.ts: -------------------------------------------------------------------------------- 1 | import {readFileSync} from "fs"; 2 | import {ParserGenerator} from "./parsergenerator/parsergenerator"; 3 | import {Language, LexDefinition, GrammarDefinition} from "./def/language"; 4 | import {ParserFactory} from "./parser/factory"; 5 | 6 | const input = readFileSync("/dev/stdin", "utf8"); 7 | 8 | const grammar: GrammarDefinition = { 9 | rules: [ 10 | { 11 | ltoken: "EXP", 12 | pattern: ["EXP", "PLUS", "TERM"], 13 | callback: (c) => c[0] + c[2] 14 | }, 15 | { 16 | ltoken: "EXP", 17 | pattern: ["TERM"] 18 | }, 19 | { 20 | ltoken: "TERM", 21 | pattern: ["TERM", "ASTERISK", "ATOM"], 22 | callback: (c) => c[0] * c[2] 23 | }, 24 | { 25 | ltoken: "TERM", 26 | pattern: ["ATOM"] 27 | }, 28 | { 29 | ltoken: "ATOM", 30 | pattern: ["DIGITS"], 31 | callback: (c) => +c[0] 32 | }, 33 | { 34 | ltoken: "ATOM", 35 | pattern: ["LPAREN", "EXP", "RPAREN"], 36 | callback: (c) => c[1] 37 | } 38 | ], 39 | start_symbol: "EXP" 40 | }; 41 | const lex: LexDefinition = { 42 | rules: [ 43 | {token: "DIGITS", pattern: 
/[1-9][0-9]*/}, 44 | {token: "PLUS", pattern: "+"}, 45 | {token: "ASTERISK", pattern: "*"}, 46 | {token: "LPAREN", pattern: "("}, 47 | {token: "RPAREN", pattern: ")"}, 48 | {token: null, pattern: /(\r\n|\r|\n)+/}, 49 | {token: null, pattern: /[ \f\t\v\u00a0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]+/}, 50 | {token: "INVALID", pattern: /./} 51 | ] 52 | }; 53 | const language: Language = { 54 | lex: lex, 55 | grammar: grammar 56 | }; 57 | 58 | console.time("process"); 59 | console.log(JSON.stringify(ParserFactory.create(language, new ParserGenerator(language).getParsingTable()).parse(input), undefined, 2)); 60 | console.timeEnd("process"); 61 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "strictNullChecks": true, 4 | "noImplicitAny": true, 5 | "target": "es6", 6 | "module": "CommonJS", 7 | "outDir": "dist/", 8 | "declaration": true, 9 | "noEmitOnError": true 10 | }, 11 | "include": [ 12 | "src/**/*.ts" 13 | ], 14 | "exclude": [ 15 | "node_modules", 16 | "dist" 17 | ] 18 | } -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaultSeverity": "error", 3 | "rules": { 4 | "adjacent-overload-signatures": true, 5 | "align": [true, "parameters", "statements"], 6 | "array-type": false, 7 | "arrow-parens": true, 8 | "arrow-return-shorthand": true, 9 | "ban-types": [ 10 | true, 11 | ["Object", "Avoid using the `Object` type. Did you mean `object`?"], 12 | ["Function", "Avoid using the `Function` type. Prefer a specific function type, like `() => void`."], 13 | ["Boolean", "Avoid using the `Boolean` type. Did you mean `boolean`?"], 14 | ["Number", "Avoid using the `Number` type. Did you mean `number`?"], 15 | ["String", "Avoid using the `String` type. Did you mean `string`?"], 16 | ["Symbol", "Avoid using the `Symbol` type. 
Did you mean `symbol`?"] 17 | ], 18 | "callable-types": true, 19 | "class-name": true, 20 | "comment-format": [true, "check-space"], 21 | "curly": [true, "ignore-same-line"], 22 | "eofline": true, 23 | "forin": true, 24 | "import-spacing": true, 25 | "indent": [true, "tabs"], 26 | "interface-name": false, 27 | "interface-over-type-literal": false, 28 | "jsdoc-format": true, 29 | "label-position": true, 30 | "max-classes-per-file": false, 31 | "max-line-length": false, 32 | "member-access": false, 33 | "member-ordering": false, 34 | "new-parens": true, 35 | "no-angle-bracket-type-assertion": true, 36 | "no-arg": true, 37 | "no-consecutive-blank-lines": [true, 2], 38 | "no-console": false, 39 | "no-duplicate-variable": true, 40 | "no-empty": true, 41 | "no-eval": true, 42 | "no-internal-module": true, 43 | "no-trailing-whitespace": true, 44 | "no-var-keyword": true, 45 | "object-literal-shorthand": false, 46 | "object-literal-sort-keys": false, 47 | "one-line": [true, "check-whitespace", "check-open-brace"], 48 | "one-variable-per-declaration": true, 49 | "ordered-imports": false, 50 | "prefer-const": true, 51 | "prefer-for-of": false, 52 | "quotemark": [true, "double", "avoid-escape"], 53 | "semicolon": [true, "always"], 54 | "trailing-comma": [ 55 | true, { 56 | "multiline": "never", 57 | "singleline": "never" 58 | } 59 | ], 60 | "triple-equals": false, 61 | "use-isnan": true, 62 | "variable-name": [true, "ban-keywords"], 63 | "whitespace": [ 64 | true, 65 | "check-branch", 66 | "check-decl", 67 | "check-separator", 68 | "check-type" 69 | ] 70 | } 71 | } --------------------------------------------------------------------------------
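
The sources above define both `language_language` and a precomputed `language_parsing_table`, and the commented-out line next to `language_parser` hints that `ParserFactory.create` can consume that table directly instead of rebuilding it with `ParserGenerator` at every startup (the same pattern `src/sample.ts` uses with a freshly generated table). The following is a minimal, hypothetical sketch, not a file in this repository; it assumes the two exports live in `src/precompiler/ruleparser.ts` (the file listed just before `src/sample.ts` in the tree) and that the script sits next to `src/sample.ts` so the relative imports resolve.

```TypeScript
// Hypothetical sketch (not part of the repository): reuse the precomputed
// parsing table instead of running ParserGenerator at startup.
// Import paths assume this file lives next to src/sample.ts and that
// language_language / language_parsing_table are exported from
// src/precompiler/ruleparser.ts, as shown above.
import {readFileSync} from "fs";
import {ParserFactory} from "./parser/factory";
import {language_language, language_parsing_table} from "./precompiler/ruleparser";

// Build the parser directly from the precompiled table; no LALR(1) table
// construction happens at runtime.
const parser = ParserFactory.create(language_language, language_parsing_table);

// Parse a language definition file read from stdin, like src/sample.ts does.
const input = readFileSync("/dev/stdin", "utf8");
console.log(JSON.stringify(parser.parse(input), undefined, 2));
```

Trading a larger source file for a skipped table-generation step is the speed-up mentioned in the comment above `language_parsing_table`; the bundled `language_parser` still takes the `ParserGenerator` path.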