├── .gitignore ├── Gruntfile.js ├── LICENSE ├── README.md ├── example ├── epsilon.l ├── epsilon.y ├── expr.l ├── expr.y ├── gen_test_cfg_parser.js ├── js.l ├── js.y ├── lr1.html ├── test.l ├── test.y ├── testcfg.js └── testparser.js ├── package.json ├── src ├── bnf-parser-generate.js ├── bnf-parser.js ├── datatypes.js ├── generator.js ├── lex-parser-generate.js ├── lex-parser.js └── lexer.js └── test └── grammar_test.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | refs 4 | -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | module.exports = function(grunt){ 2 | 3 | grunt.initConfig({ 4 | pkg: grunt.file.readJSON('package.json'), 5 | 6 | nodeunit: { 7 | all: ['test/*_test.js'], 8 | options: { 9 | reporter: 'tap' 10 | } 11 | } 12 | }); 13 | 14 | grunt.loadNpmTasks('grunt-contrib-nodeunit'); 15 | 16 | grunt.registerTask('default', ['nodeunit']); 17 | }; 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 CecilLee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | jsbison 2 | ======= 3 | 4 | 5 | jsbison是使用javascript实现的lalr解析器的生成器,基于bnf生成lalr-parser的javascript实现。 6 | 7 | 8 | -------------------------------------------------------------------------------- /example/epsilon.l: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | a return "a"; 4 | b return "b"; 5 | c return "c"; 6 | -------------------------------------------------------------------------------- /example/epsilon.y: -------------------------------------------------------------------------------- 1 | %start A 2 | 3 | %% 4 | 5 | A 6 | : 'a' 7 | { 8 | this.$$ = $1; 9 | } 10 | | A B C 11 | { 12 | this.$$ = $1 + $2 + $3; 13 | } 14 | ; 15 | 16 | B 17 | : /*epsilon*/ 18 | { 19 | this.$$ = ""; 20 | } 21 | | 'b' 22 | { 23 | this.$$ = $1; 24 | } 25 | ; 26 | 27 | C : { this.$$ = '' } 28 | | 'c' { this.$$ = $1 } 29 | ; 30 | -------------------------------------------------------------------------------- /example/expr.l: -------------------------------------------------------------------------------- 1 | %% 2 | \d+ return "NUMBER"; 3 | \+ return "+"; 4 | \* return "*"; 5 | \- return "-"; 6 | \/ return "/"; 7 | \( return "("; 8 | \) return ")"; 9 | -------------------------------------------------------------------------------- /example/expr.y: 
/**
 * Example driver: builds an LR(1) parser from ./expr.l and ./expr.y,
 * dumps the intermediate artefacts to disk, then runs the generated
 * parser against a sample input.
 *
 * Files written (relative to the current working directory):
 *   testcfg.js      - merged lexer/grammar configuration as JSON
 *   generator.txt   - textual dump of the generator
 *   testparser.js   - generated parser source
 *
 * Throws an Error when either the lex file or the grammar file fails to
 * parse, instead of failing later with an unrelated TypeError.
 */
(function(){

    var lexfile = './expr.l';
    var bnffile = './expr.y';
    var inputs = '1';

    var fs = require('fs');
    var Generator = require('../src/generator.js');
    var lexParser = require('../src/lex-parser.js');
    var bnfParser = require('../src/bnf-parser.js');

    var lexcontent = fs.readFileSync(lexfile).toString();
    var bnfcontent = fs.readFileSync(bnffile).toString();

    // parse() reports success through its return value; the parsed
    // configuration is only read from `$$` after a truthy result.
    if(!lexParser.parse(lexcontent)){
        throw new Error('failed to parse lex file: ' + lexfile);
    }
    var lexcfg = lexParser.$$;

    if(!bnfParser.parse(bnfcontent,true)){
        throw new Error('failed to parse grammar file: ' + bnffile);
    }
    var bnfcfg = bnfParser.$$;

    bnfcfg.lex = lexcfg;
    bnfcfg.type = 'LR(1)';

    // Replace each rule's regex with its string form before serialising.
    // NOTE(review): presumably these are RegExp objects, which
    // JSON.stringify would otherwise emit as `{}` - confirm in lex-parser.
    var rule, i=0;
    while(rule = lexcfg.rules[i++]){
        rule.regex = rule.regex.toString();
    }
    fs.writeFileSync('testcfg.js', 'var testcfg = '+JSON.stringify(bnfcfg, null, ' '));

    // Time the table construction and print the generator's counters.
    var start = +new Date;
    var ExprParserGenerator = new Generator(bnfcfg);
    console.log(+new Date - start + 'ms');
    console.log('closure call time:' + ExprParserGenerator.closureCount);
    console.log('repeat calc goto time:' + ExprParserGenerator.gotoItemSetRepeatCount);

    fs.writeFileSync('./generator.txt', ExprParserGenerator.toString());

    var exprParserCode = ExprParserGenerator.generate();
    fs.writeFileSync('./testparser.js', exprParserCode);

    // NOTE(review): eval executes the freshly generated parser source.
    // That is only acceptable while the .l/.y inputs are trusted local
    // files; do not reuse this pattern for externally supplied grammars.
    var exprParser = eval(exprParserCode);

    exprParser.parse(inputs, true);
    console.log(exprParser.lexer.input , 'parse result:', exprParser.$$);

})();
return "?"; 53 | \[ return "["; 54 | \] return "]"; 55 | \^ return "^"; 56 | \{ return "{"; 57 | \} return "}"; 58 | \| return "|"; 59 | \~ return "~"; 60 | \& return "&"; 61 | break return "BREAK"; 62 | case return "CASE"; 63 | catch return "CATCH"; 64 | continue return "CONTINUE"; 65 | default return "DEFAULT"; 66 | delete return "DELETE"; 67 | do return "DO"; 68 | else return "ELSE"; 69 | false return "FALSE"; 70 | finally return "FINALLY"; 71 | for return "FOR"; 72 | function return "FUNCTION"; 73 | get return "GET"; 74 | if return "IF"; 75 | in return "IN"; 76 | instanceof return "INSTANCEOF"; 77 | new return "NEW"; 78 | return return "RETURN"; 79 | set return "SET"; 80 | switch return "SWITCH"; 81 | this return "THIS"; 82 | throw return "THROW"; 83 | true return "TRUE"; 84 | try return "TRY"; 85 | typeof return "TYPEOF"; 86 | var return "VAR"; 87 | const return "CONST"; 88 | void return "VOID"; 89 | while return "WHILE"; 90 | with return "WITH"; 91 | [A-Za-z$_]\w* return "IDENT"; 92 | \s+ /* skip whitespace */ 93 | \n /* skip lineterminal */ 94 | . 
return "INVALID"; 95 | 96 | 97 | -------------------------------------------------------------------------------- /example/js.y: -------------------------------------------------------------------------------- 1 | %start Program 2 | 3 | %% 4 | 5 | Program 6 | : SourceElements $end 7 | { 8 | this.$$ = new Program($1); 9 | setType(this.$$); 10 | } 11 | | $end 12 | { 13 | this.$$ = new Program(0); 14 | } 15 | ; 16 | 17 | SourceElements 18 | : SourceElements SourceElement 19 | { 20 | this.$$ = $1; 21 | this.$$.push($2); 22 | } 23 | | SourceElement 24 | { 25 | this.$$ = [$1]; 26 | } 27 | ; 28 | 29 | SourceElement 30 | : Statement 31 | | FunctionDeclaration 32 | ; 33 | 34 | Statement 35 | : Block 36 | | VariableStatement 37 | | EmptyStatement 38 | | ExpressionStatement 39 | | IfStatement 40 | | IterationStatement 41 | | ContinueStatement 42 | | BreakStatement 43 | | ReturnStatement 44 | | WithStatement 45 | | LabelledStatement 46 | | SwitchStatement 47 | | ThrowStatement 48 | | TryStatement 49 | ; 50 | 51 | FunctionDeclaration 52 | : FUNCTION Identifier '(' ')' FunctionBody 53 | { 54 | this.$$ = new FunctionDeclaration($2, 0, $5); 55 | } 56 | | FUNCTION Identifier '(' FormalParameterList ')' FunctionBody 57 | { 58 | this.$$ = new FunctionDeclaration($2, $4, $6); 59 | } 60 | ; 61 | 62 | FunctionBody 63 | : '{' '}' 64 | { 65 | this.$$ = new FunctionBody(0); 66 | } 67 | | '{' SourceElements '}' 68 | { 69 | this.$$ = new FunctionBody($2); 70 | } 71 | ; 72 | 73 | FormalParameterList 74 | : Identifier 75 | { 76 | this.$$ = [$1]; 77 | } 78 | | FormalParameterList ',' Identifier 79 | { 80 | this.$$ = $1; 81 | this.$$.push($3); 82 | } 83 | ; 84 | 85 | Block 86 | : '{' '}' 87 | { 88 | this.$$ = new Block(0); 89 | } 90 | | '{' StatementList '}' 91 | { 92 | this.$$ = new Block($2); 93 | } 94 | ; 95 | 96 | StatementList 97 | : StatementList Statement 98 | { 99 | $1.push($2); 100 | this.$$ = $1; 101 | } 102 | | Statement 103 | { 104 | this.$$ = new StatementList($1); 105 | } 106 | ; 
107 | 108 | VariableStatement 109 | : VAR VariableDeclarationList ';' 110 | { 111 | this.$$ = $2; 112 | } 113 | ; 114 | 115 | 116 | IfStatement 117 | : IF '(' CommaExpression ')' Statement 118 | { 119 | this.$$ = new IfStatement($3, $5, 0); 120 | } 121 | | IF '(' CommaExpression ')' Statement ELSE Statement 122 | { 123 | this.$$ = new IfStatement($3, $5, $7); 124 | } 125 | ; 126 | IterationStatement 127 | : DO Statement WHILE '(' CommaExpression ')' ';' 128 | { 129 | this.$$ = new DoWhileStatement($2, $5); 130 | } 131 | | WHILE '(' CommaExpression ')' Statement 132 | { 133 | this.$$ = new WhileStatement($3, $5); 134 | } 135 | | FOR '(' Identifier IN CommaExpression ')' Statement 136 | { 137 | this.$$ = new ForInStatement($3, $5, $7); 138 | } 139 | | FOR '(' LeftHandSideExpression IN CommaExpression ')' Statement 140 | { 141 | this.$$ = new ForInStatement($3, $5, $7); 142 | } 143 | | FOR '(' VAR Identifier IN CommaExpression ')' Statement 144 | { 145 | this.$$ = new ForInStatement($4, $6, $8); 146 | } 147 | | FOR '(' VAR VariableDeclarationNoIn IN CommaExpression ')' Statement 148 | { 149 | this.$$ = new ForInStatement($4, $6, $8); 150 | } 151 | | FOR '(' ExpressionNoInOpt ';' ExpressionOpt ';' ExpressionOpt ')' Statement 152 | { 153 | this.$$ = new ForStatement($3, $5, $7, $9); 154 | } 155 | | FOR '(' VAR VariableDeclarationListNoIn ';' ExpressionOpt ';' ExpressionOpt ')' Statement 156 | { 157 | this.$$ = new ForStatement($4, $6, $8, $10); 158 | } 159 | ; 160 | 161 | ContinueStatement 162 | : CONTINUE ';' 163 | { 164 | this.$$ = new ContinueStatement(undefined); 165 | } 166 | ; 167 | 168 | BreakStatement 169 | : BREAK ';' 170 | { 171 | this.$$ = new BreakStatement(undefined); 172 | } 173 | ; 174 | 175 | ReturnStatement 176 | : RETURN ';' 177 | { 178 | this.$$ = new ReturnStatement(undefined); 179 | } 180 | | RETURN CommaExpression ';' 181 | { 182 | this.$$ = new ReturnStatement($2); 183 | } 184 | ; 185 | WithStatement 186 | : WITH '(' CommaExpression ')' Statement 
187 | { 188 | this.$$ = new WithStatement($3, $5); 189 | } 190 | ; 191 | LabelledStatement 192 | : IDENT ':' Statement 193 | { 194 | this.$$ = new LabelledStatement($1, $3); 195 | } 196 | ; 197 | SwitchStatement 198 | : SWITCH '(' CommaExpression ')' CaseBlock 199 | { 200 | this.$$ = new SwitchStatement($3, $5); 201 | } 202 | ; 203 | ThrowStatement 204 | : THROW CommaExpression ';' 205 | { 206 | this.$$ = new ThrowStatement($2); 207 | } 208 | ; 209 | 210 | TryStatement 211 | : TRY Block Catch 212 | { 213 | this.$$ = new TryStatement($2, $3, 0); 214 | } 215 | | TRY Block Finally 216 | { 217 | this.$$ = new TryStatement($2, 0, $3); 218 | } 219 | | TRY Block Catch Finally 220 | { 221 | this.$$ = new TryStatement($2, $3, $4); 222 | } 223 | ; 224 | 225 | Catch 226 | : CATCH '(' IDENT ')' Block 227 | { 228 | this.$$ = new Catch($3, $5); 229 | } 230 | ; 231 | 232 | Finally 233 | : FINALLY Block 234 | { 235 | this.$$ = new Finally($2); 236 | } 237 | ; 238 | 239 | CaseBlock 240 | : '{' CaseClausesOpt '}' 241 | { 242 | this.$$ = new CaseBlock($2, 0, 0); 243 | } 244 | | '{' CaseClausesOpt DefaultClause CaseClausesOpt '}' 245 | { 246 | this.$$ = new CaseBlock($2, $3, $4); 247 | } 248 | ; 249 | 250 | CaseClausesOpt 251 | : /* epsilon */ 252 | { 253 | this.$$ = 0; 254 | } 255 | | CaseClauses 256 | ; 257 | 258 | CaseClauses 259 | : CASE CommaExpression ':' StatementListOpt 260 | { 261 | this.$$ = new CaseClause($2, $4); 262 | } 263 | ; 264 | 265 | DefaultClause 266 | : DEFAULT ':' StatementListOpt 267 | { 268 | this.$$ = new CaseClause(0, $3); 269 | } 270 | ; 271 | 272 | StatementListOpt 273 | : /* epsilon */ 274 | { 275 | this.$$ = 0; 276 | } 277 | | StatementList 278 | ; 279 | 280 | 281 | 282 | ExpressionOpt 283 | : /* epsilon */ 284 | { 285 | this.$$ = 0; 286 | } 287 | | CommaExpression 288 | ; 289 | 290 | ExpressionNoInOpt 291 | : 292 | { 293 | this.$$ = 0; 294 | } 295 | | ExpressionNoIn 296 | { 297 | this.$$ = $1; 298 | } 299 | ; 300 | 301 | ExpressionNoIn 302 | : 
AssignmentExpressionNoIn 303 | { 304 | this.$$ = [$1]; 305 | } 306 | | ExpressionNoIn ',' AssignmentExpressionNoIn 307 | { 308 | this.$$ = $1; 309 | this.$$.push($3); 310 | } 311 | ; 312 | 313 | EmptyStatement 314 | : ';' 315 | ; 316 | 317 | 318 | ExpressionStatement 319 | : CommaExpression ';' 320 | { 321 | this.$$ = new ExpressionStatement($1); 322 | } 323 | ; 324 | 325 | 326 | 327 | 328 | VariableDeclarationList 329 | : VariableDeclarationList ',' VariableDeclaration 330 | { 331 | this.$$ = $1; 332 | this.$$.push($3); 333 | } 334 | | VariableDeclaration 335 | { 336 | this.$$ = [$1]; 337 | } 338 | ; 339 | VariableDeclarationListNoIn 340 | : VariableDeclarationListNoIn ',' VariableDeclarationNoIn 341 | { 342 | this.$$ = $1; 343 | this.$$.push($3); 344 | } 345 | | VariableDeclarationNoIn 346 | { 347 | this.$$ = [$1]; 348 | } 349 | ; 350 | 351 | 352 | 353 | VariableDeclaration 354 | : Identifier 355 | { 356 | this.$$ = new VariableDeclaration($1, 0); 357 | } 358 | | Identifier Initializer 359 | { 360 | this.$$ = new VariableDeclaration($1, $2); 361 | } 362 | ; 363 | 364 | VariableDeclarationNoIn 365 | : Identifier 366 | { 367 | this.$$ = new VariableDeclaration($1, 0); 368 | } 369 | | Identifier InitializerNoIn 370 | { 371 | this.$$ = new VariableDeclaration($1, $2); 372 | } 373 | ; 374 | Initializer 375 | : '=' AssignmentExpression 376 | { 377 | this.$$ = $2; 378 | } 379 | ; 380 | InitializerNoIn 381 | : '=' AssignmentExpressionNoIn 382 | { 383 | this.$$ = $2; 384 | } 385 | ; 386 | 387 | CommaExpression 388 | : AssignmentExpression 389 | { 390 | this.$$ = [$1]; 391 | } 392 | | CommaExpression ',' AssignmentExpression 393 | { 394 | this.$$ = $1; 395 | this.$$.push($3); 396 | } 397 | ; 398 | 399 | 400 | AssignmentExpression 401 | : ConditionalExpression 402 | | LeftHandSideExpression AssignmentOperator AssignmentExpression 403 | { 404 | this.$$ = new AssignmentExpression($1, $2, $3); 405 | } 406 | ; 407 | AssignmentExpressionNoIn 408 | : ConditionalExpressionNoIn 409 
| | LeftHandSideExpression AssignmentOperator AssignmentExpressionNoIn 410 | { 411 | this.$$ = new AssignmentExpression($1, $2, $3); 412 | } 413 | ; 414 | 415 | 416 | AssignmentOperator 417 | : '=' 418 | | '+=' 419 | | '-=' 420 | | '/=' 421 | | '*=' 422 | | '<<=' 423 | | '>>=' 424 | | '>>>=' 425 | | '&=' 426 | | '^=' 427 | | '|=' 428 | | '%=' 429 | ; 430 | 431 | ConditionalExpression 432 | : LogicalORExpression '?' AssignmentExpression ':' AssignmentExpression 433 | { 434 | this.$$ = new ConditionalExpression($1, $3, $5); 435 | } 436 | | LogicalORExpression 437 | ; 438 | ConditionalExpressionNoIn 439 | : LogicalORExpressionNoIn '?' AssignmentExpression ':' AssignmentExpressionNoIn 440 | { 441 | this.$$ = new ConditionalExpression($1, $3, $5); 442 | } 443 | | LogicalORExpressionNoIn 444 | ; 445 | LogicalORExpression 446 | : LogicalANDExpression 447 | | LogicalORExpression '||' LogicalANDExpression 448 | { 449 | this.$$ = new BinaryExpression($1, $2, $3); 450 | } 451 | ; 452 | LogicalORExpressionNoIn 453 | : LogicalANDExpressionNoIn 454 | | LogicalORExpressionNoIn '||' LogicalANDExpressionNoIn 455 | { 456 | this.$$ = new BinaryExpression($1, $2, $3); 457 | } 458 | ; 459 | 460 | LogicalANDExpression 461 | : BitwiseORExpression 462 | | LogicalANDExpression '&&' BitwiseORExpression 463 | { 464 | this.$$ = new BinaryExpression($1, $2, $3); 465 | } 466 | ; 467 | LogicalANDExpressionNoIn 468 | : BitwiseORExpressionNoIn 469 | | LogicalANDExpressionNoIn '&&' BitwiseORExpressionNoIn 470 | { 471 | this.$$ = new BinaryExpression($1, $2, $3); 472 | } 473 | ; 474 | 475 | BitwiseORExpression 476 | : BitwiseXORExpression 477 | | BitwiseORExpression '|' BitwiseXORExpression 478 | { 479 | this.$$ = new BinaryExpression($1, $2, $3); 480 | } 481 | ; 482 | BitwiseORExpressionNoIn 483 | : BitwiseXORExpressionNoIn 484 | | BitwiseORExpressionNoIn '|' BitwiseXORExpressionNoIn 485 | { 486 | this.$$ = new BinaryExpression($1, $2, $3); 487 | } 488 | ; 489 | 490 | BitwiseXORExpression 491 | : 
/*
 * Relational operators. Every alternative builds a BinaryExpression from
 * (left operand, operator token value, right operand).
 *
 * The keyword operators use the token names INSTANCEOF and IN, which are
 * what the lexer rules return for `instanceof` and `in`. The quoted
 * lowercase spellings used previously named terminals that the lexer
 * never emits, so those alternatives could not match.
 */
RelationalExpression
    : RelationalExpression '<' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpression '>' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpression '<=' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpression '>=' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpression INSTANCEOF ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpression IN ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | ShiftExpression
    ;

/*
 * Same as RelationalExpression but without the `in` operator, for use
 * inside the initialiser part of a `for` header where IN separates the
 * loop variable from the iterated expression.
 */
RelationalExpressionNoIn
    : RelationalExpressionNoIn '<' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpressionNoIn '>' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpressionNoIn '<=' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpressionNoIn '>=' ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | RelationalExpressionNoIn INSTANCEOF ShiftExpression
        {
            this.$$ = new BinaryExpression($1, $2, $3);
        }
    | ShiftExpression
    ;
BinaryExpression($1, $2, $3); 637 | } 638 | ; 639 | 640 | MultiplicativeExpression 641 | : UnaryExpression 642 | | MultiplicativeExpression '*' UnaryExpression 643 | { 644 | this.$$ = new BinaryExpression($1, $2, $3); 645 | } 646 | | MultiplicativeExpression '/' UnaryExpression 647 | { 648 | this.$$ = new BinaryExpression($1, $2, $3); 649 | } 650 | | MultiplicativeExpression '%' UnaryExpression 651 | { 652 | this.$$ = new BinaryExpression($1, $2, $3); 653 | } 654 | ; 655 | UnaryExpression 656 | : PostfixExpression 657 | | DELETE UnaryExpression 658 | { 659 | this.$$ = new UnaryExpression($1, $2); 660 | } 661 | | VOID UnaryExpression 662 | { 663 | this.$$ = new UnaryExpression($1, $2); 664 | } 665 | | TYPEOF UnaryExpression 666 | { 667 | this.$$ = new UnaryExpression($1, $2); 668 | } 669 | | '++' UnaryExpression 670 | { 671 | this.$$ = new UpdateExpression($1, $2, true); 672 | } 673 | | '--' UnaryExpression 674 | { 675 | this.$$ = new UpdateExpression($1, $2, true); 676 | } 677 | | '+' UnaryExpression 678 | { 679 | this.$$ = new UnaryExpression($1, $2); 680 | } 681 | | '-' UnaryExpression 682 | { 683 | this.$$ = new UnaryExpression($1, $2); 684 | } 685 | | '~' UnaryExpression 686 | { 687 | this.$$ = new UnaryExpression($1, $2); 688 | } 689 | | '!' 
UnaryExpression 690 | { 691 | this.$$ = new UnaryExpression($1, $2); 692 | } 693 | ; 694 | PostfixExpression 695 | : LeftHandSideExpression 696 | | LeftHandSideExpression '++' 697 | { 698 | this.$$ = new UpdateExpression($2, $1, false); 699 | } 700 | | LeftHandSideExpression '--' 701 | { 702 | this.$$ = new UpdateExpression($2, $1, false); 703 | } 704 | ; 705 | LeftHandSideExpression 706 | : NewExpression 707 | | CallExpression 708 | ; 709 | 710 | CallExpression 711 | : MemberExpression Arguments 712 | { 713 | this.$$ = new CallExpression($1, $2); 714 | } 715 | | CallExpression Arguments 716 | { 717 | this.$$ = new CallExpression($1, $2); 718 | } 719 | | CallExpression '[' CommaExpression ']' 720 | { 721 | this.$$ = new MemberExpression($1, $3); 722 | } 723 | | CallExpression '.' Identifier 724 | { 725 | this.$$ = new MemberExpression($1, $3); 726 | } 727 | ; 728 | 729 | NewExpression 730 | : MemberExpression 731 | | NEW NewExpression 732 | { 733 | this.$$ = new NewExpression($2); 734 | } 735 | | NEW MemberExpression Arguments 736 | { 737 | this.$$ = new NewExpression($2, $3); 738 | } 739 | ; 740 | Arguments 741 | : '(' ')' 742 | { 743 | this.$$ = new Arguments(0); 744 | } 745 | | '(' ArgumentList ')' 746 | { 747 | this.$$ = new Arguments($2); 748 | } 749 | ; 750 | ArgumentList 751 | : AssignmentExpression 752 | { 753 | this.$$ = [$1]; 754 | } 755 | | ArgumentList ',' AssignmentExpression 756 | { 757 | this.$$ = $1; 758 | this.$$.push($3); 759 | } 760 | ; 761 | MemberExpression 762 | : PrimaryExpression 763 | | FunctionExpression 764 | | MemberExpression '[' CommaExpression ']' 765 | { 766 | this.$$ = new MemberExpression($1, $3); 767 | } 768 | | MemberExpression '.' 
IdentifierName 769 | { 770 | this.$$ = new MemberExpression($1, $3); 771 | } 772 | ; 773 | IdentifierName 774 | : IDENT 775 | | BREAK 776 | | CASE 777 | | CATCH 778 | | CONTINUE 779 | | DEFAULT 780 | | DELETE 781 | | DO 782 | | ELSE 783 | | FALSE 784 | | FINALLY 785 | | FOR 786 | | FUNCTION 787 | | GET 788 | | IF 789 | | IN 790 | | INSTANCEOF 791 | | NEW 792 | | NULL 793 | | RETURN 794 | | SET 795 | | SWITCH 796 | | THIS 797 | | THROW 798 | | TRUE 799 | | TRY 800 | | TYPEOF 801 | | VAR 802 | | CONST 803 | | VOID 804 | | WHILE 805 | | WITH 806 | ; 807 | FunctionExpression 808 | : FUNCTION '(' ')' FunctionBody 809 | { 810 | this.$$ = new FunctionExpression(0, $4); 811 | } 812 | | FUNCTION '(' FormalParameterList ')' FunctionBody 813 | { 814 | this.$$ = new FunctionExpression($3, $5); 815 | } 816 | | FUNCTION Identifier '(' ')' FunctionBody 817 | { 818 | this.$$ = new FunctionExpression(0, $5, $2); 819 | } 820 | | FUNCTION Identifier '(' FormalParameterList ')' FunctionBody 821 | { 822 | this.$$ = new FunctionExpression($4, $6, $2); 823 | } 824 | ; 825 | 826 | Identifier 827 | : IDENT 828 | { 829 | this.$$ = new Identifier($1); 830 | } 831 | ; 832 | 833 | PrimaryExpression 834 | : Literal 835 | ; 836 | 837 | Literal 838 | : Identifier 839 | { 840 | this.$$ = $1; 841 | } 842 | | NULL 843 | | TRUE 844 | | FALSE 845 | | NUMBER 846 | { 847 | this.$$ = new Literal($1, 'NUMBER'); 848 | } 849 | | STRING 850 | { 851 | this.$$ = new Literal($1, 'STRING'); 852 | } 853 | ; 854 | 855 | %% 856 | 857 | function Program(body){ 858 | this.body = body; 859 | this.childrens = [body]; 860 | } 861 | function SourceElements(sourceElement){ 862 | this.childrens = [sourceElement]; 863 | } 864 | function Block(sourceElements){ 865 | this.childrens = [!!sourceElements ? 
/**
 * AST node holding the statements of a block, in source order.
 *
 * The grammar creates the node from the first statement and then calls
 * `push` on it for every following statement, so the node must expose a
 * `push` method; without one, any list of two or more statements throws.
 *
 * @param {*} statement - first statement of the list.
 */
function StatementList(statement){
    this.childrens = [statement];
}

/**
 * Appends a statement to the list.
 *
 * Defined on the prototype so it does not show up as an own property of
 * the node.
 *
 * @param {*} statement - statement to append.
 * @returns {number} new number of statements, mirroring Array.prototype.push.
 */
StatementList.prototype.push = function(statement){
    return this.childrens.push(statement);
};
/**
 * AST node for a `catch (param) { ... }` clause.
 *
 * The grammar action constructs this node as `new Catch($3, $5)`, i.e.
 * with the IDENT value first and the handler Block second. The previous
 * single-parameter form stored the identifier as `block` and silently
 * dropped the real handler body.
 *
 * @param {*} param - value of the IDENT token bound by the catch clause.
 * @param {*} block - Block node containing the handler body.
 */
function Catch(param, block){
    this.param = param;
    this.block = block;
    this.childrens = [param, block];
}
augument; 992 | this.prefix = prefix; 993 | this.childrens = [operator, augument, prefix]; 994 | } 995 | function Arguments(argumentList){ 996 | this.childrens = [argumentList]; 997 | } 998 | function ArgumentList(assignmentExpression){ 999 | this.childrens = [assignmentExpression]; 1000 | } 1001 | function MemberExpression(object, property){ 1002 | this.object = object; 1003 | this.property = property; 1004 | this.childrens = [object, property]; 1005 | } 1006 | function FunctionDeclaration(id, args, body){ 1007 | this.id = id; 1008 | this.args = args; 1009 | this.body = body; 1010 | this.childrens = [id, args, body]; 1011 | } 1012 | 1013 | function FunctionExpression(params, block, name){ 1014 | this.childrens = [params, block, name]; 1015 | } 1016 | 1017 | function NewExpression(func, args){ 1018 | this.func = func; 1019 | this.args = args; 1020 | this.childrens = [func, args]; 1021 | } 1022 | function CallExpression(func, args){ 1023 | this.func = func; 1024 | this.args = args; 1025 | this.childrens = [func, args]; 1026 | } 1027 | 1028 | function Parameter(param){ 1029 | this.childrens = [param]; 1030 | } 1031 | function FunctionBody(sourceElements){ 1032 | this.sourceElements = sourceElements; 1033 | this.childrens = [sourceElements]; 1034 | } 1035 | function Identifier(name){ 1036 | this.name = name; 1037 | this.childrens = [name]; 1038 | } 1039 | function Literal(raw, type){ 1040 | this.raw = raw; 1041 | switch(type){ 1042 | case 'NUMBER': 1043 | this.value = Number(raw); 1044 | break; 1045 | case 'STRING': 1046 | this.value = eval(raw); 1047 | break; 1048 | } 1049 | this.childrens = [raw]; 1050 | } 1051 | 1052 | /** 1053 | * set type is function.constructor 1054 | * set type is node first property 1055 | */ 1056 | function setType(node){ 1057 | var childrens; 1058 | if(node){ 1059 | if(node.constructor !== Object && node.constructor !== Array && node instanceof Object){ 1060 | if(node.type === undefined){ 1061 | node.type = node.constructor.name; 1062 | } 
1063 | Object.keys(node).forEach(function(prop){ 1064 | var val = node[prop]; 1065 | if(prop !== 'type'){ 1066 | delete node[prop]; 1067 | node[prop] = val; 1068 | } 1069 | }); 1070 | } 1071 | if(node.constructor === Array){ 1072 | for(var i=0; i 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /example/test.l: -------------------------------------------------------------------------------- 1 | %% 2 | 3 | \/\/[^\n]* /* skip singleline comment */ 4 | \/\*(.|\n|\r)*?\*\/ /* skip multiline comment */ 5 | 6 | "(\\"|[^"])*" return "STRING"; 7 | '(\\'|[^'])*' return "STRING"; 8 | 9 | ((0|[1-9][0-9]*)(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?|[0][xX][0-9a-fA-F]+ return "NUMBER" 10 | 11 | >>>= return ">>>="; //URSHIFT EQUAL 12 | !== return "!=="; //STRNEX 13 | === return "==="; //STREQ 14 | >>> return ">>>"; //URSHIFT 15 | \<<= return "<<="; //LSHIFT EQUAL 16 | >>= return ">>="; //RSHIFT EQUAL 17 | %= return "%="; //MOD EQUAL 18 | && return "&&"; //AND 19 | &= return "&="; //AND EQUAL 20 | \*= return "*="; //MULT EQUAL 21 | \+\+ return "++"; //PLUS PLUS 22 | \+= return "+="; //PLUS EQUAL 23 | -- return "--"; //MINUS MINUS 24 | -= return "-="; //MINUS EQUAL 25 | \/= return "/="; //DIV EQUAL 26 | \<< return "<<"; //LSHIFT 27 | \<= return "<="; //LE 28 | >= return ">="; //GE 29 | == return "=="; //EQEQ 30 | >> return ">>"; //RSHIFT 31 | \^= return "^="; //XOR EQUAL 32 | \|= return "|="; //OR EQUAL 33 | \|\| return "||"; //OR 34 | & return "&"; //LOGIC AND 35 | % return "%"; //MOD 36 | != return "!="; //NE 37 | \= return "="; 38 | \( return "("; 39 | \) return ")"; 40 | \+ return "+"; 41 | \* return "*"; 42 | \, return ","; 43 | \- return "-"; 44 | \! return "!"; 45 | \. return "."; 46 | \/ return "/"; 47 | : return ":"; 48 | \; return ";"; 49 | \< return "<"; 50 | > return ">"; 51 | \? 
return "?"; 52 | \[ return "["; 53 | \] return "]"; 54 | \^ return "^"; 55 | \{ return "{"; 56 | \} return "}"; 57 | \| return "|"; 58 | \~ return "~"; 59 | \& return "&"; 60 | break return "BREAK"; 61 | case return "CASE"; 62 | catch return "CATCH"; 63 | continue return "CONTINUE"; 64 | default return "DEFAULT"; 65 | delete return "DELETE"; 66 | do return "DO"; 67 | else return "ELSE"; 68 | false return "FALSE"; 69 | finally return "FINALLY"; 70 | for return "FOR"; 71 | function return "FUNCTION"; 72 | get return "GET"; 73 | if return "IF"; 74 | in return "IN"; 75 | instanceof return "INSTANCEOF"; 76 | new return "NEW"; 77 | return return "RETURN"; 78 | set return "SET"; 79 | switch return "SWITCH"; 80 | this return "THIS"; 81 | throw return "THROW"; 82 | true return "TRUE"; 83 | try return "TRY"; 84 | typeof return "TYPEOF"; 85 | var return "VAR"; 86 | const return "CONST"; 87 | void return "VOID"; 88 | while return "WHILE"; 89 | with return "WITH"; //WITH keyword, matches the WITH token used by test.y 90 | [A-Za-z$_]\w* return "IDENT"; 91 | \s+ /* skip whitespace */ 92 | \n /* skip lineterminal */ 93 | . 
return "INVALID"; 94 | 95 | 96 | -------------------------------------------------------------------------------- /example/test.y: -------------------------------------------------------------------------------- 1 | %start Program 2 | 3 | %% 4 | 5 | Program 6 | : SourceElements $end 7 | { 8 | this.$$ = new Program($1); 9 | setNodeName(this.$$); 10 | } 11 | | $end 12 | { 13 | this.$$ = new Program(0); 14 | } 15 | ; 16 | 17 | SourceElements 18 | : SourceElements SourceElement 19 | { 20 | $1.childrens.push($2); 21 | this.$$ = $1; 22 | } 23 | | SourceElement 24 | { 25 | this.$$ = new SourceElements($1); 26 | } 27 | ; 28 | 29 | SourceElement 30 | : Statement 31 | | FunctionDeclaration 32 | ; 33 | 34 | Statement 35 | : Block 36 | | VariableStatement 37 | | EmptyStatement 38 | | ExpressionStatement 39 | | IfStatement 40 | | IterationStatement 41 | | ContinueStatement 42 | | BreakStatement 43 | | ReturnStatement 44 | | WithStatement 45 | | LabelledStatement 46 | | SwitchStatement 47 | | ThrowStatement 48 | | TryStatement 49 | ; 50 | 51 | FunctionDeclaration 52 | : FUNCTION IDENT '(' ')' FunctionBody 53 | { 54 | this.$$ = new FunctionDecl($2, 0, $5); 55 | } 56 | | FUNCTION IDENT '(' FormalParameterList ')' FunctionBody 57 | { 58 | this.$$ = new FunctionDecl($2, $4, $6); 59 | } 60 | ; 61 | 62 | FunctionBody 63 | : '{' '}' 64 | { 65 | this.$$ = new FunctionBody(0); 66 | } 67 | | '{' SourceElements '}' 68 | { 69 | this.$$ = new FunctionBody($2); 70 | } 71 | ; 72 | 73 | FormalParameterList 74 | : Ident 75 | { 76 | this.$$ = new Parameter($1); 77 | } 78 | | FormalParameterList ',' Ident 79 | { 80 | this.$$ = $1; 81 | this.$$.childrens.push($3); /* append the Ident (third symbol, not the comma) to the Parameter node children */ 82 | } 83 | ; 84 | 85 | Block 86 | : '{' '}' 87 | { 88 | this.$$ = new Block(0); 89 | } 90 | | '{' StatementList '}' 91 | { 92 | this.$$ = new Block($2); 93 | } 94 | ; 95 | 96 | StatementList 97 | : StatementList Statement 98 | { 99 | $1.childrens.push($2); /* StatementList is a node object, its elements live in childrens */ 100 | this.$$ = $1; 101 | } 102 | | Statement 103 | { 104 | this.$$ = new StatementList($1); 105 | } 
106 | ; 107 | 108 | 109 | IfStatement 110 | : IF '(' CommaExpression ')' Statement 111 | { 112 | this.$$ = new IfStatement($3, $5, 0); 113 | } 114 | | IF '(' CommaExpression ')' Statement ELSE Statement 115 | { 116 | this.$$ = new IfStatement($3, $5, $7); 117 | } 118 | ; 119 | 120 | 121 | EmptyStatement 122 | : ';' 123 | ; 124 | 125 | 126 | ExpressionStatement 127 | : AssignmentExpression ';' 128 | { 129 | this.$$ = $1; 130 | } 131 | ; 132 | 133 | VariableStatement 134 | : VAR VariableDeclarationList ';' 135 | { 136 | this.$$ = new VarStatement($2) 137 | } 138 | ; 139 | 140 | 141 | VariableDeclarationList 142 | : VariableDeclarationList ',' VariableDeclaration 143 | { 144 | this.$$ = $1; 145 | this.$$.childrens.push($3); 146 | } 147 | | VariableDeclaration 148 | { 149 | this.$$ = new VarDeclList($1); 150 | } 151 | ; 152 | VariableDeclaration 153 | : Ident 154 | { 155 | this.$$ = new VarDecl($1, 0); 156 | } 157 | | Ident Initializer 158 | { 159 | this.$$ = new VarDecl($1, $2); 160 | } 161 | ; 162 | Ident 163 | : IDENT 164 | { 165 | this.$$ = $1; 166 | } 167 | ; 168 | Initializer 169 | : '=' AssignmentExpression 170 | { 171 | this.$$ = new AssignExpression($2); 172 | } 173 | ; 174 | 175 | CommaExpression 176 | : AssignmentExpression 177 | { 178 | this.$$ = new CommaExpression($1); 179 | } 180 | | CommaExpression ',' AssignmentExpression 181 | { 182 | this.$$ = $1; 183 | this.$$.childrens.push($3); /* CommaExpression is a node object, append to its childrens like VariableDeclarationList does */ 184 | } 185 | ; 186 | 187 | 188 | AssignmentExpression 189 | : ConditionalExpression 190 | ; 191 | 192 | AssignmentOperator 193 | : '=' 194 | | '+=' 195 | | '-=' 196 | | '/=' 197 | | '*=' 198 | | '<<=' 199 | | '>>=' 200 | | '>>>=' 201 | | '&=' 202 | | '^=' 203 | | '|=' 204 | | '%=' 205 | ; 206 | 207 | ConditionalExpression 208 | : LogicalORExpression '?' 
AssignmentExpression ':' AssignmentExpression 209 | { 210 | this.$$ = new Conditional($1, $3, $5); 211 | } 212 | | LogicalORExpression 213 | ; 214 | LogicalORExpression 215 | : LogicalANDExpression 216 | | LogicalORExpression OR LogicalANDExpression 217 | { 218 | this.$$ = new BinaryLogical($1, $2, $3, true); 219 | } 220 | ; 221 | 222 | LogicalANDExpression 223 | : BitwiseORExpression 224 | ; 225 | 226 | BitwiseORExpression 227 | : BitwiseXORExpression 228 | ; 229 | 230 | BitwiseXORExpression 231 | : BitwiseANDExpression 232 | ; 233 | 234 | BitwiseANDExpression 235 | : EqualityExpression 236 | ; 237 | 238 | EqualityExpression 239 | : RelationalExpression 240 | | EqualityExpression '==' RelationalExpression 241 | { 242 | this.$$ = new EqualityExpression($1, $2, $3); 243 | } 244 | | EqualityExpression '!=' RelationalExpression 245 | { 246 | this.$$ = new EqualityExpression($1, $2, $3); 247 | } 248 | | EqualityExpression '===' RelationalExpression 249 | { 250 | this.$$ = new EqualityExpression($1, $2, $3); 251 | } 252 | | EqualityExpression '!==' RelationalExpression 253 | { 254 | this.$$ = new EqualityExpression($1, $2, $3); 255 | } 256 | ; 257 | 258 | RelationalExpression 259 | : ShiftExpression 260 | ; 261 | 262 | ShiftExpression 263 | : AdditiveExpression 264 | ; 265 | AdditiveExpression 266 | : MultiplicativeExpression 267 | ; 268 | 269 | MultiplicativeExpression 270 | : UnaryExpression 271 | ; 272 | UnaryExpression 273 | : PostfixExpression 274 | ; 275 | PostfixExpression 276 | : LeftHandSideExpression 277 | ; 278 | LeftHandSideExpression 279 | : NewExpression 280 | | CallExpression 281 | ; 282 | 283 | NewExpression 284 | : MemberExpression 285 | | NEW NewExpression 286 | { 287 | this.$$ = new NewExpression($2); 288 | } 289 | | NEW MemberExpression Arguments 290 | { 291 | this.$$ = new NewExpression($2, $3); 292 | } 293 | ; 294 | Arguments 295 | : '(' ')' 296 | { 297 | this.$$ = new Arguments(0); 298 | } 299 | | '(' ArgumentList ')' 300 | { 301 | this.$$ = new 
Arguments($2); 302 | } 303 | ; 304 | ArgumentList 305 | : AssignmentExpression 306 | { 307 | this.$$ = new ArgumentList($1); 308 | } 309 | | ArgumentList ',' AssignmentExpression 310 | { 311 | this.$$ = $1; 312 | this.$$.childrens.push($3); /* ArgumentList is a node object, its elements live in childrens */ 313 | } 314 | ; 315 | MemberExpression 316 | : PrimaryExpression 317 | | FunctionExpression 318 | | MemberExpression '[' CommaExpression ']' 319 | { 320 | this.$$ = new BracketAccessor($1, $3); 321 | } 322 | | MemberExpression '.' IdentifierName 323 | { 324 | this.$$ = new DotAccessor($1, $3); 325 | } 326 | ; 327 | IdentifierName 328 | : IDENT 329 | | BREAK 330 | | CASE 331 | | CATCH 332 | | CONTINUE 333 | | DEFAULT 334 | | DELETE 335 | | DO 336 | | ELSE 337 | | FALSE 338 | | FINALLY 339 | | FOR 340 | | FUNCTION 341 | | GET 342 | | IF 343 | | IN 344 | | INSTANCEOF 345 | | NEW 346 | | NULL 347 | | RETURN 348 | | SET 349 | | SWITCH 350 | | THIS 351 | | THROW 352 | | TRUE 353 | | TRY 354 | | TYPEOF 355 | | VAR 356 | | CONST 357 | | VOID 358 | | WHILE 359 | | WITH 360 | ; 361 | FunctionExpression 362 | : FUNCTION '(' ')' FunctionBody 363 | { 364 | this.$$ = new FunctionExpression(0, $4); 365 | } 366 | | FUNCTION '(' FormalParameterList ')' FunctionBody 367 | { 368 | this.$$ = new FunctionExpression($3, $5); 369 | } 370 | | FUNCTION IDENT '(' ')' FunctionBody 371 | { 372 | this.$$ = new FunctionExpression(0, $5, $2); 373 | } 374 | | FUNCTION IDENT '(' FormalParameterList ')' FunctionBody 375 | { 376 | this.$$ = new FunctionExpression($4, $6, $2); 377 | } 378 | ; 379 | 380 | PrimaryExpression 381 | : Literal 382 | ; 383 | 384 | Literal 385 | : IDENT 386 | | NULL 387 | | TRUE 388 | | FALSE 389 | | NUMBER 390 | | STRING 391 | ; 392 | 393 | %% 394 | 395 | function Program(x){ 396 | this.childrens = [x]; 397 | } 398 | function SourceElements(x){ 399 | this.childrens = [x]; 400 | } 401 | function Block(x){ 402 | this.childrens = [!!x ? 
x : 0]; 403 | } 404 | function StatementList(x){ 405 | this.childrens = [x]; 406 | } 407 | function VarStatement(x){ 408 | this.childrens = [x]; 409 | } 410 | function IfStatement(x, y, z){ 411 | this.childrens = [x, y, z]; 412 | } 413 | function VarDeclList(x){ 414 | this.childrens = [x]; 415 | } 416 | function VarDecl(x, y){ 417 | this.childrens = [x, y]; 418 | } 419 | function CommaExpression(x){ 420 | this.childrens = [x]; 421 | } 422 | function AssignExpression(x){ 423 | this.childrens = [x]; 424 | } 425 | function EqualityExpression(x, y, z){ 426 | this.childrens = [x, y, z]; 427 | } 428 | function Arguments(argumentList){ 429 | this.childrens = [argumentList]; 430 | } 431 | function ArgumentList(assignmentExpression){ 432 | this.childrens = [assignmentExpression]; 433 | } 434 | 435 | function BracketAccessor(memberExpression, commaExpression){ 436 | this.childrens = [memberExpression, commaExpression]; 437 | } 438 | function DotAccessor(memberExpression, identifierName){ 439 | this.childrens = [memberExpression, identifierName]; 440 | } 441 | 442 | 443 | function FuncitonDecl(x, y, z){ 444 | this.childrens = [x, y, z]; 445 | } 446 | 447 | function FunctionExpression(params, block, name){ 448 | this.childrens = [params, block, name]; 449 | } 450 | 451 | function Parameter(x){ 452 | this.childrens = [x]; 453 | } 454 | function FunctionBody(x){ 455 | this.childrens = [x]; 456 | } 457 | 458 | function setNodeName(node){ 459 | var childrens; 460 | if(node){ 461 | if(node.constructor !== Object){ 462 | node.nodeName = node.constructor.name; 463 | childrens = node.childrens; 464 | delete node.childrens; 465 | node.childrens = childrens; 466 | } 467 | if(node.childrens){ 468 | for(var i=0; i -1)){ 41 | activeRules.push(rules[i]); 42 | } 43 | } 44 | 45 | return activeRules; 46 | }, 47 | setInput:function (input){ 48 | _.merge(this, { 49 | input: input, 50 | position: 0, 51 | matched: '', 52 | text: '', 53 | yytext: '', 54 | lineno: 1, 55 | firstline: 1, 56 | 
lastline: 1, 57 | firstcolumn: 1, 58 | lastcolumn: 1, 59 | _more: false 60 | }); 61 | }, 62 | getToken:function (isDebug){ 63 | var self = this, 64 | token = self.getToken_(isDebug); 65 | 66 | if(!token){ 67 | token = self.getToken(isDebug); 68 | } 69 | 70 | return token; 71 | }, 72 | unToken:function (charsNum){ 73 | this.position -= charsNum; 74 | }, 75 | getToken_:function (isDebug){ 76 | var self = this, 77 | input = self.input.slice(self.position), 78 | regex, 79 | activeRules = self.getCurrentRules(), 80 | matches; 81 | 82 | if(!input){ 83 | return self.CONST.EOF; 84 | } 85 | 86 | if(!activeRules.length && isDebug){ 87 | debugger 88 | //这个断点的原因是,这是编写lex文法时常见的错误,就是自动机陷入一个没有任何规则激活的状态中了 89 | } 90 | 91 | var possibleInputs = [], 92 | maxLength = 0; 93 | 94 | for(var i=0,len=activeRules.length; i matches[0].length ? maxLength : matches[0].length; 100 | } 101 | } 102 | 103 | if(possibleInputs.length){ 104 | possibleInputs = _.filter(possibleInputs, function(possible){ 105 | return possible.match.length === maxLength; 106 | }); 107 | 108 | if(self._more){ 109 | self.yytext += possibleInputs[0].match; 110 | }else{ 111 | self.yytext = possibleInputs[0].match; 112 | } 113 | self.position += possibleInputs[0].match.length; 114 | self.yyleng = self.yytext.length; 115 | self._more = false; 116 | return (new Function(possibleInputs[0].rule.action)).call(self); 117 | } 118 | 119 | if(isDebug){ 120 | debugger 121 | //这个断点的原因是,没有在循环体中return 说明当前输入已经无法命中任何规则,自动机将陷入死循环 122 | } 123 | throw('invalid input: ' + input); 124 | }, 125 | reset:function (){ 126 | this.setInput(this.input); 127 | } 128 | }; 129 | })(), 130 | lrtable: 
{"actions":{"0":{"NUMBER":["shift",3]},"1":{"$end":["shift",4],"*":["shift",5]},"2":{"$end":["reduce",2],"*":["reduce",2]},"3":{"$end":["reduce",3],"*":["reduce",3]},"4":{"$end":["accept",0]},"5":{"NUMBER":["shift",3]},"6":{"$end":["reduce",1],"*":["reduce",1]}},"gotos":{"0":{"term":1,"factoy":2,"NUMBER":3},"1":{"$end":4,"*":5},"2":{},"3":{},"4":{},"5":{"factoy":6,"NUMBER":3},"6":{}}}, 131 | productions: [{"symbol":"$accept","nullable":false,"firsts":["NUMBER"],"rhs":["term","$end"],"srhs":"term $end","id":0,"actionCode":""},{"symbol":"term","nullable":false,"firsts":["NUMBER"],"rhs":["term","*","factoy"],"srhs":"term * factoy","id":1,"actionCode":"\n this.$$ = $1 * $3;\n "},{"symbol":"term","nullable":false,"firsts":["NUMBER"],"rhs":["factoy"],"srhs":"factoy","id":2,"actionCode":""},{"symbol":"factoy","nullable":false,"firsts":["NUMBER"],"rhs":["NUMBER"],"srhs":"NUMBER","id":3,"actionCode":"\n this.$$ = parseInt($1, 10);\n "}], 132 | defaultAction: " this.$$ = $1+2; console.log($0);", 133 | parse:function (input, isDebug){ 134 | var self = this, 135 | 136 | stateStack = [0], //状态栈 初始状态0 137 | symbolStack = [], //符号栈 138 | valueStack = [], //值栈 139 | 140 | lexer = self.lexer, 141 | token, 142 | state; 143 | 144 | lexer.setInput(input); 145 | token = self.lexer.getToken(isDebug); 146 | 147 | while(true){ 148 | 149 | state = stateStack[stateStack.length - 1]; 150 | 151 | var action = self.lrtable.actions[state] && self.lrtable.actions[state][token]; 152 | 153 | if(!action && isDebug){ 154 | //这是编写bnf时容易出错的,通过当前输入和当前状态(状态隐含了当前入栈的符号) 155 | //无法找到右端句柄,也无法通过当前输入决定应进行移进动作 156 | debugger 157 | } 158 | 159 | if(isDebug){ 160 | console.log('当前状态:'+state, '输入符号:'+token, '动作:'+action); 161 | } 162 | if(action){ 163 | if(action[0] === 'shift'){ 164 | stateStack.push(action[1]); 165 | symbolStack.push(token); 166 | valueStack.push(lexer.yytext); 167 | token = lexer.getToken(isDebug); 168 | }else if(action[0] === 'reduce'){ 169 | var production = self.productions[action[1]]; 170 
| 171 | var reduceCode = ('/*' + production.symbol + ' -> ' + production.srhs + ';*/' 172 | + (self.defaultAction || 'this.$$ = $1;') 173 | + production.actionCode) 174 | .replace(/\$0/g, JSON.stringify({symbol: production.symbol, rhs: production.rhs})) 175 | .replace(/\$(\d+)/g, function(_, n){ 176 | return 'valueStack[' + (valueStack.length - production.rhs.length + parseInt(n, 10) - 1) + ']' 177 | }); 178 | 179 | eval(reduceCode); 180 | 181 | 182 | if(isDebug){ 183 | console.log(' 当前右端句柄为:' + production.rhs); 184 | console.log(' 右端句柄对应值栈内容为:' + JSON.stringify(valueStack.slice(-production.rhs.length))); 185 | console.log(' 归约后的值为:' + JSON.stringify(this.$$)); 186 | } 187 | 188 | //如果是当前归约用的产生式不是epsilon: 189 | // 符号栈才需要对右端句柄包含的各个symbol出栈,归约为产生式的非终结符(lhs)再入栈 190 | // 值栈才需要对右端句柄对应的各个值出栈,进行归约计算为某个lhs值,再把lhs值入栈 191 | // 状态栈也才需要对代表右端句柄的各个状态进行出栈,查goto表找到代表lhs符号的新状态入栈 192 | //否则,应用epsilon,各栈保持原地不动 193 | if(production.rhs.length){ 194 | symbolStack = symbolStack.slice(0, -production.rhs.length); 195 | valueStack = valueStack.slice(0, -production.rhs.length); 196 | stateStack = stateStack.slice(0, -production.rhs.length); 197 | } 198 | 199 | var curstate = stateStack[stateStack.length-1]; 200 | 201 | //查goto表,找到代表归约后的lhs符号的新状态 202 | var newstate = self.lrtable.gotos[curstate] && self.lrtable.gotos[curstate][production.symbol]; 203 | 204 | 205 | if(isDebug){ 206 | console.log(' 右端句柄归约后的符号:'+production.symbol+',应转移到:'+newstate); 207 | } 208 | symbolStack.push(production.symbol); //归约后的lhs符号,压入符号栈 209 | valueStack.push(this.$$); //语义动作中归约后的值(rhs各项计算出的lhs值),压入值栈 210 | stateStack.push(newstate); //goto表查到的新状态,压入状态栈 211 | 212 | 213 | }else if(action[0] === 'accept'){ 214 | if(isDebug){ 215 | console.log('accept'); 216 | } 217 | return true; 218 | }else{ 219 | return false; 220 | } 221 | }else{ 222 | return false; 223 | } 224 | } 225 | } 226 | }; 227 | if(typeof module == "object"){module.exports = parser} 228 | 229 | return parser; 230 | })(this); 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jsbison", 3 | "description": "javascript对bison的实现", 4 | "version": "0.3.6", 5 | "homepage": "https://github.com/CecilLee", 6 | "author": { 7 | "name": "CecilLee", 8 | "email": "cciscecil@gmail.com" 9 | }, 10 | "main": "src/generator.js", 11 | "repository": { 12 | "type": "git", 13 | "url": "git@github.com:CecilLee/jsbison.git" 14 | }, 15 | "licenses": [ 16 | { 17 | "type": "MIT", 18 | "url": "/blob/master/LICENSE" 19 | } 20 | ], 21 | "engines": { 22 | "node": ">= 0.8.0" 23 | }, 24 | "devDependencies": { 25 | "grunt": "~0.4.1", 26 | "grunt-contrib-nodeunit": "^0.4.1" 27 | }, 28 | "dependencies": { 29 | "lodash": "^2.4.1" 30 | }, 31 | "keywords": [ 32 | "lib", 33 | "parser", 34 | "bison", 35 | "bnf", 36 | "yacc" 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /src/bnf-parser-generate.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Converts a canonical BNF file into jsbison-cfg (the JSON-format context-free grammar used by jsbison) 3 | */ 4 | var Generator = require('./generator.js'); 5 | 6 | var bnfParserCode = new Generator({ 7 | lex: { 8 | states:{ 9 | exclusive: 'parse_token parse_tokens productions parse_colon parse_symbols default_action parse_code parse_all_code' 10 | }, 11 | rules: [{ 12 | regex: /\s+/, 13 | action: '' //skip whitespace 14 | }, { 15 | regex: /\/\/.*/, 16 | action: '' //skip singleline comment 17 | }, { 18 | regex: /\/\*(.|\n|\r)*?\*\//, 19 | action: '' //skip multiline comment 20 | }, { 21 | regex: /%start/, 22 | action: 'this.pushState("parse_token"); return "DEC_START";' 23 | }, { 24 | regex: /%defaultAction/, 25 | action: 'this.pushState("default_action"); return "DEC_DEFACTION";' 26 | }, { 27 | conditions: ['default_action'], 28 | regex: /\s*\{/, 29 | action: 'this.depth = 1; 
this.pushState("parse_code"); return "{";' 30 | }, { 31 | conditions: ['default_action'], 32 | regex: /\}/, 33 | action: 'this.popState(); return "}";' 34 | }, { 35 | conditions: ['parse_token'], 36 | regex: /[^\s]+/, 37 | action: 'this.popState(); return "TOKEN";' 38 | }, { 39 | regex: /%token/, 40 | action: 'this.pushState("parse_tokens"); return "DEC_TOKEN";' 41 | }, { 42 | regex: /%(left|rigth|assoc)/, 43 | action: 'this.pushState("parse_tokens"); return "DEC_ASSOC";' 44 | }, { 45 | conditions: ['parse_tokens'], 46 | regex: /[^\r\n]+/, 47 | action: 'this.popState(); return "TOKENS";' 48 | }, { 49 | regex: /%%/, 50 | action: 'this.pushState("productions"); return "%%";' 51 | }, { 52 | conditions: ['productions'], 53 | regex: /%%/, 54 | action: 'this.pushState("parse_all_code");return "%%";' 55 | }, { 56 | conditions: ['productions'], 57 | regex: /\|/, 58 | action: 'this.pushState("parse_rhs");return "|";' 59 | }, { 60 | conditions: ['productions'], 61 | regex: /;/, 62 | action: 'return ";";' 63 | }, { 64 | conditions: ['productions'], 65 | regex: /[\w_]+/, 66 | action: 'this.pushState("parse_colon");return "TOKEN";' 67 | }, { 68 | conditions: ['parse_colon'], 69 | regex: /:/, 70 | action: 'this.popState();this.pushState("parse_rhs"); return ":";' 71 | }, { 72 | conditions: ['parse_rhs'], 73 | regex: /[a-zA-Z_$][\w$]*/, 74 | action: 'return "SYMBOL";' 75 | }, { 76 | conditions: ['parse_rhs'], 77 | regex: /(['"])(?:\\\1|[^\1])*?\1/, 78 | action: 'this.yytext = this.yytext.slice(1, -1).trim();return "TOKEN";' 79 | }, { 80 | conditions: ['parse_rhs'], 81 | regex: /\[[a-zA-Z_$][\w$]*?\]/, 82 | action: 'return "PROP";' 83 | }, { 84 | conditions: ['parse_rhs'], 85 | regex: /{/, 86 | action: 'this.pushState("parse_code"); this.depth=1; return "{"; ' 87 | }, { 88 | conditions: ['parse_rhs'], 89 | regex: /\|/, 90 | action: 'return "|";' 91 | }, { 92 | conditions: ['parse_rhs'], 93 | regex: /;/, 94 | action: 'this.popState();return ";";' 95 | }, { 96 | conditions: 
['parse_rhs'], 97 | regex: /}/, 98 | action: 'return "}";' 99 | }, { 100 | conditions: ['parse_code'], 101 | regex: /(.|\r|\n)*?[}{]/, 102 | action: 'if(this.yytext[this.yyleng-1]=="{"){this.depth++;}else{this.depth--;}if(this.depth){this.yymore();}else{this.unToken(1);this.yytext=this.yytext.substr(0,this.yytext.length-1);this.popState();return "CODE"}' 103 | }, { 104 | conditions: ['parse_all_code'], 105 | regex: /[\s\S]*/, 106 | action: 'this.popState();return "CODE";' 107 | }, { 108 | conditions: ['parse_token', 'parse_tokens', 'productions', 'parse_colon', 'parse_code'], 109 | regex: /[\s]+/, 110 | action: '' 111 | } 112 | ] 113 | }, 114 | 115 | type: 'LR(1)', 116 | start: 'bnf', 117 | tokens: '%% CODE TOKEN SYMBOL PROP TOKENS { } DEC_TOKEN DEC_DEFACTION DEC_START PRIORITY DEC_ASSOC', 118 | bnf: { 119 | 'bnf' : { 120 | 'declarations %% productions opt_ends $end': ' this.$$ = $1; this.$$.bnf = $3; ', 121 | 'declarations %% productions %% CODE $end' : 'this.$$ = $1; this.$$.bnf = $3; this.$$.code = $5;' 122 | }, 123 | 'opt_ends' : { 124 | '%%' : '', 125 | '' : '' 126 | }, 127 | 'declarations' : { 128 | 'declarations declaration' : '_.merge($1, $2); this.$$ = $1;', 129 | 'declaration': 'this.$$ = $1;', 130 | '': 'this.$$ = {};' 131 | }, 132 | 'declaration' : { 133 | 'DEC_TOKEN TOKENS' : 'this.$$ = {tokens: $2}; ', 134 | 'DEC_DEFACTION { CODE }' : 'this.$$ = {defaultAction: $3};', 135 | 'DEC_START TOKEN': 'this.$$ = {start: $2};', 136 | 'operator PRIORITY' : 'this.$$ = $1; this.$$.priority = $2; ', 137 | 'operator' : 'this.$$ = $1; this.$$.priority = 0; ' 138 | }, 139 | 'operator': { 140 | 'DEC_ASSOC TOKENS' : 'this.$$ = {}; this.$$[$1] = $2;' 141 | }, 142 | 'productions': { 143 | 'productions production': '_.merge($1, $2); this.$$ = $1;', 144 | 'production': 'this.$$ = $1;' 145 | }, 146 | 'production': { 147 | 'TOKEN : rhslist ;' : 'this.$$ = {}; this.$$[$1] = $3;' 148 | }, 149 | 'rhslist': { 150 | 'rhslist | rhscode': 'this.$$ = $1; _.merge(this.$$, $3);', 151 | 
'rhscode' : 'this.$$ = $1' 152 | }, 153 | 'rhscode': { 154 | 'rhs { CODE }': 'this.$$ = {}; this.$$[$1] = $3;', 155 | 'rhs': 'this.$$ = {}; this.$$[$1] = ""', 156 | '{ CODE }': 'this.$$ = {}; this.$$[""] = $2;' 157 | }, 158 | 'rhs' : { 159 | 'SYMBOL' : 'this.$$ = $1', 160 | 'TOKEN' : 'this.$$ = $1', 161 | 'PROP' : 'this.$$ = $1', 162 | 'rhs rhs' : 'this.$$ = $1 + " " +$2' 163 | } 164 | }, 165 | code: 'global.bnfParser = parser;' 166 | 167 | 168 | }).generate(); 169 | 170 | 171 | var fs = require('fs'); 172 | 173 | fs.writeFileSync('./bnf-parser.js', bnfParserCode); 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /src/bnf-parser.js: -------------------------------------------------------------------------------- 1 | (function(global, undef){ 2 | if(typeof require === "function"){ _ = require("lodash");} 3 | var parser = { 4 | EOF:"$end", 5 | reset:function (){ 6 | var self = this; 7 | self.lexer.reset(); 8 | }, 9 | lexer: (function(){ 10 | return { 11 | CONST:{"INITIAL":"INITIAL","EOF":"$end"}, 12 | states:{"exclusive":{"parse_token":true,"parse_tokens":true,"productions":true,"parse_colon":true,"parse_symbols":true,"default_action":true,"parse_code":true,"parse_all_code":true}}, 13 | rules: [{regex:/^\s+/,action:''},{regex:/^\/\/.*/,action:''},{regex:/^\/\*(.|\n|\r)*?\*\//,action:''},{regex:/^%start/,action:'this.pushState("parse_token"); return "DEC_START";'},{regex:/^%defaultAction/,action:'this.pushState("default_action"); return "DEC_DEFACTION";'},{regex:/^\s*\{/,action:'this.depth = 1; this.pushState("parse_code"); return "{";', conditions:["default_action"]},{regex:/^\}/,action:'this.popState(); return "}";', conditions:["default_action"]},{regex:/^[^\s]+/,action:'this.popState(); return "TOKEN";', conditions:["parse_token"]},{regex:/^%token/,action:'this.pushState("parse_tokens"); return "DEC_TOKEN";'},{regex:/^%(left|rigth|assoc)/,action:'this.pushState("parse_tokens"); return 
"DEC_ASSOC";'},{regex:/^[^\r\n]+/,action:'this.popState(); return "TOKENS";', conditions:["parse_tokens"]},{regex:/^%%/,action:'this.pushState("productions"); return "%%";'},{regex:/^%%/,action:'this.pushState("parse_all_code");return "%%";', conditions:["productions"]},{regex:/^\|/,action:'this.pushState("parse_rhs");return "|";', conditions:["productions"]},{regex:/^;/,action:'return ";";', conditions:["productions"]},{regex:/^[\w_]+/,action:'this.pushState("parse_colon");return "TOKEN";', conditions:["productions"]},{regex:/^:/,action:'this.popState();this.pushState("parse_rhs"); return ":";', conditions:["parse_colon"]},{regex:/^[a-zA-Z_$][\w$]*/,action:'return "SYMBOL";', conditions:["parse_rhs"]},{regex:/^(['"])(?:\\\1|[^\1])*?\1/,action:'this.yytext = this.yytext.slice(1, -1).trim();return "TOKEN";', conditions:["parse_rhs"]},{regex:/^\[[a-zA-Z_$][\w$]*?\]/,action:'return "PROP";', conditions:["parse_rhs"]},{regex:/^{/,action:'this.pushState("parse_code"); this.depth=1; return "{"; ', conditions:["parse_rhs"]},{regex:/^\|/,action:'return "|";', conditions:["parse_rhs"]},{regex:/^;/,action:'this.popState();return ";";', conditions:["parse_rhs"]},{regex:/^}/,action:'return "}";', conditions:["parse_rhs"]},{regex:/^(.|\r|\n)*?[}{]/,action:'if(this.yytext[this.yyleng-1]=="{"){this.depth++;}else{this.depth--;}if(this.depth){this.yymore();}else{this.unToken(1);this.yytext=this.yytext.substr(0,this.yytext.length-1);this.popState();return "CODE"}', conditions:["parse_code"]},{regex:/^[\s\S]*/,action:'this.popState();return "CODE";', conditions:["parse_all_code"]},{regex:/^[\s]+/,action:'', conditions:["parse_token","parse_tokens","productions","parse_colon","parse_code"]}], 14 | yymore:function (){ 15 | this._more = true; 16 | }, 17 | stateStack:["INITIAL"], 18 | pushState:function (state){ 19 | this.stateStack.push(state); 20 | }, 21 | popState:function (){ 22 | return this.stateStack.pop(); 23 | }, 24 | getCurrentRules:function (){ 25 | var self = this, 26 | rules 
= self.rules, 27 | curState = self.stateStack[self.stateStack.length-1], 28 | activeRules = [], 29 | isInclusiveState = true; //是否为包容状态 30 | 31 | if(self.states.exclusive[curState]){ 32 | isInclusiveState = false; 33 | } 34 | 35 | 36 | for(var i=0, len=rules.length; i -1)){ 41 | activeRules.push(rules[i]); 42 | } 43 | } 44 | 45 | return activeRules; 46 | }, 47 | setInput:function (input){ 48 | _.merge(this, { 49 | input: input, 50 | position: 0, 51 | matched: '', 52 | text: '', 53 | yytext: '', 54 | lineno: 1, 55 | firstline: 1, 56 | lastline: 1, 57 | firstcolumn: 1, 58 | lastcolumn: 1, 59 | _more: false 60 | }); 61 | }, 62 | getToken:function (isDebug){ 63 | var self = this, 64 | token = self.getToken_(isDebug); 65 | 66 | if(!token){ 67 | token = self.getToken(isDebug); 68 | } 69 | 70 | return token; 71 | }, 72 | unToken:function (token){ 73 | this.position -= this.yytext.length; 74 | this.input = this.input.substr(0, this.position) + token + this.input.substr(this.position); 75 | }, 76 | getToken_:function (isDebug){ 77 | var self = this, 78 | input = self.input.slice(self.position), 79 | regex, 80 | activeRules = self.getCurrentRules(), 81 | matches; 82 | 83 | if(!input){ 84 | return self.CONST.EOF; 85 | } 86 | 87 | if(!activeRules.length && isDebug){ 88 | debugger 89 | //这个断点的原因是,这是编写lex文法时常见的错误,就是自动机陷入一个没有任何规则激活的状态中了 90 | } 91 | 92 | var possibleInputs = [], 93 | maxLength = 0; 94 | 95 | for(var i=0,len=activeRules.length; i matches[0].length ? 
maxLength : matches[0].length; 101 | } 102 | } 103 | 104 | if(possibleInputs.length){ 105 | possibleInputs = _.filter(possibleInputs, function(possible){ 106 | return possible.match.length === maxLength; 107 | }); 108 | 109 | if(self._more){ 110 | self.yytext += possibleInputs[0].match; 111 | }else{ 112 | self.yytext = possibleInputs[0].match; 113 | } 114 | self.position += possibleInputs[0].match.length; 115 | self.yyleng = self.yytext.length; 116 | self._more = false; 117 | return (new Function(possibleInputs[0].rule.action)).call(self); 118 | } 119 | 120 | if(isDebug){ 121 | debugger 122 | //这个断点的原因是,没有在循环体中return 说明当前输入已经无法命中任何规则,自动机将陷入死循环 123 | } 124 | throw('invalid input: ' + input); 125 | }, 126 | reset:function (){ 127 | this.setInput(this.input); 128 | } 129 | }; 130 | })(), 131 | lrtable: {"actions":{"0":{"%%":["reduce",7],"DEC_ASSOC":["shift",8],"DEC_DEFACTION":["shift",5],"DEC_START":["shift",6],"DEC_TOKEN":["shift",4]},"1":{"$end":["shift",9]},"2":{"%%":["shift",10],"DEC_TOKEN":["shift",4],"DEC_DEFACTION":["shift",5],"DEC_START":["shift",6],"DEC_ASSOC":["shift",8]},"3":{"%%":["reduce",6],"DEC_ASSOC":["reduce",6],"DEC_DEFACTION":["reduce",6],"DEC_START":["reduce",6],"DEC_TOKEN":["reduce",6]},"4":{"TOKENS":["shift",12]},"5":{"{":["shift",13]},"6":{"TOKEN":["shift",14]},"7":{"PRIORITY":["shift",15],"%%":["reduce",12],"DEC_ASSOC":["reduce",12],"DEC_DEFACTION":["reduce",12],"DEC_START":["reduce",12],"DEC_TOKEN":["reduce",12]},"8":{"TOKENS":["shift",16]},"9":{"$end":["accept",0]},"10":{"TOKEN":["shift",19]},"11":{"%%":["reduce",5],"DEC_ASSOC":["reduce",5],"DEC_DEFACTION":["reduce",5],"DEC_START":["reduce",5],"DEC_TOKEN":["reduce",5]},"12":{"%%":["reduce",8],"DEC_ASSOC":["reduce",8],"DEC_DEFACTION":["reduce",8],"DEC_START":["reduce",8],"DEC_TOKEN":["reduce",8]},"13":{"CODE":["shift",20]},"14":{"%%":["reduce",10],"DEC_ASSOC":["reduce",10],"DEC_DEFACTION":["reduce",10],"DEC_START":["reduce",10],"DEC_TOKEN":["reduce",10]},"15":{"%%":["reduce",11],"DEC_ASSOC":["r
educe",11],"DEC_DEFACTION":["reduce",11],"DEC_START":["reduce",11],"DEC_TOKEN":["reduce",11]},"16":{"PRIORITY":["reduce",13],"%%":["reduce",13],"DEC_ASSOC":["reduce",13],"DEC_DEFACTION":["reduce",13],"DEC_START":["reduce",13],"DEC_TOKEN":["reduce",13]},"17":{"%%":["shift",22],"$end":["reduce",4],"TOKEN":["shift",19]},"18":{"$end":["reduce",15],"%%":["reduce",15],"TOKEN":["reduce",15]},"19":{":":["shift",24]},"20":{"}":["shift",25]},"21":{"$end":["shift",26]},"22":{"CODE":["shift",27],"$end":["reduce",3]},"23":{"$end":["reduce",14],"%%":["reduce",14],"TOKEN":["reduce",14]},"24":{"{":["shift",31],"SYMBOL":["shift",32],"TOKEN":["shift",33],"PROP":["shift",34]},"25":{"%%":["reduce",9],"DEC_ASSOC":["reduce",9],"DEC_DEFACTION":["reduce",9],"DEC_START":["reduce",9],"DEC_TOKEN":["reduce",9]},"26":{"$end":["reduce",1]},"27":{"$end":["shift",35]},"28":{";":["shift",36],"|":["shift",37]},"29":{";":["reduce",18],"|":["reduce",18]},"30":{"{":["shift",38],";":["reduce",20],"|":["reduce",20],"SYMBOL":["shift",32],"TOKEN":["shift",33],"PROP":["shift",34]},"31":{"CODE":["shift",40]},"32":{"{":["reduce",22],";":["reduce",22],"|":["reduce",22],"PROP":["reduce",22],"SYMBOL":["reduce",22],"TOKEN":["reduce",22]},"33":{"{":["reduce",23],";":["reduce",23],"|":["reduce",23],"PROP":["reduce",23],"SYMBOL":["reduce",23],"TOKEN":["reduce",23]},"34":{"{":["reduce",24],";":["reduce",24],"|":["reduce",24],"PROP":["reduce",24],"SYMBOL":["reduce",24],"TOKEN":["reduce",24]},"35":{"$end":["reduce",2]},"36":{"$end":["reduce",16],"%%":["reduce",16],"TOKEN":["reduce",16]},"37":{"{":["shift",31],"SYMBOL":["shift",32],"TOKEN":["shift",33],"PROP":["shift",34]},"38":{"CODE":["shift",42]},"39":{"{":["reduce",25],";":["reduce",25],"|":["reduce",25],"PROP":["shift",34],"SYMBOL":["shift",32],"TOKEN":["shift",33]},"40":{"}":["shift",43]},"41":{";":["reduce",17],"|":["reduce",17]},"42":{"}":["shift",44]},"43":{";":["reduce",21],"|":["reduce",21]},"44":{";":["reduce",19],"|":["reduce",19]}},"gotos":{"0":{"bnf":1,"d
eclarations":2,"declaration":3,"DEC_TOKEN":4,"DEC_DEFACTION":5,"DEC_START":6,"operator":7,"DEC_ASSOC":8},"1":{"$end":9},"2":{"%%":10,"declaration":11,"DEC_TOKEN":4,"DEC_DEFACTION":5,"DEC_START":6,"operator":7,"DEC_ASSOC":8},"3":{},"4":{"TOKENS":12},"5":{"{":13},"6":{"TOKEN":14},"7":{"PRIORITY":15},"8":{"TOKENS":16},"9":{},"10":{"productions":17,"production":18,"TOKEN":19},"11":{},"12":{},"13":{"CODE":20},"14":{},"15":{},"16":{},"17":{"opt_ends":21,"%%":22,"production":23,"TOKEN":19},"18":{},"19":{":":24},"20":{"}":25},"21":{"$end":26},"22":{"CODE":27},"23":{},"24":{"rhslist":28,"rhscode":29,"rhs":30,"{":31,"SYMBOL":32,"TOKEN":33,"PROP":34},"25":{},"26":{},"27":{"$end":35},"28":{";":36,"|":37},"29":{},"30":{"{":38,"rhs":39,"SYMBOL":32,"TOKEN":33,"PROP":34},"31":{"CODE":40},"32":{},"33":{},"34":{},"35":{},"36":{},"37":{"rhscode":41,"rhs":30,"{":31,"SYMBOL":32,"TOKEN":33,"PROP":34},"38":{"CODE":42},"39":{"rhs":39,"SYMBOL":32,"TOKEN":33,"PROP":34},"40":{"}":43},"41":{},"42":{"}":44},"43":{},"44":{}}}, 132 | productions: [{"symbol":"$accept","nullable":false,"firsts":["DEC_TOKEN","DEC_DEFACTION","DEC_START","DEC_ASSOC","%%"],"rhs":["bnf","$end"],"symbolRhs":[{"name":"bnf"},{"name":"$end"}],"srhs":"bnf $end","id":0,"actionCode":""},{"symbol":"bnf","nullable":false,"firsts":["DEC_TOKEN","DEC_DEFACTION","DEC_START","DEC_ASSOC","%%"],"rhs":["declarations","%%","productions","opt_ends","$end"],"symbolRhs":[{"name":"declarations"},{"name":"%%"},{"name":"productions"},{"name":"opt_ends"},{"name":"$end"}],"srhs":"declarations %% productions opt_ends $end","id":1,"actionCode":" this.$$ = $1; this.$$.bnf = $3; "},{"symbol":"bnf","nullable":false,"firsts":["DEC_TOKEN","DEC_DEFACTION","DEC_START","DEC_ASSOC","%%"],"rhs":["declarations","%%","productions","%%","CODE","$end"],"symbolRhs":[{"name":"declarations"},{"name":"%%"},{"name":"productions"},{"name":"%%"},{"name":"CODE"},{"name":"$end"}],"srhs":"declarations %% productions %% CODE $end","id":2,"actionCode":"this.$$ = $1; 
this.$$.bnf = $3; this.$$.code = $5;"},{"symbol":"opt_ends","nullable":false,"firsts":["%%"],"rhs":["%%"],"symbolRhs":[{"name":"%%"}],"srhs":"%%","id":3,"actionCode":""},{"symbol":"opt_ends","nullable":true,"firsts":[],"rhs":[],"symbolRhs":[],"srhs":"","id":4,"actionCode":""},{"symbol":"declarations","nullable":false,"firsts":["DEC_TOKEN","DEC_DEFACTION","DEC_START","DEC_ASSOC"],"rhs":["declarations","declaration"],"symbolRhs":[{"name":"declarations"},{"name":"declaration"}],"srhs":"declarations declaration","id":5,"actionCode":"_.merge($1, $2); this.$$ = $1;"},{"symbol":"declarations","nullable":false,"firsts":["DEC_TOKEN","DEC_DEFACTION","DEC_START","DEC_ASSOC"],"rhs":["declaration"],"symbolRhs":[{"name":"declaration"}],"srhs":"declaration","id":6,"actionCode":"this.$$ = $1;"},{"symbol":"declarations","nullable":true,"firsts":[],"rhs":[],"symbolRhs":[],"srhs":"","id":7,"actionCode":"this.$$ = {};"},{"symbol":"declaration","nullable":false,"firsts":["DEC_TOKEN"],"rhs":["DEC_TOKEN","TOKENS"],"symbolRhs":[{"name":"DEC_TOKEN"},{"name":"TOKENS"}],"srhs":"DEC_TOKEN TOKENS","id":8,"actionCode":"this.$$ = {tokens: $2}; "},{"symbol":"declaration","nullable":false,"firsts":["DEC_DEFACTION"],"rhs":["DEC_DEFACTION","{","CODE","}"],"symbolRhs":[{"name":"DEC_DEFACTION"},{"name":"{"},{"name":"CODE"},{"name":"}"}],"srhs":"DEC_DEFACTION { CODE }","id":9,"actionCode":"this.$$ = {defaultAction: $3};"},{"symbol":"declaration","nullable":false,"firsts":["DEC_START"],"rhs":["DEC_START","TOKEN"],"symbolRhs":[{"name":"DEC_START"},{"name":"TOKEN"}],"srhs":"DEC_START TOKEN","id":10,"actionCode":"this.$$ = {start: $2};"},{"symbol":"declaration","nullable":false,"firsts":["DEC_ASSOC"],"rhs":["operator","PRIORITY"],"symbolRhs":[{"name":"operator"},{"name":"PRIORITY"}],"srhs":"operator PRIORITY","id":11,"actionCode":"this.$$ = $1; this.$$.priority = $2; 
"},{"symbol":"declaration","nullable":false,"firsts":["DEC_ASSOC"],"rhs":["operator"],"symbolRhs":[{"name":"operator"}],"srhs":"operator","id":12,"actionCode":"this.$$ = $1; this.$$.priority = 0; "},{"symbol":"operator","nullable":false,"firsts":["DEC_ASSOC"],"rhs":["DEC_ASSOC","TOKENS"],"symbolRhs":[{"name":"DEC_ASSOC"},{"name":"TOKENS"}],"srhs":"DEC_ASSOC TOKENS","id":13,"actionCode":"this.$$ = {}; this.$$[$1] = $2;"},{"symbol":"productions","nullable":false,"firsts":["TOKEN"],"rhs":["productions","production"],"symbolRhs":[{"name":"productions"},{"name":"production"}],"srhs":"productions production","id":14,"actionCode":"_.merge($1, $2); this.$$ = $1;"},{"symbol":"productions","nullable":false,"firsts":["TOKEN"],"rhs":["production"],"symbolRhs":[{"name":"production"}],"srhs":"production","id":15,"actionCode":"this.$$ = $1;"},{"symbol":"production","nullable":false,"firsts":["TOKEN"],"rhs":["TOKEN",":","rhslist",";"],"symbolRhs":[{"name":"TOKEN"},{"name":":"},{"name":"rhslist"},{"name":";"}],"srhs":"TOKEN : rhslist ;","id":16,"actionCode":"this.$$ = {}; this.$$[$1] = $3;"},{"symbol":"rhslist","nullable":false,"firsts":["{","SYMBOL","TOKEN","PROP"],"rhs":["rhslist","|","rhscode"],"symbolRhs":[{"name":"rhslist"},{"name":"|"},{"name":"rhscode"}],"srhs":"rhslist | rhscode","id":17,"actionCode":"this.$$ = $1; _.merge(this.$$, $3);"},{"symbol":"rhslist","nullable":false,"firsts":["SYMBOL","TOKEN","PROP","{"],"rhs":["rhscode"],"symbolRhs":[{"name":"rhscode"}],"srhs":"rhscode","id":18,"actionCode":"this.$$ = $1"},{"symbol":"rhscode","nullable":false,"firsts":["SYMBOL","TOKEN","PROP"],"rhs":["rhs","{","CODE","}"],"symbolRhs":[{"name":"rhs"},{"name":"{"},{"name":"CODE"},{"name":"}"}],"srhs":"rhs { CODE }","id":19,"actionCode":"this.$$ = {}; this.$$[$1] = $3;"},{"symbol":"rhscode","nullable":false,"firsts":["SYMBOL","TOKEN","PROP"],"rhs":["rhs"],"symbolRhs":[{"name":"rhs"}],"srhs":"rhs","id":20,"actionCode":"this.$$ = {}; this.$$[$1] = 
\"\""},{"symbol":"rhscode","nullable":false,"firsts":["{"],"rhs":["{","CODE","}"],"symbolRhs":[{"name":"{"},{"name":"CODE"},{"name":"}"}],"srhs":"{ CODE }","id":21,"actionCode":"this.$$ = {}; this.$$[\"\"] = $2;"},{"symbol":"rhs","nullable":false,"firsts":["SYMBOL"],"rhs":["SYMBOL"],"symbolRhs":[{"name":"SYMBOL"}],"srhs":"SYMBOL","id":22,"actionCode":"this.$$ = $1"},{"symbol":"rhs","nullable":false,"firsts":["TOKEN"],"rhs":["TOKEN"],"symbolRhs":[{"name":"TOKEN"}],"srhs":"TOKEN","id":23,"actionCode":"this.$$ = $1"},{"symbol":"rhs","nullable":false,"firsts":["PROP"],"rhs":["PROP"],"symbolRhs":[{"name":"PROP"}],"srhs":"PROP","id":24,"actionCode":"this.$$ = $1"},{"symbol":"rhs","nullable":false,"firsts":["SYMBOL","TOKEN","PROP"],"rhs":["rhs","rhs"],"symbolRhs":[{"name":"rhs"},{"name":"rhs"}],"srhs":"rhs rhs","id":25,"actionCode":"this.$$ = $1 + \" \" +$2"}], 133 | 134 | parse:function (input, isDebug){ 135 | var self = this, 136 | 137 | stateStack = [0], //状态栈 初始状态0 138 | symbolStack = [], //符号栈 139 | valueStack = [], //值栈 140 | 141 | lexer = self.lexer, 142 | token, 143 | state; 144 | 145 | delete self.$$; //初始化归约值 146 | 147 | lexer.setInput(input); 148 | token = self.lexer.getToken(isDebug); 149 | 150 | while(true){ 151 | 152 | state = stateStack[stateStack.length - 1]; 153 | 154 | var action = self.lrtable.actions[state] && self.lrtable.actions[state][token]; 155 | 156 | if(!action && isDebug){ 157 | //这是编写bnf时容易出错的,通过当前输入和当前状态(状态隐含了当前入栈的符号) 158 | //无法找到右端句柄,也无法通过当前输入决定应进行移进动作 159 | debugger 160 | } 161 | 162 | if(isDebug){ 163 | console.log('当前状态:'+state, '输入符号:'+token, '动作:'+action); 164 | } 165 | if(action){ 166 | if(action[0] === 'shift'){ 167 | stateStack.push(action[1]); 168 | symbolStack.push(token); 169 | valueStack.push(lexer.yytext); 170 | token = lexer.getToken(isDebug); 171 | 172 | }else if(action[0] === 'reduce'){ 173 | var production = self.productions[action[1]]; 174 | 175 | //for [@caliburn](https://github.com/takumi4ichi/caliburn) 176 | //token is 
restricted token 177 | //在这里检查产生式中是否包含受限token 178 | if(production.isRestricted){ 179 | debugger 180 | } 181 | 182 | var reduceCode = ('/*' + production.symbol + ' -> ' + production.srhs + ';*/' 183 | + (self.defaultAction || 'this.$$ = $1;') 184 | + production.actionCode) 185 | .replace(/\$0/g, JSON.stringify({symbol: production.symbol, rhs: production.rhs})) 186 | .replace(/\$(\d+)/g, function(_, n){ 187 | return 'valueStack[' + (valueStack.length - production.rhs.length + parseInt(n, 10) - 1) + ']' 188 | }); 189 | 190 | eval(reduceCode); 191 | 192 | 193 | if(isDebug){ 194 | console.log(' 当前右端句柄为:' + production.rhs); 195 | console.log(' 右端句柄对应值栈内容为:' + JSON.stringify(valueStack.slice(-production.rhs.length))); 196 | console.log(' 归约后的值为:' + JSON.stringify(this.$$)); 197 | } 198 | 199 | //如果是当前归约用的产生式不是epsilon: 200 | // 符号栈才需要对右端句柄包含的各个symbol出栈,归约为产生式的非终结符(lhs)再入栈 201 | // 值栈才需要对右端句柄对应的各个值出栈,进行归约计算为某个lhs值,再把lhs值入栈 202 | // 状态栈也才需要对代表右端句柄的各个状态进行出栈,查goto表找到代表lhs符号的新状态入栈 203 | //否则,应用epsilon,各栈保持原地不动 204 | if(production.rhs.length){ 205 | symbolStack = symbolStack.slice(0, -production.rhs.length); 206 | valueStack = valueStack.slice(0, -production.rhs.length); 207 | stateStack = stateStack.slice(0, -production.rhs.length); 208 | } 209 | 210 | var curstate = stateStack[stateStack.length-1]; 211 | 212 | //查goto表,找到代表归约后的lhs符号的新状态 213 | var newstate = self.lrtable.gotos[curstate] && self.lrtable.gotos[curstate][production.symbol]; 214 | 215 | 216 | if(isDebug){ 217 | console.log(' 右端句柄归约后的符号:'+production.symbol+',应转移到:'+newstate); 218 | } 219 | symbolStack.push(production.symbol); //归约后的lhs符号,压入符号栈 220 | valueStack.push(this.$$); //语义动作中归约后的值(rhs各项计算出的lhs值),压入值栈 221 | stateStack.push(newstate); //goto表查到的新状态,压入状态栈 222 | 223 | 224 | }else if(action[0] === 'accept'){ 225 | if(isDebug){ 226 | console.log('accept'); 227 | } 228 | return true; 229 | }else{ 230 | return false; 231 | } 232 | }else{ 233 | //for [@caliburn](https://github.com/takumi4ichi/caliburn) 234 | // 235 | 
//offtending token: 236 | // 1. 该token是} 237 | // 2. 该token前面存在LineTerminator 238 | // 239 | //输入流自动插入一个semicolon 240 | if(token !== ';'){ 241 | if(token === '}'){ 242 | lexer.unToken(';'); 243 | token = lexer.getToken(isDebug); 244 | }else if(/[\n\r\u2028\u2029]\s*?$/.test(lexer.input.slice(0, lexer.position-lexer.yytext.length))){ 245 | 246 | //这里要判断,自动补全semicolon后 247 | //该semicolon不可以被归约为EmptyStatement 248 | //这个是ES5.1-Grammar 7.9章的自动补全分号的前置条件 249 | if(self.lrtable.actions[state] && self.lrtable.actions[state][';']){ 250 | var semicolonAction = self.lrtable.actions[state][';']; 251 | if(semicolonAction[0] === 'shift'){ 252 | semicolonAction = self.lrtable.actions[semicolonAction[1]][';']; 253 | if(semicolonAction[0] === 'shift' || (semicolonAction[0] === 'reduce' && self.productions[semicolonAction[1]].symbol !== 'EmptyStatement')){ 254 | lexer.unToken(';'); 255 | token = lexer.getToken(isDebug); 256 | continue; 257 | } 258 | }else if(semicolonAction[0] === 'reduce' && self.productions[semicolonAction[1]].symbol !== 'EmptyStatement'){ 259 | lexer.unToken(';'); 260 | token = lexer.getToken(isDebug); 261 | continue; 262 | } 263 | } 264 | 265 | return false; 266 | }else{ 267 | return false; 268 | } 269 | }else{ 270 | return false; 271 | } 272 | } 273 | } 274 | } 275 | }; 276 | if(typeof module == "object"){module.exports = parser} 277 | global.bnfParser = parser; 278 | return parser; 279 | })(this); -------------------------------------------------------------------------------- /src/datatypes.js: -------------------------------------------------------------------------------- 1 | (function(global){ 2 | 3 | if(typeof require === 'function'){ 4 | _ = require('lodash'); 5 | } 6 | 7 | var DataTypes = function(){ 8 | 9 | function Production(symbol, rhs, id, actionCode){ 10 | this.symbol = symbol; 11 | this.nullable = false; 12 | this.firsts = []; 13 | this.rhs = []; 14 | this.symbolRhs = []; 15 | var rhsSymbol; 16 | var nextTokenProp; 17 | 18 | if(_.isString(rhs)){ 19 
| rhs = rhs.trim(); 20 | if(rhs === ''){ 21 | rhs = []; 22 | }else{ 23 | rhs = rhs.split(' '); 24 | } 25 | } 26 | for(var i=0,len=rhs.length; i 1){ 333 | conflict[p.symbol] = token; 334 | isConflict = true; 335 | } 336 | }); 337 | 338 | if(p.nullable){ 339 | _.each(self.nonterminals[p.symbol].follows, function(token){ 340 | symbol[token] = symbol[token] || []; 341 | symbol[token].push(p.id); 342 | if(symbol[token].length > 1){ 343 | conflict[p.symbol] = token; 344 | isConflict = true; 345 | } 346 | }); 347 | } 348 | }); 349 | 350 | if (isConflict) { 351 | var conflicts = []; 352 | _.forIn(conflict, function(token, symbol){ 353 | conflicts[conflicts.length] = { 354 | symbol: symbol, 355 | token: token, 356 | productions: _.map(lltable[symbol][token],function(pid){return self.productions[pid].srhs}) 357 | }; 358 | }); 359 | this.conflict = conflicts; 360 | } 361 | 362 | return lltable; 363 | }, 364 | 365 | llparse: function(input){ 366 | var self = this, 367 | lexer = this.lexer = new Lexer(this.cfg.lex, input), 368 | symbolStack = [this.acceptSymbol], 369 | token = lexer.getToken(), 370 | symbol, 371 | pid, 372 | production; 373 | 374 | while(token){ 375 | symbol = symbolStack[symbolStack.length-1]; 376 | if(self.nonterminals[symbol]){ 377 | if(pid = this.lltable[symbol] && this.lltable[symbol][token]){ 378 | symbolStack.pop(); 379 | if(!pid){ 380 | alert('unexpected token:' + token); 381 | }else{ 382 | pid = pid[0]; 383 | production = self.productions[pid]; 384 | if(production.srhs === ''){//epsilon 385 | //none 386 | }else{ 387 | _.eachRight(production.rhs, function(rsymbol){ 388 | symbolStack.push(rsymbol); 389 | }); 390 | } 391 | } 392 | } 393 | }else if(self.terminals.indexOf(symbol) > -1){ 394 | if(symbol == token){ 395 | symbolStack.pop(); 396 | valueStack.pop(); 397 | token = lexer.getToken(); 398 | }else{ 399 | alert('unexpected token:' + token); 400 | } 401 | } 402 | 403 | } 404 | }, 405 | 406 | 407 | 408 | /** 409 | * 构造项集族 410 | * 411 | * 1. 
首先创建初始状态的项集: 基于增广文法符号S'的产生式的项集,并计算项集闭包 412 | * 2. 进入C0循环,C0循环的结束条件是,上次循环执行后,未使项集族生长 413 | * 3. 遍历项集族中每个项集I,遍历项集I上输入点不在最后的项x,拿到项x的输入点位置的dotSymbol 414 | * 4. 计算项集I的dotSymbol的goto项集G 415 | * 5. 设置项集I的goto表上dotSymbol的转换为项集G 416 | * 6. 将新项集加入项集族 417 | * 418 | */ 419 | buildItemSets: function(){ 420 | var self = this, 421 | item0 = new DataTypes.Item(self.productions[0], 0, [self.EOF]), //S' -> S #dotPos=0 422 | itemSet0 = new DataTypes.ItemSet(); 423 | 424 | itemSet0.push(item0); 425 | 426 | var firstItemSet = self._closureItemSet(itemSet0), 427 | itemSets = this.itemSets = [], 428 | itemSetsHash = {}, //这个hash是为了判断项集是否已存在(在closure扩展项集之前生成key来判断) 429 | itemSet, 430 | formState, 431 | curIdx, 432 | dotSymbolHash; //这个hash是为了基于当前项集 和 各子项的dotSymbol进行goto运算时,避免已经goto运算过的dotSymbol再次运算 433 | 434 | //为什么是itemSet0.key() 不是firstItemSet.key() 435 | //因为要用仅包含内核项的项集做KEY,下次GOTO运算的结果项集在未CLOSURE运算前, 436 | //就能判断是否已经存在于项集族中,如果已存在,就直接拿到ID,否则进行CLOSURE运算增加到项集族中 437 | //避免没必要的CLOSURE计算 438 | itemSetsHash[itemSet0.key()] = 0; 439 | 440 | itemSets.push(firstItemSet); 441 | 442 | curIdx=0; 443 | 444 | while(curIdx !== itemSets.length){ //C0 445 | 446 | itemSet = itemSets[curIdx]; 447 | curIdx++; 448 | 449 | dotSymbolHash = {}; 450 | 451 | _.each(itemSet.subItems, function(item){ 452 | 453 | if(item.dotSymbol && !dotSymbolHash[item.dotSymbol]){ 454 | 455 | dotSymbolHash[item.dotSymbol] = true; 456 | var gotoItemSet = self._gotoItemSet(itemSet, item.dotSymbol); 457 | 458 | if(itemSetsHash[gotoItemSet.key()]){ 459 | itemSet.gotos[item.dotSymbol] = itemSetsHash[gotoItemSet.key()]; 460 | self.gotoItemSetRepeatCount = self.gotoItemSetRepeatCount || 0; 461 | self.gotoItemSetRepeatCount += 1; 462 | }else{ 463 | itemSetsHash[gotoItemSet.key()] = itemSets.length; 464 | if (gotoItemSet.subItems.length){ 465 | gotoItemSet = self._closureItemSet(gotoItemSet); 466 | } 467 | //原itemSet通过该dotSymbol,转换到的新itemSet的序号 468 | itemSet.gotos[item.dotSymbol] = itemSets.length; 469 | itemSets.push(gotoItemSet); 470 | 
console.log('generate lr state:' + itemSets.length); 471 | } 472 | } 473 | }); 474 | 475 | } 476 | 477 | return itemSets; 478 | }, 479 | 480 | /** 481 | * 求某个项集状态上,通过某个symbol的输入,所能到达的项集 482 | * 483 | * 遍历该项集的每个项, 484 | * 如果指定的symbol是这个项的dotSymbol, 485 | * 基于该项的产生式,dotPos后移一位,创建新项, 486 | * 增加到goto项集中,计算goto项集的闭包项集,返回 487 | * 488 | */ 489 | _gotoItemSet: function(itemSet, symbol){ 490 | 491 | var self = this, 492 | gotoItemSet = new DataTypes.ItemSet(); 493 | 494 | _.each(itemSet.subItems, function(item){ 495 | if(item.dotSymbol === symbol){ 496 | gotoItemSet.push(new DataTypes.Item(item.production, item.dotPosition+1, item.lookaheads)); 497 | } 498 | }); 499 | 500 | return gotoItemSet; 501 | }, 502 | 503 | /** 504 | * 求一个项集的闭包项集: 505 | * 506 | * 在一个大的循环体C0中,遍历当前闭包项集中的每个项,查看每个项的dotSymbol是否为非终结符 507 | * 如dotSymbol是非终结符,就基于 推导该非终结符的产生式,创建dotPosition为0的闭包项(非内核项) 508 | * 509 | * 外层的大循环体C0的结束条件是: 510 | * 该次C0循环中遍历闭包项集时,并未找到未推导过的非终结符,所以未创建过闭包项 511 | * 512 | */ 513 | _closureItemSet: function(itemSet){ 514 | 515 | this.closureCount = this.closureCount || 0; 516 | this.closureCount++; 517 | 518 | var self = this, 519 | closureItemSet = new DataTypes.ItemSet(); 520 | 521 | var set = itemSet, 522 | itemQueue; 523 | do{ //C0 524 | itemQueue = new DataTypes.ItemSet(); 525 | closureItemSet.union(set); 526 | 527 | _.each(set.subItems, function(item){ //each closureItems 528 | if(item.dotSymbol && self.nonterminals[item.dotSymbol]){ 529 | /* 530 | if(!closureSymbolHash[item.dotSymbol]){ //exists un derivation nonterminal 531 | */ 532 | var afterRhs = item.production.rhs.slice(item.dotPosition+1); 533 | lookaheads = self._first(afterRhs); 534 | if(lookaheads.length === 0 || item.production.nullable || self._nullable(afterRhs)){ 535 | lookaheads = _.union(lookaheads, item.lookaheads); 536 | } 537 | 538 | _.each(self.nonterminals[item.dotSymbol].productions, function(p){ 539 | var item = new DataTypes.Item(p, 0, lookaheads); 540 | if(!closureItemSet.contains(item) && 
!itemQueue.contains(item)){ 541 | itemQueue.push(item); //new clsoure-item 542 | } 543 | }); 544 | 545 | 546 | /** 547 | * 求闭包的项的输入点非终结符后面的RHS 和 求闭包的项的每个lookahead符号 连接 548 | * 然后进行FIRSTS运算,拿到lookaheads 549 | * 每个lookaheads,进行并集运算,得到最终的lookaheads 550 | */ 551 | /* 552 | var lookaheads = []; 553 | _.each(item.lookaheads, function(fchr){ 554 | var afterRHS = item.production.rhs.slice(item.dotPosition+1) 555 | afterRHS.push(fchr); 556 | lookaheads = _.union(lookaheads, self._first(afterRHS)); 557 | }); 558 | */ 559 | 560 | //求闭包的项的输入点非终结符,的每个产生式p 561 | //查找闭包集合中是否已经存在相同核心的产生式 562 | //存在则合并lookaheads 563 | //不存在则创建 564 | /* 565 | _.each(self.nonterminals[item.dotSymbol].productions, function(p){ 566 | var item = new DataTypes.Item(p, 0, lookaheads), 567 | itemIdx; 568 | if((itemIdx = closureItemSet.coreIndexOf(item)) === -1){ 569 | itemSet.push(item); //new clsoure-item 570 | cont = true; //C0不动点循环继续 571 | }else{ 572 | item = closureItemSet.subItems[itemIdx]; 573 | item.lookaheads = _.union(item.lookaheads, lookaheads); 574 | } 575 | }); 576 | */ 577 | //closureSymbolHash[item.dotSymbol] = true; //cur nonterminal derivated 578 | /* 579 | } 580 | */ 581 | } 582 | }); 583 | set = itemQueue; 584 | }while(set.subItems.length); 585 | 586 | return closureItemSet; 587 | }, 588 | 589 | buildLRTable: function(){ 590 | 591 | var self = this; 592 | 593 | self.buildLRActionTable(); 594 | self.buildLRGotoTable(); 595 | 596 | }, 597 | buildLRActionTable: function(){ 598 | var self = this, 599 | states = this.states = {}; 600 | 601 | _.each(self.itemSets, function(itemSet, stateNum){ 602 | var state = states[stateNum] = states[stateNum] || {}; 603 | 604 | _.each(itemSet.subItems, function(item){ 605 | 606 | // A -> ab.c and itemSet.gotos[c] 607 | if(item.dotSymbol && _.indexOf(self.terminals, item.dotSymbol) > -1){ 608 | //移入 609 | if(!!itemSet.gotos[item.dotSymbol]){ 610 | state[item.dotSymbol] = ['shift', itemSet.gotos[item.dotSymbol]]; 611 | } 612 | } 613 | 614 | // A -> abc. 
615 | if(!item.dotSymbol){ 616 | 617 | if(item.production.symbol === self.acceptSymbol){ 618 | 619 | //A === S' 接受 620 | state[self.EOF] = ['accept', item.production.id]; 621 | }else{ 622 | //A !== S' 归约 623 | var terms = self.terminals; 624 | if(self.cfg.type === 'SLR(1)'){ 625 | terms = self.nonterminals[item.production.symbol].follows; 626 | }else if(self.cfg.type === 'LR(1)'){ 627 | terms = item.lookaheads; 628 | }else if(self.cfg.type === 'LALR'){ 629 | terms = item.follows; 630 | } 631 | _.each(terms, function(symbol){ 632 | state[symbol] = ['reduce', item.production.id]; 633 | }); 634 | } 635 | } 636 | }); 637 | }); 638 | }, 639 | 640 | buildLRGotoTable: function(){ 641 | 642 | var self = this, 643 | states = self.states, 644 | gotos = self.gotos = {}, 645 | lrtable = self.lrtable = {actions: states, gotos: gotos}; 646 | 647 | //遍历项集族 648 | _.each(self.itemSets, function(itemSet, itemNum){ 649 | var g = {}, 650 | hasGoto = false; 651 | 652 | //遍历项集的gotos表 653 | _.forIn(itemSet.gotos, function(goItemNum, symbol){ 654 | 655 | //只把非终结符的转换,设置到g上 656 | if(true||self.nonterminals[symbol]){ 657 | g[symbol] = goItemNum; 658 | hasGoto = true; 659 | } 660 | }); 661 | if(true||hasGoto){ 662 | gotos[itemNum] = g; 663 | } 664 | 665 | }); 666 | 667 | //构造gotos表就是把数组的项集族,设为以ItemNum(state状态)为key的HASH表 668 | //每个value是状态的goto表,并筛选非终结符的goto 669 | return gotos; 670 | }, 671 | 672 | 673 | lrreset: function(){ 674 | var self = this; 675 | self.lexer.reset(); 676 | }, 677 | lrparse: function(input, isDebug){ 678 | var self = this, 679 | 680 | stateStack = [0], //状态栈 初始状态0 681 | symbolStack = [], //符号栈 682 | valueStack = [], //值栈 683 | 684 | lexer = self.lexer, 685 | token, 686 | state; 687 | 688 | delete self.$$; //初始化归约值 689 | 690 | lexer.setInput(input); 691 | token = self.lexer.getToken(isDebug); 692 | 693 | while(true){ 694 | 695 | state = stateStack[stateStack.length - 1]; 696 | 697 | var action = self.lrtable.actions[state] && self.lrtable.actions[state][token]; 698 | 699 | 
if(!action && isDebug){ 700 | //这是编写bnf时容易出错的,通过当前输入和当前状态(状态隐含了当前入栈的符号) 701 | //无法找到右端句柄,也无法通过当前输入决定应进行移进动作 702 | debugger 703 | } 704 | 705 | if(isDebug){ 706 | console.log('当前状态:'+state, '输入符号:'+token, '动作:'+action); 707 | } 708 | if(action){ 709 | if(action[0] === 'shift'){ 710 | stateStack.push(action[1]); 711 | symbolStack.push(token); 712 | valueStack.push(lexer.yytext); 713 | token = lexer.getToken(isDebug); 714 | 715 | }else if(action[0] === 'reduce'){ 716 | var production = self.productions[action[1]]; 717 | 718 | //for [@caliburn](https://github.com/takumi4ichi/caliburn) 719 | //token is restricted token 720 | //在这里检查产生式中是否包含受限token 721 | if(production.isRestricted){ 722 | debugger 723 | } 724 | 725 | var reduceCode = ('/*' + production.symbol + ' -> ' + production.srhs + ';*/' 726 | + (self.defaultAction || 'this.$$ = $1;') 727 | + production.actionCode) 728 | .replace(/\$0/g, JSON.stringify({symbol: production.symbol, rhs: production.rhs})) 729 | .replace(/\$(\d+)/g, function(_, n){ 730 | return 'valueStack[' + (valueStack.length - production.rhs.length + parseInt(n, 10) - 1) + ']' 731 | }); 732 | 733 | eval(reduceCode); 734 | 735 | 736 | if(isDebug){ 737 | console.log(' 当前右端句柄为:' + production.rhs); 738 | console.log(' 右端句柄对应值栈内容为:' + JSON.stringify(valueStack.slice(-production.rhs.length))); 739 | console.log(' 归约后的值为:' + JSON.stringify(this.$$)); 740 | } 741 | 742 | //如果是当前归约用的产生式不是epsilon: 743 | // 符号栈才需要对右端句柄包含的各个symbol出栈,归约为产生式的非终结符(lhs)再入栈 744 | // 值栈才需要对右端句柄对应的各个值出栈,进行归约计算为某个lhs值,再把lhs值入栈 745 | // 状态栈也才需要对代表右端句柄的各个状态进行出栈,查goto表找到代表lhs符号的新状态入栈 746 | //否则,应用epsilon,各栈保持原地不动 747 | if(production.rhs.length){ 748 | symbolStack = symbolStack.slice(0, -production.rhs.length); 749 | valueStack = valueStack.slice(0, -production.rhs.length); 750 | stateStack = stateStack.slice(0, -production.rhs.length); 751 | } 752 | 753 | var curstate = stateStack[stateStack.length-1]; 754 | 755 | //查goto表,找到代表归约后的lhs符号的新状态 756 | var newstate = 
self.lrtable.gotos[curstate] && self.lrtable.gotos[curstate][production.symbol]; 757 | 758 | 759 | if(isDebug){ 760 | console.log(' 右端句柄归约后的符号:'+production.symbol+',应转移到:'+newstate); 761 | } 762 | symbolStack.push(production.symbol); //归约后的lhs符号,压入符号栈 763 | valueStack.push(this.$$); //语义动作中归约后的值(rhs各项计算出的lhs值),压入值栈 764 | stateStack.push(newstate); //goto表查到的新状态,压入状态栈 765 | 766 | 767 | }else if(action[0] === 'accept'){ 768 | if(isDebug){ 769 | console.log('accept'); 770 | } 771 | return true; 772 | }else{ 773 | return false; 774 | } 775 | }else{ 776 | //for [@caliburn](https://github.com/takumi4ichi/caliburn) 777 | // 778 | //offtending token: 779 | // 1. 该token是} 780 | // 2. 该token前面存在LineTerminator 781 | // 782 | //输入流自动插入一个semicolon 783 | if(token !== ';'){ 784 | if(token === '}'){ 785 | lexer.unToken(';'); 786 | token = lexer.getToken(isDebug); 787 | }else if(/[\n\r\u2028\u2029]\s*?$/.test(lexer.input.slice(0, lexer.position-lexer.yytext.length))){ 788 | 789 | //这里要判断,自动补全semicolon后 790 | //该semicolon不可以被归约为EmptyStatement 791 | //这个是ES5.1-Grammar 7.9章的自动补全分号的前置条件 792 | if(self.lrtable.actions[state] && self.lrtable.actions[state][';']){ 793 | var semicolonAction = self.lrtable.actions[state][';']; 794 | if(semicolonAction[0] === 'shift'){ 795 | semicolonAction = self.lrtable.actions[semicolonAction[1]][';']; 796 | if(semicolonAction[0] === 'shift' || (semicolonAction[0] === 'reduce' && self.productions[semicolonAction[1]].symbol !== 'EmptyStatement')){ 797 | lexer.unToken(';'); 798 | token = lexer.getToken(isDebug); 799 | continue; 800 | } 801 | }else if(semicolonAction[0] === 'reduce' && self.productions[semicolonAction[1]].symbol !== 'EmptyStatement'){ 802 | lexer.unToken(';'); 803 | token = lexer.getToken(isDebug); 804 | continue; 805 | } 806 | } 807 | 808 | return false; 809 | }else{ 810 | return false; 811 | } 812 | }else{ 813 | return false; 814 | } 815 | } 816 | } 817 | }, 818 | generate: function(){ 819 | var self = this; 820 | 821 | var code = [ 822 | 
'(function(global, undef){', 823 | 'if(typeof require === "function"){ _ = require("lodash");}', 824 | 'var parser = {', 825 | 'EOF:"'+self.EOF+'",', 826 | 'reset:' + self.lrreset.toString() + ',', 827 | 'lexer: ' + (new Lexer(self.cfg.lex)).generate() + ',', 828 | 'lrtable: ' + JSON.stringify(self.lrtable, null, '') + ',', 829 | 'productions: ' + JSON.stringify(self.productions, null, '') + ',', 830 | self.cfg.defaultAction ? 'defaultAction: "' + self.cfg.defaultAction.replace(/\n|\r/g,'') + '",' : '', 831 | 'parse:' + self.lrparse.toString(), 832 | '};', 833 | 'if(typeof module == "object"){module.exports = parser}', 834 | self.cfg.code || '', 835 | 'return parser;', 836 | '})(this);' 837 | ].join('\n'); 838 | return code; 839 | } 840 | 841 | }; 842 | 843 | if(typeof lexParser === 'object'){ 844 | Generator.lexParser = lexParser; 845 | } 846 | if(typeof bnfParser === 'object'){ 847 | Generator.bnfParser = bnfParser; 848 | } 849 | 850 | 851 | if(typeof module == 'object' && module.exports){ 852 | module.exports = Generator; 853 | }else{ 854 | global.Generator = Generator; 855 | } 856 | })(this); 857 | -------------------------------------------------------------------------------- /src/lex-parser-generate.js: -------------------------------------------------------------------------------- 1 | /** 2 | * canonical lexfile to jsbison-lexer-cfg(jsbison中的lexer使用的json格式的context free grammar) 3 | */ 4 | var Generator = require('./generator.js'); 5 | 6 | var lexParserCode = new Generator({ 7 | lex: { 8 | states:{ 9 | exclusive: 'rules start_conditions condition_names actioncode multi_actioncode' 10 | }, 11 | rules: [{ 12 | regex: /\s+/, 13 | action: '' //skip whitespace 14 | }, { 15 | regex: /\/\/.*/, 16 | action: '' //skip singleline comment 17 | }, { 18 | regex: /\/\*(.|\n|\r)*?\*\//, 19 | action: '' //skip multiline comment 20 | }, { 21 | conditions: ['rules'], 22 | regex: /\s+/, 23 | action: '' 24 | }, { 25 | conditions: ['rules'], 26 | regex: //, 39 | action: 
'this.popState(); return ">";' 40 | }, { 41 | conditions: ['INITIAL'], 42 | regex: /%s/, 43 | action: 'this.pushState("condition_names"); return "INCLUSIVE_CONDITION";' 44 | }, { 45 | conditions: ['INITIAL'], 46 | regex: /%x/, 47 | action: 'this.pushState("condition_names"); return "EXCLUSIVE_CONDITION";' 48 | }, { 49 | conditions: ['INITIAL'], 50 | regex: /%%/, 51 | action: 'this.pushState("rules");return "%%";' 52 | }, { 53 | conditions: ['condition_names'], 54 | regex: /[\r\n]/, 55 | action: 'this.popState();' 56 | }, { 57 | conditions: ['condition_names'], 58 | regex: /\s+/, 59 | action: '/* skip */' 60 | }, { 61 | conditions: ['condition_names'], 62 | regex: /[a-zA-Z]\w+/, 63 | action: 'return "CONDITION";' 64 | }, { 65 | //匹配正则表达式 66 | conditions: ['rules'], 67 | regex: /[^\s]+/, 68 | action: 'this.pushState("actioncode");return "REGEX";' 69 | }, { 70 | //在actioncode状态,一旦匹配到{,如果actionDepth为0,则转为multi_actioncode状态,并返回'{' 71 | //否则不返回任何TOKEN,只是actionDepth增加 72 | //转换为multi_actioncode状态后就不会在匹配到这个{了 73 | conditions: ['actioncode'], 74 | regex: /\s*\{/, 75 | action: 'this.pushState("multi_actioncode"); this.depth=1; return "{";' 76 | }, { 77 | conditions: ['multi_actioncode'], 78 | regex: /\}/, 79 | action: 'this.popState();this.popState(); return "}"' 80 | }, { 81 | conditions: ['multi_actioncode'], 82 | regex: /(.|\r|\n)*?[}{]/, 83 | action: 'if(this.yytext[this.yyleng-1] === "{"){this.depth++;}else{this.depth--;}if(!this.depth){this.unToken(1);this.yytext = this.yytext.substr(0,this.yyleng-1);return "ACTIONBODY";}else{this.yymore()}' 84 | }, { 85 | conditions: ['actioncode'], 86 | regex: /[^\r\n]*/, 87 | action: 'this.popState();return "ACTIONBODY";' 88 | } 89 | ] 90 | }, 91 | 92 | start: 'lex', 93 | tokens: '< > { } REGEX ACTIONBODY EXCLUSIVE_CONDITION INCLUSIVE_CONDITION %% START_CONDITION ,', 94 | type: 'LR(1)', 95 | bnf: { 96 | 'lex' : { 97 | 'definitionlist %% rulelist': 'this.$$ = {rules: $3}; this.$$.states = {}; if($1.inclusive){this.$$.states.inclusive 
= $1.inclusive;} if($1.exclusive){this.$$.states.exclusive = $1.exclusive;}' 98 | }, 99 | 'definitionlist': { 100 | 'INCLUSIVE_CONDITION condition_names': 'this.$$ = {"inclusive":$2};', 101 | 'EXCLUSIVE_CONDITION condition_names': 'this.$$ = {"exclusive":$2};', 102 | '': 'this.$$ = {}' 103 | }, 104 | 'condition_names': { 105 | 'condition_names CONDITION' : '$1.push($1);this.$$=$1;', 106 | 'CONDITION' : 'this.$$ = [$1];' 107 | }, 108 | 'rulelist': { 109 | 'rulelist rule': '$1.push($2); this.$$ = $1;', 110 | 'rule': 'this.$$ = [$1]' 111 | }, 112 | 'rule': { 113 | '< start_conditions > REGEX action': 'this.$$ = {regex: (new RegExp($4)), action:$5}; if($1){this.$$.conditions=$2} ', 114 | 'REGEX action': 'this.$$ = {regex: (new RegExp($1)), action:$2}; ' 115 | }, 116 | 'start_conditions':{ 117 | 'start_conditions , START_CONDITION': '$1.push($3); this.$$ = $1;', 118 | 'START_CONDITION': 'this.$$ = [$1];' 119 | }, 120 | 'action': { 121 | 'ACTIONBODY': 'this.$$ = $1', 122 | '{ ACTIONBODY }': 'this.$$ = $2.replace(/[\\r\\n]/g,"")', 123 | '{ }': 'this.$$ = ""' 124 | } 125 | }, 126 | code: 'global.lexParser = parser;' 127 | 128 | }).generate(); 129 | 130 | var fs = require('fs'); 131 | 132 | fs.writeFileSync('./lex-parser.js', lexParserCode); 133 | -------------------------------------------------------------------------------- /src/lex-parser.js: -------------------------------------------------------------------------------- 1 | (function(global, undef){ 2 | if(typeof require === "function"){ _ = require("lodash");} 3 | var parser = { 4 | EOF:"$end", 5 | reset:function (){ 6 | var self = this; 7 | self.lexer.reset(); 8 | }, 9 | lexer: (function(){ 10 | return { 11 | CONST:{"INITIAL":"INITIAL","EOF":"$end"}, 12 | states:{"exclusive":{"rules":true,"start_conditions":true,"condition_names":true,"actioncode":true,"multi_actioncode":true}}, 13 | rules: [{regex:/^\s+/,action:''},{regex:/^\/\/.*/,action:''},{regex:/^\/\*(.|\n|\r)*?\*\//,action:''},{regex:/^\s+/,action:'', 
conditions:["rules"]},{regex:/^/,action:'this.popState(); return ">";', conditions:["start_conditions"]},{regex:/^%s/,action:'this.pushState("condition_names"); return "INCLUSIVE_CONDITION";', conditions:["INITIAL"]},{regex:/^%x/,action:'this.pushState("condition_names"); return "EXCLUSIVE_CONDITION";', conditions:["INITIAL"]},{regex:/^%%/,action:'this.pushState("rules");return "%%";', conditions:["INITIAL"]},{regex:/^[\r\n]/,action:'this.popState();', conditions:["condition_names"]},{regex:/^\s+/,action:'/* skip */', conditions:["condition_names"]},{regex:/^[a-zA-Z]\w+/,action:'return "CONDITION";', conditions:["condition_names"]},{regex:/^[^\s]+/,action:'this.pushState("actioncode");return "REGEX";', conditions:["rules"]},{regex:/^\s*\{/,action:'this.pushState("multi_actioncode"); this.depth=1; return "{";', conditions:["actioncode"]},{regex:/^\}/,action:'this.popState();this.popState(); return "}"', conditions:["multi_actioncode"]},{regex:/^(.|\r|\n)*?[}{]/,action:'if(this.yytext[this.yyleng-1] === "{"){this.depth++;}else{this.depth--;}if(!this.depth){this.unToken(1);this.yytext = this.yytext.substr(0,this.yyleng-1);return "ACTIONBODY";}else{this.yymore()}', conditions:["multi_actioncode"]},{regex:/^[^\r\n]*/,action:'this.popState();return "ACTIONBODY";', conditions:["actioncode"]}], 14 | yymore:function (){ 15 | this._more = true; 16 | }, 17 | stateStack:["INITIAL"], 18 | pushState:function (state){ 19 | this.stateStack.push(state); 20 | }, 21 | popState:function (){ 22 | return this.stateStack.pop(); 23 | }, 24 | getCurrentRules:function (){ 25 | var self = this, 26 | rules = self.rules, 27 | curState = self.stateStack[self.stateStack.length-1], 28 | activeRules = [], 29 | isInclusiveState = true; //是否为包容状态 30 | 31 | if(self.states.exclusive[curState]){ 32 | isInclusiveState = false; 33 | } 34 | 35 | 36 | for(var i=0, len=rules.length; i -1)){ 41 | activeRules.push(rules[i]); 42 | } 43 | } 44 | 45 | return activeRules; 46 | }, 47 | setInput:function (input){ 
48 | _.merge(this, { 49 | input: input, 50 | position: 0, 51 | matched: '', 52 | text: '', 53 | yytext: '', 54 | lineno: 1, 55 | firstline: 1, 56 | lastline: 1, 57 | firstcolumn: 1, 58 | lastcolumn: 1, 59 | _more: false 60 | }); 61 | }, 62 | getToken:function (isDebug){ 63 | var self = this, 64 | token = self.getToken_(isDebug); 65 | 66 | if(!token){ 67 | token = self.getToken(isDebug); 68 | } 69 | 70 | return token; 71 | }, 72 | unToken:function (charsNum){ 73 | this.position -= charsNum; 74 | }, 75 | getToken_:function (isDebug){ 76 | var self = this, 77 | input = self.input.slice(self.position), 78 | regex, 79 | activeRules = self.getCurrentRules(), 80 | matches; 81 | 82 | if(!input){ 83 | return self.CONST.EOF; 84 | } 85 | 86 | if(!activeRules.length && isDebug){ 87 | debugger 88 | //这个断点的原因是,这是编写lex文法时常见的错误,就是自动机陷入一个没有任何规则激活的状态中了 89 | } 90 | 91 | var possibleInputs = [], 92 | maxLength = 0; 93 | 94 | for(var i=0,len=activeRules.length; i matches[0].length ? maxLength : matches[0].length; 100 | } 101 | } 102 | 103 | if(possibleInputs.length){ 104 | possibleInputs = _.filter(possibleInputs, function(possible){ 105 | return possible.match.length === maxLength; 106 | }); 107 | 108 | if(self._more){ 109 | self.yytext += possibleInputs[0].match; 110 | }else{ 111 | self.yytext = possibleInputs[0].match; 112 | } 113 | self.position += possibleInputs[0].match.length; 114 | self.yyleng = self.yytext.length; 115 | self._more = false; 116 | return (new Function(possibleInputs[0].rule.action)).call(self); 117 | } 118 | 119 | if(isDebug){ 120 | debugger 121 | //这个断点的原因是,没有在循环体中return 说明当前输入已经无法命中任何规则,自动机将陷入死循环 122 | } 123 | throw('invalid input: ' + input); 124 | }, 125 | reset:function (){ 126 | this.setInput(this.input); 127 | } 128 | }; 129 | })(), 130 | lrtable: 
{"actions":{"0":{"INCLUSIVE_CONDITION":["shift",3],"EXCLUSIVE_CONDITION":["shift",4],"%%":["reduce",4]},"1":{"$end":["shift",5]},"2":{"%%":["shift",6]},"3":{"CONDITION":["shift",8]},"4":{"CONDITION":["shift",8]},"5":{"$end":["accept",0]},"6":{"<":["shift",12],"REGEX":["shift",13]},"7":{"%%":["reduce",2],"CONDITION":["shift",14]},"8":{"%%":["reduce",6],"CONDITION":["reduce",6]},"9":{"%%":["reduce",3],"CONDITION":["shift",14]},"10":{"$end":["reduce",1],"<":["shift",12],"REGEX":["shift",13]},"11":{"$end":["reduce",8],"<":["reduce",8],"REGEX":["reduce",8]},"12":{"START_CONDITION":["shift",17]},"13":{"ACTIONBODY":["shift",19],"{":["shift",20]},"14":{"%%":["reduce",5],"CONDITION":["reduce",5]},"15":{"$end":["reduce",7],"<":["reduce",7],"REGEX":["reduce",7]},"16":{">":["shift",21],",":["shift",22]},"17":{">":["reduce",12],",":["reduce",12]},"18":{"$end":["reduce",10],"<":["reduce",10],"REGEX":["reduce",10]},"19":{"$end":["reduce",13],"<":["reduce",13],"REGEX":["reduce",13]},"20":{"ACTIONBODY":["shift",23],"}":["shift",24]},"21":{"REGEX":["shift",25]},"22":{"START_CONDITION":["shift",26]},"23":{"}":["shift",27]},"24":{"$end":["reduce",15],"<":["reduce",15],"REGEX":["reduce",15]},"25":{"ACTIONBODY":["shift",19],"{":["shift",20]},"26":{">":["reduce",11],",":["reduce",11]},"27":{"$end":["reduce",14],"<":["reduce",14],"REGEX":["reduce",14]},"28":{"$end":["reduce",9],"<":["reduce",9],"REGEX":["reduce",9]}},"gotos":{"0":{"lex":1,"definitionlist":2,"INCLUSIVE_CONDITION":3,"EXCLUSIVE_CONDITION":4},"1":{"$end":5},"2":{"%%":6},"3":{"condition_names":7,"CONDITION":8},"4":{"condition_names":9,"CONDITION":8},"5":{},"6":{"rulelist":10,"rule":11,"<":12,"REGEX":13},"7":{"CONDITION":14},"8":{},"9":{"CONDITION":14},"10":{"rule":15,"<":12,"REGEX":13},"11":{},"12":{"start_conditions":16,"START_CONDITION":17},"13":{"action":18,"ACTIONBODY":19,"{":20},"14":{},"15":{},"16":{">":21,",":22},"17":{},"18":{},"19":{},"20":{"ACTIONBODY":23,"}":24},"21":{"REGEX":25},"22":{"START_CONDITION":26},"23":{"}"
:27},"24":{},"25":{"action":28,"ACTIONBODY":19,"{":20},"26":{},"27":{},"28":{}}}, 131 | productions: [{"symbol":"$accept","nullable":false,"firsts":["INCLUSIVE_CONDITION","EXCLUSIVE_CONDITION","%%"],"rhs":["lex","$end"],"srhs":"lex $end","id":0,"actionCode":""},{"symbol":"lex","nullable":false,"firsts":["INCLUSIVE_CONDITION","EXCLUSIVE_CONDITION","%%"],"rhs":["definitionlist","%%","rulelist"],"srhs":"definitionlist %% rulelist","id":1,"actionCode":"this.$$ = {rules: $3}; this.$$.states = {}; if($1.inclusive){this.$$.states.inclusive = $1.inclusive;} if($1.exclusive){this.$$.states.exclusive = $1.exclusive;}"},{"symbol":"definitionlist","nullable":false,"firsts":["INCLUSIVE_CONDITION"],"rhs":["INCLUSIVE_CONDITION","condition_names"],"srhs":"INCLUSIVE_CONDITION condition_names","id":2,"actionCode":"this.$$ = {\"inclusive\":$2};"},{"symbol":"definitionlist","nullable":false,"firsts":["EXCLUSIVE_CONDITION"],"rhs":["EXCLUSIVE_CONDITION","condition_names"],"srhs":"EXCLUSIVE_CONDITION condition_names","id":3,"actionCode":"this.$$ = {\"exclusive\":$2};"},{"symbol":"definitionlist","nullable":true,"firsts":[],"rhs":[],"srhs":"","id":4,"actionCode":"this.$$ = {}"},{"symbol":"condition_names","nullable":false,"firsts":["CONDITION"],"rhs":["condition_names","CONDITION"],"srhs":"condition_names CONDITION","id":5,"actionCode":"$1.push($1);this.$$=$1;"},{"symbol":"condition_names","nullable":false,"firsts":["CONDITION"],"rhs":["CONDITION"],"srhs":"CONDITION","id":6,"actionCode":"this.$$ = [$1];"},{"symbol":"rulelist","nullable":false,"firsts":["<","REGEX"],"rhs":["rulelist","rule"],"srhs":"rulelist rule","id":7,"actionCode":"$1.push($2); this.$$ = $1;"},{"symbol":"rulelist","nullable":false,"firsts":["<","REGEX"],"rhs":["rule"],"srhs":"rule","id":8,"actionCode":"this.$$ = [$1]"},{"symbol":"rule","nullable":false,"firsts":["<"],"rhs":["<","start_conditions",">","REGEX","action"],"srhs":"< start_conditions > REGEX action","id":9,"actionCode":"this.$$ = {regex: (new RegExp($4)), 
action:$5}; if($1){this.$$.conditions=$2} "},{"symbol":"rule","nullable":false,"firsts":["REGEX"],"rhs":["REGEX","action"],"srhs":"REGEX action","id":10,"actionCode":"this.$$ = {regex: (new RegExp($1)), action:$2}; "},{"symbol":"start_conditions","nullable":false,"firsts":["START_CONDITION"],"rhs":["start_conditions",",","START_CONDITION"],"srhs":"start_conditions , START_CONDITION","id":11,"actionCode":"$1.push($3); this.$$ = $1;"},{"symbol":"start_conditions","nullable":false,"firsts":["START_CONDITION"],"rhs":["START_CONDITION"],"srhs":"START_CONDITION","id":12,"actionCode":"this.$$ = [$1];"},{"symbol":"action","nullable":false,"firsts":["ACTIONBODY"],"rhs":["ACTIONBODY"],"srhs":"ACTIONBODY","id":13,"actionCode":"this.$$ = $1"},{"symbol":"action","nullable":false,"firsts":["{"],"rhs":["{","ACTIONBODY","}"],"srhs":"{ ACTIONBODY }","id":14,"actionCode":"this.$$ = $2.replace(/[\\r\\n]/g,\"\")"},{"symbol":"action","nullable":false,"firsts":["{"],"rhs":["{","}"],"srhs":"{ }","id":15,"actionCode":"this.$$ = \"\""}], 132 | 133 | parse:function (input, isDebug){ 134 | var self = this, 135 | 136 | stateStack = [0], //状态栈 初始状态0 137 | symbolStack = [], //符号栈 138 | valueStack = [], //值栈 139 | 140 | lexer = self.lexer, 141 | token, 142 | state; 143 | 144 | lexer.setInput(input); 145 | token = self.lexer.getToken(isDebug); 146 | 147 | while(true){ 148 | 149 | state = stateStack[stateStack.length - 1]; 150 | 151 | var action = self.lrtable.actions[state] && self.lrtable.actions[state][token]; 152 | 153 | if(!action && isDebug){ 154 | //这是编写bnf时容易出错的,通过当前输入和当前状态(状态隐含了当前入栈的符号) 155 | //无法找到右端句柄,也无法通过当前输入决定应进行移进动作 156 | debugger 157 | } 158 | 159 | if(isDebug){ 160 | console.log('当前状态:'+state, '输入符号:'+token, '动作:'+action); 161 | } 162 | if(action){ 163 | if(action[0] === 'shift'){ 164 | stateStack.push(action[1]); 165 | symbolStack.push(token); 166 | valueStack.push(lexer.yytext); 167 | token = lexer.getToken(isDebug); 168 | }else if(action[0] === 'reduce'){ 169 | var production = 
self.productions[action[1]]; 170 | 171 | var reduceCode = ('/*' + production.symbol + ' -> ' + production.srhs + ';*/' 172 | + (self.defaultAction || 'this.$$ = $1;') 173 | + production.actionCode).replace(/\$(\d+)/g, function(_, n){ 174 | return 'valueStack[' + (valueStack.length - production.rhs.length + parseInt(n, 10) - 1) + ']' 175 | }); 176 | 177 | eval(reduceCode); 178 | 179 | 180 | if(isDebug){ 181 | console.log(' 当前右端句柄为:' + production.rhs); 182 | console.log(' 右端句柄对应值栈内容为:' + JSON.stringify(valueStack.slice(-production.rhs.length))); 183 | console.log(' 归约后的值为:' + JSON.stringify(this.$$)); 184 | } 185 | 186 | //如果是当前归约用的产生式不是epsilon: 187 | // 符号栈才需要对右端句柄包含的各个symbol出栈,归约为产生式的非终结符(lhs)再入栈 188 | // 值栈才需要对右端句柄对应的各个值出栈,进行归约计算为某个lhs值,再把lhs值入栈 189 | // 状态栈也才需要对代表右端句柄的各个状态进行出栈,查goto表找到代表lhs符号的新状态入栈 190 | //否则,应用epsilon,各栈保持原地不动 191 | if(production.rhs.length){ 192 | symbolStack = symbolStack.slice(0, -production.rhs.length); 193 | valueStack = valueStack.slice(0, -production.rhs.length); 194 | stateStack = stateStack.slice(0, -production.rhs.length); 195 | } 196 | 197 | var curstate = stateStack[stateStack.length-1]; 198 | 199 | //查goto表,找到代表归约后的lhs符号的新状态 200 | var newstate = self.lrtable.gotos[curstate] && self.lrtable.gotos[curstate][production.symbol]; 201 | 202 | 203 | if(isDebug){ 204 | console.log(' 右端句柄归约后的符号:'+production.symbol+',应转移到:'+newstate); 205 | } 206 | symbolStack.push(production.symbol); //归约后的lhs符号,压入符号栈 207 | valueStack.push(this.$$); //语义动作中归约后的值(rhs各项计算出的lhs值),压入值栈 208 | stateStack.push(newstate); //goto表查到的新状态,压入状态栈 209 | 210 | 211 | }else if(action[0] === 'accept'){ 212 | if(isDebug){ 213 | console.log('accept'); 214 | } 215 | return true; 216 | }else{ 217 | return false; 218 | } 219 | }else{ 220 | return false; 221 | } 222 | } 223 | } 224 | }; 225 | if(typeof module == "object"){module.exports = parser} 226 | global.lexParser = parser; 227 | return parser; 228 | })(this); 
-------------------------------------------------------------------------------- /src/lexer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Yet another JavaScript Lexer Generator 3 | */ 4 | 5 | (function(global){ 6 | 7 | if(typeof require === 'function'){ 8 | _ = require('lodash'); 9 | } 10 | 11 | function Lexer(lex){ 12 | 13 | /** 14 | * example: 15 | * [{ 16 | * state: 'INITIAL', //该规则在哪个状态下激活 17 | * regex: /\d+/, //该规则的正则,匹配成功的话,已匹配文本在this.yytext,在语义动作代码中可修改 18 | * action: 'return "NUMBER"' //该规则的语义动作,return的是TOKEN-name 19 | * }] 20 | * 21 | */ 22 | this.states = lex.states || {}; 23 | this.states.exclusive = this.states.exclusive || {}; 24 | 25 | if(typeof this.states.exclusive === 'string'){ 26 | var exclusive = {}; 27 | _.each(this.states.exclusive.trim().split(' '), function(exclusiveState){ 28 | exclusive[exclusiveState] = true; 29 | }); 30 | this.states.exclusive = exclusive; 31 | } 32 | 33 | this.rules = lex.rules; 34 | this.stateStack = [Lexer.CONST.INITIAL]; 35 | 36 | this._setRegExRowBeginTag(); 37 | } 38 | Lexer.CONST = { 39 | INITIAL: 'INITIAL', 40 | EOF:'$end' 41 | }; 42 | 43 | Lexer.prototype = { 44 | /** 45 | * regex /\d/ transfer /^\d/ 46 | */ 47 | _setRegExRowBeginTag: function(){ 48 | _.each(this.rules, function(rule){ 49 | rule.regex = eval(rule.regex.toString().replace(/^\//,'/^')); 50 | }); 51 | }, 52 | pushState: function(state){ 53 | this.stateStack.push(state); 54 | }, 55 | popState: function(){ 56 | return this.stateStack.pop(); 57 | }, 58 | reset: function(){ 59 | this.setInput(this.input); 60 | }, 61 | setInput: function(input){ 62 | _.merge(this, { 63 | input: input, 64 | position: 0, 65 | matched: '', 66 | text: '', 67 | yytext: '', 68 | lineno: 1, 69 | firstline: 1, 70 | lastline: 1, 71 | firstcolumn: 1, 72 | lastcolumn: 1, 73 | _more: false 74 | }); 75 | }, 76 | getToken: function(isDebug){ 77 | var self = this, 78 | token = self.getToken_(isDebug); 79 | 80 | if(!token){ 81 | token 
= self.getToken(isDebug); 82 | } 83 | 84 | return token; 85 | }, 86 | unToken: function(token){ 87 | this.position -= this.yytext.length; 88 | this.input = this.input.substr(0, this.position) + token + this.input.substr(this.position); 89 | }, 90 | getCurrentRules: function(){ 91 | var self = this, 92 | rules = self.rules, 93 | curState = self.stateStack[self.stateStack.length-1], 94 | activeRules = [], 95 | isInclusiveState = true; //是否为包容状态 96 | 97 | if(self.states.exclusive[curState]){ 98 | isInclusiveState = false; 99 | } 100 | 101 | 102 | for(var i=0, len=rules.length; i -1)){ 107 | activeRules.push(rules[i]); 108 | } 109 | } 110 | 111 | return activeRules; 112 | }, 113 | getToken_: function(isDebug){ 114 | var self = this, 115 | input = self.input.slice(self.position), 116 | regex, 117 | activeRules = self.getCurrentRules(), 118 | matches; 119 | 120 | if(!input){ 121 | return self.CONST.EOF; 122 | } 123 | 124 | if(!activeRules.length && isDebug){ 125 | debugger 126 | //这个断点的原因是,这是编写lex文法时常见的错误,就是自动机陷入一个没有任何规则激活的状态中了 127 | } 128 | 129 | var possibleInputs = [], 130 | maxLength = 0; 131 | 132 | for(var i=0,len=activeRules.length; i matches[0].length ? 
maxLength : matches[0].length; 138 | } 139 | } 140 | 141 | if(possibleInputs.length){ 142 | possibleInputs = _.filter(possibleInputs, function(possible){ 143 | return possible.match.length === maxLength; 144 | }); 145 | 146 | if(self._more){ 147 | self.yytext += possibleInputs[0].match; 148 | }else{ 149 | self.yytext = possibleInputs[0].match; 150 | } 151 | self.position += possibleInputs[0].match.length; 152 | self.yyleng = self.yytext.length; 153 | self._more = false; 154 | return (new Function(possibleInputs[0].rule.action)).call(self); 155 | } 156 | 157 | if(isDebug){ 158 | debugger 159 | //这个断点的原因是,没有在循环体中return 说明当前输入已经无法命中任何规则,自动机将陷入死循环 160 | } 161 | throw('invalid input: ' + input); 162 | }, 163 | yymore: function(){ 164 | this._more = true; 165 | }, 166 | generate: function(){ 167 | var self = this, 168 | rules = _.map(self.rules, function(rule){ 169 | return '{regex:'+rule.regex.toString()+',action:\''+rule.action+'\'' + (rule.conditions ? ', conditions:'+JSON.stringify(rule.conditions) : '') + '}'; 170 | }), 171 | code = [ 172 | '(function(){', 173 | 'return {', 174 | 'CONST:' + JSON.stringify(Lexer.CONST) + ',', 175 | 'states:' + JSON.stringify(self.states) + ',', 176 | 'rules: [' + rules.join(',') + '],', 177 | 'yymore:' + Lexer.prototype.yymore.toString() + ',', 178 | 'stateStack:' + JSON.stringify(self.stateStack) + ',', 179 | 'pushState:' + Lexer.prototype.pushState.toString() + ',', 180 | 'popState:' + Lexer.prototype.popState.toString() + ',', 181 | 'getCurrentRules:' + Lexer.prototype.getCurrentRules.toString() + ',', 182 | 'setInput:' + Lexer.prototype.setInput.toString() + ',', 183 | 'getToken:' + Lexer.prototype.getToken.toString() + ',', 184 | 'unToken:' + Lexer.prototype.unToken.toString() + ',', 185 | 'getToken_:' + Lexer.prototype.getToken_.toString() + ',', 186 | 'reset:' + Lexer.prototype.reset.toString() + '', 187 | '};', 188 | '})()' 189 | ].join('\n'); 190 | return code; 191 | } 192 | }; 193 | 194 | if(typeof module == 'object' && 
module.exports){ 195 | module.exports = Lexer; 196 | }else{ 197 | global.Lexer = Lexer; 198 | } 199 | 200 | 201 | })(this); 202 | -------------------------------------------------------------------------------- /test/grammar_test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * bnf / lex 文法测试用例 4 | * 5 | */ 6 | 7 | module.exports = (function(){ 8 | 9 | var Generator = require('../src/generator.js'); 10 | var jscode; 11 | 12 | return { 13 | vardecl: function(test){ 14 | var parseCode = new Generator({ 15 | lex: { 16 | rules: [ 17 | { 18 | regex: /\d+/, 19 | action: 'return "NUMBER";' 20 | }, 21 | { 22 | regex: /;/, 23 | action: 'return ";";' 24 | }, 25 | { 26 | regex: /[\r\n]/, 27 | action: 'return "NEWLINE";' 28 | }, 29 | { 30 | regex: /\+\+/, 31 | action: 'return "++";' 32 | } 33 | ] 34 | }, 35 | 36 | start: 'line', 37 | token: 'NUMBER ;', 38 | type: 'LR(1)', 39 | bnf: { 40 | 'line' : { 41 | 'NUMBER [noLineTerminator] ++ ;' : 'this.$$ = parseInt($1, 10) + 1;', //产生式1 42 | 'NUMBER ;' : 'this.$$ = $1' //产生式2 43 | } 44 | }, 45 | code: 'global.parser = parser;' 46 | }).generate(); 47 | 48 | var parser = eval(parseCode); 49 | 50 | console.log(JSON.stringify(parser, null, ' ')); 51 | 52 | 53 | /* 54 | var text = '1++;'; //应用产生式1 55 | parser.parse(text, true); 56 | test.equal(parser.$$, '2', text + ' PASSED'); 57 | 58 | var text = '1;'; //应用产生式2 59 | parser.parse(text, true); 60 | test.equal(parser.$$, '1', text + ' PASSED'); 61 | */ 62 | 63 | 64 | /* 65 | var text = '1'; //自动补全semicolon后应用产生式2 66 | parser.parse(text); 67 | test.equal(parser.$$, '1', text + ' PASSED'); 68 | */ 69 | 70 | test.done(); 71 | } 72 | }; 73 | 74 | }()); 75 | --------------------------------------------------------------------------------