├── .gitignore
├── CombinatorialParsing.dart
├── README.md
├── UnarySelectorParser.dart
├── dart_executable_grammar.dart
├── dart_runnable_grammar.dart
├── pubspec.yaml
├── smalltalkGrammar.dart
├── smalltalkGrammarRevised.dart
├── smalltalkParser.dart
├── smalltalkRevisedParser.dart
├── testDartGrammar.dart
└── testParserCombinators.dart


/.gitignore:
--------------------------------------------------------------------------------
1 | pubspec.lock
2 | packages/
3 |
4 |


--------------------------------------------------------------------------------
/CombinatorialParsing.dart:
--------------------------------------------------------------------------------
library combinatorialParsing;

/* A Dart version of the Newspeak parser combinator library.

Copyright 2008 Cadence Design Systems, Inc.
Copyright 2012 Cadence Design Systems, Inc.
Copyright 2013 Google Inc.

Licensed under the Apache License, Version 2.0 (the ''License'');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
*/

import 'dart:mirrors';

class AlternatingParser extends CombinatorialParser {
  /* A parser that parses either P or Q.
   *
   * P is tried first. If it fails with a ParserError, the input position is
   * rewound and Q is tried from the same place. If both fail, the error that
   * got furthest into the input is rethrown; on a tie the two messages are
   * merged into a single ParserError.
   */

  CombinatorialParser p;
  CombinatorialParser q;

  AlternatingParser(this.p, this.q);

  // parsing
  parseWithContext(ParserContext context) {
    var pos = context.position;
    try {
      return p.parseWithContext(context);
    } on ParserError catch (e1) {
      // P failed: rewind so that Q sees exactly the input P saw.
      context.position = pos;
      context.recordFailure([e1.message, e1.position]);
      try {
        return q.parseWithContext(context);
      } on ParserError catch (e2) {
        context.recordFailure([e2.message, e2.position]);
        // Report whichever alternative progressed further.
        if (e1.position > e2.position) throw e1;
        if (e2.position > e1.position) throw e2;
        _combineErrorsWith(e1.message, e2.message, e1.position);
      }
    }
  }

  // private
  /* Always throws: merges two failure messages reported at the same position.
   * Identical messages are reported once; an empty message is dropped rather
   * than leaving stray blanks around a missing 'or'.
   */
  _combineErrorsWith(String e1, String e2, int pos) {
    String msg;
    if (e1 == e2 || e2 == '') {
      msg = e1;
    } else if (e1 == '') {
      msg = e2;
    } else {
      msg = '$e1 or $e2';
    }
    throw new ParserError(msg, pos);
  }

  toString() => name == null ? '[$p | $q]' : name;
}

class CharParser extends PredicateTokenParser {
  /* Parses exactly one character equal to tokenToParse.
   *
   * This constructor leverages the assumption that single character strings are immutable values.
   * Hence, if an input equals the specified token, it is indistinguishable from it, and we can just
   * return token as the result of the parse.
   * The predicate handed to the superclass constructor only compares the input with the token;
   * the failure message is '<token> expected'.
   */
  CharParser(String tokenToParse): super((t){ return t == tokenToParse;}, '${tokenToParse} expected');
}
class CollectingCommentParser extends CommentParser {
  /* A special parser used for inputs that need to be rapidly scanned over.
     It differs from its superclass in that it actually collects the characters it scans, in case they are needed (e.g., for pretty printers).

     Ideally, we should not have to do this, but until we do proper optimization by compiling combinators, this will have to suffice.
     It provides a marked improvement in performance.
     By using such parsers for comments, whitespace and strings, the overall performance of the Newspeak parser improved by a factor of 2 or so.
  */

  /* The characters consumed by the most recent parse. */
  var comment;

  CollectingCommentParser(tb): super(tb);

  // parsing
  /* Consumes input until termBlock(context) answers true and returns the list
   * of consumed characters. Throws a ParserError if the input ends first.
   */
  parseWithContext(ParserContext context) {
    var c;

    comment = new List();
    while (!termBlock(context)) {
      c = context.nextIfAbsent(
          () => throw new ParserError('Premature end of input', context.position-1)
      );
      comment.add(c);
    };
    return comment;
  }
}

abstract class CombinatorialParser {
  /* This class is intended to implement ParserCombinators.
   * A CombinatorialParser returns a value of type T after
   * successful parsing.
   *
   * The class is abstract. It does not implement
   * the parsing routine parseWithContext().
   *
   * Concrete subclasses should implement specific grammars.
   *
   * Parsing is initiated by calling parse(). This routine takes a String as input.
   * If parsing fails, it is the caller's responsibility to set the input stream back
   * to its original position (Q: is this a good idea?).
   */

  /* Optional production name; when set it is what toString() answers. */
  String name;

  toString() => name == null ? super.toString() : name;

  /* Used to bypass 0 .. n ForwardReferenceParsers to get to the real parser.
   * Usually, this is 'this'.
   * Only ForwardReferenceParsers forward the request to their forwardee.
   */
  get ultimateParser => this;

  recordFailure(f) => this; /* Do nothing, save time */ //?

  get value => this; //?

  // combinators

  CombinatorialParser operator << (CombinatorialParser p) {
    /* The sequencing combinator (implicit in BNF). */
    var o = new List()..add(this)..add(p);
    return new SequentialParser(o);
  }

  SequentialParser operator & (CombinatorialParser p) {
    /* The flattening sequencing combinator. This is what one should typically use in a grammar.
     * It differs from '<<' in its specification. '<<' is not intended to flatten the resulting parser tree,
     * while '&' is; this is achieved by overriding '&' in SequentialParser to do the flattening.
     *
     * Why would one want to flatten the tree? Because, given a production
     *
     * Foo -> Bam Ban Bar Bat
     *
     * one doesn't want to build the AST by writing
     *
     * Foo = (Bam << Ban << Bar << Bat).wrapper((start, end) {
     *          FooNode.b1(start [0], start[1], start[2], end)
     *        }
     *
     * It is much more convenient to flatten the tree and have a flat list of the correct arity. However, we only
     * want to flatten one level. Otherwise, we have an associative sequencing operator and we can no longer
     * tell the arity of the sequence produced in a given production.
     **/

    return this << p;
  }

  CombinatorialParser get empty => new EmptyParser();

  CombinatorialParser get eoi => tokenFor(new EOIParser());

  CombinatorialParser get fail => new FailingParser();

  CombinatorialParser get not => new NegatingParser(this);

  /* [P] = P | e */
  CombinatorialParser get opt => this | empty;

  /* Return a parser that accepts one or more repetitions of what the receiver accepts. Denoted by the postfix + in BNF */
  /* P+ = P & P* ; However, we must flatten the list one level */
  CombinatorialParser get plus => new PlusParser(this);

  CombinatorialParser plusSeparatedBy(CombinatorialParser separator) {
    /* Utility for the common case of a list with separators. The separators are discarded, as they are usually only used to
     * guide parsing and have no semantic value. If one needs them, one can always build the rule directly.
     *
     * The result of a successful parse is a non-empty list holding the first element followed by
     * every element that came after a separator.
     * */

    return (this << ((separator << this).wrapper((s, v) => v)).star).
        wrapper((fst, rst) {
          // The list must be returned explicitly: a block-bodied closure
          // without 'return' answers null, which would discard the parse.
          return new List()..add(fst)..addAll(rst); /* could be optimized to reuse rst */
        });
  }

  CombinatorialParser plusSeparatedOrTerminatedBy(CombinatorialParser separator) {

    /* Utility for the common case of a list with separators, allowing for an optional appearance of the separator at the end.
     * The separators are discarded, as they are usually only used to guide parsing and have no semantic value. If one needs them,
     * one can always build the rule directly.
     * */
    return ( plusSeparatedBy(separator) & separator.opt).wrapper((lst, end) => lst);
  }

  /* Return a parser that accepts zero or more repetitions of what the receiver accepts. Denoted by the postfix * in BNF */
  /* P* = [P+] */
  /* We tweak the classic formulation by wrapping it in a parser that takes care to avoid returning null.
     In the ordinary case, if the input is empty, the empty parser will return null as the result.
     However, we'd rather not have to check for null every time we get a result from a starred
     production; it is verbose and error prone. In the case of star, it is better to return an empty list
     for empty input. The dedicated StarParser used below accomplishes that. */
  /* would be good to cache this, as well as plus and opt */
  CombinatorialParser get star => new StarParser(this);

  CombinatorialParser starSeparatedBy(CombinatorialParser separator) {
    /* See analogous plus methods. Must wrap to prevent returning null in empty case */
    return plusSeparatedBy(separator).opt.wrap((rs) => rs == null ? new List() :rs);
  }

  CombinatorialParser starSeparatedOrTerminatedBy(CombinatorialParser separator) {

    /* See analogous plus methods. Must wrap to prevent returning null in empty case */
    return plusSeparatedOrTerminatedBy(separator).opt.wrap((rs) => rs == null ? new List() :rs);
  }

  /* The alternation combinator - denoted by | in BNF */
  CombinatorialParser operator | (CombinatorialParser p) => new AlternatingParser(this, p);

  // parsing
  parse(String input) => parseNoContext(input);

  parseNoContext(String input) {
    /* YK - a context-less protocol for speeding up parsing */
    /* Turns out maintaining a context is expensive in runtime and doesn't
       do much for locating errors. Experimenting with other error localization
       mechanism. To minimize impact, the parse:inContext:ifError: protocol
       is maintained, and a bogus reportFailure is implemented on this */
    return parseWithContext(new DummyParserContext(input));
  }

  /* The actual parsing routine; must be supplied by concrete subclasses.
   * Failure is signalled by throwing a ParserError. */
  parseWithContext(ParserContext context);

  // private
  /* Always throws: merges two failure messages reported at the same position.
   * Identical messages are reported once; an empty message is dropped rather
   * than leaving stray blanks around a missing 'or'. */
  _combineErrors(e1, e2, pos) {
    String msg;
    if (e1 == e2 || e2 == '') {
      msg = e1;
    } else if (e1 == '') {
      msg = e2;
    } else {
      msg = '$e1 or $e2';
    }
    throw new ParserError(msg, pos);
  }

  // utilities
  CombinatorialParser get aWhitespaceChar {
    return new PredicateTokenParser((String c) => c.codeUnitAt(0) <= ' '.codeUnitAt(0),
                                    'whitespace expected');
  }

  CombinatorialParser char(String c) {
    return new CharParser(c);
  }

  CombinatorialParser charBetween(String c1, String c2) {
    return new PredicateTokenParser(
        (String c) =>
            (c1.codeUnitAt(0) <= c.codeUnitAt(0)) &&
            (c.codeUnitAt(0) <= c2.codeUnitAt(0)),
        'character between $c1 and $c2 expected');
  }

  /* Grammars with comments override this; by default there are none. */
  CombinatorialParser get comment => fail;

  CombinatorialParser tokenFor(CombinatorialParser p) {

    /* Tokenizing involves throwing away leading whitespace and comments.
       In addition, it involves associating the token with a starting position within the input stream;
       We do the latter first by wrapping p in a TokenizingParser; then we prefix it with a parser
       that deals with whitespace and comments, and return the result. */

    var posParser = new TokenizingParser(p);

    return ((whitespace | comment).star & posParser).wrapper((dontCare, t) => t);
    /* type safety note: wrapper is only defined on SequentialParser. The call is always
       statically unsafe but checked dynamically (see its definition). One could
       cast to a SequentialParser, but that would not be enough to silence
       the typechecker anyway
    */
    /* Design note: It seems tempting to define a combinator, 'token', that returns a tokenized version of its receiver.
     * Alas, this doesn't work out, since tokenization relies on concepts of whitespace and comment, which are often specific
     * to a given grammar. Hence, the combinator needs to be an operation of the grammar, not of a specific production.
     **/
  }

  CombinatorialParser tokenFromChar(String c) => tokenFor(char(c));

  CombinatorialParser tokenFromSymbol(String s) => tokenFor(new SymbolicTokenParser(s));

  CombinatorialParser token(String s) => tokenFor(new SymbolicTokenParser(s));// we can probably eliminate the two variants above

  /* It's rare that anyone will need to change this definition */
  /* return aWhitespaceChar plus. */
  /* As an optimization, we process whitespace with a dedicated scanning parser. Of course, this is regrettable,
     but it is a significant win. */
  CombinatorialParser get whitespace => new WhitespaceParser();

  /* Answers a parser that feeds this parser's result to the one-argument function blk. */
  wrap(blk) => new WrappingParser(this, blk);

  namedWrap(blk, msg) => new NamedWrappingParser(this,blk)..name = msg;

  /* Like wrap(), but spreads a List result over blk's positional parameters;
   * a non-List result is passed as the single argument. */
  wrapper(blk) {//? varying arity stuff here
    return wrap( (rs) => Function.apply(blk, (rs is List) ? rs : [rs])); //? can this cope with a singleton?
    /* return wrap: blk */
  }

  namedWrapper(blk, msg) {
    return namedWrap( (rs) => Function.apply(blk, (rs is List) ? rs : [rs]), msg);
  }

}

class CommentParser extends CombinatorialParser {
  /* Skips input until termBlock(context) answers true. Nothing is collected;
   * the parse result is null. Throws a ParserError if the input ends first. */
  var termBlock;
  CommentParser(this.termBlock);
  // parsing
  parseWithContext(ParserContext context) {
    while (!termBlock(context)){
      context.nextIfAbsent(() => throw new ParserError('Premature end of input', context.position-1));
    };
  }
}
class EOIParser extends CombinatorialParser {
  /* A parser that only succeeds at the end of the input. This addresses a common problem with combinator parsers.
   * If there is garbage at the end of the input, no production matches it.
   * Consequently, the parsers backtrack to the point where the legal input was consumed, without giving an error message
   * about the junk at the end.
   * */

  // parsing
  /* Answers true when no input is left; otherwise throws a ParserError. */
  parseWithContext(ParserContext context) {
    if (!context.atEnd) {
      throw new ParserError('Unexpected input', context.position + 1);
    }
    return true;
  }
}


class EmptyParser extends CombinatorialParser {
  /* The parser that parses the empty input. It always succeeds, consumes nothing and answers null. */

  // parsing
  parseWithContext(ParserContext context) {
    return null;
  }

  toString() {
    return 'empty';
  }
}

class FailingParser extends CombinatorialParser {
  /* The parser that always fails. It never parses anything. */

  // parsing
  parseWithContext(ParserContext context) {
    throw new ParserError('Failing Parser invoked', context.position);
  }

  toString() {
    return 'fail';
  }
}


class NamedWrappingParser extends WrappingParser {
  /* This is exactly the same as a WrappingParser, but it passes itself down
   * in the context parameter, to provide more meaningful error messages.
   * */

  NamedWrappingParser(CombinatorialParser p, Function b): super(p, b);

  /* Runs the wrapped parser, then hands its result to the wrapper block. */
  parseWithContext(ParserContext context) {
    var inner = parser.parseWithContext(context);
    return wrapperBlock(inner);
  }
}


class NegatingParser extends CombinatorialParser {
  /* A parser that implements the 'not' combinator, as in Ford's PEGs.
   * It contains a parser p, and succeeds if p fails and vice versa.
   * It does not move the input forward if it succeeds.
   * */
  CombinatorialParser p;
  NegatingParser(this.p);

  // parsing

  /* Answers true (with the position restored) when p fails with a ParserError;
   * throws a ParserError anchored at the starting position when p succeeds. */
  parseWithContext(ParserContext context) {
    var start = context.position;
    var matched = true;
    try {
      p.parseWithContext(context);
    } on ParserError catch (_) {
      matched = false;
    }
    if (matched) {
      throw new ParserError('not combinator failed', start);
    }
    context.position = start;
    return true;
  }

  toString() {
    if (name != null) return name;
    return '~[$p]';
  }
}
abstract class ParserContext {
  /* This class defines a context that is shared among a set of combinatorial
   * parsers during a parse.
   * Information includes the position of the input and could also include
   * error tracking information, memoization, state for context-sensitive
   * grammars etc.
   */

  /* Failures recorded so far; what (if anything) is stored is up to the subclass. */
  final List failures = new List();
  final String _input;

  /* Index of the next character to be consumed. */
  int position = 0;

  ParserContext(this._input);

  bool get atEnd => position >= _input.length;

  /* Consumes and answers the next character, or null at end of input. */
  String get next => atEnd? null : _input[position++];

  /* Consumes and answers the next character; at end of input answers handler() instead. */
  String nextIfAbsent(Function handler)
    => atEnd? handler() : _input[position++];

  /* Answers the next character without consuming it; at end of input answers handler() instead. */
  String peekIfAbsent(Function handler) => atEnd? handler() : _input[position];

  /* f is a two-element list: [message, position]. */
  recordFailure(f) ;
}

class ErrorTrackingParserContext extends ParserContext {
  /* This class defines a context that is shared among a set of combinatorial
   * parsers during a parse. The context can be used to manage information on
   * parsing errors: rather than always report the latest failure that occurred,
   * we can report the one that occurred deepest in the input stream, or
   * implement some other policy - as long as we can record what failures took
   * place.
   *
   * Failures are kept in the 'failures' list inherited from ParserContext;
   * redeclaring the field here would only shadow it with a second list.
   */

  ErrorTrackingParserContext(String input): super(input);

  /* Message of the most recently kept failure, or '' if there is none. */
  get errorMessage {
    if (failures.isEmpty) return '';
    return failures.last.first;//? protocol
  }

  /* Position of the most recently kept failure, or -1 if there is none. */
  get errorPosition {
    if (failures.isEmpty) return -1;
    return failures.last.last;
  }

  /* Keeps f only if it is at least as deep in the input as the last kept failure. */
  recordFailure(f) {
    if (failures.isEmpty || (failures.last.last <= f.last)) failures.add(f);
  }
}
class DummyParserContext extends ParserContext {
  /* A dummy context to save on the cost of error recording */

  DummyParserContext(String input): super(input);

  get errorMessage {
    return '';
  }

  get errorPosition {
    return -1;
  }

  // Deliberately records nothing.
  recordFailure(f) {}
}

class ParserError extends Error {
  /* Thrown by parsers to signal failure; carries a message and an input position. */
  String message;
  int position;

  ParserError(this.message, this.position);

  // as yet unclassified
  get description {
    return 'ParserError: $message';
  }
}

class PlusParser extends CombinatorialParser {
  /* An attempt to optimize the + operator by having a dedicated parser for it. */
  CombinatorialParser p;

  PlusParser(this.p);

  // parsing
  /* Requires one successful parse of p (a failure here propagates), then keeps
   * applying p until it fails. The failed attempt is undone by restoring the
   * position; the collected results are answered as a list. */
  parseWithContext(ParserContext context) {
    var collected = new List();
    collected.add(p.parseWithContext(context));
    for (;;) {
      var mark = context.position;
      var item;
      try {
        item = p.parseWithContext(context);
      } on ParserError catch (_) {
        context.position = mark;
        break;
      }
      collected.add(item);
    }
    return collected;
  }

  toString() {
    if (name != null) return name;
    return '[$p]+';
  }

}
class PredicateTokenParser extends CombinatorialParser {
  /* Parses a single token matching a given
     predicate. */

  var predicate;
  var errMsg;

  PredicateTokenParser(this.predicate, this.errMsg);


  // parsing
  /* Consumes one token and answers it if the predicate accepts it.
   * Throws a ParserError carrying errMsg at end of input or on rejection. */
  parseWithContext(ParserContext context) {
    reject() => throw new ParserError(errMsg, context.position-1);

    var candidate = context.nextIfAbsent(reject);
    if (!predicate(candidate)) reject();
    return candidate;
  }
}


class SequentialParser extends CombinatorialParser {
  /* A parser that activates a sequence of subparsers (P1, ... ,Pn).

     One might think that it would be sufficient to define a class that
     combined two parsers in sequence, corresponding to the <<
     operator, just like AlternatingParser corresponds to the | operator.
     However, grammar productions typically involve several elements, so
     the typical sequencing operation is n-ary */

  List subparsers;
  SequentialParser(this.subparsers);

  // combinators
  /* Flattening sequencing: answers a new sequence with p appended, instead of nesting. */
  SequentialParser operator & (CombinatorialParser p) {
    List extended = new List.from(subparsers);
    extended.add(p);
    return new SequentialParser(extended);
  }


  // parsing
  /* Runs every subparser in order and answers the list of their results. */
  parseWithContext(ParserContext context) {
    var results = new List();
    for (var sub in subparsers) {
      results.add(sub.parseWithContext(context));
    }
    return results;
  }

  // wrapping
  wrapper(blk) {

    /* untypesafe, but convenient. We can dynamically ensure
       that the arity of the incoming block matches that of this parser.
       Given that this routine is only called during parser construction,
       dynamic failure of the asserts is sufficient.

       We cannot ensure type correctness of the arguments to the block using
       this interface. One can use the more verbose followedBy: combinators
       if that is deemed essential.
    */
    //assert(blk.numArgs == subparsers.length);
    spread(rs) {
      return Function.apply(blk, rs);
    }
    return wrap(spread);
  }

  toString() {
    if (name != null) return name;
    return subparsers.join(' & ');
  }

}
class StarParser extends CombinatorialParser {
  /* An attempt to optimize the * operator by having a dedicated parser for it. */

  CombinatorialParser p;

  StarParser(this.p);

  // parsing
  /* Applies p until it fails, undoing the failed attempt by restoring the
   * position. Answers the (possibly empty) list of results; never null. */
  parseWithContext(ParserContext context) {
    var collected = new List();
    for (;;) {
      var mark = context.position;
      var item;
      try {
        item = p.parseWithContext(context);
      } on ParserError catch (_) {
        context.position = mark;
        break;
      }
      collected.add(item);
    }
    return collected;
  }

  toString() {
    if (name != null) return name;
    return '[$p]*';
  }

}

class SymbolicTokenParser extends CombinatorialParser {
  /* Parses a given string. One could derive this as an alternation of character parsers, but the derivation is more verbose
   * than defining it directly, and less efficient, so why bother?
   * */
  String symbol;

  SymbolicTokenParser(this.symbol);

  // parsing
  /* Matches symbol character by character and answers it. On a mismatch or
   * at end of input, throws a ParserError positioned where matching began. */
  parseWithContext(ParserContext context) {
    var start = context.position;
    var complaint = '$symbol expected';
    mismatch() => throw new ParserError(complaint, start);

    var i = 0;
    while (i < symbol.length) {
      var actual = context.nextIfAbsent(mismatch);
      if (symbol[i] != actual) mismatch();
      i++;
    }
    return symbol;
  }

  toString() {
    if (name != null) return name;
    return symbol;
  }

}
class Token {
  /* Represents a token of input. Basically, it attaches a start position
     to the token's value.

     It's not yet clear if we should bother adding token codes or values here. */

  var token;
  int start, end;

  Token(this.token, this.start, this.end);

  /* Tokens are equal when they have the same runtime type, value and extent. */
  bool operator == (other) {
    if (other.runtimeType != this.runtimeType) return false;
    if (other.token != token) return false;
    if (other.start != start) return false;
    return other.end == end;
  }

  // Derived from the value only; tokens that are equal share a value, hence a hash.
  int get hashCode {
    return token.hashCode;
  }

  int get concreteStart {
    return start;
  }

  int get concreteEnd {
    return end;
  }

  toString() {
    return 'Token $token $start:$end';
  }

}

class TokenParser extends PredicateTokenParser {
  /* A parser that accepts a single, specified token.
   */

  TokenParser(tokenToParse)
      : super((candidate) {
          return candidate == tokenToParse;
        }, '${tokenToParse} expected');
}

class TokenizingParser extends CombinatorialParser {
  /* Wraps the result of another parser in a Token that records where in the
   * input the result was found. */
  CombinatorialParser parser;

  TokenizingParser(this.parser);

  // parsing
  parseWithContext(ParserContext context) {
    // The start is taken before parsing and is one-based; the end is the
    // context position after the wrapped parser has consumed its input.
    var first = context.position + 1;
    var payload = parser.parseWithContext(context);
    var last = context.position;
    return new Token(payload, first, last);
  }

  toString() {
    if (name != null) return name;
    return '[$parser token]';
  }

}

class WhitespaceParser extends CombinatorialParser {
  /* A simple scanner to optimize the handling of whitespace. Should be equivalent to
   *
   * aWhitespaceChar.plus
   *
   * Eventually, the framework should optimize well enough that this will be unnecessary.
   * */
  var comment;

  // parsing
  /* Consumes every leading character whose code unit is at most 32 and answers
   * them as a list. Throws a ParserError if there is no such character. */
  parseWithContext(ParserContext context) {
    var start = context.position;
    comment = new List();
    while (true) {
      var ahead = context.peekIfAbsent(() => null);
      if (ahead == null || ahead.codeUnitAt(0) > 32) break;
      comment.add(context.next);
    }
    if (comment.isEmpty) throw new ParserError('Whitespace expected', start);
    return comment;
  }

  toString() {
    return 'whitespace';
  }
}
class WrappingParser extends CombinatorialParser {
  /* Used to transform the output of another parser. A wrapping parser accepts exactly the same input as the wrapped
     parser does, and performs the same error handling. The only difference is that it takes the output of the wrapped
     parser and passes it on to a wrapper block which uses it to produce a new result, which is the output of the wrapping
     parser. A typical use is to build nodes of an abstract syntax tree.

     The output type of the wrapped parser, S, is also the input to the wrapper. The output type of the wrapper is the output of this
     (the wrapping) parser. */

  var parser;
  var wrapperBlock;

  WrappingParser(this.parser, this.wrapperBlock);

  // parsing
  /* Parses with the wrapped parser and answers the wrapper block's transformation of its result. */
  parseWithContext(ParserContext context) {
    var raw = parser.parseWithContext(context);
    return wrapperBlock(raw);
  }

  toString() {
    if (name != null) return name;
    return 'wrap($parser)';
  }

}
class ForwardingWrappingParser extends WrappingParser {
  /* When a ForwardReferenceParser is wrapped using the wrapper() combinator, we don't know what arity
   * the wrapping function should have - it will depend on the arity of the parser we forward to.
   * We cannot determine whether to use the implementation of wrapper() given in ordinary parsers, which forwards
   * to the wrap() combinator (designed for functions with arity 1) or the implementation used in SequentialParsers,
   * (designed for n-ary functions, where n is the length of the list of parsers the SequentialParser sequences).
   * Instead, we must defer the decision on how to handle the situation until the parser tree is complete.
   * This is accomplished by using this class as the result of the wrapper() combinator for ForwardReferenceParser.
   *
   * Instances of this class determine how to act when asked to parse. At that time, the parse tree must be complete,
   * and they can ask the ultimate parser for a wrapping parser that is suitably configured, and forward requests
   * to it. */

  /* Built on first use by trueWrappingParser, then reused. */
  var wrappingParser;

  ForwardingWrappingParser(p, b): super(p, b); // would a named constructor like ForwardingWrappingParser.wrap() be nicer here?

  parseWithContext(ParserContext context) {
    var delegate = trueWrappingParser;
    return delegate.parseWithContext(context);
  }

  WrappingParser get trueWrappingParser {
    if (wrappingParser != null) return wrappingParser;
    wrappingParser = parser.ultimateParser.wrapper(wrapperBlock);
    return wrappingParser;
  }
}

class ForwardReferenceParser extends CombinatorialParser {
  /* A placeholder for a production that has not been defined yet. Once bound,
   * it forwards parsing to its forwardee; if it is used while still unbound,
   * bindingRoutine is invoked first to perform the binding. */
  var forwardee;
  var bindingRoutine;

  bind(CombinatorialParser p) {
    /* as a precaution, only bind if p is a parser */
    if (p is! CombinatorialParser) return;
    forwardee = p;
  }

  CombinatorialParser get parserToForwardTo {
    if (forwardee == null) {
      bindingRoutine();
    }
    return forwardee;
  }

  CombinatorialParser get ultimateParser {
    return parserToForwardTo.ultimateParser;
  }

  CombinatorialParser wrapper(blk) {
    /* see comments in ForwardingWrappingParser */
    return new ForwardingWrappingParser(this, blk);
  }

  // combinators
  // Each combinator is applied to the forwardee when one is already bound, and
  // to this placeholder otherwise.
  CombinatorialParser operator << (CombinatorialParser p) {
    if (forwardee == null) return super << p;
    return forwardee << p;
  }

  CombinatorialParser get opt {
    if (forwardee == null) return super.opt;
    return forwardee.opt;
  }

  CombinatorialParser operator |(p) {
    if (forwardee == null) return super | p;
    return forwardee | p;
  }

  // parsing
  parseWithContext(ParserContext context) {
    var target = parserToForwardTo;
    return target.parseWithContext(context);
  }

  toString() {
    if (name != null) return name;
    return 'forward($forwardee)';
  }

}
abstract class ExecutableGrammar extends CombinatorialParser {
  /* This class is intended to implement Parser
     Combinators. An ExecutableGrammar[T]
     returns a value of type T after successful
     parsing.

     The class is abstract. It does not implement
     the parsing routine parseWithContext().

     Concrete subclasses should implement specific grammars.

     Parsing is initiated by calling parse(). This routine takes a String as input.
     If parsing fails, it is the caller's responsibility to set the input stream back to its original position
     (Q: is this a good idea?).
     If an error occurs, the error block passed in is called. */

  /* Maps each production's field name (a Symbol) to the ForwardReferenceParser
   * installed in that field. Set to null once the references have been bound. */
  Map forwardReferenceTable = new Map();
  InstanceMirror selfMirror;

  ExecutableGrammar() {
    selfMirror = reflect(this);
    setupForwardReferences;
    bindForwardReferences;
  }

  // forward references
  /* Gives every forward reference a binding routine that performs the final
   * binding the first time any unbound reference is actually used. */
  get bindForwardReferences {
    forwardReferenceTable.values.forEach((v){v.bindingRoutine = () => finalBindForwardReferences;});
  }

  bool isField(Mirror m) => m is VariableMirror;

  CombinatorialParser getRealField(Symbol k){
    /* A gross hack to work around the deficiencies in Dart's mirror lib.
     * If the current class has a getter k but not a field
     * then assume that its getter k overrides the production k stored
     * in a field in the superclass with a wrapping parser on the field contents.
     * */
    // NOTE(review): the test below asks whether the class declares no fields at
    // all, not whether it lacks a field named k as the comment above suggests.
    // Left unchanged; confirm the intended behaviour against the grammar subclasses.

    var p = selfMirror.getField(k).reflectee;
    if (selfMirror.type.instanceMembers.containsKey(k) && selfMirror.type.declarations.values.where(isField).isEmpty)
      return p.parser;
    else return p;
  }


  get finalBindForwardReferences { // we could do this more eagerly after grammar was constructed, chaining it to
    // grammar construction. The result would be a future that one could chain to in order to parse
    // and the results of parsing would also be futures. Call back hell.

    // Binding happens once. A forward reference that could not be bound keeps
    // its binding routine and may call in here again after the table is gone.
    if (forwardReferenceTable == null) return;
    forwardReferenceTable.forEach((k, v){
      var p = getRealField(k);
      if (p is CombinatorialParser) {
        v.bind(p);
        // 'name' is a String, so convert the Symbol key (as nameProductions does).
        p.name = MirrorSystem.getName(k); /* a good place to name the productions */
      }});
    forwardReferenceTable = null; // get rid of all unused forwarding parsers
    // still leaks; if no forwarding parser is ever called, this never runs!
  }

  // helper methods; unused

  /* The receiver's class followed by its superclasses, up to but excluding Object. */
  List get _allSuperClasses {
    List allSuperclasses = new List();
    ClassMirror sc = selfMirror.type;
    // simpleName is a Symbol, so it must be compared with a Symbol literal;
    // the null test stops the walk if the top of the hierarchy is passed.
    while (sc != null && sc.simpleName != #Object) {
      allSuperclasses.add(sc);
      sc = sc.superclass;
    }
    return allSuperclasses;
  }

  /* Mirrors on all fields declared by the classes in _allSuperClasses. */
  List get _allSlotMirrors {
    List allSlots = new List();
    _allSuperClasses.forEach((sc) =>
        allSlots.addAll(sc.declarations.values.where(isField)));
    return allSlots;
  }

  // helper method because declarations do not climb the class hierarchy

  /* Mirrors on the fields declared by the receiver's class and its superclasses
   * below ExecutableGrammar; each is assumed to hold a production. */
  List get _allProductions {
    List allProductions = new List();
    ClassMirror gc = selfMirror.type;
    while (gc.simpleName != #ExecutableGrammar) {
      allProductions.addAll(gc.declarations.values.where(isField));
      gc = gc.superclass;
    }
    return allProductions;
  }

  get setupForwardReferences {
    /* go thru all non-nil instance variables and set them to a fresh forward reference */
    /* If these do not correspond to productions, they will be overridden by the subclass */
    _allProductions.forEach((VariableMirror slot){
      Symbol iv = slot.simpleName;
      var fref = new ForwardReferenceParser();
      /* Change due to deficiency in Dart mirror lib. Since getField invokes the getter rather than
       * actually accessing the field, once we override a production, the original code no longer works.
       * Instead, assume that all slots that are defined in subclasses of ExecutableGrammar are productions.
       * They will get overridden if they have another use. Only if a field with the same name is defined by a
       * subclass will this fail.
       * */
      // if ((selfMirror.getField(iv)).value.reflectee == null) {
      forwardReferenceTable[iv] = fref;
      /* set iv to fref */
      selfMirror.setField(iv, fref);
      // would need to chain all these, and chain grammar construction to the result. This means grammar would have to be
      // in a known abstract method called by the constructor of this class.
      // };
    });
  }

  /* Names every production after the field that holds it and answers the receiver. */
  CombinatorialParser nameProductions() {
    for (VariableMirror iv in _allProductions) {
      Symbol pn = iv.simpleName;
      var production = selfMirror.getField(pn).reflectee;
      // Fields reused for something other than a parser have no name to set.
      if (production is CombinatorialParser) {
        production.name = MirrorSystem.getName(pn);
      }
    }
    return this;
  }
}


abstract class RunnableGrammar extends CombinatorialParser {
  /* This class is intended to implement Parser Combinators.
   * A RunnableGrammar[T] returns a value of type T after successful parsing.
   *
   * The class is abstract. It does not implement
   * the parsing routine parseWithContext().
   *
   * Concrete subclasses should implement specific grammars.
   *
   * Parsing is initiated by calling parse(). This routine takes a String as input.
   * If parsing fails, it is the caller's responsibility to set the input stream
   * back to its original position (Q: is this a good idea?).
896 | * 897 | * This class differs from ExecutableGrammar in that it does not rely on reflection 898 | * for its implementation. Instead, it makes heavy use of noSuchMethod(). 899 | * The advantage is that users do not have to declare all productions 900 | * as fields before actually setting the production value in the constructor. 901 | * There is also a temporary advantage that this version can be used with dart2js 902 | * right now, before mirrors are 903 | * implemented; it may be that even when mirrors are supported by dart2js, 904 | * the space advantages may be significant. 905 | * 906 | * The disadvantages are that parsing is slower, because productions are 907 | * looked up via noSuchMethod() instead of as fields, and the absence of 908 | * type information. 909 | */ 910 | 911 | Map forwardReferenceTable = new Map(); 912 | Map productions = new Map(); 913 | // non-parsers might be inserted by noSuchMethod, so values have type dynamic 914 | 915 | RunnableGrammar(); 916 | 917 | noSuchMethod(Invocation im){ 918 | // if it's a get, look it up in the productions map; if it's missing, install a forwarder and return that 919 | if (im.isGetter) { 920 | var result; 921 | return (result = productions[im.memberName]) == null ? setupForwardReference(im.memberName): result; 922 | }; 923 | // if it is a set, install in the productions map 924 | if (im.isSetter){ 925 | // Must be careful, since setter name and getter name differ by trailing '='! 926 | String setterName = MirrorSystem.getName(im.memberName); 927 | Symbol fieldName = new Symbol(setterName.substring(0, setterName.length - 1)); 928 | var parser = im.positionalArguments[0]; 929 | parser.name = MirrorSystem.getName(fieldName); // name the production here! 
930 | return productions[fieldName] = parser; 931 | }; 932 | // otherwise forward to super method 933 | return super.noSuchMethod(im); 934 | } 935 | 936 | 937 | ForwardReferenceParser setupForwardReference(Symbol productionName){ 938 | ForwardReferenceParser fref = new ForwardReferenceParser(); 939 | fref.bindingRoutine = () => finalBindForwardReferences; 940 | return productions[productionName] = forwardReferenceTable[productionName] = fref; 941 | } 942 | 943 | 944 | // forward references 945 | 946 | get finalBindForwardReferences { 947 | forwardReferenceTable.forEach((k, v){ 948 | var p = productions[k]; 949 | if (p is CombinatorialParser) { 950 | v.bind(p); 951 | p.name = k; /* a good place to name the productions */ 952 | }}); 953 | } 954 | } 955 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | dart-executable-grammars 2 | ======================== 3 | 4 | A parser combinator library for Dart based on a port of Newspeak's executable grammars. See 5 | 6 | http://bracha.org/executableGrammars.pdf 7 | 8 | for a discussion of the main ideas. 9 | 10 | The original Newspeak implementation relied on reflection as described in the above paper, and in the slides at 11 | 12 | http://bracha.org/newspeak-parsers.pdf 13 | 14 | 15 | The class ExecutableGrammar embodies that logic. The class RunnableGrammar embodies a variation that uses noSuchMethod instead. 16 | All one needs to do to switch from one implementation to the other is to inherit from RunnableGrammar instead of 17 | ExecutableGrammar. 18 | 19 | The current DartGrammar needs debugging so that the grammar is truly accurate. 20 | There are also examples of functioning Smalltalk grammars and parsers. 
All of these are available in versions that use either ExecutableGrammar 21 | (dart_executable_grammar.dart, smalltalkGrammar.dart, smalltalkParser.dart) or RunnableGrammar (dart_runnable_grammar.dart, 22 | smalltalkGrammarRevised.dart, smalltalkRevisedParser.dart). 23 | There is a little bit of test code in testParserCombinators.dart. 24 | 25 | One advantage of RunnableGrammar is that it is no longer necessary to declare all productions as fields of the grammar class. 26 | In the original Newspeak design, each production was declared and initialized as a slot in the class's instance initializer. 27 | However, in Dart this solution did not work quite so well. Field initializers in Dart are not allowed to access 'this' and 28 | since almost any production requires such access, the pattern was to declare the fields, and then initialize them inside 29 | the constructor. This duplication is annoying to write and harder to read. 30 | 31 | Since RunnableGrammar maintains the actual production objects in a map, no fields need to be declared. Instead, references to 32 | productions in the constructor call nonexistent setters and getters which are trapped by noSuchMethod and handled properly. 33 | This does mean that actual parsing is likely to be slower, as each use of a production goes through noSuchMethod. We have not 34 | yet established the nature of the performance differences. 35 | 36 | Another issue is the abundance of spurious warnings one gets using RunnableGrammar, as all productions are undefined from 37 | the perspective of the Dart analyzer. Planned silencing mechanisms like @ExpectWarnings will help. 38 | -------------------------------------------------------------------------------- /UnarySelectorParser.dart: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2008 Cadence Design Systems, Inc. 3 | Copyright 2013 Google Inc.
4 | 5 | Licensed under the Apache License, Version 2.0 (the ''License''); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | */ 9 | library unary_selector_parser; 10 | import 'CombinatorialParsing.dart'; 11 | 12 |

class UnarySelectorParser extends CombinatorialParser {
  /* Resolves an ambiguity that arises when there is no separate lexer.

     When parsing a unary expression that is an argument to a keyword message,
     one expects a unary selector, which is an identifier. However, the next
     token may actually be a keyword that belongs to the surrounding message.
     Without real tokenization the prefix of that keyword would be misread as
     an identifier, i.e. as another unary message sent to the argument.

     Using a lexer solves this but introduces a subtlety around the assignment
     operator := . If there is no whitespace between a variable name and the
     assignment, the variable name is tokenized as a keyword rather than as an
     identifier. The Strongtalk parser, DeltaParser, deals with this specially.
     In the long term, that is probably the way to go.
  */

  /* The grammar whose keyword and identifier productions are consulted. */
  var p;

  UnarySelectorParser(this.p);

  /* Fails when a keyword starts at the current position; otherwise parses an
     identifier there. The keyword attempt never consumes input: the position
     is restored whether it succeeds or fails. */
  parseWithContext(context) {
    final start = context.position;
    var keywordFound = true;
    try {
      p.keyword.parseWithContext(context);
    } catch (e) {
      // Any failure of the keyword attempt means "not a keyword here".
      keywordFound = false;
    }
    context.position = start;
    if (keywordFound) {
      throw new ParserError('should not print', start);
    }
    return p.identifier.parseWithContext(context);
  }

}
-------------------------------------------------------------------------------- /dart_executable_grammar.dart: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Google Inc.
3 | 4 | Licensed under the Apache License, Version 2.0 (the ''License''); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | */ 9 | 10 | library dart_grammar; 11 | import 'CombinatorialParsing.dart'; 12 | 13 |

class DartGrammar extends ExecutableGrammar {
  /* A grammar for Dart written with parser combinators.

     Every field below is a production. The ExecutableGrammar constructor runs
     first and stores a forward reference in each field, which is why the
     constructor body may mention a production before the line that defines it.

     Alternation (|) tries its left operand first and only falls back to the
     right one when the left fails, so within an alternation a token must be
     listed before any other token that is a prefix of it. */
  CombinatorialParser variableDeclaration, declaredIdentifier, finalConstVarOrType, varOrType, initializedVariableDeclaration,
  initializedIdentifier, initializedIdentifierList, functionSignature, returnType, functionBody, block, formalParameterList,
  normalFormalParameter, normalFormalParameters, optionalFormalParameters, optionalPositionalFormalParameters, namedFormalParameters, namedFormalParameter,
  simpleFormalParameter, fieldFormalParameter, defaultFormalParameter, defaultNamedParameter, classDefinition, mixins, classMemberDefinition,
  methodSignature, declaration, staticFinalDeclarationList, staticFinalDeclaration, operatorSignature, operator, binaryOperator,
  getterSignature, setterSignature, constructorSignature, redirection, initializers, superCallOrFieldInitializer, fieldInitializer,
  factoryConstructorSignature, redirectingFactoryConstructorSignature, superclass, interfaces, mixinApplication, typeParameter,
  typeParameters, metadata, expression, expressionWithoutCascade, expressionList, primary, literal, nullLiteral, numericLiteral,
  NUMBER, EXPONENT, HEX_NUMBER, HEX_DIGIT, booleanLiteral, stringLiteral, singleLineString, multilineString, ESCAPE_SEQUENCE,
  HEX_DIGIT_SEQUENCE, stringContentDQ, stringContentSQ, stringContentTDQ, stringContentTSQ, NEWLINE, stringInterpolation,
  listLiteral, mapLiteral, mapLiteralEntry, throwExpression, throwExpressionWithoutCascade, rethrowStatement, functionExpression,
  functionExpressionBody, thisExpression, newExpression, constObjectExpression, arguments, argumentList, namedArgument, cascadeSection,
  cascadeSelector, assignmentOperator, compoundAssignmentOperator, conditionalExpression, logicalOrExpression, logicalAndExpression,
  bitwiseOrExpression, bitwiseXorExpression, bitwiseAndExpression, bitwiseOperator, equalityExpression, equalityOperator,
  relationalExpression, relationalOperator, shiftExpression, shiftOperator, additiveExpression, additiveOperator, multiplicativeExpression,
  multiplicativeOperator, unaryExpression, prefixOperator, unaryOperator, postfixExpression, postfixOperator, selector,
  incrementOperator, assignableExpression, assignableSelector, identifier, IDENTIFIER_NO_DOLLAR, IDENTIFIER, BUILT_IN_IDENTIFIER,
  IDENTIFIER_START, IDENTIFIER_START_NO_DOLLAR, IDENTIFIER_PART_NO_DOLLAR, IDENTIFIER_PART, qualified, typeTest, isOperator,
  typeCast, asOperator, argumentDefinitionTest, statements, statement, expressionStatement, localVariableDeclaration,
  nonLabelledStatement, localFunctionDeclaration, ifStatement, forStatement, forLoopParts, forInitializerStatement, whileStatement,
  doStatement, constantConstructorSignature, switchStatement, switchCase, defaultCase, tryStatement, onPart, catchPart, finallyPart,
  returnStatement, label, breakStatement, continueStatement, assertStatement, topLevelDefinition, getOrSet, libraryDefinition,
  libraryName, importOrExport, libraryImport, combinator, identifierList, libraryExport, partDirective, partHeader, partDeclaration,
  scriptDefinition, scriptTag, uri, type, typeName, typeArguments, typeList, typeAlias, typeAliasBody, functionTypeAlias,
  functionPrefix, LETTER, DIGIT, WHITESPACE, SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT, ASSERT, BREAK, CASE, CATCH, CLASS, CONST,
  CONTINUE, DEFAULT, DO, ELSE, EXTENDS, FALSE, FINAL, FINALLY, FOR, IF, IN, IS, NEW, NULL, RETHROW, RETURN, SUPER, SWITCH, THIS, THROW,
  TRUE, TRY, VAR, VOID, WHILE, WITH, backSlash, colon, comma, dollar, dot,
  doubleQuote, equalSign, lparen, minus, plus, rparen, semicolon, singleQuote,
  tripleDoubleQuote, tripleSingleQuote, ABSTRACT, AS, DYNAMIC, EXPORT,
  EXTERNAL, FACTORY, GET, IMPLEMENTS, IMPORT, LIBRARY, OPERATOR, PART, SET, STATIC, TYPEDEF, HIDE, SHOW, OF, ON;

  /* Defines every production of the grammar. */
  DartGrammar() {
    //lexemes

    backSlash = token('\\');
    colon = token(':');
    comma = token(',');
    dollar = token('\$');
    dot = token('.');
    doubleQuote = token('"');
    equalSign = token('=');
    lparen = token('(');
    minus = token('-');
    plus = token('+');
    rparen = token(')');
    semicolon = token(';');
    singleQuote = token("'");
    tripleDoubleQuote = token('"""');
    tripleSingleQuote = token("'''");

    DIGIT = charBetween('0', '9');
    LETTER = new PredicateTokenParser((c) => isLetter(c), 'letter expected');
    HEX_DIGIT =
        charBetween('a','f')
      | charBetween('A','F')
      | DIGIT
      ;

    HEX_NUMBER =
        token('0x') & HEX_DIGIT.plus
      | token('0X') & HEX_DIGIT.plus
      ;

    EXPONENT =
        (token('e') | token('E')) & (plus | minus).opt & DIGIT.plus
      ;

    NUMBER =
        DIGIT.plus & (dot & DIGIT.plus).opt & EXPONENT.opt
      | dot & DIGIT.plus & EXPONENT.opt
      ;

    HEX_DIGIT_SEQUENCE =
        HEX_DIGIT & HEX_DIGIT.opt & HEX_DIGIT.opt & HEX_DIGIT.opt & HEX_DIGIT.opt & HEX_DIGIT.opt;

    // An escape sequence is a backslash followed by a letter in the parsed
    // text, so the backslash itself must be escaped in these literals (as it
    // already was for \x and \u). The unescaped forms matched the control
    // characters themselves.
    ESCAPE_SEQUENCE =
        token('\\n')
      | token('\\r')
      | token('\\f')
      | token('\\b')
      | token('\\t')
      | token('\\v')
      | token('\\x') & HEX_DIGIT & HEX_DIGIT
      | token('\\u') & HEX_DIGIT & HEX_DIGIT & HEX_DIGIT & HEX_DIGIT
      | token('\\u{') & HEX_DIGIT_SEQUENCE & token('}')
      ;

    //keywords
    ASSERT = token('assert');
    BREAK = token('break');
    CASE = token('case');
    CATCH = token('catch');
    CLASS = token('class');
    CONST = token('const');
    CONTINUE = token('continue');
    DEFAULT = token('default');
    DO = token('do');
    ELSE = token('else');
    EXTENDS = token('extends');
    FALSE = token('false');
    FINAL = token('final');
    FINALLY = token('finally');
    FOR = token('for');
    IF = token('if');
    IN = token('in');
    IS = token('is');
    NEW = token('new');
    NULL = token('null');
    RETHROW = token('rethrow');
    RETURN = token('return');
    SUPER = token('super');
    SWITCH = token('switch');
    THIS = token('this');
    THROW = token('throw');
    TRUE = token('true');
    TRY = token('try');
    VAR = token('var');
    VOID = token('void');
    WHILE = token('while');
    WITH = token('with');

    // built-in identifiers

    ABSTRACT = token('abstract');
    AS = token('as');
    DYNAMIC = token('dynamic');
    EXPORT = token('export');
    EXTERNAL = token('external');
    FACTORY = token('factory');
    GET = token('get');
    IMPLEMENTS = token('implements');
    IMPORT = token('import');
    LIBRARY = token('library');
    OPERATOR = token('operator');
    PART = token('part');
    SET = token('set');
    STATIC = token('static');
    TYPEDEF = token('typedef');

    // special cases
    HIDE = token('hide');
    SHOW = token('show');
    OF = token('of');
    ON = token('on');

    //syntax

    // types

    typeArguments = token('<') & typeList & token('>');
    typeName = qualified;
    type = typeName & typeArguments.opt;
    typeList = type.starSeparatedBy(comma);
    functionPrefix = returnType.opt & identifier;
    functionTypeAlias = functionPrefix & typeParameters.opt & formalParameterList & semicolon;
    typeAliasBody = identifier & typeParameters.opt & equalSign & ABSTRACT.opt & mixinApplication | functionTypeAlias;
    typeAlias = metadata & TYPEDEF & typeAliasBody;

    // declarations

    metadata = (token('@') & qualified & (dot & identifier).opt & arguments.opt).star;

    typeParameter = metadata & identifier & (EXTENDS & type).opt;
    typeParameters = token('<') & typeParameter.plusSeparatedBy(comma) & token('>');

    returnType =
        VOID
      | type
      ;

    varOrType =
        VAR
      | type
      ;

    finalConstVarOrType =
        FINAL & type.opt
      | CONST & type.opt
      | varOrType
      ;

    declaredIdentifier = metadata & finalConstVarOrType & identifier;
    variableDeclaration = declaredIdentifier.plusSeparatedBy(comma);
    initializedIdentifier = identifier & (equalSign & expression).opt;
    initializedVariableDeclaration = declaredIdentifier & (equalSign & expression).opt & (comma & initializedIdentifier).star;
    initializedIdentifierList = initializedIdentifier.plusSeparatedBy(comma);

    fieldFormalParameter = metadata & finalConstVarOrType.opt & THIS & dot & identifier;

    simpleFormalParameter =
        declaredIdentifier
      | metadata & identifier
      ;

    normalFormalParameter =
        functionSignature
      | fieldFormalParameter
      | simpleFormalParameter
      ;

    normalFormalParameters = normalFormalParameter.plusSeparatedBy(comma);
    defaultFormalParameter = normalFormalParameter & (equalSign & expression).opt;
    defaultNamedParameter = normalFormalParameter & (colon & expression).opt;
    optionalPositionalFormalParameters = token('[') & defaultFormalParameter.plusSeparatedBy(comma) & token(']');

    optionalFormalParameters =
        optionalPositionalFormalParameters
      | namedFormalParameters
      ;

    formalParameterList =
        lparen & rparen
      | lparen & normalFormalParameters & (comma & optionalFormalParameters).opt & rparen
      | lparen & optionalFormalParameters & rparen
      ;

    namedFormalParameters = token('{') & defaultNamedParameter.plusSeparatedBy(comma) & token('}');

    functionSignature = metadata & returnType.opt & identifier & formalParameterList;
    block = token('{') & statements & token('}');

    functionBody =
        token('=>') & expression & semicolon
      | block
      ;

    interfaces = IMPLEMENTS & typeList;
    superclass = EXTENDS & type;

    constantConstructorSignature = CONST & qualified & formalParameterList;
    redirectingFactoryConstructorSignature =
        CONST.opt & FACTORY & identifier & (dot & identifier).opt & formalParameterList & equalSign & type & (dot & identifier).opt
      ;
    factoryConstructorSignature =
        FACTORY & identifier & (dot & identifier).opt & formalParameterList
      ;

    fieldInitializer = (THIS & dot).opt & identifier & equalSign & conditionalExpression & cascadeSection.star;
    superCallOrFieldInitializer =
        SUPER & arguments
      | SUPER & dot & identifier & arguments
      | fieldInitializer
      ;

    initializers = colon & superCallOrFieldInitializer.plusSeparatedBy(comma);
    redirection = colon & THIS & (dot & identifier).opt & arguments;
    constructorSignature = identifier & (dot & identifier).opt & formalParameterList;
    setterSignature = returnType.opt & SET & identifier & formalParameterList;
    getterSignature = type.opt & GET & identifier;

    binaryOperator =
        multiplicativeOperator
      | additiveOperator
      | shiftOperator
      | relationalOperator
      | token('==')
      | bitwiseOperator
      ;

    // Longest alternatives first: '[]' is a prefix of '[]=' and '~' is a
    // prefix of '~/' (reached through binaryOperator), so with the shorter
    // forms listed first the longer operator names could never be matched.
    operator =
        token('[') & token(']') & equalSign
      | token('[') & token(']')
      | binaryOperator
      | token('~')
      ;

    operatorSignature = returnType.opt & OPERATOR & operator & formalParameterList;

    mixins = WITH & typeList;

    methodSignature =
        constructorSignature & initializers.opt
      | factoryConstructorSignature
      | STATIC.opt & functionSignature
      | STATIC.opt & getterSignature
      | STATIC.opt & setterSignature
      | operatorSignature
      ;

    staticFinalDeclaration = identifier & equalSign & expression;
    staticFinalDeclarationList = staticFinalDeclaration.plusSeparatedBy(comma);

    declaration =
        constantConstructorSignature & (redirection | initializers).opt
      | constructorSignature & (redirection | initializers).opt
      | EXTERNAL & constantConstructorSignature
      | EXTERNAL & constructorSignature
      | EXTERNAL & factoryConstructorSignature
      | (EXTERNAL & STATIC.opt).opt & getterSignature
      | (EXTERNAL & STATIC.opt).opt & setterSignature
      | EXTERNAL.opt & operatorSignature
      | (EXTERNAL & STATIC.opt).opt & functionSignature
      | getterSignature
      | setterSignature
      | operatorSignature
      | functionSignature
      | STATIC & (FINAL | CONST) & type.opt & staticFinalDeclarationList
      | CONST & type.opt & staticFinalDeclarationList
      | FINAL & type.opt & initializedIdentifierList
      | STATIC.opt & (VAR | type) & initializedIdentifierList
      ;

    classMemberDefinition =
        declaration & semicolon
      | methodSignature & functionBody
      ;

    classDefinition =
        metadata & ABSTRACT.opt & CLASS & identifier & typeParameters.opt & (superclass & mixins.opt).opt & interfaces.opt
        & token('{') & (metadata & classMemberDefinition).star & token('}')
      ;

    mixinApplication = type & mixins & interfaces.opt;

    namedArgument = label & expression;
    argumentList =
        namedArgument.plusSeparatedBy(comma)
      | expressionList.plusSeparatedBy(comma)
      ;

    arguments = lparen & argumentList.opt & rparen;

    // expressions

    IDENTIFIER_START_NO_DOLLAR =
        LETTER
      | token('_')
      ;

    IDENTIFIER_START =
        IDENTIFIER_START_NO_DOLLAR
      | dollar
      ;

    IDENTIFIER_PART_NO_DOLLAR =
        IDENTIFIER_START_NO_DOLLAR
      | DIGIT
      ;

    IDENTIFIER_PART =
        IDENTIFIER_START
      | DIGIT
      ;

    IDENTIFIER_NO_DOLLAR = IDENTIFIER_START_NO_DOLLAR & IDENTIFIER_PART_NO_DOLLAR.star;

    IDENTIFIER = IDENTIFIER_START & IDENTIFIER_PART.star;

    identifier = tokenFor(IDENTIFIER);
    qualified = identifier.plusSeparatedBy(dot);

    assignableSelector =
        token('[') & expression & token(']')
      | dot & identifier
      ;

    assignableExpression =
        primary & (arguments.star & assignableSelector).plus
      | SUPER & assignableSelector
      | identifier
      ;

    incrementOperator = token('++') | token('--');
    selector = assignableSelector | arguments;
    postfixOperator = incrementOperator;

    postfixExpression =
        assignableExpression & postfixOperator
      | primary & selector.star
      ;

    unaryOperator = token('!') | token('~');
    prefixOperator = minus | unaryOperator;
    unaryExpression =
        prefixOperator & unaryExpression
      | postfixExpression
      | prefixOperator & SUPER
      | incrementOperator & assignableExpression
      ;

    multiplicativeOperator = token('*') | token('/') | token('%') | token('~/');
    multiplicativeExpression =
        unaryExpression & (multiplicativeOperator & unaryExpression).star
      | SUPER & (multiplicativeOperator & unaryExpression).plus
      ;

    additiveOperator = plus | minus;

    additiveExpression =
        multiplicativeExpression & (additiveOperator & multiplicativeExpression).star
      | SUPER & (additiveOperator & multiplicativeExpression).plus
      ;

    shiftOperator = token('<<') | token('>>');
    shiftExpression =
        additiveExpression & (shiftOperator & additiveExpression).star
      | SUPER & (shiftOperator & additiveExpression).plus
      ;

    // Two-character operators first; otherwise '<' and '>' match the first
    // character of '<=' and '>=' and the longer operators are never tried.
    relationalOperator = token('<=') | token('>=') | token('<') | token('>');
    relationalExpression =
        shiftExpression & (typeTest | typeCast | relationalOperator & shiftExpression).opt
      | SUPER & relationalOperator & shiftExpression
      ;

    equalityOperator = token('==') | token('!=');
    equalityExpression =
        relationalExpression & (equalityOperator & relationalExpression).opt
      | SUPER & equalityOperator & relationalExpression
      ;

    bitwiseOperator = token('|') | token('&') | token('^');
    bitwiseAndExpression =
        equalityExpression & (token('&') & equalityExpression).star
      | SUPER & (token('&') & equalityExpression).plus
      ;
    bitwiseXorExpression =
        bitwiseAndExpression & (token('^') & bitwiseAndExpression).star
      | SUPER & (token('^') & bitwiseAndExpression).plus
      ;
    bitwiseOrExpression =
        bitwiseXorExpression & (token('|') & bitwiseXorExpression).star
      | SUPER & (token('|') & bitwiseXorExpression).plus
      ;

    logicalAndExpression = bitwiseOrExpression & (token('&&') & bitwiseOrExpression).star;
    logicalOrExpression = logicalAndExpression & (token('||') & logicalAndExpression).star;

    conditionalExpression = logicalOrExpression & (token('?') & expressionWithoutCascade & colon & expressionWithoutCascade).opt;

    compoundAssignmentOperator =
        token('*=')
      | token('/=')
      | token('~/=')
      | token('%=')
      | token('+=')
      | token('-=')
      | token('<<=')
      | token('>>=')
      | token('&=')
      | token('^=')
      | token('|=')
      ;
    assignmentOperator = equalSign | compoundAssignmentOperator;

    cascadeSelector =
        token('[') & expression & token(']')
      | identifier
      ;
    cascadeSection =
        token('..') &
        (cascadeSelector & arguments.star) &
        (assignableSelector & arguments.star).star &
        (assignmentOperator & expressionWithoutCascade).opt
      ;

    isOperator = IS & token('!').opt;
    typeTest = isOperator & type;
    typeCast = AS & type;
    argumentDefinitionTest = token('?') & identifier;

    constObjectExpression = CONST & type & (dot & identifier).opt & arguments;
    newExpression = NEW & type & (dot & identifier).opt & arguments;

    thisExpression = THIS;

    functionExpressionBody =
        token('=>') & expression
      | block
      ;
    functionExpression = formalParameterList & functionExpressionBody;

    throwExpression = THROW & expression;
    throwExpressionWithoutCascade = THROW & expressionWithoutCascade;

    mapLiteralEntry = stringLiteral & colon & expression;
    // Entries of a map literal are separated by commas (this used to say dot).
    mapLiteral =
        CONST.opt &
        typeArguments.opt &
        token('{') &
        (mapLiteralEntry & (comma & mapLiteralEntry).star & comma.opt).opt &
        token('}');

    listLiteral =
        CONST.opt & typeArguments.opt & token('[') & (expressionList & comma.opt).opt & token(']');

    stringInterpolation = dollar & IDENTIFIER_NO_DOLLAR |
        dollar & token('{') & expression & token('}');
    // A newline is the line-feed or carriage-return character itself; '\\n'
    // matched a backslash followed by the letter n.
    // NOTE(review): if token() skips leading whitespace, a raw newline may
    // never reach this parser - confirm against token()'s definition.
    NEWLINE = token('\n') | token('\r');
    stringContentDQ = (backSlash | doubleQuote | dollar | NEWLINE).not |
        backSlash & NEWLINE.not |
        stringInterpolation;
    stringContentSQ = (backSlash | singleQuote | dollar | NEWLINE).not |
        backSlash & NEWLINE.not |
        stringInterpolation;
    stringContentTDQ = (backSlash | tripleDoubleQuote | dollar | NEWLINE).not |
        backSlash & NEWLINE.not |
        stringInterpolation;
    stringContentTSQ = (backSlash | tripleSingleQuote | dollar | NEWLINE).not |
        backSlash & NEWLINE.not |
        stringInterpolation;

    multilineString =
        tripleDoubleQuote & stringContentTDQ.star & tripleDoubleQuote
      | tripleSingleQuote & stringContentTSQ.star & tripleSingleQuote
      | token('r') & tripleDoubleQuote & doubleQuote.not.star & tripleDoubleQuote
      | token('r') & tripleSingleQuote & singleQuote.not.star & tripleSingleQuote
      ;

    singleLineString =
        doubleQuote & stringContentDQ.star & doubleQuote
      | singleQuote & stringContentSQ.star & singleQuote
      | token('r') & doubleQuote & ( doubleQuote | NEWLINE ).not.star & doubleQuote
      | token('r') & singleQuote & ( singleQuote | NEWLINE ).not.star & singleQuote
      ;

    stringLiteral =
        multilineString.plus
      | singleLineString.plus
      ;

    // HEX_NUMBER first: NUMBER accepts the leading '0' of '0x1F' on its own,
    // so with NUMBER first a hexadecimal literal was never recognized.
    numericLiteral =
        HEX_NUMBER
      | NUMBER
      ;

    booleanLiteral =
        TRUE
      | FALSE
      ;

    nullLiteral = NULL;

    literal =
        nullLiteral
      | booleanLiteral
      | numericLiteral
      | stringLiteral
      | mapLiteral
      | listLiteral
      ;

    expression =
        assignableExpression & assignmentOperator & expression
      | conditionalExpression & cascadeSection.star
      | throwExpression
      ;

    expressionWithoutCascade =
        assignableExpression & assignmentOperator & expressionWithoutCascade
      | conditionalExpression
      | throwExpressionWithoutCascade
      ;

    expressionList = expression.plusSeparatedBy(comma);

    primary =
        thisExpression
      | SUPER & assignableSelector
      | functionExpression
      | literal
      | identifier
      | newExpression
      | constObjectExpression
      | lparen & expression & rparen
      | argumentDefinitionTest
      ;

    // statements

    assertStatement = ASSERT & lparen & conditionalExpression & rparen & semicolon;
    continueStatement = CONTINUE & identifier.opt & semicolon;
    breakStatement = BREAK & identifier.opt & semicolon;
    label = identifier & colon;
    returnStatement = RETURN & expression.opt & semicolon;

    finallyPart = FINALLY & block;
    catchPart = CATCH & lparen & identifier & (comma & identifier).opt & rparen;
    onPart =
        catchPart & block
      | ON & type & catchPart.opt & block
      ;

    tryStatement = TRY & block & (onPart.plus & finallyPart.opt | finallyPart);

    defaultCase = label.star & DEFAULT & colon & statements;
    switchCase = label.star & (CASE & expression & colon) & statements;
    switchStatement = SWITCH & lparen & expression & rparen & token('{') & switchCase.star & defaultCase.opt & token('}');
    doStatement = DO & statement & WHILE & lparen & expression & rparen & semicolon;
    whileStatement = WHILE & lparen & expression & rparen & statement;

    // localVariableDeclaration already ends with a semicolon, so none is added
    // here; the extra one required `for (var i = 0;; ...`.
    forInitializerStatement =
        localVariableDeclaration
      | expression.opt & semicolon
      ;

    forLoopParts =
        forInitializerStatement & expression.opt & semicolon & expressionList.opt
      | declaredIdentifier & IN & expression
      | identifier & IN & expression
      ;

    forStatement = FOR & lparen & forLoopParts & rparen & statement;
    ifStatement = IF & lparen & expression & rparen & statement & (ELSE & statement).opt;
    // A rethrow statement includes its terminating semicolon.
    rethrowStatement = RETHROW & semicolon;
    localFunctionDeclaration = functionSignature & functionBody;
    localVariableDeclaration = initializedVariableDeclaration & semicolon;
    expressionStatement = expression.opt & semicolon;

    // localVariableDeclaration already ends with a semicolon (see above).
    nonLabelledStatement =
        block
      | localVariableDeclaration
      | forStatement
      | whileStatement
      | doStatement
      | switchStatement
      | ifStatement
      | rethrowStatement
      | tryStatement
      | breakStatement
      | continueStatement
      | returnStatement
      | expressionStatement
      | assertStatement
      | localFunctionDeclaration;

    statement = label.star & nonLabelledStatement;
    statements = statement.star;

    // libraries and scripts

    uri = stringLiteral;
    getOrSet = GET | SET;

    topLevelDefinition =
        classDefinition
      | mixinApplication
      | typeAlias
      | EXTERNAL & functionSignature
      | EXTERNAL & getterSignature
      | EXTERNAL & setterSignature
      | functionSignature & functionBody
      | returnType.opt & getOrSet & identifier & formalParameterList & functionBody
      | (FINAL | CONST) & type.opt & staticFinalDeclarationList & semicolon
      | variableDeclaration & semicolon
      ;

    identifierList = identifier.starSeparatedBy(comma);
    combinator =
        SHOW & identifierList
      | HIDE & identifierList;

    // An import names the library it imports; the uri was missing here
    // although libraryExport has it.
    libraryImport = metadata & IMPORT & uri & (AS & identifier).opt & combinator.star & semicolon;
    libraryExport = metadata & EXPORT & uri & combinator.star & semicolon;
    importOrExport = libraryImport | libraryExport;

    libraryName = metadata & LIBRARY & identifier.plusSeparatedBy(dot) & semicolon;

    partDirective = metadata & PART & stringLiteral & semicolon;
    partHeader = metadata & PART & OF & identifier.plusSeparatedBy(dot) & semicolon;
    partDeclaration = partHeader & topLevelDefinition.star & eoi;

    libraryDefinition = libraryName.opt & importOrExport.star & partDirective.star & topLevelDefinition.star;

    scriptTag = token('#!') & NEWLINE.not.star & NEWLINE;
    scriptDefinition = scriptTag.opt & libraryDefinition & eoi;
  }

  /* True when the first code unit of c is an ASCII letter (a-z or A-Z). */
  bool isLetter(c) {
    int code = c.codeUnitAt(0);
    return (code >= 97 && code <= 122) || (code >= 65 && code <= 90);
  }
}
712 | -------------------------------------------------------------------------------- /dart_runnable_grammar.dart: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Google Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the ''License''); 5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | */ 9 | 10 | library dart_grammar; 11 | import 'CombinatorialParsing.dart'; 12 | 13 | 14 | class DartGrammar extends RunnableGrammar { 15 | 16 | DartGrammar() { 17 | //lexemes 18 | 19 | backSlash = token('\\'); 20 | colon = token(':'); 21 | comma = token(','); 22 | dollar = token('\$'); 23 | dot = token('.'); 24 | doubleQuote = token('"'); 25 | equalSign = token('='); 26 | lparen = token('('); 27 | minus = token('-'); 28 | plus = token('+'); 29 | rparen = token(')'); 30 | semicolon = token(';'); 31 | singleQuote = token("'"); 32 | tripleDoubleQuote = token('"""'); 33 | tripleSingleQuote = token("'''"); 34 | 35 | DIGIT = charBetween('0', '9'); 36 | LETTER = new PredicateTokenParser((c) => isLetter(c), 'letter expected'); 37 | HEX_DIGIT = 38 | charBetween('a','f') 39 | | charBetween('A','F') 40 | | DIGIT 41 | ; 42 | 43 | HEX_NUMBER = 44 | token('0x') & HEX_DIGIT.plus 45 | | token('0X') & HEX_DIGIT.plus 46 | ; 47 | 48 | EXPONENT = 49 | (token('e') | token('E')) & (plus | minus).opt & DIGIT.plus 50 | ; 51 | 52 | NUMBER = 53 | DIGIT.plus & (dot & DIGIT.plus).opt & EXPONENT.opt 54 | | dot & DIGIT.plus & EXPONENT.opt 55 | ; 56 | 57 | HEX_DIGIT_SEQUENCE = 58 | HEX_DIGIT & HEX_DIGIT.opt & HEX_DIGIT.opt & HEX_DIGIT.opt & HEX_DIGIT.opt & HEX_DIGIT.opt; 59 | 60 | ESCAPE_SEQUENCE = 61 | token('\n') 62 | | token('\r') 63 | | token('\f') 64 | | token('\b') 65 | | token('\t') 66 | | token('\v') 67 | | token('\\x') & HEX_DIGIT & HEX_DIGIT 68 | | token('\\u') & HEX_DIGIT & HEX_DIGIT & HEX_DIGIT & HEX_DIGIT 69 | | token('\\u{') & HEX_DIGIT_SEQUENCE & token('}') 70 | ; 71 | 72 | 73 | 74 | 75 | 76 | 77 | //keywords 78 | ASSERT = token('assert'); 79 | BREAK = token('break'); 80 | CASE = token('case'); 81 | CATCH = token('catch'); 82 | CLASS = token('class'); 83 | CONST = token('const'); 84 | CONTINUE = token('continue'); 85 | DEFAULT = token('default'); 86 | DO = token('do'); 87 | ELSE = 
token('else'); 88 | EXTENDS = token('extends'); 89 | FALSE = token('false'); 90 | FINAL = token('final'); 91 | FINALLY = token('finally'); 92 | FOR = token('for'); 93 | IF = token('if'); 94 | IN = token('in'); 95 | IS = token('is'); 96 | NEW = token('new'); 97 | NULL = token('null'); 98 | RETHROW = token('rethrow'); 99 | RETURN = token('return'); 100 | SUPER = token('super'); 101 | SWITCH = token('switch'); 102 | THIS = token('this'); 103 | THROW = token('throw'); 104 | TRUE = token('true'); 105 | TRY = token('try'); 106 | VAR = token('var'); 107 | VOID = token('void'); 108 | WHILE = token('while'); 109 | WITH = token('with'); 110 | 111 | // built-in identifiers 112 | 113 | ABSTRACT = token('abstract'); 114 | AS = token('as'); 115 | DYNAMIC = token('dynamic'); 116 | EXPORT = token('export'); 117 | EXTERNAL = token('external'); 118 | FACTORY = token('factory'); 119 | GET = token('get'); 120 | IMPLEMENTS = token('implements'); 121 | IMPORT = token('import'); 122 | LIBRARY = token('library'); 123 | OPERATOR = token('operator'); 124 | PART = token('part'); 125 | SET = token('set'); 126 | STATIC = token('static'); 127 | TYPEDEF = token('typedef'); 128 | 129 | // special cases 130 | HIDE = token('hide'); 131 | SHOW = token('show'); 132 | OF = token('of'); 133 | ON = token('on'); 134 | //syntax 135 | 136 | // types 137 | 138 | typeArguments = token('<') & typeList & token('>'); 139 | typeName = qualified; 140 | type = typeName & typeArguments.opt; 141 | typeList = type.starSeparatedBy(comma); 142 | functionPrefix = returnType.opt & identifier; 143 | functionTypeAlias = functionPrefix & typeParameters.opt & formalParameterList & semicolon; 144 | typeAliasBody = identifier & typeParameters.opt & equalSign & ABSTRACT.opt & mixinApplication | functionTypeAlias; 145 | typeAlias = metadata & TYPEDEF & typeAliasBody; 146 | 147 | // declarations 148 | 149 | metadata = (token('@') & qualified & (dot & identifier).opt & arguments.opt).star; 150 | 151 | typeParameter = metadata & 
identifier & (EXTENDS & type).opt; 152 | typeParameters = token('<') & typeParameter.plusSeparatedBy(comma) & token('>'); 153 | 154 | returnType = 155 | VOID 156 | | type 157 | ; 158 | 159 | varOrType = 160 | VAR 161 | | type 162 | ; 163 | 164 | finalConstVarOrType = 165 | FINAL & type.opt 166 | | CONST & type.opt 167 | | varOrType 168 | ; 169 | 170 | declaredIdentifier = metadata & finalConstVarOrType & identifier; 171 | variableDeclaration = declaredIdentifier.plusSeparatedBy(comma); 172 | initializedIdentifier = identifier & (equalSign & expression).opt; 173 | initializedVariableDeclaration = declaredIdentifier & (equalSign & expression).opt & (comma & initializedIdentifier).star; 174 | initializedIdentifierList = initializedIdentifier.plusSeparatedBy(comma); 175 | 176 | 177 | fieldFormalParameter = metadata & finalConstVarOrType.opt & THIS & dot & identifier; 178 | 179 | simpleFormalParameter = 180 | declaredIdentifier 181 | | metadata & identifier 182 | ; 183 | 184 | normalFormalParameter = 185 | functionSignature 186 | | fieldFormalParameter 187 | | simpleFormalParameter 188 | ; 189 | 190 | normalFormalParameters = normalFormalParameter.plusSeparatedBy(comma); 191 | defaultFormalParameter = normalFormalParameter & (equalSign & expression).opt; 192 | defaultNamedParameter = normalFormalParameter & (colon & expression).opt; 193 | optionalPositionalFormalParameters = token('[') & defaultFormalParameter.plusSeparatedBy(comma) & token(']'); 194 | 195 | optionalFormalParameters = 196 | optionalPositionalFormalParameters | 197 | namedFormalParameters 198 | ; 199 | 200 | formalParameterList = 201 | lparen & rparen 202 | | lparen & normalFormalParameters & (comma & optionalFormalParameters).opt & rparen 203 | | lparen & optionalFormalParameters & rparen 204 | ; 205 | 206 | namedFormalParameters = token('{') & defaultNamedParameter.plusSeparatedBy(comma) & token('}'); 207 | 208 | functionSignature = metadata & returnType.opt & identifier & formalParameterList; 209 | 
block = token('{') & statements & token('}'); 210 | 211 | functionBody = 212 | token('=>') & expression & semicolon 213 | | block 214 | ; 215 | 216 | 217 | interfaces = IMPLEMENTS & typeList; 218 | superclass = EXTENDS & type; 219 | ; 220 | 221 | constantConstructorSignature = CONST & qualified & formalParameterList; 222 | redirectingFactoryConstructorSignature = 223 | CONST.opt & FACTORY & identifier & (dot & identifier).opt & formalParameterList & equalSign & type & (dot & identifier).opt 224 | ; 225 | factoryConstructorSignature = 226 | FACTORY & identifier & (dot & identifier).opt & formalParameterList 227 | ; 228 | 229 | 230 | fieldInitializer = (THIS & dot).opt & identifier & equalSign & conditionalExpression & cascadeSection.star; 231 | superCallOrFieldInitializer = 232 | SUPER & arguments 233 | | SUPER & dot & identifier & arguments 234 | | fieldInitializer 235 | ; 236 | 237 | initializers = colon & superCallOrFieldInitializer.plusSeparatedBy(comma); 238 | redirection = colon & THIS & (dot & identifier).opt & arguments; 239 | constructorSignature = identifier & (dot & identifier).opt & formalParameterList; 240 | setterSignature = returnType.opt & SET & identifier & formalParameterList; 241 | getterSignature = type.opt & GET & identifier; 242 | 243 | binaryOperator = 244 | multiplicativeOperator 245 | | additiveOperator 246 | | shiftOperator 247 | | relationalOperator 248 | | token('==') 249 | | bitwiseOperator 250 | ; 251 | 252 | operator = 253 | token('~') 254 | | binaryOperator 255 | | token('[') & token(']') 256 | | token('[') & token(']') & equalSign 257 | ; 258 | 259 | operatorSignature = returnType.opt & OPERATOR & operator & formalParameterList; 260 | 261 | mixins = WITH & typeList; 262 | 263 | methodSignature = 264 | constructorSignature & initializers.opt 265 | | factoryConstructorSignature 266 | | STATIC.opt & functionSignature 267 | | STATIC.opt & getterSignature 268 | | STATIC.opt & setterSignature 269 | | operatorSignature 270 | ; 271 | 272 | 
staticFinalDeclaration = identifier & equalSign & expression; 273 | staticFinalDeclarationList = staticFinalDeclaration.plusSeparatedBy(comma); 274 | 275 | declaration = 276 | constantConstructorSignature & (redirection | initializers).opt 277 | | constructorSignature & (redirection | initializers).opt 278 | | EXTERNAL & constantConstructorSignature 279 | | EXTERNAL & constructorSignature 280 | | EXTERNAL & factoryConstructorSignature 281 | | (EXTERNAL & STATIC.opt).opt & getterSignature 282 | | (EXTERNAL & STATIC.opt).opt & setterSignature 283 | | EXTERNAL.opt & operatorSignature 284 | | (EXTERNAL & STATIC.opt).opt & functionSignature 285 | | getterSignature 286 | | setterSignature 287 | | operatorSignature 288 | | functionSignature 289 | | STATIC & (FINAL | CONST) & type.opt & staticFinalDeclarationList 290 | | CONST & type.opt & staticFinalDeclarationList 291 | | FINAL & type.opt & initializedIdentifierList 292 | | STATIC.opt & (VAR | type) & initializedIdentifierList 293 | ; 294 | 295 | 296 | 297 | classMemberDefinition = 298 | declaration & semicolon 299 | | methodSignature & functionBody 300 | ; 301 | 302 | classDefinition = 303 | metadata & ABSTRACT.opt & CLASS & identifier & typeParameters.opt & (superclass & mixins.opt).opt & interfaces.opt 304 | & token('{') & (metadata & classMemberDefinition).star & token('}') 305 | ; 306 | 307 | mixinApplication = type & mixins & interfaces.opt; 308 | 309 | 310 | 311 | 312 | namedArgument = label & expression; 313 | argumentList = 314 | namedArgument.plusSeparatedBy(comma) 315 | | expressionList.plusSeparatedBy(comma) 316 | ; 317 | 318 | arguments = lparen & argumentList.opt & rparen; 319 | 320 | 321 | 322 | 323 | 324 | 325 | // expressions 326 | 327 | IDENTIFIER_START_NO_DOLLAR = 328 | LETTER 329 | | token('_') 330 | ; 331 | 332 | IDENTIFIER_START = 333 | IDENTIFIER_START_NO_DOLLAR 334 | | dollar 335 | ; 336 | 337 | IDENTIFIER_PART_NO_DOLLAR = 338 | IDENTIFIER_START_NO_DOLLAR 339 | | DIGIT 340 | ; 341 | 342 | 343 | 
IDENTIFIER_PART = 344 | IDENTIFIER_START 345 | | DIGIT 346 | ; 347 | 348 | IDENTIFIER_NO_DOLLAR = IDENTIFIER_START_NO_DOLLAR & IDENTIFIER_PART_NO_DOLLAR.star; 349 | 350 | IDENTIFIER = IDENTIFIER_START & IDENTIFIER_PART.star; 351 | 352 | identifier = tokenFor(IDENTIFIER); 353 | qualified = identifier.plusSeparatedBy(dot); 354 | 355 | assignableSelector = 356 | token('[') & expression & token(']') 357 | | dot & identifier 358 | ; 359 | 360 | assignableExpression = 361 | primary & (arguments.star & assignableSelector).plus 362 | | SUPER & assignableSelector 363 | | identifier 364 | ; 365 | 366 | incrementOperator = token('++') | token('--'); 367 | selector = assignableSelector | arguments; 368 | postfixOperator = incrementOperator; 369 | 370 | postfixExpression = 371 | assignableExpression & postfixOperator 372 | | primary & selector.star 373 | ; 374 | 375 | unaryOperator = token('!') | token('~'); 376 | prefixOperator = minus | unaryOperator; 377 | unaryExpression = 378 | prefixOperator & unaryExpression 379 | | postfixExpression 380 | | prefixOperator & SUPER 381 | | incrementOperator & assignableExpression 382 | ; 383 | 384 | multiplicativeOperator = token('*') | token('/') | token('%') | token('~/'); 385 | multiplicativeExpression = 386 | unaryExpression & (multiplicativeOperator & unaryExpression).star 387 | | SUPER & (multiplicativeOperator & unaryExpression).plus 388 | ; 389 | 390 | additiveOperator = plus | minus; 391 | 392 | additiveExpression = 393 | multiplicativeExpression & (additiveOperator & multiplicativeExpression).star 394 | | SUPER & (additiveOperator & multiplicativeExpression).plus 395 | ; 396 | 397 | shiftOperator = token('<<') | token('>>'); 398 | shiftExpression = 399 | additiveExpression & (shiftOperator & additiveExpression).star 400 | | SUPER & (shiftOperator & additiveExpression).plus 401 | ; 402 | 403 | /* Alternation is ordered (first success wins), so the two-character operators must be tried before their one-character prefixes. */ relationalOperator = token('<=') | token('>=') | token('<') | token('>'); 404 | relationalExpression = 405 | shiftExpression & (typeTest | 
typeCast | relationalOperator & shiftExpression).opt 406 | | SUPER & relationalOperator & shiftExpression 407 | ; 408 | 409 | equalityOperator = token('==') | token('!='); 410 | equalityExpression = 411 | relationalExpression & (equalityOperator & relationalExpression).opt 412 | | SUPER & equalityOperator & relationalExpression 413 | ; 414 | 415 | bitwiseOperator = token('|') | token('&') | token('^'); 416 | bitwiseAndExpression = 417 | equalityExpression & (token('&') & equalityExpression).star 418 | | SUPER & (token('&') & equalityExpression).plus 419 | ; 420 | bitwiseXorExpression = 421 | bitwiseAndExpression & (token('^') & bitwiseAndExpression).star 422 | | SUPER & (token('^') & bitwiseAndExpression).plus 423 | ; 424 | bitwiseOrExpression = 425 | bitwiseXorExpression & (token('|') & bitwiseXorExpression).star 426 | | SUPER & (token('|') & bitwiseXorExpression).plus 427 | ; 428 | 429 | logicalAndExpression = bitwiseOrExpression & (token('&&') & bitwiseOrExpression).star; 430 | logicalOrExpression = logicalAndExpression & (token('||') & logicalAndExpression).star; 431 | 432 | conditionalExpression = logicalOrExpression & (token('?') & expressionWithoutCascade & colon & expressionWithoutCascade).opt; 433 | 434 | compoundAssignmentOperator = 435 | token('*=') 436 | | token('/=') 437 | | token('~/=') 438 | | token('%=') 439 | | token('+=') 440 | | token('-=') 441 | | token('<<=') 442 | | token('>>=') 443 | | token('&=') 444 | | token('^=') 445 | | token('|=') 446 | ; 447 | assignmentOperator = equalSign | compoundAssignmentOperator; 448 | 449 | cascadeSelector = 450 | token('[') & expression & token(']') 451 | | identifier 452 | ; 453 | cascadeSection = 454 | token('..') & 455 | (cascadeSelector & arguments.star) & 456 | (assignableSelector & arguments.star).star & 457 | (assignmentOperator & expressionWithoutCascade).opt 458 | ; 459 | 460 | isOperator = IS & token('!').opt; 461 | typeTest = isOperator & type; 462 | typeCast = AS & type; 463 | 
argumentDefinitionTest = token('?') & identifier; 464 | 465 | constObjectExpression = CONST & type & (dot & identifier).opt & arguments; 466 | newExpression = NEW & type & (dot & identifier).opt & arguments; 467 | 468 | thisExpression = THIS; 469 | 470 | functionExpressionBody = 471 | token('=>') & expression 472 | | block 473 | ; 474 | functionExpression = formalParameterList & functionExpressionBody; 475 | 476 | throwExpression = THROW & expression; 477 | throwExpressionWithoutCascade = THROW & expressionWithoutCascade; 478 | 479 | mapLiteralEntry = stringLiteral & colon & expression; 480 | /* Entries of a map literal are comma-separated, with an optional trailing comma. */ mapLiteral = 481 | CONST.opt & 482 | typeArguments.opt & 483 | token('{') & 484 | (mapLiteralEntry & (comma & mapLiteralEntry).star & comma.opt).opt & 485 | token('}'); 486 | 487 | listLiteral = 488 | CONST.opt & typeArguments.opt & token('[') & (expressionList & comma.opt).opt & token(']'); 489 | 490 | stringInterpolation = dollar & IDENTIFIER_NO_DOLLAR | 491 | dollar & token('{') & expression & token('}'); 492 | /* NEWLINE matches the actual line-feed / carriage-return characters, not the two-character text backslash-n. */ NEWLINE = token('\n') | token('\r'); 493 | stringContentDQ = (backSlash | doubleQuote | dollar | NEWLINE).not | 494 | backSlash & NEWLINE.not | 495 | stringInterpolation; 496 | stringContentSQ = (backSlash | singleQuote | dollar | NEWLINE).not | 497 | backSlash & NEWLINE.not | 498 | stringInterpolation; 499 | stringContentTDQ = (backSlash | tripleDoubleQuote | dollar | NEWLINE).not | 500 | backSlash & NEWLINE.not | 501 | stringInterpolation; 502 | stringContentTSQ = (backSlash | tripleSingleQuote | dollar | NEWLINE).not | 503 | backSlash & NEWLINE.not | 504 | stringInterpolation; 505 | 506 | multilineString = 507 | tripleDoubleQuote & stringContentTDQ.star & tripleDoubleQuote 508 | | tripleSingleQuote & stringContentTSQ.star & tripleSingleQuote 509 | | token('r') & tripleDoubleQuote & doubleQuote.not.star & tripleDoubleQuote 510 | | token('r') & tripleSingleQuote & singleQuote.not.star & tripleSingleQuote 511 | ; 512 | 513 | singleLineString = 514 | doubleQuote & 
stringContentDQ.star & doubleQuote 515 | | singleQuote & stringContentSQ.star & singleQuote 516 | | token('r') & doubleQuote & ( doubleQuote | NEWLINE ).not.star & doubleQuote 517 | | token('r') & singleQuote & ( singleQuote | NEWLINE ).not.star & singleQuote 518 | ; 519 | 520 | 521 | stringLiteral = 522 | multilineString.plus 523 | | singleLineString.plus 524 | ; 525 | 526 | numericLiteral = 527 | NUMBER 528 | | HEX_NUMBER 529 | ; 530 | 531 | booleanLiteral = 532 | TRUE 533 | | FALSE 534 | ; 535 | 536 | nullLiteral = NULL; 537 | 538 | literal = 539 | nullLiteral 540 | | booleanLiteral 541 | | numericLiteral 542 | | stringLiteral 543 | | mapLiteral 544 | | listLiteral 545 | ; 546 | 547 | expression = 548 | assignableExpression & assignmentOperator & expression 549 | | conditionalExpression & cascadeSection.star 550 | | throwExpression 551 | ; 552 | 553 | 554 | 555 | expressionWithoutCascade = 556 | assignableExpression & assignmentOperator & expressionWithoutCascade 557 | | conditionalExpression 558 | | throwExpressionWithoutCascade 559 | ; 560 | 561 | expressionList = expression.plusSeparatedBy(comma); 562 | 563 | 564 | primary = 565 | thisExpression 566 | | SUPER & assignableSelector 567 | | functionExpression 568 | | literal 569 | | identifier 570 | | newExpression 571 | | constObjectExpression 572 | | lparen & expression & rparen 573 | | argumentDefinitionTest 574 | ; 575 | // statements 576 | 577 | assertStatement = ASSERT & lparen & conditionalExpression & rparen & semicolon; 578 | continueStatement = CONTINUE & identifier.opt & semicolon; 579 | breakStatement = BREAK & identifier.opt & semicolon; 580 | label = identifier & colon; 581 | returnStatement = RETURN & expression.opt & semicolon; 582 | 583 | finallyPart = FINALLY & block; 584 | catchPart = CATCH & lparen & identifier & (comma & identifier).opt & rparen; 585 | onPart = 586 | catchPart & block 587 | | ON & type & catchPart.opt & block 588 | ; 589 | 590 | tryStatement = TRY & block & (onPart.plus & 
finallyPart.opt | finallyPart); 591 | 592 | defaultCase = label.star & DEFAULT & colon & statements; 593 | switchCase = label.star & (CASE & expression & colon) & statements; 594 | switchStatement = SWITCH & lparen & expression & rparen & token('{') & switchCase.star & defaultCase.opt & token('}'); 595 | doStatement = DO & statement & WHILE & lparen & expression & rparen & semicolon; 596 | whileStatement = WHILE & lparen & expression & rparen & statement; 597 | 598 | /* localVariableDeclaration already consumes its terminating semicolon, so none is added here. */ forInitializerStatement = 599 | localVariableDeclaration 600 | | expression.opt & semicolon 601 | ; 602 | 603 | forLoopParts = 604 | forInitializerStatement & expression.opt & semicolon & expressionList.opt 605 | | declaredIdentifier & IN & expression 606 | | identifier & IN & expression 607 | ; 608 | 609 | forStatement = FOR & lparen & forLoopParts & rparen & statement; 610 | ifStatement = IF & lparen & expression & rparen & statement & (ELSE & statement).opt; 611 | rethrowStatement = RETHROW; 612 | localFunctionDeclaration = functionSignature & functionBody; 613 | localVariableDeclaration = initializedVariableDeclaration & semicolon; 614 | expressionStatement = expression.opt & semicolon; 615 | 616 | /* The localVariableDeclaration alternative carries its own semicolon (see its definition above); requiring a second one rejected every ordinary declaration statement. */ nonLabelledStatement = 617 | block 618 | | localVariableDeclaration 619 | | forStatement 620 | | whileStatement 621 | | doStatement 622 | | switchStatement 623 | | ifStatement 624 | | rethrowStatement 625 | | tryStatement 626 | | breakStatement 627 | | continueStatement 628 | | returnStatement 629 | | expressionStatement 630 | | assertStatement 631 | | localFunctionDeclaration; 632 | 633 | 634 | statement = label.star & nonLabelledStatement; 635 | statements = statement.star; 636 | 637 | // libraries and scripts 638 | 639 | uri = stringLiteral; 640 | getOrSet = GET | SET; 641 | 642 | topLevelDefinition = 643 | classDefinition 644 | | mixinApplication 645 | | typeAlias 646 | | EXTERNAL & functionSignature 647 | | EXTERNAL & getterSignature 648 | | EXTERNAL & setterSignature 649 | | 
functionSignature & functionBody 650 | | returnType.opt & getOrSet & identifier & formalParameterList & functionBody 651 | | (FINAL | CONST) & type.opt & staticFinalDeclarationList & semicolon 652 | | variableDeclaration & semicolon 653 | ; 654 | 655 | identifierList = identifier.starSeparatedBy(comma); 656 | combinator = 657 | SHOW & identifierList 658 | | HIDE & identifierList; 659 | 660 | 661 | /* An import names the imported library by uri, exactly as libraryExport does. */ libraryImport = metadata & IMPORT & uri & (AS & identifier).opt & combinator.star & semicolon; 662 | libraryExport = metadata & EXPORT & uri & combinator.star & semicolon; 663 | importOrExport = libraryImport | libraryExport; 664 | 665 | libraryName = metadata & LIBRARY & identifier.plusSeparatedBy(dot) & semicolon; 666 | 667 | partDirective = metadata & PART & stringLiteral & semicolon; 668 | partHeader = metadata & PART & OF & identifier.plusSeparatedBy(dot) & semicolon; 669 | partDeclaration = partHeader & topLevelDefinition.star & eoi; 670 | 671 | libraryDefinition = libraryName.opt & importOrExport.star & partDirective.star & topLevelDefinition.star; 672 | 673 | scriptTag = token('#!') & NEWLINE.not.star & NEWLINE; 674 | /* A script is an optional script tag, one library definition, then end of input; the production must not refer to itself. */ scriptDefinition = scriptTag.opt & libraryDefinition & eoi; 675 | } 676 | 677 | bool isLetter(c) { 678 | int code = c.codeUnitAt(0); 679 | return (code >= 97 && code <= 122) || (code >= 65 && code <= 90); 680 | } 681 | } 682 | 683 | 684 | -------------------------------------------------------------------------------- /pubspec.yaml: -------------------------------------------------------------------------------- 1 | name: parserCombinators 2 | description: Executable Grammar library for Dart 3 | 4 | dependencies: 5 | # js: any 6 | unittest: any 7 | -------------------------------------------------------------------------------- /smalltalkGrammar.dart: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2008 Cadence Design Systems, Inc. 3 | Copyright 2013 Google Inc. 
4 | 5 | Licensed under the Apache License, Version 2.0 (the ''License''); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | */ 9 | 10 | library smalltalk_grammar; 11 | import 'CombinatorialParsing.dart'; 12 | import 'UnarySelectorParser.dart'; 13 | 14 | class SmalltalkGrammar extends ExecutableGrammar{ 15 | CombinatorialParser colon, comma, dollar, dot, equalSign, hat, lbracket, 16 | lcurly, lparen, langlebracket, pound, ranglebracket, rbracket, rcurly, 17 | rparen, semicolon, slash, vbar, digit, digits, extendedDigits, radix, 18 | fraction, extendedFraction, exponent, decimalNum, radixNum, num, number, 19 | letter, specialCharacter, character, id, identifier, Char, 20 | characterConstant, twoQuotes, str, string, kw, kws, keyword, sym, symbol, 21 | commentDelimiter, beginComment, endComment, twoDblQuotes, comment, binSel, 22 | binarySelector, assign, assignment, symbolConstant, array, arrayConstant, 23 | tuple, literal, variableName, unarySelector, parenthesizedExpression, 24 | primary, unaryExpression, binaryMsg, binaryExpression, keywordMsg, keywordExpression, 25 | cascadeMsg, cascadedMessageExpression, assignmentLHS, expression, 26 | returnStatement, furtherStatements, statementSequence, statements, 27 | blockParameter, blockParameters, varDecls, temporaries, codeBody, block, 28 | variableDecl, unaryMsgPattern, binaryMsgPattern, keywordMsgPattern, 29 | messagePattern, pragma, methodBody, method, methodDecl, category, 30 | classComment, classBody, classSideDecl, languageId, classCategory, 31 | classFormat, classDefinition, extendedClass, packageName, package; 32 | 33 | SmalltalkGrammar() { 34 | // tokens 35 | 36 | colon = tokenFromChar(':'); 37 | comma = tokenFromChar(','); 38 | dollar = tokenFromChar('\$'); 39 | dot = tokenFromChar('.'); 40 | equalSign = tokenFromChar('='); 41 | hat = tokenFromChar('^'); 42 | lbracket = tokenFromChar('['); 43 | lcurly = 
tokenFromChar('{'); 44 | lparen = tokenFromChar('('); 45 | langlebracket = tokenFromChar('<'); 46 | pound = tokenFromChar('#'); 47 | ranglebracket = tokenFromChar('>'); 48 | rbracket = tokenFromChar(']'); 49 | rcurly = tokenFromChar('}'); 50 | rparen = tokenFromChar(')'); 51 | semicolon = tokenFromChar(';'); 52 | /* a character token must be exactly one character long */ slash = tokenFromChar('/'); 53 | vbar = tokenFromChar('|'); 54 | 55 | // lexical grammar 56 | 57 | digit = charBetween('0', '9'); 58 | digits = digit.plus; 59 | extendedDigits = (digit | letter).plus; 60 | radix = (digits & char('r')).wrapper( 61 | (ds, r) => ds 62 | ); 63 | fraction = (dot & digits).wrapper( 64 | (period, ds) => ds 65 | ); 66 | /* NOTE(review): extendedFraction repeats the shape of radix (digits followed by 'r') instead of mirroring fraction (dot followed by digits) - confirm this is intended. */ extendedFraction = (extendedDigits & char('r')).wrapper( 67 | (ds, r) => ds 68 | ); 69 | exponent = (char('e') & char('-').opt & digits).wrapper( 70 | (e, sgn, ds) => sgn == null ? ds : ds //? 71 | ); 72 | decimalNum = char('-').opt & digits & fraction.opt & exponent.opt; 73 | radixNum = radix & char('-').opt & extendedDigits & extendedFraction.opt & exponent.opt; 74 | num = radixNum | decimalNum; 75 | 76 | /* must distinguish internal use of productions from use as tokens */ 77 | number = tokenFor(num); 78 | letter = new PredicateTokenParser((c) => isLetter(c), 'letter expected'); 79 | 80 | specialCharacter = 81 | char('+') | char('/') | 82 | char('\\') | char('*') | 83 | char('~') | char('<') | char('>') | 84 | char('=') | char('@') | 85 | char('%') | char('|') | 86 | char('&') | char('?') | 87 | char('!') | char(',') | char('`'); 88 | 89 | character = digit | letter | specialCharacter | //? 90 | char('[') | char(']') | 91 | char('{') | char('}') | char('(') | 92 | char(')') | char('^') | char(';') | char('\$') | char('#') | 93 | char(':') | char('.') | char('-') | char('_') | char('`') 94 | /* the Smalltalk grammar neglects to add - to characters, or to comments. 95 | * It does add | [char: $' ], but these are both bugs. 
96 | * We intend to support underscores, which Squeak insists on turning into assignment arrows. 97 | * However, we do not support these as assignments. 98 | * At the moment, we do not accept them in identifiers, but this will change in time. 99 | * */; 100 | 101 | id = (letter & (letter | digit | char('_')).star).wrapper( 102 | (fst, snd) => '$fst${new StringBuffer()..writeAll(snd)}' 103 | ); 104 | 105 | identifier = tokenFor(id); 106 | 107 | /* A character constant is '$' followed by any single character; the predicate is invoked with that character, so it must accept one argument. */ Char = 108 | char('\$') & new PredicateTokenParser((c) => true, ""); 109 | 110 | characterConstant = tokenFor(Char); 111 | 112 | twoQuotes = (char("'") & char("'")).wrapper((q1, q2) => "'"); 113 | 114 | str = (char("'") & stringBody & char("'")).wrapper( 115 | (oq, es, eq) => es.inject('', (s, e) => '$s$e') 116 | ); 117 | 118 | string = tokenFor(str); 119 | 120 | kw = (id & char(':')).wrapper((i, c) => '$i:'); 121 | 122 | kws = kw.plus.wrap((c) => c.inject('', (s, e) => '$s$e')); 123 | 124 | keyword = tokenFor(kw); 125 | 126 | sym = str | kws | binSel | id; 127 | 128 | symbol = tokenFor(sym); 129 | 130 | commentDelimiter = char('"'); 131 | 132 | beginComment = commentDelimiter; 133 | 134 | endComment = commentDelimiter; 135 | 136 | twoDblQuotes = (char('"') & char('"')).wrapper((q1, q2) => '"'); 137 | 138 | comment = beginComment & commentBody & endComment; 139 | 140 | binSel = ((specialCharacter | char('-')) & specialCharacter.opt & specialCharacter.opt).wrapper( 141 | (c1, c2, c3) => '${c1 == null? "": c1}${c2 == null? "": c2}${c3 == null? "": c3}'); //asSymbol? 
142 | 143 | binarySelector = tokenFor(binSel); 144 | 145 | assign = (char(':') & char('=')).wrapper((c, e) => ':='); 146 | 147 | assignment = tokenFor(assign | char('_')); /* Temporary hack */ 148 | 149 | /* syntacticGrammar */ 150 | 151 | symbolConstant = pound & symbol; 152 | 153 | array = (lparen & (number | symbolConstant | symbol | string | characterConstant | arrayConstant | array).star & rparen) 154 | | 155 | /* Byte array literal */ 156 | (lbracket & number.star & rbracket); 157 | 158 | arrayConstant = pound & array; 159 | 160 | tuple = lcurly & expression.starSeparatedOrTerminatedBy(dot) & rcurly; 161 | 162 | literal = number | symbolConstant | characterConstant | string | arrayConstant | tuple; 163 | 164 | variableName = identifier; 165 | 166 | unarySelector = new UnarySelectorParser(this); /* the one hack/flaw. See UnarySelector parser for details */ 167 | 168 | parenthesizedExpression = lparen & expression & rparen; 169 | 170 | primary = variableName | literal | block | parenthesizedExpression; 171 | 172 | unaryExpression = primary & unarySelector.star; 173 | 174 | binaryMsg = binarySelector & unaryExpression; 175 | 176 | binaryExpression = unaryExpression & binaryMsg.star; 177 | 178 | keywordMsg = (keyword & binaryExpression).plus; 179 | 180 | keywordExpression = binaryExpression & keywordMsg.opt; 181 | 182 | cascadeMsg = semicolon & (keywordMsg | binaryMsg | unarySelector); 183 | 184 | cascadedMessageExpression = keywordExpression & cascadeMsg.star; 185 | 186 | assignmentLHS = variableName & assignment; 187 | 188 | expression = assignmentLHS.star & cascadedMessageExpression; 189 | 190 | returnStatement = hat & expression & dot.opt; 191 | 192 | furtherStatements = dot & statements; 193 | 194 | statementSequence = (expression & furtherStatements.opt) | furtherStatements; 195 | 196 | statements = returnStatement | statementSequence | empty; 197 | 198 | blockParameter = colon & variableDecl; 199 | 200 | blockParameters = blockParameter.plus & vbar; 201 | 
202 | varDecls = vbar & variableDecl.star & vbar; 203 | 204 | temporaries = varDecls; 205 | 206 | codeBody = temporaries.opt & pragma.star & statements; 207 | 208 | block = (lbracket & blockParameters.opt & codeBody & rbracket) | (lbracket & blockParameter.star & rbracket); 209 | 210 | variableDecl = identifier; 211 | 212 | unaryMsgPattern = unarySelector; 213 | 214 | binaryMsgPattern = binarySelector & variableDecl; 215 | 216 | keywordMsgPattern = (keyword & variableDecl).plus; 217 | 218 | messagePattern = unaryMsgPattern | binaryMsgPattern | keywordMsgPattern; 219 | 220 | /* Yuck, a pragma can appear before or after |temps| */ 221 | pragma = langlebracket & (unarySelector | (keyword & (literal | variableDecl)).plus) & ranglebracket; 222 | 223 | methodBody = pragma.star & codeBody; 224 | 225 | method = messagePattern & methodBody & eoi; /* A method in a browser */ 226 | 227 | /* Top level productions for classes */ 228 | 229 | methodDecl = messagePattern & equalSign & lparen & methodBody & rparen; 230 | 231 | category = string & methodDecl.star; 232 | 233 | classComment = whitespace & comment; /* A hack, to preserve comments from a complete class declaration */ 234 | 235 | classBody = lparen & classComment.opt & varDecls.opt & varDecls.opt & varDecls.opt & category.star & rparen; 236 | 237 | classSideDecl = colon & classBody; 238 | 239 | languageId = identifier; 240 | 241 | classCategory = string /*.opt */; 242 | 243 | classFormat = 244 | tokenFromSymbol('class') | 245 | tokenFromSymbol('weakclass') | 246 | tokenFromSymbol('variableclass') | 247 | tokenFromSymbol('variablebyteclass') | 248 | tokenFromSymbol('variablewordclass'); 249 | 250 | classDefinition = classCategory & classFormat & identifier & equalSign & identifier & classBody & classSideDecl.opt; 251 | 252 | extendedClass = tokenFromSymbol('extensions') & identifier & equalSign & lparen & category.star & rparen & colon & lparen & category.star & rparen; 253 | 254 | packageName = string; 255 | 256 | 
package = languageId & tokenFromSymbol('package') & packageName & equalSign & lparen & classDefinition.star & extendedClass.star & rparen & eoi; 257 | } 258 | 259 | // as yet unclassified 260 | /* Builds the scanner for the text between the opening and closing double quote of a comment by handing CollectingCommentParser a callback that inspects the input stream. NOTE(review): every branch of the callback is a bare expression statement (false; or true;) and there is no return, so the callback evaluates to null on all paths. input.position is restored only when the character after the current one is not a double quote. Confirm what CollectingCommentParser expects the callback to return before relying on this. */ CombinatorialParser get commentBody { 261 | 262 | /* As an optimization, we process the body of a comment with a dedicated scanning parser. 263 | It should be equivalent to: 264 | 265 | return (character | aWhitespaceChar | char("'") | twoDblQuotes).star */ 266 | return new CollectingCommentParser( (input) { 267 | var c = input.peek; 268 | if (c == null) false; /* let main routine handle end of input */ 269 | else if (c == '"') false; 270 | else { 271 | var pos = input.position; 272 | input.next; 273 | if (input.peek == '"') false; else {input.position = pos; true;} 274 | }}); 275 | } 276 | 277 | 278 | /* Builds the scanner for the text between the single quotes of a string literal; same shape as commentBody but keyed on the single quote character. NOTE(review): as in commentBody, the callback has no return and therefore evaluates to null on all paths; confirm the contract expected by CollectingCommentParser. */ CombinatorialParser get stringBody { 279 | 280 | /* As an optimization, we process the body of a string with a dedicated scanning parser. 281 | It should be equivalent to: 282 | 283 | return (character | aWhitespaceChar | char('"') | twoDblQuotes).star */ 284 | return new CollectingCommentParser((input) { 285 | var c = input.peek; 286 | if (c == null) false; /* let main routine handle end of input */ 287 | else if (c == "'") false; 288 | else { 289 | var pos = input.position; 290 | input.next; 291 | if (input.peek == "'") false; else {input.position = pos; true;} 292 | }}); 293 | } 294 | 295 | /* Returns true when the first UTF-16 code unit of c is an ASCII letter: 97..122 (a-z) or 65..90 (A-Z). */ bool isLetter(c) { 296 | int 297 | code = c.codeUnitAt(0); 298 | return (code >= 97 && code <= 122) || (code >= 65 && code <= 90); 299 | } 300 | } 301 | 302 | -------------------------------------------------------------------------------- /smalltalkGrammarRevised.dart: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | Copyright 2008 Cadence Design Systems, Inc. 4 | Copyright 2013 Google Inc. 5 | 6 | Licensed under the Apache License, Version 2.0 (the ''License''); 7 | you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*/


library smalltalk_grammar_revised;
import 'CombinatorialParsing.dart';
import 'UnarySelectorParser.dart';

/* A Smalltalk grammar written with the parser combinators from
   CombinatorialParsing.dart. The constructor assigns one parser to each
   production name listed in the comment below; the getters at the end of the
   class supply the hand-written scanners and the letter predicate. */
class SmalltalkGrammarRevised extends RunnableGrammar {
  /* CombinatorialParser colon, comma, dollar, dot, equalSign, hat, lbracket,
  lcurly, lparen, langlebracket, pound, ranglebracket, rbracket, rcurly,
  rparen, semicolon, slash, vbar, digit, digits, extendedDigits, radix,
  fraction, extendedFraction, exponent, decimalNum, radixNum, num, number,
  letter, specialCharacter, character, id, identifier, Char,
  characterConstant, twoQuotes, str, string, kw, kws, keyword, sym, symbol,
  commentDelimiter, beginComment, endComment, twoDblQuotes, comment, binSel,
  binarySelector, assign, assignment, symbolConstant, array, arrayConstant,
  tuple, literal, variableName, unarySelector, parenthesizedExpression,
  primary, unaryExpression, binaryMsg, binaryExpression, keywordMsg, keywordExpression,
  cascadeMsg, cascadedMessageExpression, assignmentLHS, expression,
  returnStatement, furtherStatements, statementSequence, statements,
  blockParameter, blockParameters, varDecls, temporaries, codeBody, block,
  variableDecl, unaryMsgPattern, binaryMsgPattern, keywordMsgPattern,
  messagePattern, pragma, methodBody, method, methodDecl, category,
  classComment, classBody, classSideDecl, languageId, classCategory,
  classFormat, classDefinition, extendedClass, packageName, package;*/

  /* Defines every production of the grammar, in the order given. */
  SmalltalkGrammarRevised() {
    // tokens

    colon = token(':');
    comma = token(',');
    dollar = token('\$');
    dot = token('.');
    equalSign = token('=');
    hat = token('^');
    lbracket = token('[');
    lcurly = token('{');
    lparen = token('(');
    langlebracket = token('<');
    pound = token('#');
    ranglebracket = token('>');
    rbracket = token(']');
    rcurly = token('}');
    rparen = token(')');
    semicolon = token(';');
    // NOTE(review): this literal is two slash characters, unlike the other
    // single-character tokens above; confirm that a double slash is intended.
    slash = token('//');
    vbar = token('|');

    // lexical grammar

    digit = charBetween('0', '9');
    digits = digit.plus;
    extendedDigits = (digit | letter).plus;
    radix = (digits & char('r')).wrapper(
        (ds, r) => ds
    );
    fraction = (dot & digits).wrapper(
        (period, ds) => ds
    );
    // NOTE(review): this is the same shape as radix (digits followed by 'r')
    // rather than fraction (dot followed by digits); confirm it is intended.
    extendedFraction = (extendedDigits & char('r')).wrapper(
        (ds, r) => ds
    );
    // NOTE(review): both arms of the conditional yield ds, so a '-' sign on
    // the exponent is parsed but discarded.
    exponent = (char('e') & char('-').opt & digits).wrapper(
        (e, sgn, ds) => sgn == null ? ds : ds //?
    );
    decimalNum = char('-').opt & digits & fraction.opt & exponent.opt;
    radixNum = radix & char('-').opt & extendedDigits & extendedFraction.opt & exponent.opt;
    nmr = radixNum | decimalNum;

    /* must distinguish internal use of productions from use as tokens */
    number = tokenFor(nmr);
    letter = new PredicateTokenParser((c) => isLetter(c), 'letter expected');

    specialCharacter =
        char('+') | char('/') |
        char('\\') | char('*') |
        char('~') | char('<') | char('>') |
        char('=') | char('@') |
        char('%') | char('|') |
        char('&') | char('?') |
        char('!') | char(',') | char('`');

    character = digit | letter | specialCharacter | //?
        char('[') | char(']') |
        char('{') | char('}') | char('(') |
        char(')') | char('^') | char(';') | char('\$') | char('#') |
        char(':') | char('.') | char('-') | char('_') | char('`')
        /* the Smalltalk grammar neglects to add - to characters, or to comments.
         * It does add | [char: $' ], but these are both bugs.
         * We intend to support underscores, which Squeak insists on turning into assignment arrows.
         * However, we do not support these as assignments.
         * At the moment, we do not accept them in identifiers, but this will change in time.
         * */;

    id = (letter & (letter | digit | char('_')).star).wrapper(
        (fst, snd) => '$fst${new StringBuffer()..writeAll(snd)}'
    );

    identifier = tokenFor(id);

    // The predicate must take the candidate character as its argument, as the
    // one-argument predicates handed to PredicateTokenParser by CharParser and
    // by `letter` above do; a zero-argument closure cannot be called that way.
    Char =
        char('\$') & new PredicateTokenParser((c) => true, "");

    characterConstant = tokenFor(Char);

    twoQuotes = (char("'") & char("'")).wrapper((q1, q2) => "'");

    // fold (not inject) is the Dart method for concatenating the collected
    // elements into one string.
    str = (char("'") & stringBody & char("'")).wrapper(
        (oq, es, eq) => es.fold('', (s, e) => '$s$e')
    );

    string = tokenFor(str);

    kw = (id & char(':')).wrapper((i, c) => '$i:');

    kws = kw.plus.wrap((c) => c.fold('', (s, e) => '$s$e'));

    keyword = tokenFor(kw);

    sym = str | kws | binSel | id;

    symbol = tokenFor(sym);

    commentDelimiter = char('"');

    beginComment = commentDelimiter;

    endComment = commentDelimiter;

    twoDblQuotes = (char('"') & char('"')).wrapper((q1, q2) => '"');

    comment = beginComment & commentBody & endComment;

    binSel = ((specialCharacter | char('-')) & specialCharacter.opt & specialCharacter.opt).wrapper(
        (c1, c2, c3) => '${c1 == null? "": c1}${c2 == null? "": c2}${c3 == null? "": c3}'); //asSymbol?

    binarySelector = tokenFor(binSel);

    assign = (char(':') & char('=')).wrapper((c, e) => ':=');

    assignment = tokenFor(assign | char('_')); /* Temporary hack */

    /* syntacticGrammar */

    symbolConstant = pound & symbol;

    array = (lparen & (number | symbolConstant | symbol | string | characterConstant | arrayConstant | array).star & rparen)
        |
        /* Byte array literal */
        (lbracket & number.star & rbracket);

    arrayConstant = pound & array;

    tuple = lcurly & expression.starSeparatedOrTerminatedBy(dot) & rcurly;

    literal = number | symbolConstant | characterConstant | string | arrayConstant | tuple;

    variableName = identifier;

    unarySelector = new UnarySelectorParser(this); /* the one hack/flaw. See UnarySelector parser for details */

    parenthesizedExpression = lparen & expression & rparen;

    primary = variableName | literal | block | parenthesizedExpression;

    unaryExpression = primary & unarySelector.star;

    binaryMsg = binarySelector & unaryExpression;

    binaryExpression = unaryExpression & binaryMsg.star;

    keywordMsg = (keyword & binaryExpression).plus;

    keywordExpression = binaryExpression & keywordMsg.opt;

    cascadeMsg = semicolon & (keywordMsg | binaryMsg | unarySelector);

    cascadedMessageExpression = keywordExpression & cascadeMsg.star;

    assignmentLHS = variableName & assignment;

    expression = assignmentLHS.star & cascadedMessageExpression;

    returnStatement = hat & expression & dot.opt;

    furtherStatements = dot & statements;

    statementSequence = (expression & furtherStatements.opt) | furtherStatements;

    statements = returnStatement | statementSequence | empty;

    blockParameter = colon & variableDecl;

    blockParameters = blockParameter.plus & vbar;

    varDecls = vbar & variableDecl.star & vbar;

    temporaries = varDecls;

    codeBody = temporaries.opt & pragma.star & statements;

    block = (lbracket & blockParameters.opt & codeBody & rbracket) | (lbracket & blockParameter.star & rbracket);

    variableDecl = identifier;

    unaryMsgPattern = unarySelector;

    binaryMsgPattern = binarySelector & variableDecl;

    keywordMsgPattern = (keyword & variableDecl).plus;

    messagePattern = unaryMsgPattern | binaryMsgPattern | keywordMsgPattern;

    /* Yuck, a pragma can appear before or after |temps| */
    pragma = langlebracket & (unarySelector | (keyword & (literal | variableDecl)).plus) & ranglebracket;

    methodBody = pragma.star & codeBody;

    method = messagePattern & methodBody & eoi; /* A method in a browser */

    /* Top level productions for classes */

    methodDecl = messagePattern & equalSign & lparen & methodBody & rparen;

    category = string & methodDecl.star;

    classComment = whitespace & comment; /* A hack, to preserve comments from a complete class declaration */

    classBody = lparen & classComment.opt & varDecls.opt & varDecls.opt & varDecls.opt & category.star & rparen;

    classSideDecl = colon & classBody;

    languageId = identifier;

    classCategory = string /*.opt */;

    classFormat =
        token('class') |
        token('weakclass') |
        token('variableclass') |
        token('variablebyteclass') |
        token('variablewordclass');

    classDefinition = classCategory & classFormat & identifier & equalSign & identifier & classBody & classSideDecl.opt;

    extendedClass = token('extensions') & identifier & equalSign & lparen & category.star & rparen & colon & lparen & category.star & rparen;

    packageName = string;

    package = languageId & token('package') & packageName & equalSign & lparen & classDefinition.star & extendedClass.star & rparen & eoi;
  }

  // as yet unclassified

  /* Builds the scanner for the text between a comment's double quotes.
     NOTE(review): every branch of the callback is a bare expression statement
     and there is no return, so the callback evaluates to null on all paths;
     input.position is restored only when the character after the current one
     is not a double quote. The callback is left exactly as written because the
     contract expected by CollectingCommentParser is not visible here. */
  CombinatorialParser get commentBody {

    /* As an optimization, we process the body of a comment with a dedicated scanning parser.
       It should be equivalent to:

       return (character | aWhitespaceChar | char("'") | twoDblQuotes).star */
    return new CollectingCommentParser( (input) {
      var c = input.peek;
      if (c == null) false; /* let main routine handle end of input */
      else if (c == '"') false;
      else {
        var pos = input.position;
        input.next;
        if (input.peek == '"') false; else {input.position = pos; true;}
      }});
  }


  /* Builds the scanner for the text between a string literal's single quotes.
     NOTE(review): same caveat as commentBody -- the callback has no return and
     evaluates to null on all paths; it is left exactly as written. */
  CombinatorialParser get stringBody {

    /* As an optimization, we process the body of a string with a dedicated scanning parser.
       It should be equivalent to:

       return (character | aWhitespaceChar | char('"') | twoDblQuotes).star */
    return new CollectingCommentParser((input) {
      var c = input.peek;
      if (c == null) false; /* let main routine handle end of input */
      else if (c == "'") false;
      else {
        var pos = input.position;
        input.next;
        if (input.peek == "'") false; else {input.position = pos; true;}
      }});
  }

  /* Returns true when the first UTF-16 code unit of c is an ASCII letter:
     97..122 (a-z) or 65..90 (A-Z). */
  bool isLetter(c) {
    int code = c.codeUnitAt(0);
    return (code >= 97 && code <= 122) || (code >= 65 && code <= 90);
  }
}



--------------------------------------------------------------------------------
/smalltalkParser.dart:
--------------------------------------------------------------------------------
/*
Copyright 2008 Cadence Design Systems, Inc.
Copyright 2013 Google Inc.

Licensed under the Apache License, Version 2.0 (the ''License'');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*/


library smalltalk_parser;
import 'CombinatorialParsing.dart';
import 'smalltalkGrammar.dart';


/* Node holding the text of a string literal. */
class StringAST extends AST {
  String val;
  StringAST(this.val);
}


/* Node for a declared variable; type is null when none is given. */
class VarDeclAST extends AST {
  var name, type;
  VarDeclAST(this.name, this.type);
}



/* Node for a package: its name, its classes and its class extensions. */
class PackageAST extends AST {
  var name, classes, extendedClasses;
  PackageAST(this.name, this.classes, this.extendedClasses);
}

/* Node for a message send; sel must be a String and args a List. */
class MessageAST extends AST {
  var sel, args;
  MessageAST(this.sel, this.args) {
    assert(sel is String);
    assert(args is List);
  }
}


/* Node for a method: its message pattern, body and visibility. */
class MethodAST extends AST {
  var pattern, body, visibility;
  MethodAST(this.pattern, this.body, this.visibility);
}

/* Base class of all nodes. start/end delimit the node; the concrete variants
   fall back to start/end until cStart/cEnd are set explicitly. */
class AST {
  var start, end, cStart, cEnd;

  get concreteEnd => cEnd == null ? end : cEnd;
  set concreteEnd(p) => cEnd = p;
  get concreteStart => cStart == null ? start : cStart;
  set concreteStart(p) => cStart = p;
}

/* Node for one side (instance or class) of a class declaration. */
class SideAST extends AST {
  var vars, categories, classComment, classPoolVars, sharedPoolVars;
}

/* Node for a class declaration. */
class ClassAST extends AST {
  var format, name, superclassName, instanceSide, classSide, category;
}

/* Node for a method header: selector plus formal parameters. */
class MessagePatternAST extends AST {
  var selector, parameters, returnType, typePattern;
  MessagePatternAST(this.selector, this.parameters);
}

/* Node for a named category of methods. */
class CategoryAST extends AST {
  var name, methods;
  CategoryAST(this.name, this.methods);
}




/* Extends the grammar by wrapping selected productions so that parsing them
   yields the AST nodes declared above instead of raw parse results. */
class SmalltalkParser extends SmalltalkGrammar {
  /* Binary method header: selector token plus its single parameter. */
  get binaryMsgPattern {
    return super.binaryMsgPattern.wrapper((Token sel, p) {
      var params = [p];
      return new MessagePatternAST(sel.token, params)
        ..start = sel.start
        ..end = p.end;
    });
  }

  /* Category: ends at its last method, or at its name when it has none. */
  get category {
    return super.category.wrapper((StringAST cn, List ms) {
      var end = ms.isEmpty ? cn.end : ms.last.end;
      return new CategoryAST(cn.val, ms)
        ..start = cn.start
        ..end = end;
    });
  }

  /* Concatenates the elements of cs, each preceded by a space.
     NOTE(review): confirm the separating space is intended. */
  String charsToString(List cs) => cs.fold('', (s, c) => '$s $c');


  /* Class body: instance variables default to an empty list when absent. */
  get classBody {
    return super.classBody.wrapper((Token lp, cmnt, instVars, classPoolVars, sharedPoolVars, List categories, Token rp) {
      var vs = instVars == null ? [] : instVars;
      return new SideAST()
        ..vars = vs
        ..categories = categories
        ..classComment = cmnt
        ..start = lp.start
        ..end = rp.end
        ..classPoolVars = classPoolVars
        ..sharedPoolVars = sharedPoolVars;
    });
  }

  /* Class comment: concatenates the elements of c[2] into one string.
     NOTE(review): comment is begin & body & end; if that result is indexed
     from zero, c[2] is the closing delimiter rather than the body -- confirm. */
  get classComment {
    return super.classComment.wrapper((ws, c) => (c[2]).fold('', (s, ch) => '$s$ch'));
  }

  /* Class definition: supplies an empty class side and the category
     'Newsqueak-tests' when those parts are absent. */
  get classDefinition {
    return super.classDefinition.wrapper((cat, format, className, eq, superClassName, instSide, classSide) {
      var klassSide = classSide == null
          ? (new SideAST()..vars = [] ..categories = [])
          : classSide;
      var kat = cat == null ? 'Newsqueak-tests' : cat.val;
      return new ClassAST()
        ..name = className.snd
        ..superclassName = superClassName.snd
        ..instanceSide = instSide
        ..classSide = klassSide
        ..start = className.start
        ..end = (classSide == null ? instSide.end : classSide.end)
        ..category = kat
        ..format = format.token;
    });
  }

  /* Class side declaration: drops the leading colon and keeps the side. */
  get classSideDecl => super.classSideDecl.wrapper((cn, side) => side);

  /* Class extension: a ClassAST carrying only a name and the two category lists. */
  get extendedClass {
    return super.extendedClass.wrapper((extensions, name, eq, lp, instCategories, rp, colon, lp2, classCategories, rp2) =>
        new ClassAST()
          ..name = name.snd
          ..instanceSide = (new SideAST()..categories = instCategories)
          ..classSide = (new SideAST()..categories = classCategories)
    );
  }

  /* Keyword method header: collects each keyword's parameter and accumulates
     the keyword tokens into the selector string. */
  get keywordMsgPattern {
    return super.keywordMsgPattern.wrap((List kws) {
      var params = [];
      assert(!kws.isEmpty);
      var sel = kws.fold('', (s, kwp) {
        params.add(kwp.last);
        // The accumulator must be returned; a block-bodied closure without a
        // return yields null, which would leave the selector null.
        // NOTE(review): each keyword is preceded by a space; confirm that the
        // selector is meant to contain these spaces.
        return '$s ${kwp.first.token}';
      });
      return new MessagePatternAST(sel, params)
        ..start = kws.first.first.start
        ..end = params.last.end;
    });
  }

  /* Stand-alone method: always public; only start is recorded. */
  get method {
    return super.method.wrapper((msg, cb, ei) =>
        new MethodAST(msg, cb, 'public')
          ..start = msg.start
          // ..end = cb.end
          // ..concreteEnd = cb.concreteEnd
    );
  }

  /* Method inside a class body: spans from the header to the closing paren. */
  get methodDecl {
    return super.methodDecl.wrapper((msg, eq, lp, cb, rp) =>
        new MethodAST(msg, cb, 'public')
          ..start = msg.start
          ..end = rp.end
    );
  }

  /* Package: absent class lists become empty lists. */
  get package {
    return super.package.wrapper((langId, pkg, name, eq, lp, classes, extendedClasses, rp, end) =>
        new PackageAST(name.val,
            classes == null ? [] : classes,
            extendedClasses == null ? [] : extendedClasses
        )
    );
  }

  /* String literal token converted to a StringAST with the token's span. */
  get string {
    return super.string.wrap((t) => new StringAST(t.token)
      ..start = t.start
      ..end = t.end);
  }

  /* Unary method header: selector taken from the wrapped unary selector node. */
  get unaryMsgPattern {
    return super.unaryMsgPattern.wrap((sel) =>
        new MessagePatternAST(sel.sel, [])
          ..start = sel.start
          ..end = sel.end
    );
  }

  /* Unary selector token converted to an argument-less MessageAST. */
  get unarySelector {
    return super.unarySelector.wrap((u) =>
        new MessageAST(u.token, [])
          ..start = u.start
          ..end = u.end
    );
  }

  /* Variable declarations: widens the concrete span of the first and last
     declaration to include the surrounding vertical bars. */
  get varDecls {
    return super.varDecls.wrapper((vb1, vds, vb2) {
      if (!vds.isEmpty) {
        vds.first.concreteStart = vb1.start;
        vds.last.concreteEnd = vb2.end;
      }
      return vds;
    });
  }

  /* Single variable declaration with no type. */
  get variableDecl {
    return super.variableDecl.wrapper((Token n) =>
        new VarDeclAST(n.token, null)
          ..start = n.start
          ..end = n.end
    );
  }

}
--------------------------------------------------------------------------------
/smalltalkRevisedParser.dart:
--------------------------------------------------------------------------------
/*
Copyright 2008 Cadence Design Systems, Inc.
Copyright 2013 Google Inc.

Licensed under the Apache License, Version 2.0 (the ''License'');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*/


library smalltalk_revised_parser;
import 'CombinatorialParsing.dart';
import 'smalltalkGrammarRevised.dart';


/* Node holding the text of a string literal. */
class StringAST extends AST {
  String val;
  StringAST(this.val);
}


/* Node for a declared variable; type is null when none is given. */
class VarDeclAST extends AST {
  var name, type;
  VarDeclAST(this.name, this.type);
}



/* Node for a package: its name, its classes and its class extensions. */
class PackageAST extends AST {
  var name, classes, extendedClasses;
  PackageAST(this.name, this.classes, this.extendedClasses);
}

/* Node for a message send; sel must be a String and args a List. */
class MessageAST extends AST {
  var sel, args;
  MessageAST(this.sel, this.args) {
    assert(sel is String);
    assert(args is List);
  }
}


/* Node for a method: its message pattern, body and visibility. */
class MethodAST extends AST {
  var pattern, body, visibility;
  MethodAST(this.pattern, this.body, this.visibility);
}

/* Base class of all nodes. start/end delimit the node; the concrete variants
   fall back to start/end until cStart/cEnd are set explicitly. */
class AST {
  var start, end, cStart, cEnd;

  get concreteEnd => cEnd == null ? end : cEnd;
  set concreteEnd(p) => cEnd = p;
  get concreteStart => cStart == null ? start : cStart;
  set concreteStart(p) => cStart = p;
}

/* Node for one side (instance or class) of a class declaration. */
class SideAST extends AST {
  var vars, categories, classComment, classPoolVars, sharedPoolVars;
}

/* Node for a class declaration. */
class ClassAST extends AST {
  var format, name, superclassName, instanceSide, classSide, category;
}

/* Node for a method header: selector plus formal parameters. */
class MessagePatternAST extends AST {
  var selector, parameters, returnType, typePattern;
  MessagePatternAST(this.selector, this.parameters);
}

/* Node for a named category of methods. */
class CategoryAST extends AST {
  var name, methods;
  CategoryAST(this.name, this.methods);
}




/* Extends the revised grammar by wrapping selected productions so that
   parsing them yields the AST nodes declared above instead of raw results. */
class SmalltalkParser extends SmalltalkGrammarRevised {
  /* Binary method header: selector token plus its single parameter. */
  get binaryMsgPattern {
    return super.binaryMsgPattern.wrapper((Token sel, p) {
      var params = [p];
      return new MessagePatternAST(sel.token, params)
        ..start = sel.start
        ..end = p.end;
    });
  }

  /* Category: ends at its last method, or at its name when it has none. */
  get category {
    return super.category.wrapper((StringAST cn, List ms) {
      var end = ms.isEmpty ? cn.end : ms.last.end;
      return new CategoryAST(cn.val, ms)
        ..start = cn.start
        ..end = end;
    });
  }

  /* Concatenates the elements of cs, each preceded by a space.
     NOTE(review): confirm the separating space is intended. */
  String charsToString(List cs) => cs.fold('', (s, c) => '$s $c');


  /* Class body: instance variables default to an empty list when absent. */
  get classBody {
    return super.classBody.wrapper((Token lp, cmnt, instVars, classPoolVars, sharedPoolVars, List categories, Token rp) {
      var vs = instVars == null ? [] : instVars;
      return new SideAST()
        ..vars = vs
        ..categories = categories
        ..classComment = cmnt
        ..start = lp.start
        ..end = rp.end
        ..classPoolVars = classPoolVars
        ..sharedPoolVars = sharedPoolVars;
    });
  }

  /* Class comment: concatenates the elements of c[2] into one string.
     NOTE(review): comment is begin & body & end; if that result is indexed
     from zero, c[2] is the closing delimiter rather than the body -- confirm. */
  get classComment {
    return super.classComment.wrapper((ws, c) => (c[2]).fold('', (s, ch) => '$s$ch'));
  }

  /* Class definition: supplies an empty class side and the category
     'Newsqueak-tests' when those parts are absent. */
  get classDefinition {
    return super.classDefinition.wrapper((cat, format, className, eq, superClassName, instSide, classSide) {
      var klassSide = classSide == null
          ? (new SideAST()..vars = [] ..categories = [])
          : classSide;
      var kat = cat == null ? 'Newsqueak-tests' : cat.val;
      return new ClassAST()
        ..name = className.snd
        ..superclassName = superClassName.snd
        ..instanceSide = instSide
        ..classSide = klassSide
        ..start = className.start
        ..end = (classSide == null ? instSide.end : classSide.end)
        ..category = kat
        ..format = format.token;
    });
  }

  /* Class side declaration: drops the leading colon and keeps the side. */
  get classSideDecl => super.classSideDecl.wrapper((cn, side) => side);

  /* Class extension: a ClassAST carrying only a name and the two category lists. */
  get extendedClass {
    return super.extendedClass.wrapper((extensions, name, eq, lp, instCategories, rp, colon, lp2, classCategories, rp2) =>
        new ClassAST()
          ..name = name.snd
          ..instanceSide = (new SideAST()..categories = instCategories)
          ..classSide = (new SideAST()..categories = classCategories)
    );
  }

  /* Keyword method header: collects each keyword's parameter and accumulates
     the keyword tokens into the selector string. */
  get keywordMsgPattern {
    return super.keywordMsgPattern.wrap((List kws) {
      var params = [];
      assert(!kws.isEmpty);
      var sel = kws.fold('', (s, kwp) {
        params.add(kwp.last);
        // The accumulator must be returned; a block-bodied closure without a
        // return yields null, which would leave the selector null.
        // NOTE(review): each keyword is preceded by a space; confirm that the
        // selector is meant to contain these spaces.
        return '$s ${kwp.first.token}';
      });
      return new MessagePatternAST(sel, params)
        ..start = kws.first.first.start
        ..end = params.last.end;
    });
  }

  /* Stand-alone method: always public; only start is recorded. */
  get method {
    return super.method.wrapper((msg, cb, ei) =>
        new MethodAST(msg, cb, 'public')
          ..start = msg.start
          // ..end = cb.end
          // ..concreteEnd = cb.concreteEnd
    );
  }

  /* Method inside a class body: spans from the header to the closing paren. */
  get methodDecl {
    return super.methodDecl.wrapper((msg, eq, lp, cb, rp) =>
        new MethodAST(msg, cb, 'public')
          ..start = msg.start
          ..end = rp.end
    );
  }

  /* Package: absent class lists become empty lists. */
  get package {
    return super.package.wrapper((langId, pkg, name, eq, lp, classes, extendedClasses, rp, end) =>
        new PackageAST(name.val,
            classes == null ? [] : classes,
            extendedClasses == null ? [] : extendedClasses
        )
    );
  }

  /* String literal token converted to a StringAST with the token's span. */
  get string {
    return super.string.wrap((t) => new StringAST(t.token)
      ..start = t.start
      ..end = t.end);
  }

  /* Unary method header: selector taken from the wrapped unary selector node. */
  get unaryMsgPattern {
    return super.unaryMsgPattern.wrap((sel) =>
        new MessagePatternAST(sel.sel, [])
          ..start = sel.start
          ..end = sel.end
    );
  }

  /* Unary selector token converted to an argument-less MessageAST. */
  get unarySelector {
    return super.unarySelector.wrap((u) =>
        new MessageAST(u.token, [])
          ..start = u.start
          ..end = u.end
    );
  }

  /* Variable declarations: widens the concrete span of the first and last
     declaration to include the surrounding vertical bars. */
  get varDecls {
    return super.varDecls.wrapper((vb1, vds, vb2) {
      if (!vds.isEmpty) {
        vds.first.concreteStart = vb1.start;
        vds.last.concreteEnd = vb2.end;
      }
      return vds;
    });
  }

  /* Single variable declaration with no type. */
  get variableDecl {
    return super.variableDecl.wrapper((Token n) =>
        new VarDeclAST(n.token, null)
          ..start = n.start
          ..end = n.end
    );
  }

}
--------------------------------------------------------------------------------
/testDartGrammar.dart:
--------------------------------------------------------------------------------
/*
Copyright 2013 Google Inc.

Licensed under the Apache License, Version 2.0 (the ''License'');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
*/
library test_dart_grammar;

import 'CombinatorialParsing.dart';
import 'dart_runnable_grammar.dart' as dartRunnableGrammar;
import 'dart_executable_grammar.dart' as dartExecutableGrammar;

/* Parses input with production and prints a FAIL line if parsing throws. */
accept(production, input) {
  try {
    production.parse(input);
    // print("PASS: Accepted <$input>");
  } catch(e) {
    print("FAIL: Should have accepted <$input>");
  }
}

/* Parses input with production and prints a FAIL line if parsing succeeds.
   The report is issued outside the try block so that the catch-all handler,
   which is there to absorb the expected parse error, cannot swallow it. */
reject(production, input) {
  var accepted = true;
  try {
    production.parse(input);
  } catch(e) {
    accepted = false;
    // print("PASS: Rejected <$input>");
  }
  if (accepted) {
    print("FAIL: Should have rejected <$input>");
  }
}

/* Runs the accept/reject checks for identifiers, numeric literals and boolean
   literals against the given grammar. */
test(grammar) {
  for (var input in ['foo', 'bar9', 'dollar\$', '_foo', '_bar9', '_dollar\$',
                     '\$', ' leadingSpace']) {
    accept(grammar.identifier, input);
  }
  for (var input in ['9', '3foo', '']) {
    reject(grammar.identifier, input);
  }

  for (var input in ['0', '1984', ' 1984', '-1984',
                     '0xCAFE', '0XCAFE', '0xcafe', '0Xcafe', '0xCaFe', '0XCaFe',
                     '3e4', '3e-4', '-3e4', '-3e-4',
                     '3E4', '3E-4', '-3E4', '-3E-4',
                     '-3.14E4', '-3.14E-4', '-3.14', '3.14']) {
    accept(grammar.numericLiteral, input);
  }
  for (var input in ['-3e--4', '5.', '-0xCAFE', '-0XCAFE', 'CAFE', '0xGHIJ',
                     '-', '']) {
    reject(grammar.numericLiteral, input);
  }

  for (var input in ['true', 'false', ' true', ' false']) {
    accept(grammar.booleanLiteral, input);
  }
  for (var input in ['9', '"foo"', "'foo'", 'TRUE', 'FALSE', 'null', '0xCAFE']) {
    reject(grammar.booleanLiteral, input);
  }

  // Do we want to use the consuming or non-consuming variant of #not?
  //accept(grammar.stringLiteral, '"foo"');
  //accept(grammar.stringLiteral, "'foo'");
}

/* Runs the checks against both Dart grammar implementations. */
main() {
  print('Runnable:');
  test(new dartRunnableGrammar.DartGrammar());
  print('Executable:');
  test(new dartExecutableGrammar.DartGrammar());
}

--------------------------------------------------------------------------------
/testParserCombinators.dart:
--------------------------------------------------------------------------------
/*
Copyright 2013 Google Inc.

Licensed under the Apache License, Version 2.0 (the ''License'');
you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | */ 9 | library test_combinatorial_parsing; 10 | 11 | import 'CombinatorialParsing.dart'; 12 | import 'smalltalkRevisedParser.dart'; 13 | import 'package:unittest/unittest.dart'; 14 | 15 | /* Toy grammar: abc is the characters a, b, c in sequence; de is d followed by one or more e; alt accepts either; start accepts either after optional whitespace. */ class Test1 extends RunnableGrammar { 16 | // CombinatorialParser abc, de, start, alt; 17 | Test1() { 18 | abc = char('a') & char('b') & char('c'); 19 | de = char('d') & char('e').plus; 20 | alt = abc | de; 21 | start = whitespace.opt & (abc | de); 22 | } 23 | } 24 | 25 | /* Toy grammar: number is one or more characters in the range 0 to 9. */ class Test2 extends RunnableGrammar { 26 | // CombinatorialParser digit, number; 27 | Test2() { 28 | digit = charBetween("0", "9"); 29 | number = digit.plus;//number & digit | digit; 30 | } 31 | } 32 | 33 | /* Smoke test: parses sample inputs with Test1, Test2 and SmalltalkParser and prints each result. The parse of "d" is wrapped in a try so that a ParserError is printed instead of propagating. */ main() { 34 | var x; 35 | Test1 t1 = new Test1(); 36 | print(x = t1.start.parse("abc")); 37 | print (x = t1.whitespace.opt.parse(" ")); 38 | print(x = t1.start.parse("deeee")); 39 | try {print(t1.start.parse("d"));} on ParserError catch(p){print(p);}; 40 | 41 | Test2 t2 = new Test2(); 42 | print(t2.number.parse("0")); 43 | print(t2.number.parse("1234")); 44 | 45 | //SmalltalkGrammar t3 = new SmalltalkGrammar(); 46 | SmalltalkParser t3 = new SmalltalkParser(); 47 | print(x = t3.identifier.parse('b1234')); 48 | print(x = t3.expression.parse('foo := self bar: x bam baz *2 + 4')); 49 | print(x = t3.method.parse('''forEach: x 50 | foo := self bar: x bam baz *2 + 4. 51 | [:x | x**2] value: 3. 52 | ^#shazam''')); 53 | } --------------------------------------------------------------------------------