├── LICENSE ├── README.md ├── examples ├── allocs_test.nim ├── ex_errors.nim └── node_location_test.nim ├── glossolalia.babel └── src ├── glossolalia.nim ├── glossolalia_v1.nim ├── glossolalia_v2.nim ├── glossolalia_v3.nim └── parsers └── xjson.nim /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to http://unlicense.org -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | γλωσσολαλία 2 | =========== 3 | 4 | "Peace and safety; then sudden destruction cometh upon them, as in the metacircular evaluator described in section 5.4.2" http://kingjamesprogramming.tumblr.com 5 | -------------------------------------------------------------------------------- /examples/allocs_test.nim: -------------------------------------------------------------------------------- 1 | import glossolalia, strutils 2 | 3 | type 4 | MyNode = ref object 5 | value: string 6 | 7 | proc `$` (n:MyNode): string = n.value 8 | 9 | var allocations = 0 10 | grammar(MyNode): 11 | ident := 12 | (chr(IdentStartChars) and *chr(IdentChars)).save do (match:string)->MyNode: 13 | inc allocations 14 | MyNode(value: match) 15 | idents := 16 | +(present(ident) and ident and *chr(' ')) 17 | 18 | # lets see if the save function is called twice or four times 19 | let r = idents.match("x y") 20 | echo "allocations = ", allocations 21 | -------------------------------------------------------------------------------- /examples/ex_errors.nim: -------------------------------------------------------------------------------- 1 | import glossolalia, strutils 2 | 3 | type 4 | NodeKind = enum 5 | nkInt, nkStr, nkIdent, 6 | nkInfix 7 | Node = object 8 | case k: NodeKind 9 | of nkInt: 10 | i: int 11 | of nkStr .. 
nkIdent: 12 | s: string 13 | of nkInfix: 14 | sub: seq[Node] 15 | 16 | type ParserError = object of Exception 17 | loc: MatchLocation 18 | proc failParsing [N] (fn: proc(state:InputState):string): Rule[N] = 19 | Rule[N]( 20 | m: proc(input:var InputState): Match[N] = 21 | let exc = (ref ParserError)( 22 | loc: input.loc(input.pos), 23 | msg: fn(input)) 24 | raise exc 25 | ) 26 | 27 | grammar(Node): 28 | 29 | str_lit := 30 | chr('"') and 31 | ((chr('"').absent and str_char).repeat(0).save do (m:string)->Node: 32 | Node(k:nkStr, s:m)) and 33 | chr('"') 34 | 35 | const printableChr = {'\32' .. '\126'} 36 | str_char := 37 | chr('\\') and chr('"') or 38 | chr(printableChr) or 39 | (failParsing[Node]() do (input:InputState) -> string: 40 | "Invalid character in string: \\x$#" % toHex(input.currentChar.ord, 2) 41 | ) 42 | 43 | try: 44 | echo str_lit.match("\"he\Lllo\"") 45 | except ParserError: 46 | let c = (ref ParserError)(getCurrentException()) 47 | echo c.msg 48 | echo " at ", c.loc 49 | 50 | -------------------------------------------------------------------------------- /examples/node_location_test.nim: -------------------------------------------------------------------------------- 1 | import glossolalia 2 | 3 | type Node = object 4 | s: string 5 | locA,locB: MatchLocation 6 | 7 | grammar(Node): 8 | 9 | proc dbgNode [N] (r:Rule[N]): Rule[N] = 10 | Rule[N]( 11 | m: proc(input:var InputState): Match[N] = 12 | result = r.m(input) 13 | if result.kind == mNodes: 14 | for n in result.nodes: 15 | echo n 16 | ) 17 | 18 | proc saveNode (match:string; locA,locB:MatchLocation): Node = 19 | Node(s: match, locA: locA, locB: locB) 20 | 21 | whitespace := 22 | +(str("\r\L") or chr({'\L',' ','\t'})) 23 | 24 | test_rule := 25 | ?whitespace and 26 | chr('A').repeat(0,3).save(saveNode).dbgNode and 27 | ?whitespace and 28 | chr('B').repeat(0,3).save(saveNode).dbgNode and 29 | ?whitespace and 30 | chr('C').repeat(0,3).save(saveNode).dbgNode and 31 | ?whitespace 32 | 33 | let tests = [ 
34 | ("AAABBBCCC", [ 35 | ((0,0),(0,2)), 36 | ((0,3),(0,5)), 37 | ((0,6),(0,8))]), 38 | ("AAA\LBBB\LCCC", [ 39 | ((0,0),(0,2)), 40 | ((1,0),(1,2)), 41 | ((2,0),(2,2))]), 42 | ("\LAAA\LBBB\LCCC\L", [ 43 | ((1,0),(1,2)), 44 | ((2,0),(2,2)), 45 | ((3,0),(3,2))]), 46 | ("\r\LAAA\r\LBBB\r\LCCC\r\L", [ 47 | ((1,0),(1,2)), 48 | ((2,0),(2,2)), 49 | ((3,0),(3,2))]) 50 | ] 51 | for test in tests: 52 | let (str, indices) = test 53 | let m = test_rule.match(str) 54 | do_assert m.kind == mNodes 55 | 56 | for idx, iset in indices: 57 | let n = m.nodes[idx] 58 | let locA = n.locA 59 | let locB = n.locB 60 | do_assert iset[0] == (locA.line,locA.col), "failed locA test $# for #$# ($#)".format(iset[0], idx, locA) 61 | do_assert iset[1] == (locB.line,locB.col), "failed locB test $# for #$# ($#)".format(iset[1], idx, locB) 62 | echo "_______" 63 | 64 | -------------------------------------------------------------------------------- /glossolalia.babel: -------------------------------------------------------------------------------- 1 | [Package] 2 | name: "glossolalia" 3 | version: "1.0" 4 | author: "fowl" 5 | description: "A DSL to quickly write parsers" 6 | license: "CC0" 7 | 8 | srcDir: "src" 9 | 10 | [Deps] 11 | requires: "nimrod > 0.9.2, fowltek >= 0.9.3" 12 | -------------------------------------------------------------------------------- /src/glossolalia.nim: -------------------------------------------------------------------------------- 1 | ## 2 | ## γλωσσολαλία 3 | ## 4 | ## Revision 4 5 | ## Added string representation of rules (`$` operator) 6 | ## save()d nodes are prefixed with their address (0xffff...) and their rule will 7 | ## only be shown once to prevent recursion. 8 | ## Optimization: while looking ahead, do not call save() callbacks 9 | ## this will prevent allocations for patterns under present() and absent() 10 | ## Optimization: capture position and length instead of allocating 11 | ## new strings. 
Now, allocations only happen on a save() or at 12 | ## the end, if no nodes are consumed. 13 | ## `&` and `|` changed to `and` and `or`, the precedence works better 14 | ## 15 | ## Revision 3 16 | ## Rules are generic, they match to a generic AST node. 17 | ## Use `save(rule) do (match: string|seq[N]) -> N: MyNode(...)` 18 | ## to save a capture 19 | ## 20 | ## Revision 2 21 | ## Rules match to build a JSON tree. This ends up not working well. 22 | ## 23 | ## Revision 1 24 | ## Rules return strings. 25 | ## Operators implemented: 26 | ## combination `&`, `|`, `*`, `+`, `?` 27 | ## Basic matchers 28 | ## str(), chr() 29 | ## 30 | import 31 | strutils, future 32 | # fowltek/maybe_t 33 | export 34 | strutils, future 35 | # maybe_t 36 | 37 | type 38 | InputState* = object 39 | str*: string 40 | len*,pos*: int 41 | lookingAhead*: bool 42 | newlines*: seq[int] 43 | 44 | MatchKind* = enum mNope, mUnrefined, mString, mNodes 45 | Match*[N] = object 46 | case kind*: MatchKind 47 | of mNope: 48 | nil 49 | of mUnrefined: 50 | pos*, len*: int 51 | of mString: 52 | str*: string 53 | of mNodes: 54 | nodes*: seq[N] 55 | 56 | RuleToStr* [N] = proc(r: Rule[N]; uniq: var seq[uint]): string 57 | Rule* [N] = ref RuleObj[N] 58 | RuleObj* [N] = object 59 | m*: proc(input:var InputState): Match[N] 60 | to_string*: proc(r: Rule[N]; uniq: var seq[uint]): string 61 | 62 | {.deprecated: [ 63 | TMatch: Match, 64 | TMatchKind: MatchKind, 65 | TInput: InputState, 66 | TMatchResult: Match 67 | ]} 68 | 69 | converter toBool* (some: Match): bool = 70 | some.kind != mNope 71 | 72 | template matchf (body:stmt):expr{.immediate,dirty.}= 73 | (proc(input: var InputState): Match[N] = 74 | body) 75 | 76 | proc toHex (r:Rule): string = 77 | "0x" & toHex(cast[uint](r).BiggestInt, sizeof(pointer)*2) 78 | 79 | from unsigned import `==` 80 | export unsigned.`==` 81 | 82 | template tostrf (body:stmt):expr{.immediate,dirty.} = 83 | (proc(r:Rule[N]; uniq: var seq[uint]): string = 84 | if cast[uint](r) in uniq: 
return toHex(r) 85 | body) 86 | 87 | proc toStr* [N] (r: Rule[N]; uniq: var seq[uint]): string = 88 | if r.to_string.isNil: return "???" 89 | return r.to_string(r, uniq) 90 | 91 | proc `$`* [N] (r: Rule[N]): string = 92 | var uniq = newSeq[uint]() 93 | result = r.toStr(uniq) 94 | 95 | proc currentChar* (I:InputState):char = I.str[I.pos] 96 | 97 | # template chrMatcher (N, chars): expr {.immediate.} = 98 | # (proc (input: var InputState): Match[N] = 99 | # if input.currentChar in chars: 100 | # result = Match[N]( 101 | # kind: mUnrefined, 102 | # pos: input.pos, 103 | # len: 1 104 | # ) 105 | # input.pos.inc) 106 | 107 | const printableAscii = {'\32' .. '\126'} 108 | template printableChar (c:char): string = 109 | (if c in printableAscii: $c else: "\\x"& toHex(c.BiggestInt, 2)) 110 | 111 | proc named* [N] (r: Rule[N]; name: string not nil): Rule[N] = 112 | # attaches a name to a rule, useful for nicer rule-to-string? 113 | # (tentative) 114 | Rule[N]( 115 | to_string: (tostrf do: result = name), 116 | m: (matchf do: result = r.m(input)) 117 | ) 118 | 119 | proc charMatcher* [N] (chrs: set[char]): Rule[N] = 120 | Rule[N]( 121 | to_string: (tostrf do: 122 | if chrs.card == 1: 123 | result = "'" 124 | for c in chrs: 125 | result.add printableChar(c) 126 | result.add '\'' 127 | return 128 | #im not proud of this 129 | result = "[" 130 | var 131 | cur = '\0' 132 | last = '\0' 133 | hasLast = false 134 | for c in chrs: 135 | if c != succ(cur): 136 | if hasLast: 137 | result.add '-' 138 | result.add printableChar(cur) 139 | result.add printableChar(c) 140 | last = c 141 | cur = c 142 | hasLast = true 143 | if hasLast and last != cur: 144 | result.add '-' 145 | result.add printableChar(cur) 146 | result.add ']' 147 | ), 148 | m: matchf do: 149 | if input.currentChar in chrs: 150 | result = Match[N]( 151 | kind: mUnrefined, 152 | pos: input.pos, 153 | len: 1 154 | ) 155 | input.pos.inc 156 | ) 157 | 158 | proc charMatcher* [N] (chrs: varargs[char]): Rule[N] = 159 | let chrs 
= @chrs 160 | return Rule[N]( 161 | to_string: (tostrf do: 162 | if chrs.len == 1: 163 | result = "'" 164 | result.add printableChar(chrs[0]) 165 | result.add '\'' 166 | return 167 | 168 | result = "[" 169 | for c in chrs: result.add printableChar(c) 170 | result.add ']'), 171 | m: matchf do: 172 | if input.currentChar in chrs: 173 | result = Match[N]( 174 | kind: mUnrefined, 175 | pos: input.pos, 176 | len: 1 177 | ) 178 | input.pos.inc 179 | ) 180 | 181 | proc strMatcher* [N] (str: string): Rule[N] = 182 | # Matches a string, case sensitive 183 | Rule[N]( 184 | to_string: (tostrf do: 185 | result = "'" 186 | for c in items(str): result.add printableChar(c) 187 | #result.add str 188 | result.add '\''), 189 | m: matchf do: 190 | if input.str.continuesWith(str, input.pos): 191 | result = Match[N]( 192 | kind: mUnrefined, 193 | pos: input.pos, 194 | len: str.len 195 | ) 196 | input.pos.inc str.len 197 | ) 198 | 199 | proc accumulate [N] (matches: varargs[Match[N]]): Match[N] = 200 | # saves positive matches by joining arrays of 201 | # saved AST nodes or concatenating raw strings 202 | assert matches.len > 0 203 | 204 | #try to find saved nodes 205 | var found_nodes = false 206 | for it in matches: 207 | if it.kind == mNodes: 208 | if result.kind != mNodes: 209 | result = Match[N](kind: mNodes, nodes: it.nodes) 210 | found_nodes = true 211 | else: 212 | result.nodes.add it.nodes 213 | if found_nodes: return 214 | 215 | #all strings, add up the captures 216 | result = Match[N](kind: mUnrefined, pos: matches[0].pos) 217 | var high = result.pos + matches[0].len 218 | for i in 1 .. 
0: 327 | result = accumulate(results) 328 | else: 329 | result = good_match[N](input,0) 330 | ) 331 | 332 | 333 | proc repeat* [N] (R:Rule[N]; min:int): Rule[N] = 334 | Rule[N]( 335 | to_string: (tostrf do: 336 | result = "(" 337 | result.add toStr(R, uniq) 338 | result.add "){" 339 | result.add($min) 340 | result.add '}'), 341 | m: matchf do: 342 | var matches = 0 343 | let startPos = input.pos 344 | var results: seq[Match[N]] = @[] 345 | 346 | while input.pos < input.len: 347 | if (let match = R.m(input); match): 348 | results.add match 349 | inc matches 350 | continue 351 | break 352 | 353 | if matches < min: 354 | input.pos = startPos 355 | #result = match_fail 356 | else: 357 | if matches > 0: 358 | result = accumulate(results) 359 | else: 360 | result = good_match[N](input,0) 361 | ) 362 | proc `+`* [N] (R:Rule[N]): Rule[N] = R.repeat(1) 363 | proc `*`* [N] (R:Rule[N]): Rule[N] = R.repeat(0) 364 | proc `?`* [N] (R:Rule[N]): Rule[N] = R.repeat(0,1) 365 | 366 | proc join* [N] (r, on: Rule[N]; min,max = 0): Rule[N] = 367 | # Join a rule on another rule in the sequence (r & (on & r).repeat(min,max)) 368 | # `on & r` must repeat `min` times 369 | # `max` may be 0 to match forever 370 | r & (if max > 0: (on & r).repeat(min,max) else: (on & r).repeat(min)) 371 | 372 | proc high_pos* [N] (match: Match[N]): int = match.pos + match.len - 1 373 | 374 | proc save_tostr [N] (rule: Rule[N]): RuleToStr[N] = 375 | return (tostrf do: 376 | uniq.add cast[uint](r) 377 | result = toHex(r) 378 | result.add ":(" 379 | result.add tostr(rule, uniq) 380 | result.add ')') 381 | 382 | proc save* [N] (R:Rule[N]; cb: proc(match:string): N): Rule[N] = 383 | # store a string as an `N` 384 | # use it to catch butterflies! 
385 | Rule[N]( 386 | to_string: save_tostr[N](R), 387 | m: matchf do: 388 | result = R.m(input) 389 | if result.kind == mUnrefined and not input.lookingAhead: 390 | result = good_match[N]( 391 | cb(input.str.substr(result.pos, result.high_pos)) 392 | ) 393 | ) 394 | 395 | 396 | 397 | 398 | import macros 399 | macro echoCode (x:varargs[untyped]): stmt = 400 | var call = newCall("echo") 401 | let high = len(x)-1 402 | for i in 0 .. high: 403 | let item = x[i] 404 | let code = repr(item) & ": " 405 | call.add( 406 | quote do: `code`, 407 | quote do: `item` 408 | ) 409 | if i < high: 410 | call.add newLit ", " 411 | 412 | if len(call) > 1: 413 | return call 414 | 415 | 416 | type MatchLocation* = object 417 | line*, col*: int 418 | index*: int 419 | 420 | const CrLfMask = 1 shl <(sizeof(int)*8) 421 | proc binarySearch (a: openArray[int], key: int): int = 422 | ## binary search for `key` in `a`. Returns insertion index. 423 | ## adapted from nim stdlib algorithm module 424 | var b = len(a) 425 | while result < b: 426 | var mid = (result + b) div 2 427 | if (a[mid] and not CrLfMask) < key: result = mid + 1 428 | else: b = mid 429 | if result >= len(a): result = -1 430 | 431 | 432 | proc findLineCol (input:InputState; index:int): MatchLocation = 433 | result.index = index 434 | 435 | if input.newlines.len == 0: 436 | # no newlines edgecase 437 | result.line = 0 438 | result.col = index 439 | return 440 | 441 | var line = input.newlines.binarySearch(index) 442 | # echoCode index, line, input.newlines 443 | 444 | if line == -1: line = len(input.newlines) 445 | var line_index, newline_size: int 446 | if line == 0: 447 | line_index = 0 448 | newline_size = 0 449 | else: 450 | line_index = input.newlines[ N: 525 | result = cb(@[]) 526 | 527 | proc match* [N] (rule:Rule[N]; str:string): Match[N] = 528 | var input = InputState(str: str, pos: 0, len: str.len) 529 | result = rule.m(input) 530 | if result and result.kind == mUnrefined: 531 | let high = result.len+result.pos-1 532 | 
result = Match[N]( 533 | kind: mString, 534 | str: input.str.substr(result.pos, high) 535 | ) 536 | 537 | 538 | 539 | 540 | 541 | import macros 542 | 543 | 544 | proc `:=`* [N] (a, b: Rule[N]) = 545 | # update rule a, set its matcher to rule b 546 | # you can use this to refer to rules before 547 | # they're initialized. 548 | a.m = b.m 549 | a.to_string = b.to_string 550 | discard """ a.tos = b.tos 551 | a.tos_alt = b.tos_alt """ 552 | proc newRule* [N] (): Rule [N] = 553 | # returns an uninitialized rule. you should give semantics 554 | # with `myrule := chr('G','T',...)` 555 | Rule[N]() 556 | 557 | macro genGrammar(TNode:expr; body:stmt):stmt {.immediate.}= 558 | # accepts a grammar in the form of `rulename := 559 | # 560 | ## digit := 561 | # digits <- repeat(digit, many(chr( {'0' .. '9'} )) 562 | # 563 | # you can refer to a rule here before it is defined 564 | # 565 | assert body.kind == nnkStmtList 566 | result = newStmtList() 567 | let varDecl = newNimNode(nnkVarSection) 568 | result.add varDecl 569 | 570 | for i in 0 .. 
< len(body): 571 | let s = body[i] 572 | if s.kind == nnkInfix and $(s[0]) in [":=","<-"]: 573 | varDecl.add newIdentDefs( 574 | s[1], 575 | newEmptyNode(), 576 | newNimNode(nnkCall).add( 577 | newNimNode(nnkBracketExpr).add(ident"newRule", TNode))) 578 | result.add s[1].infix(":=", s[2]) 579 | else: 580 | result.add s 581 | 582 | when defined(Debug): 583 | echo repr(result) 584 | 585 | 586 | #proc save (R:Rule[TNode]; cb:proc(match:string):TNode): Rule[TNode] = saveMatcher[TNode](cb) 587 | #template save (a,b): expr = saveMatcher[TNode](a,b) 588 | #template repeat(r,min): expr = repeatMatcher[TNode](r, min) 589 | #proc repeat (a,b): expr = repeatMatcher[TNode](a,b) 590 | #proc repeat (a,b,c): Rule[TNode] = repeatMatcher[TNode](a,b,c) 591 | template grammar* (TNode: expr; body: stmt): stmt {.immediate.} = 592 | proc chr (chars: varargs[char]): Rule[TNode] = charMatcher[TNode](chars) 593 | proc chr (chars: set[char]): Rule[TNode] = charMatcher[TNode](chars) 594 | proc str (str: string): Rule[TNode] = strMatcher[TNode](str) 595 | 596 | proc stri (s: string): Rule[TNode] = 597 | # case insensitive str 598 | # probably more efficient to use a regex rule here 599 | # example input: "a_b" 600 | # output: chr('a','A') and chr('_') and chr('b','B') 601 | 602 | template m(c): expr = 603 | (if c in strutils.Letters: charMatcher[TNode](c.toLower, c.toUpper) else: charMatcher[TNode](c)) 604 | result = m(s[0]) 605 | for i in 1 .. 
high(s): 606 | result = result and m(s[i]) 607 | 608 | # template accum (x): stmt = 609 | # if result.isNil: 610 | # result = x 611 | # else: 612 | # result = result and x 613 | 614 | # for character in s.items: 615 | # if character in strutils.Letters: 616 | # accum charMatcher[TNode](character.toLower, character.toUpper) 617 | # else: 618 | # accum charMatcher[TNode](character) 619 | 620 | proc keyword (s: string): Rule[TNode] = 621 | str(s) and charMatcher[TNode](strutils.IdentChars).absent 622 | 623 | genGrammar(TNode, body) 624 | 625 | 626 | 627 | 628 | 629 | 630 | when isMainModule: 631 | 632 | template echoCode (expression):stmt = 633 | echo astToStr(Expression), ": ", expression 634 | 635 | when true: 636 | block: 637 | grammar(int): 638 | space := chr(' ','\t','\L') 639 | digit := chr({'0'..'9'}) 640 | digits := +digit 641 | number := digits.save do (str: string) -> int: parseInt(str) 642 | numbers := number and *(space and number) 643 | 644 | echoCode digits.match("12311234") 645 | echoCode number.match("9001") 646 | echoCode numbers.match("99 44 11 6") 647 | echoCode stri("ballx").match("BAllX") 648 | 649 | -------------------------------------------------------------------------------- /src/glossolalia_v1.nim: -------------------------------------------------------------------------------- 1 | import fowltek/maybe_t, strutils 2 | 3 | type 4 | TMatcher* = proc(input:string;start:var int): TMaybe[string] 5 | Rule* = ref object 6 | matcher*: TMatcher 7 | 8 | proc newRule* (matcher: TMatcher): Rule = 9 | Rule(matcher: matcher) 10 | 11 | proc `&`* (a,b: Rule): Rule = 12 | # match `a` followed by `b` 13 | newRule(proc(input:string;start:var int): TMaybe[string] = 14 | let ZZ = start 15 | 16 | let x = a.matcher(input,start) 17 | if x.has: 18 | let y = b.matcher(input,start) 19 | if y.has: 20 | return just(x.val & y.val) 21 | 22 | start = ZZ 23 | ) 24 | 25 | proc `|`* (a,b: Rule): Rule = 26 | # match `a` or `b` 27 | newRule(proc(input:string; start:var int): 
TMaybe[string] = 28 | let ZZ = start 29 | 30 | let x = a.matcher(input,start) 31 | if x.has: 32 | return just(x.val) 33 | 34 | start = ZZ 35 | let y = b.matcher(input,start) 36 | if y.has: 37 | return just(y.val) 38 | 39 | start = ZZ 40 | ) 41 | 42 | 43 | proc str* (s:string): Rule = 44 | # match a string 45 | newRule(proc(input:string; start:var int): TMaybe[string] = 46 | if input.continuesWith(s, start): 47 | result = just(s) 48 | inc start, len(s) 49 | ) 50 | 51 | 52 | proc chr* (chrs: varargs[char]): Rule = 53 | # match one of a set of characters 54 | let chrs = @chrs 55 | newRule(proc(input:string;start:var int): TMaybe[string] = 56 | if input[start] in chrs: 57 | result = just($ input[start]) 58 | inc start 59 | ) 60 | proc chr* (chr: set[char]): Rule = 61 | # match one of a set of characters 62 | newRule(proc(input: string; start: var int): TMaybe[string] = 63 | if input[start] in chr: 64 | result = just($ input[start]) 65 | inc start 66 | ) 67 | 68 | 69 | 70 | 71 | proc repeat* (R:Rule; min: int): Rule = 72 | # Repeat a minimum `min` times 73 | newRule(proc(input:string; start:var int): TMaybe[string] = 74 | var i = 0 75 | let ZZ = start 76 | result.val = "" 77 | while true: 78 | let ret = r.matcher(input,start) 79 | if not ret.has: break 80 | result.val.add ret.val 81 | inc i 82 | if start > input.high: break 83 | 84 | if i >= min: 85 | result.has = true 86 | else: 87 | start = ZZ 88 | ) 89 | 90 | template reset_return : stmt = 91 | result.has = false 92 | result.val = "" 93 | return 94 | 95 | proc repeat* (R:Rule; slc: TSlice[int]): Rule = 96 | # Repeat a rule min..max times 97 | newRule(proc (input:string; start:var int): TMaybe[string] = 98 | let ZZ = start 99 | var i = 0 100 | 101 | result.val = "" 102 | 103 | while i <= slc.b: 104 | if start > input.high: break 105 | if (let ret = r.matcher(input,start); ret.has): 106 | result.val.add ret.val 107 | else: break 108 | inc i 109 | 110 | if i >= slc.a: 111 | result.has = true 112 | else: 113 | start = ZZ 
114 | 115 | ) 116 | 117 | proc `+`* (rule: Rule): Rule = 118 | # match 1 or more 119 | result = rule.repeat(1) 120 | discard """ newRule(proc (input:string; start:var int): TMaybe[string] = 121 | result.val = "" 122 | while true: 123 | let (has,str) = rule.matcher(input,start) 124 | if has: 125 | result.has = true 126 | result.val.add str 127 | if not has or start > input.high: 128 | break 129 | ) """ 130 | proc many* (R:Rule): Rule {.inline.} = 131 | # match 1 or more 132 | +r 133 | 134 | proc `?`* (R:Rule): Rule = 135 | # consume 1 or 0 (always pass) 136 | newRule(proc(input:string; start:var int):TMaybe[string]= 137 | result = just(r.matcher(input,start).val) 138 | if result.val.isNil: result.val = "" 139 | ) 140 | proc maybe* (R:Rule): Rule {.inline.}= 141 | # consume 1 or 0 (always pass) 142 | ?r 143 | 144 | proc `*`* (R:Rule): Rule {.inline.} = 145 | # consume 0 or many (always pass) 146 | # this is just `?many(R)` 147 | #?many(R) 148 | repeat(R, 0) 149 | 150 | proc present* (R:Rule): Rule = 151 | # look ahead, returns true if the pattern matches but does 152 | # not consume 153 | newRule(proc(input:string; start:var int): TMaybe[string] = 154 | let x = start 155 | if r.matcher(input,start).has: 156 | result = just("") 157 | start = x 158 | ) 159 | proc absent* (R:Rule): Rule = 160 | # look ahead only, returns true if the pattern was NOT found 161 | newRule(proc(input:string; start:var int): TMaybe[string] = 162 | if not present(R).matcher(input,start).has: 163 | result = just("") 164 | ) 165 | proc `!`* (R: Rule): Rule {.inline.} = 166 | absent(R) 167 | 168 | proc tag (R:Rule; tag:string):Rule = 169 | # TODO 170 | # this will be used to store a match in the resulting tree 171 | # the tree will probably be json because its easy to work with 172 | Rule( 173 | matcher:proc(input:string;start:var int):TMaybe[string] = 174 | result = R.matcher(input,start) 175 | if result.has: 176 | #echo "tag discarded: ", tag, ": ", result.val 177 | ) 178 | 179 | when 
defined(HelperRules) or isMainModule: 180 | # Some helper rules 181 | const 182 | printableChars = {'\x20' .. '\x7E'} 183 | alphaChars = {'A'..'Z', 'a'..'z'} 184 | digits = {'0'..'9'} 185 | identChars = {'A'..'Z','a'..'z','0'..'9','_'} 186 | allChars = {'\x00' .. '\xFF'} 187 | 188 | proc join* (r, on: Rule; min = 0; max = -1): Rule = 189 | # Join a rule on another rule in the sequence 190 | # `on & r` must repeat `min` times 191 | # `max` may be -1 192 | if max > -1: r & (on & r).repeat(min .. max) 193 | else: r & (on & r).repeat(min) 194 | 195 | proc comma_list* (r: Rule; min = 0; max = -1): Rule = 196 | join(r, chr(','), min,max) 197 | 198 | proc wrapped_rule (open,close:Rule): proc(inner:Rule): Rule = 199 | # Creates a rule that matches like `open & inner & close` 200 | return proc(inner:Rule): Rule = 201 | when true: 202 | let 203 | open = open 204 | close = close 205 | Rule( 206 | matcher: proc(input:string; start:var int): TMaybe[string] = 207 | let x = start 208 | if open.matcher(input,start): 209 | result = inner.matcher(input,start) 210 | if result.has: 211 | if close.matcher(input,start): 212 | return 213 | #reset the result because the closing rule didnt match 214 | result.has = false 215 | start = x 216 | ) 217 | let 218 | braces* = wrapped_rule(chr('{'), chr('}')) 219 | parens* = wrapped_rule(chr('('), chr(')')) 220 | brackets* = wrapped_rule(chr('['), chr(']')) 221 | 222 | 223 | 224 | proc add (L:var string; R:TMaybe[string]) = 225 | if r.has: L.add r.val 226 | 227 | 228 | discard """ proc comma_list* (R:Rule; postComma: Rule = nil): Rule = 229 | 230 | newRule(proc(input:string; start:var int):TMaybe[string] = 231 | result.val = "" 232 | while(let (has,s) = r.matcher(input,start); has): 233 | result.has = true 234 | result.val.add s 235 | if input[start] != ',': 236 | break 237 | result.val.add ',' 238 | inc start 239 | if not PostComma.isNil: 240 | result.val.add postComma.matcher(input,start) 241 | ) 242 | """ 243 | 244 | 245 | proc keyword* (s: 
string): Rule = 246 | str(s) & absent(chr(identChars)) 247 | 248 | proc stri* (s: string): Rule = 249 | # case insensitive str 250 | # probably more efficient to use a regex rule here 251 | template accum (x): stmt = 252 | if result.isNil: 253 | result = x 254 | else: 255 | result = result & x 256 | 257 | for character in s.items: 258 | if character in alphaChars: 259 | accum chr({character.toLower, character.toUpper}) 260 | else: 261 | accum chr(character) 262 | 263 | 264 | 265 | proc newRule*(): Rule = Rule() 266 | proc `:=`* (a,b:Rule) = 267 | # update rule a, set its matcher to rule b 268 | # you can use this to refer to rules before 269 | # they're initialized. 270 | a.matcher = b.matcher 271 | 272 | 273 | import macros 274 | macro grammar* (name:expr; body:stmt):stmt {.immediate.} = 275 | # accepts a list of statements like 276 | # 277 | # hello := "Hello" 278 | # digits <- many(chr( {'0' .. '9'} )) 279 | # 280 | # you can refer to a rule here before it is defined 281 | # 282 | assert body.kind == nnkStmtList 283 | result = newStmtList() 284 | let varDecl = newNimNode(nnkVarSection) 285 | result.add varDecl 286 | 287 | for i in 0 .. 
< len(body): 288 | let s = body[i] 289 | if s.kind == nnkInfix and $(s[0]) in [":=","<-"]: 290 | varDecl.add newIdentDefs(s[1], newEmptyNode(), newCall("newRule")) 291 | result.add s[1].infix(":=", s[2]) 292 | elif s.kind == nnkAsgn: 293 | varDecl.add newIdentDefs(s[0], newEmptyNode(), newCall("newRule")) 294 | result.add s[0].infix(":=", s[1]) 295 | else: 296 | result.add s 297 | 298 | when defined(Debug): 299 | echo repr(result) 300 | 301 | 302 | proc match* (R:Rule; S:String): TMaybe[string] = 303 | var i = 0 304 | return r.matcher(s,i) 305 | 306 | when isMainModule: 307 | 308 | const 309 | spaceChars = {' ','\t'} 310 | 311 | 312 | grammar(my_grammar): 313 | #so meta 314 | 315 | program = 316 | +(space_newlines | rule_decl) & ?space_newlines 317 | 318 | rule_decl = 319 | name & ?space & 320 | (str("=") | str(":=") | str("<-")) & 321 | ?space & mainRule 322 | 323 | nl = chr({'\L'}) 324 | space = 325 | +chr(spaceChars) 326 | space_newlines = 327 | +( 328 | space | nl | 329 | (chr('#') & +(absent(chr({'\x00','\L'})) & chr(allChars))) 330 | ) 331 | 332 | name = chr({'A'..'Z','a'..'z','_'}) & *chr({'A'..'Z','a'..'z','_','0'..'9'}) 333 | 334 | mainRule = join(prefixRule, ?space & bin_op & ?space_newlines, 0) 335 | 336 | prefixChar = chr({'?','!','+','*'}) 337 | bin_op = chr({'&','|'}) 338 | 339 | prefixRule = ?(prefixChar & *(space & prefixChar)) & ?space & baseRule 340 | 341 | func_chr = 342 | keyword("chr") & 343 | parens( 344 | (?space & nim_char_set & ?space) | 345 | comma_list(?space & nim_char_lit & ?space) 346 | ) 347 | 348 | baseRule = 349 | ( keyword("str") & parens(?space & str_lit & ?space) ) | 350 | func_chr | 351 | 352 | ( (keyword("comma") )) | 353 | ( parens(mainRule) ) 354 | str_lit = 355 | chr('\"') & 356 | (repeat( 357 | ( chr('\\')& ( 358 | chr('"') | 359 | (chr('x') & repeat(chr({'A'..'F','a'..'f','0'..'9'}), 2..2)) 360 | )) | 361 | chr(allChars-{'"'}), 0 362 | )) & 363 | chr('\"') 364 | nim_char_lit = 365 | chr('\'') & 366 | ( 367 | (chr('\\') & 
368 | ( chr({'\'','L'}) | 369 | chr('x') & repeat(chr({'A'..'F','a'..'f','0'..'9'}), 2..2) 370 | ) 371 | ) | 372 | chr(allChars-{'\''}) 373 | ) & 374 | chr('\'') 375 | nim_char_set = 376 | braces(comma_list(?space & nim_char_lit & ?(?space & str("..") & ?space & nim_char_lit), 0)) 377 | 378 | comma = chr(',') 379 | 380 | let r = program #(?space & mainrule & ?space) 381 | 382 | echo r.match("""rule1 = ?chr('x')""") 383 | 384 | when defined(repl): 385 | import rdstdin,tables 386 | block: 387 | 388 | const helptext = """ 389 | Commands: 390 | def-rules 391 | show-rules 392 | """ 393 | 394 | echo helptext 395 | var rules = initTable[string,Rule]() 396 | 397 | 398 | 399 | var input = "" 400 | var line: string 401 | while readlineFromStdin("> ", line): 402 | 403 | if (line.len == 0 and input.len > 0) or keyword("end").match(line): 404 | let m = program.match(input) 405 | echo m 406 | input.setLen 0 407 | else: 408 | input.add line 409 | input.add '\L' 410 | 411 | #quit 1 412 | 413 | # minitest thing 414 | template test (name:expr; body:stmt): stmt {.immediate.} = 415 | block thisBlock: 416 | template check (cond): stmt = 417 | if not cond: 418 | echo name, " [Failed] `", astToStr(cond), '`' 419 | break thisBlock 420 | body 421 | echo name, " [Passed]" 422 | template testAll (name:expr; body:stmt): stmt{.immediate.}= 423 | # like test but checks can fail 424 | block fowl_is_cool: 425 | template check(cond): stmt = 426 | when not definedInScope(failed): 427 | var failed{.inject.} = newSeq[string](0) 428 | if not cond: 429 | failed.add astToStr(cond) 430 | body 431 | if failed.len > 0: 432 | if failed.len == 1: 433 | echo name, " [", failed.len, " Failed] `", failed[0], '`' 434 | else: 435 | echo name, " [", failed.len, " Failed]" 436 | for expression in failed: 437 | echo " `", expression, '`' 438 | failed.setLen 0 439 | else: 440 | echo name, " [Passed]" 441 | 442 | 443 | 444 | block: 445 | const digit = {'0'..'9'} 446 | grammar(sendy): 447 | ws_soft = +chr(' ','\t') 448 
| ws = +chr(' ','\t','\L') 449 | 450 | 451 | anyChar = chr({'\x00'..'\xFF'}) 452 | operator = +chr({'!','@','~','$','%','^','&','*','-','+','\\','/','<','>','?','.'}) 453 | assignment_op = +operator & chr('=') 454 | 455 | literal = str_lit | float_lit | int_lit 456 | float_lit = ?chr({'-','+'}) & +chr(digit) & chr('.') & +chr(digit) 457 | int_lit = ?chr({'-','+'}) & +chr(digit) 458 | str_lit = 459 | chr('"') & 460 | *('"'.chr.absent & anyChar) & 461 | chr('"') 462 | 463 | ident = chr({'A'..'Z','a'..'z','_'}) & *chr({'A'..'Z','a'..'z','_','-','0'..'9'}) 464 | message = 465 | ident & 466 | ?parens(comma_list(?ws & expression & ?ws, 0) | ?ws) 467 | 468 | base_expr = literal | parens(?ws & expression & ?ws) 469 | postfix_expr = 470 | (base_expr & *(ws_soft & message)) | 471 | message.join(ws_soft) 472 | prefix_expr = ?(operator & ?ws_soft) & postfix_expr 473 | binary_expr = prefix_expr & ?many(?ws_soft & operator & ?ws & prefix_expr) 474 | expression = binary_expr 475 | 476 | testall "Idents": 477 | check ident.match("x") 478 | check ident.match("print-line") 479 | 480 | testall "expressions": 481 | check expression.match "1 + 2" 482 | check expression.match "-1 abs" 483 | check expression.match("x foo(42)") 484 | check expression.match("""stdout print-line("Hello")""") 485 | check expression.match("actor message(foo, 1) linked-message(1.2)") 486 | 487 | quit 0 488 | 489 | 490 | # example usage for a weird js-like language 491 | # there is a test suite below 492 | 493 | 494 | block: 495 | grammar(js_like): 496 | 497 | ws <- 498 | many(chr({' ', '\t', '\L'})) 499 | ws_soft <- 500 | many(chr({' ', '\t'})) 501 | 502 | 503 | ident <- 504 | chr({'A'..'Z', 'a'..'z', '_'}) & *chr(identChars) 505 | 506 | int_lit <- 507 | +chr(digits) 508 | str_lit <- 509 | chr('"') & 510 | *(absent(chr('"')) & chr({char.low..char.high})) & 511 | chr('"') 512 | 513 | operator <- +chr({'!','@','~','$','%','^','&','*','-','+','\\','/','<','>','?','='}) 514 | 515 | 516 | base_expr <- 517 | int_lit 
| ident | str_lit | parens(?ws & expression & ?ws) 518 | postfix_expr <- 519 | base_expr & 520 | ?many( 521 | ( 522 | chr('.') & 523 | ident 524 | ) | 525 | ( 526 | parens(?ws & ?(commaList(?ws & expression & ?ws_soft,0) & ?ws)) 527 | ) | 528 | ( 529 | brackets(?ws & commaList(?ws & expression & ?ws_soft, 0).maybe) 530 | ) 531 | ) 532 | prefix_expr <- 533 | ?(operator & ?ws) & postfix_expr 534 | binary_expr <- 535 | prefix_expr & ?many(?ws_soft & operator & ?ws & prefix_expr) 536 | 537 | expression <- 538 | binary_expr 539 | 540 | braces_statements <- 541 | braces(?ws & statement_list & ?ws) 542 | statements_or_single <- 543 | (?ws & braces_statements) | 544 | (?ws & statement) 545 | 546 | assignment <- 547 | ident & ?ws_soft & 548 | chr('=') & ?ws & 549 | expression 550 | 551 | if_statement <- 552 | ( 553 | keyword("if") & ?ws & expression & 554 | statements_or_single 555 | ) & 556 | 557 | *( 558 | ?ws & keyword("elseif") & ws & expression & 559 | statements_or_single 560 | ) & 561 | 562 | ?( 563 | ?ws & keyword("else") & 564 | statements_or_single 565 | ) 566 | 567 | while_statement <- 568 | keyword("while") & ws & expression & statements_or_single 569 | 570 | return_statement <- 571 | keyword("return") & ws & expression 572 | 573 | statement <- 574 | return_statement | 575 | if_statement | 576 | while_statement | 577 | assignment | 578 | expression 579 | 580 | statement_list <- 581 | join(statement, ?ws_soft & chr({'\L',';'}) & ?ws) 582 | 583 | 584 | func_def <- 585 | keyword("fn") & ws & 586 | ident.tag("name") & ?ws & 587 | parens(comma_list(?ws & ident & ?ws) | ?ws) & ?ws & 588 | braces(?ws & statement_list & ?ws) 589 | 590 | toplevel_stmt <- 591 | func_def 592 | 593 | program <- 594 | ?ws & join(toplevel_stmt, ?ws, 0) & ?ws 595 | 596 | 597 | 598 | proc `=~` (S:String; L:Rule): bool = 599 | var i = 0 600 | result = l.matcher(s, i) 601 | if result and i < s.len: 602 | result = false 603 | 604 | 605 | 606 | 607 | 608 | testAll "stri()": 609 | check 
stri("FoO").match("foo") 610 | check stri("FoO").match("FOO") 611 | 612 | testAll "Numbers": 613 | check match(int_lit, "1") 614 | check match(int_lit, "6969") 615 | check match(expression, "911") 616 | test "Strings": 617 | check "\"\"" =~ str_lit 618 | check "\"hi\"" =~ str_lit 619 | check(not("\"hi" =~ str_lit)) 620 | 621 | test "Identifiers": 622 | check match(ident, "u") 623 | check match(ident, "x_y") 624 | check(not match(ident, "3e")) 625 | 626 | testAll "Whitespace": 627 | check match(?ws, " ") 628 | check match(?ws, "") 629 | check match(?ws_soft, " ") 630 | check match(?ws_soft, "") 631 | check(not match(ws_soft, "\L")) 632 | 633 | 634 | testAll "Assignment": 635 | check "y=2" =~ assignment 636 | check "x =\L 4" =~ assignment 637 | 638 | testAll "Expressions": 639 | check "42" =~ expression 640 | check "x + 1" =~ expression 641 | check "1 + (2-3)" =~ expression 642 | check "true" =~ expression 643 | check "hello(1)" =~ expression 644 | check "sup()" =~ expression 645 | 646 | testAll "Statements": 647 | check "if x {\L 42\L }" =~ statement 648 | check "if true {1} elseif false {0}" =~ statement 649 | check "if true {1} elseif false {0} else {3}" =~ statement 650 | check "42+1" =~ statements_or_single 651 | check "while false {print(\"Hello!\"); break}" =~ statement 652 | 653 | testAll "Function defintion": 654 | check("fn no_args() { return 1 }\L" =~ func_def & ?ws) 655 | check("fn two_args(a,b) {1+2}" =~ func_def) 656 | 657 | testAll "Program": 658 | check("""fn main() { 659 | print("Hello") 660 | } 661 | fn fib(x) { 662 | if x < 2 { return x } 663 | return fib(x-1)+fib(x-2) 664 | } 665 | """ =~ program) 666 | -------------------------------------------------------------------------------- /src/glossolalia_v2.nim: -------------------------------------------------------------------------------- 1 | import 2 | json,strutils, 3 | fowltek.maybe_t 4 | 5 | 6 | type 7 | TInput = object 8 | len, pos: int 9 | str: string 10 | 11 | TMatchKind* = enum 12 | 
mUnrefined, mJson 13 | TPositiveMatch = object 14 | case kind*: TMatchKind 15 | of mUnrefined: str*:string 16 | of mJSON: j*: PJsonNode 17 | TMatchResult = TMaybe[TPositiveMatch] 18 | TMatcher* = proc(input: var TInput): TMatchResult 19 | 20 | Rule* = ref object 21 | matcher: TMatcher 22 | 23 | proc newRule (m:TMatcher): Rule = 24 | Rule(matcher:m) 25 | 26 | proc `$`* (R: TPositiveMatch): string = 27 | case r.kind 28 | of mUnrefined: 29 | result = r.str 30 | of mJson: 31 | result = $r.j 32 | proc match* (R: Rule; str: string): TMatchResult = 33 | var input = TInput(len: str.len, str: str, pos: 0) 34 | result = r.matcher(input) 35 | 36 | template matchf(body:stmt):expr {.immediate.} = 37 | (proc(input: var TInput): TMatchResult = 38 | body) 39 | 40 | proc mk_unref (s:string): TMatchResult = 41 | just(TPositiveMatch(kind: mUnrefined, str: s)) 42 | proc mk_j (j:PJsonNode): TMatchResult = 43 | just(TPositiveMatch(kind: mJson, j: j)) 44 | 45 | proc isArray (r:TMatchResult): bool = 46 | r.has and r.val.kind == mJson and r.val.j.kind == jArray 47 | 48 | proc high* (i: TInput): int = i.str.high 49 | 50 | let matchFail = Nothing[TPositiveMatch]() 51 | 52 | 53 | template any* (iter, name, cond: expr): expr {.immediate.}= 54 | var res{.gensym.} = false 55 | for name in iter: 56 | if cond: 57 | res = true 58 | break 59 | res 60 | 61 | proc merge* (J1, J2: PJsonNode) = 62 | if j1.kind == jObject and j2.kind == jObject: 63 | for key,val in items(j2.fields): 64 | j1[key] = val 65 | 66 | template testFeature (name;body:stmt):stmt{.immediate.}= 67 | block: 68 | when not defined(failed_tests): 69 | var failed_tests{.inject.}: seq[string] = @[] 70 | 71 | template check (xpr): stmt = 72 | discard """ when not defined(failed_tests): 73 | var failed_tests{.inject.}: seq[string] = @[] """ 74 | if not xpr: 75 | failed_tests.add astToStr(xpr) 76 | 77 | body 78 | if failed_tests.len > 0: 79 | echo name, " [", failed_tests.len, " Failures]" 80 | for f in failed_tests: 81 | echo " ", f 82 | 
failed_tests.setLen 0 83 | else: 84 | echo name, " [Passed]" 85 | 86 | 87 | proc accumJson (results:varargs[TPositiveMatch]): TMatchResult = 88 | # discard any unrefineds 89 | var res = newSeq[TPositiveMatch](results.len) 90 | res.setLen 0 91 | for it in results: 92 | if it.kind == mUnrefined: continue 93 | res.add it 94 | assert res.len > 0 95 | 96 | if res.len == 1: 97 | result = just(res[0]) 98 | return 99 | 100 | # try to merge json objects 101 | var i = 0 102 | when defined(Debug): 103 | echo "res.len: ", res.len 104 | 105 | 106 | var iters = 0 107 | 108 | while i < high(res): 109 | 110 | echo iters, " (", i,")" 111 | 112 | if iters > 5: break 113 | inc iters 114 | 115 | if res[i].j.kind == jObject and res[i+1].j.kind == jObject: 116 | 117 | type TKV = tuple[key:string,val:PJsonNode] 118 | proc keys (J: PJsonNode): seq[string] = 119 | j.fields.map(proc(kv:TKV):string = kv.key) 120 | 121 | let 122 | r1 = res[i].j 123 | r2 = res[i+1].j 124 | r2_keys = r2.keys 125 | 126 | when defined(Debug): 127 | echo "Comparing $# and $#".format(r1,r2) 128 | 129 | proc filter (item:string): bool = 130 | result = item in r2_keys 131 | if result and defined(Debug): 132 | echo "Clash: ", item 133 | 134 | if any(r1.keys, it, filter(it)): 135 | when defined(debug): 136 | echo "Cannot join them." 
137 | inc i, 1 138 | continue 139 | # merge and delete r2 140 | r1.merge r2 141 | res.delete i+1 142 | inc i, 1 143 | 144 | else: 145 | inc i 146 | 147 | 148 | if res.len == 1: 149 | return just(res[0]) 150 | else: 151 | # multiple results, return it as a jarray 152 | var arr = newJarray() 153 | for it in res: 154 | arr.add it.j 155 | 156 | result = mk_j(arr) 157 | 158 | 159 | proc currentChar* (input:TInput): char = input.str[input.pos] 160 | 161 | proc chrMatcher (chars: seq[char]|set[char]): TMatcher = 162 | return (matchf do: 163 | if input.currentChar in chars: 164 | result = mk_unref($ input.currentChar) 165 | input.pos.inc 166 | ) 167 | 168 | proc chr* (chars: varargs[char]): Rule = 169 | newRule(chrMatcher(@chars) ) 170 | proc chr* (chars: set[char]): Rule = 171 | newRule(chrMatcher(chars)) 172 | 173 | proc tag* (R:Rule; name:string): Rule = 174 | newRule( 175 | (matchf do: 176 | result = r.matcher(input) 177 | if result.has: 178 | if result.val.kind == mUnrefined: 179 | result = mk_j(%{ name: %result.val.str }) 180 | else: 181 | result = mk_j(%{ name: result.val.j }) 182 | )) 183 | testFeature "tag()": 184 | check((let r = chr('A').tag("x").match("A"); r.has and r.val.kind == mjson and $r.val.j == "{\"x\": \"A\"}")) 185 | 186 | 187 | proc accumulate (results: varargs[TPositiveMatch]): TMatchResult = 188 | if results.len == 1: return just(results[0]) 189 | 190 | if results.any(it, it.kind == mJson): 191 | result = results.accumJson 192 | else: 193 | # all unrefineds. join them. 
194 | result = just(TPositiveMatch(kind: mUnrefined, str: "")) 195 | for it in results: 196 | result.val.str.add it.str 197 | 198 | proc `&` * (A,B: Rule): Rule = 199 | newRule(matchf do: 200 | let zz = input.pos 201 | 202 | let ma = a.matcher(input) 203 | if ma.has: 204 | let mb = b.matcher(input) 205 | if mb.has: 206 | result = accumulate(ma.val, mb.val) 207 | return 208 | 209 | input.pos = zz 210 | ) 211 | testFeature "& sequence": 212 | let rule = chr('A') & chr('x') 213 | check rule.match("Ax") 214 | check(not rule.match("xlkj")) 215 | 216 | let match_rule = rule.tag("match") 217 | check match_rule.match("Ax").val.j["match"].str == "Ax" 218 | 219 | let test5 = (chr('a') & chr('b') & chr('c') & chr('d')).tag("match") 220 | check test5.match("abcd").val.j["match"].str == "abcd" 221 | 222 | 223 | proc `|`* (A,B: Rule): Rule = 224 | newRule(matchf do: 225 | let zz = input.pos 226 | 227 | if (let (has,m) = a.matcher(input); has): 228 | return just(m) 229 | 230 | input.pos = zz 231 | 232 | if (let (has,m) = b.matcher(input); has): 233 | return just(m) 234 | 235 | input.pos = zz 236 | 237 | ) 238 | testFeature "OR sequence": 239 | let rule = chr('a') | chr('b') 240 | check rule.match("a") 241 | check rule.match("b") 242 | check(not rule.match("c")) 243 | 244 | 245 | proc repeat* (R:Rule; min,max:int): Rule = 246 | newRule(matchf do: 247 | var i = 0 248 | let zz = input.pos 249 | var results: seq[TPositiveMatch] = @[] 250 | 251 | while i < max and input.pos < input.len: 252 | if (let (has,res) = r.matcher(input); has): 253 | results.add res 254 | inc i 255 | else: 256 | break 257 | 258 | if i < min: 259 | input.pos = zz 260 | result = match_fail 261 | else: 262 | if i > 0: 263 | result = accumulate(results) 264 | else: 265 | result = mk_unref("") 266 | ) 267 | proc repeat* (R:Rule; min:int): Rule = 268 | newRule(matchf do: 269 | var i = 0 270 | let zz = input.pos 271 | var results: seq[TPositiveMatch] = @[] 272 | 273 | while input.pos < input.len: 274 | if (let 
(has,res) = r.matcher(input); has): 275 | results.add res 276 | inc i 277 | else: 278 | break 279 | 280 | if i < min: 281 | input.pos = zz 282 | result = match_fail 283 | else: 284 | if i > 0: 285 | result = accumulate(results) 286 | else: 287 | result = mk_unref("") 288 | ) 289 | testFeature "Repeat": 290 | 291 | #echo($chr('x').repeat(1).match("xx")) 292 | #check(not chr('x').repeat(1).match("x").has) 293 | check($chr('x').repeat(1).match("xx") == "xx") 294 | 295 | # repetitions are tagged together 296 | let rule = chr('x').repeat(1).tag("x") 297 | echo rule.match("xx") 298 | check($ chr('x').repeat(1).tag("x").match("xx").val.j == """{"x": "xx"}""") 299 | 300 | # repetitions are merged as an array 301 | block: 302 | #echo chr('x').tag("x").match("xx").val.j 303 | check($chr('x').tag("x").repeat(1).match("xx").val.j == """[{"x": "x"}, {"x": "x"}]""") 304 | 305 | 306 | proc `+`* (R:Rule): Rule = 307 | repeat(R, 1) 308 | proc `*`* (R:Rule): Rule = 309 | repeat(R, 0) 310 | proc `?`* (R:Rule): Rule = 311 | repeat(R, 0,1) 312 | 313 | 314 | block: 315 | const digits = {'0'..'9'} 316 | let 317 | int_lit = chr(digits).repeat(1).tag("x") 318 | space = +chr(' ','\t') 319 | 320 | expression = int_lit & *(space & int_lit) 321 | 322 | block: 323 | let x = expression.match("1 2") 324 | assert x.has and $x.val.j == """[{"x": "1"}, {"x": "2"}]""" 325 | 326 | #echo expression.match("1 2 3") 327 | 328 | 329 | 330 | -------------------------------------------------------------------------------- /src/glossolalia_v3.nim: -------------------------------------------------------------------------------- 1 | # v3, generic rules, build your own nodes with rule.save((x) => node(x)) 2 | 3 | import fowltek/maybe_t, strutils, sequtils,future 4 | export maybe_t, strutils, sequtils, future 5 | 6 | type 7 | TInput* = object 8 | str*: string 9 | len*,pos*:int 10 | 11 | TMatchKind* = enum 12 | mUnrefined, mNode 13 | TPositiveMatch*[N] = object 14 | case kind*: TMatchKind 15 | of mUnrefined: str*: 
string 16 | of mNode: nodes*: seq[N] 17 | TMatchResult*[N] = TMaybe[TPositiveMatch[N]] 18 | 19 | Rule* [N] = ref object 20 | m*: proc(input: var TInput): TMatchResult[N] 21 | tos*,tos_alt*: proc():string 22 | 23 | template wdd* (body:stmt): stmt = 24 | when defined(debug): 25 | body 26 | 27 | proc `$`* [N] (r:Rule[N]): string = 28 | when defined(useAltRuleToSTR): 29 | r.tos_alt() 30 | else: 31 | r.tos() 32 | 33 | proc `$`* [N] (m:TPositiveMatch[N]): string = 34 | mixin `$` 35 | result = case m.kind 36 | of mUnrefined: 37 | if m.str.len > 0: m.str else: "\"\"" 38 | of mNode: 39 | if m.nodes.len == 1: $ m.nodes[0] else: $ m.nodes 40 | 41 | 42 | proc match* [N] (R:Rule[N]; input:string): TMaybe[N] = 43 | # Matches rule R with input, return true if the input was 44 | # saved as a node 45 | var input = TInput(str: input, len: input.len, pos: 0) 46 | if (let (has, res) = R.m(input); has and res.kind == mNode): 47 | assert res.nodes.len == 1 48 | result = just(res.nodes[0]) 49 | proc raw_match* [N] (R:Rule[N]; input:string): TMatchResult[N] = 50 | # Same as match but this will return a string if no information 51 | # is save()'d 52 | var input = TInput(str: input, len: input.len, pos: 0) 53 | result = R.m(input) 54 | 55 | proc good [N] (s:string): TMatchResult[N] = 56 | just(TPositiveMatch[N](kind: mUnrefined, str: s)) 57 | proc good [N] (node:N): TMatchResult[N] = 58 | just(TPositiveMatch[N](kind: mNode, nodes: @[node])) 59 | template match_fail : expr = nothing[TPositiveMatch[N]]() 60 | 61 | proc currentChar* (input:TInput): char = input.str[input.pos] 62 | 63 | template tos_impl(body:stmt): expr {.immediate.}= 64 | (proc:string = body) 65 | template matchf (body:stmt): expr {.immediate.} = 66 | (proc(input: var TInput): TMatchResult[N] = 67 | body) 68 | template chrMatcher(c): expr = 69 | (matchf do: 70 | if input.currentChar in c: 71 | result = good[N]($ input.currentChar) 72 | input.pos.inc) 73 | 74 | proc charMatcher* [N] (chars: varargs[char]): Rule[N] = 75 | let 
chars = @chars 76 | proc tos_f: string = 77 | result = "chr(" 78 | let H = chars.high 79 | for i in 0 .. H: 80 | result.add chars[i] 81 | if i < H: 82 | result.add ',' 83 | result.add ')' 84 | Rule[N]( 85 | tos: tos_f, 86 | tos_alt: tos_f, 87 | m: chrMatcher(chars), 88 | ) 89 | 90 | proc charMatcher* [N] (chars: set[char]): Rule[N] = 91 | Rule[N]( 92 | m: chrMatcher(chars), 93 | tos: () => "chr($#)".format(chars), 94 | tos_alt: () => $chars 95 | ) 96 | 97 | proc strMatcher* [N] (str:string): Rule[N] = 98 | Rule[N]( 99 | tos: () => "str\"$#\"" % str, 100 | tos_alt: () => "str\"$#\"" % str, 101 | m: matchf do: 102 | if input.str.continuesWith(str, input.pos): 103 | result = good[n](str) 104 | input.pos.inc str.len 105 | ) 106 | 107 | template save_tos (R): expr = 108 | () => "save($#)".format(R) 109 | template save_tos_alt (R):expr = 110 | () => "$#.save".format(R) 111 | 112 | proc save* [N] (R:Rule[N]; cb: proc(match:string): N): Rule[N] = 113 | # store a string as an `N` 114 | # use it to catch butterflies! 115 | Rule[N]( 116 | tos: save_tos(R) , 117 | tos_alt: save_tos_alt(R), 118 | m: matchf do: 119 | result = r.m(input) 120 | if result.has and result.val.kind == mUnrefined: 121 | result = good(cb(result.val.str)) 122 | ) 123 | proc save* [N] (R:Rule[N]; cb: proc(match: seq[N]): N): Rule[N] = 124 | Rule[N]( 125 | tos: save_tos(R), 126 | tos_alt: save_tos_alt(R), 127 | m: matchf do: 128 | result = r.m(input) 129 | if result.has and result.val.kind == mNode: 130 | result = good(cb(result.val.nodes)) 131 | ) 132 | template any (iter, name, cond: expr): expr {.immediate.}= 133 | var res{.gensym.} = false 134 | for name in iter: 135 | if cond: 136 | res = true 137 | break 138 | res 139 | 140 | proc accum_repeat* [N] (results: seq[TPositiveMatch[N]]): TPositiveMatch[N] = 141 | ## Accumulate results. 
Nodes are accepted first, otherwise the result strings are concatenated 142 | assert results.len > 0 143 | 144 | if any(results, it, it.kind == mNode): 145 | result = TPositiveMatch[N](kind: mNode, nodes: @[]) 146 | for m in results: 147 | if m.kind == mNode: 148 | result.nodes.add m.nodes 149 | else: 150 | result = TPositiveMatch[N](kind: mUnrefined, str: "") 151 | for it in results: result.str.add it.str 152 | 153 | proc repeat* [N] (R:Rule[N]; min,max:int): Rule[N] = 154 | Rule[N]( 155 | tos: () => "repeat($#, $#,$#)".format(R, min,max), 156 | tos_alt: () => "$#.repeat($#,$#)".format(R, min,max), 157 | m: matchf do: 158 | var matches = 0 159 | let startPos = input.pos 160 | var results: seq[TPositiveMatch[N]] = @[] 161 | 162 | while input.pos < input.len and matches < max: 163 | if (let(has,res) = r.m(input); has): 164 | results.add res 165 | inc matches, 1 166 | continue 167 | break 168 | 169 | if matches < min: 170 | input.pos = startPos 171 | result = match_fail 172 | else: 173 | if matches > 0: 174 | result = just(accum_repeat(results)) 175 | else: 176 | result = good[n]("") 177 | ) 178 | proc repeat* [N] (R:Rule[N]; min:int): Rule[N] = 179 | Rule[N]( 180 | tos: () => "repeat($#, $#)".format(R, min), 181 | tos_alt: () => "$#.repeat($#)".format(R, min), 182 | m: matchf do: 183 | var matches = 0 184 | let startPos = input.pos 185 | var results: seq[TPositiveMatch[N]] = @[] 186 | 187 | while input.pos < input.len: 188 | if (let (has, res) = r.m(input); has): 189 | wdd: 190 | echo res 191 | results.add res 192 | inc matches 193 | continue 194 | break 195 | 196 | wdd: echo matches 197 | if matches < min: 198 | input.pos = startPos 199 | result = match_fail 200 | else: 201 | wdd: echo results 202 | if matches > 0: 203 | result = just(accum_repeat(results)) 204 | else: 205 | result = good[n]("") 206 | ) 207 | proc `+`* [N] (R:Rule[N]): Rule[N] = 208 | # match 1 or more 209 | repeat[N](R, 1) 210 | proc `*`* [N] (R:Rule[N]): Rule[N] = 211 | # Kleene star: match 0+ 
212 | repeat[N](R, 0) 213 | proc `?`* [N] (R:Rule[N]): Rule[N] = 214 | # Option match: 0 or 1 215 | repeat[N](R, 0, 1) 216 | 217 | proc state* (input:TInput): string = 218 | let startPos = max(input.pos-5,0) 219 | let diff = input.pos - startPos 220 | let endPos = startPos + min(80, input.len - startPos) 221 | result = input.str[startPos .. endPos] 222 | result.add '\L' 223 | for i in 0 .. "present($#)" % $R, 229 | tos_alt: () => "$#.present" % $R, 230 | m: matchf do: 231 | let start = input.pos 232 | if r.m(input).has: 233 | result = good[n]("") 234 | input.pos = start 235 | ) 236 | proc absent* [N] (R:Rule[N]): Rule[N] = 237 | Rule[N]( 238 | tos: () => "absent($#)" % $R, 239 | tos_alt: () => "$#.absent" % $R, 240 | m: matchf do: 241 | let start = input.pos 242 | if not r.m(input).has: 243 | result = good[n]("") 244 | input.pos = start 245 | ) 246 | 247 | proc `|`* [N] (A,B:Rule[N]): Rule[N] = 248 | Rule[N]( 249 | tos: () => "($# | $#)".format(A,B), 250 | tos_alt: () => "($# | $#)".format(A,B), 251 | m: matchf do: 252 | let start = input.pos 253 | result = a.m(input) 254 | if result.has: 255 | return 256 | input.pos = start 257 | result = b.m(input) 258 | if result.has: 259 | return 260 | input.pos = start 261 | ) 262 | 263 | proc `&`* [N] (A,B:Rule[N]): Rule[N] = 264 | # TODO report issue: tos_f as a proc failed here with a mysterious error 265 | proc tos_f: string = "($# & $#)".format(A,B) 266 | #template tos_f : expr = () => "($# & $#)".format(A,B) 267 | 268 | Rule[N]( 269 | tos: tos_f, 270 | tos_alt: tos_f, 271 | m: matchf do: 272 | let start = input.pos 273 | if (let (has, m1) = a.m(input); has): 274 | if (let (has, m2) = b.m(input); has): 275 | wdd: echo "Combining ", m1, " and ", m2 276 | result = accum_repeat(@[ m1, m2 ]).just 277 | wdd: echo "Result: ", result 278 | return 279 | else: 280 | wdd: 281 | echo "No has ", b 282 | echo input.state 283 | else: 284 | wdd: 285 | echo "No has ", a 286 | input.pos = start 287 | result = match_fail 288 | ) 289 | 290 
proc join* [N] (r, on: Rule[N]; min,max = 0): Rule[N] =
  ## Matches `r` followed by repetitions of `on & r`, i.e. a list of `r`
  ## separated by `on`:  r & (on & r).repeat(min, max)
  ##
  ## * `min` is the number of times `on & r` must repeat (0 by default).
  ## * `max` caps the repetitions when it is greater than 0; when it is 0
  ##   the bounded overload is not used and the tail repeats without limit.
  let tail = on & r
  r & (if max > 0: tail.repeat(min, max) else: tail.repeat(min))

import macros

proc `:=`* [N] (a, b: Rule[N]) =
  ## Re-points rule `a` at rule `b` by copying b's matcher and both of its
  ## to-string callbacks. `a` keeps its identity, so anything that already
  ## captured `a` sees the new behaviour -- this is what lets a grammar refer
  ## to a rule before its definition has been evaluated.
  a.m = b.m
  a.tos = b.tos
  a.tos_alt = b.tos_alt

proc newRule* [N] (): Rule [N] =
  ## Returns a rule with no matcher and no to-string callbacks set.
  ## Give it semantics afterwards with `myrule := chr('G','T',...)`.
  Rule[N]()

macro genGrammar(TNode:expr; body:stmt):stmt {.immediate.}=
  ## Rewrites a list of statements such as
  ##
  ##   hello := "Hello"
  ##   digits <- many(chr( {'0' .. '9'} ))
  ##
  ## into one leading `var` section that declares every rule name as
  ## `newRule[TNode]()`, followed by `name := rhs` for each rule in source
  ## order. Because all names are declared up front, a rule may mention
  ## another rule that is only defined further down.
  ## Statements that are not `:=` / `<-` infix expressions are emitted as-is.
  assert body.kind == nnkStmtList
  result = newStmtList()
  # the var section is added first so it precedes every assignment
  let varDecl = newNimNode(nnkVarSection)
  result.add varDecl

  for i in 0 .. < len(body):
    let s = body[i]
    if s.kind == nnkInfix and $(s[0]) in [":=","<-"]:
      # var <name> = newRule[TNode]()   (no explicit type, hence the empty node)
      let ctor = newNimNode(nnkCall).add(
        newNimNode(nnkBracketExpr).add(ident"newRule", TNode))
      varDecl.add newIdentDefs(s[1], newEmptyNode(), ctor)
      # both spellings are normalised to `:=`
      result.add s[1].infix(":=", s[2])
    else:
      result.add s

  when defined(Debug):
    echo repr(result)


#proc save (R:Rule[TNode]; cb:proc(match:string):TNode): Rule[TNode] = saveMatcher[TNode](cb)
#template save (a,b): expr = saveMatcher[TNode](a,b)
#template repeat(r,min): expr = repeatMatcher[TNode](r, min)
#proc repeat (a,b): expr = repeatMatcher[TNode](a,b)
#proc repeat (a,b,c): Rule[TNode] = repeatMatcher[TNode](a,b,c)
template grammar* (TNode: expr; body: stmt): stmt {.immediate.} =
  ## Entry point for declaring a grammar whose rules produce `TNode` values.
  ## Declares `chr`, `str`, `stri` and `keyword` helpers specialised to
  ## `TNode` (so rule bodies need no explicit generic arguments) and then
  ## hands `body` to `genGrammar`.
  proc chr (chars: varargs[char]): Rule[TNode] = charMatcher[TNode](chars)
  proc chr (chars: set[char]): Rule[TNode] = charMatcher[TNode](chars)
  proc str (str: string): Rule[TNode] = strMatcher[TNode](str)

  proc stri (s: string): Rule[TNode] =
    # case insensitive str
    # probably more efficient to use a regex rule here
    #
    # Builds a `&` chain with one character rule per input character;
    # letters accept either case. The loop body never runs for an empty
    # `s`, so in that case `result` is left nil.
    template accum (x): stmt =
      if result.isNil:
        result = x
      else:
        result = result & x

    for character in s.items:
      if character in strutils.letters:
        accum charMatcher[TNode](character.toLower, character.toUpper)
      else:
        accum charMatcher[TNode](character)
  proc keyword (s: string): Rule[TNode] =
    # `s` must not be directly followed by an identifier character
    str(s) & charMatcher[TNode](identChars).absent

  genGrammar(TNode, body)






--------------------------------------------------------------------------------
/src/parsers/xjson.nim:
--------------------------------------------------------------------------------
## example json parser
## TODO comply with json standard
## (ECMA-404)
import glossolalia, json
export json.`$`, JsonNode, TJsonNodeKind, json.`[]`, json.`[]=`,
  new_j_array, new_j_bool, new_j_string, new_j_null, new_j_object,
  new_j_int, new_j_float, has_key, pretty, json.`%`,len

proc new_j_array* (elems: seq[JsonNode]): JsonNode =
  ## Builds a JSON array node directly from an existing sequence of elements.
  JsonNode(kind: JArray, elems: elems)


grammar(JsonNode):
  # Rules may reference each other in any order; `grammar` declares every
  # rule name before evaluating any right-hand side.
  value :=
    obj or arr or num or strng or bewl or null
  document :=
    # NOTE(review): the EOF check looks for a '\0' after the last value --
    # presumably relies on the input's terminating NUL being readable; confirm.
    space and value and space and present(chr('\0')) # EOF check

  num :=
    # float is tried first so that "1.5" is not cut short as the int "1"
    num_float or num_int
  num_float :=
    # One or more digits on *both* sides of the point. The fraction used to
    # be a single digit, so "2.35" matched only "2.3" and the enclosing
    # array/object then failed on the leftover '5'.
    (chr({'0'..'9'}).repeat(1) and chr('.') and chr({'0'..'9'}).repeat(1)).save((m:string)->JsonNode => new_j_float(m.parseFloat))
  num_int :=
    (chr({'0'..'9'}).repeat(1).save do (m:string) -> JsonNode: new_j_int(m.parseInt))

  bewl :=
    (str("true") or str("false")).save do (m: string) -> JsonNode: new_j_bool(parseBool(m))
  null :=
    str("null").save do(m:string) -> JsonNode: new_j_null()

  strng :=
    # every character up to the next double quote; escapes are not handled
    quote and
    ((quote.absent and chr({char.low..char.high})).repeat(0).save do (m:string)->JsonNode: new_j_string(m)) and
    quote
  quote :=
    chr('"')

  proc obj_accept (match: seq[JsonNode]): JsonNode =
    # they come in as [key1,val1, key2,val2, key3,val3]
    # Even indices are the key string nodes, odd indices their values.
    result = new_j_object()
    for i in countup(0, high(match), 2):
      result.fields.add((match[i].str, match[i+1]))

  key_value :=
    strng and colon and value
  obj :=
    # NOTE(review): saveNodesOrBlank comes from glossolalia and is not
    # visible here -- presumably it invokes the callback with the collected
    # nodes, or with none when the optional part matched nothing; confirm.
    chr('{') and space and
    (? key_value.join(comma) and space
    ).saveNodesOrBlank(obj_accept) and
    chr('}')

  arr :=
    chr('[') and space and
    (? value.join(comma)
    ).saveNodesOrBlank(new_j_array) and
    space and chr(']')

  space := *(chr({' ','\t','\L'}) or str("\r\L"))
  comma := space and chr(',') and space
  colon := space and chr(':') and space

proc parseJson* (doc: string): JsonNode =
  ## Parses `doc` with the `document` rule and returns the root node.
  ## Raises `EInvalidValue` when the rule does not match.
  # NOTE(review): relies on glossolalia's match result being usable as a
  # bool and exposing `.nodes`; that module is not visible here.
  let N = document.match(doc)
  if N:
    return N.nodes[0]
  else:
    raise newException(EInvalidValue, "Failed to parse JSON")

proc parseFile* (file:string): JsonNode = readFile(file).parseJson
  ## Reads the whole of `file` and parses it with `parseJson`.

when isMainModule:
  # smoke test: parse a small document and print it back
  let x = parseJson("""{
  "int": 1, "flt": 2.0, "b": false, "null":null, "str": "xx", "arr": [1, 2.3, []],

  "objs": [
    {}, {"x":42,"z":9}
  ]
}""")
  echo x.pretty

  # prints an expression's source text next to its string form
  template ec (xpr:expr):stmt =
    echo astToStr(xpr), ": ", $xpr
  ec strng
  ec bewl
  ec num



  when defined(stresstest):
    # Generates (once) a JSON array of roughly `targetBytes` bytes and times
    # how long the grammar takes to parse it.
    const fname = "big.json"
    const line = "{\"int\": 1,\"flt\": 2.0,\"b\": false, \"null\":null, \"str\":\"xx\", \"arr\": [1, 2.3, []], \"obj\": {\"x\":42, \"z\":\"foo\"}}"
    # compile-time integer power by recursive template expansion
    template pow (a,b: static[int]): int =
      when b == 0: 1
      else: a * pow(a, b-1)
    const targetBytes = 10.pow(7)
    import os, times

    if not fname.fileExists or fname.getFilesize < targetBytes:
      let x = open(fname, fmWrite)
      echo "creating big.json"
      let start = epochTime()
      x.writeLn "["
      # The separator is written *between* elements only. Emitting it after
      # the last element as well produced "[ ..., ]", which the `arr` rule
      # (value.join(comma) followed by ']') cannot match.
      var needSep = false
      for i in countup(0, targetBytes, line.len+2):
        if needSep:
          x.write ",\L"
        x.write line
        needSep = true
      x.writeLn "\L]"
      x.close
      echo "finished in ", epochTime() - start, "s"

    when true:

      echo "reading ",fname," (", fname.getFilesize, " bytes)"
      let doc = readFile(fname)

      echo "parsing big.json"
      let start = epochTime()
      #let n = parseJson(doc)
      let n = (space and value and space).match(doc)
      echo "good match: ", toBool(n)
      echo "finished in ", epochTime() - start

      let zz = parseJson("[\L" & line & ",\L" & line & "]\L")
      echo zz

    echo targetBytes
--------------------------------------------------------------------------------