├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── LICENSE
├── README.rst
├── azure-pipelines.yml
├── changelog.rst
├── nimly.nimble
├── src
│   ├── nimly.nim
│   └── nimly
│       ├── lalr.nim
│       ├── lexer.nim
│       ├── lexgen.nim
│       ├── lextypes.nim
│       ├── lr.nim
│       ├── parsegen.nim
│       ├── parser.nim
│       └── parsetypes.nim
└── tests
    ├── config.nims
    ├── ex.nim
    ├── lexer_global_var.nim
    ├── parser_415.nim
    ├── parser_415_lr.nim
    ├── state_example.txt
    ├── state_parser.nim
    ├── state_parser_with_empty.nim
    ├── state_parser_with_empty_lr.nim
    ├── test_compile_time_parser.nim
    ├── test_empty_str_does_not_cause_error.nim
    ├── test_empty_str_error_msg.nim
    ├── test_error_state_parse.nim
    ├── test_issue_51.nim
    ├── test_lalr.nim
    ├── test_lexer.nim
    ├── test_lexer_counting_newlines.nim
    ├── test_lexer_global_var.nim
    ├── test_lexgen.nim
    ├── test_lexgen_import.nim
    ├── test_lr_lalr.nim
    ├── test_parse_calc.nim
    ├── test_parsegen.nim
    ├── test_readme_example.nim
    ├── test_state_parse.nim
    ├── test_state_parse_with_empty.nim
    └── test_state_parse_with_empty_lr.nim

/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: test

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Cache nimble
      id: cache-nimble
      uses: actions/cache@v2
      with:
        path: ~/.nimble
        key: ${{ runner.os }}-nimble-${{ hashFiles('*.nimble') }}
    - uses: jiro4989/setup-nim-action@v1
    - run: nimble test -y

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*
!**/
!*.*

.DS_Store

# Private directories and files (IDEs)
.*/
~*

!.github

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 loloicci

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
#######
 nimly
#######
|github_workflow| |nimble|

Lexer Generator and Parser Generator as a Macro Library in Nim.
With nimly, you can make a lexer and a parser by writing their definitions
in lex/yacc-like formats.
``nimly`` generates the lexer and the parser with macros at compile time,
so you can use ``nimly`` as a library rather than as an external tool of
your program.

niml
====
``niml`` is a macro to generate a lexer.

macro niml
----------
The macro ``niml`` makes a lexer.
Almost all of the lexer construction is done at compile time.
An example is as follows.

.. code-block:: nim

  ## This makes a LexData object named myLexer.
  ## This lexer returns a value of type ``Token`` when a token is found.
  niml myLexer[Token]:
    r"if":
      ## this part is converted to a proc body.
      ## the arg is (token: LToken).
      return TokenIf()
    r"else":
      return TokenElse()
    r"true":
      return TokenTrue()
    r"false":
      return TokenFalse()
    ## you can use ``..`` instead of ``-`` in ``[]``.
    r"[a..zA..Z\-_][a..zA..Z0..9\-_]*":
      return TokenIdentifier(token)
    ## you can define ``setUp`` and ``tearDown`` functions.
    ## ``setUp`` is called from ``open``, ``newWithString`` and
    ## ``initWithString``.
    ## ``tearDown`` is called from ``close``.
    ## an example is ``tests/lexer_global_var.nim``.
    setUp:
      doSomething()
    tearDown:
      doSomething()

The meta characters are as follows:

- ``\``: escape character
- ``.``: matches any character
- ``[``: start of a character class
- ``|``: alternation (or)
- ``(``: start of a subpattern
- ``)``: end of a subpattern
- ``?``: quantifier for 0 or 1 times
- ``*``: quantifier for 0 or more times
- ``+``: quantifier for 1 or more times
- ``{``: ``{n,m}`` is a quantifier for n or more and m or less times

Inside ``[]``, the meta characters are as follows:

- ``\``: escape character
- ``^``: negates the class (only in the first position)
- ``]``: end of the class
- ``-``: specifies a character range (``..`` can be used instead)

Each of the following is recognized as a character set:

- ``\d``: ``[0..9]``
- ``\D``: ``[^0..9]``
- ``\s``: ``[ \t\n\r\f\v]``
- ``\S``: ``[^ \t\n\r\f\v]``
- ``\w``: ``[a..zA..Z0..9_]``
- ``\W``: ``[^a..zA..Z0..9_]``
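Once ``niml`` has generated a ``LexData`` object, you can drive the lexer
directly. The following is a minimal usage sketch, assuming the
``myLexer``/``Token`` definitions above (``newWithString``, ``lexIter`` and
``close`` are defined in ``src/nimly/lexer.nim``):

.. code-block:: nim

  ## collect every token found in a string
  var lexer = myLexer.newWithString("if true else false")
  var tokens: seq[Token] = @[]
  for token in lexer.lexIter:
    tokens.add(token)
  lexer.close()
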
nimy
====
``nimy`` is a macro to generate a LALR(1) parser.

macro nimy
----------
The macro ``nimy`` makes a parser.
Almost all of the parser construction is done at compile time.
An example is as follows.

.. code-block:: nim

  ## This makes a parser named myParser.
  ## The first clause is the top level of the BNF.
  ## This parser receives tokens of type ``Token``, and the token must have
  ## a field ``kind`` of the enum type ``[TokenTypeName]Kind``.
  ## This is naturally satisfied when you use ``patty`` to define the token.
  nimy myParser[Token]:
    ## the starting non-terminal
    ## the return type of the parser is ``Expr``
    top[Expr]:
      ## a pattern.
      expr:
        ## the proc body used when the pattern (a single ``expr``) is parsed.
        ## $1 means the first symbol of the pattern (``expr``)
        return $1

    ## non-terminal named ``expr``
    ## with return type ``Expr``
    expr[Expr]:
      ## first pattern of expr.
      ## ``LPAR`` and ``RPAR`` are TokenKinds.
      LPAR expr RPAR:
        return $2

      ## second pattern of expr.
      ## ``PLUS`` is a TokenKind.
      expr PLUS expr:
        return $2

You can use the following EBNF functions:

- ``XXX[]``: Option (0 or 1 ``XXX``).
  The type is ``seq[xxx]`` where ``xxx`` is the type of ``XXX``.
- ``XXX{}``: Repeat (0 or more ``XXX``).
  The type is ``seq[xxx]`` where ``xxx`` is the type of ``XXX``.

Examples of these are in the next section.

Example
=======
``tests/test_readme_example.nim`` is a simple example.

.. code-block:: nim

  import unittest
  import patty
  import strutils

  import nimly

  ## variant is defined in patty
  variant MyToken:
    PLUS
    MULTI
    NUM(val: int)
    DOT
    LPAREN
    RPAREN
    IGNORE

  niml testLex[MyToken]:
    r"\(":
      return LPAREN()
    r"\)":
      return RPAREN()
    r"\+":
      return PLUS()
    r"\*":
      return MULTI()
    r"\d":
      return NUM(parseInt(token.token))
    r"\.":
      return DOT()
    r"\s":
      return IGNORE()

  nimy testPar[MyToken]:
    top[string]:
      plus:
        return $1

    plus[string]:
      mult PLUS plus:
        return $1 & " + " & $3

      mult:
        return $1

    mult[string]:
      num MULTI mult:
        return "[" & $1 & " * " & $3 & "]"

      num:
        return $1

    num[string]:
      LPAREN plus RPAREN:
        return "(" & $2 & ")"

      ## float (whose integer part is a single digit 0-9) or integer
      NUM DOT[] NUM{}:
        result = ""
        # type of `($1).val` is `int`
        result &= $(($1).val)
        if ($2).len > 0:
          result &= "."
        # type of `$3` is `seq[MyToken]` and each element is a NUM
        for tkn in $3:
          # type of `tkn.val` is `int`
          result &= $(tkn.val)

  test "test Lexer":
    var testLexer = testLex.newWithString("1 + 42 * 101010")
    testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE

    var
      ret: seq[MyTokenKind] = @[]

    for token in testLexer.lexIter:
      ret.add(token.kind)

    check ret == @[MyTokenKind.NUM, MyTokenKind.PLUS, MyTokenKind.NUM,
                   MyTokenKind.NUM, MyTokenKind.MULTI,
                   MyTokenKind.NUM, MyTokenKind.NUM, MyTokenKind.NUM,
                   MyTokenKind.NUM, MyTokenKind.NUM, MyTokenKind.NUM]

  test "test Parser 1":
    var testLexer = testLex.newWithString("1 + 42 * 101010")
    testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE

    var parser = testPar.newParser()
    check parser.parse(testLexer) == "1 + [42 * 101010]"

    testLexer.initWithString("1 + 42 * 1010")

    parser.init()
    check parser.parse(testLexer) == "1 + [42 * 1010]"

  test "test Parser 2":
    var testLexer = testLex.newWithString("1 + 42 * 1.01010")
    testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE

    var parser = testPar.newParser()
    check parser.parse(testLexer) == "1 + [42 * 1.01010]"

    testLexer.initWithString("1. + 4.2 * 101010")

    parser.init()
    check parser.parse(testLexer) == "1. + [4.2 * 101010]"

  test "test Parser 3":
    var testLexer = testLex.newWithString("(1 + 42) * 1.01010")
    testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE

    var parser = testPar.newParser()
    check parser.parse(testLexer) == "[(1 + 42) * 1.01010]"
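The lexer does not have to read from a string: ``open`` (also defined in
``src/nimly/lexer.nim``) reads the input from a file path instead. A minimal
sketch reusing ``testLex`` and ``testPar`` from the example above (the input
file name is hypothetical):

.. code-block:: nim

  ## parse an expression such as "1 + 2 * 3" read from a file
  var fileLexer = testLex.open("expr.txt")  # hypothetical input file
  fileLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE
  var parser = testPar.newParser()
  echo parser.parse(fileLexer)
  fileLexer.close()
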
Install
=======
1. ``nimble install nimly``

Now you can use nimly with ``import nimly``.

vmdef.MaxLoopIterations Problem
-------------------------------
While compiling a lexer/parser, you can encounter errors with
``interpretation requires too many iterations``.
You can avoid this error by using the compiler option
``--maxLoopIterationsVM:N``, which is available since nim v1.0.6
(for example, ``nim c --maxLoopIterationsVM:100000000 -r tests/test_readme_example.nim``).

See https://github.com/loloicci/nimly/issues/11 for details.

Contribute
==========
1. Fork this repository
2. Create a new branch
3. Commit your changes
4. Push them to the branch
5. Create a new pull request

Changelog
=========
See changelog.rst_.

Developing
==========
You can use ``nimldebug`` and ``nimydebug`` as conditional symbols
to print debug info.

example: ``nim c -d:nimldebug -d:nimydebug -r tests/test_readme_example.nim``


.. |github_workflow| image:: https://github.com/loloicci/nimly/workflows/test/badge.svg
   :target: https://github.com/loloicci/nimly/actions?query=workflow%3Atest
.. |nimble| image:: https://raw.githubusercontent.com/yglukhov/nimble-tag/master/nimble.png
   :target: https://github.com/yglukhov/nimble-tag
.. _changelog.rst: ./changelog.rst

--------------------------------------------------------------------------------
/azure-pipelines.yml:
--------------------------------------------------------------------------------
trigger:
- master

pr:
- master

pool:
  vmImage: 'Ubuntu-16.04'

container: nimlang/nim:latest

steps:
- script: nimble test -y

--------------------------------------------------------------------------------
/changelog.rst:
--------------------------------------------------------------------------------
###########
 Changelog
###########

[v0.7.0] - 2021-01-23
=====================

Added
-----
* Add a function to define ``setUp`` and ``tearDown`` in niml (#54)

Changed
-------
* Change the design and how to use ``nimy`` (#59, #60)

Other
-----
* Remove the functions ``toConst`` and ``reconstruct``

[v0.6.1] - 2020-07-14
=====================

Fixed
-----
* Issue #51 - [^...] in a regex used in niml does not work.

Other
-----
* Some refactorings.
* Rewrite the section about the MaxLoopIterations problem in the README.

[v0.6.0] - 2020-05-22
=====================

Changed
-------
* Change the license to MIT.

[v0.5.1] - 2020-01-28
=====================

Changed
-------
* Rename "test_nimly.nim" to "test_readme_example.nim".

[v0.5.0] - 2020-01-08
=====================

Changed
-------
* Restrict exports in the main module ``nimly``.


[v0.4.2] - 2020-01-08
=====================

Fixed
-----
* Remove an unused variable.


[v0.4.1] - 2020-01-06
=====================

Fixed
-----
* Fix the bug that the lexer sometimes counts a newline multiple times
  in the line number information (Issue #34)

[v0.4.0] - 2019-09-25
=====================

Changed
-------
* Update the required ``nim`` version.

* Change not to print debug messages in non-debug builds.
  (The printing was needed to avoid errors while generating the parser;
  this relates to some nim VM bugs.)
Fixed
-----
* Avoid issuing some warnings in sanity runs.


[v0.3.0] - 2019-07-04
=====================

Changed
-------
* Update the required ``nim`` version.

* Change to use macros.strVal
* Change to use xxxHashSet instead of xxxHash

[v0.2.1] - 2019-04-12
=====================

Added
-----
* Add some tests.

Fixed
-----
* Fix the bug that ``nimy``'s rule clauses cannot contain comments.

[v0.2.0] - 2019-04-11
=====================

Added
-----
* Add ``[]`` and ``{}`` (for EBNF) to ``nimy``'s syntax.

[v0.1.0] - 2019-04-04
=====================

Added
-----
* Add functions to generate an LALR(1) parsing table.

Changed
-------
* Change ``nimy`` to make an LALR(1) parsing table instead of an LR(1)
  parsing table by default.
* Many refactorings.

[v0.0.0] - 2019-03-22
=====================
The first release

--------------------------------------------------------------------------------
/nimly.nimble:
--------------------------------------------------------------------------------
# Package

version = "0.7.0"
author = "loloicci"
description = "Lexer Generator and Parser Generator as a library in Nim."
license = "MIT"
srcDir = "src"


# Dependencies

requires "nim >= 1.4.0"
requires "patty >= 0.3.3"

--------------------------------------------------------------------------------
/src/nimly.nim:
--------------------------------------------------------------------------------
import nimly/lextypes
import nimly/lexgen
import nimly/lexer
import nimly/parsetypes
import nimly/parser
import nimly/lr
import nimly/lalr
import nimly/parsegen

export lextypes.LexError
export lextypes.LToken
export lextypes.LexData

export lexgen

export lexer

export parsetypes.NimyError
export parsetypes.NimyActionError
export parsetypes.NimyGotoError
export parsetypes.TermS
export parsetypes.NonTermS
export parsetypes.Rule
export parsetypes.Symbol
export parsetypes.End
export parsetypes.Empty
export parsetypes.hash
export parsetypes.`==`
export parsetypes.newrule
export parsetypes.initGrammar
export parsetypes.`$`

export parser.ParseTree
export parser.ConstTable
export parser.Parser
export parser.ParsingTable
export parser.parseImpl
export parser.newParser
export parser.init
export parser.`$`

export lr.hash
export lr.makeCanonicalCollection
export lr.makeTableLR
export lr.filterKernel
export lr.`$`

export lalr.hash
export lalr.makeTableLALR

export parsegen.RuleToProc
export parsegen.initRuleToProc
export parsegen.nimy

--------------------------------------------------------------------------------
/src/nimly/lalr.nim:
--------------------------------------------------------------------------------
import tables
import sets
import hashes

import patty

import parsetypes
import parser
import lr

type
  LALRItem[T] = object
    rule: Rule[T]
    pos: int
    ahead: Symbol[T]
  LALRItems[T] = HashSet[LALRItem[T]]
  SetOfLALRItems[T] = OrderedTable[int, LALRItems[T]]
  PropagateTable[T] =
Table[LRItem[T], HashSet[(int, LRItem[T])]] 19 | 20 | proc initLALRItems[T](): LALRItems[T] = 21 | result = initHashSet[LALRItem[T]]() 22 | 23 | proc initHashSetOfLALRItems[T](): SetOfLALRItems[T] = 24 | result = initOrderedTable[int, LALRItems[T]]() 25 | 26 | proc initPropagateTable[T](): PropagateTable[T] = 27 | result = initTable[LRItem[T], HashSet[(int, LRItem[T])]]() 28 | 29 | proc hash*[T](x: LALRItem[T]): Hash = 30 | var h: Hash = 0 31 | h = h !& hash(x.rule) 32 | h = h !& hash(x.pos) 33 | h = h !& hash(x.ahead) 34 | return !$h 35 | 36 | proc next[T](i: LALRItem[T]): Symbol[T] = 37 | if i.pos >= i.rule.len: 38 | return End[T]() 39 | result = i.rule.right[i.pos] 40 | 41 | proc nextSkipEmpty[T](i: LALRItem[T]): Symbol[T] = 42 | result = End[T]() 43 | for idx in i.pos.. 0: 59 | var new: LALRItems[T] 60 | new.init() 61 | for i in checkSet: 62 | match i.next: 63 | NonTermS: 64 | for r in g.filterRulesLeftIs(i.next): 65 | when defined(nimydebug): 66 | if i.ahead.kind == SymbolKind.Empty: 67 | echo "(ahead is emp) i: " & $i 68 | for fst in g.calFirsts(i.fromNextNext & i.ahead): 69 | when defined(nimydebug): 70 | if fst.kind == SymbolKind.Empty: 71 | echo "(gen emp fst) i: " & $i 72 | let n = LALRItem[T](rule: r, pos: 0, ahead: fst) 73 | if not result.containsOrIncl(n): 74 | new.incl(n) 75 | _: 76 | discard 77 | checkSet = new 78 | 79 | proc closure[T](g: Grammar[T], single: LALRItem[T]): LALRItems[T] = 80 | result = g.closure([single].toHashSet) 81 | 82 | proc toLALRItem[T](lrItem: LRItem[T], ahead: Symbol[T]): LALRItem[T] = 83 | result = LALRItem[T](rule: lrItem.rule, pos: lrItem.pos, ahead: ahead) 84 | 85 | proc toLRItem[T](lalrItem: LALRItem[T]): LRItem[T] = 86 | result = LRItem[T](rule: lalrItem.rule, pos: lalrItem.pos) 87 | 88 | proc `[]`[T](pt: PropagateTable[T], 89 | itm: LALRItem[T]): HashSet[(int, LRItem[T])] = 90 | result = pt[LRItem[T](rule: itm.rule, pos: itm.pos)] 91 | 92 | proc incl[T](ot: var OrderedTable[int, T], vl: T) = 93 | ot[ot.len] = vl 94 | 95 | proc foward[T](itm: LALRItem[T]): LALRItem[T] = 96 | result = LALRItem[T](rule: itm.rule, pos: itm.pos + 1, ahead: itm.ahead) 97 | 98 | proc firstItem[T](os: OrderedSet[T]): T = 99 | for i in os: 100 | return i 101 | 102 | proc getItemIfSingle[T](s: HashSet[T]): T = 103 | if s.card == 1: 104 | for i in s: 105 | return i 106 | raise newException(NimyError, "Unexpected: " & $s & " needs to be single.") 107 | 108 | ## Same as Dragonbook Argorithm 4.62 & 4.63 109 | proc toLALRKernel[T](lrKernel: SetOfLRItems[T], g: Grammar[T], 110 | tt: TransTable[T]): SetOfLALRItems[T] = 111 | # init result 112 | result = initHashSetOfLALRItems[T]() 113 | doAssert lrKernel.card > 0 114 | for idx in 0.. 
0: 149 | var newSet = initLALRItems[T]() 150 | for itm in checkSet: 151 | for toInfo in propagation[itm]: 152 | let 153 | (idx, toItm) = toInfo 154 | new = toItm.toLALRItem(itm.ahead) 155 | if not (result[idx].containsOrIncl(new)): 156 | newSet.incl(new) 157 | checkSet = newSet 158 | 159 | proc makeTableLALR*[T](g: Grammar[T]): ParsingTable[T] = 160 | var 161 | actionTable: ActionTable[T] 162 | gotoTable: GotoTable[T] 163 | actionTable = initTable[State, ActionRow[T]]() 164 | gotoTable = initTable[State, GotoRow[T]]() 165 | when defined(nimydebug): 166 | echo "[nimly] start: make table for parser" 167 | let 168 | ag = if g.isAugument: 169 | g 170 | else: 171 | g.augument 172 | (cc, tt) = makeCanonicalCollection[T](ag) 173 | knl = cc.filterKernel 174 | lalrKnl = knl.toLALRKernel(ag, tt) 175 | when defined(nimydebug): 176 | echo "[nimly] done: make lalrkernel" 177 | for idx, itms in lalrKnl: 178 | when defined(nimydebug): 179 | echo "[nimly] processing: Collection " & $(idx + 1) & "/" & $lalrKnl.len 180 | actionTable[idx] = initTable[Symbol[T], ActionTableItem[T]]() 181 | gotoTable[idx] = initTable[Symbol[T], State]() 182 | when defined(nimydebug): 183 | echo "[nimly] processing: Collection " & $(idx + 1) & " - make closure" 184 | let clsr = ag.closure(itms) 185 | var cnt = 1 186 | for itm in clsr: 187 | when defined(nimydebug): 188 | echo "[nimly] processing: Collection " & $(idx + 1) & " - " & 189 | $cnt & "/" & $clsr.card 190 | inc(cnt) 191 | let sym = itm.nextSkipEmpty 192 | match sym: 193 | TermS: 194 | when defined(nimydebug): 195 | if actionTable[idx].haskey(sym) and 196 | actionTable[idx][sym].kind == ActionTableItemKind.Reduce: 197 | echo "LALR:CONFLICT!!!" & $idx & ":" & $sym 198 | actionTable[idx][sym] = Shift[T](tt[idx][sym]) 199 | NonTermS: 200 | gotoTable[idx][sym] = tt[idx][sym] 201 | End: 202 | if itm.rule.left == ag.start: 203 | actionTable[idx][End[T]()] = Accept[T]() 204 | else: 205 | if actionTable[idx].haskey(itm.ahead) and 206 | actionTable[idx][itm.ahead].kind == ActionTableItemKind.Shift: 207 | when defined(nimydebug): 208 | echo "LALR:CONFLICT!!!" 
& $idx & ":" & $itm.ahead 209 | continue 210 | actionTable[idx][itm.ahead] = Reduce[T](itm.rule) 211 | _: 212 | discard 213 | when defined(nimydebug): 214 | echo "[nimly] done: make tables" 215 | result = ParsingTable[T](action: actionTable, goto: gotoTable) 216 | when defined(nimydebug): 217 | echo "LALR:" 218 | echo result 219 | -------------------------------------------------------------------------------- /src/nimly/lexer.nim: -------------------------------------------------------------------------------- 1 | import lexbase 2 | import streams 3 | 4 | import lextypes 5 | import lexgen 6 | 7 | type 8 | NimlLexer*[T] = object of BaseLexer 9 | data*: LexData[T] 10 | ignoreIf*: proc(r: T): bool 11 | setUp*: proc() {.nimcall.} 12 | tearDown*: proc() {.nimcall.} 13 | NimlError* = object of Exception 14 | NimlEOFError* = object of NimlError 15 | 16 | proc newNimlLexer[T](data: LexData[T]): NimlLexer[T] = 17 | result = NimlLexer[T]( 18 | data: data, 19 | ignoreIf: proc(r: T): bool = false, 20 | setUp: data.setUp, 21 | tearDown: data.tearDown, 22 | ) 23 | 24 | proc open*[T](data: LexData[T], path: string): NimlLexer[T] = 25 | result = newNimlLexer(data) 26 | result.open(openFileStream(path)) 27 | result.setUp() 28 | 29 | proc newWithString*[T](data: LexData[T], str: string): NimlLexer[T] = 30 | result = newNimlLexer(data) 31 | result.open(newStringStream(str)) 32 | result.setUp() 33 | 34 | proc open*[T](lexer: var NimlLexer[T], path: string) = 35 | lexer.open(openFileStream(path)) 36 | lexer.setUp() 37 | 38 | proc initWithString*[T](lexer: var NimlLexer[T], str: string) = 39 | lexer.open(newStringStream(str)) 40 | lexer.setUp() 41 | 42 | proc close*[T](lexer: var NimlLexer[T]) = 43 | lexer.data.tearDown() 44 | lexbase.close(lexer) 45 | 46 | proc lex*[T](nl: var NimlLexer[T]): T = 47 | let 48 | colNum = nl.getColNumber(nl.bufpos) 49 | lineNum = nl.lineNumber 50 | lineInfo = nl.getCurrentLine 51 | var 52 | token: string = "" 53 | lastAccToken: string = "" 54 | state: State = 0 55 | lastAccState: State = deadState 56 | pos = nl.bufpos 57 | lastAccPos: int = -1 58 | lastAccLine: int = -1 59 | ltoken = LToken(colNum: colNum, lineNum: lineNum, lineInfo: lineInfo) 60 | when defined(nimldebug): 61 | echo "--lex start--" 62 | echo state 63 | while state != deadState: 64 | let c = nl.buf[pos] 65 | token &= c 66 | case c 67 | of '\L': 68 | pos = nl.handleLF(pos) 69 | when defined(nimldebug): 70 | echo "handleLF" 71 | of '\c': 72 | pos = nl.handleCR(pos) 73 | when defined(nimldebug): 74 | echo "handleCR" 75 | else: 76 | inc(pos) 77 | when defined(nimldebug): 78 | echo "handleOther" 79 | 80 | state = nl.data.nextState(state, c) 81 | when defined(nimldebug): 82 | echo "read:" & c 83 | echo "state:" & $state 84 | if nl.data.isAcc(state): 85 | lastAccToken = token 86 | lastAccState = state 87 | lastAccPos = pos 88 | lastAccLine = nl.lineNumber 89 | if c == EndOfFile and lastAccState == -1: 90 | raise newException(LexError, "invalid EOF while lexing") 91 | 92 | if lastAccState == -1: 93 | raise newException(LexError, "LexError:\n" & lineInfo) 94 | 95 | ltoken.token = lastAccToken 96 | 97 | result = nl.data.dba[lastAccState].accept.fun(ltoken) 98 | 99 | nl.bufpos = lastAccPos 100 | nl.lineNumber = lastAccLine 101 | when defined(nimldebug): 102 | echo "--lex end--" 103 | echo "token:" & lastAccToken 104 | try: 105 | echo "result:" & $result 106 | except: 107 | discard 108 | 109 | proc isEmpty*[T](nl: NimlLexer[T]): bool = 110 | nl.buf[nl.bufpos] == EndOfFile 111 | 112 | proc lexNext*[T](nl: var NimlLexer[T]): T 
= 113 | while nl.buf[nl.bufpos] != EndOfFile: 114 | result = nl.lex 115 | if not nl.ignoreIf(result): 116 | return 117 | raise newException(NimlEOFError, "read EOF") 118 | 119 | iterator lexIter*[T](nl: var NimlLexer[T]): T = 120 | while nl.buf[nl.bufpos] != EndOfFile: 121 | yield nl.lexNext 122 | -------------------------------------------------------------------------------- /src/nimly/lexgen.nim: -------------------------------------------------------------------------------- 1 | import tables 2 | import sets 3 | import strutils 4 | import macros 5 | import patty 6 | 7 | import lextypes 8 | 9 | export tables 10 | export sets 11 | 12 | proc `~`*[T](obj: T): ref T = 13 | new(result) 14 | result[] = obj 15 | 16 | type 17 | # for SynTree 18 | Pos* = int 19 | BOp* = enum 20 | bor, 21 | bcat 22 | Pos2PosSet = TableRef[Pos, HashSet[Pos]] 23 | 24 | # for DFA 25 | DState = int 26 | DTranslationsRow = TableRef[char, DState] 27 | DTranslations = TableRef[DState, DTranslationsRow] 28 | DAccepts[T] = TableRef[DState, AccProc[T]] 29 | DFA[T] = object 30 | start: DState 31 | accepts: DAccepts[T] 32 | stateNum: int 33 | translations: DTranslations 34 | 35 | variant LChar: 36 | End 37 | Real(c: char) 38 | 39 | variant Lit: 40 | Empty 41 | Char(pos: Pos, c: LChar) 42 | 43 | variantp ReSynTree: 44 | Term(lit: Lit) 45 | Bin(op: BOp, left: ref ReSynTree, right: ref ReSynTree) 46 | Star(child: ref ReSynTree) 47 | 48 | type 49 | AccPosProc[T] = TableRef[Pos, AccProc[T]] 50 | LexRe*[T] = object 51 | st: ReSynTree 52 | accPosProc: AccPosProc[T] 53 | 54 | proc newAccPosProc*[T](): AccPosProc[T] = 55 | result = newTable[Pos, AccProc[T]]() 56 | 57 | proc newPos2PosSet(): Pos2PosSet = 58 | result = newTable[Pos, HashSet[Pos]]() 59 | 60 | proc newDAccepts[T](): DAccepts[T] = 61 | result = newTable[DState, AccProc[T]]() 62 | 63 | proc newDTranslations(): DTranslations = 64 | result = newTable[DState, DTranslationsRow]() 65 | 66 | proc newDTranslationsRow(): DTranslationsRow = 67 | result = newTable[char, DState]() 68 | 69 | proc accPosImplDebug(t: ReSynTree): seq[Pos] {.used.} = 70 | result = @[] 71 | var checkSet: seq[ReSynTree] = @[t] 72 | while checkSet.len > 0: 73 | let ct = checkSet.pop 74 | match ct: 75 | Term(lit: l): 76 | match l: 77 | Empty: 78 | continue 79 | Char(pos: p, c: c): 80 | if c.kind == LCharKind.End: 81 | result &= p 82 | else: 83 | continue 84 | Star(child: c): 85 | checkSet.add(c[]) 86 | Bin(op:_, left: l, right: r): 87 | checkSet.add(r[]) 88 | checkSet.add(l[]) 89 | 90 | proc accPosImpl(t: ReSynTree): seq[Pos] {.used.} = 91 | result = @[] 92 | var checkSet: seq[ReSynTree] = @[t] 93 | while checkSet.len > 0: 94 | let ct = checkSet.pop 95 | match ct: 96 | Term(lit: l): 97 | match l: 98 | Empty: 99 | return @[] 100 | Char(pos: p, c: c): 101 | if c.kind == LCharKind.End: 102 | return @[p] 103 | else: 104 | return @[] 105 | Star(child: c): 106 | checkSet = @[c[]] & checkSet 107 | Bin(op:_, left: l, right: r): 108 | checkSet = @[r[]] & checkSet 109 | checkSet = @[l[]] & checkSet 110 | 111 | when not defined(release): 112 | import sequtils 113 | 114 | proc accPos*(t: ReSynTree): seq[Pos] = 115 | ## use it if acc is only one position 116 | result = @[] 117 | when defined(release): 118 | result = t.accPosImpl 119 | else: 120 | result = t.accPosImplDebug.deduplicate 121 | doassert result.len > 0, "No acc node" 122 | 123 | 124 | proc `$`(t: ReSynTree): string = 125 | match t: 126 | Term(l): 127 | $l 128 | Bin(op, l, r): 129 | $op & "(" & $(l[]) & ", \n" & $(r[]) & ")" 130 | Star(c): 131 | "*(" & $(c[]) & 
")" 132 | 133 | proc reassignPos(t: ReSynTree, nextPos: var int): ReSynTree = 134 | match t: 135 | Term(lit: l): 136 | match l: 137 | Empty: 138 | return Term(Empty()) 139 | Char(pos: _, c: c): 140 | result = Term(Char(pos = nextPos, c = c)) 141 | inc(nextPos) 142 | return 143 | Bin(op: op, left: l, right: r): 144 | let 145 | left = ~(l[].reassignPos(nextPos)) 146 | right = ~(r[].reassignPos(nextPos)) 147 | return Bin(op = op, 148 | left = left, 149 | right = right) 150 | Star(child: c): 151 | return Star(child = ~(c[].reassignPos(nextPos))) 152 | 153 | proc collectChar(t: ReSynTree): set[char] = 154 | result = {} 155 | var checkSet: seq[ReSynTree] = @[t] 156 | while checkSet.len > 0: 157 | let ct = checkSet.pop 158 | match ct: 159 | Term(lit: l): 160 | match l: 161 | Empty: 162 | continue 163 | Char(pos: _, c: lc): 164 | match lc: 165 | End: 166 | continue 167 | Real(c: c): 168 | result.incl(c) 169 | Bin(op: _, left: l, right: r): 170 | checkSet.add(l[]) 171 | checkSet.add(r[]) 172 | Star(child: c): 173 | checkSet.add(c[]) 174 | 175 | proc nullable(t: ReSynTree): bool = 176 | match t: 177 | Term(lit: l): 178 | match l: 179 | Empty: 180 | return true 181 | Char: 182 | return false 183 | Bin(op: o, left: l, right: r): 184 | case o 185 | of bor: 186 | return l[].nullable or r[].nullable 187 | of bcat: 188 | return l[].nullable and r[].nullable 189 | Star: 190 | return true 191 | 192 | proc firstpos(t: ReSynTree): HashSet[Pos] = 193 | result.init 194 | var checkSet: seq[ReSynTree] = @[t] 195 | while checkSet.len > 0: 196 | let ct = checkSet.pop 197 | match ct: 198 | Term(lit: l): 199 | match l: 200 | Empty: 201 | continue 202 | Char(pos: p, c: _): 203 | result.incl(p) 204 | continue 205 | Bin(op: o, left: l, right: r): 206 | case o 207 | of bor: 208 | checkSet.add(l[]) 209 | checkSet.add(r[]) 210 | of bcat: 211 | checkSet.add(l[]) 212 | if l[].nullable: 213 | checkSet.add(r[]) 214 | Star(child: c): 215 | checkSet.add(c[]) 216 | 217 | proc lastpos(t: ReSynTree): HashSet[Pos] = 218 | result.init 219 | var checkSet: seq[ReSynTree] = @[t] 220 | while checkSet.len > 0: 221 | let ct = checkSet.pop 222 | match ct: 223 | Term(lit: l): 224 | match l: 225 | Empty: 226 | continue 227 | Char(pos: p, c: _): 228 | result.incl(p) 229 | continue 230 | Bin(op: o, left: l, right: r): 231 | case o 232 | of bor: 233 | checkSet.add(l[]) 234 | checkSet.add(r[]) 235 | of bcat: 236 | if r[].nullable: 237 | checkSet.add(l[]) 238 | checkSet.add(r[]) 239 | Star(child: c): 240 | checkSet.add(c[]) 241 | 242 | proc makeFollowposTable(t: ReSynTree): Pos2PosSet = 243 | # init 244 | result = newPos2PosSet() 245 | var checkSet: seq[ReSynTree] = @[t] 246 | 247 | while checkSet.len > 0: 248 | let ct = checkSet.pop 249 | # make 250 | match ct: 251 | Term: 252 | continue 253 | Bin(op: o, left: l, right: r): 254 | if o == bcat: 255 | for i in l[].lastpos: 256 | if result.hasKey(i): 257 | result[i] = result[i] + r[].firstpos 258 | else: 259 | result[i] = r[].firstpos 260 | checkSet.add(l[]) 261 | checkSet.add(r[]) 262 | Star(child: c): 263 | for i in ct.lastpos: 264 | if result.haskey(i): 265 | result[i] = result[i] + ct.firstpos 266 | else: 267 | result[i] = ct.firstpos 268 | checkSet.add(c[]) 269 | 270 | proc terms(t: ReSynTree): seq[Lit] = 271 | result = @[] 272 | var checkSet: seq[ReSynTree] = @[t] 273 | while checkSet.len > 0: 274 | let ct = checkSet.pop 275 | match ct: 276 | Term(lit: l): 277 | result.add(l) 278 | Bin(op: _, left: l, right: r): 279 | checkSet.add(l[]) 280 | checkSet.add(r[]) 281 | Star(child: c): 282 | 
checkSet.add(c[]) 283 | 284 | proc makeCharPossetTable(t: ReSynTree): TableRef[char, HashSet[Pos]] = 285 | # init 286 | let 287 | chars = t.collectChar 288 | 289 | result = newTable[char, HashSet[Pos]]() 290 | for c in chars: 291 | result[c] = initHashSet[Pos]() 292 | 293 | for l in t.terms: 294 | match l: 295 | Char(pos: p, c: l): 296 | match l: 297 | Real(c: c): 298 | result[c].incl(p) 299 | End: 300 | continue 301 | Empty: 302 | continue 303 | 304 | proc makeDFA*[T](lr: LexRe[T]): DFA[T] = 305 | when defined(nimldebug): 306 | echo "[nimly] start : make DFA" 307 | 308 | let 309 | t = lr.st 310 | followpos = t.makeFollowposTable 311 | 312 | var 313 | translations = newDTranslations() 314 | stateNum = 0 315 | posS2DState = newTable[HashSet[Pos], DSTate]() 316 | unmarked: seq[HashSet[Pos]] = @[] 317 | 318 | # init 319 | let 320 | chars = t.collectChar 321 | iState = stateNum 322 | iSPos = t.firstpos 323 | charPosset = t.makeCharPossetTable 324 | inc(stateNum) 325 | posS2DState[iSPos] = iState 326 | unmarked.add(iSPos) 327 | 328 | # make state and translations 329 | while unmarked.len > 0: 330 | let 331 | ps = unmarked.pop 332 | s = posS2DState[ps] 333 | translations[s] = newDTranslationsRow() 334 | for c in chars: 335 | let posSet = ps * charPosset[c] 336 | var newSPos: HashSet[Pos] = initHashSet[Pos]() 337 | for p in posSet: 338 | newSPos = newSPos + followpos[p] 339 | var nState: DState 340 | if posS2DState.hasKey(newSPos): 341 | nState = posS2DState[newSPos] 342 | else: 343 | nState = stateNum 344 | inc(stateNum) 345 | unmarked.add(newSPos) 346 | posS2DState[newSPos] = nState 347 | translations[s][c] = nState 348 | 349 | # make accepts 350 | var accepts = newDAccepts[T]() 351 | for k in posS2DState.keys: 352 | # the first acc position is expected acc 353 | var mp = high(int) 354 | for p in k: 355 | if lr.accPosProc.haskey(p): 356 | if p < mp: 357 | mp = p 358 | if mp != high(int): 359 | accepts[posS2DState[k]] = (lr.accPosProc[mp]) 360 | 361 | when defined(nimldebug): 362 | echo "[nimly] done : make DFA" 363 | # make DFA 364 | return DFA[T](start: iState, accepts: accepts, 365 | stateNum: stateNum, translations: translations) 366 | 367 | proc calculateTableCharsToNextState[T]( 368 | state: DState, 369 | partition: seq[HashSet[DState]], 370 | dfa: DFA[T]): TableRef[char, DState] = 371 | result = newTable[char, DState]() 372 | for c, s in dfa.translations[state]: 373 | for i, p in partition: 374 | if s in p: 375 | result[c] = DState(i) 376 | break 377 | 378 | proc grind[T](partition: var seq[HashSet[DState]], dfa: DFA[T]): bool = 379 | ## return true if this affects `partition` 380 | result = false 381 | var newPartition: seq[HashSet[DState]] = @[] 382 | for group in partition: 383 | # seq of (subgroup, translationsFromThisSubgroup) 384 | var grindedDFA: seq[(HashSet[DState], TableRef[char, DState])] = @[] 385 | for state in group: 386 | let translationsFromState = state.calculateTableCharsToNextState( 387 | partition, dfa 388 | ) 389 | var isNewPart = true 390 | for i, subgroupData in grindedDFA: 391 | let (subgroup, translationsFromSubgroup) = subgroupData 392 | if translationsFromState == translationsFromSubgroup: 393 | grindedDFA[i] = (subgroup + [state].toHashSet, translationsFromSubgroup) 394 | isNewPart = false 395 | break 396 | if isNewPart: 397 | grindedDFA.add(([state].toHashSet, translationsFromState)) 398 | 399 | # add seq of state set to renew parts 400 | for subgroupData in grindedDFA: 401 | let (subgroup, _) = subgroupData 402 | newPartition.add(subgroup) 403 | if 
grindedDFA.len > 1: 404 | result = true 405 | partition = newPartition 406 | 407 | proc removeDead[T](input: DFA[T]): DFA[T] = 408 | var dead = initHashSet[DState]() 409 | for s, tr in input.translations: 410 | if input.accepts.haskey(s): 411 | continue 412 | var f = true 413 | for ns in tr.values: 414 | if s != ns: 415 | f = false 416 | break 417 | if f: 418 | dead.incl(s) 419 | var newTranslations = newDTranslations() 420 | for s, tr in input.translations: 421 | if s in dead: 422 | continue 423 | var newRow = newDTranslationsRow() 424 | for c, ns in tr: 425 | if ns in dead: 426 | newRow[c] = deadState 427 | else: 428 | newRow[c] = ns 429 | newTranslations[s] = newRow 430 | result = DFA[T]( 431 | start: input.start, 432 | accepts: input.accepts, 433 | stateNum: input.stateNum - dead.card, 434 | translations: newTranslations 435 | ) 436 | 437 | proc minimizeStates[T](input: DFA[T], 438 | initPartition: seq[HashSet[DState]]): DFA[T] = 439 | ## The main part of `minimizeStates*[T](input: DFA[T]): DFA[T]`. 440 | ## `initPartiotion[0]` needs to be the state to accept. 441 | var 442 | partition = initPartition 443 | didChange = true 444 | while didChange: 445 | didChange = partition.grind(input) 446 | 447 | result = DFA[T](translations: newDTranslations(), 448 | accepts: newDAccepts[T]()) 449 | for i, p in partition: 450 | if input.start in p: 451 | result.start = i 452 | for acc in input.accepts.keys: 453 | if acc in p: 454 | result.accepts[i] = input.accepts[acc] 455 | inc(result.stateNum) 456 | for s in p: 457 | result.translations[i] = s.calculateTableCharsToNextState(partition, 458 | input) 459 | break 460 | 461 | result = result.removeDead 462 | 463 | proc minimizeStates*[T](input: DFA[T]): DFA[T] = 464 | ## Minimize the state of DNF. 465 | ## 466 | ## The algorithm is the same as what is explained in DragonBook 3.9.7. 467 | ## After despatching this function, each states to accept in DFA 468 | ## needs to correspond to the unique clause in the partition. 469 | when defined(nimldebug): 470 | echo "[nimly] start : minimize lexer state" 471 | var 472 | initPartition: seq[HashSet[DState]] = @[] 473 | other = initHashSet[DState]() 474 | for i in 0.. newState, base, newTranslations 586 | stateTable = newTable[int, (int, int, DTranslationsRow)]() 587 | assert dbaTable.len == 1 588 | for s, tr in dfa.translations: 589 | let 590 | (default, newTranslationsRow) = tr.defaultAndOther 591 | (ls, ll) = ncTable.longestEmpty 592 | (minC, maxC) = newTranslationsRow.minMaxCharIntOfRow 593 | var 594 | start: int 595 | base: int 596 | if maxC - minC >= ll: 597 | start = ncTable.len 598 | else: 599 | start = ls 600 | base = start - minC 601 | 602 | for c, next in newTranslationsRow: 603 | # Dummy 604 | ncTable.writeRow(base + int(c), 605 | DataRow(next = -2, check = -2)) 606 | 607 | var acc: Accept[T] 608 | if dfa.accepts.haskey(s): 609 | acc = Acc[T](dfa.accepts[s]) 610 | else: 611 | acc = NotAcc[T]() 612 | 613 | # default is a temporal value. 
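# (i.e. `default` still refers to the old DFA state numbering here; it is
# rewritten to the renumbered state in the "renew dbaTable's default state"
# loop below)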
614 | let dba = DBA[T](default: default, base: base, accept: acc) 615 | if s == dfa.start: 616 | stateTable[s] = (0, base, newTranslationsRow) 617 | dbaTable[0] = dba 618 | else: 619 | stateTable[s] = (dbaTable.len, base, newTranslationsRow) 620 | dbaTable.add(dba) 621 | 622 | when defined(nimldebug): 623 | echo " calculated stateTable: " & $stateTable 624 | echo " calculated dbaTable: " & $dbaTable 625 | 626 | for k, v in stateTable: 627 | for c, next in v[2]: 628 | let nextState = if next == deadState: 629 | deadState 630 | else: 631 | stateTable[next][0] 632 | ncTable.writeRow(v[1] + int(c), 633 | DataRow( 634 | next = nextState, 635 | check = v[0]), 636 | force = true) 637 | 638 | # renew dbaTable's default state 639 | for i in 0..>" 645 | echo "default: " & $dbaTable[i].default 646 | echo "base: " & $dbaTable[i].base 647 | echo "accept: " & $dbaTable[i].accept.kind 648 | echo "<<--dbaRow----" 649 | 650 | when defined(nimldebug): 651 | echo "[nimly] done : make lexer table" 652 | return LexData[T](dba: dbaTable, nc: ncTable) 653 | 654 | proc nextState*[T](ld: LexData[T], s: State, a: char): State = 655 | assert ld.dba.len > s, "(" & $ld.dba.len & " !> " & $s & ")" 656 | assert s > -1, "(" & $s & " !> " & "-1)" 657 | let 658 | base = ld.dba[s].base 659 | default = ld.dba[s].default 660 | index = base + int(a) 661 | if ld.nc.len <= index or index < 0: 662 | return default 663 | let nc = ld.nc[index] 664 | match nc: 665 | EmptyRow: 666 | return default 667 | DataRow(n, c): 668 | if c == s: 669 | return n 670 | else: 671 | return default 672 | 673 | proc isAcc*[T](ld: LexData[T], s: State): bool = 674 | if s < 0 or ld.dba.len <= s: 675 | return false 676 | return ld.dba[s].accept.kind == AcceptKind.Acc 677 | 678 | variant RePart: 679 | RChar(c: char) 680 | Special(sc: char) 681 | Brace(s: int, e: int) 682 | Tree(tree: ReSynTree) 683 | 684 | const 685 | # char(0) is EOF 686 | allChars = {char(1)..char(255)} 687 | dChars = {'0'..'9'} 688 | nDChars = allChars - dChars 689 | sChars = {' ', '\t', '\n', '\r', '\f', '\v'} 690 | nSChars = allChars - sChars 691 | wChars = {'a'..'z', 'A'..'Z', '0'..'9', '-'} 692 | nWChars = allChars - wChars 693 | classTable = {'d': dChars, 'D': nDChars, 's': sChars, 'S': nSChars, 694 | 'w': wChars, 'W': nWChars, '.': allChars}.toTable() 695 | readingClass = int8(0) 696 | readingEscape = int8(1) 697 | readingDot = int8(2) 698 | # classHead = int8(3) 699 | readingBraceS = int8(4) 700 | readingBraceE = int8(5) 701 | readingClassRange = int8(6) 702 | classNegate = int8(7) 703 | 704 | 705 | proc classIncl(class: var set[char], c: char, 706 | classBfr: var int, flag: var set[int8]) = 707 | if readingClassRange in flag: 708 | doassert classBfr >= 0, "invalid - or .. in class" 709 | class = class + {char(classBfr)..c} 710 | classBfr = -1 711 | flag.excl(readingClassRange) 712 | else: 713 | class.incl(c) 714 | classBfr = int(c) 715 | 716 | proc classUnion(class: var set[char], s: set[char], 717 | classBfr: var int, flag: var set[int8]) = 718 | assert (not (readingClassRange in flag)), "invalid - or .. 
in class" 719 | classBfr = -1 720 | class = class + s 721 | 722 | proc convertToTree(input: set[char]): ReSynTree = 723 | var isFirst = true 724 | for c in input: 725 | if isFirst: 726 | result = Term(lit = Char(pos = -1, c = Real(c = c))) 727 | isFirst = false 728 | else: 729 | result = Bin(op = bor, 730 | left = ~Term(lit = Char(pos = -1, c = Real(c = c))), 731 | right = ~result) 732 | 733 | doassert (not isFirst), "There are some empty character class" 734 | 735 | proc convertToSeqRePart(re: string): seq[RePart] = 736 | result = @[] 737 | 738 | var 739 | flag: set[int8] = {} 740 | class: set[char] = {} 741 | classBfr = -1 742 | braceS = "" 743 | braceE = "" 744 | 745 | for i, c in re: 746 | if readingClass in flag: 747 | if readingEscape in flag: 748 | case c 749 | of ']', '\\', '-': 750 | class.classIncl(c, classBfr, flag) 751 | of '^': 752 | doassert class.card == 0, "invalid escaping for ^ in class" 753 | class.classIncl(c, classBfr, flag) 754 | of 'd', 'D', 's', 'S', 'w', 'W': 755 | class.classUnion(classTable[c], classBfr, flag) 756 | else: 757 | assert false, "invalid escaping in class" 758 | flag.excl(readingEscape) 759 | else: 760 | if readingDot in flag: 761 | flag.excl(readingDot) 762 | if c == '.': 763 | flag.incl(readingClassRange) 764 | continue 765 | else: 766 | class.classIncl(c, classBfr, flag) 767 | case c 768 | of '\\': 769 | flag.incl(readingEscape) 770 | of '-': 771 | flag.incl(readingClassRange) 772 | of '.': 773 | flag.incl(readingDot) 774 | of '^': 775 | if class.card == 0: 776 | flag.incl(classNegate) 777 | else: 778 | class.classIncl(c, classBfr, flag) 779 | of ']': 780 | doassert (not (readingClassRange in flag)), "invalid - or .. in []" 781 | if classNegate in flag: 782 | class = allChars - class 783 | result.add(Tree(class.convertToTree)) 784 | class = {} 785 | classBfr = -1 786 | flag.excl(classNegate) 787 | flag.excl(readingClass) 788 | else: 789 | class.classIncl(c, classBfr, flag) 790 | elif readingBraceS in flag: 791 | doassert c in dChars + {' ', ',', '}'}, "invalid {}" 792 | if c in dChars: 793 | braceS &= c 794 | elif c == ',': 795 | flag.excl(readingBraceS) 796 | flag.incl(readingBraceE) 797 | elif c == '}': 798 | result.add(Brace(s=braceS.parseInt, e=braceS.parseInt)) 799 | braceS = "" 800 | braceE = "" 801 | flag.excl(readingBraceS) 802 | elif readingBraceE in flag: 803 | doassert c in dChars + {' ', '}'}, "invalid {}" 804 | if c in dChars: 805 | braceE &= c 806 | elif c == '}': 807 | result.add(Brace(s=braceS.parseInt, e=braceE.parseInt)) 808 | braceS = "" 809 | braceE = "" 810 | flag.excl(readingBraceE) 811 | else: 812 | if readingEscape in flag: 813 | case c 814 | of '\\', '.', '[', '|', '(', ')', '?', '*', '+', '{': 815 | result.add(RChar(c=c)) 816 | of 'd', 'D', 's', 'S', 'w', 'W': 817 | result.add(Tree(classTable[c].convertToTree)) 818 | else: 819 | doassert false, "Invalid escaping \"\\" & c & "\" is in niml match pattern." 
820 | flag.excl(readingEscape) 821 | else: 822 | case c 823 | of '\\': 824 | flag.incl(readingEscape) 825 | of '.': 826 | result.add(Tree(allChars.convertToTree)) 827 | of '[': 828 | assert (not (classNegate in flag)) 829 | assert (not (readingClassRange in flag)) 830 | assert classBfr == -1 831 | assert class.card == 0 832 | flag.incl(readingClass) 833 | of '{': 834 | flag.incl(readingBraceS) 835 | of '|', '(', ')', '?', '*', '+': 836 | result.add(Special(sc=c)) 837 | else: 838 | result.add(RChar(c=c)) 839 | when defined(niml_tree_debug): 840 | echo "ReTreeParts\n--------" 841 | echo result 842 | echo "--------\n" 843 | 844 | 845 | proc toTree(input: RePart): ReSynTree = 846 | match input: 847 | RChar(c: c): 848 | result = Term(lit = Char(pos = -1, c = Real(c = c))) 849 | Tree(tree: t): 850 | result = t 851 | Brace: 852 | doassert false, "Invalid Re (Brace)" 853 | Special(sc: sc): 854 | doassert false, "Invalid Re (" & $sc & ")" 855 | 856 | proc treeQuestion(input: RePart): ReSynTree = 857 | result = Bin(op = bor, 858 | left = ~Term(lit = Empty()), 859 | right = ~input.toTree()) 860 | 861 | proc treeStar(input: RePart): ReSynTree = 862 | result = Star(child = ~input.toTree) 863 | 864 | proc treePlus(input: RePart): ReSynTree = 865 | result = Bin(op = bcat, 866 | left = ~input.toTree, 867 | right = ~Star(child = ~input.toTree)) 868 | 869 | proc treeBrace(input: RePart, s, e: int): ReSynTree = 870 | result = input.toTree 871 | for i in 0..(e - s): 872 | result = Bin(op = bor, 873 | left = ~Term(Empty()), 874 | right = ~Bin(op = bcat, 875 | left = ~input.toTree, 876 | right = ~result)) 877 | for i in 0.. 0: 920 | result = ip.pop.toTree 921 | while ip.len > 0: 922 | result = Bin(op = bcat, 923 | left = ~ip.pop.toTree, 924 | right = ~result) 925 | 926 | proc handleOr(input: seq[RePart]): ReSynTree = 927 | for i, rp in input: 928 | if rp.kind == RePartKind.Special and rp.sc == '|': 929 | return Bin(op = bor, 930 | left = ~input[0..(i - 1)].handleCat, 931 | right = ~input[(i + 1)..(input.len - 1)].handleOr) 932 | return input.handleCat 933 | 934 | proc handleSubpattern(input: seq[RePart]): ReSynTree = 935 | var 936 | startPos = -1 937 | for i, rp in input: 938 | if rp.kind == RePartKind.Special and rp.sc == '(': 939 | startPos = i 940 | elif rp.kind == RePartKind.Special and rp.sc == ')': 941 | doassert startPos > -1, "Invalid end of Paren" 942 | return ( 943 | input[0..= i.rule.len: 37 | return End[T]() 38 | result = i.rule.right[i.pos] 39 | 40 | proc nextSkipEmpty[T](i: LRItem[T]): Symbol[T] = 41 | result = End[T]() 42 | for idx in i.pos.. 
0: 56 | var new: LRItems[T] 57 | new.init() 58 | for i in checkSet: 59 | match i.next: 60 | NonTermS: 61 | for r in g.filterRulesLeftIs(i.next): 62 | let n = LRItem[T](rule: r, pos: 0) 63 | if not result.containsOrIncl(n): 64 | new.incl(n) 65 | _: 66 | discard 67 | checkSet = new 68 | 69 | proc goto[T](g: Grammar[T], itms: LRItems[T], s: Symbol[T]): LRItems[T] = 70 | doAssert s.kind != SymbolKind.End 71 | assert itms == g.closure(itms) 72 | var gotoHashSet = initHashSet[LRItem[T]]() 73 | for i in itms: 74 | if i.next == s: 75 | gotoHashSet.incl(i.pointForward) 76 | result = g.closure(gotoHashSet) 77 | 78 | proc hash*[T](x: LRItem[T]): Hash = 79 | var h: Hash = 0 80 | h = h !& hash(x.rule) 81 | h = h !& hash(x.pos) 82 | return !$h 83 | 84 | proc makeCanonicalCollection*[T](g: Grammar[T]): (SetOfLRItems[T], 85 | TransTable[T]) = 86 | let init = g.closure([LRItem[T](rule: g.startRule, pos: 0)].toHashSet) 87 | var 88 | cc = [ 89 | init 90 | ].toOrderedSet 91 | checkSet = cc 92 | tt: TransTable[T] = @[] 93 | tt.add(initTransTableRow[T]()) 94 | while checkSet.len > 0: 95 | var new: SetOfLRItems[T] 96 | new.init() 97 | for itms in checkSet: 98 | let frm = cc.indexOf(itms) 99 | assert itms == g.closure(itms) 100 | var done = initHashSet[Symbol[T]]() 101 | done.incl(End[T]()) 102 | for i in itms: 103 | let s = i.next 104 | if (not done.containsOrIncl(s)): 105 | let gt = goto[T](g, itms, s) 106 | if (not cc.containsOrIncl(gt)): 107 | tt.add(initTransTableRow[T]()) 108 | assert cc.card == tt.len 109 | new.incl(gt) 110 | tt[frm][s] = cc.indexOf(gt) 111 | checkSet = new 112 | doAssert cc.indexOf(init) == 0, "init state is not '0'" 113 | result = (cc, tt) 114 | 115 | proc makeTableLR*[T](g: Grammar[T]): ParsingTable[T] = 116 | var 117 | actionTable: ActionTable[T] 118 | gotoTable: GotoTable[T] 119 | actionTable = initTable[State, ActionRow[T]]() 120 | gotoTable = initTable[State, GotoRow[T]]() 121 | let 122 | ag = if g.isAugument: 123 | g 124 | else: 125 | g.augument 126 | (canonicalCollection, _) = makeCanonicalCollection[T](ag) 127 | for idx, itms in canonicalCollection: 128 | actionTable[idx] = initTable[Symbol[T], ActionTableItem[T]]() 129 | gotoTable[idx] = initTable[Symbol[T], State]() 130 | for item in itms: 131 | let sym = item.nextSkipEmpty 132 | match sym: 133 | TermS: 134 | let i = canonicalCollection.indexOf(ag.goto(itms, sym)) 135 | assert i > -1,"There is no 'items' which is equal to 'goto'" 136 | when defined(nimydebug): 137 | if actionTable[idx].haskey(sym) and 138 | actionTable[idx][sym].kind == ActionTableItemKind.Reduce: 139 | echo "LR:CONFLICT!!!" & $idx & ":" & $sym 140 | actionTable[idx][sym] = Shift[T](i) 141 | NonTermS: 142 | let i = canonicalCollection.indexOf(ag.goto(itms, sym)) 143 | assert i > -1, "There is no 'items' which is equal to 'goto'" 144 | gotoTable[idx][sym] = i 145 | End: 146 | if item.rule.left == ag.start: 147 | actionTable[idx][End[T]()] = Accept[T]() 148 | else: 149 | for flw in ag.followTable[item.rule.left]: 150 | if flw.kind == SymbolKind.TermS or flw.kind == SymbolKind.End: 151 | if actionTable[idx].haskey(flw) and 152 | actionTable[idx][flw].kind == ActionTableItemKind.Shift: 153 | when defined(nimydebug): 154 | echo "LR:CONFLICT!!!" 
& $idx & ":" & $flw 155 | continue 156 | actionTable[idx][flw] = Reduce[T](item.rule) 157 | _: 158 | when defined(nimy_debug): 159 | echo "LR: OTHER (" & $sym & ")" 160 | discard 161 | result = ParsingTable[T](action: actionTable, goto: gotoTable) 162 | when defined(nimydebug): 163 | echo "LR:" 164 | echo ag.followTable 165 | echo canonicalCollection 166 | echo result 167 | 168 | proc filterKernel*[T](cc: SetOfLRItems[T]): SetOfLRItems[T] = 169 | result = initOrderedSet[LRItems[T]]() 170 | let start = NonTermS[T]("__Start__") 171 | for i, itms in cc: 172 | for itm in itms: 173 | var kernelItems = initHashSet[LRItem[T]]() 174 | for itm in itms: 175 | if itm.pos != 0 or itm.rule.left == start: 176 | kernelItems.incl(itm) 177 | result.incl(kernelItems) 178 | -------------------------------------------------------------------------------- /src/nimly/parsegen.nim: -------------------------------------------------------------------------------- 1 | import macros 2 | import tables 3 | import sets 4 | 5 | import parsetypes 6 | import parser 7 | 8 | type 9 | PTProc[T, S, R] = proc(nimlytree: ParseTree[T, S]): R {.nimcall.} 10 | RuleToProc*[T, S, R] = Table[Rule[S], PTProc[T, S, R]] 11 | NimyKind = enum 12 | NonTerm 13 | Term 14 | NimyRow = object 15 | kind: NimyKind 16 | retTyNode: NimNode 17 | ruleToProc: NimNode 18 | optRule: NimNode 19 | repRule: NimNode 20 | NimyInfo = Table[string, NimyRow] 21 | 22 | iterator iter(a, b: NimNode, c: seq[NimNode]): (int, NimNode) = 23 | var cnt = 0 24 | yield (cnt, a) 25 | inc(cnt) 26 | for val in b: 27 | yield (cnt, val) 28 | inc(cnt) 29 | for val in c: 30 | yield (cnt, val) 31 | inc(cnt) 32 | 33 | proc initNimyRow(kind: NimyKind, 34 | rtn: NimNode = newEmptyNode(), 35 | rtp: NimNode = newEmptyNode(), 36 | opr: NimNode = newEmptyNode(), 37 | rpr: NimNode = newEmptyNode()): NimyRow = 38 | result = NimyRow(kind: kind, retTyNode: rtn, ruleToProc: rtp, optRule: opr, 39 | repRule: rpr) 40 | 41 | proc isNonTerm(s: string, nimyInfo: NimyInfo): bool = 42 | if not nimyInfo.haskey(s): 43 | return false 44 | return nimyInfo[s].kind == NonTerm 45 | 46 | proc isTerm(s: string, nimyInfo: NimyInfo): bool = 47 | if not nimyInfo.haskey(s): 48 | return false 49 | return nimyInfo[s].kind == Term 50 | 51 | proc initNimyInfo(): NimyInfo = 52 | return initTable[string, NimyRow]() 53 | 54 | proc initRuleToProc*[T, S, R](): RuleToProc[T, S, R] = 55 | return initTable[Rule[S], PTProc[T, S, R]]() 56 | 57 | proc initRuleToProcNode(tokenType, tokenKind, returnType: NimNode): NimNode = 58 | result = nnkAsgn.newTree( 59 | newIdentNode("result"), 60 | nnkCall.newTree( 61 | nnkBracketExpr.newTree( 62 | newIdentNode("initRuleToProc"), 63 | tokenType, 64 | tokenKind, 65 | returnType 66 | ) 67 | ) 68 | ) 69 | 70 | proc genKindNode(kindTy, kind: NimNode): NimNode = 71 | result = nnkDotExpr.newTree( 72 | kindTy, 73 | kind 74 | ) 75 | 76 | proc convertToSymNode(name: string, kindTy: NimNode, 77 | nimyInfo: NimyInfo): NimNode = 78 | if name.isNonTerm(nimyInfo): 79 | result = nnkCall.newTree( 80 | nnkBracketExpr.newTree( 81 | newIdentNode("NonTermS"), 82 | kindTy 83 | ), 84 | newStrLitNode(name) 85 | ) 86 | elif name.isTerm(nimyInfo): 87 | result = nnkCall.newTree( 88 | nnkBracketExpr.newTree( 89 | newIdentNode("TermS"), 90 | kindTy 91 | ), 92 | genKindNode(kindTy, newIdentNode(name)) 93 | ) 94 | else: 95 | doAssert false 96 | 97 | proc convertToSymNode(node, kindTy: NimNode, 98 | nimyInfo: NimyInfo, 99 | noEmpty: bool = true): NimNode = 100 | node.expectKind({nnkIdent, nnkBracket, nnkBracketExpr, 
nnkCurlyExpr}) 101 | case node.kind 102 | of nnkBracketExpr: 103 | doAssert node.len == 1 104 | let innerSym = node[0].strVal 105 | return nnkCall.newTree( 106 | nnkBracketExpr.newTree( 107 | newIdentNode("NonTermS"), 108 | kindTy 109 | ), 110 | newStrLitNode(nimyInfo[innerSym].optRule.strVal) 111 | ) 112 | of nnkCurlyExpr: 113 | doAssert node.len == 1 114 | let innerSym = node[0].strVal 115 | return nnkCall.newTree( 116 | nnkBracketExpr.newTree( 117 | newIdentNode("NonTermS"), 118 | kindTy 119 | ), 120 | newStrLitNode(nimyInfo[innerSym].repRule.strVal) 121 | ) 122 | of nnkBracket: 123 | doAssert node.len == 0 and (not (noEmpty)), "rule cannot empty or" & 124 | " contains [] if the rule is not empty" 125 | return nnkCall.newTree( 126 | nnkBracketExpr.newTree( 127 | newIdentNode("Empty"), 128 | kindTy 129 | ) 130 | ) 131 | of nnkIdent: 132 | let name = node.strVal 133 | return convertToSymNode(name, kindTy, nimyInfo) 134 | else: 135 | doAssert false 136 | 137 | proc newRuleMakerNode(kindTy, left: NimNode, 138 | right: varargs[NimNode]): NimNode = 139 | result = nnkCall.newTree( 140 | nnkBracketExpr.newTree( 141 | newIdentNode("newRule"), 142 | kindTy 143 | ), 144 | left 145 | ) 146 | for node in right: 147 | result.add(node) 148 | 149 | proc nonTermOrEmpty(node: NimNode, nimyInfo: NimyInfo): string = 150 | node.expectKind({nnkBracket, nnkIdent, nnkBracketExpr, nnkCurlyExpr}) 151 | case node.kind 152 | of nnkBracket: 153 | return "" 154 | of nnkBracketExpr: 155 | assert node.len == 1 156 | return nimyInfo[node[0].strVal].optRule.strVal 157 | of nnkCurlyExpr: 158 | assert node.len == 1 159 | return nimyInfo[node[0].strVal].repRule.strVal 160 | else: 161 | let s = node.strVal 162 | if s.isNonTerm(nimyInfo): 163 | result = s 164 | else: 165 | result = "" 166 | 167 | proc isTerm(node: NimNode, nimyInfo: NimyInfo): bool = 168 | node.expectKind({nnkBracket, nnkIdent, nnkBracketExpr, nnkCurlyExpr}) 169 | if node.kind in {nnkBracket, nnkBracketExpr, nnkCurlyExpr}: 170 | return false 171 | elif not (node.strVal.isNonTerm(nimyInfo)): 172 | return true 173 | return false 174 | 175 | iterator ruleRight(node: NimNode): NimNode = 176 | case node.kind 177 | of nnkCall: 178 | yield node[0] 179 | of nnkCommand: 180 | var nd = node 181 | while nd.kind == nnkCommand: 182 | yield nd[0] 183 | nd = nd[1] 184 | yield nd 185 | else: 186 | assert false 187 | 188 | proc parseRuleAndBody(node, kindTy, tokenType, left: NimNode, 189 | nimyInfo: var NimyInfo): ( 190 | NimNode, seq[string], NimNode) = 191 | node.expectKind({nnkCall, nnkCommand}) 192 | var 193 | right: seq[NimNode] = @[] 194 | types: seq[string] = @[] 195 | body: NimNode 196 | noEmpty: bool 197 | 198 | case node.kind: 199 | of nnkCall: 200 | body = node[1] 201 | noEmpty = false 202 | of nnkCommand: 203 | body = node[2] 204 | noEmpty = true 205 | else: 206 | doAssert false 207 | 208 | for sym in node.ruleRight: 209 | right.add(sym.convertToSymNode(kindTy, nimyInfo, noEmpty)) 210 | types.add(sym.nonTermOrEmpty(nimyInfo)) 211 | let ruleMaker = newRuleMakerNode(kindTy, left, right) 212 | result = (ruleMaker, types, body) 213 | 214 | proc parseLeft(clause: NimNode): (string, NimNode) = 215 | clause.expectKind(nnkCall) 216 | clause[0].expectKind(nnkBracketExpr) 217 | doAssert clause[0].len == 2 218 | let 219 | nonTerm = clause[0][0].strVal 220 | rType = clause[0][1] 221 | return (nonTerm, rType) 222 | 223 | proc isSpecialVar(n: NimNode): bool = 224 | return (n.kind == nnkPrefix and 225 | n.len == 2 and 226 | n[0] == newIdentNode("$") and 227 | n[1].kind == 
nnkIntLit) 228 | 229 | proc replaceBody(body, param: NimNode, 230 | types: seq[string], nimyInfo: NimyInfo): NimNode = 231 | proc replaceImpl(body: NimNode): NimNode = 232 | if body.isSpecialVar: 233 | let index = int((body[1].intVal) - 1) 234 | # term 235 | if types[index] == "": 236 | return nnkDotExpr.newTree( 237 | nnkBracketExpr.newTree( 238 | nnkDotExpr.newTree( 239 | param, 240 | newIdentNode("tree") 241 | ), 242 | newIntLitNode(index) 243 | ), 244 | newIdentNode("token") 245 | ) 246 | # nonterm 247 | else: 248 | # table[param[index].rule](param[index].tree) 249 | return nnkCall.newTree( 250 | nnkBracketExpr.newTree( 251 | nimyInfo[types[index]].ruleToProc, 252 | nnkDotExpr.newTree( 253 | nnkBracketExpr.newTree( 254 | nnkDotExpr.newTree( 255 | param, 256 | newIdentNode("tree") 257 | ), 258 | newIntLitNode(index) 259 | ), 260 | newIdentNode("rule") 261 | ) 262 | ), 263 | nnkBracketExpr.newTree( 264 | nnkDotExpr.newTree( 265 | param, 266 | newIdentNode("tree") 267 | ), 268 | newIntLitNode(index) 269 | ), 270 | ) 271 | 272 | else: 273 | if body.len > 0: 274 | result = newTree(body.kind) 275 | for c in body: 276 | result.add(c.replaceImpl) 277 | else: 278 | result = body 279 | result = replaceImpl(body) 280 | 281 | proc makeRuleProc(name, body, rTy, tokenType, tokenKind: NimNode, 282 | types: seq[string], nimyInfo: NimyInfo, pt=false): NimNode = 283 | let 284 | param = newIdentNode("nimlytree") 285 | pTy = nnkBracketExpr.newTree(newIdentNode("ParseTree"), 286 | tokenType, tokenKind) 287 | params = @[rTy, nnkIdentDefs.newTree(param, pTy, newEmptyNode())] 288 | var 289 | procBody: NimNode 290 | if not pt: 291 | procBody = body.replaceBody(param, types, nimyInfo) 292 | result = newProc(name, params, procBody) 293 | else: 294 | result = newProc(name, params) 295 | 296 | proc tableMakerProc(name, tokenType, tokenKind, topNonTerm, 297 | tableMaker: NimNode, 298 | rules, ruleDefs, syms: seq[NimNode]): NimNode = 299 | var body = nnkStmtList.newTree() 300 | body.add( 301 | nnkWhenStmt.newTree( 302 | nnkElifBranch.newTree( 303 | nnkCall.newTree( 304 | newIdentNode("defined"), 305 | newIdentNode("nimlydebug") 306 | ), 307 | nnkStmtList.newTree( 308 | nnkCommand.newTree( 309 | newIdentNode("echo"), 310 | newLit("START: makeing the Parser") 311 | ) 312 | ) 313 | ) 314 | ) 315 | ) 316 | for rd in ruleDefs: 317 | body.add(rd) 318 | let 319 | setId = genSym(nskVar) 320 | grmId = genSym() 321 | body.add( 322 | nnkVarSection.newTree( 323 | nnkIdentDefs.newTree( 324 | setId, 325 | nnkBracketExpr.newTree( 326 | newIdentNode("seq"), 327 | nnkBracketExpr.newTree( 328 | newIdentNode("Rule"), 329 | tokenKind 330 | ) 331 | ), 332 | nnkPrefix.newTree( 333 | newIdentNode("@"), 334 | nnkBracket.newTree( 335 | ) 336 | ) 337 | ) 338 | ) 339 | ) 340 | for rule in rules: 341 | body.add( 342 | nnkCall.newTree( 343 | nnkDotExpr.newTree( 344 | setId, 345 | newIdentNode("add") 346 | ), 347 | rule 348 | ) 349 | ) 350 | body.add( 351 | newLetStmt( 352 | grmId, 353 | nnkCall.newTree( 354 | newIdentNode("initGrammar"), 355 | setId, 356 | topNonTerm 357 | ) 358 | ) 359 | ) 360 | body.add( 361 | nnkAsgn.newTree( 362 | newIdentNode("result"), 363 | nnkCall.newTree( 364 | nnkBracketExpr.newTree( 365 | tableMaker, 366 | tokenKind 367 | ), 368 | grmId 369 | ) 370 | ) 371 | ) 372 | 373 | result = newProc( 374 | name, 375 | @[ 376 | nnkBracketExpr.newTree( 377 | newIdentNode("ParsingTable"), 378 | tokenKind 379 | ) 380 | ], 381 | body 382 | ) 383 | 384 | proc getOpt(sym, ty, nt: NimNode): NimNode = 385 | result = nnkCall.newTree( 386 
| nnkBracketExpr.newTree( 387 | nt, 388 | nnkBracketExpr.newTree( 389 | newIdentNode("seq"), 390 | ty 391 | ) 392 | ), 393 | nnkStmtList.newTree( 394 | nnkCall.newTree( 395 | sym, 396 | nnkStmtList.newTree( 397 | nnkReturnStmt.newTree( 398 | nnkPrefix.newTree( 399 | newIdentNode("@"), 400 | nnkBracket.newTree( 401 | nnkPrefix.newTree( 402 | newIdentNode("$"), 403 | newLit(1) 404 | ) 405 | ) 406 | ) 407 | ) 408 | ) 409 | ), 410 | nnkCall.newTree( 411 | nnkBracket.newTree( 412 | ), 413 | nnkStmtList.newTree( 414 | nnkReturnStmt.newTree( 415 | nnkPrefix.newTree( 416 | newIdentNode("@"), 417 | nnkBracket.newTree( 418 | ) 419 | ) 420 | ) 421 | ) 422 | ) 423 | ) 424 | ) 425 | 426 | proc getRepOpt(sym, ty, nt: NimNode): NimNode = 427 | result = nnkCall.newTree( 428 | nnkBracketExpr.newTree( 429 | nt, 430 | nnkBracketExpr.newTree( 431 | newIdentNode("seq"), 432 | ty 433 | ) 434 | ), 435 | nnkStmtList.newTree( 436 | nnkCall.newTree( 437 | sym, 438 | nnkStmtList.newTree( 439 | nnkReturnStmt.newTree( 440 | nnkPrefix.newTree( 441 | newIdentNode("$"), 442 | newLit(1) 443 | ) 444 | ) 445 | ) 446 | ), 447 | nnkCall.newTree( 448 | nnkBracket.newTree( 449 | ), 450 | nnkStmtList.newTree( 451 | nnkReturnStmt.newTree( 452 | nnkPrefix.newTree( 453 | newIdentNode("@"), 454 | nnkBracket.newTree( 455 | ) 456 | ) 457 | ) 458 | ) 459 | ) 460 | ) 461 | ) 462 | 463 | proc getRep(sym, ty, nt, nnt: NimNode): seq[NimNode] = 464 | result = @[] 465 | result.add(getRepOpt(nnt, ty, nt)) 466 | let new = nnkCall.newTree( 467 | nnkBracketExpr.newTree( 468 | nnt, 469 | nnkBracketExpr.newTree( 470 | newIdentNode("seq"), 471 | ty 472 | ) 473 | ), 474 | nnkStmtList.newTree( 475 | nnkCommand.newTree( 476 | nnt, 477 | sym, 478 | nnkStmtList.newTree( 479 | nnkAsgn.newTree( 480 | newIdentNode("result"), 481 | nnkPrefix.newTree( 482 | newIdentNode("$"), 483 | newLit(1) 484 | ) 485 | ), 486 | nnkCall.newTree( 487 | nnkDotExpr.newTree( 488 | newIdentNode("result"), 489 | newIdentNode("add") 490 | ), 491 | nnkPrefix.newTree( 492 | newIdentNode("$"), 493 | newLit(2) 494 | ) 495 | ) 496 | ) 497 | ), 498 | nnkCall.newTree( 499 | sym, 500 | nnkStmtList.newTree( 501 | nnkReturnStmt.newTree( 502 | nnkPrefix.newTree( 503 | newIdentNode("@"), 504 | nnkBracket.newTree( 505 | nnkPrefix.newTree( 506 | newIdentNode("$"), 507 | newLit(1) 508 | ) 509 | ) 510 | ) 511 | ) 512 | ) 513 | ) 514 | ) 515 | ) 516 | result.add(new) 517 | 518 | macro nimy*(head, body: untyped): untyped = 519 | head.expectKind(nnkBracketExpr) 520 | body.expectKind(nnkStmtList) 521 | var 522 | tableMaker = newIdentNode("makeTableLALR") 523 | let 524 | parserName = head[0] 525 | tokenType = head[1] 526 | tokenKind = parseStmt(tokenType.strVal & "Kind")[0] 527 | for i, hd in head: 528 | if i > 1: 529 | if hd.kind == nnkIdent and $hd == "LR0": 530 | tableMaker = newIdentNode("makeTableLR") 531 | var 532 | nimyInfo = initNimyInfo() 533 | first = true 534 | topNonTerm: string 535 | topNonTermNode: NimNode 536 | returnType: Nimnode 537 | ruleIds: seq[NimNode] = @[] 538 | ruleDefs: seq[NimNode] = @[] 539 | ruleProcs: seq[NimNode] = @[] 540 | ruleToProcMakers: seq[NimNode] = @[] 541 | tableConstDefs: seq[NimNode] = @[] 542 | ruleProcPts: seq[NimNode] = @[] 543 | symNodes: seq[NimNode] = @[] 544 | let topProcId = genSym(nskProc) 545 | result = newTree(nnkStmtList) 546 | 547 | # read BNF first (collert info) 548 | for clause in body: 549 | if clause.kind == nnkCommentStmt: 550 | continue 551 | let (nonTerm, rType) = parseLeft(clause) 552 | doAssert (not (nimyInfo.haskey(nonTerm))), "some 
nonterm are duplicated" 553 | nimyInfo[nonTerm] = initNimyRow(NonTerm, rtn = rType, 554 | rtp = genSym(nskConst)) 555 | if first: 556 | topNonTerm = nonTerm 557 | topNonTermNode = nnkCall.newTree( 558 | nnkBracketExpr.newTree( 559 | newIdentNode("NonTermS"), 560 | tokenKind 561 | ), 562 | newStrLitNode(nonTerm) 563 | ) 564 | returnType = rType 565 | first = false 566 | nimyInfo["__Start__"] = initNimyRow(NonTerm, 567 | rtn = returnType, 568 | rtp = genSym(nskConst)) 569 | 570 | # make opt and rep 571 | var optAndRep: seq[NimNode] = @[] 572 | for clause in body: 573 | if clause.kind == nnkCommentStmt: 574 | continue 575 | for ruleClause in clause[1]: 576 | if ruleClause.kind == nnkCommentStmt: 577 | continue 578 | for sym in ruleClause.ruleRight: 579 | if sym.isTerm(nimyInfo) and not(nimyInfo.haskey(sym.strVal)): 580 | nimyInfo[sym.strVal] = initNimyRow(Term) 581 | if not (sym.kind in {nnkBracketExpr, nnkCurlyExpr}): 582 | continue 583 | doAssert sym.len == 1 584 | let innerSym = sym[0].strVal 585 | if sym[0].isTerm(nimyInfo) and 586 | not(nimyInfo.haskey(innersym)): 587 | nimyInfo[innerSym] = initNimyRow(Term) 588 | case sym.kind 589 | of nnkBracketExpr: 590 | if nimyInfo[innerSym].optRule.kind != nnkEmpty: 591 | continue 592 | let 593 | newStr = "__opt_" & innerSym 594 | new = newIdentNode(newStr) 595 | ty = if innerSym.isNonTerm(nimyInfo): 596 | nimyInfo[innerSym].retTyNode 597 | else: 598 | tokenType 599 | rt = nnkBracketExpr.newTree( 600 | newIdentNode("seq"), 601 | ty 602 | ) 603 | nr = nimyInfo[innerSym] 604 | 605 | optAndRep.add(getOpt(newIdentNode(innerSym), ty, new)) 606 | nimyInfo[newStr] = initNimyRow(NonTerm, 607 | rtn = rt, 608 | rtp = genSym(nskConst)) 609 | nimyInfo[innerSym] = NimyRow( 610 | kind: nr.kind, 611 | retTyNode: nr.retTyNode, 612 | ruleToProc: nr.ruleToProc, 613 | optRule: new, 614 | repRule: nr.repRule 615 | ) 616 | 617 | of nnkCurlyExpr: 618 | if nimyInfo[innerSym].optRule.kind != nnkEmpty: 619 | continue 620 | let 621 | newStr = "__rep_" & innerSym 622 | new = newIdentNode(newStr) 623 | newInnerStr = "__inner_" & newStr 624 | newInner = newIdentNode(newInnerStr) 625 | ty = if innerSym.isNonTerm(nimyInfo): 626 | nimyInfo[innerSym].retTyNode 627 | else: 628 | tokenType 629 | rt = nnkBracketExpr.newTree( 630 | newIdentNode("seq"), 631 | ty 632 | ) 633 | nr = nimyInfo[innerSym] 634 | 635 | optAndRep.add(getRep(newIdentNode(innerSym), ty, new, newInner)) 636 | nimyInfo[newStr] = initNimyRow(NonTerm, rtn = rt, 637 | rtp = genSym(nskConst)) 638 | nimyInfo[newInnerStr] = initNimyRow(NonTerm, rtn = rt, 639 | rtp = genSym(nskConst)) 640 | nimyInfo[innerSym] = NimyRow( 641 | kind: nr.kind, 642 | retTyNode: nr.retTyNode, 643 | ruleToProc: nr.ruleToProc, 644 | optRule: nr.optRule, 645 | repRule: new 646 | ) 647 | 648 | else: 649 | discard 650 | 651 | # make top clause proc 652 | let topClause = nnkCall.newTree( 653 | nnkBracketExpr.newTree( 654 | newIdentNode("__Start__"), 655 | returnType 656 | ), 657 | nnkStmtList.newTree( 658 | nnkCall.newTree( 659 | newIdentNode(topNonTerm), 660 | nnkStmtList.newTree( 661 | nnkReturnStmt.newTree( 662 | nnkPrefix.newTree( 663 | newIdentNode("$"), 664 | newLit(1) 665 | ) 666 | ) 667 | ) 668 | ) 669 | ) 670 | ) 671 | 672 | # read BNF second (make procs) 673 | for i, clause in iter(topClause, body, optAndRep): 674 | if clause.kind == nnkCommentStmt: 675 | continue 676 | let 677 | (nonTerm, rType) = parseLeft(clause) 678 | ruleClauses = clause[1] 679 | var ruleToProcMakerBody = nnkStmtList.newTree( 680 | initRuleToProcNode(tokenType, 
tokenKind, rType) 681 | ) 682 | 683 | # read Rule 684 | for j, ruleClause in ruleClauses: 685 | if ruleClause.kind == nnkCommentStmt: 686 | continue 687 | let 688 | left = nnkCall.newTree( 689 | nnkBracketExpr.newTree( 690 | newIdentNode("NonTermS"), 691 | tokenKind 692 | ), 693 | newStrLitNode(nonTerm) 694 | ) 695 | # argTypes: seq[string] (name if nonterm) 696 | (ruleMaker, argTypes, clauseBody) = parseRuleAndBody( 697 | ruleClause, tokenKind, tokenType, left, nimyInfo 698 | ) 699 | ruleId = genSym(nskConst) 700 | ruleProcId = if i == 0: 701 | topProcId 702 | else: 703 | genSym(nskProc) 704 | ruleIds.add(ruleId) 705 | let ruleDef = newConstStmt( 706 | ruleId, 707 | ruleMaker 708 | ) 709 | # makeRule 710 | ruleDefs.add( 711 | ruleDef 712 | ) 713 | 714 | # make proc and add to result 715 | ruleProcs.add( 716 | makeRuleProc(ruleProcId, clauseBody, nimyInfo[nonTerm].retTyNode, 717 | tokenType, tokenKind, argTypes, nimyInfo) 718 | ) 719 | ruleProcPts.add( 720 | makeRuleProc(ruleProcId, clauseBody, nimyInfo[nonTerm].retTyNode, 721 | tokenType, tokenKind, argTypes, nimyInfo, true) 722 | ) 723 | 724 | # add proc id table maker 725 | ruleToProcMakerBody.add( 726 | ruleDef 727 | ) 728 | ruleToProcMakerBody.add( 729 | nnkAsgn.newTree( 730 | nnkBracketExpr.newTree( 731 | newIdentNode("result"), 732 | ruleId 733 | ), 734 | ruleProcId 735 | ) 736 | ) 737 | # ruleToProcMakerDef 738 | let 739 | ruleToProcMakerName = genSym(nskProc) 740 | ruleToProcMakerNode = newProc( 741 | ruleToProcMakerName, 742 | @[nnkBracketExpr.newTree( 743 | newIdentNode("RuleToProc"), 744 | tokenType, 745 | tokenKind, 746 | rType 747 | )], 748 | ruleToProcMakerBody 749 | ) 750 | ruleToProcMakers.add( 751 | ruleToProcMakerNode 752 | ) 753 | # add table to result 754 | tableConstDefs.add( 755 | newConstStmt( 756 | nimyInfo[nonTerm].ruleToProc, 757 | nnkCall.newTree( 758 | ruleToProcMakerName 759 | ) 760 | ) 761 | ) 762 | 763 | result.add(ruleProcPts) 764 | result.add(ruleToProcMakers) 765 | result.add(tableConstDefs) 766 | result.add(ruleProcs) 767 | 768 | # makeGrammarAndParsingTable 769 | for nt in nimyInfo.keys: 770 | symNodes.add(convertToSymNode(nt, tokenKind, nimyInfo)) 771 | symNodes.add( 772 | newCall( 773 | nnkBracketExpr.newTree( 774 | newIdentNode("End"), 775 | tokenKind 776 | ) 777 | ) 778 | ) 779 | symNodes.add( 780 | newCall( 781 | nnkBracketExpr.newTree( 782 | newIdentNode("Empty"), 783 | tokenKind 784 | ) 785 | ) 786 | ) 787 | let 788 | tmpName = genSym(nskProc) 789 | result.add( 790 | tableMakerProc(tmpName, tokenType, tokenKind, topNonTermNode, tableMaker, 791 | ruleIds, ruleDefs, symNodes) 792 | ) 793 | when defined(nimylet): 794 | result.add( 795 | newLetStmt( 796 | nnkPostfix.newTree( 797 | newIdentNode("*"), 798 | parserName, 799 | ), 800 | nnkCall.newTree( 801 | tmpName 802 | ) 803 | ) 804 | ) 805 | else: 806 | result.add( 807 | newConstStmt( 808 | nnkPostfix.newTree( 809 | newIdentNode("*"), 810 | parserName, 811 | ), 812 | nnkCall.newTree( 813 | tmpName 814 | ) 815 | ) 816 | ) 817 | 818 | result.add(ruleDefs) 819 | 820 | # add proc parse 821 | result.add( 822 | nnkProcDef.newTree( 823 | nnkPostfix.newTree( 824 | newIdentNode("*"), 825 | newIdentNode("parse") 826 | ), 827 | newEmptyNode(), 828 | nnkGenericParams.newTree( 829 | nnkIdentDefs.newTree( 830 | newIdentNode("T"), 831 | newIdentNode("S"), 832 | newEmptyNode(), 833 | newEmptyNode() 834 | ) 835 | ), 836 | nnkFormalParams.newTree( 837 | returnType, 838 | nnkIdentDefs.newTree( 839 | newIdentNode("parser"), 840 | nnkVarTy.newTree( 841 | 
nnkBracketExpr.newTree( 842 | newIdentNode("Parser"), 843 | newIdentNode("S") 844 | ) 845 | ), 846 | newEmptyNode() 847 | ), 848 | nnkIdentDefs.newTree( 849 | newIdentNode("lexer"), 850 | nnkVarTy.newTree( 851 | nnkBracketExpr.newTree( 852 | newIdentNode("NimlLexer"), 853 | newIdentNode("T") 854 | ) 855 | ), 856 | newEmptyNode() 857 | ) 858 | ), 859 | newEmptyNode(), 860 | newEmptyNode(), 861 | nnkStmtList.newTree( 862 | nnkLetSection.newTree( 863 | nnkIdentDefs.newTree( 864 | newIdentNode("tree"), 865 | newEmptyNode(), 866 | nnkCall.newTree( 867 | newIdentNode("parseImpl"), 868 | newIdentNode("parser"), 869 | newIdentNode("lexer") 870 | ) 871 | ) 872 | ), 873 | nnkReturnStmt.newTree( 874 | nnkCall.newTree( 875 | topProcId, 876 | newIdentNode("tree"), 877 | ) 878 | ) 879 | ) 880 | ) 881 | ) 882 | when defined(nimydebug): 883 | echo toStrLit(result) 884 | -------------------------------------------------------------------------------- /src/nimly/parser.nim: -------------------------------------------------------------------------------- 1 | import tables 2 | import strutils 3 | 4 | import patty 5 | 6 | import lextypes 7 | import lexer 8 | import parsetypes 9 | 10 | type 11 | State* = int 12 | ActionTableItemKind* {.pure.} = enum 13 | Shift 14 | Reduce 15 | Accept 16 | Error 17 | ActionTableItem*[T] = object 18 | case kind*: ActionTableItemKind: 19 | of ActionTableItemKind.Shift: 20 | state*: State 21 | of ActionTableItemKind.Reduce: 22 | rule*: Rule[T] 23 | else: 24 | discard 25 | 26 | proc `$`*[T](i: ActionTableItem[T]): string = 27 | match i: 28 | Shift(state: s): 29 | return "Shift(" & $s & ")" 30 | Reduce(rule: r): 31 | return "Reduce(" & $r & ")" 32 | Accept: 33 | return "Accept" 34 | Error: 35 | return "Error" 36 | 37 | proc Shift*[T](state: State): ActionTableItem[T] = 38 | return ActionTableItem[T](kind: ActionTableItemKind.Shift, state: state) 39 | 40 | proc Reduce*[T](rule: Rule[T]): ActionTableItem[T] = 41 | return ActionTableItem[T](kind: ActionTableItemKind.Reduce, rule: rule) 42 | 43 | proc Accept*[T](): ActionTableItem[T] = 44 | return ActionTableItem[T](kind: ActionTableItemKind.Accept) 45 | 46 | proc Error*[T](): ActionTableItem[T] = 47 | return ActionTableItem[T](kind: ActionTableItemKind.Error) 48 | 49 | type 50 | ActionRow*[T] = Table[Symbol[T], ActionTableItem[T]] 51 | ActionTable*[T] = Table[State, ActionRow[T]] 52 | GotoRow*[T] = Table[Symbol[T], State] 53 | GotoTable*[T] = Table[State, GotoRow[T]] 54 | ParsingTable*[T] = object 55 | action*: ActionTable[T] 56 | goto*: GotoTable[T] 57 | ConstActionTable = seq[seq[int]] 58 | ConstGotoTable = seq[seq[int]] 59 | ConstTable* = (ConstActionTable, ConstGotoTable) 60 | Parser*[T] = object 61 | stack: seq[State] 62 | table: ParsingTable[T] 63 | 64 | proc `$`*[T](at: ActionTable[T]): string = 65 | result = "\nActionTable:\n--------\n" 66 | for s, row in at: 67 | result = result & $s & ":" & $row & "\n" 68 | result = result & "--------\n" 69 | 70 | proc `$`*[T](gt: GotoTable[T]): string = 71 | result = "\nGotoTable:\n--------\n" 72 | for s, row in gt: 73 | result = result & $s & ":" & $row & "\n" 74 | result = result & "--------\n" 75 | 76 | variantp ParseTree[T, S]: 77 | Terminal(token: T) 78 | NonTerminal(rule: Rule[S], tree: seq[ParseTree[T, S]]) 79 | 80 | proc `$`*[T, S](pt: ParseTree[T, S], indent: int = 0): string = 81 | match pt: 82 | Terminal(token: t): 83 | result = " ".repeat(indent) & $t & "\n" 84 | NonTerminal(rule: r, tree: t): 85 | result = " ".repeat(indent) & "rule: " & $r & "\n" 86 | for n in t: 87 | result = 
result & `$`(n, indent + 1) 88 | 89 | proc add[T](parser: var Parser[T], s: State) = 90 | parser.stack.add(s) 91 | 92 | proc push[T](parser: var Parser[T], s: State) = 93 | parser.add(s) 94 | 95 | proc pop[T](parser: var Parser[T]): State = 96 | return parser.stack.pop 97 | 98 | proc top[T](parser: Parser[T]): State = 99 | return parser.stack[parser.stack.high] 100 | 101 | proc parseImpl*[T, S](parser: var Parser[S], 102 | lexer: var NimlLexer[T]): ParseTree[T, S] = 103 | var tree: seq[ParseTree[T, S]] = @[] 104 | var token: T 105 | var symbol: Symbol[S] 106 | if lexer.isEmpty: 107 | symbol = End[S]() 108 | else: 109 | token = lexer.lexNext 110 | symbol = TermS[S](token.kind) 111 | while true: 112 | when defined(nimydebug): 113 | echo "parser stack:" & $parser.stack 114 | echo "read token:" & $symbol 115 | var action: ActionTableItem[S] 116 | try: 117 | action = parser.table.action[parser.top][symbol] 118 | except KeyError: 119 | var msg: string = "Unexpected token " & $symbol & " is passed." 120 | if symbol.kind == SymbolKind.End: 121 | msg = "Unexpected lexer stops (EOF). Cannot parse whole the tokens lexer passes." 122 | else: 123 | try: 124 | msg = msg & "\ntoken: " & $token 125 | except: 126 | discard 127 | raise newException(NimyActionError, msg) 128 | except: 129 | raise 130 | when defined(nimydebug): 131 | echo action 132 | case action.kind 133 | of ActionTableItemKind.Shift: 134 | let s = action.state 135 | tree.add(Terminal[T, S](token)) 136 | try: 137 | token = lexer.lexNext 138 | symbol = TermS[S](token.kind) 139 | except NimlEOFError: 140 | symbol = End[S]() 141 | except: 142 | raise 143 | parser.push(s) 144 | of ActionTableItemKind.Reduce: 145 | let r = action.rule 146 | let reseted = tree[^r.lenWithoutEmpty..^1] 147 | for i in 0.. spaces: 25 | top = indStack.pop() 26 | indStack.add(top) 27 | if top == spaces: 28 | return Indent(indStack.len - 1) 29 | else: 30 | indStack.add(spaces) 31 | return Indent(indStack.len - 1) 32 | -------------------------------------------------------------------------------- /tests/parser_415.nim: -------------------------------------------------------------------------------- 1 | import patty 2 | import nimly 3 | 4 | variantp MyTerm: 5 | EQ 6 | ST 7 | ID(val: string) 8 | IGNORE 9 | 10 | niml lex415[MyTerm]: 11 | r"=": 12 | return EQ() 13 | r"\*": 14 | return ST() 15 | r"[a..zA..Z\-_][a..zA..Z0..9\-_]*": 16 | return ID(token.token) 17 | r"\s": 18 | return IGNORE() 19 | 20 | nimy psr415LALR[MyTerm]: 21 | start[string]: 22 | left EQ right: 23 | return $1 & "=" & $3 24 | right: 25 | return $1 26 | left[string]: 27 | ST right: 28 | return "*" & $2 29 | ID: 30 | return ($1).val 31 | right[string]: 32 | left: 33 | return $1 34 | -------------------------------------------------------------------------------- /tests/parser_415_lr.nim: -------------------------------------------------------------------------------- 1 | import patty 2 | import nimly 3 | import parser_415 4 | 5 | nimy psr415LR[MyTerm, LR0]: 6 | start[string]: 7 | left EQ right: 8 | return $1 & "=" & $3 9 | right: 10 | return $1 11 | left[string]: 12 | ST right: 13 | return "*" & $2 14 | ID: 15 | return ($1).val 16 | right[string]: 17 | left: 18 | return $1 19 | -------------------------------------------------------------------------------- /tests/state_example.txt: -------------------------------------------------------------------------------- 1 | if test + 1 then { true } else { 2 * ( test + 3 ) } 2 | -------------------------------------------------------------------------------- 
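Note: the ``parseImpl`` loop above is the whole LR driver. It keeps a state stack, reads one token of lookahead, and consults ``table.action[state][symbol]`` to either shift the token onto the tree stack or reduce by a rule and follow ``table.goto``. The state-parser tests below all drive it through the same thin API. A minimal sketch of that usage, assuming a ``niml``-generated lexer ``myLexer`` and a ``nimy``-generated parser ``myParser`` over some ``Token`` variant (these names are placeholders, not identifiers from the sources above):

.. code-block:: nim

  import nimly

  var lexer = myLexer.newWithString("some input")  # or myLexer.open("file.txt")
  # tokens for which ignoreIf returns true are skipped before parsing
  lexer.ignoreIf = proc(r: Token): bool = r.kind == TokenKind.IGNORE

  var parser = myParser.newParser()
  # parse runs parseImpl and then the generated top-rule proc on the tree
  echo parser.parse(lexer)

  # both objects are reusable:
  lexer.initWithString("other input")
  parser.init()
  echo parser.parse(lexer)
  lexer.close

On a lookup miss in the action table, ``parseImpl`` raises ``NimyActionError`` (see ``tests/test_error_state_parse.nim`` below), so error handling is a plain ``try``/``except`` around ``parse``.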
/tests/state_parser.nim: -------------------------------------------------------------------------------- 1 | import patty 2 | import nimly 3 | import strutils 4 | 5 | variantp StateToken: 6 | SPLUS 7 | SMULTI 8 | SNUM(val: int) 9 | SID(str: string) 10 | SLPAREN 11 | SRPAREN 12 | SLBRACE 13 | SRBRACE 14 | SIF 15 | SELSE 16 | STHEN 17 | SIGNORE 18 | 19 | niml testStateLex[StateToken]: 20 | r"\+": 21 | return SPLUS() 22 | r"\*": 23 | return SMULTI() 24 | r"\d+": 25 | return SNUM(parseInt(token.token)) 26 | r"if": 27 | return SIF() 28 | r"else": 29 | return SELSE() 30 | r"then": 31 | return STHEN() 32 | r"\(": 33 | return SLPAREN() 34 | r"\)": 35 | return SRPAREN() 36 | r"\{": 37 | return SLBRACE() 38 | r"}": 39 | return SRBRACE() 40 | r"[a..zA..Z_]\w*": 41 | return SID(token.token) 42 | r"\s": 43 | return SIGNORE() 44 | 45 | nimy testStatePar[StateToken]: 46 | top[string]: 47 | state: 48 | return $1 49 | state[string]: 50 | SIF exp STHEN SLBRACE state SRBRACE SELSE SLBRACE state SRBRACE: 51 | return "IF(" & $2 & ")THEN{" & $5 & "}ELSE{" & $9 & "}" 52 | exp: 53 | return $1 54 | exp[string]: 55 | plus: 56 | return $1 57 | plus[string]: 58 | mult SPLUS plus: 59 | return $1 & "+" & $3 60 | mult: 61 | return $1 62 | mult[string]: 63 | num SMULTI mult: 64 | return "(" & $1 & "*" & $3 & ")" 65 | num: 66 | return $1 67 | num[string]: 68 | SNUM: 69 | return $(($1).val) 70 | SID: 71 | return $(($1).str) 72 | SLPAREN exp SRPAREN: 73 | return "(" & $2 & ")" 74 | -------------------------------------------------------------------------------- /tests/state_parser_with_empty.nim: -------------------------------------------------------------------------------- 1 | import patty 2 | import nimly 3 | import strutils 4 | 5 | variantp StateToken: 6 | SPLUS 7 | SMULTI 8 | SNUM(val: int) 9 | SID(str: string) 10 | SLPAREN 11 | SRPAREN 12 | SLBRACE 13 | SRBRACE 14 | SIF 15 | SELSE 16 | STHEN 17 | SIGNORE 18 | 19 | niml testStateLex[StateToken]: 20 | r"\+": 21 | return SPLUS() 22 | r"\*": 23 | return SMULTI() 24 | r"\d+": 25 | return SNUM(parseInt(token.token)) 26 | r"if": 27 | return SIF() 28 | r"else": 29 | return SELSE() 30 | r"then": 31 | return STHEN() 32 | r"\(": 33 | return SLPAREN() 34 | r"\)": 35 | return SRPAREN() 36 | r"\{": 37 | return SLBRACE() 38 | r"}": 39 | return SRBRACE() 40 | r"[a..zA..Z_]\w*": 41 | return SID(token.token) 42 | r"\s": 43 | return SIGNORE() 44 | 45 | nimy testStatePar[StateToken]: 46 | top[string]: 47 | state: 48 | return $1 49 | state[string]: 50 | SIF cond STHEN SLBRACE state SRBRACE el: 51 | return "IF(" & $2 & ")THEN{" & $5 & "}" & $7 52 | exp: 53 | return $1 54 | el[string]: 55 | []: 56 | return "" 57 | SELSE SLBRACE state SRBRACE: 58 | return "ELSE{" & $3 & "}" 59 | cond[string]: 60 | []: 61 | return "" 62 | exp: 63 | return $1 64 | exp[string]: 65 | plus: 66 | return $1 67 | plus[string]: 68 | mult SPLUS plus: 69 | return $1 & "+" & $3 70 | mult: 71 | return $1 72 | mult[string]: 73 | num SMULTI mult: 74 | return "(" & $1 & "*" & $3 & ")" 75 | num: 76 | return $1 77 | num[string]: 78 | SNUM: 79 | return $(($1).val) 80 | SID: 81 | return $(($1).str) 82 | SLPAREN exp SRPAREN: 83 | return "(" & $2 & ")" 84 | -------------------------------------------------------------------------------- /tests/state_parser_with_empty_lr.nim: -------------------------------------------------------------------------------- 1 | import patty 2 | import nimly 3 | import strutils 4 | 5 | variantp StateToken: 6 | SPLUS 7 | SMULTI 8 | SNUM(val: int) 9 | SID(str: string) 10 | SLPAREN 11 | SRPAREN 12 |
SLBRACE 13 | SRBRACE 14 | SIF 15 | SELSE 16 | STHEN 17 | SIGNORE 18 | 19 | niml testStateLex[StateToken]: 20 | r"\+": 21 | return SPLUS() 22 | r"\*": 23 | return SMULTI() 24 | r"\d+": 25 | return SNUM(parseInt(token.token)) 26 | r"if": 27 | return SIF() 28 | r"else": 29 | return SELSE() 30 | r"then": 31 | return STHEN() 32 | r"\(": 33 | return SLPAREN() 34 | r"\)": 35 | return SRPAREN() 36 | r"\{": 37 | return SLBRACE() 38 | r"}": 39 | return SRBRACE() 40 | r"[a..zA..Z_]\w*": 41 | return SID(token.token) 42 | r"\s": 43 | return SIGNORE() 44 | 45 | nimy testStatePar[StateToken,LR0]: 46 | top[string]: 47 | state: 48 | return $1 49 | state[string]: 50 | SIF cond STHEN SLBRACE state SRBRACE el: 51 | return "IF(" & $2 & ")THEN{" & $5 & "}" & $7 52 | exp: 53 | return $1 54 | el[string]: 55 | []: 56 | return "" 57 | SELSE SLBRACE state SRBRACE: 58 | return "ELSE{" & $3 & "}" 59 | cond[string]: 60 | []: 61 | return "" 62 | exp: 63 | return $1 64 | exp[string]: 65 | plus: 66 | return $1 67 | plus[string]: 68 | mult SPLUS plus: 69 | return $1 & "+" & $3 70 | mult: 71 | return $1 72 | mult[string]: 73 | num SMULTI mult: 74 | return "(" & $1 & "*" & $3 & ")" 75 | num: 76 | return $1 77 | num[string]: 78 | SNUM: 79 | return $(($1).val) 80 | SID: 81 | return $(($1).str) 82 | SLPAREN exp SRPAREN: 83 | return "(" & $2 & ")" 84 | -------------------------------------------------------------------------------- /tests/test_compile_time_parser.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import patty 3 | import strutils 4 | import macros 5 | 6 | import nimly 7 | 8 | ## variant is defined in patty 9 | variant MyToken: 10 | PLUS 11 | MULTI 12 | NUM(val: int) 13 | DOT 14 | LPAREN 15 | RPAREN 16 | IGNORE 17 | 18 | niml testLex[MyToken]: 19 | r"\(": 20 | return LPAREN() 21 | r"\)": 22 | return RPAREN() 23 | r"\+": 24 | return PLUS() 25 | r"\*": 26 | return MULTI() 27 | r"\d+": 28 | return NUM(parseInt(token.token)) 29 | r"\s": 30 | return IGNORE() 31 | 32 | nimy testPar[MyToken]: 33 | top[NimNode]: 34 | plus: 35 | return $1 36 | 37 | plus[NimNode]: 38 | mult PLUS plus: 39 | return newCall( 40 | ident("+"), 41 | $1, 42 | $3 43 | ) 44 | 45 | mult: 46 | return $1 47 | 48 | mult[NimNode]: 49 | num MULTI mult: 50 | return newCall( 51 | ident("*"), 52 | $1, 53 | $3 54 | ) 55 | 56 | num: 57 | return $1 58 | 59 | num[NimNode]: 60 | LPAREN plus RPAREN: 61 | return $2 62 | 63 | NUM: 64 | return newIntLitNode(($1).val) 65 | 66 | macro calculate(str: untyped): untyped = 67 | var 68 | lexer = testLex.newWithString($str) 69 | lexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE 70 | var 71 | parser = testPar.newParser() 72 | return parser.parse(lexer) 73 | 74 | test "test compile-time parser": 75 | check calculate("(20 + 1) * 2") == 42 76 | -------------------------------------------------------------------------------- /tests/test_empty_str_does_not_cause_error.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import patty 3 | 4 | import nimly 5 | 6 | variant Token: 7 | CHARS(val: string) 8 | IGNORE 9 | 10 | niml testLex[Token]: 11 | r"\w+": 12 | return CHARS(token.token) 13 | r"\s": 14 | return IGNORE() 15 | 16 | nimy testPar[Token]: 17 | top[seq[string]]: 18 | word{}: 19 | return $1 20 | word[string]: 21 | CHARS: 22 | return ($1).val 23 | 24 | test "parser works": 25 | var testLexer = testLex.newWithString("This is a test") 26 | testLexer.ignoreIf = proc(r: Token): bool = r.kind == 
TokenKind.IGNORE 27 | var parser = testPar.newParser() 28 | check parser.parse(testLexer) == @["This", "is", "a", "test"] 29 | 30 | test "empty string does not cause error": 31 | var testLexer = testLex.newWithString("") 32 | testLexer.ignoreIf = proc(r: Token): bool = r.kind == TokenKind.IGNORE 33 | var parser = testPar.newParser() 34 | check parser.parse(testLexer).len == 0 35 | -------------------------------------------------------------------------------- /tests/test_empty_str_error_msg.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import strutils 3 | import patty 4 | 5 | import nimly 6 | 7 | variant Token: 8 | CHARS(val: string) 9 | IGNORE 10 | 11 | niml testLex[Token]: 12 | r"\w+": 13 | return CHARS(token.token) 14 | r"\s": 15 | return IGNORE() 16 | 17 | nimy testPar[Token]: 18 | top[seq[string]]: 19 | word word{}: 20 | return @[$1] & $2 21 | word[string]: 22 | CHARS: 23 | return ($1).val 24 | 25 | test "parser works": 26 | var testLexer = testLex.newWithString("This is a test") 27 | testLexer.ignoreIf = proc(r: Token): bool = r.kind == TokenKind.IGNORE 28 | var parser = testPar.newParser() 29 | check parser.parse(testLexer) == @["This", "is", "a", "test"] 30 | 31 | test "empty string does not cause error": 32 | var testLexer = testLex.newWithString("") 33 | testLexer.ignoreIf = proc(r: Token): bool = r.kind == TokenKind.IGNORE 34 | try: 35 | var parser = testPar.newParser() 36 | let parsed = parser.parse(testLexer).len 37 | assert false, "it expected to fail to parse" 38 | except NimyActionError as e: 39 | check e.msg.find("Unexpected lexer stops (EOF)") > -1 40 | -------------------------------------------------------------------------------- /tests/test_error_state_parse.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import nimly 4 | 5 | import state_parser 6 | 7 | test "test error": 8 | var lexer = testStateLex.newWithString("error if test + 1 then { true } else { 2 * ( test + 3 ) }") 9 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 10 | 11 | var parser = testStatePar.newParser() 12 | expect NimyActionError: 13 | discard parser.parse(lexer) 14 | -------------------------------------------------------------------------------- /tests/test_issue_51.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import patty 4 | import nimly 5 | 6 | variant MyToken: 7 | tSYM(str: string) 8 | 9 | niml lexer1[MyToken]: 10 | "[^ \t\r\n]+": tSYM(token.token) 11 | 12 | niml lexer2[MyToken]: 13 | r"\S+": 14 | return tSYM(token.token) 15 | 16 | test "test [^...] in regex": 17 | var testLexer = lexer1.newWithString("loloi<<1") 18 | testLexer.ignoreIf = proc(r: MyToken): bool = false 19 | var ret: seq[MyToken] = @[] 20 | for s in testLexer.lexIter: 21 | ret.add(s) 22 | check ret == @[tSYM("loloi<<1")] 23 | testLexer.close 24 | 25 | test "test [^...] 
in regex (exception)": 26 | var testLexer = lexer1.newWithString("loloi << 1") 27 | testLexer.ignoreIf = proc(r: MyToken): bool = false 28 | check testLexer.lexNext == tSYM("loloi") 29 | expect LexError: 30 | discard testLexer.lexNext 31 | 32 | test r"test \S in regex (exception)": 33 | var testLexer = lexer2.newWithString("loloi<<1") 34 | testLexer.ignoreIf = proc(r: MyToken): bool = false 35 | var ret: seq[MyToken] = @[] 36 | for s in testLexer.lexIter: 37 | ret.add(s) 38 | check ret == @[tSYM("loloi<<1")] 39 | testLexer.close 40 | 41 | test r"test \S in regex (exception)": 42 | var testLexer = lexer2.newWithString("loloi << 1") 43 | testLexer.ignoreIf = proc(r: MyToken): bool = false 44 | check testLexer.lexNext == tSYM("loloi") 45 | expect LexError: 46 | discard testLexer.lexNext 47 | 48 | -------------------------------------------------------------------------------- /tests/test_lalr.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | include nimly/lr 4 | include nimly/lalr 5 | 6 | let 7 | g = initGrammar[string]( 8 | [ 9 | newRule(NonTermS[string]("S"), 10 | NonTermS[string]("C"),NonTermS[string]("C")), 11 | newRule(NonTermS[string]("C"), TermS("c"), NonTermS[string]("C")), 12 | newRule(NonTermS[string]("C"), TermS("d")), 13 | ].toHashSet, 14 | NonTermS[string]("S") 15 | ).augument 16 | 17 | g415 = initGrammar[string]( 18 | [ 19 | newRule(NonTermS[string]("S"), NonTermS[string]("R")), 20 | newRule(NonTermS[string]("S"), 21 | NonTermS[string]("L"), TermS("="), NonTermS[string]("R")), 22 | newRule(NonTermS[string]("L"), TermS("*"), NonTermS[string]("R")), 23 | newRule(NonTermS[string]("L"), TermS("id")), 24 | newRule(NonTermS[string]("R"), NonTermS[string]("L")), 25 | ].toHashSet, 26 | NonTermS[string]("S") 27 | ).augument 28 | 29 | test "test closure for lalr": 30 | let 31 | itm = LALRItem[string](rule: g.startRule, pos: 0, ahead: End[string]()) 32 | c = closure(g, toHashSet[LALRItem[string]]([itm])) 33 | expected = [ 34 | itm, 35 | LALRItem[string]( 36 | rule: newRule(NonTermS[string]("S"), 37 | NonTermS[string]("C"),NonTermS[string]("C")), 38 | pos: 0, 39 | ahead: End[string]() 40 | ), 41 | LALRItem[string]( 42 | rule: newRule(NonTermS[string]("C"), TermS("c"), NonTermS[string]("C")), 43 | pos: 0, 44 | ahead: TermS("c") 45 | ), 46 | LALRItem[string]( 47 | rule: newRule(NonTermS[string]("C"), TermS("c"), NonTermS[string]("C")), 48 | pos: 0, 49 | ahead: TermS("d") 50 | ), 51 | LALRItem[string]( 52 | rule: newRule(NonTermS[string]("C"), TermS("d")), 53 | pos: 0, 54 | ahead: TermS("c") 55 | ), 56 | LALRItem[string]( 57 | rule: newRule(NonTermS[string]("C"), TermS("d")), 58 | pos: 0, 59 | ahead: TermS("d") 60 | ) 61 | ].toHashSet 62 | 63 | check c == expected 64 | 65 | proc contains[T](itms: LALRItems[T], itm: LRItem[T]): bool = 66 | result = false 67 | for i in itms: 68 | if i.toLRItem == itm: 69 | return true 70 | 71 | test "test make LALR kernel": 72 | let 73 | (cc, tt) = g415.makeCanonicalCollection 74 | kernel = cc.filterKernel 75 | check kernel.card == 10 76 | for i, itms in kernel: 77 | if itms.contains( 78 | LRItem[string]( 79 | rule: newRule(NonTermS[string]("S"), 80 | NonTermS[string]("L"), TermS("="), 81 | NonTermS[string]("R")), 82 | pos: 1 83 | ) 84 | ): 85 | check itms.card == 2 86 | else: 87 | check itms.card == 1 88 | let lalrKernel = kernel.toLALRKernel(g415, tt) 89 | for i, itms in lalrKernel: 90 | if itms.contains( 91 | LRItem[string]( 92 | rule: newRule(NonTermS[string]("R"), NonTermS[string]("L")), 93 | 
pos: 1 94 | ) 95 | ) or itms.contains( 96 | LRItem[string]( 97 | rule: newRule(NonTermS[string]("L"), 98 | TermS("*"), NonTermS[string]("R")), 99 | pos: 1 100 | ) 101 | ) or itms.contains( 102 | LRItem[string]( 103 | rule: newRule(NonTermS[string]("L"), TermS("id")), 104 | pos: 1 105 | ) 106 | ) or itms.contains( 107 | LRItem[string]( 108 | rule: newRule(NonTermS[string]("L"), 109 | TermS("*"), NonTermS[string]("R")), 110 | pos: 2 111 | ) 112 | ): 113 | check itms.card == 2 114 | else: 115 | check itms.card == 1 116 | 117 | 118 | -------------------------------------------------------------------------------- /tests/test_lexer.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import streams 3 | 4 | import nimly/lextypes 5 | import nimly/lexgen 6 | import nimly/lexer 7 | 8 | proc finalState[T](ld: LexData[T], str: string): int = 9 | result = 0 10 | for c in str: 11 | result = ld.nextState(result, c) 12 | if result == deadState: 13 | return 14 | 15 | proc doesAccept[T](ld: LexData[T], str: string): bool = 16 | let fs = finalState[T](ld, str) 17 | if fs == deadState: 18 | return false 19 | return ld.dba[fs].accept.kind == AcceptKind.Acc 20 | 21 | proc accProc[T](ld: LexData[T], str: string): AccProc[T] = 22 | let fs = finalState[T](ld, str) 23 | assert ld.dba[fs].accept.kind == AcceptKind.Acc 24 | return ld.dba[fs].accept.fun 25 | 26 | niml testLex[string]: 27 | r"if": 28 | return token.token 29 | r"else": 30 | return "acc" 31 | r"\s": 32 | return "" 33 | 34 | test "test macro niml (if/else)": 35 | var testLexer = testLex.newWithString("""if 36 | else if 37 | else""") 38 | testLexer.ignoreIf = proc(r: string): bool = false 39 | var ret: seq[string] = @[] 40 | for s in testLexer.lexIter(): 41 | ret.add(s) 42 | check ret == @["if", "", "acc", "", "if", "", "acc"] 43 | 44 | testLexer = testLex.newWithString("""if 45 | else if 46 | else""") 47 | testLexer.ignoreIf = proc(r: string): bool = r == "" 48 | ret = @[] 49 | for s in testLexer.lexIter: 50 | ret.add(s) 51 | check ret == @["if", "acc", "if", "acc"] 52 | testLexer.close 53 | -------------------------------------------------------------------------------- /tests/test_lexer_counting_newlines.nim: -------------------------------------------------------------------------------- 1 | import strutils 2 | import unittest 3 | 4 | import nimly 5 | 6 | 7 | type 8 | TokenType = enum 9 | OP 10 | INT 11 | IGNORE 12 | 13 | Token = object 14 | typ: TokenType 15 | val: string 16 | line: int 17 | col: int 18 | 19 | 20 | niml testLex[Token]: 21 | r"\+|-|\*|/": 22 | return Token(typ: OP, val: token.token, 23 | line: token.lineNum, col: token.colNum) 24 | r"\d+": 25 | return Token(typ: INT, val: token.token, 26 | line: token.lineNum, col: token.colNum) 27 | r"\s": 28 | return Token(typ: IGNORE, val: "", 29 | line: token.lineNum, col: token.colNum) 30 | 31 | 32 | test "test lexer counting newline (Issue #34)": 33 | let 34 | str = "22 / \n 11 +\n40" 35 | expected = @[ 36 | Token(typ: INT, val: "22", line: 1, col: 0), 37 | Token(typ: OP, val: "/", line: 1, col: 3), 38 | Token(typ: INT, val: "11", line: 2, col: 1), 39 | Token(typ: OP, val: "+", line: 2, col: 4), 40 | Token(typ: INT, val: "40", line: 3, col: 0) 41 | ] 42 | 43 | var 44 | calcLexer = testLex.newWithString(str) 45 | ret: seq[Token] = @[] 46 | 47 | calcLexer.ignoreIf = proc(r: Token): bool = r.val == "" 48 | 49 | for token in calcLexer.lexIter: 50 | ret.add(token) 51 | 52 | check ret == expected 53 | 
-------------------------------------------------------------------------------- /tests/test_lexer_global_var.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import nimly 4 | import lexer_global_var 5 | 6 | test "test global var": 7 | var lexer = lexGlobalVar.newWithString( 8 | "0\n 1\n 1\n 2\n\n 1\n 2" 9 | ) 10 | let expected = @[ 11 | Num("0"), 12 | Indent(1), 13 | Num("1"), 14 | Indent(1), 15 | Num("1"), 16 | Indent(2), 17 | Num("2"), 18 | Indent(1), 19 | Num("1"), 20 | Indent(2), 21 | Num("2") 22 | ] 23 | var ret: seq[Token] = @[] 24 | for s in lexer.lexIter: 25 | ret.add(s) 26 | check ret == expected 27 | 28 | # test setUp is executed with initWithString 29 | lexer.initWithString("\n 0\n 1\n 1\n 2\n\n 1\n 2") 30 | let expected2 = @[ 31 | Indent(1), 32 | Num("0"), 33 | Indent(1), 34 | Num("1"), 35 | Indent(1), 36 | Num("1"), 37 | Indent(2), 38 | Num("2"), 39 | Indent(1), 40 | Num("1"), 41 | Indent(2), 42 | Num("2") 43 | ] 44 | ret = @[] 45 | for s in lexer.lexIter: 46 | ret.add(s) 47 | check ret == expected2 48 | -------------------------------------------------------------------------------- /tests/test_lexgen.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | include nimly/lexgen 4 | 5 | proc strid(token: LToken): string = 6 | return token.token 7 | 8 | let 9 | dbookExampleSynT = Bin(bcat, 10 | ~Bin(bcat, 11 | ~Bin(bcat, 12 | ~Bin(bcat, 13 | ~Star(~Bin(bor, 14 | ~Term(Char(1, Real('a'))), 15 | ~Term(Char(2, Real('b')))) 16 | ), 17 | ~Term(Char(3, Real('a')))), 18 | ~Term(Char(4, Real('b')))), 19 | ~Term(Char(5, Real('b')))), 20 | ~Term(Char(6, End()))) 21 | 22 | dbookExampleLexRe = LexRe[string]( 23 | st: dbookExampleSynT, 24 | accPosProc: {6: strid}.newTable 25 | ) 26 | 27 | dbookExampleDFA3p36 = DFA[string]( 28 | start: 0, 29 | accepts: { 30 | 4: strid 31 | }.newTable, 32 | stateNum: 5, 33 | translations: { 34 | 0: {'a': 1, 'b': 2}.newTable(), 35 | 1: {'a': 1, 'b': 3}.newTable(), 36 | 2: {'a': 1, 'b': 2}.newTable(), 37 | 3: {'a': 1, 'b': 4}.newTable(), 38 | 4: {'a': 1, 'b': 2}.newTable(), 39 | }.newTable 40 | ) 41 | 42 | test "test makeFollowposTable (Dragonbook 3.9.4)": 43 | var 44 | one = initHashSet[Pos]() 45 | two = initHashSet[Pos]() 46 | three = initHashSet[Pos]() 47 | four = initHashSet[Pos]() 48 | five = initHashSet[Pos]() 49 | one.incl(1) 50 | one.incl(2) 51 | one.incl(3) 52 | two = one 53 | three.incl(4) 54 | four.incl(5) 55 | five.incl(6) 56 | let 57 | followpos: array[0..4, (Pos, HashSet[Pos])] = { 58 | 1: one, 59 | 2: two, 60 | 3: three, 61 | 4: four, 62 | 5: five 63 | } 64 | 65 | check dbookExampleSynT.makeFollowposTable == followpos.newTable 66 | 67 | test "test correctChar": 68 | check dbookExampleSynT.collectChar == {'a', 'b'} 69 | 70 | template checkDFA[A](dfa: DFA[A]) = 71 | let 72 | sa = dfa.translations[dfa.start]['a'] 73 | sab = dfa.translations[sa]['b'] 74 | acc = dfa.translations[sab]['b'] 75 | check dfa.stateNum == 4 76 | check dfa.translations[dfa.start]['b'] == dfa.start 77 | check dfa.translations[sa]['a'] == sa 78 | check dfa.translations[sab]['a'] == sa 79 | check dfa.accepts.haskey(acc) 80 | check dfa.translations[acc]['a'] == sa 81 | check dfa.translations[acc]['b'] == dfa.start 82 | 83 | test "test makeDFA (Dragonbook 3.9.5)": 84 | let dfa = makeDFA[string](dbookExampleLexRe) 85 | checkDFA(dfa) 86 | 87 | test "test minimizeStates (id)": 88 | let dfa = makeDFA[string](dbookExampleLexRe) 89 | var 90 | acc = 
initHashSet[DState]() 91 | other = initHashSet[DState]() 92 | for i in 0.. 0: 62 | result &= "." 63 | # type of `$3` is `seq[MyToken]` and each elements are NUM 64 | for tkn in $3: 65 | # type of `tkn.val` is `int` 66 | result &= $(tkn.val) 67 | 68 | test "test Lexer": 69 | var testLexer = testLex.newWithString("1 + 42 * 101010") 70 | testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE 71 | 72 | var 73 | ret: seq[MyTokenKind] = @[] 74 | 75 | for token in testLexer.lexIter: 76 | ret.add(token.kind) 77 | 78 | check ret == @[MyTokenKind.NUM, MyTokenKind.PLUS, MyTokenKind.NUM, 79 | MyTokenKind.NUM, MyTokenKind.MULTI, 80 | MyTokenKind.NUM, MyTokenKind.NUM, MyTokenKind.NUM, 81 | MyTokenKind.NUM, MyTokenKind.NUM, MyTokenKind.NUM] 82 | 83 | test "test Parser 1": 84 | var testLexer = testLex.newWithString("1 + 42 * 101010") 85 | testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE 86 | 87 | var parser = testPar.newParser() 88 | check parser.parse(testLexer) == "1 + [42 * 101010]" 89 | 90 | testLexer.initWithString("1 + 42 * 1010") 91 | 92 | parser.init() 93 | check parser.parse(testLexer) == "1 + [42 * 1010]" 94 | 95 | test "test Parser 2": 96 | var testLexer = testLex.newWithString("1 + 42 * 1.01010") 97 | testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE 98 | 99 | var parser = testPar.newParser() 100 | check parser.parse(testLexer) == "1 + [42 * 1.01010]" 101 | 102 | testLexer.initWithString("1. + 4.2 * 101010") 103 | 104 | parser.init() 105 | check parser.parse(testLexer) == "1. + [4.2 * 101010]" 106 | 107 | test "test Parser 3": 108 | var testLexer = testLex.newWithString("(1 + 42) * 1.01010") 109 | testLexer.ignoreIf = proc(r: MyToken): bool = r.kind == MyTokenKind.IGNORE 110 | 111 | var parser = testPar.newParser() 112 | check parser.parse(testLexer) == "[(1 + 42) * 1.01010]" 113 | -------------------------------------------------------------------------------- /tests/test_state_parse.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import nimly 4 | 5 | import state_parser 6 | 7 | test "test state": 8 | var lexer = testStateLex.newWithString("if test + 1 then { true } else { 2 * ( test + 3 ) }") 9 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 10 | 11 | var parser = testStatePar.newParser() 12 | check parser.parse(lexer) == "IF(test+1)THEN{true}ELSE{(2*(test+3))}" 13 | 14 | test "test state (read from file)": 15 | var lexer = testStateLex.open("tests/state_example.txt") 16 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 17 | 18 | var parser = testStatePar.newParser() 19 | check parser.parse(lexer) == "IF(test+1)THEN{true}ELSE{(2*(test+3))}" 20 | -------------------------------------------------------------------------------- /tests/test_state_parse_with_empty.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import nimly 4 | 5 | import state_parser_with_empty 6 | 7 | test "test state": 8 | var lexer = testStateLex.newWithString("if test + 1 then { true } else { 2 * ( test + 3 ) }") 9 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 10 | 11 | var parser = testStatePar.newParser() 12 | check parser.parse(lexer) == "IF(test+1)THEN{true}ELSE{(2*(test+3))}" 13 | 14 | test "test state with empty": 15 | var lexer = testStateLex.newWithString("if test + 1 then { true }") 16 | lexer.ignoreIf = proc(r: 
StateToken): bool = r.kind == StateTokenKind.SIGNORE 17 | 18 | var parser = testStatePar.newParser() 19 | check parser.parse(lexer) == "IF(test+1)THEN{true}" 20 | 21 | test "test state with empty2": 22 | var lexer = testStateLex.newWithString("if then { true }") 23 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 24 | 25 | var parser = testStatePar.newParser() 26 | check parser.parse(lexer) == "IF()THEN{true}" 27 | -------------------------------------------------------------------------------- /tests/test_state_parse_with_empty_lr.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import nimly 4 | 5 | import state_parser_with_empty_lr 6 | 7 | test "test state": 8 | var lexer = testStateLex.newWithString("if test + 1 then { true } else { 2 * ( test + 3 ) }") 9 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 10 | 11 | var parser = testStatePar.newParser() 12 | check parser.parse(lexer) == "IF(test+1)THEN{true}ELSE{(2*(test+3))}" 13 | 14 | test "test state with empty": 15 | var lexer = testStateLex.newWithString("if test + 1 then { true }") 16 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 17 | 18 | var parser = testStatePar.newParser() 19 | check parser.parse(lexer) == "IF(test+1)THEN{true}" 20 | 21 | test "test state with empty2": 22 | var lexer = testStateLex.newWithString("if then { true }") 23 | lexer.ignoreIf = proc(r: StateToken): bool = r.kind == StateTokenKind.SIGNORE 24 | 25 | var parser = testStatePar.newParser() 26 | check parser.parse(lexer) == "IF()THEN{true}" 27 | --------------------------------------------------------------------------------
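A closing note on the ``[]``/``{}`` sugar exercised by ``word{}`` in the tests above: the ``getOpt`` and ``getRep`` helpers in ``src/nimly/parsegen.nim`` synthesize hidden nonterminals for it. Spelled out as ordinary ``nimy`` clauses, the generated rules are equivalent to the following sketch; the ``__opt_``/``__rep_``/``__inner_`` prefixes mirror the string literals in parsegen.nim, while ``WORD`` and ``Token`` are placeholder names for a terminal and its token type:

.. code-block:: nim

  ## WORD[] : zero or one occurrence, collected into a seq
  __opt_WORD[seq[Token]]:
    WORD:
      return @[$1]
    []:
      return @[]

  ## WORD{} : zero or more occurrences via a left-recursive helper
  __rep_WORD[seq[Token]]:
    __inner___rep_WORD:
      return $1
    []:
      return @[]

  __inner___rep_WORD[seq[Token]]:
    __inner___rep_WORD WORD:
      result = $1
      result.add($2)
    WORD:
      return @[$1]

Left recursion is the natural shape for an LALR(1) grammar: it keeps the parse stack flat while the generated ``add`` call appends each new element in source order, which is why ``$1`` in a clause like ``word{}`` arrives as a ``seq`` in input order.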