├── README.md └── parser_gen.py /README.md: -------------------------------------------------------------------------------- 1 | # parser_gen 2 | 3 | 一个简单的`LR(1)/LALR(1)`解析器生成工具,适用于`C++17`或更高。 4 | 5 | Working in progress. 6 | 7 | ## TODO 8 | 9 | - [x] 完成LR(1)支持 10 | - [x] 冲突解决 11 | - [ ] 完成LALR支持 12 | - [ ] 人可读报错 13 | 14 | ## 快速开始 15 | 16 | Calculator.p: 17 | 18 | ``` 19 | term Plus assoc(left) prec(1); // + 20 | term Minus assoc(left) prec(1); // - 21 | term Multiply assoc(left) prec(2); // * 22 | term Division assoc(left) prec(2); // / 23 | term LeftParen; // ( 24 | term RightParen; // ) 25 | term LiteralNumber {% int %}; 26 | 27 | nonterm exp {% int %}; 28 | 29 | grammar { 30 | exp -> LiteralNumber(value) {% return value; %}; 31 | exp -> LeftParen exp(exp) RightParen {% return exp; %}; 32 | exp -> Minus exp(rhs) prec(10) {% return -rhs; %}; 33 | exp -> exp(lhs) Plus exp(rhs) {% return lhs + rhs; %}; 34 | exp -> exp(lhs) Minus exp(rhs) {% return lhs - rhs; %}; 35 | exp -> exp(lhs) Multiply exp(rhs) {% return lhs * rhs; %}; 36 | exp -> exp(lhs) Division exp(rhs) {% return lhs / rhs; %}; 37 | }; 38 | 39 | generator {% 40 | { 41 | "class_name": "CalculatorParser" 42 | } 43 | %}; 44 | ``` 45 | 46 | Main.cpp: 47 | 48 | ```c++ 49 | #include 50 | #include 51 | #include 52 | 53 | #include "CalculatorParser.hpp" 54 | 55 | class Tokenizer { 56 | public: 57 | Tokenizer(const char* buffer) 58 | : m_pBuffer(buffer) {} 59 | public: 60 | std::tuple Advance() { 61 | using TokenTypes = CalculatorParser::TokenTypes; 62 | using TokenValues = CalculatorParser::TokenValues; 63 | while (true) { 64 | if (*m_pBuffer == '\0') 65 | return { TokenTypes::_, TokenValues {} }; 66 | 67 | char c; 68 | switch (c = *(m_pBuffer++)) { 69 | case '+': return { TokenTypes::Plus, TokenValues {} }; 70 | case '-': return { TokenTypes::Minus, TokenValues {} }; 71 | case '*': return { TokenTypes::Multiply, TokenValues {} }; 72 | case '/': return { TokenTypes::Division, TokenValues {} }; 73 | case '(': 
return { TokenTypes::LeftParen, TokenValues {} }; 74 | case ')': return { TokenTypes::RightParen, TokenValues {} }; 75 | case ' ': 76 | case '\t': 77 | case '\n': 78 | case '\r': 79 | continue; 80 | default: 81 | if (c >= '0' && c <= '9') { 82 | int ret = (c - '0'); 83 | while (*m_pBuffer >= '0' && *m_pBuffer <= '9') 84 | ret = ret * 10 + (*(m_pBuffer++) - '0'); 85 | return { TokenTypes::LiteralNumber, TokenValues { ret } }; 86 | } 87 | else 88 | throw std::runtime_error("Bad input"); 89 | } 90 | } 91 | } 92 | private: 93 | const char* m_pBuffer; 94 | }; 95 | 96 | int main() { 97 | try { 98 | while (std::cin) { 99 | std::string input; 100 | std::getline(std::cin, input); 101 | 102 | Tokenizer tokenizer(input.c_str()); 103 | CalculatorParser parser; 104 | while (true) { 105 | auto [t, v] = tokenizer.Advance(); 106 | 107 | auto ret = parser(t, v); 108 | if (ret == CalculatorParser::ParseResult::Rejected) 109 | throw std::runtime_error("Parse error"); 110 | else if (ret == CalculatorParser::ParseResult::Accepted) 111 | std::cout << parser.Result() << std::endl; 112 | 113 | if (t == CalculatorParser::TokenTypes::_) 114 | break; 115 | } 116 | }; 117 | } 118 | catch (const std::exception& ex) { 119 | std::cerr << ex.what() << std::endl; 120 | return 1; 121 | } 122 | return 0; 123 | } 124 | ``` 125 | 126 | Build: 127 | 128 | ```bash 129 | ./parser_gen.py --header-file CalculatorParser.hpp --source-file CalculatorParser.cpp Calculator.p 130 | g++ CalculatorParser.cpp Main.cpp -std=c++17 -o calculator 131 | ``` 132 | 133 | Run it: 134 | 135 | ```bash 136 | ./calculator 137 | ``` 138 | 139 | ## 特性 140 | 141 | - 生成可重入代码 142 | - 不污染命名空间 143 | - 用户驱动接口 144 | 145 | ## 语法规则文件 146 | 147 | 语法规则文件由四部分声明构成: 148 | - 终结符 149 | - 非终结符 150 | - 规则 151 | - 生成器参数 152 | 153 | ### 终结符 154 | 155 | 终结符使用下述方式声明: 156 | 157 | ``` 158 | term 标识符 {% 替换 %} ; 159 | ``` 160 | 161 | 其中,标识符用于指定终结符的名称,可以由非数字开头的若干数字、字母或者下划线构成(下同),需要注意的是单独的`_`会被识别为关键词。 162 | 163 | 
替换部分应当填写一个C/C++类型,当语法制导翻译遇到一个标识符时可以给出对应的C/C++类型的值供用户代码使用。 164 | 165 | 若替换部分留空,则该标识符的值不可在翻译过程中被使用。 166 | 167 | 此外,为了支撑算符优先冲突解决规则,可以在标识符后面使用关键字`assoc`和`prec`来指定左结合或右结合以及对应的优先级,例如: 168 | 169 | ``` 170 | term minus assoc(left) prec(1) {% Tokenizer::Token %}; 171 | ``` 172 | 173 | 其中`assoc`可以接`left`、`right`或者`none`,表明左结合、右结合或者无结合性。 174 | 175 | 其中`prec`用于指定算符优先级,算符优先级高的表达式会在`移进/规约`冲突中被优先选择。 176 | 177 | 在解决冲突时,如果发现算符无结合性则会产生错误,若解决冲突的任意一方不指定结合性或优先级,则会按照其他规约规则自动解决冲突。 178 | 179 | 此外,算符优先冲突解决规则仅适用于诸如:`Exp op Exp`的表达式,其中`op`是一个终结符。 180 | 181 | ### 非终结符 182 | 183 | 非终结符使用下述方式声明: 184 | 185 | ``` 186 | nonterm 标识符 {% 替换 %}; 187 | ``` 188 | 189 | 具体规则和终结符一致,但是不可以声明结合性或者优先级,其他内容不再赘述。 190 | 191 | ### 语法规则 192 | 193 | 声明完终结符和非终结符后可以声明语法规则,举例如下: 194 | 195 | ``` 196 | grammar { 197 | Exp -> Exp(lhs) plus Exp(rhs) {% return Ast::BinExp(lhs, rhs, Ast::BinOp::Plus); %}; 198 | Exp -> Exp(lhs) minus Exp(rhs) {% return Ast::BinExp(lhs, rhs, Ast::BinOp::Minus); %}; 199 | } 200 | ``` 201 | 202 | 语法规则定义在`grammar`块中,一个产生式具备下述形式: 203 | 204 | ``` 205 | 非终结符 -> 符号1 ( 标识符1 ) 符号2 ( 标识符2 ) ... {% 替换 %} ; 206 | ``` 207 | 208 | 其中,非终结符指示产生式由哪个非终结符推导而来,整个产生式在规约后将会具备该非终结符对应的类型。 209 | 210 | `符号1..n`指示产生式的构成,每个符号可以接一个标识符,将会在生成代码中使用符号对应的类型捕获值给解析器代码使用。 211 | 212 | 需要注意,首条规则被作为入口规则产生文法。此外如果产生式不规约任何符号,需要使用特殊的语法来声明: 213 | 214 | ``` 215 | 非终结符 -> _ {% 替换 %}; 216 | ``` 217 | 218 | 另外,为了支持单目运算符的特殊优先级,产生式本身可以指定一个独立的优先级,例如: 219 | 220 | ``` 221 | grammar { 222 | UnaryExp -> minus Exp(rhs) prec(10) {% ... 
%}; 223 | } 224 | ``` 225 | 226 | 此时,`prec`必须在产生式末尾,当生成器在解决`BinExp`和`UnaryExp`的冲突时会优先匹配`UnaryExp`。 227 | 228 | ### 代码生成参数 229 | 230 | 在完成上述定义后,你可以使用 Json 来向代码生成器传递参数,这些参数会被用于在模板中替换对应的变量: 231 | 232 | ``` 233 | generator {% 234 | { 235 | "namespace": "Test", 236 | "class_name": "MyParser", 237 | "includes": [ 238 | "Ast.hpp" 239 | ] 240 | } 241 | %} 242 | ``` 243 | 244 | ### 附录:关键词表 245 | 246 | ``` 247 | _ term nonterm grammar generator assoc prec left right none 248 | ``` 249 | 250 | ### 附录:规约/移进冲突解决规则 251 | 252 | - 下述规则被依次用于解决规约/移进冲突: 253 | - 尝试使用算符优先和结合性规则进行解决; 254 | - 采取移进规则解决; 255 | - 下述规则被依次用于解决规约/规约冲突: 256 | - 依照生成式的定义顺序解决,先定义的生成式会先被用于解决冲突; 257 | 258 | ## 生成代码接口 259 | 260 | 生成器将会依据模板产生下述样式的入口: 261 | 262 | ```c++ 263 | class Parser 264 | { 265 | public: 266 | enum class ParseResult 267 | { 268 | Undecided = 0, 269 | Accepted = 1, 270 | Rejected = 2, 271 | }; 272 | 273 | enum class TokenTypes 274 | { 275 | _ = 0, 276 | Division = 1, 277 | LeftParen = 2, 278 | LiteralNumber = 3, 279 | Minus = 4, 280 | Multiply = 5, 281 | Plus = 6, 282 | RightParen = 7, 283 | }; 284 | 285 | using TokenValues = std::variant; 286 | using ProductionValues = std::variant; 287 | using UnionValues = std::variant; 288 | 289 | public: 290 | Parser(); 291 | 292 | public: 293 | ParseResult operator()(TokenTypes token, const TokenValues& value); 294 | void Reset()noexcept; 295 | const int& Result()const noexcept { return m_stResult; } 296 | int& Result()noexcept { return m_stResult; } 297 | 298 | private: 299 | std::vector m_stStack; 300 | std::vector m_stValueStack; 301 | 302 | int m_stResult {}; 303 | }; 304 | ``` 305 | 306 | - Parser::TokenTypes 307 | 308 | 存放所有终结符的枚举表示,`Tokenizer`可以利用这里的`TokenTypes`向`Parser`传递下一个符号。 309 | 310 | - Parser::TokenValues 311 | 312 | 存放所有终结符的值表示,将会传递给用户定义的驱动函数使用。 313 | 314 | - Parser::ProductionValues 315 | 316 | 存放所有非终结符的值表示,将会在计算过程中被使用。 317 | 318 | - Parser::operator() 319 | 320 | 
通过解析器的`operator()`向解析器喂一个`Token`。如果解析失败,返回`ParseResult::Rejected`;如果解析成功,返回`ParseResult::Accepted`,并且`Parser::Result()`可以访问存储的解析结果。 321 | 322 | 若内部抛出异常,需要手动执行`Reset()`重置状态,否则行为是未定义的。 323 | 324 | - Parser::Reset() 325 | 326 | 重置状态。 327 | 328 | - Parser::Result() 329 | 330 | 获取解析结果,对应第一个产生式的非终结符类型。 331 | 332 | ## License 333 | 334 | MIT License 335 | -------------------------------------------------------------------------------- /parser_gen.py: -------------------------------------------------------------------------------- 1 | #!python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # parser_gen 5 | # 一个 LR(1)/LALR 语法解析器生成工具。 6 | # 7 | # Copyright (C) 2020 Chen Chu<1871361697@qq.com> 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 10 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 11 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 12 | # persons to whom the Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 15 | # Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 18 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import sys
import json
import argparse
import datetime
from typing import List, Set, Dict, Tuple, Optional


# ---------------------------------------- Grammar file parser ----------------------------------------
# Parses a grammar description file. A grammar file declares the terminals, the
# non-terminals, the productions and the code-generator arguments.
#
# Terminal declaration:
#     term IDENT {% replacement %};
# IDENT is a C-style identifier (digits, letters or underscores, not starting with a
# digit); note that a lone "_" is a keyword, not an identifier. The replacement is a
# C/C++ type: during syntax-directed translation the token carries a value of that
# type for user actions. If the replacement is omitted, the token's value cannot be
# referenced in actions.
# Operator terminals may additionally declare associativity and precedence, used for
# shift/reduce conflict resolution, e.g.:
#     term minus assoc(left) prec(1) {% Tokenizer::Token %};
# assoc accepts left, right or none; an operator declared assoc(none) that takes part
# in a conflict is an error. If associativity is unspecified, the default resolution
# rules below apply. prec gives the operator precedence; the higher-precedence side
# wins a shift/reduce conflict.
#
# Non-terminal declaration (same rules, but no assoc/prec allowed):
#     nonterm IDENT {% replacement %};
#
# Productions are declared inside a grammar block, e.g.:
#     grammar {
#         BinExp -> Exp(lhs) BinOp(op) Exp(rhs) {% return Ast::BinExp(lhs, rhs, op); %};
#         BinOp -> minus {% return Ast::BinOp::Minus; %};
#         BinOp -> plus {% return Ast::BinOp::Plus; %};
#     }
# Each production has the form
#     NONTERM -> sym1 ( name1 ) sym2 ( name2 ) ... {% replacement %} ;
# The left-hand non-terminal gives the type the production has after reduction; each
# right-hand symbol may bind its value to a name usable in the generated action code.
# The first production is the entry rule of the grammar. An epsilon production uses
# the special form:
#     NONTERM -> _ {% replacement %};
# A production may also carry its own precedence (it must appear at the end of the
# right-hand side), e.g. to give unary operators a higher precedence:
#     grammar {
#         UnaryExp -> minus Exp(rhs) prec(10) {% ... %};
#     }
# so that the generator prefers UnaryExp when resolving the conflict with BinExp.
#
# Finally, code-generation arguments are passed to the generator as JSON; they replace
# the matching variables in the output template:
#     generator {%
#         {
#             "namespace": "Test",
#             "class_name": "MyParser"
#         }
#     %}
#
# Appendix - keywords:
#     _ term nonterm grammar generator assoc prec left right none
#
# Appendix - conflict resolution:
#     shift/reduce conflicts are resolved by, in order:
#         1. operator precedence and associativity;
#         2. preferring the shift;
#     reduce/reduce conflicts are resolved by:
#         1. definition order - the production defined first wins.
#


class Symbol:
    """
    A grammar symbol: one terminal or non-terminal.

    __eq__/__hash__ are intentionally not overridden: each symbol is expected
    to be a unique instance within a document, so identity semantics suffice.
    """
    def __init__(self, t: int, id: str, replace: Optional[str] = None, assoc: int = 0, prec: int = 0, line: int=0):
        self._type = t
        self._id = id
        self._replace = None if replace is None else replace.strip()
        self._assoc = assoc
        self._prec = prec
        self._line = line

    def __repr__(self):
        return self._id

    def type(self) -> int:
        """
        Get the symbol type (one of the SYMBOL_* constants below).
        :return: symbol type
        """
        return self._type

    def id(self) -> str:
        """
        Get the identifier.
        :return: identifier
        """
        return self._id

    def replace(self) -> Optional[str]:
        """
        Get the replacement text (the C/C++ value type), stripped; None if absent.
        :return: replacement text
        """
        return self._replace

    def associativity(self) -> int:
        """
        Get the associativity (an ASSOC_* constant; 0 means unspecified).
        :return: associativity
        """
        return self._assoc

    def precedence(self) -> int:
        """
        Get the operator precedence (0 means unspecified).
        :return: precedence
        """
        return self._prec

    def line_defined(self) -> int:
        """
        Get the line in the grammar source where this symbol was defined.
        :return: line number
        """
        return self._line


SYMBOL_TESTER = -2  # special symbol '#', for generating LALR parser
SYMBOL_ENTRY = -1  # special symbol '@', for extending the grammar
SYMBOL_EOF = 0  # special symbol '$'
SYMBOL_TERMINAL = 1  # terminal symbol
SYMBOL_NON_TERMINAL = 2  # non-terminal symbol
155 | 156 | ASSOC_UNDEF = 0 157 | ASSOC_LEFT = 1 158 | ASSOC_RIGHT = 2 159 | ASSOC_NONE = 3 160 | 161 | kEofSymbol = Symbol(SYMBOL_EOF, "$", "") 162 | kEntrySymbol = Symbol(SYMBOL_ENTRY, "@", "") 163 | kTesterSymbol = Symbol(SYMBOL_TESTER, "#", "") 164 | 165 | 166 | class Production: 167 | """ 168 | 产生式 169 | 170 | 由一系列符号构成。 171 | """ 172 | def __init__(self, left: Symbol, right: List[Symbol], binding: Dict[int, str], replace: Optional[str] = None, 173 | prec: int = 0, line: int = -1, index: int = -1): 174 | self._left = left 175 | self._right = right 176 | self._binding = binding 177 | self._replace = replace 178 | self._prec = prec 179 | self._line = line 180 | self._index = index 181 | 182 | def __repr__(self): 183 | if self._prec != 0: 184 | return "%s -> %s prec(%d)" % (repr(self._left), " ".join([repr(x) for x in self._right]), self._prec) 185 | return "%s -> %s" % (repr(self._left), " ".join([repr(x) for x in self._right])) 186 | 187 | def __len__(self): 188 | return len(self._right) 189 | 190 | def __getitem__(self, item): 191 | assert isinstance(item, int) 192 | return self._right[item] 193 | 194 | def __eq__(self, obj) -> bool: # binding, replace, prec, line 不参与比较 195 | if not isinstance(obj, Production): 196 | return False 197 | if self._left != obj._left: 198 | return False 199 | if len(self._right) != len(obj._right): 200 | return False 201 | for i in range(0, len(self._right)): 202 | if self._right[i] != obj._right[i]: 203 | return False 204 | return True 205 | 206 | def __ne__(self, obj) -> bool: 207 | return not self == obj 208 | 209 | def __hash__(self) -> int: 210 | ret = hash(self._left) 211 | for i in range(0, len(self._right)): 212 | ret = ret ^ hash(self._right[i]) 213 | return ret 214 | 215 | def left(self) -> Symbol: 216 | """ 217 | 获取产生式对应的非终结符 218 | :return: 非终结符 219 | """ 220 | return self._left 221 | 222 | def binding(self) -> Dict[int, str]: 223 | """ 224 | 获取绑定参数名的映射情况 225 | :return: 绑定参数映射表 226 | """ 227 | return self._binding 228 | 
229 | def replace(self) -> Optional[str]: 230 | """ 231 | 获取产生式对应的替代文本 232 | :return: 替代文本 233 | """ 234 | return self._replace 235 | 236 | def precedence(self) -> int: 237 | """ 238 | 获取优先级 239 | :return: 优先级 240 | """ 241 | return self._prec 242 | 243 | def line_defined(self) -> int: 244 | """ 245 | 获取符号在源码中定义的行号 246 | :return: 行号 247 | """ 248 | return self._line 249 | 250 | def index(self) -> int: 251 | """ 252 | 获取产生式在源码中的索引 253 | :return: 索引 254 | """ 255 | return self._index 256 | 257 | 258 | class ParseError(Exception): 259 | """ 260 | 解析错误 261 | """ 262 | def __init__(self, message: str, line: int, col: Optional[int] = None): 263 | Exception.__init__(self, message) 264 | self._message = message 265 | self._line = line 266 | self._col = col 267 | 268 | def __str__(self): 269 | if self._col is not None: 270 | return f"{self._message} (line {self._line}, col {self._col})" 271 | return f"{self._message} (line {self._line})" 272 | 273 | def message(self): 274 | return self._message 275 | 276 | def line(self): 277 | return self._line 278 | 279 | def col(self): 280 | return self._col 281 | 282 | 283 | class SourceReader: 284 | """ 285 | 源代码读取器 286 | """ 287 | def __init__(self, filename): 288 | with open(filename, "r", encoding="utf-8") as f: 289 | self._content = f.read() 290 | self._pos = 0 291 | self._line = 1 292 | self._col = 0 293 | 294 | def pos(self): 295 | return self._pos 296 | 297 | def line(self): 298 | return self._line 299 | 300 | def col(self): 301 | return self._col 302 | 303 | def peek(self): 304 | if self._pos >= len(self._content): 305 | return '\0' 306 | return self._content[self._pos] 307 | 308 | def read(self): 309 | ch = self.peek() 310 | if ch == '\0': 311 | return ch 312 | self._pos = self._pos + 1 313 | self._col = self._col + 1 314 | if ch == '\n': 315 | self._line = self._line + 1 316 | self._col = 0 317 | return ch 318 | 319 | def raise_error(self, msg): 320 | raise ParseError(msg, self._line, self._col) 321 | 322 | 323 | TOKEN_EOF = 
0 324 | TOKEN_IDENTIFIER = 1 # 标识符 325 | TOKEN_LITERAL = 2 # 替换用文本 326 | TOKEN_INTEGER = 3 # 整数 327 | TOKEN_EOD = 4 # 分号 ; 328 | TOKEN_DEDUCE = 5 # 推导符号 -> 329 | TOKEN_BEGIN_BLOCK = 6 # { 330 | TOKEN_END_BLOCK = 7 # } 331 | TOKEN_BEGIN_ARG = 8 # ( 332 | TOKEN_END_ARG = 9 # ) 333 | TOKEN_EMPTY = 10 # 关键词 _ 334 | TOKEN_TERM = 11 # 关键词 term 335 | TOKEN_NON_TERM = 12 # 关键词 nonterm 336 | TOKEN_GRAMMAR = 13 # 关键词 grammar 337 | TOKEN_GENERATOR = 14 # 关键词 generator 338 | TOKEN_ASSOC = 15 # 关键词 assoc 339 | TOKEN_PREC = 16 # 关键词 prec 340 | TOKEN_LEFT = 17 # 关键词 left 341 | TOKEN_RIGHT = 18 # 关键词 right 342 | TOKEN_NONE = 19 # 关键词 none 343 | 344 | 345 | class GrammarDocument: 346 | """ 347 | 语法文件 348 | 349 | 存储语法文件内容并提供解析功能。 350 | 使用手写的递归下降来实现解析。 351 | 352 | @mq 353 | - 没有parser gen,要怎么解析语法文件 354 | - 写parser啊 355 | - 没有parser gen怎么写parser 356 | - 那就写parser gen 357 | - 写parser gen怎么解析语法规则!! 358 | - 写parser!!! 359 | """ 360 | def __init__(self): 361 | self._productions = [] # type: List[Production] 362 | self._symbols = set() # type: Set[Symbol] 363 | self._terminals = set() # type: Set[Symbol] 364 | self._non_terminals = set() # type: Set[Symbol] 365 | self._generator_args = None # type: Optional[Dict] 366 | 367 | def clear(self): 368 | self._productions = [] # type: List[Production] 369 | self._symbols = set() # type: Set[Symbol] 370 | self._terminals = set() # type: Set[Symbol] 371 | self._non_terminals = set() # type: Set[Symbol] 372 | self._generator_args = None # type: Optional[Dict] 373 | 374 | def productions(self) -> List[Production]: 375 | """ 376 | 获取所有产生式 377 | :return: 产生式列表 378 | """ 379 | return self._productions 380 | 381 | def symbols(self) -> Set[Symbol]: 382 | """ 383 | 获取所有符号 384 | :return: 符号集合 385 | """ 386 | return self._symbols 387 | 388 | def terminals(self) -> Set[Symbol]: 389 | """ 390 | 获取终结符号 391 | :return: 终结符号集合 392 | """ 393 | return self._terminals 394 | 395 | def non_terminals(self) -> Set[Symbol]: 396 | """ 397 | 获取非终结符号 398 | :return: 非终结符号集合 
399 | """ 400 | return self._non_terminals 401 | 402 | def generator_args(self) -> Optional[Dict]: 403 | """ 404 | 获取生成器参数 405 | :return: 参数 406 | """ 407 | return self._generator_args 408 | 409 | @staticmethod 410 | def _advance(reader: SourceReader): 411 | while True: 412 | if reader.peek() == '\0': 413 | return TOKEN_EOF, None, reader.line() 414 | 415 | # 跳过空白 416 | if reader.peek().isspace(): 417 | while reader.peek().isspace(): 418 | reader.read() 419 | continue 420 | 421 | # 跳过注释 422 | if reader.peek() == '/': 423 | reader.read() 424 | if reader.peek() != '/': # 当前语法只有'//'的可能 425 | reader.raise_error(f"'/' expected, but found {repr(reader.peek())}") 426 | reader.read() 427 | while reader.peek() != '\0' and reader.peek() != '\n': # 读到末尾 428 | reader.read() 429 | continue 430 | 431 | # 符号 432 | if reader.peek() == ';': 433 | line = reader.line() 434 | reader.read() 435 | return TOKEN_EOD, None, line 436 | elif reader.peek() == '-': 437 | line = reader.line() 438 | reader.read() 439 | if reader.peek() != '>': # 当前语法只有'->'可能 440 | reader.raise_error(f"'>' expected, but found {repr(reader.peek())}") 441 | reader.read() 442 | return TOKEN_DEDUCE, None, line 443 | elif reader.peek() == '{': 444 | line = reader.line() 445 | reader.read() 446 | if reader.peek() == '%': 447 | reader.read() 448 | content = [] 449 | while True: 450 | if reader.peek() == '%': 451 | reader.read() 452 | if reader.peek() == '}': 453 | reader.read() 454 | break 455 | elif reader.peek() == '%': 456 | reader.read() 457 | content.append('%') 458 | else: 459 | reader.raise_error(f"'%' or '}}' expected, but found {repr(reader.peek())}") 460 | elif reader.peek() == '\0': 461 | reader.raise_error("Unexpected eof") 462 | else: 463 | content.append(reader.read()) 464 | return TOKEN_LITERAL, "".join(content), line 465 | else: 466 | return TOKEN_BEGIN_BLOCK, None, line 467 | elif reader.peek() == '}': 468 | line = reader.line() 469 | reader.read() 470 | return TOKEN_END_BLOCK, None, line 471 | elif 
reader.peek() == '(': 472 | line = reader.line() 473 | reader.read() 474 | return TOKEN_BEGIN_ARG, None, line 475 | elif reader.peek() == ')': 476 | line = reader.line() 477 | reader.read() 478 | return TOKEN_END_ARG, None, line 479 | 480 | # 关键词/Identifier/数字 481 | content = [] 482 | if reader.peek().isalpha() or reader.peek() == '_': 483 | line = reader.line() 484 | while reader.peek().isalnum() or reader.peek() == '_': 485 | content.append(reader.read()) 486 | identifier = "".join(content) 487 | if identifier == "_": 488 | return TOKEN_EMPTY, identifier, line 489 | elif identifier == "term": 490 | return TOKEN_TERM, identifier, line 491 | elif identifier == "nonterm": 492 | return TOKEN_NON_TERM, identifier, line 493 | elif identifier == "grammar": 494 | return TOKEN_GRAMMAR, identifier, line 495 | elif identifier == "generator": 496 | return TOKEN_GENERATOR, identifier, line 497 | elif identifier == "assoc": 498 | return TOKEN_ASSOC, identifier, line 499 | elif identifier == "prec": 500 | return TOKEN_PREC, identifier, line 501 | elif identifier == "left": 502 | return TOKEN_LEFT, identifier, line 503 | elif identifier == "right": 504 | return TOKEN_RIGHT, identifier, line 505 | elif identifier == "none": 506 | return TOKEN_NONE, identifier, line 507 | return TOKEN_IDENTIFIER, identifier, line 508 | if reader.peek().isnumeric(): 509 | line = reader.line() 510 | while reader.peek().isnumeric(): 511 | content.append(reader.read()) 512 | return TOKEN_INTEGER, int("".join(content)), line 513 | reader.raise_error(f"Unexpected character '{repr(reader.peek())}'") 514 | 515 | def parse(self, filename): 516 | reader = SourceReader(filename) 517 | symbols = {} 518 | productions = [] 519 | production_set = set() 520 | generator_args = None 521 | while True: 522 | token, value, line = GrammarDocument._advance(reader) 523 | if token == TOKEN_EOF: 524 | break 525 | elif token == TOKEN_TERM: 526 | # read identifier 527 | token, identifier, line = 
GrammarDocument._advance(reader) 528 | if token != TOKEN_IDENTIFIER: 529 | raise ParseError("Identifier required parsing term statement", line) 530 | if identifier in symbols: 531 | raise ParseError(f"Terminated symbol \"{identifier}\" redefined", line) 532 | replace = None 533 | def_line = line 534 | # read assoc or prec 535 | assoc = None 536 | prec = None 537 | while True: 538 | token, value, line = GrammarDocument._advance(reader) 539 | if token == TOKEN_ASSOC: 540 | if assoc is not None: 541 | raise ParseError("Associate type redefined", line) 542 | token, _, line = GrammarDocument._advance(reader) 543 | if token != TOKEN_BEGIN_ARG: 544 | raise ParseError("'(' expected parsing associate type", line) 545 | token, _, line = GrammarDocument._advance(reader) 546 | if token == TOKEN_LEFT: 547 | assoc = ASSOC_LEFT 548 | elif token == TOKEN_RIGHT: 549 | assoc = ASSOC_RIGHT 550 | elif token == TOKEN_NONE: 551 | assoc = ASSOC_NONE 552 | else: 553 | raise ParseError("'left', 'right' or 'none' expected parsing associate type", line) 554 | token, _, line = GrammarDocument._advance(reader) 555 | if token != TOKEN_END_ARG: 556 | raise ParseError("')' expected parsing associate type", line) 557 | elif token == TOKEN_PREC: 558 | if prec is not None: 559 | raise ParseError("Precedence redefined", line) 560 | token, _, line = GrammarDocument._advance(reader) 561 | if token != TOKEN_BEGIN_ARG: 562 | raise ParseError("'(' expected parsing precedence", line) 563 | token, prec, line = GrammarDocument._advance(reader) 564 | if token != TOKEN_INTEGER: 565 | raise ParseError("Integer expected parsing precedence", line) 566 | if prec == 0: 567 | raise ParseError("Precedence must large than zero", line) 568 | token, _, line = GrammarDocument._advance(reader) 569 | if token != TOKEN_END_ARG: 570 | raise ParseError("')' expected parsing associate type", line) 571 | else: 572 | break 573 | # replace 574 | if token == TOKEN_LITERAL: 575 | replace = value 576 | token, _, line = 
GrammarDocument._advance(reader) 577 | if token != TOKEN_EOD: 578 | raise ParseError("End of definition required", line) 579 | if (assoc is not None) and (prec is None): 580 | raise ParseError("Precedence must be defined while associativity defined", def_line) 581 | symbols[identifier] = Symbol(SYMBOL_TERMINAL, identifier, replace, 582 | ASSOC_UNDEF if assoc is None else assoc, 583 | 0 if prec is None else prec, 584 | def_line) 585 | elif token == TOKEN_NON_TERM: 586 | # read identifier 587 | token, identifier, line = GrammarDocument._advance(reader) 588 | if token != TOKEN_IDENTIFIER: 589 | raise ParseError("Identifier required parsing term statement", line) 590 | if identifier in symbols: 591 | raise ParseError(f"Non-terminated symbol \"{identifier}\" redefined", line) 592 | replace = None 593 | def_line = line 594 | # replace 595 | token, value, line = GrammarDocument._advance(reader) 596 | if token == TOKEN_LITERAL: 597 | replace = value 598 | token, _, line = GrammarDocument._advance(reader) 599 | if token != TOKEN_EOD: 600 | raise ParseError("End of definition required", line) 601 | symbols[identifier] = Symbol(SYMBOL_NON_TERMINAL, identifier, replace, ASSOC_UNDEF, 0, def_line) 602 | elif token == TOKEN_GRAMMAR: 603 | token, _, line = GrammarDocument._advance(reader) 604 | if token != TOKEN_BEGIN_BLOCK: 605 | raise ParseError("'{' required parsing grammar block", line) 606 | while True: 607 | token, identifier, line = GrammarDocument._advance(reader) 608 | if token == TOKEN_END_BLOCK: # } 609 | break 610 | elif token != TOKEN_IDENTIFIER: 611 | raise ParseError("Identifier required parsing production expression", line) 612 | 613 | # identifier 614 | if identifier not in symbols: 615 | raise ParseError(f"Undefined symbol \"{identifier}\" parsing production expression", line) 616 | # -> 617 | token, _, line = GrammarDocument._advance(reader) 618 | if token != TOKEN_DEDUCE: 619 | raise ParseError("Deduce operator required parsing production expression", line) 620 
| right = [] 621 | replace = None 622 | prec = None 623 | empty_production = False 624 | def_line = line 625 | binding = {} 626 | while True: 627 | token, value, line = GrammarDocument._advance(reader) 628 | if token == TOKEN_EOD: # ; 629 | if not empty_production and len(right) == 0: 630 | raise ParseError("Symbol expected but found ';' parsing production expression", line) 631 | break 632 | elif token == TOKEN_LITERAL: 633 | if not empty_production and len(right) == 0: 634 | raise ParseError("Symbol expected but found replacement literal", line) 635 | replace = value 636 | token, _, line = GrammarDocument._advance(reader) 637 | if token != TOKEN_EOD: 638 | raise ParseError("End of definition required parsing production expression", line) 639 | break 640 | elif token == TOKEN_EMPTY: 641 | if len(right) != 0 or (prec is not None): 642 | raise ParseError("Epsilon symbol cannot be placed here parsing production expression", 643 | line) 644 | empty_production = True 645 | elif token == TOKEN_PREC: 646 | token, _, line = GrammarDocument._advance(reader) 647 | if token != TOKEN_BEGIN_ARG: 648 | raise ParseError("'(' required parsing precedence", line) 649 | token, prec, line = GrammarDocument._advance(reader) 650 | if token != TOKEN_INTEGER: 651 | raise ParseError("Integer expected parsing precedence", line) 652 | if prec == 0: 653 | raise ParseError("Precedence must large than zero", line) 654 | token, _, line = GrammarDocument._advance(reader) 655 | if token != TOKEN_END_ARG: 656 | raise ParseError("')' required parsing precedence", line) 657 | elif token == TOKEN_IDENTIFIER: 658 | if empty_production or (prec is not None): 659 | raise ParseError("Identifier cannot be placed here", line) 660 | if value not in symbols: 661 | raise ParseError(f"Undefined symbol \"{value}\"", line) 662 | right.append(symbols[value]) 663 | elif token == TOKEN_BEGIN_ARG: 664 | if len(right) == 0: 665 | raise ParseError("Symbol required for binding argument name", line) 666 | if 
right[len(right) - 1].replace() is None: 667 | raise ParseError("Symbol don't have type for binding", line) 668 | token, arg_id, line = GrammarDocument._advance(reader) 669 | if token != TOKEN_IDENTIFIER: 670 | raise ParseError("Identifier required parsing binding argument", line) 671 | token, _, line = GrammarDocument._advance(reader) 672 | if token != TOKEN_END_ARG: 673 | raise ParseError("')' expected parsing binding argument", line) 674 | binding[len(right) - 1] = arg_id 675 | else: 676 | raise ParseError("Unexpected token", line) 677 | assert len(right) > 0 or empty_production 678 | # calc prec if user not defined 679 | if prec is None: 680 | for e in reversed(right): 681 | if e.type() == SYMBOL_TERMINAL: 682 | prec = e.precedence() 683 | if prec is None: 684 | prec = 0 685 | production = Production(symbols[identifier], right, binding, replace, prec, def_line, 686 | len(productions)) 687 | if production in production_set: 688 | raise ParseError(f"Production \"{production}\" redefined", def_line) 689 | if (production.left().replace() is not None) and (production.replace() is None): 690 | raise ParseError(f"Action body expected for production \"{production}\"", def_line) 691 | productions.append(production) 692 | production_set.add(production) 693 | token, _, line = GrammarDocument._advance(reader) 694 | if token != TOKEN_EOD: 695 | raise ParseError("End of definition required parsing grammar block", line) 696 | elif token == TOKEN_GENERATOR: 697 | if generator_args is not None: 698 | raise ParseError("Generator arguments is redefined", line) 699 | try: 700 | token, json_args, line = GrammarDocument._advance(reader) 701 | except Exception as ex: 702 | raise ParseError(f"Parsing json error parsing generator block: {ex}", line) 703 | if token != TOKEN_LITERAL: 704 | raise ParseError("String literal required parsing generator block", line) 705 | token, _, line = GrammarDocument._advance(reader) 706 | if token != TOKEN_EOD: 707 | raise ParseError("';' expected 
parsing generator block", line) 708 | generator_args = json.loads(json_args) 709 | else: 710 | raise ParseError("Unexpected token", line) 711 | self._productions = productions 712 | self._symbols = set([symbols[s] for s in symbols]) 713 | self._terminals = set([s for s in self._symbols if s.type() == SYMBOL_TERMINAL]) 714 | self._non_terminals = set([s for s in self._symbols if s.type() == SYMBOL_NON_TERMINAL]) 715 | self._generator_args = generator_args 716 | 717 | # ---------------------------------------- LR(1)/LALR分析器部分 ---------------------------------------- 718 | # LR(1)/LALR分析器用于解算状态转移矩阵。 719 | # 通过对文法进行LR分析,可以得到类似下图的转换矩阵: 720 | # x opt eq $ | S E V 721 | # 0 s2 s4 | 722 | # 1 a | 723 | # 2 r3 r3 | 724 | # 3 s2 s4 | g8 g7 725 | # ……下略 726 | # 其中,表头表示向前看符号,每一行代表一个解析器状态,每一个格表明在看到下一个输入符号时需要进行的动作: 727 | # sX 表明一个移进操作,在移入下一个符号后跳转到状态X 728 | # rX 表明一个规约操作,在看到当前符号时按照产生式X进行规约,弹出解析栈顶部的|X|个元素 729 | # gX 表明在规约操作后,在看到栈顶符号为这个格子对应的符号时,转移状态到X状态 730 | # 同时分析器会依据之前的规则对 SR冲突、RR冲突 进行解决 731 | 732 | 733 | class ExtendProduction: 734 | """ 735 | 扩展生成式(项) 736 | 737 | 增加当前位置和向前看符号来计算闭包。 738 | """ 739 | def __init__(self, raw: Production, pos: int, lookahead: Set[Symbol]): 740 | assert len(raw) >= pos 741 | self._production = raw 742 | self._pos = pos 743 | self._lookahead = lookahead 744 | 745 | def __repr__(self): 746 | right = [repr(x) for x in self._production] 747 | right.insert(self._pos, "·") 748 | return "(%s -> %s, %s)" % (repr(self._production.left()), " ".join(right), self._lookahead) 749 | 750 | def __len__(self): 751 | return len(self._production) 752 | 753 | def __getitem__(self, item): 754 | assert isinstance(item, int) 755 | return self._production[item] 756 | 757 | def __eq__(self, obj) -> bool: 758 | if not isinstance(obj, ExtendProduction): 759 | return False 760 | if self._pos != obj._pos: 761 | return False 762 | if self._production != obj._production: 763 | return False 764 | if self._lookahead != obj._lookahead: 765 | return False 766 | return True 767 | 768 | 
    def __ne__(self, obj) -> bool:
        return not self == obj

    def __hash__(self) -> int:
        # XOR-fold the lookahead hashes so the result is independent of set
        # iteration order, then mix in the dot position and the raw production.
        ret = hash(self._pos)
        for x in self._lookahead:
            ret = ret ^ hash(x)
        ret = ret ^ hash(self._production)
        return ret

    def production(self) -> Production:
        """
        Get the underlying raw production.
        :return: the production
        """
        return self._production

    def pos(self) -> int:
        """
        Get the current parse (dot) position inside the production.
        :return: the position
        """
        return self._pos

    def lookahead(self) -> Set[Symbol]:
        """
        Get the associated lookahead symbols.
        :return: the lookahead symbol set
        """
        return self._lookahead


class ExtendProductionSet:
    """
    Set of extended productions (an LR item set).
    """
    def __init__(self, s: Set[ExtendProduction], state_id: Optional[int]):
        self._set = s
        self._state = state_id

    def __len__(self):
        return len(self._set)

    def __eq__(self, obj) -> bool:  # state_id does not take part in comparison
        if not isinstance(obj, ExtendProductionSet):
            return False
        return self._set == obj._set

    def __ne__(self, obj) -> bool:
        return not self == obj

    def __hash__(self) -> int:
        # Order-independent hash: XOR-fold the element hashes.
        ret = 0
        for x in self._set:
            ret = ret ^ hash(x)
        return ret

    def __iter__(self):
        return iter(self._set)

    def __repr__(self):
        ret = ["["]
        for e in self._set:
            ret.append(f" {repr(e)}")
        ret.append("]")
        return "\n".join(ret)

    def state(self):
        """
        Get the state ID (not assigned yet when -1/None).
        :return: the state ID
        """
        return self._state

    def set_state(self, state):
        """
        Set the state ID.
        """
        self._state = state

    def add(self, x: ExtendProduction):
        """
        Add an item to this set.
        :param x: the item
        """
        self._set.add(x)

    def union(self, x):
        """
        Merge another collection into this set in place.
        :param x: a plain set of items or another ExtendProductionSet
        """
        if isinstance(x, set):
            self._set = self._set.union(x)
        else:
            assert isinstance(x, ExtendProductionSet)
            self._set = self._set.union(x._set)

    def clone(self):
        """
        Create a copy (the element set is copied, items themselves are shared).
        :return: the cloned item set
        """
        return ExtendProductionSet(set(self._set), self._state)


ACTION_ACCEPT = 1
ACTION_GOTO = 2  # Shift and Goto share one action kind: non-terminals always Goto, terminals always Shift
ACTION_REDUCE = 3  # reduce action


class Action:
    """
    A parser action (one cell of the ACTION/GOTO table).
    """
    def __init__(self, action: int, arg, ref_state: ExtendProductionSet, ref_symbol: Symbol,
                 ref_prod: Optional[ExtendProduction]):
        self._action = action
        self._arg = arg
        self._ref_state = ref_state
        self._ref_symbol = ref_symbol
        self._ref_prod = ref_prod

        # argument sanity checks
        if action == ACTION_GOTO:
            assert isinstance(arg, ExtendProductionSet)
        elif action == ACTION_REDUCE:
            assert isinstance(arg, Production)
            assert arg.index() >= 0

    def __repr__(self):
        if self._action == ACTION_ACCEPT:
            return "a"
        elif self._action == ACTION_GOTO:
            assert isinstance(self._arg, ExtendProductionSet)
            if self._ref_symbol.type() == SYMBOL_NON_TERMINAL:
                return f"g{self._arg.state()}"
            else:
                return f"s{self._arg.state()}"
        elif self._action == ACTION_REDUCE:
            assert isinstance(self._arg, Production)
            return f"r{self._arg.index()}"
        return ""

    def action(self) -> int:
        """
        Get the action kind (one of the ACTION_* constants).
        :return: the action kind
        """
        return self._action

    def arg(self):
        """
        Get the action argument (target item set for Goto, production for Reduce).
        :return: the argument
        """
        return self._arg

    def ref_state(self) -> ExtendProductionSet:
        """
        Get the item set (state) this action originates from.
        :return: the item set
        """
        return self._ref_state

    def ref_symbol(self) -> Symbol:
        """
        Get the symbol this action is keyed on.
        :return: the symbol
        """
        return self._ref_symbol

    def ref_prod(self) -> Optional[ExtendProduction]:
        """
        Get the associated item (extended production).

        Shift actions have no associated item.
        :return: the item
""" 947 | return self._ref_prod 948 | 949 | 950 | class GrammarError(Exception): 951 | """ 952 | 解析错误 953 | """ 954 | def __init__(self, message: str): 955 | Exception.__init__(self, message) 956 | self._message = message 957 | 958 | def message(self): 959 | return self._message 960 | 961 | 962 | GRAMMAR_MODE_LR1 = 0 963 | GRAMMAR_MODE_LALR = 1 964 | 965 | 966 | class GrammarAnalyzer: 967 | def __init__(self, document: GrammarDocument): 968 | self._doc = document 969 | 970 | # 初始化 NullableSet、FirstSet 和 FollowSet 并计算 971 | # 注意这个 Set 会包含 kEofSymbol 972 | self._nullable_set = {} # type: Dict[Symbol, bool] 973 | self._first_set = {} # type: Dict[Symbol, Set[Symbol]] 974 | self._follow_set = {} # type: Dict[Symbol, Set[Symbol]] 975 | self._analyze_nullable_first_follow_set() 976 | 977 | # 初始化扩展符号表 978 | self._extend_symbols = set(self._doc.symbols()) # type: Set[Symbol] 979 | self._extend_symbols.add(kEofSymbol) 980 | 981 | # 初始化分析动作表 982 | self._actions = {} # type: Dict[Symbol, Dict[int, Action]] 983 | self._max_state = 0 # 最大的状态ID 984 | self._resolve_rr_conflict = 0 # 解决Reduce/Reduce冲突的次数 985 | self._resolve_sr_conflict_by_prec = 0 # 解决Reduce/Shift冲突的次数(通过算符优先) 986 | self._resolve_sr_conflict_by_shift = 0 # 解决Reduce/Shift冲突的次数(通过Shift优先) 987 | self._reset_actions() 988 | 989 | def _analyze_nullable_first_follow_set(self): 990 | # 对所有产生式执行拓扑排序的计算,并按照出度从小到大排序 991 | toposort_states = {} # type: Dict[Symbol, Dict] 992 | toposort_results = [] # type: List[Production] 993 | 994 | # 初始化数据集 995 | for s in self._doc.non_terminals(): 996 | toposort_states[s] = { 997 | "out": 0, # 出度 998 | "from": [], # 入度 999 | "visited": False, # 是否已处理 1000 | "productions": [], # 从当前非终结符号导出的产生式 1001 | } 1002 | for p in self._doc.productions(): 1003 | toposort_states[p.left()]["productions"].append(p) 1004 | for i in range(0, len(p)): 1005 | if p[i].type() == SYMBOL_NON_TERMINAL: 1006 | toposort_states[p.left()]["out"] += 1 1007 | 
toposort_states[p[i]]["from"].append(toposort_states[p.left()]) 1008 | 1009 | # 迭代进行拓扑排序直到集合为空 1010 | while len(toposort_results) < len(self._doc.productions()): 1011 | refs_min = None 1012 | for k in toposort_states: # 寻找一个出度最小节点 1013 | state = toposort_states[k] 1014 | if state["visited"]: 1015 | continue 1016 | if refs_min is None or state["out"] < refs_min["out"]: 1017 | refs_min = state 1018 | assert refs_min is not None 1019 | toposort_results.extend(refs_min["productions"]) # 将当前节点的产生式放入队列 1020 | # 从集合中隐藏当前节点 1021 | refs_min["visited"] = True 1022 | for e in refs_min["from"]: 1023 | e["out"] -= 1 1024 | assert e["out"] >= 0 1025 | assert len(toposort_results) == len(self._doc.productions()) 1026 | 1027 | # 初始化集合 1028 | nullable_set = {kEofSymbol: False} # type: Dict[Symbol, bool] 1029 | first_set = {kEofSymbol: {kEofSymbol}} # type: Dict[Symbol, Set[Symbol]] 1030 | follow_set = {kEofSymbol: set()} # type: Dict[Symbol, Set[Symbol]] 1031 | for s in self._doc.symbols(): 1032 | nullable_set[s] = False 1033 | first_set[s] = {s} if s.type() == SYMBOL_TERMINAL else set() 1034 | follow_set[s] = set() 1035 | 1036 | # 迭代到不动点计算NULLABLE、FIRST集合和FOLLOW集合 1037 | while True: 1038 | stopped = True 1039 | for p in toposort_results: 1040 | s = p.left() 1041 | 1042 | # 检查产生式是否可空,即产生式中所有项都可空能推导出当前的非终结符可空 1043 | if not nullable_set[s]: # 对于已经认为可空的永远不会变为非可空 1044 | nullable = True 1045 | for i in range(0, len(p)): 1046 | if not nullable_set[p[i]]: # 非空 1047 | nullable = False 1048 | break 1049 | if nullable_set[s] != nullable: 1050 | nullable_set[s] = nullable 1051 | stopped = False 1052 | 1053 | # 计算FIRST集 1054 | first = set(first_set[s]) 1055 | for i in range(0, len(p)): 1056 | # 若 p[0..i] 都可空,那么 first[s] = first[s] ∪ first[p[i]] 1057 | prefix_nullable = True 1058 | for j in range(0, i): 1059 | if not nullable_set[p[j]]: 1060 | prefix_nullable = False 1061 | break 1062 | if prefix_nullable: 1063 | first = first.union(first_set[p[i]]) 1064 | else: 1065 | break # 
如果中间出现过不可空的,则无需继续看 1066 | if first_set[s] != first: 1067 | first_set[s] = first 1068 | stopped = False 1069 | 1070 | # 计算FOLLOW集 1071 | for i in range(0, len(p)): 1072 | x = p[i] # 注意此时计算的目标是产生式中的每个项 1073 | follow = set(follow_set[x]) # copy 1074 | # 若 p[i+1..len(p)] 都可空,那么 follow[x] = follow[x] ∪ follow[s] 1075 | postfix_nullable = True 1076 | for j in range(i + 1, len(p)): 1077 | if not nullable_set[p[j]]: 1078 | postfix_nullable = False 1079 | break 1080 | if postfix_nullable: 1081 | follow = follow.union(follow_set[s]) 1082 | # 若 p[i+1..j] 都可空,那么 follow[x] = follow[x] ∪ first[j] 1083 | for j in range(i + 1, len(p)): 1084 | inner_nullable = True 1085 | for k in range(i + 1, j): 1086 | if not nullable_set[p[k]]: 1087 | inner_nullable = False 1088 | break 1089 | if inner_nullable: 1090 | follow = follow.union(first_set[p[j]]) 1091 | if follow_set[x] != follow: 1092 | follow_set[x] = follow 1093 | stopped = False 1094 | if stopped: 1095 | break 1096 | self._nullable_set = nullable_set 1097 | self._first_set = first_set 1098 | self._follow_set = follow_set 1099 | 1100 | def _reset_actions(self): 1101 | for s in self._extend_symbols: 1102 | self._actions[s] = {} 1103 | self._max_state = 0 1104 | self._resolve_rr_conflict = 0 1105 | self._resolve_sr_conflict_by_prec = 0 1106 | self._resolve_sr_conflict_by_shift = 0 1107 | 1108 | def _closure(self, org: ExtendProductionSet) -> ExtendProductionSet: 1109 | """ 1110 | 求项集的闭包 1111 | :param org: 原始项集 1112 | :return: 项集的闭包 1113 | """ 1114 | ret = org.clone() # copy 1115 | ret.set_state(-1) # 需要外部重新赋予状态ID 1116 | add = set() 1117 | while True: 1118 | for e in ret: 1119 | if e.pos() >= len(e.production()): 1120 | continue 1121 | 1122 | x = e.production()[e.pos()] 1123 | if x.type() == SYMBOL_TERMINAL: 1124 | continue 1125 | if x.type() == SYMBOL_EOF: 1126 | assert (len(e.lookahead()) == 0) 1127 | continue 1128 | assert(x.type() != SYMBOL_ENTRY) 1129 | 1130 | # 计算 FIRST 集 1131 | first = set() 1132 | for i in range(e.pos() + 1, 
len(e.production()) + 1): 1133 | # 若 p[cur+1..i] 都可空,那么 first[X] = first[X] ∪ first[p[i]] 1134 | prefix_nullable = True 1135 | for j in range(e.pos() + 1, i): 1136 | if not self._nullable_set[e.production()[j]]: 1137 | prefix_nullable = False 1138 | break 1139 | if prefix_nullable: 1140 | if i == len(e.production()): 1141 | first = first.union(e.lookahead()) 1142 | else: 1143 | first = first.union(self._first_set[e.production()[i]]) 1144 | else: 1145 | break # 如果中间出现过不可空的,则无需继续看 1146 | 1147 | # 展开终结符 1148 | for p in self._doc.productions(): 1149 | if p.left() == x: 1150 | for w in first: 1151 | item = ExtendProduction(p, 0, {w}) 1152 | if item not in ret and item not in add: 1153 | add.add(item) 1154 | 1155 | if len(add) == 0: 1156 | break 1157 | ret.union(add) 1158 | add.clear() 1159 | return ret 1160 | 1161 | def _goto(self, org: ExtendProductionSet, x: Symbol) -> ExtendProductionSet: 1162 | """ 1163 | 求项集在符号 X 下可以转移到的状态 1164 | :param org: 原始项集 1165 | :param x: 转移符号 1166 | :return: 输出状态 1167 | """ 1168 | ret = set() 1169 | for e in org: 1170 | if e.pos() >= len(e.production()): 1171 | continue 1172 | s = e.production()[e.pos()] 1173 | if s != x: 1174 | continue 1175 | p = ExtendProduction(e.production(), e.pos() + 1, e.lookahead()) 1176 | if p not in ret: 1177 | ret.add(p) 1178 | return self._closure(ExtendProductionSet(ret, -1)) # 需要外部重新赋予状态ID 1179 | 1180 | def _populate_action(self, s: Symbol, state: int, act: Action): 1181 | if state in self._actions[s]: # 冲突解决 1182 | raise_error = True 1183 | conflict_type = 0 # 0: unknown 1: shift/shift冲突 2:shift/reduce冲突 3:reduce/reduce冲突 1184 | conflict_args = () 1185 | org_action = self._actions[s][state] 1186 | assert state == org_action.ref_state().state() 1187 | 1188 | # 如果存在Shift/Shift冲突,则抛出错误 1189 | if org_action.action() == ACTION_GOTO and act.action() == ACTION_GOTO: 1190 | assert isinstance(org_action.arg(), ExtendProductionSet) 1191 | assert isinstance(act.arg(), ExtendProductionSet) 1192 | conflict_type = 1 1193 
                conflict_args = (s, org_action.ref_state().state(), org_action.arg(), act.arg())

            # Reduce/reduce conflict: keep the production that appears first
            # in the grammar file
            if org_action.action() == ACTION_REDUCE and act.action() == ACTION_REDUCE:
                assert isinstance(org_action.arg(), Production)
                assert isinstance(act.arg(), Production)
                assert org_action.arg().index() != act.arg().index()
                conflict_type = 3
                conflict_args = (s, org_action.ref_state().state(), org_action.arg(), act.arg())
                raise_error = False
                self._resolve_rr_conflict += 1
                if act.arg().index() > org_action.arg().index():
                    return  # reject the later production (keep the earlier one)

            # Shift/reduce conflict
            if (org_action.action() == ACTION_REDUCE and act.action() == ACTION_GOTO) or \
                    (org_action.action() == ACTION_GOTO and act.action() == ACTION_REDUCE):
                if org_action.action() == ACTION_REDUCE:
                    reduce_action = org_action
                    shift_action = act
                else:
                    reduce_action = act
                    shift_action = org_action
                reduce_production = reduce_action.arg()  # type: Production
                shift_state = shift_action.arg()  # type: ExtendProductionSet
                assert isinstance(reduce_production, Production)
                assert isinstance(shift_state, ExtendProductionSet)
                assert shift_action.ref_symbol() == s
                assert s.type() != SYMBOL_NON_TERMINAL  # an SR conflict can never occur on a non-terminal
                conflict_type = 2
                conflict_args = (s, org_action.ref_state().state(), reduce_production)

                accept_reduce = None
                raise_error = False

                # First try operator precedence.
                # The grammar rules guarantee that whenever associativity is
                # defined an operator precedence is defined as well; symbols or
                # productions without a precedence are never resolved this way.
                if s.type() == SYMBOL_TERMINAL and s.precedence() > 0 and reduce_production.precedence() > 0:
                    # Equal precedence: decide by associativity
                    if s.precedence() == reduce_production.precedence():
                        # locate the rightmost terminal of the reduce
                        # production to read its associativity
                        reduce_symbol = None
                        for i in range(len(reduce_production) - 1, -1, -1):
                            if reduce_production[i].type() == SYMBOL_TERMINAL:
                                reduce_symbol = reduce_production[i]
                                break
                        assert reduce_symbol is not None

                        if reduce_symbol.associativity() == ASSOC_NONE or s.associativity() == ASSOC_NONE:
                            # explicitly non-associative: report an error
                            raise_error = True
                        elif reduce_symbol.associativity() == ASSOC_UNDEF or s.associativity() == ASSOC_UNDEF:
                            # associativity undefined: fall back to the prefer-shift rule
                            pass
                        elif reduce_symbol.associativity() != s.associativity():
                            # inconsistent associativity: report an error
                            raise_error = True
                        else:
                            # consistent associativity resolves the SR conflict
                            assert reduce_symbol.associativity() == s.associativity()

                            # left-associative: reduce; right-associative: shift
                            if s.associativity() == ASSOC_LEFT:
                                accept_reduce = True
                            else:
                                assert s.associativity() == ASSOC_RIGHT
                                accept_reduce = False
                            self._resolve_sr_conflict_by_prec += 1
                    else:  # different precedence: the higher one decides reduce/shift
                        if reduce_production.precedence() > s.precedence():
                            accept_reduce = True
                        else:
                            accept_reduce = False
                        self._resolve_sr_conflict_by_prec += 1

                # If operator precedence did not decide, prefer shift
                if (accept_reduce is None) and (not raise_error):
                    accept_reduce = False
                    self._resolve_sr_conflict_by_shift += 1

                # Finally decide whether the new action overrides the old one:
                # if the winning side is already in the table, keep it
                if accept_reduce is not None:
                    assert not raise_error
                    if accept_reduce and reduce_action == org_action:
                        return
                    elif not accept_reduce and reduce_action == act:
                        return

            assert conflict_type != 0
            if raise_error:  # the conflict could not be resolved
                if conflict_type == 1:
                    raise GrammarError(f"Shift/shift conflict detected, symbol {repr(conflict_args[0])}, state: "
                                       f"{repr(conflict_args[1])}, shift state 1: {repr(conflict_args[2])}, "
                                       f"shift state 2: {repr(conflict_args[3])}")
                elif conflict_type == 2:
                    raise GrammarError(f"Shift/reduce conflict detected, state: {repr(conflict_args[1])}, "
                                       f"shift symbol: {repr(conflict_args[0])}, reduce production: "
                                       f"{repr(conflict_args[2])}")
                elif conflict_type == 3:
                    assert False  # reduce/reduce conflicts are always resolved above
        self._actions[s][state] = act  # overwrite the slot

    def _process_lr1(self):
        """
        Build the canonical LR(1) action table by breadth-first expansion of
        item sets starting from the augmented entry production.
        """
        # Use the first production as the entry rule
        entry_production = Production(kEntrySymbol, [self._doc.productions()[0].left(), kEofSymbol], {})
        entry_production_ex = ExtendProduction(entry_production, 0, set())
        entry_item_set = self._closure(ExtendProductionSet({entry_production_ex}, -1))
        entry_item_set.set_state(0)  # initial state

        # Initialize the state collection and work queue
        next_state = 1
        states = {entry_item_set: entry_item_set.state()}  # type: Dict[ExtendProductionSet, int]
        q = [entry_item_set]  # type: List[ExtendProductionSet]

        # Compute the action table
        while len(q) > 0:
            state = q.pop(0)
            assert states[state] == state.state()

            # Fill in reduce actions for completed items
            for p in state:
                if p.pos() >= len(p.production()):
                    for x in p.lookahead():
                        action = Action(ACTION_REDUCE, p.production(), state, x, p)
                        self._populate_action(x, state.state(), action)

            # Compute shift/goto/accept actions
            for x in self._extend_symbols:
                goto = self._goto(state, x)
                if len(goto) == 0:
                    continue
                if x == kEofSymbol:
                    for p in goto:
                        if p.pos() >= len(p.production()):
                            action = Action(ACTION_ACCEPT, None, state, x, p)
                            self._populate_action(x, state.state(), action)
                        else:
                            assert False  # a state reached via EOF can only reduce, nothing else is possible
                else:
                    if goto in states:
                        goto.set_state(states[goto])
                    else:
                        goto.set_state(next_state)
                        next_state += 1
                        states[goto] = goto.state()
                        q.append(goto)
                    assert goto.state() != -1
                    action = Action(ACTION_GOTO, goto, state, x, None)
                    self._populate_action(x, state.state(), action)
        self._max_state = next_state - 1

    def document(self):
        """
        Get the original grammar document.
        :return: the document object
        """
        return self._doc

    def actions(self):
        """
        Get the computed action table.
        :return: the action transition table
        """
        return self._actions

    def max_state(self):
        """
        Get the largest state ID.
        """
        return self._max_state

    def printable_actions(self) -> str:
        """
        Get a human-readable rendering of the action table.
        :return: the formatted string
        """
        ret = []
        header = [None]  # table header
        for s in self._doc.terminals():
            header.append(s)
        header.append(kEofSymbol)
        for s in self._doc.non_terminals():
            header.append(s)
        min_width = len(str(self._max_state)) + 1
        header_width = [max(min_width, len(s.id()) if s is not None else 0) for s in header]

        # print the header row
        ret.append(" | ".join([header[i].id().rjust(header_width[i]) if header[i] is not None else
                               "".rjust(header_width[i]) for i in range(0, len(header))]))

        # print every (non-empty) state row
        for s in range(0, self._max_state + 1):
            empty = True
            data = []
            for i in range(0, len(header)):
                if i == 0:
                    data.append(str(s).rjust(header_width[i]))
                else:
                    if s in self._actions[header[i]]:
                        data.append(repr(self._actions[header[i]][s]).rjust(header_width[i]))
                        empty = False
                    else:
                        data.append("".rjust(header_width[i]))
            if not empty:
                ret.append(" | ".join(data))
        return "\n".join(ret)

    def process(self, mode):
        """
        Run the grammar analysis.
        :param mode: the grammar mode (GRAMMAR_MODE_LR1 or GRAMMAR_MODE_LALR)
        :raises NotImplementedError: LALR mode is not implemented yet
        """
        self._reset_actions()
        if mode == GRAMMAR_MODE_LR1:
            self._process_lr1()
        else:
            assert mode == GRAMMAR_MODE_LALR
            # TODO
            raise NotImplementedError()

    def resolve_stat(self) -> Tuple[int, int, int]:
        # (reduce/reduce, shift/reduce by precedence, shift/reduce by shift-preference)
        return self._resolve_rr_conflict, self._resolve_sr_conflict_by_prec, self._resolve_sr_conflict_by_shift

# ---------------------------------------- Template renderer ----------------------------------------
# See https://github.com/9chu/et-py

_TEMPLATE_FOR_MISSING = object()  # sentinel: loop variable was absent from the context before the loop


class TemplateNode:
    """Base class for template AST nodes."""
    def __init__(self, parent):
        self.parent = parent
        self.nodes = []  # child nodes

    def render(self, context):
        pass


class TemplateForNode(TemplateNode):
    """'for <identifier> in <expression>' block node."""
    def __init__(self, parent, identifier, expression):
        TemplateNode.__init__(self, parent)
        self.identifier = identifier
        self.expression = expression

    def render(self, context):
        # NOTE: the expression is evaluated with eval(); template input must
        # come from a trusted source.
        result = eval(self.expression, None, context)
        # Remember the previous binding of the loop variable so it can be
        # restored after the loop. The sentinel distinguishes "absent" from
        # any real value, including falsy ones such as 0/""/None — the old
        # `if origin:` check failed to restore falsy values and leaked the
        # loop variable when it had not existed before.
        origin = context.get(self.identifier, _TEMPLATE_FOR_MISSING)
        for i in result:
            context[self.identifier] = i
            yield iter(self.nodes)
        if origin is _TEMPLATE_FOR_MISSING:
            context.pop(self.identifier, None)  # do not leak the loop variable
        else:
            context[self.identifier] = origin


class TemplateIfNode(TemplateNode):
    """'if <expression>' block node (children form the true branch)."""
    def __init__(self, parent, expression):
        TemplateNode.__init__(self, parent)
        self.expression = expression
        self.true_branch = self.nodes

    def render(self, context):
        test = eval(self.expression, None, context)
        if test:
            yield iter(self.true_branch)


class TemplateIfElseNode(TemplateNode):
    """'if/else' block node, upgraded from an existing TemplateIfNode."""
    def __init__(self, parent, if_node):  # extends the given IfNode
        TemplateNode.__init__(self, parent)
        self.expression = if_node.expression
        self.true_branch = if_node.true_branch
        self.false_branch = self.nodes

    def render(self, context):
        test = eval(self.expression, None, context)
        if test:
            yield iter(self.true_branch)
        else:
            yield iter(self.false_branch)


class TemplateExpressionNode(TemplateNode):
    """Inline expression node: renders to the evaluated expression value."""
    def __init__(self, parent, expression):
        TemplateNode.__init__(self, parent)
        self.expression = expression

    def render(self, context):
        return eval(self.expression, None, context)


class TextConsumer:
    def __init__(self, text):
        self._text = text
        self._len = len(text)
self._pos = 0 1486 | self._line = 1 1487 | self._row = 0 1488 | 1489 | def get_pos(self): 1490 | return self._pos 1491 | 1492 | def get_line(self): 1493 | return self._line 1494 | 1495 | def get_row(self): 1496 | return self._row 1497 | 1498 | def read(self): 1499 | if self._pos >= self._len: 1500 | return '\0' 1501 | ch = self._text[self._pos] 1502 | self._pos += 1 1503 | self._row += 1 1504 | if ch == '\n': 1505 | self._line += 1 1506 | self._row = 0 1507 | return ch 1508 | 1509 | def peek(self, advance=0): 1510 | if self._pos + advance >= self._len: 1511 | return '\0' 1512 | return self._text[self._pos + advance] 1513 | 1514 | def substr(self, begin, end): 1515 | return self._text[begin:end] 1516 | 1517 | 1518 | class TemplateParser: 1519 | OUTER_TOKEN_LITERAL = 1 1520 | OUTER_TOKEN_EXPRESS = 2 1521 | 1522 | RESERVED = ["and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "exec", 1523 | "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "not", "or", "pass", "print", 1524 | "raise", "return", "try", "while", "with", "yield"] 1525 | 1526 | def __init__(self, text): 1527 | self._text = text 1528 | self._consumer = TextConsumer(text) 1529 | 1530 | @staticmethod 1531 | def _is_starting_by_new_line(text): 1532 | for i in range(0, len(text)): 1533 | ch = text[i:i + 1] 1534 | if ch == '\n': 1535 | return True 1536 | elif not ch.isspace(): 1537 | break 1538 | return False 1539 | 1540 | @staticmethod 1541 | def _is_ending_by_new_line(text): 1542 | for i in range(len(text) - 1, -1, -1): 1543 | ch = text[i:i + 1] 1544 | if ch == '\n': 1545 | return True 1546 | elif not ch.isspace(): 1547 | break 1548 | return False 1549 | 1550 | @staticmethod 1551 | def _trim_left_until_new_line(text): 1552 | for i in range(0, len(text)): 1553 | ch = text[i:i+1] 1554 | if ch == '\n': 1555 | return text[i+1:] 1556 | elif not ch.isspace(): 1557 | break 1558 | return text 1559 | 1560 | @staticmethod 1561 | def 
_trim_right_until_new_line(text): 1562 | for i in range(len(text) - 1, -1, -1): 1563 | ch = text[i:i+1] 1564 | if ch == '\n': 1565 | return text[0:i+1] # save right \n 1566 | elif not ch.isspace(): 1567 | break 1568 | return text 1569 | 1570 | @staticmethod 1571 | def _parse_blank(consumer): 1572 | while consumer.peek().isspace(): # 跳过所有空白 1573 | consumer.read() 1574 | 1575 | @staticmethod 1576 | def _parse_identifier(consumer): 1577 | ch = consumer.peek() 1578 | if not (ch.isalpha() or ch == '_'): 1579 | return "" 1580 | chars = [consumer.read()] # ch 1581 | ch = consumer.peek() 1582 | while ch.isalnum() or ch == '_': 1583 | chars.append(consumer.read()) # ch 1584 | ch = consumer.peek() 1585 | return "".join(chars) 1586 | 1587 | @staticmethod 1588 | def _parse_inner(content, line, row): 1589 | """内层解析函数 1590 | 考虑到表达式解析非常费力不讨好,这里采用偷懒方式进行。 1591 | 表达式全部交由python自行解决,匹配仅匹配开头,此外不处理注释(意味着不能在表达式中包含注释内容)。 1592 | 当满足 for in <...> 时产生 for节点 1593 | 当满足 if <...> 时产生 if节点 1594 | 当满足 elif <...> 时产生 elif节点 1595 | 当满足 else 时产生 else节点 1596 | 当满足 end 时产生 end节点 1597 | :param content: 内层内容 1598 | :param line: 起始行 1599 | :param row: 起始列 1600 | :return: 节点名称, 表达式部分, 可选的Identifier 1601 | """ 1602 | consumer = TextConsumer(content) 1603 | TemplateParser._parse_blank(consumer) 1604 | operator = TemplateParser._parse_identifier(consumer) 1605 | identifier = None 1606 | if operator == "for": 1607 | TemplateParser._parse_blank(consumer) 1608 | identifier = TemplateParser._parse_identifier(consumer) 1609 | if identifier == "" or (identifier in TemplateParser.RESERVED): 1610 | raise ParseError("Identifier expected", consumer.get_line() + line - 1, 1611 | consumer.get_row() + row if consumer.get_line() == 1 else consumer.get_row()) 1612 | TemplateParser._parse_blank(consumer) 1613 | if TemplateParser._parse_identifier(consumer) != "in": 1614 | raise ParseError("Keyword 'in' expected", consumer.get_line() + line - 1, 1615 | consumer.get_row() + row if consumer.get_line() == 1 else 
consumer.get_row()) 1616 | TemplateParser._parse_blank(consumer) 1617 | expression = content[consumer.get_pos():] 1618 | if expression == "": 1619 | raise ParseError("Expression expected", consumer.get_line() + line - 1, 1620 | consumer.get_row() + row if consumer.get_line() == 1 else consumer.get_row()) 1621 | elif operator == "if" or operator == "elif": 1622 | TemplateParser._parse_blank(consumer) 1623 | expression = content[consumer.get_pos():] 1624 | if expression == "": 1625 | raise ParseError("Expression expected", consumer.get_line() + line - 1, 1626 | consumer.get_row() + row if consumer.get_line() == 1 else consumer.get_row()) 1627 | elif operator == "end" or operator == "else": 1628 | TemplateParser._parse_blank(consumer) 1629 | expression = content[consumer.get_pos():] 1630 | if expression != '': 1631 | raise ParseError("Unexpected content", consumer.get_line() + line - 1, 1632 | consumer.get_row() + row if consumer.get_line() == 1 else consumer.get_row()) 1633 | else: 1634 | operator = "" 1635 | expression = content 1636 | return operator, expression.strip(), identifier 1637 | 1638 | def _parse_outer(self): 1639 | """外层解析函数 1640 | 将输入拆分成字符串(Literal)和表达式(Expression)两个组成。 1641 | 遇到'{%'开始解析Expression,在解析Expression时允许使用'%%'转义,即'%%'->'%',这使得'%%>'->'%>'而不会结束表达式。 1642 | :return: 类型, 内容, 起始行, 起始列 1643 | """ 1644 | begin = self._consumer.get_pos() 1645 | end = begin # [begin, end) 1646 | begin_line = self._consumer.get_line() 1647 | begin_row = self._consumer.get_row() 1648 | ch = self._consumer.peek() 1649 | while ch != '\0': 1650 | if ch == '{': 1651 | ahead = self._consumer.peek(1) 1652 | if ahead == '%': 1653 | if begin != end: 1654 | return TemplateParser.OUTER_TOKEN_LITERAL, self._consumer.substr(begin, end), begin_line, \ 1655 | begin_row 1656 | self._consumer.read() # { 1657 | self._consumer.read() # % 1658 | begin_line = self._consumer.get_line() 1659 | begin_row = self._consumer.get_row() 1660 | chars = [] 1661 | while True: 1662 | ch = 
self._consumer.read() 1663 | if ch == '\0': 1664 | raise ParseError("Unexpected eof", self._consumer.get_line(), self._consumer.get_row()) 1665 | elif ch == '%': 1666 | if self._consumer.peek() == '}': # '%}' 1667 | self._consumer.read() 1668 | return TemplateParser.OUTER_TOKEN_EXPRESS, "".join(chars), begin_line, begin_row 1669 | elif self._consumer.peek() == '%': # '%%' -> '%' 1670 | self._consumer.read() 1671 | chars.append(ch) 1672 | self._consumer.read() 1673 | ch = self._consumer.peek() 1674 | end = self._consumer.get_pos() 1675 | return TemplateParser.OUTER_TOKEN_LITERAL, self._consumer.substr(begin, end), begin_line, begin_row 1676 | 1677 | @staticmethod 1678 | def _trim_empty_line(result): 1679 | state = 0 1680 | left = None # 需要剔除右边的元素 1681 | for i in range(0, len(result)): 1682 | cur = result[i] 1683 | p = result[i - 1] if i != 0 else None 1684 | n = result[i + 1] if i != len(result) - 1 else None 1685 | if state == 0: 1686 | # 当前是表达式,且上一个是文本 1687 | if cur[0] == TemplateParser.OUTER_TOKEN_EXPRESS: 1688 | if p is None or (p[0] == TemplateParser.OUTER_TOKEN_LITERAL and 1689 | TemplateParser._is_ending_by_new_line(p[1])): 1690 | left = i - 1 if p else None 1691 | state = 1 1692 | if state == 1: 1693 | if n is None or (n[0] == TemplateParser.OUTER_TOKEN_LITERAL and 1694 | TemplateParser._is_starting_by_new_line(n[1])): 1695 | right = i + 1 if n else None 1696 | if left is not None: 1697 | result[left] = (result[left][0], 1698 | TemplateParser._trim_right_until_new_line(result[left][1]), 1699 | result[left][2], 1700 | result[left][3]) 1701 | if right is not None: 1702 | result[right] = (result[right][0], 1703 | TemplateParser._trim_left_until_new_line(result[right][1]), 1704 | result[right][2], 1705 | result[right][3]) 1706 | state = 0 1707 | elif cur[0] != TemplateParser.OUTER_TOKEN_EXPRESS: # 行中有其他文本,不进行剔除 1708 | state = 0 1709 | 1710 | def process(self): 1711 | root = [] # 根 1712 | nodes = [] # 未闭合节点队列 1713 | outer_results = [] 1714 | while True: # 
为了剔除空行,需要先解析完所有的根元素做预处理 1715 | ret = self._parse_outer() 1716 | if ret[0] == TemplateParser.OUTER_TOKEN_LITERAL and ret[1] == "": # EOF 1717 | break 1718 | outer_results.append(ret) 1719 | TemplateParser._trim_empty_line(outer_results) 1720 | for i in outer_results: 1721 | (t, content, line, row) = i 1722 | back = None if len(nodes) == 0 else nodes[len(nodes) - 1] 1723 | if t == TemplateParser.OUTER_TOKEN_LITERAL: 1724 | root.append(content) if back is None else back.nodes.append(content) 1725 | else: 1726 | assert t == TemplateParser.OUTER_TOKEN_EXPRESS 1727 | (operator, expression, identifier) = self._parse_inner(content, line, row) 1728 | if operator == "for": 1729 | node = TemplateForNode(back, identifier, expression) 1730 | root.append(node) if back is None else back.nodes.append(node) 1731 | nodes.append(node) 1732 | elif operator == "if": 1733 | node = TemplateIfNode(back, expression) 1734 | root.append(node) if back is None else back.nodes.append(node) 1735 | nodes.append(node) 1736 | elif operator == "else": 1737 | if not isinstance(back, TemplateIfNode): 1738 | raise ParseError("Unexpected else branch", line, row) 1739 | node = TemplateIfElseNode(back.parent, back) 1740 | # 从root或者父节点中删除back 1741 | if back.parent is None: 1742 | assert root[len(root) - 1] == back 1743 | root.pop() 1744 | root.append(node) 1745 | else: 1746 | parent_nodes = back.parent.nodes 1747 | assert parent_nodes[len(parent_nodes) - 1] == back 1748 | parent_nodes.pop() 1749 | parent_nodes.append(node) 1750 | # 升级并取代 1751 | nodes.pop() 1752 | nodes.append(node) 1753 | elif operator == "elif": 1754 | if not isinstance(back, TemplateIfNode): 1755 | raise ParseError("Unexpected elif branch", line, row) 1756 | closed_else = TemplateIfElseNode(back.parent, back) 1757 | # 从root或者父节点中删除back 1758 | if back.parent is None: 1759 | assert root[len(root) - 1] == back 1760 | root.pop() 1761 | root.append(closed_else) 1762 | else: 1763 | parent_nodes = back.parent.nodes 1764 | assert 
parent_nodes[len(parent_nodes) - 1] == back 1765 | parent_nodes.pop() 1766 | parent_nodes.append(closed_else) 1767 | node = TemplateIfNode(closed_else, expression) 1768 | closed_else.nodes.append(node) 1769 | # 取代 1770 | nodes.pop() 1771 | nodes.append(node) 1772 | elif operator == "end": 1773 | if back is None: 1774 | raise ParseError("Unexpected block end", line, row) 1775 | nodes.pop() # 完成一个节点 1776 | else: 1777 | assert operator == "" 1778 | node = TemplateExpressionNode(back, expression) 1779 | root.append(node) if back is None else back.nodes.append(node) 1780 | if len(nodes) != 0: 1781 | raise ParseError("Unclosed block", self._consumer.get_line(), self._consumer.get_row()) 1782 | return root 1783 | 1784 | 1785 | def render_template(template, **context): 1786 | p = TemplateParser(template) 1787 | root = p.process() 1788 | output = [] 1789 | stack = [iter(root)] 1790 | while stack: 1791 | node = stack.pop() 1792 | if isinstance(node, str): 1793 | output.append(node) 1794 | elif isinstance(node, TemplateExpressionNode): 1795 | output.append(str(node.render(context))) 1796 | elif isinstance(node, TemplateNode): 1797 | stack.append(node.render(context)) 1798 | else: 1799 | new_node = next(node, None) 1800 | if new_node is not None: 1801 | stack.append(node) 1802 | stack.append(new_node) 1803 | return "".join(output) 1804 | 1805 | 1806 | # ---------------------------------------- 代码生成 ---------------------------------------- 1807 | 1808 | 1809 | def generate_code(header_template: str, source_template: str, analyzer: GrammarAnalyzer, header_filename: str): 1810 | # 对所有符号进行整理,下标即最终的符号ID 1811 | symbols = [kEofSymbol] 1812 | tmp = list(analyzer.document().terminals()) 1813 | tmp.sort(key=lambda s: s.id()) 1814 | symbols.extend(tmp) 1815 | token_cnt = len(symbols) 1816 | tmp = list(analyzer.document().non_terminals()) 1817 | tmp.sort(key=lambda s: s.id()) 1818 | symbols.extend(tmp) 1819 | 1820 | # 生成token信息 1821 | token_info = [] 1822 | for i in range(0, token_cnt): 
def _collect_variant_types(symbols):
    # Collect the distinct C++ value types (``replace()`` strings) declared on
    # the given symbols, sorted for deterministic output.  ``std::monostate``
    # is prepended when any symbol carries no type -- or when no type exists
    # at all -- so the generated std::variant is always well-formed.
    types = []
    need_monostate = False
    for s in symbols:
        replace = s.replace()
        if replace is None:
            need_monostate = True
        else:
            assert replace.strip() == replace
            assert replace != "std::monostate"
            if replace not in types:
                types.append(replace)
    types.sort()
    if need_monostate or len(types) == 0:
        types.insert(0, "std::monostate")
    return types


def generate_code(header_template: str, source_template: str, analyzer: GrammarAnalyzer, header_filename: str):
    """Render the C++ header and source for ``analyzer``'s grammar.

    :param header_template: template text for the generated header file
    :param source_template: template text for the generated source file
    :param analyzer: processed grammar analyzer providing symbols and actions
    :param header_filename: filename emitted into the source's ``#include``
    :return: tuple ``(header_src, source_src)`` of rendered file contents
    """
    # Arrange all symbols: EOF first, then terminals and non-terminals, each
    # group sorted by id.  The index into this list becomes the final symbol
    # ID used by the generated tables.
    symbols = [kEofSymbol]
    symbols.extend(sorted(analyzer.document().terminals(), key=lambda s: s.id()))
    token_cnt = len(symbols)
    symbols.extend(sorted(analyzer.document().non_terminals(), key=lambda s: s.id()))

    # Token descriptors for the generated TokenTypes enum; EOF is exposed to
    # C++ under the reserved name "_".
    token_info = []
    for i in range(token_cnt):
        assert symbols[i].type() in (SYMBOL_TERMINAL, SYMBOL_EOF)
        token_info.append({
            "id": i,
            "c_name": "_" if symbols[i] == kEofSymbol else symbols[i].id(),
            "raw": symbols[i],
        })

    # Map each symbol object back to its final ID.
    symbol_mapping = {s: i for i, s in enumerate(symbols)}

    # Production descriptors: left-hand symbol ID and body length.
    production_info = []
    for i, p in enumerate(analyzer.document().productions()):
        assert i == p.index()
        production_info.append({
            "symbol": symbol_mapping[p.left()],
            "count": len(p),
            "raw": p,
        })

    # Build the action table.  States with no action at all are dropped and
    # the remaining states renumbered densely; the two maps translate between
    # original state IDs and the compacted IDs.
    state_remap_id_to_state_id = {}
    state_id_to_state_remap_id = {}
    offset = 0
    for i in range(analyzer.max_state() + 1):
        empty_state = (i not in analyzer.actions()[kEofSymbol] and
                       not any(i in analyzer.actions()[s] for s in analyzer.document().symbols()))
        if empty_state:
            offset += 1
        else:
            assert i not in state_id_to_state_remap_id
            assert (i - offset) not in state_remap_id_to_state_id
            state_id_to_state_remap_id[i] = i - offset
            state_remap_id_to_state_id[i - offset] = i
    state_cnt = len(state_remap_id_to_state_id)

    actions = []
    for i in range(state_cnt):
        state = state_remap_id_to_state_id[i]  # invariant per row, hoisted
        action = []
        for s in symbols:
            one_action = [0, 0]  # [action kind, argument]
            if state in analyzer.actions()[s]:
                act = analyzer.actions()[s][state]
                one_action[0] = act.action()
                if act.action() == ACTION_GOTO:
                    # GOTO targets are expressed in compacted state IDs.
                    one_action[1] = state_id_to_state_remap_id[act.arg().state()]
                elif act.action() == ACTION_REDUCE:
                    assert analyzer.document().productions()[act.arg().index()] == act.arg()
                    one_action[1] = act.arg().index()
            action.append(one_action)
        actions.append(action)

    # C++ std::variant member types for tokens and productions.
    token_types = _collect_variant_types(analyzer.document().terminals())
    production_types = _collect_variant_types(analyzer.document().non_terminals())

    # generate the context
    args = analyzer.document().generator_args() or {}
    context = {
        "namespace": args.get("namespace", None),
        "class_name": args.get("class_name", "Parser"),
        "includes": args.get("includes", []),
        "symbols": symbols,
        "token_info": token_info,
        "token_types": token_types,
        "production_info": production_info,
        "production_types": production_types,
        "actions": actions,
        "header_filename": header_filename,
    }

    header_src = render_template(header_template, **context)
    source_src = render_template(source_template, **context)
    return header_src, source_src
# ---------------------------------------- Main ----------------------------------------


# Default template for the generated C++ header (overridable via
# --header-template).  The string is runtime data consumed by
# render_template(); its contents are emitted verbatim into user files.
CPP_HEADER_TPL = """/**
 * @file
 * @date {% datetime.date.today() %}
 *
 * Auto generated code by 9chu/parser_gen.
 */
#pragma once
#include 
#include 
#include 

{% for f in includes %}
#include "{% f %}"
{% end %}

{% if namespace is None %}
// namespace {
{% else %}
namespace {% namespace %}
{
{% end %}
class {% class_name %}
{
public:
    enum class ParseResult
    {
        Undecided = 0,
        Accepted = 1,
        Rejected = 2,
    };

    enum class TokenTypes
    {
        {% for t in token_info %}
        {% t["c_name"] %} = {% t["id"] %},
        {% end %}
    };

    using TokenValues = std::variant<
        {% for i in range(0, len(token_types)) %}
        {% token_types[i] %}{% if i != len(token_types) - 1 %},{% end %}
        {% end %}
    >;

    using ProductionValues = std::variant<
        {% for i in range(0, len(production_types)) %}
        {% production_types[i] %}{% if i != len(production_types) - 1 %},{% end %}
        {% end %}
    >;

    using UnionValues = std::variant;

public:
    {% class_name %}();

public:
    ParseResult operator()(TokenTypes token, const TokenValues& value);
    void Reset()noexcept;

    {% if production_info[0]["raw"].left().replace() is not None %}
    const {% production_info[0]["raw"].left().replace() %}& Result()const noexcept { return m_stResult; }
    {% production_info[0]["raw"].left().replace() %}& Result()noexcept { return m_stResult; }
    {% end %}

private:
    std::vector m_stStack;
    std::vector m_stValueStack;

    {% if production_info[0]["raw"].left().replace() is not None %}
    {% production_info[0]["raw"].left().replace() %} m_stResult {};
    {% end %}
};
{% if namespace is None %}
// }
{% else %}
}
{% end %}
"""

# Default template for the generated C++ source (overridable via
# --source-template).  Also runtime data -- do not edit casually.
CPP_SOURCE_TPL = """/**
 * @file
 * @date {% datetime.date.today() %}
 *
 * Auto generated code by 9chu/parser_gen.
 */
#include "{% header_filename %}"

#include 

{% if namespace is not None %}
using namespace {% namespace %};
{% end %}

#define ACTION_ERROR 0
#define ACTION_ACCEPT 1
#define ACTION_GOTO 2
#define ACTION_REDUCE 3

namespace {
{% for idx in range(0, len(production_info)) %}
{% class_name %}::ProductionValues Reduce{% idx %}(std::vector<{% class_name %}::UnionValues>& stack_)
{
    // binding values
    assert(stack_.size() >= {% len(production_info[idx]["raw"]) %});
    {% for pos in production_info[idx]["raw"].binding() %}
    auto {% production_info[idx]["raw"].binding()[pos] %} =
        {% if production_info[idx]["raw"][pos].type() == 2 %}
        std::move(std::get<{% production_info[idx]["raw"][pos].replace() %}>(
            std::get<{% class_name %}::ProductionValues>(
                std::move(stack_[stack_.size() - {% len(production_info[idx]["raw"]) - pos %}])
            )
        ));
        {% else %}
        std::move(std::get<{% production_info[idx]["raw"][pos].replace() %}>(
            std::get<{% class_name %}::TokenValues>(
                std::move(stack_[stack_.size() - {% len(production_info[idx]["raw"]) - pos %}])
            )
        ));{% end %}
    {% end %}

    // user code
    {% if production_info[idx]["raw"].left().replace() is not None %}
    auto ret = [&]() {
        {% production_info[idx]["raw"].replace().strip() %}
    }();
    return {% class_name %}::ProductionValues { std::move(ret) };
    {% else %}
    {% if production_info[idx]["raw"].replace() is not None %}
    {% production_info[idx]["raw"].replace() %}
    {% end %}
    return {% class_name %}::ProductionValues {};
    {% end %}
}

{% end %}
}

using ReduceFunction = {% class_name %}::ProductionValues(*)(std::vector<{% class_name %}::UnionValues>&);

struct ProductionInfo
{
    uint32_t NonTerminal;
    uint32_t SymbolCount;
    ReduceFunction Callback;
};

struct ActionInfo
{
    uint8_t Action;
    uint32_t Arg;
};

static const ProductionInfo kProductions[{% len(production_info) %}] = {
    {% for i in range(0, len(production_info)) %}
    { {% production_info[i]["symbol"] %}, {% production_info[i]["count"] %}, ::Reduce{% i %} },
    {% end %}
};

static const ActionInfo kActions[{% len(actions) %}][{% len(symbols) %}] = {
    {% for action in actions %}
    { {% for act in action %}{ {% act[0] %}, {% act[1] %} },{% end %} },
    {% end %}
};

{% class_name %}::{% class_name %}()
{
    Reset();
}

{% class_name %}::ParseResult {% class_name %}::operator()(TokenTypes token, const TokenValues& value)
{
    while (true)
    {
        assert(!m_stStack.empty());
        assert(static_cast(token) < {% len(token_info) %});

        const ActionInfo& act = kActions[m_stStack.back()][static_cast(token)];
        if (act.Action == ACTION_ACCEPT)
        {
            {% if production_info[0]["raw"].left().replace() is not None %}
            // store the result
            assert(!m_stValueStack.empty());
            m_stResult =
                std::move(std::get<{% production_info[0]["raw"].left().replace() %}>(
                    std::get(std::move(m_stValueStack.back()))
                ));
            {% end %}

            Reset();
            return ParseResult::Accepted;
        }
        else if (act.Action == ACTION_ERROR)
        {
            Reset();
            return ParseResult::Rejected;
        }
        else if (act.Action == ACTION_GOTO)
        {
            m_stStack.push_back(static_cast(token));
            m_stStack.push_back(act.Arg);
            assert(m_stStack.back() < {% len(actions) %});

            m_stValueStack.push_back(value);
        }
        else
        {
            assert(act.Action == ACTION_REDUCE);
            assert(act.Arg < {% len(production_info) %});

            const ProductionInfo& info = kProductions[act.Arg];
            auto val = info.Callback(m_stValueStack);

            assert(m_stStack.size() >= info.SymbolCount * 2);
            m_stStack.resize(m_stStack.size() - info.SymbolCount * 2);

            assert(m_stValueStack.size() >= info.SymbolCount);
            m_stValueStack.resize(m_stValueStack.size() - info.SymbolCount);

            m_stValueStack.emplace_back(std::move(val));
            assert(!m_stStack.empty());

            const ActionInfo& act2 = kActions[m_stStack.back()][info.NonTerminal];
            if (act2.Action == ACTION_GOTO)
            {
                m_stStack.push_back(info.NonTerminal);
                m_stStack.push_back(act2.Arg);
            }
            else
            {
                assert(false);
                Reset();
                return ParseResult::Rejected;
            }

            continue;
        }
        break;
    }

    return ParseResult::Undecided;
}

void {% class_name %}::Reset()noexcept
{
    m_stStack.clear();
    m_stValueStack.clear();

    // initial state
    m_stStack.push_back(0);
}
"""


def main():
    """CLI entry point: parse the grammar file and write the C++ header/source."""
    parser = argparse.ArgumentParser(description="A LR(1)/LALR(1) parser generator for C++17.")
    parser.add_argument("--header-file", type=str, help="Output header filename", default="Parser.hpp")
    parser.add_argument("--source-file", type=str, help="Output source filename", default="Parser.cpp")
    parser.add_argument("-o", "--output-dir", type=str, help="Output directory", default="./")
    parser.add_argument("--header-template", type=str, help="User defined header template", default="")
    parser.add_argument("--source-template", type=str, help="User defined source template", default="")
    parser.add_argument("--lalr", action="store_true", help="Set to LALR(1) mode", default=False)
    parser.add_argument("--print-actions", action="store_true", help="Print action table", default=False)
    parser.add_argument("grammar", help="Grammar filename")
    args = parser.parse_args()

    # Parse the grammar description and run the LR(1)/LALR(1) analysis.
    doc = GrammarDocument()
    doc.parse(args.grammar)

    analyzer = GrammarAnalyzer(doc)
    analyzer.process(GRAMMAR_MODE_LALR if args.lalr else GRAMMAR_MODE_LR1)

    if args.print_actions:
        print(analyzer.printable_actions())

    # Conflict-resolution statistics go to stderr so stdout stays clean for
    # --print-actions output.
    resolve_rr_cnt, resolve_sr_by_prec_cnt, resolve_sr_by_shift_cnt = analyzer.resolve_stat()
    sys.stderr.write(f"Reduce/Reduce conflict resolved count: {resolve_rr_cnt}\n")
    sys.stderr.write(f"Shift/Reduce conflict resolved count (by Operator Precedence): {resolve_sr_by_prec_cnt}\n")
    sys.stderr.write(f"Shift/Reduce conflict resolved count (by Shift Priority): {resolve_sr_by_shift_cnt}\n")

    # User-supplied templates override the built-in defaults.
    header_tpl_content = CPP_HEADER_TPL
    source_tpl_content = CPP_SOURCE_TPL
    if args.header_template != "":
        with open(args.header_template, "r", encoding="utf-8") as f:
            header_tpl_content = f.read()
    if args.source_template != "":
        with open(args.source_template, "r", encoding="utf-8") as f:
            source_tpl_content = f.read()
    header_output, source_output = generate_code(header_tpl_content, source_tpl_content, analyzer, args.header_file)
    with open(os.path.join(args.output_dir, args.header_file), "w", encoding="utf-8") as f:
        f.write(header_output)
    with open(os.path.join(args.output_dir, args.source_file), "w", encoding="utf-8") as f:
        f.write(source_output)


if __name__ == "__main__":
    main()