├── .gitignore ├── MicroCompiler ├── LL1Parser.py ├── Lookahead │ ├── EOF.py │ ├── Epsilon.py │ ├── FirstPlusSet.py │ ├── FirstPlusSet_test.py │ ├── FirstSet.py │ ├── FirstSet_test.py │ ├── FollowSet.py │ ├── FollowSet_test.py │ ├── NonTerminal.py │ ├── SymbolSet.py │ ├── Terminal.py │ ├── TranslationTable.py │ └── __init__.py ├── ParserGenerator │ ├── Generator.py │ ├── Generator_test.py │ ├── Lexeme.py │ ├── Lexer.py │ ├── Lexer_test.py │ ├── Parser.py │ ├── Parser_test.py │ ├── __init__.py │ ├── calculator.mbnf │ └── sample.mbnf ├── Productions.py ├── Productions_test.py ├── SkeletonParser.py ├── SkeletonParser_test.py ├── __init__.py ├── abstract_syntax_tree │ ├── __init__.py │ ├── abstract_syntax_tree.py │ ├── abstract_syntax_tree_test.py │ └── node.py ├── lexer │ ├── README.md │ ├── __init__.py │ ├── demo.py │ ├── lexer.py │ └── user_level_lexer_define.py ├── output.yaml ├── parser_builder.py ├── parser_evaluator.py ├── parser_evaluator_builder.py ├── postfix_expression │ ├── __init__.py │ ├── evaluator.py │ └── operator.py └── sample.yaml ├── README.md ├── demo ├── __init__.py ├── arithmetic_calculator │ ├── README.md │ ├── arithmetic_calculator.py │ ├── calculator.graphml │ ├── calculator.mbnf │ ├── calculator.png │ ├── calculator.yaml │ ├── ll1_grammer_generator.py │ ├── main.py │ ├── main_with_lexer.py │ ├── output.yaml │ ├── terminal_user_interface.py │ ├── test_cases.py │ ├── tests.py │ └── user_level_parser.py └── template_engine │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── render_engine.py │ ├── render_with_string.py │ ├── render_with_tokens.py │ ├── syntax.mbnf │ ├── user_level_lexer_define.py │ └── user_level_parser.py ├── docs └── README.md ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | ### JetBrains template 106 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 107 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 108 | 109 | # User-specific stuff: 110 | .idea/**/workspace.xml 111 | .idea/**/tasks.xml 112 | .idea/dictionaries 113 | 114 | # Sensitive or high-churn files: 115 | .idea/**/dataSources/ 116 | .idea/**/dataSources.ids 117 | .idea/**/dataSources.xml 118 | .idea/**/dataSources.local.xml 119 | .idea/**/sqlDataSources.xml 120 | .idea/**/dynamic.xml 121 | .idea/**/uiDesigner.xml 122 | 123 | # Gradle: 124 | .idea/**/gradle.xml 125 | .idea/**/libraries 126 | 127 | # CMake 128 | cmake-build-debug/ 129 | 130 | # Mongo Explorer plugin: 131 | .idea/**/mongoSettings.xml 132 | 133 | ## File-based project format: 134 | *.iws 135 | 136 | ## Plugin-specific files: 137 | 138 | # IntelliJ 139 | out/ 140 | 141 | # mpeltonen/sbt-idea plugin 142 | .idea_modules/ 143 | 144 | # JIRA plugin 145 | atlassian-ide-plugin.xml 146 | 147 | # Cursive Clojure plugin 148 | .idea/replstate.xml 149 | 150 | # Crashlytics plugin (for Android Studio and IntelliJ) 151 | com_crashlytics_export_strings.xml 152 | crashlytics.properties 153 | crashlytics-build.properties 154 | fabric.properties 155 | ### Emacs template 156 | # -*- mode: gitignore; -*- 157 | *~ 158 | \#*\# 159 | /.emacs.desktop 160 | /.emacs.desktop.lock 161 | *.elc 162 | auto-save-list 163 | tramp 164 | .\#* 165 | 166 | # Org-mode 167 | .org-id-locations 168 | *_archive 169 | 170 | # flymake-mode 171 | *_flymake.* 172 | 173 | # eshell files 174 | /eshell/history 175 | /eshell/lastdir 176 | 177 | # elpa packages 178 | /elpa/ 179 | 180 | # reftex files 181 | *.rel 182 | 183 | # AUCTeX auto folder 184 | /auto/ 185 | 186 | # cask packages 187 | .cask/ 188 | dist/ 189 | 190 | # Flycheck 191 | flycheck_*.el 192 | 193 | # server auth directory 194 | /server/ 195 | 196 | # projectiles files 197 | .projectile 198 | 199 | # directory configuration 200 | .dir-locals.el 201 | ### Vim template 202 | # Swap 203 | [._]*.s[a-v][a-z] 204 | [._]*.sw[a-p] 205 | [._]s[a-v][a-z] 206 | [._]sw[a-p] 207 | 208 | # Session 209 | Session.vim 210 | 211 | # Temporary 212 | .netrwhist 213 | *~ 214 | # Auto-generated tag files 215 | tags 216 | 217 | .idea 218 | 219 | -------------------------------------------------------------------------------- /MicroCompiler/LL1Parser.py: 
-------------------------------------------------------------------------------- 1 | class LL1Parser: 2 | def __init__(self, translation_table, production, lexer_list): 3 | self.translation_table = translation_table 4 | self.production = production 5 | self.lexer_list = lexer_list 6 | 7 | def match(self): 8 | for lexer in self.lexer_list: 9 | pass 10 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/EOF.py: -------------------------------------------------------------------------------- 1 | class EOF: 2 | def __str__(self): 3 | return "" 4 | 5 | @property 6 | def value(self): 7 | return "" 8 | 9 | def __hash__(self): 10 | return hash("") 11 | 12 | def __eq__(self, other): 13 | if not isinstance(other, self.__class__): 14 | return False 15 | return True 16 | 17 | def __repr__(self): 18 | return "{}()".format(self.__class__.__name__) 19 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/Epsilon.py: -------------------------------------------------------------------------------- 1 | class Epsilon: 2 | def __str__(self): 3 | return "ϵ" 4 | 5 | @property 6 | def value(self): 7 | return "ϵ" 8 | 9 | def __hash__(self): 10 | return hash("ϵ") 11 | 12 | def __eq__(self, other): 13 | if not isinstance(other, self.__class__): 14 | return False 15 | return True 16 | 17 | def __repr__(self): 18 | return "{}()".format(self.__class__.__name__) 19 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstPlusSet.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.FirstSet import FirstSet 2 | from MicroCompiler.Lookahead.FollowSet import FollowSet 3 | 4 | 5 | class FirstPlusSet: 6 | def __init__(self, production): 7 | self.first_set = None 8 | self.follow_set = None 9 | self.first_set_mapping = None 10 | self.production = production 11 | 12 | self.first_plus_set = {} 13 | self.first_plus_set_mapping = {} 14 | 15 | def compute(self): 16 | if self.first_set is None: 17 | fs = FirstSet(self.production) 18 | fs.compute() 19 | self.first_set = fs.first_set 20 | self.first_set_mapping = fs.first_set_mapping 21 | 22 | if self.follow_set is None: 23 | fs = FollowSet(self.production, self.first_set) 24 | fs.compute() 25 | self.follow_set = fs.follow_set 26 | 27 | for lhs_symbol in self.production: 28 | productions = self.production[lhs_symbol] 29 | for production_index, production in enumerate(productions): 30 | symbol_set = self.first_set_mapping[lhs_symbol][production_index] 31 | 32 | self.first_plus_set_mapping.setdefault(lhs_symbol, {}) 33 | self.first_plus_set_mapping[lhs_symbol].setdefault( 34 | production_index, set() 35 | ) 36 | first_plus_set = self.first_plus_set_mapping[lhs_symbol][ 37 | production_index 38 | ] 39 | if symbol_set.include_epsilon: 40 | first_plus_set.update(symbol_set.remove_epsilon()) 41 | first_plus_set.update(self.follow_set[lhs_symbol]) 42 | else: 43 | first_plus_set.update(symbol_set) 44 | 45 | self.first_plus_set.setdefault(lhs_symbol, {}) 46 | for symbol in first_plus_set: 47 | if symbol in self.first_plus_set[lhs_symbol]: 48 | msg = "Lookahead {} index {} already exists in {}" 49 | raise ValueError( 50 | msg.format( 51 | symbol, 52 | production_index, 53 | self.first_plus_set[lhs_symbol], 54 | ) 55 | ) 56 | self.first_plus_set[lhs_symbol][symbol] = production_index 57 |
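A note on what FirstPlusSet computes, before the tests below: FIRST+(A → β) is FIRST(β) when β cannot derive ϵ, and FIRST(β) ∪ FOLLOW(A) when it can; `compute` builds exactly that union per production index, and raises ValueError when two productions of the same non-terminal claim the same lookahead terminal, i.e. when the grammar is not LL(1). A minimal usage sketch follows (an editorial example, not a file in this repository; it only uses classes shown in this dump):

```python
from MicroCompiler.Productions import Productions
from MicroCompiler.Lookahead.NonTerminal import NonTerminal
from MicroCompiler.Lookahead.Terminal import Terminal, CHARACTER
from MicroCompiler.Lookahead.Epsilon import Epsilon
from MicroCompiler.Lookahead.FirstPlusSet import FirstPlusSet

# Statement -> Expression ';' ;   Expression -> '+' | ϵ ;
statement = NonTerminal("Statement")
expression = NonTerminal("Expression")
semicolon = Terminal(CHARACTER, ";")
plus = Terminal(CHARACTER, "+")

productions = Productions(
    {statement: [[expression, semicolon]], expression: [[plus], [Epsilon()]]}
)
productions.set_start_symbol(statement)

fps = FirstPlusSet(productions)
fps.compute()

# Expression is nullable, so FOLLOW(Expression) = {';'} is folded into its
# FIRST+ set: {'+' -> production 0, ';' -> production 1 (the ϵ alternative)}
print(fps.first_plus_set[expression])
```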
-------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstPlusSet_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pprint 3 | 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.Epsilon import Epsilon 6 | from MicroCompiler.Lookahead.EOF import EOF 7 | from MicroCompiler.Lookahead.FirstPlusSet import FirstPlusSet 8 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 9 | from MicroCompiler.Lookahead.Terminal import CHARACTER 10 | from MicroCompiler.Lookahead.Terminal import Terminal 11 | 12 | 13 | class TestFirstPlusSet(unittest.TestCase): 14 | def test_conception(self): 15 | statement = NonTerminal("Statement") 16 | expression = NonTerminal("Expression") 17 | semicolon = Terminal(CHARACTER, ";") 18 | plus = Terminal(CHARACTER, "+") 19 | minus = Terminal(CHARACTER, "-") 20 | 21 | production = Productions( 22 | {statement: [[expression, semicolon]], expression: [[plus], [minus]]} 23 | ) 24 | 25 | production.set_start_symbol(statement) 26 | 27 | fs = FirstPlusSet(production) 28 | fs.compute() 29 | 30 | real_result = fs.first_plus_set 31 | 32 | expect_result = { 33 | NonTerminal("Expression"): { 34 | Terminal(CHARACTER, "+"): 0, 35 | Terminal(CHARACTER, "-"): 1, 36 | }, 37 | NonTerminal("Statement"): { 38 | Terminal(CHARACTER, "+"): 0, 39 | Terminal(CHARACTER, "-"): 0, 40 | }, 41 | } 42 | 43 | self.assertEqual(real_result, expect_result) 44 | 45 | def test_epsilon(self): 46 | statement = NonTerminal("Statement") 47 | expression = NonTerminal("Expression") 48 | epsilon = Epsilon() 49 | semicolon = Terminal(CHARACTER, ";") 50 | plus = Terminal(CHARACTER, "+") 51 | minus = Terminal(CHARACTER, "-") 52 | 53 | production = Productions( 54 | { 55 | statement: [[expression, semicolon]], 56 | expression: [[plus], [minus], [epsilon]], 57 | } 58 | ) 59 | 60 | production.set_start_symbol(statement) 61 | 62 | fs = FirstPlusSet(production) 63 | fs.compute() 64 | 65 | real_result = fs.first_plus_set 66 | 67 | expect_result = { 68 | NonTerminal("Statement"): { 69 | Terminal(CHARACTER, "+"): 0, 70 | Terminal(CHARACTER, "-"): 0, 71 | Terminal(CHARACTER, ";"): 0, 72 | }, 73 | NonTerminal("Expression"): { 74 | Terminal(CHARACTER, "+"): 0, 75 | Terminal(CHARACTER, "-"): 1, 76 | Terminal(CHARACTER, ";"): 2, 77 | }, 78 | } 79 | 80 | self.assertEqual(real_result, expect_result) 81 | 82 | def test_real(self): 83 | """ 84 | Goal -> Expr ; 85 | Expr -> Term ExprTwo ; 86 | ExprTwo -> '+' Term ExprTwo 87 | | '-' Term ExprTwo 88 | | ϵ ; 89 | Term -> Factor TermTwo ; 90 | TermTwo -> '*' Factor TermTwo 91 | | '/' Factor TermTwo 92 | | ϵ ; 93 | Factor -> '(' Expr ')' 94 | | 'num' 95 | | 'name' ; 96 | """ 97 | 98 | """ 99 | Extended Backus-Naur form: 100 | 101 | Goal -> Expr 102 | Expr -> Term ExprTwo 103 | ExprTwo -> + Term ExprTwo | - Term ExprTwo | EPSILON 104 | Term -> Factor TermTwo 105 | TermTwo -> * Factor TermTwo | / Factor TermTwo | EPSILON 106 | Factor -> ( Expr ) | num | name 107 | """ 108 | goal = NonTerminal("Goal") 109 | expr = NonTerminal("Expr") 110 | expr_two = NonTerminal("ExprTwo") 111 | term = NonTerminal("Term") 112 | term_two = NonTerminal("TermTwo") 113 | factor = NonTerminal("Factor") 114 | epsilon = Epsilon() 115 | name = Terminal(CHARACTER, "name") 116 | num = Terminal(CHARACTER, "num") 117 | plus = Terminal(CHARACTER, "+") 118 | minus = Terminal(CHARACTER, "-") 119 | div = Terminal(CHARACTER, "/") 120 | asteroid = Terminal(CHARACTER, 
"*") 121 | open_parenthesis = Terminal(CHARACTER, "(") 122 | close_parenthesis = Terminal(CHARACTER, ")") 123 | eof = EOF() 124 | 125 | production = Productions( 126 | { 127 | goal: [[expr]], 128 | expr: [[term, expr_two]], 129 | expr_two: [[plus, term, expr_two], [minus, term, expr_two], [epsilon]], 130 | term: [[factor, term_two]], 131 | term_two: [ 132 | [asteroid, factor, term_two], 133 | [div, factor, term_two], 134 | [epsilon], 135 | ], 136 | factor: [[open_parenthesis, expr, close_parenthesis], [num], [name]], 137 | } 138 | ) 139 | 140 | production.set_start_symbol(goal) 141 | 142 | fs = FirstPlusSet(production) 143 | fs.compute() 144 | 145 | real_result = fs.first_plus_set 146 | 147 | expect_result = { 148 | NonTerminal("Goal"): { 149 | Terminal(CHARACTER, "name"): 0, 150 | Terminal(CHARACTER, "num"): 0, 151 | Terminal(CHARACTER, "("): 0, 152 | }, 153 | NonTerminal("Expr"): { 154 | Terminal(CHARACTER, "name"): 0, 155 | Terminal(CHARACTER, "num"): 0, 156 | Terminal(CHARACTER, "("): 0, 157 | }, 158 | NonTerminal("ExprTwo"): { 159 | EOF(): 2, 160 | Terminal(CHARACTER, "+"): 0, 161 | Terminal(CHARACTER, "-"): 1, 162 | Terminal(CHARACTER, ")"): 2, 163 | }, 164 | NonTerminal("Term"): { 165 | Terminal(CHARACTER, "name"): 0, 166 | Terminal(CHARACTER, "num"): 0, 167 | Terminal(CHARACTER, "("): 0, 168 | }, 169 | NonTerminal("TermTwo"): { 170 | EOF(): 2, 171 | Terminal(CHARACTER, "+"): 2, 172 | Terminal(CHARACTER, "-"): 2, 173 | Terminal(CHARACTER, "/"): 1, 174 | Terminal(CHARACTER, "*"): 0, 175 | Terminal(CHARACTER, ")"): 2, 176 | }, 177 | NonTerminal("Factor"): { 178 | Terminal(CHARACTER, "name"): 2, 179 | Terminal(CHARACTER, "num"): 1, 180 | Terminal(CHARACTER, "("): 0, 181 | }, 182 | } 183 | 184 | self.assertEqual(real_result, expect_result) 185 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstSet.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from MicroCompiler.Productions import Productions 4 | from MicroCompiler.Lookahead.SymbolSet import SymbolSet 5 | 6 | 7 | class FirstSet: 8 | def __init__(self, production: Productions): 9 | self.first_set = {} 10 | self.first_set_table = {} 11 | self.first_set_mapping = {} 12 | self.production = production 13 | 14 | def compute(self): 15 | # compute all terminal's first-set first 16 | for symbol in self.production.terminals: 17 | self.first_set[symbol] = SymbolSet({symbol}) 18 | 19 | epsilon = self.production.epsilon 20 | self.first_set[epsilon] = SymbolSet({epsilon}) 21 | 22 | eof = self.production.eof 23 | self.first_set[eof] = SymbolSet({eof}) 24 | 25 | # init non-terminal's first-set to empty 26 | for symbol in self.production.non_terminals: 27 | self.first_set[symbol] = SymbolSet() 28 | 29 | old_first_set = copy.deepcopy(self.first_set) 30 | 31 | while True: 32 | for symbol in self.production.non_terminals: 33 | self.compute_symbol(symbol) 34 | if self.first_set == old_first_set: 35 | break 36 | else: 37 | old_first_set = copy.deepcopy(self.first_set) 38 | 39 | def compute_symbol(self, lsh_symbol): 40 | productions = self.production[lsh_symbol] 41 | 42 | for production_index, production in enumerate(productions): 43 | rhs = SymbolSet() 44 | for symbol_index, rhs_symbol in enumerate(production): 45 | if symbol_index != len(production) - 1: 46 | rhs.update(self.first_set[rhs_symbol].remove_epsilon()) 47 | else: 48 | # keep epsilon if this is the last symbol in the production 49 | rhs.update(self.first_set[rhs_symbol]) 50 
| 51 | if not self.first_set[rhs_symbol].include_epsilon: 52 | break 53 | 54 | self.first_set[lsh_symbol].update(rhs) 55 | 56 | self.first_set_mapping.setdefault(lsh_symbol, {}) 57 | self.first_set_mapping[lsh_symbol][production_index] = rhs 58 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstSet_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pprint 3 | 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.Epsilon import Epsilon 6 | from MicroCompiler.Lookahead.EOF import EOF 7 | from MicroCompiler.Lookahead.FirstSet import FirstSet 8 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 9 | from MicroCompiler.Lookahead.Terminal import CHARACTER 10 | from MicroCompiler.Lookahead.Terminal import Terminal 11 | from MicroCompiler.Lookahead.SymbolSet import SymbolSet 12 | 13 | 14 | class TestFirstSet(unittest.TestCase): 15 | def test_conception(self): 16 | statement = NonTerminal("Statement") 17 | expression = NonTerminal("Expression") 18 | semicolon = Terminal(CHARACTER, ";") 19 | plus = Terminal(CHARACTER, "+") 20 | minus = Terminal(CHARACTER, "-") 21 | 22 | production = Productions( 23 | {statement: [[expression, semicolon]], expression: [[plus], [minus]]} 24 | ) 25 | 26 | production.set_start_symbol(statement) 27 | 28 | fs = FirstSet(production) 29 | fs.compute() 30 | print(fs.first_set) 31 | 32 | def test_epsilon(self): 33 | statement = NonTerminal("Statement") 34 | expression = NonTerminal("Expression") 35 | epsilon = Epsilon() 36 | semicolon = Terminal(CHARACTER, ";") 37 | plus = Terminal(CHARACTER, "+") 38 | minus = Terminal(CHARACTER, "-") 39 | 40 | production = Productions( 41 | { 42 | statement: [[expression, semicolon]], 43 | expression: [[plus], [minus], [epsilon]], 44 | } 45 | ) 46 | 47 | production.set_start_symbol(statement) 48 | 49 | fs = FirstSet(production) 50 | fs.compute() 51 | print(fs.first_set) 52 | 53 | print(fs.first_set_table) 54 | print(fs.first_set_mapping) 55 | 56 | def test_real(self): 57 | """ 58 | Goal -> Expr ; 59 | Expr -> Term ExprTwo ; 60 | ExprTwo -> '+' Term ExprTwo 61 | | '-' Term ExprTwo 62 | | ϵ ; 63 | Term -> Factor TermTwo ; 64 | TermTwo -> '*' Factor TermTwo 65 | | '/' Factor TermTwo 66 | | ϵ ; 67 | Factor -> '(' Expr ')' 68 | | 'num' 69 | | 'name' ; 70 | """ 71 | 72 | """ 73 | Extended Backus-Naur form: 74 | 75 | Goal -> Expr 76 | Expr -> Term ExprTwo 77 | ExprTwo -> + Term ExprTwo | - Term ExprTwo | EPSILON 78 | Term -> Factor TermTwo 79 | TermTwo -> * Factor TermTwo | / Factor TermTwo | EPSILON 80 | Factor -> ( Expr ) | num | name 81 | """ 82 | goal = NonTerminal("Goal") 83 | expr = NonTerminal("Expr") 84 | expr_two = NonTerminal("ExprTwo") 85 | term = NonTerminal("Term") 86 | term_two = NonTerminal("TermTwo") 87 | factor = NonTerminal("Factor") 88 | epsilon = Epsilon() 89 | name = Terminal(CHARACTER, "name") 90 | num = Terminal(CHARACTER, "num") 91 | plus = Terminal(CHARACTER, "+") 92 | minus = Terminal(CHARACTER, "-") 93 | div = Terminal(CHARACTER, "/") 94 | asteroid = Terminal(CHARACTER, "*") 95 | open_parenthesis = Terminal(CHARACTER, "(") 96 | close_parenthesis = Terminal(CHARACTER, ")") 97 | eof = EOF() 98 | 99 | production = Productions( 100 | { 101 | goal: [[expr]], 102 | expr: [[term, term_two]], 103 | expr_two: [[plus, term, expr_two], [minus, term, expr_two], [epsilon]], 104 | term: [[factor, term_two]], 105 | term_two: [ 106 | [asteroid, factor, term_two], 
107 | [div, factor, term_two], 108 | [epsilon], 109 | ], 110 | factor: [[open_parenthesis, expr, close_parenthesis], [num], [name]], 111 | } 112 | ) 113 | 114 | production.set_start_symbol(goal) 115 | 116 | fs = FirstSet(production) 117 | fs.compute() 118 | real_result = fs.first_set 119 | 120 | expect_result = { 121 | eof: SymbolSet({eof}), 122 | plus: SymbolSet({plus}), 123 | minus: SymbolSet({minus}), 124 | epsilon: SymbolSet({epsilon}), 125 | asteroid: SymbolSet({asteroid}), 126 | div: SymbolSet({div}), 127 | open_parenthesis: SymbolSet({open_parenthesis}), 128 | close_parenthesis: SymbolSet({close_parenthesis}), 129 | num: SymbolSet({num}), 130 | name: SymbolSet({name}), 131 | expr_two: SymbolSet({plus, minus, epsilon}), 132 | term_two: SymbolSet({asteroid, div, epsilon}), 133 | factor: SymbolSet({open_parenthesis, num, name}), 134 | term: SymbolSet({open_parenthesis, num, name}), 135 | expr: SymbolSet({open_parenthesis, num, name}), 136 | goal: SymbolSet({open_parenthesis, num, name}), 137 | } 138 | 139 | # pprint.pprint(real_result) 140 | 141 | self.maxDiff = None 142 | self.assertEqual(real_result, expect_result) 143 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FollowSet.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.EOF import EOF 6 | from MicroCompiler.Lookahead.SymbolSet import SymbolSet 7 | 8 | 9 | class FollowSet: 10 | def __init__(self, production: Productions, first_set): 11 | self.production = production 12 | self.first_set = first_set 13 | self.follow_set = {} 14 | self.follow_set_table = {} 15 | 16 | def compute(self): 17 | # init the follow_set 18 | for symbol in self.production.non_terminals: 19 | self.follow_set[symbol] = SymbolSet() 20 | 21 | self.follow_set[self.production.start_symbol] = {EOF()} 22 | 23 | old_follow_set = copy.deepcopy(self.follow_set) 24 | 25 | while True: 26 | for symbol in self.production: 27 | self.compute_symbol(symbol) 28 | 29 | if old_follow_set == self.follow_set: 30 | break 31 | else: 32 | old_follow_set = copy.deepcopy(self.follow_set) 33 | 34 | def compute_symbol(self, lhs_symbol): 35 | production_set = self.production[lhs_symbol] 36 | for production in production_set: 37 | trailer = self.follow_set[lhs_symbol] 38 | for rhs_symbol in reversed(production): 39 | if isinstance(rhs_symbol, NonTerminal): 40 | self.follow_set[rhs_symbol] = self.follow_set[rhs_symbol] | trailer 41 | 42 | if self.first_set[rhs_symbol].include_epsilon: 43 | trailer = trailer | self.first_set[rhs_symbol].remove_epsilon() 44 | else: 45 | trailer = self.first_set[rhs_symbol] 46 | else: 47 | trailer = SymbolSet({rhs_symbol}) 48 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FollowSet_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pprint 3 | 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.Epsilon import Epsilon 6 | from MicroCompiler.Lookahead.EOF import EOF 7 | from MicroCompiler.Lookahead.FirstSet import FirstSet 8 | from MicroCompiler.Lookahead.FollowSet import FollowSet 9 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 10 | from MicroCompiler.Lookahead.Terminal import CHARACTER 11 | from 
MicroCompiler.Lookahead.Terminal import Terminal 12 | 13 | 14 | class TestFollowSet(unittest.TestCase): 15 | def test_conception(self): 16 | statement = NonTerminal("Statement") 17 | expression = NonTerminal("Expression") 18 | semicolon = Terminal(CHARACTER, ";") 19 | plus = Terminal(CHARACTER, "+") 20 | minus = Terminal(CHARACTER, "-") 21 | 22 | production = Productions( 23 | {statement: [[expression, semicolon]], expression: [[plus], [minus]]} 24 | ) 25 | 26 | production.set_start_symbol(statement) 27 | 28 | fs = FirstSet(production) 29 | fs.compute() 30 | first_set = fs.first_set 31 | 32 | fs = FollowSet(production, first_set) 33 | fs.compute() 34 | print(fs.follow_set) 35 | 36 | def test_epsilon(self): 37 | statement = NonTerminal("Statement") 38 | expression = NonTerminal("Expression") 39 | epsilon = Epsilon() 40 | semicolon = Terminal(CHARACTER, ";") 41 | plus = Terminal(CHARACTER, "+") 42 | minus = Terminal(CHARACTER, "-") 43 | 44 | production = Productions( 45 | { 46 | statement: [[expression, semicolon]], 47 | expression: [[plus], [minus], [epsilon]], 48 | } 49 | ) 50 | 51 | production.set_start_symbol(statement) 52 | 53 | fs = FirstSet(production) 54 | fs.compute() 55 | first_set = fs.first_set 56 | 57 | fs = FollowSet(production, first_set) 58 | fs.compute() 59 | print(fs.follow_set) 60 | 61 | def test_real(self): 62 | """ 63 | Goal -> Expr ; 64 | Expr -> Term ExprTwo ; 65 | ExprTwo -> '+' Term ExprTwo 66 | | '-' Term ExprTwo 67 | | ϵ ; 68 | Term -> Factor TermTwo ; 69 | TermTwo -> '*' Factor TermTwo 70 | | '/' Factor TermTwo 71 | | ϵ ; 72 | Factor -> '(' Expr ')' 73 | | 'num' 74 | | 'name' ; 75 | """ 76 | 77 | """ 78 | Extended Backus-Naur form: 79 | 80 | Goal -> Expr 81 | Expr -> Term ExprTwo 82 | ExprTwo -> + Term ExprTwo | - Term ExprTwo | EPSILON 83 | Term -> Factor TermTwo 84 | TermTwo -> * Factor TermTwo | / Factor TermTwo | EPSILON 85 | Factor -> ( Expr ) | num | name 86 | """ 87 | goal = NonTerminal("Goal") 88 | expr = NonTerminal("Expr") 89 | expr_two = NonTerminal("ExprTwo") 90 | term = NonTerminal("Term") 91 | term_two = NonTerminal("TermTwo") 92 | factor = NonTerminal("Factor") 93 | epsilon = Epsilon() 94 | name = Terminal(CHARACTER, "name") 95 | num = Terminal(CHARACTER, "num") 96 | plus = Terminal(CHARACTER, "+") 97 | minus = Terminal(CHARACTER, "-") 98 | div = Terminal(CHARACTER, "/") 99 | asteroid = Terminal(CHARACTER, "*") 100 | open_parenthesis = Terminal(CHARACTER, "(") 101 | close_parenthesis = Terminal(CHARACTER, ")") 102 | eof = EOF() 103 | 104 | production = Productions( 105 | { 106 | goal: [[expr]], 107 | expr: [[term, expr_two]], 108 | expr_two: [[plus, term, expr_two], [minus, term, expr_two], [epsilon]], 109 | term: [[factor, term_two]], 110 | term_two: [ 111 | [asteroid, factor, term_two], 112 | [div, factor, term_two], 113 | [epsilon], 114 | ], 115 | factor: [[open_parenthesis, expr, close_parenthesis], [num], [name]], 116 | } 117 | ) 118 | 119 | production.set_start_symbol(goal) 120 | 121 | fs = FirstSet(production) 122 | fs.compute() 123 | first_set = fs.first_set 124 | 125 | fs = FollowSet(production, first_set) 126 | fs.compute() 127 | real_result = fs.follow_set 128 | 129 | expect_result = { 130 | NonTerminal("Goal"): {EOF()}, 131 | NonTerminal("Expr"): {Terminal(CHARACTER, ")"), EOF()}, 132 | NonTerminal("ExprTwo"): {Terminal(CHARACTER, ")"), EOF()}, 133 | NonTerminal("Term"): { 134 | EOF(), 135 | Terminal(CHARACTER, "+"), 136 | Terminal(CHARACTER, "-"), 137 | Terminal(CHARACTER, ")"), 138 | }, 139 | NonTerminal("TermTwo"): { 140 | EOF(), 141 
| Terminal(CHARACTER, "+"), 142 | Terminal(CHARACTER, "-"), 143 | Terminal(CHARACTER, ")"), 144 | }, 145 | NonTerminal("Factor"): { 146 | EOF(), 147 | Terminal(CHARACTER, "+"), 148 | Terminal(CHARACTER, "-"), 149 | Terminal(CHARACTER, "/"), 150 | Terminal(CHARACTER, "*"), 151 | Terminal(CHARACTER, ")"), 152 | }, 153 | } 154 | 155 | self.assertEqual(real_result, expect_result) 156 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/NonTerminal.py: -------------------------------------------------------------------------------- 1 | class NonTerminal: 2 | def __init__(self, name): 3 | self.name = name 4 | 5 | super().__init__() 6 | 7 | @property 8 | def value(self): 9 | return self.name 10 | 11 | def __eq__(self, other): 12 | if not isinstance(other, self.__class__): 13 | return False 14 | if self.name == other.name: 15 | return True 16 | return False 17 | 18 | def __hash__(self): 19 | return hash(self.name) 20 | 21 | def __str__(self): 22 | return self.name 23 | 24 | def __repr__(self): 25 | return "{}('{}')".format(self.__class__.__name__, self.name) 26 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/SymbolSet.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.Epsilon import Epsilon 2 | 3 | 4 | class SymbolSet(set): 5 | def __init__(self, *args, **kwargs): 6 | super().__init__(*args, **kwargs) 7 | 8 | @property 9 | def include_epsilon(self): 10 | return any([i for i in self if isinstance(i, Epsilon)]) 11 | 12 | def remove_epsilon(self): 13 | return self.__class__({i for i in self if not isinstance(i, Epsilon)}) 14 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/Terminal.py: -------------------------------------------------------------------------------- 1 | CHARACTER = "CHARACTER" 2 | 3 | 4 | class Terminal: 5 | def __init__(self, type_=None, data=None): 6 | if type_ is None: 7 | type_ = CHARACTER 8 | self.type_ = type_ 9 | 10 | if data is None: 11 | raise ValueError("value can not be None") 12 | self.data = data 13 | 14 | @property 15 | def value(self): 16 | return str(self.data) 17 | 18 | def __eq__(self, other): 19 | if not isinstance(other, self.__class__): 20 | return False 21 | if self.type_ == other.type_ and self.data == other.data: 22 | return True 23 | return False 24 | 25 | def __hash__(self): 26 | return hash((self.type_, self.data)) 27 | 28 | def __str__(self): 29 | return "'{}'".format(self.data) 30 | 31 | def __repr__(self): 32 | return "{}({}, '{}')".format(self.__class__.__name__, self.type_, self.data) 33 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/TranslationTable.py: -------------------------------------------------------------------------------- 1 | class TranslationTable(dict): 2 | pass 3 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/Lookahead/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Generator.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | from itertools 
import chain 3 | 4 | import yaml 5 | 6 | from MicroCompiler.ParserGenerator.Lexer import Lexer 7 | from MicroCompiler.ParserGenerator.Parser import Parser 8 | from MicroCompiler.Lookahead.FirstPlusSet import FirstPlusSet 9 | from MicroCompiler.Lookahead.EOF import EOF 10 | from MicroCompiler.Lookahead.Epsilon import Epsilon 11 | 12 | 13 | class Generator: 14 | def __init__(self, input_file): 15 | self.translate_table = {} 16 | self.structure = {} 17 | 18 | self.input_file = input_file 19 | 20 | def generate(self): 21 | with open(self.input_file) as fd: 22 | bnf_string = fd.read() 23 | 24 | lexer = Lexer() 25 | lexer.parse(bnf_string) 26 | 27 | parser = Parser(lexer.token_list) 28 | parser.parse() 29 | productions = parser.generate_production() 30 | 31 | error_marker = "--" 32 | 33 | self.structure = { 34 | "terminals": [i.value for i in productions.terminals], 35 | "non-terminals": [i.name for i in productions.non_terminals], 36 | "eof-marker": "", 37 | "error-marker": error_marker, 38 | "start-symbol": productions.start_symbol.value, 39 | } 40 | 41 | flat_productions = [] 42 | productions_mapping = [] 43 | for lhs_symbol in productions: 44 | production = productions[lhs_symbol] 45 | for k, v in enumerate(production): 46 | productions_mapping.append(frozenset({lhs_symbol.value, k})) 47 | flat_productions.append( 48 | { 49 | lhs_symbol.value: [i.value for i in v] 50 | if not isinstance(v[0], Epsilon) 51 | else [] 52 | } 53 | ) 54 | 55 | self.structure["productions"] = {k: v for k, v in enumerate(flat_productions)} 56 | productions_mapping = {v: k for k, v in enumerate(productions_mapping)} 57 | 58 | fs = FirstPlusSet(productions) 59 | fs.compute() 60 | 61 | first_set_plus = fs.first_plus_set 62 | 63 | for non_terminal in productions.non_terminals: 64 | for terminal in chain(productions.terminals, (EOF(),)): 65 | self.translate_table.setdefault(non_terminal.value, {}) 66 | 67 | if terminal not in first_set_plus[non_terminal]: 68 | # no such translation 69 | self.translate_table[non_terminal.value][ 70 | terminal.value 71 | ] = error_marker 72 | 73 | continue 74 | 75 | inner_index = first_set_plus[non_terminal][terminal] 76 | 77 | look_for = frozenset({non_terminal.value, inner_index}) 78 | if look_for not in productions_mapping: 79 | raise ValueError( 80 | "Terminal {} in {} not in mapping {}".format( 81 | terminal, non_terminal, productions_mapping 82 | ) 83 | ) 84 | 85 | self.translate_table[non_terminal.value][ 86 | terminal.value 87 | ] = productions_mapping[look_for] 88 | 89 | self.structure["table"] = self.translate_table 90 | 91 | return self.structure 92 | 93 | def write_yaml(self, output_file): 94 | with open(output_file, "w") as fd: 95 | yaml.dump(self.structure, fd) 96 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Generator_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.ParserGenerator.Generator import Generator 4 | 5 | 6 | class TestGenerator(unittest.TestCase): 7 | def test_construct_simple(self): 8 | g = Generator("sample.mbnf") 9 | real_result = g.generate() 10 | 11 | # sample.mbnf holds the Goal/Expr expression grammar, so spot-check the generated structure 12 | self.assertEqual(real_result["start-symbol"], "Goal") 13 | self.assertEqual(real_result["error-marker"], "--") 14 | self.assertEqual( 15 | set(real_result["terminals"]), 16 | {"+", "-", "*", "/", "(", ")", "num", "name"}, 17 | ) 18 | self.assertEqual( 19 | set(real_result["non-terminals"]), 20 | {"Goal", "Expr", "ExprTwo", "Term", "TermTwo", "Factor"}, 21 | ) 22 | 23 | g.write_yaml("../output.yaml") 24 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Lexeme.py: -------------------------------------------------------------------------------- 1 | NON_TERMINAL = "NON_TERMINAL" 2 | TERMINAL = "TERMINAL" 3 | PRODUCT = "PRODUCT" 4 | SEMICOLON = "SEMICOLON" 5 | ALTERNATIVE = "ALTERNATIVE" 6 | EPSILON = "EPSILON" 7 | 8 | 9 | class Lexeme: 10 | def __init__(self, type_, value): 11 | self.value = value 12 | self.type_ = type_ 13 | 14 | def __repr__(self): 15 | return "{}({}, '{}')".format(self.__class__.__name__, self.type_, self.value) 16 | 17 | def __str__(self): 18 | return "<{}: {}>".format(self.type_, self.value) 19 | 20 | def __eq__(self, other): 21 | if not isinstance(other, self.__class__): 22 | return False 23 | if self.type_ == other.type_ and self.value == other.value: 24 | return True 25 | return False 26 | 27 | def __hash__(self): 28 | return hash(frozenset({self.value, self.type_})) 29 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Lexer.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.ParserGenerator.Lexeme import Lexeme 2 | from MicroCompiler.ParserGenerator.Lexeme import ( 3 | NON_TERMINAL, 4 | TERMINAL, 5 | PRODUCT, 6 | ALTERNATIVE, 7 | SEMICOLON, 8 | EPSILON, 9 | ) 10 | 11 | 12 | class Lexer: 13 | def __init__(self): 14 | self.token_list = [] 15 | 16 | def parse(self, string_: str): 17 | raw_token_list = string_.split() 18 | for raw_token in raw_token_list: 19 | if raw_token.isalpha(): 20 | if raw_token == "ϵ": 21 | symbol = Lexeme(EPSILON, raw_token) 22 | self.token_list.append(symbol) 23 | else: 24 | non_terminal = Lexeme(NON_TERMINAL, raw_token) 25 | self.token_list.append(non_terminal) 26 | elif raw_token == "|": 27 | terminal = Lexeme(ALTERNATIVE, raw_token) 28 | self.token_list.append(terminal) 29 | elif raw_token == "->": 30 | terminal = Lexeme(PRODUCT, raw_token) 31 | self.token_list.append(terminal) 32 | elif raw_token == ";": 33 | terminal = Lexeme(SEMICOLON, raw_token) 34 | self.token_list.append(terminal) 35 | elif raw_token.startswith("'"): 36 | terminal = Lexeme(TERMINAL, raw_token[1:-1]) 37 | self.token_list.append(terminal) 38 | 39 | else: 40 | raise ValueError("{} is not a valid token".format(raw_token)) 41 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Lexer_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.ParserGenerator.Lexer import Lexer 4 | from MicroCompiler.ParserGenerator.Lexeme import Lexeme 5 | from MicroCompiler.ParserGenerator.Lexeme import ( 6 | NON_TERMINAL, 7 | TERMINAL, 8 | PRODUCT, 9 | SEMICOLON, 10 | ALTERNATIVE, 11 | ) 12 | 13 | 14 | class TestLexer(unittest.TestCase): 15 | def test_simple_case(self): 16 | mbnf = """ 17 | statement -> 18 | expression ';' 19 | ; 20 | 21 | expression -> 22 | 'plus' 23 | | 'minus' 24 | ; 25 | """ 26 | 27 | lexer = Lexer() 28 | lexer.parse(mbnf) 29 | real_result = lexer.token_list 30 | 31 | expect_result = [ 32 |
Lexeme(NON_TERMINAL, "statement"), 33 | Lexeme(PRODUCT, "->"), 34 | Lexeme(NON_TERMINAL, "expression"), 35 | Lexeme(TERMINAL, ";"), 36 | Lexeme(SEMICOLON, ";"), 37 | Lexeme(NON_TERMINAL, "expression"), 38 | Lexeme(PRODUCT, "->"), 39 | Lexeme(TERMINAL, "plus"), 40 | Lexeme(ALTERNATIVE, "|"), 41 | Lexeme(TERMINAL, "minus"), 42 | Lexeme(SEMICOLON, ";"), 43 | ] 44 | 45 | self.assertEqual(real_result, expect_result) 46 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Parser.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 2 | from MicroCompiler.Lookahead.Terminal import Terminal 3 | from MicroCompiler.Lookahead.Epsilon import Epsilon 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.EOF import EOF 6 | from MicroCompiler.ParserGenerator.Lexeme import ( 7 | NON_TERMINAL, 8 | TERMINAL, 9 | PRODUCT, 10 | ALTERNATIVE, 11 | SEMICOLON, 12 | EPSILON, 13 | ) 14 | 15 | 16 | built_in_terminal = () 17 | 18 | # filter function list 19 | terminal_filter_list = () 20 | 21 | 22 | class Parser: 23 | def __init__(self, token_list): 24 | self.token_index = 0 25 | self.token_list = token_list 26 | 27 | self.production_dict = {} 28 | self.start_symbol = None 29 | 30 | super().__init__() 31 | 32 | def _match(self, value): 33 | if ( 34 | self.token_list[self.token_index].type_ == TERMINAL 35 | and self.token_list[self.token_index] == value 36 | ): 37 | self.token_index += 1 38 | return True 39 | else: 40 | # print("{} != {}".format(self.token_list[self.token_index], value)) 41 | return False 42 | 43 | def _match_type(self, type_): 44 | if self.token_list[self.token_index].type_ == type_: 45 | self.token_index += 1 46 | return True 47 | else: 48 | # print("{} != {}".format(self.token_list[self.token_index], type)) 49 | return False 50 | 51 | def _non_terminal(self): 52 | if self.token_index >= len(self.token_list): 53 | return False 54 | 55 | if self.token_list[self.token_index].type_ == NON_TERMINAL: 56 | self.token_index += 1 57 | return True 58 | else: 59 | # print("{} is not NON_TERMINAL".format(self.token_list[self.token_index])) 60 | return False 61 | 62 | def _terminal(self): 63 | if self.token_list[self.token_index].type_ == TERMINAL: 64 | self.token_index += 1 65 | return True 66 | else: 67 | # print("{} is not TERMINAL".format(self.token_list[self.token_index])) 68 | return False 69 | 70 | """ 71 | statement -> production ';' other_production ; 72 | other_production -> statement | ϵ ; 73 | 74 | production -> non_terminal '->' symbols other_symbols ; 75 | other_symbols -> '|' symbols other_symbols | ϵ ; 76 | 77 | symbols -> symbol other_symbol | 'ϵ' ; 78 | other_symbol -> symbol other_symbol | ϵ ; 79 | 80 | symbol -> non_terminal | terminal ; 81 | """ 82 | 83 | def parse(self): 84 | return self._statement() 85 | 86 | def _statement(self): 87 | return ( 88 | self._production() 89 | and self._match_type(SEMICOLON) 90 | and self._other_production() 91 | ) 92 | 93 | def _other_production(self): 94 | save_point = self.token_index 95 | if self._statement(): 96 | return True 97 | else: 98 | self.token_index = save_point 99 | # do nothing for epsilon 100 | return True 101 | 102 | def _production(self): 103 | save_point = self.token_index 104 | if self._non_terminal(): 105 | productions_object = [] 106 | 107 | # first non_terminal is start symbol 108 | if not self.production_dict: 109 | self.start_symbol = 
self.token_list[save_point] 110 | 111 | self.production_dict[self.token_list[save_point]] = productions_object 112 | 113 | return ( 114 | self._match_type(PRODUCT) 115 | and self._symbols(productions_object) 116 | and self._other_symbols(productions_object) 117 | ) 118 | else: 119 | return False 120 | 121 | def _other_symbols(self, productions_object): 122 | save_point = self.token_index 123 | if ( 124 | self._match_type(ALTERNATIVE) 125 | and self._symbols(productions_object) 126 | and self._other_symbols(productions_object) 127 | ): 128 | return True 129 | else: 130 | self.token_index = save_point 131 | # do nothing for epsilon 132 | return True 133 | 134 | def _symbols(self, productions_object): 135 | save_point = self.token_index 136 | 137 | production = [] 138 | result = self._symbol(production) and self._other_symbol(production) 139 | 140 | if result: 141 | productions_object.append(production) 142 | 143 | if not result: 144 | if self._match_type(EPSILON): 145 | productions_object.append([self.token_list[save_point]]) 146 | return True 147 | return False 148 | return True 149 | 150 | def _other_symbol(self, production): 151 | save_point = self.token_index 152 | if self._symbol(production) and self._other_symbol(production): 153 | return True 154 | else: 155 | self.token_index = save_point 156 | # do nothing for epsilon 157 | return True 158 | 159 | def _symbol(self, production): 160 | save_point = self.token_index 161 | if self._non_terminal(): 162 | production.append(self.token_list[save_point]) 163 | return True 164 | else: 165 | self.token_index = save_point 166 | result = self._terminal() 167 | 168 | if result: 169 | production.append(self.token_list[save_point]) 170 | return result 171 | 172 | def generate_production(self): 173 | formal_production = Productions() 174 | 175 | for lhs_lexeme in self.production_dict: 176 | lhs_symbol = NonTerminal(lhs_lexeme.value) 177 | production_list = [] 178 | formal_production[lhs_symbol] = production_list 179 | 180 | if lhs_lexeme == self.start_symbol: 181 | formal_production.start_symbol = lhs_symbol 182 | 183 | productions = self.production_dict[lhs_lexeme] 184 | for production in productions: 185 | production_symbols = [] 186 | production_list.append(production_symbols) 187 | for rhs_symbol in production: 188 | if rhs_symbol.type_ == EPSILON: 189 | production_symbols.append(Epsilon()) 190 | elif rhs_symbol.type_ == NON_TERMINAL: 191 | production_symbols.append(NonTerminal(rhs_symbol.value)) 192 | elif rhs_symbol.type_ == TERMINAL: 193 | production_symbols.append( 194 | Terminal(type_=None, data=rhs_symbol.value) 195 | ) 196 | 197 | return formal_production 198 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Parser_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.ParserGenerator.Lexer import Lexer 4 | from MicroCompiler.ParserGenerator.Parser import Parser 5 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 6 | from MicroCompiler.Lookahead.Terminal import Terminal 7 | from MicroCompiler.Lookahead.Terminal import CHARACTER 8 | 9 | 10 | class TestParser(unittest.TestCase): 11 | def test_simple_case(self): 12 | mbnf = """ 13 | statement -> 14 | expression ';' 15 | ; 16 | 17 | expression -> 18 | 'plus' 19 | | 'minus' 20 | ; 21 | """ 22 | 23 | lexer = Lexer() 24 | lexer.parse(mbnf) 25 | 26 | parser = Parser(lexer.token_list) 27 | parser.parse() 28 | real_result = 
parser.generate_production() 29 | 30 | expect_result = { 31 | NonTerminal("expression"): [ 32 | [Terminal(CHARACTER, "plus")], 33 | [Terminal(CHARACTER, "minus")], 34 | ], 35 | NonTerminal("statement"): [ 36 | [NonTerminal("expression"), Terminal(CHARACTER, ";")] 37 | ], 38 | } 39 | 40 | self.assertEqual(dict(real_result), expect_result) 41 | self.assertEqual(real_result.start_symbol, NonTerminal("statement")) 42 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/ParserGenerator/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/calculator.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr -> Term ExprTwo ; 3 | ExprTwo -> '+' Term ExprTwo 4 | | '-' Term ExprTwo 5 | | ϵ ; 6 | Term -> Factor TermTwo ; 7 | TermTwo -> '*' Factor TermTwo 8 | | '/' Factor TermTwo 9 | | ϵ ; 10 | Factor -> '(' Expr ')' 11 | | 'num' ; -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/sample.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr -> Term ExprTwo ; 3 | ExprTwo -> '+' Term ExprTwo 4 | | '-' Term ExprTwo 5 | | ϵ ; 6 | Term -> Factor TermTwo ; 7 | TermTwo -> '*' Factor TermTwo 8 | | '/' Factor TermTwo 9 | | ϵ ; 10 | Factor -> '(' Expr ')' 11 | | 'num' 12 | | 'name' ; -------------------------------------------------------------------------------- /MicroCompiler/Productions.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.Terminal import Terminal 2 | from MicroCompiler.Lookahead.Epsilon import Epsilon 3 | from MicroCompiler.Lookahead.EOF import EOF 4 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 5 | 6 | 7 | class Productions(dict): 8 | epsilon = Epsilon() 9 | eof = EOF() 10 | 11 | def __init__(self, *args, **kwargs): 12 | self._elements = set() 13 | self._terminals = set() 14 | self._non_terminals = set() 15 | self.start_symbol = None 16 | 17 | super().__init__(*args, **kwargs) 18 | 19 | def set_start_symbol(self, start_symbol): 20 | if start_symbol not in self: 21 | raise ValueError("start symbol must be in productions.") 22 | self.start_symbol = start_symbol 23 | 24 | def compute_elements(self): 25 | for non_terminal in self: 26 | self._elements.add(non_terminal) 27 | productions = self[non_terminal] 28 | for production in productions: 29 | for element in production: 30 | self._elements.add(element) 31 | 32 | self._terminals = {i for i in self._elements if isinstance(i, Terminal)} 33 | self._non_terminals = {i for i in self._elements if isinstance(i, NonTerminal)} 34 | 35 | @property 36 | def terminals(self): 37 | self.compute_elements() 38 | return {i for i in self._terminals} 39 | 40 | @property 41 | def non_terminals(self): 42 | self.compute_elements() 43 | return {i for i in self._non_terminals} 44 | 45 | def print_as_bnf(self): 46 | for lhs_symbol in self: 47 | print(lhs_symbol, " ->") 48 | productions = self[lhs_symbol] 49 | production_str_list = [] 50 | for production in productions: 51 | production_str_list.append(" ".join([str(i) for i in production])) 52 | print(" ", " | ".join(production_str_list)) 53 | print(";") 54 | -------------------------------------------------------------------------------- /MicroCompiler/Productions_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.Productions import Productions 4 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 5 | from MicroCompiler.Lookahead.Terminal import CHARACTER 6 | from MicroCompiler.Lookahead.Terminal import Terminal 7 | 8 | 9 | class TestProduction(unittest.TestCase): 10 | def test_conception(self): 11 | statement = NonTerminal("Statement") 12 | expression = NonTerminal("Expression") 13 | semicolon = Terminal(CHARACTER, ";") 14 | production = Productions({statement: [[expression, semicolon]]}) 15 | 16 | production.print_as_bnf() 17 | 18 | self.assertEqual(production.terminals, {semicolon}) 19 | self.assertEqual(production.non_terminals, {statement, expression}) 20 | -------------------------------------------------------------------------------- /MicroCompiler/SkeletonParser.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | 4 | class SkeletonSyntaxError(Exception): 5 | pass 6 | 7 | 8 | class Token(object): 9 | index_counter = 0 10 | 11 | def __init__(self, type_, value=None, index=None): 12 | self.type = type_ 13 | self.value = value 14 | self.index = index if index is not None else self.index_counter 15 | 16 | self.increase_index_counter() 17 | 18 | @classmethod 19 | def increase_index_counter(cls): 20 | cls.index_counter += 1 21 | 22 | def __repr__(self): 23 | return "{}(type_={}, value={}, index={})".format( 24 | self.__class__.__name__, self.type, self.value, self.index 25 | ) 26 | 27 | 28 | class WhiteSpaceToken(Token): 29 | index_counter = 0 30 | 31 | def __init__(self, value=None, index=None): 32 | super(WhiteSpaceToken, self).__init__(type_='white_space', value=value, index=index) 33 | 34 | 35 | class Epsilon(object): 36 | def __repr__(self): 37 | return "Epsilon()" 38 | 39 | 40 | class Node(object): 41 | index_counter = 0 42 | 43 | def __init__(self, type_, value=None, index=None): 44 | self.type = type_ 45 | self.value = value 46 | self.index = index if index is not None else self.index_counter 47 | 48 | self.increase_index_counter() 49 | 50 | @classmethod 51 | def increase_index_counter(cls): 52 | cls.index_counter += 1 53 | 54 | def __repr__(self): 55 | return "{}(type_={}, value={}, index={})".format( 56 | self.__class__.__name__, self.type, self.value, self.index 57 | ) 58 | 59 | 60 | class SkeletonParser: 61 | def __init__(self, definition_file, lexeme_list): 62 | self.definition_file = definition_file 63 | self.lexeme_list = lexeme_list 64 | self.lexeme_index = 0 65 | 66 | with open(definition_file) as fd: 67 | definition = yaml.load(fd.read(), Loader=yaml.FullLoader) 68 | 69 | self.start_symbol = definition["start-symbol"] 70 | self.error_marker = definition["error-marker"] 71 | self.productions = definition["productions"] 72 | self.table = definition["table"] 73 | self.terminals = definition["terminals"] 74 | self.non_terminals = definition["non-terminals"] 75 | self.eof_marker = definition["eof-marker"] 76 | 77 | # debug 78 | self.token_stack = [] 79 | self.call_stack = [] 80 | 81 | def parse(self): 82 | previous_symbol = Node("") 83 | start_symbol = Node(self.start_symbol) 84 | result = self.parse_symbol(start_symbol, previous_symbol) 85 | 86 | if result: 87 | return True 88 | else: 89 | return False 90 | 91 | def call_parser_method(self, parser): 92 | value_stack = [] 93 | for symbol in self.call_stack.pop(): 94 | method = getattr(parser, symbol) 95 | value = value_stack.pop() 96 | return_value = method(value) 97 | value_stack.append(return_value) 98 | 99 | def parse_symbol(self, symbol, previous_symbol): 100 | self.token_stack.append(self.lexeme_list[self.lexeme_index]) 101 | self.call_stack.append((previous_symbol, symbol)) 102 | 103 | if symbol.type in self.terminals: 104 | 105 | lexeme = self.lexeme_list[self.lexeme_index] 106 | 107 | self.call_stack.append((symbol, lexeme)) 108 | self.token_stack.append(lexeme) 109 | 110 | self.lexeme_index += 1 111 | 112 | return True 113 | 114 | if symbol.type not in self.table: 115 | raise SkeletonSyntaxError( 116 | "Symbol: {} not in {}".format(symbol.type, self.table) 117 | ) 118 | 119 | lookahead_symbol = self.lexeme_list[self.lexeme_index] 120 | if lookahead_symbol.type not in self.table[symbol.type]: 121 | raise SkeletonSyntaxError( 122 | "Lookahead symbol: {} not in {}".format( 123 | lookahead_symbol.type, self.table[symbol.type] 124 | ) 125 | ) 126 | 127 | predict_indicator = self.table[symbol.type][lookahead_symbol.type] 128 | 129 | if predict_indicator == self.error_marker: 130 | raise SkeletonSyntaxError( 131 | "Invalid lookahead symbol: {} in {}".format( 132 | lookahead_symbol.type, symbol.type 133 | ) 134 | ) 135 | 136 | if predict_indicator not in self.productions: 137 | raise SkeletonSyntaxError("Predict indicator {} not in productions {}".format(predict_indicator, self.productions)) 138 | 139 | production = list(self.productions[predict_indicator].values())[0] 140 | if not len(production): 141 | self.call_stack.append((symbol, Token("ϵ", Epsilon()))) 142 | self.token_stack.append(Token("ϵ")) 143 | 144 | return True 145 | 146 | result_list = [] 147 | for i in production: 148 | next_symbol = Node(i) 149 | result_list.append(self.parse_symbol(next_symbol, symbol)) 150 | 151 | if all(result_list): 152 | return True 153 | return False 154 | -------------------------------------------------------------------------------- /MicroCompiler/SkeletonParser_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.SkeletonParser import SkeletonParser, Token 4 | 5 | 6 | class TestSkeletonParser(unittest.TestCase): 7 | def test_simple(self): 8 | token_list = [Token("num", 6), Token("/"), Token("num", 2), Token("")] 9 | 10 | sp = SkeletonParser("output.yaml", token_list) 11 | self.assertTrue(sp.parse()) 12 | -------------------------------------------------------------------------------- /MicroCompiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/abstract_syntax_tree/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/abstract_syntax_tree.py: -------------------------------------------------------------------------------- 1 | class AbstractSyntaxTree(object): 2 | def __init__(self): 3 | self.start_node = None 4 | 5 | def set_start_node(self, node): 6 | self.start_node = node 7 | 8 | def add_production(self, from_node,
to_node): 9 | from_node.add_sub_node(to_node) 10 | -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/abstract_syntax_tree_test.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from MicroCompiler.SkeletonParser import Token, SkeletonParser, Epsilon 4 | from MicroCompiler.parser_builder import ParserBuilder 5 | from MicroCompiler.postfix_expression.operator import PythonBuiltinOperator 6 | 7 | token_list = [Token("num", 6), Token("/", operator.truediv), Token("num", 2), Token("")] 8 | # token_list = [ 9 | # Token("num", 6), 10 | # Token("/", operator.truediv), 11 | # Token("num", 2), 12 | # Token("/", operator.truediv), 13 | # Token("num", 2), 14 | # Token(""), 15 | # ] 16 | 17 | 18 | sp = SkeletonParser("output.yaml", token_list) 19 | sp.parse() 20 | 21 | from MicroCompiler.abstract_syntax_tree.abstract_syntax_tree import ( 22 | AbstractSyntaxTree as AST, 23 | ) 24 | from MicroCompiler.abstract_syntax_tree.node import create_or_get_node 25 | 26 | import matplotlib.pyplot as plt 27 | 28 | import networkx as nx 29 | 30 | DG = nx.DiGraph() 31 | 32 | ast = AST() 33 | 34 | 35 | def fallback_method(*args, **kwargs): 36 | print(*args, **kwargs) 37 | return "Done!" 38 | 39 | 40 | def get_node_label(obj): 41 | if obj.value is not None: 42 | return "{}.{}.{}#{}".format( 43 | obj.__class__.__name__, obj.type, obj.value, obj.index 44 | ) 45 | 46 | return "{}.{}#{}".format(obj.__class__.__name__, obj.type, obj.index) 47 | 48 | 49 | for parser_instance, (f, t) in enumerate(sp.call_stack): 50 | from_node = create_or_get_node(f) 51 | to_node = create_or_get_node(t) 52 | if parser_instance == 0: 53 | ast.set_start_node(from_node) 54 | 55 | ast.add_production(from_node, to_node) 56 | 57 | f_label = get_node_label(f) 58 | t_label = get_node_label(t) 59 | 60 | DG.add_node(f_label, prototype=f, reference=from_node) 61 | DG.add_node(t_label, prototype=t, reference=to_node) 62 | 63 | DG.add_edge(t_label, f_label) # sub node to node 64 | 65 | print("") 66 | 67 | # nx.draw(DG) 68 | 69 | # plt.show() 70 | 71 | # nx.write_graphml(DG, "data.graphml") 72 | # nx.write_gexf(DG, "data.gexf") 73 | 74 | ordered_list = list(nx.topological_sort(DG)) 75 | 76 | pb = ParserBuilder() 77 | pb.add_generator("failback", "Who Am I") 78 | clazz = pb.generate() 79 | 80 | 81 | class Parser(clazz): 82 | def __init__(self): 83 | self.post_expr = [] 84 | 85 | def ExprTwo(self, input_): 86 | # ExprTwo -> '+' Term ExprTwo 87 | # | '-' Term ExprTwo 88 | # | ϵ ; 89 | 90 | if len(input_) == 1: 91 | # ExprTwo -> ϵ 92 | return Epsilon() 93 | 94 | if isinstance(input_[2], Epsilon): 95 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 96 | # | | 97 | # -> ϵ -> ϵ 98 | return input_[:2] 99 | 100 | if isinstance(input_[2], list): 101 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 102 | # | | 103 | # -> ['+' num] -> ['-' num] 104 | post_expr = [] 105 | 106 | # value #1 107 | if isinstance(input_[1], list): 108 | post_expr.extend(input_[1]) 109 | else: 110 | post_expr.append(input_[1]) 111 | 112 | # value #2 113 | if isinstance(input_[2][1], list): 114 | post_expr.extend(input_[2][1]) 115 | else: 116 | post_expr.append(input_[2][1]) 117 | 118 | post_expr.append(input_[2][0]) # op 119 | 120 | return [input_[0], post_expr] 121 | 122 | def TermTwo(self, input_): 123 | # TermTwo -> '*' Factor TermTwo 124 | # | '/' Factor TermTwo 125 | # | ϵ ; 126 | 127 | if len(input_) == 1: 128 | # ExprTwo -> ϵ 129 | return Epsilon() 130 | 
131 | if isinstance(input_[2], Epsilon): 132 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 133 | # | | 134 | # -> ϵ -> ϵ 135 | return input_[:2] 136 | 137 | if isinstance(input_[2], list): 138 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 139 | # | | 140 | # -> ['*' num] -> ['/' num] 141 | post_expr = [] 142 | 143 | # value #1 144 | if isinstance(input_[1], list): 145 | post_expr.extend(input_[1]) 146 | else: 147 | post_expr.append(input_[1]) 148 | 149 | # value #2 150 | if isinstance(input_[2][1], list): 151 | post_expr.extend(input_[2][1]) 152 | else: 153 | post_expr.append(input_[2][1]) 154 | 155 | post_expr.append(input_[2][0]) # op 156 | 157 | return [input_[0], post_expr] 158 | 159 | def num(self, input_): 160 | return input_[0] 161 | 162 | def Factor(self, input_): 163 | if len(input_) == 1: 164 | return input_[0] 165 | 166 | if len(input_) == 3: 167 | print(input_) 168 | return input_[1] 169 | 170 | def Division(self, input_): 171 | return PythonBuiltinOperator(input_[0], 2) 172 | 173 | def Term(self, input_): 174 | if isinstance(input_[1], Epsilon): 175 | return input_[0] 176 | 177 | post_expr = [] 178 | 179 | # value #1 180 | if isinstance(input_[0], list): 181 | post_expr.extend(input_[0]) 182 | else: 183 | post_expr.append(input_[0]) 184 | 185 | # value #2 186 | if isinstance(input_[1][1], list): 187 | post_expr.extend(input_[1][1]) 188 | else: 189 | post_expr.append(input_[1][1]) 190 | 191 | post_expr.append(input_[1][0]) # op 192 | 193 | return post_expr 194 | 195 | def Expr(self, input_): 196 | if isinstance(input_[1], Epsilon): 197 | return input_[0] 198 | 199 | def Goal(self, input_): 200 | return input_[0] 201 | 202 | def Start(self, input_): 203 | return input_[0] 204 | 205 | 206 | parser_instance = Parser() 207 | 208 | topological_ordered_list = [DG.nodes[i] for i in nx.topological_sort(DG)] 209 | 210 | from MicroCompiler.parser_evaluator import ParserEvaluator 211 | 212 | parser_evaluator = ParserEvaluator(parser_instance) 213 | final_value = parser_evaluator.eval(topological_ordered_list) 214 | 215 | print("") 216 | 217 | from MicroCompiler.postfix_expression.evaluator import Evaluator 218 | 219 | evaluator = Evaluator(final_value) 220 | result = evaluator.eval() 221 | 222 | print("") 223 | -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/node.py: -------------------------------------------------------------------------------- 1 | class Node(object): 2 | def __init__(self, label_str=None, reference=None): 3 | self.label_str = label_str 4 | self.reference = reference 5 | self.sub_node_list = [] 6 | 7 | def add_sub_node(self, node): 8 | self.sub_node_list.append(node) 9 | 10 | def __repr__(self): 11 | return "{}(label_str={}, reference={}, sub_node_list={})".format( 12 | self.__class__.__name__, self.label_str, self.reference, self.sub_node_list 13 | ) 14 | 15 | 16 | node_registry = {} 17 | 18 | 19 | def create_or_get_node(node): 20 | cls_name = node.__class__.__name__ 21 | 22 | node_id = "{}.{}".format(cls_name, node.index) 23 | label_str = "{}.{}.{}".format(node.type, cls_name, node.index) 24 | reference = node 25 | 26 | if node_id in node_registry: 27 | return node_registry[node_id] 28 | 29 | node = Node(label_str, reference) 30 | node_registry[node_id] = node 31 | 32 | return node 33 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/README.md: -------------------------------------------------------------------------------- 1 | # 
References 2 | * http://dinosaur.compilertools.net/lex/index.html 3 | * https://courses.cs.washington.edu/courses/cse401/07au/CSE401-07lex.pdf 4 | * https://web.stanford.edu/class/archive/cs/cs143/cs143.1112/materials/lectures/lecture04.pdf 5 | * https://cs.stackexchange.com/questions/97374/how-to-implement-a-maximal-munch-lexical-analyzer-by-simulating-nfa-or-running-d 6 | * https://karkare.github.io/cs335/lectures/04LexicalAanalysis.pdf 7 | * http://user.it.uu.se/~kostis/Teaching/KT1-12/Slides/handout03.pdf 8 | * http://courses.ics.hawaii.edu/ReviewICS312/morea/Compiling/ics312_lexing.pdf 9 | * http://ocw.snu.ac.kr/sites/default/files/NOTE/7048.pdf 10 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/lexer/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/lexer/demo.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.lexer.user_level_lexer_define import lexer_define 2 | 3 | from MicroCompiler.lexer.lexer import lex_analysis 4 | 5 | input_string = "2+3 * 6" 6 | result = lex_analysis(input_string, lexer_define) 7 | 8 | print(result) 9 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/lexer.py: -------------------------------------------------------------------------------- 1 | import MicroRegEx 2 | 3 | 4 | def match_token(target_char, token_name_nfa_mapping, token_name_action_mapping): 5 | history = [] 6 | for index in range(1, len(target_char) + 1): 7 | current_char_list = target_char[:index] 8 | 9 | is_accepted_mapping = {} 10 | still_alive_mapping = {} 11 | for token_object, nfa_object in token_name_nfa_mapping.items(): 12 | nfa_object.reset() 13 | nfa_object.match(current_char_list) 14 | is_accepted_mapping[token_object] = nfa_object.is_accepted() 15 | still_alive_mapping[token_object] = bool(nfa_object.current_status) 16 | 17 | history.append(is_accepted_mapping) 18 | 19 | if not any(still_alive_mapping.values()) or (index == len(target_char)): 20 | # all regex engines have stopped; find the last accepted status as the result 21 | for reversed_history_index, moment in enumerate(history[::-1]): 22 | if any(moment.values()): 23 | accepted_nfa_num = sum(bool(i) for i in moment.values()) 24 | if accepted_nfa_num > 1: 25 | # TODO: two token patterns matched; maybe show a warning 26 | # and then select the first rule 27 | raise ValueError("at least two token patterns match the same string") 28 | 29 | first_true_parser = list(filter(lambda x: x[1], moment.items())) 30 | matched_token_type = first_true_parser[0][0] 31 | matched_str = target_char[0 : index - reversed_history_index] 32 | 33 | action = token_name_action_mapping[matched_token_type] 34 | 35 | token_object = action(matched_str) 36 | return matched_str, token_object 37 | 38 | # lexer parse failed 39 | return "", None 40 | 41 | 42 | def lex_analysis(input_string, user_defined_lexer_rule): 43 | token_name_nfa_mapping = {} 44 | token_name_action_mapping = {} 45 | for token_object, token_regex, token_action in user_defined_lexer_rule: 46 | nfa_object = MicroRegEx.compile(token_regex) 47 | token_name_nfa_mapping[token_object] = nfa_object 48 | token_name_action_mapping[token_object] = token_action 49 | 50 | result = [] 51 | 52 | 
current_target_char = input_string 53 | while True: 54 | if not current_target_char: 55 | # job done 56 | break 57 | 58 | matched_str, token_object = match_token( 59 | current_target_char, token_name_nfa_mapping, token_name_action_mapping 60 | ) 61 | if matched_str: 62 | current_target_char = current_target_char[len(matched_str) :] 63 | else: 64 | raise ValueError("lexer parse failed") 65 | 66 | result.append((matched_str, token_object)) 67 | 68 | return result 69 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/user_level_lexer_define.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 4 | 5 | lexer_define = [ 6 | # token type, token regex, token action 7 | ["num", r"(0|1|2|3|4|5|6|7|8|9)+", lambda x: Token("num", int(x))], 8 | ["+", r"\+", lambda x: Token("+", operator.add)], 9 | ["-", r"-", lambda x: Token("-", operator.sub)], 10 | ["*", r"\*", lambda x: Token("*", operator.mul)], 11 | ["/", r"/", lambda x: Token("/", operator.truediv)], 12 | ["(", r"\(", lambda x: Token("(")], 13 | [")", r"\)", lambda x: Token(")")], 14 | ["white space", r" +", lambda x: WhiteSpaceToken(x)], 15 | ] 16 | -------------------------------------------------------------------------------- /MicroCompiler/output.yaml: -------------------------------------------------------------------------------- 1 | eof-marker: 2 | error-marker: -- 3 | non-terminals: [ExprTwo, Term, TermTwo, Factor, Goal, Expr] 4 | productions: 5 | 0: 6 | ExprTwo: [+, Term, ExprTwo] 7 | 1: 8 | ExprTwo: ['-', Term, ExprTwo] 9 | 2: 10 | ExprTwo: [] 11 | 3: 12 | Goal: [Expr] 13 | 4: 14 | Term: [Factor, TermTwo] 15 | 5: 16 | Factor: [(, Expr, )] 17 | 6: 18 | Factor: [num] 19 | 7: 20 | Factor: [name] 21 | 8: 22 | Expr: [Term, ExprTwo] 23 | 9: 24 | TermTwo: ['*', Factor, TermTwo] 25 | 10: 26 | TermTwo: [/, Factor, TermTwo] 27 | 11: 28 | TermTwo: [] 29 | start-symbol: Goal 30 | table: 31 | Expr: {(: 8, ): --, '*': --, +: --, '-': --, /: --, : --, name: 8, num: 8} 32 | ExprTwo: {(: --, ): 2, '*': --, +: 0, '-': 1, /: --, : 2, name: --, num: --} 33 | Factor: {(: 5, ): --, '*': --, +: --, '-': --, /: --, : --, name: 7, num: 6} 34 | Goal: {(: 3, ): --, '*': --, +: --, '-': --, /: --, : --, name: 3, num: 3} 35 | Term: {(: 4, ): --, '*': --, +: --, '-': --, /: --, : --, name: 4, num: 4} 36 | TermTwo: {(: --, ): 11, '*': 9, +: 11, '-': 11, /: 10, : 11, name: --, num: --} 37 | terminals: [+, '-', (, '*', /, name, ), num] 38 | -------------------------------------------------------------------------------- /MicroCompiler/parser_builder.py: -------------------------------------------------------------------------------- 1 | import types 2 | 3 | 4 | class ParserBuilder(object): 5 | def __init__(self, parser_name="ParserClass"): 6 | self.parser_name = parser_name 7 | self.generators = dict() 8 | 9 | def add_generator(self, from_statement, to_statements): 10 | def generator(self, to_statements=to_statements): 11 | print(to_statements) 12 | # raise NotImplementedError 13 | 14 | self.generators[from_statement] = generator 15 | 16 | def generate(self): 17 | return type(self.parser_name, (), self.generators) 18 | 19 | 20 | if __name__ == "__main__": 21 | pb = ParserBuilder() 22 | pb.add_generator('some_method', 'Who Am I') 23 | clazz = pb.generate() 24 | i = clazz() 25 | print(i.some_method()) 26 | print("") 27 | 
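
The generated class is intended to be subclassed; here is a minimal sketch of the pattern the demos use (mirroring `demo/arithmetic_calculator/user_level_parser.py`; `CalculatorParser` and its `num` method are illustrative names, not part of this module):

```python
from MicroCompiler.parser_builder import ParserBuilder

pb = ParserBuilder()
pb.add_generator("get_fallback_method", "fallback")
Base = pb.generate()


class CalculatorParser(Base):
    # one method per grammar symbol; in the demos, ParserEvaluator falls back
    # to a user-supplied `fallback` method for symbols without a handler
    def num(self, input_):
        return input_[0]


print(CalculatorParser().num([42]))  # -> 42
```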
-------------------------------------------------------------------------------- /MicroCompiler/parser_evaluator.py: -------------------------------------------------------------------------------- 1 | class ParserEvaluator(object): 2 | """ 3 | Call the user's parser instance (a subclass of Parser), directed by topological_sorted_nodes 4 | """ 5 | def __init__(self, parser_instance): 6 | self.parser_instance = parser_instance 7 | 8 | def eval(self, topological_sorted_nodes): 9 | for cmd in topological_sorted_nodes: 10 | cmd_prototype = cmd["prototype"] 11 | cmd_reference = cmd["reference"] 12 | if cmd_prototype.__class__.__name__ == "Token": 13 | # token value already set 14 | continue 15 | 16 | if cmd_prototype.__class__.__name__ == "Node": 17 | values = [i.reference.value for i in cmd_reference.sub_node_list] 18 | method_name = cmd_prototype.type 19 | legal_method_name = self.parser_instance.get_legal_method_name(method_name) 20 | method_func = getattr( 21 | self.parser_instance, legal_method_name, self.parser_instance.fallback 22 | ) 23 | return_value = method_func(values) 24 | cmd["reference"].reference.value = return_value 25 | 26 | final_value = cmd["reference"].reference.value 27 | 28 | return final_value 29 | -------------------------------------------------------------------------------- /MicroCompiler/parser_evaluator_builder.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | from MicroCompiler.abstract_syntax_tree.abstract_syntax_tree import ( 4 | AbstractSyntaxTree as AST, 5 | ) 6 | from MicroCompiler.abstract_syntax_tree.node import create_or_get_node 7 | 8 | 9 | def build_parser_evaluator(call_stack, graph_file=None): 10 | DG = nx.DiGraph() 11 | 12 | graph = nx.DiGraph() 13 | 14 | ast = AST() 15 | 16 | def fallback_method(*args, **kwargs): 17 | print(*args, **kwargs) 18 | return "Done!"
19 | 20 | def get_node_label(obj): 21 | if obj.value is not None: 22 | return "{}.{}.{}#{}".format( 23 | obj.__class__.__name__, obj.type, obj.value, obj.index 24 | ) 25 | 26 | return "{}.{}#{}".format(obj.__class__.__name__, obj.type, obj.index) 27 | 28 | for parser_instance, (f, t) in enumerate(call_stack): 29 | from_node = create_or_get_node(f) 30 | to_node = create_or_get_node(t) 31 | if parser_instance == 0: 32 | ast.set_start_node(from_node) 33 | 34 | ast.add_production(from_node, to_node) 35 | 36 | f_label = get_node_label(f) 37 | t_label = get_node_label(t) 38 | 39 | DG.add_node(f_label, prototype=f, reference=from_node) 40 | DG.add_node(t_label, prototype=t, reference=to_node) 41 | 42 | graph.add_node(f_label) 43 | graph.add_node(t_label) 44 | 45 | DG.add_edge(t_label, f_label) # sub node to node 46 | graph.add_edge(t_label, f_label) # sub node to node 47 | 48 | topological_ordered_list = [DG.nodes[i] for i in nx.topological_sort(DG)] 49 | 50 | if graph_file: 51 | nx.write_graphml(graph, graph_file) 52 | 53 | return topological_ordered_list 54 | -------------------------------------------------------------------------------- /MicroCompiler/postfix_expression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/postfix_expression/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/postfix_expression/evaluator.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.postfix_expression.operator import Operator 2 | 3 | 4 | class Evaluator(object): 5 | def __init__(self, post_expr): 6 | self.post_expr = post_expr 7 | 8 | self.value_stack = [] 9 | 10 | def eval(self): 11 | for expr in self.post_expr: 12 | if isinstance(expr, Operator): # this is an operator 13 | args = self.pop_top_k_value(expr.get_operand_num()) 14 | value = expr.eval(*args) 15 | self.value_stack.append(value) 16 | else: # this is an operand 17 | self.value_stack.append(expr) 18 | 19 | assert len(self.value_stack) == 1 20 | 21 | return self.value_stack[0] 22 | 23 | def pop_top_k_value(self, k): 24 | args = [] 25 | for _ in range(k): 26 | args.insert(0, self.value_stack.pop()) 27 | 28 | return args 29 | -------------------------------------------------------------------------------- /MicroCompiler/postfix_expression/operator.py: -------------------------------------------------------------------------------- 1 | class Operator(object): 2 | def get_operand_num(self): 3 | raise NotImplementedError 4 | 5 | def eval(self, *args): 6 | raise NotImplementedError 7 | 8 | 9 | class PythonBuiltinOperator(Operator): 10 | def __init__(self, operator, operand_num): 11 | self.operator = operator 12 | self.operand_num = operand_num 13 | 14 | def get_operand_num(self): 15 | return self.operand_num 16 | 17 | def eval(self, *args): 18 | return self.operator(*args) 19 | 20 | def __repr__(self): 21 | return "{}(operator={}, operand_num={})".format( 22 | self.__class__.__name__, self.operator, self.operand_num 23 | ) 24 | -------------------------------------------------------------------------------- /MicroCompiler/sample.yaml: -------------------------------------------------------------------------------- 1 | terminals: [+, -, x, /, (, ), name, num] 2 | non-terminals: [Goal, Expr, Expr', Term, Term', Factor] 3 | eof-marker: 4 | error-marker: -- 5 | start-symbol: Goal 6 | 7 | 
productions: 8 | 0: {Goal: [Expr]} 9 | 1: {Expr: [Term, Expr']} 10 | 2: {Expr': [+, Term, Expr']} 11 | 3: {Expr': [-, Term, Expr']} 12 | 4: {Expr': []} 13 | 5: {Term: [Factor, Term']} 14 | 6: {Term': [x, Factor, Term']} 15 | 7: {Term': [/, Factor, Term']} 16 | 8: {Term': []} 17 | 9: {Factor: [(, Expr, )]} 18 | 10: {Factor: [num]} 19 | 11: {Factor: [name]} 20 | 21 | table: 22 | Goal: {+: --, -: --, x: --, /: --, (: 0, ): --, name: 0, num: 0, : --} 23 | Expr: {+: --, -: --, x: --, /: --, (: 1, ): --, name: 1, num: 1, : --} 24 | Expr': {+: 2, -: 3, x: --, /: --, (: 0, ): 4, name: --, num: --, : 4} 25 | Term: {+: --, -: --, x: --, /: --, (: 5, ): --, name: 5, num: 5, : --} 26 | Term': {+: 8, -: 8, x: 6, /: 7, (: --, ): 8, name: --, num: --, : 8} 27 | Factor: {+: --, -: --, x: --, /: --, (: 9, ): --, name: 11, num: 10, : --} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MicroCompiler 2 | 3 | A micro LL/LR/LALR syntax parser that helps programming-language designers with language design, testing, and more. 4 | 5 | This project is a by-product of my study of Compilers: Principles, Techniques, and Tools (AKA the Dragon Book), [CS143: Compilers by Stanford University](http://web.stanford.edu/class/cs143/), [COMP 412: Compiler Construction for Undergraduates by Rice University](https://www.clear.rice.edu/comp412/) and [Engineering: CS1 - SELF PACED Compilers by Stanford University](https://lagunita.stanford.edu/courses/Engineering/Compilers/Fall2014/info). 6 | 7 | # Goal 8 | Build a complete, usable tool set that helps users design compiler front ends. 9 | 10 | # Status 11 | - LL grammars: largely complete; LL grammars already cover the needs of most programming languages, e.g. Python 12 | - LR/LALR grammars: not finished, and unlikely to be finished in the short term 13 | 14 | # Features 15 | The evaluation of `6 * (2 + 2)` can be expressed by the following abstract syntax tree (the graph is generated automatically by this project's compiler and rendered with `Cytoscape`): 16 | 17 | ![](demo/arithmetic_calculator/calculator.png) 18 | 19 | # Documentation 20 | ## LL(1) grammars 21 | ### The MBNF format 22 | MBNF is short for Micro Backus Normal Form, a grammar format designed specifically for this project's compiler. MBNF is simple and easy to read, and closely resembles common BNF notation. Users pass grammar information to the compiler by writing MBNF files. 23 | 24 | The file [demo/arithmetic_calculator/calculator.mbnf](demo/arithmetic_calculator/calculator.mbnf) is an example MBNF file for an arithmetic language that supports `+`, `-`, `*`, `/` and the parentheses `(` `)`. 25 | 26 | ### Generator 27 | `MicroCompiler.ParserGenerator.Generator.Generator` reads an MBNF grammar file and produces the parser-construction data that an LL(1) syntax parser needs, including the `First Set`, `Follow Set`, and so on. 28 | 29 | This parser-construction data can be serialized to human-readable YAML. The file [demo/arithmetic_calculator/calculator.yaml](demo/arithmetic_calculator/calculator.yaml) is the parser-construction data of the arithmetic language above (see above), serialized as YAML. 30 | 31 | ### SkeletonParser 32 | `MicroCompiler.SkeletonParser.SkeletonParser` reads the YAML parser-construction data together with a sequence of tokens, checks whether the token sequence is syntactically valid, and produces a valid parse dependency graph. 33 | 34 | ### build_parser_evaluator 35 | On top of the dependency graph, `MicroCompiler.parser_evaluator_builder.build_parser_evaluator` builds the abstract syntax tree from the dependency information and derives a topologically sorted evaluation order. 36 | 37 | ### ParserBuilder 38 | `MicroCompiler.parser_builder.ParserBuilder` generates a parser base class. Users subclass it and add the parsing methods for the relevant grammar productions in their own class. 39 | 40 | ### ParserEvaluator 41 | `MicroCompiler.parser_evaluator.ParserEvaluator` executes the methods of the user-defined class one by one, guided by the topologically sorted evaluation order, and returns the result. 42 | 43 | ### [Optional] Evaluator 44 | For user-defined classes that return a postfix expression (reverse Polish notation), users can optionally use `MicroCompiler.postfix_expression.evaluator.Evaluator` to evaluate the postfix expression. 45 | 46 | # Demos 47 | To better validate the project and show how to use it, several demos are provided. 48 | 49 | ## Arithmetic calculator 50 | A parser for an arithmetic expression language with the four basic operations (`+`, `-`, `*`, `/`, `(`, `)`). The project lives in [demo/arithmetic_calculator](demo/arithmetic_calculator) and contains detailed documentation. 51 | 52 | ## Template engine 53 | A simple template rendering engine that can render templates such as `HELLO,{{ name }}`. The project lives in [demo/template_engine](demo/template_engine) and contains detailed documentation. 54 | 55 | # Acknowledge & Credits 56 | http://hackingoff.com/compilers 57 | 
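# End-to-end example

A minimal end-to-end sketch, adapted from `demo/arithmetic_calculator/main.py` (run it from inside `demo/arithmetic_calculator/` so that the relative `calculator.mbnf` path resolves); it wires the components above together to compute `6 * (2 + 2)`:

```python
import operator

from MicroCompiler.ParserGenerator.Generator import Generator
from MicroCompiler.SkeletonParser import Token
from demo.arithmetic_calculator.arithmetic_calculator import arithmetic_calculator
from demo.arithmetic_calculator.user_level_parser import Parser

# MBNF grammar -> LL(1) parser-construction data (serialized as YAML)
g = Generator("calculator.mbnf")
g.generate()
g.write_yaml("calculator.yaml")

# hand-built token stream for `6 * (2 + 2)`; the trailing Token("") is the EOF marker
token_list = [
    Token("num", 6), Token("*", operator.mul), Token("("),
    Token("num", 2), Token("+", operator.add), Token("num", 2),
    Token(")"), Token(""),
]

result = arithmetic_calculator("calculator.yaml", token_list, Parser())
print(result)  # 24
```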
-------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/demo/__init__.py -------------------------------------------------------------------------------- /demo/arithmetic_calculator/README.md: -------------------------------------------------------------------------------- 1 | # Demo project: arithmetic calculator 2 | ## Workflow 3 | ### Writing the grammar file 4 | This project uses a simplified BNF format to express grammar rules. Users write their own grammar rule file according to the rules of their language and the constraints of the compiler (the LL(1) grammar restrictions, etc.). 5 | 6 | In this example we implement an arithmetic calculator; the grammar file is `calculator.mbnf`. 7 | 8 | ### Compiling the BNF 9 | The project parses the grammar written in the MBNF (.mbnf) format and, following the LL(1) approach, generates a compiler helper file containing the `FirstSet`, `FollowSet`, `Lookahead`, and other information. 10 | 11 | In this example, running the `ll1_grammer_generator.py` script reads the `calculator.mbnf` file, performs the LL(1) grammar checks, and generates the compiler helper file `calculator.yaml`. 12 | 13 | ### Building a custom parser 14 | The compiler only drives the execution flow; for the concrete business logic, users must write a set of parsing functions, one per grammar unit, that implement the actual behavior. 15 | 16 | In this example, intermediate results of the computation are represented as postfix expressions (reverse Polish notation). See `user_level_parser.py` for the details. 17 | 18 | ### Running the interpreter 19 | With the help of the compiler helper information (stored in `calculator.yaml`), the compiler executes the methods of the user parser one by one in topological order. The user parser is responsible for producing an intermediate result in the form of a postfix expression (reverse Polish notation). 20 | Finally, this postfix expression is evaluated by `MicroCompiler/postfix_expression/evaluator.py`, which outputs the final result. 21 | 22 | ## Running the demo 23 | Run `python ./main.py` in this directory to start the demo, which shows how the value of `6 * (2 + 2)` is computed. 24 | 25 | ## Abstract syntax tree 26 | To better illustrate the parsing process, the demo (`python ./main.py`) automatically generates a graph file, `calculator.graphml`, which can be opened with graph tools such as `Cytoscape` to inspect the dependency relations. 27 | 28 | The computation of `6 * (2 + 2)` in this example can be expressed by the following abstract syntax tree: 29 | 30 | ![](calculator.png) 31 | 32 | ## Tests 33 | Run `python ./tests.py` to execute the test cases; see `test_cases.py` for the cases themselves. 34 | 35 | ## Limitations 36 | Since the lexer part is not finished, input currently has to be supplied as hand-built lexing results. -------------------------------------------------------------------------------- /demo/arithmetic_calculator/arithmetic_calculator.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import SkeletonParser 2 | from MicroCompiler.parser_evaluator import ParserEvaluator 3 | from MicroCompiler.postfix_expression.evaluator import Evaluator 4 | from MicroCompiler.parser_evaluator_builder import \ 5 | build_parser_evaluator 6 | 7 | 8 | def arithmetic_calculator(grammar_file, token_list, user_level_parser, graph_file=None): 9 | sp = SkeletonParser(grammar_file, token_list) 10 | sp.parse() 11 | 12 | topological_ordered_list = build_parser_evaluator(sp.call_stack, graph_file) 13 | 14 | parser_evaluator = ParserEvaluator(user_level_parser) 15 | postfix_expr = parser_evaluator.eval(topological_ordered_list) 16 | 17 | evaluator = Evaluator(postfix_expr) 18 | result = evaluator.eval() 19 | 20 | return result 21 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.graphml: -------------------------------------------------------------------------------- [calculator.graphml: 79 lines of GraphML/XML markup, stripped in this dump; calculator.png shows the rendered graph] -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr 
-> Term ExprTwo ; 3 | ExprTwo -> '+' Term ExprTwo 4 | | '-' Term ExprTwo 5 | | ϵ ; 6 | Term -> Factor TermTwo ; 7 | TermTwo -> '*' Factor TermTwo 8 | | '/' Factor TermTwo 9 | | ϵ ; 10 | Factor -> '(' Expr ')' 11 | | 'num' ; -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/demo/arithmetic_calculator/calculator.png -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.yaml: -------------------------------------------------------------------------------- 1 | eof-marker: 2 | error-marker: -- 3 | non-terminals: 4 | - Expr 5 | - Term 6 | - Goal 7 | - Factor 8 | - ExprTwo 9 | - TermTwo 10 | productions: 11 | 0: 12 | Goal: 13 | - Expr 14 | 1: 15 | Expr: 16 | - Term 17 | - ExprTwo 18 | 2: 19 | ExprTwo: 20 | - + 21 | - Term 22 | - ExprTwo 23 | 3: 24 | ExprTwo: 25 | - '-' 26 | - Term 27 | - ExprTwo 28 | 4: 29 | ExprTwo: [] 30 | 5: 31 | Term: 32 | - Factor 33 | - TermTwo 34 | 6: 35 | TermTwo: 36 | - '*' 37 | - Factor 38 | - TermTwo 39 | 7: 40 | TermTwo: 41 | - / 42 | - Factor 43 | - TermTwo 44 | 8: 45 | TermTwo: [] 46 | 9: 47 | Factor: 48 | - ( 49 | - Expr 50 | - ) 51 | 10: 52 | Factor: 53 | - num 54 | start-symbol: Goal 55 | table: 56 | Expr: 57 | (: 1 58 | ): -- 59 | '*': -- 60 | +: -- 61 | '-': -- 62 | /: -- 63 | : -- 64 | num: 1 65 | ExprTwo: 66 | (: -- 67 | ): 4 68 | '*': -- 69 | +: 2 70 | '-': 3 71 | /: -- 72 | : 4 73 | num: -- 74 | Factor: 75 | (: 9 76 | ): -- 77 | '*': -- 78 | +: -- 79 | '-': -- 80 | /: -- 81 | : -- 82 | num: 10 83 | Goal: 84 | (: 0 85 | ): -- 86 | '*': -- 87 | +: -- 88 | '-': -- 89 | /: -- 90 | : -- 91 | num: 0 92 | Term: 93 | (: 5 94 | ): -- 95 | '*': -- 96 | +: -- 97 | '-': -- 98 | /: -- 99 | : -- 100 | num: 5 101 | TermTwo: 102 | (: -- 103 | ): 8 104 | '*': 6 105 | +: 8 106 | '-': 8 107 | /: 7 108 | : 8 109 | num: -- 110 | terminals: 111 | - ( 112 | - '-' 113 | - '*' 114 | - + 115 | - / 116 | - num 117 | - ) 118 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/ll1_grammer_generator.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.ParserGenerator.Generator import Generator 2 | 3 | g = Generator("calculator.mbnf") 4 | g.generate() 5 | 6 | g.write_yaml("calculator.yaml") 7 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/main.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from MicroCompiler.ParserGenerator.Generator import Generator 4 | from MicroCompiler.SkeletonParser import Token 5 | from demo.arithmetic_calculator.arithmetic_calculator import arithmetic_calculator 6 | from demo.arithmetic_calculator.user_level_parser import Parser 7 | 8 | user_level_parser = Parser() 9 | 10 | 11 | def main(token_list): 12 | g = Generator("calculator.mbnf") 13 | g.generate() 14 | g.write_yaml("calculator.yaml") 15 | 16 | result = arithmetic_calculator("calculator.yaml", token_list, user_level_parser, "calculator.graphml") 17 | print(result) 18 | 19 | 20 | if __name__ == "__main__": 21 | # equal to: 6 * (2 + 2) 22 | token_list = [ 23 | Token("num", 6), 24 | Token("*", operator.mul), 25 | Token("("), 26 | Token("num", 2), 27 | 
Token("+", operator.add), 28 | Token("num", 2), 29 | Token(")"), 30 | Token(""), 31 | ] 32 | 33 | main(token_list) 34 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/main_with_lexer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from MicroCompiler.ParserGenerator.Generator import Generator 4 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 5 | from demo.arithmetic_calculator.arithmetic_calculator import arithmetic_calculator 6 | from demo.arithmetic_calculator.user_level_parser import Parser 7 | 8 | user_level_parser = Parser() 9 | 10 | from MicroCompiler.lexer.user_level_lexer_define import lexer_define 11 | 12 | from MicroCompiler.lexer.lexer import lex_analysis 13 | 14 | current_dir = os.path.dirname(os.path.realpath(__file__)) 15 | 16 | bnf_file = os.path.join(current_dir, "calculator.mbnf") 17 | ll1_grammar_file = os.path.join(current_dir, "calculator.yaml") 18 | graph_file = os.path.join(current_dir, "calculator.graphml") 19 | 20 | 21 | def main(input_string): 22 | raw_token_list = [i[1] for i in lex_analysis(input_string, lexer_define)] 23 | # remote whitespace token 24 | token_list = list(filter(lambda x: not isinstance(x, WhiteSpaceToken), raw_token_list)) 25 | # append EOF token 26 | token_list.append(Token("")) 27 | 28 | g = Generator(bnf_file) 29 | g.generate() 30 | g.write_yaml(ll1_grammar_file) 31 | 32 | result = arithmetic_calculator(ll1_grammar_file, token_list, user_level_parser, graph_file) 33 | 34 | return result 35 | 36 | 37 | if __name__ == "__main__": 38 | input_string = "2+3 * 6" 39 | 40 | result = main(input_string) 41 | 42 | print(result) 43 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/output.yaml: -------------------------------------------------------------------------------- 1 | eof-marker: 2 | error-marker: -- 3 | non-terminals: [ExprTwo, Term, TermTwo, Factor, Goal, Expr] 4 | productions: 5 | 0: 6 | ExprTwo: [+, Term, ExprTwo] 7 | 1: 8 | ExprTwo: ['-', Term, ExprTwo] 9 | 2: 10 | ExprTwo: [] 11 | 3: 12 | Goal: [Expr] 13 | 4: 14 | Term: [Factor, TermTwo] 15 | 5: 16 | Factor: [(, Expr, )] 17 | 6: 18 | Factor: [num] 19 | 7: 20 | Factor: [name] 21 | 8: 22 | Expr: [Term, ExprTwo] 23 | 9: 24 | TermTwo: ['*', Factor, TermTwo] 25 | 10: 26 | TermTwo: [/, Factor, TermTwo] 27 | 11: 28 | TermTwo: [] 29 | start-symbol: Goal 30 | table: 31 | Expr: {(: 8, ): --, '*': --, +: --, '-': --, /: --, : --, name: 8, num: 8} 32 | ExprTwo: {(: --, ): 2, '*': --, +: 0, '-': 1, /: --, : 2, name: --, num: --} 33 | Factor: {(: 5, ): --, '*': --, +: --, '-': --, /: --, : --, name: 7, num: 6} 34 | Goal: {(: 3, ): --, '*': --, +: --, '-': --, /: --, : --, name: 3, num: 3} 35 | Term: {(: 4, ): --, '*': --, +: --, '-': --, /: --, : --, name: 4, num: 4} 36 | TermTwo: {(: --, ): 11, '*': 9, +: 11, '-': 11, /: 10, : 11, name: --, num: --} 37 | terminals: [+, '-', (, '*', /, name, ), num] 38 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/terminal_user_interface.py: -------------------------------------------------------------------------------- 1 | from demo.arithmetic_calculator.main_with_lexer import main 2 | 3 | # Continue while true. 4 | while True: 5 | # Get input. 6 | print("> ", end="") 7 | value = input() 8 | 9 | # Break if user types q. 10 | if value == "q": 11 | break 12 | 13 | # echo value. 
14 | print("You typed: ", value) 15 | 16 | result = main(value) 17 | 18 | # print result 19 | print("Result: ", result) 20 | 21 | # Exit message. 22 | print("You quit.") 23 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/test_cases.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Token 2 | import operator 3 | 4 | test_cases = ( 5 | ( 6 | [ 7 | Token("num", 6), 8 | Token("+", operator.add), 9 | Token("num", 2), 10 | Token(""), 11 | ], 12 | 8, 13 | ), 14 | ( 15 | [ 16 | Token("num", 6), 17 | Token("+", operator.add), 18 | Token("num", 2), 19 | Token("+", operator.add), 20 | Token("num", 2), 21 | Token(""), 22 | ], 23 | 10, 24 | ), 25 | ( 26 | [ 27 | Token("num", 6), 28 | Token("-", operator.sub), 29 | Token("num", 2), 30 | Token(""), 31 | ], 32 | 4, 33 | ), 34 | ( 35 | [ 36 | Token("num", 6), 37 | Token("-", operator.sub), 38 | Token("num", 2), 39 | Token("-", operator.sub), 40 | Token("num", 2), 41 | Token(""), 42 | ], 43 | 2, 44 | ), 45 | ( 46 | [ 47 | Token("num", 6), 48 | Token("/", operator.truediv), 49 | Token("num", 2), 50 | Token(""), 51 | ], 52 | 3.0, 53 | ), 54 | ( 55 | [ 56 | Token("num", 12), 57 | Token("/", operator.truediv), 58 | Token("num", 6), 59 | Token("/", operator.truediv), 60 | Token("num", 2), 61 | Token(""), 62 | ], 63 | 1, 64 | ), 65 | ( 66 | [ 67 | Token("num", 6), 68 | Token("*", operator.mul), 69 | Token("num", 2), 70 | Token(""), 71 | ], 72 | 12, 73 | ), 74 | ( 75 | [ 76 | Token("num", 3), 77 | Token("*", operator.mul), 78 | Token("num", 6), 79 | Token("*", operator.mul), 80 | Token("num", 2), 81 | Token(""), 82 | ], 83 | 36, 84 | ), 85 | ( 86 | [ 87 | Token("num", 6), 88 | Token("+", operator.add), 89 | Token("num", 2), 90 | Token("/", operator.truediv), 91 | Token("num", 2), 92 | Token(""), 93 | ], 94 | 7, 95 | ), 96 | ( 97 | [ 98 | Token("num", 6), 99 | Token("/", operator.truediv), 100 | Token("num", 2), 101 | Token("+", operator.add), 102 | Token("num", 2), 103 | Token(""), 104 | ], 105 | 5, 106 | ), 107 | ( 108 | [ 109 | Token("num", 6), 110 | Token("+", operator.add), 111 | Token("num", 2), 112 | Token("*", operator.mul), 113 | Token("num", 2), 114 | Token(""), 115 | ], 116 | 10, 117 | ), 118 | ( 119 | [ 120 | Token("num", 6), 121 | Token("*", operator.mul), 122 | Token("num", 2), 123 | Token("+", operator.add), 124 | Token("num", 2), 125 | Token(""), 126 | ], 127 | 14, 128 | ), 129 | ( 130 | [ 131 | Token("num", 6), 132 | Token("*", operator.mul), 133 | Token("("), 134 | Token("num", 2), 135 | Token("+", operator.add), 136 | Token("num", 2), 137 | Token(")"), 138 | Token(""), 139 | ], 140 | 24, 141 | ), 142 | ( 143 | [ 144 | Token("("), 145 | Token("num", 2), 146 | Token("+", operator.add), 147 | Token("num", 2), 148 | Token(")"), 149 | Token("*", operator.mul), 150 | Token("num", 6), 151 | Token(""), 152 | ], 153 | 24, 154 | ), 155 | ( 156 | [ 157 | Token("("), 158 | Token("num", 2), 159 | Token("+", operator.add), 160 | Token("num", 2), 161 | Token("+", operator.add), 162 | Token("num", 2), 163 | Token(")"), 164 | Token("*", operator.mul), 165 | Token("num", 6), 166 | Token(""), 167 | ], 168 | 36, 169 | ), 170 | ( 171 | [ 172 | Token("("), 173 | Token("num", 2), 174 | Token("+", operator.add), 175 | Token("num", 2), 176 | Token("/", operator.truediv), 177 | Token("num", 2), 178 | Token(")"), 179 | Token("*", operator.mul), 180 | Token("num", 6), 181 | Token(""), 182 | ], 183 | 18, 184 | ), 185 | ( 186 | [ 187 | 
Token("("), 188 | Token("num", 2), 189 | Token("+", operator.add), 190 | Token("num", 2), 191 | Token("/", operator.truediv), 192 | Token("num", 2), 193 | Token(")"), 194 | Token("*", operator.mul), 195 | Token("num", 6), 196 | Token("/", operator.truediv), 197 | Token("("), 198 | Token("num", 2), 199 | Token("+", operator.add), 200 | Token("num", 2), 201 | Token("*", operator.mul), 202 | Token("num", 2), 203 | Token(")"), 204 | Token(""), 205 | ], 206 | 3, 207 | ), 208 | ) 209 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/tests.py: -------------------------------------------------------------------------------- 1 | from demo.arithmetic_calculator.arithmetic_calculator import \ 2 | arithmetic_calculator 3 | from demo.arithmetic_calculator.test_cases import test_cases 4 | from demo.arithmetic_calculator.user_level_parser import Parser 5 | 6 | user_level_parser = Parser() 7 | 8 | for index, (token_list, expected_result) in enumerate(test_cases): 9 | print("working on: ", token_list) 10 | result = arithmetic_calculator("calculator.yaml", token_list, user_level_parser) 11 | if result != expected_result: 12 | print("test failed: at #", index) 13 | print(token_list, result) 14 | break 15 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/user_level_parser.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Epsilon 2 | from MicroCompiler.parser_builder import ParserBuilder 3 | from MicroCompiler.postfix_expression.operator import PythonBuiltinOperator 4 | 5 | pb = ParserBuilder() 6 | pb.add_generator("get_fallback_method", "fallback") 7 | clazz = pb.generate() 8 | 9 | 10 | class Parser(clazz): 11 | def __init__(self): 12 | self.post_expr = [] 13 | 14 | self.method_name_mapping = { 15 | "(": "open_parenthesis", 16 | ")": "close_parenthesis", 17 | "/": "Division", 18 | "*": "Mul", 19 | "+": "Add", 20 | "-": "Sub", 21 | "": "Start", 22 | } 23 | 24 | def fallback(self, input_): 25 | return input_ 26 | 27 | def flat_list(self, nested_list): 28 | result = [] 29 | 30 | for item in nested_list: 31 | if isinstance(item, list): 32 | flat_item = self.flat_list(item) 33 | result.extend(flat_item) 34 | else: 35 | result.append(item) 36 | 37 | return result 38 | 39 | def flat_nested_postfix_list(self, nested_postfix_list): 40 | return self.flat_list(nested_postfix_list) 41 | 42 | def get_legal_method_name(self, method_name): 43 | return ( 44 | self.method_name_mapping[method_name] 45 | if method_name in self.method_name_mapping 46 | else method_name 47 | ) 48 | 49 | def Division(self, input_): 50 | return PythonBuiltinOperator(input_[0], 2) 51 | 52 | def Mul(self, input_): 53 | return PythonBuiltinOperator(input_[0], 2) 54 | 55 | def Add(self, input_): 56 | return PythonBuiltinOperator(input_[0], 2) 57 | 58 | def Sub(self, input_): 59 | return PythonBuiltinOperator(input_[0], 2) 60 | 61 | def num(self, input_): 62 | return input_[0] 63 | 64 | def ExprTwo(self, input_): 65 | # ExprTwo -> '+' Term ExprTwo 66 | # | '-' Term ExprTwo 67 | # | ϵ ; 68 | 69 | if len(input_) == 1: 70 | # ExprTwo -> ϵ 71 | return Epsilon() 72 | 73 | if isinstance(input_[2], Epsilon): 74 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 75 | # | | 76 | # -> ϵ -> ϵ 77 | return [input_[0], [input_[1]]] 78 | 79 | if isinstance(input_[2], list): 80 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 81 | # | | 82 | # -> ['+' num] -> ['-' num] 
83 | 84 | postfix_expr = input_[2][1] 85 | 86 | postfix_expr = postfix_expr[:] # shallow copy 87 | 88 | head = postfix_expr.pop(0) 89 | operator = input_[2][0] 90 | 91 | postfix_expr.insert(0, operator) 92 | postfix_expr.insert(0, head) 93 | postfix_expr.insert(0, input_[1]) 94 | 95 | return [input_[0], postfix_expr] 96 | 97 | def TermTwo(self, input_): 98 | # TermTwo -> '*' Factor TermTwo 99 | # | '/' Factor TermTwo 100 | # | ϵ ; 101 | 102 | if len(input_) == 1: 103 | # TermTwo -> ϵ 104 | return Epsilon() 105 | 106 | if isinstance(input_[2], Epsilon): 107 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 108 | # | | 109 | # -> ϵ -> ϵ 110 | # 111 | # output: ['*', postfix_expr] 112 | # |----------| 113 | # type: list 114 | 115 | return [input_[0], [input_[1]]] 116 | 117 | if isinstance(input_[2], list): 118 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 119 | # | | 120 | # -> ['*' num] -> ['/' num] 121 | 122 | # input_: ['*', Factor, ['/', [head, rest_of_postfix_expr]]] 123 | # |---------------------------| 124 | # post_expr (type: list) 125 | # 126 | # output: ['*', [Factor, head, '/', rest_of_postfix_expr]] 127 | 128 | postfix_expr = input_[2][1] 129 | 130 | postfix_expr = postfix_expr[:] # shallow copy 131 | 132 | head = postfix_expr.pop(0) 133 | operator = input_[2][0] 134 | 135 | postfix_expr.insert(0, operator) 136 | postfix_expr.insert(0, head) 137 | postfix_expr.insert(0, input_[1]) 138 | 139 | return [input_[0], postfix_expr] 140 | 141 | def Factor(self, input_): 142 | if len(input_) == 1: 143 | return input_[0] 144 | 145 | if len(input_) == 3: 146 | return input_[1] 147 | 148 | def Term(self, input_): 149 | if isinstance(input_[1], Epsilon): 150 | # Term -> Factor TermTwo ; 151 | # | 152 | # -> ϵ 153 | return input_[0] 154 | 155 | # Term -> Factor TermTwo ; 156 | # | 157 | # -> ['*', postfix_expr] 158 | 159 | # input_: [Factor, ['/', [head, rest_of_postfix_expr]]] 160 | # |---------------------------| 161 | # post_expr (type: list) 162 | # 163 | # output: [Factor, head, '/', rest_of_postfix_expr] 164 | 165 | postfix_expr = input_[1][1] 166 | 167 | postfix_expr = postfix_expr[:] # shallow copy 168 | 169 | head = postfix_expr.pop(0) 170 | operator = input_[1][0] 171 | 172 | postfix_expr.insert(0, operator) 173 | postfix_expr.insert(0, head) 174 | postfix_expr.insert(0, input_[0]) 175 | 176 | return postfix_expr 177 | 178 | def Expr(self, input_): 179 | if isinstance(input_[1], Epsilon): 180 | # Expr -> Term ExprTwo ; 181 | # | 182 | # -> ϵ 183 | return input_[0] 184 | 185 | # Expr -> Term ExprTwo ; 186 | # | 187 | # -> [operator, postfix_expr] 188 | 189 | # input_: [Factor, ['/', [head, rest_of_postfix_expr]]] 190 | # |---------------------------| 191 | # post_expr (type: list) 192 | # 193 | # output: [Factor, head, '/', rest_of_postfix_expr] 194 | postfix_expr = input_[1][1] 195 | 196 | postfix_expr = postfix_expr[:] # shallow copy 197 | 198 | head = postfix_expr.pop(0) 199 | 200 | operator = input_[1][0] 201 | 202 | postfix_expr.insert(0, operator) 203 | postfix_expr.insert(0, head) 204 | postfix_expr.insert(0, input_[0]) 205 | 206 | return postfix_expr 207 | 208 | def Goal(self, input_): 209 | expr = input_[0] 210 | flat_postfix_expr = self.flat_nested_postfix_list(expr) 211 | 212 | return flat_postfix_expr 213 | 214 | def Start(self, input_): 215 | return input_[0] 216 | -------------------------------------------------------------------------------- /demo/template_engine/.gitignore: -------------------------------------------------------------------------------- 1
| syntax.graphml 2 | syntax.yaml -------------------------------------------------------------------------------- /demo/template_engine/README.md: -------------------------------------------------------------------------------- 1 | # Demo project: template engine 2 | 3 | ## Final goal 4 | Implement a template system similar to jinja (https://github.com/pallets/jinja) or inja (https://github.com/pantor/inja). 5 | 6 | ## Current progress 7 | * Variable substitution is implemented 8 | 9 | ## Usage example 10 | 11 | ```python 12 | from demo.template_engine.render_with_string import render_with_string 13 | 14 | result = render_with_string("HELLO,{{ name }}", {"name": "Xiaoquan"}) 15 | print(result) 16 | ``` 17 | 18 | Output 19 | 20 | ```text 21 | HELLO,Xiaoquan 22 | ``` 23 | 24 | ## Tests 25 | See `render_with_string.py` and `render_with_tokens.py` -------------------------------------------------------------------------------- /demo/template_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/demo/template_engine/__init__.py -------------------------------------------------------------------------------- /demo/template_engine/render_engine.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import SkeletonParser 2 | from MicroCompiler.parser_evaluator import ParserEvaluator 3 | from MicroCompiler.parser_evaluator_builder import build_parser_evaluator 4 | 5 | 6 | def render_engine(grammar_file, token_list, user_level_parser, graph_file=None): 7 | sp = SkeletonParser(grammar_file, token_list) 8 | sp.parse() 9 | 10 | topological_ordered_list = build_parser_evaluator(sp.call_stack, graph_file) 11 | 12 | parser_evaluator = ParserEvaluator(user_level_parser) 13 | result = parser_evaluator.eval(topological_ordered_list) 14 | 15 | return result 16 | -------------------------------------------------------------------------------- /demo/template_engine/render_with_string.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 2 | from MicroCompiler.lexer.lexer import lex_analysis 3 | from demo.template_engine.render_with_tokens import render_with_tokens 4 | from demo.template_engine.user_level_lexer_define import lexer_define 5 | 6 | 7 | def render_with_string(input_string, data): 8 | raw_token_list = [i[1] for i in lex_analysis(input_string, lexer_define)] 9 | # remove whitespace tokens 10 | token_list = list( 11 | filter(lambda x: not isinstance(x, WhiteSpaceToken), raw_token_list) 12 | ) 13 | # append EOF token 14 | token_list.append(Token("")) 15 | 16 | return render_with_tokens(token_list, data) 17 | 18 | 19 | if __name__ == "__main__": 20 | input_string = "HELLO,{{ name }}" 21 | 22 | result = render_with_string(input_string, {"name": "Xiaoquan"}) 23 | 24 | print(result) 25 | -------------------------------------------------------------------------------- /demo/template_engine/render_with_tokens.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.ParserGenerator.Generator import Generator 2 | from MicroCompiler.SkeletonParser import Token 3 | from demo.template_engine.render_engine import render_engine 4 | from demo.template_engine.user_level_parser import Parser 5 | 6 | 7 | def render_with_tokens(token_list, data): 8 | # BNF to LL1 9 | g = Generator("syntax.mbnf") 10 | g.generate() 11 | g.write_yaml("syntax.yaml") 12 | 13 | # Node walker 14
| user_level_parser = Parser(data) 15 | 16 | return render_engine( 17 | "syntax.yaml", token_list, user_level_parser, "syntax.graphml" 18 | ) 19 | 20 | 21 | if __name__ == "__main__": 22 | # equal to: `Hello, {{ name }}` 23 | token_list = [ 24 | Token("const", "Hello, "), 25 | Token("{{", None), 26 | Token("var", "name"), 27 | Token("}}", None), 28 | Token(""), 29 | ] 30 | 31 | result = render_with_tokens(token_list, {"name": "Xiaoquan"}) 32 | print(result) 33 | -------------------------------------------------------------------------------- /demo/template_engine/syntax.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr -> Term TermPlus 3 | | ϵ ; 4 | TermPlus -> Term TermPlus 5 | | ϵ ; 6 | Term -> 'const' 7 | | Block ; 8 | Block -> VarBlock ; 9 | VarBlock -> '{{' 'var' '}}' ; 10 | -------------------------------------------------------------------------------- /demo/template_engine/user_level_lexer_define.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 2 | 3 | lexer_define = [ 4 | # token type, token regex, token action 5 | [ 6 | "var", 7 | ( 8 | r"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)+" 9 | r"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|0|1|2|3|4|5|6|7|8|9)*" 10 | ), 11 | lambda x: Token("var", str(x)), 12 | ], 13 | [ 14 | "const", 15 | r"(A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|,)+", 16 | lambda x: Token("const", str(x)), 17 | ], 18 | ["{{", "{{", lambda x: Token("{{", None)], 19 | ["}}", "}}", lambda x: Token("}}", None)], 20 | ["white space", r" +", lambda x: WhiteSpaceToken(x)], 21 | ] 22 | -------------------------------------------------------------------------------- /demo/template_engine/user_level_parser.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.parser_builder import ParserBuilder 2 | 3 | pb = ParserBuilder() 4 | pb.add_generator("get_fallback_method", "fallback") 5 | clazz = pb.generate() 6 | 7 | 8 | class Parser(clazz): 9 | def __init__(self, data: dict): 10 | self.method_name_mapping = { 11 | "": "Start", 12 | } 13 | self.data = data 14 | 15 | def fallback(self, input_): 16 | return "" 17 | 18 | def get_legal_method_name(self, method_name): 19 | return ( 20 | self.method_name_mapping[method_name] 21 | if method_name in self.method_name_mapping 22 | else method_name 23 | ) 24 | 25 | def const(self, input_): 26 | return input_[0] 27 | 28 | def var(self, input_): 29 | key = input_[0] 30 | return self.data[key] 31 | 32 | def VarBlock(self, input_): 33 | return input_[1] 34 | 35 | def Block(self, input_): 36 | return input_[0] 37 | 38 | def TermPlus(self, input_): 39 | if len(input_) == 1: 40 | return "" 41 | else: 42 | return "".join(input_) 43 | 44 | def Term(self, input_): 45 | return input_[0] 46 | 47 | def Expr(self, input_): 48 | if len(input_) == 1: 49 | return "" 50 | else: 51 | return "".join(input_) 52 | 53 | def Goal(self, input_): 54 | return input_[0] 55 | 56 | def Start(self, input_): 57 | return input_[0] 58 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # References 2 | * https://interpreterbook.com/ 3 | * http://craftinginterpreters.com/ 4 | * https://compilerbook.com/ -------------------------------------------------------------------------------- /requirements.txt:
-------------------------------------------------------------------------------- 1 | PyYAML 2 | MicroRegEx -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="MicroCompiler", 5 | version="0.0.1", 6 | packages=[ 7 | "MicroCompiler", 8 | "MicroCompiler.Lookahead", 9 | "MicroCompiler.ParserGenerator", 10 | ], 11 | url="https://github.com/howl-anderson/MicroCompiler", 12 | license="MIT", 13 | author="Xiaoquan Kong", 14 | install_requires=["pyyaml", "MicroRegEx"], 15 | author_email="u1mail2me@gmail.com", 16 | description="A micro compiler project that provides LL/LR/LALR syntax parsers", 17 | ) 18 | --------------------------------------------------------------------------------