├── .gitignore ├── MicroCompiler ├── LL1Parser.py ├── Lookahead │ ├── EOF.py │ ├── Epsilon.py │ ├── FirstPlusSet.py │ ├── FirstPlusSet_test.py │ ├── FirstSet.py │ ├── FirstSet_test.py │ ├── FollowSet.py │ ├── FollowSet_test.py │ ├── NonTerminal.py │ ├── SymbolSet.py │ ├── Terminal.py │ ├── TranslationTable.py │ └── __init__.py ├── ParserGenerator │ ├── Generator.py │ ├── Generator_test.py │ ├── Lexeme.py │ ├── Lexer.py │ ├── Lexer_test.py │ ├── Parser.py │ ├── Parser_test.py │ ├── __init__.py │ ├── calculator.mbnf │ └── sample.mbnf ├── Productions.py ├── Productions_test.py ├── SkeletonParser.py ├── SkeletonParser_test.py ├── __init__.py ├── abstract_syntax_tree │ ├── __init__.py │ ├── abstract_syntax_tree.py │ ├── abstract_syntax_tree_test.py │ └── node.py ├── lexer │ ├── README.md │ ├── __init__.py │ ├── demo.py │ ├── lexer.py │ └── user_level_lexer_define.py ├── output.yaml ├── parser_builder.py ├── parser_evaluator.py ├── parser_evaluator_builder.py ├── postfix_expression │ ├── __init__.py │ ├── evaluator.py │ └── operator.py └── sample.yaml ├── README.md ├── demo ├── __init__.py ├── arithmetic_calculator │ ├── README.md │ ├── arithmetic_calculator.py │ ├── calculator.graphml │ ├── calculator.mbnf │ ├── calculator.png │ ├── calculator.yaml │ ├── ll1_grammer_generator.py │ ├── main.py │ ├── main_with_lexer.py │ ├── output.yaml │ ├── terminal_user_interface.py │ ├── test_cases.py │ ├── tests.py │ └── user_level_parser.py └── template_engine │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── render_engine.py │ ├── render_with_string.py │ ├── render_with_tokens.py │ ├── syntax.mbnf │ ├── user_level_lexer_define.py │ └── user_level_parser.py ├── docs └── README.md ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | ### JetBrains template 106 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 107 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 108 | 109 | # User-specific stuff: 110 | .idea/**/workspace.xml 111 | .idea/**/tasks.xml 112 | .idea/dictionaries 113 | 114 | # Sensitive or high-churn files: 115 | .idea/**/dataSources/ 116 | .idea/**/dataSources.ids 117 | .idea/**/dataSources.xml 118 | .idea/**/dataSources.local.xml 119 | .idea/**/sqlDataSources.xml 120 | .idea/**/dynamic.xml 121 | .idea/**/uiDesigner.xml 122 | 123 | # Gradle: 124 | .idea/**/gradle.xml 125 | .idea/**/libraries 126 | 127 | # CMake 128 | cmake-build-debug/ 129 | 130 | # Mongo Explorer plugin: 131 | .idea/**/mongoSettings.xml 132 | 133 | ## File-based project format: 134 | *.iws 135 | 136 | ## Plugin-specific files: 137 | 138 | # IntelliJ 139 | out/ 140 | 141 | # mpeltonen/sbt-idea plugin 142 | .idea_modules/ 143 | 144 | # JIRA plugin 145 | atlassian-ide-plugin.xml 146 | 147 | # Cursive Clojure plugin 148 | .idea/replstate.xml 149 | 150 | # Crashlytics plugin (for Android Studio and IntelliJ) 151 | com_crashlytics_export_strings.xml 152 | crashlytics.properties 153 | crashlytics-build.properties 154 | fabric.properties 155 | ### Emacs template 156 | # -*- mode: gitignore; -*- 157 | *~ 158 | \#*\# 159 | /.emacs.desktop 160 | /.emacs.desktop.lock 161 | *.elc 162 | auto-save-list 163 | tramp 164 | .\#* 165 | 166 | # Org-mode 167 | .org-id-locations 168 | *_archive 169 | 170 | # flymake-mode 171 | *_flymake.* 172 | 173 | # eshell files 174 | /eshell/history 175 | /eshell/lastdir 176 | 177 | # elpa packages 178 | /elpa/ 179 | 180 | # reftex files 181 | *.rel 182 | 183 | # AUCTeX auto folder 184 | /auto/ 185 | 186 | # cask packages 187 | .cask/ 188 | dist/ 189 | 190 | # Flycheck 191 | flycheck_*.el 192 | 193 | # server auth directory 194 | /server/ 195 | 196 | # projectiles files 197 | .projectile 198 | 199 | # directory configuration 200 | .dir-locals.el 201 | ### Vim template 202 | # Swap 203 | [._]*.s[a-v][a-z] 204 | [._]*.sw[a-p] 205 | [._]s[a-v][a-z] 206 | [._]sw[a-p] 207 | 208 | # Session 209 | Session.vim 210 | 211 | # Temporary 212 | .netrwhist 213 | *~ 214 | # Auto-generated tag files 215 | tags 216 | 217 | .idea 218 | 219 | -------------------------------------------------------------------------------- /MicroCompiler/LL1Parser.py: 
-------------------------------------------------------------------------------- 1 | class LL1Parser: 2 | def __init__(self, translation_table, production, lexer_list): 3 | self.translation_table = translation_table 4 | self.production = production 5 | self.lexer_list = lexer_list 6 | 7 | def match(self): 8 | for lexer in self.lexer_list: 9 | pass 10 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/EOF.py: -------------------------------------------------------------------------------- 1 | class EOF: 2 | def __str__(self): 3 | return "" 4 | 5 | @property 6 | def value(self): 7 | return "" 8 | 9 | def __hash__(self): 10 | return hash("") 11 | 12 | def __eq__(self, other): 13 | if not isinstance(other, self.__class__): 14 | return False 15 | return True 16 | 17 | def __repr__(self): 18 | return "{}()".format(self.__class__.__name__) 19 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/Epsilon.py: -------------------------------------------------------------------------------- 1 | class Epsilon: 2 | def __str__(self): 3 | return "ϵ" 4 | 5 | @property 6 | def value(self): 7 | return "ϵ" 8 | 9 | def __hash__(self): 10 | return hash("ϵ") 11 | 12 | def __eq__(self, other): 13 | if not isinstance(other, self.__class__): 14 | return False 15 | return True 16 | 17 | def __repr__(self): 18 | return "{}()".format(self.__class__.__name__) 19 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstPlusSet.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.FirstSet import FirstSet 2 | from MicroCompiler.Lookahead.FollowSet import FollowSet 3 | 4 | 5 | class FirstPlusSet: 6 | def __init__(self, production): 7 | self.first_set = None 8 | self.follow_set = None 9 | self.first_set_mapping = None 10 | self.production = production 11 | 12 | self.first_plus_set = {} 13 | self.first_plus_set_mapping = {} 14 | 15 | def compute(self): 16 | if self.first_set is None: 17 | fs = FirstSet(self.production) 18 | fs.compute() 19 | self.first_set = fs.first_set 20 | self.first_set_mapping = fs.first_set_mapping 21 | 22 | if self.follow_set is None: 23 | fs = FollowSet(self.production, self.first_set) 24 | fs.compute() 25 | self.follow_set = fs.follow_set 26 | 27 | for lhs_symbol in self.production: 28 | productions = self.production[lhs_symbol] 29 | for production_index, production in enumerate(productions): 30 | symbol_set = self.first_set_mapping[lhs_symbol][production_index] 31 | 32 | self.first_plus_set_mapping.setdefault(lhs_symbol, {}) 33 | self.first_plus_set_mapping[lhs_symbol].setdefault( 34 | production_index, set() 35 | ) 36 | first_plus_set = self.first_plus_set_mapping[lhs_symbol][ 37 | production_index 38 | ] 39 | if symbol_set.include_epsilon: 40 | first_plus_set.update(symbol_set.remove_epsilon()) 41 | first_plus_set.update(self.follow_set[lhs_symbol]) 42 | else: 43 | first_plus_set.update(symbol_set) 44 | 45 | self.first_plus_set.setdefault(lhs_symbol, {}) 46 | for symbol in first_plus_set: 47 | if symbol in self.first_plus_set[lhs_symbol]: 48 | msg = "Lookahead {} index {} already exists in {}" 49 | raise ValueError( 50 | msg.format( 51 | symbol, 52 | production_index, 53 | self.first_plus_set[lhs_symbol], 54 | ) 55 | ) 56 | self.first_plus_set[lhs_symbol][symbol] = production_index 57 |
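A note on what FirstPlusSet computes, before the tests below: FIRST+(A → β) is FIRST(β) when β cannot derive ϵ, and FIRST(β) ∪ FOLLOW(A) when it can; `compute` builds exactly that union per production index, and raises ValueError when two productions of the same non-terminal claim the same lookahead terminal, i.e. when the grammar is not LL(1). A minimal usage sketch follows (an editorial example, not a file in this repository; it only uses classes shown in this dump):

```python
from MicroCompiler.Productions import Productions
from MicroCompiler.Lookahead.NonTerminal import NonTerminal
from MicroCompiler.Lookahead.Terminal import Terminal, CHARACTER
from MicroCompiler.Lookahead.Epsilon import Epsilon
from MicroCompiler.Lookahead.FirstPlusSet import FirstPlusSet

# Statement -> Expression ';' ;   Expression -> '+' | ϵ ;
statement = NonTerminal("Statement")
expression = NonTerminal("Expression")
semicolon = Terminal(CHARACTER, ";")
plus = Terminal(CHARACTER, "+")

productions = Productions(
    {statement: [[expression, semicolon]], expression: [[plus], [Epsilon()]]}
)
productions.set_start_symbol(statement)

fps = FirstPlusSet(productions)
fps.compute()

# Expression is nullable, so FOLLOW(Expression) = {';'} is folded into its
# FIRST+ set: {'+' -> production 0, ';' -> production 1 (the ϵ alternative)}
print(fps.first_plus_set[expression])
```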
-------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstPlusSet_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pprint 3 | 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.Epsilon import Epsilon 6 | from MicroCompiler.Lookahead.EOF import EOF 7 | from MicroCompiler.Lookahead.FirstPlusSet import FirstPlusSet 8 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 9 | from MicroCompiler.Lookahead.Terminal import CHARACTER 10 | from MicroCompiler.Lookahead.Terminal import Terminal 11 | 12 | 13 | class TestFirstPlusSet(unittest.TestCase): 14 | def test_conception(self): 15 | statement = NonTerminal("Statement") 16 | expression = NonTerminal("Expression") 17 | semicolon = Terminal(CHARACTER, ";") 18 | plus = Terminal(CHARACTER, "+") 19 | minus = Terminal(CHARACTER, "-") 20 | 21 | production = Productions( 22 | {statement: [[expression, semicolon]], expression: [[plus], [minus]]} 23 | ) 24 | 25 | production.set_start_symbol(statement) 26 | 27 | fs = FirstPlusSet(production) 28 | fs.compute() 29 | 30 | real_result = fs.first_plus_set 31 | 32 | expect_result = { 33 | NonTerminal("Expression"): { 34 | Terminal(CHARACTER, "+"): 0, 35 | Terminal(CHARACTER, "-"): 1, 36 | }, 37 | NonTerminal("Statement"): { 38 | Terminal(CHARACTER, "+"): 0, 39 | Terminal(CHARACTER, "-"): 0, 40 | }, 41 | } 42 | 43 | self.assertEqual(real_result, expect_result) 44 | 45 | def test_epsilon(self): 46 | statement = NonTerminal("Statement") 47 | expression = NonTerminal("Expression") 48 | epsilon = Epsilon() 49 | semicolon = Terminal(CHARACTER, ";") 50 | plus = Terminal(CHARACTER, "+") 51 | minus = Terminal(CHARACTER, "-") 52 | 53 | production = Productions( 54 | { 55 | statement: [[expression, semicolon]], 56 | expression: [[plus], [minus], [epsilon]], 57 | } 58 | ) 59 | 60 | production.set_start_symbol(statement) 61 | 62 | fs = FirstPlusSet(production) 63 | fs.compute() 64 | 65 | real_result = fs.first_plus_set 66 | 67 | expect_result = { 68 | NonTerminal("Statement"): { 69 | Terminal(CHARACTER, "+"): 0, 70 | Terminal(CHARACTER, "-"): 0, 71 | Terminal(CHARACTER, ";"): 0, 72 | }, 73 | NonTerminal("Expression"): { 74 | Terminal(CHARACTER, "+"): 0, 75 | Terminal(CHARACTER, "-"): 1, 76 | Terminal(CHARACTER, ";"): 2, 77 | }, 78 | } 79 | 80 | self.assertEqual(real_result, expect_result) 81 | 82 | def test_real(self): 83 | """ 84 | Goal -> Expr ; 85 | Expr -> Term ExprTwo ; 86 | ExprTwo -> '+' Term ExprTwo 87 | | '-' Term ExprTwo 88 | | ϵ ; 89 | Term -> Factor TermTwo ; 90 | TermTwo -> '*' Factor TermTwo 91 | | '/' Factor TermTwo 92 | | ϵ ; 93 | Factor -> '(' Expr ')' 94 | | 'num' 95 | | 'name' ; 96 | """ 97 | 98 | """ 99 | Extended Backus-Naur form: 100 | 101 | Goal -> Expr 102 | Expr -> Term ExprTwo 103 | ExprTwo -> + Term ExprTwo | - Term ExprTwo | EPSILON 104 | Term -> Factor TermTwo 105 | TermTwo -> * Factor TermTwo | / Factor TermTwo | EPSILON 106 | Factor -> ( Expr ) | num | name 107 | """ 108 | goal = NonTerminal("Goal") 109 | expr = NonTerminal("Expr") 110 | expr_two = NonTerminal("ExprTwo") 111 | term = NonTerminal("Term") 112 | term_two = NonTerminal("TermTwo") 113 | factor = NonTerminal("Factor") 114 | epsilon = Epsilon() 115 | name = Terminal(CHARACTER, "name") 116 | num = Terminal(CHARACTER, "num") 117 | plus = Terminal(CHARACTER, "+") 118 | minus = Terminal(CHARACTER, "-") 119 | div = Terminal(CHARACTER, "/") 120 | asteroid = Terminal(CHARACTER, 
"*") 121 | open_parenthesis = Terminal(CHARACTER, "(") 122 | close_parenthesis = Terminal(CHARACTER, ")") 123 | eof = EOF() 124 | 125 | production = Productions( 126 | { 127 | goal: [[expr]], 128 | expr: [[term, expr_two]], 129 | expr_two: [[plus, term, expr_two], [minus, term, expr_two], [epsilon]], 130 | term: [[factor, term_two]], 131 | term_two: [ 132 | [asteroid, factor, term_two], 133 | [div, factor, term_two], 134 | [epsilon], 135 | ], 136 | factor: [[open_parenthesis, expr, close_parenthesis], [num], [name]], 137 | } 138 | ) 139 | 140 | production.set_start_symbol(goal) 141 | 142 | fs = FirstPlusSet(production) 143 | fs.compute() 144 | 145 | real_result = fs.first_plus_set 146 | 147 | expect_result = { 148 | NonTerminal("Goal"): { 149 | Terminal(CHARACTER, "name"): 0, 150 | Terminal(CHARACTER, "num"): 0, 151 | Terminal(CHARACTER, "("): 0, 152 | }, 153 | NonTerminal("Expr"): { 154 | Terminal(CHARACTER, "name"): 0, 155 | Terminal(CHARACTER, "num"): 0, 156 | Terminal(CHARACTER, "("): 0, 157 | }, 158 | NonTerminal("ExprTwo"): { 159 | EOF(): 2, 160 | Terminal(CHARACTER, "+"): 0, 161 | Terminal(CHARACTER, "-"): 1, 162 | Terminal(CHARACTER, ")"): 2, 163 | }, 164 | NonTerminal("Term"): { 165 | Terminal(CHARACTER, "name"): 0, 166 | Terminal(CHARACTER, "num"): 0, 167 | Terminal(CHARACTER, "("): 0, 168 | }, 169 | NonTerminal("TermTwo"): { 170 | EOF(): 2, 171 | Terminal(CHARACTER, "+"): 2, 172 | Terminal(CHARACTER, "-"): 2, 173 | Terminal(CHARACTER, "/"): 1, 174 | Terminal(CHARACTER, "*"): 0, 175 | Terminal(CHARACTER, ")"): 2, 176 | }, 177 | NonTerminal("Factor"): { 178 | Terminal(CHARACTER, "name"): 2, 179 | Terminal(CHARACTER, "num"): 1, 180 | Terminal(CHARACTER, "("): 0, 181 | }, 182 | } 183 | 184 | self.assertEqual(real_result, expect_result) 185 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstSet.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from MicroCompiler.Productions import Productions 4 | from MicroCompiler.Lookahead.SymbolSet import SymbolSet 5 | 6 | 7 | class FirstSet: 8 | def __init__(self, production: Productions): 9 | self.first_set = {} 10 | self.first_set_table = {} 11 | self.first_set_mapping = {} 12 | self.production = production 13 | 14 | def compute(self): 15 | # compute all terminal's first-set first 16 | for symbol in self.production.terminals: 17 | self.first_set[symbol] = SymbolSet({symbol}) 18 | 19 | epsilon = self.production.epsilon 20 | self.first_set[epsilon] = SymbolSet({epsilon}) 21 | 22 | eof = self.production.eof 23 | self.first_set[eof] = SymbolSet({eof}) 24 | 25 | # init non-terminal's first-set to empty 26 | for symbol in self.production.non_terminals: 27 | self.first_set[symbol] = SymbolSet() 28 | 29 | old_first_set = copy.deepcopy(self.first_set) 30 | 31 | while True: 32 | for symbol in self.production.non_terminals: 33 | self.compute_symbol(symbol) 34 | if self.first_set == old_first_set: 35 | break 36 | else: 37 | old_first_set = copy.deepcopy(self.first_set) 38 | 39 | def compute_symbol(self, lsh_symbol): 40 | productions = self.production[lsh_symbol] 41 | 42 | for production_index, production in enumerate(productions): 43 | rhs = SymbolSet() 44 | for symbol_index, rhs_symbol in enumerate(production): 45 | if symbol_index != len(production) - 1: 46 | rhs.update(self.first_set[rhs_symbol].remove_epsilon()) 47 | else: 48 | # keep epsilon if this is the last symbol in the production 49 | rhs.update(self.first_set[rhs_symbol]) 50 
| 51 | if not self.first_set[rhs_symbol].include_epsilon: 52 | break 53 | 54 | self.first_set[lsh_symbol].update(rhs) 55 | 56 | self.first_set_mapping.setdefault(lsh_symbol, {}) 57 | self.first_set_mapping[lsh_symbol][production_index] = rhs 58 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FirstSet_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pprint 3 | 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.Epsilon import Epsilon 6 | from MicroCompiler.Lookahead.EOF import EOF 7 | from MicroCompiler.Lookahead.FirstSet import FirstSet 8 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 9 | from MicroCompiler.Lookahead.Terminal import CHARACTER 10 | from MicroCompiler.Lookahead.Terminal import Terminal 11 | from MicroCompiler.Lookahead.SymbolSet import SymbolSet 12 | 13 | 14 | class TestFirstSet(unittest.TestCase): 15 | def test_conception(self): 16 | statement = NonTerminal("Statement") 17 | expression = NonTerminal("Expression") 18 | semicolon = Terminal(CHARACTER, ";") 19 | plus = Terminal(CHARACTER, "+") 20 | minus = Terminal(CHARACTER, "-") 21 | 22 | production = Productions( 23 | {statement: [[expression, semicolon]], expression: [[plus], [minus]]} 24 | ) 25 | 26 | production.set_start_symbol(statement) 27 | 28 | fs = FirstSet(production) 29 | fs.compute() 30 | print(fs.first_set) 31 | 32 | def test_epsilon(self): 33 | statement = NonTerminal("Statement") 34 | expression = NonTerminal("Expression") 35 | epsilon = Epsilon() 36 | semicolon = Terminal(CHARACTER, ";") 37 | plus = Terminal(CHARACTER, "+") 38 | minus = Terminal(CHARACTER, "-") 39 | 40 | production = Productions( 41 | { 42 | statement: [[expression, semicolon]], 43 | expression: [[plus], [minus], [epsilon]], 44 | } 45 | ) 46 | 47 | production.set_start_symbol(statement) 48 | 49 | fs = FirstSet(production) 50 | fs.compute() 51 | print(fs.first_set) 52 | 53 | print(fs.first_set_table) 54 | print(fs.first_set_mapping) 55 | 56 | def test_real(self): 57 | """ 58 | Goal -> Expr ; 59 | Expr -> Term ExprTwo ; 60 | ExprTwo -> '+' Term ExprTwo 61 | | '-' Term ExprTwo 62 | | ϵ ; 63 | Term -> Factor TermTwo ; 64 | TermTwo -> '*' Factor TermTwo 65 | | '/' Factor TermTwo 66 | | ϵ ; 67 | Factor -> '(' Expr ')' 68 | | 'num' 69 | | 'name' ; 70 | """ 71 | 72 | """ 73 | Extended Backus-Naur form: 74 | 75 | Goal -> Expr 76 | Expr -> Term ExprTwo 77 | ExprTwo -> + Term ExprTwo | - Term ExprTwo | EPSILON 78 | Term -> Factor TermTwo 79 | TermTwo -> * Factor TermTwo | / Factor TermTwo | EPSILON 80 | Factor -> ( Expr ) | num | name 81 | """ 82 | goal = NonTerminal("Goal") 83 | expr = NonTerminal("Expr") 84 | expr_two = NonTerminal("ExprTwo") 85 | term = NonTerminal("Term") 86 | term_two = NonTerminal("TermTwo") 87 | factor = NonTerminal("Factor") 88 | epsilon = Epsilon() 89 | name = Terminal(CHARACTER, "name") 90 | num = Terminal(CHARACTER, "num") 91 | plus = Terminal(CHARACTER, "+") 92 | minus = Terminal(CHARACTER, "-") 93 | div = Terminal(CHARACTER, "/") 94 | asteroid = Terminal(CHARACTER, "*") 95 | open_parenthesis = Terminal(CHARACTER, "(") 96 | close_parenthesis = Terminal(CHARACTER, ")") 97 | eof = EOF() 98 | 99 | production = Productions( 100 | { 101 | goal: [[expr]], 102 | expr: [[term, term_two]], 103 | expr_two: [[plus, term, expr_two], [minus, term, expr_two], [epsilon]], 104 | term: [[factor, term_two]], 105 | term_two: [ 106 | [asteroid, factor, term_two], 
107 | [div, factor, term_two], 108 | [epsilon], 109 | ], 110 | factor: [[open_parenthesis, expr, close_parenthesis], [num], [name]], 111 | } 112 | ) 113 | 114 | production.set_start_symbol(goal) 115 | 116 | fs = FirstSet(production) 117 | fs.compute() 118 | real_result = fs.first_set 119 | 120 | expect_result = { 121 | eof: SymbolSet({eof}), 122 | plus: SymbolSet({plus}), 123 | minus: SymbolSet({minus}), 124 | epsilon: SymbolSet({epsilon}), 125 | asteroid: SymbolSet({asteroid}), 126 | div: SymbolSet({div}), 127 | open_parenthesis: SymbolSet({open_parenthesis}), 128 | close_parenthesis: SymbolSet({close_parenthesis}), 129 | num: SymbolSet({num}), 130 | name: SymbolSet({name}), 131 | expr_two: SymbolSet({plus, minus, epsilon}), 132 | term_two: SymbolSet({asteroid, div, epsilon}), 133 | factor: SymbolSet({open_parenthesis, num, name}), 134 | term: SymbolSet({open_parenthesis, num, name}), 135 | expr: SymbolSet({open_parenthesis, num, name}), 136 | goal: SymbolSet({open_parenthesis, num, name}), 137 | } 138 | 139 | # pprint.pprint(real_result) 140 | 141 | self.maxDiff = None 142 | self.assertEqual(real_result, expect_result) 143 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FollowSet.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.EOF import EOF 6 | from MicroCompiler.Lookahead.SymbolSet import SymbolSet 7 | 8 | 9 | class FollowSet: 10 | def __init__(self, production: Productions, first_set): 11 | self.production = production 12 | self.first_set = first_set 13 | self.follow_set = {} 14 | self.follow_set_table = {} 15 | 16 | def compute(self): 17 | # init the follow_set 18 | for symbol in self.production.non_terminals: 19 | self.follow_set[symbol] = SymbolSet() 20 | 21 | self.follow_set[self.production.start_symbol] = {EOF()} 22 | 23 | old_follow_set = copy.deepcopy(self.follow_set) 24 | 25 | while True: 26 | for symbol in self.production: 27 | self.compute_symbol(symbol) 28 | 29 | if old_follow_set == self.follow_set: 30 | break 31 | else: 32 | old_follow_set = copy.deepcopy(self.follow_set) 33 | 34 | def compute_symbol(self, lhs_symbol): 35 | production_set = self.production[lhs_symbol] 36 | for production in production_set: 37 | trailer = self.follow_set[lhs_symbol] 38 | for rhs_symbol in reversed(production): 39 | if isinstance(rhs_symbol, NonTerminal): 40 | self.follow_set[rhs_symbol] = self.follow_set[rhs_symbol] | trailer 41 | 42 | if self.first_set[rhs_symbol].include_epsilon: 43 | trailer = trailer | self.first_set[rhs_symbol].remove_epsilon() 44 | else: 45 | trailer = self.first_set[rhs_symbol] 46 | else: 47 | trailer = SymbolSet({rhs_symbol}) 48 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/FollowSet_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pprint 3 | 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.Epsilon import Epsilon 6 | from MicroCompiler.Lookahead.EOF import EOF 7 | from MicroCompiler.Lookahead.FirstSet import FirstSet 8 | from MicroCompiler.Lookahead.FollowSet import FollowSet 9 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 10 | from MicroCompiler.Lookahead.Terminal import CHARACTER 11 | from 
MicroCompiler.Lookahead.Terminal import Terminal 12 | 13 | 14 | class TestFollowSet(unittest.TestCase): 15 | def test_conception(self): 16 | statement = NonTerminal("Statement") 17 | expression = NonTerminal("Expression") 18 | semicolon = Terminal(CHARACTER, ";") 19 | plus = Terminal(CHARACTER, "+") 20 | minus = Terminal(CHARACTER, "-") 21 | 22 | production = Productions( 23 | {statement: [[expression, semicolon]], expression: [[plus], [minus]]} 24 | ) 25 | 26 | production.set_start_symbol(statement) 27 | 28 | fs = FirstSet(production) 29 | fs.compute() 30 | first_set = fs.first_set 31 | 32 | fs = FollowSet(production, first_set) 33 | fs.compute() 34 | print(fs.follow_set) 35 | 36 | def test_epsilon(self): 37 | statement = NonTerminal("Statement") 38 | expression = NonTerminal("Expression") 39 | epsilon = Epsilon() 40 | semicolon = Terminal(CHARACTER, ";") 41 | plus = Terminal(CHARACTER, "+") 42 | minus = Terminal(CHARACTER, "-") 43 | 44 | production = Productions( 45 | { 46 | statement: [[expression, semicolon]], 47 | expression: [[plus], [minus], [epsilon]], 48 | } 49 | ) 50 | 51 | production.set_start_symbol(statement) 52 | 53 | fs = FirstSet(production) 54 | fs.compute() 55 | first_set = fs.first_set 56 | 57 | fs = FollowSet(production, first_set) 58 | fs.compute() 59 | print(fs.follow_set) 60 | 61 | def test_real(self): 62 | """ 63 | Goal -> Expr ; 64 | Expr -> Term ExprTwo ; 65 | ExprTwo -> '+' Term ExprTwo 66 | | '-' Term ExprTwo 67 | | ϵ ; 68 | Term -> Factor TermTwo ; 69 | TermTwo -> '*' Factor TermTwo 70 | | '/' Factor TermTwo 71 | | ϵ ; 72 | Factor -> '(' Expr ')' 73 | | 'num' 74 | | 'name' ; 75 | """ 76 | 77 | """ 78 | Extended Backus-Naur form: 79 | 80 | Goal -> Expr 81 | Expr -> Term ExprTwo 82 | ExprTwo -> + Term ExprTwo | - Term ExprTwo | EPSILON 83 | Term -> Factor TermTwo 84 | TermTwo -> * Factor TermTwo | / Factor TermTwo | EPSILON 85 | Factor -> ( Expr ) | num | name 86 | """ 87 | goal = NonTerminal("Goal") 88 | expr = NonTerminal("Expr") 89 | expr_two = NonTerminal("ExprTwo") 90 | term = NonTerminal("Term") 91 | term_two = NonTerminal("TermTwo") 92 | factor = NonTerminal("Factor") 93 | epsilon = Epsilon() 94 | name = Terminal(CHARACTER, "name") 95 | num = Terminal(CHARACTER, "num") 96 | plus = Terminal(CHARACTER, "+") 97 | minus = Terminal(CHARACTER, "-") 98 | div = Terminal(CHARACTER, "/") 99 | asteroid = Terminal(CHARACTER, "*") 100 | open_parenthesis = Terminal(CHARACTER, "(") 101 | close_parenthesis = Terminal(CHARACTER, ")") 102 | eof = EOF() 103 | 104 | production = Productions( 105 | { 106 | goal: [[expr]], 107 | expr: [[term, expr_two]], 108 | expr_two: [[plus, term, expr_two], [minus, term, expr_two], [epsilon]], 109 | term: [[factor, term_two]], 110 | term_two: [ 111 | [asteroid, factor, term_two], 112 | [div, factor, term_two], 113 | [epsilon], 114 | ], 115 | factor: [[open_parenthesis, expr, close_parenthesis], [num], [name]], 116 | } 117 | ) 118 | 119 | production.set_start_symbol(goal) 120 | 121 | fs = FirstSet(production) 122 | fs.compute() 123 | first_set = fs.first_set 124 | 125 | fs = FollowSet(production, first_set) 126 | fs.compute() 127 | real_result = fs.follow_set 128 | 129 | expect_result = { 130 | NonTerminal("Goal"): {EOF()}, 131 | NonTerminal("Expr"): {Terminal(CHARACTER, ")"), EOF()}, 132 | NonTerminal("ExprTwo"): {Terminal(CHARACTER, ")"), EOF()}, 133 | NonTerminal("Term"): { 134 | EOF(), 135 | Terminal(CHARACTER, "+"), 136 | Terminal(CHARACTER, "-"), 137 | Terminal(CHARACTER, ")"), 138 | }, 139 | NonTerminal("TermTwo"): { 140 | EOF(), 141 
| Terminal(CHARACTER, "+"), 142 | Terminal(CHARACTER, "-"), 143 | Terminal(CHARACTER, ")"), 144 | }, 145 | NonTerminal("Factor"): { 146 | EOF(), 147 | Terminal(CHARACTER, "+"), 148 | Terminal(CHARACTER, "-"), 149 | Terminal(CHARACTER, "/"), 150 | Terminal(CHARACTER, "*"), 151 | Terminal(CHARACTER, ")"), 152 | }, 153 | } 154 | 155 | self.assertEqual(real_result, expect_result) 156 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/NonTerminal.py: -------------------------------------------------------------------------------- 1 | class NonTerminal: 2 | def __init__(self, name): 3 | self.name = name 4 | 5 | super().__init__() 6 | 7 | @property 8 | def value(self): 9 | return self.name 10 | 11 | def __eq__(self, other): 12 | if not isinstance(other, self.__class__): 13 | return False 14 | if self.name == other.name: 15 | return True 16 | return False 17 | 18 | def __hash__(self): 19 | return hash(self.name) 20 | 21 | def __str__(self): 22 | return self.name 23 | 24 | def __repr__(self): 25 | return "{}('{}')".format(self.__class__.__name__, self.name) 26 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/SymbolSet.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.Epsilon import Epsilon 2 | 3 | 4 | class SymbolSet(set): 5 | def __init__(self, *args, **kwargs): 6 | super().__init__(*args, **kwargs) 7 | 8 | @property 9 | def include_epsilon(self): 10 | return any([i for i in self if isinstance(i, Epsilon)]) 11 | 12 | def remove_epsilon(self): 13 | return self.__class__({i for i in self if not isinstance(i, Epsilon)}) 14 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/Terminal.py: -------------------------------------------------------------------------------- 1 | CHARACTER = "CHARACTER" 2 | 3 | 4 | class Terminal: 5 | def __init__(self, type_=None, data=None): 6 | if type_ is None: 7 | type_ = CHARACTER 8 | self.type_ = type_ 9 | 10 | if data is None: 11 | raise ValueError("value can not be None") 12 | self.data = data 13 | 14 | @property 15 | def value(self): 16 | return str(self.data) 17 | 18 | def __eq__(self, other): 19 | if not isinstance(other, self.__class__): 20 | return False 21 | if self.type_ == other.type_ and self.data == other.data: 22 | return True 23 | return False 24 | 25 | def __hash__(self): 26 | return hash((self.type_, self.data)) 27 | 28 | def __str__(self): 29 | return "'{}'".format(self.data) 30 | 31 | def __repr__(self): 32 | return "{}({}, '{}')".format(self.__class__.__name__, self.type_, self.data) 33 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/TranslationTable.py: -------------------------------------------------------------------------------- 1 | class TranslationTable(dict): 2 | pass 3 | -------------------------------------------------------------------------------- /MicroCompiler/Lookahead/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/Lookahead/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Generator.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | from itertools 
import chain 3 | 4 | import yaml 5 | 6 | from MicroCompiler.ParserGenerator.Lexer import Lexer 7 | from MicroCompiler.ParserGenerator.Parser import Parser 8 | from MicroCompiler.Lookahead.FirstPlusSet import FirstPlusSet 9 | from MicroCompiler.Lookahead.EOF import EOF 10 | from MicroCompiler.Lookahead.Epsilon import Epsilon 11 | 12 | 13 | class Generator: 14 | def __init__(self, input_file): 15 | self.translate_table = {} 16 | self.structure = {} 17 | 18 | self.input_file = input_file 19 | 20 | def generate(self): 21 | with open(self.input_file) as fd: 22 | bnf_string = fd.read() 23 | 24 | lexer = Lexer() 25 | lexer.parse(bnf_string) 26 | 27 | parser = Parser(lexer.token_list) 28 | parser.parse() 29 | productions = parser.generate_production() 30 | 31 | error_marker = "--" 32 | 33 | self.structure = { 34 | "terminals": [i.value for i in productions.terminals], 35 | "non-terminals": [i.name for i in productions.non_terminals], 36 | "eof-marker": "", 37 | "error-marker": error_marker, 38 | "start-symbol": productions.start_symbol.value, 39 | } 40 | 41 | flat_productions = [] 42 | productions_mapping = [] 43 | for lhs_symbol in productions: 44 | production = productions[lhs_symbol] 45 | for k, v in enumerate(production): 46 | productions_mapping.append(frozenset({lhs_symbol.value, k})) 47 | flat_productions.append( 48 | { 49 | lhs_symbol.value: [i.value for i in v] 50 | if not isinstance(v[0], Epsilon) 51 | else [] 52 | } 53 | ) 54 | 55 | self.structure["productions"] = {k: v for k, v in enumerate(flat_productions)} 56 | productions_mapping = {v: k for k, v in enumerate(productions_mapping)} 57 | 58 | fs = FirstPlusSet(productions) 59 | fs.compute() 60 | 61 | first_set_plus = fs.first_plus_set 62 | 63 | for non_terminal in productions.non_terminals: 64 | for terminal in chain(productions.terminals, (EOF(),)): 65 | self.translate_table.setdefault(non_terminal.value, {}) 66 | 67 | if terminal not in first_set_plus[non_terminal]: 68 | # no such translation 69 | self.translate_table[non_terminal.value][ 70 | terminal.value 71 | ] = error_marker 72 | 73 | continue 74 | 75 | inner_index = first_set_plus[non_terminal][terminal] 76 | 77 | look_for = frozenset({non_terminal.value, inner_index}) 78 | if look_for not in productions_mapping: 79 | raise ValueError( 80 | "Terminal {} in {} not in mapping {}".format( 81 | terminal, non_terminal, productions_mapping 82 | ) 83 | ) 84 | 85 | self.translate_table[non_terminal.value][ 86 | terminal.value 87 | ] = productions_mapping[look_for] 88 | 89 | self.structure["table"] = self.translate_table 90 | 91 | return self.structure 92 | 93 | def write_yaml(self, output_file): 94 | with open(output_file, "w") as fd: 95 | yaml.dump(self.structure, fd) 96 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Generator_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.ParserGenerator.Generator import Generator 4 | 5 | 6 | class TestGenerator(unittest.TestCase): 7 | def test_construct_simple(self): 8 | g = Generator("sample.mbnf") 9 | real_result = g.generate() 10 | 11 | # sample.mbnf holds the Goal/Expr expression grammar, so spot-check the generated structure 12 | self.assertEqual(real_result["start-symbol"], "Goal") 13 | self.assertEqual(real_result["error-marker"], "--") 14 | self.assertEqual( 15 | set(real_result["terminals"]), 16 | {"+", "-", "*", "/", "(", ")", "num", "name"}, 17 | ) 18 | self.assertEqual( 19 | set(real_result["non-terminals"]), 20 | {"Goal", "Expr", "ExprTwo", "Term", "TermTwo", "Factor"}, 21 | ) 22 | 23 | g.write_yaml("../output.yaml") 24 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Lexeme.py: -------------------------------------------------------------------------------- 1 | NON_TERMINAL = "NON_TERMINAL" 2 | TERMINAL = "TERMINAL" 3 | PRODUCT = "PRODUCT" 4 | SEMICOLON = "SEMICOLON" 5 | ALTERNATIVE = "ALTERNATIVE" 6 | EPSILON = "EPSILON" 7 | 8 | 9 | class Lexeme: 10 | def __init__(self, type_, value): 11 | self.value = value 12 | self.type_ = type_ 13 | 14 | def __repr__(self): 15 | return "{}({}, '{}')".format(self.__class__.__name__, self.type_, self.value) 16 | 17 | def __str__(self): 18 | return "<{}: {}>".format(self.type_, self.value) 19 | 20 | def __eq__(self, other): 21 | if not isinstance(other, self.__class__): 22 | return False 23 | if self.type_ == other.type_ and self.value == other.value: 24 | return True 25 | return False 26 | 27 | def __hash__(self): 28 | return hash(frozenset({self.value, self.type_})) 29 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Lexer.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.ParserGenerator.Lexeme import Lexeme 2 | from MicroCompiler.ParserGenerator.Lexeme import ( 3 | NON_TERMINAL, 4 | TERMINAL, 5 | PRODUCT, 6 | ALTERNATIVE, 7 | SEMICOLON, 8 | EPSILON, 9 | ) 10 | 11 | 12 | class Lexer: 13 | def __init__(self): 14 | self.token_list = [] 15 | 16 | def parse(self, string_: str): 17 | raw_token_list = string_.split() 18 | for raw_token in raw_token_list: 19 | if raw_token.isalpha(): 20 | if raw_token == "ϵ": 21 | symbol = Lexeme(EPSILON, raw_token) 22 | self.token_list.append(symbol) 23 | else: 24 | non_terminal = Lexeme(NON_TERMINAL, raw_token) 25 | self.token_list.append(non_terminal) 26 | elif raw_token == "|": 27 | terminal = Lexeme(ALTERNATIVE, raw_token) 28 | self.token_list.append(terminal) 29 | elif raw_token == "->": 30 | terminal = Lexeme(PRODUCT, raw_token) 31 | self.token_list.append(terminal) 32 | elif raw_token == ";": 33 | terminal = Lexeme(SEMICOLON, raw_token) 34 | self.token_list.append(terminal) 35 | elif raw_token.startswith("'"): 36 | terminal = Lexeme(TERMINAL, raw_token[1:-1]) 37 | self.token_list.append(terminal) 38 | 39 | else: 40 | raise ValueError("{} is not a valid token".format(raw_token)) 41 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Lexer_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.ParserGenerator.Lexer import Lexer 4 | from MicroCompiler.ParserGenerator.Lexeme import Lexeme 5 | from MicroCompiler.ParserGenerator.Lexeme import ( 6 | NON_TERMINAL, 7 | TERMINAL, 8 | PRODUCT, 9 | SEMICOLON, 10 | ALTERNATIVE, 11 | ) 12 | 13 | 14 | class TestLexer(unittest.TestCase): 15 | def test_simple_case(self): 16 | mbnf = """ 17 | statement -> 18 | expression ';' 19 | ; 20 | 21 | expression -> 22 | 'plus' 23 | | 'minus' 24 | ; 25 | """ 26 | 27 | lexer = Lexer() 28 | lexer.parse(mbnf) 29 | real_result = lexer.token_list 30 | 31 | expect_result = [ 32 |
Lexeme(NON_TERMINAL, "statement"), 33 | Lexeme(PRODUCT, "->"), 34 | Lexeme(NON_TERMINAL, "expression"), 35 | Lexeme(TERMINAL, ";"), 36 | Lexeme(SEMICOLON, ";"), 37 | Lexeme(NON_TERMINAL, "expression"), 38 | Lexeme(PRODUCT, "->"), 39 | Lexeme(TERMINAL, "plus"), 40 | Lexeme(ALTERNATIVE, "|"), 41 | Lexeme(TERMINAL, "minus"), 42 | Lexeme(SEMICOLON, ";"), 43 | ] 44 | 45 | self.assertEqual(real_result, expect_result) 46 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Parser.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 2 | from MicroCompiler.Lookahead.Terminal import Terminal 3 | from MicroCompiler.Lookahead.Epsilon import Epsilon 4 | from MicroCompiler.Productions import Productions 5 | from MicroCompiler.Lookahead.EOF import EOF 6 | from MicroCompiler.ParserGenerator.Lexeme import ( 7 | NON_TERMINAL, 8 | TERMINAL, 9 | PRODUCT, 10 | ALTERNATIVE, 11 | SEMICOLON, 12 | EPSILON, 13 | ) 14 | 15 | 16 | built_in_terminal = () 17 | 18 | # filter function list 19 | terminal_filter_list = () 20 | 21 | 22 | class Parser: 23 | def __init__(self, token_list): 24 | self.token_index = 0 25 | self.token_list = token_list 26 | 27 | self.production_dict = {} 28 | self.start_symbol = None 29 | 30 | super().__init__() 31 | 32 | def _match(self, value): 33 | if ( 34 | self.token_list[self.token_index].type_ == TERMINAL 35 | and self.token_list[self.token_index] == value 36 | ): 37 | self.token_index += 1 38 | return True 39 | else: 40 | # print("{} != {}".format(self.token_list[self.token_index], value)) 41 | return False 42 | 43 | def _match_type(self, type_): 44 | if self.token_list[self.token_index].type_ == type_: 45 | self.token_index += 1 46 | return True 47 | else: 48 | # print("{} != {}".format(self.token_list[self.token_index], type)) 49 | return False 50 | 51 | def _non_terminal(self): 52 | if self.token_index >= len(self.token_list): 53 | return False 54 | 55 | if self.token_list[self.token_index].type_ == NON_TERMINAL: 56 | self.token_index += 1 57 | return True 58 | else: 59 | # print("{} is not NON_TERMINAL".format(self.token_list[self.token_index])) 60 | return False 61 | 62 | def _terminal(self): 63 | if self.token_list[self.token_index].type_ == TERMINAL: 64 | self.token_index += 1 65 | return True 66 | else: 67 | # print("{} is not TERMINAL".format(self.token_list[self.token_index])) 68 | return False 69 | 70 | """ 71 | statement -> production ';' other_production ; 72 | other_production -> statement | ϵ ; 73 | 74 | production -> non_terminal '->' symbols other_symbols ; 75 | other_symbols -> '|' symbols other_symbols | ϵ ; 76 | 77 | symbols -> symbol other_symbol | 'ϵ' ; 78 | other_symbol -> symbol other_symbol | ϵ ; 79 | 80 | symbol -> non_terminal | terminal ; 81 | """ 82 | 83 | def parse(self): 84 | return self._statement() 85 | 86 | def _statement(self): 87 | return ( 88 | self._production() 89 | and self._match_type(SEMICOLON) 90 | and self._other_production() 91 | ) 92 | 93 | def _other_production(self): 94 | save_point = self.token_index 95 | if self._statement(): 96 | return True 97 | else: 98 | self.token_index = save_point 99 | # do nothing for epsilon 100 | return True 101 | 102 | def _production(self): 103 | save_point = self.token_index 104 | if self._non_terminal(): 105 | productions_object = [] 106 | 107 | # first non_terminal is start symbol 108 | if not self.production_dict: 109 | self.start_symbol = 
self.token_list[save_point] 110 | 111 | self.production_dict[self.token_list[save_point]] = productions_object 112 | 113 | return ( 114 | self._match_type(PRODUCT) 115 | and self._symbols(productions_object) 116 | and self._other_symbols(productions_object) 117 | ) 118 | else: 119 | return False 120 | 121 | def _other_symbols(self, productions_object): 122 | save_point = self.token_index 123 | if ( 124 | self._match_type(ALTERNATIVE) 125 | and self._symbols(productions_object) 126 | and self._other_symbols(productions_object) 127 | ): 128 | return True 129 | else: 130 | self.token_index = save_point 131 | # do nothing for epsilon 132 | return True 133 | 134 | def _symbols(self, productions_object): 135 | save_point = self.token_index 136 | 137 | production = [] 138 | result = self._symbol(production) and self._other_symbol(production) 139 | 140 | if result: 141 | productions_object.append(production) 142 | 143 | if not result: 144 | if self._match_type(EPSILON): 145 | productions_object.append([self.token_list[save_point]]) 146 | return True 147 | return False 148 | return True 149 | 150 | def _other_symbol(self, production): 151 | save_point = self.token_index 152 | if self._symbol(production) and self._other_symbol(production): 153 | return True 154 | else: 155 | self.token_index = save_point 156 | # do nothing for epsilon 157 | return True 158 | 159 | def _symbol(self, production): 160 | save_point = self.token_index 161 | if self._non_terminal(): 162 | production.append(self.token_list[save_point]) 163 | return True 164 | else: 165 | self.token_index = save_point 166 | result = self._terminal() 167 | 168 | if result: 169 | production.append(self.token_list[save_point]) 170 | return result 171 | 172 | def generate_production(self): 173 | formal_production = Productions() 174 | 175 | for lhs_lexeme in self.production_dict: 176 | lhs_symbol = NonTerminal(lhs_lexeme.value) 177 | production_list = [] 178 | formal_production[lhs_symbol] = production_list 179 | 180 | if lhs_lexeme == self.start_symbol: 181 | formal_production.start_symbol = lhs_symbol 182 | 183 | productions = self.production_dict[lhs_lexeme] 184 | for production in productions: 185 | production_symbols = [] 186 | production_list.append(production_symbols) 187 | for rhs_symbol in production: 188 | if rhs_symbol.type_ == EPSILON: 189 | production_symbols.append(Epsilon()) 190 | elif rhs_symbol.type_ == NON_TERMINAL: 191 | production_symbols.append(NonTerminal(rhs_symbol.value)) 192 | elif rhs_symbol.type_ == TERMINAL: 193 | production_symbols.append( 194 | Terminal(type_=None, data=rhs_symbol.value) 195 | ) 196 | 197 | return formal_production 198 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/Parser_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.ParserGenerator.Lexer import Lexer 4 | from MicroCompiler.ParserGenerator.Parser import Parser 5 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 6 | from MicroCompiler.Lookahead.Terminal import Terminal 7 | from MicroCompiler.Lookahead.Terminal import CHARACTER 8 | 9 | 10 | class TestParser(unittest.TestCase): 11 | def test_simple_case(self): 12 | mbnf = """ 13 | statement -> 14 | expression ';' 15 | ; 16 | 17 | expression -> 18 | 'plus' 19 | | 'minus' 20 | ; 21 | """ 22 | 23 | lexer = Lexer() 24 | lexer.parse(mbnf) 25 | 26 | parser = Parser(lexer.token_list) 27 | parser.parse() 28 | real_result = 
parser.generate_production() 29 | 30 | expect_result = { 31 | NonTerminal("expression"): [ 32 | [Terminal(CHARACTER, "plus")], 33 | [Terminal(CHARACTER, "minus")], 34 | ], 35 | NonTerminal("statement"): [ 36 | [NonTerminal("expression"), Terminal(CHARACTER, ";")] 37 | ], 38 | } 39 | 40 | self.assertEqual(dict(real_result), expect_result) 41 | self.assertEqual(real_result.start_symbol, NonTerminal("statement")) 42 | -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/ParserGenerator/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/calculator.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr -> Term ExprTwo ; 3 | ExprTwo -> '+' Term ExprTwo 4 | | '-' Term ExprTwo 5 | | ϵ ; 6 | Term -> Factor TermTwo ; 7 | TermTwo -> '*' Factor TermTwo 8 | | '/' Factor TermTwo 9 | | ϵ ; 10 | Factor -> '(' Expr ')' 11 | | 'num' ; -------------------------------------------------------------------------------- /MicroCompiler/ParserGenerator/sample.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr -> Term ExprTwo ; 3 | ExprTwo -> '+' Term ExprTwo 4 | | '-' Term ExprTwo 5 | | ϵ ; 6 | Term -> Factor TermTwo ; 7 | TermTwo -> '*' Factor TermTwo 8 | | '/' Factor TermTwo 9 | | ϵ ; 10 | Factor -> '(' Expr ')' 11 | | 'num' 12 | | 'name' ; -------------------------------------------------------------------------------- /MicroCompiler/Productions.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.Lookahead.Terminal import Terminal 2 | from MicroCompiler.Lookahead.Epsilon import Epsilon 3 | from MicroCompiler.Lookahead.EOF import EOF 4 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 5 | 6 | 7 | class Productions(dict): 8 | epsilon = Epsilon() 9 | eof = EOF() 10 | 11 | def __init__(self, *args, **kwargs): 12 | self._elements = set() 13 | self._terminals = set() 14 | self._non_terminals = set() 15 | self.start_symbol = None 16 | 17 | super().__init__(*args, **kwargs) 18 | 19 | def set_start_symbol(self, start_symbol): 20 | if start_symbol not in self: 21 | raise ValueError("start symbol must be in productions.") 22 | self.start_symbol = start_symbol 23 | 24 | def compute_elements(self): 25 | for non_terminal in self: 26 | self._elements.add(non_terminal) 27 | productions = self[non_terminal] 28 | for production in productions: 29 | for element in production: 30 | self._elements.add(element) 31 | 32 | self._terminals = {i for i in self._elements if isinstance(i, Terminal)} 33 | self._non_terminals = {i for i in self._elements if isinstance(i, NonTerminal)} 34 | 35 | @property 36 | def terminals(self): 37 | self.compute_elements() 38 | return {i for i in self._terminals} 39 | 40 | @property 41 | def non_terminals(self): 42 | self.compute_elements() 43 | return {i for i in self._non_terminals} 44 | 45 | def print_as_bnf(self): 46 | for lhs_symbol in self: 47 | print(lhs_symbol, " ->") 48 | productions = self[lhs_symbol] 49 | production_str_list = [] 50 | for production in productions: 51 | production_str_list.append(" ".join([str(i) for i in production])) 52 | print(" ", " | ".join(production_str_list)) 53 | print(";") 54 | -------------------------------------------------------------------------------- /MicroCompiler/Productions_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.Productions import Productions 4 | from MicroCompiler.Lookahead.NonTerminal import NonTerminal 5 | from MicroCompiler.Lookahead.Terminal import CHARACTER 6 | from MicroCompiler.Lookahead.Terminal import Terminal 7 | 8 | 9 | class TestProduction(unittest.TestCase): 10 | def test_conception(self): 11 | statement = NonTerminal("Statement") 12 | expression = NonTerminal("Expression") 13 | semicolon = Terminal(CHARACTER, ";") 14 | production = Productions({statement: [[expression, semicolon]]}) 15 | 16 | production.print_as_bnf() 17 | 18 | self.assertEqual(production.terminals, {semicolon}) 19 | self.assertEqual(production.non_terminals, {statement, expression}) 20 | -------------------------------------------------------------------------------- /MicroCompiler/SkeletonParser.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | 4 | class SkeletonSyntaxError(Exception): 5 | pass 6 | 7 | 8 | class Token(object): 9 | index_counter = 0 10 | 11 | def __init__(self, type_, value=None, index=None): 12 | self.type = type_ 13 | self.value = value 14 | self.index = index if index is not None else self.index_counter 15 | 16 | self.increase_index_counter() 17 | 18 | @classmethod 19 | def increase_index_counter(cls): 20 | cls.index_counter += 1 21 | 22 | def __repr__(self): 23 | return "{}(type_={}, value={}, index={})".format( 24 | self.__class__.__name__, self.type, self.value, self.index 25 | ) 26 | 27 | 28 | class WhiteSpaceToken(Token): 29 | index_counter = 0 30 | 31 | def __init__(self, value=None, index=None): 32 | super(WhiteSpaceToken, self).__init__(type_='white_space', value=value, index=index) 33 | 34 | 35 | class Epsilon(object): 36 | def __repr__(self): 37 | return "Epsilon()" 38 | 39 | 40 | class Node(object): 41 | index_counter = 0 42 | 43 | def __init__(self, type_, value=None, index=None): 44 | self.type = type_ 45 | self.value = value 46 | self.index = index if index is not None else self.index_counter 47 | 48 | self.increase_index_counter() 49 | 50 | @classmethod 51 | def increase_index_counter(cls): 52 | cls.index_counter += 1 53 | 54 | def __repr__(self): 55 | return "{}(type_={}, value={}, index={})".format( 56 | self.__class__.__name__, self.type, self.value, self.index 57 | ) 58 | 59 | 60 | class SkeletonParser: 61 | def __init__(self, definition_file, lexeme_list): 62 | self.definition_file = definition_file 63 | self.lexeme_list = lexeme_list 64 | self.lexeme_index = 0 65 | 66 | with open(definition_file) as fd: 67 | definition = yaml.load(fd.read(), Loader=yaml.FullLoader) 68 | 69 | self.start_symbol = definition["start-symbol"] 70 | self.error_marker = definition["error-marker"] 71 | self.productions = definition["productions"] 72 | self.table = definition["table"] 73 | self.terminals = definition["terminals"] 74 | self.non_terminals = definition["non-terminals"] 75 | self.eof_marker = definition["eof-marker"] 76 | 77 | # debug 78 | self.token_stack = [] 79 | self.call_stack = [] 80 | 81 | def parse(self): 82 | previous_symbol = Node("") 83 | start_symbol = Node(self.start_symbol) 84 | result = self.parse_symbol(start_symbol, previous_symbol) 85 | 86 | if result: 87 | return True 88 | else: 89 | return False 90 | 91 | def call_parser_method(self, parser): 92 | value_stack = [] 93 | for symbol in self.call_stack.pop(): 94 | method = getattr(parser, symbol) 95 | value = value_stack.pop() 96 | return_value = method(value) 97 | value_stack.append(return_value) 98 | 99 | def parse_symbol(self, symbol, previous_symbol): 100 | self.token_stack.append(self.lexeme_list[self.lexeme_index]) 101 | self.call_stack.append((previous_symbol, symbol)) 102 | 103 | if symbol.type in self.terminals: 104 | 105 | lexeme = self.lexeme_list[self.lexeme_index] 106 | 107 | self.call_stack.append((symbol, lexeme)) 108 | self.token_stack.append(lexeme) 109 | 110 | self.lexeme_index += 1 111 | 112 | return True 113 | 114 | if symbol.type not in self.table: 115 | raise SkeletonSyntaxError( 116 | "Symbol: {} not in {}".format(symbol.type, self.table) 117 | ) 118 | 119 | lookahead_symbol = self.lexeme_list[self.lexeme_index] 120 | if lookahead_symbol.type not in self.table[symbol.type]: 121 | raise SkeletonSyntaxError( 122 | "Lookahead symbol: {} not in {}".format( 123 | lookahead_symbol.type, self.table[symbol.type] 124 | ) 125 | ) 126 | 127 | predict_indicator = self.table[symbol.type][lookahead_symbol.type] 128 | 129 | if predict_indicator == self.error_marker: 130 | raise SkeletonSyntaxError( 131 | "Invalid lookahead symbol: {} in {}".format( 132 | lookahead_symbol.type, symbol.type 133 | ) 134 | ) 135 | 136 | if predict_indicator not in self.productions: 137 | raise SkeletonSyntaxError("Predict indicator {} not in productions {}".format(predict_indicator, self.productions)) 138 | 139 | production = list(self.productions[predict_indicator].values())[0] 140 | if not len(production): 141 | self.call_stack.append((symbol, Token("ϵ", Epsilon()))) 142 | self.token_stack.append(Token("ϵ")) 143 | 144 | return True 145 | 146 | result_list = [] 147 | for i in production: 148 | next_symbol = Node(i) 149 | result_list.append(self.parse_symbol(next_symbol, symbol)) 150 | 151 | if all(result_list): 152 | return True 153 | return False 154 | -------------------------------------------------------------------------------- /MicroCompiler/SkeletonParser_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from MicroCompiler.SkeletonParser import SkeletonParser, Token 4 | 5 | 6 | class TestSkeletonParser(unittest.TestCase): 7 | def test_simple(self): 8 | token_list = [Token("num", 6), Token("/"), Token("num", 2), Token("")] 9 | 10 | sp = SkeletonParser("output.yaml", token_list) 11 | self.assertTrue(sp.parse()) 12 | -------------------------------------------------------------------------------- /MicroCompiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/abstract_syntax_tree/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/abstract_syntax_tree.py: -------------------------------------------------------------------------------- 1 | class AbstractSyntaxTree(object): 2 | def __init__(self): 3 | self.start_node = None 4 | 5 | def set_start_node(self, node): 6 | self.start_node = node 7 | 8 | def add_production(self, from_node,
to_node): 9 | from_node.add_sub_node(to_node) 10 | -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/abstract_syntax_tree_test.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from MicroCompiler.SkeletonParser import Token, SkeletonParser, Epsilon 4 | from MicroCompiler.parser_builder import ParserBuilder 5 | from MicroCompiler.postfix_expression.operator import PythonBuiltinOperator 6 | 7 | token_list = [Token("num", 6), Token("/", operator.truediv), Token("num", 2), Token("")] 8 | # token_list = [ 9 | # Token("num", 6), 10 | # Token("/", operator.truediv), 11 | # Token("num", 2), 12 | # Token("/", operator.truediv), 13 | # Token("num", 2), 14 | # Token(""), 15 | # ] 16 | 17 | 18 | sp = SkeletonParser("output.yaml", token_list) 19 | sp.parse() 20 | 21 | from MicroCompiler.abstract_syntax_tree.abstract_syntax_tree import ( 22 | AbstractSyntaxTree as AST, 23 | ) 24 | from MicroCompiler.abstract_syntax_tree.node import create_or_get_node 25 | 26 | import matplotlib.pyplot as plt 27 | 28 | import networkx as nx 29 | 30 | DG = nx.DiGraph() 31 | 32 | ast = AST() 33 | 34 | 35 | def fallback_method(*args, **kwargs): 36 | print(*args, **kwargs) 37 | return "Done!" 38 | 39 | 40 | def get_node_label(obj): 41 | if obj.value is not None: 42 | return "{}.{}.{}#{}".format( 43 | obj.__class__.__name__, obj.type, obj.value, obj.index 44 | ) 45 | 46 | return "{}.{}#{}".format(obj.__class__.__name__, obj.type, obj.index) 47 | 48 | 49 | for parser_instance, (f, t) in enumerate(sp.call_stack): 50 | from_node = create_or_get_node(f) 51 | to_node = create_or_get_node(t) 52 | if parser_instance == 0: 53 | ast.set_start_node(from_node) 54 | 55 | ast.add_production(from_node, to_node) 56 | 57 | f_label = get_node_label(f) 58 | t_label = get_node_label(t) 59 | 60 | DG.add_node(f_label, prototype=f, reference=from_node) 61 | DG.add_node(t_label, prototype=t, reference=to_node) 62 | 63 | DG.add_edge(t_label, f_label) # sub node to node 64 | 65 | print("") 66 | 67 | # nx.draw(DG) 68 | 69 | # plt.show() 70 | 71 | # nx.write_graphml(DG, "data.graphml") 72 | # nx.write_gexf(DG, "data.gexf") 73 | 74 | ordered_list = list(nx.topological_sort(DG)) 75 | 76 | pb = ParserBuilder() 77 | pb.add_generator("failback", "Who Am I") 78 | clazz = pb.generate() 79 | 80 | 81 | class Parser(clazz): 82 | def __init__(self): 83 | self.post_expr = [] 84 | 85 | def ExprTwo(self, input_): 86 | # ExprTwo -> '+' Term ExprTwo 87 | # | '-' Term ExprTwo 88 | # | ϵ ; 89 | 90 | if len(input_) == 1: 91 | # ExprTwo -> ϵ 92 | return Epsilon() 93 | 94 | if isinstance(input_[2], Epsilon): 95 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 96 | # | | 97 | # -> ϵ -> ϵ 98 | return input_[:2] 99 | 100 | if isinstance(input_[2], list): 101 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 102 | # | | 103 | # -> ['+' num] -> ['-' num] 104 | post_expr = [] 105 | 106 | # value #1 107 | if isinstance(input_[1], list): 108 | post_expr.extend(input_[1]) 109 | else: 110 | post_expr.append(input_[1]) 111 | 112 | # value #2 113 | if isinstance(input_[2][1], list): 114 | post_expr.extend(input_[2][1]) 115 | else: 116 | post_expr.append(input_[2][1]) 117 | 118 | post_expr.append(input_[2][0]) # op 119 | 120 | return [input_[0], post_expr] 121 | 122 | def TermTwo(self, input_): 123 | # TermTwo -> '*' Factor TermTwo 124 | # | '/' Factor TermTwo 125 | # | ϵ ; 126 | 127 | if len(input_) == 1: 128 | # ExprTwo -> ϵ 129 | return Epsilon() 130 | 
131 | if isinstance(input_[2], Epsilon): 132 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 133 | # | | 134 | # -> ϵ -> ϵ 135 | return input_[:2] 136 | 137 | if isinstance(input_[2], list): 138 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 139 | # | | 140 | # -> ['*' num] -> ['/' num] 141 | post_expr = [] 142 | 143 | # value #1 144 | if isinstance(input_[1], list): 145 | post_expr.extend(input_[1]) 146 | else: 147 | post_expr.append(input_[1]) 148 | 149 | # value #2 150 | if isinstance(input_[2][1], list): 151 | post_expr.extend(input_[2][1]) 152 | else: 153 | post_expr.append(input_[2][1]) 154 | 155 | post_expr.append(input_[2][0]) # op 156 | 157 | return [input_[0], post_expr] 158 | 159 | def num(self, input_): 160 | return input_[0] 161 | 162 | def Factor(self, input_): 163 | if len(input_) == 1: 164 | return input_[0] 165 | 166 | if len(input_) == 3: 167 | print(input_) 168 | return input_[1] 169 | 170 | def Division(self, input_): 171 | return PythonBuiltinOperator(input_[0], 2) 172 | 173 | def Term(self, input_): 174 | if isinstance(input_[1], Epsilon): 175 | return input_[0] 176 | 177 | post_expr = [] 178 | 179 | # value #1 180 | if isinstance(input_[0], list): 181 | post_expr.extend(input_[0]) 182 | else: 183 | post_expr.append(input_[0]) 184 | 185 | # value #2 186 | if isinstance(input_[1][1], list): 187 | post_expr.extend(input_[1][1]) 188 | else: 189 | post_expr.append(input_[1][1]) 190 | 191 | post_expr.append(input_[1][0]) # op 192 | 193 | return post_expr 194 | 195 | def Expr(self, input_): 196 | if isinstance(input_[1], Epsilon): 197 | return input_[0] 198 | 199 | def Goal(self, input_): 200 | return input_[0] 201 | 202 | def Start(self, input_): 203 | return input_[0] 204 | 205 | 206 | parser_instance = Parser() 207 | 208 | topological_ordered_list = [DG.nodes[i] for i in nx.topological_sort(DG)] 209 | 210 | from MicroCompiler.parser_evaluator import ParserEvaluator 211 | 212 | parser_evaluator = ParserEvaluator(parser_instance) 213 | final_value = parser_evaluator.eval(topological_ordered_list) 214 | 215 | print("") 216 | 217 | from MicroCompiler.postfix_expression.evaluator import Evaluator 218 | 219 | evaluator = Evaluator(final_value) 220 | result = evaluator.eval() 221 | 222 | print("") 223 | -------------------------------------------------------------------------------- /MicroCompiler/abstract_syntax_tree/node.py: -------------------------------------------------------------------------------- 1 | class Node(object): 2 | def __init__(self, label_str=None, reference=None): 3 | self.label_str = label_str 4 | self.reference = reference 5 | self.sub_node_list = [] 6 | 7 | def add_sub_node(self, node): 8 | self.sub_node_list.append(node) 9 | 10 | def __repr__(self): 11 | return "{}(label_str={}, reference={}, sub_node_list={})".format( 12 | self.__class__.__name__, self.label_str, self.reference, self.sub_node_list 13 | ) 14 | 15 | 16 | node_registry = {} 17 | 18 | 19 | def create_or_get_node(node): 20 | cls_name = node.__class__.__name__ 21 | 22 | node_id = "{}.{}".format(cls_name, node.index) 23 | label_str = "{}.{}.{}".format(node.type, cls_name, node.index) 24 | reference = node 25 | 26 | if node_id in node_registry: 27 | return node_registry[node_id] 28 | 29 | node = Node(label_str, reference) 30 | node_registry[node_id] = node 31 | 32 | return node 33 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/README.md: -------------------------------------------------------------------------------- 1 | # 
References 2 | * http://dinosaur.compilertools.net/lex/index.html 3 | * https://courses.cs.washington.edu/courses/cse401/07au/CSE401-07lex.pdf 4 | * https://web.stanford.edu/class/archive/cs/cs143/cs143.1112/materials/lectures/lecture04.pdf 5 | * https://cs.stackexchange.com/questions/97374/how-to-implement-a-maximal-munch-lexical-analyzer-by-simulating-nfa-or-running-d 6 | * https://karkare.github.io/cs335/lectures/04LexicalAanalysis.pdf 7 | * http://user.it.uu.se/~kostis/Teaching/KT1-12/Slides/handout03.pdf 8 | * http://courses.ics.hawaii.edu/ReviewICS312/morea/Compiling/ics312_lexing.pdf 9 | * http://ocw.snu.ac.kr/sites/default/files/NOTE/7048.pdf 10 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/lexer/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/lexer/demo.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.lexer.user_level_lexer_define import lexer_define 2 | 3 | from MicroCompiler.lexer.lexer import lex_analysis 4 | 5 | input_string = "2+3 * 6" 6 | result = lex_analysis(input_string, lexer_define) 7 | 8 | print(result) 9 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/lexer.py: -------------------------------------------------------------------------------- 1 | import MicroRegEx 2 | 3 | 4 | def match_token(target_char, token_name_nfa_mapping, token_name_action_mapping): 5 | history = [] 6 | for index in range(1, len(target_char) + 1): 7 | current_char_list = target_char[:index] 8 | 9 | is_accepted_mapping = {} 10 | still_alive_mapping = {} 11 | for token_object, nfa_object in token_name_nfa_mapping.items(): 12 | nfa_object.reset() 13 | nfa_object.match(current_char_list) 14 | is_accepted_mapping[token_object] = nfa_object.is_accepted() 15 | still_alive_mapping[token_object] = bool(nfa_object.current_status) 16 | 17 | history.append(is_accepted_mapping) 18 | 19 | if not any(still_alive_mapping.values()) or (index == len(target_char)): 20 | # all regex engines have stopped; find the last accepted status as the result 21 | for reversed_history_index, moment in enumerate(history[::-1]): 22 | if any(moment.values()): 23 | accepted_nfa_num = sum(bool(i) for i in moment.values()) 24 | if accepted_nfa_num > 1: 25 | # TODO: two token patterns matched; maybe show a warning 26 | # and then select the first rule 27 | raise ValueError("at least two token patterns match the same string") 28 | 29 | first_true_parser = list(filter(lambda x: x[1], moment.items())) 30 | matched_token_type = first_true_parser[0][0] 31 | matched_str = target_char[0 : index - reversed_history_index] 32 | 33 | action = token_name_action_mapping[matched_token_type] 34 | 35 | token_object = action(matched_str) 36 | return matched_str, token_object 37 | 38 | # lexer parse failed 39 | return "", None 40 | 41 | 42 | def lex_analysis(input_string, user_defined_lexer_rule): 43 | token_name_nfa_mapping = {} 44 | token_name_action_mapping = {} 45 | for token_object, token_regex, token_action in user_defined_lexer_rule: 46 | nfa_object = MicroRegEx.compile(token_regex) 47 | token_name_nfa_mapping[token_object] = nfa_object 48 | token_name_action_mapping[token_object] = token_action 49 | 50 | result = [] 51 | 52 | 
current_target_char = input_string 53 | while True: 54 | if not current_target_char: 55 | # job done 56 | break 57 | 58 | matched_str, token_object = match_token( 59 | current_target_char, token_name_nfa_mapping, token_name_action_mapping 60 | ) 61 | if matched_str: 62 | current_target_char = current_target_char[len(matched_str) :] 63 | else: 64 | raise ValueError("lexer parse failed") 65 | 66 | result.append((matched_str, token_object)) 67 | 68 | return result 69 | -------------------------------------------------------------------------------- /MicroCompiler/lexer/user_level_lexer_define.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 4 | 5 | lexer_define = [ 6 | # token type, token regex, token action 7 | ["num", r"(0|1|2|3|4|5|6|7|8|9)+", lambda x: Token("num", int(x))], 8 | ["+", r"\+", lambda x: Token("+", operator.add)], 9 | ["-", r"-", lambda x: Token("-", operator.sub)], 10 | ["*", r"\*", lambda x: Token("*", operator.mul)], 11 | ["/", r"/", lambda x: Token("/", operator.truediv)], 12 | ["(", r"\(", lambda x: Token("(")], 13 | [")", r"\)", lambda x: Token(")")], 14 | ["white space", r" +", lambda x: WhiteSpaceToken(x)], 15 | ] 16 | -------------------------------------------------------------------------------- /MicroCompiler/output.yaml: -------------------------------------------------------------------------------- 1 | eof-marker: 2 | error-marker: -- 3 | non-terminals: [ExprTwo, Term, TermTwo, Factor, Goal, Expr] 4 | productions: 5 | 0: 6 | ExprTwo: [+, Term, ExprTwo] 7 | 1: 8 | ExprTwo: ['-', Term, ExprTwo] 9 | 2: 10 | ExprTwo: [] 11 | 3: 12 | Goal: [Expr] 13 | 4: 14 | Term: [Factor, TermTwo] 15 | 5: 16 | Factor: [(, Expr, )] 17 | 6: 18 | Factor: [num] 19 | 7: 20 | Factor: [name] 21 | 8: 22 | Expr: [Term, ExprTwo] 23 | 9: 24 | TermTwo: ['*', Factor, TermTwo] 25 | 10: 26 | TermTwo: [/, Factor, TermTwo] 27 | 11: 28 | TermTwo: [] 29 | start-symbol: Goal 30 | table: 31 | Expr: {(: 8, ): --, '*': --, +: --, '-': --, /: --, : --, name: 8, num: 8} 32 | ExprTwo: {(: --, ): 2, '*': --, +: 0, '-': 1, /: --, : 2, name: --, num: --} 33 | Factor: {(: 5, ): --, '*': --, +: --, '-': --, /: --, : --, name: 7, num: 6} 34 | Goal: {(: 3, ): --, '*': --, +: --, '-': --, /: --, : --, name: 3, num: 3} 35 | Term: {(: 4, ): --, '*': --, +: --, '-': --, /: --, : --, name: 4, num: 4} 36 | TermTwo: {(: --, ): 11, '*': 9, +: 11, '-': 11, /: 10, : 11, name: --, num: --} 37 | terminals: [+, '-', (, '*', /, name, ), num] 38 | -------------------------------------------------------------------------------- /MicroCompiler/parser_builder.py: -------------------------------------------------------------------------------- 1 | import types 2 | 3 | 4 | class ParserBuilder(object): 5 | def __init__(self, parser_name="ParserClass"): 6 | self.parser_name = parser_name 7 | self.generators = dict() 8 | 9 | def add_generator(self, from_statement, to_statements): 10 | def generator(self, to_statements=to_statements): 11 | print(to_statements) 12 | # raise NotImplementedError 13 | 14 | self.generators[from_statement] = generator 15 | 16 | def generate(self): 17 | return type(self.parser_name, (), self.generators) 18 | 19 | 20 | if __name__ == "__main__": 21 | pb = ParserBuilder() 22 | pb.add_generator('some_method', 'Who Am I') 23 | clazz = pb.generate() 24 | i = clazz() 25 | print(i.some_method()) 26 | print("") 27 | 
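
The generated class is intended to be subclassed; here is a minimal sketch of the pattern the demos use (mirroring `demo/arithmetic_calculator/user_level_parser.py`; `CalculatorParser` and its `num` method are illustrative names, not part of this module):

```python
from MicroCompiler.parser_builder import ParserBuilder

pb = ParserBuilder()
pb.add_generator("get_fallback_method", "fallback")
Base = pb.generate()


class CalculatorParser(Base):
    # one method per grammar symbol; in the demos, ParserEvaluator falls back
    # to a user-supplied `fallback` method for symbols without a handler
    def num(self, input_):
        return input_[0]


print(CalculatorParser().num([42]))  # -> 42
```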
-------------------------------------------------------------------------------- /MicroCompiler/parser_evaluator.py: -------------------------------------------------------------------------------- 1 | class ParserEvaluator(object): 2 | """ 3 | Call the user's parser instance (a subclass of Parser), directed by topological_sorted_nodes 4 | """ 5 | def __init__(self, parser_instance): 6 | self.parser_instance = parser_instance 7 | 8 | def eval(self, topological_sorted_nodes): 9 | for cmd in topological_sorted_nodes: 10 | cmd_prototype = cmd["prototype"] 11 | cmd_reference = cmd["reference"] 12 | if cmd_prototype.__class__.__name__ == "Token": 13 | # token value already set 14 | continue 15 | 16 | if cmd_prototype.__class__.__name__ == "Node": 17 | values = [i.reference.value for i in cmd_reference.sub_node_list] 18 | method_name = cmd_prototype.type 19 | legal_method_name = self.parser_instance.get_legal_method_name(method_name) 20 | method_func = getattr( 21 | self.parser_instance, legal_method_name, self.parser_instance.fallback 22 | ) 23 | return_value = method_func(values) 24 | cmd["reference"].reference.value = return_value 25 | 26 | final_value = cmd["reference"].reference.value 27 | 28 | return final_value 29 | -------------------------------------------------------------------------------- /MicroCompiler/parser_evaluator_builder.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | from MicroCompiler.abstract_syntax_tree.abstract_syntax_tree import ( 4 | AbstractSyntaxTree as AST, 5 | ) 6 | from MicroCompiler.abstract_syntax_tree.node import create_or_get_node 7 | 8 | 9 | def build_parser_evaluator(call_stack, graph_file=None): 10 | DG = nx.DiGraph() 11 | 12 | graph = nx.DiGraph() 13 | 14 | ast = AST() 15 | 16 | def fallback_method(*args, **kwargs): 17 | print(*args, **kwargs) 18 | return "Done!"
19 | 20 | def get_node_label(obj): 21 | if obj.value is not None: 22 | return "{}.{}.{}#{}".format( 23 | obj.__class__.__name__, obj.type, obj.value, obj.index 24 | ) 25 | 26 | return "{}.{}#{}".format(obj.__class__.__name__, obj.type, obj.index) 27 | 28 | for parser_instance, (f, t) in enumerate(call_stack): 29 | from_node = create_or_get_node(f) 30 | to_node = create_or_get_node(t) 31 | if parser_instance == 0: 32 | ast.set_start_node(from_node) 33 | 34 | ast.add_production(from_node, to_node) 35 | 36 | f_label = get_node_label(f) 37 | t_label = get_node_label(t) 38 | 39 | DG.add_node(f_label, prototype=f, reference=from_node) 40 | DG.add_node(t_label, prototype=t, reference=to_node) 41 | 42 | graph.add_node(f_label) 43 | graph.add_node(t_label) 44 | 45 | DG.add_edge(t_label, f_label) # sub node to node 46 | graph.add_edge(t_label, f_label) # sub node to node 47 | 48 | topological_ordered_list = [DG.nodes[i] for i in nx.topological_sort(DG)] 49 | 50 | if graph_file: 51 | nx.write_graphml(graph, graph_file) 52 | 53 | return topological_ordered_list 54 | -------------------------------------------------------------------------------- /MicroCompiler/postfix_expression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/MicroCompiler/postfix_expression/__init__.py -------------------------------------------------------------------------------- /MicroCompiler/postfix_expression/evaluator.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.postfix_expression.operator import Operator 2 | 3 | 4 | class Evaluator(object): 5 | def __init__(self, post_expr): 6 | self.post_expr = post_expr 7 | 8 | self.value_stack = [] 9 | 10 | def eval(self): 11 | for expr in self.post_expr: 12 | if isinstance(expr, Operator): # this is an operator 13 | args = self.pop_top_k_value(expr.get_operand_num()) 14 | value = expr.eval(*args) 15 | self.value_stack.append(value) 16 | else: # this is an operand 17 | self.value_stack.append(expr) 18 | 19 | assert len(self.value_stack) == 1 20 | 21 | return self.value_stack[0] 22 | 23 | def pop_top_k_value(self, k): 24 | args = [] 25 | for _ in range(k): 26 | args.insert(0, self.value_stack.pop()) 27 | 28 | return args 29 | -------------------------------------------------------------------------------- /MicroCompiler/postfix_expression/operator.py: -------------------------------------------------------------------------------- 1 | class Operator(object): 2 | def get_operand_num(self): 3 | raise NotImplementedError 4 | 5 | def eval(self, *args): 6 | raise NotImplementedError 7 | 8 | 9 | class PythonBuiltinOperator(Operator): 10 | def __init__(self, operator, operand_num): 11 | self.operator = operator 12 | self.operand_num = operand_num 13 | 14 | def get_operand_num(self): 15 | return self.operand_num 16 | 17 | def eval(self, *args): 18 | return self.operator(*args) 19 | 20 | def __repr__(self): 21 | return "{}(operator={}, operand_num={})".format( 22 | self.__class__.__name__, self.operator, self.operand_num 23 | ) 24 | -------------------------------------------------------------------------------- /MicroCompiler/sample.yaml: -------------------------------------------------------------------------------- 1 | terminals: [+, -, x, /, (, ), name, num] 2 | non-terminals: [Goal, Expr, Expr', Term, Term', Factor] 3 | eof-marker: 4 | error-marker: -- 5 | start-symbol: Goal 6 | 7 | 
productions: 8 | 0: {Goal: [Expr]} 9 | 1: {Expr: [Term, Expr']} 10 | 2: {Expr': [+, Term, Expr']} 11 | 3: {Expr': [-, Term, Expr']} 12 | 4: {Expr': []} 13 | 5: {Term: [Factor, Term']} 14 | 6: {Term': [x, Factor, Term']} 15 | 7: {Term': [/, Factor, Term']} 16 | 8: {Term': []} 17 | 9: {Factor: [(, Expr, )]} 18 | 10: {Factor: [num]} 19 | 11: {Factor: [name]} 20 | 21 | table: 22 | Goal: {+: --, -: --, x: --, /: --, (: 0, ): --, name: 0, num: 0, : --} 23 | Expr: {+: --, -: --, x: --, /: --, (: 1, ): --, name: 1, num: 1, : --} 24 | Expr': {+: 2, -: 3, x: --, /: --, (: 0, ): 4, name: --, num: --, : 4} 25 | Term: {+: --, -: --, x: --, /: --, (: 5, ): --, name: 5, num: 5, : --} 26 | Term': {+: 8, -: 8, x: 6, /: 7, (: --, ): 8, name: --, num: --, : 8} 27 | Factor: {+: --, -: --, x: --, /: --, (: 9, ): --, name: 11, num: 10, : --} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MicroCompiler 2 | 3 | A micro LL/LR/LALR syntax parser that helps programming-language designers with language design, testing, and more. 4 | 5 | This project is a by-product of my study of Compilers: Principles, Techniques, and Tools (AKA the Dragon Book), [CS143: Compilers by Stanford University](http://web.stanford.edu/class/cs143/), [COMP 412: Compiler Construction for Undergraduates by Rice University](https://www.clear.rice.edu/comp412/) and [Engineering: CS1 - SELF PACED Compilers by Stanford University](https://lagunita.stanford.edu/courses/Engineering/Compilers/Fall2014/info). 6 | 7 | # Goal 8 | Build a complete, usable tool set that helps users design compiler front ends. 9 | 10 | # Status 11 | - LL grammars: largely complete; LL grammars already cover the needs of most programming languages, e.g. Python 12 | - LR/LALR grammars: not finished, and unlikely to be finished in the short term 13 | 14 | # Features 15 | The evaluation of `6 * (2 + 2)` can be expressed by the following abstract syntax tree (the graph is generated automatically by this project's compiler and rendered with `Cytoscape`): 16 | 17 | ![](demo/arithmetic_calculator/calculator.png) 18 | 19 | # Documentation 20 | ## LL(1) grammars 21 | ### The MBNF format 22 | MBNF is short for Micro Backus Normal Form, a grammar format designed specifically for this project's compiler. MBNF is simple and easy to read, and closely resembles common BNF notation. Users pass grammar information to the compiler by writing MBNF files. 23 | 24 | The file [demo/arithmetic_calculator/calculator.mbnf](demo/arithmetic_calculator/calculator.mbnf) is an example MBNF file for an arithmetic language that supports `+`, `-`, `*`, `/` and the parentheses `(` `)`. 25 | 26 | ### Generator 27 | `MicroCompiler.ParserGenerator.Generator.Generator` reads an MBNF grammar file and produces the parser-construction data that an LL(1) syntax parser needs, including the `First Set`, `Follow Set`, and so on. 28 | 29 | This parser-construction data can be serialized to human-readable YAML. The file [demo/arithmetic_calculator/calculator.yaml](demo/arithmetic_calculator/calculator.yaml) is the parser-construction data of the arithmetic language above (see above), serialized as YAML. 30 | 31 | ### SkeletonParser 32 | `MicroCompiler.SkeletonParser.SkeletonParser` reads the YAML parser-construction data together with a sequence of tokens, checks whether the token sequence is syntactically valid, and produces a valid parse dependency graph. 33 | 34 | ### build_parser_evaluator 35 | On top of the dependency graph, `MicroCompiler.parser_evaluator_builder.build_parser_evaluator` builds the abstract syntax tree from the dependency information and derives a topologically sorted evaluation order. 36 | 37 | ### ParserBuilder 38 | `MicroCompiler.parser_builder.ParserBuilder` generates a parser base class. Users subclass it and add the parsing methods for the relevant grammar productions in their own class. 39 | 40 | ### ParserEvaluator 41 | `MicroCompiler.parser_evaluator.ParserEvaluator` executes the methods of the user-defined class one by one, guided by the topologically sorted evaluation order, and returns the result. 42 | 43 | ### [Optional] Evaluator 44 | For user-defined classes that return a postfix expression (reverse Polish notation), users can optionally use `MicroCompiler.postfix_expression.evaluator.Evaluator` to evaluate the postfix expression. 45 | 46 | # Demos 47 | To better validate the project and show how to use it, several demos are provided. 48 | 49 | ## Arithmetic calculator 50 | A parser for an arithmetic expression language with the four basic operations (`+`, `-`, `*`, `/`, `(`, `)`). The project lives in [demo/arithmetic_calculator](demo/arithmetic_calculator) and contains detailed documentation. 51 | 52 | ## Template engine 53 | A simple template rendering engine that can render templates such as `HELLO,{{ name }}`. The project lives in [demo/template_engine](demo/template_engine) and contains detailed documentation. 54 | 55 | # Acknowledge & Credits 56 | http://hackingoff.com/compilers 57 | 
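# End-to-end example

A minimal end-to-end sketch, adapted from `demo/arithmetic_calculator/main.py` (run it from inside `demo/arithmetic_calculator/` so that the relative `calculator.mbnf` path resolves); it wires the components above together to compute `6 * (2 + 2)`:

```python
import operator

from MicroCompiler.ParserGenerator.Generator import Generator
from MicroCompiler.SkeletonParser import Token
from demo.arithmetic_calculator.arithmetic_calculator import arithmetic_calculator
from demo.arithmetic_calculator.user_level_parser import Parser

# MBNF grammar -> LL(1) parser-construction data (serialized as YAML)
g = Generator("calculator.mbnf")
g.generate()
g.write_yaml("calculator.yaml")

# hand-built token stream for `6 * (2 + 2)`; the trailing Token("") is the EOF marker
token_list = [
    Token("num", 6), Token("*", operator.mul), Token("("),
    Token("num", 2), Token("+", operator.add), Token("num", 2),
    Token(")"), Token(""),
]

result = arithmetic_calculator("calculator.yaml", token_list, Parser())
print(result)  # 24
```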
-------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/demo/__init__.py -------------------------------------------------------------------------------- /demo/arithmetic_calculator/README.md: -------------------------------------------------------------------------------- 1 | # Demo project: arithmetic calculator 2 | ## Workflow 3 | ### Writing the grammar file 4 | This project uses a simplified BNF format to express grammar rules. Users write their own grammar rule file according to the rules of their language and the constraints of the compiler (the LL(1) grammar restrictions, etc.). 5 | 6 | In this example we implement an arithmetic calculator; the grammar file is `calculator.mbnf`. 7 | 8 | ### Compiling the BNF 9 | The project parses the grammar written in the MBNF (.mbnf) format and, following the LL(1) approach, generates a compiler helper file containing the `FirstSet`, `FollowSet`, `Lookahead`, and other information. 10 | 11 | In this example, running the `ll1_grammer_generator.py` script reads the `calculator.mbnf` file, performs the LL(1) grammar checks, and generates the compiler helper file `calculator.yaml`. 12 | 13 | ### Building a custom parser 14 | The compiler only drives the execution flow; for the concrete business logic, users must write a set of parsing functions, one per grammar unit, that implement the actual behavior. 15 | 16 | In this example, intermediate results of the computation are represented as postfix expressions (reverse Polish notation). See `user_level_parser.py` for the details. 17 | 18 | ### Running the interpreter 19 | With the help of the compiler helper information (stored in `calculator.yaml`), the compiler executes the methods of the user parser one by one in topological order. The user parser is responsible for producing an intermediate result in the form of a postfix expression (reverse Polish notation). 20 | Finally, this postfix expression is evaluated by `MicroCompiler/postfix_expression/evaluator.py`, which outputs the final result. 21 | 22 | ## Running the demo 23 | Run `python ./main.py` in this directory to start the demo, which shows how the value of `6 * (2 + 2)` is computed. 24 | 25 | ## Abstract syntax tree 26 | To better illustrate the parsing process, the demo (`python ./main.py`) automatically generates a graph file, `calculator.graphml`, which can be opened with graph tools such as `Cytoscape` to inspect the dependency relations. 27 | 28 | The computation of `6 * (2 + 2)` in this example can be expressed by the following abstract syntax tree: 29 | 30 | ![](calculator.png) 31 | 32 | ## Tests 33 | Run `python ./tests.py` to execute the test cases; see `test_cases.py` for the cases themselves. 34 | 35 | ## Limitations 36 | Since the lexer part is not finished, input currently has to be supplied as hand-built lexing results. -------------------------------------------------------------------------------- /demo/arithmetic_calculator/arithmetic_calculator.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import SkeletonParser 2 | from MicroCompiler.parser_evaluator import ParserEvaluator 3 | from MicroCompiler.postfix_expression.evaluator import Evaluator 4 | from MicroCompiler.parser_evaluator_builder import \ 5 | build_parser_evaluator 6 | 7 | 8 | def arithmetic_calculator(grammar_file, token_list, user_level_parser, graph_file=None): 9 | sp = SkeletonParser(grammar_file, token_list) 10 | sp.parse() 11 | 12 | topological_ordered_list = build_parser_evaluator(sp.call_stack, graph_file) 13 | 14 | parser_evaluator = ParserEvaluator(user_level_parser) 15 | postfix_expr = parser_evaluator.eval(topological_ordered_list) 16 | 17 | evaluator = Evaluator(postfix_expr) 18 | result = evaluator.eval() 19 | 20 | return result 21 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.graphml: -------------------------------------------------------------------------------- [calculator.graphml: 79 lines of GraphML/XML markup, stripped in this dump; calculator.png shows the rendered graph] -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr 
-> Term ExprTwo ; 3 | ExprTwo -> '+' Term ExprTwo 4 | | '-' Term ExprTwo 5 | | ϵ ; 6 | Term -> Factor TermTwo ; 7 | TermTwo -> '*' Factor TermTwo 8 | | '/' Factor TermTwo 9 | | ϵ ; 10 | Factor -> '(' Expr ')' 11 | | 'num' ; -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/demo/arithmetic_calculator/calculator.png -------------------------------------------------------------------------------- /demo/arithmetic_calculator/calculator.yaml: -------------------------------------------------------------------------------- 1 | eof-marker: 2 | error-marker: -- 3 | non-terminals: 4 | - Expr 5 | - Term 6 | - Goal 7 | - Factor 8 | - ExprTwo 9 | - TermTwo 10 | productions: 11 | 0: 12 | Goal: 13 | - Expr 14 | 1: 15 | Expr: 16 | - Term 17 | - ExprTwo 18 | 2: 19 | ExprTwo: 20 | - + 21 | - Term 22 | - ExprTwo 23 | 3: 24 | ExprTwo: 25 | - '-' 26 | - Term 27 | - ExprTwo 28 | 4: 29 | ExprTwo: [] 30 | 5: 31 | Term: 32 | - Factor 33 | - TermTwo 34 | 6: 35 | TermTwo: 36 | - '*' 37 | - Factor 38 | - TermTwo 39 | 7: 40 | TermTwo: 41 | - / 42 | - Factor 43 | - TermTwo 44 | 8: 45 | TermTwo: [] 46 | 9: 47 | Factor: 48 | - ( 49 | - Expr 50 | - ) 51 | 10: 52 | Factor: 53 | - num 54 | start-symbol: Goal 55 | table: 56 | Expr: 57 | (: 1 58 | ): -- 59 | '*': -- 60 | +: -- 61 | '-': -- 62 | /: -- 63 | : -- 64 | num: 1 65 | ExprTwo: 66 | (: -- 67 | ): 4 68 | '*': -- 69 | +: 2 70 | '-': 3 71 | /: -- 72 | : 4 73 | num: -- 74 | Factor: 75 | (: 9 76 | ): -- 77 | '*': -- 78 | +: -- 79 | '-': -- 80 | /: -- 81 | : -- 82 | num: 10 83 | Goal: 84 | (: 0 85 | ): -- 86 | '*': -- 87 | +: -- 88 | '-': -- 89 | /: -- 90 | : -- 91 | num: 0 92 | Term: 93 | (: 5 94 | ): -- 95 | '*': -- 96 | +: -- 97 | '-': -- 98 | /: -- 99 | : -- 100 | num: 5 101 | TermTwo: 102 | (: -- 103 | ): 8 104 | '*': 6 105 | +: 8 106 | '-': 8 107 | /: 7 108 | : 8 109 | num: -- 110 | terminals: 111 | - ( 112 | - '-' 113 | - '*' 114 | - + 115 | - / 116 | - num 117 | - ) 118 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/ll1_grammer_generator.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.ParserGenerator.Generator import Generator 2 | 3 | g = Generator("calculator.mbnf") 4 | g.generate() 5 | 6 | g.write_yaml("calculator.yaml") 7 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/main.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from MicroCompiler.ParserGenerator.Generator import Generator 4 | from MicroCompiler.SkeletonParser import Token 5 | from demo.arithmetic_calculator.arithmetic_calculator import arithmetic_calculator 6 | from demo.arithmetic_calculator.user_level_parser import Parser 7 | 8 | user_level_parser = Parser() 9 | 10 | 11 | def main(token_list): 12 | g = Generator("calculator.mbnf") 13 | g.generate() 14 | g.write_yaml("calculator.yaml") 15 | 16 | result = arithmetic_calculator("calculator.yaml", token_list, user_level_parser, "calculator.graphml") 17 | print(result) 18 | 19 | 20 | if __name__ == "__main__": 21 | # equal to: 6 * (2 + 2) 22 | token_list = [ 23 | Token("num", 6), 24 | Token("*", operator.mul), 25 | Token("("), 26 | Token("num", 2), 27 | 
Token("+", operator.add), 28 | Token("num", 2), 29 | Token(")"), 30 | Token(""), 31 | ] 32 | 33 | main(token_list) 34 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/main_with_lexer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from MicroCompiler.ParserGenerator.Generator import Generator 4 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 5 | from demo.arithmetic_calculator.arithmetic_calculator import arithmetic_calculator 6 | from demo.arithmetic_calculator.user_level_parser import Parser 7 | 8 | user_level_parser = Parser() 9 | 10 | from MicroCompiler.lexer.user_level_lexer_define import lexer_define 11 | 12 | from MicroCompiler.lexer.lexer import lex_analysis 13 | 14 | current_dir = os.path.dirname(os.path.realpath(__file__)) 15 | 16 | bnf_file = os.path.join(current_dir, "calculator.mbnf") 17 | ll1_grammar_file = os.path.join(current_dir, "calculator.yaml") 18 | graph_file = os.path.join(current_dir, "calculator.graphml") 19 | 20 | 21 | def main(input_string): 22 | raw_token_list = [i[1] for i in lex_analysis(input_string, lexer_define)] 23 | # remote whitespace token 24 | token_list = list(filter(lambda x: not isinstance(x, WhiteSpaceToken), raw_token_list)) 25 | # append EOF token 26 | token_list.append(Token("")) 27 | 28 | g = Generator(bnf_file) 29 | g.generate() 30 | g.write_yaml(ll1_grammar_file) 31 | 32 | result = arithmetic_calculator(ll1_grammar_file, token_list, user_level_parser, graph_file) 33 | 34 | return result 35 | 36 | 37 | if __name__ == "__main__": 38 | input_string = "2+3 * 6" 39 | 40 | result = main(input_string) 41 | 42 | print(result) 43 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/output.yaml: -------------------------------------------------------------------------------- 1 | eof-marker: 2 | error-marker: -- 3 | non-terminals: [ExprTwo, Term, TermTwo, Factor, Goal, Expr] 4 | productions: 5 | 0: 6 | ExprTwo: [+, Term, ExprTwo] 7 | 1: 8 | ExprTwo: ['-', Term, ExprTwo] 9 | 2: 10 | ExprTwo: [] 11 | 3: 12 | Goal: [Expr] 13 | 4: 14 | Term: [Factor, TermTwo] 15 | 5: 16 | Factor: [(, Expr, )] 17 | 6: 18 | Factor: [num] 19 | 7: 20 | Factor: [name] 21 | 8: 22 | Expr: [Term, ExprTwo] 23 | 9: 24 | TermTwo: ['*', Factor, TermTwo] 25 | 10: 26 | TermTwo: [/, Factor, TermTwo] 27 | 11: 28 | TermTwo: [] 29 | start-symbol: Goal 30 | table: 31 | Expr: {(: 8, ): --, '*': --, +: --, '-': --, /: --, : --, name: 8, num: 8} 32 | ExprTwo: {(: --, ): 2, '*': --, +: 0, '-': 1, /: --, : 2, name: --, num: --} 33 | Factor: {(: 5, ): --, '*': --, +: --, '-': --, /: --, : --, name: 7, num: 6} 34 | Goal: {(: 3, ): --, '*': --, +: --, '-': --, /: --, : --, name: 3, num: 3} 35 | Term: {(: 4, ): --, '*': --, +: --, '-': --, /: --, : --, name: 4, num: 4} 36 | TermTwo: {(: --, ): 11, '*': 9, +: 11, '-': 11, /: 10, : 11, name: --, num: --} 37 | terminals: [+, '-', (, '*', /, name, ), num] 38 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/terminal_user_interface.py: -------------------------------------------------------------------------------- 1 | from demo.arithmetic_calculator.main_with_lexer import main 2 | 3 | # Continue while true. 4 | while True: 5 | # Get input. 6 | print("> ", end="") 7 | value = input() 8 | 9 | # Break if user types q. 10 | if value == "q": 11 | break 12 | 13 | # echo value. 
14 | print("You typed: ", value) 15 | 16 | result = main(value) 17 | 18 | # print result 19 | print("Result: ", result) 20 | 21 | # Exit message. 22 | print("You quit.") 23 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/test_cases.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Token 2 | import operator 3 | 4 | test_cases = ( 5 | ( 6 | [ 7 | Token("num", 6), 8 | Token("+", operator.add), 9 | Token("num", 2), 10 | Token(""), 11 | ], 12 | 8, 13 | ), 14 | ( 15 | [ 16 | Token("num", 6), 17 | Token("+", operator.add), 18 | Token("num", 2), 19 | Token("+", operator.add), 20 | Token("num", 2), 21 | Token(""), 22 | ], 23 | 10, 24 | ), 25 | ( 26 | [ 27 | Token("num", 6), 28 | Token("-", operator.sub), 29 | Token("num", 2), 30 | Token(""), 31 | ], 32 | 4, 33 | ), 34 | ( 35 | [ 36 | Token("num", 6), 37 | Token("-", operator.sub), 38 | Token("num", 2), 39 | Token("-", operator.sub), 40 | Token("num", 2), 41 | Token(""), 42 | ], 43 | 2, 44 | ), 45 | ( 46 | [ 47 | Token("num", 6), 48 | Token("/", operator.truediv), 49 | Token("num", 2), 50 | Token(""), 51 | ], 52 | 3.0, 53 | ), 54 | ( 55 | [ 56 | Token("num", 12), 57 | Token("/", operator.truediv), 58 | Token("num", 6), 59 | Token("/", operator.truediv), 60 | Token("num", 2), 61 | Token(""), 62 | ], 63 | 1, 64 | ), 65 | ( 66 | [ 67 | Token("num", 6), 68 | Token("*", operator.mul), 69 | Token("num", 2), 70 | Token(""), 71 | ], 72 | 12, 73 | ), 74 | ( 75 | [ 76 | Token("num", 3), 77 | Token("*", operator.mul), 78 | Token("num", 6), 79 | Token("*", operator.mul), 80 | Token("num", 2), 81 | Token(""), 82 | ], 83 | 36, 84 | ), 85 | ( 86 | [ 87 | Token("num", 6), 88 | Token("+", operator.add), 89 | Token("num", 2), 90 | Token("/", operator.truediv), 91 | Token("num", 2), 92 | Token(""), 93 | ], 94 | 7, 95 | ), 96 | ( 97 | [ 98 | Token("num", 6), 99 | Token("/", operator.truediv), 100 | Token("num", 2), 101 | Token("+", operator.add), 102 | Token("num", 2), 103 | Token(""), 104 | ], 105 | 5, 106 | ), 107 | ( 108 | [ 109 | Token("num", 6), 110 | Token("+", operator.add), 111 | Token("num", 2), 112 | Token("*", operator.mul), 113 | Token("num", 2), 114 | Token(""), 115 | ], 116 | 10, 117 | ), 118 | ( 119 | [ 120 | Token("num", 6), 121 | Token("*", operator.mul), 122 | Token("num", 2), 123 | Token("+", operator.add), 124 | Token("num", 2), 125 | Token(""), 126 | ], 127 | 14, 128 | ), 129 | ( 130 | [ 131 | Token("num", 6), 132 | Token("*", operator.mul), 133 | Token("("), 134 | Token("num", 2), 135 | Token("+", operator.add), 136 | Token("num", 2), 137 | Token(")"), 138 | Token(""), 139 | ], 140 | 24, 141 | ), 142 | ( 143 | [ 144 | Token("("), 145 | Token("num", 2), 146 | Token("+", operator.add), 147 | Token("num", 2), 148 | Token(")"), 149 | Token("*", operator.mul), 150 | Token("num", 6), 151 | Token(""), 152 | ], 153 | 24, 154 | ), 155 | ( 156 | [ 157 | Token("("), 158 | Token("num", 2), 159 | Token("+", operator.add), 160 | Token("num", 2), 161 | Token("+", operator.add), 162 | Token("num", 2), 163 | Token(")"), 164 | Token("*", operator.mul), 165 | Token("num", 6), 166 | Token(""), 167 | ], 168 | 36, 169 | ), 170 | ( 171 | [ 172 | Token("("), 173 | Token("num", 2), 174 | Token("+", operator.add), 175 | Token("num", 2), 176 | Token("/", operator.truediv), 177 | Token("num", 2), 178 | Token(")"), 179 | Token("*", operator.mul), 180 | Token("num", 6), 181 | Token(""), 182 | ], 183 | 18, 184 | ), 185 | ( 186 | [ 187 | 
Token("("), 188 | Token("num", 2), 189 | Token("+", operator.add), 190 | Token("num", 2), 191 | Token("/", operator.truediv), 192 | Token("num", 2), 193 | Token(")"), 194 | Token("*", operator.mul), 195 | Token("num", 6), 196 | Token("/", operator.truediv), 197 | Token("("), 198 | Token("num", 2), 199 | Token("+", operator.add), 200 | Token("num", 2), 201 | Token("*", operator.mul), 202 | Token("num", 2), 203 | Token(")"), 204 | Token(""), 205 | ], 206 | 3, 207 | ), 208 | ) 209 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/tests.py: -------------------------------------------------------------------------------- 1 | from demo.arithmetic_calculator.arithmetic_calculator import \ 2 | arithmetic_calculator 3 | from demo.arithmetic_calculator.test_cases import test_cases 4 | from demo.arithmetic_calculator.user_level_parser import Parser 5 | 6 | user_level_parser = Parser() 7 | 8 | for index, (token_list, expected_result) in enumerate(test_cases): 9 | print("working on: ", token_list) 10 | result = arithmetic_calculator("calculator.yaml", token_list, user_level_parser) 11 | if result != expected_result: 12 | print("test failed: at #", index) 13 | print(token_list, result) 14 | break 15 | -------------------------------------------------------------------------------- /demo/arithmetic_calculator/user_level_parser.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Epsilon 2 | from MicroCompiler.parser_builder import ParserBuilder 3 | from MicroCompiler.postfix_expression.operator import PythonBuiltinOperator 4 | 5 | pb = ParserBuilder() 6 | pb.add_generator("get_fallback_method", "fallback") 7 | clazz = pb.generate() 8 | 9 | 10 | class Parser(clazz): 11 | def __init__(self): 12 | self.post_expr = [] 13 | 14 | self.method_name_mapping = { 15 | "(": "open_parenthesis", 16 | ")": "close_parenthesis", 17 | "/": "Division", 18 | "*": "Mul", 19 | "+": "Add", 20 | "-": "Sub", 21 | "": "Start", 22 | } 23 | 24 | def fallback(self, input_): 25 | return input_ 26 | 27 | def flat_list(self, nested_list): 28 | result = [] 29 | 30 | for item in nested_list: 31 | if isinstance(item, list): 32 | flat_item = self.flat_list(item) 33 | result.extend(flat_item) 34 | else: 35 | result.append(item) 36 | 37 | return result 38 | 39 | def flat_nested_postfix_list(self, nested_postfix_list): 40 | return self.flat_list(nested_postfix_list) 41 | 42 | def get_legal_method_name(self, method_name): 43 | return ( 44 | self.method_name_mapping[method_name] 45 | if method_name in self.method_name_mapping 46 | else method_name 47 | ) 48 | 49 | def Division(self, input_): 50 | return PythonBuiltinOperator(input_[0], 2) 51 | 52 | def Mul(self, input_): 53 | return PythonBuiltinOperator(input_[0], 2) 54 | 55 | def Add(self, input_): 56 | return PythonBuiltinOperator(input_[0], 2) 57 | 58 | def Sub(self, input_): 59 | return PythonBuiltinOperator(input_[0], 2) 60 | 61 | def num(self, input_): 62 | return input_[0] 63 | 64 | def ExprTwo(self, input_): 65 | # ExprTwo -> '+' Term ExprTwo 66 | # | '-' Term ExprTwo 67 | # | ϵ ; 68 | 69 | if len(input_) == 1: 70 | # ExprTwo -> ϵ 71 | return Epsilon() 72 | 73 | if isinstance(input_[2], Epsilon): 74 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 75 | # | | 76 | # -> ϵ -> ϵ 77 | return [input_[0], [input_[1]]] 78 | 79 | if isinstance(input_[2], list): 80 | # ExprTwo -> '+' Term ExprTwo | '-' Term ExprTwo 81 | # | | 82 | # -> ['+' num] -> ['-' num] 
83 | 84 | postfix_expr = input_[2][1] 85 | 86 | postfix_expr = postfix_expr[:] # shallow copy 87 | 88 | head = postfix_expr.pop(0) 89 | operator = input_[2][0] 90 | 91 | postfix_expr.insert(0, operator) 92 | postfix_expr.insert(0, head) 93 | postfix_expr.insert(0, input_[1]) 94 | 95 | return [input_[0], postfix_expr] 96 | 97 | def TermTwo(self, input_): 98 | # TermTwo -> '*' Factor TermTwo 99 | # | '/' Factor TermTwo 100 | # | ϵ ; 101 | 102 | if len(input_) == 1: 103 | # TermTwo -> ϵ 104 | return Epsilon() 105 | 106 | if isinstance(input_[2], Epsilon): 107 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 108 | # | | 109 | # -> ϵ -> ϵ 110 | # 111 | # output: ['*', postfix_expr] 112 | # |----------| 113 | # type: list 114 | 115 | return [input_[0], [input_[1]]] 116 | 117 | if isinstance(input_[2], list): 118 | # TermTwo -> '*' Factor TermTwo | '/' Factor TermTwo 119 | # | | 120 | # -> ['*' num] -> ['/' num] 121 | 122 | # input_: ['*', Factor, ['/', [head, rest_of_postfix_expr]]] 123 | # |---------------------------| 124 | # post_expr (type: list) 125 | # 126 | # output: ['*', [Factor, head, '/', rest_of_postfix_expr]] 127 | 128 | postfix_expr = input_[2][1] 129 | 130 | postfix_expr = postfix_expr[:] # shallow copy 131 | 132 | head = postfix_expr.pop(0) 133 | operator = input_[2][0] 134 | 135 | postfix_expr.insert(0, operator) 136 | postfix_expr.insert(0, head) 137 | postfix_expr.insert(0, input_[1]) 138 | 139 | return [input_[0], postfix_expr] 140 | 141 | def Factor(self, input_): 142 | if len(input_) == 1: 143 | return input_[0] 144 | 145 | if len(input_) == 3: 146 | return input_[1] 147 | 148 | def Term(self, input_): 149 | if isinstance(input_[1], Epsilon): 150 | # Term -> Factor TermTwo ; 151 | # | 152 | # -> ϵ 153 | return input_[0] 154 | 155 | # Term -> Factor TermTwo ; 156 | # | 157 | # -> ['*', postfix_expr] 158 | 159 | # input_: [Factor, ['/', [head, rest_of_postfix_expr]]] 160 | # |---------------------------| 161 | # post_expr (type: list) 162 | # 163 | # output: [Factor, head, '/', rest_of_postfix_expr] 164 | 165 | postfix_expr = input_[1][1] 166 | 167 | postfix_expr = postfix_expr[:] # shallow copy 168 | 169 | head = postfix_expr.pop(0) 170 | operator = input_[1][0] 171 | 172 | postfix_expr.insert(0, operator) 173 | postfix_expr.insert(0, head) 174 | postfix_expr.insert(0, input_[0]) 175 | 176 | return postfix_expr 177 | 178 | def Expr(self, input_): 179 | if isinstance(input_[1], Epsilon): 180 | # Expr -> Term ExprTwo ; 181 | # | 182 | # -> ϵ 183 | return input_[0] 184 | 185 | # Expr -> Term ExprTwo ; 186 | # | 187 | # -> [operator, postfix_expr] 188 | 189 | # input_: [Factor, ['/', [head, rest_of_postfix_expr]]] 190 | # |---------------------------| 191 | # post_expr (type: list) 192 | # 193 | # output: [Factor, head, '/', rest_of_postfix_expr] 194 | postfix_expr = input_[1][1] 195 | 196 | postfix_expr = postfix_expr[:] # shallow copy 197 | 198 | head = postfix_expr.pop(0) 199 | 200 | operator = input_[1][0] 201 | 202 | postfix_expr.insert(0, operator) 203 | postfix_expr.insert(0, head) 204 | postfix_expr.insert(0, input_[0]) 205 | 206 | return postfix_expr 207 | 208 | def Goal(self, input_): 209 | expr = input_[0] 210 | flat_postfix_expr = self.flat_nested_postfix_list(expr) 211 | 212 | return flat_postfix_expr 213 | 214 | def Start(self, input_): 215 | return input_[0] 216 | -------------------------------------------------------------------------------- /demo/template_engine/.gitignore: -------------------------------------------------------------------------------- 1
| syntax.graphml 2 | syntax.yaml -------------------------------------------------------------------------------- /demo/template_engine/README.md: -------------------------------------------------------------------------------- 1 | # Demo project: template engine 2 | 3 | ## Final goal 4 | Implement a template system similar to jinja (https://github.com/pallets/jinja) or inja (https://github.com/pantor/inja). 5 | 6 | ## Current progress 7 | * Variable substitution is implemented 8 | 9 | ## Usage example 10 | 11 | ```python 12 | from demo.template_engine.render_with_string import render_with_string 13 | 14 | result = render_with_string("HELLO,{{ name }}", {"name": "Xiaoquan"}) 15 | print(result) 16 | ``` 17 | 18 | Output 19 | 20 | ```text 21 | HELLO,Xiaoquan 22 | ``` 23 | 24 | ## Tests 25 | See `render_with_string.py` and `render_with_tokens.py` -------------------------------------------------------------------------------- /demo/template_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howl-anderson/MicroCompiler/53a3901486f455b235619f7843d93425c50b563c/demo/template_engine/__init__.py -------------------------------------------------------------------------------- /demo/template_engine/render_engine.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import SkeletonParser 2 | from MicroCompiler.parser_evaluator import ParserEvaluator 3 | from MicroCompiler.parser_evaluator_builder import build_parser_evaluator 4 | 5 | 6 | def render_engine(grammar_file, token_list, user_level_parser, graph_file=None): 7 | sp = SkeletonParser(grammar_file, token_list) 8 | sp.parse() 9 | 10 | topological_ordered_list = build_parser_evaluator(sp.call_stack, graph_file) 11 | 12 | parser_evaluator = ParserEvaluator(user_level_parser) 13 | result = parser_evaluator.eval(topological_ordered_list) 14 | 15 | return result 16 | -------------------------------------------------------------------------------- /demo/template_engine/render_with_string.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 2 | from MicroCompiler.lexer.lexer import lex_analysis 3 | from demo.template_engine.render_with_tokens import render_with_tokens 4 | from demo.template_engine.user_level_lexer_define import lexer_define 5 | 6 | 7 | def render_with_string(input_string, data): 8 | raw_token_list = [i[1] for i in lex_analysis(input_string, lexer_define)] 9 | # remove whitespace tokens 10 | token_list = list( 11 | filter(lambda x: not isinstance(x, WhiteSpaceToken), raw_token_list) 12 | ) 13 | # append EOF token 14 | token_list.append(Token("")) 15 | 16 | return render_with_tokens(token_list, data) 17 | 18 | 19 | if __name__ == "__main__": 20 | input_string = "HELLO,{{ name }}" 21 | 22 | result = render_with_string(input_string, {"name": "Xiaoquan"}) 23 | 24 | print(result) 25 | -------------------------------------------------------------------------------- /demo/template_engine/render_with_tokens.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.ParserGenerator.Generator import Generator 2 | from MicroCompiler.SkeletonParser import Token 3 | from demo.template_engine.render_engine import render_engine 4 | from demo.template_engine.user_level_parser import Parser 5 | 6 | 7 | def render_with_tokens(token_list, data): 8 | # BNF to LL1 9 | g = Generator("syntax.mbnf") 10 | g.generate() 11 | g.write_yaml("syntax.yaml") 12 | 13 | # Node walker 14
| user_level_parser = Parser(data) 15 | 16 | return render_engine( 17 | "syntax.yaml", token_list, user_level_parser, "syntax.graphml" 18 | ) 19 | 20 | 21 | if __name__ == "__main__": 22 | # equal to: `Hello, {{ name }}` 23 | token_list = [ 24 | Token("const", "Hello, "), 25 | Token("{{", None), 26 | Token("var", "name"), 27 | Token("}}", None), 28 | Token(""), 29 | ] 30 | 31 | result = render_with_tokens(token_list, {"name": "Xiaoquan"}) 32 | print(result) 33 | -------------------------------------------------------------------------------- /demo/template_engine/syntax.mbnf: -------------------------------------------------------------------------------- 1 | Goal -> Expr ; 2 | Expr -> Term TermPlus 3 | | ϵ ; 4 | TermPlus -> Term TermPlus 5 | | ϵ ; 6 | Term -> 'const' 7 | | Block ; 8 | Block -> VarBlock ; 9 | VarBlock -> '{{' 'var' '}}' ; 10 | -------------------------------------------------------------------------------- /demo/template_engine/user_level_lexer_define.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.SkeletonParser import Token, WhiteSpaceToken 2 | 3 | lexer_define = [ 4 | # token type, token regex, token action 5 | [ 6 | "var", 7 | ( 8 | r"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)+" 9 | r"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|0|1|2|3|4|5|6|7|8|9)*" 10 | ), 11 | lambda x: Token("var", str(x)), 12 | ], 13 | [ 14 | "const", 15 | r"(A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|,)+", 16 | lambda x: Token("const", str(x)), 17 | ], 18 | ["{{", "{{", lambda x: Token("{{", None)], 19 | ["}}", "}}", lambda x: Token("}}", None)], 20 | ["white space", r" +", lambda x: WhiteSpaceToken(x)], 21 | ] 22 | -------------------------------------------------------------------------------- /demo/template_engine/user_level_parser.py: -------------------------------------------------------------------------------- 1 | from MicroCompiler.parser_builder import ParserBuilder 2 | 3 | pb = ParserBuilder() 4 | pb.add_generator("get_fallback_method", "fallback") 5 | clazz = pb.generate() 6 | 7 | 8 | class Parser(clazz): 9 | def __init__(self, data: dict): 10 | self.method_name_mapping = { 11 | "": "Start", 12 | } 13 | self.data = data 14 | 15 | def fallback(self, input_): 16 | return "" 17 | 18 | def get_legal_method_name(self, method_name): 19 | return ( 20 | self.method_name_mapping[method_name] 21 | if method_name in self.method_name_mapping 22 | else method_name 23 | ) 24 | 25 | def const(self, input_): 26 | return input_[0] 27 | 28 | def var(self, input_): 29 | key = input_[0] 30 | return self.data[key] 31 | 32 | def VarBlock(self, input_): 33 | return input_[1] 34 | 35 | def Block(self, input_): 36 | return input_[0] 37 | 38 | def TermPlus(self, input_): 39 | if len(input_) == 1: 40 | return "" 41 | else: 42 | return "".join(input_) 43 | 44 | def Term(self, input_): 45 | return input_[0] 46 | 47 | def Expr(self, input_): 48 | if len(input_) == 1: 49 | return "" 50 | else: 51 | return "".join(input_) 52 | 53 | def Goal(self, input_): 54 | return input_[0] 55 | 56 | def Start(self, input_): 57 | return input_[0] 58 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # References 2 | * https://interpreterbook.com/ 3 | * http://craftinginterpreters.com/ 4 | * https://compilerbook.com/ -------------------------------------------------------------------------------- /requirements.txt:
-------------------------------------------------------------------------------- 1 | PyYAML 2 | MicroRegEx -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="MicroCompiler", 5 | version="0.0.1", 6 | packages=[ 7 | "MicroCompiler", 8 | "MicroCompiler.Lookahead", 9 | "MicroCompiler.ParserGenerator", 10 | ], 11 | url="https://github.com/howl-anderson/MicroCompiler", 12 | license="MIT", 13 | author="Xiaoquan Kong", 14 | install_requires=["pyyaml", "MicroRegEx"], 15 | author_email="u1mail2me@gmail.com", 16 | description="A micro compiler project that provides LL/LR/LALR syntax parsers", 17 | ) 18 | --------------------------------------------------------------------------------