├── .gitignore
├── Grammar.py
├── LICENSE
├── README.md
├── SLR_Automata.py
├── SLR_Table.py
├── Scanner.py
├── Symbol_Table.py
├── Token.py
├── format.sh
├── input
    ├── grammar.txt
    ├── grammar_assign.txt
    ├── grammar_control.txt
    ├── grammar_define.txt
    ├── grammar_expression.txt
    ├── grammar_raw.txt
    ├── input.txt
    ├── input_assign.txt
    ├── input_control.txt
    ├── input_define.txt
    ├── input_expression.txt
    ├── input_raw.txt
    └── input_scanner.txt
├── main.py
├── test_grammar.py
└── test_scanner.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | **/.DS_Store
 2 | **/__pycache__
 3 | **/.vscode
 4 | **/.idea
 5 | Pipfile
 6 | Pipfile.lock
 7 | *_test.txt
 8 | action_table.json
 9 | goto_table.json
10 | /output/*


--------------------------------------------------------------------------------
/Grammar.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | from rich.console import Console
 4 | 
# Module-level rich console; used below to report unknown grammar symbols.
console = Console()
 6 | 
 7 | 
 8 | class Grammar_Item:
 9 |     is_symbol: bool  # True for terminal symbol, False for Variable
10 |     value: str
11 | 
12 |     def __init__(self, is_symbol: bool, value: str) -> None:
13 |         self.is_symbol = is_symbol
14 |         self.value = value
15 | 
16 | 
class Grammar_Production:
    """A production rule: left-hand side, ordered right-hand-side items and attached code."""

    from_state: str  # left-hand side of the production
    items: List[Grammar_Item]  # right-hand side, in order
    code: str  # text attached to the production; empty until assigned

    def __init__(self, from_state: str) -> None:
        """Create a production for ``from_state`` with no items and no code."""
        self.code = ""
        self.items = []
        self.from_state = from_state

    def add(self, is_symbol: bool, value: str) -> None:
        """Append a new right-hand-side item built from ``is_symbol`` and ``value``."""
        new_item = Grammar_Item(is_symbol, value)
        self.items.append(new_item)

    def __str__(self) -> str:
        """Render the rule as ``LHS → item item ...`` using each item's ``value``."""
        right_side = " ".join(item.value for item in self.items)
        return f"{self.from_state} → {right_side}"
32 | 
33 | 
class Grammar:
    """In-memory grammar: terminals, variables, start symbol and productions.

    ``read`` fills the instance from a grammar file and ``save`` writes a
    summary to ``output/grammar.txt``.
    """

    start_symbol: str  # first entry of ``variable_symbols``
    terminal_symbols: List[str]
    variable_symbols: List[str]
    production_list: List[Grammar_Production]

    def __init__(self) -> None:
        self.production_list = list()

    def save(self) -> None:
        """Write the symbols and every production to ``output/grammar.txt``."""
        # Productions contain "→" (and possibly "ε"), so the encoding is fixed to
        # UTF-8 instead of relying on the platform default.
        with open("output/grammar.txt", "w", encoding="utf-8") as f:
            f.write(f"Start Symbol: {self.start_symbol}\n")
            f.write(f"Terminal Symbols: {' '.join(self.terminal_symbols)}\n")
            f.write(f"Variable Symbols: {' '.join(self.variable_symbols)}\n")
            f.write("Productions:\n")
            for production in self.production_list:
                f.write(f"{production}\n")

    def read(self, path: str) -> None:
        """Load the grammar stored at ``path``.

        The text is split on ``"\\n@ "``. The first chunk holds two
        space-separated lines (terminals, then variables) whose first token is
        skipped; the first variable becomes the start symbol. Every later chunk
        is one production: its first line is ``LHS → sym sym ...`` and the
        remaining lines are kept verbatim as the production's ``code``.

        Raises:
            SystemExit: with status -1 when a production uses a symbol that is
                neither a declared terminal, a declared variable, nor "ε".
        """
        # The grammar uses "→" and "ε"; read it as UTF-8 on every platform.
        with open(path, "r", encoding="utf-8") as f:
            blocks = f.read().split("\n@ ")

        symbol_lines: List[str] = blocks[0].split("\n")
        production_blocks: List[str] = blocks[1:]

        self.terminal_symbols = symbol_lines[0].split(" ")[1:]
        self.variable_symbols = symbol_lines[1].split(" ")[1:]
        self.start_symbol = self.variable_symbols[0]

        # Build the lookup sets once instead of re-creating a list per symbol.
        known_terminals = set(self.terminal_symbols) | {"ε"}
        known_variables = set(self.variable_symbols)

        for block in production_blocks:
            lines = block.split("\n")
            from_state, production = lines[0].split(" → ")

            current_grammar_production = Grammar_Production(from_state)
            current_grammar_production.code = "\n".join(lines[1:])

            for item in production.split(" "):
                if item in known_terminals:
                    current_grammar_production.add(True, item)
                elif item in known_variables:
                    current_grammar_production.add(False, item)
                else:
                    console.print(f"Unknown symbol '{item}' in grammar file", style="bold red")
                    # Same effect as exit(-1) without depending on the
                    # site-provided ``exit`` helper being installed.
                    raise SystemExit(-1)

            self.production_list.append(current_grammar_production)
83 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 cometeme, M010K
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # compilers 编译原理 - 简单类 C 编译器
  2 | 
  3 | 本项目实现了一个简单的类 C 编译器，能够分析简单类 C 语言风格的程序代码。如声明语句、赋值语句、表达式、if while 控制语句等，进行语法分析并生成相应的中间代码（三地址代码）。
  4 | 
  5 | 提供了一个命令行交互程序，可以输出词法分析、语法分析、语义分析及中间代码生成过程中的各种表格以及数据集合。
  6 | 
  7 | 输入文法文件可以根据需要进行修改，同时也可以添加自定义的语义动作，从而能够让程序分析不同的语言。
  8 | 
  9 | ## 运行说明
 10 | 
 11 | 运行说明（**需要确保 python 版本为 3.7**）：
 12 | 
 13 | 1. 进入项目文件夹
 14 | 
 15 | （初次使用需要创建一个空的 output 目录）
 16 | 
 17 | 2. 安装 rich 库（若没有安装）
 18 | 
 19 | ```shell
 20 | pip install rich
 21 | ```
 22 | 
 23 | 3. 运行`main.py`程序
 24 | 
 25 | ```shell
 26 | python main.py
 27 | ```
 28 | 
 29 | ### 操作说明
 30 | 
 31 | 运行`main.py`后，命令行中会生成引导菜单（如下所示）：
 32 | 
 33 | ```shell
 34 | ---------------------------------------------------
 35 | Enter a number to show detail, or enter 'q' to quit
 36 | 
 37 | 0 - Grammar
 38 | 1 - Input Code
 39 | 2 - Scanner States
 40 | 3 - SLR States
 41 | 4 - Token Table
 42 | 5 - Symbol Table
 43 | 6 - First Set
 44 | 7 - Follow Set
 45 | 8 - Closure Set
 46 | 9 - SLR Table (Action/Goto Table)
 47 | 10 - Output Code
 48 | ---------------------------------------------------
 49 | ```
 50 | 
 51 | 下面对各个选项进行说明：
 52 | 
 53 | | 选项 |                        功能                        |
 54 | | :--: | :------------------------------------------------: |
 55 | |  0   |                   输出给定的文法                   |
 56 | |  1   |                 输出给定的程序输入                 |
 57 | |  2   |                 输出词法分析的结果                 |
 58 | |  3   | 输出SLR语法分析过程（包含分析栈以及移入/归约动作） |
 59 | |  4   |                   输出 Token 串表                    |
 60 | |  5   |                     输出符号表                     |
 61 | |  6   |                   输出 First 集合                    |
 62 | |  7   |                   输出 Follow 集合                   |
 63 | |  8   |                     输出项集族                     |
 64 | |  9   |        输出 SLR 分析表（包括 action 和 goto 表）         |
 65 | |  10  |                 输出生成的中间代码                 |
 66 | |  q   |                      退出程序                      |
 67 | 
 68 | 
 69 | ## 工程文件说明
 70 | 
 71 | 项目整体目录结构如下：
 72 | 
 73 | ```shell
 74 | .
 75 | ├── Grammar.py
 76 | ├── SLR_Automata.py
 77 | ├── SLR_Table.py
 78 | ├── Scanner.py
 79 | ├── Symbol_Table.py
 80 | ├── Token.py
 81 | ├── action_table.json
 82 | ├── format.sh
 83 | ├── goto_table.json
 84 | ├── input
 85 | │   ├── grammar.txt
 86 | │   ├── grammar_assign.txt
 87 | │   ├── grammar_control.txt
 88 | │   ├── grammar_define.txt
 89 | │   ├── grammar_expression.txt
 90 | │   ├── grammar_raw.txt
 91 | │   ├── input.txt
 92 | │   ├── input_assign.txt
 93 | │   ├── input_control.txt
 94 | │   ├── input_define.txt
 95 | │   ├── input_expression.txt
 96 | │   ├── input_raw.txt
 97 | │   └── input_scanner.txt
 98 | ├── main.py
 99 | ├── output
100 | │   ├── closure_set.txt
101 | │   ├── code.csv
102 | │   ├── first_set.txt
103 | │   ├── follow_set.txt
104 | │   ├── grammar.txt
105 | │   ├── scanner_states.csv
106 | │   ├── slr_states.csv
107 | │   ├── slr_table.csv
108 | │   ├── symbol_table.csv
109 | │   └── token_table.csv
110 | ├── test_grammar.py
111 | └── test_scanner.py
112 | ```
113 | 
114 | ### 输入/输出目录
115 | 
116 | |  文件/文件夹  |                        说明                        |
117 | | :-----------: | :------------------------------------------------: |
118 | | input 文件夹  |           程序输入（文法、待分析的程序）           |
119 | | output 文件夹 | 词法、语法、中间代码生成时产生的所有集合以及表结构 |
120 | 
121 | 
122 | 
123 | ### 词法分析相关
124 | 
125 | |   文件/文件夹   |       说明       |
126 | | :-------------: | :--------------: |
127 | |   Scanner.py    | 词法分析器的实现 |
128 | | test_scanner.py |  词法分析器测试  |
129 | |    Token.py     |    Token 相关     |
130 | | Symbol_Table.py |    符号表相关    |
131 | 
132 | ### 语法分析/中间代码生成相关
133 | 
134 | |   文件/文件夹   |                        说明                        |
135 | | :-------------: | :------------------------------------------------: |
136 | |  SLR_Table.py   |           SLR 语法分析表以及辅助函数生成            |
137 | | SLR_Automata.py | SLR 语法分析的实现 + 中间代码生成部分语义动作的实现 |
138 | |   Grammar.py    |          文法数据结构的定义与文法文件的读取          |
139 | | test_grammar.py |                   语法分析器测试                   |
140 | 
141 | ### 主控函数
142 | 
143 | | 文件/文件夹 |        说明        |
144 | | :---------: | :----------------: |
145 | |   main.py   | 程序入口与控制逻辑 |
146 | 


--------------------------------------------------------------------------------
/SLR_Automata.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import json
  3 | from typing import Dict, List, Union
  4 | 
  5 | from rich.console import Console
  6 | from rich.table import Table
  7 | 
  8 | from Grammar import Grammar, Grammar_Production
  9 | from Scanner import Scanner
 10 | from Symbol_Table import Symbol_Table, Table_Item, item_type_translate
 11 | from Token import Token, Token_Type
 12 | 
# Module-level rich console; SLR_Automata prints its tables and errors through it.
console = Console()
 14 | 
 15 | 
 16 | class SLR_Automata:
 17 |     scanner: Scanner
 18 |     symbol_table: Symbol_Table
 19 |     grammar: Grammar
 20 |     action_table: List[Dict[str, str]] = list()
 21 |     goto_table: List[Dict[str, int]] = list()
 22 |     state_output: List[List[str]]
 23 |     code_output: List[List[str]]
 24 |     current_line: int
 25 | 
 26 |     def __init__(self, scanner: Scanner, grammar: Grammar) -> None:
 27 |         self.scanner = scanner
 28 |         self.symbol_table = scanner.symbol_table
 29 |         self.grammar = grammar
 30 | 
 31 |         with open("action_table.json", "r") as f:
 32 |             self.action_table = json.loads(f.read())
 33 |         with open("goto_table.json", "r") as f:
 34 |             self.goto_table = json.loads(f.read())
 35 | 
 36 |         # init state output
 37 |         self.state_output = []
 38 |         self.code_output = []
 39 | 
 40 |     def print_state(self) -> None:
 41 |         output_table = Table(
 42 |             show_header=True,
 43 |             header_style="bold",
 44 |         )
 45 | 
 46 |         output_table.add_column("Token", justify="center")
 47 |         output_table.add_column("Stack", justify="left")
 48 |         output_table.add_column("Action", justify="center")
 49 |         output_table.add_column("Production", justify="left")
 50 | 
 51 |         for row in self.state_output:
 52 |             output_table.add_row(*row)
 53 | 
 54 |         console.print("SLR State:", style="bold")
 55 |         console.print(output_table)
 56 | 
 57 |     def print_code(self) -> None:
 58 |         output_table = Table(
 59 |             show_header=True,
 60 |             header_style="bold",
 61 |         )
 62 | 
 63 |         output_table.add_column("Line", justify="center")
 64 |         output_table.add_column("Code", justify="left")
 65 | 
 66 |         for row in self.code_output:
 67 |             output_table.add_row(*row)
 68 | 
 69 |         console.print("Code:", style="bold")
 70 |         console.print(output_table)
 71 | 
 72 |     def save(self) -> None:
 73 |         with open("output/slr_states.csv", "w") as f:
 74 |             writer = csv.writer(f)
 75 |             writer.writerow(["Token", "Stack", "Action", "Production"])
 76 |             for row in self.state_output:
 77 |                 writer.writerow(row)
 78 |         with open("output/code.csv", "w") as f:
 79 |             writer = csv.writer(f)
 80 |             writer.writerow(["Line", "Code"])
 81 |             for row in self.code_output:
 82 |                 writer.writerow(row)
 83 | 
 84 |     def gen_code(self, code: str) -> None:
 85 |         self.code_output.append([str(self.current_line), code])
 86 |         self.current_line += 1
 87 | 
 88 |     def gen_variable(self, name: str) -> int:
 89 |         item = Table_Item()
 90 |         item.name = name
 91 |         item.variable = True
 92 |         entry = self.symbol_table.add_item(item)
 93 |         return entry
 94 | 
 95 |     def make_list(self, inst: int) -> List:
 96 |         return [inst]
 97 | 
 98 |     def merge(self, l1: List, l2: List) -> List:
 99 | 
100 |         l = list()
101 |         l.extend(l1)
102 | 
103 |         for inst in l2:
104 |             if inst not in l:
105 |                 l.append(inst)
106 | 
107 |         return l
108 | 
109 |     def back_patch(self, l: List, target: int) -> None:
110 |         for inst in l:
111 |             # back patch all blank field
112 |             if inst < len(self.code_output) - 1 and not self.code_output[inst][1][-1].isdigit():
113 |                 if len(self.code_output[inst][1]) >= 5 and self.code_output[inst][1][-5:-1] == "goto":
114 |                     self.code_output[inst][1] += str(target)
115 | 
116 |     def run(self, debug: bool = True) -> None:
117 |         stack: List[int] = [0]
118 |         attributes: List[Dict[str, Union[str, int]]] = [dict()]
119 |         token: Union[Token, None] = self.scanner.get_next() if self.scanner.has_next() else None
120 |         token_string: str = "$" if token is None else token.to_string()
121 |         self.current_line = 0
122 | 
123 |         # run automata
124 |         while True:
125 |             assert len(stack) == len(attributes)
126 | 
127 |             if token_string not in self.action_table[stack[-1]]:
128 |                 self.print_state()
129 |                 console.print(f"Current token_string: {token_string}")
130 |                 console.print(f"Current stack: {stack}")
131 |                 console.print(f"Action Table [{stack[-1]}]: {self.action_table[stack[-1]]}")
132 |                 console.print("SLR ERROR", style="bold red")
133 |                 exit(-1)
134 | 
135 |             action: str = self.action_table[stack[-1]][token_string]
136 | 
137 |             if debug:
138 |                 console.print(f"\ntoken: {token_string}")
139 |                 console.print(f"stack: {stack}")
140 |                 console.print(f"attributes: {attributes}")
141 |                 console.print(f"action: {action}")
142 | 
143 |             if action == "acc":
144 |                 self.state_output.append([token_string, str(stack), action, ""])
145 |                 break
146 | 
147 |             action_type: str = action[0]
148 |             action_value: int = int(action[1:])
149 | 
150 |             if action_type == "s":
151 |                 # shift in next state
152 |                 self.state_output.append([token_string, str(stack), action, ""])
153 | 
154 |                 stack.append(action_value)
155 | 
156 |                 if token.token_type in [Token_Type.ID, Token_Type.CONST]:
157 |                     attributes.append({"entry": -1 if token.content is None else token.content})
158 |                 else:
159 |                     attributes.append(dict())
160 | 
161 |                 token: Union[Token, None] = self.scanner.get_next() if self.scanner.has_next() else None
162 |                 token_string: str = "$" if token is None else token.to_string()
163 |             elif action_type == "r":
164 |                 # reduced by production
165 |                 current_production: Grammar_Production = self.grammar.production_list[action_value]
166 |                 self.state_output.append([token_string, str(stack), action, str(current_production)])
167 | 
168 |                 if debug:
169 |                     console.print(f"production: {current_production}")
170 |                     print(f"code:\n{current_production.code}\n")
171 | 
172 |                 length: int = len(current_production.items)
173 |                 current_attribute = dict()
174 | 
175 |                 # run generation code
176 |                 try:
177 |                     exec(current_production.code)
178 |                 except Exception as e:
179 |                     console.print("Execute Generation Faild!", style="bold red")
180 |                     self.print_state()
181 |                     print(f"Production: {current_production}\n\n")
182 |                     print(f"code:\n\n{current_production.code}\n")
183 |                     exec(current_production.code)
184 | 
185 |                 # solve for not A → ε
186 |                 if not (current_production.items[0].is_symbol and current_production.items[0].value == "ε"):
187 |                     stack = stack[:-length]
188 |                     attributes = attributes[:-length]
189 | 
190 |                 reduce_state: str = current_production.from_state
191 |                 stack.append(self.goto_table[stack[-1]][reduce_state])
192 |                 attributes.append(current_attribute)
193 | 
194 |             else:
195 |                 self.print_state()
196 |                 console.print(f"Unknown action type {action_type}!", style="bold red")
197 |                 exit(-1)
198 | 


--------------------------------------------------------------------------------
/SLR_Table.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import json
  3 | from copy import deepcopy
  4 | from pprint import pformat
  5 | from typing import Dict, List
  6 | 
  7 | from rich.console import Console
  8 | from rich.table import Table
  9 | 
 10 | from Grammar import Grammar
 11 | 
# Module-level rich console; used below to print the FIRST/FOLLOW sets and item sets.
console = Console()
 13 | 
 14 | 
 15 | class ItemSet:
 16 |     def __init__(self):
 17 |         self.index = 0
 18 |         self.closure_items = set()  # closure of one item
 19 |         self.transfer = dict()  # store transfer dict
 20 | 
 21 |     def get_index(self):
 22 |         return self.index
 23 | 
 24 |     def set_index(self, index: int):
 25 |         self.index = index
 26 | 
 27 |     def add_trans(self, symbol: str, next_index: int):
 28 |         self.transfer[symbol] = next_index
 29 | 
 30 |     def exists(self, item: tuple) -> bool:
 31 |         return item in self.closure_items
 32 | 
 33 |     def add(self, item: tuple) -> None:
 34 |         self.closure_items.add(item)
 35 | 
 36 |     def equal(self, other) -> bool:
 37 |         if len(self.closure_items) != len(other.closure_items):
 38 |             return False
 39 | 
 40 |         for item in self.closure_items:
 41 |             if item not in other.closure_items:
 42 |                 return False
 43 | 
 44 |         return True
 45 | 
 46 | 
 47 | class ClosureFamily:
 48 |     def __init__(self):
 49 |         self.clourse_set = list()
 50 | 
 51 |     def exists(self, item: ItemSet) -> bool:
 52 |         contain = False
 53 |         for itemset in self.clourse_set:
 54 |             if itemset.equal(item):
 55 |                 contain = True
 56 |                 break
 57 | 
 58 |         return contain
 59 | 
 60 |     def indexOf(self, item: ItemSet) -> int:
 61 |         index = -1
 62 |         for itemset in self.clourse_set:
 63 |             if itemset.equal(item):
 64 |                 index = itemset.get_index()
 65 |                 break
 66 | 
 67 |         return index
 68 | 
 69 |     def add(self, item: ItemSet) -> None:
 70 |         self.clourse_set.append(item)
 71 | 
 72 | 
 73 | class SLR_Table:
 74 |     def __init__(self, grammar: Grammar) -> None:
 75 |         self.grammar = grammar
 76 | 
 77 |         self.start_symbol = grammar.start_symbol
 78 |         self.action_symbols = grammar.terminal_symbols
 79 |         self.goto_symbols = grammar.variable_symbols[1:]
 80 |         self.all_symbols = self.goto_symbols + self.action_symbols
 81 | 
 82 |         self.all_items = list()
 83 |         self.first_items = dict()  # item with first dot, key is from_state
 84 |         self.gen_all_items()
 85 | 
 86 |         self.C = self.gen_clourse_set([(0, 0)])  # clourse set
 87 | 
 88 |         self.first = self.first_set()
 89 |         self.follow = self.follow_set()
 90 | 
 91 |     def print_first_set(self) -> None:
 92 |         console.print("First Set:", style="bold")
 93 |         console.print(self.first)
 94 | 
 95 |     def print_follow_set(self) -> None:
 96 |         console.print("Follow Set:", style="bold")
 97 |         console.print(self.follow)
 98 | 
 99 |     def print_closure_set(self):
100 |         console.print(f"Num of states: {len(self.C.clourse_set)}", style="bold")
101 |         for index, clourse in enumerate(self.C.clourse_set):
102 |             output_table = Table(
103 |                 show_header=True,
104 |                 header_style="bold",
105 |             )
106 |             output_table.add_column(f"I{clourse.index}", justify="left")
107 |             for item in clourse.closure_items:
108 |                 output_table.add_row(self.get_item(item))
109 |             # print(clourse.transfer)
110 |             console.print(output_table)
111 | 
112 |     def save(self) -> None:
113 |         with open("output/first_set.txt", "w") as f:
114 |             for k in self.first:
115 |                 f.write(f"first({k}) = {self.first[k]}\n")
116 | 
117 |         with open("output/follow_set.txt", "w") as f:
118 |             for k in self.follow:
119 |                 f.write(f"follow({k}) = {self.follow[k]}\n")
120 | 
121 |         with open("output/closure_set.txt", "w") as f:
122 |             for index, clourse in enumerate(self.C.clourse_set):
123 |                 f.write(f"I{index}\n")
124 |                 for item in clourse.closure_items:
125 |                     f.write(f"{self.get_item(item)}\n")
126 |                 f.write("\n")
127 | 
128 |         save_slr_table(self.grammar)
129 | 
130 |     def get_item(self, item: tuple) -> str:
131 |         production = self.grammar.production_list[item[0]]
132 |         right = [it.value for it in production.items]
133 | 
134 |         if "ε" in right:  # remove ε
135 |             right.remove("ε")
136 | 
137 |         right.insert(item[1], ".")
138 |         return f"{production.from_state} → {' '.join(right)}"
139 | 
140 |     def contain_varepsilon(self, symbol: str) -> bool:
141 |         contain = False
142 |         if self.first_items.get(symbol) is None:
143 |             return False
144 | 
145 |         for indices in self.first_items.get(symbol):
146 |             for index in indices:
147 |                 for item in self.grammar.production_list[index].items:
148 |                     if item.value == "ε":
149 |                         contain = True
150 |                         break
151 | 
152 |         return contain
153 | 
154 |     def get_first(self, first: dict, symbol: str):
155 |         indices = [production[0] for production in self.first_items[symbol]]  # production indices
156 |         for index in indices:
157 |             item = self.grammar.production_list[index].items[0]
158 |             if not item.is_symbol and item.value != symbol:
159 |                 first[symbol] |= set(self.get_first(first, item.value))  # recurse
160 | 
161 |         return first.get(symbol)
162 | 
163 |     def first_set(self):
164 |         first = dict()
165 | 
166 |         for item in self.action_symbols:
167 |             first[item] = item
168 | 
169 |         for from_state in self.first_items.keys():
170 |             indices = [production[0] for production in self.first_items[from_state]]  # production indices
171 |             first[from_state] = set()
172 | 
173 |             for index in indices:
174 |                 item = self.grammar.production_list[index].items[0]
175 |                 if item.is_symbol:  # add end symbol to First(from_state)
176 |                     first[from_state].add(item.value)
177 | 
178 |         for from_state in self.first_items.keys():
179 |             indices = [production[0] for production in self.first_items[from_state]]  # production indices
180 |             for index in indices:
181 |                 item = self.grammar.production_list[index].items[0]
182 |                 # add var symbol to First(from_state)
183 |                 if not item.is_symbol and item.value != from_state:
184 |                     first[from_state] |= set(self.get_first(first, item.value))
185 | 
186 |         for from_state in self.first_items.keys():
187 |             indices = [production[0] for production in self.first_items[from_state]]  # production indices
188 |             add_varepsilon = False
189 |             for index in indices:
190 |                 items = self.grammar.production_list[index].items
191 |                 length = len(items)
192 |                 if items[0].value == "ε":
193 |                     add_varepsilon = True
194 |                 # solve ε production to First(from_state)
195 |                 cur = 0
196 |                 while cur < length and not items[cur].is_symbol:
197 |                     if self.contain_varepsilon(items[cur].value):
198 |                         add_varepsilon = True
199 |                         if cur + 1 < length:
200 |                             first[from_state] |= set(first[items[cur + 1].value])
201 |                     cur += 1
202 | 
203 |             if add_varepsilon:
204 |                 first[from_state].add("ε")
205 | 
206 |         return first
207 | 
208 |     def follow_set(self):
209 |         follow = dict()
210 |         for from_state in self.first_items.keys():
211 |             follow[from_state] = set()
212 | 
213 |         follow[self.start_symbol].add("$")  # for begin symbol, add '$'
214 | 
215 |         for production in self.grammar.production_list:
216 |             items = production.items
217 |             length = len(items)
218 |             cur = 0
219 |             while cur < length:
220 |                 if not items[cur].is_symbol:
221 |                     if cur + 1 < length and items[cur + 1].is_symbol:  # B→αAa ,a is end symbol
222 |                         follow[items[cur].value].add(items[cur + 1].value)
223 |                     elif cur + 1 < length and not items[cur + 1].is_symbol:  # B→αAX ,X is not end symbol
224 |                         first_of_next = deepcopy(self.first[items[cur + 1].value])
225 |                         if "ε" in first_of_next:
226 |                             first_of_next.remove("ε")
227 |                         follow[items[cur].value] |= first_of_next
228 |                 cur += 1
229 | 
230 |         for production in self.grammar.production_list:
231 |             items = production.items
232 |             length = len(items)
233 |             cur = 0
234 |             while cur < length:
235 |                 if not items[cur].is_symbol:
236 |                     if cur + 1 >= length:
237 |                         follow[items[cur].value] |= follow[production.from_state]
238 |                     elif cur + 1 < length and self.contain_varepsilon(items[cur + 1].value):
239 |                         follow[items[cur].value] |= follow[production.from_state]
240 |                 cur += 1
241 | 
242 |         return follow
243 | 
244 |     def gen_all_items(self):
245 |         for index, production in enumerate(self.grammar.production_list):
246 | 
247 |             # store item which dot at first
248 |             if not self.first_items.get(production.from_state):
249 |                 self.first_items[str(production.from_state)] = [(index, 0)]
250 |             else:
251 |                 self.first_items[str(production.from_state)].append((index, 0))
252 | 
253 |             for dot, item in enumerate(production.items):
254 |                 self.all_items.append((index, dot))
255 | 
256 |             if len(production.items) > 1 and production.items[0].value != "ε":  # except ε
257 |                 self.all_items.append((index, len(production.items)))
258 | 
259 |     def get_clourse(self, items: list) -> ItemSet:
260 |         queue = [item for item in items]  # add items to queue
261 |         close_set = ItemSet()  # clourse for items
262 |         while queue:
263 |             item = queue.pop(0)
264 |             close_set.add(item)
265 |             candidate = self.grammar.production_list[item[0]]  # all candidate production
266 | 
267 |             if len(candidate.items) != item[1]:  # dot not at end of production
268 |                 if not candidate.items[item[1]].is_symbol:
269 |                     for it in self.first_items[candidate.items[item[1]].value]:
270 |                         if not close_set.exists(it):  # item not in close_set
271 |                             queue.append(it)
272 |                             close_set.add(it)
273 | 
274 |         return close_set
275 | 
276 |     def goto(self, itemset: ItemSet, symbol: str):
277 |         next_state = ItemSet()
278 |         for item in itemset.closure_items:
279 |             production = self.grammar.production_list[item[0]]
280 | 
281 |             if item[1] != len(production.items):  # dot not at the end
282 |                 if production.items[item[1]].value == symbol:  # match ,goto next state
283 |                     next_state.add((item[0], item[1] + 1))
284 | 
285 |         return self.get_clourse([item for item in next_state.closure_items])
286 | 
287 |     def gen_clourse_set(self, start: list) -> ClosureFamily:
288 |         C = ClosureFamily()
289 |         C.add(self.get_clourse(start))
290 | 
291 |         queue = [clourse for clourse in C.clourse_set]
292 |         index = 1
293 |         while queue:
294 |             clourse = queue.pop(0)
295 | 
296 |             for symbol in self.all_symbols:
297 |                 next = self.goto(clourse, symbol)
298 |                 if len(next.closure_items) != 0 and not C.exists(next):  # if next_state not in closure set
299 |                     next.set_index(index)
300 |                     index += 1
301 | 
302 |                     clourse.add_trans(symbol, next.get_index())
303 |                     C.add(next)
304 |                     queue.append(next)
305 |                 elif len(next.closure_items) != 0 and C.exists(next):  # if consists , add to transfer dict
306 |                     clourse.add_trans(symbol, C.indexOf(next))
307 | 
308 |         return C
309 | 
310 |     def analysis_table(self):
311 |         action = list()
312 |         goto = list()
313 | 
314 |         # C = self.gen_clourse_set([(0, 0)])
315 | 
316 |         for i in range(len(self.C.clourse_set)):
317 |             action.append(dict())
318 |             goto.append(dict())
319 | 
320 |         for clourse in self.C.clourse_set:
321 |             for item in clourse.closure_items:
322 |                 production = self.grammar.production_list[item[0]]
323 | 
324 |                 if item[1] != len(production.items):  # dot not at the end
325 |                     symbol = production.items[item[1]]
326 | 
327 |                     # solve A → ε
328 |                     if symbol.is_symbol and symbol.value == "ε":
329 |                         for f in self.follow[production.from_state]:
330 |                             action[clourse.get_index()][f] = "r" + str(item[0])
331 | 
332 |                     # get next closure index
333 |                     next_index = clourse.transfer.get(symbol.value)
334 | 
335 |                     if next_index is not None:
336 |                         if symbol.is_symbol and symbol.value != "ε":
337 |                             action[clourse.get_index()][symbol.value] = "s" + str(next_index)
338 |                         else:
339 |                             goto[clourse.get_index()][symbol.value] = next_index
340 | 
341 |                 else:  # item[1] == len(production.items) , dot at the end
342 |                     if production.from_state == self.start_symbol:
343 |                         action[clourse.get_index()]["$"] = "acc"
344 |                     else:
345 |                         for f in self.follow[production.from_state]:
346 |                             action[clourse.get_index()][f] = "r" + str(item[0])
347 | 
348 |         with open("action_table.json", "w") as f:
349 |             f.write(json.dumps(action, indent=2))
350 | 
351 |         with open("goto_table.json", "w") as f:
352 |             f.write(json.dumps(goto, indent=2))
353 | 
354 | 
def print_slr_table(grammar: Grammar) -> None:
    """Print the saved SLR ACTION/GOTO tables as one table on the console.

    The tables are read from "action_table.json" and "goto_table.json" in the
    current working directory.  Columns are the state number, every terminal
    symbol of `grammar`, then every variable symbol except the first one;
    cells without an entry stay empty.

    Args:
        grammar: supplies `terminal_symbols` and `variable_symbols`, which
            define the column order.
    """
    terminals: List[str] = grammar.terminal_symbols
    variables: List[str] = grammar.variable_symbols[1:]

    with open("action_table.json", "r") as source:
        action_table: List[Dict[str, str]] = json.load(source)
    with open("goto_table.json", "r") as source:
        goto_table: List[Dict[str, int]] = json.load(source)

    output_table = Table(show_header=True, header_style="bold")
    output_table.add_column("State", justify="center")
    for heading in [*terminals, *variables]:
        output_table.add_column(heading, justify="center")

    for state, (action_row, goto_row) in enumerate(zip(action_table, goto_table)):
        cells: List[str] = [str(state)]
        cells.extend(action_row.get(terminal, "") for terminal in terminals)
        cells.extend(str(goto_row.get(variable, "")) for variable in variables)
        output_table.add_row(*cells)

    console.print("SLR Table (Action/Goto Table):", style="bold")
    console.print(output_table)
385 | 
386 | 
def save_slr_table(grammar: "Grammar") -> None:
    """Export the saved SLR ACTION/GOTO tables to "output/slr_table.csv".

    The tables are read from "action_table.json" and "goto_table.json" in the
    current working directory.  The CSV has a header row followed by one row
    per state: the state number, one cell per terminal symbol of `grammar`,
    then one cell per variable symbol except the first one.  Cells without an
    entry are empty.

    Args:
        grammar: supplies `terminal_symbols` and `variable_symbols`, which
            define the column order.

    Raises:
        OSError: if the JSON tables cannot be read or the CSV cannot be
            written.
    """
    import os  # local import: only this function needs it

    terminals: List[str] = grammar.terminal_symbols
    variables: List[str] = grammar.variable_symbols[1:]

    with open("action_table.json", "r") as source:
        action_table: List[Dict[str, str]] = json.load(source)
    with open("goto_table.json", "r") as source:
        goto_table: List[Dict[str, int]] = json.load(source)

    # The output directory is not guaranteed to exist on a fresh checkout.
    os.makedirs("output", exist_ok=True)

    # newline="" lets the csv module control line endings itself; without it
    # every row is followed by a blank line on Windows.
    with open("output/slr_table.csv", "w", newline="") as target:
        writer = csv.writer(target)
        writer.writerow(["State", *terminals, *variables])

        for state, (action_row, goto_row) in enumerate(zip(action_table, goto_table)):
            cells: List[str] = [str(state)]
            cells.extend(action_row.get(terminal, "") for terminal in terminals)
            cells.extend(str(goto_row.get(variable, "")) for variable in variables)
            writer.writerow(cells)
408 | 
409 | 
if __name__ == "__main__":
    # Stand-alone run: load the grammar, generate the SLR tables and show them.
    grammar = Grammar()
    grammar.read("grammar.txt")

    # analysis_table() writes action_table.json / goto_table.json, which
    # print_slr_table() reads back below.
    SLR_Table(grammar).analysis_table()

    print_slr_table(grammar)
418 | 


--------------------------------------------------------------------------------
/Scanner.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | from enum import Enum, auto
  3 | from string import ascii_letters, digits, printable
  4 | from typing import List
  5 | 
  6 | from rich.console import Console
  7 | from rich.table import Table
  8 | 
  9 | from Symbol_Table import Symbol_Table, Table_Item, Table_Item_Type
 10 | from Token import Token, Token_Type
 11 | 
 12 | console = Console()
 13 | 
 14 | 
class Scanner_State(Enum):
    """States of the scanner automaton described by SCANNER_TRANSITION.

    Scanning of a token begins in START.  Every keyword has one state per
    prefix (for example W, WH, WHI, WHIL, WHILE).  END and ERROR are only
    used as transition targets: END marks a finished token, ERROR an input
    that matches no transition.
    """

    START = auto()
    # identifier
    ID = auto()
    # assign symbol '='
    ASSIGN = auto()
    # arithmetic operator (+, -, *, /)
    ALOP = auto()
    # relation operator
    L_G = auto()  # less '<' or greater '>'
    LE_GE = auto()  # less equal '<=' or greater equal '>='
    EQ = auto()  # equal '=='
    NOT = auto()  # not '!'
    NEQ = auto()  # not equal '!='
    # bracket
    LBRACKET = auto()  # left bracket '('
    RBRACKET = auto()  # right bracket ')'
    # semicolon
    SEMICOLON = auto()  # semicolon ';'
    # "if"
    I = auto()
    IF = auto()
    # "int"
    IN = auto()
    INT = auto()
    # "else"
    E = auto()
    EL = auto()
    ELS = auto()
    ELSE = auto()
    # "while"
    W = auto()
    WH = auto()
    WHI = auto()
    WHIL = auto()
    WHILE = auto()
    # "float"
    F = auto()
    FL = auto()
    FLO = auto()
    FLOA = auto()
    FLOAT = auto()
    # number
    NUMBER = auto()
    # end of the token
    END = auto()
    # not a valid token
    ERROR = auto()
 63 | 
 64 | 
# Map from the scanner state a token ended in to the type of that token.
# Keyword-prefix states (I, IN, E, EL, ...) map to ID because an incomplete
# keyword such as "whil" is an ordinary identifier.  START, NOT, END and ERROR
# have no entry: SCANNER_TRANSITION gives START and NOT no transition to END,
# and END/ERROR are never the current state when a token is finished.
STATE_TO_TOKEN = {
    Scanner_State.ID: Token_Type.ID,
    Scanner_State.ASSIGN: Token_Type.ASSIGN,
    Scanner_State.ALOP: Token_Type.ALOP,
    Scanner_State.L_G: Token_Type.RELOP,
    Scanner_State.LE_GE: Token_Type.RELOP,
    Scanner_State.EQ: Token_Type.RELOP,
    Scanner_State.NEQ: Token_Type.RELOP,
    Scanner_State.LBRACKET: Token_Type.LBRACKET,
    Scanner_State.RBRACKET: Token_Type.RBRACKET,
    Scanner_State.SEMICOLON: Token_Type.SEMICOLON,
    Scanner_State.I: Token_Type.ID,
    Scanner_State.IF: Token_Type.IF,
    Scanner_State.IN: Token_Type.ID,
    Scanner_State.INT: Token_Type.INT,
    Scanner_State.E: Token_Type.ID,
    Scanner_State.EL: Token_Type.ID,
    Scanner_State.ELS: Token_Type.ID,
    Scanner_State.ELSE: Token_Type.ELSE,
    Scanner_State.W: Token_Type.ID,
    Scanner_State.WH: Token_Type.ID,
    Scanner_State.WHI: Token_Type.ID,
    Scanner_State.WHIL: Token_Type.ID,
    Scanner_State.WHILE: Token_Type.WHILE,
    Scanner_State.F: Token_Type.ID,
    Scanner_State.FL: Token_Type.ID,
    Scanner_State.FLO: Token_Type.ID,
    Scanner_State.FLOA: Token_Type.ID,
    Scanner_State.FLOAT: Token_Type.FLOAT,
    Scanner_State.NUMBER: Token_Type.CONST,
}
 97 | 
 98 | 
# Character classes used as patterns in SCANNER_TRANSITION.  A pattern is a
# plain string; a character matches it when it occurs anywhere in the string.
DIGITS = digits  # 0~9

ID_START = ascii_letters + "_"  # a~z + A~Z + _
ID_APPEND = ID_START + digits  # a~z + A~Z + _ + 0~9

ALOPS = "+-*/"  # arithmetic operator characters
RELOPS = "<>!="  # characters that can start a relation operator (or '=')

SPACES = " \t\n\r\0"  # blanks plus the "\0" end-of-input sentinel

# characters that may directly follow (and therefore terminate) an identifier
ID_SEP = ALOPS + RELOPS + "();" + SPACES
# a number is terminated by the same characters as an identifier
NUMBER_SEP = ID_SEP

# operators and punctuation are terminated by any printable character or blank
ANY_SEP = printable + SPACES


# Transitions shared by the identifier state and every keyword(-prefix) state:
# another identifier character continues as a plain identifier, a separator
# finishes the token.
ID_TRANSITION_TEMPLATE = [(ID_APPEND, Scanner_State.ID), (ID_SEP, Scanner_State.END)]
116 | 
117 | 
# Transition table of the scanner automaton.
#
# Each state maps to an ordered list of (pattern, target state) pairs.
# Scanner.get_next() walks the list from the top and follows the first pair
# whose pattern contains the current character, so specific entries must come
# before the general ones they overlap with (e.g. "i" before ID_START).  When
# no pair matches, the scanner goes to Scanner_State.ERROR.  A transition to
# Scanner_State.END finishes the current token without consuming the
# character that triggered it.
SCANNER_TRANSITION = {
    Scanner_State.START: [
        # first letters of the keywords if/int, else, while, float
        ("i", Scanner_State.I),
        ("e", Scanner_State.E),
        ("w", Scanner_State.W),
        ("f", Scanner_State.F),
        ("(", Scanner_State.LBRACKET),
        (")", Scanner_State.RBRACKET),
        (";", Scanner_State.SEMICOLON),
        ("<>", Scanner_State.L_G),
        ("!", Scanner_State.NOT),
        ("=", Scanner_State.ASSIGN),
        # Only a plain space is skipped here; any other whitespace has to be
        # removed or normalised before scanning, otherwise START has no
        # transition for it and the scan ends in ERROR.
        (" ", Scanner_State.START),  # remove space
        (ALOPS, Scanner_State.ALOP),
        (DIGITS, Scanner_State.NUMBER),
        (ID_START, Scanner_State.ID),
    ],
    Scanner_State.ID: ID_TRANSITION_TEMPLATE,
    Scanner_State.ASSIGN: [
        ("=", Scanner_State.EQ),  # "==" instead of a single "="
        (ANY_SEP, Scanner_State.END),
    ],
    Scanner_State.ALOP: [
        (ANY_SEP, Scanner_State.END),
    ],
    Scanner_State.L_G: [
        ("=", Scanner_State.LE_GE),  # "<=" or ">="
        (ANY_SEP, Scanner_State.END),
    ],
    Scanner_State.LE_GE: [
        (ANY_SEP, Scanner_State.END),
    ],
    Scanner_State.EQ: [
        (ANY_SEP, Scanner_State.END),
    ],
    # "!" is only valid as the first half of "!="; anything else is an error
    Scanner_State.NOT: [
        ("=", Scanner_State.NEQ),
    ],
    Scanner_State.NEQ: [
        (ANY_SEP, Scanner_State.END),
    ],
    Scanner_State.LBRACKET: [
        (ANY_SEP, Scanner_State.END),
    ],
    Scanner_State.RBRACKET: [
        (ANY_SEP, Scanner_State.END),
    ],
    Scanner_State.SEMICOLON: [
        (ANY_SEP, Scanner_State.END),
    ],
    # Keyword states: the next expected keyword letter advances along the
    # keyword, any other identifier character falls back to a plain identifier
    # (via ID_TRANSITION_TEMPLATE), and a separator finishes the token.
    Scanner_State.I: [
        ("f", Scanner_State.IF),
        ("n", Scanner_State.IN),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.IF: ID_TRANSITION_TEMPLATE,
    Scanner_State.IN: [
        ("t", Scanner_State.INT),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.INT: ID_TRANSITION_TEMPLATE,
    Scanner_State.E: [
        ("l", Scanner_State.EL),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.EL: [
        ("s", Scanner_State.ELS),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.ELS: [
        ("e", Scanner_State.ELSE),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.ELSE: ID_TRANSITION_TEMPLATE,
    Scanner_State.W: [
        ("h", Scanner_State.WH),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.WH: [
        ("i", Scanner_State.WHI),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.WHI: [
        ("l", Scanner_State.WHIL),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.WHIL: [
        ("e", Scanner_State.WHILE),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.WHILE: ID_TRANSITION_TEMPLATE,
    Scanner_State.F: [
        ("l", Scanner_State.FL),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.FL: [
        ("o", Scanner_State.FLO),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.FLO: [
        ("a", Scanner_State.FLOA),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.FLOA: [
        ("t", Scanner_State.FLOAT),
    ]
    + ID_TRANSITION_TEMPLATE,
    Scanner_State.FLOAT: ID_TRANSITION_TEMPLATE,
    # a number is a run of digits; a letter directly after it is an error
    Scanner_State.NUMBER: [
        (DIGITS, Scanner_State.NUMBER),
        (NUMBER_SEP, Scanner_State.END),
    ],
}
231 | 
232 | 
class Scanner:
    """Table-driven lexer that splits source text into tokens.

    The scanner examines `code` one character at a time, following
    SCANNER_TRANSITION from Scanner_State.START until a transition to
    Scanner_State.END completes a token.  Identifiers and constants are stored
    in `symbol_table`; every consumed character and every produced token is
    also recorded as a display row in `state_output` / `token_output`.
    """

    pnt: int  # index of the next character of `code` to examine
    code: str  # normalised source text, terminated by "\0"
    length: int  # len(code), including the "\0" sentinel
    symbol_table: "Symbol_Table"
    state_output: List[List[str]]  # rows: pointer, character, "FROM -> TO"
    token_output: List[List[str]]  # rows: token type name, token content

    def __init__(self, code: str, symbol_table: "Symbol_Table") -> None:
        """init the scanner

        Args:
            `code` (str): raw code that needs to be processed
            `symbol_table` (Symbol_Table): symbol table for storing variables and constants
        """
        self.pnt = 0
        # Line breaks and tabs separate tokens.  Turn them into spaces (the only
        # whitespace the START state skips) instead of deleting them, so that
        # "a\nb" is not glued together into the single identifier "ab".
        for blank in "\r\n\t":
            code = code.replace(blank, " ")
        self.code = code.strip(" ") + "\0"  # add '\0' at the end for convenience
        self.length = len(self.code)
        self.symbol_table = symbol_table
        self.state_output = []
        self.token_output = []

    def print_states(self) -> None:
        """Print every recorded state transition as a table on the console."""
        output_table = Table(
            show_header=True,
            header_style="bold",
        )
        output_table.add_column("Pointer", justify="center")
        output_table.add_column("Current Character", justify="center")
        output_table.add_column("State Transfer", justify="left")

        for row in self.state_output:
            output_table.add_row(*row)

        console.print("Scanner States:", style="bold")
        console.print(output_table)

    def print_tokens(self) -> None:
        """Print every token produced so far as a table on the console."""
        output_table = Table(
            show_header=True,
            header_style="bold",
        )
        output_table.add_column("Type", justify="center")
        output_table.add_column("Content", justify="center")

        for row in self.token_output:
            output_table.add_row(*row)

        console.print("Tokens:", style="bold")
        console.print(output_table)

    def save(self) -> None:
        """Write the state trace and the token list to CSV files in "output/".

        Creates "output/scanner_states.csv" and "output/token_table.csv"; the
        directory is created when it does not exist yet.
        """
        import os  # local import: only this method needs it

        os.makedirs("output", exist_ok=True)

        # newline="" lets the csv module control line endings itself; without
        # it every row is followed by a blank line on Windows.
        with open("output/scanner_states.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Pointer", "Current Character", "State Transfer"])
            writer.writerows(self.state_output)
        with open("output/token_table.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Type", "Content"])
            writer.writerows(self.token_output)

    def has_next(self) -> bool:
        """check whether the scanner has next token to output

        Returns:
            `bool`: `True` if the scanner have next token
        """
        # the last character is the "\0" sentinel, which is never part of a token
        return self.pnt < self.length - 1

    def get_next(self) -> "Token":
        """get next token

        Consumes characters until a transition to `Scanner_State.END` is
        found; the character that triggers this transition is left for the
        next call.  If no transition matches the current character, the tokens
        and states collected so far are printed and the process exits with
        status -1.

        Returns:
            `Token`: The next token
        """
        current_state = Scanner_State.START
        content = ""

        while True:
            cur = self.code[self.pnt]

            # find next state: the first matching pattern wins, default is error
            next_state = Scanner_State.ERROR
            for pattern, to_state in SCANNER_TRANSITION[current_state]:
                if cur in pattern:
                    next_state = to_state
                    break

            if next_state == Scanner_State.END:
                return self._finish_token(current_state, content)

            self.state_output.append([str(self.pnt), cur, f"{current_state.name} -> {next_state.name}"])

            if next_state == Scanner_State.ERROR:
                self.print_tokens()
                self.print_states()
                console.print("ERROR WHEN GETTING NEXT TOKEN!", style="bold red")
                # same effect as exit(-1), without relying on the `site` helper
                raise SystemExit(-1)

            # step to next state
            current_state = next_state
            if cur != " ":
                content += cur
            self.pnt += 1

    def _finish_token(self, final_state: "Scanner_State", content: str) -> "Token":
        """Build the token that ended in `final_state` and record it in `token_output`."""
        result = Token()
        result.token_type = STATE_TO_TOKEN[final_state]

        if result.token_type in (Token_Type.ID, Token_Type.CONST):
            # for identifier or constant, the content is the entry(index) in symbol table
            entry = self.symbol_table.find_item_by_name(content)
            if entry == -1:
                # cannot find, create a new row in symbol table
                new_item = Table_Item()
                new_item.name = content
                new_item.variable = result.token_type == Token_Type.ID
                entry = self.symbol_table.add_item(new_item)

            result.content = entry
        elif result.token_type in (Token_Type.ALOP, Token_Type.RELOP):
            # arithmetic operator (+, -, *, /) or relation operator (<, >, <=, >=, ==, !=)
            result.content = content
        else:
            result.content = None

        self.token_output.append([result.token_type.name, "" if result.content is None else str(result.content)])
        return result
368 | 


--------------------------------------------------------------------------------
/Symbol_Table.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | from enum import Enum, auto
 3 | from typing import List, Union
 4 | 
 5 | from rich.console import Console
 6 | from rich.table import Table
 7 | 
 8 | console = Console()
 9 | 
10 | 
class Table_Item_Type(Enum):
    """Data types that can be recorded for a symbol table entry."""

    INT = auto()
    FLOAT = auto()
14 | 
15 | 
# Maps a type keyword as written in the source text to its Table_Item_Type.
item_type_translate = {"int": Table_Item_Type.INT, "float": Table_Item_Type.FLOAT}
17 | 
18 | 
class Table_Item:
    """One row of the symbol table; every field is None until it is filled in."""

    name: Union[str, None]
    variable: Union[bool, None]  # True for variable, False for constant
    item_type: Union[Table_Item_Type, None]

    def __init__(self) -> None:
        self.name = self.variable = self.item_type = None

    def __str__(self) -> str:
        """Return "<name>, var|const, <type name or empty>"."""
        kind = "var" if self.variable else "const"
        type_name = self.item_type.name if self.item_type is not None else ""
        return f"{self.name}, {kind}, {type_name}"
31 | 
32 | 
class Symbol_Table:
    """List of identifiers and constants; an entry is addressed by its index."""

    size: int  # number of items added through add_item()
    table: "List[Table_Item]"

    def __init__(self) -> None:
        self.size = 0
        self.table = []

    def _rows(self) -> list:
        """Return one [name, "Var"/"Const", type name or ""] row per item."""
        return [
            [
                item.name,
                "Var" if item.variable else "Const",
                "" if item.item_type is None else item.item_type.name,
            ]
            for item in self.table
        ]

    def output(self) -> None:
        """Print the symbol table as a table on the console."""
        output_table = Table(
            show_header=True,
            header_style="bold",
        )
        output_table.add_column("Name", justify="center")
        output_table.add_column("Var/Const", justify="center")
        output_table.add_column("Type", justify="center")

        for row in self._rows():
            output_table.add_row(*row)

        console.print("Symbol Table:", style="bold")
        console.print(output_table)

    def save(self) -> None:
        """Write the symbol table to "output/symbol_table.csv".

        The "output" directory is created when it does not exist yet.
        """
        import os  # local import: only this method needs it

        os.makedirs("output", exist_ok=True)

        # newline="" lets the csv module control line endings itself; without
        # it every row is followed by a blank line on Windows.
        with open("output/symbol_table.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Name", "Var/Const", "Type"])
            writer.writerows(self._rows())

    def get_size(self) -> int:
        """Return the number of items added so far."""
        return self.size

    def find_item_by_name(self, name: str) -> int:
        """Return the index of the first item called `name`, or -1 if there is none."""
        for idx, item in enumerate(self.table):
            if item.name == name:
                return idx

        return -1  # cannot find

    def add_item(self, item: "Table_Item") -> int:
        """Append `item` and return the index (entry) it was stored at."""
        self.size += 1
        self.table.append(item)
        return self.size - 1
85 | 


--------------------------------------------------------------------------------
/Token.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum, auto
 2 | from typing import Union
 3 | 
 4 | 
class Token_Type(Enum):
    """Kinds of token the scanner can produce."""

    ID = auto()  # identifier
    CONST = auto()  # constant (number)
    ASSIGN = auto()  # assign symbol '='
    ALOP = auto()  # arithmetic operator (+, -, *, /)
    RELOP = auto()  # relation operator (<, >, <=, >=, ==, !=)
    LBRACKET = auto()  # left bracket '('
    RBRACKET = auto()  # right bracket ')'
    SEMICOLON = auto()  # semicolon ';'
    IF = auto()  # if
    ELSE = auto()  # else
    WHILE = auto()  # while
    INT = auto()  # int
    FLOAT = auto()  # float
19 | 
20 | 
class Token:
    """A scanned token: its type plus optional detail content."""

    token_type: Union[Token_Type, None]
    content: Union[str, int, None]  # str for name, int for entry

    def __init__(self) -> None:
        # type of the token
        self.token_type = None
        # detail content (like identifier name or operator type)
        self.content = None

    def to_string(self) -> str:
        """Return the symbol text of this token.

        Operators yield their own text (`content`), punctuation yields its
        fixed character, and every other type yields its lower-cased type
        name (e.g. "id", "const", "while").
        """
        kind = self.token_type
        if kind == Token_Type.ALOP or kind == Token_Type.RELOP:
            return str(self.content)

        fixed_text = {
            Token_Type.ASSIGN: "=",
            Token_Type.LBRACKET: "(",
            Token_Type.RBRACKET: ")",
            Token_Type.SEMICOLON: ";",
        }.get(kind)
        if fixed_text is not None:
            return fixed_text

        return kind.name.lower()

    def __str__(self) -> str:
        shown = "" if self.content is None else self.content
        return f"{self.token_type.name}, {shown}"
44 | 


--------------------------------------------------------------------------------
/format.sh:
--------------------------------------------------------------------------------
1 | #!/bin/zsh
2 | isort .
3 | black . -l 120


--------------------------------------------------------------------------------
/input/grammar.txt:
--------------------------------------------------------------------------------
  1 | TerminalSymbols: id const int float if else while > < == = + - * / ( ) ; $
  2 | VariableSymbols: P' P D S L C E T F M N Q
  3 | 
  4 | @ P' → P
  5 | @ P → M D S
  6 | self.back_patch(attributes[-1]["nextlist"], attributes[-3]["instr"])
  7 | 
  8 | @ M → ε
  9 | offset = 0
 10 | temp_index = 0
 11 | current_attribute["instr"] = self.current_line
 12 | 
 13 | @ D → L id ; N D
 14 | @ N → ε
 15 | idx = attributes[-2]["entry"]
 16 | self.symbol_table.table[idx].item_type = item_type_translate[attributes[-3]["type"]]
 17 | width = attributes[-3]["width"]
 18 | self.gen_code(f"Alloc [{offset},{offset+width}] for {self.symbol_table.table[idx].name}")
 19 | offset += width
 20 | 
 21 | @ D → ε
 22 | @ L → int
 23 | current_attribute["type"] = 'int'
 24 | current_attribute["width"] = 4
 25 | 
 26 | @ L → float
 27 | current_attribute["type"] = 'float'
 28 | current_attribute["width"] = 4
 29 | 
 30 | @ S → S ; Q S
 31 | self.back_patch(attributes[-4]["nextlist"], attributes[-2]["instr"])
 32 | current_attribute["nextlist"] = attributes[-1]["nextlist"]
 33 | 
 34 | @ Q → ε
 35 | current_attribute["instr"] = self.current_line
 36 | 
 37 | @ S → ε
 38 | current_attribute["nextlist"] = []
 39 | 
 40 | @ S → id = E
 41 | entry0 = attributes[-3]["entry"]
 42 | name0 = self.symbol_table.table[entry0].name
 43 | entry1 = attributes[-1]["entry"]
 44 | name1 = self.symbol_table.table[entry1].name
 45 | self.gen_code(f"{name0} = {name1}")
 46 | 
 47 | current_attribute["nextlist"] = [self.current_line]
 48 | 
 49 | @ S → if ( C ) Q S
 50 | truelist = attributes[-4]["truelist"]
 51 | falselist = attributes[-4]["falselist"]
 52 | instr = attributes[-2]["instr"]
 53 | self.back_patch(truelist, instr)
 54 | nextlist_s1 = attributes[-1]["nextlist"]
 55 | current_attribute["nextlist"] = self.merge(falselist, nextlist_s1)
 56 | 
 57 | @ S → while Q ( C ) Q S
 58 | self.back_patch(attributes[-1]["nextlist"], attributes[-6]["instr"])
 59 | self.back_patch(attributes[-4]["truelist"], attributes[-2]["instr"])
 60 | current_attribute["nextlist"] = attributes[-4]["falselist"]
 61 | idx = attributes[-6]["instr"]
 62 | self.gen_code(f"goto {idx}")
 63 | 
 64 | @ C → E > E
 65 | current_attribute["truelist"] = self.make_list(self.current_line)
 66 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
 67 | 
 68 | idx1 = attributes[-3]["entry"]
 69 | idx2 = attributes[-1]["entry"]
 70 | name1 = self.symbol_table.table[idx1].name
 71 | name2 = self.symbol_table.table[idx2].name
 72 | 
 73 | self.gen_code(f"if {name1} > {name2} goto ")
 74 | self.gen_code(f"goto ")
 75 | 
 76 | @ C → E < E
 77 | current_attribute["truelist"] = self.make_list(self.current_line)
 78 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
 79 | 
 80 | idx1 = attributes[-3]["entry"]
 81 | idx2 = attributes[-1]["entry"]
 82 | name1 = self.symbol_table.table[idx1].name
 83 | name2 = self.symbol_table.table[idx2].name
 84 | 
 85 | self.gen_code(f"if {name1} < {name2} goto ")
 86 | self.gen_code(f"goto ")
 87 | 
 88 | 
 89 | @ C → E == E
 90 | current_attribute["truelist"] = self.make_list(self.current_line)
 91 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
 92 | 
 93 | idx1 = attributes[-3]["entry"]
 94 | idx2 = attributes[-1]["entry"]
 95 | name1 = self.symbol_table.table[idx1].name
 96 | name2 = self.symbol_table.table[idx2].name
 97 | 
 98 | self.gen_code(f"if {name1} == {name2} goto ")
 99 | self.gen_code(f"goto ")
100 | 
101 | 
102 | @ E → E + T
103 | name0 = f"temp{temp_index}"
104 | entry = self.gen_variable(name0)
105 | temp_index += 1
106 | 
107 | entry1 = attributes[-3]["entry"]
108 | name1 = self.symbol_table.table[entry1].name
109 | entry2 = attributes[-1]["entry"]
110 | name2 = self.symbol_table.table[entry2].name
111 | 
112 | current_attribute["entry"] = entry
113 | self.gen_code(f"{name0} = {name1} + {name2}")
114 | 
115 | @ E → E - T
116 | name0 = f"temp{temp_index}"
117 | entry = self.gen_variable(name0)
118 | temp_index += 1
119 | 
120 | entry1 = attributes[-3]["entry"]
121 | name1 = self.symbol_table.table[entry1].name
122 | entry2 = attributes[-1]["entry"]
123 | name2 = self.symbol_table.table[entry2].name
124 | 
125 | current_attribute["entry"] = entry
126 | self.gen_code(f"{name0} = {name1} - {name2}")
127 | 
128 | @ E → T
129 | current_attribute["entry"] = attributes[-1]["entry"]
130 | 
131 | @ T → F
132 | current_attribute["entry"] = attributes[-1]["entry"]
133 | 
134 | @ T → T * F
135 | name0 = f"temp{temp_index}"
136 | entry = self.gen_variable(name0)
137 | temp_index += 1
138 | 
139 | entry1 = attributes[-3]["entry"]
140 | name1 = self.symbol_table.table[entry1].name
141 | entry2 = attributes[-1]["entry"]
142 | name2 = self.symbol_table.table[entry2].name
143 | 
144 | current_attribute["entry"] = entry
145 | self.gen_code(f"{name0} = {name1} * {name2}")
146 | 
147 | @ T → T / F
148 | name0 = f"temp{temp_index}"
149 | entry = self.gen_variable(name0)
150 | temp_index += 1
151 | 
152 | entry1 = attributes[-3]["entry"]
153 | name1 = self.symbol_table.table[entry1].name
154 | entry2 = attributes[-1]["entry"]
155 | name2 = self.symbol_table.table[entry2].name
156 | 
157 | current_attribute["entry"] = entry
158 | self.gen_code(f"{name0} = {name1} / {name2}")
159 | 
160 | @ F → ( E )
161 | current_attribute["entry"] = attributes[-2]["entry"]
162 | 
163 | @ F → id
164 | current_attribute["entry"] = attributes[-1]["entry"]
165 | 
166 | @ F → const
167 | current_attribute["entry"] = attributes[-1]["entry"]


--------------------------------------------------------------------------------
/input/grammar_assign.txt:
--------------------------------------------------------------------------------
 1 | TerminalSymbols: id const = + - * / ( ) ; $
 2 | VariableSymbols: P' P M S E T F
 3 | 
 4 | @ P' → P
 5 | 
 6 | @ P → M S
 7 | 
 8 | @ M → ε
 9 | offset = 0
10 | temp_index = 0
11 | 
12 | @ S → S ; S
13 | 
14 | @ S → ε
15 | 
16 | @ S → id = E
17 | entry0 = attributes[-3]["entry"]
18 | name0 = self.symbol_table.table[entry0].name
19 | entry1 = attributes[-1]["entry"]
20 | name1 = self.symbol_table.table[entry1].name
21 | self.gen_code(f"{name0} = {name1}")
22 | 
23 | @ E → E + T
24 | name0 = f"temp{temp_index}"
25 | entry = self.gen_variable(name0)
26 | temp_index += 1
27 | 
28 | entry1 = attributes[-3]["entry"]
29 | name1 = self.symbol_table.table[entry1].name
30 | entry2 = attributes[-1]["entry"]
31 | name2 = self.symbol_table.table[entry2].name
32 | 
33 | current_attribute["entry"] = entry
34 | self.gen_code(f"{name0} = {name1} + {name2}")
35 | 
36 | @ E → E - T
37 | name0 = f"temp{temp_index}"
38 | entry = self.gen_variable(name0)
39 | temp_index += 1
40 | 
41 | entry1 = attributes[-3]["entry"]
42 | name1 = self.symbol_table.table[entry1].name
43 | entry2 = attributes[-1]["entry"]
44 | name2 = self.symbol_table.table[entry2].name
45 | 
46 | current_attribute["entry"] = entry
47 | self.gen_code(f"{name0} = {name1} - {name2}")
48 | 
49 | @ E → T
50 | current_attribute["entry"] = attributes[-1]["entry"]
51 | 
52 | @ T → F
53 | current_attribute["entry"] = attributes[-1]["entry"]
54 | 
55 | @ T → T * F
56 | name0 = f"temp{temp_index}"
57 | entry = self.gen_variable(name0)
58 | temp_index += 1
59 | 
60 | entry1 = attributes[-3]["entry"]
61 | name1 = self.symbol_table.table[entry1].name
62 | entry2 = attributes[-1]["entry"]
63 | name2 = self.symbol_table.table[entry2].name
64 | 
65 | current_attribute["entry"] = entry
66 | self.gen_code(f"{name0} = {name1} * {name2}")
67 | 
68 | @ T → T / F
69 | name0 = f"temp{temp_index}"
70 | entry = self.gen_variable(name0)
71 | temp_index += 1
72 | 
73 | entry1 = attributes[-3]["entry"]
74 | name1 = self.symbol_table.table[entry1].name
75 | entry2 = attributes[-1]["entry"]
76 | name2 = self.symbol_table.table[entry2].name
77 | 
78 | current_attribute["entry"] = entry
79 | self.gen_code(f"{name0} = {name1} / {name2}")
80 | 
81 | @ F → ( E )
82 | current_attribute["entry"] = attributes[-2]["entry"]
83 | 
84 | @ F → id
85 | current_attribute["entry"] = attributes[-1]["entry"]
86 | 
87 | @ F → const
88 | current_attribute["entry"] = attributes[-1]["entry"]


--------------------------------------------------------------------------------
/input/grammar_control.txt:
--------------------------------------------------------------------------------
 1 | TerminalSymbols: id ; if while ( ) > = $
 2 | VariableSymbols: P' P M S C Q E
 3 | 
 4 | @ P' → P
 5 | @ P → M S
 6 | self.back_patch(attributes[-1]["nextlist"], attributes[-2]["instr"])
 7 | @ M → ε
 8 | offset = 0
 9 | current_attribute["instr"] = self.current_line
10 | 
11 | 
12 | @ S → S ; Q S
13 | self.back_patch(attributes[-4]["nextlist"], attributes[-2]["instr"])
14 | current_attribute["nextlist"] = attributes[-1]["nextlist"]
15 | 
16 | @ S → ε
17 | current_attribute["nextlist"] = []
18 | 
19 | @ S → E
20 | idx = attributes[-1]["entry"]
21 | name = self.symbol_table.table[idx].name
22 | self.gen_code(f"appearance of {name}")
23 | current_attribute["nextlist"] = [self.current_line]
24 | 
25 | @ S → if ( C ) Q S
26 | truelist = attributes[-4]["truelist"]
27 | falselist = attributes[-4]["falselist"]
28 | instr = attributes[-2]["instr"]
29 | self.back_patch(truelist, instr)
30 | nextlist_s1 = attributes[-1]["nextlist"]
31 | current_attribute["nextlist"] = self.merge(falselist, nextlist_s1)
32 | 
33 | @ S → while Q ( C ) Q S
34 | self.back_patch(attributes[-1]["nextlist"], attributes[-6]["instr"])
35 | self.back_patch(attributes[-4]["truelist"], attributes[-2]["instr"])
36 | current_attribute["nextlist"] = attributes[-4]["falselist"]
37 | idx = attributes[-6]["instr"]
38 | self.gen_code(f"goto {idx}")
39 | 
40 | 
41 | @ Q → ε
42 | current_attribute["instr"] = self.current_line
43 | 
44 | @ C → E > E
45 | current_attribute["truelist"] = self.make_list(self.current_line)
46 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
47 | 
48 | idx1 = attributes[-3]["entry"]
49 | idx2 = attributes[-1]["entry"]
50 | name1 = self.symbol_table.table[idx1].name
51 | name2 = self.symbol_table.table[idx2].name
52 | 
53 | self.gen_code(f"if {name1} > {name2} goto ")
54 | self.gen_code(f"goto ")
55 | 
56 | 
57 | @ E → id
58 | attributes[-1]["value"] = 1
59 | current_attribute["value"] = attributes[-1]["value"]
60 | current_attribute["entry"] = attributes[-1]["entry"]
61 | 
62 | 


--------------------------------------------------------------------------------
/input/grammar_define.txt:
--------------------------------------------------------------------------------
 1 | TerminalSymbols: id int float ; $
 2 | VariableSymbols: P' P M D N S L
 3 | 
 4 | @ P' → P
 5 | @ P → M D
 6 | @ M → ε
 7 | offset = 0
 8 | 
 9 | @ D → L id ; N D
10 | @ N → ε
11 | idx = attributes[-2]["entry"]
12 | self.symbol_table.table[idx].item_type = item_type_translate[attributes[-3]["type"]]
13 | self.gen_code(f"Alloc [{offset},{offset+4}] for {self.symbol_table.table[idx].name}")
14 | offset += 4
15 | 
16 | @ D → ε
17 | @ L → int
18 | current_attribute["type"] = 'int'
19 | 
20 | @ L → float
21 | current_attribute["type"] = 'float'


--------------------------------------------------------------------------------
/input/grammar_expression.txt:
--------------------------------------------------------------------------------
 1 | TerminalSymbols: id + * ( ) $
 2 | VariableSymbols: S E F T
 3 | 
 4 | @ S → E
 5 | @ E → E + T
 6 | @ E → T
 7 | @ T → T * F
 8 | @ T → F
 9 | @ F → ( E )
10 | @ F → id


--------------------------------------------------------------------------------
/input/grammar_raw.txt:
--------------------------------------------------------------------------------
 1 | TerminalSymbols: id const int float if else while > < == = + - * / ( ) ; $
 2 | VariableSymbols: P' P D S L C E T F
 3 | 
 4 | @ P' → P
 5 | @ P → D S
 6 | @ D → L id ; D
 7 | @ D → ε
 8 | @ L → int
 9 | @ L → float
10 | @ S → S ; S
11 | @ S → ε
12 | @ S → id = E
13 | @ S → if ( C ) S
14 | @ S → if ( C ) S else S
15 | @ S → while ( C ) S
16 | @ C → E > E
17 | @ C → E < E
18 | @ C → E == E
19 | @ E → E + T
20 | @ E → E - T
21 | @ E → T
22 | @ T → F
23 | @ T → T * F
24 | @ T → T / F
25 | @ F → ( E )
26 | @ F → id
27 | @ F → const


--------------------------------------------------------------------------------
/input/input.txt:
--------------------------------------------------------------------------------
 1 | int a;
 2 | int b;
 3 | float c;
 4 | 
 5 | a = 2;
 6 | b = 1;
 7 | c = (a + b) / 2;
 8 | 
 9 | if (a > (b + c))
10 |   if (c < 12)
11 |     c = (a + b) * c - a / 2;
12 | 
13 | while (a < b)
14 |   a = b + c - 1;
15 | 
16 | b = a;
17 | 


--------------------------------------------------------------------------------
/input/input_assign.txt:
--------------------------------------------------------------------------------
1 | a = b + 3;
2 | c = d - a;
3 | x = (a + b) * c - d / 2;


--------------------------------------------------------------------------------
/input/input_control.txt:
--------------------------------------------------------------------------------
1 | if (a > b)
2 |     a;
3 | while (a > b)
4 |     b;
5 | c;
6 | 


--------------------------------------------------------------------------------
/input/input_define.txt:
--------------------------------------------------------------------------------
1 | int abc;
2 | float def;


--------------------------------------------------------------------------------
/input/input_expression.txt:
--------------------------------------------------------------------------------
1 | (a*b)+c


--------------------------------------------------------------------------------
/input/input_raw.txt:
--------------------------------------------------------------------------------
 1 | int a;
 2 | int b;
 3 | float c;
 4 | a=2;
 5 | b=1;
 6 | if(a>(b+c))
 7 |   c=(a+b)/2;
 8 | else
 9 |   c=a-b;
10 | while(c<a)
11 |   c=c-b;
12 | 


--------------------------------------------------------------------------------
/input/input_scanner.txt:
--------------------------------------------------------------------------------
 1 | int a;
 2 | int b;
 3 | float c;
 4 | a=2;
 5 | b=1;
 6 | if(a>=b)
 7 |   c=a+b;
 8 | else
 9 |   c=a-b;
10 | while(c<a)
11 |   c=c-b;
12 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | from rich.console import Console
  2 | 
  3 | from Grammar import Grammar
  4 | from Scanner import Scanner
  5 | from SLR_Automata import SLR_Automata
  6 | from SLR_Table import SLR_Table, print_slr_table
  7 | from Symbol_Table import Symbol_Table
  8 | 
  9 | console = Console()
 10 | 
 11 | message = """
 12 | ---------------------------------------------------
 13 | Enter a number to show detail, or enter 'q' to quit
 14 | 
 15 | 0 - Grammar
 16 | 1 - Input Code
 17 | 2 - Scanner States
 18 | 3 - SLR States
 19 | 4 - Token Table
 20 | 5 - Symbol Table
 21 | 6 - First Set
 22 | 7 - Follow Set
 23 | 8 - Closure Set
 24 | 9 - SLR Table (Action/Goto Table)
 25 | 10 - Output Code
 26 | ---------------------------------------------------
 27 | """
 28 | 
 29 | if __name__ == "__main__":
 30 |     symbol_table = Symbol_Table()
 31 | 
 32 |     # init grammar
 33 |     grammar = Grammar()
 34 |     grammar.read("input/grammar.txt")
 35 | 
 36 |     # init scanner
 37 |     with open("input/input.txt", "r") as f:
 38 |         code = f.read()
 39 |     scanner = Scanner(code, symbol_table)
 40 | 
 41 |     # generate and print slr table
 42 |     slr_table = SLR_Table(grammar)
 43 |     slr_table.analysis_table()
 44 | 
 45 |     # init slr automata
 46 |     slr_automata = SLR_Automata(scanner, grammar)
 47 |     slr_automata.run(debug=False)
 48 | 
 49 |     # save result
 50 |     grammar.save()
 51 |     symbol_table.save()
 52 |     scanner.save()
 53 |     slr_table.save()
 54 |     slr_automata.save()
 55 | 
 56 |     # output result'
 57 |     while True:
 58 |         console.print(message)
 59 |         input_string = input("> ")
 60 |         print("")
 61 | 
 62 |         if input_string == "0":
 63 |             # Grammar
 64 |             console.print(f"Grammar:", style="bold")
 65 |             console.print(f"Terminal Symbols: {grammar.terminal_symbols}")
 66 |             console.print(f"Variable Symbols: {grammar.variable_symbols}")
 67 | 
 68 |             for production in grammar.production_list:
 69 |                 console.print(f"{production.from_state} →", end="")
 70 | 
 71 |                 for item in production.items:
 72 |                     if item.is_symbol:
 73 |                         console.print(f" {item.value}", style="bold red", end="")
 74 |                     else:
 75 |                         console.print(f" {item.value}", end="")
 76 | 
 77 |                 console.print("")
 78 |         elif input_string == "1":
 79 |             # Input Code
 80 |             console.print(f"Input Code:", style="bold")
 81 |             print(code)
 82 |         elif input_string == "2":
 83 |             # Scanner States
 84 |             scanner.print_states()
 85 |         elif input_string == "3":
 86 |             # SLR States
 87 |             slr_automata.print_state()
 88 |         elif input_string == "4":
 89 |             # Token Table
 90 |             scanner.print_tokens()
 91 |         elif input_string == "5":
 92 |             # Symbol Table
 93 |             symbol_table.output()
 94 |         elif input_string == "6":
 95 |             # First Set
 96 |             slr_table.print_first_set()
 97 |         elif input_string == "7":
 98 |             # Follow Set
 99 |             slr_table.print_follow_set()
100 |         elif input_string == "8":
101 |             # Closure Set
102 |             slr_table.print_closure_set()
103 |         elif input_string == "9":
104 |             # SLR Table (Action/Goto Table)
105 |             print_slr_table(grammar)
106 |         elif input_string == "10":
107 |             # Output Code
108 |             slr_automata.print_code()
109 |         elif input_string == "q":
110 |             # quit
111 |             exit(0)
112 |         else:
113 |             console.print(f"Unknown input {input_string}!")
114 | 


--------------------------------------------------------------------------------
/test_grammar.py:
--------------------------------------------------------------------------------
 1 | from rich import console
 2 | from rich.console import Console
 3 | 
 4 | from Grammar import Grammar
 5 | 
 6 | console = Console()
 7 | 
 8 | grammar = Grammar()
 9 | grammar.read("input/grammar.txt")
10 | grammar.save()
11 | 
12 | console.print(f"Terminal Symbols: {grammar.terminal_symbols}")
13 | console.print(f"Variable Symbols: {grammar.variable_symbols}")
14 | 
15 | for production in grammar.production_list:
16 |     console.print(f"{production.from_state} →", end="")
17 | 
18 |     for item in production.items:
19 |         if item.is_symbol:
20 |             console.print(f" {item.value}", style="bold red", end="")
21 |         else:
22 |             console.print(f" {item.value}", end="")
23 | 
24 |     console.print("")
25 |     if production.code != "":
26 |         console.print(production.code, end="\n\n")
27 | 


--------------------------------------------------------------------------------
/test_scanner.py:
--------------------------------------------------------------------------------
 1 | from rich import box
 2 | from rich.console import Console
 3 | from rich.table import Table
 4 | 
 5 | from Scanner import Scanner
 6 | from Symbol_Table import Symbol_Table
 7 | from Token import Token
 8 | 
 9 | if __name__ == "__main__":
10 |     # init output
11 |     console = Console()
12 |     symbol_table = Symbol_Table()
13 | 
14 |     # run scanner
15 |     with open("input/input.txt", "r") as f:
16 |         scanner = Scanner(f.read(), symbol_table)
17 | 
18 |     while scanner.has_next():
19 |         token: Token = scanner.get_next()
20 | 
21 |     # print results
22 |     scanner.print_states()
23 |     scanner.print_tokens()
24 | 
25 |     symbol_table.save()
26 |     scanner.save()
27 | 


--------------------------------------------------------------------------------