├── .gitignore
├── Grammar.py
├── LICENSE
├── README.md
├── SLR_Automata.py
├── SLR_Table.py
├── Scanner.py
├── Symbol_Table.py
├── Token.py
├── format.sh
├── input
│   ├── grammar.txt
│   ├── grammar_assign.txt
│   ├── grammar_control.txt
│   ├── grammar_define.txt
│   ├── grammar_expression.txt
│   ├── grammar_raw.txt
│   ├── input.txt
│   ├── input_assign.txt
│   ├── input_control.txt
│   ├── input_define.txt
│   ├── input_expression.txt
│   ├── input_raw.txt
│   └── input_scanner.txt
├── main.py
├── test_grammar.py
└── test_scanner.py
/.gitignore:
--------------------------------------------------------------------------------
1 | **/.DS_Store
2 | **/__pycache__
3 | **/.vscode
4 | **/.idea
5 | Pipfile
6 | Pipfile.lock
7 | *_test.txt
8 | action_table.json
9 | goto_table.json
10 | /output/*
--------------------------------------------------------------------------------
/Grammar.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from rich.console import Console
4 |
5 | console = Console()
6 |
7 |
class Grammar_Item:
    """A single symbol appearing on the right-hand side of a production."""

    # ``is_symbol`` is True for a terminal symbol and False for a variable.
    is_symbol: bool
    # Text of the symbol exactly as it was supplied.
    value: str

    def __init__(self, is_symbol: bool, value: str) -> None:
        """Record the terminal flag and the symbol text without modification."""
        self.is_symbol, self.value = is_symbol, value
15 |
16 |
class Grammar_Production:
    """One production: a left-hand side, its right-hand symbols and attached code."""

    from_state: str  # left-hand side of the production
    items: List[Grammar_Item]  # right-hand side symbols, in order
    code: str  # code text stored with the production; empty until assigned

    def __init__(self, from_state: str) -> None:
        """Create a production for ``from_state`` with no symbols and no code."""
        self.from_state = from_state
        self.items = []
        self.code = ""

    def add(self, is_symbol: bool, value: str) -> None:
        """Append one right-hand-side symbol built from the given flag and text."""
        new_item = Grammar_Item(is_symbol, value)
        self.items.append(new_item)

    def __str__(self) -> str:
        """Return ``LHS → sym sym ...`` with the symbols separated by spaces."""
        right_side = " ".join(symbol.value for symbol in self.items)
        return f"{self.from_state} → {right_side}"
32 |
33 |
class Grammar:
    """Grammar loaded from a text file: symbol lists plus ordered productions.

    Layout consumed by :meth:`read` (as parsed below):

    * line 1: a label word followed by the space-separated terminal symbols;
    * line 2: a label word followed by the space-separated variable symbols;
      the first variable becomes the start symbol;
    * afterwards, one block per production, each introduced by a line that
      starts with ``@ ``. The first line of a block is ``LHS → sym sym ...``
      and every following line is stored verbatim as the production's code.
    """

    start_symbol: str
    terminal_symbols: List[str]
    variable_symbols: List[str]
    production_list: "List[Grammar_Production]"

    def __init__(self) -> None:
        # Give every attribute a value so save() works before read().
        self.start_symbol = ""
        self.terminal_symbols = []
        self.variable_symbols = []
        self.production_list = []

    def save(self) -> None:
        """Write the grammar summary to ``output/grammar.txt``.

        The ``output`` directory must already exist. The file is written as
        UTF-8 because productions contain ``→`` and may contain ``ε``.
        """
        with open("output/grammar.txt", "w", encoding="utf-8") as f:
            f.write(f"Start Symbol: {self.start_symbol}\n")
            f.write(f"Terminal Symbols: {' '.join(self.terminal_symbols)}\n")
            f.write(f"Variable Symbols: {' '.join(self.variable_symbols)}\n")
            f.write("Productions:\n")
            for production in self.production_list:
                f.write(f"{production}\n")

    def read(self, path: str) -> None:
        """Load symbols and productions from the grammar file at ``path``.

        Productions are appended to ``production_list`` in file order.

        Raises:
            SystemExit: with status -1 (after printing a message) when a
                production line has no `` → `` separator or uses a symbol that
                is neither a declared terminal, a declared variable, nor ``ε``.
        """
        # The grammar uses non-ASCII symbols, so never rely on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            blocks = f.read().split("\n@ ")

        symbol_lines: List[str] = blocks[0].split("\n")
        production_blocks: List[str] = blocks[1:]

        # The first word on each header line is a label, not a symbol.
        self.terminal_symbols = symbol_lines[0].split(" ")[1:]
        self.variable_symbols = symbol_lines[1].split(" ")[1:]
        self.start_symbol = self.variable_symbols[0]

        # Build the lookup sets once instead of once per symbol.
        terminal_lookup = set(self.terminal_symbols) | {"ε"}
        variable_lookup = set(self.variable_symbols)

        for block in production_blocks:
            lines = block.split("\n")
            production_line = lines[0]
            from_state, arrow, production = production_line.partition(" → ")
            if not arrow:
                console.print(f"Malformed production '{production_line}' in grammar file", style="bold red")
                raise SystemExit(-1)

            current_grammar_production = Grammar_Production(from_state)
            current_grammar_production.code = "\n".join(lines[1:])

            for item in production.split(" "):
                if item in terminal_lookup:
                    current_grammar_production.add(True, item)
                elif item in variable_lookup:
                    current_grammar_production.add(False, item)
                else:
                    console.print(f"Unknown symbol '{item}' in grammar file", style="bold red")
                    # Same exit status as exit(-1), without needing the site module.
                    raise SystemExit(-1)

            self.production_list.append(current_grammar_production)
83 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 cometeme, M010K
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # compilers 编译原理 - 简单类 C 编译器
2 |
3 | 本项目实现了一个简单的类 C 编译器,能够分析简单类 C 语言风格的程序代码。如声明语句、赋值语句、表达式、if while 控制语句等,进行语法分析并生成相应的中间代码(三地址代码)。
4 |
5 | 提供了一个命令行交互程序,可以输出词法分析、语法分析、语义分析及中间代码生成过程中的各种表格以及数据集合。
6 |
7 | 输入文法文件可以根据需要进行修改,同时也可以添加自定义的语义动作,从而能够让程序分析不同的语言。
8 |
9 | ## 运行说明
10 |
11 | 运行说明(**需要确保 python 版本为 3.7**):
12 |
13 | 1. 进入项目文件夹
14 |
15 | (初次使用需要创建一个空的 output 目录)
16 |
17 | 2. 安装 rich 库(若没有安装)
18 |
19 | ```shell
20 | pip install rich
21 | ```
22 |
23 | 3. 运行`main.py`程序
24 |
25 | ```shell
26 | python main.py
27 | ```
28 |
29 | ### 操作说明
30 |
31 | 运行`main.py`后,命令行中会生成引导菜单(如下所示):
32 |
33 | ```shell
34 | ---------------------------------------------------
35 | Enter a number to show detail, or enter 'q' to quit
36 |
37 | 0 - Grammar
38 | 1 - Input Code
39 | 2 - Scanner States
40 | 3 - SLR States
41 | 4 - Token Table
42 | 5 - Symbol Table
43 | 6 - First Set
44 | 7 - Follow Set
45 | 8 - Closure Set
46 | 9 - SLR Table (Action/Goto Table)
47 | 10 - Output Code
48 | ---------------------------------------------------
49 | ```
50 |
51 | 下面对各个选项进行说明:
52 |
53 | | 选项 | 功能 |
54 | | :--: | :------------------------------------------------: |
55 | | 0 | 输出给定的文法 |
56 | | 1 | 输出给定的程序输入 |
57 | | 2 | 输出词法分析的结果 |
58 | | 3 | 输出SLR语法分析过程(包含分析栈以及移入/归约动作) |
59 | | 4 | 输出 Token 串表 |
60 | | 5 | 输出符号表 |
61 | | 6 | 输出 First 集合 |
62 | | 7 | 输出 Follow 集合 |
63 | | 8 | 输出项集族 |
64 | | 9 | 输出 SLR 分析表(包括 action 和 goto 表) |
65 | | 10 | 输出生成的中间代码 |
66 | | q | 退出程序 |
67 |
68 |
69 | ## 工程文件说明
70 |
71 | 项目整体目录结构如下:
72 |
73 | ```shell
74 | .
75 | ├── Grammar.py
76 | ├── SLR_Automata.py
77 | ├── SLR_Table.py
78 | ├── Scanner.py
79 | ├── Symbol_Table.py
80 | ├── Token.py
81 | ├── action_table.json
82 | ├── format.sh
83 | ├── goto_table.json
84 | ├── input
85 | │ ├── grammar.txt
86 | │ ├── grammar_assign.txt
87 | │ ├── grammar_control.txt
88 | │ ├── grammar_define.txt
89 | │ ├── grammar_expression.txt
90 | │ ├── grammar_raw.txt
91 | │ ├── input.txt
92 | │ ├── input_assign.txt
93 | │ ├── input_control.txt
94 | │ ├── input_define.txt
95 | │ ├── input_expression.txt
96 | │ ├── input_raw.txt
97 | │ └── input_scanner.txt
98 | ├── main.py
99 | ├── output
100 | │ ├── closure_set.txt
101 | │ ├── code.csv
102 | │ ├── first_set.txt
103 | │ ├── follow_set.txt
104 | │ ├── grammar.txt
105 | │ ├── scanner_states.csv
106 | │ ├── slr_states.csv
107 | │ ├── slr_table.csv
108 | │ ├── symbol_table.csv
109 | │ └── token_table.csv
110 | ├── test_grammar.py
111 | └── test_scanner.py
112 | ```
113 |
114 | ### 输入与输出文件夹
115 |
116 | | 文件/文件夹 | 说明 |
117 | | :-----------: | :------------------------------------------------: |
118 | | input 文件夹 | 程序输入(文法、待分析的程序) |
119 | | output 文件夹 | 词法、语法、中间代码生成时产生的所有集合以及表结构 |
120 |
121 |
122 |
123 | ### 词法分析相关
124 |
125 | | 文件/文件夹 | 说明 |
126 | | :-------------: | :--------------: |
127 | | Scanner.py | 词法分析器的实现 |
128 | | test_scanner.py | 词法分析器测试 |
129 | | Token.py | Token 相关 |
130 | | Symbol_Table.py | 符号表相关 |
131 |
132 | ### 语法分析/中间代码生成相关
133 |
134 | | 文件/文件夹 | 说明 |
135 | | :-------------: | :------------------------------------------------: |
136 | | SLR_Table.py | SLR 语法分析表以及辅助函数生成 |
137 | | SLR_Automata.py | SLR 语法分析的实现 + 中间代码生成部分语义动作的实现 |
138 | | Grammar.py | 文法文件的读取与产生式的表示 |
139 | | test_grammar.py | 语法分析器测试 |
140 |
141 | ### 主控函数
142 |
143 | | 文件/文件夹 | 说明 |
144 | | :---------: | :----------------: |
145 | | main.py | 程序入口与控制逻辑 |
146 |
--------------------------------------------------------------------------------
/SLR_Automata.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import json
3 | from typing import Dict, List, Union
4 |
5 | from rich.console import Console
6 | from rich.table import Table
7 |
8 | from Grammar import Grammar, Grammar_Production
9 | from Scanner import Scanner
10 | from Symbol_Table import Symbol_Table, Table_Item, item_type_translate
11 | from Token import Token, Token_Type
12 |
13 | console = Console()
14 |
15 |
class SLR_Automata:
    """Table-driven SLR parser that runs each production's attached code on reduce.

    The ACTION and GOTO tables are read from ``action_table.json`` and
    ``goto_table.json`` in the current working directory. Every parser step is
    recorded in ``state_output`` and every generated instruction in
    ``code_output`` as ``[line_number, code]`` string pairs.
    """

    scanner: "Scanner"
    symbol_table: "Symbol_Table"
    grammar: "Grammar"
    action_table: List[Dict[str, str]] = list()
    goto_table: List[Dict[str, int]] = list()
    state_output: List[List[str]]
    code_output: List[List[str]]
    current_line: int

    def __init__(self, scanner: "Scanner", grammar: "Grammar") -> None:
        """Bind the scanner and grammar and load both parse tables from disk."""
        self.scanner = scanner
        self.symbol_table = scanner.symbol_table
        self.grammar = grammar

        with open("action_table.json", "r", encoding="utf-8") as f:
            self.action_table = json.load(f)
        with open("goto_table.json", "r", encoding="utf-8") as f:
            self.goto_table = json.load(f)

        # init state output
        self.state_output = []
        self.code_output = []
        # Defined here so gen_code() is usable before run() resets it.
        self.current_line = 0

    def print_state(self) -> None:
        """Print the recorded parser steps as a rich table."""
        output_table = Table(
            show_header=True,
            header_style="bold",
        )

        output_table.add_column("Token", justify="center")
        output_table.add_column("Stack", justify="left")
        output_table.add_column("Action", justify="center")
        output_table.add_column("Production", justify="left")

        for row in self.state_output:
            output_table.add_row(*row)

        console.print("SLR State:", style="bold")
        console.print(output_table)

    def print_code(self) -> None:
        """Print the generated instructions as a rich table."""
        output_table = Table(
            show_header=True,
            header_style="bold",
        )

        output_table.add_column("Line", justify="center")
        output_table.add_column("Code", justify="left")

        for row in self.code_output:
            output_table.add_row(*row)

        console.print("Code:", style="bold")
        console.print(output_table)

    def save(self) -> None:
        """Write the parser steps and generated code to CSV files under ``output/``.

        The files are opened with ``newline=""`` as the csv module requires
        (otherwise extra blank rows appear on Windows) and as UTF-8 because the
        production column contains ``→``.
        """
        with open("output/slr_states.csv", "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["Token", "Stack", "Action", "Production"])
            writer.writerows(self.state_output)
        with open("output/code.csv", "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["Line", "Code"])
            writer.writerows(self.code_output)

    def gen_code(self, code: str) -> None:
        """Append ``code`` as the next instruction and advance the line counter."""
        self.code_output.append([str(self.current_line), code])
        self.current_line += 1

    def gen_variable(self, name: str) -> int:
        """Add a variable called ``name`` to the symbol table and return its entry."""
        item = Table_Item()
        item.name = name
        item.variable = True
        return self.symbol_table.add_item(item)

    def make_list(self, inst: int) -> List:
        """Return a new list holding only instruction index ``inst``."""
        return [inst]

    def merge(self, l1: List, l2: List) -> List:
        """Return ``l1`` followed by the entries of ``l2`` not already present."""
        merged = list(l1)
        for inst in l2:
            if inst not in merged:
                merged.append(inst)
        return merged

    def back_patch(self, l: List, target: int) -> None:
        """Append ``target`` to every listed instruction that ends in an unfilled goto.

        An instruction is patched when its last character is not a digit and
        the four characters before the last one are ``goto``.
        """
        # NOTE(review): the most recently generated instruction is never
        # patched (index must be < len - 1). This mirrors the original bound;
        # confirm whether that exclusion is intended.
        last_patchable = len(self.code_output) - 1
        for inst in l:
            if inst >= last_patchable:
                continue
            code = self.code_output[inst][1]
            # An empty string has no last character to inspect; skip it.
            if not code or code[-1].isdigit():
                continue
            if code[-5:-1] == "goto":
                self.code_output[inst][1] = code + str(target)

    def _read_token(self):
        """Return ``(token, token_string)``; ``(None, "$")`` once input is exhausted."""
        token = self.scanner.get_next() if self.scanner.has_next() else None
        return token, "$" if token is None else token.to_string()

    def run(self, debug: bool = True) -> None:
        """Parse the scanner's token stream, executing production code on each reduce.

        Args:
            debug: when True, print the token, stack, attributes and action at
                every step, plus the production and its code on each reduce.

        Raises:
            SystemExit: with status -1 when the ACTION table has no entry for
                the current state/token or an action has an unknown type.
            Exception: whatever a production's code raises, after diagnostics
                have been printed.
        """
        # The local names below (stack, attributes, token, length,
        # current_attribute, ...) are visible to the exec'd production code, so
        # they must not be renamed.
        stack: List[int] = [0]
        attributes: List[Dict[str, Union[str, int]]] = [dict()]
        token, token_string = self._read_token()
        self.current_line = 0

        # run automata
        while True:
            assert len(stack) == len(attributes)

            if token_string not in self.action_table[stack[-1]]:
                self.print_state()
                console.print(f"Current token_string: {token_string}")
                console.print(f"Current stack: {stack}")
                console.print(f"Action Table [{stack[-1]}]: {self.action_table[stack[-1]]}")
                console.print("SLR ERROR", style="bold red")
                raise SystemExit(-1)

            action: str = self.action_table[stack[-1]][token_string]

            if debug:
                console.print(f"\ntoken: {token_string}")
                console.print(f"stack: {stack}")
                console.print(f"attributes: {attributes}")
                console.print(f"action: {action}")

            if action == "acc":
                self.state_output.append([token_string, str(stack), action, ""])
                break

            action_type: str = action[0]
            action_value: int = int(action[1:])

            if action_type == "s":
                # shift in next state
                self.state_output.append([token_string, str(stack), action, ""])

                stack.append(action_value)

                if token.token_type in [Token_Type.ID, Token_Type.CONST]:
                    attributes.append({"entry": -1 if token.content is None else token.content})
                else:
                    attributes.append(dict())

                token, token_string = self._read_token()
            elif action_type == "r":
                # reduced by production
                current_production: Grammar_Production = self.grammar.production_list[action_value]
                self.state_output.append([token_string, str(stack), action, str(current_production)])

                if debug:
                    console.print(f"production: {current_production}")
                    print(f"code:\n{current_production.code}\n")

                length: int = len(current_production.items)
                current_attribute = dict()

                # run generation code
                # SECURITY: this executes code taken verbatim from the grammar
                # file; only load grammar files you trust.
                try:
                    exec(current_production.code)
                except Exception:
                    console.print("Execute Generation Faild!", style="bold red")
                    self.print_state()
                    print(f"Production: {current_production}\n\n")
                    print(f"code:\n\n{current_production.code}\n")
                    # Re-raise the original error; running the code a second
                    # time would repeat its side effects.
                    raise

                # solve for not A → ε
                if not (current_production.items[0].is_symbol and current_production.items[0].value == "ε"):
                    stack = stack[:-length]
                    attributes = attributes[:-length]

                reduce_state: str = current_production.from_state
                stack.append(self.goto_table[stack[-1]][reduce_state])
                attributes.append(current_attribute)

            else:
                self.print_state()
                console.print(f"Unknown action type {action_type}!", style="bold red")
                raise SystemExit(-1)
198 |
--------------------------------------------------------------------------------
/SLR_Table.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import json
3 | from copy import deepcopy
4 | from pprint import pformat
5 | from typing import Dict, List
6 |
7 | from rich.console import Console
8 | from rich.table import Table
9 |
10 | from Grammar import Grammar
11 |
12 | console = Console()
13 |
14 |
class ItemSet:
    """A numbered set of items together with its outgoing transitions."""

    def __init__(self):
        """Start as state 0 with no items and no transitions."""
        self.index = 0
        self.closure_items = set()  # items belonging to this state
        self.transfer = {}  # symbol -> index of the state reached on it

    def get_index(self):
        """Return this state's index."""
        return self.index

    def set_index(self, index: int):
        """Replace this state's index."""
        self.index = index

    def add_trans(self, symbol: str, next_index: int):
        """Record that ``symbol`` leads to state ``next_index``."""
        self.transfer[symbol] = next_index

    def exists(self, item: tuple) -> bool:
        """Tell whether ``item`` is already part of this state."""
        return item in self.closure_items

    def add(self, item: tuple) -> None:
        """Insert ``item`` into this state."""
        self.closure_items.add(item)

    def equal(self, other) -> bool:
        """Tell whether both states hold exactly the same items."""
        mine, theirs = self.closure_items, other.closure_items
        return len(mine) == len(theirs) and all(entry in theirs for entry in mine)
45 |
46 |
class ClosureFamily:
    """Ordered collection of item sets (the states of the automaton)."""

    def __init__(self):
        """Start with no states."""
        self.clourse_set = []

    def exists(self, item: ItemSet) -> bool:
        """Tell whether a stored state holds the same items as ``item``."""
        return any(member.equal(item) for member in self.clourse_set)

    def indexOf(self, item: ItemSet) -> int:
        """Return the index of the first stored state equal to ``item``, or -1."""
        for member in self.clourse_set:
            if member.equal(item):
                return member.get_index()
        return -1

    def add(self, item: ItemSet) -> None:
        """Append ``item`` as a new state."""
        self.clourse_set.append(item)
71 |
72 |
class SLR_Table:
    """Builds the LR(0) item sets, FIRST/FOLLOW sets and the SLR ACTION/GOTO tables.

    Items are ``(production_index, dot_position)`` tuples. ``ε`` is modelled as
    a terminal item with value ``"ε"`` and ``"$"`` marks the end of input.
    """

    def __init__(self, grammar: "Grammar") -> None:
        """Compute the item sets, FIRST sets and FOLLOW sets for ``grammar``."""
        self.grammar = grammar

        self.start_symbol = grammar.start_symbol
        self.action_symbols = grammar.terminal_symbols
        # The start symbol never appears as a GOTO column.
        self.goto_symbols = grammar.variable_symbols[1:]
        self.all_symbols = self.goto_symbols + self.action_symbols

        self.all_items = list()
        self.first_items = dict()  # item with first dot, key is from_state
        self.gen_all_items()

        self.C = self.gen_clourse_set([(0, 0)])  # clourse set

        self.first = self.first_set()
        self.follow = self.follow_set()

    def print_first_set(self) -> None:
        """Print the FIRST sets to the console."""
        console.print("First Set:", style="bold")
        console.print(self.first)

    def print_follow_set(self) -> None:
        """Print the FOLLOW sets to the console."""
        console.print("Follow Set:", style="bold")
        console.print(self.follow)

    def print_closure_set(self):
        """Print the number of states, then one table of items per state."""
        console.print(f"Num of states: {len(self.C.clourse_set)}", style="bold")
        for clourse in self.C.clourse_set:
            output_table = Table(
                show_header=True,
                header_style="bold",
            )
            output_table.add_column(f"I{clourse.index}", justify="left")
            for item in clourse.closure_items:
                output_table.add_row(self.get_item(item))
            console.print(output_table)

    def save(self) -> None:
        """Write the FIRST/FOLLOW sets, item sets and SLR table under ``output/``.

        Text files are written as UTF-8 because they contain ``→`` and ``ε``.
        """
        with open("output/first_set.txt", "w", encoding="utf-8") as f:
            for k in self.first:
                f.write(f"first({k}) = {self.first[k]}\n")

        with open("output/follow_set.txt", "w", encoding="utf-8") as f:
            for k in self.follow:
                f.write(f"follow({k}) = {self.follow[k]}\n")

        with open("output/closure_set.txt", "w", encoding="utf-8") as f:
            for index, clourse in enumerate(self.C.clourse_set):
                f.write(f"I{index}\n")
                for item in clourse.closure_items:
                    f.write(f"{self.get_item(item)}\n")
                f.write("\n")

        save_slr_table(self.grammar)

    def get_item(self, item: tuple) -> str:
        """Format ``item`` as ``LHS → a . b`` with ``.`` at the dot position."""
        production = self.grammar.production_list[item[0]]
        right = [it.value for it in production.items]

        if "ε" in right:  # remove ε
            right.remove("ε")

        right.insert(item[1], ".")
        return f"{production.from_state} → {' '.join(right)}"

    def contain_varepsilon(self, symbol: str) -> bool:
        """Tell whether one of ``symbol``'s own productions contains ``ε``.

        Returns False for symbols that have no productions.
        """
        entries = self.first_items.get(symbol)
        if entries is None:
            return False

        # Each entry is (production_index, dot); only the index names a production.
        return any(
            item.value == "ε"
            for index, _dot in entries
            for item in self.grammar.production_list[index].items
        )

    def get_first(self, first: dict, symbol: str, _active=None):
        """Merge the FIRST sets of leading variables into ``first[symbol]``.

        Kept for callers that used it directly; :meth:`first_set` no longer
        depends on it. ``_active`` holds the symbols on the current recursion
        path so mutually left-recursive variables cannot recurse forever.
        """
        active = set() if _active is None else _active
        active.add(symbol)
        for index, _dot in self.first_items[symbol]:
            item = self.grammar.production_list[index].items[0]
            if not item.is_symbol and item.value not in active:
                first[symbol] |= set(self.get_first(first, item.value, active))  # recurse
        active.discard(symbol)

        return first.get(symbol)

    def _first_of_sequence(self, first: dict, items: list) -> set:
        """Return FIRST of a symbol sequence; contains ``ε`` iff all of it can vanish."""
        result = set()
        for item in items:
            if item.is_symbol:
                if item.value == "ε":
                    continue  # ε contributes nothing; look at the next symbol
                result.add(item.value)
                return result
            symbol_first = first.get(item.value, set())
            result |= symbol_first - {"ε"}
            if "ε" not in symbol_first:
                return result
        result.add("ε")
        return result

    def first_set(self):
        """Compute FIRST for every terminal and every variable with productions.

        Terminals map to their own string (unchanged from the original data
        shape); variables map to a set. Variables are iterated to a fixed
        point, so the result does not depend on production order.
        """
        first = dict()

        for item in self.action_symbols:
            first[item] = item

        for from_state in self.first_items:
            first[from_state] = set()

        changed = True
        while changed:
            changed = False
            for production in self.grammar.production_list:
                target = first[production.from_state]
                before = len(target)
                target |= self._first_of_sequence(first, production.items)
                if len(target) != before:
                    changed = True

        return first

    def follow_set(self):
        """Compute FOLLOW for every variable with productions (needs ``self.first``).

        For each variable occurrence, FIRST of the rest of the production is
        added; when the rest can vanish, FOLLOW of the left-hand side is added
        too. The pass is repeated until nothing changes.
        """
        follow = dict()
        for from_state in self.first_items:
            follow[from_state] = set()

        follow[self.start_symbol].add("$")  # for begin symbol, add '$'

        changed = True
        while changed:
            changed = False
            for production in self.grammar.production_list:
                items = production.items
                for cur, item in enumerate(items):
                    if item.is_symbol:
                        continue
                    tail_first = self._first_of_sequence(self.first, items[cur + 1:])
                    addition = tail_first - {"ε"}
                    if "ε" in tail_first:
                        addition |= follow[production.from_state]
                    target = follow[item.value]
                    before = len(target)
                    target |= addition
                    if len(target) != before:
                        changed = True

        return follow

    def gen_all_items(self):
        """Fill ``first_items`` (dot-at-start items per variable) and ``all_items``."""
        for index, production in enumerate(self.grammar.production_list):

            # store item which dot at first
            self.first_items.setdefault(str(production.from_state), []).append((index, 0))

            for dot in range(len(production.items)):
                self.all_items.append((index, dot))

            # Every production except A → ε also has a dot-at-end item,
            # including single-symbol productions.
            if production.items and production.items[0].value != "ε":  # except ε
                self.all_items.append((index, len(production.items)))

    def get_clourse(self, items: list) -> "ItemSet":
        """Return the closure of ``items`` as a new ItemSet."""
        pending = list(items)
        close_set = ItemSet()  # clourse for items
        position = 0
        while position < len(pending):
            item = pending[position]
            position += 1
            close_set.add(item)
            candidate = self.grammar.production_list[item[0]]

            if len(candidate.items) == item[1]:  # dot at end of production
                continue
            after_dot = candidate.items[item[1]]
            if after_dot.is_symbol:
                continue
            # A variable follows the dot: pull in its dot-at-start items.
            for it in self.first_items[after_dot.value]:
                if not close_set.exists(it):
                    pending.append(it)
                    close_set.add(it)

        return close_set

    def goto(self, itemset: "ItemSet", symbol: str):
        """Return the closure of the items reached from ``itemset`` over ``symbol``."""
        kernel = set()
        for item in itemset.closure_items:
            production = self.grammar.production_list[item[0]]

            if item[1] != len(production.items):  # dot not at the end
                if production.items[item[1]].value == symbol:  # match ,goto next state
                    kernel.add((item[0], item[1] + 1))

        return self.get_clourse(list(kernel))

    def gen_clourse_set(self, start: list) -> "ClosureFamily":
        """Build all states reachable from the closure of ``start``.

        States are numbered in breadth-first discovery order; the initial
        state keeps index 0.
        """
        C = ClosureFamily()
        C.add(self.get_clourse(start))

        queue = list(C.clourse_set)
        index = 1
        while queue:
            clourse = queue.pop(0)

            for symbol in self.all_symbols:
                target = self.goto(clourse, symbol)
                if not target.closure_items:
                    continue

                known_index = C.indexOf(target)
                if known_index == -1:  # not seen before: register a new state
                    target.set_index(index)
                    index += 1

                    clourse.add_trans(symbol, target.get_index())
                    C.add(target)
                    queue.append(target)
                else:  # already known: only record the transition
                    clourse.add_trans(symbol, known_index)

        return C

    def analysis_table(self):
        """Build the ACTION and GOTO tables and write them to JSON files.

        Output goes to ``action_table.json`` and ``goto_table.json`` in the
        current working directory.
        """
        # NOTE(review): conflicting entries for the same state/symbol silently
        # overwrite each other; whichever item is visited last wins.
        state_count = len(self.C.clourse_set)
        action = [dict() for _ in range(state_count)]
        goto = [dict() for _ in range(state_count)]

        for clourse in self.C.clourse_set:
            state = clourse.get_index()
            for item in clourse.closure_items:
                production = self.grammar.production_list[item[0]]

                if item[1] != len(production.items):  # dot not at the end
                    symbol = production.items[item[1]]

                    # solve A → ε
                    if symbol.is_symbol and symbol.value == "ε":
                        for f in self.follow[production.from_state]:
                            action[state][f] = "r" + str(item[0])

                    # get next closure index
                    next_index = clourse.transfer.get(symbol.value)

                    if next_index is not None:
                        if symbol.is_symbol and symbol.value != "ε":
                            action[state][symbol.value] = "s" + str(next_index)
                        else:
                            goto[state][symbol.value] = next_index

                else:  # item[1] == len(production.items) , dot at the end
                    if production.from_state == self.start_symbol:
                        action[state]["$"] = "acc"
                    else:
                        for f in self.follow[production.from_state]:
                            action[state][f] = "r" + str(item[0])

        with open("action_table.json", "w", encoding="utf-8") as f:
            json.dump(action, f, indent=2)

        with open("goto_table.json", "w", encoding="utf-8") as f:
            json.dump(goto, f, indent=2)
353 |
354 |
def print_slr_table(grammar: Grammar) -> None:
    """Show the stored ACTION/GOTO tables as one rich table on the console.

    Both tables are read from ``action_table.json`` and ``goto_table.json`` in
    the current working directory. Columns are the grammar's terminals followed
    by its variables without the first one; missing entries appear as empty
    cells.
    """
    terminals: List[str] = grammar.terminal_symbols
    variables: List[str] = grammar.variable_symbols[1:]

    with open("action_table.json", "r") as action_file:
        action_rows: List[Dict[str, str]] = json.loads(action_file.read())
    with open("goto_table.json", "r") as goto_file:
        goto_rows: List[Dict[str, int]] = json.loads(goto_file.read())

    rendered = Table(show_header=True, header_style="bold")
    rendered.add_column("State", justify="center")
    for heading in terminals:
        rendered.add_column(heading, justify="center")
    for heading in variables:
        rendered.add_column(heading, justify="center")

    for state, (action_row, goto_row) in enumerate(zip(action_rows, goto_rows)):
        cells: List[str] = [str(state)]
        cells.extend(action_row.get(terminal, "") for terminal in terminals)
        cells.extend(str(goto_row.get(variable, "")) for variable in variables)
        rendered.add_row(*cells)

    console.print("SLR Table (Action/Goto Table):", style="bold")
    console.print(rendered)
385 |
386 |
def save_slr_table(grammar: "Grammar") -> None:
    """Write the stored ACTION/GOTO tables to ``output/slr_table.csv``.

    Both tables are read from ``action_table.json`` and ``goto_table.json`` in
    the current working directory; the ``output`` directory must already exist.
    Columns are ``State``, the grammar's terminals, then its variables without
    the first one. Missing entries are written as empty cells.
    """
    action_table_symbols: List[str] = grammar.terminal_symbols
    goto_table_symbols: List[str] = grammar.variable_symbols[1:]

    with open("action_table.json", "r", encoding="utf-8") as f:
        action_table: List[Dict[str, str]] = json.load(f)
    with open("goto_table.json", "r", encoding="utf-8") as f:
        goto_table: List[Dict[str, int]] = json.load(f)

    # newline="" is required by the csv module; without it Windows gets a
    # blank line after every row.
    with open("output/slr_table.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["State"] + action_table_symbols + goto_table_symbols)

        for state, (action_row, goto_row) in enumerate(zip(action_table, goto_table)):
            output_row: List[str] = [str(state)]
            output_row.extend(action_row.get(symbol, "") for symbol in action_table_symbols)
            output_row.extend(str(goto_row.get(symbol, "")) for symbol in goto_table_symbols)
            writer.writerow(output_row)
408 |
409 |
if __name__ == "__main__":
    # Build the tables for the default grammar and print them. The grammar
    # files live in the input/ folder, and the JSON tables are written to the
    # current working directory, so run this from the project root.
    grammar = Grammar()
    grammar.read("input/grammar.txt")

    slr = SLR_Table(grammar)
    slr.analysis_table()

    print_slr_table(grammar)
418 |
--------------------------------------------------------------------------------
/Scanner.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from enum import Enum, auto
3 | from string import ascii_letters, digits, printable
4 | from typing import List
5 |
6 | from rich.console import Console
7 | from rich.table import Table
8 |
9 | from Symbol_Table import Symbol_Table, Table_Item, Table_Item_Type
10 | from Token import Token, Token_Type
11 |
12 | console = Console()
13 |
14 |
class Scanner_State(Enum):
    """States of the scanner's transition table.

    Values come from ``auto()``, so they follow the declaration order below;
    do not reorder the members.
    """

    START = auto()
    # identifier
    ID = auto()
    # assign symbol '='
    ASSIGN = auto()
    # arithmetic operator (+, -, *, /)
    ALOP = auto()
    # relation operator
    L_G = auto()  # less '<' or greater '>'
    LE_GE = auto()  # less equal '<=' or greater equal '>='
    EQ = auto()  # equal '=='
    NOT = auto()  # not '!'
    NEQ = auto()  # not equal '!='
    # bracket
    LBRACKET = auto()  # left bracket '('
    RBRACKET = auto()  # right bracket ')'
    # semicolon
    SEMICOLON = auto()  # semicolon ';'
    # "if" (one state per prefix of the keyword)
    I = auto()
    IF = auto()
    # "int"
    IN = auto()
    INT = auto()
    # "else"
    E = auto()
    EL = auto()
    ELS = auto()
    ELSE = auto()
    # "while"
    W = auto()
    WH = auto()
    WHI = auto()
    WHIL = auto()
    WHILE = auto()
    # "float"
    F = auto()
    FL = auto()
    FLO = auto()
    FLOA = auto()
    FLOAT = auto()
    # number
    NUMBER = auto()
    # end of the token
    END = auto()
    # not a valid token
    ERROR = auto()
63 |
64 |
65 | # map from scanner state to token type
# map from scanner state to token type
# States for an incomplete keyword (I, IN, E, EL, ELS, W, ..., FLOA) map to
# Token_Type.ID; only the complete keyword states map to the keyword token.
# START, NOT, END and ERROR have no entry.
STATE_TO_TOKEN = {
    Scanner_State.ID: Token_Type.ID,
    Scanner_State.ASSIGN: Token_Type.ASSIGN,
    Scanner_State.ALOP: Token_Type.ALOP,
    # every relational operator state shares one token type
    Scanner_State.L_G: Token_Type.RELOP,
    Scanner_State.LE_GE: Token_Type.RELOP,
    Scanner_State.EQ: Token_Type.RELOP,
    Scanner_State.NEQ: Token_Type.RELOP,
    Scanner_State.LBRACKET: Token_Type.LBRACKET,
    Scanner_State.RBRACKET: Token_Type.RBRACKET,
    Scanner_State.SEMICOLON: Token_Type.SEMICOLON,
    # "if" / "int"
    Scanner_State.I: Token_Type.ID,
    Scanner_State.IF: Token_Type.IF,
    Scanner_State.IN: Token_Type.ID,
    Scanner_State.INT: Token_Type.INT,
    # "else"
    Scanner_State.E: Token_Type.ID,
    Scanner_State.EL: Token_Type.ID,
    Scanner_State.ELS: Token_Type.ID,
    Scanner_State.ELSE: Token_Type.ELSE,
    # "while"
    Scanner_State.W: Token_Type.ID,
    Scanner_State.WH: Token_Type.ID,
    Scanner_State.WHI: Token_Type.ID,
    Scanner_State.WHIL: Token_Type.ID,
    Scanner_State.WHILE: Token_Type.WHILE,
    # "float"
    Scanner_State.F: Token_Type.ID,
    Scanner_State.FL: Token_Type.ID,
    Scanner_State.FLO: Token_Type.ID,
    Scanner_State.FLOA: Token_Type.ID,
    Scanner_State.FLOAT: Token_Type.FLOAT,
    Scanner_State.NUMBER: Token_Type.CONST,
}
97 |
98 |
# Character classes used as patterns in the scanner's transition table.
DIGITS = digits  # 0~9

ID_START = f"{ascii_letters}_"  # a~z + A~Z + _
ID_APPEND = f"{ID_START}{digits}"  # a~z + A~Z + _ + 0~9

ALOPS = "+-*/"
RELOPS = "<>!="

SPACES = " \t\n\r\0"

# characters that end an identifier; numbers end on the same set
ID_SEP = "".join((ALOPS, RELOPS, "();", SPACES))
NUMBER_SEP = ID_SEP

# characters that end an operator or punctuation token
ANY_SEP = "".join((printable, SPACES))
113 |
114 |
# Rules shared by every state that can still turn into a plain identifier:
# identifier characters continue it, an identifier separator ends the token.
ID_TRANSITION_TEMPLATE = [(ID_APPEND, Scanner_State.ID), (ID_SEP, Scanner_State.END)]


def _keyword_prefix(*steps):
    """Transitions for a keyword prefix state: the keyword `steps` are tried first, then the identifier rules."""
    return [*steps, *ID_TRANSITION_TEMPLATE]


def _ends_on_any():
    """Transitions for a state whose token cannot grow any further: any ANY_SEP character ends it."""
    return [(ANY_SEP, Scanner_State.END)]


# For each state: an ordered list of (pattern, next state). The scanner takes the first
# entry whose pattern contains the current character, so the order inside a list matters.
SCANNER_TRANSITION = {
    Scanner_State.START: [
        ("i", Scanner_State.I),
        ("e", Scanner_State.E),
        ("w", Scanner_State.W),
        ("f", Scanner_State.F),
        ("(", Scanner_State.LBRACKET),
        (")", Scanner_State.RBRACKET),
        (";", Scanner_State.SEMICOLON),
        ("<>", Scanner_State.L_G),
        ("!", Scanner_State.NOT),
        ("=", Scanner_State.ASSIGN),
        (" ", Scanner_State.START),  # remove space
        (ALOPS, Scanner_State.ALOP),
        (DIGITS, Scanner_State.NUMBER),
        (ID_START, Scanner_State.ID),
    ],
    Scanner_State.ID: ID_TRANSITION_TEMPLATE,
    # operators and punctuation
    Scanner_State.ASSIGN: [("=", Scanner_State.EQ), *_ends_on_any()],
    Scanner_State.ALOP: _ends_on_any(),
    Scanner_State.L_G: [("=", Scanner_State.LE_GE), *_ends_on_any()],
    Scanner_State.LE_GE: _ends_on_any(),
    Scanner_State.EQ: _ends_on_any(),
    Scanner_State.NOT: [("=", Scanner_State.NEQ)],  # "!" has no rule of its own besides "!="
    Scanner_State.NEQ: _ends_on_any(),
    Scanner_State.LBRACKET: _ends_on_any(),
    Scanner_State.RBRACKET: _ends_on_any(),
    Scanner_State.SEMICOLON: _ends_on_any(),
    # "if" / "int"
    Scanner_State.I: _keyword_prefix(("f", Scanner_State.IF), ("n", Scanner_State.IN)),
    Scanner_State.IF: ID_TRANSITION_TEMPLATE,
    Scanner_State.IN: _keyword_prefix(("t", Scanner_State.INT)),
    Scanner_State.INT: ID_TRANSITION_TEMPLATE,
    # "else"
    Scanner_State.E: _keyword_prefix(("l", Scanner_State.EL)),
    Scanner_State.EL: _keyword_prefix(("s", Scanner_State.ELS)),
    Scanner_State.ELS: _keyword_prefix(("e", Scanner_State.ELSE)),
    Scanner_State.ELSE: ID_TRANSITION_TEMPLATE,
    # "while"
    Scanner_State.W: _keyword_prefix(("h", Scanner_State.WH)),
    Scanner_State.WH: _keyword_prefix(("i", Scanner_State.WHI)),
    Scanner_State.WHI: _keyword_prefix(("l", Scanner_State.WHIL)),
    Scanner_State.WHIL: _keyword_prefix(("e", Scanner_State.WHILE)),
    Scanner_State.WHILE: ID_TRANSITION_TEMPLATE,
    # "float"
    Scanner_State.F: _keyword_prefix(("l", Scanner_State.FL)),
    Scanner_State.FL: _keyword_prefix(("o", Scanner_State.FLO)),
    Scanner_State.FLO: _keyword_prefix(("a", Scanner_State.FLOA)),
    Scanner_State.FLOA: _keyword_prefix(("t", Scanner_State.FLOAT)),
    Scanner_State.FLOAT: ID_TRANSITION_TEMPLATE,
    # number
    Scanner_State.NUMBER: [
        (DIGITS, Scanner_State.NUMBER),
        (NUMBER_SEP, Scanner_State.END),
    ],
}
231 |
232 |
class Scanner:
    """Table-driven lexer: follows `SCANNER_TRANSITION` over the input, one token per `get_next()` call.

    Every transition taken and every token produced is also recorded as a row of strings, so
    the run can be shown with `print_states()` / `print_tokens()` or written out with `save()`.
    """

    pnt: int  # index in `code` of the next character to examine
    code: str  # input with whitespace normalised to blanks, terminated by a "\0" sentinel
    length: int  # len(code), sentinel included
    symbol_table: "Symbol_Table"  # receives the identifiers and constants met while scanning
    state_output: List[List[str]]  # rows: [pointer, current character, "FROM -> TO"]
    token_output: List[List[str]]  # rows: [token type name, token content]

    def __init__(self, code: str, symbol_table: "Symbol_Table") -> None:
        """init the scanner

        Args:
            `code` (str): raw code that needs to be processed
            `symbol_table` (Symbol_Table): symbol table for storing variables and constants
        """
        self.pnt = 0
        # Line breaks and tabs separate tokens just like blanks do. Map them to a blank instead
        # of deleting them, otherwise "else\nc" would be glued into the identifier "elsec".
        # A blank is used because it is the only whitespace the START state skips.
        for whitespace in "\r\n\t":
            code = code.replace(whitespace, " ")
        self.code = code.strip(" ") + "\0"  # add '\0' at the end for convenience
        self.length = len(self.code)
        self.symbol_table = symbol_table
        self.state_output = []
        self.token_output = []

    @staticmethod
    def _print_table(title: str, columns: List[List[str]], rows: List[List[str]]) -> None:
        """Print `rows` on the console under `title`; `columns` holds [header, justify] pairs."""
        output_table = Table(
            show_header=True,
            header_style="bold",
        )
        for header, justify in columns:
            output_table.add_column(header, justify=justify)
        for row in rows:
            output_table.add_row(*row)

        console.print(title, style="bold")
        console.print(output_table)

    def print_states(self) -> None:
        """Print every state transition recorded so far."""
        self._print_table(
            "Scanner States:",
            [["Pointer", "center"], ["Current Character", "center"], ["State Transfer", "left"]],
            self.state_output,
        )

    def print_tokens(self) -> None:
        """Print every token produced so far."""
        self._print_table(
            "Tokens:",
            [["Type", "center"], ["Content", "center"]],
            self.token_output,
        )

    @staticmethod
    def _write_csv(path: str, header: List[str], rows: List[List[str]]) -> None:
        """Write `header` followed by `rows` to the CSV file at `path`."""
        # newline="" is what the csv module asks for; without it every row is followed
        # by an empty line on platforms that translate "\n" on write
        with open(path, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(rows)

    def save(self) -> None:
        """Write the recorded transitions and tokens to CSV files in the `output` directory."""
        import os  # only needed here

        # the directory may not exist yet: its contents are git-ignored
        os.makedirs("output", exist_ok=True)
        self._write_csv(
            "output/scanner_states.csv",
            ["Pointer", "Current Character", "State Transfer"],
            self.state_output,
        )
        self._write_csv("output/token_table.csv", ["Type", "Content"], self.token_output)

    def has_next(self) -> bool:
        """check whether the scanner has next token to output

        Returns:
            `bool`: `True` if the scanner have next token
        """
        # the last character is the "\0" sentinel, which never starts a token
        return self.pnt < self.length - 1

    def _finish_token(self, final_state: "Scanner_State", content: str) -> "Token":
        """Build the token for text `content` that was completed in `final_state`, and record it."""
        result = Token()
        result.token_type = STATE_TO_TOKEN[final_state]

        if result.token_type in (Token_Type.ID, Token_Type.CONST):
            # for identifier or constant, the content is the entry(index) in symbol table
            entry: int = self.symbol_table.find_item_by_name(content)
            if entry == -1:
                # cannot find, create a new row in symbol table
                new_item = Table_Item()
                new_item.name = content
                new_item.variable = result.token_type == Token_Type.ID
                entry = self.symbol_table.add_item(new_item)
            result.content = entry
        elif result.token_type in (Token_Type.ALOP, Token_Type.RELOP):
            # arithmetic operator (+, -, *, /) or relation operator (<, >, <=, >=, ==, !=)
            result.content = content
        else:
            result.content = None

        self.token_output.append(
            [result.token_type.name, "" if result.content is None else str(result.content)]
        )
        return result

    def get_next(self) -> "Token":
        """get next token

        Starts in the START state at the current pointer and follows the transition table
        until a separator ends the token. The separator is not consumed, so the next call
        starts on it. Call only while `has_next()` is true.

        Returns:
            `Token`: The next token

        Raises:
            `SystemExit`: with code -1 when no transition accepts the current character; the
                tokens and transitions recorded so far are printed first.
        """
        current_state: Scanner_State = Scanner_State.START
        content: str = ""

        while True:
            cur: str = self.code[self.pnt]

            # find next state: the first matching pattern wins, no match at all is an error
            next_state: Scanner_State = Scanner_State.ERROR
            for pattern, to_state in SCANNER_TRANSITION[current_state]:
                if cur in pattern:
                    next_state = to_state
                    break

            if next_state == Scanner_State.END:
                return self._finish_token(current_state, content)

            self.state_output.append([str(self.pnt), cur, f"{current_state.name} -> {next_state.name}"])

            if next_state == Scanner_State.ERROR:
                self.print_tokens()
                self.print_states()
                console.print("ERROR WHEN GETTING NEXT TOKEN!", style="bold red")
                # same effect as exit(-1), without relying on the site-provided helper
                raise SystemExit(-1)

            # step to next state
            current_state = next_state
            if cur != " ":
                content += cur
            self.pnt += 1
--------------------------------------------------------------------------------
/Symbol_Table.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from enum import Enum, auto
3 | from typing import List, Union
4 |
5 | from rich.console import Console
6 | from rich.table import Table
7 |
8 | console = Console()
9 |
10 |
class Table_Item_Type(Enum):
    """Value types that can be recorded for a symbol-table item."""

    INT = 1
    FLOAT = 2


# lookup from the lowercase type name ("int", "float") to its enum member
item_type_translate = {member.name.lower(): member for member in Table_Item_Type}
17 |
18 |
class Table_Item:
    """One row of the symbol table; every field starts out as `None`."""

    name: Union[str, None]
    variable: Union[bool, None]  # True for variable, False for constant
    item_type: Union["Table_Item_Type", None]

    def __init__(self) -> None:
        self.name = self.variable = self.item_type = None

    def __str__(self) -> str:
        """Return "name, var|const, TYPE"; the type part is empty while no type is set."""
        kind = "var" if self.variable else "const"
        type_name = self.item_type.name if self.item_type is not None else ""
        return f"{self.name}, {kind}, {type_name}"
31 |
32 |
class Symbol_Table:
    """List of `Table_Item` rows; an item is addressed by its index (its "entry")."""

    size: int  # number of items, kept equal to len(table)
    table: List["Table_Item"]

    def __init__(self) -> None:
        self.size = 0
        self.table = list()

    @staticmethod
    def _cells(item: "Table_Item") -> list:
        """Return the [name, "Var"/"Const", type name or ""] cells shown for `item`."""
        return [
            item.name,
            "Var" if item.variable else "Const",
            "" if item.item_type is None else item.item_type.name,
        ]

    def output(self) -> None:
        """Print the whole table on the console."""
        output_table = Table(
            show_header=True,
            header_style="bold",
        )
        output_table.add_column("Name", justify="center")
        output_table.add_column("Var/Const", justify="center")
        output_table.add_column("Type", justify="center")

        for item in self.table:
            output_table.add_row(*self._cells(item))

        console.print("Symbol Table:", style="bold")
        console.print(output_table)

    def save(self) -> None:
        """Write the table to `output/symbol_table.csv`, creating the directory if needed."""
        import os  # only needed here

        # the directory may not exist yet: its contents are git-ignored
        os.makedirs("output", exist_ok=True)
        # newline="" is what the csv module asks for; without it every row is followed
        # by an empty line on platforms that translate "\n" on write
        with open("output/symbol_table.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Name", "Var/Const", "Type"])
            writer.writerows(self._cells(item) for item in self.table)

    def get_size(self) -> int:
        """Return the number of items in the table."""
        return self.size

    def find_item_by_name(self, name: str) -> int:
        """Return the index of the first item called `name`, or -1 if there is none."""
        return next((idx for idx, item in enumerate(self.table) if item.name == name), -1)

    def add_item(self, item: "Table_Item") -> int:
        """Append `item` and return its index."""
        self.table.append(item)
        self.size += 1
        return self.size - 1
85 |
--------------------------------------------------------------------------------
/Token.py:
--------------------------------------------------------------------------------
1 | from enum import Enum, auto
2 | from typing import Union
3 |
4 |
class Token_Type(Enum):
    """Kinds of token the scanner can produce."""

    # operands
    ID = 1  # identifier
    CONST = 2  # constant (number)
    # operators
    ASSIGN = 3  # assign symbol '='
    ALOP = 4  # arithmetic operator (+, -, *, /)
    RELOP = 5  # relation operator (<, >, <=, >=, ==, !=)
    # punctuation
    LBRACKET = 6  # left bracket '('
    RBRACKET = 7  # right bracket ')'
    SEMICOLON = 8  # semicolon ';'
    # keywords
    IF = 9  # if
    ELSE = 10  # else
    WHILE = 11  # while
    INT = 12  # int
    FLOAT = 13  # float
19 |
20 |
class Token:
    """A token: its type plus an optional payload."""

    token_type: Union[Token_Type, None]
    content: Union[str, int, None]  # str for name, int for entry

    def __init__(self) -> None:
        self.token_type = None  # type of the token
        self.content = None  # detail content (like identifier name or operator type)

    def to_string(self) -> str:
        """Return the token as a symbol: operator text, punctuation character, or lowercased type name."""
        kind = self.token_type
        if kind in (Token_Type.ALOP, Token_Type.RELOP):
            # operators carry their own text
            return str(self.content)

        fixed_spelling = {
            Token_Type.ASSIGN: "=",
            Token_Type.LBRACKET: "(",
            Token_Type.RBRACKET: ")",
            Token_Type.SEMICOLON: ";",
        }
        if kind in fixed_spelling:
            return fixed_spelling[kind]
        return kind.name.lower()

    def __str__(self) -> str:
        """Return "TYPE, content"; the content part is empty when there is none."""
        payload = "" if self.content is None else self.content
        return f"{self.token_type.name}, {payload}"
44 |
--------------------------------------------------------------------------------
/format.sh:
--------------------------------------------------------------------------------
#!/bin/zsh
# Sort the imports first, then reformat everything with a 120 column line limit.
isort .
black --line-length 120 .
--------------------------------------------------------------------------------
/input/grammar.txt:
--------------------------------------------------------------------------------
1 | TerminalSymbols: id const int float if else while > < == = + - * / ( ) ; $
2 | VariableSymbols: P' P D S L C E T F M N Q
3 |
4 | @ P' → P
5 | @ P → M D S
6 | self.back_patch(attributes[-1]["nextlist"], attributes[-3]["instr"])
7 |
8 | @ M → ε
9 | offset = 0
10 | temp_index = 0
11 | current_attribute["instr"] = self.current_line
12 |
13 | @ D → L id ; N D
14 | @ N → ε
15 | idx = attributes[-2]["entry"]
16 | self.symbol_table.table[idx].item_type = item_type_translate[attributes[-3]["type"]]
17 | width = attributes[-3]["width"]
18 | self.gen_code(f"Alloc [{offset},{offset+width}] for {self.symbol_table.table[idx].name}")
19 | offset += width
20 |
21 | @ D → ε
22 | @ L → int
23 | current_attribute["type"] = 'int'
24 | current_attribute["width"] = 4
25 |
26 | @ L → float
27 | current_attribute["type"] = 'float'
28 | current_attribute["width"] = 4
29 |
30 | @ S → S ; Q S
31 | self.back_patch(attributes[-4]["nextlist"], attributes[-2]["instr"])
32 | current_attribute["nextlist"] = attributes[-1]["nextlist"]
33 |
34 | @ Q → ε
35 | current_attribute["instr"] = self.current_line
36 |
37 | @ S → ε
38 | current_attribute["nextlist"] = []
39 |
40 | @ S → id = E
41 | entry0 = attributes[-3]["entry"]
42 | name0 = self.symbol_table.table[entry0].name
43 | entry1 = attributes[-1]["entry"]
44 | name1 = self.symbol_table.table[entry1].name
45 | self.gen_code(f"{name0} = {name1}")
46 |
47 | current_attribute["nextlist"] = [self.current_line]
48 |
49 | @ S → if ( C ) Q S
50 | truelist = attributes[-4]["truelist"]
51 | falselist = attributes[-4]["falselist"]
52 | instr = attributes[-2]["instr"]
53 | self.back_patch(truelist, instr)
54 | nextlist_s1 = attributes[-1]["nextlist"]
55 | current_attribute["nextlist"] = self.merge(falselist, nextlist_s1)
56 |
57 | @ S → while Q ( C ) Q S
58 | self.back_patch(attributes[-1]["nextlist"], attributes[-6]["instr"])
59 | self.back_patch(attributes[-4]["truelist"], attributes[-2]["instr"])
60 | current_attribute["nextlist"] = attributes[-4]["falselist"]
61 | idx = attributes[-6]["instr"]
62 | self.gen_code(f"goto {idx}")
63 |
64 | @ C → E > E
65 | current_attribute["truelist"] = self.make_list(self.current_line)
66 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
67 |
68 | idx1 = attributes[-3]["entry"]
69 | idx2 = attributes[-1]["entry"]
70 | name1 = self.symbol_table.table[idx1].name
71 | name2 = self.symbol_table.table[idx2].name
72 |
73 | self.gen_code(f"if {name1} > {name2} goto ")
74 | self.gen_code(f"goto ")
75 |
76 | @ C → E < E
77 | current_attribute["truelist"] = self.make_list(self.current_line)
78 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
79 |
80 | idx1 = attributes[-3]["entry"]
81 | idx2 = attributes[-1]["entry"]
82 | name1 = self.symbol_table.table[idx1].name
83 | name2 = self.symbol_table.table[idx2].name
84 |
85 | self.gen_code(f"if {name1} < {name2} goto ")
86 | self.gen_code(f"goto ")
87 |
88 |
89 | @ C → E == E
90 | current_attribute["truelist"] = self.make_list(self.current_line)
91 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
92 |
93 | idx1 = attributes[-3]["entry"]
94 | idx2 = attributes[-1]["entry"]
95 | name1 = self.symbol_table.table[idx1].name
96 | name2 = self.symbol_table.table[idx2].name
97 |
98 | self.gen_code(f"if {name1} == {name2} goto ")
99 | self.gen_code(f"goto ")
100 |
101 |
102 | @ E → E + T
103 | name0 = f"temp{temp_index}"
104 | entry = self.gen_variable(name0)
105 | temp_index += 1
106 |
107 | entry1 = attributes[-3]["entry"]
108 | name1 = self.symbol_table.table[entry1].name
109 | entry2 = attributes[-1]["entry"]
110 | name2 = self.symbol_table.table[entry2].name
111 |
112 | current_attribute["entry"] = entry
113 | self.gen_code(f"{name0} = {name1} + {name2}")
114 |
115 | @ E → E - T
116 | name0 = f"temp{temp_index}"
117 | entry = self.gen_variable(name0)
118 | temp_index += 1
119 |
120 | entry1 = attributes[-3]["entry"]
121 | name1 = self.symbol_table.table[entry1].name
122 | entry2 = attributes[-1]["entry"]
123 | name2 = self.symbol_table.table[entry2].name
124 |
125 | current_attribute["entry"] = entry
126 | self.gen_code(f"{name0} = {name1} - {name2}")
127 |
128 | @ E → T
129 | current_attribute["entry"] = attributes[-1]["entry"]
130 |
131 | @ T → F
132 | current_attribute["entry"] = attributes[-1]["entry"]
133 |
134 | @ T → T * F
135 | name0 = f"temp{temp_index}"
136 | entry = self.gen_variable(name0)
137 | temp_index += 1
138 |
139 | entry1 = attributes[-3]["entry"]
140 | name1 = self.symbol_table.table[entry1].name
141 | entry2 = attributes[-1]["entry"]
142 | name2 = self.symbol_table.table[entry2].name
143 |
144 | current_attribute["entry"] = entry
145 | self.gen_code(f"{name0} = {name1} * {name2}")
146 |
147 | @ T → T / F
148 | name0 = f"temp{temp_index}"
149 | entry = self.gen_variable(name0)
150 | temp_index += 1
151 |
152 | entry1 = attributes[-3]["entry"]
153 | name1 = self.symbol_table.table[entry1].name
154 | entry2 = attributes[-1]["entry"]
155 | name2 = self.symbol_table.table[entry2].name
156 |
157 | current_attribute["entry"] = entry
158 | self.gen_code(f"{name0} = {name1} / {name2}")
159 |
160 | @ F → ( E )
161 | current_attribute["entry"] = attributes[-2]["entry"]
162 |
163 | @ F → id
164 | current_attribute["entry"] = attributes[-1]["entry"]
165 |
166 | @ F → const
167 | current_attribute["entry"] = attributes[-1]["entry"]
--------------------------------------------------------------------------------
/input/grammar_assign.txt:
--------------------------------------------------------------------------------
1 | TerminalSymbols: id const = + - * / ( ) ; $
2 | VariableSymbols: P' P M S E T F
3 |
4 | @ P' → P
5 |
6 | @ P → M S
7 |
8 | @ M → ε
9 | offset = 0
10 | temp_index = 0
11 |
12 | @ S → S ; S
13 |
14 | @ S → ε
15 |
16 | @ S → id = E
17 | entry0 = attributes[-3]["entry"]
18 | name0 = self.symbol_table.table[entry0].name
19 | entry1 = attributes[-1]["entry"]
20 | name1 = self.symbol_table.table[entry1].name
21 | self.gen_code(f"{name0} = {name1}")
22 |
23 | @ E → E + T
24 | name0 = f"temp{temp_index}"
25 | entry = self.gen_variable(name0)
26 | temp_index += 1
27 |
28 | entry1 = attributes[-3]["entry"]
29 | name1 = self.symbol_table.table[entry1].name
30 | entry2 = attributes[-1]["entry"]
31 | name2 = self.symbol_table.table[entry2].name
32 |
33 | current_attribute["entry"] = entry
34 | self.gen_code(f"{name0} = {name1} + {name2}")
35 |
36 | @ E → E - T
37 | name0 = f"temp{temp_index}"
38 | entry = self.gen_variable(name0)
39 | temp_index += 1
40 |
41 | entry1 = attributes[-3]["entry"]
42 | name1 = self.symbol_table.table[entry1].name
43 | entry2 = attributes[-1]["entry"]
44 | name2 = self.symbol_table.table[entry2].name
45 |
46 | current_attribute["entry"] = entry
47 | self.gen_code(f"{name0} = {name1} - {name2}")
48 |
49 | @ E → T
50 | current_attribute["entry"] = attributes[-1]["entry"]
51 |
52 | @ T → F
53 | current_attribute["entry"] = attributes[-1]["entry"]
54 |
55 | @ T → T * F
56 | name0 = f"temp{temp_index}"
57 | entry = self.gen_variable(name0)
58 | temp_index += 1
59 |
60 | entry1 = attributes[-3]["entry"]
61 | name1 = self.symbol_table.table[entry1].name
62 | entry2 = attributes[-1]["entry"]
63 | name2 = self.symbol_table.table[entry2].name
64 |
65 | current_attribute["entry"] = entry
66 | self.gen_code(f"{name0} = {name1} * {name2}")
67 |
68 | @ T → T / F
69 | name0 = f"temp{temp_index}"
70 | entry = self.gen_variable(name0)
71 | temp_index += 1
72 |
73 | entry1 = attributes[-3]["entry"]
74 | name1 = self.symbol_table.table[entry1].name
75 | entry2 = attributes[-1]["entry"]
76 | name2 = self.symbol_table.table[entry2].name
77 |
78 | current_attribute["entry"] = entry
79 | self.gen_code(f"{name0} = {name1} / {name2}")
80 |
81 | @ F → ( E )
82 | current_attribute["entry"] = attributes[-2]["entry"]
83 |
84 | @ F → id
85 | current_attribute["entry"] = attributes[-1]["entry"]
86 |
87 | @ F → const
88 | current_attribute["entry"] = attributes[-1]["entry"]
--------------------------------------------------------------------------------
/input/grammar_control.txt:
--------------------------------------------------------------------------------
1 | TerminalSymbols: id ; if while ( ) > = $
2 | VariableSymbols: P' P M S C Q E
3 |
4 | @ P' → P
5 | @ P → M S
6 | self.back_patch(attributes[-1]["nextlist"], attributes[-2]["instr"])
7 | @ M → ε
8 | offset = 0
9 | current_attribute["instr"] = self.current_line
10 |
11 |
12 | @ S → S ; Q S
13 | self.back_patch(attributes[-4]["nextlist"], attributes[-2]["instr"])
14 | current_attribute["nextlist"] = attributes[-1]["nextlist"]
15 |
16 | @ S → ε
17 | current_attribute["nextlist"] = []
18 |
19 | @ S → E
20 | idx = attributes[-1]["entry"]
21 | name = self.symbol_table.table[idx].name
22 | self.gen_code(f"appearance of {name}")
23 | current_attribute["nextlist"] = [self.current_line]
24 |
25 | @ S → if ( C ) Q S
26 | truelist = attributes[-4]["truelist"]
27 | falselist = attributes[-4]["falselist"]
28 | instr = attributes[-2]["instr"]
29 | self.back_patch(truelist, instr)
30 | nextlist_s1 = attributes[-1]["nextlist"]
31 | current_attribute["nextlist"] = self.merge(falselist, nextlist_s1)
32 |
33 | @ S → while Q ( C ) Q S
34 | self.back_patch(attributes[-1]["nextlist"], attributes[-6]["instr"])
35 | self.back_patch(attributes[-4]["truelist"], attributes[-2]["instr"])
36 | current_attribute["nextlist"] = attributes[-4]["falselist"]
37 | idx = attributes[-6]["instr"]
38 | self.gen_code(f"goto {idx}")
39 |
40 |
41 | @ Q → ε
42 | current_attribute["instr"] = self.current_line
43 |
44 | @ C → E > E
45 | current_attribute["truelist"] = self.make_list(self.current_line)
46 | current_attribute["falselist"] = self.make_list(self.current_line + 1)
47 |
48 | idx1 = attributes[-3]["entry"]
49 | idx2 = attributes[-1]["entry"]
50 | name1 = self.symbol_table.table[idx1].name
51 | name2 = self.symbol_table.table[idx2].name
52 |
53 | self.gen_code(f"if {name1} > {name2} goto ")
54 | self.gen_code(f"goto ")
55 |
56 |
57 | @ E → id
58 | attributes[-1]["value"] = 1
59 | current_attribute["value"] = attributes[-1]["value"]
60 | current_attribute["entry"] = attributes[-1]["entry"]
61 |
62 |
--------------------------------------------------------------------------------
/input/grammar_define.txt:
--------------------------------------------------------------------------------
1 | TerminalSymbols: id int float ; $
2 | VariableSymbols: P' P M D N S L
3 |
4 | @ P' → P
5 | @ P → M D
6 | @ M → ε
7 | offset = 0
8 |
9 | @ D → L id ; N D
10 | @ N → ε
11 | idx = attributes[-2]["entry"]
12 | self.symbol_table.table[idx].item_type = item_type_translate[attributes[-3]["type"]]
13 | self.gen_code(f"Alloc [{offset},{offset+4}] for {self.symbol_table.table[idx].name}")
14 | offset += 4
15 |
16 | @ D → ε
17 | @ L → int
18 | current_attribute["type"] = 'int'
19 |
20 | @ L → float
21 | current_attribute["type"] = 'float'
--------------------------------------------------------------------------------
/input/grammar_expression.txt:
--------------------------------------------------------------------------------
1 | TerminalSymbols: id + * ( ) $
2 | VariableSymbols: S E F T
3 |
4 | @ S → E
5 | @ E → E + T
6 | @ E → T
7 | @ T → T * F
8 | @ T → F
9 | @ F → ( E )
10 | @ F → id
--------------------------------------------------------------------------------
/input/grammar_raw.txt:
--------------------------------------------------------------------------------
1 | TerminalSymbols: id const int float if else while > < == = + - * / ( ) ; $
2 | VariableSymbols: P' P D S L C E T F
3 |
4 | @ P' → P
5 | @ P → D S
6 | @ D → L id ; D
7 | @ D → ε
8 | @ L → int
9 | @ L → float
10 | @ S → S ; S
11 | @ S → ε
12 | @ S → id = E
13 | @ S → if ( C ) S
14 | @ S → if ( C ) S else S
15 | @ S → while ( C ) S
16 | @ C → E > E
17 | @ C → E < E
18 | @ C → E == E
19 | @ E → E + T
20 | @ E → E - T
21 | @ E → T
22 | @ T → F
23 | @ T → T * F
24 | @ T → T / F
25 | @ F → ( E )
26 | @ F → id
27 | @ F → const
--------------------------------------------------------------------------------
/input/input.txt:
--------------------------------------------------------------------------------
1 | int a;
2 | int b;
3 | float c;
4 |
5 | a = 2;
6 | b = 1;
7 | c = (a + b) / 2;
8 |
9 | if (a > (b + c))
10 | if (c < 12)
11 | c = (a + b) * c - a / 2;
12 |
13 | while (a < b)
14 | a = b + c - 1;
15 |
16 | b = a;
17 |
--------------------------------------------------------------------------------
/input/input_assign.txt:
--------------------------------------------------------------------------------
1 | a = b + 3;
2 | c = d - a;
3 | x = (a + b) * c - d / 2;
--------------------------------------------------------------------------------
/input/input_control.txt:
--------------------------------------------------------------------------------
1 | if (a > b)
2 | a;
3 | while (a > b)
4 | b;
5 | c;
6 |
--------------------------------------------------------------------------------
/input/input_define.txt:
--------------------------------------------------------------------------------
1 | int abc;
2 | float def;
--------------------------------------------------------------------------------
/input/input_expression.txt:
--------------------------------------------------------------------------------
1 | (a*b)+c
--------------------------------------------------------------------------------
/input/input_raw.txt:
--------------------------------------------------------------------------------
1 | int a;
2 | int b;
3 | float c;
4 | a=2;
5 | b=1;
6 | if(a>(b+c))
7 | c=(a+b)/2;
8 | else
9 | c=a-b;
10 | while(c=b)
7 | c=a+b;
8 | else
9 | c=a-b;
10 | while(c ")
60 | print("")
61 |
62 | if input_string == "0":
63 | # Grammar
64 | console.print(f"Grammar:", style="bold")
65 | console.print(f"Terminal Symbols: {grammar.terminal_symbols}")
66 | console.print(f"Variable Symbols: {grammar.variable_symbols}")
67 |
68 | for production in grammar.production_list:
69 | console.print(f"{production.from_state} →", end="")
70 |
71 | for item in production.items:
72 | if item.is_symbol:
73 | console.print(f" {item.value}", style="bold red", end="")
74 | else:
75 | console.print(f" {item.value}", end="")
76 |
77 | console.print("")
78 | elif input_string == "1":
79 | # Input Code
80 | console.print(f"Input Code:", style="bold")
81 | print(code)
82 | elif input_string == "2":
83 | # Scanner States
84 | scanner.print_states()
85 | elif input_string == "3":
86 | # SLR States
87 | slr_automata.print_state()
88 | elif input_string == "4":
89 | # Token Table
90 | scanner.print_tokens()
91 | elif input_string == "5":
92 | # Symbol Table
93 | symbol_table.output()
94 | elif input_string == "6":
95 | # First Set
96 | slr_table.print_first_set()
97 | elif input_string == "7":
98 | # Follow Set
99 | slr_table.print_follow_set()
100 | elif input_string == "8":
101 | # Closure Set
102 | slr_table.print_closure_set()
103 | elif input_string == "9":
104 | # SLR Table (Action/Goto Table)
105 | print_slr_table(grammar)
106 | elif input_string == "10":
107 | # Output Code
108 | slr_automata.print_code()
109 | elif input_string == "q":
110 | # quit
111 | exit(0)
112 | else:
113 | console.print(f"Unknown input {input_string}!")
114 |
--------------------------------------------------------------------------------
/test_grammar.py:
--------------------------------------------------------------------------------
1 | from rich import console
2 | from rich.console import Console
3 |
4 | from Grammar import Grammar
5 |
console = Console()

# read the grammar definition, then call its save()
grammar = Grammar()
grammar.read("input/grammar.txt")
grammar.save()

console.print(f"Terminal Symbols: {grammar.terminal_symbols}")
console.print(f"Variable Symbols: {grammar.variable_symbols}")

# one line per production, with the items flagged `is_symbol` highlighted,
# followed by the code attached to the production (if any)
for production in grammar.production_list:
    console.print(f"{production.from_state} →", end="")

    for item in production.items:
        highlight = "bold red" if item.is_symbol else None
        console.print(f" {item.value}", style=highlight, end="")

    console.print("")
    if production.code:
        console.print(production.code, end="\n\n")
27 |
--------------------------------------------------------------------------------
/test_scanner.py:
--------------------------------------------------------------------------------
1 | from rich import box
2 | from rich.console import Console
3 | from rich.table import Table
4 |
5 | from Scanner import Scanner
6 | from Symbol_Table import Symbol_Table
7 | from Token import Token
8 |
def main() -> None:
    """Scan input/input.txt, print the scanner's state and token tables, then save the results."""
    # init output
    console = Console()
    symbol_table = Symbol_Table()

    # run scanner
    with open("input/input.txt", "r") as source_file:
        source = source_file.read()
    scanner = Scanner(source, symbol_table)

    # drain the scanner; the tokens themselves are recorded inside it
    while scanner.has_next():
        scanner.get_next()

    # print results
    scanner.print_states()
    scanner.print_tokens()

    symbol_table.save()
    scanner.save()


if __name__ == "__main__":
    main()
27 |
--------------------------------------------------------------------------------