├── .gitignore ├── Grammar.py ├── LICENSE ├── README.md ├── SLR_Automata.py ├── SLR_Table.py ├── Scanner.py ├── Symbol_Table.py ├── Token.py ├── format.sh ├── input ├── grammar.txt ├── grammar_assign.txt ├── grammar_control.txt ├── grammar_define.txt ├── grammar_expression.txt ├── grammar_raw.txt ├── input.txt ├── input_assign.txt ├── input_control.txt ├── input_define.txt ├── input_expression.txt ├── input_raw.txt └── input_scanner.txt ├── main.py ├── test_grammar.py └── test_scanner.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | **/__pycache__ 3 | **/.vscode 4 | **/.idea 5 | Pipfile 6 | Pipfile.lock 7 | *_test.txt 8 | action_table.json 9 | goto_table.json 10 | /output/* -------------------------------------------------------------------------------- /Grammar.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from rich.console import Console 4 | 5 | console = Console() 6 | 7 | 8 | class Grammar_Item: 9 | is_symbol: bool # True for terminal symbol, False for Variable 10 | value: str 11 | 12 | def __init__(self, is_symbol: bool, value: str) -> None: 13 | self.is_symbol = is_symbol 14 | self.value = value 15 | 16 | 17 | class Grammar_Production: 18 | from_state: str 19 | items: List[Grammar_Item] 20 | code: str 21 | 22 | def __init__(self, from_state: str) -> None: 23 | self.from_state = from_state 24 | self.items = list() 25 | self.code = "" 26 | 27 | def add(self, is_symbol: bool, value: str) -> None: 28 | self.items.append(Grammar_Item(is_symbol, value)) 29 | 30 | def __str__(self) -> str: 31 | return f"{self.from_state} → " + " ".join([item.value for item in self.items]) 32 | 33 | 34 | class Grammar: 35 | start_symbol: str 36 | terminal_symbols: List[str] 37 | variable_symbols: List[str] 38 | production_list: List[Grammar_Production] 39 | 40 | def __init__(self) -> None: 41 | self.production_list = list() 42 | 43 | def save(self) -> None: 44 | with open("output/grammar.txt", "w") as f: 45 | f.write(f"Start Symbol: {self.start_symbol}\n") 46 | f.write(f"Terminal Symbols: {' '.join(self.terminal_symbols)}\n") 47 | f.write(f"Variable Symbols: {' '.join(self.variable_symbols)}\n") 48 | f.write("Productions:\n") 49 | for production in self.production_list: 50 | f.write(f"{production}\n") 51 | 52 | def read(self, path: str) -> None: 53 | with open(path, "r") as f: 54 | blocks = f.read().split("\n@ ") 55 | 56 | symbol_lines: List[str] = blocks[0].split("\n") 57 | blocks: List[str] = blocks[1:] 58 | 59 | self.terminal_symbols = symbol_lines[0].split(" ")[1:] 60 | self.variable_symbols = symbol_lines[1].split(" ")[1:] 61 | self.start_symbol = self.variable_symbols[0] 62 | 63 | for block in blocks: 64 | lines = block.split("\n") 65 | production_line = lines[0] 66 | code_lines = lines[1:] if len(lines) > 1 else [] 67 | from_state, production = production_line.split(" → ") 68 | 69 | current_grammar_production = Grammar_Production(from_state) 70 | current_grammar_production.code = "\n".join(code_lines) 71 | items = production.split(" ") 72 | 73 | for item in items: 74 | if item in self.terminal_symbols + ["ε"]: 75 | current_grammar_production.add(True, item) 76 | elif item in self.variable_symbols: 77 | current_grammar_production.add(False, item) 78 | else: 79 | console.print(f"Unknown symbol '{item}' in grammar file", style="bold red") 80 | exit(-1) 81 | 82 | self.production_list.append(current_grammar_production) 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 cometeme, M010K 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compilers 编译原理 - 简单类 C 编译器 2 | 3 | 本项目实现了一个简单的类 C 编译器,能够分析简单类 C 语言风格的程序代码。如声明语句、赋值语句、表达式、if while 控制语句等,进行语法分析并生成相应的中间代码(三地址代码)。 4 | 5 | 提供了一个命令行交互程序,可以输出词法分析、语法分析、语义分析及中间代码生成过程中的各种表格以及数据集合。 6 | 7 | 输入文法文件可以根据需要进行修改,同时也可以添加自定义的语义动作,从而能够让程序分析不同的语言。 8 | 9 | ## 运行说明 10 | 11 | 运行说明(**需要确保 python 版本为 3.7**): 12 | 13 | 1. 进入项目文件夹 14 | 15 | (初次使用需要创建一个空的 output 目录) 16 | 17 | 2. 安装 rich 库(若没有安装) 18 | 19 | ```shell 20 | pip install rich 21 | ``` 22 | 23 | 3. 运行`main.py`程序 24 | 25 | ```shell 26 | python main.py 27 | ``` 28 | 29 | ### 操作说明 30 | 31 | 运行`main.py`后,命令行中会生成引导菜单(如下所示): 32 | 33 | ```shell 34 | --------------------------------------------------- 35 | Enter a number to show detail, or enter 'q' to quit 36 | 37 | 0 - Grammar 38 | 1 - Input Code 39 | 2 - Scanner States 40 | 3 - SLR States 41 | 4 - Token Table 42 | 5 - Symbol Table 43 | 6 - First Set 44 | 7 - Follow Set 45 | 8 - Closure Set 46 | 9 - SLR Table (Action/Goto Table) 47 | 10 - Output Code 48 | --------------------------------------------------- 49 | ``` 50 | 51 | 下面对各个选项进行说明: 52 | 53 | | 选项 | 功能 | 54 | | :--: | :------------------------------------------------: | 55 | | 0 | 输出给定的文法 | 56 | | 1 | 输出给定的程序输入 | 57 | | 2 | 输出词法分析的结果 | 58 | | 3 | 输出SLR语法分析过程(包含分析栈以及移入/归约动作) | 59 | | 4 | 输出 Token 串表 | 60 | | 5 | 输出符号表 | 61 | | 6 | 输出 First 集合 | 62 | | 7 | 输出 Follow 集合 | 63 | | 8 | 输出项集族 | 64 | | 9 | 输出 SLR 分析表(包括 action 和 goto 表) | 65 | | 10 | 输出生成的中间代码 | 66 | | q | 退出程序 | 67 | 68 | 69 | ## 工程文件说明 70 | 71 | 项目整体目录结构如下: 72 | 73 | ```shell 74 | . 75 | ├── Grammar.py 76 | ├── SLR_Automata.py 77 | ├── SLR_Table.py 78 | ├── Scanner.py 79 | ├── Symbol_Table.py 80 | ├── Token.py 81 | ├── action_table.json 82 | ├── format.sh 83 | ├── goto_table.json 84 | ├── input 85 | │   ├── grammar.txt 86 | │   ├── grammar_assign.txt 87 | │   ├── grammar_control.txt 88 | │   ├── grammar_define.txt 89 | │   ├── grammar_expression.txt 90 | │   ├── grammar_raw.txt 91 | │   ├── input.txt 92 | │   ├── input_assign.txt 93 | │   ├── input_control.txt 94 | │   ├── input_define.txt 95 | │   ├── input_expression.txt 96 | │   ├── input_raw.txt 97 | │   └── input_scanner.txt 98 | ├── main.py 99 | ├── output 100 | │   ├── closure_set.txt 101 | │   ├── code.csv 102 | │   ├── first_set.txt 103 | │   ├── follow_set.txt 104 | │   ├── grammar.txt 105 | │   ├── scanner_states.csv 106 | │   ├── slr_states.csv 107 | │   ├── slr_table.csv 108 | │   ├── symbol_table.csv 109 | │   └── token_table.csv 110 | ├── test_grammar.py 111 | └── test_scanner.py 112 | ``` 113 | 114 | ### 语法文件 115 | 116 | | 文件/文件夹 | 说明 | 117 | | :-----------: | :------------------------------------------------: | 118 | | input 文件夹 | 程序输入(文法、待分析的程序) | 119 | | output 文件夹 | 词法、语法、中间代码生成时产生的所有集合以及表结构 | 120 | 121 | 122 | 123 | ### 词法分析相关 124 | 125 | | 文件/文件夹 | 说明 | 126 | | :-------------: | :--------------: | 127 | | Scanner.py | 词法分析器的实现 | 128 | | test_scanner.py | 词法分析器测试 | 129 | | Token.py | Token 相关 | 130 | | Symbol_Table.py | 符号表相关 | 131 | 132 | ### 语法分析/中间代码生成相关 133 | 134 | | 文件/文件夹 | 说明 | 135 | | :-------------: | :------------------------------------------------: | 136 | | SLR_Table.py | SLR 语法分析表以及辅助函数生成 | 137 | | SLR_Automata.py | SLR 语法分析的实现 + 中间代码生成部分语义动作的实现 | 138 | | Grammar.py | 语法分析总控程序 | 139 | | test_grammar.py | 语法分析器测试 | 140 | 141 | ### 主控函数 142 | 143 | | 文件/文件夹 | 说明 | 144 | | :---------: | :----------------: | 145 | | main.py | 程序入口与控制逻辑 | 146 | -------------------------------------------------------------------------------- /SLR_Automata.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | from typing import Dict, List, Union 4 | 5 | from rich.console import Console 6 | from rich.table import Table 7 | 8 | from Grammar import Grammar, Grammar_Production 9 | from Scanner import Scanner 10 | from Symbol_Table import Symbol_Table, Table_Item, item_type_translate 11 | from Token import Token, Token_Type 12 | 13 | console = Console() 14 | 15 | 16 | class SLR_Automata: 17 | scanner: Scanner 18 | symbol_table: Symbol_Table 19 | grammar: Grammar 20 | action_table: List[Dict[str, str]] = list() 21 | goto_table: List[Dict[str, int]] = list() 22 | state_output: List[List[str]] 23 | code_output: List[List[str]] 24 | current_line: int 25 | 26 | def __init__(self, scanner: Scanner, grammar: Grammar) -> None: 27 | self.scanner = scanner 28 | self.symbol_table = scanner.symbol_table 29 | self.grammar = grammar 30 | 31 | with open("action_table.json", "r") as f: 32 | self.action_table = json.loads(f.read()) 33 | with open("goto_table.json", "r") as f: 34 | self.goto_table = json.loads(f.read()) 35 | 36 | # init state output 37 | self.state_output = [] 38 | self.code_output = [] 39 | 40 | def print_state(self) -> None: 41 | output_table = Table( 42 | show_header=True, 43 | header_style="bold", 44 | ) 45 | 46 | output_table.add_column("Token", justify="center") 47 | output_table.add_column("Stack", justify="left") 48 | output_table.add_column("Action", justify="center") 49 | output_table.add_column("Production", justify="left") 50 | 51 | for row in self.state_output: 52 | output_table.add_row(*row) 53 | 54 | console.print("SLR State:", style="bold") 55 | console.print(output_table) 56 | 57 | def print_code(self) -> None: 58 | output_table = Table( 59 | show_header=True, 60 | header_style="bold", 61 | ) 62 | 63 | output_table.add_column("Line", justify="center") 64 | output_table.add_column("Code", justify="left") 65 | 66 | for row in self.code_output: 67 | output_table.add_row(*row) 68 | 69 | console.print("Code:", style="bold") 70 | console.print(output_table) 71 | 72 | def save(self) -> None: 73 | with open("output/slr_states.csv", "w") as f: 74 | writer = csv.writer(f) 75 | writer.writerow(["Token", "Stack", "Action", "Production"]) 76 | for row in self.state_output: 77 | writer.writerow(row) 78 | with open("output/code.csv", "w") as f: 79 | writer = csv.writer(f) 80 | writer.writerow(["Line", "Code"]) 81 | for row in self.code_output: 82 | writer.writerow(row) 83 | 84 | def gen_code(self, code: str) -> None: 85 | self.code_output.append([str(self.current_line), code]) 86 | self.current_line += 1 87 | 88 | def gen_variable(self, name: str) -> int: 89 | item = Table_Item() 90 | item.name = name 91 | item.variable = True 92 | entry = self.symbol_table.add_item(item) 93 | return entry 94 | 95 | def make_list(self, inst: int) -> List: 96 | return [inst] 97 | 98 | def merge(self, l1: List, l2: List) -> List: 99 | 100 | l = list() 101 | l.extend(l1) 102 | 103 | for inst in l2: 104 | if inst not in l: 105 | l.append(inst) 106 | 107 | return l 108 | 109 | def back_patch(self, l: List, target: int) -> None: 110 | for inst in l: 111 | # back patch all blank field 112 | if inst < len(self.code_output) - 1 and not self.code_output[inst][1][-1].isdigit(): 113 | if len(self.code_output[inst][1]) >= 5 and self.code_output[inst][1][-5:-1] == "goto": 114 | self.code_output[inst][1] += str(target) 115 | 116 | def run(self, debug: bool = True) -> None: 117 | stack: List[int] = [0] 118 | attributes: List[Dict[str, Union[str, int]]] = [dict()] 119 | token: Union[Token, None] = self.scanner.get_next() if self.scanner.has_next() else None 120 | token_string: str = "$" if token is None else token.to_string() 121 | self.current_line = 0 122 | 123 | # run automata 124 | while True: 125 | assert len(stack) == len(attributes) 126 | 127 | if token_string not in self.action_table[stack[-1]]: 128 | self.print_state() 129 | console.print(f"Current token_string: {token_string}") 130 | console.print(f"Current stack: {stack}") 131 | console.print(f"Action Table [{stack[-1]}]: {self.action_table[stack[-1]]}") 132 | console.print("SLR ERROR", style="bold red") 133 | exit(-1) 134 | 135 | action: str = self.action_table[stack[-1]][token_string] 136 | 137 | if debug: 138 | console.print(f"\ntoken: {token_string}") 139 | console.print(f"stack: {stack}") 140 | console.print(f"attributes: {attributes}") 141 | console.print(f"action: {action}") 142 | 143 | if action == "acc": 144 | self.state_output.append([token_string, str(stack), action, ""]) 145 | break 146 | 147 | action_type: str = action[0] 148 | action_value: int = int(action[1:]) 149 | 150 | if action_type == "s": 151 | # shift in next state 152 | self.state_output.append([token_string, str(stack), action, ""]) 153 | 154 | stack.append(action_value) 155 | 156 | if token.token_type in [Token_Type.ID, Token_Type.CONST]: 157 | attributes.append({"entry": -1 if token.content is None else token.content}) 158 | else: 159 | attributes.append(dict()) 160 | 161 | token: Union[Token, None] = self.scanner.get_next() if self.scanner.has_next() else None 162 | token_string: str = "$" if token is None else token.to_string() 163 | elif action_type == "r": 164 | # reduced by production 165 | current_production: Grammar_Production = self.grammar.production_list[action_value] 166 | self.state_output.append([token_string, str(stack), action, str(current_production)]) 167 | 168 | if debug: 169 | console.print(f"production: {current_production}") 170 | print(f"code:\n{current_production.code}\n") 171 | 172 | length: int = len(current_production.items) 173 | current_attribute = dict() 174 | 175 | # run generation code 176 | try: 177 | exec(current_production.code) 178 | except Exception as e: 179 | console.print("Execute Generation Faild!", style="bold red") 180 | self.print_state() 181 | print(f"Production: {current_production}\n\n") 182 | print(f"code:\n\n{current_production.code}\n") 183 | exec(current_production.code) 184 | 185 | # solve for not A → ε 186 | if not (current_production.items[0].is_symbol and current_production.items[0].value == "ε"): 187 | stack = stack[:-length] 188 | attributes = attributes[:-length] 189 | 190 | reduce_state: str = current_production.from_state 191 | stack.append(self.goto_table[stack[-1]][reduce_state]) 192 | attributes.append(current_attribute) 193 | 194 | else: 195 | self.print_state() 196 | console.print(f"Unknown action type {action_type}!", style="bold red") 197 | exit(-1) 198 | -------------------------------------------------------------------------------- /SLR_Table.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | from copy import deepcopy 4 | from pprint import pformat 5 | from typing import Dict, List 6 | 7 | from rich.console import Console 8 | from rich.table import Table 9 | 10 | from Grammar import Grammar 11 | 12 | console = Console() 13 | 14 | 15 | class ItemSet: 16 | def __init__(self): 17 | self.index = 0 18 | self.closure_items = set() # closure of one item 19 | self.transfer = dict() # store transfer dict 20 | 21 | def get_index(self): 22 | return self.index 23 | 24 | def set_index(self, index: int): 25 | self.index = index 26 | 27 | def add_trans(self, symbol: str, next_index: int): 28 | self.transfer[symbol] = next_index 29 | 30 | def exists(self, item: tuple) -> bool: 31 | return item in self.closure_items 32 | 33 | def add(self, item: tuple) -> None: 34 | self.closure_items.add(item) 35 | 36 | def equal(self, other) -> bool: 37 | if len(self.closure_items) != len(other.closure_items): 38 | return False 39 | 40 | for item in self.closure_items: 41 | if item not in other.closure_items: 42 | return False 43 | 44 | return True 45 | 46 | 47 | class ClosureFamily: 48 | def __init__(self): 49 | self.clourse_set = list() 50 | 51 | def exists(self, item: ItemSet) -> bool: 52 | contain = False 53 | for itemset in self.clourse_set: 54 | if itemset.equal(item): 55 | contain = True 56 | break 57 | 58 | return contain 59 | 60 | def indexOf(self, item: ItemSet) -> int: 61 | index = -1 62 | for itemset in self.clourse_set: 63 | if itemset.equal(item): 64 | index = itemset.get_index() 65 | break 66 | 67 | return index 68 | 69 | def add(self, item: ItemSet) -> None: 70 | self.clourse_set.append(item) 71 | 72 | 73 | class SLR_Table: 74 | def __init__(self, grammar: Grammar) -> None: 75 | self.grammar = grammar 76 | 77 | self.start_symbol = grammar.start_symbol 78 | self.action_symbols = grammar.terminal_symbols 79 | self.goto_symbols = grammar.variable_symbols[1:] 80 | self.all_symbols = self.goto_symbols + self.action_symbols 81 | 82 | self.all_items = list() 83 | self.first_items = dict() # item with first dot, key is from_state 84 | self.gen_all_items() 85 | 86 | self.C = self.gen_clourse_set([(0, 0)]) # clourse set 87 | 88 | self.first = self.first_set() 89 | self.follow = self.follow_set() 90 | 91 | def print_first_set(self) -> None: 92 | console.print("First Set:", style="bold") 93 | console.print(self.first) 94 | 95 | def print_follow_set(self) -> None: 96 | console.print("Follow Set:", style="bold") 97 | console.print(self.follow) 98 | 99 | def print_closure_set(self): 100 | console.print(f"Num of states: {len(self.C.clourse_set)}", style="bold") 101 | for index, clourse in enumerate(self.C.clourse_set): 102 | output_table = Table( 103 | show_header=True, 104 | header_style="bold", 105 | ) 106 | output_table.add_column(f"I{clourse.index}", justify="left") 107 | for item in clourse.closure_items: 108 | output_table.add_row(self.get_item(item)) 109 | # print(clourse.transfer) 110 | console.print(output_table) 111 | 112 | def save(self) -> None: 113 | with open("output/first_set.txt", "w") as f: 114 | for k in self.first: 115 | f.write(f"first({k}) = {self.first[k]}\n") 116 | 117 | with open("output/follow_set.txt", "w") as f: 118 | for k in self.follow: 119 | f.write(f"follow({k}) = {self.follow[k]}\n") 120 | 121 | with open("output/closure_set.txt", "w") as f: 122 | for index, clourse in enumerate(self.C.clourse_set): 123 | f.write(f"I{index}\n") 124 | for item in clourse.closure_items: 125 | f.write(f"{self.get_item(item)}\n") 126 | f.write("\n") 127 | 128 | save_slr_table(self.grammar) 129 | 130 | def get_item(self, item: tuple) -> str: 131 | production = self.grammar.production_list[item[0]] 132 | right = [it.value for it in production.items] 133 | 134 | if "ε" in right: # remove ε 135 | right.remove("ε") 136 | 137 | right.insert(item[1], ".") 138 | return f"{production.from_state} → {' '.join(right)}" 139 | 140 | def contain_varepsilon(self, symbol: str) -> bool: 141 | contain = False 142 | if self.first_items.get(symbol) is None: 143 | return False 144 | 145 | for indices in self.first_items.get(symbol): 146 | for index in indices: 147 | for item in self.grammar.production_list[index].items: 148 | if item.value == "ε": 149 | contain = True 150 | break 151 | 152 | return contain 153 | 154 | def get_first(self, first: dict, symbol: str): 155 | indices = [production[0] for production in self.first_items[symbol]] # production indices 156 | for index in indices: 157 | item = self.grammar.production_list[index].items[0] 158 | if not item.is_symbol and item.value != symbol: 159 | first[symbol] |= set(self.get_first(first, item.value)) # recurse 160 | 161 | return first.get(symbol) 162 | 163 | def first_set(self): 164 | first = dict() 165 | 166 | for item in self.action_symbols: 167 | first[item] = item 168 | 169 | for from_state in self.first_items.keys(): 170 | indices = [production[0] for production in self.first_items[from_state]] # production indices 171 | first[from_state] = set() 172 | 173 | for index in indices: 174 | item = self.grammar.production_list[index].items[0] 175 | if item.is_symbol: # add end symbol to First(from_state) 176 | first[from_state].add(item.value) 177 | 178 | for from_state in self.first_items.keys(): 179 | indices = [production[0] for production in self.first_items[from_state]] # production indices 180 | for index in indices: 181 | item = self.grammar.production_list[index].items[0] 182 | # add var symbol to First(from_state) 183 | if not item.is_symbol and item.value != from_state: 184 | first[from_state] |= set(self.get_first(first, item.value)) 185 | 186 | for from_state in self.first_items.keys(): 187 | indices = [production[0] for production in self.first_items[from_state]] # production indices 188 | add_varepsilon = False 189 | for index in indices: 190 | items = self.grammar.production_list[index].items 191 | length = len(items) 192 | if items[0].value == "ε": 193 | add_varepsilon = True 194 | # solve ε production to First(from_state) 195 | cur = 0 196 | while cur < length and not items[cur].is_symbol: 197 | if self.contain_varepsilon(items[cur].value): 198 | add_varepsilon = True 199 | if cur + 1 < length: 200 | first[from_state] |= set(first[items[cur + 1].value]) 201 | cur += 1 202 | 203 | if add_varepsilon: 204 | first[from_state].add("ε") 205 | 206 | return first 207 | 208 | def follow_set(self): 209 | follow = dict() 210 | for from_state in self.first_items.keys(): 211 | follow[from_state] = set() 212 | 213 | follow[self.start_symbol].add("$") # for begin symbol, add '$' 214 | 215 | for production in self.grammar.production_list: 216 | items = production.items 217 | length = len(items) 218 | cur = 0 219 | while cur < length: 220 | if not items[cur].is_symbol: 221 | if cur + 1 < length and items[cur + 1].is_symbol: # B→αAa ,a is end symbol 222 | follow[items[cur].value].add(items[cur + 1].value) 223 | elif cur + 1 < length and not items[cur + 1].is_symbol: # B→αAX ,X is not end symbol 224 | first_of_next = deepcopy(self.first[items[cur + 1].value]) 225 | if "ε" in first_of_next: 226 | first_of_next.remove("ε") 227 | follow[items[cur].value] |= first_of_next 228 | cur += 1 229 | 230 | for production in self.grammar.production_list: 231 | items = production.items 232 | length = len(items) 233 | cur = 0 234 | while cur < length: 235 | if not items[cur].is_symbol: 236 | if cur + 1 >= length: 237 | follow[items[cur].value] |= follow[production.from_state] 238 | elif cur + 1 < length and self.contain_varepsilon(items[cur + 1].value): 239 | follow[items[cur].value] |= follow[production.from_state] 240 | cur += 1 241 | 242 | return follow 243 | 244 | def gen_all_items(self): 245 | for index, production in enumerate(self.grammar.production_list): 246 | 247 | # store item which dot at first 248 | if not self.first_items.get(production.from_state): 249 | self.first_items[str(production.from_state)] = [(index, 0)] 250 | else: 251 | self.first_items[str(production.from_state)].append((index, 0)) 252 | 253 | for dot, item in enumerate(production.items): 254 | self.all_items.append((index, dot)) 255 | 256 | if len(production.items) > 1 and production.items[0].value != "ε": # except ε 257 | self.all_items.append((index, len(production.items))) 258 | 259 | def get_clourse(self, items: list) -> ItemSet: 260 | queue = [item for item in items] # add items to queue 261 | close_set = ItemSet() # clourse for items 262 | while queue: 263 | item = queue.pop(0) 264 | close_set.add(item) 265 | candidate = self.grammar.production_list[item[0]] # all candidate production 266 | 267 | if len(candidate.items) != item[1]: # dot not at end of production 268 | if not candidate.items[item[1]].is_symbol: 269 | for it in self.first_items[candidate.items[item[1]].value]: 270 | if not close_set.exists(it): # item not in close_set 271 | queue.append(it) 272 | close_set.add(it) 273 | 274 | return close_set 275 | 276 | def goto(self, itemset: ItemSet, symbol: str): 277 | next_state = ItemSet() 278 | for item in itemset.closure_items: 279 | production = self.grammar.production_list[item[0]] 280 | 281 | if item[1] != len(production.items): # dot not at the end 282 | if production.items[item[1]].value == symbol: # match ,goto next state 283 | next_state.add((item[0], item[1] + 1)) 284 | 285 | return self.get_clourse([item for item in next_state.closure_items]) 286 | 287 | def gen_clourse_set(self, start: list) -> ClosureFamily: 288 | C = ClosureFamily() 289 | C.add(self.get_clourse(start)) 290 | 291 | queue = [clourse for clourse in C.clourse_set] 292 | index = 1 293 | while queue: 294 | clourse = queue.pop(0) 295 | 296 | for symbol in self.all_symbols: 297 | next = self.goto(clourse, symbol) 298 | if len(next.closure_items) != 0 and not C.exists(next): # if next_state not in closure set 299 | next.set_index(index) 300 | index += 1 301 | 302 | clourse.add_trans(symbol, next.get_index()) 303 | C.add(next) 304 | queue.append(next) 305 | elif len(next.closure_items) != 0 and C.exists(next): # if consists , add to transfer dict 306 | clourse.add_trans(symbol, C.indexOf(next)) 307 | 308 | return C 309 | 310 | def analysis_table(self): 311 | action = list() 312 | goto = list() 313 | 314 | # C = self.gen_clourse_set([(0, 0)]) 315 | 316 | for i in range(len(self.C.clourse_set)): 317 | action.append(dict()) 318 | goto.append(dict()) 319 | 320 | for clourse in self.C.clourse_set: 321 | for item in clourse.closure_items: 322 | production = self.grammar.production_list[item[0]] 323 | 324 | if item[1] != len(production.items): # dot not at the end 325 | symbol = production.items[item[1]] 326 | 327 | # solve A → ε 328 | if symbol.is_symbol and symbol.value == "ε": 329 | for f in self.follow[production.from_state]: 330 | action[clourse.get_index()][f] = "r" + str(item[0]) 331 | 332 | # get next closure index 333 | next_index = clourse.transfer.get(symbol.value) 334 | 335 | if next_index is not None: 336 | if symbol.is_symbol and symbol.value != "ε": 337 | action[clourse.get_index()][symbol.value] = "s" + str(next_index) 338 | else: 339 | goto[clourse.get_index()][symbol.value] = next_index 340 | 341 | else: # item[1] == len(production.items) , dot at the end 342 | if production.from_state == self.start_symbol: 343 | action[clourse.get_index()]["$"] = "acc" 344 | else: 345 | for f in self.follow[production.from_state]: 346 | action[clourse.get_index()][f] = "r" + str(item[0]) 347 | 348 | with open("action_table.json", "w") as f: 349 | f.write(json.dumps(action, indent=2)) 350 | 351 | with open("goto_table.json", "w") as f: 352 | f.write(json.dumps(goto, indent=2)) 353 | 354 | 355 | def print_slr_table(grammar: Grammar) -> None: 356 | action_table_symbols: List[str] = grammar.terminal_symbols 357 | goto_table_symbols: List[str] = grammar.variable_symbols[1:] 358 | 359 | with open("action_table.json", "r") as f: 360 | action_table: List[Dict[str, str]] = json.loads(f.read()) 361 | with open("goto_table.json", "r") as f: 362 | goto_table: List[Dict[str, int]] = json.loads(f.read()) 363 | 364 | output_table = Table( 365 | show_header=True, 366 | header_style="bold", 367 | ) 368 | 369 | output_table.add_column("State", justify="center") 370 | for action_table_symbol in action_table_symbols: 371 | output_table.add_column(action_table_symbol, justify="center") 372 | for goto_table_symbol in goto_table_symbols: 373 | output_table.add_column(goto_table_symbol, justify="center") 374 | 375 | for state, action_row, goto_row in zip(range(len(action_table)), action_table, goto_table): 376 | output_row: List[str] = [str(state)] 377 | for action_symbol in action_table_symbols: 378 | output_row.append(action_row.get(action_symbol, "")) 379 | for goto_symbol in goto_table_symbols: 380 | output_row.append(str(goto_row.get(goto_symbol, ""))) 381 | output_table.add_row(*output_row) 382 | 383 | console.print("SLR Table (Action/Goto Table):", style="bold") 384 | console.print(output_table) 385 | 386 | 387 | def save_slr_table(grammar: Grammar) -> None: 388 | action_table_symbols: List[str] = grammar.terminal_symbols 389 | goto_table_symbols: List[str] = grammar.variable_symbols[1:] 390 | 391 | with open("action_table.json", "r") as f: 392 | action_table: List[Dict[str, str]] = json.loads(f.read()) 393 | with open("goto_table.json", "r") as f: 394 | goto_table: List[Dict[str, int]] = json.loads(f.read()) 395 | 396 | with open("output/slr_table.csv", "w") as f: 397 | writter = csv.writer(f) 398 | header: List[str] = ["State"] + action_table_symbols + goto_table_symbols 399 | writter.writerow(header) 400 | 401 | for state, action_row, goto_row in zip(range(len(action_table)), action_table, goto_table): 402 | output_row: List[str] = [str(state)] 403 | for action_symbol in action_table_symbols: 404 | output_row.append(action_row.get(action_symbol, "")) 405 | for goto_symbol in goto_table_symbols: 406 | output_row.append(str(goto_row.get(goto_symbol, ""))) 407 | writter.writerow(output_row) 408 | 409 | 410 | if __name__ == "__main__": 411 | grammar = Grammar() 412 | grammar.read("grammar.txt") 413 | 414 | slr = SLR_Table(grammar) 415 | slr.analysis_table() 416 | 417 | print_slr_table(grammar) 418 | -------------------------------------------------------------------------------- /Scanner.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from enum import Enum, auto 3 | from string import ascii_letters, digits, printable 4 | from typing import List 5 | 6 | from rich.console import Console 7 | from rich.table import Table 8 | 9 | from Symbol_Table import Symbol_Table, Table_Item, Table_Item_Type 10 | from Token import Token, Token_Type 11 | 12 | console = Console() 13 | 14 | 15 | class Scanner_State(Enum): 16 | START = auto() 17 | # identifier 18 | ID = auto() 19 | # assign symbol '=' 20 | ASSIGN = auto() 21 | # arithmetic operator (+, -, *, /) 22 | ALOP = auto() 23 | # relation operator 24 | L_G = auto() # less '<' or greater '>' 25 | LE_GE = auto() # less equal '<=' or greater equal '>=' 26 | EQ = auto() # equal '==' 27 | NOT = auto() # not '!' 28 | NEQ = auto() # not equal '!=' 29 | # bracket 30 | LBRACKET = auto() # left bracket '(' 31 | RBRACKET = auto() # right bracket ')' 32 | # semicolon 33 | SEMICOLON = auto() # semicolon ';' 34 | # "if" 35 | I = auto() 36 | IF = auto() 37 | # "int" 38 | IN = auto() 39 | INT = auto() 40 | # "else" 41 | E = auto() 42 | EL = auto() 43 | ELS = auto() 44 | ELSE = auto() 45 | # "while" 46 | W = auto() 47 | WH = auto() 48 | WHI = auto() 49 | WHIL = auto() 50 | WHILE = auto() 51 | # float 52 | F = auto() 53 | FL = auto() 54 | FLO = auto() 55 | FLOA = auto() 56 | FLOAT = auto() 57 | # number 58 | NUMBER = auto() 59 | # end of the token 60 | END = auto() 61 | # not a vaild token 62 | ERROR = auto() 63 | 64 | 65 | # map from scanner state to token type 66 | STATE_TO_TOKEN = { 67 | Scanner_State.ID: Token_Type.ID, 68 | Scanner_State.ASSIGN: Token_Type.ASSIGN, 69 | Scanner_State.ALOP: Token_Type.ALOP, 70 | Scanner_State.L_G: Token_Type.RELOP, 71 | Scanner_State.LE_GE: Token_Type.RELOP, 72 | Scanner_State.EQ: Token_Type.RELOP, 73 | Scanner_State.NEQ: Token_Type.RELOP, 74 | Scanner_State.LBRACKET: Token_Type.LBRACKET, 75 | Scanner_State.RBRACKET: Token_Type.RBRACKET, 76 | Scanner_State.SEMICOLON: Token_Type.SEMICOLON, 77 | Scanner_State.I: Token_Type.ID, 78 | Scanner_State.IF: Token_Type.IF, 79 | Scanner_State.IN: Token_Type.ID, 80 | Scanner_State.INT: Token_Type.INT, 81 | Scanner_State.E: Token_Type.ID, 82 | Scanner_State.EL: Token_Type.ID, 83 | Scanner_State.ELS: Token_Type.ID, 84 | Scanner_State.ELSE: Token_Type.ELSE, 85 | Scanner_State.W: Token_Type.ID, 86 | Scanner_State.WH: Token_Type.ID, 87 | Scanner_State.WHI: Token_Type.ID, 88 | Scanner_State.WHIL: Token_Type.ID, 89 | Scanner_State.WHILE: Token_Type.WHILE, 90 | Scanner_State.F: Token_Type.ID, 91 | Scanner_State.FL: Token_Type.ID, 92 | Scanner_State.FLO: Token_Type.ID, 93 | Scanner_State.FLOA: Token_Type.ID, 94 | Scanner_State.FLOAT: Token_Type.FLOAT, 95 | Scanner_State.NUMBER: Token_Type.CONST, 96 | } 97 | 98 | 99 | DIGITS = digits # 0~9 100 | 101 | ID_START = ascii_letters + "_" # a~z + A~Z + _ 102 | ID_APPEND = ID_START + digits # a~z + A~Z + _ + 0~9 103 | 104 | ALOPS = "+-*/" 105 | RELOPS = "<>!=" 106 | 107 | SPACES = " \t\n\r\0" 108 | 109 | ID_SEP = ALOPS + RELOPS + "();" + SPACES 110 | NUMBER_SEP = ID_SEP 111 | 112 | ANY_SEP = printable + SPACES 113 | 114 | 115 | ID_TRANSITION_TEMPLATE = [(ID_APPEND, Scanner_State.ID), (ID_SEP, Scanner_State.END)] 116 | 117 | 118 | SCANNER_TRANSITION = { 119 | Scanner_State.START: [ 120 | ("i", Scanner_State.I), 121 | ("e", Scanner_State.E), 122 | ("w", Scanner_State.W), 123 | ("f", Scanner_State.F), 124 | ("(", Scanner_State.LBRACKET), 125 | (")", Scanner_State.RBRACKET), 126 | (";", Scanner_State.SEMICOLON), 127 | ("<>", Scanner_State.L_G), 128 | ("!", Scanner_State.NOT), 129 | ("=", Scanner_State.ASSIGN), 130 | (" ", Scanner_State.START), # remove space 131 | (ALOPS, Scanner_State.ALOP), 132 | (DIGITS, Scanner_State.NUMBER), 133 | (ID_START, Scanner_State.ID), 134 | ], 135 | Scanner_State.ID: ID_TRANSITION_TEMPLATE, 136 | Scanner_State.ASSIGN: [ 137 | ("=", Scanner_State.EQ), 138 | (ANY_SEP, Scanner_State.END), 139 | ], 140 | Scanner_State.ALOP: [ 141 | (ANY_SEP, Scanner_State.END), 142 | ], 143 | Scanner_State.L_G: [ 144 | ("=", Scanner_State.LE_GE), 145 | (ANY_SEP, Scanner_State.END), 146 | ], 147 | Scanner_State.LE_GE: [ 148 | (ANY_SEP, Scanner_State.END), 149 | ], 150 | Scanner_State.EQ: [ 151 | (ANY_SEP, Scanner_State.END), 152 | ], 153 | Scanner_State.NOT: [ 154 | ("=", Scanner_State.NEQ), 155 | ], 156 | Scanner_State.NEQ: [ 157 | (ANY_SEP, Scanner_State.END), 158 | ], 159 | Scanner_State.LBRACKET: [ 160 | (ANY_SEP, Scanner_State.END), 161 | ], 162 | Scanner_State.RBRACKET: [ 163 | (ANY_SEP, Scanner_State.END), 164 | ], 165 | Scanner_State.SEMICOLON: [ 166 | (ANY_SEP, Scanner_State.END), 167 | ], 168 | Scanner_State.I: [ 169 | ("f", Scanner_State.IF), 170 | ("n", Scanner_State.IN), 171 | ] 172 | + ID_TRANSITION_TEMPLATE, 173 | Scanner_State.IF: ID_TRANSITION_TEMPLATE, 174 | Scanner_State.IN: [ 175 | ("t", Scanner_State.INT), 176 | ] 177 | + ID_TRANSITION_TEMPLATE, 178 | Scanner_State.INT: ID_TRANSITION_TEMPLATE, 179 | Scanner_State.E: [ 180 | ("l", Scanner_State.EL), 181 | ] 182 | + ID_TRANSITION_TEMPLATE, 183 | Scanner_State.EL: [ 184 | ("s", Scanner_State.ELS), 185 | ] 186 | + ID_TRANSITION_TEMPLATE, 187 | Scanner_State.ELS: [ 188 | ("e", Scanner_State.ELSE), 189 | ] 190 | + ID_TRANSITION_TEMPLATE, 191 | Scanner_State.ELSE: ID_TRANSITION_TEMPLATE, 192 | Scanner_State.W: [ 193 | ("h", Scanner_State.WH), 194 | ] 195 | + ID_TRANSITION_TEMPLATE, 196 | Scanner_State.WH: [ 197 | ("i", Scanner_State.WHI), 198 | ] 199 | + ID_TRANSITION_TEMPLATE, 200 | Scanner_State.WHI: [ 201 | ("l", Scanner_State.WHIL), 202 | ] 203 | + ID_TRANSITION_TEMPLATE, 204 | Scanner_State.WHIL: [ 205 | ("e", Scanner_State.WHILE), 206 | ] 207 | + ID_TRANSITION_TEMPLATE, 208 | Scanner_State.WHILE: ID_TRANSITION_TEMPLATE, 209 | Scanner_State.F: [ 210 | ("l", Scanner_State.FL), 211 | ] 212 | + ID_TRANSITION_TEMPLATE, 213 | Scanner_State.FL: [ 214 | ("o", Scanner_State.FLO), 215 | ] 216 | + ID_TRANSITION_TEMPLATE, 217 | Scanner_State.FLO: [ 218 | ("a", Scanner_State.FLOA), 219 | ] 220 | + ID_TRANSITION_TEMPLATE, 221 | Scanner_State.FLOA: [ 222 | ("t", Scanner_State.FLOAT), 223 | ] 224 | + ID_TRANSITION_TEMPLATE, 225 | Scanner_State.FLOAT: ID_TRANSITION_TEMPLATE, 226 | Scanner_State.NUMBER: [ 227 | (DIGITS, Scanner_State.NUMBER), 228 | (NUMBER_SEP, Scanner_State.END), 229 | ], 230 | } 231 | 232 | 233 | class Scanner: 234 | pnt: int 235 | code: str 236 | length: int 237 | symbol_table: Symbol_Table 238 | state_output: List[List[str]] 239 | token_output: List[List[str]] 240 | 241 | def __init__(self, code: str, symbol_table: Symbol_Table) -> None: 242 | """init the scanner 243 | 244 | Args: 245 | `code` (str): raw code that needs to be processed 246 | `symbol_table` (Symbol_Table): symbol table for storing variables and constants 247 | """ 248 | self.pnt = 0 249 | self.code = code.replace("\n", "").replace("\r", "").strip(" ") # erase line split 250 | self.code = self.code + "\0" # add '\0' at the end for convenience 251 | self.length = len(self.code) 252 | self.symbol_table = symbol_table 253 | self.state_output = [] 254 | self.token_output = [] 255 | 256 | def print_states(self) -> None: 257 | output_table = Table( 258 | show_header=True, 259 | header_style="bold", 260 | ) 261 | output_table.add_column("Pointer", justify="center") 262 | output_table.add_column("Current Character", justify="center") 263 | output_table.add_column("State Transfer", justify="left") 264 | 265 | for row in self.state_output: 266 | output_table.add_row(*row) 267 | 268 | console.print("Scanner States:", style="bold") 269 | console.print(output_table) 270 | 271 | def print_tokens(self) -> None: 272 | output_table = Table( 273 | show_header=True, 274 | header_style="bold", 275 | ) 276 | output_table.add_column("Type", justify="center") 277 | output_table.add_column("Content", justify="center") 278 | 279 | for row in self.token_output: 280 | output_table.add_row(*row) 281 | 282 | console.print("Tokens:", style="bold") 283 | console.print(output_table) 284 | 285 | def save(self) -> None: 286 | with open("output/scanner_states.csv", "w") as f: 287 | writer = csv.writer(f) 288 | writer.writerow(["Pointer", "Current Character", "State Transfer"]) 289 | for row in self.state_output: 290 | writer.writerow(row) 291 | with open("output/token_table.csv", "w") as f: 292 | writer = csv.writer(f) 293 | writer.writerow(["Type", "Content"]) 294 | for row in self.token_output: 295 | writer.writerow(row) 296 | 297 | def has_next(self) -> bool: 298 | """check whether the scanner has next token to output 299 | 300 | Returns: 301 | `bool`: `True` if the scanner have next token 302 | """ 303 | return self.pnt < self.length - 1 304 | 305 | def get_next(self) -> Token: 306 | """get next token 307 | 308 | Args: 309 | `output` (bool, optional): Set to `True` to show scan process. Defaults to `True`. 310 | 311 | Returns: 312 | `Token`: The next token 313 | """ 314 | current_state: Scanner_State = Scanner_State.START 315 | content: str = "" 316 | result: Token = Token() 317 | 318 | while True: 319 | cur: str = self.code[self.pnt] 320 | 321 | transition = SCANNER_TRANSITION[current_state] 322 | next_state: Scanner_State = Scanner_State.ERROR # default is error 323 | 324 | # find next state 325 | for pattern, to_state in transition: 326 | if cur in pattern: 327 | if to_state == Scanner_State.END: 328 | result.token_type = STATE_TO_TOKEN[current_state] 329 | 330 | if result.token_type in [Token_Type.ID, Token_Type.CONST]: 331 | # for identifier or constant, the content is the entry(index) in symbol table 332 | entry: int = self.symbol_table.find_item_by_name(content) 333 | if entry == -1: 334 | # cannot find, create a new row in symbol table 335 | new_item = Table_Item() 336 | new_item.name = content 337 | new_item.variable = result.token_type == Token_Type.ID 338 | entry = self.symbol_table.add_item(new_item) 339 | 340 | result.content = entry 341 | elif result.token_type in [Token_Type.ALOP, Token_Type.RELOP]: 342 | # arithmetic operator (+, -, *, /) or relation operator (<, >, <=, >=, ==, !=) 343 | result.content = content 344 | else: 345 | result.content = None 346 | 347 | self.token_output.append( 348 | [result.token_type.name, "" if result.content is None else str(result.content)] 349 | ) 350 | return result 351 | 352 | next_state = to_state 353 | break 354 | 355 | self.state_output.append([str(self.pnt), cur, f"{current_state.name} -> {next_state.name}"]) 356 | 357 | if next_state == Scanner_State.ERROR: 358 | self.print_tokens() 359 | self.print_states() 360 | console.print("ERROR WHEN GETTING NEXT TOKEN!", style="bold red") 361 | exit(-1) 362 | 363 | # step to next state 364 | current_state = next_state 365 | if cur != " ": 366 | content += cur 367 | self.pnt += 1 368 | -------------------------------------------------------------------------------- /Symbol_Table.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from enum import Enum, auto 3 | from typing import List, Union 4 | 5 | from rich.console import Console 6 | from rich.table import Table 7 | 8 | console = Console() 9 | 10 | 11 | class Table_Item_Type(Enum): 12 | INT = auto() 13 | FLOAT = auto() 14 | 15 | 16 | item_type_translate = {"int": Table_Item_Type.INT, "float": Table_Item_Type.FLOAT} 17 | 18 | 19 | class Table_Item: 20 | name: Union[str, None] 21 | variable: Union[bool, None] # True for variable, False for constant 22 | item_type: Union[Table_Item_Type, None] 23 | 24 | def __init__(self) -> None: 25 | self.name = None 26 | self.variable = None 27 | self.item_type = None 28 | 29 | def __str__(self) -> str: 30 | return f"{self.name}, {'var' if self.variable else 'const'}, {'' if self.item_type is None else self.item_type.name}" 31 | 32 | 33 | class Symbol_Table: 34 | size: int 35 | table: List[Table_Item] 36 | 37 | def __init__(self) -> None: 38 | self.size = 0 39 | self.table = list() 40 | 41 | def output(self) -> None: 42 | output_table = Table( 43 | show_header=True, 44 | header_style="bold", 45 | ) 46 | output_table.add_column("Name", justify="center") 47 | output_table.add_column("Var/Const", justify="center") 48 | output_table.add_column("Type", justify="center") 49 | 50 | for item in self.table: 51 | output_table.add_row( 52 | item.name, "Var" if item.variable else "Const", "" if item.item_type is None else item.item_type.name 53 | ) 54 | 55 | console.print("Symbol Table:", style="bold") 56 | console.print(output_table) 57 | 58 | def save(self) -> None: 59 | with open("output/symbol_table.csv", "w") as f: 60 | writer = csv.writer(f) 61 | writer.writerow(["Name", "Var/Const", "Type"]) 62 | for item in self.table: 63 | writer.writerow( 64 | [ 65 | item.name, 66 | "Var" if item.variable else "Const", 67 | "" if item.item_type is None else item.item_type.name, 68 | ] 69 | ) 70 | 71 | def get_size(self) -> int: 72 | return self.size 73 | 74 | def find_item_by_name(self, name: str) -> int: 75 | for idx, item in enumerate(self.table): 76 | if item.name == name: 77 | return idx 78 | 79 | return -1 # cannot find 80 | 81 | def add_item(self, item: Table_Item) -> int: 82 | self.size += 1 83 | self.table.append(item) 84 | return self.size - 1 85 | -------------------------------------------------------------------------------- /Token.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, auto 2 | from typing import Union 3 | 4 | 5 | class Token_Type(Enum): 6 | ID = auto() # identifier 7 | CONST = auto() # constant (number) 8 | ASSIGN = auto() # assign symbol '=' 9 | ALOP = auto() # arithmetic operator (+, -, *, /) 10 | RELOP = auto() # relation operator (<, >, <=, >=, ==, !=) 11 | LBRACKET = auto() # left bracket '(' 12 | RBRACKET = auto() # right bracket ')' 13 | SEMICOLON = auto() # semicolon ';' 14 | IF = auto() # if 15 | ELSE = auto() # else 16 | WHILE = auto() # while 17 | INT = auto() # int 18 | FLOAT = auto() # float 19 | 20 | 21 | class Token: 22 | token_type: Union[Token_Type, None] 23 | content: Union[str, int, None] # str for name, int for entry 24 | 25 | def __init__(self) -> None: 26 | self.token_type = None # type of the token 27 | self.content = None # detail content (like identifier name or operator type) 28 | 29 | def to_string(self) -> str: 30 | if self.token_type in [Token_Type.ALOP, Token_Type.RELOP]: 31 | return str(self.content) 32 | elif self.token_type == Token_Type.ASSIGN: 33 | return "=" 34 | elif self.token_type == Token_Type.LBRACKET: 35 | return "(" 36 | elif self.token_type == Token_Type.RBRACKET: 37 | return ")" 38 | elif self.token_type == Token_Type.SEMICOLON: 39 | return ";" 40 | return self.token_type.name.lower() 41 | 42 | def __str__(self) -> str: 43 | return f"{self.token_type.name}, {'' if self.content is None else self.content}" 44 | -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | isort . 3 | black . -l 120 -------------------------------------------------------------------------------- /input/grammar.txt: -------------------------------------------------------------------------------- 1 | TerminalSymbols: id const int float if else while > < == = + - * / ( ) ; $ 2 | VariableSymbols: P' P D S L C E T F M N Q 3 | 4 | @ P' → P 5 | @ P → M D S 6 | self.back_patch(attributes[-1]["nextlist"], attributes[-3]["instr"]) 7 | 8 | @ M → ε 9 | offset = 0 10 | temp_index = 0 11 | current_attribute["instr"] = self.current_line 12 | 13 | @ D → L id ; N D 14 | @ N → ε 15 | idx = attributes[-2]["entry"] 16 | self.symbol_table.table[idx].item_type = item_type_translate[attributes[-3]["type"]] 17 | width = attributes[-3]["width"] 18 | self.gen_code(f"Alloc [{offset},{offset+width}] for {self.symbol_table.table[idx].name}") 19 | offset += width 20 | 21 | @ D → ε 22 | @ L → int 23 | current_attribute["type"] = 'int' 24 | current_attribute["width"] = 4 25 | 26 | @ L → float 27 | current_attribute["type"] = 'float' 28 | current_attribute["width"] = 4 29 | 30 | @ S → S ; Q S 31 | self.back_patch(attributes[-4]["nextlist"], attributes[-2]["instr"]) 32 | current_attribute["nextlist"] = attributes[-1]["nextlist"] 33 | 34 | @ Q → ε 35 | current_attribute["instr"] = self.current_line 36 | 37 | @ S → ε 38 | current_attribute["nextlist"] = [] 39 | 40 | @ S → id = E 41 | entry0 = attributes[-3]["entry"] 42 | name0 = self.symbol_table.table[entry0].name 43 | entry1 = attributes[-1]["entry"] 44 | name1 = self.symbol_table.table[entry1].name 45 | self.gen_code(f"{name0} = {name1}") 46 | 47 | current_attribute["nextlist"] = [self.current_line] 48 | 49 | @ S → if ( C ) Q S 50 | truelist = attributes[-4]["truelist"] 51 | falselist = attributes[-4]["falselist"] 52 | instr = attributes[-2]["instr"] 53 | self.back_patch(truelist, instr) 54 | nextlist_s1 = attributes[-1]["nextlist"] 55 | current_attribute["nextlist"] = self.merge(falselist, nextlist_s1) 56 | 57 | @ S → while Q ( C ) Q S 58 | self.back_patch(attributes[-1]["nextlist"], attributes[-6]["instr"]) 59 | self.back_patch(attributes[-4]["truelist"], attributes[-2]["instr"]) 60 | current_attribute["nextlist"] = attributes[-4]["falselist"] 61 | idx = attributes[-6]["instr"] 62 | self.gen_code(f"goto {idx}") 63 | 64 | @ C → E > E 65 | current_attribute["truelist"] = self.make_list(self.current_line) 66 | current_attribute["falselist"] = self.make_list(self.current_line + 1) 67 | 68 | idx1 = attributes[-3]["entry"] 69 | idx2 = attributes[-1]["entry"] 70 | name1 = self.symbol_table.table[idx1].name 71 | name2 = self.symbol_table.table[idx2].name 72 | 73 | self.gen_code(f"if {name1} > {name2} goto ") 74 | self.gen_code(f"goto ") 75 | 76 | @ C → E < E 77 | current_attribute["truelist"] = self.make_list(self.current_line) 78 | current_attribute["falselist"] = self.make_list(self.current_line + 1) 79 | 80 | idx1 = attributes[-3]["entry"] 81 | idx2 = attributes[-1]["entry"] 82 | name1 = self.symbol_table.table[idx1].name 83 | name2 = self.symbol_table.table[idx2].name 84 | 85 | self.gen_code(f"if {name1} < {name2} goto ") 86 | self.gen_code(f"goto ") 87 | 88 | 89 | @ C → E == E 90 | current_attribute["truelist"] = self.make_list(self.current_line) 91 | current_attribute["falselist"] = self.make_list(self.current_line + 1) 92 | 93 | idx1 = attributes[-3]["entry"] 94 | idx2 = attributes[-1]["entry"] 95 | name1 = self.symbol_table.table[idx1].name 96 | name2 = self.symbol_table.table[idx2].name 97 | 98 | self.gen_code(f"if {name1} == {name2} goto ") 99 | self.gen_code(f"goto ") 100 | 101 | 102 | @ E → E + T 103 | name0 = f"temp{temp_index}" 104 | entry = self.gen_variable(name0) 105 | temp_index += 1 106 | 107 | entry1 = attributes[-3]["entry"] 108 | name1 = self.symbol_table.table[entry1].name 109 | entry2 = attributes[-1]["entry"] 110 | name2 = self.symbol_table.table[entry2].name 111 | 112 | current_attribute["entry"] = entry 113 | self.gen_code(f"{name0} = {name1} + {name2}") 114 | 115 | @ E → E - T 116 | name0 = f"temp{temp_index}" 117 | entry = self.gen_variable(name0) 118 | temp_index += 1 119 | 120 | entry1 = attributes[-3]["entry"] 121 | name1 = self.symbol_table.table[entry1].name 122 | entry2 = attributes[-1]["entry"] 123 | name2 = self.symbol_table.table[entry2].name 124 | 125 | current_attribute["entry"] = entry 126 | self.gen_code(f"{name0} = {name1} - {name2}") 127 | 128 | @ E → T 129 | current_attribute["entry"] = attributes[-1]["entry"] 130 | 131 | @ T → F 132 | current_attribute["entry"] = attributes[-1]["entry"] 133 | 134 | @ T → T * F 135 | name0 = f"temp{temp_index}" 136 | entry = self.gen_variable(name0) 137 | temp_index += 1 138 | 139 | entry1 = attributes[-3]["entry"] 140 | name1 = self.symbol_table.table[entry1].name 141 | entry2 = attributes[-1]["entry"] 142 | name2 = self.symbol_table.table[entry2].name 143 | 144 | current_attribute["entry"] = entry 145 | self.gen_code(f"{name0} = {name1} * {name2}") 146 | 147 | @ T → T / F 148 | name0 = f"temp{temp_index}" 149 | entry = self.gen_variable(name0) 150 | temp_index += 1 151 | 152 | entry1 = attributes[-3]["entry"] 153 | name1 = self.symbol_table.table[entry1].name 154 | entry2 = attributes[-1]["entry"] 155 | name2 = self.symbol_table.table[entry2].name 156 | 157 | current_attribute["entry"] = entry 158 | self.gen_code(f"{name0} = {name1} / {name2}") 159 | 160 | @ F → ( E ) 161 | current_attribute["entry"] = attributes[-2]["entry"] 162 | 163 | @ F → id 164 | current_attribute["entry"] = attributes[-1]["entry"] 165 | 166 | @ F → const 167 | current_attribute["entry"] = attributes[-1]["entry"] -------------------------------------------------------------------------------- /input/grammar_assign.txt: -------------------------------------------------------------------------------- 1 | TerminalSymbols: id const = + - * / ( ) ; $ 2 | VariableSymbols: P' P M S E T F 3 | 4 | @ P' → P 5 | 6 | @ P → M S 7 | 8 | @ M → ε 9 | offset = 0 10 | temp_index = 0 11 | 12 | @ S → S ; S 13 | 14 | @ S → ε 15 | 16 | @ S → id = E 17 | entry0 = attributes[-3]["entry"] 18 | name0 = self.symbol_table.table[entry0].name 19 | entry1 = attributes[-1]["entry"] 20 | name1 = self.symbol_table.table[entry1].name 21 | self.gen_code(f"{name0} = {name1}") 22 | 23 | @ E → E + T 24 | name0 = f"temp{temp_index}" 25 | entry = self.gen_variable(name0) 26 | temp_index += 1 27 | 28 | entry1 = attributes[-3]["entry"] 29 | name1 = self.symbol_table.table[entry1].name 30 | entry2 = attributes[-1]["entry"] 31 | name2 = self.symbol_table.table[entry2].name 32 | 33 | current_attribute["entry"] = entry 34 | self.gen_code(f"{name0} = {name1} + {name2}") 35 | 36 | @ E → E - T 37 | name0 = f"temp{temp_index}" 38 | entry = self.gen_variable(name0) 39 | temp_index += 1 40 | 41 | entry1 = attributes[-3]["entry"] 42 | name1 = self.symbol_table.table[entry1].name 43 | entry2 = attributes[-1]["entry"] 44 | name2 = self.symbol_table.table[entry2].name 45 | 46 | current_attribute["entry"] = entry 47 | self.gen_code(f"{name0} = {name1} - {name2}") 48 | 49 | @ E → T 50 | current_attribute["entry"] = attributes[-1]["entry"] 51 | 52 | @ T → F 53 | current_attribute["entry"] = attributes[-1]["entry"] 54 | 55 | @ T → T * F 56 | name0 = f"temp{temp_index}" 57 | entry = self.gen_variable(name0) 58 | temp_index += 1 59 | 60 | entry1 = attributes[-3]["entry"] 61 | name1 = self.symbol_table.table[entry1].name 62 | entry2 = attributes[-1]["entry"] 63 | name2 = self.symbol_table.table[entry2].name 64 | 65 | current_attribute["entry"] = entry 66 | self.gen_code(f"{name0} = {name1} * {name2}") 67 | 68 | @ T → T / F 69 | name0 = f"temp{temp_index}" 70 | entry = self.gen_variable(name0) 71 | temp_index += 1 72 | 73 | entry1 = attributes[-3]["entry"] 74 | name1 = self.symbol_table.table[entry1].name 75 | entry2 = attributes[-1]["entry"] 76 | name2 = self.symbol_table.table[entry2].name 77 | 78 | current_attribute["entry"] = entry 79 | self.gen_code(f"{name0} = {name1} / {name2}") 80 | 81 | @ F → ( E ) 82 | current_attribute["entry"] = attributes[-2]["entry"] 83 | 84 | @ F → id 85 | current_attribute["entry"] = attributes[-1]["entry"] 86 | 87 | @ F → const 88 | current_attribute["entry"] = attributes[-1]["entry"] -------------------------------------------------------------------------------- /input/grammar_control.txt: -------------------------------------------------------------------------------- 1 | TerminalSymbols: id ; if while ( ) > = $ 2 | VariableSymbols: P' P M S C Q E 3 | 4 | @ P' → P 5 | @ P → M S 6 | self.back_patch(attributes[-1]["nextlist"], attributes[-2]["instr"]) 7 | @ M → ε 8 | offset = 0 9 | current_attribute["instr"] = self.current_line 10 | 11 | 12 | @ S → S ; Q S 13 | self.back_patch(attributes[-4]["nextlist"], attributes[-2]["instr"]) 14 | current_attribute["nextlist"] = attributes[-1]["nextlist"] 15 | 16 | @ S → ε 17 | current_attribute["nextlist"] = [] 18 | 19 | @ S → E 20 | idx = attributes[-1]["entry"] 21 | name = self.symbol_table.table[idx].name 22 | self.gen_code(f"appearance of {name}") 23 | current_attribute["nextlist"] = [self.current_line] 24 | 25 | @ S → if ( C ) Q S 26 | truelist = attributes[-4]["truelist"] 27 | falselist = attributes[-4]["falselist"] 28 | instr = attributes[-2]["instr"] 29 | self.back_patch(truelist, instr) 30 | nextlist_s1 = attributes[-1]["nextlist"] 31 | current_attribute["nextlist"] = self.merge(falselist, nextlist_s1) 32 | 33 | @ S → while Q ( C ) Q S 34 | self.back_patch(attributes[-1]["nextlist"], attributes[-6]["instr"]) 35 | self.back_patch(attributes[-4]["truelist"], attributes[-2]["instr"]) 36 | current_attribute["nextlist"] = attributes[-4]["falselist"] 37 | idx = attributes[-6]["instr"] 38 | self.gen_code(f"goto {idx}") 39 | 40 | 41 | @ Q → ε 42 | current_attribute["instr"] = self.current_line 43 | 44 | @ C → E > E 45 | current_attribute["truelist"] = self.make_list(self.current_line) 46 | current_attribute["falselist"] = self.make_list(self.current_line + 1) 47 | 48 | idx1 = attributes[-3]["entry"] 49 | idx2 = attributes[-1]["entry"] 50 | name1 = self.symbol_table.table[idx1].name 51 | name2 = self.symbol_table.table[idx2].name 52 | 53 | self.gen_code(f"if {name1} > {name2} goto ") 54 | self.gen_code(f"goto ") 55 | 56 | 57 | @ E → id 58 | attributes[-1]["value"] = 1 59 | current_attribute["value"] = attributes[-1]["value"] 60 | current_attribute["entry"] = attributes[-1]["entry"] 61 | 62 | -------------------------------------------------------------------------------- /input/grammar_define.txt: -------------------------------------------------------------------------------- 1 | TerminalSymbols: id int float ; $ 2 | VariableSymbols: P' P M D N S L 3 | 4 | @ P' → P 5 | @ P → M D 6 | @ M → ε 7 | offset = 0 8 | 9 | @ D → L id ; N D 10 | @ N → ε 11 | idx = attributes[-2]["entry"] 12 | self.symbol_table.table[idx].item_type = item_type_translate[attributes[-3]["type"]] 13 | self.gen_code(f"Alloc [{offset},{offset+4}] for {self.symbol_table.table[idx].name}") 14 | offset += 4 15 | 16 | @ D → ε 17 | @ L → int 18 | current_attribute["type"] = 'int' 19 | 20 | @ L → float 21 | current_attribute["type"] = 'float' -------------------------------------------------------------------------------- /input/grammar_expression.txt: -------------------------------------------------------------------------------- 1 | TerminalSymbols: id + * ( ) $ 2 | VariableSymbols: S E F T 3 | 4 | @ S → E 5 | @ E → E + T 6 | @ E → T 7 | @ T → T * F 8 | @ T → F 9 | @ F → ( E ) 10 | @ F → id -------------------------------------------------------------------------------- /input/grammar_raw.txt: -------------------------------------------------------------------------------- 1 | TerminalSymbols: id const int float if else while > < == = + - * / ( ) ; $ 2 | VariableSymbols: P' P D S L C E T F 3 | 4 | @ P' → P 5 | @ P → D S 6 | @ D → L id ; D 7 | @ D → ε 8 | @ L → int 9 | @ L → float 10 | @ S → S ; S 11 | @ S → ε 12 | @ S → id = E 13 | @ S → if ( C ) S 14 | @ S → if ( C ) S else S 15 | @ S → while ( C ) S 16 | @ C → E > E 17 | @ C → E < E 18 | @ C → E == E 19 | @ E → E + T 20 | @ E → E - T 21 | @ E → T 22 | @ T → F 23 | @ T → T * F 24 | @ T → T / F 25 | @ F → ( E ) 26 | @ F → id 27 | @ F → const -------------------------------------------------------------------------------- /input/input.txt: -------------------------------------------------------------------------------- 1 | int a; 2 | int b; 3 | float c; 4 | 5 | a = 2; 6 | b = 1; 7 | c = (a + b) / 2; 8 | 9 | if (a > (b + c)) 10 | if (c < 12) 11 | c = (a + b) * c - a / 2; 12 | 13 | while (a < b) 14 | a = b + c - 1; 15 | 16 | b = a; 17 | -------------------------------------------------------------------------------- /input/input_assign.txt: -------------------------------------------------------------------------------- 1 | a = b + 3; 2 | c = d - a; 3 | x = (a + b) * c - d / 2; -------------------------------------------------------------------------------- /input/input_control.txt: -------------------------------------------------------------------------------- 1 | if (a > b) 2 | a; 3 | while (a > b) 4 | b; 5 | c; 6 | -------------------------------------------------------------------------------- /input/input_define.txt: -------------------------------------------------------------------------------- 1 | int abc; 2 | float def; -------------------------------------------------------------------------------- /input/input_expression.txt: -------------------------------------------------------------------------------- 1 | (a*b)+c -------------------------------------------------------------------------------- /input/input_raw.txt: -------------------------------------------------------------------------------- 1 | int a; 2 | int b; 3 | float c; 4 | a=2; 5 | b=1; 6 | if(a>(b+c)) 7 | c=(a+b)/2; 8 | else 9 | c=a-b; 10 | while(c=b) 7 | c=a+b; 8 | else 9 | c=a-b; 10 | while(c ") 60 | print("") 61 | 62 | if input_string == "0": 63 | # Grammar 64 | console.print(f"Grammar:", style="bold") 65 | console.print(f"Terminal Symbols: {grammar.terminal_symbols}") 66 | console.print(f"Variable Symbols: {grammar.variable_symbols}") 67 | 68 | for production in grammar.production_list: 69 | console.print(f"{production.from_state} →", end="") 70 | 71 | for item in production.items: 72 | if item.is_symbol: 73 | console.print(f" {item.value}", style="bold red", end="") 74 | else: 75 | console.print(f" {item.value}", end="") 76 | 77 | console.print("") 78 | elif input_string == "1": 79 | # Input Code 80 | console.print(f"Input Code:", style="bold") 81 | print(code) 82 | elif input_string == "2": 83 | # Scanner States 84 | scanner.print_states() 85 | elif input_string == "3": 86 | # SLR States 87 | slr_automata.print_state() 88 | elif input_string == "4": 89 | # Token Table 90 | scanner.print_tokens() 91 | elif input_string == "5": 92 | # Symbol Table 93 | symbol_table.output() 94 | elif input_string == "6": 95 | # First Set 96 | slr_table.print_first_set() 97 | elif input_string == "7": 98 | # Follow Set 99 | slr_table.print_follow_set() 100 | elif input_string == "8": 101 | # Closure Set 102 | slr_table.print_closure_set() 103 | elif input_string == "9": 104 | # SLR Table (Action/Goto Table) 105 | print_slr_table(grammar) 106 | elif input_string == "10": 107 | # Output Code 108 | slr_automata.print_code() 109 | elif input_string == "q": 110 | # quit 111 | exit(0) 112 | else: 113 | console.print(f"Unknown input {input_string}!") 114 | -------------------------------------------------------------------------------- /test_grammar.py: -------------------------------------------------------------------------------- 1 | from rich import console 2 | from rich.console import Console 3 | 4 | from Grammar import Grammar 5 | 6 | console = Console() 7 | 8 | grammar = Grammar() 9 | grammar.read("input/grammar.txt") 10 | grammar.save() 11 | 12 | console.print(f"Terminal Symbols: {grammar.terminal_symbols}") 13 | console.print(f"Variable Symbols: {grammar.variable_symbols}") 14 | 15 | for production in grammar.production_list: 16 | console.print(f"{production.from_state} →", end="") 17 | 18 | for item in production.items: 19 | if item.is_symbol: 20 | console.print(f" {item.value}", style="bold red", end="") 21 | else: 22 | console.print(f" {item.value}", end="") 23 | 24 | console.print("") 25 | if production.code != "": 26 | console.print(production.code, end="\n\n") 27 | -------------------------------------------------------------------------------- /test_scanner.py: -------------------------------------------------------------------------------- 1 | from rich import box 2 | from rich.console import Console 3 | from rich.table import Table 4 | 5 | from Scanner import Scanner 6 | from Symbol_Table import Symbol_Table 7 | from Token import Token 8 | 9 | if __name__ == "__main__": 10 | # init output 11 | console = Console() 12 | symbol_table = Symbol_Table() 13 | 14 | # run scanner 15 | with open("input/input.txt", "r") as f: 16 | scanner = Scanner(f.read(), symbol_table) 17 | 18 | while scanner.has_next(): 19 | token: Token = scanner.get_next() 20 | 21 | # print results 22 | scanner.print_states() 23 | scanner.print_tokens() 24 | 25 | symbol_table.save() 26 | scanner.save() 27 | --------------------------------------------------------------------------------