├── .gitignore ├── .vscode └── ipch │ └── 90f59cb3920d6114 │ └── mmap_address.bin ├── LR.py ├── README.md ├── generate.py ├── get_predict_table.py ├── lexer.py ├── other ├── 99mul.png ├── __pycache__ │ ├── function.cpython-37.pyc │ └── wenfa.cpython-37.pyc ├── function.py ├── help.png ├── parser.py └── pcc-o.png ├── pcc.py ├── test ├── 99mul ├── 99mul.c ├── 99mul.s ├── fibonacci ├── fibonacci.c ├── fibonacci.s ├── print ├── print.c ├── print.s ├── test ├── test.c └── test.s └── to_asm.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .vscode -------------------------------------------------------------------------------- /.vscode/ipch/90f59cb3920d6114/mmap_address.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/.vscode/ipch/90f59cb3920d6114/mmap_address.bin -------------------------------------------------------------------------------- /LR.py: -------------------------------------------------------------------------------- 1 | """ 2 | 使用非递归的预测分析表做语法分析————语法树生成 3 | 作者:刘金明 4 | 博客:me.idealli.com 5 | Github:github.com/flymysql 6 | """ 7 | 8 | from get_predict_table import creat_predict_table 9 | import re 10 | from lexer import word_list 11 | 12 | predict_table = creat_predict_table() 13 | 14 | # 语法树节点 15 | class Node: 16 | def __init__(self, Type, text=None): 17 | self.type = Type 18 | self.text = text 19 | self.child = list() 20 | # 将语法树对象字符化输出 21 | def __str__(self): 22 | childs = list() 23 | for child in self.child: 24 | childs.append(child.__str__()) 25 | out = "<{type}, {text}>".format(type=self.type, text=self.text) 26 | for child in childs: 27 | if child: 28 | for line in child.split("\n"): 29 | out = out + "\n " + line 30 | return out 31 | 32 | def __repr__(self): 33 | return self.__str__() 34 | 35 | # 输出栈中节点的type 36 | def stack_text(stack): 37 | ss = [] 38 | for s in stack: 39 | ss.append(s.type) 40 | return ss 41 | 42 | def analysis(word_table, show=False): 43 | stack = [] 44 | root = Node("Program") 45 | End = Node("#") 46 | stack.append(End) 47 | stack.append(root) 48 | index = 0 49 | """ 50 | 分析预测表的三个状态 51 | 1. cur = # 解析完成 52 | 2. cur = w 输入的字符表与符号栈中节点匹配 53 | 3. cur 为非终结符,继续生成子节点 54 | 4. error 55 | """ 56 | while len(stack) != 0: 57 | cur = stack.pop() 58 | # 状态 1 59 | if cur.type == "#" and len(stack) == 0: 60 | print("分析完成!") 61 | return [True, root] 62 | # 状态 2 63 | elif cur.type == word_table[index]['type']: 64 | if show: 65 | print("符号栈:", stack_text(stack), "\n匹配字符: ", word_table[index]['word']) 66 | cur.text = word_table[index]['word'] 67 | index += 1 68 | # 状态 3 69 | else: 70 | w = word_table[index]['type'] 71 | if w in predict_table[cur.type]: 72 | if predict_table[cur.type][w] == "null": 73 | continue 74 | next_pr = predict_table[cur.type][w].split() 75 | if show: 76 | print("\n符号栈:", stack_text(stack), "\n产生式: ", cur.type,"->", predict_table[cur.type][w]) 77 | node_list = [] 78 | """ 79 | 产生式右部符号入栈 80 | 子节点入栈 81 | 注意:子节点入栈顺序应该与产生式符号相反 82 | """ 83 | for np in next_pr: 84 | node_list.append(Node(np)) 85 | for nl in node_list: 86 | cur.child.append(nl) 87 | node_list.reverse() 88 | for nl in node_list: 89 | stack.append(nl) 90 | # 状态 4 错误 91 | else: 92 | print("error", stack, cur.type , word_table[index]['type']) 93 | return [False] 94 | 95 | if __name__ == "__main__": 96 | w_list = word_list("./test/test.c") 97 | word_table = w_list.word_list 98 | root = analysis(word_table, True) 99 | if root[0]: 100 | print("\n\n是否继续打印语法树?\t1.打印 \t2.任意键退出\tTip:运行generate.py输出中间代码(四元式)\n请输入") 101 | if input() == "1": 102 | print(root[1]) 103 | print("\n\n语法树打印完成!运行 genenrate.py 生成四元式\n\n") 104 | # print(root[1]) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## PCC——python实现编译器 2 | 3 | 编译原理课设,实现源码到汇编代码的翻译,链接部分使用gcc的功能 4 | 5 | ## 源码说明 6 | 7 | 1. lexer.py 词法分析器 8 | 2. get_predict_table.py 生成预测分析表 9 | 3. LR.py 非递归的语法分析器 10 | 4. generate.py 中间代码生成 11 | 5. to_asm.py 汇编代码生成 12 | 6. pcc.py 入口函数 13 | 14 | ## 使用 15 | 16 | ```python 17 | $ python pcc.py 18 | ``` 19 | 20 | **命令说明** 21 | 22 | ``` 23 | pcc -o (filename) 直接编译生成可执行程序 24 | pcc -s (filename) 生成汇编源码 25 | pcc -t (filename) 查看语法树生成过程 26 | pcc -l (filename) 查看词法分析 27 | pcc -h 查看帮助 28 | pcc -p 查看本编译器的预测分析表 29 | pcc -g 查看本编译器的语法推导 30 | exit 退出 31 | ``` 32 | 33 | ![](./other/help.png) 34 | 35 | ### 编译代码 36 | 37 | ``` 38 | pcc -o ./test/test.c 39 | ``` 40 | 41 | #### 支持的语法 42 | 43 | 1. 声明语句和赋值语句 44 | 45 | 例1: 46 | ```c 47 | int a; 48 | a = 10; 49 | int b = a; 50 | ``` 51 | 52 | 2. 混合四则运算 53 | 例2: 54 | ```c 55 | int a = 1 + 2*(3 - 4); 56 | ``` 57 | 58 | 3. 数组 59 | 例3: 60 | ```c 61 | int arr[10]; 62 | a[0] = 1; 63 | int b = a[0]; 64 | ``` 65 | 数组下标,即[]中内容也可以用表达式嵌套 66 | 例4: 67 | ```c 68 | int index = 5; 69 | arr[index] = 6; 70 | arr[arr[(index+1)*2-1]] = 7; 71 | ``` 72 | 73 | 4. 输出语句 74 | 目前printf语句的参数最多可以带三个参数 75 | 76 | 例子5 77 | ```c 78 | printf("这是个不带参数的printf"); 79 | printf("这是个参数%d",a); 80 | printf("三个参数:%d,%d,%d",a,b,c); 81 | 82 | char c="T"; 83 | printf("字符型参数:%c",c); 84 | ``` 85 | printf语句的参数也可以是表达式 86 | 87 | 例子6 88 | ```c 89 | printf("%d", d + 2*(3+4)); 90 | ``` 91 | 92 | 5. 控制语句 93 | 94 | 目前仅支持if判断,可嵌套使用 95 | 例子7 96 | ```c 97 | if(a < 2*10){ 98 | a = a + 1; 99 | if(c < a){ 100 | printf("%d", a*c); 101 | } 102 | } 103 | ``` 104 | 105 | 6. while控制语句 106 | 107 | 这个不多说,也是可嵌套 108 | ```c 109 | int i = 1; 110 | printf("正在由pcc编译器为你打印99乘法表!\n"); 111 | while(i < 10){ 112 | int j = i; 113 | while(j < 10){ 114 | printf("%d*%d=%d\t",i, j, i*j); 115 | j = j + 1; 116 | } 117 | printf("\n"); 118 | i = i +1; 119 | } 120 | ``` 121 | 122 | #### 举个栗子(打印99乘法表) 123 | 124 | ![](./other/99mul.png) 125 | 126 | **源demo** 127 | ```c 128 | int main(){ 129 | // 打印99乘法表 130 | // int a[10]; 131 | int i = 1; 132 | printf("正在由pcc编译器为你打印99乘法表!\n"); 133 | while(i < 10){ 134 | int j = i; 135 | while(j < 10){ 136 | printf("%d*%d=%d\t",i, j, i*j); 137 | j = j + 1; 138 | } 139 | printf("\n"); 140 | i = i +1; 141 | } 142 | } 143 | ``` 144 | 145 | #### 举个栗子(打印斐波那契数列) 146 | 147 | ![](./other/pcc-o.png) 148 | 149 | **c语言源码** 150 | 151 | ```c 152 | 153 | // 兰州小红鸡的注释测试 154 | int main(){ 155 | int arr[25]; 156 | int index = 0; 157 | // 求0~20的斐波那契数列 158 | arr[0] = 1; 159 | arr[1] = 2; 160 | arr[2] = 3; 161 | while(index < 10*2 ){ 162 | int b = arr[index]; 163 | arr[index+2]=arr[index+1] + b; 164 | printf("f(%d)=%d\n",index,b); 165 | index = index +1; 166 | } 167 | printf("完成斐波那契数列打印!由小鸡编译器提供——pcc\n"); 168 | } 169 | ``` 170 | 171 | **生成的中间代码(四元式)** 172 | 173 | ``` 174 | (=,0,0,index) 175 | (=,1,0,arr[]0) 176 | (=,2,0,arr[]1) 177 | (=,3,0,arr[]2) 178 | (code_block,0,0,W4) 179 | (j<,index,20,code6) 180 | (j,0,0,block6) 181 | (code_block,0,0,code6) 182 | (=,arr[]index,0,b) 183 | (+,index,1,T0) 184 | (+,arr[]T0,b,T1) 185 | (+,index,2,T2) 186 | (=,T1,0,arr[]T2) 187 | (print,index,b,-1) 188 | (+,index,1,T3) 189 | (=,T3,0,index) 190 | (j,0,0,W4) 191 | (code_block,0,0,block6) 192 | (print,-1,-1,-1) 193 | ``` 194 | 195 | **生成的汇编代码** 196 | 197 | ```s 198 | .text 199 | .section .rodata 200 | .comm T0,4,4 201 | .comm T1,4,4 202 | .comm T2,4,4 203 | .comm T3,4,4 204 | .LC0: 205 | .string "f(%d)=%d\n" 206 | .LC1: 207 | .string "完成斐波那契数列打印!由小鸡编译器提供——pcc\n" 208 | .text 209 | .globl main 210 | .type main, @function 211 | main: 212 | 213 | .cfi_startproc 214 | pushq %rbp 215 | .cfi_def_cfa_offset 16 216 | .cfi_offset 6, -16 217 | movq %rsp, %rbp 218 | .cfi_def_cfa_register 6 219 | subq $120, %rsp 220 | movl $0, -8(%rbp) 221 | movl $0, -12(%rbp) 222 | movl $1, -112(%rbp) 223 | movl $2, -108(%rbp) 224 | movl $3, -104(%rbp) 225 | .W5: 226 | movl -12(%rbp), %eax 227 | cmpl $20, %eax 228 | jle .code7 229 | jmp .block7 230 | .code7: 231 | movl -12(%rbp), %eax 232 | cltq 233 | movl -112(%rbp, %rax, 4), %ecx 234 | movl %ecx, -8(%rbp) 235 | movl -12(%rbp), %edx 236 | movl $1, %eax 237 | addl %edx, %eax 238 | movl %eax, T0(%rip) 239 | movl T0(%rip), %eax 240 | cltq 241 | movl -112(%rbp, %rax, 4), %edx 242 | movl -8(%rbp), %eax 243 | addl %edx, %eax 244 | movl %eax, T1(%rip) 245 | movl -12(%rbp), %edx 246 | movl $2, %eax 247 | addl %edx, %eax 248 | movl %eax, T2(%rip) 249 | movl T2(%rip), %eax 250 | cltq 251 | movl T1(%rip), %ecx 252 | movl %ecx, -112(%rbp, %rax, 4) 253 | movl -12(%rbp), %eax 254 | movl -8(%rbp), %edx 255 | movl %eax, %esi 256 | leaq .LC0(%rip), %rdi 257 | movl $0, %eax 258 | call printf@PLT 259 | movl -12(%rbp), %edx 260 | movl $1, %eax 261 | addl %edx, %eax 262 | movl %eax, T3(%rip) 263 | movl T3(%rip), %ecx 264 | movl %ecx, -12(%rbp) 265 | jmp .W5 266 | .block7: 267 | movl %eax, %esi 268 | leaq .LC1(%rip), %rdi 269 | movl $0, %eax 270 | call printf@PLT 271 | 272 | movl $0, %eax 273 | leave 274 | .cfi_def_cfa 7, 8 275 | ret 276 | .cfi_endproc 277 | .LFE6: 278 | .size main, .-main 279 | .ident "PCC: 1.0.0" 280 | 281 | ``` 282 | 283 | 其他命令自行发觉hhh -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- 1 | """ 2 | 语义分析:中间代码产生——四元式 3 | 作者:刘金明 4 | 博客:me.idealli.com 5 | Github:github.com/flymysql 6 | """ 7 | # from parser import Node,build_ast 8 | from other.function import if_num 9 | from LR import analysis 10 | import sys, os, re 11 | sys.path.append(os.pardir) 12 | from lexer import word_list 13 | 14 | operator = { 15 | "+": lambda a, b: a+b, 16 | "-": lambda a, b: a-b, 17 | "*": lambda a, b: a*b, 18 | "/": lambda a, b: a/b 19 | } 20 | 21 | """ 22 | 四元式对象 23 | 成员: op,arg1,arg2,result 分别对于操作数,两个变量,结果 24 | 特殊的自定义四元式语法: 25 | 1. (code_block, 0, 0, block1) 代码块开始标记 26 | 2. (j, 0, 0, , +2) 跳转语句,往后跳两行 27 | 3. (j<, a, b, block1) 条件跳转 if(a 1: 110 | op = Mnode(math_op(root.child[0])) 111 | arg1 = math_op(root.child[1]) 112 | if if_num(arg1) and if_num(ft): 113 | return str(operator[op](int(arg1), int(ft))) 114 | 115 | """ 116 | 临时变量Tn 117 | ft 为父节点传入的操作符左边部分临时id 118 | """ 119 | t = "T" + str(tmp) 120 | tmp += 1 121 | mid_result.append(Mnode(op, arg1, ft,t)) 122 | ct = math_op(root.child[2], t) 123 | if ct != None: 124 | return ct 125 | return t 126 | 127 | elif root.type == "E" or root.type == "T": 128 | """ 129 | 赋值语句处理 130 | 如果存在右递归,进行四则运算的解析 131 | 不存在右递归的话直接赋值 132 | """ 133 | if len(root.child[1].child) > 1: 134 | op = math_op(root.child[1].child[0]) 135 | arg1 = math_op(root.child[0]) 136 | arg2 = math_op(root.child[1].child[1]) 137 | """静态的计算提前算好""" 138 | if if_num(arg1) and if_num(arg2): 139 | return str(operator[op](int(arg1), int(arg2))) 140 | 141 | t = "T" + str(tmp) 142 | tmp += 1 143 | mid_result.append(Mnode(op, arg1, arg2,t)) 144 | ct = math_op(root.child[1].child[2], t) 145 | if ct != None: 146 | return ct 147 | return t 148 | else: 149 | return math_op(root.child[0]) 150 | elif root.type == "F" and len(root.child) == 2: 151 | c = root.child 152 | if c[1].child != [] and c[1].child[0].type == "Size": 153 | return c[0].child[0].text + "[]" + math_op(c[1]) 154 | else: 155 | return c[0].child[0].text 156 | 157 | else: 158 | re = "" 159 | for c in root.child: 160 | cre = math_op(c) 161 | if cre != None and cre not in "[]}{)(\"'": 162 | re = cre 163 | return re 164 | 165 | 166 | """ 167 | 控制语句的程序块处理 168 | 可处理语句: 169 | 1. if语句 170 | 2. while语句 171 | 3. if和while的相互嵌套语句 172 | """ 173 | def judge(root): 174 | if root == None: 175 | return 176 | elif len(root.child) == 0 and root.text != None: 177 | return root.text 178 | if root.type == "Ptype": 179 | if root.child[0].text == "if": 180 | while_flag.append([False]) 181 | else: 182 | """ 183 | 对whilie语句进行代码块标记,方便跳转 184 | """ 185 | cur = len(mid_result) 186 | while_flag.append([True,cur]) 187 | mid_result.append(Mnode("code_block", 0, 0, "W" + str(cur))) 188 | if root.type == "Pbc": 189 | """ 190 | 判断语句括号中的的两种情况 191 | 1. (E) 192 | 2. (E1 cmp E2) 193 | """ 194 | Pm = root.child[1].child 195 | if len(Pm) == 1: 196 | mid_result.append(Mnode("j=", 1, math_op(root.child[0]),"code"+str(len(mid_result)+1))) 197 | else: 198 | mid_result.append(Mnode("j"+judge(Pm[0]), math_op(root.child[0]), math_op(Pm[1]),"code"+str(len(mid_result)+1))) 199 | return 200 | if root.type == "Pro": 201 | """ 202 | 控制语句的代码块前后做标记 203 | 判断标记 204 | 跳转->结束标记 205 | { 206 | code 207 | } 208 | while跳转->判断标记 209 | 结束标记 210 | """ 211 | w = while_flag.pop() 212 | code_block = len(mid_result) 213 | code = "block" + str(code_block) 214 | mid_result.append(Mnode("j",0, 0,code)) 215 | mid_result.append(Mnode("code_block",0,0,"code"+str(code_block))) 216 | view_astree(root) 217 | if w[0] == True: 218 | mid_result.append(Mnode("j",0,0,"W"+str(w[1]))) 219 | mid_result.append(Mnode("code_block",0,0,code)) 220 | code_block += 1 221 | return 222 | else: 223 | re = "" 224 | for c in root.child: 225 | cre = judge(c) 226 | if cre != None and cre not in "[]}{)(\"'": 227 | re = cre 228 | return re 229 | 230 | 231 | """ 232 | 输出处理 233 | 可处理语句:printf(a,b) 该语法:在括号内只能传入变量参数 234 | """ 235 | def out(root): 236 | if root == None: 237 | return 238 | elif root.type == "V": 239 | if len(root.child) <= 1: 240 | mid_result.append(Mnode("print", '-1', '-1', '-1')) 241 | return 242 | else: 243 | name = [math_op(root.child[1])] 244 | V = root.child[2] 245 | while len(V.child) > 1: 246 | name.append(math_op(V.child[1])) 247 | V = V.child[2] 248 | name.extend(['-1','-1','-1']) 249 | mid_result.append(Mnode("print", name[0], name[1], name[2])) 250 | else: 251 | for c in root.child: 252 | out(c) 253 | 254 | def creat_mcode(filename): 255 | global tmp 256 | global mid_result 257 | global arr 258 | arr = {} 259 | tmp = 0 260 | mid_result = [] 261 | w_list = word_list(filename) 262 | word_table = w_list.word_list 263 | string_list = w_list.string_list 264 | root = analysis(word_table)[1] 265 | view_astree(root) 266 | 267 | return {"name_list":w_list.name_list, "mid_code":mid_result, "tmp":tmp, "strings":string_list, "arrs":arr} 268 | 269 | if __name__ == "__main__": 270 | filename = 'test/test.c' 271 | creat_mcode(filename) 272 | for r in mid_result: 273 | print(r) 274 | -------------------------------------------------------------------------------- /get_predict_table.py: -------------------------------------------------------------------------------- 1 | """ 2 | 使用非递归的预测分析表做语法分析————预测分析表的生成 3 | 作者:刘金明 4 | 博客:me.idealli.com 5 | Github:github.com/flymysql 6 | """ 7 | import sys, os, re 8 | sys.path.append(os.pardir) 9 | from lexer import word_list,k_list 10 | 11 | grammars = { 12 | "Program":["type M C Pro"], 13 | "C":["( cc )"], 14 | "cc":["null"], 15 | 16 | "Pro":["{ Pr }"], 17 | "Pr":["P Pr", "null"], 18 | "P":["type L ;", "L ;", "printf OUT ;", "Pan"], 19 | 20 | "L":["M LM"], 21 | "LM":["= FE", "Size AM","null"], 22 | "FE":["E", "TEXT", "CHAR"], 23 | "M":["name"], 24 | 25 | "E":["T ET"], 26 | "ET":["+ T ET", "- T ET", "null"], 27 | "T":["F TT"], 28 | "TT":["* F TT", "/ F TT", "null"], 29 | "F":["number", "BRA", "M MS"], 30 | "MS":["Size", "null"], 31 | "BRA": ["( E )"], 32 | 33 | "OUT":["( TXT V )"], 34 | "TXT":['TEXT'], 35 | "V":[", E VV", "null"], 36 | "VV":[", E VV", "null"], 37 | 38 | "Pan":["Ptype P_block Pro"], 39 | "Ptype":["if", "while"], 40 | "P_block":["( Pbc )"], 41 | "Pbc":["E PM"], 42 | "PM":["Cmp E", "null"], 43 | 44 | "Size":["[ E ]"], 45 | "AM":["= E", "null"] 46 | } 47 | 48 | first_table = {} 49 | follow_table = {} 50 | predict_table = {} 51 | observer = {} 52 | 53 | """ 54 | 初始化订阅者 55 | 订阅者: 用于求follow集合的过程中特殊情况: 56 | 非终结符的后继非终结符的first集合可能存在null 57 | eg: A -> BC C -> D | null D -> (A) | i 58 | 那么在一次遍历过程中,因为C的first集合存在null,所以需要将follow(A)加入follow(B) 59 | (重点)但是!此时的follow(A),并不是完整的,它可能在后续的遍历中会继续更新自身的follow集合 60 | 所以此时会出现遗漏的follow 61 | 所以我在这里用到一个订阅者模式 62 | 订阅者为一个字典,字典键值为产生式左部,字典内容为产生式右部 63 | """ 64 | def init_observer(): 65 | for k in grammars: 66 | follow_table[k] = [] 67 | observer[k] = [] 68 | for next_grammar in grammars[k]: 69 | last_k = next_grammar.split()[-1] 70 | if last_k in grammars and last_k != k: 71 | observer[k].append(last_k) 72 | """ 73 | 刷新订阅 74 | 检测到某个follow集合更新时,对其订阅的所有产生式左部的follow集合进行更新 75 | 简而言之:follow(A)发生了更新,那么曾经将follow(A)加入自身的B,C也更新其follow 76 | 并且,这是一个递归过程 77 | """ 78 | def refresh(k): 79 | for lk in observer[k]: 80 | newlk = U(follow_table[k], follow_table[lk]) 81 | if newlk != follow_table[lk]: 82 | follow_table[lk] = newlk 83 | refresh(lk) 84 | 85 | """ 86 | 合并两个list并且去重 87 | """ 88 | def U(A,B): 89 | return list(set(A+B)) 90 | 91 | """ 92 | 查找指定非终结符的first集合 93 | """ 94 | def find_first(key): 95 | if key not in grammars: 96 | return [key] 97 | l = [] 98 | for next_grammar in grammars[key]: 99 | next_k = next_grammar.split()[0] 100 | l.extend(find_first(next_k)) 101 | return l 102 | 103 | """ 104 | 查找所有非终结符follow 105 | """ 106 | def find_follow(): 107 | init_observer() 108 | follow_table["Program"] = ["#"] 109 | for k in grammars: 110 | for next_grammar in grammars[k]: 111 | next_k = next_grammar.split() 112 | 113 | for i in range(0,len(next_k)-1): 114 | if next_k[i] in grammars: 115 | if next_k[i+1] not in grammars: 116 | """ 117 | 如果后继字符不是终结符,加入 118 | """ 119 | new_follow = U([next_k[i+1]], follow_table[next_k[i]]) 120 | if new_follow != follow_table[next_k[i]]: 121 | follow_table[next_k[i]] = new_follow 122 | refresh(next_k[i]) 123 | else: 124 | new_follow = U(first_table[next_k[i+1]], follow_table[next_k[i]]) 125 | """ 126 | 如果后继字符的first集合中含有null,通知所有订阅者更新follow集合 127 | """ 128 | if "null" in first_table[next_k[i+1]]: 129 | new_follow = U(follow_table[k], new_follow) 130 | observer[k].append(next_k[i]) 131 | if new_follow != follow_table[next_k[i]]: 132 | follow_table[next_k[i]] = new_follow 133 | refresh(next_k[i]) 134 | """ 135 | 产生式左部的follow集合加入最后一个非终结符的follow集合 136 | """ 137 | if next_k[-1] in grammars: 138 | if next_k[-1] not in follow_table: 139 | follow_table[next_k[-1]] = [] 140 | if next_k[-1] != k: 141 | follow_table[next_k[-1]] = U(follow_table[next_k[-1]], follow_table[k]) 142 | 143 | for k in follow_table: 144 | if "null" in follow_table[k]: 145 | follow_table[k].remove("null") 146 | 147 | """ 148 | 获取所有非终结符的first集合 149 | 在此同时直接将first集合加入predict表中 150 | """ 151 | def get_first_table(): 152 | for k in grammars: 153 | predict_table[k] = {} 154 | first_table[k] = [] 155 | for next_grammar in grammars[k]: 156 | next_k = next_grammar.split()[0] 157 | kl = find_first(next_k) 158 | first_table[k].extend(kl) 159 | for kk in kl: 160 | if kk != "null": 161 | predict_table[k][kk] = next_grammar 162 | 163 | """ 164 | 将follow集合中的部分内容加入predict表中 165 | """ 166 | def get_predict_table(): 167 | for k in grammars: 168 | for next_grammar in grammars[k]: 169 | next_k = next_grammar.split()[0] 170 | if next_k in grammars and "null" in first_table[next_k] or next_k == "null": 171 | for fk in follow_table[k]: 172 | predict_table[k][fk] = next_grammar 173 | 174 | 175 | def creat_predict_table(): 176 | get_first_table() 177 | find_follow() 178 | get_predict_table() 179 | return predict_table 180 | 181 | def show_tables(): 182 | get_first_table() 183 | find_follow() 184 | get_predict_table() 185 | print("\nfirst集合如下\n") 186 | for k in first_table: 187 | print(k, first_table[k]) 188 | print("\nfollow集合如下\n") 189 | for k in follow_table: 190 | print(k, follow_table[k]) 191 | # print(first_table) 192 | print("\n预测表如下\n") 193 | for k in predict_table: 194 | print(k, predict_table[k]) 195 | 196 | if __name__ == "__main__": 197 | show_tables() -------------------------------------------------------------------------------- /lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境:python3.6 3 | 作者:刘金明 4 | 博客:me.idealli.com 5 | Github:github.com/flymysql 6 | """ 7 | 8 | import re 9 | # 一些判断函数和字符分割函数放在同级文件function.py中 10 | import sys, os 11 | sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 12 | from other.function import if_num, if_name, have_name, printf, get_word 13 | 14 | # 运算符表 15 | y_list = {"+","-","*","/","<","<=",">",">=","=","==","!=","^",",","&","&&","|","||","%","~","<<",">>","!"} 16 | # 分隔符表 17 | f_list = {";","(",")","[","]","{","}", ".",":","\"","#","\'","\\","?"} 18 | # 关键字表 19 | k_list = { 20 | "auto", "break", "case", "const", "continue","default", "do", "else", "enum", "extern", 21 | "for", "goto", "if", "register", "return", "short", "signed", "sizeof", "static", 22 | "struct", "switch", "typedef", "union", "volatile", "while", "printf" 23 | } 24 | 25 | Cmp = ["<", ">", "==", "!=", ">=", "<="] 26 | 27 | Type = {"int","float","char","double","void","long","unsigned","string"} 28 | type_flag = "" 29 | # 括号配对判断 30 | kuo_cp = {'{':'}', '[':']', '(':')'} 31 | 32 | # 词法分析器输出对象 33 | # 成员变量:输出的单词表,源代码中的分隔符表,运算符表,变量表,关键字表 34 | # 一个方法,将源代码字符切割并存入对应表中 35 | # 对象创建实例需要传入filename参数,默认为test.c 36 | class word_list(): 37 | def __init__(self, filename='test.c'): 38 | self.word_list = [] # 输出单词列表 39 | self.separator_list = [] # 分隔符 40 | self.operator_list = [] # 运算符 41 | self.name_list = [] # 变量 42 | self.key_word_table = [] # 关键字 43 | self.string_list = [] 44 | self.flag = True # 源代码是否正确标识 45 | 46 | # get_word函数将源代码切割 47 | self.creat_table(get_word(filename)) 48 | 49 | # 创建各个表 50 | def creat_table(self, in_words): 51 | name_id = 0 52 | kuo_list = [] # 存储括号并判断是否完整匹配 53 | char_flag = False 54 | str_flag = False 55 | string_list = [] 56 | strings = "" 57 | chars = "" 58 | for word in in_words: 59 | w = word['word'] 60 | line = word['line'] 61 | if w == '"': 62 | if str_flag == False: 63 | str_flag = True 64 | else: 65 | str_flag = False 66 | self.word_list.append({'line':line, 'type':'TEXT', 'word':strings}) 67 | self.string_list.append(strings) 68 | strings = "" 69 | # self.word_list.append({'line':line, 'type':w, 'word':w}) 70 | continue 71 | # 判断是否为字符串 72 | if str_flag == True: 73 | strings += w 74 | continue 75 | if w == "'": 76 | if char_flag == False: 77 | char_flag = True 78 | else: 79 | char_flag = False 80 | self.word_list.append({'line':line, 'type':'CHAR', 'word':chars}) 81 | chars = "" 82 | continue 83 | if char_flag == True: 84 | chars += w 85 | continue 86 | # 判断为关键字 87 | if w in k_list: 88 | self.key_word_table.append({'line':line, 'type':'keyword', 'word':w}) 89 | self.word_list.append({'line':line, 'type':w, 'word':w}) 90 | elif w in Cmp: 91 | self.word_list.append({'line':line, 'type':"Cmp", 'word':w}) 92 | # 判断为关键字 93 | elif w in Type: 94 | type_flag = w 95 | self.key_word_table.append({'line':line, 'type':'type', 'word':w}) 96 | self.word_list.append({'line':line, 'type':'type', 'word':w}) 97 | # 判断为运算符 98 | elif w in y_list: 99 | self.operator_list.append({'line':line, 'type':'operator', 'word':w}) 100 | self.word_list.append({'line':line, 'type':w, 'word':w}) 101 | # 判断为分隔符 102 | elif w in f_list: 103 | if w in kuo_cp.values() or w in kuo_cp.keys(): 104 | # 左括号入栈 105 | if w in kuo_cp.keys(): 106 | kuo_list.append({'kuo':w, 'line':line}) 107 | # 右括号判断是否匹配并出栈 108 | elif w == kuo_cp[kuo_list[-1]['kuo']]: 109 | kuo_list.pop() 110 | else: 111 | print("小金提醒:在第" + str(line) + "行的' " + w + " '无法匹配,无法通过编译,请检查代码正确性!") 112 | self.flag = False 113 | return 114 | self.separator_list.append({'line':line, 'type':'separator', 'word':w}) 115 | self.word_list.append({'line':line, 'type':w, 'word':w}) 116 | # 其他字符处理 117 | else: 118 | if if_num(w): 119 | self.word_list.append({'line':line, 'type':'number', 'word':w}) 120 | # 如果是变量名要判断是否已经存在 121 | elif if_name(w): 122 | if have_name(self.name_list,w): 123 | self.word_list.append({'line':line, 'type':'name', 'word':w, 'id':name_id}) 124 | else: 125 | self.name_list.append({'line':line, 'id':name_id, 'word':0.0, 'name':w, 'flag':type_flag}) 126 | self.word_list.append({'line':line, 'type':'name', 'word':w, 'id':name_id}) 127 | name_id += 1 128 | else: 129 | print("小金提醒:在第" + str(line) + "行的变量名' " + w + " '不可识别,无法通过编译,请检查代码正确性!") 130 | self.flag = False 131 | return 132 | if kuo_list!=[]: 133 | print("小金提醒:在第" + str(kuo_list[0]['line']) + "行的' " + kuo_list[0]['kuo'] + " '无法匹配,无法通过编译,请检查代码正确性!") 134 | self.flag = False 135 | return 136 | 137 | if __name__ == '__main__': 138 | 139 | # 写了三个测试的c语言文件在同级目录 140 | # 其中test.c是正常的代码 141 | # error1.c和error2.c是错误的测试代码 142 | 143 | filename = input("请输入要编译的.c文件:") 144 | if filename == '': 145 | filename = 'test/test.c' 146 | w_list = word_list(filename) 147 | if w_list.flag: 148 | print("\n输出字符串如下") 149 | printf(w_list.word_list) 150 | print("\n\n输出变量表如下\n") 151 | printf(w_list.name_list) -------------------------------------------------------------------------------- /other/99mul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/99mul.png -------------------------------------------------------------------------------- /other/__pycache__/function.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/__pycache__/function.cpython-37.pyc -------------------------------------------------------------------------------- /other/__pycache__/wenfa.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/__pycache__/wenfa.cpython-37.pyc -------------------------------------------------------------------------------- /other/function.py: -------------------------------------------------------------------------------- 1 | # 环境:python3.6 2 | # 编译原理——词法分析器 3 | # 刘金明——320160939811 4 | import re 5 | 6 | # 运算符表 7 | y_list = {"+","-","*","/","<","<=",">",">=","=","==","!=","^",",","&","&&","|","||","%","~","<<",">>","!"} 8 | # 分隔符表 9 | f_list = {";","(",")","[","]","{","}", ".",":","\"","#","\'","\\","?"} 10 | # 关键字表 11 | k_list = { 12 | "auto", "break", "case", "char", "const", "continue","default", "do", "double", "else", "enum", "extern", 13 | "float", "for", "goto", "if", "int", "long","register", "return", "short", "signed", "sizeof", "static", 14 | "struct", "switch", "typedef", "union", "unsigned", "void","volatile", "while", "printf" 15 | } 16 | 17 | Cmp = ["<", ">", "==", "!=", "<=", ">="] 18 | 19 | # 正则表达式判断是否为数字 20 | def if_num(int_word): 21 | if re.match("^([0-9]{1,}[.][0-9]*)$",int_word) or re.match("^([0-9]{1,})$",int_word) == None: 22 | return False 23 | else: 24 | return True 25 | 26 | # 判断是否为为变量名 27 | def if_name(int_word): 28 | if re.match("[a-zA-Z_][a-zA-Z0-9_]*",int_word) == None: 29 | return False 30 | else: 31 | return True 32 | 33 | # 判断是否为终结符 34 | # def END_STATE(int_word): 35 | # if 36 | 37 | # 判断变量名是否已存在 38 | def have_name(name_list,name): 39 | for n in name_list: 40 | if name == n['name']: 41 | return True 42 | return False 43 | 44 | # list的换行输出 45 | def printf(lists): 46 | for l in lists: 47 | print(l) 48 | 49 | # 分割并获取文本单词 50 | # 返回值为列表out_words 51 | # 列表元素{'word':ws, 'line':line_num}分别对应单词与所在行号 52 | def get_word(filename): 53 | global f_list 54 | out_words = [] 55 | f = open(filename,'r+',encoding='UTF-8') 56 | # 先逐行读取,并记录行号 57 | lines = f.readlines() 58 | line_num = 1 59 | # 判断是否含有注释块的标识 60 | pass_block = False 61 | for line in lines: 62 | words = list(line.split()) 63 | for w in words: 64 | # 去除注释 65 | if '*/' in w: 66 | pass_block = False 67 | continue 68 | if '//' in w or pass_block: 69 | break 70 | if '/*' in w: 71 | pass_block = True 72 | break 73 | # 分析单词 74 | if w in Cmp: 75 | out_words.append({'word':w, 'line':line_num}) 76 | continue 77 | ws = w 78 | for a in w: 79 | if a in f_list or a in y_list: 80 | # index为分隔符的位置,将被分隔符或运算符隔开的单词提取 81 | index = ws.find(a) 82 | if index!=0: 83 | # 存储单词与该单词的所在行号,方便报错定位 84 | out_words.append({'word':ws[0:index], 'line':line_num}) 85 | ws = ws[index+1:] 86 | out_words.append({'word':a, 'line':line_num}) 87 | if ws!='': 88 | out_words.append({'word':ws, 'line':line_num}) 89 | line_num += 1 90 | return out_words -------------------------------------------------------------------------------- /other/help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/help.png -------------------------------------------------------------------------------- /other/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | 语法分析:使用递归的自上而下方式 3 | 作者:刘金明 4 | 博客:me.idealli.com 5 | Github:github.com/flymysql 6 | """ 7 | import sys, os, re 8 | sys.path.append(os.pardir) 9 | from lexer import word_list,k_list 10 | 11 | """ 12 | Expr -> Term ExprTail 13 | ExprTail -> + Term ExprTail 14 | | - Term ExprTail 15 | | null 16 | 17 | Term -> Factor TermTail 18 | TermTail -> * Factor TermTail 19 | | / Factor TermTail 20 | | null 21 | 22 | Factor -> (Expr) 23 | | num 24 | """ 25 | grammars = { 26 | "Program":["type M C Pro"], 27 | "C":["( cc )"], 28 | "cc":["null"], 29 | "Pro":["{ Pr }"], 30 | "Pr":["P ; Pr", "null"], 31 | "P":["type L", "L","printf OUT"], 32 | "L":["M LM"], 33 | "LM":["= E", "null"], 34 | "M":["name"], 35 | "E":["T ET"], 36 | "ET":["+ T ET", "- T ET", "null"], 37 | "T":["F TT"], 38 | "TT":["* F TT", "/ F TT", "null"], 39 | "F":["number", "BRA"], 40 | "BRA": ["( E )"], 41 | "OUT":["( \" TEXT \" , V )"], 42 | "V":["name VV", "null"], 43 | "VV":[", name VV", "null"], 44 | "END_STATE": r"(null)|(number)|(name)|(type)|(operator)|(printf)|(separator)|(TEXT)|[+\-*/=;,\")({}]" 45 | } 46 | 47 | 48 | # 运算符表 49 | operator = {"+","-","*","/","="} 50 | f_list = {";","(",")","[","]","{","}", ".",":","\"","#","\'","\\","?"} 51 | k_list = {"int", "main"} 52 | 53 | 54 | def build_ast(tokens): 55 | root = Node("Program") 56 | # 建立根节点,自上而下分析 57 | offset = root.build_ast(tokens, token_index=0) 58 | if offset == len(tokens): 59 | return root 60 | else: 61 | raise ValueError("Error Grammar4") 62 | 63 | 64 | class Node: 65 | def match_token(self, token): 66 | token_type = token['type'] 67 | token_word = token['word'] 68 | if self.type == "null" or self.type == token_type or self.type == token_word: 69 | return True 70 | return False 71 | def __init__(self, type): 72 | self.type = type 73 | self.text = None 74 | self.child = list() 75 | def build_ast(self, tokens: list, token_index=0): 76 | # 判断是否遇到终结符 77 | if re.match(grammars["END_STATE"], self.type): 78 | if self.type != "null": 79 | if token_index >= len(tokens): 80 | raise ValueError("Error Grammar1") 81 | if self.match_token(tokens[token_index]): 82 | self.text = tokens[token_index]['word'] 83 | # print(self.text, token_index) 84 | else: 85 | raise ValueError("Error Grammar2") 86 | return 1 87 | return 0 88 | 89 | # 遍历当前可能的产生式 90 | for grammar in grammars[self.type]: 91 | offset = 0 92 | # 切割下一个产生式的字符 93 | grammar_tokens = grammar.split() 94 | tmp_nodes = list() 95 | try: 96 | # 遍历下一个产生式的字符,创建新节点 97 | for grammar_token in grammar_tokens: 98 | node = Node(grammar_token) 99 | tmp_nodes.append(node) 100 | # token数组游标加上创建子节点后的游标长度 101 | offset += node.build_ast(tokens, offset+token_index) 102 | else: 103 | self.child = tmp_nodes 104 | return offset 105 | except ValueError: 106 | pass 107 | raise ValueError("Error Grammar3") 108 | 109 | # 将语法树对象字符化输出 110 | def __str__(self): 111 | childs = list() 112 | for child in self.child: 113 | childs.append(child.__str__()) 114 | out = "({type}, {text})".format(type=self.type, text=self.text) 115 | for child in childs: 116 | if child: 117 | for line in child.split("\n"): 118 | out = out + "\n\t" + line 119 | return out 120 | 121 | def __repr__(self): 122 | return self.__str__() 123 | 124 | if __name__ == "__main__": 125 | filename = 'test/test2.c' 126 | w_list = word_list(filename) 127 | word_table = w_list.word_list 128 | build_ast(word_table) 129 | print(build_ast(word_table)) 130 | print("\n\n\t小鸡提示,这是写的第一个递归方式的语法分析!\n\t请运行 LL.py 执行非递归的预测表分析方法!\n\n") 131 | -------------------------------------------------------------------------------- /other/pcc-o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/pcc-o.png -------------------------------------------------------------------------------- /pcc.py: -------------------------------------------------------------------------------- 1 | """ 2 | PCC编译器入口函数 3 | 作者:刘金明 4 | 博客:me.idealli.com 5 | Github:github.com/flymysql 6 | """ 7 | from to_asm import to_asm 8 | from generate import creat_mcode 9 | from get_predict_table import grammars 10 | from LR import analysis 11 | import os 12 | from lexer import word_list 13 | 14 | head = """ 15 | :::PCC编译器——C语言编译器,当前版本1.00 16 | :::作者:小鸡\t项目地址:https://github.com/flymysql/Py-Compiler 17 | :::查看使用帮助:pcc -h 18 | """ 19 | 20 | phelp = """\tpcc -o (filename)\t直接编译生成可执行程序 21 | \tpcc -s (filename)\t生成汇编源码 22 | \tpcc -m (filename)\t查看生成的四元式 23 | \tpcc -t (filename)\t查看语法树生成过程 24 | \tpcc -l (filename)\t查看词法分析 25 | \tpcc -p \t查看本编译器的预测分析表 26 | \tpcc -g \t查看本编译器的语法推导 27 | \texit\t退出 28 | """ 29 | 30 | def begin(): 31 | print(head) 32 | while True: 33 | print("(pcc)>>>",end="") 34 | s = input() 35 | slist = s.split() 36 | if len(slist) == 0: 37 | continue 38 | if slist[0] != "pcc" or len(slist) > 3: 39 | try: 40 | os.system(s) 41 | except: 42 | print("命令错误,请重新输入") 43 | print(phelp) 44 | continue 45 | if slist[0] == "exit": 46 | print("have a good time!") 47 | return 48 | elif slist[1] == "-h": 49 | print(phelp) 50 | elif slist[1] == "-o": 51 | try: 52 | to_asm(slist[2]) 53 | os.system("gcc " + slist[2][:-1] + "s -o "+slist[2][:-2]) 54 | print("编译成功,执行:"+slist[2][:-2]) 55 | except: 56 | print("\t编译失败!!!") 57 | elif slist[1] == "-m": 58 | mid = creat_mcode(slist[2])['mid_code'] 59 | for m in mid: 60 | print(m) 61 | elif slist[1] == "-s": 62 | try: 63 | to_asm(slist[2]) 64 | name = slist[2].split("/")[-1] 65 | # os.system("gcc -c " + slist[2][:-1] + "s && gcc " + slist[2][:-1] + "o -o " + name) 66 | print("\t编译成功,生成汇编代码"+slist[2][:-1]+"s") 67 | except: 68 | print("\t编译失败!!!") 69 | elif slist[1] == "-t": 70 | w_list = word_list(slist[2]) 71 | word_table = w_list.word_list 72 | root = analysis(word_table, True) 73 | if root[0]: 74 | print("\n\n是否继续打印语法树?(可能树很高,屏幕挤不下)\t1.打印 \t2.任意键退出") 75 | if input() == "1": 76 | print(root[1]) 77 | print("\n\n语法树打印完成!") 78 | elif slist[1] == "-l": 79 | w_list = word_list(slist[2]) 80 | if w_list.flag: 81 | print("\n输出字符串如下") 82 | for w in w_list.word_list: 83 | print(w) 84 | elif slist[1] == "-p": 85 | os.system("python get_predict_table.py") 86 | elif slist[1] == "-g": 87 | for g in grammars: 88 | print(g, grammars[g]) 89 | 90 | 91 | if __name__ == "__main__": 92 | begin() -------------------------------------------------------------------------------- /test/99mul: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/99mul -------------------------------------------------------------------------------- /test/99mul.c: -------------------------------------------------------------------------------- 1 | int main(){ 2 | // 打印99乘法表 3 | // int a[10]; 4 | int i = 1; 5 | printf("正在由pcc编译器为你打印99乘法表!\n"); 6 | while(i < 10){ 7 | int j = i; 8 | while(j < 10){ 9 | printf("%d*%d=%d\t",i, j, i*j); 10 | j = j + 1; 11 | } 12 | printf("\n"); 13 | i = i +1; 14 | } 15 | } -------------------------------------------------------------------------------- /test/99mul.s: -------------------------------------------------------------------------------- 1 | .text 2 | .section .rodata 3 | .comm T0,4,4 4 | .comm T1,4,4 5 | .comm T2,4,4 6 | .LC0: 7 | .string "正在由小pcc编译器为你打印99乘法表!\n" 8 | .LC1: 9 | .string "%d*%d=%d\t" 10 | .LC2: 11 | .string "\n" 12 | .text 13 | .globl main 14 | .type main, @function 15 | main: 16 | 17 | .cfi_startproc 18 | pushq %rbp 19 | .cfi_def_cfa_offset 16 20 | .cfi_offset 6, -16 21 | movq %rsp, %rbp 22 | .cfi_def_cfa_register 6 23 | subq $12, %rsp 24 | movl $1, -4(%rbp) 25 | movl %eax, %esi 26 | leaq .LC0(%rip), %rdi 27 | movl $0, %eax 28 | call printf@PLT 29 | .W2: 30 | movl -4(%rbp), %eax 31 | cmpl $10, %eax 32 | jle .code4 33 | jmp .block4 34 | .code4: 35 | movl -4(%rbp), %ecx 36 | movl %ecx, -8(%rbp) 37 | .W7: 38 | movl -8(%rbp), %eax 39 | cmpl $10, %eax 40 | jle .code9 41 | jmp .block9 42 | .code9: 43 | movl -8(%rbp), %eax 44 | imull -4(%rbp), %eax 45 | movl %eax, T0(%rip) 46 | movl -4(%rbp), %eax 47 | movl -8(%rbp), %edx 48 | movl T0(%rip), %ecx 49 | movl %eax, %esi 50 | leaq .LC1(%rip), %rdi 51 | movl $0, %eax 52 | call printf@PLT 53 | movl -8(%rbp), %edx 54 | movl $1, %eax 55 | addl %edx, %eax 56 | movl %eax, T1(%rip) 57 | movl T1(%rip), %ecx 58 | movl %ecx, -8(%rbp) 59 | jmp .W7 60 | .block9: 61 | movl %eax, %esi 62 | leaq .LC2(%rip), %rdi 63 | movl $0, %eax 64 | call printf@PLT 65 | movl -4(%rbp), %edx 66 | movl $1, %eax 67 | addl %edx, %eax 68 | movl %eax, T2(%rip) 69 | movl T2(%rip), %ecx 70 | movl %ecx, -4(%rbp) 71 | jmp .W2 72 | .block4: 73 | 74 | movl $0, %eax 75 | leave 76 | .cfi_def_cfa 7, 8 77 | ret 78 | .cfi_endproc 79 | .LFE6: 80 | .size main, .-main 81 | .ident "PCC: 1.0.0" 82 | -------------------------------------------------------------------------------- /test/fibonacci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/fibonacci -------------------------------------------------------------------------------- /test/fibonacci.c: -------------------------------------------------------------------------------- 1 | 2 | // 兰州小红鸡的注释测试 3 | int main(){ 4 | int arr[25]; 5 | int index = 0; 6 | // 求0~20的斐波那契数列 7 | arr[0] = 1; 8 | arr[1] = 2; 9 | arr[2] = 3; 10 | while(index < 10*2 ){ 11 | int b = arr[index]; 12 | arr[index+2]=arr[index+1] + b; 13 | printf("f(%d)=%d\n",index,b); 14 | index = index +1; 15 | } 16 | printf("完成斐波那契数列打印!由小鸡编译器提供——pcc\n"); 17 | } -------------------------------------------------------------------------------- /test/fibonacci.s: -------------------------------------------------------------------------------- 1 | .text 2 | .section .rodata 3 | .comm T0,4,4 4 | .comm T1,4,4 5 | .comm T2,4,4 6 | .comm T3,4,4 7 | .LC0: 8 | .string "f(%d)=%d\n" 9 | .LC1: 10 | .string "完成斐波那契数列打印!由小鸡编译器提供——pcc\n" 11 | .text 12 | .globl main 13 | .type main, @function 14 | main: 15 | 16 | .cfi_startproc 17 | pushq %rbp 18 | .cfi_def_cfa_offset 16 19 | .cfi_offset 6, -16 20 | movq %rsp, %rbp 21 | .cfi_def_cfa_register 6 22 | subq $120, %rsp 23 | movl $0, -8(%rbp) 24 | movl $1, -112(%rbp) 25 | movl $2, -108(%rbp) 26 | movl $3, -104(%rbp) 27 | .W4: 28 | movl -8(%rbp), %eax 29 | cmpl $20, %eax 30 | jle .code6 31 | jmp .block6 32 | .code6: 33 | movl -8(%rbp), %eax 34 | cltq 35 | movl -112(%rbp, %rax, 4), %ecx 36 | movl %ecx, -12(%rbp) 37 | movl -8(%rbp), %edx 38 | movl $1, %eax 39 | addl %edx, %eax 40 | movl %eax, T0(%rip) 41 | movl T0(%rip), %eax 42 | cltq 43 | movl -112(%rbp, %rax, 4), %edx 44 | movl -12(%rbp), %eax 45 | addl %edx, %eax 46 | movl %eax, T1(%rip) 47 | movl -8(%rbp), %edx 48 | movl $2, %eax 49 | addl %edx, %eax 50 | movl %eax, T2(%rip) 51 | movl T2(%rip), %eax 52 | cltq 53 | movl T1(%rip), %ecx 54 | movl %ecx, -112(%rbp, %rax, 4) 55 | movl -8(%rbp), %eax 56 | movl -12(%rbp), %edx 57 | movl %eax, %esi 58 | leaq .LC0(%rip), %rdi 59 | movl $0, %eax 60 | call printf@PLT 61 | movl -8(%rbp), %edx 62 | movl $1, %eax 63 | addl %edx, %eax 64 | movl %eax, T3(%rip) 65 | movl T3(%rip), %ecx 66 | movl %ecx, -8(%rbp) 67 | jmp .W4 68 | .block6: 69 | movl %eax, %esi 70 | leaq .LC1(%rip), %rdi 71 | movl $0, %eax 72 | call printf@PLT 73 | 74 | movl $0, %eax 75 | leave 76 | .cfi_def_cfa 7, 8 77 | ret 78 | .cfi_endproc 79 | .LFE6: 80 | .size main, .-main 81 | .ident "PCC: 1.0.0" 82 | -------------------------------------------------------------------------------- /test/print: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/print -------------------------------------------------------------------------------- /test/print.c: -------------------------------------------------------------------------------- 1 | int main(){ 2 | int arr[2]; 3 | arr[1] = 1; 4 | arr[0] = 0; 5 | int b = 0; 6 | 7 | int c = arr[arr[b+1]]; 8 | printf("\n\n这个例子展示了在printf语句中的参数,可以是一个表达式。\n例如b*2 +(4+5)*3 = %d", b*2 +(4+5)*3 ); 9 | printf("\n\n这个例子展示了数组内下标的可用变量表示,并且可递归嵌套\nint c = arr[arr[b+1]] = %d\n\n", c); 10 | } -------------------------------------------------------------------------------- /test/print.s: -------------------------------------------------------------------------------- 1 | .text 2 | .section .rodata 3 | .comm T0,4,4 4 | .comm T1,4,4 5 | .comm T2,4,4 6 | .LC0: 7 | .string "\n\n这个例子展示了在printf语句中的参数,可以是一个表达式。\n例如b*2+(4+5)*3=%d" 8 | .LC1: 9 | .string "\n\n这个例子展示了数组内下标的可用变量表示,并且可递归嵌套\nintc=arr[arr[b+1]]=%d\n\n" 10 | .text 11 | .globl main 12 | .type main, @function 13 | main: 14 | 15 | .cfi_startproc 16 | pushq %rbp 17 | .cfi_def_cfa_offset 16 18 | .cfi_offset 6, -16 19 | movq %rsp, %rbp 20 | .cfi_def_cfa_register 6 21 | subq $24, %rsp 22 | movl $1, -16(%rbp) 23 | movl $0, -20(%rbp) 24 | movl $0, -8(%rbp) 25 | movl -8(%rbp), %edx 26 | movl $1, %eax 27 | addl %edx, %eax 28 | movl %eax, T0(%rip) 29 | movl -20(%rbp), %eax 30 | cltq 31 | movl -20(%rbp, %rax, 4), %ecx 32 | movl %ecx, -12(%rbp) 33 | movl $2, %eax 34 | imull -8(%rbp), %eax 35 | movl %eax, T1(%rip) 36 | movl T1(%rip), %edx 37 | movl $27, %eax 38 | addl %edx, %eax 39 | movl %eax, T2(%rip) 40 | movl T2(%rip), %eax 41 | movl %eax, %esi 42 | leaq .LC0(%rip), %rdi 43 | movl $0, %eax 44 | call printf@PLT 45 | movl -12(%rbp), %eax 46 | movl %eax, %esi 47 | leaq .LC1(%rip), %rdi 48 | movl $0, %eax 49 | call printf@PLT 50 | 51 | movl $0, %eax 52 | leave 53 | .cfi_def_cfa 7, 8 54 | ret 55 | .cfi_endproc 56 | .LFE6: 57 | .size main, .-main 58 | .ident "PCC: 1.0.0" 59 | -------------------------------------------------------------------------------- /test/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/test -------------------------------------------------------------------------------- /test/test.c: -------------------------------------------------------------------------------- 1 | 2 | // 兰州小红鸡的注释测试 3 | int main(){ 4 | int arr[25]; 5 | int index = 0; 6 | // 求0~20的斐波那契数列 7 | arr[0] = 1; 8 | arr[1] = 2; 9 | arr[2] = 3; 10 | while(index < 10*2 ){ 11 | int b = arr[index]; 12 | arr[index+2]=arr[index+1] + b; 13 | printf("f(%d)=%d\n",index,b); 14 | index = index +1; 15 | } 16 | printf("完成斐波那契数列打印!由小鸡编译器提供——pcc\n"); 17 | } -------------------------------------------------------------------------------- /test/test.s: -------------------------------------------------------------------------------- 1 | .text 2 | .section .rodata 3 | .comm T0,4,4 4 | .comm T1,4,4 5 | .comm T2,4,4 6 | .comm T3,4,4 7 | .comm T4,4,4 8 | .LC0: 9 | .string "f(%d)=%d%d\n" 10 | .LC1: 11 | .string "完成斐波那契数列打印!由小鸡编译器提供——pcc\n" 12 | .text 13 | .globl main 14 | .type main, @function 15 | main: 16 | 17 | .cfi_startproc 18 | pushq %rbp 19 | .cfi_def_cfa_offset 16 20 | .cfi_offset 6, -16 21 | movq %rsp, %rbp 22 | .cfi_def_cfa_register 6 23 | subq $120, %rsp 24 | movl $0, -8(%rbp) 25 | movl $1, -112(%rbp) 26 | movl $2, -108(%rbp) 27 | movl $3, -104(%rbp) 28 | .W4: 29 | movl -8(%rbp), %eax 30 | cmpl $20, %eax 31 | jle .code6 32 | jmp .block6 33 | .code6: 34 | movl -8(%rbp), %eax 35 | cltq 36 | movl -112(%rbp, %rax, 4), %ecx 37 | movl %ecx, -12(%rbp) 38 | movl -8(%rbp), %edx 39 | movl $1, %eax 40 | addl %edx, %eax 41 | movl %eax, T0(%rip) 42 | movl T0(%rip), %eax 43 | cltq 44 | movl -112(%rbp, %rax, 4), %edx 45 | movl -12(%rbp), %eax 46 | addl %edx, %eax 47 | movl %eax, T1(%rip) 48 | movl -8(%rbp), %edx 49 | movl $2, %eax 50 | addl %edx, %eax 51 | movl %eax, T2(%rip) 52 | movl T2(%rip), %eax 53 | cltq 54 | movl T1(%rip), %ecx 55 | movl %ecx, -112(%rbp, %rax, 4) 56 | movl $2, %eax 57 | imull -12(%rbp), %eax 58 | movl %eax, T3(%rip) 59 | movl -8(%rbp), %eax 60 | movl -12(%rbp), %edx 61 | movl T3(%rip), %ecx 62 | movl %eax, %esi 63 | leaq .LC0(%rip), %rdi 64 | movl $0, %eax 65 | call printf@PLT 66 | movl -8(%rbp), %edx 67 | movl $1, %eax 68 | addl %edx, %eax 69 | movl %eax, T4(%rip) 70 | movl T4(%rip), %ecx 71 | movl %ecx, -8(%rbp) 72 | jmp .W4 73 | .block6: 74 | movl %eax, %esi 75 | leaq .LC1(%rip), %rdi 76 | movl $0, %eax 77 | call printf@PLT 78 | 79 | movl $0, %eax 80 | leave 81 | .cfi_def_cfa 7, 8 82 | ret 83 | .cfi_endproc 84 | .LFE6: 85 | .size main, .-main 86 | .ident "PCC: 1.0.0" 87 | -------------------------------------------------------------------------------- /to_asm.py: -------------------------------------------------------------------------------- 1 | """ 2 | 中间代码转汇编代码 3 | 作者:刘金明 4 | 博客:me.idealli.com 5 | Github:github.com/flymysql 6 | """ 7 | from generate import creat_mcode 8 | from other.function import if_num 9 | 10 | global_head = """ 11 | ;----------------------Welcome to Pcc-------------------------- 12 | ; by 兰州小红鸡 13 | ;------------------------------------------------------------------- 14 | """ 15 | 16 | code_head = """ 17 | \t.cfi_startproc 18 | pushq %rbp 19 | .cfi_def_cfa_offset 16 20 | .cfi_offset 6, -16 21 | movq %rsp, %rbp 22 | .cfi_def_cfa_register 6 23 | """ 24 | 25 | code_footer = """ 26 | \tmovl\t$0, %eax 27 | leave 28 | \t.cfi_def_cfa 7, 8 29 | ret 30 | \t.cfi_endproc 31 | .LFE6: 32 | .size\tmain, .-main 33 | .ident\t"PCC: 1.0.0" 34 | """ 35 | 36 | 37 | """ 38 | 两个全局变量 39 | LC 字符串计数 40 | re 存储汇编代码 41 | """ 42 | LC = 0 43 | re = "" 44 | 45 | 46 | """ 47 | agrs函数,解析变量,转为汇编语言可识别的变量 48 | n:传入的变量,name变量名表 49 | 其中带[]的为数组变量,将会进行特殊的寻址处理 50 | """ 51 | def args(n, name): 52 | global re 53 | if n in name: 54 | return "-" + name[n][0] + "(%rbp)" 55 | elif "[]" in str(n): 56 | ags = n.split("[]") 57 | if if_num(ags[1]): 58 | if name[ags[0]][1] == "char": 59 | return "-" + str(int(name[ags[0]][0])-int(ags[1])) + "(%rbp)" 60 | elif name[ags[0]][1] == "int": 61 | return "-" + str(int(name[ags[0]][0])-int(ags[1])*4) + "(%rbp)" 62 | else: 63 | re += "\tmovl\t" + args(ags[1], name) + ", %eax\n\tcltq\n" 64 | if name[ags[0]][1] == "char": 65 | return "-" + name[ags[0]][0] + "(%rbp, %rax, 1)" 66 | elif name[ags[0]][1] == "int": 67 | return "-" + name[ags[0]][0] + "(%rbp, %rax, 4)" 68 | 69 | elif "T" in str(n): 70 | return n + "(%rip)" 71 | elif if_num(str(n)): 72 | return "$" + str(n) 73 | 74 | else: 75 | return n 76 | 77 | """ 78 | 变量初始化,给每个变量初始化地址。 79 | 对于数组给予相应长度地址空间 80 | 返回值[re, len] 81 | re为变量名地址对照表, len为需要数据栈的高度(这里我规定为12的倍数) 82 | """ 83 | def init_data(name_list, arrs): 84 | re = {} 85 | i = 0 86 | for n in name_list: 87 | if n['name'] != "main": 88 | if n['flag'] == "int": 89 | i += 4 90 | re[n['name']] = [str(i), "int"] 91 | elif n['flag'] == 'char': 92 | i += 1 93 | re[n['name']] = [str(i), "char"] 94 | for a in arrs: 95 | if arrs[a][1] == "int": 96 | i += int(arrs[a][0])*4 97 | re[a] = [str(i), "int"] 98 | elif arrs[a][1] == "char": 99 | i += int(arrs[a][0]) 100 | re[a] = [str(i), "char"] 101 | return [re, (int(i/12) + 1)*12] 102 | 103 | """ 104 | 字符串初始化 105 | """ 106 | def init_string(strings): 107 | re = "" 108 | for i in range(0, len(strings)): 109 | re += ".LC" + str(i) + ":\n\t.string \"" + strings[i] + "\"\n" 110 | return re 111 | 112 | """ 113 | 汇编代码生成 114 | 传入参数: 115 | 1. midcode中间代码(四元式) 116 | 2. name变量地址参照表 117 | 可解析的汇编语句有 118 | 1. 赋值语句(op,=) 119 | 2. 四则运算(op,+-*/) 120 | 3. 跳转语句(op,j) 121 | 4. 输出语句(op,print) 122 | """ 123 | def generate_code(mid_code, name): 124 | global re 125 | re = "" 126 | for m in mid_code: 127 | # args = arg(m, name) 128 | a1 = args(m.arg1, name) 129 | a2 = args(m.arg2, name) 130 | r = args(m.re, name) 131 | if m.op == "=": 132 | 133 | if m.re in name and name[m.re][1] == "char": 134 | re += "\tmovb\t$" + str(ord(m.arg1)) + ", " + r + "\n" 135 | elif m.arg1 in name or "T" in m.arg1 or "[]" in m.arg1: 136 | re += "\tmovl\t" + a1 + ", %ecx\n" 137 | re += "\tmovl\t%ecx, " + r + "\n" 138 | else: 139 | re += "\tmovl\t" + a1 + ", " + r + "\n" 140 | elif m.op == "code_block": 141 | re += "." + m.re + ":\n" 142 | continue 143 | 144 | elif "j" in m.op: 145 | if m.op == "j": 146 | re += "\tjmp\t." + m.re + "\n" 147 | else: 148 | re += "\tmovl\t" + a1 + ", %eax\n" 149 | re += "\tcmpl\t" + a2 + ", %eax\n" 150 | if ">" in m.op: 151 | re += "\tjg\t." + m.re + "\n" 152 | elif "<" in m.op: 153 | re += "\tjle\t." + m.re + "\n" 154 | elif "=" in m.op: 155 | re += "\tje\t." + m.re + "\n" 156 | 157 | elif m.op in "+-": 158 | re += "\tmovl\t" + a1 +", %edx\n" 159 | re += "\tmovl\t" + a2 +", %eax\n" 160 | if m.op == "+": 161 | re += "\taddl\t%edx, %eax\n" 162 | else: 163 | re += "\tsubl\t%edx, %eax\n" 164 | re += "\tmovl\t%eax, " + r + "\n" 165 | 166 | elif m.op in "*/": 167 | if m.arg1 in name: 168 | re += "\tmovl\t" + a2 +", %eax\n" 169 | re += "\timull\t"+ a1 +", %eax\n" 170 | re += "\tmovl\t%eax, "+ r +"\n" 171 | elif m.arg2 in name and m.arg1 not in name: 172 | re += "\tmovl\t" + a2 +", %eax\n" 173 | re += "\timull\t"+ a1 +", %eax, %eax\n" 174 | re += "\tmovl\t%eax, "+ r +"\n" 175 | elif m.arg2 not in name and m.arg1 not in name: 176 | num = int(m.arg2)*int(m.arg1) 177 | re += "\tmovl\t$" + str(num) +", "+ r +"\n" 178 | 179 | elif m.op == "print": 180 | global LC 181 | if m.arg1 != "-1": 182 | if m.arg1 in name and name[m.arg1][1] == "char": 183 | re += "\tmovsbl\t" + a1 + ", %eax\n" 184 | else: 185 | re += "\tmovl\t" + a1 + ", %eax\n" 186 | if m.arg2 != "-1": 187 | if m.arg2 in name and name[m.arg2][1] == "char": 188 | re += "\tmovsbl\t" + a2 + ", %edx\n" 189 | else: 190 | re += "\tmovl\t" + a2 + ", %edx\n" 191 | if m.re != "-1": 192 | if m.re in name and name[m.re][1] == "char": 193 | re += "\tmovsbl\t" + r + ", %ecx\n" 194 | else: 195 | re += "\tmovl\t" + r + ", %ecx\n" 196 | re += "\tmovl\t%eax, %esi\n" + "\tleaq\t.LC" + str(LC) + "(%rip), %rdi\n" 197 | LC += 1 198 | re += "\tmovl\t$0, %eax\n\tcall\tprintf@PLT\n" 199 | 200 | return re 201 | 202 | """ 203 | 字符串拼接函数 204 | 将生成的临时变量,汇编代码,头部,结束部分等一些内容拼接在一起 205 | 传入参数: 206 | 1. tmp 临时变量(其实在代码里作为全局变量) 207 | 2. strs 字符串变量 208 | 3. code 主函数汇编代码 209 | 4. subq 数据栈高度 210 | """ 211 | def connect(tmp, strs, code, subq): 212 | data = "" 213 | for i in range(0, tmp): 214 | data += "\t.comm\tT" + str(i) + ",4,4\n" 215 | re = "\t.text\n\t.section\t.rodata\n" + data + strs + \ 216 | "\t.text\n\t.globl main\n\t.type main, @function\nmain:\n" + code_head +\ 217 | "\tsubq\t$" + str(subq) + ", %rsp\n" + code + code_footer 218 | return re 219 | 220 | """ 221 | 入口函数 222 | 生成汇编代码.s文件 223 | 后续的链接等工作将交给gcc 224 | """ 225 | def to_asm(filename): 226 | global LC 227 | LC = 0 228 | mid_result = creat_mcode(filename) 229 | mid_code = mid_result['mid_code'] 230 | name_list = mid_result['name_list'] 231 | tmp = mid_result['tmp'] 232 | strings = mid_result['strings'] 233 | arrs = mid_result['arrs'] 234 | name = init_data(name_list, arrs) 235 | string_list = init_string(strings) 236 | asm = generate_code(mid_code, name[0]) 237 | result = connect(tmp, string_list, asm, name[1]) 238 | re_asm = open(filename[:-1] + "s", "w").write(result) 239 | 240 | if __name__ == "__main__": 241 | to_asm("./test/test.c") --------------------------------------------------------------------------------