├── .gitignore
├── .vscode
    └── ipch
    │   └── 90f59cb3920d6114
    │       └── mmap_address.bin
├── LR.py
├── README.md
├── generate.py
├── get_predict_table.py
├── lexer.py
├── other
    ├── 99mul.png
    ├── __pycache__
    │   ├── function.cpython-37.pyc
    │   └── wenfa.cpython-37.pyc
    ├── function.py
    ├── help.png
    ├── parser.py
    └── pcc-o.png
├── pcc.py
├── test
    ├── 99mul
    ├── 99mul.c
    ├── 99mul.s
    ├── fibonacci
    ├── fibonacci.c
    ├── fibonacci.s
    ├── print
    ├── print.c
    ├── print.s
    ├── test
    ├── test.c
    └── test.s
└── to_asm.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .vscode


--------------------------------------------------------------------------------
/.vscode/ipch/90f59cb3920d6114/mmap_address.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/.vscode/ipch/90f59cb3920d6114/mmap_address.bin


--------------------------------------------------------------------------------
/LR.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 使用非递归的预测分析表做语法分析————语法树生成
  3 | 作者：刘金明
  4 | 博客：me.idealli.com
  5 | Github：github.com/flymysql
  6 | """
  7 | 
  8 | from get_predict_table import creat_predict_table
  9 | import re
 10 | from lexer import word_list
 11 | 
 12 | predict_table = creat_predict_table()
 13 | 
 14 | # 语法树节点
 15 | class Node:
 16 |     def __init__(self, Type, text=None):
 17 |         self.type = Type
 18 |         self.text = text
 19 |         self.child = list()
 20 |     # 将语法树对象字符化输出
 21 |     def __str__(self):
 22 |         childs = list()
 23 |         for child in self.child:
 24 |             childs.append(child.__str__())
 25 |         out = "<{type}, {text}>".format(type=self.type, text=self.text)
 26 |         for child in childs:
 27 |             if child:
 28 |                 for line in child.split("\n"):
 29 |                     out = out + "\n     " + line
 30 |         return out
 31 | 
 32 |     def __repr__(self):
 33 |         return self.__str__()
 34 | 
 35 | # 输出栈中节点的type
 36 | def stack_text(stack):
 37 |     ss = []
 38 |     for s in stack:
 39 |         ss.append(s.type)
 40 |     return ss
 41 | 
 42 | def analysis(word_table, show=False):
 43 |     stack = []
 44 |     root = Node("Program")
 45 |     End = Node("#")
 46 |     stack.append(End)
 47 |     stack.append(root)
 48 |     index = 0
 49 |     """
 50 |     分析预测表的三个状态
 51 |     1. cur = #  解析完成
 52 |     2. cur = w  输入的字符表与符号栈中节点匹配
 53 |     3. cur 为非终结符，继续生成子节点
 54 |     4. error
 55 |     """
 56 |     while len(stack) != 0:
 57 |         cur = stack.pop()
 58 |         # 状态 1
 59 |         if cur.type == "#" and len(stack) == 0:
 60 |             print("分析完成!")
 61 |             return [True, root]
 62 |         # 状态 2
 63 |         elif cur.type == word_table[index]['type']:
 64 |             if show:
 65 |                 print("符号栈：", stack_text(stack), "\n匹配字符: ", word_table[index]['word'])
 66 |             cur.text = word_table[index]['word']
 67 |             index += 1
 68 |         # 状态 3
 69 |         else:
 70 |             w = word_table[index]['type']
 71 |             if w in predict_table[cur.type]:
 72 |                 if predict_table[cur.type][w] == "null":
 73 |                     continue
 74 |                 next_pr = predict_table[cur.type][w].split()
 75 |                 if show:
 76 |                     print("\n符号栈：", stack_text(stack), "\n产生式: ", cur.type,"->", predict_table[cur.type][w])
 77 |                 node_list = []
 78 |                 """
 79 |                 产生式右部符号入栈
 80 |                 子节点入栈
 81 |                 注意：子节点入栈顺序应该与产生式符号相反
 82 |                 """
 83 |                 for np in next_pr:
 84 |                     node_list.append(Node(np))
 85 |                 for nl in node_list:
 86 |                     cur.child.append(nl)
 87 |                 node_list.reverse()
 88 |                 for nl in node_list:
 89 |                     stack.append(nl)
 90 |             # 状态 4 错误
 91 |             else:
 92 |                 print("error", stack, cur.type , word_table[index]['type'])
 93 |                 return [False]
 94 | 
 95 | if __name__ == "__main__":
 96 |     w_list = word_list("./test/test.c")
 97 |     word_table = w_list.word_list
 98 |     root = analysis(word_table, True)
 99 |     if root[0]:
100 |         print("\n\n是否继续打印语法树？\t1.打印 \t2.任意键退出\tTip：运行generate.py输出中间代码（四元式）\n请输入")
101 |         if input() == "1":
102 |             print(root[1])
103 |             print("\n\n语法树打印完成！运行 genenrate.py 生成四元式\n\n")
104 |         # print(root[1])


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## PCC——python实现编译器
  2 | 
  3 | 编译原理课设，实现源码到汇编代码的翻译，链接部分使用gcc的功能
  4 | 
  5 | ## 源码说明
  6 | 
  7 | 1. lexer.py     词法分析器
  8 | 2. get_predict_table.py     生成预测分析表
  9 | 3. LR.py        非递归的语法分析器
 10 | 4. generate.py  中间代码生成
 11 | 5. to_asm.py    汇编代码生成
 12 | 6. pcc.py       入口函数
 13 | 
 14 | ## 使用
 15 | 
 16 | ```bash
 17 | $ python pcc.py
 18 | ```
 19 | 
 20 | **命令说明**
 21 | 
 22 | ```
 23 | pcc -o (filename)       直接编译生成可执行程序
 24 | pcc -s (filename)       生成汇编源码
 25 | pcc -t (filename)       查看语法树生成过程
 26 | pcc -l (filename)       查看词法分析
 27 | pcc -h                  查看帮助
 28 | pcc -p                  查看本编译器的预测分析表
 29 | pcc -g                  查看本编译器的语法推导
 30 | exit                    退出
 31 | ```
 32 | 
 33 | ![](./other/help.png)
 34 | 
 35 | ### 编译代码
 36 | 
 37 | ```
 38 | pcc -o ./test/test.c
 39 | ```
 40 | 
 41 | #### 支持的语法
 42 | 
 43 | 1. 声明语句和赋值语句
 44 | 
 45 | 例1：
 46 | ```c
 47 | int a;
 48 | a = 10;
 49 | int b = a;
 50 | ```
 51 | 
 52 | 2. 混合四则运算
 53 | 例2：
 54 | ```c
 55 | int a = 1 + 2*(3 - 4);
 56 | ```
 57 | 
 58 | 3. 数组
 59 | 例3：
 60 | ```c
 61 | int arr[10];
 62 | arr[0] = 1;
 63 | int b = arr[0];
 64 | ```
 65 | 数组下标，即[]中内容也可以用表达式嵌套
 66 | 例4:
 67 | ```c
 68 | int index = 5;
 69 | arr[index] = 6;
 70 | arr[arr[(index+1)*2-1]] = 7;
 71 | ```
 72 | 
 73 | 4. 输出语句
 74 | 目前printf语句的参数最多可以带三个参数
 75 | 
 76 | 例子5
 77 | ```c
 78 | printf("这是个不带参数的printf");
 79 | printf("这是个参数%d",a);
 80 | printf("三个参数：%d,%d,%d",a,b,c);
 81 | 
 82 | char c="T";
 83 | printf("字符型参数:%c",c);
 84 | ```
 85 | printf语句的参数也可以是表达式
 86 | 
 87 | 例子6
 88 | ```c
 89 | printf("%d", d + 2*(3+4));
 90 | ```
 91 | 
 92 | 5. 控制语句
 93 | 
 94 | 目前仅支持if判断，可嵌套使用
 95 | 例子7
 96 | ```c
 97 | if(a < 2*10){
 98 | 	a = a + 1;
 99 | 	if(c < a){
100 | 		printf("%d", a*c);
101 | 	}
102 | }
103 | ```
104 | 
105 | 6. while控制语句
106 | 
107 | 这个不多说，也是可嵌套
108 | ```c
109 | int i = 1;
110 | printf("正在由pcc编译器为你打印99乘法表！\n");
111 | while(i < 10){
112 | 	int j = i;
113 | 	while(j < 10){
114 | 		printf("%d*%d=%d\t",i, j, i*j);
115 | 		j = j + 1;
116 | 	}
117 | 	printf("\n");
118 | 	i = i +1;
119 | }
120 | ```
121 | 
122 | #### 举个栗子（打印99乘法表）
123 | 
124 | ![](./other/99mul.png)
125 | 
126 | **源demo**
127 | ```c
128 | int main(){
129 |     // 打印99乘法表
130 |     // int a[10];
131 |     int i = 1;
132 |     printf("正在由pcc编译器为你打印99乘法表！\n");
133 |     while(i < 10){
134 |         int j = i;
135 |         while(j < 10){
136 |             printf("%d*%d=%d\t",i, j, i*j);
137 |             j = j + 1;
138 |         }
139 |         printf("\n");
140 |         i = i +1;
141 |     }
142 | }
143 | ```
144 | 
145 | #### 举个栗子（打印斐波那契数列）
146 | 
147 | ![](./other/pcc-o.png)
148 | 
149 | **C语言源码**
150 | 
151 | ```c
152 | 
153 | // 兰州小红鸡的注释测试
154 | int main(){
155 |     int arr[25];
156 |     int index = 0;
157 |     // 求０～２０的斐波那契数列
158 |     arr[0] = 1;
159 |     arr[1] = 2;
160 |     arr[2] = 3;
161 |     while(index < 10*2 ){
162 |         int b = arr[index];
163 |         arr[index+2]=arr[index+1] + b;
164 |         printf("f(%d)=%d\n",index,b);
165 |         index = index +1;
166 |     }
167 |     printf("完成斐波那契数列打印！由小鸡编译器提供——pcc\n");
168 | }
169 | ```
170 | 
171 | **生成的中间代码（四元式）**
172 | 
173 | ```
174 | (=,0,0,index)
175 | (=,1,0,arr[]0)
176 | (=,2,0,arr[]1)
177 | (=,3,0,arr[]2)
178 | (code_block,0,0,W4)
179 | (j<,index,20,code6)
180 | (j,0,0,block6)
181 | (code_block,0,0,code6)
182 | (=,arr[]index,0,b)
183 | (+,index,1,T0)
184 | (+,arr[]T0,b,T1)
185 | (+,index,2,T2)
186 | (=,T1,0,arr[]T2)
187 | (print,index,b,-1)
188 | (+,index,1,T3)
189 | (=,T3,0,index)
190 | (j,0,0,W4)
191 | (code_block,0,0,block6)
192 | (print,-1,-1,-1)
193 | ```
194 | 
195 | **生成的汇编代码**
196 | 
197 | ```s
198 | 	.text
199 | 	.section	.rodata
200 | 	.comm	T0,4,4
201 | 	.comm	T1,4,4
202 | 	.comm	T2,4,4
203 | 	.comm	T3,4,4
204 | .LC0:
205 | 	.string "f(%d)=%d\n"
206 | .LC1:
207 | 	.string "完成斐波那契数列打印！由小鸡编译器提供——pcc\n"
208 | 	.text
209 | 	.globl	main
210 | 	.type	main, @function
211 | main:
212 | 
213 | 	.cfi_startproc
214 | 	pushq	%rbp
215 | 	.cfi_def_cfa_offset 16
216 | 	.cfi_offset 6, -16
217 | 	movq	%rsp, %rbp
218 | 	.cfi_def_cfa_register 6
219 | 	subq	$120, %rsp
220 | 	movl	$0, -8(%rbp)
221 | 	movl	$0, -12(%rbp)
222 | 	movl	$1, -112(%rbp)
223 | 	movl	$2, -108(%rbp)
224 | 	movl	$3, -104(%rbp)
225 | .W5:
226 | 	movl	-12(%rbp), %eax
227 | 	cmpl	$20, %eax
228 | 	jle	.code7
229 | 	jmp	.block7
230 | .code7:
231 | 	movl	-12(%rbp), %eax
232 | 	cltq
233 | 	movl	-112(%rbp, %rax, 4), %ecx
234 | 	movl	%ecx, -8(%rbp)
235 | 	movl	-12(%rbp), %edx
236 | 	movl	$1, %eax
237 | 	addl	%edx, %eax
238 | 	movl	%eax, T0(%rip)
239 | 	movl	T0(%rip), %eax
240 | 	cltq
241 | 	movl	-112(%rbp, %rax, 4), %edx
242 | 	movl	-8(%rbp), %eax
243 | 	addl	%edx, %eax
244 | 	movl	%eax, T1(%rip)
245 | 	movl	-12(%rbp), %edx
246 | 	movl	$2, %eax
247 | 	addl	%edx, %eax
248 | 	movl	%eax, T2(%rip)
249 | 	movl	T2(%rip), %eax
250 | 	cltq
251 | 	movl	T1(%rip), %ecx
252 | 	movl	%ecx, -112(%rbp, %rax, 4)
253 | 	movl	-12(%rbp), %eax
254 | 	movl	-8(%rbp), %edx
255 | 	movl	%eax, %esi
256 | 	leaq	.LC0(%rip), %rdi
257 | 	movl	$0, %eax
258 | 	call	printf@PLT
259 | 	movl	-12(%rbp), %edx
260 | 	movl	$1, %eax
261 | 	addl	%edx, %eax
262 | 	movl	%eax, T3(%rip)
263 | 	movl	T3(%rip), %ecx
264 | 	movl	%ecx, -12(%rbp)
265 | 	jmp	.W5
266 | .block7:
267 | 	movl	%eax, %esi
268 | 	leaq	.LC1(%rip), %rdi
269 | 	movl	$0, %eax
270 | 	call	printf@PLT
271 | 
272 | 	movl	$0, %eax
273 | 	leave
274 | 	.cfi_def_cfa 7, 8
275 | 	ret
276 | 	.cfi_endproc
277 | .LFE6:
278 | 	.size	main, .-main
279 | 	.ident	"PCC: 1.0.0"
280 | 
281 | ```
282 | 
283 | 其他命令请自行发掘 hhh


--------------------------------------------------------------------------------
/generate.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 语义分析:中间代码产生——四元式
  3 | 作者：刘金明
  4 | 博客：me.idealli.com
  5 | Github：github.com/flymysql
  6 | """
  7 | # from parser import Node,build_ast
  8 | from other.function import if_num 
  9 | from LR import analysis
 10 | import sys, os, re
 11 | sys.path.append(os.pardir)
 12 | from lexer import word_list
 13 | 
 14 | operator = {
 15 |     "+": lambda a, b: a+b,
 16 |     "-": lambda a, b: a-b,
 17 |     "*": lambda a, b: a*b,
 18 |     "/": lambda a, b: a/b
 19 | }
 20 | 
 21 | """
 22 | 四元式对象
 23 | 成员：　op，arg1,arg2,result 分别对于操作数，两个变量，结果
 24 | 特殊的自定义四元式语法：
 25 |     1.  (code_block, 0, 0, block1)   代码块开始标记
 26 |     2.  (j, 0, 0, , +2)              跳转语句，往后跳两行
 27 |     3.  (j<, ａ, b, block1)          条件跳转 if(a<b) then　jmp block1
 28 |     4.  (print, 0, 0, a)             打印变量ａ
 29 | """
 30 | class Mnode:
 31 |     def __init__(self, op="undefined", a1=None, a2=None, re=None):
 32 |         self.op = op
 33 |         self.arg1 = a1
 34 |         self.arg2 = a2
 35 |         self.re = re
 36 |     """字符化输出"""
 37 |     def __str__(self):
 38 |         return "({0},{1},{2},{3})".format(self.op, self.arg1, self.arg2, self.re)
 39 | 
 40 |     def __repr__(self):
 41 |         return self.__str__()
 42 | 
 43 | """
 44 | 两个全局 mid_result 存放四元式对象
 45 | tmp记录零时变量id
 46 | """
# Module-level state shared by the tree walkers in this module.
mid_result = []   # quadruples (Mnode objects) in emission order
while_flag = []   # stack pushed by judge(): [False] for if, [True, index] for while
arr = {}          # array name -> [size/index text, element type text]
tmp = 0           # counter used to name temporaries T0, T1, ...
type_flag = ""    # text of the most recent "type" node seen by view_astree()
 52 | 
 53 | """
 54 | 递归遍历语法树
 55 | 遇到相应非终结符做相应处理，遇到终结符返回终结符，其他字符递归处理其子节点
 56 | 
 57 | """
 58 | def view_astree(root, ft=None):
 59 |     global type_flag
 60 |     if root.type == "type":
 61 |         type_flag = root.text
 62 |     if root == None or root.text == "(" or root.text == ")":
 63 |         return
 64 |     elif len(root.child) == 0 and root.text != None:
 65 |         return root.text
 66 |     if root.type == "L":
 67 |         math_op(root)
 68 |     elif root.type == "Pan":
 69 |         judge(root)
 70 |     elif root.type == "OUT":
 71 |         out(root)
 72 |     else:
 73 |         re = ""
 74 |         for c in root.child:
 75 |             cre = view_astree(c)
 76 |             if cre != None  and cre not in "[]}{)(\"'":
 77 |                 re = cre
 78 |         return re
 79 | 
 80 | def math_op(root, ft=None):
 81 |     if root == None:
 82 |         return
 83 |     elif len(root.child) == 0 and root.text != None:
 84 |         return root.text
 85 |     global mid_result
 86 |     global tmp
 87 |     global arr
 88 |     global type_flag
 89 |     """
 90 |     变量声明语句，两种情况
 91 |     1. 直接赋值
 92 |     2. 不赋值
 93 |     """
 94 |     if root.type == "L":
 95 |         
 96 |         c1 = root.child[1]
 97 |         if len(c1.child) == 1:
 98 |             mid_result.append(Mnode("=",0,0,math_op(root.child[0].child[0])))
 99 |         elif c1.child[0].type == "=":
100 |             mid_result.append(Mnode("=",math_op(c1),0,math_op(root.child[0].child[0])))
101 |         else:
102 |             if len(c1.child[1].child) >1:
103 |                 cc1 = c1.child[1]
104 |                 mid_result.append(Mnode("=",math_op(cc1),0,math_op(root.child[0].child[0]) +"[]" + math_op(c1.child[0])))
105 |             if math_op(root.child[0].child[0]) not in arr:
106 |                 arr[math_op(root.child[0].child[0])] = [math_op(c1.child[0]), type_flag]
107 |                 type_flag = ""
108 |     elif root.type == "ET" or root.type == "TT":
109 |         if len(root.child) > 1:
110 |             op = Mnode(math_op(root.child[0]))
111 |             arg1 = math_op(root.child[1])
112 |             if if_num(arg1) and if_num(ft):
113 |                 return str(operator[op](int(arg1), int(ft)))
114 | 
115 |             """
116 |             临时变量Tn
117 |             ft 为父节点传入的操作符左边部分临时id
118 |             """
119 |             t = "T" + str(tmp)
120 |             tmp += 1
121 |             mid_result.append(Mnode(op, arg1, ft,t))
122 |             ct = math_op(root.child[2], t)
123 |             if ct != None:
124 |                 return ct
125 |             return t
126 | 
127 |     elif root.type == "E" or root.type == "T":
128 |         """
129 |         赋值语句处理
130 |         如果存在右递归，进行四则运算的解析
131 |         不存在右递归的话直接赋值
132 |         """
133 |         if len(root.child[1].child) > 1:
134 |             op = math_op(root.child[1].child[0])
135 |             arg1 = math_op(root.child[0])
136 |             arg2 = math_op(root.child[1].child[1])
137 |             """静态的计算提前算好"""
138 |             if if_num(arg1) and if_num(arg2):
139 |                 return str(operator[op](int(arg1), int(arg2)))
140 | 
141 |             t = "T" + str(tmp)
142 |             tmp += 1
143 |             mid_result.append(Mnode(op, arg1, arg2,t))
144 |             ct = math_op(root.child[1].child[2], t)
145 |             if ct != None:
146 |                 return ct
147 |             return t
148 |         else:
149 |             return math_op(root.child[0])
150 |     elif root.type == "F" and len(root.child) == 2:
151 |         c = root.child
152 |         if c[1].child != [] and c[1].child[0].type == "Size":
153 |             return c[0].child[0].text + "[]" + math_op(c[1])
154 |         else:
155 |             return c[0].child[0].text
156 | 
157 |     else:
158 |         re = ""
159 |         for c in root.child:
160 |             cre = math_op(c)
161 |             if cre != None and cre not in "[]}{)(\"'":
162 |                 re = cre
163 |         return re
164 | 
165 | 
166 | """
167 | 控制语句的程序块处理
168 | 可处理语句：
169 |     １. if语句
170 |     ２. while语句
171 |     ３. if和while的相互嵌套语句
172 | """
def judge(root):
    """Generate quadruples for an if/while statement (a ``Pan`` subtree).

    Handles ``Ptype`` (keyword), ``Pbc`` (condition) and ``Pro`` (body) nodes
    and walks every other node recursively, so if and while can nest.

    Args:
        root: syntax-tree node (``type``, ``text`` and ``child`` are read),
            or None.

    Returns:
        A leaf's text; None for None input and for ``Pbc``/``Pro`` nodes;
        otherwise the last non-empty child result that is not bracket/quote
        punctuation (``""`` if none).

    Side effects:
        Appends jump and label quadruples to ``mid_result`` and pushes/pops
        ``while_flag``.
    """
    if root is None:
        return
    elif len(root.child) == 0 and root.text is not None:
        return root.text
    if root.type == "Ptype":
        if root.child[0].text == "if":
            while_flag.append([False])
        else:
            # Label the start of a while statement so the end of its body
            # can jump back to the condition.
            cur = len(mid_result)
            while_flag.append([True,cur])
            mid_result.append(Mnode("code_block", 0, 0, "W" + str(cur)))
        # No return: falls through to the generic child walk below.
    if root.type == "Pbc":
        # Condition is "(E)" or "(E1 cmp E2)". An empty PM has no children
        # (the parser attaches none for a null production); the original
        # tested "== 1" and then indexed Pm[0], raising IndexError on "if(a)".
        Pm = root.child[1].child
        # The label is computed last, after the operands have emitted their
        # own quadruples, so it matches the index used by the Pro branch.
        if len(Pm) < 2:
            mid_result.append(Mnode("j=", 1, math_op(root.child[0]),"code"+str(len(mid_result)+1)))
        else:
            mid_result.append(Mnode("j"+judge(Pm[0]), math_op(root.child[0]), math_op(Pm[1]),"code"+str(len(mid_result)+1)))
        return
    if root.type == "Pro":
        # Emitted layout around the body:
        #     j        -> blockN    (condition failed: skip the body)
        #     codeN:                (target of the conditional jump)
        #       body
        #     j        -> Wk        (while only: back to the condition)
        #     blockN:               (end of the statement)
        w = while_flag.pop()
        code_block = len(mid_result)
        code = "block" + str(code_block)
        mid_result.append(Mnode("j",0, 0,code))
        mid_result.append(Mnode("code_block",0,0,"code"+str(code_block)))
        view_astree(root)
        if w[0] == True:
            mid_result.append(Mnode("j",0,0,"W"+str(w[1])))
        mid_result.append(Mnode("code_block",0,0,code))
        return
    else:
        last = ""
        for c in root.child:
            cre = judge(c)
            # Substring test: skips "" as well as bracket/quote punctuation.
            if cre is not None and cre not in "[]}{)(\"'":
                last = cre
        return last
229 | 
230 | 
231 | """
232 | 输出处理
233 | 可处理语句：printf(a,b) 该语法：在括号内只能传入变量参数
234 | """
def out(root):
    """Emit a ``print`` quadruple for the argument list of a printf statement.

    Walks down to the ``V`` node. An empty ``V`` yields
    ``(print,-1,-1,-1)``; otherwise up to three argument values are emitted,
    padded with ``'-1'``. Arguments beyond the third are evaluated but not
    placed in the quadruple.
    """
    if root is None:
        return
    if root.type != "V":
        for sub in root.child:
            out(sub)
        return
    if len(root.child) <= 1:
        mid_result.append(Mnode("print", '-1', '-1', '-1'))
        return
    # V -> , E VV: gather the first argument, then follow the VV chain.
    args = [math_op(root.child[1])]
    rest = root.child[2]
    while len(rest.child) > 1:
        args.append(math_op(rest.child[1]))
        rest = rest.child[2]
    first, second, third = (args + ['-1', '-1', '-1'])[:3]
    mid_result.append(Mnode("print", first, second, third))
253 | 
def creat_mcode(filename):
    """Compile ``filename`` to intermediate code (quadruples).

    Resets the module-level generator state, runs the lexer and the parser,
    then walks the syntax tree.

    Args:
        filename: path of the source file handed to ``word_list``.

    Returns:
        dict with keys ``"name_list"`` (lexer name table), ``"mid_code"``
        (list of Mnode), ``"tmp"`` (number of temporaries used),
        ``"strings"`` (string literals) and ``"arrs"`` (array table).

    Raises:
        IndexError: if parsing fails, because ``analysis`` then returns
            ``[False]`` and has no tree at index 1.
    """
    global tmp
    global mid_result
    global arr
    global while_flag
    global type_flag
    arr = {}
    tmp = 0
    mid_result = []
    # Also clear state a previous (possibly failed) run may have left
    # behind; the original reset only the three globals above.
    while_flag = []
    type_flag = ""
    w_list = word_list(filename)
    word_table = w_list.word_list
    string_list = w_list.string_list
    root = analysis(word_table)[1]
    view_astree(root)

    return {"name_list":w_list.name_list, "mid_code":mid_result, "tmp":tmp, "strings":string_list, "arrs":arr}
268 |         
if __name__ == "__main__":
    # Demo run: compile the sample program and print one quadruple per line.
    generated = creat_mcode('test/test.c')
    for quad in generated["mid_code"]:
        print(quad)
274 | 


--------------------------------------------------------------------------------
/get_predict_table.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 使用非递归的预测分析表做语法分析————预测分析表的生成
  3 | 作者：刘金明
  4 | 博客：me.idealli.com
  5 | Github：github.com/flymysql
  6 | """
  7 | import sys, os, re
  8 | sys.path.append(os.pardir)
  9 | from lexer import word_list,k_list
 10 | 
# Grammar of the supported C subset.
# Keys are non-terminals; each value lists the alternative right-hand sides
# as space-separated symbols. A symbol that is not a key is a terminal
# (a token type from the lexer); "null" marks the empty production.
grammars = {
    # Program skeleton: type name ( ) { ... }
    "Program":["type M C Pro"],
    "C":["( cc )"],
    "cc":["null"],

    # Blocks and statements
    "Pro":["{ Pr }"],
    "Pr":["P Pr", "null"],
    "P":["type L ;", "L ;", "printf OUT ;", "Pan"],

    # Declarations / assignments (scalar and array)
    "L":["M LM"],
    "LM":["= FE", "Size AM","null"],
    "FE":["E", "TEXT", "CHAR"],
    "M":["name"],

    # Arithmetic expressions, written without left recursion
    "E":["T ET"],
    "ET":["+ T ET", "- T ET", "null"],
    "T":["F TT"],
    "TT":["* F TT", "/ F TT", "null"],
    "F":["number", "BRA", "M MS"],
    "MS":["Size", "null"],
    "BRA": ["( E )"],

    # printf argument list
    "OUT":["( TXT V )"],
    "TXT":['TEXT'],
    "V":[", E VV", "null"],
    "VV":[", E VV", "null"],

    # if / while statements
    "Pan":["Ptype P_block Pro"],
    "Ptype":["if", "while"],
    "P_block":["( Pbc )"],
    "Pbc":["E PM"],
    "PM":["Cmp E", "null"],

    # Array subscript and optional element assignment
    "Size":["[ E ]"],
    "AM":["= E", "null"]
}
 47 | 
# Tables filled in place by the functions below; all are keyed by non-terminal.
first_table = {}    # non-terminal -> list of FIRST symbols (may contain "null")
follow_table = {}   # non-terminal -> list of FOLLOW symbols
predict_table = {}  # non-terminal -> {lookahead terminal: production text}
observer = {}       # non-terminal -> non-terminals whose FOLLOW must include its FOLLOW
 52 | 
 53 | """
 54 | 初始化订阅者
 55 | 订阅者： 用于求follow集合的过程中特殊情况：
 56 |     非终结符的后继非终结符的first集合可能存在null
 57 |     eg： A -> BC     C -> D | null   D -> (A) | i
 58 |     那么在一次遍历过程中，因为C的first集合存在null，所以需要将follow（A）加入follow（B）
 59 |     （重点）但是！此时的follow（A），并不是完整的，它可能在后续的遍历中会继续更新自身的follow集合
 60 |     所以此时会出现遗漏的follow
 61 |     所以我在这里用到一个订阅者模式
 62 |     订阅者为一个字典，字典键值为产生式左部，字典内容为产生式右部
 63 | """
 64 | def init_observer():
 65 |     for k in grammars:
 66 |         follow_table[k] = []
 67 |         observer[k] = []
 68 |         for next_grammar in grammars[k]:
 69 |             last_k = next_grammar.split()[-1]
 70 |             if last_k in grammars and last_k != k:
 71 |                 observer[k].append(last_k) 
 72 | """
 73 | 刷新订阅
 74 | 检测到某个follow集合更新时，对其订阅的所有产生式左部的follow集合进行更新
 75 | 简而言之：follow（A）发生了更新，那么曾经将follow（A）加入自身的B，C也更新其follow
 76 | 并且，这是一个递归过程
 77 | """
 78 | def refresh(k):
 79 |     for lk in observer[k]:
 80 |         newlk = U(follow_table[k], follow_table[lk])
 81 |         if newlk != follow_table[lk]:
 82 |             follow_table[lk] = newlk
 83 |             refresh(lk)
 84 | 
 85 | """
 86 | 合并两个list并且去重
 87 | """
 88 | def U(A,B):
 89 |     return list(set(A+B))
 90 | 
 91 | """
 92 | 查找指定非终结符的first集合
 93 | """
 94 | def find_first(key):
 95 |     if key not in grammars:
 96 |         return [key]
 97 |     l = []
 98 |     for next_grammar in grammars[key]: 
 99 |         next_k = next_grammar.split()[0]
100 |         l.extend(find_first(next_k))
101 |     return l
102 | 
103 | """
104 | 查找所有非终结符follow
105 | """
def find_follow():
    """Compute the FOLLOW set of every non-terminal into ``follow_table``.

    Requires ``first_table`` to be filled (see ``get_first_table``). Makes one
    pass over the grammar; whenever a FOLLOW set grows, ``refresh`` pushes the
    new symbols to the non-terminals registered in ``observer``. ``"null"``
    is stripped from every set at the end.
    """
    init_observer()
    follow_table["Program"] = ["#"]
    for k in grammars:
        for next_grammar in grammars[k]:
            next_k = next_grammar.split()

            for i in range(0, len(next_k) - 1):
                sym = next_k[i]
                if sym not in grammars:
                    continue
                succ = next_k[i + 1]
                if succ not in grammars:
                    # The next symbol is a terminal: it follows sym directly.
                    new_follow = U([succ], follow_table[sym])
                else:
                    new_follow = U(first_table[succ], follow_table[sym])
                    # If the next non-terminal can be empty, FOLLOW(k) also
                    # follows sym; register sym so later growth of FOLLOW(k)
                    # reaches it too.
                    if "null" in first_table[succ]:
                        new_follow = U(follow_table[k], new_follow)
                        observer[k].append(sym)
                # Set comparison: the lists come from sets, so element order
                # is not meaningful.
                if set(new_follow) != set(follow_table[sym]):
                    follow_table[sym] = new_follow
                    refresh(sym)
            # FOLLOW of the left-hand side also follows a trailing
            # non-terminal. (init_observer already created every entry, so
            # no membership check is needed.)
            last = next_k[-1]
            if last in grammars and last != k:
                merged = U(follow_table[last], follow_table[k])
                if set(merged) != set(follow_table[last]):
                    follow_table[last] = merged
                    # Propagate this growth as well; the original updated
                    # the set without notifying its observers.
                    refresh(last)

    for k in follow_table:
        if "null" in follow_table[k]:
            follow_table[k].remove("null")
146 | 
147 | """
148 | 获取所有非终结符的first集合
149 | 在此同时直接将first集合加入predict表中
150 | """
def get_first_table():
    """Fill ``first_table`` and seed ``predict_table`` from the FIRST sets.

    For each alternative of each non-terminal, every FIRST symbol other than
    ``"null"`` selects that alternative in the predict table.
    """
    for head, bodies in grammars.items():
        row = predict_table[head] = {}
        firsts = first_table[head] = []
        for body in bodies:
            leading = find_first(body.split()[0])
            firsts.extend(leading)
            row.update((sym, body) for sym in leading if sym != "null")
162 | 
163 | """
164 | 将follow集合中的部分内容加入predict表中
165 | """
def get_predict_table():
    """Add FOLLOW-based entries to ``predict_table``.

    An alternative that is ``"null"``, or whose leading non-terminal has
    ``"null"`` in its FIRST set, is selected for every symbol in the FOLLOW
    set of its left-hand side.
    """
    for head, bodies in grammars.items():
        for body in bodies:
            lead = body.split()[0]
            if lead == "null":
                can_vanish = True
            else:
                can_vanish = lead in grammars and "null" in first_table[lead]
            if not can_vanish:
                continue
            for terminal in follow_table[head]:
                predict_table[head][terminal] = body
173 | 
174 | 
def creat_predict_table():
    """Build FIRST, FOLLOW and the predict table; return the predict table."""
    for step in (get_first_table, find_follow, get_predict_table):
        step()
    return predict_table
180 | 
def show_tables():
    """Rebuild all tables and print FIRST sets, FOLLOW sets and the predict table."""
    creat_predict_table()
    sections = (
        ("\nfirst集合如下\n", first_table),
        ("\nfollow集合如下\n", follow_table),
        ("\n预测表如下\n", predict_table),
    )
    for title, table in sections:
        print(title)
        for key, value in table.items():
            print(key, value)
195 | 
# When run as a script, print the FIRST/FOLLOW sets and the predict table.
if __name__ == "__main__":
    show_tables()


--------------------------------------------------------------------------------
/lexer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 环境：python3.6
  3 | 作者：刘金明
  4 | 博客：me.idealli.com
  5 | Github：github.com/flymysql
  6 | """
  7 | 
  8 | import re
  9 | # 一些判断函数和字符分割函数放在同级文件function.py中
 10 | import sys, os
 11 | sys.path.append(os.pardir)  # 为了导入父目录的文件而进行的设定
 12 | from other.function import if_num, if_name, have_name, printf, get_word
 13 | 
# Operator table
y_list = {"+","-","*","/","<","<=",">",">=","=","==","!=","^",",","&","&&","|","||","%","~","<<",">>","!"}
# Separator table
f_list = {";","(",")","[","]","{","}", ".",":","\"","#","\'","\\","?"}
# Keyword table (each keyword becomes its own token type)
k_list = {
    "auto", "break", "case", "const", "continue","default", "do",  "else", "enum", "extern",
  "for", "goto", "if", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union",  "volatile", "while", "printf"
}

# Comparison operators; creat_table checks these before y_list and emits token type "Cmp"
Cmp = ["<", ">", "==", "!=", ">=", "<="]

# Type names; emitted with token type "type"
Type = {"int","float","char","double","void","long","unsigned","string"}
# NOTE(review): creat_table assigns a local of the same name (no `global`
# statement), so this module-level value appears never to be updated there.
type_flag = ""
# Opening bracket -> matching closing bracket, used for the pairing check
kuo_cp = {'{':'}', '[':']', '(':')'}
 31 | 
 32 | # 词法分析器输出对象
 33 | # 成员变量：输出的单词表，源代码中的分隔符表,运算符表,变量表,关键字表
 34 | # 一个方法，将源代码字符切割并存入对应表中
 35 | # 对象创建实例需要传入filename参数，默认为test.c
 36 | class word_list():
 37 |     def __init__(self, filename='test.c'):
 38 |         self.word_list = []          # 输出单词列表
 39 |         self.separator_list = []     # 分隔符
 40 |         self.operator_list = []      # 运算符
 41 |         self.name_list = []          # 变量
 42 |         self.key_word_table = []     # 关键字
 43 |         self.string_list = []
 44 |         self.flag = True             # 源代码是否正确标识
 45 |         
 46 |         # get_word函数将源代码切割
 47 |         self.creat_table(get_word(filename))
 48 | 
 49 |     # 创建各个表
 50 |     def creat_table(self, in_words):
 51 |         name_id = 0
 52 |         kuo_list = []           # 存储括号并判断是否完整匹配
 53 |         char_flag = False
 54 |         str_flag = False
 55 |         string_list = []
 56 |         strings = ""
 57 |         chars = ""
 58 |         for word in in_words:
 59 |             w = word['word']
 60 |             line = word['line']
 61 |             if w == '"':
 62 |                 if str_flag == False:
 63 |                     str_flag = True
 64 |                 else:
 65 |                     str_flag = False
 66 |                     self.word_list.append({'line':line, 'type':'TEXT', 'word':strings})
 67 |                     self.string_list.append(strings)
 68 |                     strings = ""
 69 |                 # self.word_list.append({'line':line, 'type':w, 'word':w})
 70 |                 continue
 71 |             # 判断是否为字符串
 72 |             if str_flag == True:
 73 |                 strings += w
 74 |                 continue
 75 |             if w == "'":
 76 |                 if char_flag == False:
 77 |                     char_flag = True
 78 |                 else:
 79 |                     char_flag = False
 80 |                     self.word_list.append({'line':line, 'type':'CHAR', 'word':chars})
 81 |                     chars = ""
 82 |                 continue
 83 |             if char_flag == True:
 84 |                 chars += w
 85 |                 continue
 86 |             # 判断为关键字
 87 |             if w in k_list:
 88 |                 self.key_word_table.append({'line':line, 'type':'keyword', 'word':w})
 89 |                 self.word_list.append({'line':line, 'type':w, 'word':w})
 90 |             elif w in Cmp:
 91 |                 self.word_list.append({'line':line, 'type':"Cmp", 'word':w})
 92 |             # 判断为关键字
 93 |             elif w in Type:
 94 |                 type_flag = w
 95 |                 self.key_word_table.append({'line':line, 'type':'type', 'word':w})
 96 |                 self.word_list.append({'line':line, 'type':'type', 'word':w})
 97 |             # 判断为运算符
 98 |             elif w in y_list:
 99 |                 self.operator_list.append({'line':line, 'type':'operator', 'word':w})
100 |                 self.word_list.append({'line':line, 'type':w, 'word':w})
101 |             # 判断为分隔符
102 |             elif w in f_list:
103 |                 if w in kuo_cp.values() or w in kuo_cp.keys():
104 |                     # 左括号入栈
105 |                     if w in kuo_cp.keys():
106 |                         kuo_list.append({'kuo':w, 'line':line})
107 |                     # 右括号判断是否匹配并出栈
108 |                     elif w == kuo_cp[kuo_list[-1]['kuo']]:
109 |                         kuo_list.pop()
110 |                     else:
111 |                         print("小金提醒：在第" + str(line) + "行的' " + w + " '无法匹配，无法通过编译，请检查代码正确性！")
112 |                         self.flag = False
113 |                         return
114 |                 self.separator_list.append({'line':line, 'type':'separator', 'word':w})
115 |                 self.word_list.append({'line':line, 'type':w, 'word':w})
116 |             # 其他字符处理
117 |             else:
118 |                 if if_num(w):
119 |                     self.word_list.append({'line':line, 'type':'number', 'word':w})
120 |                 # 如果是变量名要判断是否已经存在
121 |                 elif if_name(w):
122 |                     if have_name(self.name_list,w):
123 |                         self.word_list.append({'line':line, 'type':'name', 'word':w, 'id':name_id})
124 |                     else:
125 |                         self.name_list.append({'line':line, 'id':name_id, 'word':0.0, 'name':w, 'flag':type_flag})
126 |                         self.word_list.append({'line':line, 'type':'name', 'word':w, 'id':name_id})
127 |                         name_id += 1
128 |                 else:
129 |                     print("小金提醒：在第" + str(line) + "行的变量名' " + w + " '不可识别，无法通过编译，请检查代码正确性！")
130 |                     self.flag = False
131 |                     return
132 |         if kuo_list!=[]:
133 |             print("小金提醒：在第" + str(kuo_list[0]['line']) + "行的' " + kuo_list[0]['kuo'] + " '无法匹配，无法通过编译，请检查代码正确性！")
134 |             self.flag = False
135 |             return
136 |  
if __name__ == '__main__':

    # Manual smoke test for the lexer.
    # Per the original note, three sample C files sit next to this script:
    # test.c is valid code, error1.c and error2.c are deliberately broken.

    answer = input("请输入要编译的.c文件:")
    # An empty answer selects the bundled sample program.
    filename = 'test/test.c' if answer == '' else answer
    w_list = word_list(filename)
    # flag is only true when lexing finished without reporting an error.
    if w_list.flag:
        print("\n输出字符串如下")
        printf(w_list.word_list)
        print("\n\n输出变量表如下\n")
        printf(w_list.name_list)


--------------------------------------------------------------------------------
/other/99mul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/99mul.png


--------------------------------------------------------------------------------
/other/__pycache__/function.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/__pycache__/function.cpython-37.pyc


--------------------------------------------------------------------------------
/other/__pycache__/wenfa.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/__pycache__/wenfa.cpython-37.pyc


--------------------------------------------------------------------------------
/other/function.py:
--------------------------------------------------------------------------------
 1 | # 环境：python3.6
 2 | # 编译原理——词法分析器
 3 | # 刘金明——320160939811
 4 | import re
 5 | 
# Operator table (note that "," is listed here, not among the separators)
y_list = {"+","-","*","/","<","<=",">",">=","=","==","!=","^",",","&","&&","|","||","%","~","<<",">>","!"}
# Separator table
f_list = {";","(",")","[","]","{","}", ".",":","\"","#","\'","\\","?"}
# Keyword table
k_list = {
    "auto", "break", "case", "char", "const", "continue","default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto", "if", "int", "long","register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void","volatile", "while", "printf"
}

# Comparison operators; get_word emits a whitespace-delimited word found in
# this list as a single token instead of splitting it character by character.
Cmp = ["<", ">", "==", "!=", "<=", ">="]
18 | 
def if_num(int_word):
    """Return True when *int_word* is an unsigned integer or decimal literal.

    Accepted forms are one or more digits, optionally followed by a dot and
    zero or more digits (``"12"``, ``"3.14"``, ``"7."``).  Anything else,
    including the empty string, yields False.

    The whole string must match: ``re.fullmatch`` is used so that neither a
    trailing newline nor trailing garbage is tolerated.
    """
    # A single pattern covers both forms.  Testing the two forms with
    # ``match(a) or match(b) == None`` compares only the second result with
    # None, which makes every decimal literal look like a non-number.
    return re.fullmatch(r"[0-9]+([.][0-9]*)?", int_word) is not None
25 | 
def if_name(int_word):
    """Return True when *int_word* is a valid identifier.

    An identifier starts with an ASCII letter or underscore and continues
    with ASCII letters, digits or underscores.  The entire string has to
    match, so ``"ab@c"`` is rejected rather than accepted on the strength of
    its ``"ab"`` prefix.
    """
    return re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", int_word) is not None
32 | 
33 | # 判断是否为终结符
34 | # def END_STATE(int_word):
35 | #     if 
36 | 
def have_name(name_list,name):
    """Report whether *name* is already recorded in *name_list*.

    *name_list* is an iterable of dicts; an entry matches when its ``'name'``
    value equals *name*.  Returns True on the first match, False otherwise.
    """
    return any(name == entry['name'] for entry in name_list)
43 | 
def printf(lists):
    """Print every element of *lists* on a line of its own."""
    for entry in lists:
        print(entry)
48 | 
def get_word(filename):
    """Split the text of *filename* into words and return them with line numbers.

    The file is read as UTF-8.  Each line is split on whitespace; every
    resulting word is then cut at each character found in the module-level
    ``f_list`` (separators) or ``y_list`` (operators), and those characters
    become words of their own.  A whitespace-delimited word that appears in
    ``Cmp`` is kept whole.

    Comments are dropped at word granularity:

    * a word containing ``//`` ends processing of the current line;
    * a word containing ``/*`` opens a block comment, which stays open until
      a later word (on the same or a following line) contains ``*/``;
    * a word containing ``*/`` is itself always discarded.

    Returns a list of ``{'word': str, 'line': int}`` dicts with 1-based line
    numbers, in source order.
    """
    out_words = []
    # Read-only mode is enough ('r+' would refuse read-only files), and the
    # context manager closes the handle even if reading fails.
    with open(filename, 'r', encoding='UTF-8') as f:
        lines = f.readlines()
    # True while we are inside an unterminated /* ... */ block
    pass_block = False
    for line_num, line in enumerate(lines, start=1):
        for w in line.split():
            if pass_block:
                # Keep scanning every word, not just the first one on the
                # line, so a closing marker anywhere ends the comment.
                if '*/' in w:
                    pass_block = False
                continue
            if '*/' in w:
                continue
            if '//' in w:
                break
            if '/*' in w:
                pass_block = True
                continue
            # Whole-word comparison operators stay in one piece
            if w in Cmp:
                out_words.append({'word':w, 'line':line_num})
                continue
            ws = w
            for a in w:
                if a in f_list or a in y_list:
                    # ws is the not-yet-emitted tail of w, so the first
                    # occurrence of a in it is the character being visited
                    index = ws.find(a)
                    if index!=0:
                        # text between the previous delimiter and this one
                        out_words.append({'word':ws[0:index], 'line':line_num})
                    ws = ws[index+1:]
                    out_words.append({'word':a, 'line':line_num})
            if ws!='':
                out_words.append({'word':ws, 'line':line_num})
    return out_words


--------------------------------------------------------------------------------
/other/help.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/help.png


--------------------------------------------------------------------------------
/other/parser.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 语法分析:使用递归的自上而下方式
  3 | 作者：刘金明
  4 | 博客：me.idealli.com
  5 | Github：github.com/flymysql
  6 | """
  7 | import sys, os, re
  8 | sys.path.append(os.pardir)
  9 | from lexer import word_list,k_list
 10 | 
 11 | """
 12 | Expr      ->    Term ExprTail
 13 | ExprTail  ->    + Term ExprTail
 14 |           |     - Term ExprTail
 15 |           |     null
 16 | 
 17 | Term      ->    Factor TermTail
 18 | TermTail  ->    * Factor TermTail
 19 |           |     / Factor TermTail
 20 |           |     null
 21 | 
 22 | Factor    ->    (Expr)
 23 |           |     num
 24 | """
# Production table used by Node.build_ast.
# Each key is a non-terminal; its value lists the alternative right-hand
# sides, tried in order, with symbols separated by single spaces.  The symbol
# "null" stands for the empty production and consumes no token.
grammars = {
    "Program":["type M C Pro"],
    "C":["( cc )"],
    "cc":["null"],
    "Pro":["{ Pr }"],
    "Pr":["P ; Pr", "null"],
    "P":["type L", "L","printf OUT"],
    "L":["M LM"],
    "LM":["= E", "null"],
    "M":["name"],
    "E":["T ET"],
    "ET":["+ T ET", "- T ET", "null"],
    "T":["F TT"],
    "TT":["* F TT", "/ F TT", "null"],
    "F":["number", "BRA"],
    "BRA": ["( E )"],
    "OUT":["( \" TEXT \" , V )"],
    "V":["name VV", "null"],
    "VV":[", name VV", "null"],
    # Not a production: a regex that Node.build_ast applies with re.match to a
    # symbol name to decide whether that symbol is a terminal.
    "END_STATE": r"(null)|(number)|(name)|(type)|(operator)|(printf)|(separator)|(TEXT)|[+\-*/=;,\")({}]"
}


# Operator table
operator = {"+","-","*","/","="}
# Separator table
f_list = {";","(",")","[","]","{","}", ".",":","\"","#","\'","\\","?"}
# Rebinds the k_list name imported from lexer at the top of this file
k_list = {"int", "main"}
 52 | 
 53 | 
 54 | def build_ast(tokens):
 55 |     root = Node("Program")
 56 |     # 建立根节点，自上而下分析
 57 |     offset = root.build_ast(tokens, token_index=0)
 58 |     if offset == len(tokens):
 59 |         return root
 60 |     else:
 61 |         raise ValueError("Error Grammar4")
 62 | 
 63 | 
 64 | class Node:
 65 |     def match_token(self, token):
 66 |         token_type = token['type']
 67 |         token_word = token['word']
 68 |         if self.type == "null" or self.type == token_type or self.type == token_word:
 69 |             return True
 70 |         return False
 71 |     def __init__(self, type):
 72 |         self.type = type
 73 |         self.text = None
 74 |         self.child = list()
 75 |     def build_ast(self, tokens: list, token_index=0):
 76 |         # 判断是否遇到终结符
 77 |         if re.match(grammars["END_STATE"], self.type):
 78 |             if self.type != "null":
 79 |                 if token_index >= len(tokens):
 80 |                     raise ValueError("Error Grammar1")
 81 |                 if self.match_token(tokens[token_index]):
 82 |                     self.text = tokens[token_index]['word']
 83 |                     # print(self.text, token_index)
 84 |                 else:
 85 |                     raise ValueError("Error Grammar2")
 86 |                 return 1
 87 |             return 0
 88 | 
 89 |         # 遍历当前可能的产生式
 90 |         for grammar in grammars[self.type]:
 91 |             offset = 0
 92 |             # 切割下一个产生式的字符
 93 |             grammar_tokens = grammar.split()
 94 |             tmp_nodes = list()
 95 |             try:
 96 |                 # 遍历下一个产生式的字符，创建新节点
 97 |                 for grammar_token in grammar_tokens:
 98 |                     node = Node(grammar_token)
 99 |                     tmp_nodes.append(node)
100 |                     # token数组游标加上创建子节点后的游标长度
101 |                     offset += node.build_ast(tokens, offset+token_index)
102 |                 else:
103 |                     self.child = tmp_nodes
104 |                     return offset
105 |             except ValueError:
106 |                 pass
107 |         raise ValueError("Error Grammar3")
108 | 
109 |     # 将语法树对象字符化输出
110 |     def __str__(self):
111 |         childs = list()
112 |         for child in self.child:
113 |             childs.append(child.__str__())
114 |         out = "({type}, {text})".format(type=self.type, text=self.text)
115 |         for child in childs:
116 |             if child:
117 |                 for line in child.split("\n"):
118 |                         out = out + "\n\t" + line
119 |         return out
120 | 
121 |     def __repr__(self):
122 |         return self.__str__()
123 | 
if __name__ == "__main__":
    # Demo driver: lex a sample file, parse it and print the syntax tree.
    filename = 'test/test2.c'
    w_list = word_list(filename)
    word_table = w_list.word_list
    # Parse exactly once; build_ast raises ValueError on a grammar error, so
    # a second call would only repeat the same work.
    print(build_ast(word_table))
    print("\n\n\t小鸡提示，这是写的第一个递归方式的语法分析！\n\t请运行　LL.py　执行非递归的预测表分析方法！\n\n")
131 | 


--------------------------------------------------------------------------------
/other/pcc-o.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/other/pcc-o.png


--------------------------------------------------------------------------------
/pcc.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PCC编译器入口函数
 3 | 作者：刘金明
 4 | 博客：me.idealli.com
 5 | Github：github.com/flymysql
 6 | """
 7 | from to_asm import to_asm
 8 | from generate import creat_mcode
 9 | from get_predict_table import grammars
10 | from LR import analysis
11 | import os
12 | from lexer import word_list
13 | 
# Banner printed once when the interactive shell starts (see begin()).
head = """
:::PCC编译器——Ｃ语言编译器，当前版本1.00
:::作者：小鸡\t项目地址:https://github.com/flymysql/Py-Compiler
:::查看使用帮助：pcc -h
"""

# Usage text: printed for `pcc -h` and by the error path of begin().
phelp = """\tpcc -o (filename)\t直接编译生成可执行程序
\tpcc -s (filename)\t生成汇编源码
\tpcc -m (filename)\t查看生成的四元式
\tpcc -t (filename)\t查看语法树生成过程
\tpcc -l (filename)\t查看词法分析
\tpcc -p \t查看本编译器的预测分析表
\tpcc -g \t查看本编译器的语法推导
\texit\t退出
"""
29 | 
def begin():
    """Run the interactive PCC shell until the user enters ``exit``.

    Every input line is split on whitespace.  ``exit`` as the first word ends
    the loop.  A line whose first word is not ``pcc``, or that has more than
    three words, is passed unchanged to the system shell.  Otherwise the
    second word selects the action:

    * ``-h``       print the usage text
    * ``-o FILE``  write the assembly file for FILE, then run gcc on it
    * ``-s FILE``  write the assembly file only
    * ``-m FILE``  print the intermediate code
    * ``-t FILE``  run the parser and optionally print the syntax tree
    * ``-l FILE``  print the token list
    * ``-p``       run get_predict_table.py
    * ``-g``       print the grammar table

    A bare ``pcc``, an unknown option or a missing FILE prints the usage text
    instead of raising.
    """
    needs_file = ("-o", "-s", "-m", "-t", "-l")
    print(head)
    while True:
        print("(pcc)>>>",end="")
        s = input()
        slist = s.split()
        if not slist:
            continue
        # Must be tested before the shell pass-through below: "exit" is not
        # "pcc", so it would otherwise be sent to os.system and never quit.
        if slist[0] == "exit":
            print("have a good time!")
            return
        if slist[0] != "pcc" or len(slist) > 3:
            try:
                os.system(s)
            except Exception:
                print("命令错误，请重新输入")
                print(phelp)
            continue
        if len(slist) < 2:
            print(phelp)
            continue
        option = slist[1]
        if option in needs_file and len(slist) < 3:
            print("命令错误，请重新输入")
            print(phelp)
            continue
        if option == "-h":
            print(phelp)
        elif option == "-o":
            try:
                to_asm(slist[2])
                # "x.c" -> assemble "x.s" into the executable "x"
                status = os.system("gcc " + slist[2][:-1] + "s -o "+slist[2][:-2])
            except Exception:
                print("\t编译失败！！！")
            else:
                # os.system reports gcc's exit status; only claim success
                # when gcc actually succeeded.
                if status == 0:
                    print("编译成功，执行："+slist[2][:-2])
                else:
                    print("\t编译失败！！！")
        elif option == "-m":
            mid = creat_mcode(slist[2])['mid_code']
            for m in mid:
                print(m)
        elif option == "-s":
            try:
                to_asm(slist[2])
                print("\t编译成功，生成汇编代码"+slist[2][:-1]+"s")
            except Exception:
                print("\t编译失败！！！")
        elif option == "-t":
            w_list = word_list(slist[2])
            word_table = w_list.word_list
            root = analysis(word_table, True)
            # root[0] is the success indicator, root[1] the tree to print
            if root[0]:
                print("\n\n是否继续打印语法树？(可能树很高，屏幕挤不下)\t1.打印 \t2.任意键退出")
                if input() == "1":
                    print(root[1])
                    print("\n\n语法树打印完成！")
        elif option == "-l":
            w_list = word_list(slist[2])
            if w_list.flag:
                print("\n输出字符串如下")
                for w in w_list.word_list:
                    print(w)
        elif option == "-p":
            os.system("python get_predict_table.py")
        elif option == "-g":
            for g in grammars:
                print(g, grammars[g])
        else:
            print(phelp)
89 |         
90 | 
# Start the interactive shell only when this file is run as a script.
if __name__ == "__main__":
    begin()


--------------------------------------------------------------------------------
/test/99mul:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/99mul


--------------------------------------------------------------------------------
/test/99mul.c:
--------------------------------------------------------------------------------
// Prints a multiplication table: one row per i from 1 to 9, with j running from i to 9.
int main(){
    // Print the 9x9 multiplication table
    // int a[10];
    int i = 1;
    printf("正在由pcc编译器为你打印99乘法表！\n");
    // Outer loop: one output row per value of i
    while(i < 10){
        // Start j at i so each product appears only once
        int j = i;
        while(j < 10){
            printf("%d*%d=%d\t",i, j, i*j);
            j = j + 1;
        }
        // End the current row
        printf("\n");
        i = i +1;
    }
}


--------------------------------------------------------------------------------
/test/99mul.s:
--------------------------------------------------------------------------------
 1 | 	.text
 2 | 	.section	.rodata
 3 | 	.comm	T0,4,4
 4 | 	.comm	T1,4,4
 5 | 	.comm	T2,4,4
 6 | .LC0:
 7 | 	.string "正在由小pcc编译器为你打印99乘法表！\n"
 8 | .LC1:
 9 | 	.string "%d*%d=%d\t"
10 | .LC2:
11 | 	.string "\n"
12 | 	.text
13 | 	.globl	main
14 | 	.type	main, @function
15 | main:
16 | 
17 | 	.cfi_startproc
18 | 	pushq	%rbp
19 | 	.cfi_def_cfa_offset 16
20 | 	.cfi_offset 6, -16
21 | 	movq	%rsp, %rbp
22 | 	.cfi_def_cfa_register 6
23 | 	subq	$12, %rsp
24 | 	movl	$1, -4(%rbp)
25 | 	movl	%eax, %esi
26 | 	leaq	.LC0(%rip), %rdi
27 | 	movl	$0, %eax
28 | 	call	printf@PLT
29 | .W2:
30 | 	movl	-4(%rbp), %eax
31 | 	cmpl	$10, %eax
32 | 	jle	.code4
33 | 	jmp	.block4
34 | .code4:
35 | 	movl	-4(%rbp), %ecx
36 | 	movl	%ecx, -8(%rbp)
37 | .W7:
38 | 	movl	-8(%rbp), %eax
39 | 	cmpl	$10, %eax
40 | 	jle	.code9
41 | 	jmp	.block9
42 | .code9:
43 | 	movl	-8(%rbp), %eax
44 | 	imull	-4(%rbp), %eax
45 | 	movl	%eax, T0(%rip)
46 | 	movl	-4(%rbp), %eax
47 | 	movl	-8(%rbp), %edx
48 | 	movl	T0(%rip), %ecx
49 | 	movl	%eax, %esi
50 | 	leaq	.LC1(%rip), %rdi
51 | 	movl	$0, %eax
52 | 	call	printf@PLT
53 | 	movl	-8(%rbp), %edx
54 | 	movl	$1, %eax
55 | 	addl	%edx, %eax
56 | 	movl	%eax, T1(%rip)
57 | 	movl	T1(%rip), %ecx
58 | 	movl	%ecx, -8(%rbp)
59 | 	jmp	.W7
60 | .block9:
61 | 	movl	%eax, %esi
62 | 	leaq	.LC2(%rip), %rdi
63 | 	movl	$0, %eax
64 | 	call	printf@PLT
65 | 	movl	-4(%rbp), %edx
66 | 	movl	$1, %eax
67 | 	addl	%edx, %eax
68 | 	movl	%eax, T2(%rip)
69 | 	movl	T2(%rip), %ecx
70 | 	movl	%ecx, -4(%rbp)
71 | 	jmp	.W2
72 | .block4:
73 | 
74 | 	movl	$0, %eax
75 | 	leave
76 | 	.cfi_def_cfa 7, 8
77 | 	ret
78 | 	.cfi_endproc
79 | .LFE6:
80 | 	.size	main, .-main
81 | 	.ident	"PCC: 1.0.0"
82 | 


--------------------------------------------------------------------------------
/test/fibonacci:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/fibonacci


--------------------------------------------------------------------------------
/test/fibonacci.c:
--------------------------------------------------------------------------------
 1 | 
// Comment test by Lanzhou Xiaohongji
int main(){
    int arr[25];
    int index = 0;
    // Compute a Fibonacci-style sequence while index is below 10 times 2
    // Seed the first three terms
    arr[0] = 1;
    arr[1] = 2;
    arr[2] = 3;
    while(index < 10*2 ){
        // Each pass prints arr[index] and stores arr[index+1] + arr[index] two slots ahead
        int b = arr[index];
        arr[index+2]=arr[index+1] + b;
        printf("f(%d)=%d\n",index,b);
        index = index +1;
    }
    printf("完成斐波那契数列打印！由小鸡编译器提供——pcc\n");
}


--------------------------------------------------------------------------------
/test/fibonacci.s:
--------------------------------------------------------------------------------
 1 | 	.text
 2 | 	.section	.rodata
 3 | 	.comm	T0,4,4
 4 | 	.comm	T1,4,4
 5 | 	.comm	T2,4,4
 6 | 	.comm	T3,4,4
 7 | .LC0:
 8 | 	.string "f(%d)=%d\n"
 9 | .LC1:
10 | 	.string "完成斐波那契数列打印！由小鸡编译器提供——pcc\n"
11 | 	.text
12 | 	.globl	main
13 | 	.type	main, @function
14 | main:
15 | 
16 | 	.cfi_startproc
17 | 	pushq	%rbp
18 | 	.cfi_def_cfa_offset 16
19 | 	.cfi_offset 6, -16
20 | 	movq	%rsp, %rbp
21 | 	.cfi_def_cfa_register 6
22 | 	subq	$120, %rsp
23 | 	movl	$0, -8(%rbp)
24 | 	movl	$1, -112(%rbp)
25 | 	movl	$2, -108(%rbp)
26 | 	movl	$3, -104(%rbp)
27 | .W4:
28 | 	movl	-8(%rbp), %eax
29 | 	cmpl	$20, %eax
30 | 	jle	.code6
31 | 	jmp	.block6
32 | .code6:
33 | 	movl	-8(%rbp), %eax
34 | 	cltq
35 | 	movl	-112(%rbp, %rax, 4), %ecx
36 | 	movl	%ecx, -12(%rbp)
37 | 	movl	-8(%rbp), %edx
38 | 	movl	$1, %eax
39 | 	addl	%edx, %eax
40 | 	movl	%eax, T0(%rip)
41 | 	movl	T0(%rip), %eax
42 | 	cltq
43 | 	movl	-112(%rbp, %rax, 4), %edx
44 | 	movl	-12(%rbp), %eax
45 | 	addl	%edx, %eax
46 | 	movl	%eax, T1(%rip)
47 | 	movl	-8(%rbp), %edx
48 | 	movl	$2, %eax
49 | 	addl	%edx, %eax
50 | 	movl	%eax, T2(%rip)
51 | 	movl	T2(%rip), %eax
52 | 	cltq
53 | 	movl	T1(%rip), %ecx
54 | 	movl	%ecx, -112(%rbp, %rax, 4)
55 | 	movl	-8(%rbp), %eax
56 | 	movl	-12(%rbp), %edx
57 | 	movl	%eax, %esi
58 | 	leaq	.LC0(%rip), %rdi
59 | 	movl	$0, %eax
60 | 	call	printf@PLT
61 | 	movl	-8(%rbp), %edx
62 | 	movl	$1, %eax
63 | 	addl	%edx, %eax
64 | 	movl	%eax, T3(%rip)
65 | 	movl	T3(%rip), %ecx
66 | 	movl	%ecx, -8(%rbp)
67 | 	jmp	.W4
68 | .block6:
69 | 	movl	%eax, %esi
70 | 	leaq	.LC1(%rip), %rdi
71 | 	movl	$0, %eax
72 | 	call	printf@PLT
73 | 
74 | 	movl	$0, %eax
75 | 	leave
76 | 	.cfi_def_cfa 7, 8
77 | 	ret
78 | 	.cfi_endproc
79 | .LFE6:
80 | 	.size	main, .-main
81 | 	.ident	"PCC: 1.0.0"
82 | 


--------------------------------------------------------------------------------
/test/print:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/print


--------------------------------------------------------------------------------
/test/print.c:
--------------------------------------------------------------------------------
// Exercises two features: an expression as a printf argument and nested array subscripts.
int main(){
    int arr[2];
    arr[1] = 1;
    arr[0] = 0;
    int b = 0;

    // The inner subscript is b+1, so the outer subscript is the value stored in arr[1]
    int c = arr[arr[b+1]];
    // The second printf argument is an arithmetic expression, not a plain variable
    printf("\n\n这个例子展示了在printf语句中的参数，可以是一个表达式。\n例如b*2 +(4+5)*3 = %d", b*2 +(4+5)*3 );
    printf("\n\n这个例子展示了数组内下标的可用变量表示，并且可递归嵌套\nint c = arr[arr[b+1]] = %d\n\n", c);
}


--------------------------------------------------------------------------------
/test/print.s:
--------------------------------------------------------------------------------
 1 | 	.text
 2 | 	.section	.rodata
 3 | 	.comm	T0,4,4
 4 | 	.comm	T1,4,4
 5 | 	.comm	T2,4,4
 6 | .LC0:
 7 | 	.string "\n\n这个例子展示了在printf语句中的参数，可以是一个表达式。\n例如b*2+(4+5)*3=%d"
 8 | .LC1:
 9 | 	.string "\n\n这个例子展示了数组内下标的可用变量表示，并且可递归嵌套\nintc=arr[arr[b+1]]=%d\n\n"
10 | 	.text
11 | 	.globl	main
12 | 	.type	main, @function
13 | main:
14 | 
15 | 	.cfi_startproc
16 | 	pushq	%rbp
17 | 	.cfi_def_cfa_offset 16
18 | 	.cfi_offset 6, -16
19 | 	movq	%rsp, %rbp
20 | 	.cfi_def_cfa_register 6
21 | 	subq	$24, %rsp
22 | 	movl	$1, -16(%rbp)
23 | 	movl	$0, -20(%rbp)
24 | 	movl	$0, -8(%rbp)
25 | 	movl	-8(%rbp), %edx
26 | 	movl	$1, %eax
27 | 	addl	%edx, %eax
28 | 	movl	%eax, T0(%rip)
29 | 	movl	-20(%rbp), %eax
30 | 	cltq
31 | 	movl	-20(%rbp, %rax, 4), %ecx
32 | 	movl	%ecx, -12(%rbp)
33 | 	movl	$2, %eax
34 | 	imull	-8(%rbp), %eax
35 | 	movl	%eax, T1(%rip)
36 | 	movl	T1(%rip), %edx
37 | 	movl	$27, %eax
38 | 	addl	%edx, %eax
39 | 	movl	%eax, T2(%rip)
40 | 	movl	T2(%rip), %eax
41 | 	movl	%eax, %esi
42 | 	leaq	.LC0(%rip), %rdi
43 | 	movl	$0, %eax
44 | 	call	printf@PLT
45 | 	movl	-12(%rbp), %eax
46 | 	movl	%eax, %esi
47 | 	leaq	.LC1(%rip), %rdi
48 | 	movl	$0, %eax
49 | 	call	printf@PLT
50 | 
51 | 	movl	$0, %eax
52 | 	leave
53 | 	.cfi_def_cfa 7, 8
54 | 	ret
55 | 	.cfi_endproc
56 | .LFE6:
57 | 	.size	main, .-main
58 | 	.ident	"PCC: 1.0.0"
59 | 


--------------------------------------------------------------------------------
/test/test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flymysql/Py-Compiler/a23870b1806b3e61e3d7e4a39f31b338d7b18f0f/test/test


--------------------------------------------------------------------------------
/test/test.c:
--------------------------------------------------------------------------------
 1 | 
// Comment test by Lanzhou Xiaohongji
int main(){
    int arr[25];
    int index = 0;
    // Compute a Fibonacci-style sequence while index is below 10 times 2
    // Seed the first three terms
    arr[0] = 1;
    arr[1] = 2;
    arr[2] = 3;
    while(index < 10*2 ){
        // Each pass prints arr[index] and stores arr[index+1] + arr[index] two slots ahead
        int b = arr[index];
        arr[index+2]=arr[index+1] + b;
        printf("f(%d)=%d\n",index,b);
        index = index +1;
    }
    printf("完成斐波那契数列打印！由小鸡编译器提供——pcc\n");
}


--------------------------------------------------------------------------------
/test/test.s:
--------------------------------------------------------------------------------
 1 | 	.text
 2 | 	.section	.rodata
 3 | 	.comm	T0,4,4
 4 | 	.comm	T1,4,4
 5 | 	.comm	T2,4,4
 6 | 	.comm	T3,4,4
 7 | 	.comm	T4,4,4
 8 | .LC0:
 9 | 	.string "f(%d)=%d%d\n"
10 | .LC1:
11 | 	.string "完成斐波那契数列打印！由小鸡编译器提供——pcc\n"
12 | 	.text
13 | 	.globl	main
14 | 	.type	main, @function
15 | main:
16 | 
17 | 	.cfi_startproc
18 | 	pushq	%rbp
19 | 	.cfi_def_cfa_offset 16
20 | 	.cfi_offset 6, -16
21 | 	movq	%rsp, %rbp
22 | 	.cfi_def_cfa_register 6
23 | 	subq	$120, %rsp
24 | 	movl	$0, -8(%rbp)
25 | 	movl	$1, -112(%rbp)
26 | 	movl	$2, -108(%rbp)
27 | 	movl	$3, -104(%rbp)
28 | .W4:
29 | 	movl	-8(%rbp), %eax
30 | 	cmpl	$20, %eax
31 | 	jle	.code6
32 | 	jmp	.block6
33 | .code6:
34 | 	movl	-8(%rbp), %eax
35 | 	cltq
36 | 	movl	-112(%rbp, %rax, 4), %ecx
37 | 	movl	%ecx, -12(%rbp)
38 | 	movl	-8(%rbp), %edx
39 | 	movl	$1, %eax
40 | 	addl	%edx, %eax
41 | 	movl	%eax, T0(%rip)
42 | 	movl	T0(%rip), %eax
43 | 	cltq
44 | 	movl	-112(%rbp, %rax, 4), %edx
45 | 	movl	-12(%rbp), %eax
46 | 	addl	%edx, %eax
47 | 	movl	%eax, T1(%rip)
48 | 	movl	-8(%rbp), %edx
49 | 	movl	$2, %eax
50 | 	addl	%edx, %eax
51 | 	movl	%eax, T2(%rip)
52 | 	movl	T2(%rip), %eax
53 | 	cltq
54 | 	movl	T1(%rip), %ecx
55 | 	movl	%ecx, -112(%rbp, %rax, 4)
56 | 	movl	$2, %eax
57 | 	imull	-12(%rbp), %eax
58 | 	movl	%eax, T3(%rip)
59 | 	movl	-8(%rbp), %eax
60 | 	movl	-12(%rbp), %edx
61 | 	movl	T3(%rip), %ecx
62 | 	movl	%eax, %esi
63 | 	leaq	.LC0(%rip), %rdi
64 | 	movl	$0, %eax
65 | 	call	printf@PLT
66 | 	movl	-8(%rbp), %edx
67 | 	movl	$1, %eax
68 | 	addl	%edx, %eax
69 | 	movl	%eax, T4(%rip)
70 | 	movl	T4(%rip), %ecx
71 | 	movl	%ecx, -8(%rbp)
72 | 	jmp	.W4
73 | .block6:
74 | 	movl	%eax, %esi
75 | 	leaq	.LC1(%rip), %rdi
76 | 	movl	$0, %eax
77 | 	call	printf@PLT
78 | 
79 | 	movl	$0, %eax
80 | 	leave
81 | 	.cfi_def_cfa 7, 8
82 | 	ret
83 | 	.cfi_endproc
84 | .LFE6:
85 | 	.size	main, .-main
86 | 	.ident	"PCC: 1.0.0"
87 | 


--------------------------------------------------------------------------------
/to_asm.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 中间代码转汇编代码
  3 | 作者：刘金明
  4 | 博客：me.idealli.com
  5 | Github：github.com/flymysql
  6 | """
  7 | from generate import creat_mcode
  8 | from other.function import if_num
  9 | 
# Banner text; not referenced by any function in this module.
global_head = """
;----------------------Welcome to Pcc--------------------------
; by 兰州小红鸡
;-------------------------------------------------------------------
"""

# Function prologue that connect() emits right after the "main:" label.
code_head = """
\t.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
"""

# Function epilogue that connect() appends after the generated body:
# sets %eax to 0, then leave/ret.
code_footer = """
\tmovl\t$0, %eax
	leave
\t.cfi_def_cfa 7, 8
	ret
\t.cfi_endproc
.LFE6:
	.size\tmain, .-main
	.ident\t"PCC: 1.0.0"
"""


# The bare string below is the original note for the two module globals:
# LC counts the string labels used so far, re accumulates the assembly text.
"""
两个全局变量
LC 字符串计数
re 存储汇编代码
"""
LC = 0
re = ""
 44 | 
 45 | 
 46 | """
 47 | agrs函数，解析变量，转为汇编语言可识别的变量
 48 | n：传入的变量，name变量名表
 49 | 其中带[]的为数组变量，将会进行特殊的寻址处理
 50 | """
 51 | def args(n, name):
 52 |     global re
 53 |     if n in name:
 54 |         return "-" + name[n][0] + "(%rbp)"
 55 |     elif "[]" in str(n):
 56 |         ags = n.split("[]")
 57 |         if if_num(ags[1]):
 58 |             if name[ags[0]][1] == "char":
 59 |                 return "-" + str(int(name[ags[0]][0])-int(ags[1])) + "(%rbp)"
 60 |             elif name[ags[0]][1] == "int":
 61 |                 return "-" + str(int(name[ags[0]][0])-int(ags[1])*4) + "(%rbp)"
 62 |         else:
 63 |             re += "\tmovl\t" + args(ags[1], name) + ", %eax\n\tcltq\n"
 64 |             if name[ags[0]][1] == "char":
 65 |                 return "-" + name[ags[0]][0] + "(%rbp, %rax, 1)"
 66 |             elif name[ags[0]][1] == "int":
 67 |                 return "-" + name[ags[0]][0] + "(%rbp, %rax, 4)"
 68 | 
 69 |     elif "T" in str(n):
 70 |         return n + "(%rip)"
 71 |     elif if_num(str(n)):
 72 |         return "$" + str(n)
 73 |     
 74 |     else:
 75 |         return n
 76 | 
 77 | """
 78 | 变量初始化，给每个变量初始化地址。
 79 | 对于数组给予相应长度地址空间
 80 | 返回值[re, len]
 81 | re为变量名地址对照表， len为需要数据栈的高度（这里我规定为12的倍数）
 82 | """
 83 | def init_data(name_list, arrs):
 84 |     re = {}
 85 |     i = 0
 86 |     for n in name_list:
 87 |         if n['name'] != "main":
 88 |             if n['flag'] == "int":
 89 |                 i += 4
 90 |                 re[n['name']] = [str(i), "int"]
 91 |             elif n['flag'] == 'char':
 92 |                 i += 1
 93 |                 re[n['name']] = [str(i), "char"]
 94 |     for a in arrs:
 95 |         if arrs[a][1] == "int":
 96 |             i += int(arrs[a][0])*4
 97 |             re[a] = [str(i), "int"]
 98 |         elif arrs[a][1] == "char":
 99 |             i += int(arrs[a][0])
100 |             re[a] = [str(i), "char"]
101 |     return [re, (int(i/12) + 1)*12]
102 | 
def init_string(strings):
    """Build the string-constant section of the assembly file.

    The k-th entry of *strings* becomes a ``.LCk:`` label followed by a
    ``.string`` directive holding that text.  Returns the concatenated block,
    or an empty string when there are no entries.
    """
    return "".join(
        ".LC" + str(k) + ":\n\t.string \"" + strings[k] + "\"\n"
        for k in range(len(strings))
    )
111 | 
def generate_code(mid_code, name):
    """Translate the intermediate code into the assembly body of main.

    Parameters:
        mid_code: iterable of quadruples exposing ``op``, ``arg1``, ``arg2``
            and ``re`` attributes.
        name: variable table from init_data (name -> [offset, type]).

    Handled operations:
        * ``=``            assignment
        * ``code_block``   emits the label ``.<re>:``
        * ``j``            unconditional jump to ``.<re>``
        * ``j<cmp>``       compares arg1 with arg2 and jumps to ``.<re>``
                           when ``arg1 <cmp> arg2`` holds
        * ``+ - * /``      32-bit signed arithmetic, result stored in ``re``
        * ``print``        printf call with up to three integer arguments

    The text is accumulated in the module-level ``re`` (which args() also
    appends to) and returned.  ``LC`` is advanced once per print.
    """
    global re
    global LC
    re = ""
    # Comparison spelling -> jump taken when "arg1 <cmp> arg2" is true.
    # (After `cmpl a2, %eax` with %eax = arg1 the flags describe arg1 - arg2.)
    jumps = {"<": "jl", "<=": "jle", ">": "jg", ">=": "jge",
             "==": "je", "=": "je", "!=": "jne"}
    for m in mid_code:
        a1 = args(m.arg1, name)
        a2 = args(m.arg2, name)
        r = args(m.re, name)
        if m.op == "=":

            if m.re in name and name[m.re][1] == "char":
                re += "\tmovb\t$" + str(ord(m.arg1)) + ", " + r + "\n"
            elif m.arg1 in name or "T" in m.arg1 or "[]" in m.arg1:
                # memory-to-memory moves do not exist; go through %ecx
                re += "\tmovl\t" + a1 + ", %ecx\n"
                re += "\tmovl\t%ecx, " + r + "\n"
            else:
                re += "\tmovl\t" + a1 + ", " + r + "\n"
        elif m.op == "code_block":
            re += "." + m.re + ":\n"
            continue

        elif "j" in m.op:
            if m.op == "j":
                re += "\tjmp\t." + m.re + "\n"
            else:
                re += "\tmovl\t" + a1 + ", %eax\n"
                re += "\tcmpl\t" + a2 + ", %eax\n"
                jump = jumps.get(m.op.replace("j", "", 1))
                if jump is None:
                    # Unrecognised spelling: fall back to a substring guess
                    if ">" in m.op:
                        jump = "jg"
                    elif "<" in m.op:
                        jump = "jle"
                    elif "=" in m.op:
                        jump = "je"
                if jump is not None:
                    re += "\t" + jump + "\t." + m.re + "\n"

        elif m.op in ("+", "-"):
            if m.op == "+":
                re += "\tmovl\t" + a1 +", %edx\n"
                re += "\tmovl\t" + a2 +", %eax\n"
                re += "\taddl\t%edx, %eax\n"
            else:
                # `subl %edx, %eax` computes %eax - %edx, so the left operand
                # (arg1) must end up in %eax and the right one in %edx.
                re += "\tmovl\t" + a2 +", %edx\n"
                re += "\tmovl\t" + a1 +", %eax\n"
                re += "\tsubl\t%edx, %eax\n"
            re += "\tmovl\t%eax, " + r + "\n"

        elif m.op in ("*", "/"):
            constant = str(m.arg1).isdigit() and str(m.arg2).isdigit()
            if m.op == "*":
                if constant:
                    # both operands are literals: fold at compile time
                    num = int(m.arg1)*int(m.arg2)
                    re += "\tmovl\t$" + str(num) +", "+ r +"\n"
                else:
                    # An operand addressed through %rax has to be loaded
                    # first, before %eax is overwritten.
                    if "%rax" in a1:
                        first, second = a1, a2
                    else:
                        first, second = a2, a1
                    re += "\tmovl\t" + first +", %eax\n"
                    re += "\timull\t"+ second +", %eax\n"
                    re += "\tmovl\t%eax, "+ r +"\n"
            else:
                if constant and int(m.arg2) != 0:
                    # non-negative literals, so // equals C integer division
                    num = int(m.arg1)//int(m.arg2)
                    re += "\tmovl\t$" + str(num) +", "+ r +"\n"
                else:
                    # idivl takes no immediate: park the divisor in %ecx,
                    # sign-extend the dividend into %edx:%eax, then divide.
                    re += "\tmovl\t" + a2 +", %ecx\n"
                    re += "\tmovl\t" + a1 +", %eax\n"
                    re += "\tcltd\n"
                    re += "\tidivl\t%ecx\n"
                    re += "\tmovl\t%eax, "+ r +"\n"

        elif m.op == "print":
            # "-1" marks an unused argument slot
            if m.arg1 != "-1":
                if m.arg1 in name and name[m.arg1][1] == "char":
                    re += "\tmovsbl\t" + a1 + ", %eax\n"
                else:
                    re += "\tmovl\t" + a1 + ", %eax\n"
            if m.arg2 != "-1":
                if m.arg2 in name and name[m.arg2][1] == "char":
                    re += "\tmovsbl\t" + a2 + ", %edx\n"
                else:
                    re += "\tmovl\t" + a2 + ", %edx\n"
            if m.re != "-1":
                if m.re in name and name[m.re][1] == "char":
                    re += "\tmovsbl\t" + r + ", %ecx\n"
                else:
                    re += "\tmovl\t" + r + ", %ecx\n"
            # format string in %rdi, first value in %esi; %eax = 0 before the
            # variadic call
            re += "\tmovl\t%eax, %esi\n" + "\tleaq\t.LC" + str(LC) + "(%rip), %rdi\n"
            LC += 1
            re += "\tmovl\t$0, %eax\n\tcall\tprintf@PLT\n"

    return re
201 | 
def connect(tmp, strs, code, subq):
    """Assemble the complete text of the .s file.

    Parameters:
        tmp: number of temporaries; each one gets a ``.comm Tk,4,4`` line.
        strs: the string-constant block (see init_string).
        code: the assembly body of main (see generate_code).
        subq: number of bytes to reserve on the stack.

    Returns the read-only data, the ``main`` header, the prologue, the stack
    reservation, *code* and the epilogue concatenated in that order.
    """
    temporaries = "".join("\t.comm\tT" + str(k) + ",4,4\n" for k in range(tmp))
    rodata = "\t.text\n\t.section\t.rodata\n"
    entry = "\t.text\n\t.globl	main\n\t.type	main, @function\nmain:\n"
    reserve = "\tsubq\t$" + str(subq) + ", %rsp\n"
    return rodata + temporaries + strs + entry + code_head + reserve + code + code_footer
219 | 
def to_asm(filename):
    """Compile the C file *filename* and write the assembly next to it.

    The output path is *filename* with its last character replaced by "s"
    (``x.c`` -> ``x.s``).  Assembling and linking are left to gcc.
    Resets the string-label counter ``LC`` before generating code.
    """
    global LC
    LC = 0
    mid_result = creat_mcode(filename)
    mid_code = mid_result['mid_code']
    name_list = mid_result['name_list']
    tmp = mid_result['tmp']
    strings = mid_result['strings']
    arrs = mid_result['arrs']
    # name[0] is the variable table, name[1] the stack-frame size
    name = init_data(name_list, arrs)
    string_list = init_string(strings)
    asm = generate_code(mid_code, name[0])
    result = connect(tmp, string_list, asm, name[1])
    # Close the file deterministically, and write UTF-8 explicitly because
    # string constants may hold non-ASCII text.
    with open(filename[:-1] + "s", "w", encoding="utf-8") as out:
        out.write(result)
239 | 
# When run as a script, compile the bundled sample program.
if __name__ == "__main__":
    to_asm("./test/test.c")


--------------------------------------------------------------------------------