├── README.md └── code ├── 词法分析器优化版.py ├── 词法分析器.py ├── 词法分析器优化版2.py ├── 语法分析LL1.py ├── 词法test.py ├── 消除回溯.py ├── lexical_Analysis.py ├── LL1.py ├── LL1分析.py ├── LL1test.py └── 消除左递归.py /README.md: -------------------------------------------------------------------------------- 1 | # python for 编译原理作业 2 | 3 | 包括 **词法分析、语法分析、 LL1分析器** 4 | 5 | 6 | 7 | ## 访问量 8 | ![Visitor Count](https://profile-counter.glitch.me/python-university/count.svg) 9 | -------------------------------------------------------------------------------- /code/词法分析器优化版.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | #关键字 4 | keyword = {'main':1, 'int':2, 'char':3, 'if':4, 'else':5, 'for':6, 'while':7,'return':8,'void':9} 5 | #运算符和边界符 6 | Symbol = {'=': 21, '+': 22, '-': 23, '*': 24, '/': 25, '(': 26, ')': 27, '[': 28, ']': 29, '{': 30, 7 | '}': 31, ',': 32, ':': 33, ';': 34, '>': 35, '<': 36, '>=': 37, '<=': 38, '==': 39, '!=': 40} 8 | 9 | #总正则表达式 10 | # all=re.compile('(\d+\.\d+[eE][-+]?\d+|\d+\.\d+|[1-9]\d*|0[0-7]+|0x[0-9a-fA-F]+|[a-zA-Z_]\w*|>>|<<|::|->|\.|\+=|\-=|\*=|/=|%=|>=|<=|==|!=|&&|\|\||\+|\-|\*|/|=|>|<|!|^|%|~|\?|:|,|;|\(|\)|\[|\]|\{|\}|\'|\")') 11 | all=re.compile('(\d+\.\d+[eE][-+]?\d+|\d+\.\d+|[1-9]\d*|0[0-7]+|0x[0-9a-fA-F]+|[a-zA-Z_]\w*|' 12 | '\".*\"|>>|<<|::|->|\.|\+=|\-=|\*=|/=|%=|>=|<=|==|!=|&&|\|\||\+|\-|\*|/|=|>|' 13 | '<|!|^|%|~|\?|:|,|;|\(|\)|\[|\]|\{|\}|\"|\')') 14 | 15 | 16 | def Judge(s): 17 | if s[0].isalpha() and s in keyword: #判断关键字 18 | print('( ',s,'->',keyword[s],' )') 19 | elif s[0].isalpha() and s not in keyword and s.isalnum(): #判断标识符 20 | print('( ', s, '->', 10, ' )') 21 | elif s.isdigit(): #判断数字 22 | print('( ', s, '->', 20, ' )') 23 | elif s in Symbol: #判断运算符或边界符 24 | print('( ', s, '->', Symbol[s], ' )') 25 | else: 26 | if len(s) >= 2 and s[0] == '"' and s[-1] == '"': #判断字符串 27 | print('( ',s, '->', 50, ' )') 28 | else: #没定义或者错误串 29 | print('( ',s,'->','暂无定义',' )') 30 | 31 | 32 | 33 | 34 | if __name__ == '__main__': 35 | #读取文件 36 | f = open('E://test.txt', 'r') 37 | result = [] 38 | for line in f: 39 | if len(line) == 1: 40 | result.extend(line) 41 | else: 42 | result.extend(all.findall(line)) 43 | 44 | # 去掉列表中残留的空字符 45 | for i in result: 46 | if '' in result: 47 | result.remove('') 48 | 49 | #词法分析 50 | for i in result: 51 | Judge(i) -------------------------------------------------------------------------------- /code/词法分析器.py: -------------------------------------------------------------------------------- 1 | keyword = {'main':1, 'int':2, 'char':3, 'if':4, 'else':5, 'for':6, 'while':7,'return':8,'void':9} 2 | 3 | Symbol = {'=': 21, '+': 22, '-': 23, '*': 24, '/': 25, '(': 26, ')': 27, '[': 28, ']': 29, '{': 30, 4 | '}': 31, ',': 32, ':': 33, ';': 34, '>': 35, '<': 36, '>=': 37, '<=': 38, '==': 39, '!=': 40} 5 | import re 6 | all=re.compile('([0-9]+[a-z|A-Z|_]+[0-9]*|\d+\.\d+[eE][-+]?\d+|\d+\.\d+|[1-9]\d*|0[0-7]+|0x[0-9a-fA-F]+|' 7 | '[a-z|A-Z|0-9|_]*\".*?\"[a-z|A-Z|0-9|_]*|[a-zA-Z_]\w*|\".*\"|>>|<<|' 8 | '::|->|\+=|\-=|\*=|/=|%=|>=|<=|==|!=|&&|\|\||\+|\-|\*|/|=|>|' 9 | '<|:|,|;|\(|\)|\[|\]|\{|\}|\"|\')') 10 | 11 | 12 | pattern = re.compile('^"(.*)"') 13 | def Judge(s): 14 | if s[0].isalpha() and s in keyword: #判断关键字 15 | print('( ',s,'->',keyword[s],' )') 16 | elif s[0].isalpha() and s not in keyword and s.isalnum(): #判断标识符 17 | print('( ', s, '->', 10, ' )') 18 | elif s.isdigit(): #判断数字 19 | print('( ', s, '->', 20, ' )') 20 | elif s in Symbol: #判断运算符或边界符 21 | print('( ', s, '->', Symbol[s], ' )') 22 | else: 23 | if len(s) >= 2 and s[0] == '"' and s[-1] == '"': #判断字符串 24 | print('( ', ''.join(pattern.findall(s)), '->', 50, ' )') 25 | else: #没定义或者错误串 26 | print('( ',s,'->','暂无定义',' )') 27 | 28 | 29 | 30 | if __name__ == '__main__': 31 | f = open('E://test.txt','r') 32 | line_s = [] 33 | for line in f: 34 | temp = line.strip().split() 35 | line_s.extend(temp) 36 | # print(line_s) 37 | result = [] 38 | for index1 in range(len(line_s)): 39 | flag = True 40 | temp_s = [] 41 | for index2 in range(len(line_s[index1])): 42 | ss = [] 43 | if re.search(all,line_s[index1]): 44 | ss.extend(all.findall(line_s[index1])) 45 | flag = False 46 | if flag: 47 | temp_s.append(line_s[index1]) 48 | else: 49 | temp_s.extend(ss) 50 | result.extend(temp_s) 51 | #去掉列表中残留的空字符 52 | for i in result: 53 | if '' in result: 54 | result.remove('') 55 | #词法分析 56 | for i in result: 57 | Judge(i) 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /code/词法分析器优化版2.py: -------------------------------------------------------------------------------- 1 | import re 2 | keyword = {'main':1, 'int':2, 'char':3, 'if':4, 'else':5, 'for':6, 'while':7,'return':8,'void':9} 3 | 4 | Symbol = {'=': 21, '+': 22, '-': 23, '*': 24, '/': 25, '(': 26, ')': 27, '[': 28, ']': 29, '{': 30, 5 | '}': 31, ',': 32, ':': 33, ';': 34, '>': 35, '<': 36, '>=': 37, '<=': 38, '==': 39, '!=': 40} 6 | 7 | #总正则表达式 8 | # all=re.compile('(\d+\.\d+[eE][-+]?\d+|\d+\.\d+|[1-9]\d*|0[0-7]+|0x[0-9a-fA-F]+|[a-zA-Z_]\w*|>>|<<|::|->|\.|\+=|\-=|\*=|/=|%=|>=|<=|==|!=|&&|\|\||\+|\-|\*|/|=|>|<|!|^|%|~|\?|:|,|;|\(|\)|\[|\]|\{|\}|\'|\")') 9 | all=re.compile('([0-9]+[a-z|A-Z|_]+[0-9]*|\d+\.\d+[eE][-+]?\d+|\d+\.\d+|[1-9]\d*|0[0-7]+|0x[0-9a-fA-F]+|' 10 | '[a-z|A-Z|0-9|_]*\".*?\"[a-z|A-Z|0-9|_]*|[a-zA-Z_]\w*|\".*\"|>>|<<|' 11 | '::|->|\+=|\-=|\*=|/=|%=|>=|<=|==|!=|&&|\|\||\+|\-|\*|/|=|>|' 12 | '<|!|\^|%|\~|\?|:|,|;|\(|\)|\[|\]|\{|\}|\"|\')') 13 | 14 | 15 | def Judge(s): 16 | if s[0].isalpha() and s in keyword: #判断关键字 17 | print('( ',s,'->',keyword[s],' )') 18 | elif s[0].isalpha() and s not in keyword and s.isalnum(): #判断标识符 19 | print('( ', s, '->', 10, ' )') 20 | elif s.isdigit(): #判断数字 21 | print('( ', s, '->', 20, ' )') 22 | elif s in Symbol: #判断运算符或边界符 23 | print('( ', s, '->', Symbol[s], ' )') 24 | else: 25 | if len(s) >= 2 and s[0] == '"' and s[-1] == '"': #判断字符串 26 | print('( ',s, '->', 50, ' )') 27 | else: #没定义或者错误串 28 | print('( ',s,'->','暂无定义',' )') 29 | 30 | if __name__ == '__main__': 31 | #读取文件 32 | f = open('E://test.txt', 'r') 33 | result = [] 34 | for line in f: 35 | if len(line) == 1: 36 | result.extend(line) 37 | else: 38 | result.extend(all.findall(line)) 39 | print(result) 40 | # 去掉列表中残留的空字符 41 | for i in result: 42 | if '' in result: 43 | result.remove('') 44 | 45 | #词法分析 46 | for i in result: 47 | Judge(i) 48 | 49 | 50 | 51 | 52 | # import re 53 | # all=re.compile('([0-9]+[a-z|A-Z|_]+|\d+\.\d+[eE][-+]?\d+|\d+\.\d+|[1-9]\d*|0[0-7]+|0x[0-9a-fA-F]+|' 54 | # '[a-z|A-Z|0-9|_]*\".*\"[a-z|A-Z|0-9|_]*|[a-zA-Z_]\w*|\".*\"|>>|<<|' 55 | # '::|->|\+=|\-=|\*=|/=|%=|>=|<=|==|!=|&&|\|\||\+|\-|\*|/|=|>|' 56 | # '<|!|^|%|~|\?|:|,|;|\(|\)|\[|\]|\{|\}|\"|\')') 57 | # 58 | # ss = 'int main(){ int a = 1;"}' 59 | # list = [] 60 | # f = open('E://test.txt','r') 61 | # for line in f: 62 | # if len(line) == 1: 63 | # list.extend(line) 64 | # else: 65 | # list.extend(all.findall(line)) 66 | # print(list) -------------------------------------------------------------------------------- /code/语法分析LL1.py: -------------------------------------------------------------------------------- 1 | """ 2 | 文法: 3 | E->E+T | T 4 | T->T*F | F 5 | F->(E)|i 6 | 消除左递归: 7 | E->TH (H代替E') 8 | H->+TH|e (e替代空) 9 | T->FY (Y代替T') 10 | Y->*FY|e 11 | F->(E)|i 12 | 非终结符: 13 | E,H,T,Y,F 14 | 终结符: 15 | i,+,*,(,),# 16 | """ 17 | 18 | 19 | from prettytable import PrettyTable 20 | table = PrettyTable(["步骤", "分析栈", "当前输入a","剩余输入串", "所用产生式"]) 21 | 22 | # 构造预测分析表 23 | dists = { 24 | ('E', 'i'): 'TH',('E', '('): 'TH',('H', '+'): '+TH', 25 | ('H', ')'): 'e',('H', '#'): 'e',('T', 'i'): 'FY', 26 | ('T', '('): 'FY',('Y', '+'): 'e',('Y', '*'): '*FY', 27 | ('Y', ')'): 'e',('Y', '#'): 'e',('F', 'i'): 'i', 28 | ('F', '('): '(E)', 29 | } 30 | 31 | # 构造终结符集合 32 | Vt = ('i', '+', '*', '(', ')') 33 | 34 | # 构造非终结符集合 35 | Vh = ('E', 'H', 'T', 'Y', 'F') 36 | 37 | 38 | # 获取输入栈中的内容 39 | def printstack(stack): 40 | rtu = '' 41 | for i in stack: 42 | rtu += i 43 | return rtu 44 | 45 | 46 | # 得到输入串剩余串 47 | def printstr(str, index): 48 | rtu = '' 49 | for i in range(index, len(str), 1): 50 | rtu += str[i] 51 | return rtu 52 | 53 | 54 | # 定义error函数 55 | def error(): 56 | print('Error') 57 | exit() 58 | 59 | 60 | # 总控程序 61 | def masterctrl(str): 62 | ''' 63 | 总控程序,用于进程文法的判断 64 | ''' 65 | # 用列表模拟栈 66 | stack = [] 67 | location = 0 68 | # 将#号入栈 69 | stack.append(str[location]) 70 | 71 | # 将文法开始符入栈 72 | stack.append('E') 73 | # 将输入串第一个字符读进a中 74 | location += 1 75 | a = str[location] 76 | 77 | flag = True 78 | count = 1 #计算步骤 79 | table.add_row([count, printstack(stack),a, printstr(str, location),'']) 80 | while flag: 81 | if count == 1: 82 | pass 83 | else: 84 | if x in Vt: 85 | table.add_row([count, printstack(stack),a, printstr(str, location),'']) 86 | else: 87 | temp = x + '->' + s 88 | table.add_row([count, printstack(stack),a, printstr(str, location),temp]) 89 | x = stack.pop() 90 | if x in Vt: #栈顶是终结符 91 | if x == str[location]: #该字符匹配,输入串向后挪一位 92 | location += 1 93 | a = str[location] 94 | else: #否则错误 95 | error() 96 | elif x == '#': #栈顶是结束符 97 | if x == a: #当前输入字符也是结束符,分析结束 98 | flag = False 99 | else: #否则错误 100 | error() 101 | elif (x, a) in dists.keys(): #M[x,a]是产生式 102 | s = dists[(x, a)] 103 | for i in range(len(s) - 1, -1, -1): #倒序入栈 104 | if s[i] != 'e': 105 | stack.append(s[i]) 106 | else: 107 | error() 108 | count += 1 109 | 110 | 111 | if __name__ == '__main__': 112 | str = '#i+i#' 113 | masterctrl(str) 114 | table.align['步骤'] = 'l' 115 | table.align['分析栈'] = 'l' 116 | table.align['剩余输入串'] = 'l' 117 | table.align['所用产生式'] = 'l' 118 | table.align['当前输入a'] = 'l' 119 | print(table) 120 | print("分析成功!") 121 | -------------------------------------------------------------------------------- /code/词法test.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | class Token(object): 5 | 6 | # 初始化 7 | def __init__(this): 8 | # 存储分词的列表 9 | this.results = [] 10 | 11 | # 行号 12 | this.lineno = 1 13 | 14 | # 关键字 15 | this.keywords = ['auto', 'struct', 'if', 'else', 'for', 'do', 'while', 'const', 16 | 'int', 'double', 'float', 'long', 'char', 'short', 'unsigned', 17 | 'switch', 'break', 'defalut', 'continue', 'return', 'void', 'static', 18 | 'auto', 'enum', 'register', 'typeof', 'volatile', 'union', 'extern'] 19 | ''' 20 | regex中:*表示从0-, +表示1-, ?表示0-1。对应的需要转义 21 | { 表示限定符表达式开始的地方 \{ 22 | () 标记一个子表达式的开始和结束位置。子表达式可以获取共以后使用:\( \) 23 | r表示原生字符串。 24 | ''' 25 | 26 | Keyword = r'(?P(auto){1}|(double){1}|(int){1}|(if){1}|' \ 27 | r'(#include){1}|(return){1}|(char){1}|(stdio\.h){1}|(const){1})' 28 | # 运算符 29 | Operator = r'(?P\+\+|\+=|\+|--|-=|-|\*=|/=|/|%=|%)' 30 | 31 | # 分隔符/界符 32 | Separator = r'(?P[,:\{}:)(<>])' 33 | 34 | # 数字: 例如:1 1.9 35 | Number = r'(?P\d+[.]?\d+)' 36 | 37 | # 变量名 不能使用关键字命名 38 | ID = r'(?P[a-zA-Z_][a-zA-Z_0-9]*)' 39 | 40 | # 方法名 {1} 重复n次 41 | Method = r'(?P(main){1}|(printf){1})' 42 | 43 | # 错误 \S 匹配任意不是空白符的字符 44 | # Error = r'(?P.*\S+)' 45 | Error = r'\"(?P.*)\"' 46 | 47 | # 注释 ^匹配行的开始 .匹配换行符以外的任意字符 \r回车符 \n换行符 48 | Annotation = r'(?P/\*(.|[\r\n])*/|//[^\n]*)' 49 | 50 | # 进行组装,将上述正则表达式以逻辑的方式进行拼接, 按照一定的逻辑顺序 51 | # compile函数用于编译正则表达式,生成一个正则表达式对象 52 | this.patterns = re.compile('|'.join([Annotation, Keyword, Method, ID, Number, Separator, Operator, Error])) 53 | 54 | # 读文件 55 | def read_file(this, filename): 56 | with open(filename, "r") as f_input: 57 | return [line.strip() for line in f_input] 58 | 59 | # 结果写入文件 60 | def write_file(this, lines, filename='D:/results.txt'): 61 | with open(filename, "a") as f_output: 62 | for line in lines: 63 | if line: 64 | f_output.write(line) 65 | else: 66 | continue 67 | 68 | def get_token(this, line): 69 | 70 | # finditer : 在字符串中找到正则表达式所匹配的所有字串, 并把他们作为一个迭代器返回 71 | for match in re.finditer(this.patterns, line): 72 | # group():匹配的整个表达式的字符 # yield 关键字:类似return ,返回的是一个生成器,generator 73 | yield (match.lastgroup, match.group()) 74 | 75 | def run(this, line, flag=True): 76 | for token in this.get_token(line): 77 | if flag: 78 | print("line %3d :" % this.lineno, token) 79 | ''' 80 | else: 81 | yield "line %3d :" % this.lineno + str(token) + "\n" 82 | ''' 83 | 84 | def printrun(this, line, flag=True): 85 | for token in this.get_token(line): 86 | if flag: 87 | print("lines x: ", token) 88 | 89 | 90 | if __name__ == '__main__': 91 | token = Token() 92 | filepath = "E:/test.txt" 93 | 94 | lines = token.read_file(filepath) 95 | 96 | for line in lines: 97 | token.run(line, True) 98 | token.lineno += 1 99 | -------------------------------------------------------------------------------- /code/消除回溯.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | def remove_recall(production): 3 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 4 | # f = open('E://test3.txt','r') 5 | # production = [] 6 | Vn = set() 7 | # for line in f: 8 | # data = line.strip() #读取每一行 9 | # production.append(data) 10 | # Vn.add(data[0]) 11 | for i in production: 12 | Vn.add(i[0]) 13 | 14 | sets = set(sets) 15 | sets -= Vn 16 | sets = list(sets) 17 | 18 | result = {} #保存整个文法的结果 19 | for i in range(len(production)): 20 | X, Y = production[i].split('->') #左右部分开 21 | Y = Y.split('|') #右部根据|再分 22 | s = sets.pop() #弹出一个字符作为回溯的处理 23 | dicts = defaultdict(list) 24 | 25 | for Yi in Y: 26 | dicts[Yi[0]].append(Yi) #根据候选式的首字符分组放进字典 27 | 28 | temp1 = [] # 保存有回溯的候选式 29 | temp2 = [] # 保存没有回溯的候选式 30 | flag = False # 存在回溯的标志 31 | 32 | result_tmp = {} #保存每个产生式的结果 33 | ss = '' 34 | for k,v in dicts.items(): 35 | if len(v) > 1: #存在回溯 36 | flag = True 37 | #找到公共左因子ss 38 | zipped = zip(*v) #拉链函数 比如zip(*[abc,abd]) 将列表的元素作为参数传递给zip >>> (a,a),(b,b),(c,d) 39 | for i in zipped: 40 | if len(set(i)) == 1: #是公共左因子的部分就拼接 41 | ss += i[0] 42 | else: 43 | break 44 | #去掉有回溯的候选式的公共左因子 45 | for i in range(len(v)): 46 | dicts[k][i] = dicts[k][i].replace(ss,'') 47 | if dicts[k][i] == '': #候选式刚好等于公共左因子 48 | dicts[k][i] = 'ε' 49 | temp1.extend(dicts[k]) 50 | else: #不存在回溯的候选式 51 | temp2.extend(dicts[k]) 52 | 53 | #存在回溯的处理 54 | if flag: 55 | # 有回溯的候选式的合并 56 | result_tmp[s] = s + '->' 57 | for i in range(len(temp1)): 58 | if i == len(temp1) - 1: 59 | result_tmp[s] = result_tmp[s] + temp1[i] 60 | else: 61 | result_tmp[s] = result_tmp[s] + temp1[i] + '|' 62 | 63 | # 没有回溯的候选式的合并 64 | nonrecall = '' 65 | for i in range(len(temp2)): 66 | if i == len(temp2) - 1: 67 | nonrecall = nonrecall + temp2[i] 68 | else: 69 | nonrecall = nonrecall + temp2[i] + '|' 70 | result_tmp[X] = X + '->' + ss + s + '|' + nonrecall 71 | #不存在回溯的处理 72 | else: 73 | for Yi in Y: 74 | result_tmp[X] = production[i] 75 | result.update(result_tmp) 76 | result_new =[] 77 | for k,v in result.items(): 78 | result_new.append(v) 79 | print(result_new) 80 | 81 | # 消除左递归 82 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 83 | def remove_recursion(): 84 | f = open('E://test1.txt','r') 85 | production = [] 86 | Vn = set() 87 | for line in f: 88 | data = line.strip() #读取每一行 89 | production.append(data) 90 | Vn.add(data[0]) 91 | 92 | # print(production) 93 | global sets 94 | sets = set(sets) 95 | sets -= Vn 96 | sets = list(sets) 97 | 98 | #消除左递归 99 | gdict = {} #用字典保存消除左递归的文法 100 | for i in range(len(production)): 101 | X, Y = production[i].split('->') #左右部分开 102 | Y = Y.split('|') #右部根据|再分 103 | ss = '' # 保存候选式没有直接左递归的 消除左递归之后的字符串 104 | s = sets.pop() #弹出一个非终结符集里面没有的字母 105 | nlx = [] #保存没有左递归的候选式 106 | flag = False #存在左递归的标志 107 | temp = [] #保存有左递归的去掉头的候选式 108 | for Yi in Y: 109 | if Yi[0] == X: #该候选式存在左递归 110 | flag = True 111 | if flag: #存在左递归的处理 112 | for Yi in Y: 113 | if Yi[0] == X: #对于E->E+T|T 的 E+T >>>>> E'->+TE' 114 | temp.append(Yi[1:] + s) 115 | else: 116 | gdict[X] = Yi + s #对于E->E+T|T 的 T >>>>> E->TE' 117 | nlx.append(gdict[X]) 118 | 119 | #有左递归的候选式的合并 120 | gdict[s] = s + '->' 121 | for i in range(len(temp)): 122 | if i == len(temp) - 1: 123 | gdict[s] = gdict[s] + temp[i] 124 | else: 125 | gdict[s] = gdict[s] + temp[i] + '|' 126 | if s in gdict: #在 E'->+TE'的基础上 >>>>> E'->+TE'|ε 127 | gdict[s] = gdict[s] + '|ε' 128 | 129 | # 没有左递归的候选式的合并 130 | for i in range(len(nlx)): 131 | if i == len(nlx) - 1: 132 | ss = ss + nlx[i] 133 | else: 134 | ss = ss + nlx[i] + '|' 135 | gdict[X] = X + '->' + ss 136 | 137 | else: #不存在左递归就直接添加改产生式 138 | for Yi in Y: 139 | gdict[X] = production[i] 140 | 141 | #得出消除左递归后的最终文法 142 | result = [] 143 | for k,v in gdict.items(): 144 | result.append(v) 145 | return result 146 | 147 | 148 | Code = remove_recursion() 149 | remove_recall(Code) -------------------------------------------------------------------------------- /code/lexical_Analysis.py: -------------------------------------------------------------------------------- 1 | from prettytable import PrettyTable 2 | 3 | #输出分析表的表头 4 | table = PrettyTable(["Steps", "Stack", "Input_a_now", "Remain_str", "Use_production"]) 5 | 6 | ''' 7 | 文法: 8 | E->E+T | T 9 | T->T*F | F 10 | F->(E)|i 11 | 消除左递归: 12 | E->TX (X代替E') 13 | X->+TX|ε 14 | T->FY (Y代替T') 15 | Y->*FY|ε 16 | F->(E)|i 17 | 非终结符: 18 | E,X,T,Y,F 19 | 终结符: 20 | i,+,*,(,),# 21 | ''' 22 | 23 | # 根据上面文法构造预测分析表 24 | dicts = { 25 | ('E', 'i'): 'TX', ('E', '('): 'TX', ('X', '+'): '+TX', 26 | ('X', ')'): 'ε', ('X', '#'): 'ε', ('T', 'i'): 'FY', 27 | ('T', '('): 'FY', ('Y', '+'): 'ε', ('Y', '*'): '*FY', 28 | ('Y', ')'): 'ε', ('Y', '#'): 'ε', ('F', 'i'): 'i', 29 | ('F', '('): '(E)', 30 | } 31 | 32 | #文法开始符 33 | StartSym = 'E' 34 | 35 | # 构造终结符集合 36 | Vt = ('i', '+', '*', '(', ')') 37 | 38 | # 构造非终结符集合 39 | Vn = ('E', 'X', 'T', 'Y', 'F') 40 | 41 | # 获取输入栈中的内容 42 | def Showstack(stack): 43 | ss = '' 44 | for i in stack: 45 | ss += i 46 | return ss 47 | 48 | # 得到输入串剩余串 49 | def Showstr(str, index): 50 | ss = '' 51 | for i in range(index, len(str)): 52 | ss += str[i] 53 | return ss 54 | 55 | # 定义error函数 56 | def error(): 57 | print('Error') 58 | exit() 59 | 60 | 61 | # 分析程序 62 | def Analysis(str,StartSym,table,dicts,Vt,Vn): 63 | ''' 64 | 总控程序,用于进程文法的判断 65 | ''' 66 | 67 | stack = [] # 用列表模拟栈 68 | location = 0 # 当前位置 69 | str = '#' + str + '#' # 输入串 70 | 71 | stack.append(str[location]) # 将#号入栈 72 | 73 | stack.append(StartSym) # 将文法开始符入栈 74 | 75 | location += 1 76 | a = str[location] # 将输入串第一个字符读进a中 77 | 78 | flag = True # 分析结束标志 79 | count = 1 # 计算步骤 80 | 81 | while flag: 82 | # 建表 83 | if count == 1: #文法开始 84 | temp = StartSym + '->' + dicts[(StartSym, a)] 85 | table.add_row([count, Showstack(stack), a, Showstr(str, location), temp]) 86 | else: 87 | if stack[-1] in Vt: #栈顶是终结符,所用产生式为空,即下一步的栈顶直接弹出 88 | table.add_row([count, Showstack(stack), a, Showstr(str, location), '']) 89 | elif stack[-1] in Vn: #栈顶是非终结符,所用产生式为 M[x,a] 90 | temp = stack[-1] + '->' + dicts[(stack[-1], a)] 91 | table.add_row([count, Showstack(stack), a, Showstr(str, location), temp]) 92 | else: # 栈顶是结束符‘#’,分析成功 93 | table.add_row([count, Showstack(stack), a, Showstr(str, location), "Success!"]) 94 | 95 | x = stack.pop() # x为栈顶元素 96 | if x in Vt: # 栈顶是终结符 97 | if x == str[location]: # 该字符匹配,输入串向后挪一位 98 | location += 1 99 | a = str[location] 100 | else: # 否则错误 101 | error() 102 | elif x == '#': # 栈顶是结束符 103 | if x == a: # 当前输入字符也是结束符,分析结束 104 | flag = False 105 | else: # 否则错误 106 | error() 107 | elif (x, a) in dicts.keys(): # M[x,a]是产生式 108 | s = dicts[(x, a)] 109 | for i in range(len(s) - 1, -1, -1): # 倒序入栈 110 | if s[i] != 'ε': 111 | stack.append(s[i]) 112 | else: 113 | error() 114 | count += 1 115 | 116 | 117 | def ShowTable(): 118 | # 表左对齐 119 | table.align['步骤'] = 'l' 120 | table.align['分析栈'] = 'l' 121 | table.align['剩余输入串'] = 'l' 122 | table.align['所用产生式'] = 'l' 123 | table.align['当前输入a'] = 'l' 124 | # 输出语法分析表 125 | print(table) 126 | 127 | if __name__ == '__main__': 128 | str = input('>>>') 129 | # for i in str: 130 | # if i not in Vt: 131 | # exit("存在字符在文法里不存在!!!") 132 | Analysis(str,StartSym,table,dicts,Vt,Vn) 133 | ShowTable() #表格输出结果分析结果 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | # """ 162 | # 文法: 163 | # E->E+T | T 164 | # T->T*F | F 165 | # F->(E)|i 166 | # 消除左递归: 167 | # E->TX (X代替E') 168 | # X->+TX|ε 169 | # T->FY (Y代替T') 170 | # Y->*FY|ε 171 | # F->(E)|i 172 | # 非终结符: 173 | # E,X,T,Y,F 174 | # 终结符: 175 | # i,+,*,(,),# 176 | # """ 177 | # 178 | # from prettytable import PrettyTable 179 | # table = PrettyTable(["Steps", "Stack", "Input_a_now","Remain_str", "Use_production"]) 180 | # 181 | # # 构造预测分析表 182 | # dicts = { 183 | # ('E', 'i'): 'TX',('E', '('): 'TX',('X', '+'): '+TX', 184 | # ('X', ')'): 'ε',('X', '#'): 'ε',('T', 'i'): 'FY', 185 | # ('T', '('): 'FY',('Y', '+'): 'ε',('Y', '*'): '*FY', 186 | # ('Y', ')'): 'ε',('Y', '#'): 'ε',('F', 'i'): 'i', 187 | # ('F', '('): '(E)', 188 | # } 189 | # 190 | # # 构造终结符集合 191 | # Vt = ('i', '+', '*', '(', ')') 192 | # 193 | # # 构造非终结符集合 194 | # Vn = ('E', 'X', 'T', 'Y', 'F') 195 | # 196 | # 197 | # # 获取输入栈中的内容 198 | # def Showstack(stack): 199 | # ss = '' 200 | # for i in stack: 201 | # ss += i 202 | # return ss 203 | # 204 | # 205 | # # 得到输入串剩余串 206 | # def Showstr(str, index): 207 | # ss = '' 208 | # for i in range(index, len(str), 1): 209 | # ss += str[i] 210 | # return ss 211 | # 212 | # 213 | # # 定义error函数 214 | # def error(): 215 | # print('Error') 216 | # exit() 217 | # 218 | # 219 | # # 分析程序 220 | # def Analysis(str): 221 | # ''' 222 | # 总控程序,用于进程文法的判断 223 | # ''' 224 | # 225 | # stack = [] # 用列表模拟栈 226 | # location = 0 #当前位置 227 | # str = '#' + str + '#' #输入串 228 | # 229 | # stack.append(str[location]) #将#号入栈 230 | # 231 | # 232 | # stack.append('E') # 将文法开始符入栈 233 | # 234 | # location += 1 235 | # a = str[location] # 将输入串第一个字符读进a中 236 | # 237 | # flag = True #分析结束标志 238 | # count = 1 #计算步骤 239 | # 240 | # 241 | # while flag: 242 | # #建表 243 | # if count == 1: 244 | # temp = 'E' + '->' + dicts[('E', a)] 245 | # table.add_row([count, Showstack(stack), a, Showstr(str, location), temp]) 246 | # else: 247 | # if stack[-1] in Vt: 248 | # table.add_row([count, Showstack(stack),a, Showstr(str, location),'']) 249 | # elif stack[-1] in Vn: 250 | # temp = stack[-1] + '->' + dicts[(stack[-1],a)] 251 | # table.add_row([count, Showstack(stack),a, Showstr(str, location),temp]) 252 | # else: 253 | # table.add_row([count, Showstack(stack), a, Showstr(str, location), 'Success!']) 254 | # 255 | # x = stack.pop() #x为栈顶元素 256 | # if x in Vt: #栈顶是终结符 257 | # if x == str[location]: #该字符匹配,输入串向后挪一位 258 | # location += 1 259 | # a = str[location] 260 | # else: #否则错误 261 | # error() 262 | # elif x == '#': #栈顶是结束符 263 | # if x == a: #当前输入字符也是结束符,分析结束 264 | # flag = False 265 | # else: #否则错误 266 | # error() 267 | # elif (x, a) in dicts.keys(): #M[x,a]是产生式 268 | # s = dicts[(x, a)] 269 | # for i in range(len(s) - 1, -1, -1): #倒序入栈 270 | # if s[i] != 'ε': 271 | # stack.append(s[i]) 272 | # else: 273 | # error() 274 | # count += 1 275 | # 276 | # 277 | # if __name__ == '__main__': 278 | # str = input('>>>') 279 | # for i in str: 280 | # if i not in Vt: 281 | # exit("存在字符在文法里不存在!!!") 282 | # Analysis(str) 283 | # #表左对齐 284 | # table.align['步骤'] = 'l' 285 | # table.align['分析栈'] = 'l' 286 | # table.align['剩余输入串'] = 'l' 287 | # table.align['所用产生式'] = 'l' 288 | # table.align['当前输入a'] = 'l' 289 | # print(table) 290 | -------------------------------------------------------------------------------- /code/LL1.py: -------------------------------------------------------------------------------- 1 | from prettytable import PrettyTable 2 | from lexical_Analysis import Showstack,Showstr,error,Analysis 3 | Vn = set() # 非终结符集合 4 | Vt = set() # 终结符集合 5 | First = {} # First集 6 | Follow = {} # Follow集 7 | GramaDict = {} # 处理过的产生式 例如{E:{'ε','+TE'},F:{'TE','+'}} 8 | Code = [] # 读入的产生式 9 | AnalysisList = {} # 分析表 10 | StartSym = "" # 开始符号 11 | EndSym = '#' # 结束符号为“#“ 12 | Epsilon = "ε" # 由于没有epsilon符号用“ε”代替 13 | dicts = {} 14 | 15 | # 构造First集 16 | def getFirst(): 17 | global Vn, Vt, First, Follow 18 | for X in Vn: 19 | First[X] = set() # 初始化非终结符First集为空 20 | for X in Vt: 21 | First[X] = set(X) # 初始化终结符First集为自己 22 | Change = True 23 | while Change: # 当First集没有更新则算法结束 24 | Change = False 25 | for X in Vn: 26 | for Y in GramaDict[X]: 27 | k = 0 28 | Continue = True 29 | while Continue and k < len(Y): 30 | if not First[Y[k]] - set(Epsilon) <= First[X]: # 没有一样的就添加,并且改变标志 31 | if Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: 32 | '''Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: ''' 33 | Continue = False 34 | else: # Y1到Yi候选式都有ε存在 35 | First[X] |= First[Y[k]] - set(Epsilon) 36 | Change = True 37 | if Epsilon not in First[Y[k]]: 38 | Continue = False 39 | k += 1 40 | if Continue: # X->ε或者Y1到Yk均有ε产生式 41 | First[X] |= set(Epsilon) 42 | # FirstA[Y] |= set(Epsilon) 43 | 44 | # 构造Follow集 45 | def getFollow(): 46 | global Vn, Vt, First, Follow, StartSym 47 | for A in Vn: 48 | Follow[A] = set() 49 | Follow[StartSym].add(EndSym) # 步骤1,将结束符号加入Follow[开始符号]中 50 | Change = True 51 | while Change: # 当Follow集没有更新算法结束 52 | Change = False 53 | for X in Vn: 54 | for Y in GramaDict[X]: 55 | for i in range(len(Y)): 56 | if Y[i] in Vt: 57 | continue 58 | Flag = True 59 | for j in range(i + 1, len(Y)): # continue 60 | if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]: 61 | Follow[Y[i]] |= First[Y[j]] - set(Epsilon) # 步骤2 FIRST(β)/ε 加入到FOLLOW(B)中。 62 | Change = True 63 | if Epsilon not in First[Y[j]]: 64 | Flag = False 65 | break 66 | if Flag: #A->αBβ and β->ε 67 | if not Follow[X] <= Follow[Y[i]]: # 步骤3 β->ε,把FOLLOW(A)加到FOLLOW(B)中 68 | Follow[Y[i]] |= Follow[X] 69 | Change = True 70 | 71 | #构造分析表 72 | def getAnalysisList(): 73 | for k in GramaDict: # 初始化分析表 74 | AnalysisList[k] = dict() 75 | for e in Vt: 76 | AnalysisList[k][e] = None 77 | for k in GramaDict: 78 | l = GramaDict[k] 79 | for s in l: 80 | if s[0].isupper(): # S->ɑ ɑ[0]属于非终结符时 包括 ɑ !-> ε 和 ɑ -> ε 的First(ɑ) 81 | for e in Vt: 82 | if e in First[s[0]]: 83 | AnalysisList[k][e] = s 84 | 85 | if s[0] in Vt: ## S->ɑ ɑ[0]属于终结符 例如 select(S->+E) = First(+E) = + 86 | AnalysisList[k][s[0]] = s 87 | 88 | if (s[0].isupper() and (Epsilon in First[s[0]])) or (s == Epsilon): 89 | for c in Follow[k]: # S->ɑ and ɑ->ε 的 Follow(S) 90 | AnalysisList[k][c] = s 91 | 92 | for item,val in AnalysisList.items(): 93 | for k in val: 94 | if val[k]: 95 | dicts.update({(item,k):val[k]}) 96 | 97 | #构造表的结构 98 | print("构造LL1分析表:") 99 | data = [] 100 | data.append('') 101 | for i in Vt: 102 | data.append(i) 103 | table1 = PrettyTable(data) 104 | for item in Vn: 105 | production = [] 106 | production.append(item) 107 | for i in AnalysisList[item]: 108 | production.append(AnalysisList[item][i]) 109 | table1.add_row(production) 110 | print(table1) 111 | 112 | 113 | # 消除左递归 114 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 115 | def remove_recursion(production): 116 | Vn = set() 117 | for item in production: 118 | Vn.add(item[0]) 119 | 120 | global sets 121 | sets = set(sets) 122 | sets -= Vn 123 | sets = list(sets) 124 | 125 | #消除左递归 126 | gdict = {} #用字典保存消除左递归的文法 127 | for i in range(len(production)): 128 | X, Y = production[i].split('->') #左右部分开 129 | Y = Y.split('|') #右部根据|再分 130 | ss = '' # 保存候选式没有直接左递归的 消除左递归之后的字符串 131 | s = sets.pop() #弹出一个非终结符集里面没有的字母 132 | nlx = [] #保存没有左递归的候选式 133 | flag = False #存在左递归的标志 134 | temp = [] #保存有左递归的去掉头的候选式 135 | for Yi in Y: 136 | if Yi[0] == X: #该候选式存在左递归 137 | flag = True 138 | if flag: #存在左递归的处理 139 | for Yi in Y: 140 | if Yi[0] == X: #对于E->E+T|T 的 E+T >>>>> E'->+TE' 141 | temp.append(Yi[1:] + s) 142 | else: 143 | gdict[X] = Yi + s #对于E->E+T|T 的 T >>>>> E->TE' 144 | nlx.append(gdict[X]) 145 | 146 | #有左递归的候选式的合并 147 | gdict[s] = s + '->' 148 | for i in range(len(temp)): 149 | if i == len(temp) - 1: 150 | gdict[s] = gdict[s] + temp[i] 151 | else: 152 | gdict[s] = gdict[s] + temp[i] + '|' 153 | if s in gdict: #在 E'->+TE'的基础上 >>>>> E'->+TE'|ε 154 | gdict[s] = gdict[s] + '|ε' 155 | 156 | # 没有左递归的候选式的合并 157 | for i in range(len(nlx)): 158 | if i == len(nlx) - 1: 159 | ss = ss + nlx[i] 160 | else: 161 | ss = ss + nlx[i] + '|' 162 | gdict[X] = X + '->' + ss 163 | 164 | else: #不存在左递归就直接添加改产生式 165 | for Yi in Y: 166 | gdict[X] = production[i] 167 | 168 | #得出消除左递归后的最终文法 169 | result = [] 170 | for k,v in gdict.items(): 171 | result.append(v) 172 | return result 173 | 174 | #消除回溯 175 | from collections import defaultdict 176 | def remove_recall(production): 177 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 178 | Vn = set() 179 | for i in production: 180 | Vn.add(i[0]) 181 | 182 | sets = set(sets) 183 | sets -= Vn 184 | sets = list(sets) 185 | 186 | result = {} # 保存整个文法的结果 187 | for i in range(len(production)): 188 | X, Y = production[i].split('->') # 左右部分开 189 | Y = Y.split('|') # 右部根据|再分 190 | s = sets.pop() # 弹出一个字符作为回溯的处理 191 | dicts = defaultdict(list) 192 | 193 | for Yi in Y: 194 | dicts[Yi[0]].append(Yi) # 根据候选式的首字符分组放进字典 195 | 196 | temp1 = [] # 保存有回溯的候选式 197 | temp2 = [] # 保存没有回溯的候选式 198 | flag = False # 存在回溯的标志 199 | 200 | result_tmp = {} # 保存每个产生式的结果 201 | ss = '' 202 | for k, v in dicts.items(): 203 | if len(v) > 1: # 存在回溯 204 | flag = True 205 | # 找到公共左因子ss 206 | zipped = zip(*v) # 拉链函数 比如zip(*[abc,abd]) 将列表的元素作为参数传递给zip >>> (a,a),(b,b),(c,d) 207 | for i in zipped: 208 | if len(set(i)) == 1: # 是公共左因子的部分就拼接 209 | ss += i[0] 210 | else: 211 | break 212 | # 去掉有回溯的候选式的公共左因子 213 | for i in range(len(v)): 214 | dicts[k][i] = dicts[k][i].replace(ss, '') 215 | if dicts[k][i] == '': # 候选式刚好等于公共左因子 216 | dicts[k][i] = 'ε' 217 | temp1.extend(dicts[k]) 218 | else: # 不存在回溯的候选式 219 | temp2.extend(dicts[k]) 220 | 221 | # 存在回溯的处理 222 | if flag: 223 | # 有回溯的候选式的合并 224 | result_tmp[s] = s + '->' 225 | for i in range(len(temp1)): 226 | if i == len(temp1) - 1: 227 | result_tmp[s] = result_tmp[s] + temp1[i] 228 | else: 229 | result_tmp[s] = result_tmp[s] + temp1[i] + '|' 230 | 231 | # 没有回溯的候选式的合并 232 | nonrecall = '' 233 | for i in range(len(temp2)): 234 | if i == len(temp2) - 1: 235 | nonrecall = nonrecall + temp2[i] 236 | else: 237 | nonrecall = nonrecall + temp2[i] + '|' 238 | result_tmp[X] = X + '->' + ss + s + '|' + nonrecall 239 | # 不存在回溯的处理 240 | else: 241 | for Yi in Y: 242 | result_tmp[X] = production[i] 243 | result.update(result_tmp) 244 | result_new = [] 245 | for k, v in result.items(): 246 | result_new.append(v) 247 | return result_new 248 | 249 | 250 | # 总控程序 251 | def LL1(): 252 | global Vn, Vt, First, Follow, StartSym, Code 253 | f = open('E://test1.txt','r') 254 | production = [] 255 | print("原文法:") 256 | for line in f: 257 | data = line.strip() #读取每一行 258 | production.append(data) 259 | print('\t\t\t\t',data) 260 | 261 | StartSym = production[0][0] 262 | Code = remove_recursion(production) #消除左递归 263 | Code = remove_recall(Code) #消除回溯 264 | n = int(len(Code)) 265 | 266 | print('消除左递归和回溯:') 267 | for i in range(n): 268 | X, Y = Code[i].split('->') #产生式的分离 269 | print('\t\t\t\t', Code[i]) 270 | Vn.add(X) 271 | Y = Y.split('|') 272 | for Yi in Y: 273 | Vt |= set(Yi) 274 | if X not in GramaDict: 275 | GramaDict[X] = set() 276 | GramaDict[X] |= set(Y) # 生成产生式集 277 | Vt -= Vn 278 | print('非终结符:', Vn) 279 | print('终结符:', Vt) 280 | getFirst() 281 | getFollow() 282 | print("FIRST集:") 283 | for k in Vn: 284 | print(' FIRST[', k, ']: ', First[k]) 285 | print("FOLLOW集:") 286 | for k, v in Follow.items(): 287 | print(' FOLLOW[', k, ']: ', v) 288 | Vt -= set(Epsilon) 289 | Vt |= set(EndSym) 290 | 291 | getAnalysisList() #LL1得到分析表 292 | 293 | 294 | if __name__ == "__main__": 295 | LL1() 296 | str = input(">>>") 297 | for i in str: 298 | if i not in Vt: 299 | exit("输入的字符在文法里不存在!!!") 300 | table = PrettyTable(["Steps", "Stack", "Input_a_now", "Remain_str", "Use_production"]) 301 | #导入实验二写好的语法分析,接口 302 | Analysis(str,StartSym,table,dicts) 303 | 304 | #格式控制,输出语法分析表 305 | table.align['步骤'] = 'l' 306 | table.align['分析栈'] = 'l' 307 | table.align['剩余输入串'] = 'l' 308 | table.align['所用产生式'] = 'l' 309 | table.align['当前输入a'] = 'l' 310 | print(table) -------------------------------------------------------------------------------- /code/LL1分析.py: -------------------------------------------------------------------------------- 1 | from prettytable import PrettyTable 2 | from lexical_Analysis import Showstack,Showstr,error,Analysis 3 | Vn = set() # 非终结符集合 4 | Vt = set() # 终结符集合 5 | First = {} # First集 6 | Follow = {} # Follow集 7 | GramaDict = {} # 处理过的产生式 例如{E:{'ε','+TE'},F:{'TE','+'}} 8 | Code = [] # 读入的产生式 9 | AnalysisList = {} # 分析表 10 | StartSym = "" # 开始符号 11 | EndSym = '#' # 结束符号为“#“ 12 | Epsilon = "ε" 13 | dicts = {} 14 | 15 | # 构造First集 16 | def getFirst(): 17 | global Vn, Vt, First, Follow 18 | for X in Vn: 19 | First[X] = set() # 初始化非终结符First集为空 20 | for X in Vt: 21 | First[X] = set(X) # 初始化终结符First集为自己 22 | Change = True 23 | while Change: # 当First集没有更新则算法结束 24 | Change = False 25 | for X in Vn: 26 | for Y in GramaDict[X]: 27 | k = 0 28 | Continue = True 29 | while Continue and k < len(Y): 30 | if not First[Y[k]] - set(Epsilon) <= First[X]: # 没有一样的就添加,并且改变标志 31 | if Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: 32 | '''Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: ''' 33 | Continue = False 34 | else: # Y1到Yi候选式都有ε存在 35 | First[X] |= First[Y[k]] - set(Epsilon) 36 | Change = True 37 | if Epsilon not in First[Y[k]]: 38 | Continue = False 39 | k += 1 40 | if Continue: # X->ε或者Y1到Yk均有ε产生式 41 | First[X] |= set(Epsilon) 42 | # FirstA[Y] |= set(Epsilon) 43 | 44 | # 构造Follow集 45 | def getFollow(): 46 | global Vn, Vt, First, Follow, StartSym 47 | for A in Vn: 48 | Follow[A] = set() 49 | Follow[StartSym].add(EndSym) # 步骤1,将结束符号加入Follow[开始符号]中 50 | Change = True 51 | while Change: # 当Follow集没有更新算法结束 52 | Change = False 53 | for X in Vn: 54 | for Y in GramaDict[X]: 55 | for i in range(len(Y)): 56 | if Y[i] in Vt: 57 | continue 58 | Flag = True 59 | for j in range(i + 1, len(Y)): # continue 60 | if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]: 61 | Follow[Y[i]] |= First[Y[j]] - set(Epsilon) # 步骤2 FIRST(β)/ε 加入到FOLLOW(B)中。 62 | Change = True 63 | if Epsilon not in First[Y[j]]: 64 | Flag = False 65 | break 66 | if Flag: #A->αBβ and β->ε 67 | if not Follow[X] <= Follow[Y[i]]: # 步骤3 β->ε,把FOLLOW(A)加到FOLLOW(B)中 68 | Follow[Y[i]] |= Follow[X] 69 | Change = True 70 | 71 | #构造分析表 72 | def getAnalysisList(): 73 | for k in GramaDict: # 初始化分析表 74 | AnalysisList[k] = dict() 75 | for e in Vt: 76 | AnalysisList[k][e] = None 77 | for k in GramaDict: 78 | l = GramaDict[k] 79 | for s in l: 80 | if s[0].isupper(): # S->ɑ ɑ[0]属于非终结符时 包括 ɑ !-> ε 和 ɑ -> ε 的First(ɑ) 81 | for e in Vt: 82 | if e in First[s[0]]: 83 | AnalysisList[k][e] = s 84 | 85 | if s[0] in Vt: ## S->ɑ ɑ[0]属于终结符 例如 select(S->+E) = First(+E) = + 86 | AnalysisList[k][s[0]] = s 87 | 88 | if (s[0].isupper() and (Epsilon in First[s[0]])) or (s == Epsilon): 89 | for c in Follow[k]: # S->ɑ and ɑ->ε 的 Follow(S) 90 | AnalysisList[k][c] = s 91 | 92 | for item,val in AnalysisList.items(): 93 | for k in val: 94 | if val[k]: 95 | dicts.update({(item,k):val[k]}) 96 | 97 | #画表 98 | print("构造LL1分析表:") 99 | data = [] 100 | data.append('') 101 | for i in Vt: 102 | data.append(i) 103 | table1 = PrettyTable(data) 104 | for item in Vn: 105 | production = [] 106 | production.append(item) 107 | for i in AnalysisList[item]: 108 | production.append(AnalysisList[item][i]) 109 | table1.add_row(production) 110 | print(table1) 111 | 112 | 113 | # 消除左递归 114 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 115 | def remove_recursion(production): 116 | Vn = set() 117 | for item in production: 118 | Vn.add(item[0]) 119 | 120 | global sets 121 | sets = set(sets) 122 | sets -= Vn 123 | sets = list(sets) 124 | 125 | #消除左递归 126 | gdict = {} #用字典保存消除左递归的文法 127 | for i in range(len(production)): 128 | X, Y = production[i].split('->') #左右部分开 129 | Y = Y.split('|') #右部根据|再分 130 | ss = '' # 保存候选式没有直接左递归的 消除左递归之后的字符串 131 | s = sets.pop() #弹出一个非终结符集里面没有的字母 132 | nlx = [] #保存没有左递归的候选式 133 | flag = False #存在左递归的标志 134 | temp = [] #保存有左递归的去掉头的候选式 135 | for Yi in Y: 136 | if Yi[0] == X: #该候选式存在左递归 137 | flag = True 138 | if flag: #存在左递归的处理 139 | for Yi in Y: 140 | if Yi[0] == X: #对于E->E+T|T 的 E+T >>>>> E'->+TE' 141 | temp.append(Yi[1:] + s) 142 | else: 143 | gdict[X] = Yi + s #对于E->E+T|T 的 T >>>>> E->TE' 144 | nlx.append(gdict[X]) 145 | 146 | #有左递归的候选式的合并 147 | gdict[s] = s + '->' 148 | for i in range(len(temp)): 149 | if i == len(temp) - 1: 150 | gdict[s] = gdict[s] + temp[i] 151 | else: 152 | gdict[s] = gdict[s] + temp[i] + '|' 153 | if s in gdict: #在 E'->+TE'的基础上 >>>>> E'->+TE'|ε 154 | gdict[s] = gdict[s] + '|ε' 155 | 156 | # 没有左递归的候选式的合并 157 | for i in range(len(nlx)): 158 | if i == len(nlx) - 1: 159 | ss = ss + nlx[i] 160 | else: 161 | ss = ss + nlx[i] + '|' 162 | gdict[X] = X + '->' + ss 163 | 164 | else: #不存在左递归就直接添加改产生式 165 | for Yi in Y: 166 | gdict[X] = production[i] 167 | 168 | #得出消除左递归后的最终文法 169 | result = [] 170 | for k,v in gdict.items(): 171 | result.append(v) 172 | return result 173 | 174 | #消除回溯 175 | from collections import defaultdict 176 | def remove_recall(production): 177 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 178 | Vn = set() 179 | for i in production: 180 | Vn.add(i[0]) 181 | 182 | sets = set(sets) 183 | sets -= Vn 184 | sets = list(sets) 185 | 186 | result = {} # 保存整个文法的结果 187 | for i in range(len(production)): 188 | X, Y = production[i].split('->') # 左右部分开 189 | Y = Y.split('|') # 右部根据|再分 190 | s = sets.pop() # 弹出一个字符作为回溯的处理 191 | dicts = defaultdict(list) 192 | 193 | for Yi in Y: 194 | dicts[Yi[0]].append(Yi) # 根据候选式的首字符分组放进字典 195 | 196 | temp1 = [] # 保存有回溯的候选式 197 | temp2 = [] # 保存没有回溯的候选式 198 | flag = False # 存在回溯的标志 199 | 200 | result_tmp = {} # 保存每个产生式的结果 201 | ss = '' 202 | for k, v in dicts.items(): 203 | if len(v) > 1: # 存在回溯 204 | flag = True 205 | # 找到公共左因子ss 206 | zipped = zip(*v) # 拉链函数 比如zip(*[abc,abd]) 将列表的元素作为参数传递给zip >>> (a,a),(b,b),(c,d) 207 | for i in zipped: 208 | if len(set(i)) == 1: # 是公共左因子的部分就拼接 209 | ss += i[0] 210 | else: 211 | break 212 | # 去掉有回溯的候选式的公共左因子 213 | for i in range(len(v)): 214 | dicts[k][i] = dicts[k][i].replace(ss, '') 215 | if dicts[k][i] == '': # 候选式刚好等于公共左因子 216 | dicts[k][i] = 'ε' 217 | temp1.extend(dicts[k]) 218 | else: # 不存在回溯的候选式 219 | temp2.extend(dicts[k]) 220 | 221 | # 存在回溯的处理 222 | if flag: 223 | # 有回溯的候选式的合并 224 | result_tmp[s] = s + '->' 225 | for i in range(len(temp1)): 226 | if i == len(temp1) - 1: 227 | result_tmp[s] = result_tmp[s] + temp1[i] 228 | else: 229 | result_tmp[s] = result_tmp[s] + temp1[i] + '|' 230 | 231 | # 没有回溯的候选式的合并 232 | nonrecall = '' 233 | for i in range(len(temp2)): 234 | if i == len(temp2) - 1: 235 | nonrecall = nonrecall + temp2[i] 236 | else: 237 | nonrecall = nonrecall + temp2[i] + '|' 238 | result_tmp[X] = X + '->' + ss + s + '|' + nonrecall 239 | # 不存在回溯的处理 240 | else: 241 | for Yi in Y: 242 | result_tmp[X] = production[i] 243 | result.update(result_tmp) 244 | result_new = [] 245 | for k, v in result.items(): 246 | result_new.append(v) 247 | return result_new 248 | 249 | 250 | # 总控程序 251 | def LL1(): 252 | global Vn, Vt, First, Follow, StartSym, Code 253 | f = open('E://test1.txt','r') 254 | production = [] 255 | print("原文法:") 256 | for line in f: #读取文件中每一行 257 | data = line.strip() 258 | production.append(data) 259 | print('\t\t\t\t',data) 260 | 261 | StartSym = production[0][0] 262 | Code = remove_recursion(production) #消除左递归 263 | Code = remove_recall(Code) #消除回溯 264 | n = int(len(Code)) 265 | 266 | print('消除左递归和回溯:') 267 | for i in range(n): 268 | X, Y = Code[i].split('->') #产生式的分离,生成左部和右部 269 | print('\t\t\t\t', Code[i]) 270 | Vn.add(X) 271 | Y = Y.split('|') #右部的分离,即每个候选式 272 | for Yi in Y: 273 | Vt |= set(Yi) 274 | if X not in GramaDict: 275 | GramaDict[X] = set() 276 | GramaDict[X] |= set(Y) # 生成产生式集 277 | Vt -= Vn 278 | print('非终结符:', Vn) 279 | print('终结符:', Vt) 280 | getFirst() # 生成全部的First集 281 | getFollow() # 生成全部的Follow集 282 | print("FIRST集:") 283 | for k in Vn: 284 | print(' FIRST[', k, ']: ', First[k]) 285 | print("FOLLOW集:") 286 | for k, v in Follow.items(): 287 | print(' FOLLOW[', k, ']: ', v) 288 | Vt -= set(Epsilon) 289 | Vt |= set(EndSym) 290 | 291 | getAnalysisList() #LL1得到分析表 292 | 293 | Vt.remove('#') 294 | 295 | 296 | 297 | if __name__ == "__main__": 298 | 299 | LL1() 300 | 301 | str = input(">>>") 302 | for i in str: 303 | if i not in Vt: 304 | exit("输入串存在字符在文法里不存在!!!") 305 | table = PrettyTable(["Steps", "Stack", "Input_a_now", "Remain_str", "Use_production"]) 306 | 307 | #这里是导入之前写好的语法分析接口 308 | Analysis(str,StartSym,table,dicts,Vt,Vn) 309 | 310 | #格式控制,输出语法分析表 311 | table.align['步骤'] = 'l' 312 | table.align['分析栈'] = 'l' 313 | table.align['剩余输入串'] = 'l' 314 | table.align['所用产生式'] = 'l' 315 | table.align['当前输入a'] = 'l' 316 | print(table) 317 | -------------------------------------------------------------------------------- /code/LL1test.py: -------------------------------------------------------------------------------- 1 | # from prettytable import PrettyTable 2 | # from lexical_Analysis import * 3 | # Vn = set() # 非终结符集合 4 | # Vt = set() # 终结符集合 5 | # First = {} # First集 6 | # Follow = {} # Follow集 7 | # GramaDict = {} # 处理过的产生式 例如{E:{'ε','+TE'},F:{'TE','+'}} 8 | # Code = [] # 读入的产生式 9 | # AnalysisList = {} # 分析表 10 | # StartSym = "" # 开始符号 11 | # EndSym = '#' # 结束符号为“#“ 12 | # Epsilon = "~" # 由于没有epsilon符号用“ε”代替 13 | # dicts = {} 14 | # 15 | # # 构造First集 16 | # def getFirst(): 17 | # global Vn, Vt, First, Follow 18 | # for X in Vn: 19 | # First[X] = set() # 初始化非终结符First集为空 20 | # for X in Vt: 21 | # First[X] = set(X) # 初始化终结符First集为自己 22 | # Change = True 23 | # while Change: # 当First集没有更新则算法结束 24 | # Change = False 25 | # for X in Vn: 26 | # for Y in GramaDict[X]: 27 | # k = 0 28 | # Continue = True 29 | # while Continue and k < len(Y): 30 | # if not First[Y[k]] - set(Epsilon) <= First[X]: # 没有一样的就添加,并且改变标志 31 | # if Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: 32 | # '''Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: ''' 33 | # Continue = False 34 | # else: # Y1到Yi候选式都有ε存在 35 | # First[X] |= First[Y[k]] - set(Epsilon) 36 | # Change = True 37 | # if Epsilon not in First[Y[k]]: 38 | # Continue = False 39 | # k += 1 40 | # if Continue: # X->ε或者Y1到Yk均有ε产生式 41 | # First[X] |= set(Epsilon) 42 | # # FirstA[Y] |= set(Epsilon) 43 | # 44 | # # 构造Follow集 45 | # def getFollow(): 46 | # global Vn, Vt, First, Follow, StartSym 47 | # for A in Vn: 48 | # Follow[A] = set() 49 | # Follow[StartSym].add(EndSym) # 步骤1,将结束符号加入Follow[开始符号]中 50 | # Change = True 51 | # while Change: # 当Follow集没有更新算法结束 52 | # Change = False 53 | # for X in Vn: 54 | # for Y in GramaDict[X]: 55 | # for i in range(len(Y)): 56 | # if Y[i] in Vt: 57 | # continue 58 | # Flag = True 59 | # for j in range(i + 1, len(Y)): # continue 60 | # if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]: 61 | # Follow[Y[i]] |= First[Y[j]] - set(Epsilon) # 步骤2 FIRST(β)/ε 加入到FOLLOW(B)中。 62 | # Change = True 63 | # if Epsilon not in First[Y[j]]: 64 | # Flag = False 65 | # break 66 | # if Flag: #A->αBβ and β->ε 67 | # if not Follow[X] <= Follow[Y[i]]: # 步骤3 β->ε,把FOLLOW(A)加到FOLLOW(B)中 68 | # Follow[Y[i]] |= Follow[X] 69 | # Change = True 70 | # 71 | # #构造分析表 72 | # def getAnalysisList(): 73 | # for k in GramaDict: # 初始化分析表 74 | # AnalysisList[k] = dict() 75 | # for e in Vt: 76 | # AnalysisList[k][e] = None 77 | # for k in GramaDict: 78 | # l = GramaDict[k] 79 | # for s in l: 80 | # if s[0].isupper(): 81 | # for e in Vt: 82 | # if e in First[s[0]]: 83 | # AnalysisList[k][e] = s 84 | # if s[0] in Vt: 85 | # AnalysisList[k][s[0]] = s 86 | # if (s[0].isupper() and (Epsilon in First[s[0]])) or (s == Epsilon): 87 | # for c in Follow[k]: 88 | # AnalysisList[k][c] = s 89 | # for item,val in AnalysisList.items(): 90 | # for k in val: 91 | # if val[k]: 92 | # dicts.update({(item,k):val[k]}) 93 | # 94 | # #画表 95 | # print("构造分析表:") 96 | # data = [] 97 | # data.append('') 98 | # for i in Vt: 99 | # data.append(i) 100 | # table1 = PrettyTable(data) 101 | # for item in Vn: 102 | # temp = [] 103 | # temp.append(item) 104 | # for i in AnalysisList[item]: 105 | # temp.append(AnalysisList[item][i]) 106 | # table1.add_row(temp) 107 | # print(table1) 108 | # 109 | # 110 | # 111 | # # 读取文法 112 | # def readGrammar(): 113 | # try: 114 | # f = open('E://test1.txt', 'r') 115 | # for line in f: 116 | # Code.append(line.strip()) 117 | # except IOError as e: 118 | # print(e) 119 | # exit() 120 | # finally: 121 | # f.close() 122 | # return Code 123 | # 124 | # 125 | # 126 | # 127 | # 128 | # # 初始化 129 | # def init(): 130 | # global Vn, Vt, First, Follow, StartSym, Code 131 | # Code = readGrammar() 132 | # n = int(len(Code)) 133 | # print('产生式个数:', n) 134 | # StartSym = Code[0][0] 135 | # print('产生式:G[', StartSym, ']:') 136 | # for i in range(n): 137 | # X, Y = Code[i].split('->') 138 | # print('\t\t\t\t', Code[i]) 139 | # Vn.add(X) 140 | # Y = Y.split('|') 141 | # for Yi in Y: 142 | # Vt |= set(Yi) 143 | # if X not in GramaDict: 144 | # GramaDict[X] = set() 145 | # GramaDict[X] |= set(Y) # 生成产生式集 146 | # Vt -= Vn 147 | # print('非终结符:', Vn) 148 | # print('终结符:', Vt) 149 | # getFirst() 150 | # getFollow() 151 | # print("FIRST集:") 152 | # for k in Vn: 153 | # print(' FIRST[', k, ']: ', First[k]) 154 | # print("FOLLOW集:") 155 | # for k, v in Follow.items(): 156 | # print(' FOLLOW[', k, ']: ', v) 157 | # Vt -= set(Epsilon) 158 | # Vt |= set(EndSym) 159 | # getAnalysisList() 160 | # 161 | # 162 | # if __name__ == "__main__": 163 | # init() 164 | # str = input(">>>") 165 | # for i in str: 166 | # if i not in Vt: 167 | # exit("输入的字符在文法里不存在!!!") 168 | # 169 | # #这里是导入之前写好的词法分析函数 170 | # Analysis(str) 171 | # table.align['步骤'] = 'l' 172 | # table.align['分析栈'] = 'l' 173 | # table.align['剩余输入串'] = 'l' 174 | # table.align['所用产生式'] = 'l' 175 | # table.align['当前输入a'] = 'l' 176 | # print(table) 177 | 178 | 179 | 180 | from prettytable import PrettyTable 181 | from lexical_Analysis import * 182 | Vn = set() # 非终结符集合 183 | Vt = set() # 终结符集合 184 | First = {} # First集 185 | Follow = {} # Follow集 186 | GramaDict = {} # 处理过的产生式 例如{E:{'ε','+TE'},F:{'TE','+'}} 187 | Code = [] # 读入的产生式 188 | AnalysisList = {} # 分析表 189 | StartSym = "" # 开始符号 190 | EndSym = '#' # 结束符号为“#“ 191 | Epsilon = "ε" # 由于没有epsilon符号用“ε”代替 192 | dicts = {} 193 | 194 | # 构造First集 195 | def getFirst(): 196 | global Vn, Vt, First, Follow 197 | for X in Vn: 198 | First[X] = set() # 初始化非终结符First集为空 199 | for X in Vt: 200 | First[X] = set(X) # 初始化终结符First集为自己 201 | Change = True 202 | while Change: # 当First集没有更新则算法结束 203 | Change = False 204 | for X in Vn: 205 | for Y in GramaDict[X]: 206 | k = 0 207 | Continue = True 208 | while Continue and k < len(Y): 209 | if not First[Y[k]] - set(Epsilon) <= First[X]: # 没有一样的就添加,并且改变标志 210 | if Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: 211 | '''Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: ''' 212 | Continue = False 213 | else: # Y1到Yi候选式都有ε存在 214 | First[X] |= First[Y[k]] - set(Epsilon) 215 | Change = True 216 | if Epsilon not in First[Y[k]]: 217 | Continue = False 218 | k += 1 219 | if Continue: # X->ε或者Y1到Yk均有ε产生式 220 | First[X] |= set(Epsilon) 221 | # FirstA[Y] |= set(Epsilon) 222 | 223 | # 构造Follow集 224 | def getFollow(): 225 | global Vn, Vt, First, Follow, StartSym 226 | for A in Vn: 227 | Follow[A] = set() 228 | Follow[StartSym].add(EndSym) # 步骤1,将结束符号加入Follow[开始符号]中 229 | Change = True 230 | while Change: # 当Follow集没有更新算法结束 231 | Change = False 232 | for X in Vn: 233 | for Y in GramaDict[X]: 234 | for i in range(len(Y)): 235 | if Y[i] in Vt: 236 | continue 237 | Flag = True 238 | for j in range(i + 1, len(Y)): # continue 239 | if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]: 240 | Follow[Y[i]] |= First[Y[j]] - set(Epsilon) # 步骤2 FIRST(β)/ε 加入到FOLLOW(B)中。 241 | Change = True 242 | if Epsilon not in First[Y[j]]: 243 | Flag = False 244 | break 245 | if Flag: #A->αBβ and β->ε 246 | if not Follow[X] <= Follow[Y[i]]: # 步骤3 β->ε,把FOLLOW(A)加到FOLLOW(B)中 247 | Follow[Y[i]] |= Follow[X] 248 | Change = True 249 | 250 | #构造分析表 251 | def getAnalysisList(): 252 | for k in GramaDict: # 初始化分析表 253 | AnalysisList[k] = dict() 254 | for e in Vt: 255 | AnalysisList[k][e] = None 256 | for k in GramaDict: 257 | l = GramaDict[k] 258 | for s in l: 259 | if s[0].isupper(): # S->ɑ ɑ[0]属于非终结符时 包括 ɑ !-> ε 和 ɑ -> ε 的First(ɑ) 260 | for e in Vt: 261 | if e in First[s[0]]: 262 | AnalysisList[k][e] = s 263 | 264 | if s[0] in Vt: ## S->ɑ ɑ[0]属于终结符 例如 select(S->+E) = First(+E) = + 265 | AnalysisList[k][s[0]] = s 266 | 267 | if (s[0].isupper() and (Epsilon in First[s[0]])) or (s == Epsilon): 268 | for c in Follow[k]: # S->ɑ and ɑ->ε 的 Follow(S) 269 | AnalysisList[k][c] = s 270 | 271 | for item,val in AnalysisList.items(): 272 | for k in val: 273 | if val[k]: 274 | dicts.update({(item,k):val[k]}) 275 | 276 | #画表 277 | print("构造分析表:") 278 | data = [] 279 | data.append('') 280 | for i in Vt: 281 | data.append(i) 282 | table1 = PrettyTable(data) 283 | for item in Vn: 284 | production = [] 285 | production.append(item) 286 | for i in AnalysisList[item]: 287 | production.append(AnalysisList[item][i]) 288 | table1.add_row(production) 289 | print(table1) 290 | 291 | 292 | 293 | # 读取文法 294 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 295 | def readproduction(production): 296 | # f = open('E://test1.txt','r') 297 | # production = [] 298 | Vn = set() 299 | # for line in f: 300 | # data = line.strip() #读取每一行 301 | # production.append(data) 302 | # Vn.add(data[0]) 303 | for item in production: 304 | Vn.add(item[0]) 305 | 306 | # print(production) 307 | global sets 308 | sets = set(sets) 309 | sets -= Vn 310 | sets = list(sets) 311 | 312 | #消除左递归 313 | gdict = {} #用字典保存消除左递归的文法 314 | for i in range(len(production)): 315 | X, Y = production[i].split('->') #左右部分开 316 | Y = Y.split('|') #右部根据|再分 317 | ss = '' # 保存候选式没有直接左递归的 消除左递归之后的字符串 318 | s = sets.pop() #弹出一个非终结符集里面没有的字母 319 | nlx = [] #保存没有左递归的候选式 320 | flag = False #存在左递归的标志 321 | temp = [] #保存有左递归的去掉头的候选式 322 | for Yi in Y: 323 | if Yi[0] == X: #该候选式存在左递归 324 | flag = True 325 | if flag: #存在左递归的处理 326 | for Yi in Y: 327 | if Yi[0] == X: #对于E->E+T|T 的 E+T >>>>> E'->+TE' 328 | temp.append(Yi[1:] + s) 329 | else: 330 | gdict[X] = Yi + s #对于E->E+T|T 的 T >>>>> E->TE' 331 | nlx.append(gdict[X]) 332 | 333 | #有左递归的候选式的合并 334 | gdict[s] = s + '->' 335 | for i in range(len(temp)): 336 | if i == len(temp) - 1: 337 | gdict[s] = gdict[s] + temp[i] 338 | else: 339 | gdict[s] = gdict[s] + temp[i] + '|' 340 | if s in gdict: #在 E'->+TE'的基础上 >>>>> E'->+TE'|ε 341 | gdict[s] = gdict[s] + '|ε' 342 | 343 | # 没有左递归的候选式的合并 344 | for i in range(len(nlx)): 345 | if i == len(nlx) - 1: 346 | ss = ss + nlx[i] 347 | else: 348 | ss = ss + nlx[i] + '|' 349 | gdict[X] = X + '->' + ss 350 | 351 | else: #不存在左递归就直接添加改产生式 352 | for Yi in Y: 353 | gdict[X] = production[i] 354 | 355 | #得出消除左递归后的最终文法 356 | result = [] 357 | for k,v in gdict.items(): 358 | result.append(v) 359 | return result 360 | 361 | # 初始化 362 | def init(): 363 | global Vn, Vt, First, Follow, StartSym, Code 364 | f = open('E://test1.txt','r') 365 | production = [] 366 | print("原文法:") 367 | for line in f: 368 | data = line.strip() #读取每一行 369 | production.append(data) 370 | print('\t\t\t\t',data) 371 | 372 | StartSym = production[0][0] 373 | Code = readproduction(production) 374 | n = int(len(Code)) 375 | # print('产生式个数:', n) 376 | 377 | print('产生式:G[', StartSym, ']:') 378 | for i in range(n): 379 | X, Y = Code[i].split('->') 380 | print('\t\t\t\t', Code[i]) 381 | Vn.add(X) 382 | Y = Y.split('|') 383 | for Yi in Y: 384 | Vt |= set(Yi) 385 | if X not in GramaDict: 386 | GramaDict[X] = set() 387 | GramaDict[X] |= set(Y) # 生成产生式集 388 | Vt -= Vn 389 | print('非终结符:', Vn) 390 | print('终结符:', Vt) 391 | getFirst() 392 | getFollow() 393 | print("FIRST集:") 394 | for k in Vn: 395 | print(' FIRST[', k, ']: ', First[k]) 396 | print("FOLLOW集:") 397 | for k, v in Follow.items(): 398 | print(' FOLLOW[', k, ']: ', v) 399 | Vt -= set(Epsilon) 400 | Vt |= set(EndSym) 401 | getAnalysisList() 402 | 403 | 404 | if __name__ == "__main__": 405 | init() 406 | str = input(">>>") 407 | for i in str: 408 | if i not in Vt: 409 | exit("输入的字符在文法里不存在!!!") 410 | 411 | #这里是导入之前写好的词法分析函数 412 | Analysis(str) 413 | table.align['步骤'] = 'l' 414 | table.align['分析栈'] = 'l' 415 | table.align['剩余输入串'] = 'l' 416 | table.align['所用产生式'] = 'l' 417 | table.align['当前输入a'] = 'l' 418 | print(table) 419 | -------------------------------------------------------------------------------- /code/消除左递归.py: -------------------------------------------------------------------------------- 1 | from prettytable import PrettyTable 2 | from lexical_Analysis import Showstack,Showstr,error,Analysis 3 | Vn = set() # 非终结符集合 4 | Vt = set() # 终结符集合 5 | First = {} # First集 6 | Follow = {} # Follow集 7 | GramaDict = {} # 处理过的产生式 例如{E:{'ε','+TE'},F:{'TE','+'}} 8 | Code = [] # 读入的产生式 9 | AnalysisList = {} # 分析表 10 | StartSym = "" # 开始符号 11 | EndSym = '#' # 结束符号为“#“ 12 | Epsilon = "ε" # 由于没有epsilon符号用“ε”代替 13 | dicts = {} 14 | 15 | # 构造First集 16 | def getFirst(): 17 | global Vn, Vt, First, Follow 18 | for X in Vn: 19 | First[X] = set() # 初始化非终结符First集为空 20 | for X in Vt: 21 | First[X] = set(X) # 初始化终结符First集为自己 22 | Change = True 23 | while Change: # 当First集没有更新则算法结束 24 | Change = False 25 | for X in Vn: 26 | for Y in GramaDict[X]: 27 | k = 0 28 | Continue = True 29 | while Continue and k < len(Y): 30 | if not First[Y[k]] - set(Epsilon) <= First[X]: # 没有一样的就添加,并且改变标志 31 | if Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: 32 | '''Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: ''' 33 | Continue = False 34 | else: # Y1到Yi候选式都有ε存在 35 | First[X] |= First[Y[k]] - set(Epsilon) 36 | Change = True 37 | if Epsilon not in First[Y[k]]: 38 | Continue = False 39 | k += 1 40 | if Continue: # X->ε或者Y1到Yk均有ε产生式 41 | First[X] |= set(Epsilon) 42 | # FirstA[Y] |= set(Epsilon) 43 | 44 | # 构造Follow集 45 | def getFollow(): 46 | global Vn, Vt, First, Follow, StartSym 47 | for A in Vn: 48 | Follow[A] = set() 49 | Follow[StartSym].add(EndSym) # 步骤1,将结束符号加入Follow[开始符号]中 50 | Change = True 51 | while Change: # 当Follow集没有更新算法结束 52 | Change = False 53 | for X in Vn: 54 | for Y in GramaDict[X]: 55 | for i in range(len(Y)): 56 | if Y[i] in Vt: 57 | continue 58 | Flag = True 59 | for j in range(i + 1, len(Y)): # continue 60 | if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]: 61 | Follow[Y[i]] |= First[Y[j]] - set(Epsilon) # 步骤2 FIRST(β)/ε 加入到FOLLOW(B)中。 62 | Change = True 63 | if Epsilon not in First[Y[j]]: 64 | Flag = False 65 | break 66 | if Flag: #A->αBβ and β->ε 67 | if not Follow[X] <= Follow[Y[i]]: # 步骤3 β->ε,把FOLLOW(A)加到FOLLOW(B)中 68 | Follow[Y[i]] |= Follow[X] 69 | Change = True 70 | 71 | #构造分析表 72 | def getAnalysisList(): 73 | for k in GramaDict: # 初始化分析表 74 | AnalysisList[k] = dict() 75 | for e in Vt: 76 | AnalysisList[k][e] = None 77 | for k in GramaDict: 78 | l = GramaDict[k] 79 | for s in l: 80 | if s[0].isupper(): # S->ɑ ɑ[0]属于非终结符时 包括 ɑ !-> ε 和 ɑ -> ε 的First(ɑ) 81 | for e in Vt: 82 | if e in First[s[0]]: 83 | AnalysisList[k][e] = s 84 | 85 | if s[0] in Vt: ## S->ɑ ɑ[0]属于终结符 例如 select(S->+E) = First(+E) = + 86 | AnalysisList[k][s[0]] = s 87 | 88 | if (s[0].isupper() and (Epsilon in First[s[0]])) or (s == Epsilon): 89 | for c in Follow[k]: # S->ɑ and ɑ->ε 的 Follow(S) 90 | AnalysisList[k][c] = s 91 | 92 | for item,val in AnalysisList.items(): 93 | for k in val: 94 | if val[k]: 95 | dicts.update({(item,k):val[k]}) 96 | 97 | #构造表的结构 98 | print("构造LL1分析表:") 99 | data = [] 100 | data.append('') 101 | for i in Vt: 102 | data.append(i) 103 | table1 = PrettyTable(data) 104 | for item in Vn: 105 | production = [] 106 | production.append(item) 107 | for i in AnalysisList[item]: 108 | production.append(AnalysisList[item][i]) 109 | table1.add_row(production) 110 | print(table1) 111 | 112 | 113 | # 消除左递归 114 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 115 | def remove_recursion(production): 116 | Vn = set() 117 | for item in production: 118 | Vn.add(item[0]) 119 | 120 | global sets 121 | sets = set(sets) 122 | sets -= Vn 123 | sets = list(sets) 124 | 125 | #消除左递归 126 | gdict = {} #用字典保存消除左递归的文法 127 | for i in range(len(production)): 128 | X, Y = production[i].split('->') #左右部分开 129 | Y = Y.split('|') #右部根据|再分 130 | ss = '' # 保存候选式没有直接左递归的 消除左递归之后的字符串 131 | s = sets.pop() #弹出一个非终结符集里面没有的字母 132 | nlx = [] #保存没有左递归的候选式 133 | flag = False #存在左递归的标志 134 | temp = [] #保存有左递归的去掉头的候选式 135 | for Yi in Y: 136 | if Yi[0] == X: #该候选式存在左递归 137 | flag = True 138 | if flag: #存在左递归的处理 139 | for Yi in Y: 140 | if Yi[0] == X: #对于E->E+T|T 的 E+T >>>>> E'->+TE' 141 | temp.append(Yi[1:] + s) 142 | else: 143 | gdict[X] = Yi + s #对于E->E+T|T 的 T >>>>> E->TE' 144 | nlx.append(gdict[X]) 145 | 146 | #有左递归的候选式的合并 147 | gdict[s] = s + '->' 148 | for i in range(len(temp)): 149 | if i == len(temp) - 1: 150 | gdict[s] = gdict[s] + temp[i] 151 | else: 152 | gdict[s] = gdict[s] + temp[i] + '|' 153 | if s in gdict: #在 E'->+TE'的基础上 >>>>> E'->+TE'|ε 154 | gdict[s] = gdict[s] + '|ε' 155 | 156 | # 没有左递归的候选式的合并 157 | for i in range(len(nlx)): 158 | if i == len(nlx) - 1: 159 | ss = ss + nlx[i] 160 | else: 161 | ss = ss + nlx[i] + '|' 162 | gdict[X] = X + '->' + ss 163 | 164 | else: #不存在左递归就直接添加改产生式 165 | for Yi in Y: 166 | gdict[X] = production[i] 167 | 168 | #得出消除左递归后的最终文法 169 | result = [] 170 | for k,v in gdict.items(): 171 | result.append(v) 172 | return result 173 | 174 | #消除回溯 175 | from collections import defaultdict 176 | def remove_recall(production): 177 | sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 178 | Vn = set() 179 | for i in production: 180 | Vn.add(i[0]) 181 | 182 | sets = set(sets) 183 | sets -= Vn 184 | sets = list(sets) 185 | 186 | result = {} # 保存整个文法的结果 187 | for i in range(len(production)): 188 | X, Y = production[i].split('->') # 左右部分开 189 | Y = Y.split('|') # 右部根据|再分 190 | s = sets.pop() # 弹出一个字符作为回溯的处理 191 | dicts = defaultdict(list) 192 | 193 | for Yi in Y: 194 | dicts[Yi[0]].append(Yi) # 根据候选式的首字符分组放进字典 195 | 196 | temp1 = [] # 保存有回溯的候选式 197 | temp2 = [] # 保存没有回溯的候选式 198 | flag = False # 存在回溯的标志 199 | 200 | result_tmp = {} # 保存每个产生式的结果 201 | ss = '' 202 | for k, v in dicts.items(): 203 | if len(v) > 1: # 存在回溯 204 | flag = True 205 | # 找到公共左因子ss 206 | zipped = zip(*v) # 拉链函数 比如zip(*[abc,abd]) 将列表的元素作为参数传递给zip >>> (a,a),(b,b),(c,d) 207 | for i in zipped: 208 | if len(set(i)) == 1: # 是公共左因子的部分就拼接 209 | ss += i[0] 210 | else: 211 | break 212 | # 去掉有回溯的候选式的公共左因子 213 | for i in range(len(v)): 214 | dicts[k][i] = dicts[k][i].replace(ss, '') 215 | if dicts[k][i] == '': # 候选式刚好等于公共左因子 216 | dicts[k][i] = 'ε' 217 | temp1.extend(dicts[k]) 218 | else: # 不存在回溯的候选式 219 | temp2.extend(dicts[k]) 220 | 221 | # 存在回溯的处理 222 | if flag: 223 | # 有回溯的候选式的合并 224 | result_tmp[s] = s + '->' 225 | for i in range(len(temp1)): 226 | if i == len(temp1) - 1: 227 | result_tmp[s] = result_tmp[s] + temp1[i] 228 | else: 229 | result_tmp[s] = result_tmp[s] + temp1[i] + '|' 230 | 231 | # 没有回溯的候选式的合并 232 | nonrecall = '' 233 | for i in range(len(temp2)): 234 | if i == len(temp2) - 1: 235 | nonrecall = nonrecall + temp2[i] 236 | else: 237 | nonrecall = nonrecall + temp2[i] + '|' 238 | result_tmp[X] = X + '->' + ss + s + '|' + nonrecall 239 | # 不存在回溯的处理 240 | else: 241 | for Yi in Y: 242 | result_tmp[X] = production[i] 243 | result.update(result_tmp) 244 | result_new = [] 245 | for k, v in result.items(): 246 | result_new.append(v) 247 | return result_new 248 | 249 | 250 | # 总控程序 251 | def LL1(): 252 | global Vn, Vt, First, Follow, StartSym, Code 253 | f = open('E://test1.txt','r') 254 | production = [] 255 | print("原文法:") 256 | for line in f: 257 | data = line.strip() #读取每一行 258 | production.append(data) 259 | print('\t\t\t\t',data) 260 | 261 | StartSym = production[0][0] 262 | Code = remove_recursion(production) #消除左递归 263 | Code = remove_recall(Code) #消除回溯 264 | n = int(len(Code)) 265 | 266 | print('消除左递归和回溯:') 267 | for i in range(n): 268 | X, Y = Code[i].split('->') #产生式的分离 269 | print('\t\t\t\t', Code[i]) 270 | Vn.add(X) 271 | Y = Y.split('|') 272 | for Yi in Y: 273 | Vt |= set(Yi) 274 | if X not in GramaDict: 275 | GramaDict[X] = set() 276 | GramaDict[X] |= set(Y) # 生成产生式集 277 | Vt -= Vn 278 | print('非终结符:', Vn) 279 | print('终结符:', Vt) 280 | getFirst() 281 | getFollow() 282 | print("FIRST集:") 283 | for k in Vn: 284 | print(' FIRST[', k, ']: ', First[k]) 285 | print("FOLLOW集:") 286 | for k, v in Follow.items(): 287 | print(' FOLLOW[', k, ']: ', v) 288 | Vt -= set(Epsilon) 289 | Vt |= set(EndSym) 290 | 291 | getAnalysisList() #LL1得到分析表 292 | Vt.remove('#') 293 | 294 | if __name__ == "__main__": 295 | LL1() 296 | str = input(">>>") 297 | for i in str: 298 | if i not in Vt: 299 | exit("输入的字符在文法里不存在!!!") 300 | table = PrettyTable(["Steps", "Stack", "Input_a_now", "Remain_str", "Use_production"]) 301 | #导入实验二写好的语法分析,接口 302 | Analysis(str,StartSym,table,dicts,Vt,Vn) 303 | 304 | #格式控制,输出语法分析表 305 | table.align['步骤'] = 'l' 306 | table.align['分析栈'] = 'l' 307 | table.align['剩余输入串'] = 'l' 308 | table.align['所用产生式'] = 'l' 309 | table.align['当前输入a'] = 'l' 310 | print(table) 311 | 312 | 313 | 314 | 315 | # from prettytable import PrettyTable 316 | # from lexical_Analysis import * 317 | # Vn = set() # 非终结符集合 318 | # Vt = set() # 终结符集合 319 | # First = {} # First集 320 | # Follow = {} # Follow集 321 | # GramaDict = {} # 处理过的产生式 例如{E:{'ε','+TE'},F:{'TE','+'}} 322 | # Code = [] # 读入的产生式 323 | # AnalysisList = {} # 分析表 324 | # StartSym = "" # 开始符号 325 | # EndSym = '#' # 结束符号为“#“ 326 | # Epsilon = "ε" # 由于没有epsilon符号用“ε”代替 327 | # dicts = {} 328 | # 329 | # # 构造First集 330 | # def getFirst(): 331 | # global Vn, Vt, First, Follow 332 | # for X in Vn: 333 | # First[X] = set() # 初始化非终结符First集为空 334 | # for X in Vt: 335 | # First[X] = set(X) # 初始化终结符First集为自己 336 | # Change = True 337 | # while Change: # 当First集没有更新则算法结束 338 | # Change = False 339 | # for X in Vn: 340 | # for Y in GramaDict[X]: 341 | # k = 0 342 | # Continue = True 343 | # while Continue and k < len(Y): 344 | # if not First[Y[k]] - set(Epsilon) <= First[X]: # 没有一样的就添加,并且改变标志 345 | # if Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: 346 | # '''Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: ''' 347 | # Continue = False 348 | # else: # Y1到Yi候选式都有ε存在 349 | # First[X] |= First[Y[k]] - set(Epsilon) 350 | # Change = True 351 | # if Epsilon not in First[Y[k]]: 352 | # Continue = False 353 | # k += 1 354 | # if Continue: # X->ε或者Y1到Yk均有ε产生式 355 | # First[X] |= set(Epsilon) 356 | # # FirstA[Y] |= set(Epsilon) 357 | # 358 | # # 构造Follow集 359 | # def getFollow(): 360 | # global Vn, Vt, First, Follow, StartSym 361 | # for A in Vn: 362 | # Follow[A] = set() 363 | # Follow[StartSym].add(EndSym) # 步骤1,将结束符号加入Follow[开始符号]中 364 | # Change = True 365 | # while Change: # 当Follow集没有更新算法结束 366 | # Change = False 367 | # for X in Vn: 368 | # for Y in GramaDict[X]: 369 | # for i in range(len(Y)): 370 | # if Y[i] in Vt: 371 | # continue 372 | # Flag = True 373 | # for j in range(i + 1, len(Y)): # continue 374 | # if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]: 375 | # Follow[Y[i]] |= First[Y[j]] - set(Epsilon) # 步骤2 FIRST(β)/ε 加入到FOLLOW(B)中。 376 | # Change = True 377 | # if Epsilon not in First[Y[j]]: 378 | # Flag = False 379 | # break 380 | # if Flag: #A->αBβ and β->ε 381 | # if not Follow[X] <= Follow[Y[i]]: # 步骤3 β->ε,把FOLLOW(A)加到FOLLOW(B)中 382 | # Follow[Y[i]] |= Follow[X] 383 | # Change = True 384 | # 385 | # #构造分析表 386 | # def getAnalysisList(): 387 | # for k in GramaDict: # 初始化分析表 388 | # AnalysisList[k] = dict() 389 | # for e in Vt: 390 | # AnalysisList[k][e] = None 391 | # for k in GramaDict: 392 | # l = GramaDict[k] 393 | # for s in l: 394 | # if s[0].isupper(): # S->ɑ ɑ[0]属于非终结符时 包括 ɑ !-> ε 和 ɑ -> ε 的First(ɑ) 395 | # for e in Vt: 396 | # if e in First[s[0]]: 397 | # AnalysisList[k][e] = s 398 | # 399 | # if s[0] in Vt: ## S->ɑ ɑ[0]属于终结符 例如 select(S->+E) = First(+E) = + 400 | # AnalysisList[k][s[0]] = s 401 | # 402 | # if (s[0].isupper() and (Epsilon in First[s[0]])) or (s == Epsilon): 403 | # for c in Follow[k]: # S->ɑ and ɑ->ε 的 Follow(S) 404 | # AnalysisList[k][c] = s 405 | # 406 | # for item,val in AnalysisList.items(): 407 | # for k in val: 408 | # if val[k]: 409 | # dicts.update({(item,k):val[k]}) 410 | # 411 | # #画表 412 | # print("构造分析表:") 413 | # data = [] 414 | # data.append('') 415 | # for i in Vt: 416 | # data.append(i) 417 | # table1 = PrettyTable(data) 418 | # for item in Vn: 419 | # production = [] 420 | # production.append(item) 421 | # for i in AnalysisList[item]: 422 | # production.append(AnalysisList[item][i]) 423 | # table1.add_row(production) 424 | # print(table1) 425 | # 426 | # 427 | # 428 | # # 读取文法 429 | # sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 430 | # def readproduction(): 431 | # f = open('E://test1.txt','r') 432 | # production = [] 433 | # Vn = set() 434 | # for line in f: 435 | # data = line.strip() #读取每一行 436 | # production.append(data) 437 | # Vn.add(data[0]) 438 | # 439 | # 440 | # # print(production) 441 | # global sets 442 | # sets = set(sets) 443 | # sets -= Vn 444 | # sets = list(sets) 445 | # 446 | # #消除左递归 447 | # gdict = {} #用字典保存消除左递归的文法 448 | # for i in range(len(production)): 449 | # X, Y = production[i].split('->') #左右部分开 450 | # Y = Y.split('|') #右部根据|再分 451 | # ss = '' # 保存候选式没有直接左递归的 消除左递归之后的字符串 452 | # s = sets.pop() #弹出一个非终结符集里面没有的字母 453 | # nlx = [] #保存没有左递归的候选式 454 | # flag = False #存在左递归的标志 455 | # for Yi in Y: 456 | # if Yi[0] == X: #该候选式存在左递归 457 | # flag = True 458 | # if flag: #存在左递归的处理 459 | # for Yi in Y: 460 | # if Yi[0] == X: #对于E->E+T|T 的 E+T >>>>> E'->+TE' 461 | # gdict[s] = s + '->' + Yi[1:] + s 462 | # else: 463 | # gdict[X] = Yi + s #对于E->E+T|T 的 T >>>>> E->TE' 464 | # nlx.append(gdict[X]) 465 | # if s in gdict: #在 E'->+TE'的基础上 >>>>> E'->+TE'|ε 466 | # gdict[s] = gdict[s] + '|ε' 467 | # 468 | # #没有左递归的候选式的合并 469 | # for i in range(len(nlx)): 470 | # if i == len(nlx) - 1: 471 | # ss = ss + nlx[i] 472 | # else: 473 | # ss = ss + nlx[i] + '|' 474 | # gdict[X] = X + '->' + ss 475 | # 476 | # else: #不存在左递归就直接添加改产生式 477 | # for Yi in Y: 478 | # gdict[X] = production[i] 479 | # 480 | # #得出消除左递归后的最终文法 481 | # result = [] 482 | # for k,v in gdict.items(): 483 | # result.append(v) 484 | # return result 485 | # 486 | # # 初始化 487 | # def init(): 488 | # global Vn, Vt, First, Follow, StartSym, Code 489 | # f = open('E://test1.txt','r') 490 | # production = [] 491 | # for line in f: 492 | # data = line.strip() #读取每一行 493 | # production.append(data) 494 | # StartSym = production[0][0] 495 | # Code = readproduction() 496 | # n = int(len(Code)) 497 | # # print('产生式个数:', n) 498 | # 499 | # print('产生式:G[', StartSym, ']:') 500 | # for i in range(n): 501 | # X, Y = Code[i].split('->') 502 | # print('\t\t\t\t', Code[i]) 503 | # Vn.add(X) 504 | # Y = Y.split('|') 505 | # for Yi in Y: 506 | # Vt |= set(Yi) 507 | # if X not in GramaDict: 508 | # GramaDict[X] = set() 509 | # GramaDict[X] |= set(Y) # 生成产生式集 510 | # Vt -= Vn 511 | # print('非终结符:', Vn) 512 | # print('终结符:', Vt) 513 | # getFirst() 514 | # getFollow() 515 | # print("FIRST集:") 516 | # for k in Vn: 517 | # print(' FIRST[', k, ']: ', First[k]) 518 | # print("FOLLOW集:") 519 | # for k, v in Follow.items(): 520 | # print(' FOLLOW[', k, ']: ', v) 521 | # Vt -= set(Epsilon) 522 | # Vt |= set(EndSym) 523 | # getAnalysisList() 524 | # 525 | # 526 | # if __name__ == "__main__": 527 | # init() 528 | # str = input(">>>") 529 | # for i in str: 530 | # if i not in Vt: 531 | # exit("输入的字符在文法里不存在!!!") 532 | # 533 | # #这里是导入之前写好的词法分析函数 534 | # Analysis(str) 535 | # table.align['步骤'] = 'l' 536 | # table.align['分析栈'] = 'l' 537 | # table.align['剩余输入串'] = 'l' 538 | # table.align['所用产生式'] = 'l' 539 | # table.align['当前输入a'] = 'l' 540 | # print(table) 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | # from prettytable import PrettyTable 549 | # from lexical_Analysis import * 550 | # Vn = set() # 非终结符集合 551 | # Vt = set() # 终结符集合 552 | # First = {} # First集 553 | # Follow = {} # Follow集 554 | # GramaDict = {} # 处理过的产生式 例如{E:{'ε','+TE'},F:{'TE','+'}} 555 | # Code = [] # 读入的产生式 556 | # AnalysisList = {} # 分析表 557 | # StartSym = "" # 开始符号 558 | # EndSym = '#' # 结束符号为“#“ 559 | # Epsilon = "ε" # 由于没有epsilon符号用“ε”代替 560 | # dicts = {} 561 | # 562 | # # 构造First集 563 | # def getFirst(): 564 | # global Vn, Vt, First, Follow 565 | # for X in Vn: 566 | # First[X] = set() # 初始化非终结符First集为空 567 | # for X in Vt: 568 | # First[X] = set(X) # 初始化终结符First集为自己 569 | # Change = True 570 | # while Change: # 当First集没有更新则算法结束 571 | # Change = False 572 | # for X in Vn: 573 | # for Y in GramaDict[X]: 574 | # k = 0 575 | # Continue = True 576 | # while Continue and k < len(Y): 577 | # if not First[Y[k]] - set(Epsilon) <= First[X]: # 没有一样的就添加,并且改变标志 578 | # if Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: 579 | # '''Epsilon not in First[Y[k]] and Y[k] in Vn and k > 0: ''' 580 | # Continue = False 581 | # else: # Y1到Yi候选式都有ε存在 582 | # First[X] |= First[Y[k]] - set(Epsilon) 583 | # Change = True 584 | # if Epsilon not in First[Y[k]]: 585 | # Continue = False 586 | # k += 1 587 | # if Continue: # X->ε或者Y1到Yk均有ε产生式 588 | # First[X] |= set(Epsilon) 589 | # # FirstA[Y] |= set(Epsilon) 590 | # 591 | # # 构造Follow集 592 | # def getFollow(): 593 | # global Vn, Vt, First, Follow, StartSym 594 | # for A in Vn: 595 | # Follow[A] = set() 596 | # Follow[StartSym].add(EndSym) # 步骤1,将结束符号加入Follow[开始符号]中 597 | # Change = True 598 | # while Change: # 当Follow集没有更新算法结束 599 | # Change = False 600 | # for X in Vn: 601 | # for Y in GramaDict[X]: 602 | # for i in range(len(Y)): 603 | # if Y[i] in Vt: 604 | # continue 605 | # Flag = True 606 | # for j in range(i + 1, len(Y)): # continue 607 | # if not First[Y[j]] - set(Epsilon) <= Follow[Y[i]]: 608 | # Follow[Y[i]] |= First[Y[j]] - set(Epsilon) # 步骤2 FIRST(β)/ε 加入到FOLLOW(B)中。 609 | # Change = True 610 | # if Epsilon not in First[Y[j]]: 611 | # Flag = False 612 | # break 613 | # if Flag: #A->αBβ and β->ε 614 | # if not Follow[X] <= Follow[Y[i]]: # 步骤3 β->ε,把FOLLOW(A)加到FOLLOW(B)中 615 | # Follow[Y[i]] |= Follow[X] 616 | # Change = True 617 | # 618 | # #构造分析表 619 | # def getAnalysisList(): 620 | # for k in GramaDict: # 初始化分析表 621 | # AnalysisList[k] = dict() 622 | # for e in Vt: 623 | # AnalysisList[k][e] = None 624 | # for k in GramaDict: 625 | # l = GramaDict[k] 626 | # for s in l: 627 | # if s[0].isupper(): # S->ɑ ɑ[0]属于非终结符时 包括 ɑ !-> ε 和 ɑ -> ε 的First(ɑ) 628 | # for e in Vt: 629 | # if e in First[s[0]]: 630 | # AnalysisList[k][e] = s 631 | # 632 | # if s[0] in Vt: ## S->ɑ ɑ[0]属于终结符 例如 select(S->+E) = First(+E) = + 633 | # AnalysisList[k][s[0]] = s 634 | # 635 | # if (s[0].isupper() and (Epsilon in First[s[0]])) or (s == Epsilon): 636 | # for c in Follow[k]: # S->ɑ and ɑ->ε 的 Follow(S) 637 | # AnalysisList[k][c] = s 638 | # 639 | # for item,val in AnalysisList.items(): 640 | # for k in val: 641 | # if val[k]: 642 | # dicts.update({(item,k):val[k]}) 643 | # 644 | # #画表 645 | # print("构造分析表:") 646 | # data = [] 647 | # data.append('') 648 | # for i in Vt: 649 | # data.append(i) 650 | # table1 = PrettyTable(data) 651 | # for item in Vn: 652 | # production = [] 653 | # production.append(item) 654 | # for i in AnalysisList[item]: 655 | # production.append(AnalysisList[item][i]) 656 | # table1.add_row(production) 657 | # print(table1) 658 | # 659 | # 660 | # 661 | # # 消除左递归 662 | # sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 663 | # def remove_recursion(): 664 | # f = open('E://test1.txt','r') 665 | # production = [] 666 | # Vn = set() 667 | # for line in f: 668 | # data = line.strip() #读取每一行 669 | # production.append(data) 670 | # Vn.add(data[0]) 671 | # 672 | # # print(production) 673 | # global sets 674 | # sets = set(sets) 675 | # sets -= Vn 676 | # sets = list(sets) 677 | # 678 | # #消除左递归 679 | # gdict = {} #用字典保存消除左递归的文法 680 | # for i in range(len(production)): 681 | # X, Y = production[i].split('->') #左右部分开 682 | # Y = Y.split('|') #右部根据|再分 683 | # ss = '' # 保存候选式没有直接左递归的 消除左递归之后的字符串 684 | # s = sets.pop() #弹出一个非终结符集里面没有的字母 685 | # nlx = [] #保存没有左递归的候选式 686 | # flag = False #存在左递归的标志 687 | # temp = [] #保存有左递归的去掉头的候选式 688 | # for Yi in Y: 689 | # if Yi[0] == X: #该候选式存在左递归 690 | # flag = True 691 | # if flag: #存在左递归的处理 692 | # for Yi in Y: 693 | # if Yi[0] == X: #对于E->E+T|T 的 E+T >>>>> E'->+TE' 694 | # temp.append(Yi[1:] + s) 695 | # else: 696 | # gdict[X] = Yi + s #对于E->E+T|T 的 T >>>>> E->TE' 697 | # nlx.append(gdict[X]) 698 | # 699 | # #有左递归的候选式的合并 700 | # gdict[s] = s + '->' 701 | # for i in range(len(temp)): 702 | # if i == len(temp) - 1: 703 | # gdict[s] = gdict[s] + temp[i] 704 | # else: 705 | # gdict[s] = gdict[s] + temp[i] + '|' 706 | # if s in gdict: #在 E'->+TE'的基础上 >>>>> E'->+TE'|ε 707 | # gdict[s] = gdict[s] + '|ε' 708 | # 709 | # # 没有左递归的候选式的合并 710 | # for i in range(len(nlx)): 711 | # if i == len(nlx) - 1: 712 | # ss = ss + nlx[i] 713 | # else: 714 | # ss = ss + nlx[i] + '|' 715 | # gdict[X] = X + '->' + ss 716 | # 717 | # else: #不存在左递归就直接添加改产生式 718 | # for Yi in Y: 719 | # gdict[X] = production[i] 720 | # 721 | # #得出消除左递归后的最终文法 722 | # result = [] 723 | # for k,v in gdict.items(): 724 | # result.append(v) 725 | # return result 726 | # 727 | # 728 | # from collections import defaultdict 729 | # 730 | # def remove_recall(production): 731 | # sets = [chr(x) for x in range(ord('A'), ord('Z') + 1)] 732 | # # f = open('E://test3.txt', 'r') 733 | # # production = [] 734 | # Vn = set() 735 | # for item in production: 736 | # Vn.add(item[0]) 737 | # 738 | # sets = set(sets) 739 | # sets -= Vn 740 | # sets = list(sets) 741 | # 742 | # result = {} # 保存整个文法的结果 743 | # for i in range(len(production)): 744 | # X, Y = production[i].split('->') # 左右部分开 745 | # Y = Y.split('|') # 右部根据|再分 746 | # s = sets.pop() # 弹出一个字符作为回溯的处理 747 | # dicts = defaultdict(list) 748 | # 749 | # for Yi in Y: 750 | # dicts[Yi[0]].append(Yi) # 根据候选式的首字符分组放进字典 751 | # 752 | # temp1 = [] # 保存有回溯的候选式 753 | # temp2 = [] # 保存没有回溯的候选式 754 | # flag = False # 存在回溯的标志 755 | # 756 | # result_tmp = {} # 保存每个产生式的结果 757 | # ss = '' 758 | # for k, v in dicts.items(): 759 | # if len(v) > 1: # 存在回溯 760 | # flag = True 761 | # # 找到公共左因子ss 762 | # zipped = zip(*v) # 拉链函数 比如zip(*[abc,abd]) 将列表的元素作为参数传递给zip >>> (a,a),(b,b),(c,d) 763 | # for i in zipped: 764 | # if len(set(i)) == 1: # 是公共左因子的部分就拼接 765 | # ss += i[0] 766 | # else: 767 | # break 768 | # # 去掉有回溯的候选式的公共左因子 769 | # for i in range(len(v)): 770 | # dicts[k][i] = dicts[k][i].replace(ss, '') 771 | # if dicts[k][i] == '': # 候选式刚好等于公共左因子 772 | # dicts[k][i] = 'ε' 773 | # temp1.extend(dicts[k]) 774 | # else: # 不存在回溯的候选式 775 | # temp2.extend(dicts[k]) 776 | # 777 | # # 存在回溯的处理 778 | # if flag: 779 | # # 有回溯的候选式的合并 780 | # result_tmp[s] = s + '->' 781 | # for i in range(len(temp1)): 782 | # if i == len(temp1) - 1: 783 | # result_tmp[s] = result_tmp[s] + temp1[i] 784 | # else: 785 | # result_tmp[s] = result_tmp[s] + temp1[i] + '|' 786 | # 787 | # # 没有回溯的候选式的合并 788 | # nonrecall = '' 789 | # for i in range(len(temp2)): 790 | # if i == len(temp2) - 1: 791 | # nonrecall = nonrecall + temp2[i] 792 | # else: 793 | # nonrecall = nonrecall + temp2[i] + '|' 794 | # result_tmp[X] = X + '->' + ss + s + '|' + nonrecall 795 | # # 不存在回溯的处理 796 | # else: 797 | # for Yi in Y: 798 | # result_tmp[X] = production[i] 799 | # result.update(result_tmp) 800 | # result_new =[] 801 | # for k,v in result.items(): 802 | # result_new.append(v) 803 | # return result_new 804 | # 805 | # 806 | # # 初始化 807 | # def init(): 808 | # global Vn, Vt, First, Follow, StartSym, Code 809 | # f = open('E://test3.txt','r') 810 | # production = [] 811 | # print("原文法:") 812 | # for line in f: 813 | # data = line.strip() #读取每一行 814 | # production.append(data) 815 | # print('\t\t\t\t',data) 816 | # StartSym = production[0][0] 817 | # Code = remove_recursion() 818 | # Code = remove_recall(Code) 819 | # 820 | # n = int(len(Code)) 821 | # # print('产生式个数:', n) 822 | # 823 | # print('产生式:G[', StartSym, ']:') 824 | # for i in range(n): 825 | # X, Y = Code[i].split('->') 826 | # print('\t\t\t\t', Code[i]) 827 | # Vn.add(X) 828 | # Y = Y.split('|') 829 | # for Yi in Y: 830 | # Vt |= set(Yi) 831 | # if X not in GramaDict: 832 | # GramaDict[X] = set() 833 | # GramaDict[X] |= set(Y) # 生成产生式集 834 | # Vt -= Vn 835 | # print('非终结符:', Vn) 836 | # print('终结符:', Vt) 837 | # getFirst() 838 | # getFollow() 839 | # print("FIRST集:") 840 | # for k in Vn: 841 | # print(' FIRST[', k, ']: ', First[k]) 842 | # print("FOLLOW集:") 843 | # for k, v in Follow.items(): 844 | # print(' FOLLOW[', k, ']: ', v) 845 | # Vt -= set(Epsilon) 846 | # Vt |= set(EndSym) 847 | # getAnalysisList() 848 | # 849 | # 850 | # if __name__ == "__main__": 851 | # init() 852 | # str = input(">>>") 853 | # for i in str: 854 | # if i not in Vt: 855 | # exit("输入的字符在文法里不存在!!!") 856 | # 857 | # #这里是导入之前写好的词法分析函数 858 | # Analysis(str) 859 | # table.align['步骤'] = 'l' 860 | # table.align['分析栈'] = 'l' 861 | # table.align['剩余输入串'] = 'l' 862 | # table.align['所用产生式'] = 'l' 863 | # table.align['当前输入a'] = 'l' 864 | # print(table) 865 | # 866 | 867 | 868 | 869 | 870 | 871 | 872 | 873 | 874 | --------------------------------------------------------------------------------