#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Lexical analyzer for the UESTC "Principles of Compilers" course lab.

The lab implements, in Python, a lexical analyzer (this script) and a
parser for a small Pascal-like teaching language.

The scanner reads ``file_name`` character by character and writes one
``<lexeme> <code>`` pair per line to the ``.dyd`` file.  Lexemes are
right-justified in a 16 character column.  ``EOLN 24`` is emitted for every
newline and ``EOF 25`` terminates the listing.  Lexical errors are appended
to the ``.err`` file.
"""

# Scanner state shared by the functions below.
temp = ""    # characters of the token currently being assembled
state = 0    # kind of the pending token (see scan_char)
ItemDict = {"begin": 1, "end": 2, "integer": 3, "if": 4, "then": 5,
            "else": 6, "function": 7, "read": 8, "write": 9, "=": 12,
            "<>": 13, "<=": 14, "<": 15, ">=": 16, ">": 17, "-": 18,
            "*": 19, ":=": 20, "(": 21, ")": 22, ";": 23}
Token = 10       # code emitted for identifiers
Constants = 11   # code emitted for unsigned integer constants
target = ""      # accumulated text of the .dyd listing
file_name = "test.txt"
error_file_name = file_name.split('.')[0]+".err"
target_file_name = file_name.split('.')[0]+".dyd"

# State entered after reading a character that starts an operator token.
_OPERATOR_STATES = {'-': 6, '*': 7, '(': 8, ')': 9, '<': 10, ':': 17,
                    ';': 20}

_ERROR_MESSAGES = {
    0: "出现了无法识别的字符",
    1: "请注意,:后必须跟=",
}


def print_binary_function(print_word, num):
    """Append one ``<lexeme> <code>`` line to the in-memory listing.

    The lexeme is right-justified to 16 columns; longer lexemes are written
    unpadded.
    """
    global target
    target += print_word.rjust(16) + ' ' + str(num) + '\n'


def init_file():
    """Create (or truncate) the error file and the token file."""
    for path in (error_file_name, target_file_name):
        with open(path, "w"):
            pass


def error_write(type):
    """Append the message for lexical error code ``type`` to the error file.

    ``type`` 0 means an unrecognized character, 1 means a ``:`` that is not
    followed by ``=``.  The file is opened in append mode so that earlier
    errors are not overwritten (``init_file`` truncates it once per run).

    Raises:
        ValueError: if ``type`` is not a known error code.
    """
    try:
        error_info = _ERROR_MESSAGES[type]
    except KeyError:
        raise ValueError("unknown lexical error code: %r" % (type,))
    with open(error_file_name, "a") as error_file:
        error_file.write(error_info + '\n')


def end_word():
    """Emit the pending token (if any) and clear the token buffer.

    The current ``state`` decides how the buffer is classified: state 1 is
    a keyword or identifier, state 3 is a constant, any other state must be
    an operator listed in ``ItemDict`` (otherwise error 1 is reported).
    ``state`` itself is left untouched; callers reset it.
    """
    global temp
    if temp == "":
        return
    code = ItemDict.get(temp)
    if state == 1:
        print_binary_function(temp, code if code else Token)
    elif state == 3:
        print_binary_function(temp, Constants)
    elif code:
        print_binary_function(temp, code)
    else:
        error_write(1)
    temp = ""


def _is_letter(char):
    """Return True for ASCII letters only (not the punctuation between them)."""
    return 'a' <= char <= 'z' or 'A' <= char <= 'Z'


def _is_digit(char):
    """Return True for the ASCII digits 0-9."""
    return '0' <= char <= '9'


def scan_char(word):
    """Feed a single character to the scanner.

    States: 0 idle, 1 identifier/keyword, 3 constant, 5 ``=``, 6 ``-``,
    7 ``*``, 8 ``(``, 9 ``)``, 10 ``<``, 12 ``<>``, 14 ``>``, 17 ``:``,
    20 ``;``.
    """
    global temp, state
    if word in ' \t\r':
        # Blanks only separate tokens.
        end_word()
        state = 0
    elif _is_letter(word):
        if state != 1:
            end_word()
        temp += word
        state = 1
    elif _is_digit(word):
        if state == 1:
            # A digit after a letter continues the identifier.
            temp += word
        else:
            if state != 3:
                end_word()
            temp += word
            state = 3
    elif word == '=':
        if state in (0, 10, 14, 17):
            # Completes "<=", ">=" or ":=" (or is a lone "=" when idle).
            temp += word
            end_word()
            state = 0
        else:
            end_word()
            temp += word
            state = 5
    elif word == '>':
        if state == 10:
            # "<" followed by ">" forms the "<>" operator.
            temp += word
            state = 12
        else:
            end_word()
            temp += word   # keep the ">" so that ">" and ">=" are emitted
            state = 14
    elif word in _OPERATOR_STATES:
        end_word()
        temp += word
        state = _OPERATOR_STATES[word]
    elif word == '\n':
        end_word()
        state = 0
        print_binary_function("EOLN", 24)
    else:
        # An unrecognized character also terminates the pending token.
        end_word()
        state = 0
        error_write(0)


def analyze(text):
    """Tokenize ``text`` and return the complete ``.dyd`` listing as a string.

    The scanner state is reset first, so the function can be called
    repeatedly.  Lexical errors are appended to the error file as a side
    effect.
    """
    global temp, state, target
    temp = ""
    state = 0
    target = ""
    for word in text:
        scan_char(word)
    end_word()   # flush a token that ends at end of input
    print_binary_function("EOF", 25)
    return target


def main():
    """Scan ``file_name`` and write the token listing to the ``.dyd`` file."""
    init_file()
    with open(file_name, "r") as source_file:
        text = source_file.read()
    listing = analyze(text)
    with open(target_file_name, "w") as target_file:
        target_file.write(listing)


if __name__ == '__main__':
    main()
# -*- coding: UTF-8 -*-
"""Recursive-descent parser for the course's Pascal-like teaching language.

Input is the ``.dyd`` token listing produced by the lexical analyzer (one
``<lexeme> <code>`` pair per line).  The parser copies the listing to the
``.dys`` file, checks the token stream against the grammar and writes

* the variable table to the ``.var`` file,
* the procedure table to the ``.pro`` file,
* all error messages (with the source line number) to the ``.err`` file.
"""

dyd_list = []                      # token stream: [lexeme, code] pairs
file_name = "test.dyd"
var_file_name = file_name.split('.dyd')[0] + ".var"
pro_file_name = file_name.split('.dyd')[0] + ".pro"
err_file_name = file_name.split('.dyd')[0] + ".err"
dys_file_name = file_name.split('.dyd')[0] + ".dys"
pos = 0                            # index of the current token in dyd_list
line = 1                           # current source line (counted via EOLN)

# Token codes (must match the lexical analyzer).
begin_code = 1
end_code = 2
integer_code = 3
if_code = 4
then_code = 5
else_code = 6
function_code = 7
read_code = 8
write_code = 9
identifier_code = 10
constant_code = 11
relation_codes = (12, 13, 14, 15, 16, 17)   # =  <>  <=  <  >=  >
minus_code = 18
times_code = 19
assign_code = 20
left_paren_code = 21
right_paren_code = 22
semicolon_code = 23
EOLN = 24
EOF = 25

var_define = 0
var_use = 1
formal_parameter = 1
err_TXT = ""                       # accumulated error report
var_Table = []     # rows: [name, procedure, is_formal, type, level, index]
var_count = 0
procedure_Table = []   # rows: [name, type, level, first_line, last_line]
procedure_count = 0
main_procedure_text = "main"
now_Procedure = main_procedure_text
procedure_level = 0


def reset_state():
    """Return the parser to its initial state (empty tables, position 0).

    The lists are cleared in place so that existing references stay valid.
    """
    global pos, line, err_TXT, var_count, procedure_count
    global now_Procedure, procedure_level
    pos = 0
    line = 1
    err_TXT = ""
    var_count = 0
    procedure_count = 0
    now_Procedure = main_procedure_text
    procedure_level = 0
    del dyd_list[:]
    del var_Table[:]
    del procedure_Table[:]


def _write_table(path, table):
    """Write ``table`` to ``path``: one row per line, each field followed by a space."""
    with open(path, "w") as table_file:
        for row in table:
            table_file.write(''.join(str(field) + ' ' for field in row) + '\n')


def end_files():
    """Write the error report, the variable table and the procedure table.

    Left-over input is checked *before* the error file is written, so the
    corresponding message is part of the saved report.
    """
    if now() != EOF:
        err_print("规约完成,但函数未结束")
    with open(err_file_name, "w") as err_file:
        err_file.write(err_TXT)
    _write_table(var_file_name, var_Table)
    _write_table(pro_file_name, procedure_Table)


def init_files():
    """Load the token listing, copy it to the ``.dys`` file and truncate the outputs.

    Blank lines in the listing are ignored.

    Raises:
        ValueError: if a line is not a ``<lexeme> <code>`` pair.
    """
    reset_state()
    with open(file_name, "r") as dyd_file, open(dys_file_name, "w") as dys_file:
        for raw_line in dyd_file:
            text = raw_line.strip()
            if not text:
                continue
            fields = text.split()
            if len(fields) != 2:
                raise ValueError("malformed token line: %r" % (text,))
            dyd_list.append([fields[0], int(fields[1])])
            dys_file.write(text + '\n')
    for path in (err_file_name, pro_file_name, var_file_name):
        with open(path, "w"):
            pass


def _skip_eoln():
    """Advance ``pos`` over EOLN tokens, counting source lines."""
    global line, pos
    while pos < len(dyd_list) and dyd_list[pos][1] == EOLN:
        line += 1
        pos += 1


def _token_at(index):
    """Return the token at ``index``; positions past the end read as EOF."""
    if 0 <= index < len(dyd_list):
        return dyd_list[index]
    return ["EOF", EOF]


def advanced():
    """Consume the current (non-EOLN) token."""
    global pos
    now()
    if pos < len(dyd_list):
        pos += 1


def now_more_one(temp=1):
    """Look ahead: code of the first non-EOLN token at or after ``pos + temp``.

    Returns ``EOF`` when the look-ahead runs off the end of the token list.
    """
    index = pos + temp
    while index < len(dyd_list) and dyd_list[index][1] == EOLN:
        index += 1
    return _token_at(index)[1]


def now():
    """Return the code of the current token, skipping EOLN tokens first."""
    _skip_eoln()
    return _token_at(pos)[1]


def now_word():
    """Return the lexeme of the current token, skipping EOLN tokens first."""
    _skip_eoln()
    return _token_at(pos)[0]


def err_print(err_info):
    """Append ``err_info`` together with the current line number to the report."""
    global err_TXT
    err_TXT += err_info+" "+"当前行为:" + str(line)+'\n'


def _expect(code, err_info):
    """Consume the current token if its code is ``code``.

    Otherwise report ``err_info`` and leave the input untouched.
    Returns True when the token was consumed.
    """
    if now() == code:
        advanced()
        return True
    err_print(err_info)
    return False


def main_procedure():
    # <program> -> <subprogram>
    branch_procedure()
    return


def branch_procedure():
    # <subprogram> -> begin <declaration list> ; <statement list> end
    if not _expect(begin_code, "分程序错误,是否缺少 begin"):
        return
    declare_statement_table()
    if not _expect(semicolon_code, "分程序错误,是否缺少 ;"):
        return
    exec_statement_table()
    _expect(end_code, "分程序错误,是否缺少 end")


def declare_statement_table():
    # Left-recursive: <declaration list> -> <declaration>
    #                                     | <declaration list> ; <declaration>
    # Rewritten as:   <declaration list>  -> <declaration> <declaration list A>
    #                 <declaration list A> -> ; <declaration> <declaration list A> | empty
    declare_statement()
    declare_statement_tableA()


def declare_statement_tableA():
    # <declaration list A> -> ; <declaration> <declaration list A> | empty
    # A ';' only continues the list when the token after it is 'integer'.
    while now() == semicolon_code and now_more_one() == integer_code:
        advanced()
        declare_statement()


def declare_statement():
    # <declaration> -> <variable declaration> | <function declaration>
    if now() != integer_code:
        err_print("说明语句出错,是否缺少integer")
    elif now_more_one() == function_code:
        func_declare()
    else:
        var_declare()


def var_declare():
    # <variable declaration> -> integer <variable>
    if _expect(integer_code, "变量说明出错,是否缺少integer"):
        var(var_define)


def var(define_or_use, is_formal=0):
    """Parse a variable and update the symbol tables.

    <variable> -> <identifier>

    With ``var_define`` the name is added to the variable table unless a
    variable of that name already exists.  With ``var_use`` an error is
    reported when the name is neither a variable nor a procedure.  In both
    cases the first/last line columns of the current procedure's row are
    updated.  If no identifier is present, the error reported by
    ``identifier`` is the only effect.
    """
    global var_count
    if define_or_use == var_define:
        word = identifier()
        if word is None:
            return
        if all(row[0] != word for row in var_Table):
            var_Table.append([word, now_Procedure, is_formal, "integer",
                              procedure_level, var_count])
            var_count += 1
    elif define_or_use == var_use:
        word = identifier()
        if word is None:
            return
        known = (any(row[0] == word for row in var_Table)
                 or any(row[0] == word for row in procedure_Table))
        if not known:
            err_print("符号"+word+"无定义")
    for pro_row in procedure_Table:
        if pro_row[0] == now_Procedure:
            if pro_row[3] == -1:
                pro_row[3] = line
            if pro_row[4] < line:
                pro_row[4] = line


def identifier():
    """Consume an identifier token and return its lexeme.

    <identifier> -> <letter> | <identifier><letter> | <identifier><digit>

    Reports an error and returns None when the current token is not an
    identifier.
    """
    if now() == identifier_code:
        temp_word = now_word()
        advanced()
        return temp_word
    err_print("标识符出错")
    return None


def _func_declare_rest():
    """Parse a function declaration and register it in the procedure table."""
    global now_Procedure
    if not _expect(integer_code, "函数说明出错,缺少integer"):
        return
    if not _expect(function_code, "函数说明出错,缺少function"):
        return
    name = identifier()
    if name is not None:
        now_Procedure = name
        if all(row[0] != name for row in procedure_Table):
            procedure_Table.append([name, "integer", procedure_level, -1, -1])
    if not _expect(left_paren_code, "函数说明出错,缺少("):
        return
    parameter()
    if not _expect(right_paren_code, "函数说明出错,缺少)"):
        return
    if not _expect(semicolon_code, "函数说明出错,缺少;"):
        return
    func_body()


def func_declare():
    # <function declaration> -> integer function <identifier> ( <parameter> ) ; <function body>
    global now_Procedure
    global procedure_level
    procedure_level += 1
    last_Procedure = now_Procedure
    try:
        _func_declare_rest()
    finally:
        # Leave the function's scope again, whatever happened inside.
        now_Procedure = last_Procedure
        procedure_level -= 1


def parameter():
    # <parameter> -> <variable>
    var(var_define, formal_parameter)
    return


def func_body():
    # <function body> -> begin <declaration list> ; <statement list> end
    if not _expect(begin_code, "函数体错误,缺少begin"):
        return
    declare_statement_table()
    if not _expect(semicolon_code, "函数体出错,缺少;"):
        return
    exec_statement_table()
    _expect(end_code, "函数体出错,缺少end")


def exec_statement_table():
    # Left-recursive: <statement list> -> <statement> | <statement list> ; <statement>
    # Rewritten as:   <statement list>   -> <statement> <statement list A>
    #                 <statement list A> -> ; <statement> <statement list A> | empty
    exec_statement()
    exec_statement_tableA()
    return


def exec_statement_tableA():
    # <statement list A> -> ; <statement> <statement list A> | empty
    starters = (read_code, write_code, if_code, identifier_code)
    while now() == semicolon_code and now_more_one() in starters:
        advanced()
        exec_statement()


def exec_statement():
    # <statement> -> <read> | <write> | <assignment> | <conditional>
    exec_state = now()
    if exec_state == read_code:
        read_statement()
    elif exec_state == write_code:
        write_statement()
    elif exec_state == if_code:
        condition_statement()
    elif exec_state == identifier_code:
        assign_statement()
    else:
        err_print("执行语句出错,不知道该走哪里")


def read_statement():
    # <read> -> read ( <variable> )
    if not _expect(read_code, "读语句出错"):
        return
    if not _expect(left_paren_code, "读语句出错"):
        return
    var(var_use)
    _expect(right_paren_code, "读语句出错")


def write_statement():
    # <write> -> write ( <variable> )
    if not _expect(write_code, "写语句出错"):
        return
    if not _expect(left_paren_code, "写语句出错"):
        return
    var(var_use)
    _expect(right_paren_code, "写语句出错")


def assign_statement():
    # <assignment> -> <variable> := <arithmetic expression>
    var(var_use)
    if _expect(assign_code, "赋值语句出错"):
        math_expression()


def math_expression():
    # Left-recursive: <expression> -> <expression> - <term> | <term>
    # Rewritten as:   <expression>   -> <term> <expression A>
    #                 <expression A> -> - <term> <expression A> | empty
    item()
    math_expressionA()


def math_expressionA():
    # <expression A> -> - <term> <expression A> | empty
    while now() == minus_code:
        advanced()
        item()


def item():
    # Left-recursive: <term> -> <term> * <factor> | <factor>
    # Rewritten as:   <term>   -> <factor> <term A>
    #                 <term A> -> * <factor> <term A> | empty
    factor()
    itemA()


def itemA():
    # <term A> -> * <factor> <term A> | empty
    while now() == times_code:
        advanced()
        factor()


def factor():
    # <factor> -> <variable> | <constant> | <function call>
    if now() == constant_code:
        constant()
    elif now_more_one() == left_paren_code:
        func_call()
    else:
        var(var_use)


def func_call():
    # <function call> -> <identifier> ( <arithmetic expression> )
    # (This production is missing from the textbook grammar.)
    identifier()
    if not _expect(left_paren_code, "函数调用出错"):
        return
    math_expression()
    _expect(right_paren_code, "函数调用出错")


def constant():
    # <constant> -> <unsigned integer>
    unsigned_integer()


def unsigned_integer():
    # Consume a constant token.
    _expect(constant_code, "常数调用出错,遇到非数字")


def condition_statement():
    # <conditional> -> if <condition> then <statement> else <statement>
    if not _expect(if_code, "条件语句出错"):
        return
    condition_expression()
    if not _expect(then_code, "条件语句出错"):
        return
    exec_statement()
    if _expect(else_code, "条件语句出错"):
        exec_statement()


def condition_expression():
    # <condition> -> <arithmetic expression> <relational operator> <arithmetic expression>
    math_expression()
    relation_operator()
    math_expression()


def relation_operator():
    # <relational operator> -> < | <= | > | >= | = | <>
    if now() in relation_codes:
        advanced()
    else:
        err_print("关系运算符出错")


if __name__ == '__main__':
    init_files()
    main_procedure()
    end_files()
44 | EOLN 24 45 | read 8 46 | ( 21 47 | m 10 48 | ) 22 49 | ; 23 50 | EOLN 24 51 | k 10 52 | := 20 53 | F 10 54 | ( 21 55 | m 10 56 | ) 22 57 | ; 23 58 | EOLN 24 59 | write 9 60 | ( 21 61 | k 10 62 | ) 22 63 | EOLN 24 64 | end 2 65 | EOF 25 66 | -------------------------------------------------------------------------------- /test.err: -------------------------------------------------------------------------------- 1 | 符号m无定义 当前行为:9 2 | 符号m无定义 当前行为:10 3 | -------------------------------------------------------------------------------- /test.pro: -------------------------------------------------------------------------------- 1 | F integer 1 3 7 2 | -------------------------------------------------------------------------------- /test.txt: -------------------------------------------------------------------------------- 1 | begin 2 | integer k; 3 | integer function F(n); 4 | begin 5 | integer n; 6 | if n<=0 then F:=1 7 | else F:=n*F(n-1) 8 | end; 9 | read(m); 10 | k:=F(m); 11 | write(k) 12 | end -------------------------------------------------------------------------------- /test.var: -------------------------------------------------------------------------------- 1 | k main 0 integer 0 0 2 | n F 1 integer 1 1 3 | --------------------------------------------------------------------------------