├── .gitignore ├── lexer ├── .gitignore ├── test_cases │ ├── operator.cpp │ ├── input.c │ ├── input2.c │ ├── output1.txt │ ├── output.txt │ └── output2.txt ├── make.bat ├── CMakeLists.txt ├── include │ ├── Lexer.h │ └── TokenType.h └── src │ ├── main.cpp │ └── static │ ├── TokenType.cpp │ └── Lexer.cpp ├── parser ├── test_cases │ ├── input2.c │ ├── intput2.c │ ├── operator.cpp │ ├── input.c │ ├── input1.c │ ├── output1.txt │ └── output.txt ├── .gitignore ├── src │ ├── static │ │ ├── parser │ │ │ ├── abstract_tree.cpp │ │ │ └── Parser.cpp │ │ ├── TokenType.cpp │ │ └── lexer │ │ │ └── Lexer.cpp │ └── main.cpp ├── copy_lexer2parser.bat ├── make.bat ├── CMakeLists.txt └── include │ ├── parser │ ├── tmp_factor.cpp │ ├── parser.h │ └── abstract_tree.h │ ├── lexer │ └── Lexer.h │ └── TokenType.h ├── images ├── lexer输入.png ├── lexer报错显示.png ├── parser结果预览.png └── parser中的lexer指针.png ├── documents └── CMinus词法和语法规则.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .VSCodeCounter -------------------------------------------------------------------------------- /lexer/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | *.exe 3 | .VSCodeCounter -------------------------------------------------------------------------------- /parser/test_cases/input2.c: -------------------------------------------------------------------------------- 1 | a[2 + e] = 1+ b(a,112, v * d, 23,42) -------------------------------------------------------------------------------- /parser/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | *.exe 3 | .VSCodeCounter 4 | .vscode -------------------------------------------------------------------------------- /parser/test_cases/intput2.c: -------------------------------------------------------------------------------- 1 | a(1+2, c*d) * b * (c + d) + c / e[a + b * c + d] 
-------------------------------------------------------------------------------- /images/lexer输入.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/lexer输入.png -------------------------------------------------------------------------------- /images/lexer报错显示.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/lexer报错显示.png -------------------------------------------------------------------------------- /images/parser结果预览.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/parser结果预览.png -------------------------------------------------------------------------------- /parser/src/static/parser/abstract_tree.cpp: -------------------------------------------------------------------------------- 1 | #include "parser/abstract_tree.h" 2 | #include "parser/Parser.h" 3 | -------------------------------------------------------------------------------- /images/parser中的lexer指针.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/parser中的lexer指针.png -------------------------------------------------------------------------------- /parser/test_cases/operator.cpp: -------------------------------------------------------------------------------- 1 | +/*+sdasdasa */ + - * / >= <= ; } { [ ] ( /*+++++ 2 | */ -- 3 | - == 4 | ) 5 | ++++ -------------------------------------------------------------------------------- /lexer/test_cases/operator.cpp: -------------------------------------------------------------------------------- 1 | +/*+sdasdasa */ + - * / >= <= ; } { [ ] ( /*+++++ 2 | */ -- 3 | - == 4 | ) 5 | ++++ 6 | 7 | /* *** */ 8 | /* in comment 9 | * 
expect out comment 10 | / start 11 | 其他的 in comment -------------------------------------------------------------------------------- /parser/copy_lexer2parser.bat: -------------------------------------------------------------------------------- 1 | copy .\..\lexer\include\Lexer.h .\include\lexer\Lexer.h 2 | copy .\..\lexer\include\TokenType.h .\include\TokenType.h 3 | copy ..\lexer\src\static\Lexer.cpp .\src\static\lexer\Lexer.cpp 4 | copy ..\lexer\src\static\TokenType.cpp .\src\static\TokenType.cpp 5 | -------------------------------------------------------------------------------- /lexer/test_cases/input.c: -------------------------------------------------------------------------------- 1 | /* A program to perform Euclid's 2 | Algorithm to compute gcd. */ 3 | 4 | int gcd (int u, int v) 5 | { 6 | if (v == 0) 7 | return u ; 8 | else 9 | return gcd(v,u-u/v*v); 10 | /* u-u/v*v == 11 | u mod v */ 12 | } 13 | 14 | void main(void) 15 | { 16 | int x; int y; 17 | x = input(); 18 | y = input(); 19 | output(gcd(x,y)); 20 | } -------------------------------------------------------------------------------- /parser/test_cases/input.c: -------------------------------------------------------------------------------- 1 | /* A program to perform Euclid's 2 | Algorithm to compute gcd. */ 3 | 4 | int gcd (int u, int v) 5 | { 6 | if (v == 0) 7 | return u; 8 | else 9 | return gcd(v,u-u/v*v); 10 | /* u-u/v*v == 11 | u mod v */ 12 | } 13 | 14 | void main(void) 15 | { 16 | int x; int y; 17 | x = input(); 18 | y = input(); 19 | output(gcd(x,y)); 20 | } -------------------------------------------------------------------------------- /lexer/test_cases/input2.c: -------------------------------------------------------------------------------- 1 | /* A program to perform Euclid's 2 | Algorithm to compute gcd. 
*/ 3 | 4 | int gcd (int u, int v) 5 | { 6 | if (v == 0) 7 | return u ; 8 | else 9 | return gcd(v,u-u/v*v); 10 | /* u-u/v*v == 11 | u mod v */ 12 | } 13 | 14 | void main(void) 15 | { 16 | int x; int y; 17 | x = input(); 18 | y = input(); 19 | output(gcd(x,y)); 20 | } -------------------------------------------------------------------------------- /parser/test_cases/input1.c: -------------------------------------------------------------------------------- 1 | /* A program to perform Euclid's 2 | Algorithm to compute gcd. */ 3 | 4 | int gcd (int u, int v) 5 | { 6 | if (v == 0) 7 | return u ; 8 | else 9 | return gcd(v,u-u/v*v); 10 | /* u-u/v*v == 11 | u mod v */ 12 | } 13 | 14 | void main(void) 15 | { 16 | int x; int y; 17 | x = input(); 18 | y = input(); 19 | output(gcd(x,y)); 20 | } -------------------------------------------------------------------------------- /lexer/make.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | cd build 3 | cmake .. -G "MinGW Makefiles" 4 | echo ===================CMAKE DONE=========================== 5 | mingw32-make.exe 6 | echo ====================MAKE DONE=========================== 7 | cd .. 8 | echo ====================Run Code============================ 9 | .\CMinusLexer_binary.exe ./test_cases/input.c ./test_cases/output.txt 10 | @REM .\CMinusLexer_binary.exe ./test_cases/input.c 11 | -------------------------------------------------------------------------------- /parser/make.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | cd build 3 | cmake .. -G "MinGW Makefiles" 4 | echo ===================CMAKE DONE=========================== 5 | mingw32-make.exe 6 | echo ====================MAKE DONE=========================== 7 | cd .. 
8 | echo ====================Run Code============================ 9 | @REM .\CMinusLexer_binary.exe ./test_cases/input.c ./test_cases/output1.txt 10 | .\CMinusParser_binary.exe ./test_cases/input.c 11 | -------------------------------------------------------------------------------- /parser/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "TokenType.h" 4 | #include "lexer/Lexer.h" 5 | #include "parser/Parser.h" 6 | #include "parser/abstract_tree.h" 7 | // ==========全局参数定义============ 8 | std::string input = ""; 9 | // outpur路径为空,则输出到标准输出,如果不为空,则输出到文件 10 | // std::string output = "./test_cases/output.txt"; 11 | std::string output = ""; 12 | cmlexer lex; // 初始化语法解析器类 13 | cmparser myparser(&lex); 14 | // ==================================== 15 | 16 | // 初始化输入输出文件路径 17 | void initPath(){ 18 | lex.setPath(input,output); 19 | } 20 | int main(int argc, char* argv[]){ 21 | input.assign("./test_cases/input.c"); 22 | initPath(); 23 | auto res = myparser.parse(); 24 | if(res !=nullptr) res->show(); 25 | return 0; 26 | } -------------------------------------------------------------------------------- /lexer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | project(CMinusLexer_library) 4 | 5 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}) 6 | 7 | ############################################################ 8 | # Create a library 9 | ############################################################ 10 | file(GLOB SOURCES "src/static/*.cpp") 11 | # Generate the static library from the library sources 12 | add_library(CMinusLexer_library STATIC 13 | ${SOURCES} 14 | ) 15 | 16 | target_include_directories(CMinusLexer_library 17 | PUBLIC 18 | ${PROJECT_SOURCE_DIR}/include 19 | ) 20 | 21 | 22 | 23 | ############################################################ 24 | # Create an executable 25 | 
############################################################ 26 | 27 | # Add an executable with the above sources 28 | add_executable(CMinusLexer_binary 29 | src/main.cpp 30 | ) 31 | 32 | # link the new hello_library target with the hello_binary target 33 | target_link_libraries(CMinusLexer_binary 34 | PRIVATE 35 | CMinusLexer_library 36 | ) 37 | -------------------------------------------------------------------------------- /parser/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | project(CMinusParser_library) 4 | 5 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}) 6 | 7 | ############################################################ 8 | # Create a library 9 | ############################################################ 10 | file(GLOB SOURCES "src/static/*/*.cpp" "src/static/*.cpp") 11 | 12 | # Generate the static library from the library sources 13 | add_library(CMinusParser_library STATIC 14 | ${SOURCES} 15 | ) 16 | 17 | target_include_directories(CMinusParser_library 18 | PUBLIC 19 | ${PROJECT_SOURCE_DIR}/include 20 | ) 21 | 22 | 23 | 24 | ############################################################ 25 | # Create an executable 26 | ############################################################ 27 | 28 | # Add an executable with the above sources 29 | add_executable(CMinusParser_binary 30 | src/main.cpp 31 | ) 32 | 33 | # link the new hello_library target with the hello_binary target 34 | target_link_libraries(CMinusParser_binary 35 | PRIVATE 36 | CMinusParser_library 37 | ) 38 | -------------------------------------------------------------------------------- /lexer/test_cases/output1.txt: -------------------------------------------------------------------------------- 1 | dick70 2 | Keyword: int 3 | Identifier: gcd 4 | Operator: ( 5 | Keyword: int 6 | Identifier: u 7 | Operator: , 8 | Keyword: int 9 | Identifier: v 10 | Operator: ) 11 | Operator: { 12 | Keyword: 
if 13 | Operator: ( 14 | Identifier: v 15 | Operator: == 16 | Number: 0 17 | Operator: ) 18 | Keyword: return 19 | Identifier: u 20 | Operator: ; 21 | Keyword: else 22 | Keyword: return 23 | Identifier: gcd 24 | Operator: ( 25 | Identifier: v 26 | Operator: , 27 | Identifier: u 28 | Operator: - 29 | Identifier: u 30 | Operator: / 31 | Identifier: v 32 | Operator: * 33 | Identifier: v 34 | Operator: ) 35 | Operator: ; 36 | Operator: } 37 | Keyword: void 38 | Identifier: main 39 | Operator: ( 40 | Keyword: void 41 | Operator: ) 42 | Operator: { 43 | Keyword: int 44 | Identifier: x 45 | Operator: ; 46 | Keyword: int 47 | Identifier: y 48 | Operator: ; 49 | Identifier: x 50 | Operator: = 51 | Identifier: input 52 | Operator: ( 53 | Operator: ) 54 | Operator: ; 55 | Identifier: y 56 | Operator: = 57 | Identifier: input 58 | Operator: ( 59 | Operator: ) 60 | Operator: ; 61 | Identifier: output 62 | Operator: ( 63 | Identifier: gcd 64 | Operator: ( 65 | Identifier: x 66 | Operator: , 67 | Identifier: y 68 | Operator: ) 69 | Operator: ) 70 | Operator: ; 71 | Operator: } 72 | -------------------------------------------------------------------------------- /parser/test_cases/output1.txt: -------------------------------------------------------------------------------- 1 | dick70 2 | Keyword: int 3 | Identifier: gcd 4 | Operator: ( 5 | Keyword: int 6 | Identifier: u 7 | Operator: , 8 | Keyword: int 9 | Identifier: v 10 | Operator: ) 11 | Operator: { 12 | Keyword: if 13 | Operator: ( 14 | Identifier: v 15 | Operator: == 16 | Number: 0 17 | Operator: ) 18 | Keyword: return 19 | Identifier: u 20 | Operator: ; 21 | Keyword: else 22 | Keyword: return 23 | Identifier: gcd 24 | Operator: ( 25 | Identifier: v 26 | Operator: , 27 | Identifier: u 28 | Operator: - 29 | Identifier: u 30 | Operator: / 31 | Identifier: v 32 | Operator: * 33 | Identifier: v 34 | Operator: ) 35 | Operator: ; 36 | Operator: } 37 | Keyword: void 38 | Identifier: main 39 | Operator: ( 40 | Keyword: void 41 | 
Operator: ) 42 | Operator: { 43 | Keyword: int 44 | Identifier: x 45 | Operator: ; 46 | Keyword: int 47 | Identifier: y 48 | Operator: ; 49 | Identifier: x 50 | Operator: = 51 | Identifier: input 52 | Operator: ( 53 | Operator: ) 54 | Operator: ; 55 | Identifier: y 56 | Operator: = 57 | Identifier: input 58 | Operator: ( 59 | Operator: ) 60 | Operator: ; 61 | Identifier: output 62 | Operator: ( 63 | Identifier: gcd 64 | Operator: ( 65 | Identifier: x 66 | Operator: , 67 | Identifier: y 68 | Operator: ) 69 | Operator: ) 70 | Operator: ; 71 | Operator: } 72 | -------------------------------------------------------------------------------- /lexer/test_cases/output.txt: -------------------------------------------------------------------------------- 1 | #4 3 Keyword: int 2 | #4 7 Identifier: gcd 3 | #4 9 Operator: ( 4 | #4 12 Keyword: int 5 | #4 14 Identifier: u 6 | #4 15 Operator: , 7 | #4 19 Keyword: int 8 | #4 21 Identifier: v 9 | #4 22 Operator: ) 10 | #5 1 Operator: { 11 | #6 6 Keyword: if 12 | #6 8 Operator: ( 13 | #6 9 Identifier: v 14 | #6 12 Operator: == 15 | #6 14 Number: 0 16 | #6 15 Operator: ) 17 | #7 14 Keyword: return 18 | #7 16 Identifier: u 19 | #7 18 Operator: ; 20 | #8 8 Keyword: else 21 | #9 14 Keyword: return 22 | #9 18 Identifier: gcd 23 | #9 19 Operator: ( 24 | #9 20 Identifier: v 25 | #9 21 Operator: , 26 | #9 22 Identifier: u 27 | #9 23 Operator: - 28 | #9 24 Identifier: u 29 | #9 25 Operator: / 30 | #9 26 Identifier: v 31 | #9 27 Operator: * 32 | #9 28 Identifier: v 33 | #9 29 Operator: ) 34 | #9 30 Operator: ; 35 | #12 1 Operator: } 36 | #14 4 Keyword: void 37 | #14 9 Identifier: main 38 | #14 10 Operator: ( 39 | #14 14 Keyword: void 40 | #14 15 Operator: ) 41 | #15 1 Operator: { 42 | #16 7 Keyword: int 43 | #16 9 Identifier: x 44 | #16 10 Operator: ; 45 | #16 14 Keyword: int 46 | #16 16 Identifier: y 47 | #16 17 Operator: ; 48 | #17 5 Identifier: x 49 | #17 7 Operator: = 50 | #17 13 Identifier: input 51 | #17 14 Operator: ( 52 | #17 15 
Operator: ) 53 | #17 16 Operator: ; 54 | #18 5 Identifier: y 55 | #18 7 Operator: = 56 | #18 13 Identifier: input 57 | #18 14 Operator: ( 58 | #18 15 Operator: ) 59 | #18 16 Operator: ; 60 | #19 10 Identifier: output 61 | #19 11 Operator: ( 62 | #19 14 Identifier: gcd 63 | #19 15 Operator: ( 64 | #19 16 Identifier: x 65 | #19 17 Operator: , 66 | #19 18 Identifier: y 67 | #19 19 Operator: ) 68 | #19 20 Operator: ) 69 | #19 21 Operator: ; 70 | #20 1 Operator: } 71 | -------------------------------------------------------------------------------- /lexer/test_cases/output2.txt: -------------------------------------------------------------------------------- 1 | #4 3 Keyword: int 2 | #4 7 Identifier: gcd 3 | #4 9 Operator: ( 4 | #4 12 Keyword: int 5 | #4 14 Identifier: u 6 | #4 15 Operator: , 7 | #4 19 Keyword: int 8 | #4 21 Identifier: v 9 | #4 22 Operator: ) 10 | #5 1 Operator: { 11 | #6 6 Keyword: if 12 | #6 8 Operator: ( 13 | #6 9 Identifier: v 14 | #6 12 Operator: == 15 | #6 14 Number: 0 16 | #6 15 Operator: ) 17 | #7 14 Keyword: return 18 | #7 16 Identifier: u 19 | #7 18 Operator: ; 20 | #8 8 Keyword: else 21 | #9 14 Keyword: return 22 | #9 18 Identifier: gcd 23 | #9 19 Operator: ( 24 | #9 20 Identifier: v 25 | #9 21 Operator: , 26 | #9 22 Identifier: u 27 | #9 23 Operator: - 28 | #9 24 Identifier: u 29 | #9 25 Operator: / 30 | #9 26 Identifier: v 31 | #9 27 Operator: * 32 | #9 28 Identifier: v 33 | #9 29 Operator: ) 34 | #9 30 Operator: ; 35 | #12 1 Operator: } 36 | #14 4 Keyword: void 37 | #14 9 Identifier: main 38 | #14 10 Operator: ( 39 | #14 14 Keyword: void 40 | #14 15 Operator: ) 41 | #15 1 Operator: { 42 | #16 7 Keyword: int 43 | #16 9 Identifier: x 44 | #16 10 Operator: ; 45 | #16 14 Keyword: int 46 | #16 16 Identifier: y 47 | #16 17 Operator: ; 48 | #17 5 Identifier: x 49 | #17 7 Operator: = 50 | #17 13 Identifier: input 51 | #17 14 Operator: ( 52 | #17 15 Operator: ) 53 | #17 16 Operator: ; 54 | #18 5 Identifier: y 55 | #18 7 Operator: = 56 | #18 13 
Identifier: input 57 | #18 14 Operator: ( 58 | #18 15 Operator: ) 59 | #18 16 Operator: ; 60 | #19 10 Identifier: output 61 | #19 11 Operator: ( 62 | #19 14 Identifier: gcd 63 | #19 15 Operator: ( 64 | #19 16 Identifier: x 65 | #19 17 Operator: , 66 | #19 18 Identifier: y 67 | #19 19 Operator: ) 68 | #19 20 Operator: ) 69 | #19 21 Operator: ; 70 | #20 1 Operator: } 71 | -------------------------------------------------------------------------------- /parser/test_cases/output.txt: -------------------------------------------------------------------------------- 1 | #4 3 Keyword: int 2 | #4 7 Identifier: gcd 3 | #4 9 Operator: ( 4 | #4 12 Keyword: int 5 | #4 14 Identifier: u 6 | #4 15 Operator: , 7 | #4 19 Keyword: int 8 | #4 21 Identifier: v 9 | #4 22 Operator: ) 10 | #5 1 Operator: { 11 | #6 6 Keyword: if 12 | #6 8 Operator: ( 13 | #6 9 Identifier: v 14 | #6 12 Operator: == 15 | #6 14 Number: 0 16 | #6 15 Operator: ) 17 | #7 14 Keyword: return 18 | #7 16 Identifier: u 19 | #7 18 Operator: ; 20 | #8 8 Keyword: else 21 | #9 14 Keyword: return 22 | #9 18 Identifier: gcd 23 | #9 19 Operator: ( 24 | #9 20 Identifier: v 25 | #9 21 Operator: , 26 | #9 22 Identifier: u 27 | #9 23 Operator: - 28 | #9 24 Identifier: u 29 | #9 25 Operator: / 30 | #9 26 Identifier: v 31 | #9 27 Operator: * 32 | #9 28 Identifier: v 33 | #9 29 Operator: ) 34 | #9 30 Operator: ; 35 | #12 1 Operator: } 36 | #14 4 Keyword: void 37 | #14 9 Identifier: main 38 | #14 10 Operator: ( 39 | #14 14 Keyword: void 40 | #14 15 Operator: ) 41 | #15 1 Operator: { 42 | #16 7 Keyword: int 43 | #16 9 Identifier: x 44 | #16 10 Operator: ; 45 | #16 14 Keyword: int 46 | #16 16 Identifier: y 47 | #16 17 Operator: ; 48 | #17 5 Identifier: x 49 | #17 7 Operator: = 50 | #17 13 Identifier: input 51 | #17 14 Operator: ( 52 | #17 15 Operator: ) 53 | #17 16 Operator: ; 54 | #18 5 Identifier: y 55 | #18 7 Operator: = 56 | #18 13 Identifier: input 57 | #18 14 Operator: ( 58 | #18 15 Operator: ) 59 | #18 16 Operator: ; 60 | #19 
10 Identifier: output 61 | #19 11 Operator: ( 62 | #19 14 Identifier: gcd 63 | #19 15 Operator: ( 64 | #19 16 Identifier: x 65 | #19 17 Operator: , 66 | #19 18 Identifier: y 67 | #19 19 Operator: ) 68 | #19 20 Operator: ) 69 | #19 21 Operator: ; 70 | #20 1 Operator: } 71 | -------------------------------------------------------------------------------- /parser/include/parser/tmp_factor.cpp: -------------------------------------------------------------------------------- 1 | // auto n = TreeNode::newExpNode(ExpKind::_num); 2 | // // 1.如果是运算符 (, 则进入 (expression) 的匹配 3 | // // 这个括号不会显示在输出的语法树中,但是会影响语法树的构建。 4 | // if(current_token->get_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_slb){ 5 | // match_operator(operator_type::_slb); 6 | // n = additive_expression(pass_node); 7 | // match_operator(operator_type::_srb); 8 | // } 9 | // // 如果匹配到了ID 10 | // else if( current_token->get_token_type()== token_type::_ID){ 11 | // n->set_id(get_current_id()->get_ID()); 12 | // get_next_token(); 13 | // // 尝试匹配下一个符号 14 | // if(current_token != nullptr && current_token->get_token_type() == token_type::_operator){ 15 | // switch (get_current_operator()->get_operator_type()) 16 | // { 17 | // // 2.如果是 '(' 则匹配 ID(args) 18 | // case operator_type::_slb: 19 | // n->kind.exp = ExpKind::_call; 20 | // match_operator(operator_type::_slb); 21 | // // 如果下一个就是 ')' 则匹配完成 即匹配 empty 22 | // if(current_token != nullptr && current_token->get_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_srb){ 23 | 24 | // }else{ 25 | // n->sibling = arg_list(); 26 | // } 27 | // match_operator(operator_type::_srb); 28 | // break; 29 | 30 | // // 3.如果是 '[' 则匹配 ID[expression] 31 | // case operator_type::_mlb: 32 | // n->kind.exp = ExpKind::_var; 33 | // match_operator(operator_type::_mlb); 34 | // n->sibling = additive_expression(nullptr); // TODO 后续完成expression要进行切换 35 | // 
match_operator(operator_type::_mrb); 36 | // break; 37 | // // 4.如果没有则什么都不做,匹配完成 38 | // default: 39 | 40 | // break; 41 | // } 42 | // } 43 | // //5. 匹配到NUM 44 | // }else if(current_token != nullptr && get_current_token_type() == token_type::_NUM){ 45 | // n->set_number(get_current_number()->get_number()); 46 | // get_next_token(); 47 | // } 48 | // return n; -------------------------------------------------------------------------------- /lexer/include/Lexer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "TokenType.h" 10 | // 所有可能的运算符起始符号 11 | extern std::set oper_start; 12 | bool isoperator(char c); 13 | 14 | // 词法分析器的状态机的状态 15 | enum class state{ 16 | start, // 普通状态 0 17 | output, // 可以输出一个词法单元的状态 1 18 | in_oper, // 在操作符状态 2 19 | in_comm, // 在注释状态 3 20 | in_numb, // 在数值状态 4 21 | in_iden, // 在标识符状态 5 22 | ex_comm, // 即将退出注释 6 23 | unexpected_char, // 7 未知的字符 24 | undefined_operator, // 8 未知的运算符 25 | unexpected_state // 异常状态转换 26 | }; 27 | 28 | // 用来管理错误类型的 29 | extern std::unordered_map error_map; 30 | std::string get_error_str(state s); 31 | 32 | // 主要的词法分析器类 33 | class cmlexer{ 34 | int bufflen = 0; 35 | int lineno = 1; // 当前行号 36 | int linepos = 0; // 当前字符号 37 | 38 | std::string buffer; // 当前token的缓冲区 39 | std::string line_buff; // 当前行缓冲区 40 | size_t line_idx = 0; // 当前行索引 41 | bool is_eof = false; 42 | // 在output状态之后,需要通过这个状态量来确保能重新读入上一个仅仅用来判断“状态”但没读入缓冲区的变量 43 | bool next = 1; 44 | state _s; //当前状态 45 | 46 | std::string inputPath, outputPath; 47 | public: 48 | // 按顺序保存结果的指针数组 49 | std::vector results; 50 | std::ifstream ifs; 51 | std::ofstream ofs; 52 | bool if_std_output = true; // 是否在标准输出输出词法分析结果 53 | bool output_redirect = false; // 是否重定向输出,如果没有则采用标准输入输出 54 | 55 | //设置存取路径 56 | void setPath(std::string i, std::string o); 57 | int getNextChar(); 58 | void ungetNextChar(); 59 | // 60 | state 
read_next(char c, bool next); 61 | 62 | //获取下一个token元素 63 | token_base * get_next_token(); 64 | token_base * get_next_token(std::ifstream &local_ifs); 65 | 66 | // 从输入流解析整个文件的函数,无参数为默认按照ifs读取 67 | void lexing_file(std::ifstream &ifstream); 68 | void lexing_file(); 69 | // 获得当前最新的头部token 70 | token_base* get_result(){ 71 | _s = state::start; 72 | return results.back(); 73 | } 74 | std::vector* get_results(){ 75 | return &results; 76 | } 77 | inline std::string get_buffer(){ 78 | return buffer; 79 | } 80 | inline int get_lineno(){ 81 | return lineno; 82 | } 83 | inline int get_pos(){ 84 | return linepos; 85 | } 86 | // 恢复初始状态 87 | void reset_status(){ 88 | _s = state::start; 89 | buffer.clear(); 90 | is_eof = 0; 91 | } 92 | // 查看是否错误 93 | bool error_state(){ 94 | if(_s >= state::unexpected_char) 95 | return 1; 96 | else 97 | return 0; 98 | } 99 | }; -------------------------------------------------------------------------------- /parser/include/lexer/Lexer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "TokenType.h" 10 | // 所有可能的运算符起始符号 11 | extern std::set oper_start; 12 | bool isoperator(char c); 13 | 14 | // 词法分析器的状态机的状态 15 | enum class state{ 16 | start, // 普通状态 0 17 | output, // 可以输出一个词法单元的状态 1 18 | in_oper, // 在操作符状态 2 19 | in_comm, // 在注释状态 3 20 | in_numb, // 在数值状态 4 21 | in_iden, // 在标识符状态 5 22 | ex_comm, // 即将退出注释 6 23 | unexpected_char, // 7 未知的字符 24 | undefined_operator, // 8 未知的运算符 25 | unexpected_state // 异常状态转换 26 | }; 27 | 28 | // 用来管理错误类型的 29 | extern std::unordered_map error_map; 30 | std::string get_error_str(state s); 31 | 32 | // 主要的词法分析器类 33 | class cmlexer{ 34 | int bufflen = 0; 35 | int lineno = 1; // 当前行号 36 | int linepos = 0; // 当前字符号 37 | 38 | std::string buffer; // 当前token的缓冲区 39 | std::string line_buff; // 当前行缓冲区 40 | size_t line_idx = 0; // 当前行索引 41 | bool is_eof = false; 42 | // 
在output状态之后,需要通过这个状态量来确保能重新读入上一个仅仅用来判断“状态”但没读入缓冲区的变量 43 | bool next = 1; 44 | state _s; //当前状态 45 | 46 | std::string inputPath, outputPath; 47 | public: 48 | // 按顺序保存结果的指针数组 49 | std::vector results; 50 | std::ifstream ifs; 51 | std::ofstream ofs; 52 | bool if_std_output = true; // 是否在标准输出输出词法分析结果 53 | bool output_redirect = false; // 是否重定向输出,如果没有则采用标准输入输出 54 | 55 | //设置存取路径 56 | void setPath(std::string i, std::string o); 57 | private: 58 | int getNextChar(); 59 | void ungetNextChar(); 60 | // 61 | state read_next(char c, bool next); 62 | 63 | //获取下一个token元素 64 | public: 65 | token_base * get_next_token(); 66 | token_base * get_next_token(std::ifstream &local_ifs); 67 | 68 | // 从输入流解析整个文件的函数,无参数为默认按照ifs读取 69 | void lexing_file(std::ifstream &ifstream); 70 | void lexing_file(); 71 | // 获得当前最新的头部token 72 | token_base* get_result(){ 73 | _s = state::start; 74 | return results.back(); 75 | } 76 | std::vector* get_results(){ 77 | return &results; 78 | } 79 | inline std::string get_buffer(){ 80 | return buffer; 81 | } 82 | inline int get_lineno(){ 83 | return lineno; 84 | } 85 | inline int get_pos(){ 86 | return linepos; 87 | } 88 | // 恢复初始状态 89 | void reset_status(){ 90 | _s = state::start; 91 | buffer.clear(); 92 | is_eof = 0; 93 | } 94 | // 查看是否错误 95 | bool error_state(){ 96 | if(_s >= state::unexpected_char) 97 | return 1; 98 | else 99 | return 0; 100 | } 101 | }; -------------------------------------------------------------------------------- /lexer/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "TokenType.h" 4 | #include "Lexer.h" 5 | 6 | // ==========全局参数定义============ 7 | std::string input = ""; 8 | // outpur路径为空,则输出到标准输出,如果不为空,则输出到文件 9 | // std::string output = "./test_cases/output.txt"; 10 | std::string output = ""; 11 | cmlexer lex; // 初始化语法解析器类 12 | // ==================================== 13 | 14 | // 初始化输入输出文件路径 15 | void initPath(){ 16 | lex.setPath(input,output); 17 | } 
18 | // 存放测试用的函数 19 | namespace tests 20 | { 21 | // 利用简单字符串测试分析器的函数 22 | void testLexer(std::string str){ 23 | bool next = true; 24 | for(int i=0 ; i < str.size(); ){ 25 | // printf("%c\n",str[i]); 26 | auto s = lex.read_next(str[i], next); 27 | // std::cout <<"Status: " <<(int) s << std::endl; 28 | if(!next){ 29 | next = 1; 30 | } 31 | if(s == state::output){ 32 | auto res = lex.get_result(); 33 | std::cout <<'#'<< res->get_line() << "\t" <get_pos() << '\t' << res->to_string() << std::endl; 34 | next = false; 35 | continue; 36 | } 37 | i++; // next为false(刚进行output后则加加) 38 | } 39 | } 40 | // // 带行数输出原始代码——测试buffer 41 | // void showInputFile(){ 42 | // char c; 43 | // while((c = lex.getNextChar()) != EOF){ 44 | // std::cout << c; 45 | // } 46 | // } 47 | // 测试所有Token单元是否书写正确 48 | void showTokens(){ 49 | token_keyword i(keyword_type::_else, 1,4); 50 | token_operator j(string2operator("+"), 2,3); 51 | token_identifier k("value", 4, 2); 52 | token_number l("123",2,4); 53 | std::cout << "\n"; 54 | std::cout << i.to_string() << "\n"; 55 | std::cout << j.to_string() << std::endl; 56 | std::cout << k.to_string() << std::endl; 57 | std::cout << l.to_string() << std::endl; 58 | } 59 | // 测试运算符解析是否正确的部分 60 | void operatorLexer(){ 61 | std::string buf; 62 | bool next = 1; 63 | std::string a ; 64 | while(std::getline(lex.ifs, a)){ 65 | a += '\n'; 66 | // std::cout << a << std::endl; 67 | testLexer(a); 68 | } 69 | } 70 | } 71 | // 按行数显示所有的文本内容 72 | int main(int argc, char* argv[]){ 73 | lex.if_std_output = true; // 是否在标准输出展示结果 74 | // 根据输入数据的个数来判定操作 75 | switch (argc) 76 | { 77 | case 1: // 解析 78 | std::cout << "ERROR:缺少参数,请输入需要解析的文件路径!"<< std::endl; 79 | break; 80 | case 2: // 如果有一个参数,则从文件路径读入,并从标准输出输出结果 81 | input.assign(argv[1]); 82 | initPath(); 83 | lex.lexing_file(); 84 | break; 85 | case 3: 86 | input.assign(argv[1]); 87 | output.assign(argv[2]); 88 | initPath(); 89 | lex.lexing_file(); 90 | break; 91 | default: 92 | std::cout << "ERROR:参数过多,请确认是否只有'输入路径'、'输出路径'两个参数。"<< 
std::endl; 93 | break; 94 | } 95 | // keyword_type key; 96 | // key = string2keyword("if"); 97 | // std::cout << keyword2string(key) << std::endl; 98 | 99 | // tests::showInputFile(); 100 | // tests::showTokens(); 101 | // tests::operatorLexer(); 102 | // lex.lexing_file(); // 默认按照内部的ifs读取数据 103 | 104 | // 最后一总从信息中输出 105 | // for(auto i : lex.results){ 106 | // std::cout << '#' << i->get_line() << "\t" << i->get_pos() << "\t" << i->to_string() << std::endl; 107 | // } 108 | 109 | return 0; 110 | } -------------------------------------------------------------------------------- /documents/CMinus词法和语法规则.md: -------------------------------------------------------------------------------- 1 | ## C-Minus 的词法规则 2 | - 关键字: if else int return void while 3 | - 专用符号: + - * / < <= > >= == != = ; , ( ) [ ] { } /* */ 4 | - 其他标记为 ID 和 NUM ,通过下列正则表达式定义: 5 | ``` 6 | ID = letter letter* 7 | NUM = digit digit* 8 | Letter = a|..|z|A|..|Z 9 | Digit = 0|..|9 10 | ``` 11 | - 空格由空白、换行符、制表符组成。 12 | - 注释由 /\*...\*/ 围起来。 13 | 14 | ## C-Minus 的语法规则 15 | **C-Minus 的 BNF 语法如下:** 16 | 1. ❤program -> declaration_list 17 | 2. ❤declaration_list -> declaration_list declaration | declaration 18 | 3. ❤declaration -> var_declaration | fun_declaration 19 | 4. ❤var_declaration -> type_specifier ID | type_specifier ID [ NUM ] 20 | 5. ❤type_specifier -> INT | VOID 21 | 6. ❤fun_declaration -> type_specifier ID ( params ) compound_stmt 22 | 7. ❤params -> param_list | VOID 23 | 8. ❤param_list -> param_list , param | param 24 | 9. ❤param -> type_specifier ID | type_specifier ID [ ] 25 | 10. ❤compound_stmt -> { local_declarations statement_list } 26 | 11. ❤local_declarations -> local_declarations var_declaration | empty 27 | 12. ❤statement_list -> statement_list statement | empty 28 | 13. ❤statement -> expression_stmt | compound_stmt | selection_stmt | iteration_stmt | return_stmt 29 | 14. ❤expression_stmt -> expression ; | ; 30 | 15. 
❤selection_stmt -> IF ( expression ) statement | IF ( expression ) statement ELSE statement 31 | 16. ❤iteration_stmt -> WHILE ( expression ) statement 32 | 17. ❤return_stmt -> RETURN; | RETURN expression; 33 | 18. ❤expression -> var = expression | simple_expression 34 | 19. ❤var -> ID | ID [ expression ] 35 | 20. ❤simple_expression -> additive_expression relop additive_expression | additive_expression 36 | 21. ❤relop -> LE | LT | GT | GE | EQ | NE 37 | 22. ❤additive_expression -> additive_expression addop term | term 38 | 23. ❤addop -> + | - 39 | 24. ❤term -> term mulop factor | factor 40 | 25. ❤mulop -> * | / 41 | 26. ❤factor -> ( expression ) | var | call | NUM 42 | 27. ❤call -> ID ( args ) 43 | 28. ❤args -> arg_list | empty 44 | 29. ❤arg_list -> arg_list , expression | expression 45 | 46 | ## EBNF语法如下: 47 | ``` 48 | program → declaration-list 49 | 50 | declaration-list → declaration {declaration} 51 | 52 | declaration → var-declaration|fun-declaration 53 | 54 | var-declaration → type-specifier ID; | type-specifier ID[NUM]; 55 | 56 | type-specifier → int|void 57 | 58 | fun-declaration → type-specifier ID(params) compound-stmt 59 | 60 | params → param-list|void 61 | 62 | param-list → param{,param} 63 | 64 | param → type-specifier ID{[]} 65 | 66 | compound-stmt → {local-declarations statement-list} 67 | 68 | local-declarations → empty{var-declaration} 69 | 70 | statement-list → {statement} 71 | 72 | statement → expression-stmt|compound-stmt|selection-stmt| 73 | 74 | iteration-stmt|return-stmt 75 | 76 | expression-stmt → [expression]; 77 | 78 | selection-stmt → if(expression) statement [else statement] 79 | 80 | iteration-stmt → while(expression) statement 81 | 82 | return-stmt → return[expression]; 83 | 84 | expression → var=expression|simple-expression 85 | 86 | var → ID|ID[expression] 87 | 88 | simple-expression → additive-expression [relop additive-expression] 89 | 90 | relop → <=|<|>|>=|==|!= 91 | 92 | additive-expression → term{addop term} 93 | 94 | addop
→ +|- 95 | 96 | term → factor{mulop factor} 97 | 98 | mulop → *|/ 99 | 100 | factor → (expression)|var|call|NUM 101 | 102 | call → ID(args) //done 103 | 104 | args → arg-list|empty //done 105 | 106 | arg-list → expression{, expression} //done 107 | ``` -------------------------------------------------------------------------------- /lexer/src/static/TokenType.cpp: -------------------------------------------------------------------------------- 1 | #include "TokenType.h" 2 | #include 3 | #include 4 | const int MAX_TOKEN_SIZE = 256; 5 | char TokenString[MAX_TOKEN_SIZE + 1]; 6 | int TokenIndex = 0; 7 | // 定义转换表类型模板 8 | template using map_t = std::unordered_map<_kT, _vT>; 9 | // 定义散列表,便于快速转换 10 | map_t keyword_map = { 11 | {"if",keyword_type::_if}, 12 | {"else", keyword_type::_else}, 13 | {"int", keyword_type::_int}, 14 | {"return", keyword_type::_return}, 15 | {"void", keyword_type::_void}, 16 | {"while", keyword_type::_while} 17 | }; 18 | 19 | map_t operator_map = { 20 | {"/*", operator_type::_comment}, 21 | {"+", operator_type::_add}, 22 | {"-", operator_type::_sub}, 23 | {"*", operator_type::_mul}, 24 | {"/", operator_type::_div}, 25 | {"<", operator_type::_les}, 26 | {"<=", operator_type::_leq}, 27 | {">", operator_type::_gre}, 28 | {">=", operator_type::_geq}, 29 | {"==", operator_type::_equ}, 30 | {"!=", operator_type::_neq}, 31 | {"=", operator_type::_asi}, 32 | {";", operator_type::_sem}, 33 | {",", operator_type::_com}, 34 | {"(", operator_type::_slb}, 35 | {")", operator_type::_srb}, 36 | {"[", operator_type::_mlb}, 37 | {"]", operator_type::_mrb}, 38 | {"{", operator_type::_llb}, 39 | {"}", operator_type::_lrb} 40 | }; 41 | // keyword子类型的转换函数 42 | keyword_type string2keyword(std::string str){ 43 | if(keyword_map.count(str)){ 44 | return keyword_map.at(str); 45 | } 46 | else{ 47 | return keyword_type::_null; 48 | } 49 | } 50 | std::string keyword2string(keyword_type t){ 51 | switch (t) 52 | { 53 | case keyword_type::_if: 54 | return "if"; 55 | case 
keyword_type::_else: 56 | return "else"; 57 | case keyword_type::_int: 58 | return "int"; 59 | case keyword_type::_return: 60 | return "return"; 61 | case keyword_type::_void: 62 | return "void"; 63 | case keyword_type::_while: 64 | return "while"; 65 | default: 66 | return ""; 67 | break; 68 | } 69 | } 70 | 71 | // operator子类型的转换函数 72 | operator_type string2operator(std::string str){ 73 | if(operator_map.count(str)){ 74 | return operator_map.at(str); 75 | } 76 | else{ 77 | return operator_type::_null; 78 | } 79 | } 80 | std::string operator2string(operator_type o){ 81 | switch (o) 82 | { 83 | case operator_type::_add: 84 | return "+"; 85 | case operator_type::_sub: 86 | return "-"; 87 | case operator_type::_mul: 88 | return "*"; 89 | case operator_type::_div: 90 | return "/"; 91 | case operator_type::_les: 92 | return "<"; 93 | case operator_type::_leq: 94 | return "<="; 95 | case operator_type::_gre: 96 | return ">"; 97 | case operator_type::_geq: 98 | return ">="; 99 | case operator_type::_equ: 100 | return "=="; 101 | case operator_type::_neq: 102 | return "!="; 103 | case operator_type::_asi: 104 | return "="; 105 | case operator_type::_sem: 106 | return ";"; 107 | case operator_type::_com: 108 | return ","; 109 | case operator_type::_slb: 110 | return "("; 111 | case operator_type::_srb: 112 | return ")"; 113 | case operator_type::_mlb: 114 | return "["; 115 | case operator_type::_mrb: 116 | return "]"; 117 | case operator_type::_llb: 118 | return "{"; 119 | case operator_type::_lrb: 120 | return "}"; 121 | default: 122 | return ""; 123 | break; 124 | } 125 | } 126 | 127 | -------------------------------------------------------------------------------- /parser/src/static/TokenType.cpp: -------------------------------------------------------------------------------- 1 | #include "TokenType.h" 2 | #include 3 | #include 4 | const int MAX_TOKEN_SIZE = 256; 5 | char TokenString[MAX_TOKEN_SIZE + 1]; 6 | int TokenIndex = 0; 7 | // 定义转换表类型模板 8 | template using map_t 
= std::unordered_map<_kT, _vT>; 9 | // 定义散列表,便于快速转换 10 | map_t keyword_map = { 11 | {"if",keyword_type::_if}, 12 | {"else", keyword_type::_else}, 13 | {"int", keyword_type::_int}, 14 | {"return", keyword_type::_return}, 15 | {"void", keyword_type::_void}, 16 | {"while", keyword_type::_while} 17 | }; 18 | 19 | map_t operator_map = { 20 | {"/*", operator_type::_comment}, 21 | {"+", operator_type::_add}, 22 | {"-", operator_type::_sub}, 23 | {"*", operator_type::_mul}, 24 | {"/", operator_type::_div}, 25 | {"<", operator_type::_les}, 26 | {"<=", operator_type::_leq}, 27 | {">", operator_type::_gre}, 28 | {">=", operator_type::_geq}, 29 | {"==", operator_type::_equ}, 30 | {"!=", operator_type::_neq}, 31 | {"=", operator_type::_asi}, 32 | {";", operator_type::_sem}, 33 | {",", operator_type::_com}, 34 | {"(", operator_type::_slb}, 35 | {")", operator_type::_srb}, 36 | {"[", operator_type::_mlb}, 37 | {"]", operator_type::_mrb}, 38 | {"{", operator_type::_llb}, 39 | {"}", operator_type::_lrb} 40 | }; 41 | // keyword子类型的转换函数 42 | keyword_type string2keyword(std::string str){ 43 | if(keyword_map.count(str)){ 44 | return keyword_map.at(str); 45 | } 46 | else{ 47 | return keyword_type::_null; 48 | } 49 | } 50 | std::string keyword2string(keyword_type t){ 51 | switch (t) 52 | { 53 | case keyword_type::_if: 54 | return "if"; 55 | case keyword_type::_else: 56 | return "else"; 57 | case keyword_type::_int: 58 | return "int"; 59 | case keyword_type::_return: 60 | return "return"; 61 | case keyword_type::_void: 62 | return "void"; 63 | case keyword_type::_while: 64 | return "while"; 65 | default: 66 | return ""; 67 | break; 68 | } 69 | } 70 | 71 | // operator子类型的转换函数 72 | operator_type string2operator(std::string str){ 73 | if(operator_map.count(str)){ 74 | return operator_map.at(str); 75 | } 76 | else{ 77 | return operator_type::_null; 78 | } 79 | } 80 | std::string operator2string(operator_type o){ 81 | switch (o) 82 | { 83 | case operator_type::_add: 84 | return "+"; 85 | case 
operator_type::_sub: 86 | return "-"; 87 | case operator_type::_mul: 88 | return "*"; 89 | case operator_type::_div: 90 | return "/"; 91 | case operator_type::_les: 92 | return "<"; 93 | case operator_type::_leq: 94 | return "<="; 95 | case operator_type::_gre: 96 | return ">"; 97 | case operator_type::_geq: 98 | return ">="; 99 | case operator_type::_equ: 100 | return "=="; 101 | case operator_type::_neq: 102 | return "!="; 103 | case operator_type::_asi: 104 | return "="; 105 | case operator_type::_sem: 106 | return ";"; 107 | case operator_type::_com: 108 | return ","; 109 | case operator_type::_slb: 110 | return "("; 111 | case operator_type::_srb: 112 | return ")"; 113 | case operator_type::_mlb: 114 | return "["; 115 | case operator_type::_mrb: 116 | return "]"; 117 | case operator_type::_llb: 118 | return "{"; 119 | case operator_type::_lrb: 120 | return "}"; 121 | default: 122 | return ""; 123 | break; 124 | } 125 | } 126 | 127 | -------------------------------------------------------------------------------- /lexer/include/TokenType.h: -------------------------------------------------------------------------------- 1 | # pragma once 2 | # include 3 | extern const int MAX_TOKEN_SIZE; 4 | extern char TokenString[]; 5 | extern int TokenIndex; 6 | // 定义CMinus所需的4中不同的token类型 7 | enum class token_type{ 8 | _null, 9 | _keyword, // 关键字类型 10 | _operator, // 运算符 11 | _ID, // identifier 12 | _NUM, // 数值类型 13 | }; 14 | 15 | // 定义子类型——关键字 16 | enum class keyword_type{ 17 | // 字如其名的转义 18 | _null, _if, _else, _int, _return, _void, _while, 19 | }; 20 | // 子类型——关键字 对应的转换函数 21 | keyword_type string2keyword(std::string str); 22 | std::string keyword2string(keyword_type key); 23 | 24 | // 定义子类型——运算符类型 25 | enum class operator_type{ 26 | _null, 27 | _comment, // /* 28 | _add, // + 29 | _sub, // - 30 | _mul, // * 31 | _div, // / 32 | _les, // < 33 | _leq, // <= less equal 34 | _gre, // > 35 | _geq, // >= greater equal 36 | _equ, // == equal 37 | _neq, // != not equal 38 | _asi, 
// = 39 | _sem, // ; semicolon 40 | _com, // , 41 | _slb, // ( small left bracket 42 | _srb, // ) smal right bracket 43 | _mlb, // [ medium left bracket 44 | _mrb, // ] medius rigth bracket 45 | _llb, // { large left bracket 46 | _lrb, // } large right bracket 47 | }; 48 | // 定义相应的转换函数 49 | operator_type string2operator(std::string str); 50 | std::string operator2string(operator_type); 51 | 52 | // 定义token基本类,并根据行为派生4中不同的token对象 53 | class token_base{ 54 | std::size_t _line = 0, _pos = 0; 55 | public: 56 | token_base() = default; 57 | token_base(std::size_t l, std::size_t p): _line(l), _pos(p){} 58 | virtual ~token_base() = default; 59 | // 返回token类型 60 | virtual token_type get_token_type() const{ 61 | return token_type::_null; 62 | } 63 | virtual std::string to_string() const = 0; 64 | inline std::size_t get_line() const{ 65 | return _line; 66 | } 67 | inline std::size_t get_pos() const{ 68 | return _pos; 69 | } 70 | }; 71 | // keyword类 72 | class token_keyword final : public token_base { 73 | keyword_type _type; 74 | public: 75 | ~token_keyword() = default; 76 | token_keyword(keyword_type t, std::size_t l, std::size_t p): token_base(l,p), _type(t){} 77 | 78 | std::string to_string() const override{ 79 | return "Keyword: \t" + keyword2string(_type); 80 | } 81 | token_type get_token_type() const override{ 82 | return token_type::_keyword; 83 | } 84 | }; 85 | // 运算符token类 86 | class token_operator final : public token_base { 87 | operator_type _type; 88 | public: 89 | ~token_operator() = default; 90 | token_operator(operator_type t, std::size_t l, std::size_t p): token_base(l,p), _type(t){} 91 | 92 | std::string to_string() const override{ 93 | return "Operator: \t" + operator2string(_type); 94 | } 95 | token_type get_token_type() const override{ 96 | return token_type::_operator; 97 | } 98 | }; 99 | // 数字token类 100 | class token_identifier final : public token_base { 101 | std::string _ID; 102 | public: 103 | ~token_identifier() = default; 104 | 
token_identifier(std::string id, std::size_t l, std::size_t p): token_base(l,p), _ID(id){} 105 | 106 | std::string to_string() const override{ 107 | return "Identifier: \t" + _ID; 108 | } 109 | token_type get_token_type() const override{ 110 | return token_type::_ID; 111 | } 112 | }; 113 | 114 | class token_number final : public token_base { 115 | std::string _number; 116 | public: 117 | ~token_number() = default; 118 | token_number(std::string number, std::size_t l, std::size_t p): token_base(l,p), _number(number){} 119 | 120 | std::string to_string() const override{ 121 | return "Number: \t" + _number; 122 | } 123 | token_type get_token_type() const override{ 124 | return token_type::_NUM; 125 | } 126 | }; 127 | -------------------------------------------------------------------------------- /parser/include/parser/parser.h: -------------------------------------------------------------------------------- 1 | # pragma once 2 | #include "lexer/Lexer.h" 3 | #include "TokenType.h" 4 | #include "parser/abstract_tree.h" 5 | class TreeNode; 6 | class cmparser{ 7 | cmlexer *lexer; // 获取lexer指针,以操作lexer进行语法分析 8 | token_base * current_token; 9 | size_t idx_token; 10 | bool error_state = 0; 11 | // 抽象语法树的根节点 12 | TreeNode * AST; 13 | public: 14 | // 获取下一个token 指针 15 | token_base * get_next_token(); 16 | // 获取当前token 指针 17 | token_base * get_current_token(); 18 | // 获取当前token 类型 19 | token_type get_current_token_type(); 20 | // 在确认token类型后,以keyword 获取 21 | token_keyword* get_current_keyword(); 22 | // 在确认token类型后, 以 number获取 23 | token_number* get_current_number(); 24 | // 在确认token类型后, 以运算符获取 25 | token_operator* get_current_operator(); 26 | // 在确认Token类型后, 以标识符获取 27 | token_identifier* get_current_id(); 28 | //发生语法错误的报错,传入的字符串作为报错信息 29 | void SyntaxError(std::string str); 30 | // Match下一个关键字,如果不匹配则抛异常 31 | bool match_keyword(keyword_type t); 32 | // Match 下一个运算符,如果和参数列表中的不匹配则抛异常 33 | bool match_operator(operator_type t); 34 | public: 35 | cmparser() = default; 36 | // 
需要传入lexer作为初始化起点 37 | cmparser(cmlexer * lex): lexer(lex) {} 38 | ~cmparser() = default; 39 | void reset_status(){ 40 | idx_token = 0; 41 | } 42 | // 运行parser 43 | TreeNode * parse(); 44 | private: 45 | // program -> declaration_list 46 | TreeNode * program(); 47 | 48 | // declaration_list -> declaration {declaration} 49 | TreeNode * declaration_list(); 50 | 51 | // declaration -> var_declaration | fun_declaration 52 | // FIRST(var_declaration) ∩ FIRST(fun_declaration) = type_specifier ID 53 | TreeNode * declaration(); 54 | 55 | // fun_declaration -> type_specifier ID ( params ) compound_stmt 56 | TreeNode * fun_declaration(TreeNode * type_specifier, TreeNode * ID); 57 | 58 | // params -> paramlist | VOID 59 | TreeNode * params(); 60 | 61 | // param_list -> param {, param} 62 | TreeNode * param_list(TreeNode * pass_node); 63 | 64 | // param -> type_specifier ID [ \[\] ] //用转义符表达真实匹配的信息 65 | TreeNode * param(TreeNode * pass_node); 66 | 67 | TreeNode * compound_stmt(); 68 | TreeNode * local_declarations(); 69 | 70 | // var_declaration -> type_specifier ID; | type_specifier ID [ NUM ] ; 71 | // 有声明列表则返回对应节点,否则返回nullptr 72 | // 因为存在“左公共因子” ,所以通过传参数来区别上层传下来的内容 73 | TreeNode * var_declaration(TreeNode * type_specifier, TreeNode * ID); 74 | 75 | // statement_list -> {statement} 76 | TreeNode * statement_list(); 77 | 78 | // statement -> expression_stmt | compound_stmt | selection_stmt | iteration_stmt | return_stmt 79 | TreeNode * statement(); 80 | 81 | //expression_stmt -> expression ; | ; 82 | TreeNode* expression_stmt(); 83 | //selection_stmt -> IF ( expression ) statement | IF ( expression ) statement ELSE statement 84 | TreeNode * selection_stmt(); 85 | 86 | // iteration_stmt -> WHILE ( expression ) statement 87 | TreeNode * iteration_stmt(); 88 | 89 | // return_stmt -> RETURN; | RETURN expression; 90 | TreeNode * return_stmt(); 91 | 92 | // expression -> var = expression | simple_expression 93 | TreeNode * expression(); 94 | //simple_expression -> 
additive_expression [ relop additive_expression ] 95 | // relop -> < > <= >= == != 96 | TreeNode * simple_expression(TreeNode * pass_node); 97 | 98 | // additive_expression -> term {addop term} 99 | // addop -> + | - 100 | TreeNode * additive_expression(TreeNode * pass_node); 101 | // term -> factor {mulop factor } 102 | // mulop -> * | / 103 | TreeNode * term(TreeNode * pass_node); 104 | // factor -> 105 | // 1.(expression) 106 | // 2.ID (args) // call 107 | // 3.ID [expression] // var 108 | // 4.ID // var 109 | // 5.NUM // NUM 110 | TreeNode * factor(TreeNode * pass_node); 111 | 112 | // var -> ID | ID [ expression ] 113 | TreeNode * var(); 114 | 115 | //call -> ID ( args ) 116 | TreeNode *call(TreeNode *k); 117 | 118 | // args -> empty | expression {, expression} 119 | // 此处已经在call中解决了empty的问题 120 | TreeNode * arg_list(); 121 | }; 122 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CMinus 词法分析器和语法分析器 2 | > 本项目基于C++开发 3 | - 本仓库为四川大学编译原理课程设计仓库 4 | - 目标为完成CMinus词法分析器和语法分析器,本质上是C语言的一个子集,功能较为简单,便于本科生快速实现理解原理。其[具体词法语法规则参见此链接](documents/CMinus词法和语法规则.md) 5 | - 仓库内容按模块分为`lexer`和`parser`的子文件夹,分别实现了**词法分析器**和**语法分析器**对应的功能。 6 | 7 | ## 功能展示 8 | ### 词法分析器 Lexer 9 | > 有些教材会称作scanner而非本文用的lexer 10 | > 11 | 在`./lexer`文件夹下运行`make.bat`后,会默认从`./lexer/test_cases`下的`input.c`文件读入信息 12 | 13 | 结果会在内部转化为一个装有所有`token`类型的`vector`,同时会在`./lexer/test_cases/output.txt`中展示每一个词法单元的类型,值,行号和列号。 14 | 15 | - 一个经典的例子: 16 | - 输入: 17 | ```c {class=line-numbers} 18 | /* A program to perform Euclid's 19 | Algorithm to compute gcd. 
*/ 20 | 21 | int gcd (int u, int v) 22 | { 23 | if (v == 0) 24 | return u ; 25 | else 26 | return gcd(v,u-u/v*v); 27 | /* u-u/v*v == 28 | u mod v */ 29 | } 30 | 31 | void main(void) 32 | { 33 | int x; int y; 34 | x = input(); 35 | y = input(); 36 | output(gcd(x,y)); 37 | } 38 | ``` 39 | 40 | - 输出 41 | ```c{class=line-numbers} 42 | #4 3 Keyword: int 43 | #4 7 Identifier: gcd 44 | #4 9 Operator: ( 45 | #4 12 Keyword: int 46 | #4 14 Identifier: u 47 | #4 15 Operator: , 48 | #4 19 Keyword: int 49 | #4 21 Identifier: v 50 | #4 22 Operator: ) 51 | #5 1 Operator: { 52 | #6 6 Keyword: if 53 | #6 8 Operator: ( 54 | #6 9 Identifier: v 55 | #6 12 Operator: == 56 | #6 14 Number: 0 57 | #6 15 Operator: ) 58 | #7 14 Keyword: return 59 | #7 16 Identifier: u 60 | #7 18 Operator: ; 61 | #8 8 Keyword: else 62 | #9 14 Keyword: return 63 | #9 18 Identifier: gcd 64 | #9 19 Operator: ( 65 | #9 20 Identifier: v 66 | #9 21 Operator: , 67 | #9 22 Identifier: u 68 | #9 23 Operator: - 69 | #9 24 Identifier: u 70 | #9 25 Operator: / 71 | #9 26 Identifier: v 72 | #9 27 Operator: * 73 | #9 28 Identifier: v 74 | #9 29 Operator: ) 75 | #9 30 Operator: ; 76 | #12 1 Operator: } 77 | #14 4 Keyword: void 78 | #14 9 Identifier: main 79 | #14 10 Operator: ( 80 | #14 14 Keyword: void 81 | #14 15 Operator: ) 82 | #15 1 Operator: { 83 | #16 7 Keyword: int 84 | #16 9 Identifier: x 85 | #16 10 Operator: ; 86 | #16 14 Keyword: int 87 | #16 16 Identifier: y 88 | #16 17 Operator: ; 89 | #17 5 Identifier: x 90 | #17 7 Operator: = 91 | #17 13 Identifier: input 92 | #17 14 Operator: ( 93 | #17 15 Operator: ) 94 | #17 16 Operator: ; 95 | #18 5 Identifier: y 96 | #18 7 Operator: = 97 | #18 13 Identifier: input 98 | #18 14 Operator: ( 99 | #18 15 Operator: ) 100 | #18 16 Operator: ; 101 | #19 10 Identifier: output 102 | #19 11 Operator: ( 103 | #19 14 Identifier: gcd 104 | #19 15 Operator: ( 105 | #19 16 Identifier: x 106 | #19 17 Operator: , 107 | #19 18 Identifier: y 108 | #19 19 Operator: ) 109 | #19 20 Operator: 
) 110 | #19 21 Operator: ; 111 | #20 1 Operator: } 112 | ``` 113 | 114 | 每一个`Token`和源代码中的内容一一对应。 115 | - 此lexer具有一些基本的报错能力: 116 | ![](images/lexer报错显示.png) 117 | 118 | ### 语法分析器 Parser 119 | 对于语法分析器,则可以在`parser`文件夹下运行对应的`make.bat`来将`./parser/test_cases`下的`input.c`文件的内容转化为语法树,并以缩进和颜色的方式输出对应的语法树,同样对于上面的例子,有如下结果: 120 | 121 | ![](images/parser结果预览.png) 122 | 123 | 本质上`parser`的成员中含有一个`lexer`的指针,使用了`lexer`的`get_next_token()`成员函数作为输入逐一分析。 124 | 125 | ![](images/parser中的lexer指针.png) 126 | ## CMAKE编译指令 127 | 本项目采用`CMake`构建,是C/C++工程开发目前最常见的集成工具之一,如果你不熟悉CMAKE,可以参考如下两个仓库学习如何使用CMAKE编译C/C++工程文件: 128 | 129 | [![Readme Card](https://github-readme-stats.vercel.app/api/pin/?username=SFUMECJF&repo=cmake-examples-Chinese)](https://github.com/SFUMECJF/cmake-examples-Chinese) 130 | 131 | [![Readme Card](https://github-readme-stats.vercel.app/api/pin/?username=ttroy50&repo=cmake-examples)](https://github.com/ttroy50/cmake-examples) 132 | 133 | > 跟着上面的教程走一遍,大概1个多小时能掌握基本的使用方法。而事实上,如果你不想这么麻烦,大致把头文件`*.h`和`*.cpp`文件适当组装到一个文件中也是能正常运行的,不过不便于debug和寻找模块对应代码。 134 | 135 | -------- 136 | 掌握CMAKE之后(或者只打算跑一下demo看看结果),请将路径切换到对应文件夹模块下来编译源代码: 137 | ### 直接编译方式: 138 | 在目录下提供了`make.bat`文件,在`Windows`环境下,安装过CMAKE之后,可以通过在命令行中批处理来自动化完成编译运行过程。 139 | ```powershell 140 | ./make.bat 141 | ``` 142 | ### 手动编译方式 143 | - 在build路径中执行如下指令以获得`MinGW Makefiles`(我这里使用minGW编译,如果你有其他的编译工具,请自行选择): 144 | ``` 145 | cmake .. 
-G 'MinGW Makefiles' 146 | ``` 147 | - 随后在这个文件夹下运行·MinGW·即可开始编译: 148 | ``` 149 | mingw32-make.exe 150 | ``` 151 | - 在**命令行中运行**根目录下得到的`*.exe`文件即可。 152 | ```powershell 153 | ./CMinusLexer_binary.exe 154 | ``` 155 | 156 | -------- 157 | 158 | 祝大家学有所成, 欢迎反馈bug等信息~ :) -------------------------------------------------------------------------------- /parser/include/TokenType.h: -------------------------------------------------------------------------------- 1 | # pragma once 2 | # include 3 | extern const int MAX_TOKEN_SIZE; 4 | extern char TokenString[]; 5 | extern int TokenIndex; 6 | /* 定义CMinus所需的4中不同的token类型 7 | _keyword 8 | _operator 9 | _ID 10 | _NUM 11 | */ 12 | enum class token_type{ 13 | _null, 14 | _keyword, // 关键字类型 15 | _operator, // 运算符 16 | _ID, // identifier 17 | _NUM, // 数值类型 18 | }; 19 | 20 | /* 定义子类型——关键字 21 | _null, _if, _else, _int, _return, _void, _while 22 | */ 23 | enum class keyword_type{ 24 | // 字如其名的转义 25 | _null, _if, _else, _int, _return, _void, _while, 26 | }; 27 | // 子类型——关键字 对应的转换函数 28 | keyword_type string2keyword(std::string str); 29 | std::string keyword2string(keyword_type key); 30 | 31 | // 定义子类型——运算符类型 32 | // _null, 33 | // _comment, // /* 34 | // _add, // + 35 | // _sub, // - 36 | // _mul, // * 37 | // _div, // / 38 | // _les, // < 39 | // _leq, // <= less equal 40 | // _gre, // > 41 | // _geq, // >= greater equal 42 | // _equ, // == equal 43 | // _neq, // != not equal 44 | // _asi, // = 45 | // _sem, // ; semicolon 46 | // _com, // , 47 | // _slb, // ( small left bracket 48 | // _srb, // ) smal right bracket 49 | // _mlb, // [ medium left bracket 50 | // _mrb, // ] medius rigth bracket 51 | // _llb, // { large left bracket 52 | // _lrb, // } large right bracket 53 | enum class operator_type{ 54 | _null, 55 | _comment, // /* 56 | _add, // + 57 | _sub, // - 58 | _mul, // * 59 | _div, // / 60 | _les, // < 61 | _leq, // <= less equal 62 | _gre, // > 63 | _geq, // >= greater equal 64 | _equ, // == equal 65 | _neq, // != not equal 66 | 
_asi, // = 67 | _sem, // ; semicolon 68 | _com, // , 69 | _slb, // ( small left bracket 70 | _srb, // ) smal right bracket 71 | _mlb, // [ medium left bracket 72 | _mrb, // ] medius rigth bracket 73 | _llb, // { large left bracket 74 | _lrb, // } large right bracket 75 | }; 76 | // 定义相应的转换函数 77 | operator_type string2operator(std::string str); 78 | std::string operator2string(operator_type); 79 | 80 | // 定义token基本类,并根据行为派生4中不同的token对象 81 | class token_base{ 82 | std::size_t _line = 0, _pos = 0; 83 | public: 84 | token_base() = default; 85 | token_base(std::size_t l, std::size_t p): _line(l), _pos(p){} 86 | virtual ~token_base() = default; 87 | // 返回token类型 88 | virtual token_type get_token_type() const{ 89 | return token_type::_null; 90 | } 91 | virtual std::string to_string() const = 0; 92 | inline std::size_t get_line() const{ 93 | return _line; 94 | } 95 | inline std::size_t get_pos() const{ 96 | return _pos; 97 | } 98 | }; 99 | // keyword类 100 | class token_keyword final : public token_base { 101 | keyword_type _type; 102 | public: 103 | ~token_keyword() = default; 104 | token_keyword(keyword_type t, std::size_t l, std::size_t p): token_base(l,p), _type(t){} 105 | 106 | std::string to_string() const override{ 107 | return "Keyword: \t" + keyword2string(_type); 108 | } 109 | // 返回keyword类型 110 | keyword_type get_keyword_type() const{ 111 | return _type; 112 | } 113 | // 返回token类型(keyword) 114 | token_type get_token_type() const override{ 115 | return token_type::_keyword; 116 | } 117 | }; 118 | // 运算符token类 119 | class token_operator final : public token_base { 120 | operator_type _type; 121 | public: 122 | ~token_operator() = default; 123 | token_operator(operator_type t, std::size_t l, std::size_t p): token_base(l,p), _type(t){} 124 | operator_type get_operator_type(){ 125 | return _type; 126 | } 127 | std::string to_string() const override{ 128 | return "Operator: \t" + operator2string(_type); 129 | } 130 | token_type get_token_type() const override{ 131 | 
return token_type::_operator; 132 | } 133 | }; 134 | // 数字token类 135 | class token_identifier final : public token_base { 136 | std::string _ID; 137 | public: 138 | ~token_identifier() = default; 139 | token_identifier(std::string id, std::size_t l, std::size_t p): token_base(l,p), _ID(id){} 140 | std::string get_ID() const{ 141 | return _ID; 142 | } 143 | std::string to_string() const override{ 144 | return "Identifier: \t" + _ID; 145 | } 146 | token_type get_token_type() const override{ 147 | return token_type::_ID; 148 | } 149 | }; 150 | 151 | class token_number final : public token_base { 152 | std::string _number; 153 | public: 154 | ~token_number() = default; 155 | token_number(std::string number, std::size_t l, std::size_t p): token_base(l,p), _number(number){} 156 | std::string get_number() const { 157 | return _number; 158 | } 159 | std::string to_string() const override{ 160 | return "Number: \t" + _number; 161 | } 162 | token_type get_token_type() const override{ 163 | return token_type::_NUM; 164 | } 165 | }; 166 | -------------------------------------------------------------------------------- /lexer/src/static/Lexer.cpp: -------------------------------------------------------------------------------- 1 | #include "Lexer.h" 2 | 3 | std::set oper_start = { 4 | '+', '-', '*', '/', '<', '>', '=', '!', '=',';' , ',', '(', ')', '[', ']', '{', '}' 5 | }; 6 | bool isoperator(char c){ 7 | return oper_start.count(c); 8 | } 9 | std::unordered_map error_map = { 10 | {state::unexpected_char, "未知的字符!"}, 11 | {state::undefined_operator, "无法识别的运算符!"}, 12 | {state::unexpected_state, "状态机异常,请联系作者debug!"} 13 | }; 14 | std::string get_error_str(state s){ 15 | if(error_map.count(s)){ 16 | return error_map.at(s); 17 | }else{ 18 | return ""; 19 | } 20 | } 21 | 22 | // 设置输入输出的路径 23 | void cmlexer::setPath(std::string i, std::string o){ 24 | ifs.open(i); 25 | if(o != ""){ 26 | output_redirect = true; 27 | ofs.open(o); 28 | std::cout.rdbuf(ofs.rdbuf()); 29 | } 30 | } 31 | 32 | 
state cmlexer::read_next(char c, bool next){ 33 | if(next){ //后移一位 34 | linepos++; // 加位置 35 | } 36 | switch (_s){ // todo : innumb 37 | case state::start :{ 38 | if(c == '\0') { 39 | return _s; 40 | } 41 | else if(c == '\n'){ 42 | linepos = 0; 43 | lineno++; // 加行数 44 | return _s; 45 | } 46 | else if(isspace(c)){ 47 | return _s; 48 | } 49 | else if(isdigit(c)){ 50 | buffer += c; 51 | return _s = state::in_numb; 52 | } 53 | else if(isoperator(c)){ 54 | buffer += c; 55 | return _s = state::in_oper; 56 | } 57 | else if(isalpha(c)){ 58 | buffer += c; 59 | return _s = state::in_iden; 60 | }else if (c == EOF){ 61 | return _s = state::output; 62 | }else{ 63 | return _s = state::unexpected_char; 64 | } 65 | } 66 | case state::in_oper :{ 67 | if(!isoperator(c)){ // 新读进来的不是运算符 68 | auto oper = string2operator(buffer); 69 | buffer.clear(); 70 | if(oper == operator_type::_null){ // 不存在 71 | return _s = state::undefined_operator; 72 | } 73 | else if(oper == operator_type::_comment){ // 如果是注释则进入注释状态 74 | return _s = state::in_comm; 75 | }else{ // 其他状态则正常输出 76 | results.emplace_back(new token_operator(oper, lineno, linepos - 1)); 77 | return _s = state::output; 78 | } 79 | }else{ // 新读进来的也是运算符 【x=-1】【y<=1】 80 | auto oper = string2operator(buffer); 81 | if(oper != operator_type::_null && string2operator(buffer + c) == operator_type::_null){ 82 | buffer.clear(); 83 | results.emplace_back(new token_operator(oper, lineno, linepos - 1)); 84 | return _s = state::output; 85 | }else if (oper != operator_type::_null && string2operator( buffer + c) != operator_type::_null){ 86 | if(operator_type::_comment == (string2operator(buffer + c))){ 87 | buffer.clear(); 88 | return _s = state::in_comm; 89 | } 90 | } 91 | buffer += c; 92 | return _s; // 因为下一个还是运算符,保持运算符不变 93 | } 94 | 95 | } 96 | case state::in_comm:{ 97 | if(c == '\n'){ 98 | linepos = 0; 99 | lineno++; 100 | return _s; 101 | }else if(c == '*'){ 102 | return _s = state::ex_comm; 103 | }else{ 104 | return _s = state::in_comm; 105 | } 
106 | } 107 | case state::ex_comm:{ 108 | if(c == '/'){ 109 | return _s = state::start; 110 | }else if (c == '\n'){ 111 | linepos = 0; 112 | lineno++; 113 | } 114 | return _s = state::in_comm; 115 | } 116 | case state::in_iden:{ 117 | if(isalpha(c)){ 118 | buffer += c; 119 | return _s; 120 | }else{ // 这里需要先检测是否是关键字 121 | auto key_type = string2keyword(buffer); 122 | if(key_type == keyword_type::_null){ 123 | // 说明不是关键字,按照标识符处理 124 | results.emplace_back(new token_identifier(buffer, lineno, linepos - 1)); 125 | } 126 | else{ 127 | // 按照关键字处理 128 | results.emplace_back(new token_keyword(key_type, lineno, linepos - 1)); 129 | } 130 | buffer.clear(); 131 | return _s = state::output; 132 | 133 | } 134 | } 135 | case state::in_numb:{ 136 | if(isdigit(c)){ 137 | buffer += c; 138 | return _s; 139 | }else{ 140 | results.emplace_back(new token_number(buffer, lineno, linepos - 1)); 141 | buffer.clear(); 142 | return _s = state::output; 143 | } 144 | } 145 | default: 146 | return _s = state::unexpected_state; 147 | } 148 | } 149 | token_base * cmlexer::get_next_token(){ 150 | return get_next_token(ifs); 151 | } 152 | token_base * cmlexer::get_next_token(std::ifstream &local_ifs){ 153 | do{ 154 | if(is_eof){ 155 | return nullptr; 156 | }else{ 157 | if(line_idx >= line_buff.size()){ 158 | if(!std::getline(local_ifs, line_buff)){ 159 | is_eof = true; 160 | } 161 | line_idx = 0; 162 | line_buff += "\n"; 163 | // std::cout << line_buff; 164 | } 165 | read_next(line_buff[line_idx], next); 166 | if(!next){ 167 | next = 1; 168 | } 169 | if(error_state()){ 170 | std::cout << "ERROR :" << get_error_str(_s) << std::endl; 171 | for (char &ch : line_buff) if (ch == '\t') ch = ' '; 172 | std::cout << ' ' << line_buff << std::flush; 173 | std::cout << ' ' << std::string(get_pos() - 1, ' ') << "^" << std::endl; 174 | std::cout << " " << "In the line " << get_lineno() << ", position " << get_pos() << std::endl << std::endl; reset_status(); 175 | return nullptr; 176 | } 177 | if(_s == 
state::output){ 178 | next = 0; 179 | return this->get_result(); 180 | } 181 | line_idx++; 182 | } 183 | }while(1); 184 | } 185 | 186 | void cmlexer::lexing_file(){ 187 | lexing_file(ifs); 188 | } 189 | 190 | void cmlexer::lexing_file(std::ifstream &local_ifs){ 191 | token_base* res; 192 | while((res = get_next_token()) != nullptr){ 193 | if(if_std_output) 194 | std::cout <<'#'<< res->get_line() << "\t" <get_pos() << '\t' << res->to_string() << std::endl; 195 | } 196 | } -------------------------------------------------------------------------------- /parser/src/static/lexer/Lexer.cpp: -------------------------------------------------------------------------------- 1 | #include "lexer/Lexer.h" 2 | 3 | std::set oper_start = { 4 | '+', '-', '*', '/', '<', '>', '=', '!', '=',';' , ',', '(', ')', '[', ']', '{', '}' 5 | }; 6 | bool isoperator(char c){ 7 | return oper_start.count(c); 8 | } 9 | std::unordered_map error_map = { 10 | {state::unexpected_char, "未知的字符!"}, 11 | {state::undefined_operator, "无法识别的运算符!"}, 12 | {state::unexpected_state, "状态机异常,请联系作者debug!"} 13 | }; 14 | std::string get_error_str(state s){ 15 | if(error_map.count(s)){ 16 | return error_map.at(s); 17 | }else{ 18 | return ""; 19 | } 20 | } 21 | 22 | // 设置输入输出的路径 23 | void cmlexer::setPath(std::string i, std::string o){ 24 | ifs.open(i); 25 | if(o != ""){ 26 | output_redirect = true; 27 | ofs.open(o); 28 | std::cout.rdbuf(ofs.rdbuf()); 29 | } 30 | } 31 | 32 | state cmlexer::read_next(char c, bool next){ 33 | if(next){ //后移一位 34 | linepos++; // 加位置 35 | } 36 | switch (_s){ // todo : innumb 37 | case state::start :{ 38 | if(c == '\0') { 39 | return _s; 40 | } 41 | else if(c == '\n'){ 42 | linepos = 0; 43 | lineno++; // 加行数 44 | return _s; 45 | } 46 | else if(isspace(c)){ 47 | return _s; 48 | } 49 | else if(isdigit(c)){ 50 | buffer += c; 51 | return _s = state::in_numb; 52 | } 53 | else if(isoperator(c)){ 54 | buffer += c; 55 | return _s = state::in_oper; 56 | } 57 | else if(isalpha(c)){ 58 | buffer += c; 
59 | return _s = state::in_iden; 60 | }else if (c == EOF){ 61 | return _s = state::output; 62 | }else{ 63 | return _s = state::unexpected_char; 64 | } 65 | } 66 | case state::in_oper :{ 67 | if(!isoperator(c)){ // 新读进来的不是运算符 68 | auto oper = string2operator(buffer); 69 | buffer.clear(); 70 | if(oper == operator_type::_null){ // 不存在 71 | return _s = state::undefined_operator; 72 | } 73 | else if(oper == operator_type::_comment){ // 如果是注释则进入注释状态 74 | return _s = state::in_comm; 75 | }else{ // 其他状态则正常输出 76 | results.emplace_back(new token_operator(oper, lineno, linepos - 1)); 77 | return _s = state::output; 78 | } 79 | }else{ // 新读进来的也是运算符 【x=-1】【y<=1】 80 | auto oper = string2operator(buffer); 81 | if(oper != operator_type::_null && string2operator(buffer + c) == operator_type::_null){ 82 | buffer.clear(); 83 | results.emplace_back(new token_operator(oper, lineno, linepos - 1)); 84 | return _s = state::output; 85 | }else if (oper != operator_type::_null && string2operator( buffer + c) != operator_type::_null){ 86 | if(operator_type::_comment == (string2operator(buffer + c))){ 87 | buffer.clear(); 88 | return _s = state::in_comm; 89 | } 90 | } 91 | buffer += c; 92 | return _s; // 因为下一个还是运算符,保持运算符不变 93 | } 94 | 95 | } 96 | case state::in_comm:{ 97 | if(c == '\n'){ 98 | linepos = 0; 99 | lineno++; 100 | return _s; 101 | }else if(c == '*'){ 102 | return _s = state::ex_comm; 103 | }else{ 104 | return _s = state::in_comm; 105 | } 106 | } 107 | case state::ex_comm:{ 108 | if(c == '/'){ 109 | return _s = state::start; 110 | }else if (c == '\n'){ 111 | linepos = 0; 112 | lineno++; 113 | } 114 | return _s = state::in_comm; 115 | } 116 | case state::in_iden:{ 117 | if(isalpha(c)){ 118 | buffer += c; 119 | return _s; 120 | }else{ // 这里需要先检测是否是关键字 121 | auto key_type = string2keyword(buffer); 122 | if(key_type == keyword_type::_null){ 123 | // 说明不是关键字,按照标识符处理 124 | results.emplace_back(new token_identifier(buffer, lineno, linepos - 1)); 125 | } 126 | else{ 127 | // 按照关键字处理 128 | 
results.emplace_back(new token_keyword(key_type, lineno, linepos - 1)); 129 | } 130 | buffer.clear(); 131 | return _s = state::output; 132 | 133 | } 134 | } 135 | case state::in_numb:{ 136 | if(isdigit(c)){ 137 | buffer += c; 138 | return _s; 139 | }else{ 140 | results.emplace_back(new token_number(buffer, lineno, linepos - 1)); 141 | buffer.clear(); 142 | return _s = state::output; 143 | } 144 | } 145 | default: 146 | return _s = state::unexpected_state; 147 | } 148 | } 149 | token_base * cmlexer::get_next_token(){ 150 | return get_next_token(ifs); 151 | } 152 | token_base * cmlexer::get_next_token(std::ifstream &local_ifs){ 153 | do{ 154 | if(is_eof){ 155 | return nullptr; 156 | }else{ 157 | if(line_idx >= line_buff.size()){ 158 | if(!std::getline(local_ifs, line_buff)){ 159 | is_eof = true; 160 | } 161 | line_idx = 0; 162 | line_buff += "\n"; 163 | // std::cout << line_buff; 164 | } 165 | read_next(line_buff[line_idx], next); 166 | if(!next){ 167 | next = 1; 168 | } 169 | if(error_state()){ 170 | std::cout << "ERROR :" << get_error_str(_s) << std::endl; 171 | for (char &ch : line_buff) if (ch == '\t') ch = ' '; 172 | std::cout << ' ' << line_buff << std::flush; 173 | std::cout << ' ' << std::string(get_pos() - 1, ' ') << "^" << std::endl; 174 | std::cout << " " << "In the line " << get_lineno() << ", position " << get_pos() << std::endl << std::endl; reset_status(); 175 | return nullptr; 176 | } 177 | if(_s == state::output){ 178 | next = 0; 179 | return this->get_result(); 180 | } 181 | line_idx++; 182 | } 183 | }while(1); 184 | } 185 | 186 | void cmlexer::lexing_file(){ 187 | lexing_file(ifs); 188 | } 189 | 190 | void cmlexer::lexing_file(std::ifstream &local_ifs){ 191 | token_base* res; 192 | while((res = get_next_token()) != nullptr){ 193 | if(if_std_output) 194 | std::cout <<'#'<< res->get_line() << "\t" <get_pos() << '\t' << res->to_string() << std::endl; 195 | } 196 | } -------------------------------------------------------------------------------- 
/parser/include/parser/abstract_tree.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "TokenType.h" 3 | #include 4 | #include 5 | class cmparser; 6 | // 单个节点的最大子节点个数 7 | const int MAXCHILDREN = 4; 8 | // 区分节点类型的枚举类型 9 | enum class Node_type{ 10 | _expK, // 表达式类型 11 | _stmtK, // 语句类型 12 | }; 13 | // 语句类型的多种节点 14 | enum class StmtKind{ 15 | // 注释中为“语法规则有,但是没有实现”的非终结符: 16 | _program, // 程序入口 17 | _declaration_list, // 声明列表 18 | _declaration, // 一次声明 19 | _var_declaration,// 变量声明 20 | // type_specifier - 只有int和void 21 | _fun_declaration, // 函数声明 22 | // params - 只有param_list和void,直接用param代替 23 | // param_list - 用多个param 的 sibiling代替 24 | _param, // 由param组成params 25 | _param_array, // 数组形参 - 是原来的表达式中没有的 26 | _compound_stmt, // {local_declarations statement_list} 27 | // local_declarations - 用多个var_declaration或者nullptr代替 28 | // statement_list - 用多个不同种类的statement直接代替 29 | _expression_stmt, // 表达式语句 30 | _selection_stmt,// 分支语句 31 | _iteration_stmt,// 循环语句 32 | _return_stmt, // 返回语句 33 | }; 34 | // 表达式类型节点 35 | enum class ExpKind{ 36 | _void, // Void节点 37 | _int, // int节点 38 | _assign, // 赋值语句 39 | _term, // 乘法项 40 | _factor, // 因子 41 | _var, // 变量 42 | _array_var, // 数组变量 43 | _call, // 函数调用 44 | _args, // 函数形参列表 45 | _num, //只含有数值的节点 46 | _id, // 只含有ID的节点 47 | _opK, // 二元运算符节点 48 | _empty, 49 | }; 50 | //抽象语法树的节点类 51 | class TreeNode{ 52 | public: 53 | TreeNode() = default; 54 | ~TreeNode() = default; 55 | // 确定是什么类型,表达式 or stmt? 
56 | Node_type _type; 57 | TreeNode * child[MAXCHILDREN]; 58 | TreeNode * sibling; 59 | // 具体到节点内的类型、什么表达式或什么stmt 60 | union { 61 | ExpKind exp; 62 | StmtKind stmt; 63 | } kind; 64 | // 如果是Exp 65 | union { 66 | operator_type op; 67 | keyword_type keyword; 68 | int num; 69 | char * id; 70 | } attr; 71 | // 用于保存当前节点的行号 72 | int lineno = 0; 73 | // 生成一个表达式节点并返回 74 | static TreeNode * newExpNode(ExpKind kind, int line){ 75 | TreeNode * p = new TreeNode(); 76 | p->_type = Node_type::_expK; 77 | for(auto &i : p->child) i = nullptr; 78 | p->sibling = nullptr; 79 | p->kind.exp = kind; 80 | p->lineno = line; 81 | return p; 82 | } 83 | // 生成一个语句节点并返回 84 | static TreeNode * newStmtNode(StmtKind kind, int line){ 85 | TreeNode * p = new TreeNode(); 86 | p->_type = Node_type::_stmtK; 87 | for(auto &i : p->child) i = nullptr; 88 | p->sibling = nullptr; 89 | p->kind.stmt = kind; 90 | p->lineno = line; 91 | return p; 92 | } 93 | // 设置运算符表达式中的运算符类型 94 | void set_operator(operator_type oper){ 95 | if(_type != Node_type::_expK){ 96 | throw "Error, 尝试对于非表达式节点赋值!"; 97 | } 98 | kind.exp = ExpKind::_opK; 99 | attr.op = oper; 100 | } 101 | 102 | //keyword不需要单独存储,从表达式类型即可反向构建 103 | 104 | //设置当前树节点 数值 并设置节点类型 105 | void set_number(std::string num_string){ 106 | if(_type != Node_type::_expK){ 107 | throw "Error, 尝试对于非表达式节点赋值!"; 108 | } 109 | kind.exp = ExpKind::_num; 110 | attr.num = atoi(num_string.c_str()); 111 | } 112 | 113 | // 设置当前节点的 字符值,并设置节点类型 114 | void set_id(std::string id_string){ 115 | if(_type != Node_type::_expK){ 116 | throw "Error, 尝试对于非表达式节点赋值!"; 117 | } 118 | kind.exp = ExpKind::_id; 119 | attr.id = (char*) malloc(strlen(id_string.c_str()) + 1); 120 | strcpy(attr.id, id_string.c_str()); 121 | } 122 | void to_string(){ 123 | switch (_type) 124 | { 125 | case Node_type::_stmtK: 126 | std:: cout << "\033[1;34;40m<"; 127 | break; 128 | case Node_type::_expK: 129 | std:: cout << "\033[1;35;40m<"; 130 | break; 131 | } 132 | 133 | switch (_type) 134 | { 135 | case 
Node_type::_stmtK: 136 | switch (kind.stmt) 137 | { 138 | case StmtKind::_program: 139 | std::cout << "Program" ; 140 | break; 141 | case StmtKind::_declaration_list: 142 | std::cout << "Declaration_list" ; 143 | break; 144 | case StmtKind::_declaration: 145 | std::cout << "Declaration" ; 146 | break; 147 | case StmtKind::_var_declaration: 148 | std::cout << "Var_declaration" ; 149 | break; 150 | case StmtKind::_fun_declaration: 151 | std::cout << "Fun_declaration"; 152 | break; 153 | case StmtKind::_param: 154 | std::cout << "Param" ; 155 | break; 156 | case StmtKind::_param_array: 157 | std::cout << "Param_array"; 158 | break; 159 | case StmtKind::_compound_stmt: 160 | std::cout << "Compound_stmt"; 161 | break; 162 | case StmtKind::_expression_stmt: 163 | std::cout << "Expression_stmt" ; 164 | case StmtKind::_selection_stmt: 165 | std::cout << "Selection_stmt" ; 166 | break; 167 | case StmtKind::_iteration_stmt: 168 | std::cout << "Iteration_stmt" ; 169 | break; 170 | case StmtKind::_return_stmt: 171 | std::cout << "Return_stmt"; 172 | break; 173 | default: 174 | break; 175 | } 176 | break; 177 | 178 | case Node_type::_expK: 179 | switch (kind.exp) 180 | { 181 | case ExpKind::_void: 182 | std::cout << "Type_identifier : VOID"; 183 | break; 184 | case ExpKind::_int: 185 | std::cout << "Type_identifier : INT"; 186 | break; 187 | case ExpKind::_assign: 188 | std::cout << "Assign: = "; 189 | break; 190 | case ExpKind::_var: 191 | std::cout << "Var"; 192 | break; 193 | 194 | case ExpKind::_array_var: 195 | std::cout << "Array_var : " << attr.id << "[]"; 196 | break; 197 | case ExpKind::_call: 198 | std::cout << "Call"; 199 | break; 200 | case ExpKind::_args: 201 | if(attr.op == operator_type::_com){ 202 | // 后续的形参列表需要标注 “逗号” 203 | std::cout <<"Args: "<< operator2string(attr.op); 204 | } 205 | else{ 206 | // 第一个形参则直接输出即可 207 | std::cout<< "Args"; 208 | } 209 | 210 | break; 211 | case ExpKind::_num: 212 | std::cout << "NUM : " << attr.num; 213 | break; 214 | case 
ExpKind::_id: 215 | std::cout <<"Identifier : " <\033[0m (" <dfs(layer + 1); 247 | } 248 | } 249 | if(sibling != nullptr){ 250 | std::cout << "--sibling start--" << std::endl; 251 | sibling->dfs(layer); 252 | std::cout << "--sibling end--" << std::endl; 253 | } 254 | } 255 | void dfs(int layer){ 256 | std::cout << ""; 257 | for(int i = 0 ; i < layer ; i++){ 258 | std::cout << " "; 259 | } 260 | to_string(); 261 | for(int i = 0 ; i < MAXCHILDREN ; i++){ 262 | if(child[i] != nullptr){ 263 | child[i]->dfs(layer + 1); 264 | } 265 | } 266 | if(sibling != nullptr){ 267 | sibling->dfs(layer); 268 | } 269 | } 270 | 271 | void show(){ 272 | // debug_dfs(1); 273 | dfs(0); 274 | } 275 | 276 | }; 277 | -------------------------------------------------------------------------------- /parser/src/static/parser/Parser.cpp: -------------------------------------------------------------------------------- 1 | #include "parser/Parser.h" 2 | // #include "abstract_tree.h" 3 | 4 | token_base * cmparser::get_next_token(){ 5 | current_token = lexer->get_next_token(); 6 | if(current_token != nullptr){ 7 | std::cout << current_token->to_string() << std::endl; 8 | } 9 | return current_token; 10 | } 11 | 12 | token_base * cmparser::get_current_token(){ 13 | return current_token; 14 | } 15 | token_type cmparser::get_current_token_type(){ 16 | return current_token->get_token_type(); 17 | } 18 | token_keyword* cmparser::get_current_keyword(){ 19 | if(current_token==nullptr) 20 | return nullptr; 21 | if(current_token->get_token_type() == token_type::_keyword){ 22 | return (token_keyword *) current_token; 23 | }else{ 24 | // SyntaxError("token类型不是预期的关键字类型!"); 25 | return nullptr; 26 | } 27 | } 28 | token_number* cmparser::get_current_number(){ 29 | if(current_token==nullptr) 30 | return nullptr; 31 | if(current_token->get_token_type() == token_type::_NUM){ 32 | return (token_number *) current_token; 33 | }else{ 34 | // SyntaxError("Token类型不是预期的Number类型!"); 35 | return nullptr; 36 | } 37 | } 38 | 
token_operator* cmparser::get_current_operator(){ 39 | if(current_token==nullptr) 40 | return nullptr; 41 | if(current_token->get_token_type() == token_type::_operator){ 42 | return (token_operator *) current_token; 43 | }else{ 44 | // SyntaxError("Token类型不是预期的运算符类型!"); 45 | return nullptr; 46 | } 47 | } 48 | token_identifier* cmparser::get_current_id(){ 49 | if(current_token==nullptr) 50 | return nullptr; 51 | if(current_token->get_token_type() == token_type::_ID){ 52 | return (token_identifier *) current_token; 53 | }else{ 54 | // SyntaxError("Token类型不是预期的标识符类型!"); 55 | return nullptr; 56 | } 57 | } 58 | 59 | void cmparser::SyntaxError(std::string error_string){ 60 | std:: cout << ">>> "; 61 | std::cout << "SyntaxError:\033[1;31;40m" << error_string << "\033[0m at line: \033[1;32;40m" << current_token->get_line() << "\033[0m pos: \033[1;32;40m" << current_token->get_pos() << "\033[0m" << std::endl; 62 | error_state = 1; // 进入错误状态 63 | throw -1; 64 | } 65 | 66 | bool cmparser::match_keyword(keyword_type t){ 67 | // 匹配成功,读入下一个字符 68 | if(get_current_token()->get_token_type() == token_type::_keyword && ((token_keyword*) current_token)->get_keyword_type() == t){ 69 | get_next_token(); 70 | return 1; 71 | } 72 | else{ 73 | SyntaxError(" Unexpected token type --> " + get_current_token()->to_string() + "\n Should be ->" + keyword2string(t) + " \n"); 74 | error_state = 1; 75 | return 0; 76 | } 77 | } 78 | bool cmparser::match_operator(operator_type t){ 79 | // 匹配成功,读入下一个字符 80 | if(get_current_token()->get_token_type() == token_type::_operator && ((token_operator*) current_token)->get_operator_type() == t){ 81 | get_next_token(); 82 | return 1; 83 | } 84 | else{ 85 | SyntaxError(" Unexpected token type --> " + get_current_token()->to_string() + "\n Should be -> '" + operator2string(t) + "'\n "); 86 | error_state = 1; 87 | return 0; 88 | } 89 | } 90 | 91 | TreeNode * cmparser::parse(){ 92 | get_next_token(); 93 | auto res = program(); 94 | if(current_token != nullptr){ 95 | 
SyntaxError("Unexpected Exit!"); 96 | return nullptr; 97 | }else{ 98 | return res; 99 | } 100 | } 101 | 102 | TreeNode * cmparser::program(){ 103 | std::cout << "program" << std::endl; 104 | auto t = TreeNode::newStmtNode(StmtKind::_program, current_token->get_line()); 105 | t->child[0] = declaration_list(); 106 | std::cout << "\033[1;32;40m----FINISH PARSING----\033[0m" << std::endl; 107 | return t; 108 | } 109 | 110 | TreeNode * cmparser::declaration_list(){ 111 | auto f = TreeNode::newStmtNode(StmtKind::_declaration_list,current_token->get_line()); 112 | f->child[0] = declaration(); 113 | auto p = f->child[0]; 114 | // 如果符合进一步的FIRST集合 115 | if(current_token != nullptr && get_current_token_type() == token_type::_ID || (get_current_token_type() == token_type::_keyword && (get_current_keyword()->get_keyword_type() == keyword_type::_int || get_current_keyword()->get_keyword_type() == keyword_type::_void) ) ){ 116 | p->sibling = declaration(); 117 | p = p->sibling; 118 | } 119 | return f; 120 | } 121 | 122 | TreeNode * cmparser::declaration(){ 123 | std::cout << "declaration" << std::endl; 124 | auto t = TreeNode::newStmtNode(StmtKind::_declaration,current_token->get_line()); 125 | TreeNode * local_type_specifier; 126 | TreeNode * local_ID; 127 | if(current_token != nullptr && get_current_token_type() == token_type::_keyword){ 128 | switch (get_current_keyword()->get_keyword_type()) 129 | { 130 | case keyword_type::_void : 131 | local_type_specifier = TreeNode::newExpNode(ExpKind::_void,current_token->get_line()); 132 | match_keyword(keyword_type::_void); 133 | break; 134 | case keyword_type::_int: 135 | local_type_specifier = TreeNode::newExpNode(ExpKind::_int,current_token->get_line()); 136 | match_keyword(keyword_type::_int); 137 | break; 138 | default: 139 | SyntaxError("不合法的声明!"); 140 | get_next_token(); 141 | break; 142 | } 143 | if(current_token != nullptr && get_current_token_type() == token_type::_ID){ 144 | local_ID = 
TreeNode::newExpNode(ExpKind::_id,current_token->get_line()); 145 | local_ID->set_id(get_current_id()->get_ID()); 146 | get_next_token(); 147 | }else{ 148 | SyntaxError("不合法的声明!"); 149 | get_next_token(); 150 | } 151 | } 152 | if(current_token != nullptr && get_current_token_type() == token_type::_operator){ 153 | switch(get_current_operator()->get_operator_type()){ 154 | case operator_type::_mlb: 155 | t->child[0] = var_declaration(local_type_specifier, local_ID); 156 | break; 157 | case operator_type::_slb: 158 | t->child[0] = fun_declaration(local_type_specifier, local_ID); 159 | break; 160 | default: 161 | t->child[0] = var_declaration(local_type_specifier, local_ID); 162 | break; 163 | } 164 | } 165 | else{ 166 | t->child[0] = var_declaration(local_type_specifier, local_ID); 167 | } 168 | return t; 169 | } 170 | TreeNode * cmparser::fun_declaration(TreeNode * type_specifier, TreeNode * ID){ 171 | std::cout << "fun_declaration" << std::endl; 172 | auto t = TreeNode::newStmtNode(StmtKind::_fun_declaration,current_token->get_line()); 173 | if(type_specifier == nullptr){ 174 | if(current_token != nullptr && get_current_token_type() == token_type::_keyword){ 175 | TreeNode * k; 176 | switch (get_current_keyword()->get_keyword_type()) 177 | { 178 | case keyword_type::_void : 179 | k = TreeNode::newExpNode(ExpKind::_void,current_token->get_line()); 180 | t->child[0] = k; 181 | match_keyword(keyword_type::_void); 182 | break; 183 | case keyword_type::_int: 184 | k = TreeNode::newExpNode(ExpKind::_int,current_token->get_line()); 185 | t->child[0] = k; 186 | match_keyword(keyword_type::_int); 187 | break; 188 | default: 189 | SyntaxError("不合法的函数声明!"); 190 | get_next_token(); 191 | break; 192 | } 193 | if(current_token != nullptr && get_current_token_type() != token_type::_ID){ 194 | auto k = TreeNode::newExpNode(ExpKind::_id,current_token->get_line()); 195 | k->set_id(get_current_id()->get_ID()); 196 | t->child[1] = k; 197 | get_next_token(); 198 | }else{ 199 | 
SyntaxError("不合法的函数声明!"); 200 | delete(t); 201 | get_next_token(); 202 | } 203 | } 204 | }else{ 205 | t->child[0] = type_specifier; 206 | t->child[1] = ID; 207 | } 208 | match_operator(operator_type::_slb); 209 | auto j = params(); 210 | t->child[2] = j; 211 | match_operator(operator_type::_srb); 212 | j = compound_stmt(); 213 | t->child[3] = j; 214 | return t; 215 | } 216 | 217 | TreeNode * cmparser::params(){ 218 | std::cout << "params" << std::endl; 219 | if(current_token != nullptr && get_current_token_type() == token_type::_keyword && get_current_keyword()->get_keyword_type() == keyword_type::_void){ 220 | auto k = TreeNode::newExpNode(ExpKind::_void,current_token->get_line()); 221 | match_keyword(keyword_type::_void); 222 | if(current_token != nullptr && get_current_token_type() != token_type::_ID){ 223 | k->kind.stmt = StmtKind::_param; 224 | return k; 225 | }else{ 226 | return param_list(k); 227 | } 228 | }else{ 229 | return param_list(nullptr); 230 | } 231 | } 232 | 233 | TreeNode * cmparser::param_list(TreeNode * pass_node){ 234 | std::cout << "param_list" << std::endl; 235 | auto t = param(pass_node); 236 | auto p = t; 237 | while (current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_com) 238 | { 239 | match_operator(operator_type::_com); 240 | auto k = param(nullptr); 241 | p->sibling = k; 242 | p = k; 243 | } 244 | return t; 245 | } 246 | 247 | TreeNode * cmparser::param(TreeNode * pass_node){ 248 | std::cout << "param" << std::endl; 249 | auto t = TreeNode::newStmtNode(StmtKind::_param,current_token->get_line()); 250 | // 匹配开头的 type_specifier 251 | if(current_token != nullptr &&get_current_token_type() == token_type::_keyword){ 252 | // 如果上面没有传下来消除左公共因子的参数 253 | if(pass_node == nullptr){ 254 | if(get_current_keyword()->get_keyword_type() == keyword_type::_void){ 255 | auto k = TreeNode::newExpNode(ExpKind::_void,current_token->get_line()); 256 | t->child[0] = k; 
257 | match_keyword(keyword_type::_void); 258 | } 259 | else if(get_current_keyword()->get_keyword_type() == keyword_type::_int){ 260 | auto k = TreeNode::newExpNode(ExpKind::_int,current_token->get_line()); 261 | t->child[0] = k; 262 | match_keyword(keyword_type::_int); 263 | } 264 | }else{// 如果上面为了消除左公共因子的参数传下来了了 265 | t->child[0] = pass_node; 266 | } 267 | // 进一步匹配ID 268 | if(current_token != nullptr && get_current_token_type() == token_type::_ID){ 269 | auto k = TreeNode::newExpNode(ExpKind::_id,current_token->get_line()); 270 | k->set_id(get_current_id()->get_ID()); 271 | t->child[1] = k; 272 | get_next_token(); 273 | } 274 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_mlb){ 275 | t->kind.stmt = StmtKind::_param_array; // 转换为数组保存 276 | match_operator(operator_type::_mlb); 277 | match_operator(operator_type::_mrb); 278 | } 279 | return t; 280 | }else{ 281 | SyntaxError("不符合形参列表的规范!"); 282 | return nullptr; 283 | } 284 | } 285 | 286 | TreeNode * cmparser::compound_stmt(){ 287 | std::cout << "compound_stmt" << std::endl; 288 | auto t = TreeNode::newStmtNode(StmtKind::_compound_stmt,current_token->get_line()); 289 | match_operator(operator_type::_llb); 290 | t->child[0] = local_declarations(); 291 | t->child[1] = statement_list(); 292 | match_operator(operator_type::_lrb); 293 | return t; 294 | } 295 | 296 | TreeNode * cmparser::local_declarations(){ 297 | std::cout << "local_declarations" << std::endl; 298 | TreeNode * t = var_declaration(nullptr, nullptr); 299 | TreeNode * p = nullptr; 300 | while (current_token != nullptr && get_current_token_type() == token_type::_keyword && (get_current_keyword()->get_keyword_type() == keyword_type::_int || get_current_keyword() ->get_keyword_type() == keyword_type::_void)) 301 | { 302 | p = var_declaration(nullptr, nullptr); 303 | t->sibling = p; 304 | if(p != nullptr){ 305 | p = p->sibling; 306 | } 307 | } 308 | return t; 
309 | } 310 | 311 | TreeNode * cmparser::var_declaration(TreeNode * type_specifier, TreeNode * ID){ 312 | std::cout << "var_declaration" << std::endl; 313 | TreeNode * t = nullptr; 314 | // 如果没有从上面传下来提前读的TypeID和specifier 315 | if(type_specifier == nullptr){ 316 | if(current_token != nullptr && get_current_token_type() == token_type::_keyword){ 317 | t = TreeNode::newStmtNode(StmtKind::_var_declaration,current_token->get_line()); 318 | // p匹配 void 或 int 319 | if(get_current_keyword()->get_keyword_type() == keyword_type::_void ){ 320 | auto n = TreeNode::newExpNode(ExpKind::_void,current_token->get_line()); 321 | t->child[0] = n; 322 | match_keyword(keyword_type::_void); 323 | }else if(get_current_keyword()->get_keyword_type() == keyword_type::_int){ 324 | auto n = TreeNode::newExpNode(ExpKind::_int,current_token->get_line()); 325 | t->child[0] = n; 326 | match_keyword(keyword_type::_int); 327 | }else{ 328 | delete(t); 329 | return nullptr; 330 | } 331 | // 匹配ID 332 | if(current_token != nullptr && get_current_token_type() == token_type::_ID){ 333 | auto n = TreeNode::newExpNode(ExpKind::_id,current_token->get_line()); 334 | n->set_id(get_current_id()->get_ID()); 335 | t->child[1] = n; 336 | get_next_token(); 337 | }else{ 338 | delete(t); 339 | return nullptr; 340 | } 341 | }else{ 342 | return nullptr; 343 | } 344 | }else {// 如果从上面传下来了已经读好的 type_specifier 和ID 345 | t = TreeNode::newStmtNode(StmtKind::_var_declaration,current_token->get_line()); 346 | t->child[0] = type_specifier; 347 | t->child[1] = ID; 348 | } 349 | // 如果存在方括号 350 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_mlb){ 351 | match_operator(operator_type::_mlb); 352 | TreeNode * q3 = TreeNode::newExpNode(ExpKind::_num,current_token->get_line()); 353 | q3->set_number(get_current_number()->get_number()); 354 | t->child[2] = q3; 355 | get_next_token(); 356 | match_operator(operator_type::_mrb); 357 | } 358 
| match_operator(operator_type::_sem); 359 | return t; 360 | } 361 | 362 | TreeNode * cmparser::statement_list(){ 363 | std::cout << "statement_list" << std::endl; 364 | auto t = statement(); 365 | auto p = t; 366 | if(t == nullptr){ 367 | return nullptr; 368 | }else{ 369 | while(current_token != nullptr && 370 | // keyword 371 | ( 372 | get_current_token_type() == token_type::_keyword && ( 373 | get_current_keyword()->get_keyword_type() == keyword_type::_if || 374 | get_current_keyword()->get_keyword_type() == keyword_type::_while || 375 | get_current_keyword()->get_keyword_type() == keyword_type::_return ) 376 | )||(// operator 377 | get_current_token_type() == token_type::_operator &&( 378 | get_current_operator()->get_operator_type() == operator_type::_llb || 379 | get_current_operator()->get_operator_type() == operator_type::_sem || 380 | get_current_operator()->get_operator_type() == operator_type::_slb) 381 | )||( 382 | get_current_token_type() == token_type::_ID 383 | )||( 384 | get_current_token_type() == token_type::_NUM 385 | ) 386 | ){ 387 | auto q = statement(); 388 | if(q != nullptr){ 389 | p->sibling = q; 390 | p = q; 391 | } 392 | } 393 | } 394 | return t; 395 | } 396 | 397 | TreeNode * cmparser::statement(){ 398 | std::cout << "statement" << std::endl; 399 | TreeNode * t = nullptr; 400 | if(current_token != nullptr){ 401 | switch (get_current_token_type()) 402 | { 403 | case token_type::_keyword : 404 | switch (get_current_keyword()->get_keyword_type()) 405 | { 406 | case keyword_type::_if: 407 | t = selection_stmt(); 408 | break; 409 | 410 | case keyword_type::_while: 411 | t = iteration_stmt(); 412 | break; 413 | 414 | case keyword_type::_return: 415 | t = return_stmt(); 416 | break; 417 | default: 418 | SyntaxError("语句中出现了非法的关键字!"); 419 | get_next_token(); 420 | break; 421 | } 422 | break; 423 | 424 | case token_type::_ID: case token_type::_NUM: 425 | t = expression_stmt(); 426 | break; 427 | 428 | case token_type::_operator: 429 | switch 
(get_current_operator()->get_operator_type()) 430 | { 431 | // 匹配 '{' 432 | case operator_type::_llb : 433 | t = compound_stmt(); 434 | break; 435 | case operator_type::_sem: case operator_type::_slb: 436 | t = expression_stmt(); 437 | break; 438 | 439 | default: 440 | SyntaxError("语句中出现了非法的运算符!"); 441 | get_next_token(); 442 | break; 443 | } 444 | break; 445 | default: 446 | SyntaxError("语句中出现了非法的token类型!"); 447 | get_next_token(); 448 | break; 449 | } 450 | } 451 | return t; 452 | } 453 | 454 | TreeNode* cmparser::expression_stmt(){ 455 | std::cout << "expression_stmt" << std::endl; 456 | TreeNode *t = nullptr; 457 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_sem){ 458 | match_operator(operator_type::_sem); 459 | //TODO 有可能返回空指针会炸 460 | }else{ 461 | t = expression(); 462 | } 463 | return t; 464 | } 465 | 466 | TreeNode * cmparser::selection_stmt(){ 467 | std::cout << "selection_stmt" << std::endl; 468 | auto t = TreeNode::newStmtNode(StmtKind::_selection_stmt,current_token->get_line()); 469 | // match if 470 | match_keyword(keyword_type::_if); 471 | // match ( 472 | match_operator(operator_type::_slb); 473 | t->child[0] = expression(); 474 | // match ) 475 | match_operator(operator_type::_srb); 476 | t->child[1] = statement(); 477 | if(current_token != nullptr && get_current_token_type() == token_type::_keyword && get_current_keyword()->get_keyword_type() == keyword_type::_else){ 478 | match_keyword(keyword_type::_else); 479 | t->child[2] = statement(); 480 | } 481 | return t; 482 | } 483 | 484 | TreeNode * cmparser::iteration_stmt(){ 485 | std::cout << "iteration_stmt" << std::endl; 486 | auto t = TreeNode::newStmtNode(StmtKind::_iteration_stmt,current_token->get_line()); 487 | // match while 488 | match_keyword(keyword_type::_while); 489 | // match ( 490 | match_operator(operator_type::_slb); 491 | t->child[0] = expression(); 492 | // match ) 493 | 
match_operator(operator_type::_srb); 494 | t->child[1] = statement(); 495 | return t; 496 | } 497 | 498 | TreeNode * cmparser::return_stmt(){ 499 | std::cout << "return_stmt" << std::endl; 500 | auto t = TreeNode::newStmtNode(StmtKind::_return_stmt,current_token->get_line()); 501 | match_keyword(keyword_type::_return); 502 | // 匹配 ';' 503 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_sem){ 504 | match_operator(operator_type::_sem); 505 | }else{ 506 | t->child[0] = expression(); 507 | match_operator(operator_type::_sem); 508 | } 509 | return t; 510 | } 511 | 512 | TreeNode * cmparser::expression(){ 513 | std::cout << "expression" << std::endl; 514 | TreeNode *t = var(); 515 | // 不是以ID开头,只能是simple_expression 516 | if(t == nullptr){ 517 | t = simple_expression(nullptr); 518 | }else{ 519 | // 赋值语句 var = expression 520 | if(current_token != nullptr && get_current_token_type()== token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_asi) { 521 | auto p = TreeNode::newExpNode(ExpKind::_assign,current_token->get_line()); 522 | p->attr.op = operator_type::_asi; 523 | match_operator(operator_type::_asi); 524 | p->child[0] = t; 525 | p->child[1] = expression(); 526 | return p; 527 | }else{ 528 | //传入分为三种情况,需要在simple中进行区分: 529 | // 1. t = id 下一步是其他的 530 | // 2. t = id[expression] 531 | // 3. 
t = id 下一步是( 532 | t = simple_expression(t); 533 | } 534 | } 535 | return t; 536 | } 537 | 538 | TreeNode * cmparser::simple_expression(TreeNode * pass_node){ 539 | std::cout << "simple_expression" << std::endl; 540 | auto n = additive_expression(pass_node); 541 | // 如果是operator 542 | if(current_token != nullptr && get_current_token_type() == token_type::_operator){ 543 | auto current_op_type = get_current_operator()->get_operator_type(); 544 | // 如果是6个关系运算符 545 | if(current_op_type >= operator_type::_les && current_op_type <= operator_type::_neq){ 546 | auto p = TreeNode::newExpNode(ExpKind::_opK,current_token->get_line()); 547 | p->attr.op = current_op_type; 548 | match_operator(current_op_type); 549 | p->child[0] = n; 550 | n = p; 551 | n->child[1] = additive_expression(nullptr); 552 | } 553 | } 554 | return n; 555 | } 556 | 557 | TreeNode * cmparser::additive_expression(TreeNode * pass_node){ 558 | std::cout << "additive_expression" << std::endl; 559 | auto n = term(pass_node); 560 | // 如果个符号是加号或者减号 561 | while(get_current_operator() != nullptr && (get_current_operator()->get_operator_type() == operator_type::_add || get_current_operator()->get_operator_type() == operator_type::_sub)){ 562 | // 将这个运算符赋予节点 563 | auto p = TreeNode::newExpNode(ExpKind::_opK,current_token->get_line()); 564 | p->child[0] = n; 565 | p->set_operator(get_current_operator()->get_operator_type()); 566 | // 匹配此字符 567 | match_operator(get_current_operator()->get_operator_type()); 568 | n = p; 569 | n->child[1] = term(nullptr); 570 | } 571 | return n; 572 | } 573 | 574 | TreeNode * cmparser::term(TreeNode * pass_node){ 575 | std::cout << "term" << std::endl; 576 | auto n = factor(pass_node); 577 | while(get_current_operator() != nullptr && (get_current_operator()->get_operator_type() == operator_type::_mul || get_current_operator()->get_operator_type() == operator_type::_div)){ 578 | TreeNode *p = TreeNode::newExpNode(ExpKind::_opK,current_token->get_line()); 579 | p->child[0] = n; 580 | 
p->set_operator(get_current_operator()->get_operator_type()); 581 | match_operator(get_current_operator()->get_operator_type()); 582 | n = p; 583 | n->child[1] = factor(nullptr); 584 | } 585 | return n; 586 | } 587 | 588 | TreeNode * cmparser::factor(TreeNode * pass_node){ 589 | std::cout << "factor" << std::endl; 590 | TreeNode *t; 591 | // 如果从上层传下来了需要的 592 | if(pass_node != nullptr){ 593 | // 匹配 ID(args) 594 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_slb){ 595 | t = call(pass_node); 596 | 597 | }else{ 598 | t = pass_node; 599 | } 600 | }else{ 601 | //如果从上层没传下来需要的节点 602 | if(current_token != nullptr){ 603 | switch (get_current_token_type()) 604 | { 605 | case token_type::_operator: 606 | // 1. 匹配 (expression) 607 | if(get_current_operator()->get_operator_type() == operator_type::_slb){ 608 | match_operator(operator_type::_slb); 609 | }else{ 610 | SyntaxError("非法的字符"); 611 | get_next_token(); 612 | } 613 | t = expression(); 614 | match_operator(operator_type::_srb); 615 | break; 616 | // 匹配 NUM 617 | case token_type::_NUM: 618 | t = TreeNode::newExpNode(ExpKind::_num,current_token->get_line()); 619 | t->set_number(get_current_number()->get_number()); 620 | get_next_token(); 621 | break; 622 | // 匹配纯ID 623 | case token_type::_ID: 624 | t = var(); 625 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_slb){ 626 | t = call(t) ; 627 | } 628 | break; 629 | default: 630 | SyntaxError("非法的字符"); 631 | get_next_token(); 632 | break; 633 | } 634 | } 635 | } 636 | return t; 637 | } 638 | 639 | TreeNode * cmparser::var(){ 640 | std::cout << "var" << std::endl; 641 | auto t = TreeNode::newExpNode(ExpKind::_var,current_token->get_line()); 642 | if(current_token != nullptr && get_current_token_type() == token_type::_ID){ 643 | auto j = 
TreeNode::newExpNode(ExpKind::_id,current_token->get_line()); 644 | j->set_id(get_current_id()->get_ID()); 645 | t->child[0] = j; 646 | get_next_token(); 647 | //如果匹配左括号 '[' 则为ID[expression] 648 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_mlb){ 649 | t->kind.exp = ExpKind::_array_var; 650 | match_operator(operator_type::_mlb); 651 | t->child[1] = expression(); 652 | match_operator(operator_type::_mrb); 653 | } 654 | return t; 655 | }else{ 656 | return nullptr; 657 | } 658 | } 659 | 660 | TreeNode * cmparser::call(TreeNode *k){ 661 | std::cout << "call" << std::endl; 662 | TreeNode *t = TreeNode::newExpNode(ExpKind::_call,current_token->get_line()); 663 | if (k != nullptr){ 664 | t->child[0] = k; 665 | } 666 | match_operator(operator_type::_slb); 667 | if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_srb){ 668 | match_operator(operator_type::_srb); 669 | t->child[1] = TreeNode::newExpNode(ExpKind::_empty,current_token->get_line()); 670 | }else if(k != nullptr){ 671 | t->child[1] = arg_list(); 672 | match_operator(operator_type::_srb); 673 | } 674 | return t; 675 | } 676 | 677 | TreeNode * cmparser::arg_list(){ 678 | std::cout << "arg_list" << std::endl; 679 | auto t = TreeNode::newExpNode(ExpKind::_args,current_token->get_line()); 680 | auto n = expression(); 681 | t->child[0] = n; 682 | // 如果下一项为 ',' 683 | TreeNode * p = t; // 滑动指针 684 | while(current_token!=nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_com){ 685 | match_operator(operator_type::_com); 686 | TreeNode *new_node = TreeNode::newExpNode(ExpKind::_args,current_token->get_line()); 687 | new_node->child[0] = expression(); 688 | new_node->attr.op = operator_type::_com; 689 | p->sibling = new_node; 690 | p = p->sibling; 691 | } 
692 | return t; 693 | } --------------------------------------------------------------------------------