├── .gitignore
├── lexer
    ├── .gitignore
    ├── test_cases
    │   ├── operator.cpp
    │   ├── input.c
    │   ├── input2.c
    │   ├── output1.txt
    │   ├── output.txt
    │   └── output2.txt
    ├── make.bat
    ├── CMakeLists.txt
    ├── include
    │   ├── Lexer.h
    │   └── TokenType.h
    └── src
    │   ├── main.cpp
    │   └── static
    │       ├── TokenType.cpp
    │       └── Lexer.cpp
├── parser
    ├── test_cases
    │   ├── input2.c
    │   ├── intput2.c
    │   ├── operator.cpp
    │   ├── input.c
    │   ├── input1.c
    │   ├── output1.txt
    │   └── output.txt
    ├── .gitignore
    ├── src
    │   ├── static
    │   │   ├── parser
    │   │   │   ├── abstract_tree.cpp
    │   │   │   └── Parser.cpp
    │   │   ├── TokenType.cpp
    │   │   └── lexer
    │   │   │   └── Lexer.cpp
    │   └── main.cpp
    ├── copy_lexer2parser.bat
    ├── make.bat
    ├── CMakeLists.txt
    └── include
    │   ├── parser
    │       ├── tmp_factor.cpp
    │       ├── parser.h
    │       └── abstract_tree.h
    │   ├── lexer
    │       └── Lexer.h
    │   └── TokenType.h
├── images
    ├── lexer输入.png
    ├── lexer报错显示.png
    ├── parser结果预览.png
    └── parser中的lexer指针.png
├── documents
    └── CMinus词法和语法规则.md
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .VSCodeCounter


--------------------------------------------------------------------------------
/lexer/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | *.exe
3 | .VSCodeCounter


--------------------------------------------------------------------------------
/parser/test_cases/input2.c:
--------------------------------------------------------------------------------
1 | a[2 + e] = 1+  b(a,112, v * d, 23,42)


--------------------------------------------------------------------------------
/parser/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | *.exe
3 | .VSCodeCounter
4 | .vscode


--------------------------------------------------------------------------------
/parser/test_cases/intput2.c:
--------------------------------------------------------------------------------
1 | a(1+2, c*d) * b * (c + d) + c / e[a + b * c + d]


--------------------------------------------------------------------------------
/images/lexer输入.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/lexer输入.png


--------------------------------------------------------------------------------
/images/lexer报错显示.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/lexer报错显示.png


--------------------------------------------------------------------------------
/images/parser结果预览.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/parser结果预览.png


--------------------------------------------------------------------------------
/parser/src/static/parser/abstract_tree.cpp:
--------------------------------------------------------------------------------
1 | #include "parser/abstract_tree.h"
2 | #include "parser/Parser.h"
3 | 


--------------------------------------------------------------------------------
/images/parser中的lexer指针.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SunnyHaze/CMinus-Lexer-Parser/HEAD/images/parser中的lexer指针.png


--------------------------------------------------------------------------------
/parser/test_cases/operator.cpp:
--------------------------------------------------------------------------------
1 |   +/*+sdasdasa */    +  - * / >= <= ; } { [ ] ( /*+++++
2 |   */ --
3 |  - ==
4 |               )
5 | ++++


--------------------------------------------------------------------------------
/lexer/test_cases/operator.cpp:
--------------------------------------------------------------------------------
 1 |   +/*+sdasdasa */    +  - * / >= <= ; } { [ ] ( /*+++++
 2 |   */ --
 3 |  - ==
 4 |               )
 5 | ++++
 6 | 
 7 | /* *** */
 8 | /* in comment
 9 | * expect out comment
10 |   / start
11 |   其他的 in comment 


--------------------------------------------------------------------------------
/parser/copy_lexer2parser.bat:
--------------------------------------------------------------------------------
1 | copy  .\..\lexer\include\Lexer.h .\include\lexer\Lexer.h
2 | copy  .\..\lexer\include\TokenType.h .\include\TokenType.h
3 | copy  ..\lexer\src\static\Lexer.cpp .\src\static\lexer\Lexer.cpp
4 | copy  ..\lexer\src\static\TokenType.cpp .\src\static\TokenType.cpp
5 | 


--------------------------------------------------------------------------------
/lexer/test_cases/input.c:
--------------------------------------------------------------------------------
 1 | /* A program to perform Euclid's
 2 | Algorithm to compute gcd. */
 3 | 
 4 | int gcd (int u, int v)
 5 | { 
 6 |     if (v == 0)
 7 |         return u ;
 8 |     else
 9 |         return gcd(v,u-u/v*v);
10 |     /* u-u/v*v ==
11 |     u mod v */
12 | }
13 | 
14 | void main(void)
15 | {
16 |     int x; int y;
17 |     x = input();
18 |     y = input();
19 |     output(gcd(x,y));
20 | }


--------------------------------------------------------------------------------
/parser/test_cases/input.c:
--------------------------------------------------------------------------------
 1 | /* A program to perform Euclid's
 2 | Algorithm to compute gcd. */
 3 | 
 4 | int gcd (int u, int v)
 5 | { 
 6 |     if (v == 0)
 7 |         return u;
 8 |     else
 9 |         return gcd(v,u-u/v*v);
10 |     /* u-u/v*v ==
11 |     u mod v */
12 | }
13 | 
14 | void main(void)
15 | {
16 |     int x; int y;
17 |     x = input();
18 |     y = input();
19 |     output(gcd(x,y));
20 | }


--------------------------------------------------------------------------------
/lexer/test_cases/input2.c:
--------------------------------------------------------------------------------
 1 | /* A program to perform Euclid's
 2 | Algorithm to compute gcd. */
 3 | 
 4 | int gcd (int u, int v)
 5 | { 
 6 |     if (v == 0)
 7 |         return u ;
 8 |     else
 9 |         return gcd(v,u-u/v*v);
10 |     /* u-u/v*v ==
11 |     u mod v */
12 | }
13 | 
14 | void main(void)
15 | {
16 |     int x; int y;
17 |     x = input();
18 |     y = input();
19 |     output(gcd(x,y));
20 | }


--------------------------------------------------------------------------------
/parser/test_cases/input1.c:
--------------------------------------------------------------------------------
 1 | /* A program to perform Euclid's
 2 | Algorithm to compute gcd. */
 3 | 
 4 | int gcd (int u, int v)
 5 | { 
 6 |     if (v == 0)
 7 |         return u ;
 8 |     else
 9 |         return gcd(v,u-u/v*v);
10 |     /* u-u/v*v ==
11 |     u mod v */
12 | }
13 | 
14 | void main(void)
15 | {
16 |     int x; int y;
17 |     x = input();
18 |     y = input();
19 |     output(gcd(x,y));
20 | }


--------------------------------------------------------------------------------
/lexer/make.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Configure, build and run the C-Minus lexer on the sample input.
 3 | @REM /build is git-ignored, so it is missing on a fresh checkout: create it,
 4 | @REM otherwise "cd build" fails and "cmake .." configures the parent directory.
 5 | if not exist build mkdir build
 6 | cd build
 7 | cmake .. -G "MinGW Makefiles"
 8 | if errorlevel 1 goto :fail
 9 | echo ===================CMAKE DONE===========================
10 | mingw32-make.exe
11 | if errorlevel 1 goto :fail
12 | echo ====================MAKE DONE===========================
13 | cd ..
14 | echo ====================Run Code============================
15 | .\CMinusLexer_binary.exe  ./test_cases/input.c ./test_cases/output.txt
16 | @REM .\CMinusLexer_binary.exe  ./test_cases/input.c
17 | goto :eof
18 | 
19 | :fail
20 | @REM Do not run a stale binary when configuring or building failed.
21 | cd ..
22 | exit /b 1
23 | 


--------------------------------------------------------------------------------
/parser/make.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Configure, build and run the C-Minus parser on the sample input.
 3 | @REM /build is git-ignored, so it is missing on a fresh checkout: create it,
 4 | @REM otherwise "cd build" fails and "cmake .." configures the parent directory.
 5 | if not exist build mkdir build
 6 | cd build
 7 | cmake .. -G "MinGW Makefiles"
 8 | if errorlevel 1 goto :fail
 9 | echo ===================CMAKE DONE===========================
10 | mingw32-make.exe
11 | if errorlevel 1 goto :fail
12 | echo ====================MAKE DONE===========================
13 | cd ..
14 | echo ====================Run Code============================
15 | @REM .\CMinusLexer_binary.exe  ./test_cases/input.c ./test_cases/output1.txt
16 | .\CMinusParser_binary.exe  ./test_cases/input.c
17 | goto :eof
18 | 
19 | :fail
20 | @REM Do not run a stale binary when configuring or building failed.
21 | cd ..
22 | exit /b 1
23 | 


--------------------------------------------------------------------------------
/parser/src/main.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <fstream>
 3 | #include "TokenType.h"
 4 | #include "lexer/Lexer.h"
 5 | #include "parser/Parser.h"
 6 | #include "parser/abstract_tree.h"
 7 | // ========== Global parameter definitions ============
 8 | std::string input = "";
 9 | // If the output path is empty, output goes to standard output; if it is not empty, output goes to that file
10 | // std::string output = "./test_cases/output.txt";
11 | std::string output = "";
12 | cmlexer lex; // the lexer instance
13 | cmparser myparser(&lex); // the parser, constructed with a pointer to lex
14 | // ====================================
15 | 
16 | // Initialize the input/output file paths: passes the globals `input` and `output` to lex.setPath
17 | void initPath(){
18 |     lex.setPath(input,output);
19 | }
20 | // Entry point: parse the file named by argv[1] and show the result.
21 | // With no argument the bundled ./test_cases/input.c is parsed, as before.
22 | int main(int argc, char* argv[]){
23 |     // make.bat passes the input path, so honor it instead of ignoring argv.
24 |     input.assign(argc > 1 ? argv[1] : "./test_cases/input.c");
25 |     initPath();
26 |     auto res = myparser.parse();
27 |     // Guard kept from the original: skip show() when parse() yields nullptr.
28 |     if(res != nullptr)   res->show();
29 |     return 0;
30 | }


--------------------------------------------------------------------------------
/lexer/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.5)
 2 | 
 3 | project(CMinusLexer_library)
 4 | 
 5 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR})
 6 | 
 7 | ############################################################
 8 | # Create a library
 9 | ############################################################
10 | file(GLOB SOURCES "src/static/*.cpp")
11 | # Generate the static library from the library sources
12 | add_library(CMinusLexer_library STATIC 
13 |     ${SOURCES}
14 | )
15 | 
16 | target_include_directories(CMinusLexer_library
17 |     PUBLIC 
18 |         ${PROJECT_SOURCE_DIR}/include
19 | )
20 | 
21 | 
22 | 
23 | ############################################################
24 | # Create an executable
25 | ############################################################
26 | 
27 | # Add an executable with the above sources
28 | add_executable(CMinusLexer_binary
29 |     src/main.cpp
30 | )
31 | 
32 | # link the CMinusLexer_library target with the CMinusLexer_binary target
33 | target_link_libraries(CMinusLexer_binary
34 |     PRIVATE 
35 |         CMinusLexer_library
36 | )
37 | 


--------------------------------------------------------------------------------
/parser/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.5)
 2 | 
 3 | project(CMinusParser_library)
 4 | 
 5 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR})
 6 | 
 7 | ############################################################
 8 | # Create a library
 9 | ############################################################
10 | file(GLOB SOURCES "src/static/*/*.cpp" "src/static/*.cpp")
11 | 
12 | # Generate the static library from the library sources
13 | add_library(CMinusParser_library STATIC 
14 |     ${SOURCES}
15 | )
16 | 
17 | target_include_directories(CMinusParser_library
18 |     PUBLIC 
19 |         ${PROJECT_SOURCE_DIR}/include
20 | )
21 | 
22 | 
23 | 
24 | ############################################################
25 | # Create an executable
26 | ############################################################
27 | 
28 | # Add an executable with the above sources
29 | add_executable(CMinusParser_binary
30 |     src/main.cpp
31 | )
32 | 
33 | # link the CMinusParser_library target with the CMinusParser_binary target
34 | target_link_libraries(CMinusParser_binary
35 |     PRIVATE 
36 |         CMinusParser_library
37 | )
38 | 


--------------------------------------------------------------------------------
/lexer/test_cases/output1.txt:
--------------------------------------------------------------------------------
 1 | dick70
 2 | Keyword:      	int
 3 | Identifier: 	gcd
 4 | Operator:   	(
 5 | Keyword:      	int
 6 | Identifier: 	u
 7 | Operator:   	,
 8 | Keyword:      	int
 9 | Identifier: 	v
10 | Operator:   	)
11 | Operator:   	{
12 | Keyword:      	if
13 | Operator:   	(
14 | Identifier: 	v
15 | Operator:   	==
16 | Number:     	0
17 | Operator:   	)
18 | Keyword:      	return
19 | Identifier: 	u
20 | Operator:   	;
21 | Keyword:      	else
22 | Keyword:      	return
23 | Identifier: 	gcd
24 | Operator:   	(
25 | Identifier: 	v
26 | Operator:   	,
27 | Identifier: 	u
28 | Operator:   	-
29 | Identifier: 	u
30 | Operator:   	/
31 | Identifier: 	v
32 | Operator:   	*
33 | Identifier: 	v
34 | Operator:   	)
35 | Operator:   	;
36 | Operator:   	}
37 | Keyword:      	void
38 | Identifier: 	main
39 | Operator:   	(
40 | Keyword:      	void
41 | Operator:   	)
42 | Operator:   	{
43 | Keyword:      	int
44 | Identifier: 	x
45 | Operator:   	;
46 | Keyword:      	int
47 | Identifier: 	y
48 | Operator:   	;
49 | Identifier: 	x
50 | Operator:   	=
51 | Identifier: 	input
52 | Operator:   	(
53 | Operator:   	)
54 | Operator:   	;
55 | Identifier: 	y
56 | Operator:   	=
57 | Identifier: 	input
58 | Operator:   	(
59 | Operator:   	)
60 | Operator:   	;
61 | Identifier: 	output
62 | Operator:   	(
63 | Identifier: 	gcd
64 | Operator:   	(
65 | Identifier: 	x
66 | Operator:   	,
67 | Identifier: 	y
68 | Operator:   	)
69 | Operator:   	)
70 | Operator:   	;
71 | Operator:   	}
72 | 


--------------------------------------------------------------------------------
/parser/test_cases/output1.txt:
--------------------------------------------------------------------------------
 1 | dick70
 2 | Keyword:      	int
 3 | Identifier: 	gcd
 4 | Operator:   	(
 5 | Keyword:      	int
 6 | Identifier: 	u
 7 | Operator:   	,
 8 | Keyword:      	int
 9 | Identifier: 	v
10 | Operator:   	)
11 | Operator:   	{
12 | Keyword:      	if
13 | Operator:   	(
14 | Identifier: 	v
15 | Operator:   	==
16 | Number:     	0
17 | Operator:   	)
18 | Keyword:      	return
19 | Identifier: 	u
20 | Operator:   	;
21 | Keyword:      	else
22 | Keyword:      	return
23 | Identifier: 	gcd
24 | Operator:   	(
25 | Identifier: 	v
26 | Operator:   	,
27 | Identifier: 	u
28 | Operator:   	-
29 | Identifier: 	u
30 | Operator:   	/
31 | Identifier: 	v
32 | Operator:   	*
33 | Identifier: 	v
34 | Operator:   	)
35 | Operator:   	;
36 | Operator:   	}
37 | Keyword:      	void
38 | Identifier: 	main
39 | Operator:   	(
40 | Keyword:      	void
41 | Operator:   	)
42 | Operator:   	{
43 | Keyword:      	int
44 | Identifier: 	x
45 | Operator:   	;
46 | Keyword:      	int
47 | Identifier: 	y
48 | Operator:   	;
49 | Identifier: 	x
50 | Operator:   	=
51 | Identifier: 	input
52 | Operator:   	(
53 | Operator:   	)
54 | Operator:   	;
55 | Identifier: 	y
56 | Operator:   	=
57 | Identifier: 	input
58 | Operator:   	(
59 | Operator:   	)
60 | Operator:   	;
61 | Identifier: 	output
62 | Operator:   	(
63 | Identifier: 	gcd
64 | Operator:   	(
65 | Identifier: 	x
66 | Operator:   	,
67 | Identifier: 	y
68 | Operator:   	)
69 | Operator:   	)
70 | Operator:   	;
71 | Operator:   	}
72 | 


--------------------------------------------------------------------------------
/lexer/test_cases/output.txt:
--------------------------------------------------------------------------------
 1 | #4	3	Keyword:      	int
 2 | #4	7	Identifier: 	gcd
 3 | #4	9	Operator:   	(
 4 | #4	12	Keyword:      	int
 5 | #4	14	Identifier: 	u
 6 | #4	15	Operator:   	,
 7 | #4	19	Keyword:      	int
 8 | #4	21	Identifier: 	v
 9 | #4	22	Operator:   	)
10 | #5	1	Operator:   	{
11 | #6	6	Keyword:      	if
12 | #6	8	Operator:   	(
13 | #6	9	Identifier: 	v
14 | #6	12	Operator:   	==
15 | #6	14	Number:     	0
16 | #6	15	Operator:   	)
17 | #7	14	Keyword:      	return
18 | #7	16	Identifier: 	u
19 | #7	18	Operator:   	;
20 | #8	8	Keyword:      	else
21 | #9	14	Keyword:      	return
22 | #9	18	Identifier: 	gcd
23 | #9	19	Operator:   	(
24 | #9	20	Identifier: 	v
25 | #9	21	Operator:   	,
26 | #9	22	Identifier: 	u
27 | #9	23	Operator:   	-
28 | #9	24	Identifier: 	u
29 | #9	25	Operator:   	/
30 | #9	26	Identifier: 	v
31 | #9	27	Operator:   	*
32 | #9	28	Identifier: 	v
33 | #9	29	Operator:   	)
34 | #9	30	Operator:   	;
35 | #12	1	Operator:   	}
36 | #14	4	Keyword:      	void
37 | #14	9	Identifier: 	main
38 | #14	10	Operator:   	(
39 | #14	14	Keyword:      	void
40 | #14	15	Operator:   	)
41 | #15	1	Operator:   	{
42 | #16	7	Keyword:      	int
43 | #16	9	Identifier: 	x
44 | #16	10	Operator:   	;
45 | #16	14	Keyword:      	int
46 | #16	16	Identifier: 	y
47 | #16	17	Operator:   	;
48 | #17	5	Identifier: 	x
49 | #17	7	Operator:   	=
50 | #17	13	Identifier: 	input
51 | #17	14	Operator:   	(
52 | #17	15	Operator:   	)
53 | #17	16	Operator:   	;
54 | #18	5	Identifier: 	y
55 | #18	7	Operator:   	=
56 | #18	13	Identifier: 	input
57 | #18	14	Operator:   	(
58 | #18	15	Operator:   	)
59 | #18	16	Operator:   	;
60 | #19	10	Identifier: 	output
61 | #19	11	Operator:   	(
62 | #19	14	Identifier: 	gcd
63 | #19	15	Operator:   	(
64 | #19	16	Identifier: 	x
65 | #19	17	Operator:   	,
66 | #19	18	Identifier: 	y
67 | #19	19	Operator:   	)
68 | #19	20	Operator:   	)
69 | #19	21	Operator:   	;
70 | #20	1	Operator:   	}
71 | 


--------------------------------------------------------------------------------
/lexer/test_cases/output2.txt:
--------------------------------------------------------------------------------
 1 | #4	3	Keyword:      	int
 2 | #4	7	Identifier: 	gcd
 3 | #4	9	Operator:   	(
 4 | #4	12	Keyword:      	int
 5 | #4	14	Identifier: 	u
 6 | #4	15	Operator:   	,
 7 | #4	19	Keyword:      	int
 8 | #4	21	Identifier: 	v
 9 | #4	22	Operator:   	)
10 | #5	1	Operator:   	{
11 | #6	6	Keyword:      	if
12 | #6	8	Operator:   	(
13 | #6	9	Identifier: 	v
14 | #6	12	Operator:   	==
15 | #6	14	Number:     	0
16 | #6	15	Operator:   	)
17 | #7	14	Keyword:      	return
18 | #7	16	Identifier: 	u
19 | #7	18	Operator:   	;
20 | #8	8	Keyword:      	else
21 | #9	14	Keyword:      	return
22 | #9	18	Identifier: 	gcd
23 | #9	19	Operator:   	(
24 | #9	20	Identifier: 	v
25 | #9	21	Operator:   	,
26 | #9	22	Identifier: 	u
27 | #9	23	Operator:   	-
28 | #9	24	Identifier: 	u
29 | #9	25	Operator:   	/
30 | #9	26	Identifier: 	v
31 | #9	27	Operator:   	*
32 | #9	28	Identifier: 	v
33 | #9	29	Operator:   	)
34 | #9	30	Operator:   	;
35 | #12	1	Operator:   	}
36 | #14	4	Keyword:      	void
37 | #14	9	Identifier: 	main
38 | #14	10	Operator:   	(
39 | #14	14	Keyword:      	void
40 | #14	15	Operator:   	)
41 | #15	1	Operator:   	{
42 | #16	7	Keyword:      	int
43 | #16	9	Identifier: 	x
44 | #16	10	Operator:   	;
45 | #16	14	Keyword:      	int
46 | #16	16	Identifier: 	y
47 | #16	17	Operator:   	;
48 | #17	5	Identifier: 	x
49 | #17	7	Operator:   	=
50 | #17	13	Identifier: 	input
51 | #17	14	Operator:   	(
52 | #17	15	Operator:   	)
53 | #17	16	Operator:   	;
54 | #18	5	Identifier: 	y
55 | #18	7	Operator:   	=
56 | #18	13	Identifier: 	input
57 | #18	14	Operator:   	(
58 | #18	15	Operator:   	)
59 | #18	16	Operator:   	;
60 | #19	10	Identifier: 	output
61 | #19	11	Operator:   	(
62 | #19	14	Identifier: 	gcd
63 | #19	15	Operator:   	(
64 | #19	16	Identifier: 	x
65 | #19	17	Operator:   	,
66 | #19	18	Identifier: 	y
67 | #19	19	Operator:   	)
68 | #19	20	Operator:   	)
69 | #19	21	Operator:   	;
70 | #20	1	Operator:   	}
71 | 


--------------------------------------------------------------------------------
/parser/test_cases/output.txt:
--------------------------------------------------------------------------------
 1 | #4	3	Keyword:      	int
 2 | #4	7	Identifier: 	gcd
 3 | #4	9	Operator:   	(
 4 | #4	12	Keyword:      	int
 5 | #4	14	Identifier: 	u
 6 | #4	15	Operator:   	,
 7 | #4	19	Keyword:      	int
 8 | #4	21	Identifier: 	v
 9 | #4	22	Operator:   	)
10 | #5	1	Operator:   	{
11 | #6	6	Keyword:      	if
12 | #6	8	Operator:   	(
13 | #6	9	Identifier: 	v
14 | #6	12	Operator:   	==
15 | #6	14	Number:     	0
16 | #6	15	Operator:   	)
17 | #7	14	Keyword:      	return
18 | #7	16	Identifier: 	u
19 | #7	18	Operator:   	;
20 | #8	8	Keyword:      	else
21 | #9	14	Keyword:      	return
22 | #9	18	Identifier: 	gcd
23 | #9	19	Operator:   	(
24 | #9	20	Identifier: 	v
25 | #9	21	Operator:   	,
26 | #9	22	Identifier: 	u
27 | #9	23	Operator:   	-
28 | #9	24	Identifier: 	u
29 | #9	25	Operator:   	/
30 | #9	26	Identifier: 	v
31 | #9	27	Operator:   	*
32 | #9	28	Identifier: 	v
33 | #9	29	Operator:   	)
34 | #9	30	Operator:   	;
35 | #12	1	Operator:   	}
36 | #14	4	Keyword:      	void
37 | #14	9	Identifier: 	main
38 | #14	10	Operator:   	(
39 | #14	14	Keyword:      	void
40 | #14	15	Operator:   	)
41 | #15	1	Operator:   	{
42 | #16	7	Keyword:      	int
43 | #16	9	Identifier: 	x
44 | #16	10	Operator:   	;
45 | #16	14	Keyword:      	int
46 | #16	16	Identifier: 	y
47 | #16	17	Operator:   	;
48 | #17	5	Identifier: 	x
49 | #17	7	Operator:   	=
50 | #17	13	Identifier: 	input
51 | #17	14	Operator:   	(
52 | #17	15	Operator:   	)
53 | #17	16	Operator:   	;
54 | #18	5	Identifier: 	y
55 | #18	7	Operator:   	=
56 | #18	13	Identifier: 	input
57 | #18	14	Operator:   	(
58 | #18	15	Operator:   	)
59 | #18	16	Operator:   	;
60 | #19	10	Identifier: 	output
61 | #19	11	Operator:   	(
62 | #19	14	Identifier: 	gcd
63 | #19	15	Operator:   	(
64 | #19	16	Identifier: 	x
65 | #19	17	Operator:   	,
66 | #19	18	Identifier: 	y
67 | #19	19	Operator:   	)
68 | #19	20	Operator:   	)
69 | #19	21	Operator:   	;
70 | #20	1	Operator:   	}
71 | 


--------------------------------------------------------------------------------
/parser/include/parser/tmp_factor.cpp:
--------------------------------------------------------------------------------
 1 |   // auto n = TreeNode::newExpNode(ExpKind::_num);
 2 |         // // 1.如果是运算符 （, 则进入 (expression) 的匹配  
 3 |         // // 这个括号不会显示在输出的语法树中，但是会影响语法树的构建。
 4 |         // if(current_token->get_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_slb){
 5 |         //     match_operator(operator_type::_slb);
 6 |         //     n = additive_expression(pass_node);
 7 |         //     match_operator(operator_type::_srb);
 8 |         // }
 9 |         // // 如果匹配到了ID
10 |         // else if( current_token->get_token_type()== token_type::_ID){
11 |         //     n->set_id(get_current_id()->get_ID());
12 |         //    get_next_token();
13 |         //    // 尝试匹配下一个符号
14 |         //     if(current_token != nullptr && current_token->get_token_type() ==    token_type::_operator){
15 |         //         switch (get_current_operator()->get_operator_type())
16 |         //         {
17 |         //         // 2.如果是 '(' 则匹配   ID(args)
18 |         //         case operator_type::_slb:
19 |         //             n->kind.exp = ExpKind::_call;
20 |         //             match_operator(operator_type::_slb);
21 |         //             // 如果下一个就是 ')'  则匹配完成 即匹配 empty
22 |         //             if(current_token != nullptr && current_token->get_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_srb){
23 |                             
24 |         //             }else{
25 |         //                 n->sibling = arg_list();
26 |         //             }
27 |         //             match_operator(operator_type::_srb);
28 |         //         break;
29 | 
30 |         //         // 3.如果是 '[' 则匹配 ID[expression]
31 |         //         case operator_type::_mlb:
32 |         //             n->kind.exp = ExpKind::_var;
33 |         //             match_operator(operator_type::_mlb);
34 |         //             n->sibling = additive_expression(nullptr); // TODO 后续完成expression要进行切换
35 |         //             match_operator(operator_type::_mrb);
36 |         //         break; 
37 |         //         // 4.如果没有则什么都不做，匹配完成
38 |         //         default:
39 |             
40 |         //         break;
41 |         //         }
42 |         //     }
43 |         // //5. 匹配到NUM               
44 |         // }else if(current_token != nullptr && get_current_token_type() == token_type::_NUM){
45 |         //     n->set_number(get_current_number()->get_number());
46 |         //     get_next_token();
47 |         // }
48 |         // return n;


--------------------------------------------------------------------------------
/lexer/include/Lexer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include<iostream>
 3 | #include<fstream>
 4 | #include<cstring>
 5 | #include<vector>
 6 | #include<cctype>
 7 | #include<unordered_map>
 8 | #include<set>
 9 | #include "TokenType.h"
10 | // Every character that can start an operator
11 | extern std::set<char> oper_start;
12 | bool isoperator(char c);
13 | 
14 | // States of the lexer's state machine. cmlexer::error_state() tests `_s >= unexpected_char`, so the error states must stay last.
15 | enum class state{
16 |     start,   // normal state 0
17 |     output,  // a token can be emitted 1
18 |     in_oper, // inside an operator 2
19 |     in_comm, // inside a comment 3
20 |     in_numb, // inside a number 4
21 |     in_iden, // inside an identifier 5
22 |     ex_comm, // about to leave a comment 6
23 |     unexpected_char, // 7 unknown character
24 |     undefined_operator, // 8 unknown operator
25 |     unexpected_state    // 9 invalid state transition
26 | };
27 | 
28 | // Used to manage the error types (both are only declared here; the definitions are not in this header)
29 | extern std::unordered_map<state, std::string> error_map;
30 | std::string get_error_str(state s);
31 | 
32 | // Main lexer class: holds the state-machine state, the input/output streams and the tokens produced so far.
33 | class cmlexer{
34 |     int bufflen = 0;        // not used anywhere in this header
35 |     int lineno = 1;         // current line number
36 |     int linepos = 0;        // current character position
37 |     
38 |     std::string buffer;     // buffer for the current token
39 |     std::string line_buff;  // buffer for the current line
40 |     size_t line_idx = 0;   // index into the current line
41 |     bool is_eof = false;
42 |     // After the output state, this flag makes sure the character that was only used to decide the state (and was not stored in the buffer) gets read again
43 |     bool next = 1;          
44 |     state _s = state::start; // current state; initialized so error_state() never reads an indeterminate value
45 | 
46 |     std::string inputPath, outputPath;
47 | public:
48 |     // Token pointers in the order they were produced (raw pointers; nothing in this header frees them)
49 |     std::vector<token_base*> results;
50 |     std::ifstream ifs;
51 |     std::ofstream ofs;
52 |     bool if_std_output = true;    // whether to print the lexing result on standard output
53 |     bool output_redirect = false; // whether output is redirected; if not, standard input/output is used
54 | 
55 |     // Set the input and output paths
56 |     void setPath(std::string i, std::string o);
57 |     int getNextChar();
58 |     void ungetNextChar();
59 |     // Takes one character plus the `next` flag and returns a state
60 |     state read_next(char c, bool next);
61 | 
62 |     // Get the next token
63 |     token_base * get_next_token();
64 |     token_base * get_next_token(std::ifstream &local_ifs);
65 | 
66 |     // Lex a whole file from an input stream; the overload without arguments reads from ifs
67 |     void lexing_file(std::ifstream &ifstream); 
68 |     void lexing_file();
69 |     // Reset the state to start and return the newest token, or nullptr when
70 |     // no token exists yet (back() on an empty vector is undefined behavior)
71 |     token_base* get_result(){
72 |         _s = state::start;
73 |         return results.empty() ? nullptr : results.back();
74 |     }
75 |     std::vector<token_base*>* get_results(){
76 |         return &results;
77 |     }
78 |     std::string get_buffer() const {
79 |         return buffer;
80 |     }
81 |     int get_lineno() const {
82 |         return lineno;
83 |     }
84 |     int get_pos() const {
85 |         return linepos;
86 |     }
87 |     // Restore the initial state
88 |     void reset_status(){
89 |         _s = state::start;
90 |         buffer.clear();
91 |         is_eof = false;
92 |     }
93 |     // True when the current state is an error state; relies on the error
94 |     // states (unexpected_char and later) being declared last in `state`
95 |     bool error_state() const {
96 |         return _s >= state::unexpected_char;
97 |     }
98 | };


--------------------------------------------------------------------------------
/parser/include/lexer/Lexer.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include<iostream>
  3 | #include<fstream>
  4 | #include<cstring>
  5 | #include<vector>
  6 | #include<cctype>
  7 | #include<unordered_map>
  8 | #include<set>
  9 | #include "TokenType.h"
 10 | // Every character that can start an operator
 11 | extern std::set<char> oper_start;
 12 | bool isoperator(char c);
 13 | 
 14 | // States of the lexer's state machine. cmlexer::error_state() tests `_s >= unexpected_char`, so the error states must stay last.
 15 | enum class state{
 16 |     start,   // normal state 0
 17 |     output,  // a token can be emitted 1
 18 |     in_oper, // inside an operator 2
 19 |     in_comm, // inside a comment 3
 20 |     in_numb, // inside a number 4
 21 |     in_iden, // inside an identifier 5
 22 |     ex_comm, // about to leave a comment 6
 23 |     unexpected_char, // 7 unknown character
 24 |     undefined_operator, // 8 unknown operator
 25 |     unexpected_state    // 9 invalid state transition
 26 | };
 27 | 
 28 | // Used to manage the error types (both are only declared here; the definitions are not in this header)
 29 | extern std::unordered_map<state, std::string> error_map;
 30 | std::string get_error_str(state s);
 31 | 
 32 | // Main lexer class: holds the state-machine state, the input/output streams and the tokens produced so far.
 33 | class cmlexer{
 34 |     int bufflen = 0;        // not used anywhere in this header
 35 |     int lineno = 1;         // current line number
 36 |     int linepos = 0;        // current character position
 37 |     
 38 |     std::string buffer;     // buffer for the current token
 39 |     std::string line_buff;  // buffer for the current line
 40 |     size_t line_idx = 0;   // index into the current line
 41 |     bool is_eof = false;
 42 |     // After the output state, this flag makes sure the character that was only used to decide the state (and was not stored in the buffer) gets read again
 43 |     bool next = 1;          
 44 |     state _s = state::start; // current state; initialized so error_state() never reads an indeterminate value
 45 | 
 46 |     std::string inputPath, outputPath;
 47 | public:
 48 |     // Token pointers in the order they were produced (raw pointers; nothing in this header frees them)
 49 |     std::vector<token_base*> results;
 50 |     std::ifstream ifs;
 51 |     std::ofstream ofs;
 52 |     bool if_std_output = true;    // whether to print the lexing result on standard output
 53 |     bool output_redirect = false; // whether output is redirected; if not, standard input/output is used
 54 | 
 55 |     // Set the input and output paths
 56 |     void setPath(std::string i, std::string o);
 57 | private:
 58 |     int getNextChar();
 59 |     void ungetNextChar();
 60 |     // Takes one character plus the `next` flag and returns a state
 61 |     state read_next(char c, bool next);
 62 | 
 63 |     // Get the next token
 64 | public:
 65 |     token_base * get_next_token();
 66 |     token_base * get_next_token(std::ifstream &local_ifs);
 67 | 
 68 |     // Lex a whole file from an input stream; the overload without arguments reads from ifs
 69 |     void lexing_file(std::ifstream &ifstream); 
 70 |     void lexing_file();
 71 |     // Reset the state to start and return the newest token, or nullptr when
 72 |     // no token exists yet (back() on an empty vector is undefined behavior)
 73 |     token_base* get_result(){
 74 |         _s = state::start;
 75 |         return results.empty() ? nullptr : results.back();
 76 |     }
 77 |     std::vector<token_base*>* get_results(){
 78 |         return &results;
 79 |     }
 80 |     std::string get_buffer() const {
 81 |         return buffer;
 82 |     }
 83 |     int get_lineno() const {
 84 |         return lineno;
 85 |     }
 86 |     int get_pos() const {
 87 |         return linepos;
 88 |     }
 89 |     // Restore the initial state
 90 |     void reset_status(){
 91 |         _s = state::start;
 92 |         buffer.clear();
 93 |         is_eof = false;
 94 |     }
 95 |     // True when the current state is an error state; relies on the error
 96 |     // states (unexpected_char and later) being declared last in `state`
 97 |     bool error_state() const {
 98 |         return _s >= state::unexpected_char;
 99 |     }
100 | };


--------------------------------------------------------------------------------
/lexer/src/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <fstream>
  3 | #include "TokenType.h"
  4 | #include "Lexer.h"
  5 | 
  6 | // ========== Global parameter definitions ============
  7 | std::string input = "";
  8 | // If the output path is empty, output goes to standard output; if it is not empty, output goes to that file
  9 | // std::string output = "./test_cases/output.txt";
 10 | std::string output = "";
 11 | cmlexer lex; // the lexer instance
 12 | // ====================================
 13 | 
 14 | // Initialize the input/output file paths: passes the globals `input` and `output` to lex.setPath
 15 | void initPath(){
 16 |     lex.setPath(input,output);
 17 | }
 18 | // Functions used for testing
 19 | namespace tests 
 20 | {
 21 |     // Feed a plain string to the lexer one character at a time and print every token it reports.
 22 |     void testLexer(const std::string& str){
 23 |         bool next = true;
 24 |         // size_type index: avoids the signed/unsigned comparison against str.size()
 25 |         for(std::string::size_type i = 0; i < str.size(); ){
 26 |             auto s = lex.read_next(str[i], next);
 27 |             // `next` is false for one call only: the re-feed right after a token
 28 |             next = true;
 29 |             if(s == state::output){
 30 |                 auto res = lex.get_result();
 31 |                 std::cout <<'#'<< res->get_line() << "\t" <<res->get_pos() << '\t' << res->to_string() << std::endl;
 32 |                 // Do not advance: the same character is passed to read_next
 33 |                 // again, this time with next == false.
 34 |                 next = false;
 35 |                 continue;
 36 |             }
 37 |             i++; // advance only when no token was reported
 38 |         }
 39 |     }
 40 |     // // Print the raw source code with line numbers -- tests the buffer
 41 |     // void showInputFile(){
 42 |     //     char c;
 43 |     //     while((c = lex.getNextChar()) != EOF){
 44 |     //         std::cout << c;
 45 |     //     }
 46 |     // }
 47 |     // Check that every token class is written correctly by printing one of each
 48 |     void showTokens(){
 49 |         token_keyword i(keyword_type::_else, 1,4);
 50 |         token_operator j(string2operator("+"), 2,3);
 51 |         token_identifier k("value", 4, 2);
 52 |         token_number l("123",2,4);
 53 |         std::cout << "\n";
 54 |         std::cout << i.to_string() << "\n";
 55 |         std::cout << j.to_string() << std::endl;
 56 |         std::cout << k.to_string() << std::endl;
 57 |         std::cout << l.to_string() << std::endl;
 58 |     }
 59 |     // Check operator lexing: pass lex.ifs to testLexer line by line
 60 |     void operatorLexer(){
 61 |         // std::getline drops the newline, so it is appended again before the
 62 |         // line is handed to testLexer.
 63 |         std::string a ;
 64 |         while(std::getline(lex.ifs, a)){
 65 |             a += '\n';
 66 |             // std::cout << a << std::endl;
 67 |             testLexer(a);
 68 |         }
 69 |     }
 70 | }
 71 | // Entry point: argv[1] is the input path, optional argv[2] the output path; returns 0 after lexing and 1 on a wrong argument count
 72 | int main(int argc, char* argv[]){
 73 |     lex.if_std_output = true; // whether to show the results on standard output
 74 |     // Choose the action from the number of command-line arguments
 75 |     switch (argc)
 76 |     {
 77 |     case 1: // no input path was given
 78 |         std::cout << "ERROR:缺少参数，请输入需要解析的文件路径！"<< std::endl;
 79 |         return 1; // usage error: exit with a non-zero status instead of reaching `return 0`
 80 |     case 2: // one argument: read from that file path; the output path stays empty
 81 |         input.assign(argv[1]);
 82 |         initPath();
 83 |         lex.lexing_file();
 84 |         break;
 85 |     case 3: // two arguments: input path followed by output path
 86 |         input.assign(argv[1]);
 87 |         output.assign(argv[2]);
 88 |         initPath();
 89 |         lex.lexing_file();
 90 |         break;
 91 |     default:
 92 |         std::cout << "ERROR:参数过多，请确认是否只有'输入路径'、'输出路径'两个参数。"<< std::endl;
 93 |         return 1; // usage error: too many arguments
 94 |     }
 95 |     // keyword_type key;
 96 |     // key = string2keyword("if");
 97 |     // std::cout << keyword2string(key) << std::endl;
 98 |  
 99 |     // tests::showInputFile();
100 |     // tests::showTokens();
101 |     // tests::operatorLexer();
102 |     // lex.lexing_file(); // reads from the lexer's internal ifs by default
103 | 
104 |     // Finally, print everything collected in results in one go
105 |     // for(auto i : lex.results){
106 |     //     std::cout << '#' << i->get_line() << "\t" << i->get_pos() << "\t" << i->to_string() << std::endl;
107 |     // }
108 | 
109 |     return 0;
110 | }


--------------------------------------------------------------------------------
/documents/CMinus词法和语法规则.md:
--------------------------------------------------------------------------------
  1 | ## C-Minus 的词法规则
  2 | - 关键字： if  else  int  return  void  while
  3 | - 专用符号：  +  -  *  /  <  <=  >  >=  ==  !=  =  ;  ,  (  )  [  ]  {  }  /*  */
  4 | - 其他标记为 ID 和 NUM ，通过下列正则表达式定义：
  5 | ```
  6 | ID = letter letter*
  7 | NUM = digit digit*
  8 | letter = a|..|z|A|..|Z
  9 | digit = 0|..|9
 10 | ```
 11 | - 空白符由空格、换行符、制表符组成。
 12 | - 注释由 /\*...\*/ 围起来。
 13 | 
 14 | ## C-Minus 的语法规则
 15 | **C-Minus 的 BNF 语法如下：**
 16 | 1. ❤program -> declaration_list
 17 | 2. ❤declaration_list -> declaration_list declaration  |  declaration
 18 | 3. ❤declaration -> var_declaration  |  fun_declaration
 19 | 4. ❤var_declaration -> type_specifier ID ;  |  type_specifier ID [ NUM ] ;
 20 | 5. ❤type_specifier -> INT  |  VOID
 21 | 6. ❤fun_declaration -> type_specifier ID ( params )  compound_stmt
 22 | 7. ❤params -> param_list  |  VOID
 23 | 8. ❤param_list -> param_list , param  |  param
 24 | 9. ❤param -> type_specifier ID  |  type_specifier ID [ ]
 25 | 10. ❤compound_stmt -> { local_declarations statement_list }
 26 | 11. ❤local_declarations -> local_declarations var_declaration  |  empty
 27 | 12. ❤statement_list -> statement_list statement  |  empty
 28 | 13. ❤statement -> expression_stmt  |  compound_stmt  |  selection_stmt  |  iteration_stmt  |  return_stmt
 29 | 14. ❤expression_stmt -> expression ;  |  ;
 30 | 15. ❤selection_stmt -> IF ( expression ) statement  |  IF ( expression ) statement ELSE statement
 31 | 16. ❤iteration_stmt -> WHILE ( expression ) statement
 32 | 17. ❤return_stmt -> RETURN;  |  RETURN expression; 
 33 | 18. ❤expression -> var = expression  |  simple_expression
 34 | 19. ❤var -> ID  |  ID [ expression ]
 35 | 20. ❤simple_expression -> additive_expression relop additive_expression  |  additive_expression
 36 | 21. ❤relop -> LE  |  LT  |  GT  |  GE  |  EQ  |  NE
 37 | 22. ❤additive_expression -> additive_expression addop term  |  term
 38 | 23. ❤addop -> +  |  -
 39 | 24. ❤term -> term mulop factor  |  factor
 40 | 25. ❤mulop -> *  |  /
 41 | 26. ❤factor -> ( expression )  |  var  |  call  |  NUM         
 42 | 27. ❤call -> ID ( args )  
 43 | 28. ❤args -> arg_list  |  empty                               
 44 | 29. ❤arg_list -> arg_list , expression  |  expression
 45 | 
 46 | ## EBNF语法如下：
 47 | ```
 48 | program → declaration-list
 49 | 
 50 | declaration-list → declaration {declaration}
 51 | 
 52 | declaration → var-declaration|fun-declaration
 53 | 
 54 | var-declaration → type-specifier ID; | type-specifier ID[NUM];
 55 | 
 56 | type-specifier → int|void
 57 | 
 58 | fun-declaration → type-specifier ID(params) compound-stmt
 59 | 
 60 | params → param-list|void
 61 | 
 62 | param-list → param{,param}
 63 | 
 64 | param → type-specifier ID | type-specifier ID[]
 65 | 
 66 | compound-stmt → "{" local-declarations statement-list "}"
 67 | 
 68 | local-declarations → {var-declaration}
 69 | 
 70 | statement-list → {statement}
 71 | 
 72 | statement → expression-stmt|compound-stmt|selection-stmt|iteration-stmt|return-stmt
 73 | 
 74 | 
 75 | 
 76 | expression-stmt → [expression];
 77 | 
 78 | selection-stmt → if(expression) statement [else statement]
 79 | 
 80 | iteration-stmt → while(expression) statement
 81 | 
 82 | return-stmt → return[expression];
 83 | 
 84 | expression → var=expression|simple-expression
 85 | 
 86 | var → ID|ID[expression]
 87 | 
 88 | simple-expression → additive-expression [relop additive-expression]
 89 | 
 90 | relop → <=|<|>|>=|==|!=
 91 | 
 92 | additive-expression → term{addop term}
 93 | 
 94 | addop → +|-
 95 | 
 96 | term → factor{mulop factor}
 97 | 
 98 | mulop → *|/
 99 | 
100 | factor → (expression)|var|call|NUM
101 | 
102 | call → ID(args)                     //done
103 | 
104 | args → arg-list|empty               //done
105 | 
106 | arg-list → expression{, expression} //done
107 | ```


--------------------------------------------------------------------------------
/lexer/src/static/TokenType.cpp:
--------------------------------------------------------------------------------
  1 | #include "TokenType.h"
  2 | #include <unordered_map>
  3 | #include <string>
  4 | const int MAX_TOKEN_SIZE = 256; // TokenString below is sized MAX_TOKEN_SIZE + 1
  5 | char TokenString[MAX_TOKEN_SIZE + 1]; // NOTE(review): neither TokenString nor TokenIndex is referenced again in this file
  6 | int TokenIndex = 0;
  7 | // Alias template for the conversion-table type
  8 | template<typename _kT, typename _vT> using map_t = std::unordered_map<_kT, _vT>;
  9 | // Hash tables for fast conversion; this one maps a keyword's spelling to its keyword_type
 10 | map_t<std::string, keyword_type> keyword_map = {
 11 |     {"if",keyword_type::_if},
 12 |     {"else", keyword_type::_else},
 13 |     {"int", keyword_type::_int},
 14 |     {"return", keyword_type::_return},
 15 |     {"void", keyword_type::_void},
 16 |     {"while", keyword_type::_while}
 17 | };
 18 | 
 19 | map_t<std::string, operator_type> operator_map = { // maps an operator's spelling to its operator_type
 20 |     {"/*", operator_type::_comment}, // the comment opener is registered here as well
 21 |     {"+", operator_type::_add},
 22 |     {"-", operator_type::_sub},
 23 |     {"*", operator_type::_mul},
 24 |     {"/", operator_type::_div},
 25 |     {"<", operator_type::_les},
 26 |     {"<=", operator_type::_leq},
 27 |     {">", operator_type::_gre},
 28 |     {">=", operator_type::_geq},
 29 |     {"==", operator_type::_equ},
 30 |     {"!=", operator_type::_neq},
 31 |     {"=", operator_type::_asi},
 32 |     {";", operator_type::_sem},
 33 |     {",", operator_type::_com},
 34 |     {"(", operator_type::_slb},
 35 |     {")", operator_type::_srb},
 36 |     {"[", operator_type::_mlb},
 37 |     {"]", operator_type::_mrb},
 38 |     {"{", operator_type::_llb},
 39 |     {"}", operator_type::_lrb}
 40 | };
 41 | // Conversion functions for the keyword sub-type
 42 | keyword_type string2keyword(std::string str){ // returns the keyword_map entry for `str`, or keyword_type::_null when there is none
 43 |     if(keyword_map.count(str)){
 44 |         return keyword_map.at(str);
 45 |     }
 46 |     else{
 47 |         return keyword_type::_null;
 48 |     }
 49 | }
 50 | std::string keyword2string(keyword_type t){ // returns the spelling of keyword `t`
 51 |     switch (t)
 52 |     {
 53 |     case keyword_type::_if:
 54 |         return "if";
 55 |     case keyword_type::_else:
 56 |         return "else";
 57 |     case keyword_type::_int:
 58 |         return "int";
 59 |     case keyword_type::_return:
 60 |         return "return";
 61 |     case keyword_type::_void:
 62 |         return "void";
 63 |     case keyword_type::_while:
 64 |         return "while";
 65 |     default: // any value without a case above, including keyword_type::_null
 66 |         return "<invalid token>";
 67 |         break;
 68 |     }
 69 | }
 70 | 
 71 | // Conversion functions for the operator sub-type
 72 | operator_type string2operator(std::string str){ // returns the operator_map entry for `str`, or operator_type::_null when there is none
 73 |     if(operator_map.count(str)){
 74 |         return operator_map.at(str);
 75 |     }
 76 |     else{
 77 |         return operator_type::_null;
 78 |     }
 79 | }
 80 | std::string operator2string(operator_type o){ // returns the spelling of operator `o`
 81 |     switch (o)
 82 |     {
 83 |     case operator_type::_add:
 84 |         return "+";
 85 |     case operator_type::_sub:
 86 |         return "-";
 87 |     case operator_type::_mul:
 88 |         return "*";
 89 |     case operator_type::_div:
 90 |         return "/";
 91 |     case operator_type::_les:
 92 |         return "<";
 93 |     case operator_type::_leq:
 94 |         return "<=";
 95 |     case operator_type::_gre:
 96 |         return ">";
 97 |     case operator_type::_geq:
 98 |         return ">=";
 99 |     case operator_type::_equ:
100 |         return "==";
101 |     case operator_type::_neq:
102 |         return "!=";
103 |     case operator_type::_asi:
104 |         return "=";
105 |     case operator_type::_sem:
106 |         return ";";
107 |     case operator_type::_com:
108 |         return ",";
109 |     case operator_type::_slb:
110 |         return "(";
111 |     case operator_type::_srb:
112 |         return ")";
113 |     case operator_type::_mlb:
114 |         return "[";
115 |     case operator_type::_mrb:
116 |         return "]";
117 |     case operator_type::_llb:
118 |         return "{";
119 |     case operator_type::_lrb:
120 |         return "}";   
121 |     default: // no case exists for operator_type::_comment or _null, so both end up here
122 |         return "<invalid token>";
123 |         break;
124 |     }
125 | }
126 | 
127 | 


--------------------------------------------------------------------------------
/parser/src/static/TokenType.cpp:
--------------------------------------------------------------------------------
  1 | #include "TokenType.h"
  2 | #include <unordered_map>
  3 | #include <string>
  4 | const int MAX_TOKEN_SIZE = 256; // TokenString below is sized MAX_TOKEN_SIZE + 1
  5 | char TokenString[MAX_TOKEN_SIZE + 1]; // NOTE(review): neither TokenString nor TokenIndex is referenced again in this file
  6 | int TokenIndex = 0;
  7 | // Alias template for the conversion-table type
  8 | template<typename _kT, typename _vT> using map_t = std::unordered_map<_kT, _vT>;
  9 | // Hash tables for fast conversion; this one maps a keyword's spelling to its keyword_type
 10 | map_t<std::string, keyword_type> keyword_map = {
 11 |     {"if",keyword_type::_if},
 12 |     {"else", keyword_type::_else},
 13 |     {"int", keyword_type::_int},
 14 |     {"return", keyword_type::_return},
 15 |     {"void", keyword_type::_void},
 16 |     {"while", keyword_type::_while}
 17 | };
 18 | 
 19 | map_t<std::string, operator_type> operator_map = { // maps an operator's spelling to its operator_type
 20 |     {"/*", operator_type::_comment}, // the comment opener is registered here as well
 21 |     {"+", operator_type::_add},
 22 |     {"-", operator_type::_sub},
 23 |     {"*", operator_type::_mul},
 24 |     {"/", operator_type::_div},
 25 |     {"<", operator_type::_les},
 26 |     {"<=", operator_type::_leq},
 27 |     {">", operator_type::_gre},
 28 |     {">=", operator_type::_geq},
 29 |     {"==", operator_type::_equ},
 30 |     {"!=", operator_type::_neq},
 31 |     {"=", operator_type::_asi},
 32 |     {";", operator_type::_sem},
 33 |     {",", operator_type::_com},
 34 |     {"(", operator_type::_slb},
 35 |     {")", operator_type::_srb},
 36 |     {"[", operator_type::_mlb},
 37 |     {"]", operator_type::_mrb},
 38 |     {"{", operator_type::_llb},
 39 |     {"}", operator_type::_lrb}
 40 | };
 41 | // Conversion functions for the keyword sub-type
 42 | keyword_type string2keyword(std::string str){ // returns the keyword_map entry for `str`, or keyword_type::_null when there is none
 43 |     if(keyword_map.count(str)){
 44 |         return keyword_map.at(str);
 45 |     }
 46 |     else{
 47 |         return keyword_type::_null;
 48 |     }
 49 | }
 50 | std::string keyword2string(keyword_type t){ // returns the spelling of keyword `t`
 51 |     switch (t)
 52 |     {
 53 |     case keyword_type::_if:
 54 |         return "if";
 55 |     case keyword_type::_else:
 56 |         return "else";
 57 |     case keyword_type::_int:
 58 |         return "int";
 59 |     case keyword_type::_return:
 60 |         return "return";
 61 |     case keyword_type::_void:
 62 |         return "void";
 63 |     case keyword_type::_while:
 64 |         return "while";
 65 |     default: // any value without a case above, including keyword_type::_null
 66 |         return "<invalid token>";
 67 |         break;
 68 |     }
 69 | }
 70 | 
 71 | // Conversion functions for the operator sub-type
 72 | operator_type string2operator(std::string str){ // returns the operator_map entry for `str`, or operator_type::_null when there is none
 73 |     if(operator_map.count(str)){
 74 |         return operator_map.at(str);
 75 |     }
 76 |     else{
 77 |         return operator_type::_null;
 78 |     }
 79 | }
 80 | std::string operator2string(operator_type o){ // returns the spelling of operator `o`
 81 |     switch (o)
 82 |     {
 83 |     case operator_type::_add:
 84 |         return "+";
 85 |     case operator_type::_sub:
 86 |         return "-";
 87 |     case operator_type::_mul:
 88 |         return "*";
 89 |     case operator_type::_div:
 90 |         return "/";
 91 |     case operator_type::_les:
 92 |         return "<";
 93 |     case operator_type::_leq:
 94 |         return "<=";
 95 |     case operator_type::_gre:
 96 |         return ">";
 97 |     case operator_type::_geq:
 98 |         return ">=";
 99 |     case operator_type::_equ:
100 |         return "==";
101 |     case operator_type::_neq:
102 |         return "!=";
103 |     case operator_type::_asi:
104 |         return "=";
105 |     case operator_type::_sem:
106 |         return ";";
107 |     case operator_type::_com:
108 |         return ",";
109 |     case operator_type::_slb:
110 |         return "(";
111 |     case operator_type::_srb:
112 |         return ")";
113 |     case operator_type::_mlb:
114 |         return "[";
115 |     case operator_type::_mrb:
116 |         return "]";
117 |     case operator_type::_llb:
118 |         return "{";
119 |     case operator_type::_lrb:
120 |         return "}";   
121 |     default: // no case exists for operator_type::_comment or _null, so both end up here
122 |         return "<invalid token>";
123 |         break;
124 |     }
125 | }
126 | 
127 | 


--------------------------------------------------------------------------------
/lexer/include/TokenType.h:
--------------------------------------------------------------------------------
  1 | # pragma once
  2 | # include<string>
  3 | extern const int MAX_TOKEN_SIZE; // declaration only; TokenType.cpp defines it as 256
  4 | extern char TokenString[]; // declaration only; TokenType.cpp defines it with MAX_TOKEN_SIZE + 1 elements
  5 | extern int TokenIndex; // declaration only; TokenType.cpp initialises it to 0
  6 | // The four token kinds needed by C-Minus, plus a _null placeholder
  7 | enum class token_type{
  8 |     _null,
  9 |     _keyword,   // keyword
 10 |     _operator,  // operator
 11 |     _ID,        // identifier
 12 |     _NUM,       // numeric literal
 13 | };
 14 | 
 15 | // Sub-type: keywords
 16 | enum class keyword_type{
 17 |     // each enumerator is the keyword of the same name with a leading underscore
 18 |     _null, _if, _else, _int, _return, _void, _while, 
 19 | };
 20 | // Conversion functions for the keyword sub-type (string -> enum and enum -> string)
 21 | keyword_type string2keyword(std::string str);
 22 | std::string keyword2string(keyword_type key);
 23 | 
 24 | // Sub-type: operator kinds
 25 | enum class operator_type{
 26 |     _null,
 27 |     _comment, // /*
 28 |     _add, // + 
 29 |     _sub, // -
 30 |     _mul, // * 
 31 |     _div, // /
 32 |     _les, // <
 33 |     _leq, // <= less equal
 34 |     _gre, // >
 35 |     _geq, // >= greater equal
 36 |     _equ, // == equal
 37 |     _neq, // != not equal
 38 |     _asi, // =
 39 |     _sem, // ; semicolon
 40 |     _com, // ,
 41 |     _slb, // ( small left bracket
 42 |     _srb, // ) small right bracket
 43 |     _mlb, // [ medium left bracket
 44 |     _mrb, // ] medium right bracket
 45 |     _llb, // { large left bracket
 46 |     _lrb, // } large right bracket
 47 | };
 48 | // Conversion functions for the operator sub-type (string -> enum and enum -> string)
 49 | operator_type string2operator(std::string str);
 50 | std::string operator2string(operator_type);
 51 | 
 52 | // Base class for tokens; the four concrete token classes below derive from it
 53 | class token_base{
 54 |     std::size_t _line = 0, _pos = 0; // values reported by get_line() / get_pos()
 55 | public:
 56 |     token_base() = default;
 57 |     token_base(std::size_t l, std::size_t p): _line(l), _pos(p){}
 58 |     virtual  ~token_base() = default;
 59 |     // Returns the token kind; the base implementation returns token_type::_null
 60 |     virtual token_type get_token_type() const{
 61 |         return token_type::_null;
 62 |     }
 63 |     virtual std::string to_string() const = 0; // pure virtual: each derived class supplies its own text form
 64 |     inline std::size_t get_line() const{
 65 |         return _line;
 66 |     }
 67 |     inline std::size_t get_pos() const{
 68 |         return _pos;
 69 |     }
 70 | };
 71 | // Keyword token class
 72 | class  token_keyword final : public token_base {
 73 |     keyword_type _type;
 74 | public:
 75 |     ~token_keyword() = default;
 76 |     token_keyword(keyword_type t, std::size_t l, std::size_t p): token_base(l,p), _type(t){} 
 77 | 
 78 |     std::string to_string() const override{ // fixed "Keyword:" label followed by the keyword's spelling
 79 |         return "Keyword:      \t" + keyword2string(_type);    
 80 |     }
 81 |     token_type get_token_type() const override{
 82 |         return token_type::_keyword;
 83 |     }
 84 | };
 85 | // Operator token class
 86 | class  token_operator final : public token_base {
 87 |     operator_type _type;
 88 | public:
 89 |     ~token_operator() = default;
 90 |     token_operator(operator_type t, std::size_t l, std::size_t p): token_base(l,p), _type(t){} 
 91 | 
 92 |     std::string to_string() const override{ // fixed "Operator:" label followed by the operator's spelling
 93 |         return "Operator:   \t" + operator2string(_type);    
 94 |     }
 95 |     token_type get_token_type() const override{
 96 |         return token_type::_operator;
 97 |     }
 98 | };
 99 | // Identifier token class
100 | class  token_identifier final : public token_base {
101 |     std::string _ID;
102 | public:
103 |     ~token_identifier() = default;
104 |     token_identifier(std::string id, std::size_t l, std::size_t p): token_base(l,p), _ID(id){} 
105 | 
106 |     std::string to_string() const override{ // fixed "Identifier:" label followed by the stored name
107 |         return "Identifier: \t" + _ID;   
108 |     }
109 |     token_type get_token_type() const override{
110 |         return token_type::_ID;
111 |     }
112 | };
113 | 
114 | class  token_number final : public token_base { // Number token class; the value is kept as a std::string
115 |     std::string _number;
116 | public:
117 |     ~token_number() = default;
118 |     token_number(std::string number, std::size_t l, std::size_t p): token_base(l,p), _number(number){} 
119 | 
120 |     std::string to_string() const override{ // fixed "Number:" label followed by the stored text
121 |         return "Number:     \t" + _number;    
122 |     }
123 |     token_type get_token_type() const override{
124 |         return token_type::_NUM;
125 |     }
126 | };
127 | 


--------------------------------------------------------------------------------
/parser/include/parser/parser.h:
--------------------------------------------------------------------------------
  1 | # pragma once
  2 | #include "lexer/Lexer.h"
  3 | #include "TokenType.h"
  4 | #include "parser/abstract_tree.h"
  5 | class TreeNode;
  6 | class cmparser{ // Parser for C-Minus: one private member function per grammar rule, each returning a TreeNode*
  7 |     cmlexer *lexer = nullptr; // lexer the parser operates on to obtain tokens; this class never deletes it
  8 |     token_base * current_token = nullptr; // null until a token has been fetched
  9 |     size_t idx_token = 0; // initialised so a freshly constructed parser matches the state after reset_status()
 10 |     bool error_state = 0;
 11 |     // Root node of the abstract syntax tree
 12 |     TreeNode * AST = nullptr;     
 13 | public:
 14 |     // Get a pointer to the next token
 15 |     token_base * get_next_token();
 16 |     // Get a pointer to the current token
 17 |     token_base * get_current_token();
 18 |     // Get the kind of the current token
 19 |     token_type get_current_token_type();
 20 |     // Once the token kind has been checked, get the current token as a keyword
 21 |     token_keyword* get_current_keyword();
 22 |     // Once the token kind has been checked, get the current token as a number
 23 |     token_number* get_current_number();
 24 |     // Once the token kind has been checked, get the current token as an operator
 25 |     token_operator* get_current_operator();
 26 |     // Once the token kind has been checked, get the current token as an identifier
 27 |     token_identifier* get_current_id();
 28 |     // Report a syntax error; the string passed in is used as the error message
 29 |     void SyntaxError(std::string str);
 30 |     // Match the next keyword against t (the original note says a mismatch raises an error -- implementation not shown here)
 31 |     bool match_keyword(keyword_type t);
 32 |     // Match the next operator against t (the original note says a mismatch raises an error -- implementation not shown here)
 33 |     bool match_operator(operator_type t);
 34 | public:
 35 |     cmparser() = default;
 36 |     // The lexer to read from must be passed in as the starting point
 37 |     cmparser(cmlexer * lex): lexer(lex) {}
 38 |     ~cmparser() = default;
 39 |     void reset_status(){ // only rewinds idx_token to 0
 40 |         idx_token = 0;
 41 |     }
 42 |     // Run the parser
 43 |     TreeNode * parse();
 44 | private: 
 45 |     // program -> declaration_list
 46 |     TreeNode * program();
 47 | 
 48 |     // declaration_list ->  declaration {declaration}
 49 |     TreeNode * declaration_list();
 50 | 
 51 |     // declaration -> var_declaration  |  fun_declaration
 52 |     // FIRST(var_declaration) ∩ FIRST(fun_declaration) = type_specifier ID
 53 |     TreeNode * declaration();
 54 | 
 55 |     // fun_declaration -> type_specifier ID ( params )  compound_stmt
 56 |     TreeNode * fun_declaration(TreeNode * type_specifier, TreeNode * ID);
 57 | 
 58 |     // params -> paramlist | VOID
 59 |     TreeNode * params();
 60 | 
 61 |     // param_list -> param {, param}
 62 |     TreeNode * param_list(TreeNode * pass_node);
 63 | 
 64 |     // param -> type_specifier ID [ \[\] ] // escapes mark the brackets that are matched literally
 65 |     TreeNode * param(TreeNode * pass_node);
 66 | 
 67 |     TreeNode * compound_stmt();
 68 |     TreeNode * local_declarations();
 69 | 
 70 |     // var_declaration -> type_specifier ID; | type_specifier ID [ NUM ] ; 
 71 |     // Returns the corresponding node when there is a declaration, otherwise nullptr
 72 |     // Because of the common left factor, the parts already consumed by the caller are passed in as arguments
 73 |     TreeNode * var_declaration(TreeNode * type_specifier, TreeNode * ID);
 74 | 
 75 |     // statement_list -> {statement}
 76 |     TreeNode * statement_list();
 77 | 
 78 |     // statement -> expression_stmt  |  compound_stmt  |  selection_stmt  |  iteration_stmt  |  return_stmt
 79 |     TreeNode * statement();
 80 | 
 81 |     //expression_stmt -> expression ; | ;
 82 |     TreeNode* expression_stmt();
 83 |     //selection_stmt -> IF ( expression ) statement | IF ( expression ) statement ELSE statement
 84 |     TreeNode * selection_stmt();
 85 |     
 86 |     // iteration_stmt -> WHILE ( expression ) statement
 87 |     TreeNode * iteration_stmt();
 88 | 
 89 |     // return_stmt -> RETURN;  |  RETURN expression; 
 90 |     TreeNode * return_stmt();
 91 | 
 92 |     // expression -> var = expression  |  simple_expression
 93 |     TreeNode * expression();
 94 |     //simple_expression -> additive_expression [ relop additive_expression ]
 95 |     // relop -> < > <= >= == !=
 96 |     TreeNode * simple_expression(TreeNode * pass_node);
 97 | 
 98 |     // additive_expression -> term {addop term}
 99 |     // addop -> + | -
100 |     TreeNode * additive_expression(TreeNode * pass_node);
101 |     // term -> factor {mulop  factor }
102 |     // mulop -> * | /
103 |     TreeNode * term(TreeNode * pass_node);
104 |     // factor ->
105 |         // 1.(expression)
106 |         // 2.ID (args)       // call
107 |         // 3.ID [expression] // var
108 |         // 4.ID              // var
109 |         // 5.NUM             // NUM
110 |     TreeNode * factor(TreeNode * pass_node);
111 | 
112 |     // var -> ID  |  ID [ expression ]
113 |     TreeNode * var();
114 | 
115 |     //call -> ID ( args )
116 |     TreeNode *call(TreeNode *k);
117 | 
118 |     // args -> empty | expression {, expression} 
119 |     // The empty case is already handled inside call
120 |     TreeNode * arg_list();
121 | };
122 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # CMinus 词法分析器和语法分析器
  2 | > 本项目基于C++开发
  3 | - 本仓库为四川大学编译原理课程设计仓库
  4 | - 目标为完成CMinus词法分析器和语法分析器，本质上是C语言的一个子集，功能较为简单，便于本科生快速实现理解原理。其[具体词法语法规则参见此链接](documents/CMinus词法和语法规则.md)
  5 | - 仓库内容按模块分为`lexer`和`parser`的子文件夹，分别实现了**词法分析器**和**语法分析器**对应的功能。
  6 | 
  7 | ## 功能展示
  8 | ### 词法分析器 Lexer
  9 | > 有些教材会称作scanner而非本文用的lexer
 10 | > 
 11 | 在`./lexer`文件夹下运行`make.bat`后，会默认从`./lexer/test_cases`下的`input.c`文件读入信息
 12 | 
 13 | 结果会在内部转化为一个装有所有`token`类型的`vector`，同时会在`./lexer/test_cases/output.txt`中展示每一个词法单元的类型，值，行号和列号。
 14 | 
 15 | - 一个经典的例子：
 16 |   - 输入：
 17 |     ```c {class=line-numbers}
 18 |       /* A program to perform Euclid's
 19 |       Algorithm to compute gcd. */
 20 | 
 21 |       int gcd (int u, int v)
 22 |       { 
 23 |           if (v == 0)
 24 |               return u ;
 25 |           else
 26 |               return gcd(v,u-u/v*v);
 27 |           /* u-u/v*v ==
 28 |           u mod v */
 29 |       }
 30 | 
 31 |       void main(void)
 32 |       {
 33 |           int x; int y;
 34 |           x = input();
 35 |           y = input();
 36 |           output(gcd(x,y));
 37 |       }
 38 |     ```
 39 | 
 40 |   - 输出
 41 |     ```c{class=line-numbers}
 42 |       #4	3	Keyword:      	int
 43 |       #4	7	Identifier: 	gcd
 44 |       #4	9	Operator:   	(
 45 |       #4	12	Keyword:      	int
 46 |       #4	14	Identifier: 	u
 47 |       #4	15	Operator:   	,
 48 |       #4	19	Keyword:      	int
 49 |       #4	21	Identifier: 	v
 50 |       #4	22	Operator:   	)
 51 |       #5	1	Operator:   	{
 52 |       #6	6	Keyword:      	if
 53 |       #6	8	Operator:   	(
 54 |       #6	9	Identifier: 	v
 55 |       #6	12	Operator:   	==
 56 |       #6	14	Number:     	0
 57 |       #6	15	Operator:   	)
 58 |       #7	14	Keyword:      	return
 59 |       #7	16	Identifier: 	u
 60 |       #7	18	Operator:   	;
 61 |       #8	8	Keyword:      	else
 62 |       #9	14	Keyword:      	return
 63 |       #9	18	Identifier: 	gcd
 64 |       #9	19	Operator:   	(
 65 |       #9	20	Identifier: 	v
 66 |       #9	21	Operator:   	,
 67 |       #9	22	Identifier: 	u
 68 |       #9	23	Operator:   	-
 69 |       #9	24	Identifier: 	u
 70 |       #9	25	Operator:   	/
 71 |       #9	26	Identifier: 	v
 72 |       #9	27	Operator:   	*
 73 |       #9	28	Identifier: 	v
 74 |       #9	29	Operator:   	)
 75 |       #9	30	Operator:   	;
 76 |       #12	1	Operator:   	}
 77 |       #14	4	Keyword:      	void
 78 |       #14	9	Identifier: 	main
 79 |       #14	10	Operator:   	(
 80 |       #14	14	Keyword:      	void
 81 |       #14	15	Operator:   	)
 82 |       #15	1	Operator:   	{
 83 |       #16	7	Keyword:      	int
 84 |       #16	9	Identifier: 	x
 85 |       #16	10	Operator:   	;
 86 |       #16	14	Keyword:      	int
 87 |       #16	16	Identifier: 	y
 88 |       #16	17	Operator:   	;
 89 |       #17	5	Identifier: 	x
 90 |       #17	7	Operator:   	=
 91 |       #17	13	Identifier: 	input
 92 |       #17	14	Operator:   	(
 93 |       #17	15	Operator:   	)
 94 |       #17	16	Operator:   	;
 95 |       #18	5	Identifier: 	y
 96 |       #18	7	Operator:   	=
 97 |       #18	13	Identifier: 	input
 98 |       #18	14	Operator:   	(
 99 |       #18	15	Operator:   	)
100 |       #18	16	Operator:   	;
101 |       #19	10	Identifier: 	output
102 |       #19	11	Operator:   	(
103 |       #19	14	Identifier: 	gcd
104 |       #19	15	Operator:   	(
105 |       #19	16	Identifier: 	x
106 |       #19	17	Operator:   	,
107 |       #19	18	Identifier: 	y
108 |       #19	19	Operator:   	)
109 |       #19	20	Operator:   	)
110 |       #19	21	Operator:   	;
111 |       #20	1	Operator:   	}
112 |     ```
113 | 
114 |     每一个`Token`和源代码中的内容一一对应。
115 | - 此lexer具有一些基本的报错能力：
116 |   ![](images/lexer报错显示.png)
117 | 
118 | ### 语法分析器 Parser
119 | 对于语法分析器，则可以在`parser`文件夹下运行对应的`make.bat`来将`./parser/test_cases`下的`input.c`文件的内容转化为语法树，并以缩进和颜色的方式输出对应的语法树，同样对于上面的例子，有如下结果：
120 | 
121 |   ![](images/parser结果预览.png)
122 | 
123 | 本质上`parser`的成员中含有一个`lexer`的指针，使用了`lexer`的`get_next_token()`成员函数作为输入逐一分析。
124 | 
125 |   ![](images/parser中的lexer指针.png)
126 | ## CMAKE编译指令
127 | 本项目采用`CMake`构建，是C/C++工程开发目前最常见的集成工具之一，如果你不熟悉CMAKE，可以参考如下两个仓库学习如何使用CMAKE编译C/C++工程文件：
128 | 
129 | [![Readme Card](https://github-readme-stats.vercel.app/api/pin/?username=SFUMECJF&repo=cmake-examples-Chinese)](https://github.com/SFUMECJF/cmake-examples-Chinese)
130 | 
131 | [![Readme Card](https://github-readme-stats.vercel.app/api/pin/?username=ttroy50&repo=cmake-examples)](https://github.com/ttroy50/cmake-examples)
132 | 
133 | > 跟着上面的教程走一遍，大概1个多小时能掌握基本的使用方法。而事实上，如果你不想这么麻烦，大致把头文件`*.h`和`*.cpp`文件适当组装到一个文件中也是能正常运行的，不过不便于debug和寻找模块对应代码。
134 | 
135 | --------
136 | 掌握CMAKE之后（或者只打算跑一下demo看看结果），请将路径切换到对应文件夹模块下来编译源代码：
137 | ### 直接编译方式：
138 | 在目录下提供了`make.bat`文件，在`Windows`环境下，安装过CMAKE之后，可以通过在命令行中批处理来自动化完成编译运行过程。
139 | ```powershell
140 | ./make.bat
141 | ```
142 | ### 手动编译方式
143 | - 在build路径中执行如下指令以获得`MinGW Makefiles`（我这里使用MinGW编译，如果你有其他的编译工具，请自行选择）：
144 | ```
145 | cmake .. -G 'MinGW Makefiles'
146 | ```
147 | - 随后在这个文件夹下运行`mingw32-make`即可开始编译：
148 | ```
149 | mingw32-make.exe
150 | ``` 
151 | - 在**命令行中运行**根目录下得到的`*.exe`文件即可：第一个参数为输入文件路径，可选的第二个参数为输出文件路径（不带参数运行会提示缺少参数）。
152 | ```powershell
153 | ./CMinusLexer_binary.exe ./test_cases/input.c
154 | ```
155 | 
156 | --------
157 | 
158 | 祝大家学有所成， 欢迎反馈bug等信息~ :)


--------------------------------------------------------------------------------
/parser/include/TokenType.h:
--------------------------------------------------------------------------------
  1 | # pragma once
  2 | # include<string>
  3 | extern const int MAX_TOKEN_SIZE; // declaration only; TokenType.cpp defines it as 256
  4 | extern char TokenString[]; // declaration only; TokenType.cpp defines it with MAX_TOKEN_SIZE + 1 elements
  5 | extern int TokenIndex; // declaration only; TokenType.cpp initialises it to 0
  6 | /* The four token kinds needed by C-Minus (plus a _null placeholder)
  7 | _keyword
  8 | _operator
  9 | _ID
 10 | _NUM
 11 | */
 12 | enum class token_type{
 13 |     _null,
 14 |     _keyword,   // keyword
 15 |     _operator,  // operator
 16 |     _ID,        // identifier
 17 |     _NUM,       // numeric literal
 18 | };
 19 | 
 20 | /* Sub-type: keywords
 21 | _null, _if, _else, _int, _return, _void, _while
 22 | */
 23 | enum class keyword_type{
 24 |     // each enumerator is the keyword of the same name with a leading underscore
 25 |     _null, _if, _else, _int, _return, _void, _while, 
 26 | };
 27 | // Conversion functions for the keyword sub-type (string -> enum and enum -> string)
 28 | keyword_type string2keyword(std::string str);
 29 | std::string keyword2string(keyword_type key);
 30 | 
 31 | // Sub-type: operator kinds (the commented list below repeats the enumerators of the enum that follows)
 32 | //     _null,
 33 | //     _comment, // /*
 34 | //     _add, // + 
 35 | //     _sub, // -
 36 | //     _mul, // * 
 37 | //     _div, // /
 38 | //     _les, // <
 39 | //     _leq, // <= less equal
 40 | //     _gre, // >
 41 | //     _geq, // >= greater equal
 42 | //     _equ, // == equal
 43 | //     _neq, // != not equal
 44 | //     _asi, // =
 45 | //     _sem, // ; semicolon
 46 | //     _com, // ,
 47 | //     _slb, // ( small left bracket
 48 | //     _srb, // ) small right bracket
 49 | //     _mlb, // [ medium left bracket
 50 | //     _mrb, // ] medium right bracket
 51 | //     _llb, // { large left bracket
 52 | //     _lrb, // } large right bracket
 53 | enum class operator_type{
 54 |     _null,
 55 |     _comment, // /*
 56 |     _add, // + 
 57 |     _sub, // -
 58 |     _mul, // * 
 59 |     _div, // /
 60 |     _les, // <
 61 |     _leq, // <= less equal
 62 |     _gre, // >
 63 |     _geq, // >= greater equal
 64 |     _equ, // == equal
 65 |     _neq, // != not equal
 66 |     _asi, // =
 67 |     _sem, // ; semicolon
 68 |     _com, // ,
 69 |     _slb, // ( small left bracket
 70 |     _srb, // ) small right bracket
 71 |     _mlb, // [ medium left bracket
 72 |     _mrb, // ] medium right bracket
 73 |     _llb, // { large left bracket
 74 |     _lrb, // } large right bracket
 75 | };
 76 | // Conversion functions for the operator sub-type (string -> enum and enum -> string)
 77 | operator_type string2operator(std::string str);
 78 | std::string operator2string(operator_type);
 79 | 
 80 | // Base class for tokens; the concrete token classes below derive from it
 81 | class token_base{
 82 |     std::size_t _line = 0, _pos = 0; // values reported by get_line() / get_pos()
 83 | public:
 84 |     token_base() = default;
 85 |     token_base(std::size_t l, std::size_t p): _line(l), _pos(p){}
 86 |     virtual  ~token_base() = default;
 87 |     // Returns the token kind; the base implementation returns token_type::_null
 88 |     virtual token_type get_token_type() const{
 89 |         return token_type::_null;
 90 |     }
 91 |     virtual std::string to_string() const = 0; // pure virtual: each derived class supplies its own text form
 92 |     inline std::size_t get_line() const{
 93 |         return _line;
 94 |     }
 95 |     inline std::size_t get_pos() const{
 96 |         return _pos;
 97 |     }
 98 | };
 99 | // Token for a reserved keyword.
100 | class token_keyword final : public token_base {
101 | public:
102 |     token_keyword(keyword_type kind, std::size_t line, std::size_t pos)
103 |         : token_base(line, pos), _type{kind} {}
104 |     ~token_keyword() = default;
105 |     // The keyword carried by this token.
106 |     keyword_type get_keyword_type() const { return _type; }
107 |     // Always token_type::_keyword.
108 |     token_type get_token_type() const override { return token_type::_keyword; }
109 |     // Fixed "Keyword:" label followed by keyword2string(_type).
110 |     std::string to_string() const override {
111 |         const std::string label = "Keyword:      \t";
112 |         return label + keyword2string(_type);
113 |     }
114 | 
115 | private:
116 |     keyword_type _type;
117 | };
118 | // Token for an operator or punctuation mark.
119 | class token_operator final : public token_base {
120 | public:
121 |     token_operator(operator_type t, std::size_t l, std::size_t p): token_base(l,p), _type(t){}
122 |     ~token_operator() = default;
123 |     // The operator carried by this token. Declared const, like the getters of the
124 |     // other token classes, so it can also be called on a const token.
125 |     operator_type get_operator_type() const { return _type; }
126 |     // Fixed "Operator:" label followed by operator2string(_type).
127 |     std::string to_string() const override { return "Operator:   \t" + operator2string(_type); }
128 |     // Always token_type::_operator.
129 |     token_type get_token_type() const override { return token_type::_operator; }
130 | 
131 | private:
132 |     operator_type _type;
133 | };
134 | // Token for an identifier; stores the identifier's text.
135 | class token_identifier final : public token_base {
136 | public:
137 |     token_identifier(std::string id, std::size_t line, std::size_t pos)
138 |         : token_base(line, pos), _ID(id) {}
139 |     ~token_identifier() = default;
140 |     // Copy of the stored identifier text.
141 |     std::string get_ID() const { return _ID; }
142 |     // Always token_type::_ID.
143 |     token_type get_token_type() const override { return token_type::_ID; }
144 |     // Fixed "Identifier:" label followed by the identifier text.
145 |     std::string to_string() const override { return "Identifier: \t" + _ID; }
146 | 
147 | private:
148 |     std::string _ID;
149 | };
150 | 
151 | class token_number final : public token_base { // token for a number, kept as text
152 | public:
153 |     token_number(std::string number, std::size_t line, std::size_t pos)
154 |         : token_base(line, pos), _number(number) {}
155 |     ~token_number() = default;
156 |     // Copy of the stored number text.
157 |     std::string get_number() const { return _number; }
158 |     // Always token_type::_NUM.
159 |     token_type get_token_type() const override { return token_type::_NUM; }
160 |     // Fixed "Number:" label followed by the stored text.
161 |     std::string to_string() const override { return "Number:     \t" + _number; }
162 | 
163 | private:
164 |     std::string _number;
165 | };
166 | 


--------------------------------------------------------------------------------
/lexer/src/static/Lexer.cpp:
--------------------------------------------------------------------------------
  1 | #include "Lexer.h"
  2 | 
  3 | std::set<char> oper_start = { // characters the lexer treats as operator characters (see isoperator)
  4 |     '+', '-', '*', '/', '<', '>', '=', '!', '=',';' , ',', '(', ')', '[', ']', '{', '}' // '=' is listed twice; the set stores it once
  5 | };
  6 | bool isoperator(char c){ // true when c is one of the characters stored in oper_start
  7 |     return oper_start.find(c) != oper_start.end();
  8 | }
  9 | std::unordered_map<state, std::string> error_map = { // message printed for each lexer error state
 10 |     {state::unexpected_char, "未知的字符！"}, // "Unknown character!"
 11 |     {state::undefined_operator, "无法识别的运算符!"}, // "Unrecognised operator!"
 12 |     {state::unexpected_state, "状态机异常，请联系作者debug！"} // "State machine failure, please contact the author to debug!"
 13 | };
 14 | std::string get_error_str(state s){
 15 |     if(error_map.count(s)){
 16 |         return error_map.at(s);
 17 |     }else{
 18 |         return "<invalid error>";
 19 |     }
 20 | }
 21 | 
 22 | // Set the input path and, optionally, the output path.
 23 | //   i : file opened on the member stream ifs.
 24 | //   o : when non-empty, file opened on ofs; std::cout is then redirected into
 25 | //       it by swapping the stream buffer, and output_redirect is set.
 26 | // Open failures are reported on std::cerr instead of being ignored. If the
 27 | // output file cannot be opened, std::cout is left untouched so that results
 28 | // are still visible rather than being written into a closed buffer.
 29 | void cmlexer::setPath(std::string i, std::string o){
 30 |     ifs.open(i);
 31 |     if(!ifs.is_open()){
 32 |         std::cerr << "cmlexer: cannot open input file '" << i << "'" << std::endl;
 33 |     }
 34 |     if(o.empty()){
 35 |         return;     // no output file requested: keep writing to the console
 36 |     }
 37 |     ofs.open(o);
 38 |     if(!ofs.is_open()){
 39 |         std::cerr << "cmlexer: cannot open output file '" << o << "'" << std::endl;
 40 |         return;
 41 |     }
 42 |     output_redirect = true;
 43 |     std::cout.rdbuf(ofs.rdbuf());
 44 | }
 31 | 
 32 | // Advance the lexer state machine by one character and return the new state.
 33 | //   c    : the character to process.
 34 | //   next : when true the column counter is advanced first; get_next_token()
 35 | //          passes false when it offers a character for the second time.
 36 | // A finished token is appended to `results` and reported through
 37 | // state::output. The character that ended the token is not added to it;
 38 | // get_next_token() offers that character again on its next call.
 39 | // Lexical errors are reported through the returned state
 40 | // (unexpected_char, undefined_operator, unexpected_state).
 41 | state cmlexer::read_next(char c, bool next){
 42 |     if(next){
 43 |         linepos++;  // a fresh character: advance the column
 44 |     }
 45 |     // The <cctype> classifiers have undefined behaviour for negative values
 46 |     // other than EOF, so non-ASCII bytes must be passed as unsigned char.
 47 |     const unsigned char uc = static_cast<unsigned char>(c);
 48 |     switch (_s){
 49 |     case state::start :{
 50 |         if(c == '\0') {
 51 |             return _s;
 52 |         }
 53 |         else if(c == '\n'){
 54 |             linepos = 0;
 55 |             lineno++;   // one more line
 56 |             return _s;
 57 |         }
 58 |         else if(isspace(uc)){
 59 |             return _s;
 60 |         }
 61 |         else if(isdigit(uc)){
 62 |             buffer += c;
 63 |             return _s = state::in_numb;
 64 |         }
 65 |         else if(isoperator(c)){
 66 |             buffer += c;
 67 |             return _s = state::in_oper;
 68 |         }
 69 |         else if(isalpha(uc)){
 70 |             buffer += c;
 71 |             return _s = state::in_iden;
 72 |         }else if (c == EOF){
 73 |             return _s = state::output;
 74 |         }else{
 75 |             return _s = state::unexpected_char;
 76 |         }
 77 |     }
 78 |     case state::in_oper :{
 79 |         if(!isoperator(c)){ // the run of operator characters ended
 80 |             auto oper = string2operator(buffer);
 81 |             buffer.clear();
 82 |             if(oper == operator_type::_null){ // not a known operator
 83 |                 return _s = state::undefined_operator;
 84 |             }
 85 |             else if(oper == operator_type::_comment){ // comment opener: switch to comment mode
 86 |                 return _s = state::in_comm;
 87 |             }else{  // ordinary operator: emit it
 88 |                 results.emplace_back(new token_operator(oper, lineno, linepos - 1));
 89 |                 return _s = state::output;
 90 |             }
 91 |         }else{  // another operator character, e.g. "x=-1" or "y<=1"
 92 |             auto oper = string2operator(buffer);
 93 |             if(oper != operator_type::_null){
 94 |                 const auto longer = string2operator(buffer + c);
 95 |                 if(longer == operator_type::_null){
 96 |                     // buffer already is an operator and c cannot extend it: emit it
 97 |                     buffer.clear();
 98 |                     results.emplace_back(new token_operator(oper, lineno, linepos - 1));
 99 |                     return _s = state::output;
100 |                 }
101 |                 if(longer == operator_type::_comment){
102 |                     buffer.clear();
103 |                     return _s = state::in_comm;
104 |                 }
105 |             }
106 |             buffer += c;
107 |             return _s;      // keep collecting operator characters
108 |         }
109 | 
110 |     }
111 |     case state::in_comm:{
112 |         if(c == '\n'){
113 |             linepos = 0;
114 |             lineno++;
115 |             return _s;
116 |         }else if(c == '*'){
117 |             return _s = state::ex_comm;     // possibly the start of the closing "*/"
118 |         }else{
119 |             return _s = state::in_comm;
120 |         }
121 |     }
122 |     case state::ex_comm:{
123 |         if(c == '/'){
124 |             return _s = state::start;       // "*/" seen: the comment is closed
125 |         }else if(c == '*'){
126 |             // Another star: it may itself be followed by '/', so keep waiting.
127 |             // Falling back to in_comm here would make "**/" never close the comment.
128 |             return _s;
129 |         }else if (c == '\n'){
130 |             linepos = 0;
131 |             lineno++;
132 |         }
133 |         return _s = state::in_comm;
134 |     }
135 |     case state::in_iden:{
136 |         if(isalpha(uc)){
137 |             buffer += c;
138 |             return _s;
139 |         }else{ // the word ended: check whether it is a keyword first
140 |             auto key_type = string2keyword(buffer);
141 |             if(key_type == keyword_type::_null){
142 |                 // not a keyword, so it is an identifier
143 |                 results.emplace_back(new token_identifier(buffer, lineno, linepos - 1));
144 |             }
145 |             else{
146 |                 // a keyword
147 |                 results.emplace_back(new token_keyword(key_type, lineno, linepos - 1));
148 |             }
149 |             buffer.clear();
150 |             return _s = state::output;
151 | 
152 |         }
153 |     }
154 |     case state::in_numb:{
155 |         if(isdigit(uc)){
156 |             buffer += c;
157 |             return _s;
158 |         }else{
159 |             results.emplace_back(new token_number(buffer, lineno, linepos - 1));
160 |             buffer.clear();
161 |             return _s = state::output;
162 |         }
163 |     }
164 |     default:
165 |         return _s = state::unexpected_state;
166 |     }
167 | }
149 | token_base * cmlexer::get_next_token(){ // convenience overload: reads from the member stream ifs
150 |     return get_next_token(ifs);
151 | }
152 | // Produce the next token from local_ifs.
153 | // Characters are taken from a line buffer that is refilled with std::getline
154 | // and fed to read_next() one at a time. Returns get_result() as soon as the
155 | // state machine reports state::output, and nullptr once is_eof is set or
156 | // after a lexical error has been printed.
157 | token_base * cmlexer::get_next_token(std::ifstream &local_ifs){
158 |     while(!is_eof){
159 |         if(line_idx >= line_buff.size()){   // buffered line used up: fetch another
160 |             if(!std::getline(local_ifs, line_buff)) is_eof = true;
161 |             line_idx = 0;
162 |             line_buff += "\n";  // getline drops the newline; add it back so read_next sees the line break
163 |         }
164 |         read_next(line_buff[line_idx], next);
165 |         if(!next) next = 1;
166 | 
167 |         if(error_state()){
168 |             // Show the message, the offending line and a caret under the column.
169 |             std::cout << "ERROR :" << get_error_str(_s) << std::endl;
170 |             for (char &ch : line_buff) if (ch == '\t') ch = ' ';
171 |             std::cout << ' ' << line_buff << std::flush;
172 |             std::cout << ' ' << std::string(get_pos() - 1, ' ') << "^" << std::endl;
173 |             std::cout << " " << "In the line " << get_lineno() << ", position " << get_pos() << std::endl << std::endl;
174 |             reset_status();
175 |             return nullptr;
176 |         }
177 |         if(_s == state::output){
178 |             next = 0;   // offer the same character again on the next call
179 |             return this->get_result();
180 |         }
181 |         line_idx++;
182 |     }
183 |     return nullptr;
184 | }
185 | 
186 | void cmlexer::lexing_file(){ // convenience overload: passes the member stream ifs to the stream overload
187 |     lexing_file(ifs);
188 | }
189 | 
190 | // Tokenise everything that can be read from local_ifs. When if_std_output is
191 | // set, each token is printed as "#<line>\t<pos>\t<description>".
192 | void cmlexer::lexing_file(std::ifstream &local_ifs){
193 |     token_base* res;
194 |     // Read from the stream that was passed in; calling the zero-argument
195 |     // overload here would ignore local_ifs and always read the member stream.
196 |     while((res = get_next_token(local_ifs)) != nullptr){
197 |         if(if_std_output)
198 |             std::cout <<'#'<< res->get_line() << "\t" <<res->get_pos() << '\t' << res->to_string() << std::endl;
199 |     }
200 | }


--------------------------------------------------------------------------------
/parser/src/static/lexer/Lexer.cpp:
--------------------------------------------------------------------------------
  1 | #include "lexer/Lexer.h"
  2 | 
  3 | std::set<char> oper_start = { // characters the lexer treats as operator characters (see isoperator)
  4 |     '+', '-', '*', '/', '<', '>', '=', '!', '=',';' , ',', '(', ')', '[', ']', '{', '}' // '=' is listed twice; the set stores it once
  5 | };
  6 | bool isoperator(char c){ // true when c is one of the characters stored in oper_start
  7 |     return oper_start.find(c) != oper_start.end();
  8 | }
  9 | std::unordered_map<state, std::string> error_map = { // message printed for each lexer error state
 10 |     {state::unexpected_char, "未知的字符！"}, // "Unknown character!"
 11 |     {state::undefined_operator, "无法识别的运算符!"}, // "Unrecognised operator!"
 12 |     {state::unexpected_state, "状态机异常，请联系作者debug！"} // "State machine failure, please contact the author to debug!"
 13 | };
 14 | std::string get_error_str(state s){
 15 |     if(error_map.count(s)){
 16 |         return error_map.at(s);
 17 |     }else{
 18 |         return "<invalid error>";
 19 |     }
 20 | }
 21 | 
 22 | // Set the input path and, optionally, the output path.
 23 | //   i : file opened on the member stream ifs.
 24 | //   o : when non-empty, file opened on ofs; std::cout is then redirected into
 25 | //       it by swapping the stream buffer, and output_redirect is set.
 26 | // Open failures are reported on std::cerr instead of being ignored. If the
 27 | // output file cannot be opened, std::cout is left untouched so that results
 28 | // are still visible rather than being written into a closed buffer.
 29 | void cmlexer::setPath(std::string i, std::string o){
 30 |     ifs.open(i);
 31 |     if(!ifs.is_open()){
 32 |         std::cerr << "cmlexer: cannot open input file '" << i << "'" << std::endl;
 33 |     }
 34 |     if(o.empty()){
 35 |         return;     // no output file requested: keep writing to the console
 36 |     }
 37 |     ofs.open(o);
 38 |     if(!ofs.is_open()){
 39 |         std::cerr << "cmlexer: cannot open output file '" << o << "'" << std::endl;
 40 |         return;
 41 |     }
 42 |     output_redirect = true;
 43 |     std::cout.rdbuf(ofs.rdbuf());
 44 | }
 31 | 
 32 | // Advance the lexer state machine by one character and return the new state.
 33 | //   c    : the character to process.
 34 | //   next : when true the column counter is advanced first; get_next_token()
 35 | //          passes false when it offers a character for the second time.
 36 | // A finished token is appended to `results` and reported through
 37 | // state::output. The character that ended the token is not added to it;
 38 | // get_next_token() offers that character again on its next call.
 39 | // Lexical errors are reported through the returned state
 40 | // (unexpected_char, undefined_operator, unexpected_state).
 41 | state cmlexer::read_next(char c, bool next){
 42 |     if(next){
 43 |         linepos++;  // a fresh character: advance the column
 44 |     }
 45 |     // The <cctype> classifiers have undefined behaviour for negative values
 46 |     // other than EOF, so non-ASCII bytes must be passed as unsigned char.
 47 |     const unsigned char uc = static_cast<unsigned char>(c);
 48 |     switch (_s){
 49 |     case state::start :{
 50 |         if(c == '\0') {
 51 |             return _s;
 52 |         }
 53 |         else if(c == '\n'){
 54 |             linepos = 0;
 55 |             lineno++;   // one more line
 56 |             return _s;
 57 |         }
 58 |         else if(isspace(uc)){
 59 |             return _s;
 60 |         }
 61 |         else if(isdigit(uc)){
 62 |             buffer += c;
 63 |             return _s = state::in_numb;
 64 |         }
 65 |         else if(isoperator(c)){
 66 |             buffer += c;
 67 |             return _s = state::in_oper;
 68 |         }
 69 |         else if(isalpha(uc)){
 70 |             buffer += c;
 71 |             return _s = state::in_iden;
 72 |         }else if (c == EOF){
 73 |             return _s = state::output;
 74 |         }else{
 75 |             return _s = state::unexpected_char;
 76 |         }
 77 |     }
 78 |     case state::in_oper :{
 79 |         if(!isoperator(c)){ // the run of operator characters ended
 80 |             auto oper = string2operator(buffer);
 81 |             buffer.clear();
 82 |             if(oper == operator_type::_null){ // not a known operator
 83 |                 return _s = state::undefined_operator;
 84 |             }
 85 |             else if(oper == operator_type::_comment){ // comment opener: switch to comment mode
 86 |                 return _s = state::in_comm;
 87 |             }else{  // ordinary operator: emit it
 88 |                 results.emplace_back(new token_operator(oper, lineno, linepos - 1));
 89 |                 return _s = state::output;
 90 |             }
 91 |         }else{  // another operator character, e.g. "x=-1" or "y<=1"
 92 |             auto oper = string2operator(buffer);
 93 |             if(oper != operator_type::_null){
 94 |                 const auto longer = string2operator(buffer + c);
 95 |                 if(longer == operator_type::_null){
 96 |                     // buffer already is an operator and c cannot extend it: emit it
 97 |                     buffer.clear();
 98 |                     results.emplace_back(new token_operator(oper, lineno, linepos - 1));
 99 |                     return _s = state::output;
100 |                 }
101 |                 if(longer == operator_type::_comment){
102 |                     buffer.clear();
103 |                     return _s = state::in_comm;
104 |                 }
105 |             }
106 |             buffer += c;
107 |             return _s;      // keep collecting operator characters
108 |         }
109 | 
110 |     }
111 |     case state::in_comm:{
112 |         if(c == '\n'){
113 |             linepos = 0;
114 |             lineno++;
115 |             return _s;
116 |         }else if(c == '*'){
117 |             return _s = state::ex_comm;     // possibly the start of the closing "*/"
118 |         }else{
119 |             return _s = state::in_comm;
120 |         }
121 |     }
122 |     case state::ex_comm:{
123 |         if(c == '/'){
124 |             return _s = state::start;       // "*/" seen: the comment is closed
125 |         }else if(c == '*'){
126 |             // Another star: it may itself be followed by '/', so keep waiting.
127 |             // Falling back to in_comm here would make "**/" never close the comment.
128 |             return _s;
129 |         }else if (c == '\n'){
130 |             linepos = 0;
131 |             lineno++;
132 |         }
133 |         return _s = state::in_comm;
134 |     }
135 |     case state::in_iden:{
136 |         if(isalpha(uc)){
137 |             buffer += c;
138 |             return _s;
139 |         }else{ // the word ended: check whether it is a keyword first
140 |             auto key_type = string2keyword(buffer);
141 |             if(key_type == keyword_type::_null){
142 |                 // not a keyword, so it is an identifier
143 |                 results.emplace_back(new token_identifier(buffer, lineno, linepos - 1));
144 |             }
145 |             else{
146 |                 // a keyword
147 |                 results.emplace_back(new token_keyword(key_type, lineno, linepos - 1));
148 |             }
149 |             buffer.clear();
150 |             return _s = state::output;
151 | 
152 |         }
153 |     }
154 |     case state::in_numb:{
155 |         if(isdigit(uc)){
156 |             buffer += c;
157 |             return _s;
158 |         }else{
159 |             results.emplace_back(new token_number(buffer, lineno, linepos - 1));
160 |             buffer.clear();
161 |             return _s = state::output;
162 |         }
163 |     }
164 |     default:
165 |         return _s = state::unexpected_state;
166 |     }
167 | }
149 | token_base * cmlexer::get_next_token(){ // convenience overload: reads from the member stream ifs
150 |     return get_next_token(ifs);
151 | }
152 | // Produce the next token from local_ifs.
153 | // Characters are taken from a line buffer that is refilled with std::getline
154 | // and fed to read_next() one at a time. Returns get_result() as soon as the
155 | // state machine reports state::output, and nullptr once is_eof is set or
156 | // after a lexical error has been printed.
157 | token_base * cmlexer::get_next_token(std::ifstream &local_ifs){
158 |     while(!is_eof){
159 |         if(line_idx >= line_buff.size()){   // buffered line used up: fetch another
160 |             if(!std::getline(local_ifs, line_buff)) is_eof = true;
161 |             line_idx = 0;
162 |             line_buff += "\n";  // getline drops the newline; add it back so read_next sees the line break
163 |         }
164 |         read_next(line_buff[line_idx], next);
165 |         if(!next) next = 1;
166 | 
167 |         if(error_state()){
168 |             // Show the message, the offending line and a caret under the column.
169 |             std::cout << "ERROR :" << get_error_str(_s) << std::endl;
170 |             for (char &ch : line_buff) if (ch == '\t') ch = ' ';
171 |             std::cout << ' ' << line_buff << std::flush;
172 |             std::cout << ' ' << std::string(get_pos() - 1, ' ') << "^" << std::endl;
173 |             std::cout << " " << "In the line " << get_lineno() << ", position " << get_pos() << std::endl << std::endl;
174 |             reset_status();
175 |             return nullptr;
176 |         }
177 |         if(_s == state::output){
178 |             next = 0;   // offer the same character again on the next call
179 |             return this->get_result();
180 |         }
181 |         line_idx++;
182 |     }
183 |     return nullptr;
184 | }
185 | 
186 | void cmlexer::lexing_file(){ // convenience overload: passes the member stream ifs to the stream overload
187 |     lexing_file(ifs);
188 | }
189 | 
190 | // Tokenise everything that can be read from local_ifs. When if_std_output is
191 | // set, each token is printed as "#<line>\t<pos>\t<description>".
192 | void cmlexer::lexing_file(std::ifstream &local_ifs){
193 |     token_base* res;
194 |     // Read from the stream that was passed in; calling the zero-argument
195 |     // overload here would ignore local_ifs and always read the member stream.
196 |     while((res = get_next_token(local_ifs)) != nullptr){
197 |         if(if_std_output)
198 |             std::cout <<'#'<< res->get_line() << "\t" <<res->get_pos() << '\t' << res->to_string() << std::endl;
199 |     }
200 | }


--------------------------------------------------------------------------------
/parser/include/parser/abstract_tree.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "TokenType.h"
  3 | #include <iostream>
  4 | #include <cstring>
  5 | class cmparser;
  6 | // Maximum number of children a single node can have
  7 | const int MAXCHILDREN = 4;
  8 | // Enumeration that distinguishes the two node categories
  9 | enum class Node_type{
 10 |     _expK, // expression node
 11 |     _stmtK, // statement node
 12 | };
 13 | // The kinds of statement nodes
 14 | enum class StmtKind{
 15 |     // Non-terminals that appear only in the indented comments below exist in the grammar but have no node kind of their own:
 16 |     _program,       // program entry point
 17 |     _declaration_list, // declaration list
 18 |     _declaration,   // a single declaration
 19 |     _var_declaration,// variable declaration
 20 |         // type_specifier       - only int and void
 21 |     _fun_declaration, // function declaration
 22 |         // params               - only param_list or void; represented directly by param
 23 |         // param_list           - represented by several param nodes chained as siblings
 24 |     _param,        // params are made up of param nodes
 25 |     _param_array,  // array parameter - not present in the original grammar rules
 26 |     _compound_stmt, // {local_declarations statement_list}
 27 |         // local_declarations   - represented by several var_declaration nodes or nullptr
 28 |         // statement_list       - represented directly by statements of the various kinds
 29 |     _expression_stmt, //  expression statement
 30 |     _selection_stmt,// selection (branch) statement
 31 |     _iteration_stmt,// iteration (loop) statement
 32 |     _return_stmt,   // return statement
 33 | };
 34 | // The kinds of expression nodes
 35 | enum class ExpKind{
 36 |     _void,       // void node
 37 |     _int,        // int node
 38 |     _assign,     // assignment
 39 |     _term,       // multiplicative term
 40 |     _factor,     // factor
 41 |     _var,        // variable
 42 |     _array_var,  // array variable
 43 |     _call,       // function call
 44 |     _args,      // function argument list
 45 |     _num,       // node holding only a numeric value
 46 |     _id,        // node holding only an identifier
 47 |     _opK,       // binary operator node
 48 |     _empty,
 49 | };
 50 | // Node of the abstract syntax tree.
 51 | // Each node is either an expression or a statement (`_type`); `kind` narrows
 52 | // that down and `attr` holds the value attached to an expression node.
 53 | // Sub-trees hang off `child`, and nodes on the same level are chained through
 54 | // `sibling`.
 55 | class TreeNode{
 56 | public:
 57 |     TreeNode() = default;
 58 |     ~TreeNode() = default;
 59 |     // Node category: expression or statement.
 60 |     Node_type _type;
 61 |     // Child links. Value-initialised so every slot is nullptr even for a node
 62 |     // that was not created through newExpNode()/newStmtNode(); dfs() tests
 63 |     // these pointers, so they must never be left indeterminate.
 64 |     TreeNode * child[MAXCHILDREN] = {};
 65 |     // Next node on the same level, or nullptr.
 66 |     TreeNode * sibling = nullptr;
 67 |     // Exact kind inside the category: `exp` for expression nodes, `stmt` for
 68 |     // statement nodes.
 69 |     union {
 70 |         ExpKind exp;
 71 |         StmtKind stmt;
 72 |     } kind;
 73 |     // Value attached to an expression node; the setters below write the
 74 |     // member that matches the kind they assign.
 75 |     union {
 76 |         operator_type op;
 77 |         keyword_type keyword;
 78 |         int num;
 79 |         char * id;   // malloc()-allocated copy made by set_id(); never freed by this class
 80 |     } attr;
 81 |     // Source line recorded for this node.
 82 |     int lineno = 0;
 83 |     // Create an expression node of kind `kind` for source line `line`.
 84 |     // All child and sibling links start out as nullptr.
 85 |     static TreeNode * newExpNode(ExpKind kind, int line){
 86 |         TreeNode * p = new TreeNode();
 87 |         p->_type = Node_type::_expK;
 88 |         for(auto &i : p->child) i = nullptr;
 89 |         p->sibling = nullptr;
 90 |         p->kind.exp = kind;
 91 |         p->lineno = line;
 92 |         return p;
 93 |     }
 94 |     // Create a statement node of kind `kind` for source line `line`.
 95 |     // All child and sibling links start out as nullptr.
 96 |     static TreeNode * newStmtNode(StmtKind kind, int line){
 97 |         TreeNode * p = new TreeNode();
 98 |         p->_type = Node_type::_stmtK;
 99 |         for(auto &i : p->child) i = nullptr;
100 |         p->sibling = nullptr;
101 |         p->kind.stmt = kind;
102 |         p->lineno = line;
103 |         return p;
104 |     }
105 |     // Turn this expression node into an operator node holding `oper`.
106 |     // Throws a C-string message when the node is not an expression node.
107 |     void set_operator(operator_type oper){
108 |         if(_type != Node_type::_expK){
109 |             throw "Error, 尝试对于非表达式节点赋值！";  // "attempt to assign to a non-expression node"
110 |         }
111 |         kind.exp = ExpKind::_opK;
112 |         attr.op = oper;
113 |     }
114 | 
115 |     // Keywords need no storage of their own: they can be rebuilt from the expression kind.
116 | 
117 |     // Turn this expression node into a number node; the text is converted with atoi().
118 |     // Throws a C-string message when the node is not an expression node.
119 |     void set_number(std::string num_string){
120 |         if(_type != Node_type::_expK){
121 |             throw "Error, 尝试对于非表达式节点赋值！";  // "attempt to assign to a non-expression node"
122 |         }
123 |         kind.exp = ExpKind::_num;
124 |         attr.num = atoi(num_string.c_str());
125 |     }
126 | 
127 |     // Turn this expression node into an identifier node holding a
128 |     // malloc()-allocated copy of id_string.
129 |     // Throws a C-string message when the node is not an expression node.
130 |     void set_id(std::string id_string){
131 |         if(_type != Node_type::_expK){
132 |             throw "Error, 尝试对于非表达式节点赋值！";  // "attempt to assign to a non-expression node"
133 |         }
134 |         kind.exp = ExpKind::_id;
135 |         attr.id = (char*) malloc(strlen(id_string.c_str()) + 1);
136 |         strcpy(attr.id, id_string.c_str());
137 |     }
138 |     // Print a one-line, colourised description of this node to std::cout,
139 |     // followed by its line number in parentheses.
140 |     void to_string(){
141 |         // Opening bracket: one colour for statements, another for expressions.
142 |         switch (_type)
143 |         {
144 |         case Node_type::_stmtK:
145 |             std:: cout << "\033[1;34;40m<";
146 |             break;
147 |         case Node_type::_expK:
148 |             std:: cout << "\033[1;35;40m<";
149 |             break;
150 |         }
151 | 
152 |         switch (_type)
153 |         {
154 |         case Node_type::_stmtK:
155 |             switch (kind.stmt)
156 |             {
157 |             case StmtKind::_program:
158 |                 std::cout << "Program" ;
159 |                 break;
160 |             case StmtKind::_declaration_list:
161 |                 std::cout << "Declaration_list" ;
162 |             break;
163 |             case StmtKind::_declaration:
164 |                 std::cout << "Declaration" ;
165 |             break;
166 |             case StmtKind::_var_declaration:
167 |                 std::cout << "Var_declaration" ;
168 |             break;
169 |             case StmtKind::_fun_declaration:
170 |                 std::cout << "Fun_declaration";
171 |             break;
172 |             case StmtKind::_param:
173 |                 std::cout << "Param" ;
174 |             break;
175 |             case StmtKind::_param_array:
176 |                 std::cout << "Param_array";
177 |             break;
178 |             case StmtKind::_compound_stmt:
179 |                 std::cout << "Compound_stmt";
180 |             break;
181 |             case StmtKind::_expression_stmt:
182 |                 std::cout << "Expression_stmt" ;
183 |             break;  // without this break the label ran on into "Selection_stmt"
184 |             case StmtKind::_selection_stmt:
185 |                 std::cout << "Selection_stmt" ;
186 |             break;
187 |             case StmtKind::_iteration_stmt:
188 |                 std::cout << "Iteration_stmt" ;
189 |             break;
190 |             case StmtKind::_return_stmt:
191 |                 std::cout << "Return_stmt";
192 |             break;
193 |             default:
194 |                 break;
195 |             }
196 |             break;
197 | 
198 |         case Node_type::_expK:
199 |             switch (kind.exp)
200 |             {
201 |             case ExpKind::_void:
202 |                 std::cout << "Type_identifier : VOID";
203 |             break;
204 |             case ExpKind::_int:
205 |                 std::cout << "Type_identifier : INT";
206 |             break;
207 |             case ExpKind::_assign:
208 |                 std::cout << "Assign: = ";
209 |             break;
210 |             case ExpKind::_var:
211 |                 std::cout << "Var";
212 |             break;
213 | 
214 |             case ExpKind::_array_var:
215 |                 std::cout << "Array_var : " << attr.id << "[]";
216 |             break;
217 |             case ExpKind::_call:
218 |                 std::cout << "Call";
219 |             break;
220 |             case ExpKind::_args:
221 |                 if(attr.op == operator_type::_com){
222 |                     // arguments after the first one are marked with the comma
223 |                     std::cout <<"Args: "<< operator2string(attr.op);
224 |                 }
225 |                 else{
226 |                     // the first argument is printed without a marker
227 |                     std::cout<< "Args";
228 |                 }
229 | 
230 |             break;
231 |             case ExpKind::_num:
232 |                 std::cout << "NUM : " << attr.num;
233 |             break;
234 |             case ExpKind::_id:
235 |                 std::cout <<"Identifier : " <<attr.id;
236 |             break;
237 |             case ExpKind::_opK: // the node holds the operator; its children hold the operands
238 |                 std::cout <<"Operator : " << operator2string(attr.op);
239 |             break;
240 |             case ExpKind::_empty:
241 |                 std::cout << "Empty";
242 |             break;
243 |             default:
244 |             break;
245 |             }
246 |         break;
247 |         default:
248 |             break;
249 |         }
250 |         std::cout <<">\033[0m (" <<lineno  << ")" << std::endl;
251 |     }
252 |     // Debug traversal. Unless kind.exp is _args, the node itself is printed
253 |     // (prefixed by "└" and `layer` dashes) after child slot 0 has been visited.
254 |     // Children and siblings are printed through dfs(); the sibling chain is
255 |     // wrapped in start/end marker lines.
256 |     void debug_dfs(int layer){
257 |         for(int i = 0 ; i < MAXCHILDREN ; i++){
258 |             if(ExpKind::_args != kind.exp){
259 |                 if(i == 1){
260 |                     std::cout  << "└";
261 |                     for(int d = 0 ; d < layer ; d++){
262 |                         std::cout << "-";
263 |                     }
264 |                     to_string();
265 |                     std::cout << std::endl;
266 |                 }
267 |             }
268 | 
269 |             if(child[i] != nullptr){
270 |                 child[i]->dfs(layer + 1);
271 |             }
272 |         }
273 |         if(sibling != nullptr){
274 |             std::cout << "--sibling start--" << std::endl;
275 |             sibling->dfs(layer);
276 |             std::cout << "--sibling   end--" << std::endl;
277 |         }
278 |     }
279 |     // Pre-order traversal: print this node indented by `layer` spaces, then
280 |     // its children one level deeper, then its siblings at the same level.
281 |     void dfs(int layer){
282 |         std::cout  << "";
283 |         for(int i = 0 ; i < layer ; i++){
284 |             std::cout << " ";
285 |         }
286 |         to_string();
287 |         for(int i = 0 ; i < MAXCHILDREN ; i++){
288 |             if(child[i] != nullptr){
289 |                 child[i]->dfs(layer + 1);
290 |             }
291 |         }
292 |         if(sibling != nullptr){
293 |             sibling->dfs(layer);
294 |         }
295 |     }
296 | 
297 |     // Print the tree rooted at this node, starting at indentation level 0.
298 |     void show(){
299 |         // debug_dfs(1);
300 |         dfs(0);
301 |     }
302 | 
303 | };
277 | 


--------------------------------------------------------------------------------
/parser/src/static/parser/Parser.cpp:
--------------------------------------------------------------------------------
  1 | #include "parser/Parser.h"
  2 | // #include "abstract_tree.h"
  3 | 
  4 | // Make the lexer's next token the current one, echo it to std::cout and return it.
  5 | token_base * cmparser::get_next_token(){
  6 |     current_token = lexer->get_next_token();
  7 |     if(current_token == nullptr) return nullptr;    // no token was produced: nothing to echo
  8 |     std::cout << current_token->to_string() << std::endl;
  9 |     return current_token;
 10 | }
 11 | 
 12 | token_base * cmparser::get_current_token(){ // plain accessor for current_token (may be nullptr)
 13 |     return current_token;
 14 | }
 15 | token_type cmparser::get_current_token_type(){ // type of the current token; token_type::_null when there is none, instead of dereferencing nullptr
 16 |     return current_token == nullptr ? token_type::_null : current_token->get_token_type();
 17 | }
 18 | // Current token as a keyword token; nullptr when there is no current token
 19 | // or it is of another type (no syntax error is raised here).
 20 | token_keyword* cmparser::get_current_keyword(){
 21 |     const bool is_keyword = current_token != nullptr
 22 |         && current_token->get_token_type() == token_type::_keyword;
 23 |     if(!is_keyword){
 24 |         return nullptr;
 25 |     }
 26 |     return static_cast<token_keyword *>(current_token);
 27 | }
 28 | // Current token as a number token; nullptr when there is no current token
 29 | // or it is of another type (no syntax error is raised here).
 30 | token_number* cmparser::get_current_number(){
 31 |     const bool is_number = current_token != nullptr
 32 |         && current_token->get_token_type() == token_type::_NUM;
 33 |     if(!is_number){
 34 |         return nullptr;
 35 |     }
 36 |     return static_cast<token_number *>(current_token);
 37 | }
 38 | // Current token as an operator token; nullptr when there is no current token
 39 | // or it is of another type (no syntax error is raised here).
 40 | token_operator* cmparser::get_current_operator(){
 41 |     const bool is_operator = current_token != nullptr
 42 |         && current_token->get_token_type() == token_type::_operator;
 43 |     if(!is_operator){
 44 |         return nullptr;
 45 |     }
 46 |     return static_cast<token_operator *>(current_token);
 47 | }
 48 | // Current token as an identifier token; nullptr when there is no current
 49 | // token or it is of another type (no syntax error is raised here).
 50 | token_identifier* cmparser::get_current_id(){
 51 |     const bool is_identifier = current_token != nullptr
 52 |         && current_token->get_token_type() == token_type::_ID;
 53 |     if(!is_identifier){
 54 |         return nullptr;
 55 |     }
 56 |     return static_cast<token_identifier *>(current_token);
 57 | }
 58 | 
 59 | void cmparser::SyntaxError(std::string error_string){
 60 |     std:: cout << ">>> ";
 61 |     std::cout << "SyntaxError:\033[1;31;40m" << error_string << "\033[0m at line: \033[1;32;40m" << current_token->get_line() << "\033[0m pos: \033[1;32;40m" << current_token->get_pos() << "\033[0m" << std::endl;
 62 |     error_state = 1; // 进入错误状态
 63 |     throw -1;
 64 | }
 65 | 
 66 | // Consume the current token if it is the keyword `t`.
 67 | // On success the next token is fetched and true is returned. On a mismatch a
 68 | // syntax error is reported through SyntaxError(), which throws -1.
 69 | // Running out of input is reported here directly (error_state set, -1
 70 | // thrown), because there is no current token whose type or text could be read.
 71 | bool cmparser::match_keyword(keyword_type t){
 72 |     token_base * tok = get_current_token();
 73 |     if(tok == nullptr){
 74 |         std::cout << ">>> SyntaxError:\033[1;31;40m Unexpected end of input\n    Should be ->" << keyword2string(t) << "    \n\033[0m" << std::endl;
 75 |         error_state = 1;
 76 |         throw -1;
 77 |     }
 78 |     // matched: read the next token
 79 |     if(tok->get_token_type() == token_type::_keyword && static_cast<token_keyword*>(tok)->get_keyword_type() == t){
 80 |         get_next_token();
 81 |         return true;
 82 |     }
 83 |     SyntaxError(" Unexpected token type -->  " +  tok->to_string()  + "\n    Should be ->" + keyword2string(t) + "    \n");
 84 |     // Not reached while SyntaxError() throws; kept so every path returns a value.
 85 |     error_state = 1;
 86 |     return false;
 87 | }
 78 | // Consume the current token if it is the operator `t`.
 79 | // On success the next token is fetched and true is returned. On a mismatch a
 80 | // syntax error is reported through SyntaxError(), which throws -1.
 81 | // Running out of input is reported here directly (error_state set, -1
 82 | // thrown), because there is no current token whose type or text could be read.
 83 | bool cmparser::match_operator(operator_type t){
 84 |     token_base * tok = get_current_token();
 85 |     if(tok == nullptr){
 86 |         std::cout << ">>> SyntaxError:\033[1;31;40m Unexpected end of input\n    Should be -> '" << operator2string(t) << "'\n   \033[0m" << std::endl;
 87 |         error_state = 1;
 88 |         throw -1;
 89 |     }
 90 |     // matched: read the next token
 91 |     if(tok->get_token_type() == token_type::_operator && static_cast<token_operator*>(tok)->get_operator_type() == t){
 92 |         get_next_token();
 93 |         return true;
 94 |     }
 95 |     SyntaxError(" Unexpected token type -->  " +  tok->to_string()  + "\n    Should be -> '" + operator2string(t) + "'\n   ");
 96 |     // Not reached while SyntaxError() throws; kept so every path returns a value.
 97 |     error_state = 1;
 98 |     return false;
 99 | }
 90 | 
 91 | // Parse the whole token stream and return the root of the syntax tree.
 92 | // Returns nullptr when the lexer delivers no token at all. Tokens left over
 93 | // after program() has finished are reported through SyntaxError(), which throws -1.
 94 | TreeNode * cmparser::parse(){
 95 |     get_next_token();
 96 |     if(current_token == nullptr){
 97 |         // No first token: program() reads the line number of the current
 98 |         // token and would dereference nullptr.
 99 |         return nullptr;
100 |     }
101 |     auto res = program();
102 |     if(current_token != nullptr){
103 |         SyntaxError("Unexpected Exit!");
104 |         return nullptr;
105 |     }
106 |     return res;
107 | }
101 | 
102 | TreeNode * cmparser::program(){ // program -> declaration_list; builds the root node
103 |     std::cout << "program" << std::endl;
104 |     TreeNode * root = TreeNode::newStmtNode(StmtKind::_program, current_token->get_line());
105 |     root->child[0] = declaration_list();
106 |     std::cout << "\033[1;32;40m----FINISH PARSING----\033[0m" << std::endl;
107 |     return root;
108 | }
109 | 
110 | // declaration_list -> declaration { declaration }
111 | // The first declaration becomes child[0]; every further declaration is
112 | // appended to its sibling chain.
113 | // Two defects of the earlier form are avoided:
114 | //   * the end-of-input test now guards the whole FIRST-set check (the old
115 | //     "a && b || c" grouping still read the token type when no token was left);
116 | //   * declarations are collected in a loop, so more than two are accepted.
117 | TreeNode * cmparser::declaration_list(){
118 |     auto f = TreeNode::newStmtNode(StmtKind::_declaration_list,current_token->get_line());
119 |     f->child[0] = declaration();
120 |     auto p = f->child[0];
121 |     while(current_token != nullptr){
122 |         // Does the current token belong to the FIRST set of another declaration?
123 |         const token_type tt = current_token->get_token_type();
124 |         bool starts_declaration = (tt == token_type::_ID);
125 |         if(tt == token_type::_keyword){
126 |             const keyword_type kw = get_current_keyword()->get_keyword_type();
127 |             starts_declaration = (kw == keyword_type::_int || kw == keyword_type::_void);
128 |         }
129 |         if(!starts_declaration){
130 |             break;
131 |         }
132 |         token_base * before = current_token;
133 |         p->sibling = declaration();
134 |         p = p->sibling;
135 |         if(current_token == before){
136 |             break;  // nothing was consumed: stop rather than loop forever
137 |         }
138 |     }
139 |     return f;
140 | }
121 | 
122 | TreeNode * cmparser::declaration(){
123 |     std::cout << "declaration" << std::endl;
124 |     auto t = TreeNode::newStmtNode(StmtKind::_declaration,current_token->get_line());
125 |     TreeNode * local_type_specifier;
126 |     TreeNode * local_ID;
127 |     if(current_token != nullptr && get_current_token_type() == token_type::_keyword){
128 |         switch (get_current_keyword()->get_keyword_type())
129 |         {
130 |         case keyword_type::_void :
131 |             local_type_specifier = TreeNode::newExpNode(ExpKind::_void,current_token->get_line());
132 |             match_keyword(keyword_type::_void);
133 |             break;
134 |         case keyword_type::_int:
135 |             local_type_specifier = TreeNode::newExpNode(ExpKind::_int,current_token->get_line());
136 |             match_keyword(keyword_type::_int);
137 |             break;
138 |         default:
139 |             SyntaxError("不合法的声明！");
140 |             get_next_token();
141 |             break;
142 |         }
143 |         if(current_token != nullptr && get_current_token_type() == token_type::_ID){
144 |             local_ID = TreeNode::newExpNode(ExpKind::_id,current_token->get_line());
145 |             local_ID->set_id(get_current_id()->get_ID());
146 |             get_next_token();
147 |         }else{
148 |             SyntaxError("不合法的声明！");
149 |             get_next_token();
150 |         }
151 |     }
152 |     if(current_token != nullptr && get_current_token_type() == token_type::_operator){
153 |         switch(get_current_operator()->get_operator_type()){
154 |             case operator_type::_mlb:
155 |                 t->child[0] = var_declaration(local_type_specifier, local_ID);
156 |                 break;
157 |             case operator_type::_slb:
158 |                 t->child[0] = fun_declaration(local_type_specifier, local_ID);
159 |             break;
160 |             default:
161 |                 t->child[0] = var_declaration(local_type_specifier, local_ID);
162 |                 break;
163 |         }
164 |     }
165 |     else{
166 |         t->child[0] = var_declaration(local_type_specifier, local_ID);
167 |     }
168 |     return t;
169 | }
170 | TreeNode * cmparser::fun_declaration(TreeNode * type_specifier, TreeNode * ID){
171 |     std::cout << "fun_declaration" << std::endl;
172 |     auto t = TreeNode::newStmtNode(StmtKind::_fun_declaration,current_token->get_line());
173 |     if(type_specifier == nullptr){
174 |         if(current_token != nullptr && get_current_token_type() == token_type::_keyword){
175 |             TreeNode * k;
176 |             switch (get_current_keyword()->get_keyword_type())
177 |             {
178 |             case keyword_type::_void :
179 |                 k = TreeNode::newExpNode(ExpKind::_void,current_token->get_line());
180 |                 t->child[0] = k;
181 |                 match_keyword(keyword_type::_void);
182 |                 break;
183 |             case keyword_type::_int:
184 |                 k = TreeNode::newExpNode(ExpKind::_int,current_token->get_line());
185 |                 t->child[0] = k;
186 |                 match_keyword(keyword_type::_int);
187 |                 break;
188 |             default:
189 |                 SyntaxError("不合法的函数声明！");
190 |                 get_next_token();
191 |                 break;
192 |             }
193 |             if(current_token != nullptr && get_current_token_type() != token_type::_ID){
194 |                 auto k = TreeNode::newExpNode(ExpKind::_id,current_token->get_line());
195 |                 k->set_id(get_current_id()->get_ID());
196 |                 t->child[1] = k;
197 |                 get_next_token();
198 |             }else{
199 |                 SyntaxError("不合法的函数声明！");
200 |                 delete(t);
201 |                 get_next_token();
202 |             }
203 |         }
204 |     }else{
205 |         t->child[0] = type_specifier;
206 |         t->child[1] = ID;
207 |     }
208 |     match_operator(operator_type::_slb);
209 |     auto j = params();
210 |     t->child[2] = j;
211 |     match_operator(operator_type::_srb);
212 |     j = compound_stmt();
213 |     t->child[3] = j;
214 |     return t;
215 | }
216 | 
217 | TreeNode * cmparser::params(){
218 |     std::cout << "params" << std::endl;
219 |     if(current_token != nullptr && get_current_token_type() == token_type::_keyword && get_current_keyword()->get_keyword_type() == keyword_type::_void){
220 |         auto k = TreeNode::newExpNode(ExpKind::_void,current_token->get_line());
221 |         match_keyword(keyword_type::_void);
222 |         if(current_token != nullptr && get_current_token_type() != token_type::_ID){
223 |             k->kind.stmt = StmtKind::_param;
224 |             return k;
225 |         }else{
226 |             return param_list(k);
227 |         }
228 |     }else{
229 |         return param_list(nullptr);
230 |     }
231 | }
232 | 
233 | TreeNode * cmparser::param_list(TreeNode * pass_node){
234 |     std::cout << "param_list" << std::endl;
235 |     auto t = param(pass_node);
236 |     auto p = t;
237 |     while (current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_com)
238 |     {
239 |         match_operator(operator_type::_com);
240 |         auto k = param(nullptr);
241 |         p->sibling = k;
242 |         p = k;
243 |     }
244 |     return t;
245 | }
246 | 
247 | TreeNode * cmparser::param(TreeNode * pass_node){
248 |     std::cout << "param" << std::endl;
249 |     auto t = TreeNode::newStmtNode(StmtKind::_param,current_token->get_line());
250 |     // 匹配开头的 type_specifier
251 |     if(current_token != nullptr &&get_current_token_type() == token_type::_keyword){
252 |         // 如果上面没有传下来消除左公共因子的参数
253 |         if(pass_node == nullptr){
254 |             if(get_current_keyword()->get_keyword_type() == keyword_type::_void){
255 |                 auto k = TreeNode::newExpNode(ExpKind::_void,current_token->get_line());
256 |                 t->child[0] = k;
257 |                 match_keyword(keyword_type::_void);
258 |             }
259 |             else if(get_current_keyword()->get_keyword_type() == keyword_type::_int){
260 |                 auto k = TreeNode::newExpNode(ExpKind::_int,current_token->get_line());
261 |                 t->child[0] = k;
262 |                 match_keyword(keyword_type::_int);
263 |             }
264 |         }else{// 如果上面为了消除左公共因子的参数传下来了了
265 |             t->child[0] = pass_node;
266 |         }
267 |         // 进一步匹配ID
268 |         if(current_token != nullptr && get_current_token_type() == token_type::_ID){
269 |             auto k = TreeNode::newExpNode(ExpKind::_id,current_token->get_line());
270 |             k->set_id(get_current_id()->get_ID());
271 |             t->child[1] = k;
272 |             get_next_token();
273 |         }
274 |         if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_mlb){
275 |             t->kind.stmt = StmtKind::_param_array; // 转换为数组保存
276 |             match_operator(operator_type::_mlb);
277 |             match_operator(operator_type::_mrb);
278 |         }
279 |         return t;
280 |     }else{
281 |         SyntaxError("不符合形参列表的规范！");
282 |         return nullptr;
283 |     }
284 | }
285 | 
286 | TreeNode * cmparser::compound_stmt(){
287 |     std::cout << "compound_stmt" << std::endl;
288 |     auto t = TreeNode::newStmtNode(StmtKind::_compound_stmt,current_token->get_line());
289 |     match_operator(operator_type::_llb);
290 |     t->child[0] = local_declarations();
291 |     t->child[1] = statement_list();
292 |     match_operator(operator_type::_lrb);
293 |     return t;
294 | }
295 | 
296 | TreeNode * cmparser::local_declarations(){
297 |     std::cout << "local_declarations" << std::endl;
298 |     TreeNode * t = var_declaration(nullptr, nullptr);
299 |     TreeNode * p = nullptr;
300 |     while (current_token != nullptr && get_current_token_type() == token_type::_keyword && (get_current_keyword()->get_keyword_type() == keyword_type::_int || get_current_keyword() ->get_keyword_type() == keyword_type::_void))
301 |     {
302 |         p = var_declaration(nullptr, nullptr);
303 |         t->sibling = p;
304 |         if(p != nullptr){
305 |             p = p->sibling;
306 |         }
307 |     }
308 |     return t;
309 | }
310 | 
311 | TreeNode * cmparser::var_declaration(TreeNode * type_specifier, TreeNode * ID){
312 |     std::cout << "var_declaration" << std::endl;
313 |     TreeNode * t = nullptr;
314 |     // 如果没有从上面传下来提前读的TypeID和specifier
315 |     if(type_specifier == nullptr){
316 |         if(current_token != nullptr && get_current_token_type() == token_type::_keyword){
317 |             t = TreeNode::newStmtNode(StmtKind::_var_declaration,current_token->get_line());
318 |             // p匹配 void 或 int
319 |             if(get_current_keyword()->get_keyword_type() == keyword_type::_void ){
320 |                 auto n = TreeNode::newExpNode(ExpKind::_void,current_token->get_line());
321 |                 t->child[0] = n;
322 |                 match_keyword(keyword_type::_void);
323 |             }else if(get_current_keyword()->get_keyword_type() == keyword_type::_int){
324 |                 auto n = TreeNode::newExpNode(ExpKind::_int,current_token->get_line());
325 |                 t->child[0] = n;
326 |                 match_keyword(keyword_type::_int);
327 |             }else{
328 |                 delete(t);
329 |                 return nullptr;
330 |             }
331 |             // 匹配ID
332 |             if(current_token != nullptr && get_current_token_type() == token_type::_ID){
333 |                 auto n = TreeNode::newExpNode(ExpKind::_id,current_token->get_line());
334 |                 n->set_id(get_current_id()->get_ID());
335 |                 t->child[1] = n;
336 |                 get_next_token();
337 |             }else{
338 |                 delete(t);
339 |                 return nullptr;
340 |             }
341 |         }else{
342 |             return nullptr;
343 |         }
344 |     }else {// 如果从上面传下来了已经读好的 type_specifier 和ID
345 |         t = TreeNode::newStmtNode(StmtKind::_var_declaration,current_token->get_line());
346 |         t->child[0] = type_specifier;
347 |         t->child[1] = ID;
348 |     }
349 |     // 如果存在方括号
350 |     if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_mlb){
351 |             match_operator(operator_type::_mlb);
352 |             TreeNode * q3 = TreeNode::newExpNode(ExpKind::_num,current_token->get_line());
353 |             q3->set_number(get_current_number()->get_number());
354 |             t->child[2] = q3;
355 |             get_next_token();
356 |             match_operator(operator_type::_mrb);
357 |     }
358 |     match_operator(operator_type::_sem);
359 |     return t;        
360 | }
361 | 
362 | TreeNode * cmparser::statement_list(){
363 |     std::cout << "statement_list" << std::endl;
364 |     auto t = statement();
365 |     auto p = t;
366 |     if(t == nullptr){
367 |         return nullptr;
368 |     }else{
369 |         while(current_token != nullptr &&
370 |         // keyword
371 |         (
372 |         get_current_token_type() == token_type::_keyword && (
373 |             get_current_keyword()->get_keyword_type() == keyword_type::_if || 
374 |             get_current_keyword()->get_keyword_type() == keyword_type::_while ||
375 |             get_current_keyword()->get_keyword_type() == keyword_type::_return )
376 |         )||(// operator
377 |         get_current_token_type() == token_type::_operator &&(
378 |             get_current_operator()->get_operator_type() == operator_type::_llb ||
379 |             get_current_operator()->get_operator_type() == operator_type::_sem ||
380 |             get_current_operator()->get_operator_type() == operator_type::_slb)
381 |         )||(
382 |             get_current_token_type() == token_type::_ID
383 |         )||(
384 |             get_current_token_type() == token_type::_NUM
385 |         )
386 |         ){
387 |             auto q = statement();
388 |             if(q != nullptr){
389 |                 p->sibling = q;
390 |                 p = q;
391 |             }
392 |         }
393 |     }
394 |     return t;
395 | }
396 | 
397 | TreeNode * cmparser::statement(){
398 |     std::cout << "statement" << std::endl;
399 |     TreeNode * t = nullptr;
400 |     if(current_token != nullptr){
401 |         switch (get_current_token_type())
402 |         {
403 |         case token_type::_keyword :
404 |             switch (get_current_keyword()->get_keyword_type())
405 |             {
406 |             case keyword_type::_if:
407 |                 t = selection_stmt();
408 |             break;
409 |             
410 |             case keyword_type::_while:
411 |                 t = iteration_stmt();
412 |             break;
413 | 
414 |             case keyword_type::_return:
415 |                 t = return_stmt();
416 |             break;  
417 |             default:
418 |                 SyntaxError("语句中出现了非法的关键字！");
419 |                 get_next_token();
420 |                 break;
421 |             }
422 |             break;
423 |         
424 |         case token_type::_ID: case token_type::_NUM:
425 |             t = expression_stmt();
426 |             break;
427 | 
428 |         case token_type::_operator:
429 |             switch (get_current_operator()->get_operator_type())
430 |             {
431 |             // 匹配 '{'
432 |             case operator_type::_llb :
433 |                 t = compound_stmt();
434 |                 break;
435 |             case operator_type::_sem: case operator_type::_slb:
436 |                 t = expression_stmt();
437 |                 break;        
438 | 
439 |             default:
440 |                 SyntaxError("语句中出现了非法的运算符！");
441 |                 get_next_token();
442 |                 break;
443 |             }
444 |             break;
445 |         default:
446 |             SyntaxError("语句中出现了非法的token类型！");
447 |             get_next_token();
448 |             break;
449 |         }
450 |     }
451 |     return t;
452 | }
453 | 
454 | TreeNode* cmparser::expression_stmt(){
455 |     std::cout << "expression_stmt" << std::endl;
456 |     TreeNode *t = nullptr;
457 |     if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_sem){
458 |         match_operator(operator_type::_sem);
459 |         //TODO 有可能返回空指针会炸
460 |     }else{
461 |         t = expression();
462 |     }
463 |     return t;
464 | }
465 | 
466 | TreeNode * cmparser::selection_stmt(){
467 |     std::cout << "selection_stmt" << std::endl;
468 |     auto t = TreeNode::newStmtNode(StmtKind::_selection_stmt,current_token->get_line());
469 |     // match if
470 |     match_keyword(keyword_type::_if);
471 |     // match (
472 |     match_operator(operator_type::_slb);
473 |     t->child[0] = expression();
474 |     // match )
475 |     match_operator(operator_type::_srb);
476 |     t->child[1] = statement();
477 |     if(current_token != nullptr && get_current_token_type() == token_type::_keyword && get_current_keyword()->get_keyword_type() == keyword_type::_else){
478 |         match_keyword(keyword_type::_else);
479 |         t->child[2] = statement();
480 |     }
481 |     return t;
482 | }
483 | 
484 | TreeNode * cmparser::iteration_stmt(){
485 |     std::cout << "iteration_stmt" << std::endl;
486 |     auto t = TreeNode::newStmtNode(StmtKind::_iteration_stmt,current_token->get_line());
487 |     // match while
488 |     match_keyword(keyword_type::_while);
489 |     // match (
490 |     match_operator(operator_type::_slb);
491 |     t->child[0] = expression();
492 |     // match )
493 |     match_operator(operator_type::_srb);
494 |     t->child[1] = statement();
495 |     return t;
496 | }
497 | 
498 | TreeNode * cmparser::return_stmt(){
499 |     std::cout << "return_stmt" << std::endl;
500 |     auto t = TreeNode::newStmtNode(StmtKind::_return_stmt,current_token->get_line());
501 |     match_keyword(keyword_type::_return);
502 |     // 匹配 ';'
503 |     if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_sem){
504 |         match_operator(operator_type::_sem);
505 |     }else{
506 |         t->child[0] = expression();
507 |         match_operator(operator_type::_sem);
508 |     }
509 |     return t;
510 | }
511 | 
512 | TreeNode * cmparser::expression(){
513 |     std::cout << "expression" << std::endl;
514 |     TreeNode *t = var();
515 |     // 不是以ID开头，只能是simple_expression
516 |     if(t == nullptr){
517 |         t = simple_expression(nullptr); 
518 |     }else{
519 |         // 赋值语句 var = expression
520 |         if(current_token != nullptr && get_current_token_type()== token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_asi) {
521 |             auto p = TreeNode::newExpNode(ExpKind::_assign,current_token->get_line());
522 |             p->attr.op = operator_type::_asi;
523 |             match_operator(operator_type::_asi);
524 |             p->child[0] = t;
525 |             p->child[1] = expression();
526 |             return p;
527 |         }else{
528 |             //传入分为三种情况,需要在simple中进行区分：
529 |             // 1. t = id 下一步是其他的
530 |             // 2. t = id[expression]
531 |             // 3. t = id 下一步是(    
532 |             t = simple_expression(t);
533 |         }
534 |     }
535 |     return t;
536 | }
537 | 
538 | TreeNode * cmparser::simple_expression(TreeNode * pass_node){
539 |     std::cout << "simple_expression" << std::endl;
540 |     auto n = additive_expression(pass_node);
541 |     // 如果是operator
542 |     if(current_token != nullptr && get_current_token_type() == token_type::_operator){
543 |         auto current_op_type = get_current_operator()->get_operator_type();
544 |         // 如果是6个关系运算符
545 |         if(current_op_type >= operator_type::_les && current_op_type <= operator_type::_neq){
546 |             auto p = TreeNode::newExpNode(ExpKind::_opK,current_token->get_line());
547 |             p->attr.op = current_op_type;
548 |             match_operator(current_op_type);
549 |             p->child[0] = n;
550 |             n = p;
551 |             n->child[1] = additive_expression(nullptr);
552 |         }
553 |     }
554 |     return n;
555 | }
556 | 
557 | TreeNode * cmparser::additive_expression(TreeNode * pass_node){
558 |     std::cout << "additive_expression" << std::endl;
559 |     auto n = term(pass_node);
560 |     // 如果个符号是加号或者减号
561 |     while(get_current_operator() != nullptr && (get_current_operator()->get_operator_type() == operator_type::_add || get_current_operator()->get_operator_type() == operator_type::_sub)){
562 |         // 将这个运算符赋予节点
563 |         auto p = TreeNode::newExpNode(ExpKind::_opK,current_token->get_line());
564 |         p->child[0] = n;
565 |         p->set_operator(get_current_operator()->get_operator_type());
566 |         // 匹配此字符
567 |         match_operator(get_current_operator()->get_operator_type());
568 |         n = p;
569 |         n->child[1] = term(nullptr);
570 |     }
571 |     return n;
572 | }
573 | 
574 | TreeNode * cmparser::term(TreeNode * pass_node){
575 |     std::cout << "term" << std::endl;
576 |     auto n = factor(pass_node);
577 |     while(get_current_operator() != nullptr && (get_current_operator()->get_operator_type() == operator_type::_mul || get_current_operator()->get_operator_type() == operator_type::_div)){
578 |         TreeNode *p = TreeNode::newExpNode(ExpKind::_opK,current_token->get_line());
579 |         p->child[0] = n;
580 |         p->set_operator(get_current_operator()->get_operator_type());
581 |         match_operator(get_current_operator()->get_operator_type());
582 |         n = p;
583 |         n->child[1] = factor(nullptr);
584 |     }
585 |     return n;
586 | }
587 | 
588 | TreeNode * cmparser::factor(TreeNode * pass_node){
589 |     std::cout << "factor" << std::endl;
590 |     TreeNode *t;
591 |     // 如果从上层传下来了需要的
592 |     if(pass_node != nullptr){
593 |         // 匹配 ID(args)
594 |         if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_slb){
595 |             t = call(pass_node);
596 | 
597 |         }else{
598 |             t = pass_node;
599 |         }
600 |     }else{
601 |         //如果从上层没传下来需要的节点
602 |         if(current_token != nullptr){
603 |             switch (get_current_token_type())
604 |             {
605 |             case token_type::_operator:
606 |                 // 1. 匹配 (expression)
607 |                 if(get_current_operator()->get_operator_type() == operator_type::_slb){
608 |                     match_operator(operator_type::_slb);
609 |                 }else{
610 |                     SyntaxError("非法的字符");
611 |                     get_next_token();
612 |                 }
613 |                 t = expression();
614 |                 match_operator(operator_type::_srb);
615 |             break;
616 |             // 匹配 NUM
617 |             case token_type::_NUM:
618 |                 t = TreeNode::newExpNode(ExpKind::_num,current_token->get_line());
619 |                 t->set_number(get_current_number()->get_number());
620 |                 get_next_token();
621 |             break;
622 |             // 匹配纯ID
623 |             case token_type::_ID:
624 |                 t = var();
625 |                 if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_slb){
626 |                     t = call(t)  ;
627 |                 }
628 |             break;
629 |             default:
630 |                 SyntaxError("非法的字符");
631 |                 get_next_token();
632 |             break;
633 |             }
634 |         }
635 |     }
636 |     return t;  
637 | }
638 | 
639 | TreeNode * cmparser::var(){
640 |     std::cout << "var" << std::endl;
641 |     auto t = TreeNode::newExpNode(ExpKind::_var,current_token->get_line());
642 |     if(current_token != nullptr && get_current_token_type() == token_type::_ID){
643 |         auto j = TreeNode::newExpNode(ExpKind::_id,current_token->get_line());
644 |         j->set_id(get_current_id()->get_ID());
645 |         t->child[0] = j;
646 |         get_next_token();
647 |         //如果匹配左括号 '[' 则为ID[expression]
648 |         if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_mlb){
649 |             t->kind.exp = ExpKind::_array_var;
650 |             match_operator(operator_type::_mlb);
651 |             t->child[1] = expression();
652 |             match_operator(operator_type::_mrb);
653 |         }
654 |         return t;
655 |     }else{
656 |         return nullptr;
657 |     }
658 | }
659 | 
660 |  TreeNode * cmparser::call(TreeNode *k){
661 |     std::cout << "call" << std::endl;
662 |     TreeNode *t = TreeNode::newExpNode(ExpKind::_call,current_token->get_line());
663 |     if (k != nullptr){
664 |         t->child[0] = k;
665 |     }
666 |     match_operator(operator_type::_slb);
667 |     if(current_token != nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_srb){
668 |         match_operator(operator_type::_srb);
669 |         t->child[1] = TreeNode::newExpNode(ExpKind::_empty,current_token->get_line());
670 |     }else if(k != nullptr){
671 |         t->child[1] = arg_list();
672 |         match_operator(operator_type::_srb);
673 |     }
674 |     return t;
675 | }
676 | 
677 | TreeNode * cmparser::arg_list(){
678 |     std::cout << "arg_list" << std::endl;
679 |     auto t = TreeNode::newExpNode(ExpKind::_args,current_token->get_line());
680 |     auto n = expression(); 
681 |     t->child[0] = n;
682 |     // 如果下一项为 ','
683 |     TreeNode * p = t; // 滑动指针
684 |     while(current_token!=nullptr && get_current_token_type() == token_type::_operator && get_current_operator()->get_operator_type() == operator_type::_com){
685 |         match_operator(operator_type::_com);
686 |         TreeNode *new_node = TreeNode::newExpNode(ExpKind::_args,current_token->get_line());
687 |         new_node->child[0] = expression();
688 |         new_node->attr.op = operator_type::_com;
689 |         p->sibling = new_node;
690 |         p = p->sibling;
691 |     }
692 |     return t;
693 | }


--------------------------------------------------------------------------------