├── .gitignore
├── relocation.h
├── types.h
├── executor.h
├── makefile
├── lex.h
├── dependency.h
├── symbol.h
├── dependency.c
├── main.c
├── parser.h
├── relocation.c
├── README.md
├── executor.c
├── lex.c
└── parser.c


/.gitignore:
--------------------------------------------------------------------------------
1 | main
2 | *.swp
3 | *.o
4 | 


--------------------------------------------------------------------------------
/relocation.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gyumeijie/an-embedded-c-interpreter/HEAD/relocation.h


--------------------------------------------------------------------------------
/types.h:
--------------------------------------------------------------------------------
#ifndef TYPES_H
#define TYPES_H

/* Project-wide boolean type: False is 0 and True is 1. */
typedef enum{
    False = 0, 
    True = 1
} Boolean;

#endif
10 | 


--------------------------------------------------------------------------------
/executor.h:
--------------------------------------------------------------------------------
#ifndef EXECUTOR_H
#define EXECUTOR_H

#include "symbol.h"

/* Number of slots allocated for each of the executor's two stacks (see executor.c). */
#define STACK_SIZE 1024

/* Allocates the integer stack and the double stack; returns -1 when an allocation fails. */
extern int executor_init();
/* Executes the instruction stream that begins at code_start. */
extern void run_code(int* code_start);

/*
 * Interpreter loop, defined in executor.c.
 * NOTE(review): this is a static function declared in a public header, so
 * every other file that includes the header (e.g. main.c) sees a static
 * declaration it never defines. The prototype would be better placed in
 * executor.c.
 */
static int eval(int* pc, int* sp, double* fsp);
#endif
13 | 


--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
 1 | CC = gcc
 2 | CFLAGS = -m32 # need the -m32 option on 64bit machines
 3 | OBJDIR = .
 4 | TARGET = main
 5 | CSOURCES = ${shell find  ${SRCDIR} -name \*.c}
 6 | OBJECTS = ${shell for obj in ${CSOURCES:.c=.o}; do echo ${OBJDIR}/`basename $$obj`;done}
 7 | 
 8 | ${OBJDIR}/%.o: %.c
 9 | 	${CC} -c ${CFLAGS} ${CPPFLAGS} $< -o $@
10 | 
11 | ${TARGET}: ${OBJECTS} 
12 | 	${CC}  ${CFLAGS} ${LDFLAGS} ${OBJECTS} -o $@
13 | 
14 | clean:
15 | 	rm -f *.o
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/lex.h:
--------------------------------------------------------------------------------
#ifndef LEX_H
#define LEX_H

#include "symbol.h"
#include "types.h"

/* Records the source text to tokenize and the symbol table that identifiers are looked up in. */
extern void prepare_for_tokenization(const char* src_code, int* symbol_table);

/* Scans the next token from the source text and stores it in the global `token`. */
extern void next();

/* Advances to the next token when the current one equals tk; otherwise prints an error and exits. */
extern void match(int tk); 

/*
 * The helpers below are private to lex.c.
 * NOTE(review): they are declared static in a header, so any other file
 * including lex.h would see static declarations without definitions.
 */

/* True for 'a'-'z', 'A'-'Z' and '_'. */
static Boolean is_valid_identifier_leading_character(char ch);

/* Tests for a leading identifier character or a digit. */
static Boolean is_valid_identifier_character(char ch);

static Boolean is_digit(char ch);

/* Called by next() with the buffer that holds a floating literal and the index at which the fraction starts. */
static void process_fraction(char* float_string, int start_idx);

/* Called by next() for each hexadecimal digit while accumulating a 0x literal. */
static int digitalize_hex_character(char ch);

#endif
24 | 


--------------------------------------------------------------------------------
/dependency.h:
--------------------------------------------------------------------------------
#ifndef DEPENDENCY_H
#define DEPENDENCY_H

// TODO: if read/write access to injected dependencies needs to be
// controlled later on, more fields will have to be added here.

/* One host-program variable made available to a code block. */
struct dependency{
  char* var_name;   /* name the variable is registered under */
  void* var_addr;   /* address of the variable in the host program */
  int var_type;     /* type code; main.c passes INT from symbol.h */
};

/* Fixed-capacity list of dependencies. */
struct dependency_items{
    int num_items;              /* capacity of items */
    int cur_items;              /* number of entries filled so far */
    struct dependency* items;   /* storage for the entries */
};

/* Allocates a dependency list with room for num_items entries. */
extern struct dependency_items* init_dependency_items(int num_items);

/* Appends one entry; returns 0 on success, -1 if the list is NULL or full. */
extern int add_dependency_item
(   
    struct dependency_items* dep_itemsp,
    char* var_name, 
    void* var_addr, 
    int var_type
);

#endif
29 | 


--------------------------------------------------------------------------------
/symbol.h:
--------------------------------------------------------------------------------
#ifndef SYMBOL_H
#define SYMBOL_H

// Virtual-machine instructions. The order matters: executor.c indexes its
// mnemonic table by these values.
enum{ 
    LEA, IMM, FIMM, JMP, CALL, JZ, JNZ, ENT, ADJ, LEV, LD, LF, LI, LC, SD, SF, 
    SI, SC, ATOB, BTOA, PUSF, PUSH, OR, XOR, AND, EQF, EQ, NEF, NE, LTF, LT,
    GTF, GT, LEF, LE, GEF, GE, SHL, SHR, ADDF, ADD, SUB,MULF ,MUL, DIVF, DIV, 
    MOD, NOP,
    
    // Common library functions are also encoded as instructions
    OPEN, READ, CLOS, PRTF, MALC, MSET, MCMP, EXIT
};


// Tokens. They start at 128, so plain ASCII characters can be used as
// tokens directly.
enum {
   Num = 128, Fun, Sys, Glo, Ext, Id,
   Char, Int, Float, Double, If, Else, While, Return

};

// Operator tokens
enum {
   Assign = 256, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt,
   Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};

// Relocation kinds
enum {Text_Rel, Data_Rel};

// Variable types
enum { CHAR, INT, FLOAT, DOUBLE, PTR};

#endif
35 | 


--------------------------------------------------------------------------------
/dependency.c:
--------------------------------------------------------------------------------
 1 | #include "dependency.h"
 2 | #include <stdlib.h>
 3 | 
 4 | /**
 5 |  *  因为不同的应用条件下需要注入的依赖数量是可变的，因此需要一个
 6 |  *  统一的接口来进行描述，引入dependency_items就是用来解决这个问
 7 |  *  题的
 8 |  */
 9 | 
10 | struct dependency_items* init_dependency_items(int num_items)
11 | {
12 |    struct dependency_items* dep_itemsp;
13 |    struct dependency* items;
14 |    dep_itemsp = malloc(sizeof(struct dependency_items));
15 |    dep_itemsp->num_items = num_items;
16 |    dep_itemsp->cur_items = 0;
17 |    items = malloc(sizeof(struct dependency) * num_items);
18 |    dep_itemsp->items = items;
19 | 
20 |    return dep_itemsp;
21 | }
22 | 
23 | 
24 | int add_dependency_item
25 | (   
26 |     struct dependency_items* dep_itemsp,
27 |     char* var_name, 
28 |     void* var_addr, 
29 |     int var_type
30 | )
31 | {
32 |     if (dep_itemsp == NULL ||
33 |         dep_itemsp->cur_items == dep_itemsp->num_items){
34 |             return -1;
35 |     }
36 | 
37 |     struct dependency* dep = &dep_itemsp->items[dep_itemsp->cur_items++];
38 |     dep->var_name = var_name;
39 |     dep->var_addr = var_addr;
40 |     dep->var_type = var_type;
41 | 
42 |     return 0;
43 | }
44 | 


--------------------------------------------------------------------------------
/main.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "parser.h"
 3 | #include "executor.h"
 4 | #include "dependency.h"
 5 | 
 6 | int main()
 7 | {
 8 |    // Initalization works
 9 |    parser_init();
10 |    executor_init();
11 | 
12 |    // This two variables is in the main program, and will be used in the following code block
13 |    int data = 13; 
14 |    int result;
15 |    
16 |    // Add the dependency information
17 |    struct dependency_items* dep_itemsp;
18 |    dep_itemsp = init_dependency_items(2);
19 |    add_dependency_item(dep_itemsp, "data", &data, INT);
20 |    add_dependency_item(dep_itemsp, "result", &result, INT);
21 | 
22 |    // Get the code block ready
23 |    char* src = "use{} action{result = data * 6;}";
24 |    
25 |    /*
26 |     Compile the source code of the block, together with the dependencies which supply basic information about the 
27 |     dependending variables
28 |    */ 
29 |    int* code = compile_src_code(dep_itemsp, src);
30 |    
31 |    /* 
32 |      Fire the compiled code, during excution the block interacts with the main program through acessing the dependending
33 |      variables
34 |     */
35 |    run_code(code);
36 | 
37 |    // Check the excution: the result will be 78
38 |    printf("result is %d\n",  result);
39 | }
40 | 


--------------------------------------------------------------------------------
/parser.h:
--------------------------------------------------------------------------------
#ifndef PARSER_H
#define PARSER_H

#include "symbol.h"
#include "types.h"
#include "dependency.h"

/* Field indices of one symbol-table entry; each entry occupies IdSize ints. */
enum {Token, Hash, Name, Type, Class, Value, BType, BClass, BValue, IdSize};

/*
 * NOTE(review): the static objects below are defined in a header, so every
 * file that includes it (main.c does) gets its own private copy. They look
 * like parser.c internals and would be better placed there.
 */
static int  *text_start;
static char *data_start;

static int *text, // text segment
           *stack;// stack

static char *src;

static int poolsize; 

/*
 * NOTE(review): this is a definition, not an extern declaration, so each
 * including file emits its own `data` object; lex.c refers to it with
 * `extern char* data`. Toolchains that do not merge common symbols will
 * report a duplicate definition at link time.
 */
char *data; 
/* The following globals are defined in lex.c. */
extern int *current_id; 
extern int line;       
extern int integral_token_val;  
extern double real_token_val;   
extern int token; 
extern int num_type;
static int  *symbols; 
static int basetype;  
static int expr_type;


/* Private parser helpers, presumably defined in parser.c (not verified here). */
static void expression(int level); 

static void statement();

static void parse_block_code();

/* Public: one-time parser initialization; main.c calls it before compiling. */
extern int parser_init();

static int type_of_token(int token);

static void load_real_number_constant(double float_const);

static void load_integral_number_constant(int int_const);

static int get_base_type(int type);

static int emit_store_directive(int type);

static int emit_load_directive(int type);

static void check_assignment_types(int left_type, int right_type);

static Boolean does_operate_on_constant();

static void emit_code_for_binary_left ( int** reserve1, int** reserve2);

static void emit_code_for_binary_right
(
   int operator_for_real_number,
   int operator_for_integral_number,
   int** reserve1,
   int** reserve2
);

static void numtype_to_strtype(int num_type, char* repr);

static void reset_complie_environment();

static  void init_symbol_table();

/*
 * Public: compiles src_code using the supplied dependencies and returns the
 * code pointer that main.c hands to run_code().
 */
extern int* compile_src_code
(
    struct dependency_items* dep_itemsp,   
    const char* src_code
);

static void  inject_dependency(struct dependency_items* dep_itemsp);

#endif 
81 | 
82 | 


--------------------------------------------------------------------------------
/relocation.c:
--------------------------------------------------------------------------------
 1 | #include "relocation.h"
 2 | #include "symbol.h"
 3 | #include <stdio.h>
 4 | #include <string.h>
 5 | #include <malloc.h>
 6 | 
 7 | 
 8 | int* relocation
 9 | (
10 |    int* old_text_start,
11 |    int* old_text_end,
12 |    char* old_data_start,
13 |    char* old_data_end
14 | )
15 | {
16 |     // text当前地址是使用了的，而data当前地址是未使用的
17 |     int actual_text_len = old_text_end - old_text_start + 1;
18 |     int actual_data_len = old_data_end - old_data_start;
19 |     
20 |     // 注意text是int为单位的，data是char为单位的
21 |     int* new_text = malloc(actual_text_len * sizeof(int));
22 |     char* new_data = malloc(actual_data_len * sizeof(char));
23 |     memset(new_text, 0, actual_text_len);
24 |     memset(new_data, 0, actual_data_len);
25 | 
26 |     memcpy(new_data, (void*)old_data_start, actual_data_len);
27 |     
28 |     do_relocation(new_text, new_data);
29 | 
30 |     memcpy(new_text, (void*)old_text_start, actual_text_len * sizeof(int));
31 |  
32 |     // 对于text段的第一个单元未使用是一个bug，后面如果有时间可以尝试去解决
33 |     return new_text + 1;
34 | }
35 | 
36 | 
37 | void add_relocation_item
38 | (
39 |     int *text_location, 
40 |     int offset, 
41 |     int kind
42 | )
43 | {
44 |     struct relocation_item item = {text_location, offset, kind};
45 |     relocation_items[cur_put_item] = item;
46 | 
47 |     cur_put_item++;
48 |     num_rel_items++;
49 |     
50 | }
51 | 
52 | 
53 | static void do_relocation
54 | (   
55 |    const int* new_text_addr, 
56 |    const char* new_data_addr
57 | )
58 | {
59 |    int i;
60 |    for (i=0; i<num_rel_items; i++){
61 |       int *text_location = relocation_items[i].text_location;
62 |       int offset = relocation_items[i].offset;
63 |       int kind = relocation_items[i].kind;
64 | 
65 |       // 虽然data是以char为单位的, text是以int为单位的当把它们转型成
66 |       // 整型数offset的计算理应是不一样的，但是在添加重定位项目的时候
67 |       // 已经考虑它们的单位的差异性了，这里可以统一使用offset，而不用
68 |       // 乘上相应的单位(data是1，text是4)
69 |       if (kind == Data_Rel){
70 |          *text_location = ((int)new_data_addr + offset);
71 |       }else{
72 |          *text_location = ((int)new_text_addr + offset);
73 |       }
74 |    }
75 | 
76 |    // 为重定位下一个代码段初始化
77 |    reset_relocation_items();
78 | }
79 | 
80 | 
81 | static void reset_relocation_items()
82 | {
83 |    num_rel_items = 0;
84 |    cur_put_item = 0;
85 | }
86 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # introduction
This is a very simple interpreter for C-like code, inspired by [c4](https://github.com/rswier/c4). The key difference between c4 and this interpreter is that the latter is embedded: as its name suggests, we can use this embedded interpreter to interpret and run a C-like code snippet which, more often than not, is used to configure the main program, and which can take the form of a standalone XML file or a string.
 3 | 
 4 | # components
 5 | This tiny embedded c interpreter is mainly composed of four parts:
 6 | - dependency
 7 | - lex
 8 | - parser
 9 | - executor
10 | 
11 | # example
Suppose a scenario where there are two variables in the main program named `result` and `data`, and we want to multiply `data` by a number, say 6, and store the product in the `result` variable. This sounds simple, right? But what we do here is write the logic as external configuration data rather than directly in the program; yes, it is data, not code.
13 | 
In order to realize this, the program needs the ability to parse configuration data that contains logic into something that can be executed, say bytecode, and the bytecode can interact with the main program by accessing the two variables, `result` and `data`. The following steps show how to do it.
15 | 
1. Initialize the parser and the executor
17 | ```c
18 |  parser_init();  
19 |  executor_init();
20 | ```
21 | 
2. Add dependencies
23 | ```c
24 |    dep_itemsp = init_dependency_items(2);
25 |    add_dependency_item(dep_itemsp, "data", &data, INT);
26 |    add_dependency_item(dep_itemsp, "result", &result, INT);
27 | ```
>Notice that ***data*** and ***result*** are variables in the main program, both of integer type, and both will be used
in the code block.
30 | 
31 | 3. Get the code block ready
32 | ```c
33 |  char* src = "use{} action{result = data * 6;}";
34 | ```
>***"use{} action{result = data * 6;}"*** is what we call a ***code block***, which can be placed in a source file or a configuration
file, say an XML file. The following XML snippet is a simple demo of an XML-format code block:
37 | > ```xml
38 | > <code_block name="arbitary-name-you-like">
39 | >   use{} action{result = data * 6;} 
40 | > </code_block>
41 | > ```
42 | 
43 | 4. Compile the source code
44 | ```c
45 | int* code = compile_src_code(dep_itemsp, src);
46 | ```
47 | 
48 | 5. Run the compiled byte code
49 | ```c
50 | run_code(code);
51 | ```
When execution is done, ***result*** in the main program will hold `data * 6`: 6 given ***data*** is 1, or 78 for the `data = 13` used in `main.c`.
53 | 
54 | # usage
1. download the repository
56 | ```bash
57 | git clone https://github.com/Gyumeijie/an-embedded-c-interpreter.git
58 | ```
59 | 2. cd into the `an-embedded-c-interpreter` directory
60 | ```bash
61 | cd an-embedded-c-interpreter
62 | ```
63 | 3. type `make` command
64 | ```bash
65 | make
66 | ```
67 | 4. run the code
68 | ```bash
69 | ./main
70 | ```
71 | 
72 | # link
There is a project named [satellite-borne-device-management](https://github.com/Gyumeijie/satellite-borne-device-management) that uses this embedded interpreter to configure the program.
74 | 
75 | # todo 
- [ ] redesign the APIs for more usability.
> Mainly centered on separating dependency variable info into two parts: the first part is the static declarative info,
> including the type and name of a dependency; and the second part is the dynamic runtime info concerning the address
> of that dependency.
- [ ] refactor code for more maintainability.
81 | - [ ] support more grammar.
82 | - [ ] add safety check for accessing dependency variable(s) in code block.
83 | 
84 | 


--------------------------------------------------------------------------------
/executor.c:
--------------------------------------------------------------------------------
  1 | #include "executor.h" 
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <memory.h>
  5 | #include <string.h>
  6 | 
// Virtual machine registers: bp is the frame base pointer, ax the integer
// accumulator and cycle the count of executed instructions.
static int *bp, ax, cycle; 
// Integer operand stack, allocated by executor_init
static int* stack;
// Floating-point operand stack, allocated by executor_init
static double* fstack;
 11 | 
 12 | 
 13 | int executor_init()
 14 | {
 15 |     // 运行是会需要，该部分只要虚拟机运行就行了
 16 |     if (!(stack = malloc(STACK_SIZE * sizeof(int)))) {
 17 |         printf("could not malloc(%d) for stack area\n", STACK_SIZE);
 18 |         return -1;
 19 |     }
 20 | 
 21 |     // 运行是会需要，该部分只要虚拟机运行就行了
 22 |     if (!(fstack = malloc(STACK_SIZE * sizeof(double)))) {
 23 |         printf("could not malloc(%d) for stack area\n", STACK_SIZE);
 24 |         return -1;
 25 |     }
 26 | 
 27 | }
 28 | 
 29 | 
 30 | void run_code(int* code_start)
 31 | {
 32 |    // 初始化堆栈
 33 |    int* sp = (int *)(stack + STACK_SIZE);
 34 |    double* fsp = (double *)(fstack + STACK_SIZE);
 35 |    eval(code_start, sp, fsp);
 36 | }
 37 | 
 38 | 
 39 | static int eval(int* pc, int* sp, double *fsp) {
 40 |     int op, *args;
 41 |     cycle = 0;
 42 |     // 临时增加用来保存浮点数的
 43 |     double bx;
 44 |     while (true) {
 45 |         cycle ++;
 46 |         // 在有main函数的时候是从main函数开始执行的，如果要去掉main函数的化
 47 |         // 就要正确设置pc否则就会内存错误
 48 |         op = *pc++; 
 49 | 
 50 |         // TODO 使用switch 减少无效的if/else判断，因为在调试的时候发现要查找某个
 51 |         // op的时候 如果op很后面那么前面就需要进行很多的if/else的条件判断
 52 |  
 53 |         if (true) {
 54 |             printf("%d> %.4s", cycle,
 55 |                    & "LEA ,IMM ,FIMM,JMP ,CALL,JZ  ,JNZ ,ENT ,ADJ ,LEV ,LD  ,"
 56 |                    "LF  ,LI  ,LC  ,SD  ,SF  ,SI  ,SC  ,ATOB,BTOA,PUSF,PUSH,OR  ,XOR ,AND ,"
 57 |                    "EQF ,EQ  ,NEF ,NE  ,LTF ,LT  ,GTF ,GT  ,LEF ,LE  ,GEF ,GE  ,SHL ,SHR ,ADDF,ADD ,SUB ,MULF,MUL ,DIVF,DIV ,MOD ,"
 58 |                    "NOP ,OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT"[op * 5]);
 59 |             if (op <= ADJ)
 60 |                 printf(" %0x\n", *pc);
 61 |             else
 62 |                 printf("\n");
 63 |         }
 64 | 
 65 |         // 加载立即数到寄存器ax中，加载整数以及地址
 66 |         if (op == IMM)       {ax = *pc++;}                                     
 67 | 
 68 |         // TODO 加载float类型的常量，这里的常量是内部的而不是外部导入的
 69 |         // 外部导入的都是通过地址去加载的
 70 |         else if (op == FIMM) {double* addr = (double*)pc; bx = *addr; pc += 2;}                                     
 71 | 
 72 |         // 加载字符类型数据到ax中,原来ax中保存的是地址
 73 |         else if (op == LC)   {ax = *(char *)ax;}                              
 74 | 
 75 |         // 加载整型数据到ax中,原来ax中保存的是地址
 76 |         else if (op == LI)   {ax = *(int *)ax;}        
 77 | 
 78 |         else if (op == LF)   {bx = *(float *)ax; printf("bx is %lf\n", bx);}        
 79 |         
 80 |         else if (op == LD)   {bx = *(double *)ax;}        
 81 | 
 82 |         else if (op == SC)   {ax = *(char *)*sp++ = ax;} 
 83 |         else if (op == SI)   {*(int *)*sp++ = ax; printf("ax %d\n", ax);}        
 84 | 
 85 |         // 因为外部注入的变量类型可能是float类型的也可能是double类型的
 86 |         // 所以存储的时候需要区分开来因此设计了两条指令存储float类型的
 87 |         else if (op == SF)   {*(float*)*sp++ = bx; printf("bx is %lf\n", bx);}      
 88 |         else if (op == SD)   {*(double*)*sp++ = bx;}      
 89 | 
 90 |         else if (op == ATOB) { bx = (double)ax;}
 91 | 
 92 |         else if (op == BTOA) { ax = (int)bx;}
 93 | 
 94 |         else if (op == NOP) { ;}
 95 | 
 96 |         else if (op == PUSH) {*--sp = ax;}                                    
 97 |         else if (op == PUSF) {*--fsp = bx;}
 98 | 
 99 |         else if (op == JMP)  {pc = (int *)*pc;}                              
100 |         else if (op == JZ)   {pc = ax ? pc + 1 : (int *)*pc;}               
101 |         else if (op == JNZ)  {pc = ax ? (int *)*pc : pc + 1;}      
102 |         else if (op == CALL) {*--sp = (int)(pc+1); pc = (int *)*pc;} 
103 |         else if (op == ENT)  {*--sp = (int)bp; bp = sp; sp = sp - *pc++;}     
104 | 
105 |         // 清理函数调用传递进来的参数
106 |         else if (op == ADJ)  {sp = sp + *pc++;}                              
107 |         else if (op == LEV)  {sp = bp; bp = (int *)*sp++; pc = (int *)*sp++;} 
108 |         else if (op == LEA)  {ax = (int)(bp + *pc++);}  
109 | 
110 |         // 逻辑运算符
111 |         else if (op == OR)  ax = *sp++ | ax;
112 |         else if (op == XOR) ax = *sp++ ^ ax;
113 |         else if (op == AND) ax = *sp++ & ax;
114 | 
115 |         // 比较运算符
116 |         else if (op == EQ)  ax = *sp++ == ax;
117 |         else if (op == EQF)  {ax = (*fsp++ == bx);}
118 |         else if (op == NE)  ax = *sp++ != ax;
119 |         else if (op == NEF)  {ax = (*fsp++ != bx);}
120 |         else if (op == LT)  ax = *sp++ < ax;
121 |         else if (op == LTF) {ax = (*fsp++ < bx);}
122 |         else if (op == LE)  ax = *sp++ <= ax;
123 |         else if (op == LEF) {ax = (*fsp++ <= bx);}
124 |         else if (op == GT)  ax = *sp++ >  ax;
125 |         else if (op == GTF) {ax = (*fsp++ > bx);}
126 |         else if (op == GE)  ax = *sp++ >= ax;
127 |         else if (op == GEF) {ax = (*fsp++ >= bx);}
128 | 
129 |         else if (op == SHL) ax = *sp++ << ax;
130 |         else if (op == SHR) ax = *sp++ >> ax;
131 | 
132 |         else if (op == ADD) ax = *sp++ + ax;
133 |         else if (op == ADDF) { bx = *fsp++ + bx; }
134 |         else if (op == SUB) ax = *sp++ - ax;
135 |         // else if (op == SUBF) bx = *fsp++ - bx;
136 |         else if (op == MUL) ax = *sp++ * ax;
137 | 
138 |         else if (op == MULF) bx = *fsp++ * bx;
139 | 
140 |         else if (op == DIV) {if (ax == 0) exit(-1); ax = *sp++ / ax;}
141 | 
142 |         else if (op == DIVF){if (bx == 0.0)exit(-1); bx = *fsp++ / bx;}
143 | 
144 |         else if (op == MOD) ax = *sp++ % ax;
145 |         
146 |         // 唯一退出的代码
147 |         else if (op == EXIT) { printf("exit(%d)\n", *sp); return *sp;}
148 |         else {
149 |             printf("unknown instruction:%d\n", op);
150 |             return -1;
151 |         }
152 |     }
153 | }
154 | 
155 | 
156 | 
157 | 
158 | 


--------------------------------------------------------------------------------
/lex.c:
--------------------------------------------------------------------------------
  1 | #include "lex.h"
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | #include <stdlib.h>
  5 | 
// Current read position in the source text being tokenized
static const char* src;
// Symbol table supplied through prepare_for_tokenization
static int  *symbols;

// Field indices describing one identifier entry of the symbol table
enum {Token, Hash, Name, Type, Class, Value, BType, BClass, BValue, IdSize};

// Symbol-table entry of the identifier scanned most recently
int *current_id;
// Current token
int token;
// Holds integral values such as int and char
int integral_token_val;
// Holds floating-point values (float and double)
double real_token_val;
// Current line number, incremented on every '\n'
int line;
// Type of the last numeric literal (INT or FLOAT)
int num_type;
// Write cursor of the data segment; string literals are stored through it
extern char* data;
 21 | 
 22 | void prepare_for_tokenization(const char* src_code, int* symbol_table)
 23 | {
 24 |     src = src_code;
 25 |     symbols = symbol_table;
 26 | }
 27 | 
 28 | void next() {
 29 |     char *last_pos;
 30 |     int hash;
 31 | 
 32 |     while (token = *src) {
 33 |         ++src;
 34 | 
 35 |         if (token == '\n') {
 36 |             ++line;
 37 |         }
 38 | 
 39 |         else if (token == '#') {
 40 |             //跳过宏定义，因为不支持
 41 |             while (*src != 0 && *src != '\n') {
 42 |                 src++;
 43 |             }
 44 |         }
 45 |         
 46 |         //解析标识符
 47 |         else if (is_valid_identifier_leading_character(token)) {
 48 | 
 49 |             last_pos = (char*)src - 1;
 50 |             hash = token;
 51 | 
 52 |             while (is_valid_identifier_character(*src)) {
 53 |                 hash = hash * 147 + *src;
 54 |                 src++;
 55 |             }
 56 |            
 57 |             // 搜索符号表
 58 |             // 这里默认设置的IdSize即标识符的长度是10，如果两个符号的前面10个是
 59 |             // 相同的，那么就区分不出来了,可以根据实际情况下重新设置其大小
 60 |             current_id = symbols;
 61 |             int id_len = src - last_pos;
 62 |             while (current_id[Token]) {
 63 |                 if (current_id[Hash] == hash && 
 64 |                     !memcmp((char *)current_id[Name], last_pos, id_len)) {
 65 |                     token = current_id[Token];
 66 | 
 67 |                     return;
 68 |                 }
 69 |                 //查找下一个条目
 70 |                 current_id = current_id + IdSize;
 71 |             }
 72 | 
 73 |             //如果没有找到就在新的symbols表项中创建一个ID条目
 74 |             current_id[Name] = (int)last_pos;
 75 |             current_id[Hash] = hash;
 76 |             token = current_id[Token] = Id;
 77 | 
 78 |             return;
 79 |         }
 80 |         
 81 |         // 如果是字面量的话就计算其数值
 82 |         else if (token >= '0' && token <= '9') {
 83 | 
 84 |             // 保存浮点数字面量，之后用转换函数进行转换
 85 |             char float_string[64];
 86 |             const char* string_begin = src;
 87 | 
 88 |             // 这里注意一些十进制单独0的情况
 89 |             integral_token_val = token - '0';
 90 |             num_type = INT;
 91 |             if (integral_token_val > 0) {
 92 |                 float_string[0] = token;
 93 |                 int idx = 1;
 94 | 
 95 |                 // 十进制
 96 |                 while (*src >= '0' && *src <= '9') {
 97 |                     integral_token_val = integral_token_val*10 + *src++ - '0';
 98 |                 }
 99 | 
100 |                 // 检测是否可能是浮点，即检测下一个字符是否是'.'
101 |                 // 对于浮点数暂时不支持如0001.xxx的浮点数形式
102 |                 if (*src == '.'){
103 |                     memcpy(&float_string[1], string_begin, src - string_begin);
104 |                     idx = idx + src - string_begin;
105 |                     float_string[idx] = '.';
106 |                     
107 |                     process_fraction(float_string, idx + 1);
108 | 
109 |                     real_token_val = strtod(float_string, NULL);
110 |                     num_type = FLOAT;
111 |                 }
112 | 
113 |             } else {
114 |                 // '0'开头的数，八进制或者十六进制或者是小数
115 |                 if (*src == 'x' || *src == 'X') {
116 |                     // 十六进制
117 |                     token = *++src;
118 |                     int sum = 0;
119 |                     while ((token >= '0' && token <= '9') || 
120 |                            (token >= 'a' && token <= 'f') || 
121 |                            (token >= 'A' && token <= 'F')) {
122 |                         sum = sum*16 + digitalize_hex_character((char)token);
123 |                         token = *++src;
124 |                     }
125 |                     integral_token_val = sum;
126 | 
127 |                 }else if(*src == '.'){
128 |                     // 小数0.xxxxxx形式 
129 |                     float_string[0] = '0';
130 |                     float_string[1] = '.';
131 | 
132 |                     process_fraction(float_string, 2);
133 |             
134 |                     real_token_val = strtod(float_string, NULL);
135 |                     num_type = FLOAT;
136 |                 }else{
137 |                     // 八进制用的比较少暂时不支持
138 |                 }
139 |             }
140 | 
141 |             token = Num;
142 |             return;
143 |         }
144 | 
145 |         else if (token == '.'){
146 |            //处理.xxxxx形式的浮点数
147 |            char float_string[32];
148 |            float_string[0] = '.';
149 |            process_fraction(float_string, 1);
150 |          
151 |            real_token_val = strtod(float_string, NULL);
152 |            token = Num;
153 |            num_type = FLOAT;
154 |            return;
155 |         }
156 | 
157 |         else if (token == '/') {
158 |             if (*src == '/') {
159 |                 //跳过注释 
160 |                 while (*src != 0 && *src != '\n') {
161 |                     ++src;
162 |                 }
163 |             } else { 
164 |                 token = Div;
165 |                 return;
166 |             }
167 |         }
168 | 
169 |         else if (token == '"') {
170 |             // 解析字符串常量，目前只支持转义字符'\n', 字符串常量的值存放在data
171 |             // 段
172 |             last_pos = data;
173 | 
174 |             //存取字符字面量
175 |             while (*src != 0 && *src != token) {
176 |                 integral_token_val = *src++;
177 |                 // 处理字符串中的转义字符
178 |                 if (integral_token_val == '\\') {
179 |                     integral_token_val = *src++;
180 |                     if (integral_token_val == 'n') {
181 |                         integral_token_val = '\n';
182 |                     }
183 |                 }
184 | 
185 |                 //存放字符串常量中的字符
186 |                 *data++ = integral_token_val;
187 |             }
188 | 
189 |             src++;
190 |             integral_token_val = (int)last_pos;
191 | 
192 |             return;
193 |         }
194 | 
195 |         else if (token == '\''){
196 |             integral_token_val = *src++;
197 |         
198 |             //处理单引号中的转义字符
199 |             if (integral_token_val == '\\'){
200 |                 integral_token_val = *src++;
201 |                 if (integral_token_val == 'n') {
202 |                      integral_token_val = '\n';
203 |                 }
204 |             }
205 | 
206 |             //单引号中只能有一个转义字符（两个字符）和一个非转义字符,如果还有其
207 |             //它的字符则报错
208 |             if (*src != '\''){
209 |                printf("%d: bad char value\n", line);
210 |                exit(-1);
211 |             }
212 | 
213 |             src++;
214 |             // 如'c', 就返回Num，token_val以赋值为相应的ascii值
215 |             token = Num; 
216 | 
217 |             return;
218 |         }
219 | 
220 |         else if (token == '=') {
221 |             // 解析 '==' 和 '='
222 |             if (*src == '=') {
223 |                 src ++;
224 |                 token = Eq;
225 |             } else {
226 |                 token = Assign;
227 |             }
228 |             return;
229 |         }
230 |         else if (token == '+') {
231 |             // 解析 '+' 和 '++'
232 |             if (*src == '+') {
233 |                 src ++;
234 |                 token = Inc;
235 |             } else {
236 |                 token = Add;
237 |             }
238 |             return;
239 |         }
240 |         else if (token == '-') {
241 |             // 解析 '-' 和 '--'
242 |             if (*src == '-') {
243 |                 src ++;
244 |                 token = Dec;
245 |             } else {
246 |                 token = Sub;
247 |             }
248 |             return;
249 |         }
250 |         else if (token == '!') {
251 |             // 解析'!='
252 |             if (*src == '=') {
253 |                 src++;
254 |                 token = Ne;
255 |             }
256 |             return;
257 |         }
258 |         else if (token == '<') {
259 |             // 解析 '<=', '<<' or '<'
260 |             if (*src == '=') {
261 |                 src ++;
262 |                 token = Le;
263 |             } else if (*src == '<') {
264 |                 src ++;
265 |                 token = Shl;
266 |             } else {
267 |                 token = Lt;
268 |             }
269 |             return;
270 |         }
271 |         else if (token == '>') {
272 |             //解析'>='，'>>' 或者 '>'
273 |             if (*src == '=') {
274 |                 src ++;
275 |                 token = Ge;
276 |             } else if (*src == '>') {
277 |                 src ++;
278 |                 token = Shr;
279 |             } else {
280 |                 token = Gt;
281 |             }
282 |             return;
283 |         }
284 |         else if (token == '|') {
285 |             //解析'|'和'||'
286 |             if (*src == '|') {
287 |                 src ++;
288 |                 token = Lor;
289 |             } else {
290 |                 token = Or;
291 |             }
292 |             return;
293 |         }
294 |         else if (token == '&') {
295 |             //解析'&'和'&&'
296 |             if (*src == '&') {
297 |                 src ++;
298 |                 token = Lan;
299 |             } else {
300 |                 token = And;
301 |             }
302 |             return;
303 |         }
304 |         else if (token == '^') {
305 |             token = Xor;
306 |             return;
307 |         }
308 |         else if (token == '%') {
309 |             token = Mod;
310 |             return;
311 |         }
312 |         else if (token == '*') {
313 |             token = Mul;
314 |             return;
315 |         }
316 |         else if (token == '[') {
317 |             token = Brak;
318 |             return;
319 |         }
320 |         else if (token == '?') {
321 |             token = Cond;
322 |             return;
323 |         }
324 |         else if (token == '~' || 
325 |                  token == ';' || 
326 |                  token == '{' || 
327 |                  token == '}' || 
328 |                  token == '(' || 
329 |                  token == ')' ||
330 |                  token == ']' || 
331 |                  token == ',' ||
332 |                  token == ':') {
333 |             //直接将这些字符作为token返回 
334 |             return;
335 |         }
336 |         else{
337 |            //其它情况忽略
338 |         }
339 |     }
340 | }
341 | 
342 | 
343 | void match(int expected_token) {
344 |     if (token == expected_token) {
345 |         next();
346 |     } else {
347 |         printf("%d: expected token: %d\n", line, expected_token);
348 |         exit(-1);
349 |     }
350 | }
351 | 
352 | 
353 | static Boolean is_valid_identifier_leading_character(char ch)
354 | {
355 | 
356 |     if ( (ch >= 'a' && ch <= 'z') ||
357 |          (ch >= 'A' && ch <= 'Z') ||
358 |          (ch == '_')){ 
359 |            return True;
360 |          }
361 | 
362 |     return False;
363 | }
364 | 
365 | 
366 | static Boolean is_valid_identifier_character(char ch)
367 | {
368 | 
369 |     if (is_valid_identifier_leading_character(ch) || is_digit(ch)){
370 |         return True;
371 |     }
372 | 
373 |     return False;
374 | }
375 | 
376 | 
377 | static Boolean is_digit(char ch)
378 | {
379 |     return (ch >= '0' && ch <= '9') ? True : False;
380 | }
381 | 
382 | 
383 | //处理浮点数的小数部分
384 | static void process_fraction(char* float_string, int start_idx)
385 | {
386 |    int idx = start_idx;
387 | 
388 |    token = *++src;
389 |    while ((token >= '0' && token <= '9')){ 
390 |        float_string[idx] = token;
391 |        idx++;
392 |        token = *++src;
393 |    }
394 |         
395 |    //判断是否是非法的浮点数字面量，处理完正常部分的浮点数后如果后面不是这些字符
396 |    //的话，那么这个浮点字面量是非法的，同时也能处理上面出现非法字符的情况，例如
397 |    //"12.0a" 这样的字面量
398 |    //printf("trailing charater of float literal '%c'\n", token);
399 |    if (! (token == ',' || token == ';' || token == ' ')){
400 |        printf("%d: bad float literal\n", line);
401 |        exit(-1);
402 |     }
403 | 
404 |     float_string[idx] = '\0';
405 |     printf("float val:%lf\n", strtod(float_string, NULL));
406 | }
407 | 
408 | 
// Converts a hexadecimal digit character to the number it denotes.
//
// ch: one of '0'-'9', 'a'-'f' or 'A'-'F'.
//
// Returns 0-9 for decimal digits and 10-15 for the letters, in either case.
// The character is not validated: anything outside the two ranges tested
// below is treated as an upper-case hex letter, so callers are expected to
// pass hexadecimal digits only.
static int digitalize_hex_character(char ch)
{
   if (ch >= '0' && ch <= '9'){
      return ch - '0';
   }else if (ch >= 'a' && ch <= 'f'){
      // The range test must look at the argument itself; consulting the
      // lexer's global token here would make the result depend on
      // unrelated lexer state.
      return ch - 'a' + 10;
   }else{
      return ch - 'A' + 10;
   }
}
420 | 


--------------------------------------------------------------------------------
/parser.c:
--------------------------------------------------------------------------------
   1 | #include <stdio.h>
   2 | #include <stdlib.h>
   3 | #include <memory.h>
   4 | #include <string.h>
   5 | #include "parser.h"
   6 | #include "executor.h"
   7 | #include "lex.h"
   8 | #include "relocation.h"
   9 | #include "dependency.h"
  10 | 
  11 | static void expression(int level) 
  12 | {
  13 | 
  14 |     int *id;
  15 |     int tmp;
  16 | 
  17 |     {
  18 |         if (!token) {
  19 |             printf("%d: unexpected token EOF of expression\n", line);
  20 |             exit(-1);
  21 |         }
  22 | 
  23 |         // 处理数值
  24 |         if (token == Num) {
  25 |             match(Num);
  26 |            //TODO 进一步判断是否是浮点类型
  27 |             
  28 |             if (num_type == INT){
  29 |                load_integral_number_constant(integral_token_val);
  30 |                expr_type = INT;
  31 |             }else{
  32 |             //TODO 加载浮点常量，浮点常量double类型存储
  33 |                load_real_number_constant(real_token_val);
  34 |                expr_type = FLOAT;
  35 |             }
  36 |         }
  37 | 
  38 |         // 处理字符串常量
  39 |         else if (token == '"') {
  40 | 
  41 |             *++text = IMM;
  42 |             *++text = integral_token_val;
  43 | 
  44 |             match('"');
  45 |             while (token == '"') {
  46 |                 match('"');
  47 |             }
  48 | 
  49 |             // 字符串常量不需要重定位
  50 |             // data段初始化的时候都为0，所以不需要显示的在末尾添加'\0'，下面是
  51 |             // 为了使得data段在4字节边界上对齐，例如如果字符串的长度为11个字节
  52 |             // 的话，那么对齐后实际分配的data空间是12个字节
  53 |             data = (char *)(((int)data + sizeof(int)) & (-sizeof(int)));
  54 | 
  55 |             expr_type = PTR;
  56 |         }
  57 | 
  58 |         // 处理标识符
  59 |         else if (token == Id) {
  60 | 
  61 |             match(Id);
  62 |             id = current_id;
  63 | 
  64 |             //函数调用
  65 |             if (token == '(') {
  66 |                 match('(');
  67 | 
  68 |                 int num_args = 0; //实参的个数
  69 |                 while (token != ')') {
  70 |                     // 将参数压人栈中
  71 |                     // TODO 如果参数是浮点类型的话就读不到数据了
  72 |                     expression(Assign);
  73 |                     *++text = PUSH;
  74 |                     num_args++;
  75 | 
  76 |                     if (token == ',') {
  77 |                         match(',');
  78 |                     }
  79 | 
  80 |                 }
  81 |                 match(')');
  82 | 
  83 |                 // 系统函数, id[Value]保存的是函数的OP代码
  84 |                 if (id[Class] == Sys) {
  85 |                     *++text = id[Value];
  86 |                 }
  87 |                 // 自定义的函数
  88 |                 else if (id[Class] == Fun) {
  89 |                     *++text = CALL;
  90 |                     *++text = id[Value];
  91 |                 }
  92 |                 else {
  93 |                     printf("%d: bad function call\n", line);
  94 |                     exit(-1);
  95 |                 }
  96 | 
  97 |                 // 如果函数调用有传递参数，那么函数返回后需要清理这些参数对应的
  98 |                 // 栈空间
  99 |                 if (num_args > 0) {
 100 |                     *++text = ADJ;
 101 |                     *++text = num_args;
 102 |                 }
 103 | 
 104 |                 //变量的类型
 105 |                 expr_type = id[Type];
 106 |             }
 107 |             else if (id[Class] == Num) {
 108 |             // 枚举类型
 109 |                 *++text = IMM;
 110 |                 *++text = id[Value];
 111 |                 expr_type = INT;
 112 |             }
 113 |             else {
 114 |             // 普通变量 
 115 |             
 116 |                 if (id[Class] == Ext) {
 117 |                     *++text = IMM;                
 118 |                     *++text = id[Value]; //id[Value]都是保存其地址
 119 |                 }
 120 |                 else if (id[Class] == Glo) {
 121 |                     *++text = IMM;                
 122 |                     *++text = id[Value]; //id[Value]都是保存其地址
 123 |                     int offset = (id[Value] - (int)data_start);
 124 |                     add_relocation_item(text, offset, Data_Rel);                    
 125 |                 }
 126 |                 else {
 127 |                     printf("%d: undefined variable\n", line);
 128 |                     exit(-1);
 129 |                 }
 130 | 
 131 | 
 132 |                 expr_type = id[Type];
 133 | 
 134 |                 //根据变量的类型选择相应的加载指令
 135 |                 *++text = emit_load_directive(expr_type);
 136 |             }
 137 |         }
 138 | 
 139 |         // 强制类型转换以及不同的括号分组
 140 |         else if (token == '(') {
 141 |             match('(');
 142 | 
 143 |             // 强制类型转换
 144 |             if (token == Int || token == Char || token == Float || Double) {
 145 |                 int cast_type = type_of_token(token);
 146 |                 match(token);
 147 |                 while (token == Mul) {
 148 |                     match(Mul);
 149 |                     cast_type = cast_type + PTR;
 150 |                 }
 151 |                 match(')');
 152 | 
 153 |                 //转型的优先级和Inc(++)一样
 154 |                 expression(Inc); 
 155 | 
 156 |                 check_assignment_types(cast_type, expr_type);
 157 |                 
 158 |                 // 强制类型转换整体的表达式的类型应该和转型的类型是一样的例如
 159 |                 // (int **)var, 那么不管var之前是什么类型的变量，转型后的类型
 160 |                 // 就是(int **)
 161 |                 expr_type  = cast_type;
 162 | 
 163 |             } else {
 164 |             // 普通的括号分组
 165 |                 expression(Assign);
 166 |                 match(')');
 167 |             }
 168 |         }
 169 | 
 170 |         else if (token == Mul) {
 171 |             match(Mul);
 172 | 
 173 |             //解引用的优先级和Inc(++)一样
 174 |             expression(Inc); 
 175 | 
 176 |             printf("expr_type %d\n", expr_type);
 177 |             if (expr_type >= PTR) {
 178 |                 expr_type = expr_type - PTR;
 179 |             } else {
 180 |                 printf("%d: bad dereference\n", line);
 181 |                 exit(-1);
 182 |             }
 183 | 
 184 |             //float** f;   1.0 + **f
 185 |             //那么通过Load操作逐步解引用addr (LI) (LF)
 186 |             //
 187 |             *++text = emit_load_directive(expr_type); 
 188 |         }
 189 | 
 190 |         else if (token == And) {
 191 |             match(And);
 192 | 
 193 |             //取地址的优先级和Inc(++)一样
 194 |             expression(Inc); 
 195 | 
 196 |             //如果是&var的话，直接通过load操作前面的IMM操作就可以加载其地址了
 197 |             //"&"后面的只能是变量而不能是常量，但是这里存在一个bug: 如果&const
 198 |             //而这个const的数值恰好是LC LI LF和LD其中一个，所以为了保险起见加上
 199 |             //对这种情况的判断;其次&的优先级比较高所以像&(1+2)之类的都是不合法的
 200 |             if (!does_operate_on_constant() &&
 201 |                  (*text == LC || *text == LI || *text == LF || *text == LD)){
 202 |                 text--;
 203 |             }else {
 204 |                 printf("%d: bad address of\n", line);
 205 |                 exit(-1);
 206 |             }
 207 | 
 208 |             expr_type = expr_type + PTR;
 209 |         }
 210 | 
 211 |         else if (token == '!') {
 212 |             match('!');
 213 | 
 214 |             //逻辑非的优先级和Inc(++)一样
 215 |             expression(Inc);
 216 | 
 217 |             // 使用expr == 0 进行判断
 218 |             // 如果是"!"后面的表达式类型是浮点类型，则将bx寄存器中的数转型成整
 219 |             // 型并放置在ax中指令BTOA就是这个作用
 220 |             if (expr_type == FLOAT || expr_type == DOUBLE){
 221 |                 *++text = BTOA;                
 222 |             }
 223 | 
 224 |             *++text = PUSH;
 225 |             *++text = IMM;
 226 |             *++text = 0;
 227 |             *++text = EQ;
 228 | 
 229 |             //最后整个表达式(!<expr>)的类型是INT
 230 |             expr_type = INT;
 231 |         }
 232 | 
 233 |         else if (token == '~') {
 234 |             // bitwise not
 235 |             match('~');
 236 | 
 237 |             //按位非的优先级和Inc(++)一样
 238 |             expression(Inc); 
 239 |         
 240 |             //位操作的话表达式的类型一定要正确，因此需要检查一些类型
 241 |             if (expr_type == FLOAT || expr_type == DOUBLE){
 242 |                 printf("%d: wrong type argument to bit-complement\n", line);
 243 |                 exit(-1); 
 244 |             }
 245 | 
 246 |             //使用<expr> XOR -1来时实现按位非，具体细节如下
 247 |             //(1111 1111)  -1
 248 |             //(0110 0011)  XOR
 249 |             //______________
 250 |             //
 251 |             //(1001 1100)
 252 |             *++text = PUSH; 
 253 |             *++text = IMM;  
 254 |             *++text = -1;
 255 |             *++text = XOR;
 256 | 
 257 |             //最后整个表达式(~<expr>)的类型是INT
 258 |             expr_type = INT;
 259 |         }
 260 |         else if (token == Add) {
 261 |             // +var, 不做实际的操作
 262 |             match(Add);
 263 | 
 264 |             //正号优先级和Inc(++)一样
 265 |             expression(Inc);
 266 | 
 267 |             //最后整个表达式(+<expr>)的类型和<expr>相同
 268 |             expr_type = expr_type;
 269 |         }
 270 |         else if (token == Sub) {
 271 |             // -var
 272 |             match(Sub);
 273 | 
 274 |             if (token == Num) {
 275 |                 if (num_type == INT || num_type == CHAR){
 276 |                    load_integral_number_constant(-integral_token_val);
 277 |                 }else{
 278 |                    load_real_number_constant(-real_token_val);
 279 |                 }
 280 |                 match(Num);
 281 |             } else {
 282 |                 //TODO 
 283 |                 *++text = IMM;
 284 |                 *++text = -1;   
 285 |                 *++text = PUSH;
 286 |                 expression(Inc);
 287 |                 *++text = MUL; 
 288 |             }
 289 | 
 290 |         }
 291 | 
 292 |         else if (token == Inc || token == Dec) {
 293 |             int save_token = token;
 294 |             match(token);
 295 |             expression(Inc);
 296 | 
 297 |             if (does_operate_on_constant()){
 298 |                 printf("%d:Inc or Dec cannot apply on constant\n", line);
 299 |                 exit(-1);
 300 |             } 
 301 | 
 302 |             // 暂时不支持浮点类型的变量(包括指针类型)++或--操作
 303 |             if (get_base_type(expr_type) > INT){
 304 |                 printf("%d: sorry, Inc or Dec is not supported for floating\n",
 305 |                       line);
 306 |                 exit(-1);
 307 |             }
 308 | 
 309 | 
 310 |             if (*text == LC) {
 311 |                 *text = PUSH;  
 312 |                 *++text = LC;
 313 |             } else if (*text == LI) {
 314 |                 *text = PUSH;
 315 |                 *++text = LI;
 316 |             } else {
 317 |                 printf("%d: bad lvalue of pre-increment\n", line);
 318 |                 exit(-1);
 319 |             }
 320 | 
 321 |             *++text = PUSH;
 322 |             *++text = IMM;
 323 |             *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 324 |             *++text = (save_token == Inc) ? ADD : SUB;
 325 |             *++text = (expr_type == CHAR) ? SC : SI;
 326 |         }
 327 |         else {
 328 |             printf("%d: bad expression\n", line);
 329 |             exit(-1);
 330 |         }
 331 |     }
 332 |  
 333 | 
 334 |     //处理二元操作符以及后缀操作符
 335 |     {
 336 |         // 根据当前的操作符优先级进行操作
 337 |         while (token >= level) {
 338 |             int left_type = expr_type;
 339 |             if (token == Assign) {
 340 |                 // var = expr;
 341 |                 match(Assign);
 342 | 
 343 |                 // 变量如果是充当左值话就修改指令，使用PUSH指令保存其地址
 344 |                 // 如果是用作右值的话，就使用Load指令加载
 345 |                 // 左值不是变量，报错
 346 |                 if (*text == LC || *text == LI || *text == LF || *text == LD) {   
 347 |                     *text = PUSH; 
 348 |                 } else {
 349 |                     printf("%d: bad lvalue in assignment\n", line);
 350 |                     exit(-1);
 351 |                 }
 352 | 
 353 |                 // 然后计算右边表达式的值，并将结果保存到ax或者bx
 354 |                 expression(Assign);
 355 | 
 356 |                 //类型兼容的函数
 357 |                 printf("assign left %d , right %d\n", left_type, expr_type);
 358 |                 check_assignment_types(left_type, expr_type);
 359 | 
 360 |                 //如果两个是类型兼容的话，那么整个表达式的类型就是左操作数的类型
 361 |                 expr_type = left_type; 
 362 |                 *++text = emit_store_directive(expr_type);
 363 |             }
 364 | 
 365 |             else if (token == Cond) {
 366 |                 // expr ? a : b;
 367 |                 match(Cond);
 368 |                 int *addr;
 369 | 
 370 |                 // 如果结果是float类型的，那么将bx中的数转型移到ax中
 371 |                 // 转型的精度损失不会影响条件的真假性
 372 |                 if (expr_type == FLOAT || expr_type == DOUBLE){
 373 |                   *++text = BTOA;
 374 |                 }
 375 | 
 376 |                 *++text = JZ;
 377 |                 addr = ++text;
 378 |                 expression(Assign);
 379 |                 if (token == ':') {
 380 |                     match(':');
 381 |                 } else {
 382 |                     printf("%d: missing colon in conditional\n", line);
 383 |                     exit(-1);
 384 |                 }
 385 |                 int offset = (text + 3 - text_start)*sizeof(int);
 386 |                 add_relocation_item(addr, offset, Text_Rel);
 387 |                 *addr = (int)(text + 3);
 388 |                 *++text = JMP;
 389 | 
 390 |                 addr = ++text;
 391 |                 expression(Cond);
 392 |                 offset = (text + 1 - text_start)*sizeof(int);
 393 |                 add_relocation_item(addr, offset, Text_Rel);
 394 |                 *addr = (int)(text + 1);
 395 |             }
 396 | 
 397 |             else if (token == Lor) {
 398 |                 // logic or
 399 |                 match(Lor);
 400 | 
 401 |                 int *addr;
 402 | 
 403 |                 // 如果结果是float类型的，那么将bx中的数转型移到ax中
 404 |                 // 转型的精度损失不会影响条件的真假性
 405 |                 if (expr_type == FLOAT || expr_type == DOUBLE){
 406 |                   *++text = BTOA;
 407 |                 }
 408 | 
 409 |                 *++text = JNZ;
 410 |                 addr = ++text;
 411 |                 expression(Lan);
 412 | 
 413 |                 int offset = (text + 1 - text_start)*sizeof(int);
 414 |                 add_relocation_item(addr, offset, Text_Rel);
 415 |                 *addr = (int)(text + 1);
 416 |                 expr_type = INT;
 417 |             }
 418 |             else if (token == Lan) {
 419 |                 // logic and
 420 |                 match(Lan);
 421 | 
 422 |                 int *addr;
 423 | 
 424 |                 // 如果结果是float类型的，那么将bx中的数转型移到ax中
 425 |                 // 转型的精度损失不会影响条件的真假性
 426 |                 if (expr_type == FLOAT || expr_type == DOUBLE){
 427 |                   *++text = BTOA;
 428 |                 }
 429 | 
 430 |                 *++text = JZ;
 431 |                 addr = ++text;
 432 |                 expression(Or);
 433 | 
 434 |                 int offset = (text + 1 - text_start)*sizeof(int);
 435 |                 add_relocation_item(addr, offset, Text_Rel);
 436 |                 *addr = (int)(text + 1);
 437 | 
 438 |                 expr_type = INT;
 439 |             }
 440 |             else if (token == Or) {
 441 |                 // bitwise or
 442 |                 match(Or);
 443 |                 *++text = PUSH;
 444 |                 expression(Xor);
 445 | 
 446 |                //位操作的话表达式的类型一定要正确，因此需要检查一些类型
 447 |                if (expr_type == FLOAT || expr_type == DOUBLE){
 448 |                    printf("%d: wrong type argument to bitwise or\n", line);
 449 |                    exit(-1); 
 450 |                 }
 451 | 
 452 |                 *++text = OR;
 453 |                 expr_type = INT;
 454 |             }
 455 |             else if (token == Xor) {
 456 |                 // bitwise xor
 457 |                 match(Xor);
 458 |                 *++text = PUSH;
 459 |                 expression(And);
 460 | 
 461 |                 //位操作的话表达式的类型一定要正确，因此需要检查一些类型
 462 |                 if (expr_type == FLOAT || expr_type == DOUBLE){
 463 |                    printf("%d: wrong type argument to bitwise xor\n", line);
 464 |                    exit(-1); 
 465 |                 }
 466 | 
 467 |                 *++text = XOR;
 468 |                 expr_type = INT;
 469 |             }
 470 |             else if (token == And) {
 471 |                 // bitwise and
 472 |                 match(And);
 473 |                 *++text = PUSH;
 474 |                 expression(Eq);
 475 | 
 476 |                 //位操作的话表达式的类型一定要正确，因此需要检查一些类型
 477 |                 if (expr_type == FLOAT || expr_type == DOUBLE){
 478 |                    printf("%d: wrong type argument to bitwise xor\n", line);
 479 |                    exit(-1); 
 480 |                 }
 481 | 
 482 |                 *++text = AND;
 483 |                 expr_type = INT;
 484 |             }
 485 |             else if (token == Eq) {
 486 |                 // equal ==
 487 |                 match(Eq);
 488 |                 int *reserve1 = NULL, *reserve2 = NULL;
 489 | 
 490 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 491 |                 //*++text = PUSH;
 492 | 
 493 |                 expression(Ne);
 494 | 
 495 |                 //*++text = EQ;
 496 |                 emit_code_for_binary_right(EQF, EQ, &reserve1, &reserve2);
 497 | 
 498 |                 expr_type = INT;
 499 |             }
 500 |             else if (token == Ne) {
 501 |                 // not equal !=
 502 |                 match(Ne);
 503 |                 int *reserve1 = NULL, *reserve2 = NULL;
 504 | 
 505 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 506 |                 //*++text = PUSH;
 507 |                 
 508 |                 expression(Lt);
 509 | 
 510 |                 //*++text = NE;
 511 |                 emit_code_for_binary_right(NEF, NE, &reserve1, &reserve2);
 512 | 
 513 |                 expr_type = INT;
 514 |             }
 515 |             else if (token == Lt) {
 516 |                 // less than
 517 |                 match(Lt);
 518 |                 int *reserve1 = NULL, *reserve2 = NULL;
 519 | 
 520 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 521 |                 //*++text = PUSH;
 522 |                 
 523 |                 expression(Shl);
 524 | 
 525 |                 //*++text = LT;
 526 |                 emit_code_for_binary_right(LTF, LT, &reserve1, &reserve2);
 527 | 
 528 |                 expr_type = INT;
 529 |             }
 530 |             else if (token == Gt) {
 531 |                 // greater than
 532 |                 match(Gt);
 533 |                 int *reserve1 = NULL, *reserve2 = NULL;
 534 | 
 535 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 536 |                 //*++text = PUSH;
 537 |                 
 538 |                 expression(Shl);
 539 | 
 540 |                 //*++text = GT;
 541 |                 emit_code_for_binary_right(GTF, GT, &reserve1, &reserve2);
 542 | 
 543 |                 expr_type = INT;
 544 |             }
 545 |             else if (token == Le) {
 546 |                 // less than or equal to
 547 |                 match(Le);
 548 |                 int *reserve1 = NULL, *reserve2 = NULL;
 549 | 
 550 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 551 |                 //*++text = PUSH;
 552 |                 
 553 |                 expression(Shl);
 554 | 
 555 |                 //*++text = LE;
 556 |                 emit_code_for_binary_right(LEF, LE, &reserve1, &reserve2);
 557 | 
 558 |                 expr_type = INT;
 559 |             }
 560 |             else if (token == Ge) {
 561 |                 // greater than or equal to
 562 |                 match(Ge);
 563 |                 int *reserve1 = NULL, *reserve2 = NULL;
 564 | 
 565 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 566 |                 //*++text = PUSH;
 567 |             
 568 |                 expression(Shl);
 569 | 
 570 |                 //*++text = GE;
 571 |                 emit_code_for_binary_right(GEF, GE, &reserve1, &reserve2);
 572 | 
 573 |                 expr_type = INT;
 574 |             }
 575 |             else if (token == Shl) {
 576 |                 // shift left
 577 |                 match(Shl);
 578 |                 int save_type = expr_type;
 579 | 
 580 |                 *++text = PUSH;
 581 |                 expression(Add);
 582 | 
 583 |                 // 两侧的操作数只能是char以及int型的
 584 |                 if ((save_type == FLOAT || save_type == DOUBLE) ||
 585 |                     (expr_type == FLOAT || save_type == DOUBLE)){
 586 |                    printf("%d: wrong type argument to shift left\n", line);
 587 |                    exit(-1); 
 588 |                 }
 589 | 
 590 |                 *++text = SHL;
 591 |                 
 592 |                 expr_type = INT;
 593 |             }
 594 |             else if (token == Shr) {
 595 |                 // shift right
 596 |                 match(Shr);
 597 |                 int save_type = expr_type;
 598 | 
 599 |                 *++text = PUSH;
 600 |                 expression(Add);
 601 | 
 602 |                 // 两侧的操作数只能是char以及int型的
 603 |                 if ((save_type == FLOAT || save_type == DOUBLE) ||
 604 |                     (expr_type == FLOAT || save_type == DOUBLE)){
 605 |                    printf("%d: wrong type argument to shitf right\n", line);
 606 |                    exit(-1); 
 607 |                 }
 608 | 
 609 |                 *++text = SHR;
 610 |                 
 611 |                 expr_type = INT;
 612 |             }
 613 |             //TODO 先尝试让浮点的加法操作正常工作 
 614 |             else if (token == Add) {
 615 |                 // add
 616 |                 match(Add);
 617 | 
 618 |                 int *reserve1 = NULL, *reserve2 = NULL;
 619 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 620 | 
 621 |                 //计算表达式右边的值
 622 |                 expression(Mul);
 623 |                 
 624 |                 printf("+ right type %d\n", expr_type);
 625 |                 //TODO expr_type = tmp;
 626 |                 //如果操作数是指针类型的话
 627 |                 if (expr_type > PTR) { 
 628 |                     *++text = PUSH;
 629 |                     *++text = IMM;
 630 |                     *++text = sizeof(int);
 631 |                     *++text = MUL;
 632 |                 }
 633 | 
 634 |                 emit_code_for_binary_right(ADDF, ADD, &reserve1, &reserve2);
 635 | 
 636 |             }
 637 |             else if (token == Sub) {
 638 |                 // sub
 639 |                 match(Sub);
 640 |                 *++text = PUSH;
 641 |                 expression(Mul);
 642 |                 if (tmp > PTR && tmp == expr_type) {
 643 |                     // pointer subtraction
 644 |                     *++text = SUB;
 645 |                     *++text = PUSH;
 646 |                     *++text = IMM;
 647 |                     *++text = sizeof(int);
 648 |                     *++text = DIV;
 649 |                     expr_type = INT;
 650 |                 } else if (tmp > PTR) {
 651 |                     // pointer movement
 652 |                     *++text = PUSH;
 653 |                     *++text = IMM;
 654 |                     *++text = sizeof(int);
 655 |                     *++text = MUL;
 656 |                     *++text = SUB;
 657 | 
 658 |                     expr_type = tmp;
 659 |                 } else {
 660 |                     // numeral subtraction
 661 |                     *++text = SUB;
 662 | 
 663 |                     expr_type = tmp;
 664 |                 }
 665 |             }
 666 |             else if (token == Mul) { // multiply
 667 |                 match(Mul);
 668 | 
 669 |                 int *reserve1 = NULL, *reserve2 = NULL;
 670 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 671 |                 //*++text = PUSH;
 672 |                 
 673 |                 expression(Inc);
 674 | 
 675 |                 emit_code_for_binary_right(MULF, MUL, &reserve1, &reserve2);
 676 |                 //*++text = MUL;
 677 | 
 678 |                 //TODO
 679 |                 //expr_type = tmp;
 680 |             }
 681 |             else if (token == Div) {
 682 |                 // divide
 683 |                 match(Div);
 684 | 
 685 |                 int *reserve1 = NULL, *reserve2 = NULL;
 686 |                 emit_code_for_binary_left(&reserve1, &reserve2);
 687 |                 //*++text = PUSH;
 688 |                 expression(Inc);
 689 | 
 690 |                 emit_code_for_binary_right(DIVF, DIV, &reserve1, &reserve2);
 691 |                 //*++text = DIV;
 692 | 
 693 |                 //expr_type = tmp;
 694 |             }
 695 |             else if (token == Mod) {
 696 |                 // Modulo
 697 |                 match(Mod);
 698 | 
 699 |                 int save_type = expr_type;
 700 |                 *++text = PUSH;
 701 | 
 702 |                 expression(Inc);
 703 |                 // 只有两个数是整型数(CHAR或INT)才可以
 704 |                 if (!((save_type == INT || save_type == CHAR) &&
 705 |                       (expr_type == INT || expr_type == CHAR))){
 706 |                      printf("%d:invalid operands to binary\n", line);
 707 |                      exit(-1); 
 708 |                 }
 709 | 
 710 |                 *++text = MOD;
 711 | 
 712 |                 expr_type = INT;
 713 |                 //expr_type = tmp;
 714 |             }
 715 |             else if (token == Inc || token == Dec) {
 716 |                 // postfix inc(++) and dec(--)
 717 |                 // 注意: 浮点数是不支持++或--操作的
 718 |                 if (*text == LI) {
 719 |                     *text = PUSH;
 720 |                     *++text = LI;
 721 |                 }
 722 |                 else if (*text == LC) {
 723 |                     *text = PUSH;
 724 |                     *++text = LC;
 725 |                 }
 726 |                 else {
 727 |                     printf("%d: bad value in increment\n", line);
 728 |                     exit(-1);
 729 |                 }
 730 | 
 731 |                 *++text = PUSH;
 732 |                 *++text = IMM;
 733 |                 *++text = (expr_type > PTR) ? sizeof(int) : sizeof(char);
 734 |                 *++text = (token == Inc) ? ADD : SUB;
 735 |                 //SC store char; SI store int
 736 |                 *++text = (expr_type == CHAR) ? SC : SI;
 737 | 
 738 |                 match(token);
 739 |             }
 740 | 
 741 |             //数组的访问,但是好像没有数组的声明
 742 |             else if (token == Brak) {
 743 |                 // array access var[xx]
 744 |                 match(Brak);
 745 |                 int array_type = expr_type;
 746 | 
 747 |                 *++text = PUSH; //将var的值作为地址放在栈中
 748 |                 expression(Assign);
 749 |                 match(']');
 750 | 
 751 |                 //什么时候需要将type保存到tmp
 752 |                 if (array_type > PTR) {
 753 |                     // pointer, `not char *`
 754 |                     *++text = PUSH; //xx的结果放在栈中(计算偏移量)
 755 |                     *++text = IMM; 
 756 |                     *++text = sizeof(int);
 757 |                     *++text = MUL; 
 758 |                 }
 759 |                 else if (array_type < PTR) {
 760 |                     printf("%d: pointer type expected\n", line);
 761 |                     exit(-1);
 762 |                 }
 763 | 
 764 |                 expr_type = array_type - PTR;
 765 |                 *++text = ADD; //计算地址:首地址 + 偏移量
 766 | 
 767 |                 //a[10] 等价于 *(a + 10)
 768 |                 //LC load char; LI load int
 769 |                 *++text = (expr_type == CHAR) ? LC : LI;
 770 |             }
 771 |             else {
 772 |                 printf("%d: compiler error, token = %d\n", line, token);
 773 |                 exit(-1);
 774 |             }
 775 |         }
 776 |     }
 777 | }
 778 | 
 779 | 
 780 | 
/*
 * Parse one statement starting at the current token and emit the VM code for
 * it into the text segment (via the global `text` cursor).
 *
 * Recognised forms, in the order they are tested:
 *   if (...) <statement> [else <statement>]
 *   while (...) <statement>
 *   { <statement> ... }
 *   return [expression];
 *   ;                       (empty statement)
 *   <expression>;           (assignment or function call)
 *
 * Every absolute jump target written into the text segment is also passed to
 * add_relocation_item() together with its byte offset from text_start.
 * NOTE(review): presumably so the target can be re-based when the code is
 * relocated -- confirm against relocation.c.
 */
static void statement() {

    // a: start of a loop condition; b: operand slot of a pending jump.
    int *a, *b; 

    if (token == If) {
        // Code generated for an if statement. Unlike gcc and other production
        // compilers, no optimisation is performed.
        // if (...) <statement> [else <statement>]
        //                     // emitted in source order
        //   if (...)           <cond>  
        //                      JZ a    
        //     <statement>      <statement>
        //   else:              JMP b // skip over the else part
        // a:
        //     <statement>      <statement>
        // b:                   b:
        //
        
        match(If);
        match('(');
        // Parse the condition.
        expression(Assign);  
        match(')');

        // If the result is a floating-point value, convert the number in bx
        // and move it into ax. The precision lost by the conversion does not
        // change whether the condition is true or false.
        if (expr_type == FLOAT || expr_type == DOUBLE){
               *++text = BTOA;
         }

        *++text = JZ;
        b = ++text; // reserve the operand slot of JZ; it is filled in later

        // Parse the statement controlled by the if.
        statement();

        int offset;
        // Parse the else part.
        if (token == Else) { 
            // match() also advances to the next token, so for "else if" the
            // nested statement() call below will see the `if`.
            match(Else);

            // emit code for JMP B
            // The JZ target is text + 3: the JMP opcode and its operand take
            // the next two slots, so the else body starts at the third.
            // Relocation entry: location b, offset text + 3 - text_start.
            offset = (text + 3 - text_start)*sizeof(int);
            add_relocation_item(b, offset, Text_Rel);
            *b = (int)(text + 3);
            *++text = JMP;
            b = ++text; // b now names the operand slot of the JMP

            statement();
        }


        // Patch whichever jump is still pending (JZ without else, JMP with
        // else) so that it lands on the first slot after the statement.
        // Relocation entry: location b, offset text + 1 - text_start.
        offset = (text + 1 - text_start)*sizeof(int);
        add_relocation_item(b, offset, Text_Rel);
        *b = (int)(text + 1); // fill in label b now that the body is compiled
    }


    //TODO implement break and continue
    else if (token == While) {
        //
        // a:                     a:
        //    while (<cond>)        <cond>
        //                          JZ b
        //     <statement>          <statement>
        //                          JMP a
        // b:                     b:
        match(While);

        a = text + 1; // the code for <cond> starts at a

        match('(');
        expression(Assign);
        match(')');

        // If the result is a floating-point value, convert the number in bx
        // and move it into ax. The precision lost by the conversion does not
        // change whether the condition is true or false.
        if (expr_type == FLOAT || expr_type == DOUBLE){
              *++text = BTOA;
        }

        *++text = JZ;
        b = ++text; // reserve the operand slot of JZ; it is filled in later

        //TODO push the pair of labels onto a stack (needed for nested while
        //loops). If the stack is empty, i.e. we are not inside a while loop,
        //report an error.
        //start_label1: ,  end_label2: 
        //start_label2: ,  end_label2: 
        
        statement();

        int offset;

        // Jump back to the condition; this is what `continue` would target.
        *++text = JMP;
        // Relocation entry: location text, offset a - text_start.
        *++text = (int)a;
        offset = (a - text_start)*sizeof(int);
        add_relocation_item(text, offset, Text_Rel);


        // Loop exit; this is what `break` would target.
        // Fill in label b now that the body is compiled: the code that
        // follows the loop starts at b's target.
        // Relocation entry: location b, offset text + 1 - text_start.

        offset = (text + 1 - text_start)*sizeof(int);
        add_relocation_item(b, offset, Text_Rel);
        *b = (int)(text + 1); // the code after the loop starts here
    }

    // Brace-delimited block, e.g. the body of an if or while.
    else if (token == '{') {
        // { <statement> ... }
        match('{');

        while (token != '}') {
            statement();
        }

        match('}');
    }

    else if (token == Return) {
        // return [expression];
        match(Return);

        if (token != ';') {
            expression(Assign);
        }

        match(';');

        // emit code for return
        *++text = LEV;
    }

    else if (token == ';') {
        // empty statement
        match(';');
    }

    else {
        // a = b; or function_call();
        //printf("assignement\n");
        expression(Assign);
        match(';');
    }
}
 931 | 
 932 | 
 933 | 
 934 | static void global_declaration() 
 935 | {
 936 |     // 解析变量声明的类型
 937 |     if (token == Int) {
 938 |         match(Int);
 939 |         basetype = INT;
 940 |         printf("Int token\n");
 941 |     }
 942 |     else if (token == Char) {
 943 |         match(Char);
 944 |         basetype = CHAR;
 945 |         printf("Char token\n");
 946 |     }
 947 |     else if (token == Float){
 948 |         match(Float);
 949 |         basetype = FLOAT;
 950 |         printf("Float token\n");
 951 |     }else if (token == Double){
 952 |         match(Double);
 953 |         basetype = DOUBLE;
 954 |         printf("Double token\n");
 955 |     }
 956 | 
 957 | 
 958 |     // 解析可由逗号分割的变量声明 
 959 |     while (token != ';' && token != '}') {
 960 |         int final_type = basetype;
 961 | 
 962 |         // 解析指针类型，因为会存在如 "int ****var;" 的多级指针声明需要用一个循
 963 |         // 环来解析，注意因为在词法分析阶段解析标识符的时候遇到非标识符的字符
 964 |         // 就会停止下来，因此 "int**** var;" 这种形式也是可以的
 965 |         while (token == Mul) {
 966 |             match(Mul);
 967 |             final_type = final_type + PTR;
 968 |         }
 969 | 
 970 |         if (token != Id) {
 971 |             // 如果记号不是标识符的话则为非法声明
 972 |             printf("%d: bad global declaration\n", line);
 973 |             exit(-1);
 974 |         }
 975 |         if (current_id[Class]) {
 976 |             // 标识符已经存在
 977 |             printf("%d: duplicate global declaration\n", line);
 978 |             exit(-1);
 979 |         }
 980 |     
 981 |         match(Id);
 982 | 
 983 |         //设置了Type和Value，等程序后面引用的时候就能正确加载
 984 |         current_id[Type] = final_type;
 985 | 
 986 |         if(token == Brak){
 987 |             //TODO 新增支持数组声明, 数组下标要是整数      
 988 |             static int* addr_keeper;
 989 |             next();
 990 |             if (token != Num){
 991 |                printf("%d: bad index\n", line);
 992 |             }
 993 |             int num = integral_token_val;
 994 | 
 995 |             current_id[Class] = Glo;
 996 |             //为什么current_id[Value] = (int)data就不行,可能数组的访问就是用指针
 997 |             //实现的 data是char*类型的
 998 | 
 999 |             //数组变量是指针变量，数组的起始地址保存在该指针变量中
1000 |             //这里不能使用(int)&data因为data是全局变量，其数值后面是会变化的
1001 |             //因此需要另外一个变量来
1002 |             //从data中先分配一个空间用于存放下面数组的首地址
1003 |             addr_keeper = (int*)data;
1004 |             data = data + sizeof(int);
1005 |             *addr_keeper = (int)data;
1006 | 
1007 |             current_id[Value] = (int)addr_keeper;
1008 |            // current_id[Value] = (int)&data;
1009 |            // printf("saved value %p\n",&data);
1010 |             current_id[Type] = final_type + PTR;
1011 |             
1012 |             int* array_addr = (int*)data;
1013 |             printf("array addr  %p\n", array_addr);
1014 |             data = data + num * sizeof(int);
1015 |             printf("new data addr  %p\n", data);
1016 | 
1017 |             match(Num);
1018 |             match(']');
1019 | 
1020 |             
1021 |             //TODO 新增支持数组的初始化
1022 |             if (token == Assign){
1023 |                 //like int array[4] = {1,3,5,6};
1024 |                 int i;
1025 |                 
1026 |                 match(Assign);
1027 |                 match('{');
1028 |                 for (i=0; i<num; i++){
1029 |                    if (token != Num){
1030 |                        printf("%d: bad initailzer\n", line);
1031 |                    }
1032 | 
1033 |                    printf("token_val is %d\n", integral_token_val);
1034 |                    printf("address %p\n", array_addr+i);
1035 |                    array_addr[i] = integral_token_val;
1036 |                    match(Num);
1037 |                    if (token == ',') {
1038 |                       match(',');
1039 |                    }else if (token == '}'){
1040 |                       match('}');
1041 |                       break;
1042 |                    }else{
1043 |                        printf("%d: bad token\n", line);
1044 |                    }
1045 |                 }
1046 |                 if (token == '}'){
1047 |                    match('}');
1048 |                 }
1049 |             }
1050 | 
1051 |         }else {
1052 | 
1053 |             //TODO 根据变量的类型不同分配不同大小空间的
1054 |             current_id[Class] = Glo; 
1055 |             current_id[Value] = (int)data; 
1056 |            
1057 |             //新增的代码支持初始化
1058 |             if (token == Assign){
1059 |                // 如果复杂的话就去掉初始化
1060 |                // 例如 int a = 10;
1061 |                next();
1062 |                if (token != Num){
1063 |                   printf("%d: bad initailzer\n", line);
1064 |                }
1065 | 
1066 |                // 根据变量类型存储相应的值
1067 |                if (basetype == CHAR || basetype == INT){
1068 |                    *(int*)data = (num_type == INT) ? integral_token_val :
1069 |                                                           real_token_val;
1070 |                }else if (basetype == FLOAT){
1071 |                     // 因为可能会出现float f = 1这样的情况，所以需要判断
1072 |                     // 右边的数值是声明类型的
1073 |                    *(float*)data = (num_type == FLOAT) ? real_token_val : 
1074 |                                                       integral_token_val;
1075 |                }
1076 |                else if (basetype == DOUBLE){
1077 |                    // 同上double d = 1
1078 |                    *(double*)data = (num_type == FLOAT) ? real_token_val :
1079 |                                                        integral_token_val;
1080 |                }else{
1081 |                    // TODO 指针的赋值
1082 |                }
1083 | 
1084 | 
1085 |                //注意只有初始化的时候才需要匹配Num
1086 |                match(Num);
1087 |             }
1088 | 
1089 |             //更新data地址，按照变量的类型
1090 |             if ((basetype == INT)  || 
1091 |                 (basetype == CHAR) ||
1092 |                 (final_type > PTR)){
1093 |                 data = data + sizeof(int);
1094 |             }else if(basetype == FLOAT){
1095 |                 // 内部的float类型以及double类型运算都是在类型为double的
1096 |                 // 寄存器的，但是存放在data上还是要区分这两种数据类型
1097 |                 data = data + sizeof(float);
1098 |             }else{
1099 |                 data = data + sizeof(double);
1100 |             }
1101 |         }
1102 | 
1103 |         if (token == ',') {
1104 |             match(',');
1105 |         }
1106 |     }
1107 | 
1108 |     next();
1109 | }
1110 | 
1111 | static void parse_block_code()
1112 | { 
1113 |     char* token_name;
1114 |     
1115 |     //use
1116 |     next();
1117 |     token_name = (char*)current_id[Name];
1118 |     if (strncmp("use", token_name, 3)){
1119 |        printf("bad use block\n");
1120 |        return;
1121 |     }
1122 |     next();
1123 |     match('{');
1124 |     while (token != '}') {
1125 |         //printf("token %d golbal decalration\n", token);
1126 |         global_declaration();
1127 |     }
1128 |     match('}');
1129 | 
1130 |     //action
1131 |     token_name = (char*)current_id[Name];
1132 |     if (strncmp("action", token_name, 6)){
1133 |        printf("bad action block\n");
1134 |        return;
1135 |     }
1136 | 
1137 |     next();
1138 |     match('{');
1139 |     while (token != '}') {
1140 |         //printf("token %d golbal decalration\n", token);
1141 |         statement();
1142 |     }
1143 |     match('}');
1144 | 
1145 | }
1146 | 
1147 | 
1148 | //只初始化一次
1149 | int parser_init()
1150 | {
1151 |     int i, fd;
1152 |     int *tmp;
1153 | 
1154 |     poolsize = 256 * 1024;
1155 |     line = 1;
1156 | 
1157 |     //编译之后应该只保存text data
1158 |     // allocate memory
1159 |     if (!(text = malloc(poolsize))) {
1160 |         printf("could not malloc(%d) for text area\n", poolsize);
1161 |         return -1;
1162 |     }
1163 |     text_start = text;
1164 |     
1165 |     if (!(data = malloc(poolsize))) {
1166 |         printf("could not malloc(%d) for data area\n", poolsize);
1167 |         return -1;
1168 |     }
1169 |     data_start = data;
1170 | 
1171 |     //只是编译的时候会使用 
1172 |     if (!(symbols = malloc(poolsize))) {
1173 |         printf("could not malloc(%d) for symbol table\n", poolsize);
1174 |         return -1;
1175 |     }
1176 | 
1177 |     memset(text, 0, poolsize);
1178 |     memset(data, 0, poolsize);
1179 | 
1180 | }
1181 | 
1182 | 
1183 | //每次编译新的代码片段的时候都需要重新设置一下符号表
1184 | //TODO 这些是公共的部分应该只初始化一次
1185 | //不同代码块的以及注入的符号单独成一张符号表
1186 | //两张符号表: 公共符号表以及私有符号表
1187 | static  void init_symbol_table()
1188 | {
1189 |     memset(symbols, 0, poolsize);
1190 | 
1191 |     // 注意这个顺序要和symbol.h中的对应起来，否则会报错误
1192 |     // 然后和symbol.h的放在一起，让相关的东西在一起方便以后修改
1193 |     char* keyword = "char int float double if else while return";
1194 |     prepare_for_tokenization(keyword, symbols); 
1195 | 
1196 |     int i = Char;
1197 |     while (i <= Return) {
1198 |         next();
1199 |         current_id[Token] = i++;
1200 |     }
1201 | 
1202 |     // 解析src中的符号并将其加入到当前标识中，即不需要
1203 |     // 这个步骤可以作为单独的函数提炼出来，方便后面加入新的符号
1204 |     char* libfunc = "open read close printf malloc memset memcmp exit void";
1205 |     prepare_for_tokenization(libfunc, symbols);
1206 | 
1207 |     i = OPEN;
1208 |     while (i <= EXIT) {
1209 |         next();
1210 |         current_id[Class] = Sys;
1211 |         current_id[Type] = FLOAT;
1212 |         current_id[Value] = i++;
1213 |     } 
1214 | 
1215 |     // 处理void类型
1216 |     next(); current_id[Token] = Char; 
1217 | }
1218 | 
1219 | 
1220 | int* compile_src_code
1221 | (
1222 |    struct dependency_items* dep_itemsp,   
1223 |    const char* src_code
1224 | )
1225 | {
1226 |  
1227 |    // 初始化符号表 
1228 |    init_symbol_table();
1229 | 
1230 |    // 注入依赖
1231 |    inject_dependency(dep_itemsp);
1232 |    
1233 |    // 解析源代码
1234 |    prepare_for_tokenization(src_code, symbols);
1235 | 
1236 |    // 解析代码块
1237 |    parse_block_code();
1238 | 
1239 |    // 手动添加退出代码
1240 |    *++text = EXIT;
1241 | 
1242 |    // 重定位文本段
1243 |    int* relocated_code = relocation(text_start, text, data_start, data);
1244 | 
1245 |    // 为编译下一个代码块初始化编译环境:text段和data段
1246 |    reset_complie_environment();
1247 | 
1248 |    return relocated_code;
1249 | }
1250 | 
1251 | 
1252 | static void inject_dependency(struct dependency_items* dep_itemsp)
1253 | {
1254 |    int num_dep_items = dep_itemsp->num_items;
1255 |    struct dependency* items = dep_itemsp->items;
1256 | 
1257 |    int i;
1258 |    for (i=0; i<num_dep_items; i++){
1259 |       src = items[i].var_name;
1260 | 
1261 |       prepare_for_tokenization(src, symbols);
1262 | 
1263 |       next();
1264 | 
1265 |       current_id[Class] = Ext;
1266 |       current_id[Type] = items[i].var_type;
1267 |       current_id[Value] = (int)items[i].var_addr;
1268 |    }
1269 | }
1270 | 
1271 | 
1272 | static void reset_complie_environment()
1273 | {
1274 |     memset(text, 0, (text - text_start) * sizeof(int));
1275 |     memset(data, 0, data - data_start);
1276 |     data = (char*)data_start;
1277 |     text = (int*)text_start;
1278 | }
1279 | 
1280 | 
1281 | static int emit_store_directive(int type)
1282 | {
1283 |    // 如果变量类型是指针类型的，先将保存其地址
1284 |    // 最后等expr_type减为其基本类型时再用正确的
1285 |    // 存储指令保存相应类型的值
1286 |    if (type >= PTR) return SI;
1287 | 
1288 |    return  (type == CHAR) ? SC : 
1289 |            (type == INT) ? SI : 
1290 |            (type == FLOAT) ? SF: SD;
1291 | }
1292 | 
1293 | 
1294 | static int emit_load_directive(int type)
1295 | {
1296 |    // 如果变量类型是指针类型的要先将加载其地址，最后等expr_type
1297 |    // 减为其基本类型时再用相应的加载指令加载相应类型的值，例如
1298 |    // f的类型是float**，那么为表达式**f产生的指令序列如下所示:
1299 |    // IMM f_addr LI LI LF 
1300 |    if (type >= PTR) return LI;
1301 | 
1302 |    // 处理基本类型char int float double
1303 |    return  (type == CHAR) ? LC : 
1304 |            (type == INT) ? LI :
1305 |            (type == FLOAT) ? LF : LD;
1306 |    
1307 | }
1308 | 
1309 | 
1310 | static int type_of_token(int token)
1311 | {
1312 |     return (token == Char) ? CHAR : 
1313 |            (token == Int) ? INT :
1314 |            (token == Float) ? FLOAT : DOUBLE; 
1315 | 
1316 | }
1317 | 
1318 | 
1319 | static void load_real_number_constant(double float_const)
1320 | {
1321 |     //加载浮点常量
1322 |     double* addr;
1323 | 
1324 |     *++text = FIMM;
1325 |     addr = (double*)(text + 1);
1326 |     *addr = float_const;
1327 | 
1328 |     // 内部浮点数常量使用double类型存储占8个字节，因为text是
1329 |     // int*类型的text+2相当于是偏移了8个字节的大小
1330 |     text += 2;
1331 | }
1332 | 
1333 | 
1334 | static void load_integral_number_constant(int int_const)
1335 | {
1336 |     *++text = IMM;
1337 |     *++text = int_const;
1338 | }
1339 | 
1340 | 
1341 | static int get_base_type(int type)
1342 | {
1343 |     // CHAR INT FLOAT DOUBLE PTR
1344 |     // 因为PTR的值最大，其它非基本类型都是4个基本类型加上若干个
1345 |     // PTR得到的，因此可以通过取摸来去除指针类型得到基本类型
1346 |     return (type % PTR); 
1347 | }
1348 | 
1349 | 
1350 | static Boolean does_operate_on_constant()
1351 | {
1352 |   // TODO 这个条件只是必要条件，即如果一个操作符号
1353 |   // 正在操作一个常量的话，那么在该函数被调用的时候
1354 |   // 命令序列应该满足下面的条件，但是还没有找到充分
1355 |   // 条件
1356 |   return (*(text-1) == IMM || *(text-2) == FIMM);
1357 | }
1358 | 
1359 | 
1360 | static void emit_code_for_binary_left
1361 | (
1362 |    int** reserve1,
1363 |    int** reserve2
1364 | )
1365 | {
1366 |     printf("+ left type %d\n", expr_type);
1367 |     if (expr_type == FLOAT || expr_type == DOUBLE){
1368 |       // 将加载到bx的数压人到fsp栈中
1369 |         *++text = PUSF;
1370 |      }else{
1371 |       // 如果后面的表达式的类型是浮点的话，需要修改指令
1372 |         *++text = NOP;
1373 |         *reserve1 = text;
1374 |         *++text = PUSH; 
1375 |         *reserve2 = text;
1376 |      }        
1377 | }
1378 | 
1379 | 
1380 | static void emit_code_for_binary_right
1381 | (
1382 |    int operator_for_real_number,
1383 |    int operator_for_integral_number,
1384 |    int** reserve1,
1385 |    int** reserve2
1386 | )
1387 | {
1388 |      printf("+ right type %d\n", expr_type);
1389 |      if (expr_type == FLOAT || expr_type == DOUBLE){
1390 |          // 左边的表达式如果是整型的话需要使用ATOB将ax的值转换
1391 |          // 成double类型存放bx中, 指令修改如下
1392 |          if (*reserve1 != NULL){
1393 |               *(*reserve1) = ATOB;
1394 |               *(*reserve2) = PUSF;
1395 |            }
1396 | 
1397 |            expr_type = DOUBLE;
1398 |            *++text = operator_for_real_number;  
1399 |       }else{
1400 |           // 前面的是浮点，后面是整型
1401 |           if (*reserve1 == NULL){
1402 |                // 直接将ax的数值转型并存放在bx中，前面的操作数已经压人
1403 |                // fsp栈中了
1404 |                *++text = ATOB; 
1405 |                *++text = operator_for_real_number;  
1406 |                expr_type = DOUBLE;
1407 |            }else{
1408 |             // 两个操作数类型都是整型的
1409 |                *++text = operator_for_integral_number;  
1410 |                expr_type = INT;
1411 |           }
1412 |      }
1413 | }
1414 | 
1415 | 
1416 | // 检测赋值"left_type = right_type"和转型"(left_type)right_type"
1417 | // 转型也可以看成是一种赋值例如int(10.5) + 10.0 结果是20而不是
1418 | // 20.5，所以这里用同一个函数进行检测
1419 | static void check_assignment_types(int left_type, int right_type)
1420 | {
1421 |     if (left_type == right_type) return;
1422 |  
1423 |     // 为了安全起见不同类型的指针以及基本类型和指针类型不能相互赋值
1424 |     // 和转型，因此这里的转型是"受限的转型"
1425 |     if (left_type >= PTR || right_type >= PTR){
1426 |         
1427 |         //
1428 |         if ( left_type < 2*PTR && right_type < 2*PTR) return;
1429 | 
1430 |         char left_str_repr[64], right_str_repr[64];
1431 |         numtype_to_strtype(left_type, left_str_repr);
1432 |         numtype_to_strtype(right_type, right_str_repr);
1433 |         printf("%d: bad types in assignment or cast:\n", line);
1434 |         printf("left type: %s, right type: %s\n", left_str_repr, right_str_repr);
1435 |         exit(-1);
1436 |     }
1437 | 
1438 |     // 赋值左边是浮点型，右边是整型
1439 |     if ((left_type == FLOAT || left_type == DOUBLE) &&
1440 |         (right_type == INT || right_type == CHAR)){
1441 | 
1442 |             *++text = ATOB; 
1443 |         
1444 |      }else if ((left_type == INT || left_type == CHAR) &&
1445 |               (right_type == FLOAT || right_type == DOUBLE)){
1446 |     // 赋值左边是整型，右边是浮点型
1447 | 
1448 |             *++text = BTOA; 
1449 |      }else{
1450 |     // 其它情况的都是允许的而且不需要进行额外的工作 
1451 |      }
1452 | }
1453 | 
1454 | 
1455 | static void numtype_to_strtype(int num_type, char* repr)
1456 | {
1457 |     int type = num_type;
1458 |     int pointer_level = 0;
1459 |     while (type > PTR){
1460 |        pointer_level++;
1461 |        type -= PTR;
1462 |     }
1463 | 
1464 |     char* base_type = type == CHAR ?  "char" :
1465 |                       type == INT  ?  "int"  :
1466 |                       type == FLOAT ? "float" : "double";
1467 | 
1468 |     strcpy(repr, base_type);
1469 |     int start_index = strlen(base_type);
1470 |     while (pointer_level){
1471 |         repr[start_index++] = '*';
1472 |         pointer_level--;
1473 |     }
1474 |     
1475 |     repr[start_index] = '\0';
1476 | }
1477 | 


--------------------------------------------------------------------------------