├── LICENSE ├── src ├── sysy.l ├── main.cpp ├── sysy.y ├── RISCV.h └── AST.h ├── CMakeLists.txt └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 George M 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/src/sysy.l:
--------------------------------------------------------------------------------
%option noyywrap
%option nounput
%option noinput
%option yylineno

%{

#include <cstdlib>
#include <string>
#include "sysy.tab.hpp"
using namespace std;

%}

/*
 * Lexer for the SysY language.
 *
 * - "%option yylineno" makes flex maintain the current line number; the
 *   parser's yyerror() prints yylineno, which would otherwise never advance.
 * - WhiteSpace uses "+" so that the pattern can never match the empty string.
 * - BlockComment is the usual C comment pattern: any run of non-star
 *   characters, or stars followed by something that is neither a star nor a
 *   slash, closed by one or more stars and a slash.  The previous pattern
 *   could swallow a star that belonged to the terminator, so comments such
 *   as a slash followed by three stars and a slash were not recognised.
 * - All three integer forms are converted with strtol(..., 0), which picks
 *   the base from the literal's own prefix.
 * - Keywords are listed before Identifier so they win on equal-length matches.
 * - Any other single character is returned to the parser as its own token.
 */

WhiteSpace    [ \t\n\r]+
LineComment   "//".*
BlockComment  "/*"([^*]|\*+[^*/])*\*+"/"

Identifier    [a-zA-Z_][a-zA-Z0-9_]*

Decimal       [1-9][0-9]*
Octal         0[0-7]*
Hexadecimal   0[xX][0-9a-fA-F]+

%%

{WhiteSpace}    { }
{LineComment}   { }
{BlockComment}  { }

"int"           { return INT; }
"void"          { return VOID; }
"return"        { return RETURN; }
"const"         { return CONST; }
"if"            { return IF; }
"else"          { return ELSE; }
"while"         { return WHILE; }
"break"         { return BREAK; }
"continue"      { return CONTINUE; }

{Identifier}    { yylval.str_val = new string(yytext); return IDENT; }

{Decimal}       { yylval.int_val = strtol(yytext, nullptr, 0); return INT_CONST; }
{Octal}         { yylval.int_val = strtol(yytext, nullptr, 0); return INT_CONST; }
{Hexadecimal}   { yylval.int_val = strtol(yytext, nullptr, 0); return INT_CONST; }

"<="            { return LE; }
">="            { return GE; }
"=="            { return EQ; }
"!="            { return NE; }
"&&"            { return AND; }
"||"            { return OR; }

.
{ return yytext[0]; } 54 | 55 | %% 56 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "AST.h" 8 | #include "RISCV.h" 9 | #include "koopa.h" 10 | #define _SUB_MODE 11 | using namespace std; 12 | 13 | 14 | extern FILE *yyin; 15 | extern int yyparse(unique_ptr &ast); 16 | 17 | 18 | int main(int argc, const char *argv[]) 19 | { 20 | assert(argc == 5); 21 | auto mode = argv[1]; 22 | auto input = argv[2]; 23 | auto output = argv[4]; 24 | 25 | yyin = fopen(input, "r"); 26 | #ifdef _SUB_MODE 27 | freopen(output, "w", stdout); 28 | #endif 29 | assert(yyin); 30 | 31 | unique_ptr ast; 32 | auto ret = yyparse(ast); 33 | assert(!ret); 34 | 35 | if (string(mode) == "-koopa")ast->dumpIR(); 36 | else if (string(mode) == "-riscv" || string(mode) == "-perf") 37 | { 38 | stringstream ss; 39 | streambuf* cout_buf = cout.rdbuf(); 40 | cout.rdbuf(ss.rdbuf()); 41 | ast->dumpIR(); 42 | string ir_str = ss.str(); 43 | const char *ir = ir_str.data(); 44 | cout.rdbuf(cout_buf); 45 | koopa_program_t program; 46 | koopa_error_code_t ret = koopa_parse_from_string(ir, &program); 47 | assert(ret == KOOPA_EC_SUCCESS); 48 | koopa_raw_program_builder_t builder = koopa_new_raw_program_builder(); 49 | koopa_raw_program_t raw = koopa_build_raw_program(builder, program); 50 | koopa_delete_program(program); 51 | freopen(output, "w", stdout); 52 | Visit(raw); 53 | koopa_delete_raw_program_builder(builder); 54 | } 55 | else if (string(mode) == "-test")ast->dump(); 56 | else cout << "NotImplementedError" << endl; 57 | cout << endl; 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13) 2 | 
project(compiler) 3 | 4 | # settings 5 | # set to OFF to enable C mode 6 | set(CPP_MODE ON) 7 | if(CPP_MODE) 8 | set(FB_EXT ".cpp") 9 | else() 10 | set(FB_EXT ".c") 11 | endif() 12 | message(STATUS "Flex/Bison generated source file extension: ${FB_EXT}") 13 | 14 | # enable all warnings 15 | if(MSVC) 16 | add_compile_options(/W3) 17 | else() 18 | # disable warnings caused by old version of Flex 19 | add_compile_options(-Wall -Wno-register) 20 | endif() 21 | 22 | # options about libraries and includes 23 | set(LIB_DIR "$ENV{CDE_LIBRARY_PATH}/native" CACHE STRING "directory of libraries") 24 | set(INC_DIR "$ENV{CDE_INCLUDE_PATH}" CACHE STRING "directory of includes") 25 | message(STATUS "Library directory: ${LIB_DIR}") 26 | message(STATUS "Include directory: ${INC_DIR}") 27 | 28 | # find Flex/Bison 29 | find_package(FLEX REQUIRED) 30 | find_package(BISON REQUIRED) 31 | 32 | # generate lexer/parser 33 | file(GLOB_RECURSE L_SOURCES "src/*.l") 34 | file(GLOB_RECURSE Y_SOURCES "src/*.y") 35 | if(NOT (L_SOURCES STREQUAL "" AND Y_SOURCES STREQUAL "")) 36 | string(REGEX REPLACE ".*/(.*)\\.l" "${CMAKE_CURRENT_BINARY_DIR}/\\1.lex${FB_EXT}" L_OUTPUTS "${L_SOURCES}") 37 | string(REGEX REPLACE ".*/(.*)\\.y" "${CMAKE_CURRENT_BINARY_DIR}/\\1.tab${FB_EXT}" Y_OUTPUTS "${Y_SOURCES}") 38 | flex_target(Lexer ${L_SOURCES} ${L_OUTPUTS}) 39 | bison_target(Parser ${Y_SOURCES} ${Y_OUTPUTS} COMPILE_FLAGS -v) 40 | add_flex_bison_dependency(Lexer Parser) 41 | endif() 42 | 43 | # project link directories 44 | link_directories(${LIB_DIR}) 45 | 46 | # project include directories 47 | include_directories(src) 48 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 49 | include_directories(${INC_DIR}) 50 | 51 | # all of C/C++ source files 52 | file(GLOB_RECURSE C_SOURCES "src/*.c") 53 | file(GLOB_RECURSE CXX_SOURCES "src/*.cpp") 54 | file(GLOB_RECURSE CC_SOURCES "src/*.cc") 55 | set(SOURCES ${C_SOURCES} ${CXX_SOURCES} ${CC_SOURCES} 56 | ${FLEX_Lexer_OUTPUTS} ${BISON_Parser_OUTPUT_SOURCE}) 57 | 58 | # 
executable
add_executable(compiler ${SOURCES})
set_target_properties(compiler PROPERTIES C_STANDARD 11 CXX_STANDARD 17)
target_link_libraries(compiler koopa pthread dl)
--------------------------------------------------------------------------------
/src/sysy.y:
--------------------------------------------------------------------------------
%code requires {
  #include <memory>
  #include <string>
  #include <vector>
  #include "AST.h"
}

%{

#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "AST.h"

int yylex();
void yyerror(std::unique_ptr<BaseAST> &ast, const char *s);

using namespace std;

/*
 * Ownership helpers for semantic values.
 *
 * Semantic values travel through the parser as raw pointers (see %union).
 * Each action below takes ownership of the values of its right-hand side:
 * nodes are wrapped in unique_ptr, strings are copied out and freed, and
 * lists are emptied into the new node and then freed.  Previously the list
 * containers themselves were never deleted and leaked on every reduction.
 */
using AstList = vector<unique_ptr<BaseAST>>;

/* Wrap a raw AST node in an owning pointer. */
static unique_ptr<BaseAST> own_node(BaseAST *node)
{
  return unique_ptr<BaseAST>(node);
}

/* Return the contents of a heap-allocated string and free the allocation. */
static string take_text(string *text)
{
  unique_ptr<string> guard(text);
  return std::move(*guard);
}

/* Move every element of src to the back of dst, then free src itself. */
template <typename Dst>
static void drain_list(AstList *src, Dst &dst)
{
  unique_ptr<AstList> guard(src);
  for (auto &item : *guard)
    dst.push_back(std::move(item));
}

/* Append node to list (taking ownership of node) and return list. */
static AstList *append_node(AstList *list, BaseAST *node)
{
  list->push_back(own_node(node));
  return list;
}

%}

%parse-param { std::unique_ptr<BaseAST> &ast }

%union {
  std::string *str_val;
  int int_val;
  BaseAST *ast_val;
  std::vector<std::unique_ptr<BaseAST>> *vec_val;
}

%token INT VOID RETURN CONST IF ELSE WHILE BREAK CONTINUE
%token <str_val> IDENT
%token <int_val> INT_CONST
%token LE GE EQ NE AND OR

%type <str_val> UNARYOP MULOP ADDOP RELOP EQOP ANDOP OROP
%type <ast_val> FuncDef Block Stmt Exp PrimaryExp UnaryExp AddExp
%type <ast_val> MulExp RelExp EqExp LAndExp LOrExp Decl ConstDecl ConstDef
%type <ast_val> ConstInitVal BlockItem ConstExp VarDecl VarDef InitVal
%type <ast_val> SimpleStmt OpenStmt ClosedStmt CompUnitList FuncFParam
%type <vec_val> BlockItemList ConstDefList VarDefList FuncFParams FuncRParams
%type <vec_val> ConstExpList ConstInitValList InitValList ExpList
%type <int_val> Number
%type <str_val> LVal Type

%%

/* The finished compilation unit is handed to the caller through `ast`. */
CompUnit
  : CompUnitList {
    ast = own_node($1);
  }
  ;

/* A compilation unit is a non-empty sequence of functions and declarations. */
CompUnitList
  : FuncDef {
    auto comp_unit = new CompUnitAST();
    comp_unit->func_def_list.push_back(own_node($1));
    $$ = comp_unit;
  }
  | Decl {
    auto comp_unit = new CompUnitAST();
    comp_unit->decl_list.push_back(own_node($1));
    $$ = comp_unit;
  }
  | CompUnitList FuncDef {
    auto comp_unit = static_cast<CompUnitAST*>($1);
    comp_unit->func_def_list.push_back(own_node($2));
    $$ = comp_unit;
  }
  | CompUnitList Decl {
    auto comp_unit = static_cast<CompUnitAST*>($1);
    comp_unit->decl_list.push_back(own_node($2));
    $$ = comp_unit;
  }
  ;

FuncDef
  : Type IDENT '(' ')' Block {
    auto func_def = new FuncDefAST();
    func_def->func_type = take_text($1);
    func_def->ident = take_text($2);
    func_def->block = own_node($5);
    $$ = func_def;
  }
  | Type IDENT '(' FuncFParams ')' Block {
    auto func_def = new FuncDefAST();
    func_def->func_type = take_text($1);
    func_def->ident = take_text($2);
    drain_list($4, func_def->params);
    func_def->block = own_node($6);
    // The body block records the name of the function it belongs to.
    static_cast<BlockAST*>(func_def->block.get())->func = func_def->ident;
    $$ = func_def;
  }
  ;

FuncFParams
  : FuncFParam {
    $$ = append_node(new AstList, $1);
  }
  | FuncFParams ',' FuncFParam {
    $$ = append_node($1, $3);
  }
  ;

/* Scalar parameter, or array parameter whose first dimension is omitted. */
FuncFParam
  : Type IDENT {
    auto param = new FuncFParamAST();
    param->type = FuncFParamType::var;
    param->b_type = take_text($1);
    param->ident = take_text($2);
    $$ = param;
  }
  | Type IDENT '[' ']' {
    auto param = new FuncFParamAST();
    param->type = FuncFParamType::list;
    param->b_type = take_text($1);
    param->ident = take_text($2);
    $$ = param;
  }
  | Type IDENT '[' ']' ConstExpList {
    auto param = new FuncFParamAST();
    param->type = FuncFParamType::list;
    param->b_type = take_text($1);
    param->ident = take_text($2);
    drain_list($5, param->const_exp_list);
    $$ = param;
  }
  ;

FuncRParams
  : Exp {
    $$ = append_node(new AstList, $1);
  }
  | FuncRParams ',' Exp {
    $$ = append_node($1, $3);
  }
  ;

Block
  : '{' BlockItemList '}' {
    auto block = new BlockAST();
    drain_list($2, block->block_item_list);
    $$ = block;
  }
  ;

/*
 * Statements are split into "open" and "closed" forms so that an `else`
 * always binds to the nearest unmatched `if`.
 */
Stmt
  : OpenStmt {
    $$ = $1;
  }
  | ClosedStmt {
    $$ = $1;
  }
  ;

ClosedStmt
  : SimpleStmt {
    auto stmt = new StmtAST();
    stmt->type = StmtType::simple;
    stmt->exp_simple = own_node($1);
    $$ = stmt;
  }
  | IF '(' Exp ')' ClosedStmt ELSE ClosedStmt {
    auto stmt = new StmtAST();
    stmt->type = StmtType::ifelse;
    stmt->exp_simple = own_node($3);
    stmt->if_stmt = own_node($5);
    stmt->else_stmt = own_node($7);
    $$ = stmt;
  }
  | WHILE '(' Exp ')' ClosedStmt {
    auto stmt = new StmtAST();
    stmt->type = StmtType::while_;
    stmt->exp_simple = own_node($3);
    stmt->while_stmt = own_node($5);
    $$ = stmt;
  }
  ;

OpenStmt
  : IF '(' Exp ')' Stmt {
    auto stmt = new StmtAST();
    stmt->type = StmtType::if_;
    stmt->exp_simple = own_node($3);
    stmt->if_stmt = own_node($5);
    $$ = stmt;
  }
  | IF '(' Exp ')' ClosedStmt ELSE OpenStmt {
    auto stmt = new StmtAST();
    stmt->type = StmtType::ifelse;
    stmt->exp_simple = own_node($3);
    stmt->if_stmt = own_node($5);
    stmt->else_stmt = own_node($7);
    $$ = stmt;
  }
  | WHILE '(' Exp ')' OpenStmt {
    auto stmt = new StmtAST();
    stmt->type = StmtType::while_;
    stmt->exp_simple = own_node($3);
    stmt->while_stmt = own_node($5);
    $$ = stmt;
  }
  ;

SimpleStmt
  : RETURN Exp ';' {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::ret;
    stmt->block_exp = own_node($2);
    $$ = stmt;
  }
  | RETURN ';' {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::ret;
    stmt->block_exp = nullptr;
    $$ = stmt;
  }
  | LVal '=' Exp ';' {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::lval;
    stmt->lval = take_text($1);
    stmt->block_exp = own_node($3);
    $$ = stmt;
  }
  | IDENT ExpList '=' Exp ';' {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::list;
    stmt->lval = take_text($1);
    drain_list($2, stmt->exp_list);
    stmt->block_exp = own_node($4);
    $$ = stmt;
  }
  | Block {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::block;
    stmt->block_exp = own_node($1);
    $$ = stmt;
  }
  | Exp ';' {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::exp;
    stmt->block_exp = own_node($1);
    $$ = stmt;
  }
  | ';' {
    // Empty statement: an expression statement without an expression.
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::exp;
    stmt->block_exp = nullptr;
    $$ = stmt;
  }
  | BREAK ';' {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::break_;
    $$ = stmt;
  }
  | CONTINUE ';' {
    auto stmt = new SimpleStmtAST();
    stmt->type = SimpleStmtType::continue_;
    $$ = stmt;
  }
  ;

Exp
  : LOrExp {
    auto exp = new ExpAST();
    exp->l_or_exp = own_node($1);
    $$ = exp;
  }
  ;

/*
 * Binary expression levels, lowest precedence first.  In every level the
 * single-operand alternative stores an empty operator string.
 */
LOrExp
  : LAndExp {
    auto l_or_exp = new LOrExpAST();
    l_or_exp->op = "";
    l_or_exp->l_and_exp = own_node($1);
    $$ = l_or_exp;
  }
  | LOrExp OROP LAndExp {
    auto l_or_exp = new LOrExpAST();
    l_or_exp->l_or_exp = own_node($1);
    l_or_exp->op = take_text($2);
    l_or_exp->l_and_exp = own_node($3);
    $$ = l_or_exp;
  }
  ;

LAndExp
  : EqExp {
    auto l_and_exp = new LAndExpAST();
    l_and_exp->op = "";
    l_and_exp->eq_exp = own_node($1);
    $$ = l_and_exp;
  }
  | LAndExp ANDOP EqExp {
    auto l_and_exp = new LAndExpAST();
    l_and_exp->l_and_exp = own_node($1);
    l_and_exp->op = take_text($2);
    l_and_exp->eq_exp = own_node($3);
    $$ = l_and_exp;
  }
  ;

EqExp
  : RelExp {
    auto eq_exp = new EqExpAST();
    eq_exp->op = "";
    eq_exp->rel_exp = own_node($1);
    $$ = eq_exp;
  }
  | EqExp EQOP RelExp {
    auto eq_exp = new EqExpAST();
    eq_exp->eq_exp = own_node($1);
    eq_exp->op = take_text($2);
    eq_exp->rel_exp = own_node($3);
    $$ = eq_exp;
  }
  ;

RelExp
  : AddExp {
    auto rel_exp = new RelExpAST();
    rel_exp->op = "";
    rel_exp->add_exp = own_node($1);
    $$ = rel_exp;
  }
  | RelExp RELOP AddExp {
    auto rel_exp = new RelExpAST();
    rel_exp->rel_exp = own_node($1);
    rel_exp->op = take_text($2);
    rel_exp->add_exp = own_node($3);
    $$ = rel_exp;
  }
  ;

AddExp
  : MulExp {
    auto add_exp = new AddExpAST();
    add_exp->op = "";
    add_exp->mul_exp = own_node($1);
    $$ = add_exp;
  }
  | AddExp ADDOP MulExp {
    auto add_exp = new AddExpAST();
    add_exp->add_exp = own_node($1);
    add_exp->op = take_text($2);
    add_exp->mul_exp = own_node($3);
    $$ = add_exp;
  }
  ;

MulExp
  : UnaryExp {
    auto mul_exp = new MulExpAST();
    mul_exp->op = "";
    mul_exp->unary_exp = own_node($1);
    $$ = mul_exp;
  }
  | MulExp MULOP UnaryExp {
    auto mul_exp = new MulExpAST();
    mul_exp->mul_exp = own_node($1);
    mul_exp->op = take_text($2);
    mul_exp->unary_exp = own_node($3);
    $$ = mul_exp;
  }
  ;

UnaryExp
  : PrimaryExp {
    auto unary_exp = new UnaryExpAST();
    unary_exp->type = UnaryExpType::primary;
    unary_exp->exp = own_node($1);
    $$ = unary_exp;
  }
  | UNARYOP UnaryExp {
    auto unary_exp = new UnaryExpAST();
    unary_exp->type = UnaryExpType::unary;
    unary_exp->op = take_text($1);
    unary_exp->exp = own_node($2);
    $$ = unary_exp;
  }
  | IDENT '(' ')' {
    auto unary_exp = new UnaryExpAST();
    unary_exp->type = UnaryExpType::func_call;
    unary_exp->ident = take_text($1);
    $$ = unary_exp;
  }
  | IDENT '(' FuncRParams ')' {
    auto unary_exp = new UnaryExpAST();
    unary_exp->type = UnaryExpType::func_call;
    unary_exp->ident = take_text($1);
    drain_list($3, unary_exp->params);
    $$ = unary_exp;
  }
  ;

PrimaryExp
  : '(' Exp ')' {
    auto primary_exp = new PrimaryExpAST();
    primary_exp->type = PrimaryExpType::exp;
    primary_exp->exp = own_node($2);
    $$ = primary_exp;
  }
  | Number {
    auto primary_exp = new PrimaryExpAST();
    primary_exp->type = PrimaryExpType::number;
    primary_exp->number = $1;
    $$ = primary_exp;
  }
  | LVal {
    auto primary_exp = new PrimaryExpAST();
    primary_exp->type = PrimaryExpType::lval;
    primary_exp->lval = take_text($1);
    $$ = primary_exp;
  }
  | IDENT ExpList {
    auto primary_exp = new PrimaryExpAST();
    primary_exp->type = PrimaryExpType::list;
    primary_exp->lval = take_text($1);
    drain_list($2, primary_exp->exp_list);
    $$ = primary_exp;
  }
  ;

Decl
  : ConstDecl {
    auto decl = new DeclAST();
    decl->type = DeclType::const_decl;
    decl->decl = own_node($1);
    $$ = decl;
  }
  | VarDecl {
    auto decl = new DeclAST();
    decl->type = DeclType::var_decl;
    decl->decl = own_node($1);
    $$ = decl;
  }
  ;

ConstDecl
  : CONST Type ConstDefList ';' {
    auto const_decl = new ConstDeclAST();
    const_decl->b_type = take_text($2);
    drain_list($3, const_decl->const_def_list);
    $$ = const_decl;
  }
  ;

ConstDef
  : IDENT '=' ConstInitVal {
    auto const_def = new ConstDefAST();
    const_def->ident = take_text($1);
    const_def->const_init_val = own_node($3);
    $$ = const_def;
  }
  | IDENT ConstExpList '=' ConstInitVal {
    auto const_def = new ConstDefAST();
    const_def->ident = take_text($1);
    drain_list($2, const_def->const_exp_list);
    const_def->const_init_val = own_node($4);
    $$ = const_def;
  }
  ;

ConstInitVal
  : ConstExp {
    auto const_init_val = new ConstInitValAST();
    const_init_val->type = ConstInitValType::const_exp;
    const_init_val->const_exp = own_node($1);
    $$ = const_init_val;
  }
  | '{' '}' {
    auto const_init_val = new ConstInitValAST();
    const_init_val->type = ConstInitValType::list;
    $$ = const_init_val;
  }
  | '{' ConstInitValList '}' {
    auto const_init_val = new ConstInitValAST();
    const_init_val->type = ConstInitValType::list;
    drain_list($2, const_init_val->const_init_val_list);
    $$ = const_init_val;
  }
  ;

BlockItem
  : Decl {
    auto block_item = new BlockItemAST();
    block_item->type = BlockItemType::decl;
    block_item->content = own_node($1);
    $$ = block_item;
  }
  | Stmt {
    auto block_item = new BlockItemAST();
    block_item->type = BlockItemType::stmt;
    block_item->content = own_node($1);
    $$ = block_item;
  }
  ;

ConstExp
  : Exp {
    auto const_exp = new ConstExpAST();
    const_exp->exp = own_node($1);
    $$ = const_exp;
  }
  ;

VarDecl
  : Type VarDefList ';' {
    auto var_decl = new VarDeclAST();
    var_decl->b_type = take_text($1);
    drain_list($2, var_decl->var_def_list);
    $$ = var_decl;
  }
  ;

/* Scalar or array definition, each with or without an initialiser. */
VarDef
  : IDENT {
    auto var_def = new VarDefAST();
    var_def->ident = take_text($1);
    var_def->has_init_val = false;
    $$ = var_def;
  }
  | IDENT '=' InitVal {
    auto var_def = new VarDefAST();
    var_def->ident = take_text($1);
    var_def->has_init_val = true;
    var_def->init_val = own_node($3);
    $$ = var_def;
  }
  | IDENT ConstExpList {
    auto var_def = new VarDefAST();
    var_def->ident = take_text($1);
    var_def->has_init_val = false;
    drain_list($2, var_def->exp_list);
    $$ = var_def;
  }
  | IDENT ConstExpList '=' InitVal {
    auto var_def = new VarDefAST();
    var_def->ident = take_text($1);
    var_def->has_init_val = true;
    drain_list($2, var_def->exp_list);
    var_def->init_val = own_node($4);
    $$ = var_def;
  }
  ;

InitVal
  : Exp {
    auto init_val = new InitValAST();
    init_val->type = InitValType::exp;
    init_val->exp = own_node($1);
    $$ = init_val;
  }
  | '{' '}' {
    auto init_val = new InitValAST();
    init_val->type = InitValType::list;
    $$ = init_val;
  }
  | '{' InitValList '}' {
    auto init_val = new InitValAST();
    init_val->type = InitValType::list;
    drain_list($2, init_val->init_val_list);
    $$ = init_val;
  }
  ;

/*
 * List non-terminals.  Each one builds a heap-allocated AstList that the
 * consuming rule empties and frees with drain_list().
 */
BlockItemList
  : /* empty */ {
    $$ = new AstList;
  }
  | BlockItemList BlockItem {
    $$ = append_node($1, $2);
  }
  ;

ConstDefList
  : ConstDef {
    $$ = append_node(new AstList, $1);
  }
  | ConstDefList ',' ConstDef {
    $$ = append_node($1, $3);
  }
  ;

VarDefList
  : VarDef {
    $$ = append_node(new AstList, $1);
  }
  | VarDefList ',' VarDef {
    $$ = append_node($1, $3);
  }
  ;

ConstExpList
  : '[' ConstExp ']' {
    $$ = append_node(new AstList, $2);
  }
  | ConstExpList '[' ConstExp ']' {
    $$ = append_node($1, $3);
  }
  ;

ExpList
  : '[' Exp ']' {
    $$ = append_node(new AstList, $2);
  }
  | ExpList '[' Exp ']' {
    $$ = append_node($1, $3);
  }
  ;

ConstInitValList
  : ConstInitVal {
    $$ = append_node(new AstList, $1);
  }
  | ConstInitValList ',' ConstInitVal {
    $$ = append_node($1, $3);
  }
  ;

InitValList
  : InitVal {
    $$ = append_node(new AstList, $1);
  }
  | InitValList ',' InitVal {
    $$ = append_node($1, $3);
  }
  ;

Number
  : INT_CONST {
    $$ = $1;
  }
  ;

LVal
  : IDENT {
    // The identifier string allocated by the lexer is passed on unchanged.
    $$ = $1;
  }
  ;

Type
  : INT {
    $$ = new string("int");
  }
  | VOID {
    $$ = new string("void");
  }
  ;

UNARYOP
  : '+' {
    $$ = new string("+");
  }
  | '-' {
    $$ = new string("-");
  }
  | '!'
{ 732 | string *op = new string("!"); 733 | $$ = op; 734 | } 735 | ; 736 | 737 | MULOP 738 | : '*' { 739 | string *op = new string("*"); 740 | $$ = op; 741 | } 742 | | '/' { 743 | string *op = new string("/"); 744 | $$ = op; 745 | } 746 | | '%' { 747 | string *op = new string("%"); 748 | $$ = op; 749 | } 750 | ; 751 | 752 | ADDOP 753 | : '+' { 754 | string *op = new string("+"); 755 | $$ = op; 756 | } 757 | | '-' { 758 | string *op = new string("-"); 759 | $$ = op; 760 | } 761 | ; 762 | 763 | RELOP 764 | : LE { 765 | string *op = new string("<="); 766 | $$ = op; 767 | } 768 | | GE { 769 | string *op = new string(">="); 770 | $$ = op; 771 | } 772 | | '<' { 773 | string *op = new string("<"); 774 | $$ = op; 775 | } 776 | | '>' { 777 | string *op = new string(">"); 778 | $$ = op; 779 | } 780 | ; 781 | 782 | EQOP 783 | : EQ { 784 | string *op = new string("=="); 785 | $$ = op; 786 | } 787 | | NE { 788 | string *op = new string("!="); 789 | $$ = op; 790 | } 791 | ; 792 | 793 | ANDOP 794 | : AND { 795 | string *op = new string("&&"); 796 | $$ = op; 797 | } 798 | ; 799 | 800 | OROP 801 | : OR { 802 | string *op = new string("||"); 803 | $$ = op; 804 | } 805 | ; 806 | 807 | %% 808 | 809 | void yyerror(unique_ptr &ast, const char *s) 810 | { 811 | extern int yylineno; 812 | extern char *yytext; 813 | cerr << "ERROR: " << s << " at symbol '" << yytext << "' on line " 814 | << yylineno << endl; 815 | } 816 | -------------------------------------------------------------------------------- /src/RISCV.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "koopa.h" 8 | 9 | 10 | struct Reg { int reg_name; int reg_offset; }; 11 | std::string reg_names[16] = {"t0", "t1", "t2", "t3", "t4", "t5", "t6", 12 | "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "x0"}; 13 | koopa_raw_value_t registers[16]; 14 | int reg_stats[16] = {0}; 15 | koopa_raw_value_t 
present_value = 0; 16 | std::map value_map; 17 | int global_num = 0; 18 | std::map global_values; 19 | int stack_size = 0, stack_top = 0; 20 | bool restore_ra = false; 21 | 22 | 23 | void Visit(const koopa_raw_program_t &program); 24 | void Visit(const koopa_raw_slice_t &slice); 25 | void Visit(const koopa_raw_function_t &func); 26 | void Visit(const koopa_raw_basic_block_t &bb); 27 | void Visit(const koopa_raw_return_t &ret); 28 | Reg Visit(const koopa_raw_value_t &value); 29 | Reg Visit(const koopa_raw_integer_t &integer); 30 | Reg Visit(const koopa_raw_binary_t &binary); 31 | Reg Visit(const koopa_raw_load_t &load); 32 | void Visit(const koopa_raw_store_t &store); 33 | void Visit(const koopa_raw_branch_t &branch); 34 | void Visit(const koopa_raw_jump_t &jump); 35 | Reg Visit(const koopa_raw_call_t &call); 36 | Reg Visit(const koopa_raw_get_elem_ptr_t &get_elem_ptr); 37 | Reg Visit(const koopa_raw_get_ptr_t &get_ptr); 38 | std::string Visit(const koopa_raw_global_alloc_t &global); 39 | int find_reg(int stat); 40 | void clear_registers(bool save_temps = true); 41 | int cal_size(const koopa_raw_type_t &ty); 42 | void init_aggregate(const koopa_raw_value_t &aggr); 43 | 44 | 45 | void Visit(const koopa_raw_program_t &program) 46 | { 47 | Visit(program.values); 48 | Visit(program.funcs); 49 | } 50 | 51 | 52 | void Visit(const koopa_raw_slice_t &slice) 53 | { 54 | for (size_t i = 0; i < slice.len; i++) 55 | { 56 | auto ptr = slice.buffer[i]; 57 | switch (slice.kind) 58 | { 59 | case KOOPA_RSIK_FUNCTION: 60 | Visit(reinterpret_cast(ptr)); 61 | break; 62 | case KOOPA_RSIK_BASIC_BLOCK: 63 | Visit(reinterpret_cast(ptr)); 64 | break; 65 | case KOOPA_RSIK_VALUE: 66 | Visit(reinterpret_cast(ptr)); 67 | break; 68 | default: 69 | assert(false); 70 | } 71 | } 72 | } 73 | 74 | 75 | void Visit(const koopa_raw_function_t &func) 76 | { 77 | if (func->bbs.len == 0)return; 78 | std::cout << "\t.text" << std::endl; 79 | std::cout << "\t.globl " << (func->name + 1) << std::endl; 80 | 
std::cout << (func->name + 1) << ":" << std::endl; 81 | assert(stack_size == 0); assert(stack_top == 0); 82 | int max_arg_num = 0; 83 | for (size_t i = 0; i < func->bbs.len; i++) 84 | { 85 | auto ptr = func->bbs.buffer[i]; 86 | koopa_raw_basic_block_t bb = 87 | reinterpret_cast(ptr); 88 | for (size_t j = 0; j < bb->insts.len; j++) 89 | { 90 | ptr = bb->insts.buffer[j]; 91 | koopa_raw_value_t inst = reinterpret_cast(ptr); 92 | if (inst->ty->tag != KOOPA_RTT_UNIT) 93 | { 94 | if (inst->kind.tag == KOOPA_RVT_ALLOC) 95 | stack_size += cal_size(inst->ty->data.pointer.base); 96 | else stack_size += 4; 97 | } 98 | if (inst->kind.tag == KOOPA_RVT_CALL) 99 | { 100 | restore_ra = true; 101 | int arg_num = inst->kind.data.call.args.len; 102 | if (arg_num > max_arg_num)max_arg_num = arg_num; 103 | } 104 | } 105 | } 106 | int arg_stack_size = 0; 107 | if (max_arg_num > 8)arg_stack_size = (max_arg_num - 8) * 4; 108 | stack_size += arg_stack_size; 109 | stack_top += arg_stack_size; 110 | if (restore_ra)stack_size += 4; 111 | stack_size = ceil(stack_size / 16.0) * 16; 112 | if (stack_size > 0 && stack_size <= 2048) 113 | std::cout << "\taddi sp, sp, -" << stack_size << std::endl; 114 | else if (stack_size > 2048) 115 | { 116 | std::cout << "\tli s11, -" << stack_size << std::endl; 117 | std::cout << "\tadd sp, sp, s11" << std::endl; 118 | } 119 | if (restore_ra) 120 | { 121 | if (stack_size - 4 >= -2048 && stack_size - 4 <= 2047) 122 | std::cout << "\tsw ra, " << stack_size - 4 << "(sp)" << 123 | std::endl; 124 | else 125 | { 126 | std::cout << "\tli s11, " << stack_size - 4 << std::endl; 127 | std::cout << "\tadd s11, sp, s11" << std::endl; 128 | std::cout << "\tsw ra, (s11)" << std::endl; 129 | } 130 | } 131 | for (size_t i = 0; i < func->params.len; i++) 132 | { 133 | auto ptr = func->params.buffer[i]; 134 | koopa_raw_value_t param = reinterpret_cast(ptr); 135 | if (i < 8) 136 | { 137 | struct Reg param_var = { static_cast(i + 7), -1 }; 138 | value_map[param] = param_var; 139 | 
// reg_stats[i + 7] = 1; 140 | // registers[i + 7] = param; 141 | // for now param will only be used once at the beginning of a 142 | // function and never used again, so we don't need to maintain 143 | // the correct register states for it 144 | } 145 | else 146 | { 147 | int offset = stack_size + (i - 8) * 4; 148 | struct Reg param_var = { -1, offset }; 149 | value_map[param] = param_var; 150 | } 151 | } 152 | Visit(func->bbs); 153 | stack_size = stack_top = 0; 154 | for (int i = 0; i < 16; i++)reg_stats[i] = 0; 155 | value_map.clear(); 156 | restore_ra = false; 157 | std::cout << std::endl; 158 | } 159 | 160 | 161 | void Visit(const koopa_raw_basic_block_t &bb) 162 | { 163 | std::cout << bb->name + 1 << ":" << std::endl; 164 | Visit(bb->insts); 165 | } 166 | 167 | 168 | Reg Visit(const koopa_raw_value_t &value) 169 | { 170 | koopa_raw_value_t old_value = present_value; 171 | present_value = value; 172 | if (value_map.count(value)) 173 | { 174 | if (value_map[value].reg_name == -1) 175 | { 176 | int reg_name = find_reg(1); 177 | value_map[value].reg_name = reg_name; 178 | int reg_offset = value_map[value].reg_offset; 179 | if (reg_offset >= -2048 && reg_offset <= 2047) 180 | std::cout << "\tlw " << reg_names[reg_name] << ", " << 181 | reg_offset << "(sp)" << std::endl; 182 | else 183 | { 184 | std::cout << "\tli s11, " << reg_offset << std::endl; 185 | std::cout << "\tadd s11, sp, s11" << std::endl; 186 | std::cout << "\tlw " << reg_names[reg_name] << ", (s11)" << 187 | std::endl; 188 | } 189 | } 190 | present_value = old_value; 191 | return value_map[value]; 192 | } 193 | 194 | const auto &kind = value->kind; 195 | struct Reg result_var = {-1, -1}; 196 | switch (kind.tag) 197 | { 198 | case KOOPA_RVT_RETURN: 199 | Visit(kind.data.ret); 200 | break; 201 | case KOOPA_RVT_INTEGER: 202 | result_var = Visit(kind.data.integer); 203 | break; 204 | case KOOPA_RVT_BINARY: 205 | result_var = Visit(kind.data.binary); 206 | value_map[value] = result_var; 207 | 
assert(result_var.reg_name >= 0); 208 | break; 209 | case KOOPA_RVT_ALLOC: 210 | result_var.reg_offset = stack_top; 211 | assert(value->ty->tag == KOOPA_RTT_POINTER); 212 | stack_top += cal_size(value->ty->data.pointer.base); 213 | value_map[value] = result_var; 214 | break; 215 | case KOOPA_RVT_GLOBAL_ALLOC: 216 | global_values[value] = Visit(kind.data.global_alloc); 217 | break; 218 | case KOOPA_RVT_LOAD: 219 | result_var = Visit(kind.data.load); 220 | value_map[value] = result_var; 221 | assert(result_var.reg_name >= 0); 222 | break; 223 | case KOOPA_RVT_STORE: 224 | Visit(kind.data.store); 225 | break; 226 | case KOOPA_RVT_BRANCH: 227 | Visit(kind.data.branch); 228 | break; 229 | case KOOPA_RVT_GET_ELEM_PTR: 230 | result_var = Visit(kind.data.get_elem_ptr); 231 | value_map[value] = result_var; 232 | assert(result_var.reg_name >= 0); 233 | break; 234 | case KOOPA_RVT_GET_PTR: 235 | result_var = Visit(kind.data.get_ptr); 236 | value_map[value] = result_var; 237 | assert(result_var.reg_name >= 0); 238 | break; 239 | case KOOPA_RVT_JUMP: 240 | Visit(kind.data.jump); 241 | break; 242 | case KOOPA_RVT_CALL: 243 | result_var = Visit(kind.data.call); 244 | value_map[value] = result_var; 245 | if (value->ty->tag != KOOPA_RTT_UNIT) // has ret 246 | { 247 | registers[result_var.reg_name] = value; 248 | reg_stats[result_var.reg_name] = 1; 249 | } 250 | assert(result_var.reg_name >= 0); 251 | break; 252 | default: 253 | assert(false); 254 | } 255 | present_value = old_value; 256 | return result_var; 257 | } 258 | 259 | 260 | void Visit(const koopa_raw_return_t &ret) 261 | { 262 | koopa_raw_value_t ret_value = ret.value; 263 | if (ret_value) 264 | { 265 | struct Reg result_var = Visit(ret_value); 266 | assert(result_var.reg_name >= 0); 267 | if (result_var.reg_name != 7) 268 | std::cout << "\tmv a0, " << reg_names[result_var.reg_name] << 269 | std::endl; 270 | } 271 | clear_registers(false); 272 | if (restore_ra) 273 | { 274 | if (stack_size - 4 >= -2048 && stack_size - 4 <= 
2047) 275 | std::cout << "\tlw ra, " << stack_size - 4 << "(sp)" << 276 | std::endl; 277 | else 278 | { 279 | std::cout << "\tli t0, " << stack_size - 4 << std::endl; 280 | std::cout << "\tadd t0, sp, t0" << std::endl; 281 | std::cout << "\tlw ra, (t0)" << std::endl; 282 | } 283 | } 284 | if (stack_size > 0 && stack_size <= 2047) 285 | std::cout << "\taddi sp, sp, " << stack_size << std::endl; 286 | else if (stack_size > 2047) 287 | { 288 | std::cout << "\tli t0, " << stack_size << std::endl; 289 | std::cout << "\tadd sp, sp, t0" << std::endl; 290 | } 291 | std::cout << "\tret" << std::endl; 292 | } 293 | 294 | 295 | Reg Visit(const koopa_raw_integer_t &integer) 296 | { 297 | int32_t int_val = integer.value; 298 | struct Reg result_var = {-1, -1}; 299 | if (int_val == 0) { result_var.reg_name = 15; return result_var; } 300 | result_var.reg_name = find_reg(0); 301 | std::cout << "\tli " << reg_names[result_var.reg_name] << ", " << 302 | int_val << std::endl; 303 | return result_var; 304 | } 305 | 306 | 307 | Reg Visit(const koopa_raw_binary_t &binary) 308 | { 309 | struct Reg left_val = Visit(binary.lhs); 310 | int left_reg = left_val.reg_name; 311 | int old_stat = reg_stats[left_reg]; 312 | reg_stats[left_reg] = 2; 313 | struct Reg right_val = Visit(binary.rhs); 314 | int right_reg = right_val.reg_name; 315 | reg_stats[left_reg] = old_stat; 316 | old_stat = reg_stats[right_reg]; 317 | reg_stats[right_reg] = 2; 318 | struct Reg result_var = {find_reg(1), -1}; 319 | reg_stats[right_reg] = old_stat; 320 | std::string left_name = reg_names[left_reg]; 321 | std::string right_name = reg_names[right_reg]; 322 | std::string result_name = reg_names[result_var.reg_name]; 323 | switch (binary.op) 324 | { 325 | case 0: // ne 326 | if (right_name == "x0") 327 | { 328 | std::cout << "\tsnez " << result_name << ", " << left_name << 329 | std::endl; 330 | break; 331 | } 332 | if (left_name == "x0") 333 | { 334 | std::cout << "\tsnez " << result_name << ", " << right_name << 335 | 
std::endl; 336 | break; 337 | } 338 | std::cout << "\txor " << result_name << ", " << left_name << ", " << 339 | right_name << std::endl; 340 | std::cout << "\tsnez " << result_name << ", " << result_name << 341 | std::endl; 342 | break; 343 | case 1: // eq 344 | if (right_name == "x0") 345 | { 346 | std::cout << "\tseqz " << result_name << ", " << left_name << 347 | std::endl; 348 | break; 349 | } 350 | if (left_name == "x0") 351 | { 352 | std::cout << "\tseqz " << result_name << ", " << right_name << 353 | std::endl; 354 | break; 355 | } 356 | std::cout << "\txor " << result_name << ", " << left_name << ", " << 357 | right_name << std::endl; 358 | std::cout << "\tseqz " << result_name << ", " << result_name << 359 | std::endl; 360 | break; 361 | case 2: // gt 362 | std::cout << "\tsgt " << result_name << ", " << left_name << ", " << 363 | right_name << std::endl; 364 | break; 365 | case 3: // lt 366 | std::cout << "\tslt " << result_name << ", " << left_name << ", " << 367 | right_name << std::endl; 368 | break; 369 | case 4: // ge 370 | std::cout << "\tslt " << result_name << ", " << left_name << ", " << 371 | right_name << std::endl; 372 | std::cout << "\txori " << result_name << ", " << result_name << ", 1" 373 | << std::endl; 374 | break; 375 | case 5: // le 376 | std::cout << "\tsgt " << result_name << ", " << left_name << ", " << 377 | right_name << std::endl; 378 | std::cout << "\txori " << result_name << ", " << result_name << ", 1" 379 | << std::endl; 380 | break; 381 | case 6: // add 382 | std::cout << "\tadd " << result_name << ", " << left_name << ", " << 383 | right_name << std::endl; 384 | break; 385 | case 7: // sub 386 | std::cout << "\tsub " << result_name << ", " << left_name << ", " << 387 | right_name << std::endl; 388 | break; 389 | case 8: // mul 390 | std::cout << "\tmul " << result_name << ", " << left_name << ", " << 391 | right_name << std::endl; 392 | break; 393 | case 9: // div 394 | std::cout << "\tdiv " << result_name << ", " << 
left_name << ", " << 395 | right_name << std::endl; 396 | break; 397 | case 10: // mod 398 | std::cout << "\trem " << result_name << ", " << left_name << ", " << 399 | right_name << std::endl; 400 | break; 401 | case 11: // and 402 | std::cout << "\tand " << result_name << ", " << left_name << ", " << 403 | right_name << std::endl; 404 | break; 405 | case 12: // or 406 | std::cout << "\tor " << result_name << ", " << left_name << ", " << 407 | right_name << std::endl; 408 | break; 409 | default: 410 | assert(false); 411 | } 412 | return result_var; 413 | } 414 | 415 | 416 | Reg Visit(const koopa_raw_load_t &load) 417 | { 418 | koopa_raw_value_t src = load.src; 419 | if (src->kind.tag == KOOPA_RVT_GLOBAL_ALLOC) 420 | { 421 | int reg_name = find_reg(1); 422 | struct Reg result_var = {reg_name, -1}; 423 | std::cout << "\tla " << reg_names[reg_name] << ", " << 424 | global_values[src] << std::endl; 425 | std::cout << "\tlw " << reg_names[reg_name] << ", 0(" << 426 | reg_names[reg_name] << ")" << std::endl; 427 | return result_var; 428 | } 429 | else if (src->kind.tag == KOOPA_RVT_GET_ELEM_PTR || 430 | src->kind.tag == KOOPA_RVT_GET_PTR) 431 | { 432 | struct Reg result_var = {find_reg(2), -1}; 433 | struct Reg src_var = Visit(load.src); 434 | reg_stats[result_var.reg_name] = 1; 435 | std::cout << "\tlw " << reg_names[result_var.reg_name] << ", (" << 436 | reg_names[src_var.reg_name] << ")" << std::endl; 437 | return result_var; 438 | } 439 | // we have to make sure one offset is at most loaded to one register 440 | if (value_map[src].reg_name >= 0)return value_map[src]; 441 | int reg_name = find_reg(1), reg_offset = value_map[src].reg_offset; 442 | struct Reg result_var = {reg_name, reg_offset}; 443 | if (reg_offset >= -2048 && reg_offset <= 2047) 444 | std::cout << "\tlw " << reg_names[reg_name] << ", " << reg_offset << 445 | "(sp)" << std::endl; 446 | else 447 | { 448 | std::cout << "\tli s11, " << reg_offset << std::endl; 449 | std::cout << "\tadd s11, s11, sp" << 
std::endl; 450 | std::cout << "\tlw " << reg_names[reg_name] << ", (s11)" << 451 | std::endl; 452 | } 453 | return result_var; 454 | } 455 | 456 | 457 | void Visit(const koopa_raw_store_t &store) 458 | { 459 | struct Reg value = Visit(store.value); 460 | koopa_raw_value_t dest = store.dest; 461 | assert(value.reg_name >= 0); 462 | if (dest->kind.tag == KOOPA_RVT_GLOBAL_ALLOC) 463 | { 464 | std::cout << "\tla s11, " << global_values[dest] << std::endl; 465 | std::cout << "\tsw " << reg_names[value.reg_name] << ", 0(s11)" << 466 | std::endl; 467 | return; 468 | } 469 | else if (dest->kind.tag == KOOPA_RVT_GET_ELEM_PTR || 470 | dest->kind.tag == KOOPA_RVT_GET_PTR) 471 | { 472 | int old_stat = reg_stats[value.reg_name]; 473 | reg_stats[value.reg_name] = 2; 474 | struct Reg dest_var = Visit(dest); 475 | assert(dest_var.reg_name >= 0); 476 | reg_stats[value.reg_name] = old_stat; 477 | std::cout << "\tsw " << reg_names[value.reg_name] << ", (" << 478 | reg_names[dest_var.reg_name] << ")" << std::endl; 479 | return; 480 | } 481 | assert(value_map.count(dest)); 482 | if (value_map[dest].reg_offset == -1) 483 | { 484 | value_map[dest].reg_offset = stack_top; 485 | stack_top += 4; 486 | } 487 | else // old register loaded from reg_offset is outdated ... 488 | for (int i = 0; i < 16; i++) 489 | if (i == value.reg_name)continue; 490 | else if (reg_stats[i] > 0 && value_map[registers[i]].reg_offset == 491 | value_map[dest].reg_offset) 492 | { 493 | reg_stats[i] = 0; // ... 
so clear it and update value_map 494 | value_map[registers[i]].reg_name = value.reg_name; 495 | } 496 | int reg_name = value.reg_name, reg_offset = value_map[dest].reg_offset; 497 | if (reg_offset >= -2048 && reg_offset <= 2047) 498 | std::cout << "\tsw " << reg_names[reg_name] << ", " << reg_offset << 499 | "(sp)" << std::endl; 500 | else 501 | { 502 | std::cout << "\tli s11, " << reg_offset << std::endl; 503 | std::cout << "\tadd s11, s11, sp" << std::endl; 504 | std::cout << "\tsw " << reg_names[reg_name] << ", (s11)" << 505 | std::endl; 506 | } 507 | } 508 | 509 | 510 | void Visit(const koopa_raw_branch_t &branch) 511 | { 512 | std::string true_label = branch.true_bb->name + 1; 513 | std::string false_label = branch.false_bb->name + 1; 514 | int cond_reg = Visit(branch.cond).reg_name; 515 | clear_registers(false); 516 | std::cout << "\tbnez " << reg_names[cond_reg] << ", " << true_label 517 | << std::endl; 518 | std::cout << "\tj " << false_label << std::endl; 519 | } 520 | 521 | 522 | void Visit(const koopa_raw_jump_t &jump) 523 | { 524 | clear_registers(false); 525 | std::string target_label = jump.target->name + 1; 526 | std::cout << "\tj " << target_label << std::endl; 527 | } 528 | 529 | 530 | Reg Visit(const koopa_raw_call_t &call) 531 | { 532 | struct Reg result_var = { 7, -1 }; 533 | clear_registers(); 534 | std::vector old_stats; 535 | for (size_t i = 0; i < call.args.len; i++) 536 | { 537 | auto ptr = call.args.buffer[i]; 538 | koopa_raw_value_t arg = reinterpret_cast(ptr); 539 | struct Reg arg_var = Visit(arg); 540 | assert(arg_var.reg_name >= 0); 541 | if (i < 8) 542 | { 543 | if (arg_var.reg_name != i + 7) 544 | std::cout << "\tmv " << reg_names[i + 7] << ", " << 545 | reg_names[arg_var.reg_name] << std::endl; 546 | old_stats.push_back(reg_stats[i + 7]); 547 | reg_stats[i + 7] = 2; 548 | } 549 | else if ((i - 8) * 4 >= -2048 && (i - 8) * 4 <= 2047) 550 | std::cout << "\tsw " << reg_names[arg_var.reg_name] << ", " << 551 | (i - 8) * 4 << "(sp)" << 
std::endl; 552 | else 553 | { 554 | std::cout << "\tli s11, " << (i - 8) * 4 << std::endl; 555 | std::cout << "\tadd s11, s11, sp" << std::endl; 556 | std::cout << "\tsw " << reg_names[arg_var.reg_name] << ", (s11)" 557 | << std::endl; 558 | } 559 | } 560 | for (int i = 0; i < old_stats.size(); i++)reg_stats[i + 7] = old_stats[i]; 561 | std::cout << "\tcall " << call.callee->name + 1 << std::endl; 562 | clear_registers(false); 563 | return result_var; 564 | } 565 | 566 | 567 | std::string Visit(const koopa_raw_global_alloc_t &global) 568 | { 569 | std::string name = "var_" + std::to_string(global_num++); 570 | std::cout << "\t.data" << std::endl; 571 | std::cout << "\t.globl " << name << std::endl; 572 | std::cout << name << ":" << std::endl; 573 | switch (global.init->kind.tag) 574 | { 575 | case KOOPA_RVT_ZERO_INIT: 576 | std::cout << "\t.zero " << cal_size(global.init->ty) << std::endl << 577 | std::endl; 578 | break; 579 | case KOOPA_RVT_INTEGER: 580 | std::cout << "\t.word " << global.init->kind.data.integer.value << 581 | std::endl << std::endl; 582 | break; 583 | case KOOPA_RVT_AGGREGATE: 584 | init_aggregate(global.init); 585 | std::cout << std::endl; 586 | break; 587 | default: 588 | assert(false); 589 | } 590 | return name; 591 | } 592 | 593 | 594 | Reg Visit(const koopa_raw_get_elem_ptr_t &get_elem_ptr) 595 | { 596 | if (get_elem_ptr.src->kind.tag == KOOPA_RVT_GLOBAL_ALLOC) 597 | { 598 | assert(global_values.count(get_elem_ptr.src)); 599 | struct Reg result_var = {find_reg(2), -1}; 600 | koopa_raw_type_t arr = get_elem_ptr.src->ty->data.pointer.base; 601 | int total_size = cal_size(arr), len = arr->data.array.len; 602 | assert(total_size % len == 0); 603 | int elem_size = total_size / len; 604 | struct Reg ind_var = Visit(get_elem_ptr.index); 605 | int ind_reg = ind_var.reg_name; 606 | reg_stats[result_var.reg_name] = 1; 607 | std::cout << "\tla " << reg_names[result_var.reg_name] << ", " << 608 | global_values[get_elem_ptr.src] << std::endl; 609 | 
std::cout << "\tli s11, " << elem_size << std::endl; 610 | std::cout << "\tmul s11, s11, " << reg_names[ind_reg] << std::endl; 611 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 612 | reg_names[result_var.reg_name] << ", s11" << std::endl; 613 | return result_var; 614 | } 615 | struct Reg src_var = value_map[get_elem_ptr.src]; 616 | koopa_raw_type_t arr = get_elem_ptr.src->ty->data.pointer.base; 617 | struct Reg result_var = {find_reg(2), -1}; 618 | int src_reg, src_old_stat; 619 | if (get_elem_ptr.src->name && get_elem_ptr.src->name[0] == '@') 620 | { 621 | int offset = src_var.reg_offset; 622 | assert(offset >= 0); // variables have positive offset 623 | if (offset >= -2048 && offset <= 2047) 624 | std::cout << "\taddi " << reg_names[result_var.reg_name] << 625 | ", sp, " << offset << std::endl; 626 | else 627 | { 628 | std::cout << "\tli s11, " << offset << std::endl; 629 | std::cout << "\tadd " << reg_names[result_var.reg_name] << 630 | ", sp, s11" << std::endl; 631 | } 632 | } 633 | else 634 | { 635 | src_var = Visit(get_elem_ptr.src); 636 | src_reg = src_var.reg_name; 637 | assert(src_reg >= 0); 638 | src_old_stat = reg_stats[src_reg]; 639 | reg_stats[src_reg] = 2; 640 | } 641 | int total_size = cal_size(arr), len = arr->data.array.len; 642 | assert(total_size % len == 0); 643 | int elem_size = total_size / len; 644 | struct Reg ind_var = Visit(get_elem_ptr.index), tmp_var; 645 | if (elem_size != 0 && ind_var.reg_name != 15) 646 | { 647 | int ind_reg = ind_var.reg_name; 648 | int ind_old_stat = reg_stats[ind_reg]; 649 | reg_stats[ind_reg] = 2; 650 | tmp_var = {find_reg(0), -1}; 651 | reg_stats[ind_reg] = ind_old_stat; 652 | std::cout << "\tli " << reg_names[tmp_var.reg_name] << ", " << 653 | elem_size << std::endl; 654 | std::cout << "\tmul " << reg_names[tmp_var.reg_name] << ", " << 655 | reg_names[tmp_var.reg_name] << ", " << reg_names[ind_reg] << 656 | std::endl; 657 | } 658 | else tmp_var = {15, -1}; 659 | 
reg_stats[result_var.reg_name] = 1; 660 | if (get_elem_ptr.src->name && get_elem_ptr.src->name[0] == '@') 661 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 662 | reg_names[result_var.reg_name] << ", " << 663 | reg_names[tmp_var.reg_name] << std::endl; 664 | else 665 | { 666 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 667 | reg_names[src_reg] << ", " << reg_names[tmp_var.reg_name] 668 | << std::endl; 669 | reg_stats[src_reg] = src_old_stat; 670 | } 671 | return result_var; 672 | } 673 | 674 | 675 | Reg Visit(const koopa_raw_get_ptr_t &get_ptr) 676 | { 677 | struct Reg src_var = value_map[get_ptr.src]; 678 | koopa_raw_type_t arr = get_ptr.src->ty->data.pointer.base; 679 | struct Reg result_var = {find_reg(2), -1}; 680 | int elem_size = cal_size(arr); 681 | struct Reg ind_var = Visit(get_ptr.index), tmp_var; 682 | if (elem_size != 0 && ind_var.reg_name != 15) 683 | { 684 | int ind_reg = ind_var.reg_name; 685 | int ind_old_stat = reg_stats[ind_reg]; 686 | reg_stats[ind_reg] = 2; 687 | tmp_var = {find_reg(0), -1}; 688 | reg_stats[ind_reg] = ind_old_stat; 689 | std::cout << "\tli " << reg_names[tmp_var.reg_name] << ", " << 690 | elem_size << std::endl; 691 | std::cout << "\tmul " << reg_names[tmp_var.reg_name] << ", " << 692 | reg_names[tmp_var.reg_name] << ", " << reg_names[ind_reg] << 693 | std::endl; 694 | } 695 | else tmp_var = {15, -1}; 696 | reg_stats[result_var.reg_name] = 1; 697 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 698 | reg_names[src_var.reg_name] << ", " << 699 | reg_names[tmp_var.reg_name] << std::endl; 700 | return result_var; 701 | } 702 | 703 | 704 | int find_reg(int stat) 705 | { 706 | for (int i = 0; i < 15; i++) 707 | if (reg_stats[i] == 0) 708 | { 709 | registers[i] = present_value; 710 | reg_stats[i] = stat; 711 | return i; 712 | } 713 | for (int i = 0; i < 15; i++) 714 | { 715 | if (reg_stats[i] == 1) 716 | { 717 | value_map[registers[i]].reg_name = -1; 718 | int offset = 
value_map[registers[i]].reg_offset; 719 | if (offset == -1) 720 | { 721 | offset = stack_top; 722 | stack_top += 4; 723 | value_map[registers[i]].reg_offset = offset; 724 | } 725 | if (offset >= -2048 && offset <= 2047) 726 | std::cout << "\tsw " << reg_names[i] << ", " << offset << 727 | "(sp)" << std::endl; 728 | else 729 | { 730 | std::cout << "\tli s11, " << offset << std::endl; 731 | std::cout << "\tadd s11, s11, sp" << std::endl; 732 | std::cout << "\tsw " << reg_names[i] << ", (s11)" << 733 | std::endl; 734 | } 735 | registers[i] = present_value; 736 | reg_stats[i] = stat; 737 | return i; 738 | } 739 | } 740 | assert(false); 741 | return -1; 742 | } 743 | 744 | 745 | void clear_registers(bool save_temps) 746 | { 747 | for (int i = 0; i < 15; i++) 748 | if (reg_stats[i] > 0) 749 | { 750 | value_map[registers[i]].reg_name = -1; 751 | int offset = value_map[registers[i]].reg_offset; 752 | if (offset == -1) 753 | { 754 | offset = stack_top; 755 | stack_top += 4; 756 | value_map[registers[i]].reg_offset = offset; 757 | if (save_temps) 758 | { 759 | if (offset >= -2048 && offset <= 2047) 760 | std::cout << "\tsw " << reg_names[i] << ", " << 761 | offset << "(sp)" << std::endl; 762 | else 763 | { 764 | std::cout << "\tli s11, " << offset << std::endl; 765 | std::cout << "\tadd s11, s11, sp" << std::endl; 766 | std::cout << "\tsw " << reg_names[i] << ", (s11)" << 767 | std::endl; 768 | } 769 | } 770 | } 771 | reg_stats[i] = 0; 772 | } 773 | } 774 | 775 | 776 | int cal_size(const koopa_raw_type_t &ty) 777 | { 778 | assert(ty->tag != KOOPA_RTT_UNIT); 779 | if (ty->tag == KOOPA_RTT_ARRAY) 780 | { 781 | int prev = cal_size(ty->data.array.base); 782 | int len = ty->data.array.len; 783 | return len * prev; 784 | } 785 | return 4; 786 | } 787 | 788 | 789 | void init_aggregate(const koopa_raw_value_t &aggr) 790 | { 791 | koopa_raw_slice_t elems = aggr->kind.data.aggregate.elems; 792 | for (size_t i = 0; i < elems.len; i++) 793 | { 794 | auto ptr = elems.buffer[i]; 795 | 
assert(elems.kind == KOOPA_RSIK_VALUE); 796 | auto value = reinterpret_cast(ptr); 797 | if (value->kind.tag == KOOPA_RVT_INTEGER) 798 | std::cout << "\t.word " << value->kind.data.integer.value << 799 | std::endl; 800 | else if (value->kind.tag == KOOPA_RVT_AGGREGATE) 801 | init_aggregate(value); 802 | else assert(false); 803 | } 804 | } 805 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SysY Compiler 2 | 3 | Compiles SysY into RISC-V. Below is my Chinese version lab report. 4 | 5 | PKU Compiler Principles course documents: [PKU Compiler Principles](https://pku-minic.github.io/online-doc/#/). 6 | 7 | PKU Compiler Principles development environment: [compiler-dev](https://github.com/pku-minic/compiler-dev). 8 | 9 | ## 一、编译器概述 10 | 11 | ### 1.1 基本功能 12 | 13 | 本编译器基本具备如下功能: 14 | 15 | 1. 将 SysY 语言程序编译为 Koopa IR。 16 | 2. 将 Koopa IR 编译为 RISC-V 指令。 17 | 3. 实现 RISC-V 程序的寄存器分配以优化性能。 18 | 19 | ### 1.2 主要特点 20 | 21 | 我开发的编译器的主要特点是实现简单、正确性高,并且进行了部分性能优化。编译过程完全利用嵌套的 AST 数据结构递归实现,只需一次遍历,代码简洁;通过了性能测试的所有测试用例以及 Koopa IR 和 RISC-V 的几乎全部测试用例,正确性较高;使用分级的寄存器分配策略,提高了性能。 22 | 23 | ## 二、编译器设计 24 | 25 | ### 2.1 主要模块组成 26 | 27 | 编译器由 3 个主要模块组成:```sysy.l``` 和 ```sysy.y``` 负责词法和语法分析,```AST.h``` 负责将 SysY 源代码编译成 Koopa IR,```RISCV.h``` 负责将 Koopa IR 编译成 RISC-V 指令。 28 | 29 | ### 2.2 主要数据结构 30 | 31 | 本编译器最核心的数据结构是 AST 树。如果将一个 SysY 程序视作一棵树,那么一个 ```class CompUnitAST``` 的实例就是这棵树的根,其数据成员包括所有函数定义和全局变量定义。函数定义用数据结构 ```class FuncDefAST``` 表示,数据成员包括其返回类型、标识符、参数指针和函数块指针;常量声明和定义分别用 ```class ConstDeclAST``` 和 ```class ConstDefAST``` 表示,变量声明和定义分别用 ```class VarDeclAST``` 和 ```class VarDefAST``` 表示。表示语句块、语句和表达式的数据结构分别为 ```class BlockAST```、```class StmtAST``` 和 ```class ExpAST```,而它们又可以根据语法规则衍生出很多其他的 AST 数据结构。 32 | 33 | 所有 AST 类都是基类 ```class BaseAST``` 的衍生类,其定义为: 34 | 35 | ```cpp 36 | class BaseAST 37 | { 38 | public: 39 | virtual ~BaseAST() = default; 40 | virtual void dump() const 
= 0; 41 | virtual std::string dumpIR() const = 0; 42 | virtual int dumpExp() const { assert(false); return -1; } 43 | virtual std::string get_ident() const { assert(false); return ""; } 44 | virtual std::string get_type() const { assert(false); return ""; } 45 | virtual int get_dim() const { assert(false); return -1; } 46 | virtual std::vector dumpList(std::vector) const { exit(1); } 47 | }; 48 | ``` 49 | 50 | 其中 ```dump()``` 递归地输出语法树结构,可以用来 debug;```dumpIR()``` 递归地生成中间代码;```dumpExp()``` 用于编译期生成代码,主要包括常量表达式的计算以及全局数组初始化等;```dumpList()``` 用于计算数组的初始值,将不完整的初始化列表补全;以及其他的一些辅助函数。 51 | 52 | 我们用一个 ```vector``` 来表示当前活跃的符号表,它是由若干符号表构成的 ```vector```: 53 | 54 | ```cpp 55 | static std::vector>> 56 | symbol_tables; 57 | ``` 58 | 59 | 每一个符号表都将当前语句块的一个变量(用字符串表示)映射到它的名字或数值。第一个符号表为全局符号表,它永远都是活跃的。此外我们维护一些额外信息: 60 | 61 | ```cpp 62 | static std::map var_num; 63 | static std::map is_list; 64 | static std::map is_func_param; 65 | static std::map list_dim; 66 | ``` 67 | 68 | 以便于中间代码的生成。 69 | 70 | 对于函数,我们维护一个函数列表,并记录关于这些函数的额外信息,以便于语义分析: 71 | 72 | ```cpp 73 | static std::map function_table; 74 | static std::map function_ret_type; 75 | static std::map function_param_num; 76 | static std::map> function_param_idents; 77 | static std::map> function_param_names; 78 | static std::map> function_param_types; 79 | ``` 80 | 81 | 这些数据结构所记录的信息就如它们的名字所示。 82 | 83 | ### 2.3 主要设计考虑及算法选择 84 | 85 | #### 2.3.1 符号表的设计考虑 86 | 87 | 我的符号表数据结构是用一个 ```vector``` 表示的,每个元素都是一个符号表,对应于语句块(```class BlockAST```)的嵌套关系,第一个元素为全局符号表,最后一个元素为当前所在块的符号表。这样当我们退出一个语句块时,只需要从 ```vector``` 中弹出一个元素;而进入一个语句块时,只需要向 ```vector``` 中加入一个元素。 88 | 89 | #### 2.3.2 寄存器分配策略 90 | 91 | 我采用的是较为朴素的寄存器分配策略。每个寄存器有一个 ```reg_stats``` 值,可能为 ```0```、```1``` 或 ```2```。```0``` 表示当前寄存器中没有值或这个值不重要(即不用被保存回栈),```1``` 表示当前值在之后还可能被用到;```2``` 表示当前值不能被替换。搜索寄存器时,我们首先看有没有 ```reg_stats``` 为 ```0``` 的寄存器,若有则直接返回该寄存器;否则我们就找到一个 ```reg_stats``` 为 ```1``` 的寄存器,将其原来的值保存回栈,并作为分配的寄存器。```reg_stats``` 为 ```2``` 的寄存器是被保护的,它不能被换出,说明它在当前语句的处理中还会用到;也正因如此我们需要在 ```reg_stats``` 为 
```2``` 的寄存器被使用后将它的 ```reg_stats``` 还原为 ```1```,以避免无寄存器可用的情况。 92 | 93 | #### 2.3.3 采用的优化策略 94 | 95 | 我进行了一些简单的优化,如维护一个活跃值表,当加载一个值时,如果它是活跃的,那么就不用去栈上加载,直接返回它所在的寄存器;控制流无法到达的语句不用生成(如:```ret``` 后的语句;基本块内 ```break``` 或 ```continue``` 后的语句;以及形如 ```if (...)ret; else ret;``` 或 ```if (...)break; else ret;``` 之类的语句后的语句);和 0 相加或相乘的代码不用生成,直接计算出结果;没有作用的代码不用生成(如将一个寄存器的值保存或移动到它自己);用一个特殊寄存器(```s11```)作为临时值寄存器,以避免频繁的临时寄存器调度等。 96 | 97 | #### 2.3.4 其它补充设计考虑 98 | 99 | 我将相似的 AST 数据结构用一个类实现,用一个枚举类区分其类型,使代码更简洁;各个语法树元素之间尽可能通过递归的方式互相传递信息,一些关键信息如符号表或函数表则用全局变量记录;所有 AST 类都继承自 ```BaseAST```,共用同名的成员函数以实现多态。 100 | 101 | ## 三、编译器实现 102 | 103 | ### 3.1 各阶段编码细节 104 | 105 | #### Lv1. main 函数和 Lv2. 初试目标代码生成 106 | 107 | 这部分比较简单,中间代码生成部分就按照 AST 树的结构递归处理即可;目标代码生成部分也只需要处理两种 ```value```:```ret``` 和 ```integer```。 108 | 109 | ```cpp 110 | void visit(const koopa_raw_return_t &ret) 111 | { 112 | koopa_raw_value_t ret_value = ret.value; 113 | visit(ret_value); 114 | std::cout << "\tret" << std::endl; 115 | } 116 | 117 | 118 | void visit(const koopa_raw_integer_t &integer) 119 | { 120 | int32_t int_val = integer.value; 121 | std::cout << "\tli a0," << int_val << std::endl; 122 | return; 123 | } 124 | ``` 125 | 126 | #### Lv3. 表达式 127 | 128 | 这部分的主要内容是利用 AST 的层次结构处理优先级,优先级较低的运算在语法树中层次较高。语法树从上到下依次为: 129 | 130 | ```cpp 131 | class ExpAST : public BaseAST; 132 | class LOrExpAST : public BaseAST; 133 | class LAndExpAST : public BaseAST; 134 | class EqExpAST : public BaseAST; 135 | class RelExpAST : public BaseAST; 136 | class AddExpAST : public BaseAST; 137 | class MulExpAST : public BaseAST; 138 | class UnaryExpAST : public BaseAST; 139 | class PrimaryExpAST : public BaseAST; 140 | ``` 141 | 142 | 每一级 AST 都有下一级 AST 作为其数据成员,这样对语法树递归处理时便能正确实现优先级。 143 | 144 | #### Lv4.
常量和变量 145 | 146 | 本章添加 ```class DeclAST``` 用于表示声明,```class ConstDeclAST```、```class ConstDefAST``` 和 ```class ConstInitValAST``` 用于表示常量声明、定义和初始值,```class VarDeclAST```、```class VarDefAST``` 和 ```class InitValAST``` 用于表示变量声明、定义和初始值。```block``` 可以包含多个语句,我们用一个 ```vector``` 来表示;并且 ```SimpleStmt``` 和 ```PrimaryExp``` 中也可以包含 ```LVal``` 了,为此我们需要实现一个符号表。如前所述,符号表被组织成一个栈,在进入 ```block``` 时加入一个符号表、在退出 ```block``` 时弹出一个符号表。 147 | 148 | ```cpp 149 | class VarDefAST : public BaseAST 150 | { 151 | public: 152 | std::string ident; 153 | // ... 154 | std::string dumpIR() const override 155 | { 156 | std::string var_name = "@" + ident; 157 | std::string name = var_name + "_" + 158 | std::to_string(var_num[var_name]++); 159 | std::cout << '\t' << name << " = alloc i32" << std::endl; 160 | symbol_tables.back()[ident] = name; 161 | // ... 162 | } 163 | }; 164 | ``` 165 | 166 | 常量并不需要保存至符号表,我们需要在编译期将其值求出来,为此我们实现 ```dumpExp()``` 函数来在编译期对表达式求值。实际上我的代码中常量还是在 ```symbol_tables``` 中的,只不过它的值不是字符串而是数字(用 ```std::variant``` 实现)。 167 | 168 | 在生成目标代码时维护一个 ```value_map```,若所求值已经存在则不需要再次求值,直接加载即可。 169 | 170 | ```cpp 171 | Reg Visit(const koopa_raw_value_t &value) 172 | { 173 | koopa_raw_value_t old_value = present_value; 174 | present_value = value; 175 | if (value_map.count(value)) 176 | { 177 | if (value_map[value].reg_name == -1) 178 | { 179 | int reg_name = find_reg(1); 180 | value_map[value].reg_name = reg_name; 181 | std::cout << "\tlw " << reg_names[reg_name] << ", " << 182 | value_map[value].reg_offset << "(sp)" << std::endl; 183 | } 184 | present_value = old_value; 185 | return value_map[value]; 186 | } 187 | } 188 | ``` 189 | 190 | #### Lv5. 语句块和作用域 191 | 192 | 本章的改动在于 ```block``` 也可以作为 ```stmt```,并需要正确维护符号表。这体现在代码上主要是 ```SimpleStmtAST``` 的数据成员有可能是 ```block``` 也有可能是 ```exp```,但由于多态性,这并没有什么影响。 193 | 194 | ```cpp 195 | class SimpleStmtAST : public BaseAST 196 | { 197 | public: 198 | SimpleStmtType type; 199 | std::string lval; 200 | std::unique_ptr block_exp; 201 | // ...
202 | std::string dumpIR() const override 203 | { 204 | if (type == SimpleStmtType::ret) 205 | { 206 | // ... 207 | } 208 | else if (type == SimpleStmtType::lval) 209 | { 210 | std::string result_var = block_exp->dumpIR(); 211 | std::variant value = look_up_symbol_tables(lval); 212 | assert(value.index() == 1); 213 | std::cout << "\tstore " << result_var << ", " << 214 | std::get(value) << std::endl; 215 | } 216 | else if (type == SimpleStmtType::exp) 217 | { 218 | if (block_exp != nullptr)block_exp->dumpIR(); 219 | } 220 | else if (type == SimpleStmtType::block)return block_exp->dumpIR(); 221 | else assert(false); 222 | return ""; 223 | } 224 | }; 225 | ``` 226 | 227 | #### Lv6. if 语句 228 | 229 | 我们利用下述文法处理 if/else 的二义性: 230 | 231 | ```bison 232 | Stmt : OpenStmt 233 | | ClosedStmt; 234 | ClosedStmt : SimpleStmt 235 | | IF '(' Exp ')' ClosedStmt ELSE ClosedStmt; 236 | OpenStmt : IF '(' Exp ')' Stmt 237 | | IF '(' Exp ')' ClosedStmt ELSE OpenStmt; 238 | ``` 239 | 240 | 为了实现短路求值,我们需要更改 ```LOrExpAST``` 和 ```LAndExpAST``` 的代码。这里以 ```LOrExpAST``` 为例: 241 | 242 | ```cpp 243 | class LOrExpAST : public BaseAST 244 | { 245 | public: 246 | std::string op; 247 | std::unique_ptr l_and_exp; 248 | std::unique_ptr l_or_exp; 249 | // ... 
250 | std::string dumpIR() const override 251 | { 252 | std::string result_var = ""; 253 | if (op == "")result_var = l_and_exp->dumpIR(); 254 | else if (op == "||") 255 | { 256 | std::string left_result = l_or_exp->dumpIR(); 257 | std::string then_label = "\%then_" + std::to_string(if_else_num); 258 | std::string else_label = "\%else_" + std::to_string(if_else_num); 259 | std::string end_label = "\%end_" + std::to_string(if_else_num++); 260 | std::string result_var_ptr = "%" + std::to_string(symbol_num++); 261 | std::cout << '\t' << result_var_ptr << " = alloc i32" << std::endl; 262 | std::cout << "\tbr " << left_result << ", " << then_label << ", " 263 | << else_label << std::endl; 264 | std::cout << then_label << ":" << std::endl; 265 | std::cout << "\tstore 1, " << result_var_ptr << std::endl; 266 | std::cout << "\tjump " << end_label << std::endl; 267 | std::cout << else_label << ":" << std::endl; 268 | std::string tmp_result_var = "%" + std::to_string(symbol_num++); 269 | std::string right_result = l_and_exp->dumpIR(); 270 | std::cout << '\t' << tmp_result_var << " = ne " << right_result 271 | << ", 0" << std::endl; 272 | std::cout << "\tstore " << tmp_result_var << ", " << result_var_ptr 273 | << std::endl; 274 | std::cout << "\tjump " << end_label << std::endl; 275 | std::cout << end_label << ":" << std::endl; 276 | result_var = "%" + std::to_string(symbol_num++); 277 | std::cout << '\t' << result_var << " = load " << result_var_ptr 278 | << std::endl; 279 | } 280 | else assert(false); 281 | return result_var; 282 | } 283 | }; 284 | ``` 285 | 286 | 目标代码生成只需要额外考虑 ```br``` 和 ```jump``` 指令,以 ```br``` 为例: 287 | 288 | ```cpp 289 | void Visit(const koopa_raw_branch_t &branch) 290 | { 291 | std::string true_label = branch.true_bb->name + 1; 292 | std::string false_label = branch.false_bb->name + 1; 293 | int cond_reg = Visit(branch.cond).reg_name; 294 | std::cout << "\tbnez " << reg_names[cond_reg] << ", " << true_label 295 | << std::endl; 296 | std::cout << "\tj " 
<< false_label << std::endl; 297 | } 298 | ``` 299 | 300 | #### Lv7. while 语句 301 | 302 | 本章一个是实现 ```while```,一个是实现 ```break``` 和 ```continue```。实现 ```while``` 只需在 ```StmtAST``` 结构中新加一种 ```type``` 即可,代码如下: 303 | 304 | ```cpp 305 | else if (type == StmtType::while_) 306 | { 307 |     std::string entry_label = "\%while_" + std::to_string(while_num); 308 |     std::string body_label = "\%do_" + std::to_string(while_num); 309 |     std::string end_label = "\%while_end_" + std::to_string(while_num); 310 |     while_stack.push_back(while_num++); 311 |     std::cout << "\tjump " << entry_label << std::endl; 312 |     std::cout << entry_label << ":" << std::endl; 313 |     std::string while_result = exp_simple->dumpIR(); 314 |     std::cout << "\tbr " << while_result << ", " << body_label << ", " 315 |         << end_label << std::endl; 316 |     std::cout << body_label << ":" << std::endl; 317 |     std::string while_stmt_type = while_stmt->dumpIR(); 318 |     if (while_stmt_type != "ret" && while_stmt_type != "break" && 319 |         while_stmt_type != "cont") 320 |     std::cout << "\tjump " << entry_label << std::endl; 321 |     std::cout << end_label << ":" << std::endl; 322 |     while_stack.pop_back(); 323 | } 324 | ``` 325 | 326 | 实现 ```break``` 和 ```continue``` 也只需在 ```SimpleStmtAST``` 中新加入两种 ```type``` 即可。我们维护一个 ```while_stack``` 用于保存 ```while``` 语句块的 ```label```,这样在生成 ```break``` 和 ```continue``` 时就可以得到跳转地址了。以 ```break``` 为例: 327 | 328 | ```cpp 329 | else if (type == SimpleStmtType::break_) 330 | { 331 | assert(!while_stack.empty()); 332 | int while_no = while_stack.back(); 333 | std::string end_label = "\%while_end_" + std::to_string(while_no); 334 | std::cout << "\tjump " << end_label << std::endl; 335 | return "break"; 336 | } 337 | ``` 338 | 339 | 需要注意的一点是同一个基本块内 ```ret```、```break``` 以及 ```continue``` 后的语句都不会被到达,不必为它们生成代码;形如 ```if (...)ret; else ret;``` 或者 ```if (...)break; else break;``` 之类的语句也是如此。 340 | 341 | #### Lv8. 
函数和全局变量 342 | 343 | 首先是函数。对于函数,我们要处理两件事情,一个是函数的调用,一个是函数参数的保存。中间代码生成时,函数调用只需在 ```UnaryExpAST``` 中加一种 ```type```: 344 | 345 | ```cpp 346 | else if (type == UnaryExpType::func_call) 347 | { 348 | std::vector<std::string> param_vars; 349 | for (auto&& param : params) 350 | param_vars.push_back(param->dumpIR()); 351 | assert(function_table.count(ident)); 352 | assert(function_param_num[ident] == params.size()); 353 | std::string result_var = ""; 354 | if (function_ret_type[ident] == "int") 355 | result_var = "%" + std::to_string(symbol_num++); 356 | std::string name = function_table[ident]; 357 | std::cout << '\t'; 358 | if (function_ret_type[ident] == "int") 359 | std::cout << result_var << " = "; 360 | std::cout << "call " << name << "("; 361 | for (int i = 0; i < param_vars.size(); i++) 362 | { 363 | std::cout << param_vars[i]; 364 | if (i != param_vars.size() - 1)std::cout << ", "; 365 | } 366 | std::cout << ")" << std::endl; 367 | return result_var; 368 | } 369 | ``` 370 | 371 | 而函数开头要把函数的相关信息保存在相关数据结构中,在 ```block``` 生成中间代码时先将函数参数保存在栈上。目标代码生成时,函数调用前要把参数移至相应位置: 372 | 373 | ```cpp 374 | Reg Visit(const koopa_raw_call_t &call) 375 | { 376 | struct Reg result_var = { 7, -1 }; 377 | clear_registers(); 378 | for (size_t i = 0; i < call.args.len; i++) 379 | { 380 | auto ptr = call.args.buffer[i]; 381 | koopa_raw_value_t arg = reinterpret_cast<koopa_raw_value_t>(ptr); 382 | struct Reg arg_var = Visit(arg); 383 | assert(arg_var.reg_name >= 0); 384 | if (i < 8) 385 | { 386 | if (arg_var.reg_name != i + 7) 387 | std::cout << "\tmv " << reg_names[i + 7] << ", " << 388 | reg_names[arg_var.reg_name] << std::endl; 389 | } 390 | else 391 | std::cout << "\tsw " << reg_names[arg_var.reg_name] << ", " << 392 | (i - 8) * 4 << "(sp)" << std::endl; 393 | } 394 | std::cout << "\tcall " << call.callee->name + 1 << std::endl; 395 | return result_var; 396 | } 397 | ``` 398 | 399 | 而函数开头要计算栈的偏移量,并视情况减少栈指针和保存 ```ra``` 寄存器。函数调用前要保存所有 ```reg_stats``` 大于 ```0``` 的寄存器,函数调用后要假设所有调用者保存的寄存器的值都已经被重写。 400 | 401 | SysY 
库函数比较简单,在代码开头先定义好这些函数就行了。 402 | 403 | 至于全局变量,它如果有初始值的话一定是编译期可求的,我们不能仍然像局部变量一样用 ```dumpIR()``` 生成代码,而是另外实现 ```dumpExp()``` 以在全局分配变量和指定其初始值。在目标代码生成则需额外考虑一种 ```global``` 类型。 404 | 405 | ```cpp 406 | std::string Visit(const koopa_raw_global_alloc_t &global) 407 | { 408 | std::string name = "var_" + std::to_string(global_num++); 409 | std::cout << "\t.data" << std::endl; 410 | std::cout << "\t.globl " << name << std::endl; 411 | std::cout << name << ":" << std::endl; 412 | switch (global.init->kind.tag) 413 | { 414 | case KOOPA_RVT_ZERO_INIT: 415 | std::cout << "\t.zero 4" << std::endl << std::endl; 416 | break; 417 | case KOOPA_RVT_INTEGER: 418 | std::cout << "\t.word " << global.init->kind.data.integer.value << 419 | std::endl << std::endl; 420 | break; 421 | default: 422 | assert(false); 423 | } 424 | return name; 425 | } 426 | ``` 427 | 428 | #### Lv9. 数组 429 | 430 | 数组的一大难点在于多维数组的初始化。为此我实现了一系列的成员函数来递归地实现多维数组的处理。首先是 ```dumpListType```,递归地打印出数组类型: 431 | 432 | ```cpp 433 | void dumpListType(std::vector widths) const 434 | { 435 | if (widths.size() == 1) 436 | { 437 | std::cout << "[i32, " << widths[0] << "]"; return; 438 | } 439 | std::vector rec = std::vector(widths.begin() + 1, 440 | widths.end()); 441 | std::cout << "["; dumpListType(rec); 442 | std::cout << ", " << widths.front() << "]"; 443 | } 444 | ``` 445 | 446 | 函数 ```dumpList()``` 递归地将数组的初始值补全: 447 | 448 | ```cpp 449 | std::vector dumpList(std::vector widths) const override 450 | { 451 | std::vector ret; 452 | if (widths.size() == 1) 453 | { 454 | for (auto&& const_init_val : const_init_val_list) 455 | { 456 | assert(const_init_val->get_ident() == "const_exp"); 457 | ret.push_back(std::stoi(const_init_val->dumpIR())); 458 | const_list_num++; 459 | } 460 | int num_zeros = widths[0] - ret.size(); 461 | for (int i = 0; i < num_zeros; i++) 462 | { 463 | ret.push_back(0); const_list_num++; 464 | } 465 | return ret; 466 | } 467 | std::vector products = widths; 468 | for (int i = products.size() - 2; i >= 0; i--) 
469 | products[i] *= products[i + 1]; 470 | int total_size = products[0]; 471 | for (auto&& const_init_val : const_init_val_list) 472 | if (const_init_val->get_ident() == "const_exp") 473 | { 474 | ret.push_back(std::stoi(const_init_val->dumpIR())); 475 | const_list_num++; continue; 476 | } 477 | else if (const_init_val->get_ident() == "list") 478 | { 479 | int init_num; 480 | for (init_num = 1; init_num < widths.size(); init_num++) 481 | if (const_list_num % products[init_num] == 0)break; 482 | else if (init_num == widths.size() - 1)assert(false); 483 | std::vector rec = std::vector(widths.begin() + 484 | init_num, widths.end()); 485 | std::vector tmp = const_init_val->dumpList(rec); 486 | ret.insert(ret.end(), tmp.begin(), tmp.end()); 487 | } 488 | else assert(false); 489 | int num_zeros = total_size - ret.size(); 490 | for (int i = 0; i < num_zeros; i++) 491 | { 492 | ret.push_back(0); const_list_num++; 493 | } 494 | return ret; 495 | } 496 | ``` 497 | 498 | 函数 ```dumpListInit()``` 用于递归地实现局部数组的初始化: 499 | 500 | ```cpp 501 | void dumpListInit(std::string prev, std::vector widths, int depth, 502 | std::vector init_list) const 503 | { 504 | if (depth >= widths.size()) 505 | { 506 | std::cout << "\tstore " << init_list[const_list_num++] << ", " << 507 | prev << std::endl; 508 | return; 509 | } 510 | for (int i = 0; i < widths[depth]; i++) 511 | { 512 | std::string result_var = "%" + std::to_string(symbol_num++); 513 | std::cout << '\t' << result_var << " = getelemptr " << prev << ", " 514 | << i << std::endl; 515 | dumpListInit(result_var, widths, depth + 1, init_list); 516 | } 517 | } 518 | ``` 519 | 520 | 函数 ```printInitList()``` 递归地打印出数组的初始值: 521 | 522 | ```cpp 523 | void printInitList(std::vector widths, int depth, 524 | std::vector init_list) const 525 | { 526 | if (depth >= widths.size()) 527 | { 528 | std::cout << init_list[const_list_num++]; return; 529 | } 530 | std::cout << "{"; 531 | for (int i = 0; i < widths[depth]; i++) 532 | { 533 | 
printInitList(widths, depth + 1, init_list); 534 | if (i != widths[depth] - 1)std::cout << ", "; 535 | } 536 | std::cout << "}"; 537 | } 538 | ``` 539 | 540 | 当数组作为左值的时候,我们要根据其是否是函数参数来选择使用 ```getptr``` 还是 ```getelemptr```: 541 | 542 | ```cpp 543 | else if (type == SimpleStmtType::list) 544 | { 545 | std::variant value = look_up_symbol_tables(lval); 546 | assert(value.index() == 1); 547 | std::string result_var, name, prev = std::get(value); 548 | assert(list_dim[prev] == exp_list.size()); 549 | for (auto&& exp : exp_list) 550 | { 551 | result_var = exp->dumpIR(); 552 | name = "%" + std::to_string(symbol_num++); 553 | if (is_func_param[prev]) 554 | { 555 | std::cout << '\t' << name << " = load " << prev << 556 | std::endl; 557 | std::string tmp = "%" + std::to_string(symbol_num++); 558 | std::cout << '\t' << tmp << " = getptr " << name << ", " 559 | << result_var << std::endl; 560 | name = tmp; 561 | } 562 | else 563 | std::cout << '\t' << name << " = getelemptr " << prev << 564 | ", " << result_var << std::endl; 565 | prev = name; 566 | } 567 | result_var = block_exp->dumpIR(); 568 | std::cout << "\tstore " << result_var << ", " << prev << std::endl; 569 | } 570 | ``` 571 | 572 | 当数组作为右值的时候,我们也要根据其是否是函数参数、是指针还是整数(例如,对于数组 ```arr[2][2]```,```arr[1]``` 是指针,```arr[1][1]``` 是整数),选择适当的指令: 573 | 574 | ```cpp 575 | else if (type == PrimaryExpType::list) 576 | { 577 | std::variant value = look_up_symbol_tables(lval); 578 | assert(value.index() == 1); 579 | std::string name, prev = std::get(value); 580 | int dim = list_dim[prev]; 581 | bool list = is_list[prev], func_param = is_func_param[prev]; 582 | for (auto&& exp : exp_list) 583 | { 584 | result_var = exp->dumpIR(); 585 | name = "%" + std::to_string(symbol_num++); 586 | if (is_func_param[prev]) 587 | { 588 | std::cout << '\t' << name << " = load " << prev << 589 | std::endl; 590 | std::string tmp = "%" + std::to_string(symbol_num++); 591 | std::cout << '\t' << tmp << " = getptr " << name << 592 | ", " << result_var << 
std::endl; 593 | name = tmp; 594 | } 595 | else 596 | std::cout << '\t' << name << " = getelemptr " << prev << 597 | ", " << result_var << std::endl; 598 | prev = name; 599 | } 600 | if (exp_list.size() == dim) 601 | { 602 | result_var = "%" + std::to_string(symbol_num++); 603 | std::cout << '\t' << result_var << " = load " << prev << 604 | std::endl; 605 | } 606 | else if (list || func_param) 607 | { 608 | result_var = "%" + std::to_string(symbol_num++); 609 | std::cout << '\t' << result_var << " = getelemptr " << prev << 610 | ", 0" << std::endl; 611 | } 612 | else result_var = name; 613 | } 614 | ``` 615 | 616 | 此外需要注意的是,数组名字也是可以被作为参数被单独传入函数的(例如 ```call f(arr)```),因此对 ```lval``` 的处理也要适当修改,判断其是否是数组(维护一个 ```is_list[]```): 617 | 618 | ```cpp 619 | else if (type == PrimaryExpType::lval) 620 | { 621 | std::variant value = look_up_symbol_tables(lval); 622 | if (value.index() == 0) 623 | result_var = std::to_string(std::get(value)); 624 | else if (is_list[std::get(value)]) 625 | { 626 | result_var = "%" + std::to_string(symbol_num++); 627 | std::cout << '\t' << result_var << " = getelemptr " << 628 | std::get(value) << ", 0" << std::endl; 629 | } 630 | else 631 | { 632 | result_var = "%" + std::to_string(symbol_num++); 633 | std::cout << '\t' << result_var << " = load " << 634 | std::get(value) << std::endl; 635 | } 636 | } 637 | ``` 638 | 639 | 目标代码生成部分,我们首先实现 ```cal_size()``` 函数来计算数组类型的大小,以便计算函数所需要的栈空间(注意数组作为函数参数的话只需要 4 字节的空间就够了,毕竟传进来的只是个指针): 640 | 641 | ```cpp 642 | int cal_size(const koopa_raw_type_t &ty) 643 | { 644 | assert(ty->tag != KOOPA_RTT_UNIT); 645 | if (ty->tag == KOOPA_RTT_ARRAY) 646 | { 647 | int prev = cal_size(ty->data.array.base); 648 | int len = ty->data.array.len; 649 | return len * prev; 650 | } 651 | return 4; 652 | } 653 | ``` 654 | 655 | 然后我们分别实现对 ```getelemptr``` 和 ```getptr``` 的处理: 656 | 657 | ```cpp 658 | Reg Visit(const koopa_raw_get_elem_ptr_t &get_elem_ptr) 659 | { 660 | if (get_elem_ptr.src->kind.tag == KOOPA_RVT_GLOBAL_ALLOC) 661 | { 
662 | assert(global_values.count(get_elem_ptr.src)); 663 | struct Reg result_var = {find_reg(2), -1}; 664 | koopa_raw_type_t arr = get_elem_ptr.src->ty->data.pointer.base; 665 | int total_size = cal_size(arr), len = arr->data.array.len; 666 | assert(total_size % len == 0); 667 | int elem_size = total_size / len; 668 | struct Reg ind_var = Visit(get_elem_ptr.index); 669 | int ind_reg = ind_var.reg_name; 670 | reg_stats[result_var.reg_name] = 1; 671 | std::cout << "\tla " << reg_names[result_var.reg_name] << ", " << 672 | global_values[get_elem_ptr.src] << std::endl; 673 | std::cout << "\tli s11, " << elem_size << std::endl; 674 | std::cout << "\tmul s11, s11, " << reg_names[ind_reg] << std::endl; 675 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 676 | reg_names[result_var.reg_name] << ", s11" << std::endl; 677 | return result_var; 678 | } 679 | struct Reg src_var = value_map[get_elem_ptr.src]; 680 | koopa_raw_type_t arr = get_elem_ptr.src->ty->data.pointer.base; 681 | struct Reg result_var = {find_reg(2), -1}; 682 | int src_reg, src_old_stat; 683 | if (get_elem_ptr.src->name && get_elem_ptr.src->name[0] == '@') 684 | { 685 | int offset = src_var.reg_offset; 686 | assert(offset >= 0); // variables have positive offset 687 | if (offset >= -2048 && offset <= 2047) 688 | std::cout << "\taddi " << reg_names[result_var.reg_name] << 689 | ", sp, " << offset << std::endl; 690 | else 691 | { 692 | std::cout << "\tli s11, " << offset << std::endl; 693 | std::cout << "\tadd " << reg_names[result_var.reg_name] << 694 | ", sp, s11" << std::endl; 695 | } 696 | } 697 | else 698 | { 699 | src_var = Visit(get_elem_ptr.src); 700 | src_reg = src_var.reg_name; 701 | assert(src_reg >= 0); 702 | src_old_stat = reg_stats[src_reg]; 703 | reg_stats[src_reg] = 2; 704 | } 705 | int total_size = cal_size(arr), len = arr->data.array.len; 706 | assert(total_size % len == 0); 707 | int elem_size = total_size / len; 708 | struct Reg ind_var = Visit(get_elem_ptr.index), 
tmp_var; 709 | if (elem_size != 0 && ind_var.reg_name != 15) 710 | { 711 | int ind_reg = ind_var.reg_name; 712 | int ind_old_stat = reg_stats[ind_reg]; 713 | reg_stats[ind_reg] = 2; 714 | tmp_var = {find_reg(0), -1}; 715 | reg_stats[ind_reg] = ind_old_stat; 716 | std::cout << "\tli " << reg_names[tmp_var.reg_name] << ", " << 717 | elem_size << std::endl; 718 | std::cout << "\tmul " << reg_names[tmp_var.reg_name] << ", " << 719 | reg_names[tmp_var.reg_name] << ", " << reg_names[ind_reg] << 720 | std::endl; 721 | } 722 | else tmp_var = {15, -1}; 723 | reg_stats[result_var.reg_name] = 1; 724 | if (get_elem_ptr.src->name && get_elem_ptr.src->name[0] == '@') 725 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 726 | reg_names[result_var.reg_name] << ", " << 727 | reg_names[tmp_var.reg_name] << std::endl; 728 | else 729 | { 730 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 731 | reg_names[src_reg] << ", " << reg_names[tmp_var.reg_name] 732 | << std::endl; 733 | reg_stats[src_reg] = src_old_stat; 734 | } 735 | return result_var; 736 | } 737 | 738 | 739 | Reg Visit(const koopa_raw_get_ptr_t &get_ptr) 740 | { 741 | struct Reg src_var = value_map[get_ptr.src]; 742 | koopa_raw_type_t arr = get_ptr.src->ty->data.pointer.base; 743 | struct Reg result_var = {find_reg(2), -1}; 744 | int elem_size = cal_size(arr); 745 | struct Reg ind_var = Visit(get_ptr.index), tmp_var; 746 | if (elem_size != 0 && ind_var.reg_name != 15) 747 | { 748 | int ind_reg = ind_var.reg_name; 749 | int ind_old_stat = reg_stats[ind_reg]; 750 | reg_stats[ind_reg] = 2; 751 | tmp_var = {find_reg(0), -1}; 752 | reg_stats[ind_reg] = ind_old_stat; 753 | std::cout << "\tli " << reg_names[tmp_var.reg_name] << ", " << 754 | elem_size << std::endl; 755 | std::cout << "\tmul " << reg_names[tmp_var.reg_name] << ", " << 756 | reg_names[tmp_var.reg_name] << ", " << reg_names[ind_reg] << 757 | std::endl; 758 | } 759 | else tmp_var = {15, -1}; 760 | reg_stats[result_var.reg_name] 
= 1; 761 | std::cout << "\tadd " << reg_names[result_var.reg_name] << ", " << 762 | reg_names[src_var.reg_name] << ", " << 763 | reg_names[tmp_var.reg_name] << std::endl; 764 | return result_var; 765 | } 766 | ``` 767 | 768 | 最后 ```load```、```store```、```global``` 的处理也要略加改动,这里以 ```load``` 为例: 769 | 770 | ```cpp 771 | else if (src->kind.tag == KOOPA_RVT_GET_ELEM_PTR || 772 | src->kind.tag == KOOPA_RVT_GET_PTR) 773 | { 774 | struct Reg result_var = {find_reg(2), -1}; 775 | struct Reg src_var = Visit(load.src); 776 | reg_stats[result_var.reg_name] = 1; 777 | std::cout << "\tlw " << reg_names[result_var.reg_name] << ", (" << 778 | reg_names[src_var.reg_name] << ")" << std::endl; 779 | return result_var; 780 | } 781 | ``` 782 | 783 | ### 3.2 工具软件介绍 784 | 785 | 1. Flex/Bison: 进行词法分析和语法分析; 786 | 2. LibKoopa: 用于生成 Koopa IR 中间代码的结构,以便 RISC-V 目标代码的生成; 787 | 3. Git/Docker: 版本控制和运行环境。 788 | 789 | ### 3.3 测试情况说明 790 | 791 | 在课程文档中,对 SysY 和 Koopa IR 的符号名有如下规范: 792 | 793 | - 变量/常量的名字可以是 `main`; 794 | 795 | - SysY 程序声明的函数名不能和 SysY 库函数名相同; 796 | 797 | - 局部变量名可以和函数名相同; 798 | 799 | - 全局变量和局部变量的作用域可以重叠,局部变量会覆盖同名全局变量; 800 | 801 | - SysY 标准中未指定函数形式参数应该被放入何种作用域; 802 | 803 | - 在 Koopa IR 中,全局的符号不能和其他全局符号同名,局部的符号(位于函数内部的符号)不能和其他全局符号以及局部符号同名。上述规则对具名符号和临时符号都适用。 804 | 805 | 不过测试用例中似乎没有怎么测试这些规范。我在写代码时为了遵守这些规范,对中间代码中的变量名做了一些处理,以确保不会有重名的符号。另外一个容易踩的坑是,有的同学在实现 ```if```、```while``` 和逻辑表达式时,可能会用 ```then```、```end``` 之类的名字作为跳转的 label,而这些 label 也是有可能和变量重名的。 806 | 807 | 为了检测这些情况,可以使用如下测试用例: 808 | 809 | ```c 810 | int x = 3; 811 | 812 | int f(int then) 813 | { 814 | return x + then; 815 | } 816 | 817 | int main() 818 | { 819 | int f = 2, x = 5; 820 | if (x > 3)f = 3; 821 | const int main[2] = {1, 2}; 822 | return f(main[1] + x + f); 823 | } 824 | ``` 825 | 826 | 上述测试用例检测了局部变量和全局变量重名、局部变量和函数重名、常量/变量和 ```main``` 重名,以及变量和 label 重名的情况。值得一提的是在 perf 的某个测试用例中用到了 ```end``` 作为变量名,而我一开始并没有处理变量和 label 重名的情况,也用 ```end``` 作为 ```if``` 的跳转 label,导致目标代码编译不通过。 827 | 828 | ## 四、实习总结 829 | 830 | ### 4.1 收获和体会 831 | 832 | 
体会到了写编译器的不容易。能踩的坑实在太多了!而且非常不方便 debug,毕竟生成出来的中间代码和目标代码都没有办法像我们在 IDE 中那样一步步运行,还能打印出某个时刻变量的值。因此设计好测试用例(以及设计更多的测试用例)是非常重要的,否则就会面对一长段 wrong answer 的代码而无从下手。 833 | 834 | ### 4.2 学习过程中的难点,以及对实习过程和内容的建议 835 | 836 | 我在数组参数那一部分遇到了很大的困难,被 Koopa IR 的各种类型搞得很晕。我觉得这部分的文档是有改进空间的,文档中只给出了两个对数组完全解引用到 ```i32``` 的例子,但对于数组部分解引用、数组参数部分解引用、以及数组用作函数参数、甚至对数组参数部分解引用然后再用作函数参数,都要自己纠结很长时间才能搞明白。个人认为文档中至少是可以给出一个数组部分解引用(即不是一直解引用到 ```i32```)的例子的。另外文档中也可以举例说明一下各种 Koopa 指令对数组类型的影响,例如 ```*[[i32, 3], 4]``` 被 ```getptr``` 作用后的类型为 ```*[[i32, 3], 4]```、被 ```getelemptr``` 作用后的类型为 ```*[i32, 3]```,```alloc``` 会使类型增加一个 ```*``` 号、```load``` 会使类型减少一个 ```*``` 号等。 837 | 838 | ### 4.3 对老师讲解内容与方式的建议 839 | 840 | 出于显而易见的原因,没有多少人会在编译 lab 中实现教材中提到的那些中间代码优化和目标代码优化,如图着色法分配寄存器、活跃变量分析、可用表达式分析、控制流图和到达定值等。但我其实对这部分的实现是比较感兴趣的,尤其是现在流行的那些编译器都用到了怎样的优化、它们是怎样实现的,我认为这些是可以考虑在实践课中介绍一下的。 841 | -------------------------------------------------------------------------------- /src/AST.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | static int symbol_num = 0; 12 | static int if_else_num = 0; 13 | static int while_num = 0; 14 | static int const_list_num = 0; 15 | static int var_list_num = 0; 16 | enum class FuncFParamType { var, list }; 17 | enum class UnaryExpType { primary, unary, func_call }; 18 | enum class PrimaryExpType { exp, number, lval, list }; 19 | enum class StmtType { if_, ifelse, simple, while_ }; 20 | enum class SimpleStmtType { lval, exp, block, ret, break_, continue_, list }; 21 | enum class DeclType { const_decl, var_decl }; 22 | enum class ConstInitValType { const_exp, list }; 23 | enum class BlockItemType { decl, stmt }; 24 | enum class InitValType { exp, list }; 25 | static std::vector>> 26 | symbol_tables; 27 | static std::map var_num; 28 | static std::map is_list; 29 | static std::map is_func_param; 30 | static std::map list_dim; 31 | static std::vector 
while_stack; 32 | static std::map function_table; 33 | static std::map function_ret_type; 34 | static std::map function_param_num; 35 | static std::map> function_param_idents; 36 | static std::map> function_param_names; 37 | static std::map> function_param_types; 38 | static std::string present_func_type; 39 | 40 | 41 | static std::variant look_up_symbol_tables(std::string l_val) 42 | { 43 | for (auto it = symbol_tables.rbegin(); it != symbol_tables.rend(); it++) 44 | if (it->count(l_val)) 45 | return (*it)[l_val]; 46 | assert(false); 47 | return -1; 48 | } 49 | 50 | 51 | class BaseAST 52 | { 53 | public: 54 | virtual ~BaseAST() = default; 55 | virtual void dump() const = 0; 56 | virtual std::string dumpIR() const = 0; 57 | virtual int dumpExp() const { assert(false); return -1; } 58 | virtual std::string get_ident() const { assert(false); return ""; } 59 | virtual std::string get_type() const { assert(false); return ""; } 60 | virtual int get_dim() const { assert(false); return -1; } 61 | virtual std::vector dumpList(std::vector) const { exit(1); } 62 | }; 63 | 64 | 65 | class CompUnitAST : public BaseAST 66 | { 67 | public: 68 | std::vector> func_def_list; 69 | std::vector> decl_list; 70 | void dump() const override 71 | { 72 | std::cout << "CompUnitAST { "; 73 | for (auto&& func_def : func_def_list) 74 | func_def->dump(); 75 | std::cout << " } "; 76 | } 77 | std::string dumpIR() const override 78 | { 79 | std::cout << "decl @getint(): i32" << std::endl; 80 | std::cout << "decl @getch(): i32" << std::endl; 81 | std::cout << "decl @getarray(*i32): i32" << std::endl; 82 | std::cout << "decl @putint(i32)" << std::endl; 83 | std::cout << "decl @putch(i32)" << std::endl; 84 | std::cout << "decl @putarray(i32, *i32)" << std::endl; 85 | std::cout << "decl @starttime()" << std::endl; 86 | std::cout << "decl @stoptime()" << std::endl << std::endl; 87 | function_table["getint"] = "@getint"; 88 | function_table["getch"] = "@getch"; 89 | function_table["getarray"] = 
"@getarray"; 90 | function_table["putint"] = "@putint"; 91 | function_table["putch"] = "@putch"; 92 | function_table["putarray"] = "@putarray"; 93 | function_table["starttime"] = "@starttime"; 94 | function_table["stoptime"] = "@stoptime"; 95 | function_ret_type["getint"] = "int"; 96 | function_ret_type["getch"] = "int"; 97 | function_ret_type["getarray"] = "int"; 98 | function_ret_type["putint"] = "void"; 99 | function_ret_type["putch"] = "void"; 100 | function_ret_type["putarray"] = "void"; 101 | function_ret_type["starttime"] = "void"; 102 | function_ret_type["stoptime"] = "void"; 103 | function_param_num["getint"] = 0; 104 | function_param_num["getch"] = 0; 105 | function_param_num["getarray"] = 1; 106 | function_param_num["putint"] = 1; 107 | function_param_num["putch"] = 1; 108 | function_param_num["putarray"] = 2; 109 | function_param_num["starttime"] = 0; 110 | function_param_num["stoptime"] = 0; 111 | std::map> global_syms; 112 | symbol_tables.push_back(global_syms); 113 | for (auto&& decl : decl_list)decl->dumpExp(); 114 | std::cout << std::endl; 115 | for (auto&& func_def : func_def_list)func_def->dumpIR(); 116 | symbol_tables.pop_back(); 117 | return ""; 118 | } 119 | }; 120 | 121 | 122 | class FuncFParamAST : public BaseAST 123 | { 124 | public: 125 | FuncFParamType type; 126 | std::string b_type; 127 | std::string ident; 128 | std::vector> const_exp_list; 129 | void dump() const override { std::cout << get_type() << " " << ident; } 130 | std::string dumpIR() const override 131 | { 132 | assert(b_type == "int"); 133 | std::string param_name = "@" + ident; 134 | std::string name = param_name + "_" + 135 | std::to_string(var_num[param_name]++); 136 | std::cout << name; 137 | return name; 138 | } 139 | std::string get_ident() const override { return ident; } 140 | std::string dumpListType(std::vector widths) const 141 | { 142 | if (widths.size() == 0)return "i32"; 143 | std::vector rec = std::vector(widths.begin() + 1, 144 | widths.end()); 145 | return 
"[" + dumpListType(rec) + ", " + std::to_string(widths.front()) 146 | + "]"; 147 | } 148 | std::string get_type() const override 149 | { 150 | if (type == FuncFParamType::var)return "i32"; 151 | std::vector widths; 152 | for (auto&& const_exp : const_exp_list) 153 | widths.push_back(std::stoi(const_exp->dumpIR())); 154 | return "*" + dumpListType(widths); 155 | } 156 | int get_dim() const override { return const_exp_list.size() + 1; } 157 | }; 158 | 159 | 160 | class FuncDefAST : public BaseAST 161 | { 162 | public: 163 | std::string func_type; 164 | std::string ident; 165 | std::vector> params; 166 | std::unique_ptr block; 167 | void dump() const override 168 | { 169 | std::cout << "FuncDefAST { " << func_type << ", " << ident << ", "; 170 | for (int i = 0; i < params.size(); i++) 171 | { 172 | params[i]->dump(); 173 | if (i != params.size() - 1)std::cout << ", "; 174 | } 175 | block->dump(); 176 | std::cout << " } "; 177 | } 178 | std::string dumpIR() const override 179 | { 180 | std::string name = "@" + ident; 181 | assert(!symbol_tables[0].count(ident)); 182 | assert(!function_table.count(ident)); 183 | function_table[ident] = name; 184 | function_ret_type[ident] = func_type; 185 | function_param_num[ident] = params.size(); 186 | present_func_type = func_type; 187 | std::vector idents, names, types; 188 | std::cout << "fun " << name << "("; 189 | for (int i = 0; i < params.size(); i++) 190 | { 191 | idents.push_back(params[i]->get_ident()); 192 | names.push_back(params[i]->dumpIR()); 193 | types.push_back(params[i]->get_type()); 194 | if (types.back() != "i32") 195 | { 196 | std::string tmp = names.back(); tmp[0] = '%'; 197 | list_dim[tmp] = params[i]->get_dim(); 198 | } 199 | std::cout << ": " << params[i]->get_type(); 200 | if (i != params.size() - 1)std::cout << ", "; 201 | } 202 | function_param_idents[ident] = move(idents); 203 | function_param_names[ident] = move(names); 204 | function_param_types[ident] = move(types); 205 | std::cout << ")"; 206 | if 
(func_type == "int")std::cout << ": i32"; 207 | else if (func_type != "void")assert(false); 208 | std::cout << " {" << std::endl << "\%entry_" << ident << ":" << 209 | std::endl; 210 | std::string block_type = block->dumpIR(); 211 | if (block_type != "ret") 212 | { 213 | if (func_type == "int")std::cout << "\tret 0" << std::endl; 214 | else if (func_type == "void")std::cout << "\tret" << std::endl; 215 | else assert(false); 216 | } 217 | std::cout << "}" << std::endl << std::endl; 218 | return block_type; 219 | } 220 | }; 221 | 222 | 223 | class BlockAST : public BaseAST 224 | { 225 | public: 226 | std::vector> block_item_list; 227 | std::string func = ""; 228 | void dump() const override 229 | { 230 | std::cout << "BlockAST { "; 231 | for (auto&& block_item : block_item_list)block_item->dump(); 232 | std::cout << " } "; 233 | } 234 | std::string dumpIR() const override 235 | { 236 | std::string block_type = ""; 237 | std::map> symbol_table; 238 | if (func != "") 239 | { 240 | std::vector idents = function_param_idents[func]; 241 | std::vector names = function_param_names[func]; 242 | std::vector types = function_param_types[func]; 243 | for (int i = 0; i < names.size(); i++) 244 | { 245 | std::string ident = idents[i]; 246 | std::string name = names[i]; name[0] = '%'; 247 | symbol_table[ident] = name; 248 | is_func_param[name] = 1; 249 | std::cout << '\t' << name << " = alloc "; 250 | std::cout << types[i] << std::endl; 251 | std::cout << "\tstore " << names[i] << ", " << name << 252 | std::endl; 253 | } 254 | } 255 | symbol_tables.push_back(symbol_table); 256 | for (auto&& block_item : block_item_list) 257 | { 258 | block_type = block_item->dumpIR(); 259 | if (block_type == "ret" || block_type == "break" || 260 | block_type == "cont")break; 261 | } 262 | symbol_tables.pop_back(); 263 | return block_type; 264 | } 265 | }; 266 | 267 | 268 | class StmtAST : public BaseAST 269 | { 270 | public: 271 | StmtType type; 272 | std::unique_ptr exp_simple; 273 | 
std::unique_ptr if_stmt; 274 | std::unique_ptr else_stmt; 275 | std::unique_ptr while_stmt; 276 | void dump() const override 277 | { 278 | if (type == StmtType::simple) 279 | { 280 | std::cout << "StmtAST { "; 281 | exp_simple->dump(); 282 | std::cout << " } "; 283 | } 284 | else if (type == StmtType::if_) 285 | { 286 | std::cout << "IF { "; 287 | exp_simple->dump(); 288 | std::cout << " } THEN { "; 289 | if_stmt->dump(); 290 | std::cout << " } "; 291 | } 292 | else if (type == StmtType::ifelse) 293 | { 294 | std::cout << "IF { "; 295 | exp_simple->dump(); 296 | std::cout << " } THEN { "; 297 | if_stmt->dump(); 298 | std::cout << " } ELSE { "; 299 | else_stmt->dump(); 300 | std::cout << " } "; 301 | } 302 | else if (type == StmtType::while_) 303 | { 304 | std::cout << "WHILE { "; 305 | exp_simple->dump(); 306 | std::cout << " } DO { "; 307 | while_stmt->dump(); 308 | std::cout << " } "; 309 | } 310 | else assert(false); 311 | } 312 | std::string dumpIR() const override 313 | { 314 | if (type == StmtType::simple)return exp_simple->dumpIR(); 315 | else if (type == StmtType::if_) 316 | { 317 | std::string if_result = exp_simple->dumpIR(); 318 | std::string then_label = "\%then__" + std::to_string(if_else_num); 319 | std::string end_label = "\%end__" + std::to_string(if_else_num++); 320 | std::cout << "\tbr " << if_result << ", " << then_label << ", " << 321 | end_label << std::endl; 322 | std::cout << then_label << ":" << std::endl; 323 | std::string if_stmt_type = if_stmt->dumpIR(); 324 | if (if_stmt_type != "ret" && if_stmt_type != "break" && 325 | if_stmt_type != "cont") 326 | std::cout << "\tjump " << end_label << std::endl; 327 | std::cout << end_label << ":" << std::endl; 328 | } 329 | else if (type == StmtType::ifelse) 330 | { 331 | std::string if_result = exp_simple->dumpIR(); 332 | std::string then_label = "\%then__" + std::to_string(if_else_num); 333 | std::string else_label = "\%else__" + std::to_string(if_else_num); 334 | std::string end_label = "\%end__" 
+ std::to_string(if_else_num++); 335 | std::cout << "\tbr " << if_result << ", " << then_label << ", " << 336 | else_label << std::endl; 337 | std::cout << then_label << ":" << std::endl; 338 | std::string if_stmt_type = if_stmt->dumpIR(); 339 | if (if_stmt_type != "ret" && if_stmt_type != "break" && 340 | if_stmt_type != "cont") 341 | std::cout << "\tjump " << end_label << std::endl; 342 | std::cout << else_label << ":" << std::endl; 343 | std::string else_stmt_type = else_stmt->dumpIR(); 344 | if (else_stmt_type != "ret" && else_stmt_type != "break" && 345 | else_stmt_type != "cont") 346 | std::cout << "\tjump " << end_label << std::endl; 347 | if ((if_stmt_type == "ret" || if_stmt_type == "break" || 348 | if_stmt_type == "cont") && (else_stmt_type == "ret" || 349 | else_stmt_type == "break" || else_stmt_type == "cont")) 350 | return "ret"; 351 | else std::cout << end_label << ":" << std::endl; 352 | } 353 | else if (type == StmtType::while_) 354 | { 355 | std::string entry_label = "\%while__" + std::to_string(while_num); 356 | std::string body_label = "\%do__" + std::to_string(while_num); 357 | std::string end_label = "\%while_end__" + std::to_string(while_num); 358 | while_stack.push_back(while_num++); 359 | std::cout << "\tjump " << entry_label << std::endl; 360 | std::cout << entry_label << ":" << std::endl; 361 | std::string while_result = exp_simple->dumpIR(); 362 | std::cout << "\tbr " << while_result << ", " << body_label << ", " 363 | << end_label << std::endl; 364 | std::cout << body_label << ":" << std::endl; 365 | std::string while_stmt_type = while_stmt->dumpIR(); 366 | if (while_stmt_type != "ret" && while_stmt_type != "break" && 367 | while_stmt_type != "cont") 368 | std::cout << "\tjump " << entry_label << std::endl; 369 | std::cout << end_label << ":" << std::endl; 370 | while_stack.pop_back(); 371 | } 372 | else assert(false); 373 | return ""; 374 | } 375 | }; 376 | 377 | 378 | class SimpleStmtAST : public BaseAST 379 | { 380 | public: 381 | 
SimpleStmtType type; 382 | std::string lval; 383 | std::vector> exp_list; 384 | std::unique_ptr block_exp; 385 | void dump() const override 386 | { 387 | if (type == SimpleStmtType::ret) 388 | { 389 | std::cout << "RETURN { "; 390 | block_exp->dump(); 391 | std::cout << " } "; 392 | } 393 | else if (type == SimpleStmtType::lval) 394 | { 395 | std::cout << "LVAL { " << lval << " = "; 396 | block_exp->dump(); 397 | std::cout << " } "; 398 | } 399 | else if (type == SimpleStmtType::list) 400 | { 401 | std::cout << "LVAL { " << lval; 402 | for (auto&& exp : exp_list) 403 | { 404 | std::cout << '['; exp->dump(); std::cout << ']'; 405 | } 406 | std::cout << " = "; 407 | block_exp->dump(); 408 | std::cout << " } "; 409 | } 410 | else if (type == SimpleStmtType::exp) 411 | { 412 | if (block_exp != nullptr) 413 | { 414 | std::cout << "EXP { "; 415 | block_exp->dump(); 416 | std::cout << " } "; 417 | } 418 | } 419 | else if (type == SimpleStmtType::block) 420 | { 421 | std::cout << "BLOCK { "; 422 | block_exp->dump(); 423 | std::cout << " } "; 424 | } 425 | else if (type == SimpleStmtType::break_)std::cout << "BREAK "; 426 | else if (type == SimpleStmtType::continue_)std::cout << "CONTINUE "; 427 | else assert(false); 428 | } 429 | std::string dumpIR() const override 430 | { 431 | if (type == SimpleStmtType::ret) 432 | { 433 | if (block_exp == nullptr) 434 | { 435 | if (present_func_type == "int") 436 | std::cout << "\tret 0" << std::endl; 437 | else std::cout << "\tret" << std::endl; 438 | } 439 | else 440 | { 441 | std::string result_var = block_exp->dumpIR(); 442 | std::cout << "\tret " << result_var << std::endl; 443 | } 444 | return "ret"; 445 | } 446 | else if (type == SimpleStmtType::lval) 447 | { 448 | std::string result_var = block_exp->dumpIR(); 449 | std::variant value = look_up_symbol_tables(lval); 450 | assert(value.index() == 1); 451 | std::cout << "\tstore " << result_var << ", " << 452 | std::get(value) << std::endl; 453 | } 454 | else if (type == 
SimpleStmtType::list) 455 | { 456 | std::variant value = look_up_symbol_tables(lval); 457 | assert(value.index() == 1); 458 | std::string result_var, name, prev = std::get(value); 459 | assert(list_dim[prev] == exp_list.size()); 460 | for (auto&& exp : exp_list) 461 | { 462 | result_var = exp->dumpIR(); 463 | name = "%" + std::to_string(symbol_num++); 464 | if (is_func_param[prev]) 465 | { 466 | std::cout << '\t' << name << " = load " << prev << 467 | std::endl; 468 | std::string tmp = "%" + std::to_string(symbol_num++); 469 | std::cout << '\t' << tmp << " = getptr " << name << ", " 470 | << result_var << std::endl; 471 | name = tmp; 472 | } 473 | else 474 | std::cout << '\t' << name << " = getelemptr " << prev << 475 | ", " << result_var << std::endl; 476 | prev = name; 477 | } 478 | result_var = block_exp->dumpIR(); 479 | std::cout << "\tstore " << result_var << ", " << prev << std::endl; 480 | } 481 | else if (type == SimpleStmtType::exp) 482 | { 483 | if (block_exp != nullptr)block_exp->dumpIR(); 484 | } 485 | else if (type == SimpleStmtType::block)return block_exp->dumpIR(); 486 | else if (type == SimpleStmtType::break_) 487 | { 488 | assert(!while_stack.empty()); 489 | int while_no = while_stack.back(); 490 | std::string end_label = "\%while_end__" + std::to_string(while_no); 491 | std::cout << "\tjump " << end_label << std::endl; 492 | return "break"; 493 | } 494 | else if (type == SimpleStmtType::continue_) 495 | { 496 | assert(!while_stack.empty()); 497 | int while_no = while_stack.back(); 498 | std::string entry_label = "\%while__" + std::to_string(while_no); 499 | std::cout << "\tjump " << entry_label << std::endl; 500 | return "cont"; 501 | } 502 | else assert(false); 503 | return ""; 504 | } 505 | }; 506 | 507 | 508 | class ExpAST : public BaseAST 509 | { 510 | public: 511 | std::unique_ptr l_or_exp; 512 | void dump() const override 513 | { 514 | std::cout << "ExpAST { "; 515 | l_or_exp->dump(); 516 | std::cout << " } "; 517 | } 518 | std::string 
dumpIR() const override 519 | { 520 | return l_or_exp->dumpIR(); 521 | } 522 | virtual int dumpExp() const override 523 | { 524 | return l_or_exp->dumpExp(); 525 | } 526 | }; 527 | 528 | 529 | class LOrExpAST : public BaseAST 530 | { 531 | public: 532 | std::string op; 533 | std::unique_ptr l_and_exp; 534 | std::unique_ptr l_or_exp; 535 | void dump() const override 536 | { 537 | if (op == "")l_and_exp->dump(); 538 | else 539 | { 540 | l_or_exp->dump(); 541 | std::cout << op; 542 | l_and_exp->dump(); 543 | } 544 | } 545 | std::string dumpIR() const override 546 | { 547 | std::string result_var = ""; 548 | if (op == "")result_var = l_and_exp->dumpIR(); 549 | else if (op == "||") 550 | { 551 | std::string left_result = l_or_exp->dumpIR(); 552 | std::string then_label = "\%then__" + std::to_string(if_else_num); 553 | std::string else_label = "\%else__" + std::to_string(if_else_num); 554 | std::string end_label = "\%end__" + std::to_string(if_else_num++); 555 | std::string result_var_ptr = "%" + std::to_string(symbol_num++); 556 | std::cout << '\t' << result_var_ptr << " = alloc i32" << std::endl; 557 | std::cout << "\tbr " << left_result << ", " << then_label << ", " 558 | << else_label << std::endl; 559 | std::cout << then_label << ":" << std::endl; 560 | std::cout << "\tstore 1, " << result_var_ptr << std::endl; 561 | std::cout << "\tjump " << end_label << std::endl; 562 | std::cout << else_label << ":" << std::endl; 563 | std::string tmp_result_var = "%" + std::to_string(symbol_num++); 564 | std::string right_result = l_and_exp->dumpIR(); 565 | std::cout << '\t' << tmp_result_var << " = ne " << right_result 566 | << ", 0" << std::endl; 567 | std::cout << "\tstore " << tmp_result_var << ", " << result_var_ptr 568 | << std::endl; 569 | std::cout << "\tjump " << end_label << std::endl; 570 | std::cout << end_label << ":" << std::endl; 571 | result_var = "%" + std::to_string(symbol_num++); 572 | std::cout << '\t' << result_var << " = load " << result_var_ptr 573 | << 
std::endl; 574 | } 575 | else assert(false); 576 | return result_var; 577 | } 578 | virtual int dumpExp() const override 579 | { 580 | int result = 1; 581 | if (op == "")result = l_and_exp->dumpExp(); 582 | else if (op == "||") 583 | { 584 | int left_result = l_or_exp->dumpExp(); 585 | if (left_result)return 1; 586 | result = l_and_exp->dumpExp() != 0; 587 | } 588 | else assert(false); 589 | return result; 590 | } 591 | }; 592 | 593 | 594 | class LAndExpAST : public BaseAST 595 | { 596 | public: 597 | std::string op; 598 | std::unique_ptr eq_exp; 599 | std::unique_ptr l_and_exp; 600 | void dump() const override 601 | { 602 | if (op == "")eq_exp->dump(); 603 | else 604 | { 605 | l_and_exp->dump(); 606 | std::cout << op; 607 | eq_exp->dump(); 608 | } 609 | } 610 | std::string dumpIR() const override 611 | { 612 | std::string result_var = ""; 613 | if (op == "")result_var = eq_exp->dumpIR(); 614 | else if (op == "&&") 615 | { 616 | std::string left_result = l_and_exp->dumpIR(); 617 | std::string then_label = "\%then__" + std::to_string(if_else_num); 618 | std::string else_label = "\%else__" + std::to_string(if_else_num); 619 | std::string end_label = "\%end__" + std::to_string(if_else_num++); 620 | std::string result_var_ptr = "%" + std::to_string(symbol_num++); 621 | std::cout << '\t' << result_var_ptr << " = alloc i32" << std::endl; 622 | std::cout << "\tbr " << left_result << ", " << then_label << ", " 623 | << else_label << std::endl; 624 | std::cout << then_label << ":" << std::endl; 625 | std::string tmp_result_var = "%" + std::to_string(symbol_num++); 626 | std::string right_result = eq_exp->dumpIR(); 627 | std::cout << '\t' << tmp_result_var << " = ne " << right_result 628 | << ", 0" << std::endl; 629 | std::cout << "\tstore " << tmp_result_var << ", " << result_var_ptr 630 | << std::endl; 631 | std::cout << "\tjump " << end_label << std::endl; 632 | std::cout << else_label << ":" << std::endl; 633 | std::cout << "\tstore 0, " << result_var_ptr << std::endl; 
634 | std::cout << "\tjump " << end_label << std::endl; 635 | std::cout << end_label << ":" << std::endl; 636 | result_var = "%" + std::to_string(symbol_num++); 637 | std::cout << '\t' << result_var << " = load " << result_var_ptr 638 | << std::endl; 639 | } 640 | else assert(false); 641 | return result_var; 642 | } 643 | virtual int dumpExp() const override 644 | { 645 | int result = 0; 646 | if (op == "")result = eq_exp->dumpExp(); 647 | else if (op == "&&") 648 | { 649 | int left_result = l_and_exp->dumpExp(); 650 | if (left_result == 0)return 0; 651 | result = eq_exp->dumpExp() != 0; 652 | } 653 | else assert(false); 654 | return result; 655 | } 656 | }; 657 | 658 | 659 | class EqExpAST : public BaseAST 660 | { 661 | public: 662 | std::string op; 663 | std::unique_ptr rel_exp; 664 | std::unique_ptr eq_exp; 665 | void dump() const override 666 | { 667 | if (op == "")rel_exp->dump(); 668 | else 669 | { 670 | eq_exp->dump(); 671 | std::cout << op; 672 | rel_exp->dump(); 673 | } 674 | } 675 | std::string dumpIR() const override 676 | { 677 | std::string result_var = ""; 678 | if (op == "")result_var = rel_exp->dumpIR(); 679 | else 680 | { 681 | std::string left_result = eq_exp->dumpIR(); 682 | std::string right_result = rel_exp->dumpIR(); 683 | result_var = "%" + std::to_string(symbol_num++); 684 | if (op == "==") 685 | std::cout << '\t' << result_var << " = eq " << left_result << 686 | ", " << right_result << std::endl; 687 | else if (op == "!=") 688 | std::cout << '\t' << result_var << " = ne " << left_result << 689 | ", " << right_result << std::endl; 690 | else assert(false); 691 | } 692 | return result_var; 693 | } 694 | virtual int dumpExp() const override 695 | { 696 | int result = 0; 697 | if (op == "")result = rel_exp->dumpExp(); 698 | else 699 | { 700 | int left_result = eq_exp->dumpExp(); 701 | int right_result = rel_exp->dumpExp(); 702 | if (op == "==")result = left_result == right_result; 703 | else if (op == "!=")result = left_result != right_result; 
704 | else assert(false); 705 | } 706 | return result; 707 | } 708 | }; 709 | 710 | 711 | class RelExpAST : public BaseAST 712 | { 713 | public: 714 | std::string op; 715 | std::unique_ptr add_exp; 716 | std::unique_ptr rel_exp; 717 | void dump() const override 718 | { 719 | if (op == "")add_exp->dump(); 720 | else 721 | { 722 | rel_exp->dump(); 723 | std::cout << op; 724 | add_exp->dump(); 725 | } 726 | } 727 | std::string dumpIR() const override 728 | { 729 | std::string result_var = ""; 730 | if (op == "")result_var = add_exp->dumpIR(); 731 | else 732 | { 733 | std::string left_result = rel_exp->dumpIR(); 734 | std::string right_result = add_exp->dumpIR(); 735 | result_var = "%" + std::to_string(symbol_num++); 736 | if (op == "<") 737 | std::cout << '\t' << result_var << " = lt " << left_result << 738 | ", " << right_result << std::endl; 739 | else if (op == ">") 740 | std::cout << '\t' << result_var << " = gt " << left_result << 741 | ", " << right_result << std::endl; 742 | else if (op == "<=") 743 | std::cout << '\t' << result_var << " = le " << left_result << 744 | ", " << right_result << std::endl; 745 | else if (op == ">=") 746 | std::cout << '\t' << result_var << " = ge " << left_result << 747 | ", " << right_result << std::endl; 748 | else assert(false); 749 | } 750 | return result_var; 751 | } 752 | virtual int dumpExp() const override 753 | { 754 | int result = 0; 755 | if (op == "")result = add_exp->dumpExp(); 756 | else 757 | { 758 | int left_result = rel_exp->dumpExp(); 759 | int right_result = add_exp->dumpExp(); 760 | if (op == ">")result = left_result > right_result; 761 | else if (op == ">=")result = left_result >= right_result; 762 | else if (op == "<")result = left_result < right_result; 763 | else if (op == "<=")result = left_result <= right_result; 764 | else assert(false); 765 | } 766 | return result; 767 | } 768 | }; 769 | 770 | 771 | class AddExpAST : public BaseAST 772 | { 773 | public: 774 | std::string op; 775 | std::unique_ptr 
mul_exp; 776 | std::unique_ptr add_exp; 777 | void dump() const override 778 | { 779 | if (op == "")mul_exp->dump(); 780 | else 781 | { 782 | add_exp->dump(); 783 | std::cout << op; 784 | mul_exp->dump(); 785 | } 786 | } 787 | std::string dumpIR() const override 788 | { 789 | std::string result_var = ""; 790 | if (op == "")result_var = mul_exp->dumpIR(); 791 | else 792 | { 793 | std::string left_result = add_exp->dumpIR(); 794 | std::string right_result = mul_exp->dumpIR(); 795 | result_var = "%" + std::to_string(symbol_num++); 796 | if (op == "+") 797 | std::cout << '\t' << result_var << " = add " << left_result << 798 | ", " << right_result << std::endl; 799 | else if (op == "-") 800 | std::cout << '\t' << result_var << " = sub " << left_result << 801 | ", " << right_result << std::endl; 802 | else assert(false); 803 | } 804 | return result_var; 805 | } 806 | virtual int dumpExp() const override 807 | { 808 | int result = 0; 809 | if (op == "")result = mul_exp->dumpExp(); 810 | else 811 | { 812 | int left_result = add_exp->dumpExp(); 813 | int right_result = mul_exp->dumpExp(); 814 | if (op == "+")result = left_result + right_result; 815 | else if (op == "-")result = left_result - right_result; 816 | else assert(false); 817 | } 818 | return result; 819 | } 820 | }; 821 | 822 | 823 | class MulExpAST : public BaseAST 824 | { 825 | public: 826 | std::string op; 827 | std::unique_ptr unary_exp; 828 | std::unique_ptr mul_exp; 829 | void dump() const override 830 | { 831 | if (op == "")unary_exp->dump(); 832 | else 833 | { 834 | mul_exp->dump(); 835 | std::cout << op; 836 | unary_exp->dump(); 837 | } 838 | } 839 | std::string dumpIR() const override 840 | { 841 | std::string result_var = ""; 842 | if (op == "")result_var = unary_exp->dumpIR(); 843 | else 844 | { 845 | std::string left_result = mul_exp->dumpIR(); 846 | std::string right_result = unary_exp->dumpIR(); 847 | result_var = "%" + std::to_string(symbol_num++); 848 | if (op == "*") 849 | std::cout << '\t' << 
result_var << " = mul " << left_result << 850 | ", " << right_result << std::endl; 851 | else if (op == "/") 852 | std::cout << '\t' << result_var << " = div " << left_result << 853 | ", " << right_result << std::endl; 854 | else if (op == "%") 855 | std::cout << '\t' << result_var << " = mod " << left_result << 856 | ", " << right_result << std::endl; 857 | else assert(false); 858 | } 859 | return result_var; 860 | } 861 | virtual int dumpExp() const override 862 | { 863 | int result = 0; 864 | if (op == "")result = unary_exp->dumpExp(); 865 | else 866 | { 867 | int left_result = mul_exp->dumpExp(); 868 | int right_result = unary_exp->dumpExp(); 869 | if (op == "*")result = left_result * right_result; 870 | else if (op == "/")result = left_result / right_result; 871 | else if (op == "%")result = left_result % right_result; 872 | else assert(false); 873 | } 874 | return result; 875 | } 876 | }; 877 | 878 | 879 | class UnaryExpAST : public BaseAST 880 | { 881 | public: 882 | UnaryExpType type; 883 | std::string op; 884 | std::unique_ptr exp; 885 | std::string ident; 886 | std::vector> params; 887 | void dump() const override 888 | { 889 | if (type == UnaryExpType::func_call) 890 | { 891 | std::cout << ident << "("; 892 | for (int i = 0; i < params.size(); i++) 893 | { 894 | params[i]->dump(); 895 | if (i != params.size() - 1)std::cout << ", "; 896 | } 897 | return; 898 | } 899 | if (type == UnaryExpType::unary)std::cout << op; 900 | exp->dump(); 901 | } 902 | std::string dumpIR() const override 903 | { 904 | if (type == UnaryExpType::primary)return exp->dumpIR(); 905 | else if (type == UnaryExpType::unary) 906 | { 907 | std::string result_var = exp->dumpIR(); 908 | std::string next_var = "%" + std::to_string(symbol_num); 909 | if (op == "+")return result_var; 910 | else if (op == "-")std::cout << '\t' << next_var << " = sub 0, " << 911 | result_var << std::endl; 912 | else if (op == "!")std::cout << '\t' << next_var << " = eq " << 913 | result_var << ", 0" << 
std::endl; 914 | else assert(false); 915 | symbol_num++; 916 | return next_var; 917 | } 918 | else if (type == UnaryExpType::func_call) 919 | { 920 | std::vector param_vars; 921 | for (auto&& param : params) 922 | param_vars.push_back(param->dumpIR()); 923 | assert(function_table.count(ident)); 924 | assert(function_param_num[ident] == params.size()); 925 | std::string result_var = ""; 926 | if (function_ret_type[ident] == "int") 927 | result_var = "%" + std::to_string(symbol_num++); 928 | std::string name = function_table[ident]; 929 | std::cout << '\t'; 930 | if (function_ret_type[ident] == "int") 931 | std::cout << result_var << " = "; 932 | std::cout << "call " << name << "("; 933 | for (int i = 0; i < param_vars.size(); i++) 934 | { 935 | std::cout << param_vars[i]; 936 | if (i != param_vars.size() - 1)std::cout << ", "; 937 | } 938 | std::cout << ")" << std::endl; 939 | return result_var; 940 | } 941 | else assert(false); 942 | return ""; 943 | } 944 | virtual int dumpExp() const override 945 | { 946 | int result = 0; 947 | if (type == UnaryExpType::primary)result = exp->dumpExp(); 948 | else if (type == UnaryExpType::unary) 949 | { 950 | int tmp = exp->dumpExp(); 951 | if (op == "+")result = tmp; 952 | else if (op == "-")result = -tmp; 953 | else if (op == "!")result = !tmp; 954 | else assert(false); 955 | } 956 | else assert(false); 957 | return result; 958 | } 959 | }; 960 | 961 | 962 | class PrimaryExpAST : public BaseAST 963 | { 964 | public: 965 | PrimaryExpType type; 966 | std::unique_ptr exp; 967 | std::string lval; 968 | std::vector> exp_list; 969 | int number; 970 | void dump() const override 971 | { 972 | if (type == PrimaryExpType::exp)exp->dump(); 973 | else if (type == PrimaryExpType::number)std::cout << number; 974 | else if (type == PrimaryExpType::lval)std::cout << lval; 975 | else if (type == PrimaryExpType::list) 976 | { 977 | std::cout << lval; 978 | for (auto&& exp : exp_list) 979 | { 980 | std::cout << '['; exp->dump(); std::cout << ']'; 
981 | } 982 | } 983 | else assert(false); 984 | } 985 | std::string dumpIR() const override 986 | { 987 | std::string result_var = ""; 988 | if (type == PrimaryExpType::exp)result_var = exp->dumpIR(); 989 | else if (type == PrimaryExpType::number) 990 | result_var = std::to_string(number); 991 | else if (type == PrimaryExpType::lval) 992 | { 993 | std::variant value = look_up_symbol_tables(lval); 994 | if (value.index() == 0) 995 | result_var = std::to_string(std::get(value)); 996 | else if (is_list[std::get(value)]) 997 | { 998 | result_var = "%" + std::to_string(symbol_num++); 999 | std::cout << '\t' << result_var << " = getelemptr " << 1000 | std::get(value) << ", 0" << std::endl; 1001 | } 1002 | else 1003 | { 1004 | result_var = "%" + std::to_string(symbol_num++); 1005 | std::cout << '\t' << result_var << " = load " << 1006 | std::get(value) << std::endl; 1007 | } 1008 | } 1009 | else if (type == PrimaryExpType::list) 1010 | { 1011 | std::variant value = look_up_symbol_tables(lval); 1012 | assert(value.index() == 1); 1013 | std::string name, prev = std::get(value); 1014 | int dim = list_dim[prev]; 1015 | bool list = is_list[prev], func_param = is_func_param[prev]; 1016 | for (auto&& exp : exp_list) 1017 | { 1018 | result_var = exp->dumpIR(); 1019 | name = "%" + std::to_string(symbol_num++); 1020 | if (is_func_param[prev]) 1021 | { 1022 | std::cout << '\t' << name << " = load " << prev << 1023 | std::endl; 1024 | std::string tmp = "%" + std::to_string(symbol_num++); 1025 | std::cout << '\t' << tmp << " = getptr " << name << 1026 | ", " << result_var << std::endl; 1027 | name = tmp; 1028 | } 1029 | else 1030 | std::cout << '\t' << name << " = getelemptr " << prev << 1031 | ", " << result_var << std::endl; 1032 | prev = name; 1033 | } 1034 | if (exp_list.size() == dim) 1035 | { 1036 | result_var = "%" + std::to_string(symbol_num++); 1037 | std::cout << '\t' << result_var << " = load " << prev << 1038 | std::endl; 1039 | } 1040 | else if (list || func_param) 1041 | 
{ 1042 | result_var = "%" + std::to_string(symbol_num++); 1043 | std::cout << '\t' << result_var << " = getelemptr " << prev << 1044 | ", 0" << std::endl; 1045 | } 1046 | else result_var = name; 1047 | } 1048 | else assert(false); 1049 | return result_var; 1050 | } 1051 | virtual int dumpExp() const override 1052 | { 1053 | int result = 0; 1054 | if (type == PrimaryExpType::exp)result = exp->dumpExp(); 1055 | else if (type == PrimaryExpType::number)result = number; 1056 | else if (type == PrimaryExpType::lval) 1057 | { 1058 | std::variant value = look_up_symbol_tables(lval); 1059 | assert(value.index() == 0); 1060 | result = std::get(value); 1061 | } 1062 | else assert(false); 1063 | return result; 1064 | } 1065 | }; 1066 | 1067 | 1068 | class DeclAST : public BaseAST 1069 | { 1070 | public: 1071 | DeclType type; 1072 | std::unique_ptr decl; 1073 | void dump() const override { decl->dump(); } 1074 | std::string dumpIR() const override { return decl->dumpIR(); } 1075 | int dumpExp() const override { return decl->dumpExp(); } 1076 | }; 1077 | 1078 | 1079 | class ConstDeclAST : public BaseAST 1080 | { 1081 | public: 1082 | std::string b_type; 1083 | std::vector> const_def_list; 1084 | void dump() const override 1085 | { 1086 | assert(b_type == "int"); 1087 | for (auto&& const_def : const_def_list)const_def->dump(); 1088 | } 1089 | std::string dumpIR() const override 1090 | { 1091 | assert(b_type == "int"); 1092 | for (auto&& const_def : const_def_list)const_def->dumpIR(); 1093 | return ""; 1094 | } 1095 | int dumpExp() const override 1096 | { 1097 | assert(b_type == "int"); 1098 | for (auto&& const_def : const_def_list)const_def->dumpExp(); 1099 | return 0; 1100 | } 1101 | }; 1102 | 1103 | 1104 | class ConstDefAST : public BaseAST 1105 | { 1106 | public: 1107 | std::string ident; 1108 | std::vector> const_exp_list; 1109 | std::unique_ptr const_init_val; 1110 | void dump() const override 1111 | { 1112 | std::cout << "ConstDefAST{" << ident << "="; 1113 | 
const_init_val->dump(); 1114 | std::cout << "} "; 1115 | } 1116 | void dumpListType(std::vector widths) const 1117 | { 1118 | if (widths.size() == 1) 1119 | { 1120 | std::cout << "[i32, " << widths[0] << "]"; return; 1121 | } 1122 | std::vector rec = std::vector(widths.begin() + 1, 1123 | widths.end()); 1124 | std::cout << "["; dumpListType(rec); 1125 | std::cout << ", " << widths.front() << "]"; 1126 | } 1127 | void dumpListInit(std::string prev, std::vector widths, int depth, 1128 | std::vector init_list) const 1129 | { 1130 | if (depth >= widths.size()) 1131 | { 1132 | std::cout << "\tstore " << init_list[const_list_num++] << ", " << 1133 | prev << std::endl; 1134 | return; 1135 | } 1136 | for (int i = 0; i < widths[depth]; i++) 1137 | { 1138 | std::string result_var = "%" + std::to_string(symbol_num++); 1139 | std::cout << '\t' << result_var << " = getelemptr " << prev << ", " 1140 | << i << std::endl; 1141 | dumpListInit(result_var, widths, depth + 1, init_list); 1142 | } 1143 | } 1144 | void printInitList(std::vector widths, int depth, 1145 | std::vector init_list) const 1146 | { 1147 | if (depth >= widths.size()) 1148 | { 1149 | std::cout << init_list[const_list_num++]; return; 1150 | } 1151 | std::cout << "{"; 1152 | for (int i = 0; i < widths[depth]; i++) 1153 | { 1154 | printInitList(widths, depth + 1, init_list); 1155 | if (i != widths[depth] - 1)std::cout << ", "; 1156 | } 1157 | std::cout << "}"; 1158 | } 1159 | std::string dumpIR() const override 1160 | { 1161 | if (const_exp_list.empty()) 1162 | symbol_tables.back()[ident] = std::stoi(const_init_val->dumpIR()); 1163 | else 1164 | { 1165 | std::vector widths, init_list; 1166 | for (auto&& const_exp : const_exp_list) 1167 | widths.push_back(std::stoi(const_exp->dumpIR())); 1168 | const_list_num = 0; 1169 | init_list = const_init_val->dumpList(widths); 1170 | std::string var_name = "@" + ident; 1171 | std::string name = var_name + "_" + 1172 | std::to_string(var_num[var_name]++); 1173 | 
symbol_tables.back()[ident] = name; 1174 | is_list[name] = 1; 1175 | list_dim[name] = widths.size(); 1176 | std::cout << '\t' << name << " = alloc "; 1177 | dumpListType(widths); 1178 | std::cout << std::endl; 1179 | const_list_num = 0; 1180 | for (int i = 0; i < widths[0]; i++) 1181 | { 1182 | std::string result_var = "%" + std::to_string(symbol_num++); 1183 | std::cout << '\t' << result_var << " = getelemptr " << name << 1184 | ", " << i << std::endl; 1185 | dumpListInit(result_var, widths, 1, init_list); 1186 | } 1187 | } 1188 | return ""; 1189 | } 1190 | int dumpExp() const override 1191 | { 1192 | if (const_exp_list.empty()) 1193 | symbol_tables.back()[ident] = std::stoi(const_init_val->dumpIR()); 1194 | else 1195 | { 1196 | std::vector widths, init_list; 1197 | for (auto&& const_exp : const_exp_list) 1198 | widths.push_back(std::stoi(const_exp->dumpIR())); 1199 | const_list_num = 0; 1200 | init_list = const_init_val->dumpList(widths); 1201 | std::string var_name = "@" + ident; 1202 | std::string name = var_name + "_" + 1203 | std::to_string(var_num[var_name]++); 1204 | symbol_tables.back()[ident] = name; 1205 | is_list[name] = 1; 1206 | list_dim[name] = widths.size(); 1207 | std::cout << "global " << name << " = alloc "; 1208 | dumpListType(widths); 1209 | std::cout << ", "; 1210 | const_list_num = 0; 1211 | printInitList(widths, 0, init_list); 1212 | std::cout << std::endl; 1213 | } 1214 | return 0; 1215 | } 1216 | }; 1217 | 1218 | 1219 | class ConstInitValAST : public BaseAST 1220 | { 1221 | public: 1222 | ConstInitValType type; 1223 | std::unique_ptr const_exp; 1224 | std::vector> const_init_val_list; 1225 | void dump() const override 1226 | { 1227 | if (type == ConstInitValType::const_exp) 1228 | std::cout << const_exp->dumpExp(); 1229 | else if (type == ConstInitValType::list) 1230 | { 1231 | std::cout << "{"; 1232 | for (int i = 0; i < const_init_val_list.size(); i++) 1233 | { 1234 | const_init_val_list[i]->dump(); 1235 | if (i != 
const_init_val_list.size() - 1)std::cout << ","; 1236 | } 1237 | std::cout << "} "; 1238 | } 1239 | else assert(false); 1240 | } 1241 | std::string dumpIR() const override 1242 | { 1243 | assert(type == ConstInitValType::const_exp); 1244 | return std::to_string(const_exp->dumpExp()); 1245 | } 1246 | std::vector dumpList(std::vector widths) const override 1247 | { 1248 | std::vector ret; 1249 | if (widths.size() == 1) 1250 | { 1251 | for (auto&& const_init_val : const_init_val_list) 1252 | { 1253 | assert(const_init_val->get_ident() == "const_exp"); 1254 | ret.push_back(std::stoi(const_init_val->dumpIR())); 1255 | const_list_num++; 1256 | } 1257 | int num_zeros = widths[0] - ret.size(); 1258 | for (int i = 0; i < num_zeros; i++) 1259 | { 1260 | ret.push_back(0); const_list_num++; 1261 | } 1262 | return ret; 1263 | } 1264 | std::vector products = widths; 1265 | for (int i = products.size() - 2; i >= 0; i--) 1266 | products[i] *= products[i + 1]; 1267 | int total_size = products[0]; 1268 | for (auto&& const_init_val : const_init_val_list) 1269 | if (const_init_val->get_ident() == "const_exp") 1270 | { 1271 | ret.push_back(std::stoi(const_init_val->dumpIR())); 1272 | const_list_num++; continue; 1273 | } 1274 | else if (const_init_val->get_ident() == "list") 1275 | { 1276 | int init_num; 1277 | for (init_num = 1; init_num < widths.size(); init_num++) 1278 | if (const_list_num % products[init_num] == 0)break; 1279 | else if (init_num == widths.size() - 1)assert(false); 1280 | std::vector rec = std::vector(widths.begin() + 1281 | init_num, widths.end()); 1282 | std::vector tmp = const_init_val->dumpList(rec); 1283 | ret.insert(ret.end(), tmp.begin(), tmp.end()); 1284 | } 1285 | else assert(false); 1286 | int num_zeros = total_size - ret.size(); 1287 | for (int i = 0; i < num_zeros; i++) 1288 | { 1289 | ret.push_back(0); const_list_num++; 1290 | } 1291 | return ret; 1292 | } 1293 | std::string get_ident() const override 1294 | { 1295 | if (type == 
ConstInitValType::const_exp)return "const_exp"; 1296 | else if (type == ConstInitValType::list)return "list"; 1297 | else assert(false); 1298 | } 1299 | }; 1300 | 1301 | 1302 | class BlockItemAST : public BaseAST 1303 | { 1304 | public: 1305 | BlockItemType type; 1306 | std::unique_ptr content; 1307 | void dump() const override { content->dump(); } 1308 | std::string dumpIR() const override { return content->dumpIR(); } 1309 | }; 1310 | 1311 | 1312 | class ConstExpAST : public BaseAST 1313 | { 1314 | public: 1315 | std::unique_ptr exp; 1316 | void dump() const override { std::cout << exp->dumpExp(); } 1317 | std::string dumpIR() const override 1318 | { 1319 | return std::to_string(exp->dumpExp()); 1320 | } 1321 | virtual int dumpExp() const override { return exp->dumpExp(); } 1322 | }; 1323 | 1324 | 1325 | class VarDeclAST : public BaseAST 1326 | { 1327 | public: 1328 | std::string b_type; 1329 | std::vector> var_def_list; 1330 | void dump() const override 1331 | { 1332 | assert(b_type == "int"); 1333 | for (auto&& var_def : var_def_list)var_def->dump(); 1334 | } 1335 | std::string dumpIR() const override 1336 | { 1337 | assert(b_type == "int"); 1338 | for (auto&& var_def : var_def_list)var_def->dumpIR(); 1339 | return ""; 1340 | } 1341 | int dumpExp() const override 1342 | { 1343 | assert(b_type == "int"); 1344 | for (auto&& var_def : var_def_list)var_def->dumpExp(); 1345 | return 0; 1346 | } 1347 | }; 1348 | 1349 | 1350 | class VarDefAST : public BaseAST 1351 | { 1352 | public: 1353 | std::string ident; 1354 | bool has_init_val; 1355 | std::vector> exp_list; 1356 | std::unique_ptr init_val; 1357 | void dump() const override 1358 | { 1359 | std::cout << "VarDefAST{" << ident; 1360 | if (has_init_val) 1361 | { 1362 | std::cout << "="; 1363 | init_val->dump(); 1364 | } 1365 | std::cout << "} "; 1366 | } 1367 | void dumpListType(std::vector widths) const 1368 | { 1369 | if (widths.size() == 1) 1370 | { 1371 | std::cout << "[i32, " << widths[0] << "]"; return; 1372 | 
} 1373 | std::vector rec = std::vector(widths.begin() + 1, 1374 | widths.end()); 1375 | std::cout << "["; dumpListType(rec); 1376 | std::cout << ", " << widths.front() << "]"; 1377 | } 1378 | void dumpListInit(std::string prev, std::vector widths, int depth, 1379 | std::vector init_list) const 1380 | { 1381 | if (depth >= widths.size()) 1382 | { 1383 | std::cout << "\tstore " << init_list[var_list_num++] << ", " << 1384 | prev << std::endl; 1385 | return; 1386 | } 1387 | for (int i = 0; i < widths[depth]; i++) 1388 | { 1389 | std::string result_var = "%" + std::to_string(symbol_num++); 1390 | std::cout << '\t' << result_var << " = getelemptr " << prev << ", " 1391 | << i << std::endl; 1392 | dumpListInit(result_var, widths, depth + 1, init_list); 1393 | } 1394 | } 1395 | void printInitList(std::vector widths, int depth, 1396 | std::vector init_list) const 1397 | { 1398 | if (depth >= widths.size()) 1399 | { 1400 | std::cout << init_list[var_list_num++]; return; 1401 | } 1402 | std::cout << "{"; 1403 | for (int i = 0; i < widths[depth]; i++) 1404 | { 1405 | printInitList(widths, depth + 1, init_list); 1406 | if (i != widths[depth] - 1)std::cout << ", "; 1407 | } 1408 | std::cout << "}"; 1409 | } 1410 | std::string dumpIR() const override 1411 | { 1412 | if (exp_list.empty()) 1413 | { 1414 | std::string var_name = "@" + ident; 1415 | std::string name = var_name + "_" + 1416 | std::to_string(var_num[var_name]++); 1417 | std::cout << '\t' << name << " = alloc i32" << std::endl; 1418 | symbol_tables.back()[ident] = name; 1419 | if (has_init_val) 1420 | { 1421 | std::string val_var = init_val->dumpIR(); 1422 | std::cout << "\tstore " << val_var << ", " << name << 1423 | std::endl; 1424 | } 1425 | } 1426 | else 1427 | { 1428 | std::vector widths, init_list; 1429 | for (auto&& exp : exp_list) 1430 | widths.push_back(std::stoi(exp->dumpIR())); 1431 | std::string var_name = "@" + ident; 1432 | std::string name = var_name + "_" + 1433 | std::to_string(var_num[var_name]++); 
1434 | symbol_tables.back()[ident] = name; 1435 | is_list[name] = 1; 1436 | list_dim[name] = widths.size(); 1437 | std::cout << '\t' << name << " = alloc "; 1438 | dumpListType(widths); 1439 | std::cout << std::endl; 1440 | if (has_init_val) 1441 | { 1442 | var_list_num = 0; 1443 | init_list = init_val->dumpList(widths); 1444 | var_list_num = 0; 1445 | for (int i = 0; i < widths[0]; i++) 1446 | { 1447 | std::string result_var = "%" + 1448 | std::to_string(symbol_num++); 1449 | std::cout << '\t' << result_var << " = getelemptr " << name 1450 | << ", " << i << std::endl; 1451 | dumpListInit(result_var, widths, 1, init_list); 1452 | } 1453 | } 1454 | } 1455 | return ""; 1456 | } 1457 | int dumpExp() const override 1458 | { 1459 | if (exp_list.empty()) 1460 | { 1461 | std::string var_name = "@" + ident; 1462 | std::string name = var_name + "_" + 1463 | std::to_string(var_num[var_name]++); 1464 | symbol_tables.back()[ident] = name; 1465 | std::cout << "global " << name << " = alloc i32, "; 1466 | if (has_init_val) 1467 | { 1468 | std::string val_var = init_val->dumpIR(); 1469 | if (val_var[0] == '@' || val_var[0] == '%')assert(false); 1470 | else if (val_var != "0")std::cout << val_var << std::endl; 1471 | else std::cout << "zeroinit" << std::endl; 1472 | } 1473 | else std::cout << "zeroinit" << std::endl; 1474 | } 1475 | else 1476 | { 1477 | std::vector widths, init_list; 1478 | for (auto&& exp : exp_list) 1479 | widths.push_back(std::stoi(exp->dumpIR())); 1480 | std::string var_name = "@" + ident; 1481 | std::string name = var_name + "_" + 1482 | std::to_string(var_num[var_name]++); 1483 | symbol_tables.back()[ident] = name; 1484 | is_list[name] = 1; 1485 | list_dim[name] = widths.size(); 1486 | std::cout << "global " << name << " = alloc "; 1487 | dumpListType(widths); 1488 | if (has_init_val) 1489 | { 1490 | var_list_num = 0; 1491 | init_list = init_val->dumpList(widths); 1492 | var_list_num = 0; 1493 | std::cout << ", "; 1494 | printInitList(widths, 0, init_list); 
1495 | std::cout << std::endl; 1496 | } 1497 | else std::cout << ", zeroinit" << std::endl; 1498 | } 1499 | return 0; 1500 | } 1501 | }; 1502 | 1503 | 1504 | class InitValAST : public BaseAST 1505 | { 1506 | public: 1507 | InitValType type; 1508 | std::unique_ptr exp; 1509 | std::vector> init_val_list; 1510 | void dump() const override 1511 | { 1512 | if (type == InitValType::exp)exp->dump(); 1513 | else if (type == InitValType::list) 1514 | { 1515 | std::cout << "{"; 1516 | for (int i = 0; i < init_val_list.size(); i++) 1517 | { 1518 | init_val_list[i]->dump(); 1519 | if (i != init_val_list.size() - 1)std::cout << ","; 1520 | } 1521 | std::cout << "} "; 1522 | } 1523 | else assert(false); 1524 | } 1525 | std::string dumpIR() const override 1526 | { 1527 | assert(type == InitValType::exp); 1528 | return exp->dumpIR(); 1529 | } 1530 | int dumpExp() const override 1531 | { 1532 | assert(type == InitValType::exp); 1533 | return exp->dumpExp(); 1534 | } 1535 | std::vector dumpList(std::vector widths) const override 1536 | { 1537 | std::vector ret; 1538 | if (widths.size() == 1) 1539 | { 1540 | for (auto&& init_val : init_val_list) 1541 | { 1542 | assert(init_val->get_ident() == "exp"); 1543 | ret.push_back(init_val->dumpExp()); 1544 | var_list_num++; 1545 | } 1546 | int num_zeros = widths[0] - ret.size(); 1547 | for (int i = 0; i < num_zeros; i++) 1548 | { 1549 | ret.push_back(0); var_list_num++; 1550 | } 1551 | return ret; 1552 | } 1553 | std::vector products = widths; 1554 | for (int i = products.size() - 2; i >= 0; i--) 1555 | products[i] *= products[i + 1]; 1556 | int total_size = products[0]; 1557 | for (auto&& init_val : init_val_list) 1558 | if (init_val->get_ident() == "exp") 1559 | { 1560 | ret.push_back(init_val->dumpExp()); 1561 | var_list_num++; continue; 1562 | } 1563 | else if (init_val->get_ident() == "list") 1564 | { 1565 | int init_num; 1566 | for (init_num = 1; init_num < widths.size(); init_num++) 1567 | if (var_list_num % products[init_num] == 
0)break; 1568 | else if (init_num == widths.size() - 1)assert(false); 1569 | std::vector rec = std::vector(widths.begin() + 1570 | init_num, widths.end()); 1571 | std::vector tmp = init_val->dumpList(rec); 1572 | ret.insert(ret.end(), tmp.begin(), tmp.end()); 1573 | } 1574 | else assert(false); 1575 | int num_zeros = total_size - ret.size(); 1576 | for (int i = 0; i < num_zeros; i++) 1577 | { 1578 | ret.push_back(0); var_list_num++; 1579 | } 1580 | return ret; 1581 | } 1582 | std::string get_ident() const override 1583 | { 1584 | if (type == InitValType::exp)return "exp"; 1585 | else if (type == InitValType::list)return "list"; 1586 | else assert(false); 1587 | } 1588 | }; 1589 | --------------------------------------------------------------------------------