├── .clang-format ├── .gitignore ├── 9cc.h ├── LICENSE ├── Makefile ├── README.md ├── examples └── nqueen.c ├── gen_ir.c ├── gen_x86.c ├── irdump.c ├── liveness.c ├── main.c ├── opt.c ├── parse.c ├── preprocess.c ├── regalloc.c ├── sema.c ├── test ├── gcc.c ├── test.c ├── test1.inc ├── test2.inc └── token.c ├── token.c ├── util.c └── util_test.c /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | AllowShortFunctionsOnASingleLine: false 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.o 3 | tmp* 4 | 9cc 5 | a.out 6 | -------------------------------------------------------------------------------- /9cc.h: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /// util.c 13 | 14 | noreturn void error(char *fmt, ...) __attribute__((format(printf, 1, 2))); 15 | char *format(char *fmt, ...) 
__attribute__((format(printf, 1, 2)));

// Array of void* elements with separate capacity and length.
typedef struct {
  void **data;
  int capacity;
  int len;
} Vector;

Vector *new_vec(void);
void vec_push(Vector *v, void *elem);
void vec_pushi(Vector *v, int val);
void *vec_pop(Vector *v);
void *vec_last(Vector *v);
bool vec_contains(Vector *v, void *elem);
bool vec_union1(Vector *v, void *elem);

// String-keyed map stored as two vectors (keys and vals).
typedef struct {
  Vector *keys;
  Vector *vals;
} Map;

Map *new_map(void);
void map_put(Map *map, char *key, void *val);
void map_puti(Map *map, char *key, int val);
void *map_get(Map *map, char *key);
int map_geti(Map *map, char *key, int default_);
bool map_exists(Map *map, char *key);

// Character buffer with separate capacity and length.
typedef struct {
  char *data;
  int capacity;
  int len;
} StringBuilder;

StringBuilder *new_sb(void);
void sb_add(StringBuilder *sb, char c);
void sb_append(StringBuilder *sb, char *s);
void sb_append_n(StringBuilder *sb, char *s, int len);
char *sb_get(StringBuilder *sb);

typedef struct Type Type;

// C type descriptor. `ty` holds one of VOID, BOOL, CHAR, ... declared in
// the parse.c section below; the remaining fields are grouped by the kind
// of type they describe.
typedef struct Type {
  int ty;
  int size;  // sizeof
  int align; // alignof

  // Pointer
  Type *ptr_to;

  // Array
  Type *ary_of;
  int len;

  // Struct
  Map *members;
  int offset;

  // Function
  Type *returning;
} Type;

Type *ptr_to(Type *base);
Type *ary_of(Type *base, int len);
Type *void_ty();
Type *bool_ty();
Type *char_ty();
Type *int_ty();
Type *func_ty(Type *returning);
bool same_type(Type *x, Type *y);
int roundup(int x, int align);

/// util_test.c

void util_test();

/// token.c

enum {
  TK_NUM = 256, // Number literal
  TK_STR,       // String literal
  TK_IDENT,     // Identifier
  TK_ARROW,     // ->
  TK_EXTERN,    // "extern"
  TK_TYPEDEF,   // "typedef"
  TK_INT,       // "int"
  TK_CHAR,      // "char"
  TK_VOID,      // "void"
  TK_STRUCT,    // "struct"
  TK_BOOL,      // "_Bool"
  TK_IF,        // "if"
  TK_ELSE,      // "else"
  TK_FOR,       // "for"
  TK_DO,        // "do"
  TK_WHILE,     // "while"
  TK_SWITCH,    // "switch"
  TK_CASE,      // "case"
  TK_BREAK,     // "break"
  TK_CONTINUE,  // "continue"
  TK_EQ,        // ==
  TK_NE,        // !=
  TK_LE,        // <=
  TK_GE,        // >=
  TK_LOGOR,     // ||
  TK_LOGAND,    // &&
  TK_SHL,       // <<
  TK_SHR,       // >>
  TK_INC,       // ++
  TK_DEC,       // --
  TK_MUL_EQ,    // *=
  TK_DIV_EQ,    // /=
  TK_MOD_EQ,    // %=
  TK_ADD_EQ,    // +=
  TK_SUB_EQ,    // -=
  TK_SHL_EQ,    // <<=
  TK_SHR_EQ,    // >>=
  TK_AND_EQ,    // &=
  TK_XOR_EQ,    // ^=
  TK_OR_EQ,     // |=
  TK_RETURN,    // "return"
  TK_SIZEOF,    // "sizeof"
  TK_ALIGNOF,   // "_Alignof"
  TK_TYPEOF,    // "typeof"
  TK_PARAM,     // Function-like macro parameter
  TK_EOF,       // End marker
};

// Token type
typedef struct {
  int ty;     // Token type
  int val;    // Number literal
  char *name; // Identifier

  // String literal.
  // `len` is an int: a `char` cannot represent a length above CHAR_MAX,
  // so longer literals would be recorded with a wrong (possibly negative)
  // length.
  char *str;
  int len;

  // For preprocessor
  bool stringize;

  // For error reporting
  char *buf;
  char *path;
  char *start;
  char *end;
} Token;

Vector *tokenize(char *path, bool add_eof);
noreturn void bad_token(Token *t, char *msg);
void warn_token(Token *t, char *msg);
int get_line_number(Token *t);
char *stringize(Vector *tokens);

/// preprocess.c

Vector *preprocess(Vector *tokens);

/// parse.c

extern int nlabel;

enum {
  ND_NUM = 256, // Number literal
  ND_STRUCT,    // Struct
  ND_DECL,      // declaration
  ND_VARDEF,    // Variable definition
  ND_VARREF,    // Variable reference
  ND_CAST,      // Cast
  ND_IF,        // "if"
  ND_FOR,       // "for"
  ND_DO_WHILE,  // do ... while
  ND_SWITCH,    // switch
  ND_CASE,      // case
  ND_BREAK,     // break
  ND_CONTINUE,  // continue
  ND_ADDR,      // address-of operator ("&")
  ND_DEREF,     // pointer dereference ("*")
  ND_DOT,       // Struct member access
  ND_EQ,        // ==
  ND_NE,        // !=
  ND_LE,        // <=
  ND_LOGAND,    // &&
  ND_LOGOR,     // ||
  ND_SHL,       // <<
  ND_SHR,       // >>
  ND_MOD,       // %
  ND_RETURN,    // "return"
  ND_CALL,      // Function call
  ND_FUNC,      // Function definition
  ND_COMP_STMT, // Compound statement
  ND_EXPR_STMT, // Expression statement
  ND_STMT_EXPR, // Statement expression (GNU extn.)
  ND_NULL,      // Null statement
};

// Values stored in Type.ty.
enum {
  VOID = 1,
  BOOL,
  CHAR,
  INT,
  PTR,
  ARY,
  STRUCT,
  FUNC,
};

typedef struct Reg Reg;

// Represents a variable.
typedef struct {
  Type *ty;
  char *name;
  bool is_local;

  // Local variables are compiled to offsets from RBP.
  int offset;

  // Global variables are compiled to labels with optional
  // initialized data.
  char *data;

  // For optimization passes.
  bool address_taken;
  Reg *promoted;
} Var;

typedef struct Node Node;
typedef struct BB BB;

// AST node
typedef struct Node {
  int op;        // Node type
  Type *ty;      // C type
  Node *lhs;     // left-hand side
  Node *rhs;     // right-hand side
  int val;       // Number literal
  Node *expr;    // "return" or expression stmt
  Vector *stmts; // Compound statement

  char *name;

  // For ND_VARREF
  Var *var;

  // "if" ( cond ) then "else" els
  // "for" ( init; cond; inc ) body
  // "while" ( cond ) body
  // "do" body "while" ( cond )
  // "switch" ( cond ) body
  // "case" val ":" body
  Node *cond;
  Node *then;
  Node *els;
  Node *init;
  Node *inc;
  Node *body;

  // For switch and case
  Vector *cases;
  BB *bb;

  // For case, break and continue
  Node *target;
  BB *break_;
  BB *continue_;

  // Function definition
  Vector *params;

  // Function call
  Vector *args;

  // For error reporting
  Token *token;
} Node;

typedef struct {
  char *name;
  Node *node;
  Vector *lvars;
  Vector *bbs;
} Function;

// Represents toplevel constructs.
typedef struct {
  Vector *gvars;
  Vector *funcs;
} Program;

Program *parse(Vector *tokens);

Node *new_int_node(int val, Token *t);

/// sema.c

Type *get_type(Node *node);
void sema(Program *prog);

/// irdump.c

void dump_ir(Vector *irv);

/// gen_ir.c

enum {
  IR_ADD = 1,
  IR_SUB,
  IR_MUL,
  IR_DIV,
  IR_IMM,
  IR_BPREL,
  IR_MOV,
  IR_RETURN,
  IR_CALL,
  IR_LABEL_ADDR,
  IR_EQ,
  IR_NE,
  IR_LE,
  IR_LT,
  IR_AND,
  IR_OR,
  IR_XOR,
  IR_SHL,
  IR_SHR,
  IR_MOD,
  IR_JMP,
  IR_BR,
  IR_LOAD,
  IR_LOAD_SPILL,
  IR_STORE,
  IR_STORE_ARG,
  IR_STORE_SPILL,
  IR_NOP,
};

typedef struct Reg {
  int vn; // virtual register number
  int rn; // real register number

  // For optimizer
  Reg *promoted;

  // For regalloc
  int def;
  int last_use;
  bool spill;
  Var *var;
} Reg;

// Basic block: a label plus a vector of IR instructions.
typedef struct BB {
  int label;
  Vector *ir;
  Reg *param;

  // For liveness analysis
  Vector *succ;
  Vector *pred;
  Vector *def_regs;
  Vector *in_regs;
  Vector *out_regs;
} BB;

// One IR instruction. `op` is one of the IR_* constants above.
typedef struct {
  int op;

  Reg *r0;
  Reg *r1;
  Reg *r2;

  int imm;
  int label;
  Var *var;

  BB *bb1;
  BB *bb2;

  // Load/store size in bytes
  int size;

  // Function call
  char *name;
  int nargs;
  Reg *args[6];

  // For liveness tracking
  Vector *kill;

  // For SSA
  Reg *bbarg;
} IR;

void gen_ir(Program *prog);
Reg *new_reg();

/// opt.c

void optimize(Program *prog);

/// liveness.c

void liveness(Program *prog);
| void liveness(Program *prog); 420 | 421 | /// regalloc.c 422 | 423 | void alloc_regs(Program *prog); 424 | 425 | /// gen_x86.c 426 | 427 | extern char *regs[]; 428 | extern char *regs8[]; 429 | extern char *regs32[]; 430 | extern int num_regs; 431 | 432 | void gen_x86(Program *prog); 433 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Rui Ueyama 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-Wall -std=c11 -g 2 | SRCS=$(wildcard *.c) 3 | OBJS=$(SRCS:.c=.o) 4 | 5 | 9cc: $(OBJS) 6 | cc -o $@ $(OBJS) $(LDFLAGS) 7 | 8 | $(OBJS): 9cc.h 9 | 10 | test: 9cc test/test.c 11 | ./9cc -test 12 | 13 | @./9cc test/test.c > tmp-test1.s 14 | @gcc -c -o tmp-test2.o test/gcc.c 15 | @gcc -static -o tmp-test1 tmp-test1.s tmp-test2.o 16 | @./tmp-test1 17 | 18 | @./9cc test/token.c > tmp-test2.s 19 | @gcc -static -o tmp-test2 tmp-test2.s 20 | @./tmp-test2 21 | 22 | clean: 23 | rm -f 9cc *.o *~ tmp* a.out test/*~ 24 | 25 | format: 26 | clang-format -i *.c *.h 27 | 28 | .PHONY: test clean format 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 9cc C compiler 2 | ============== 3 | 4 | Note: 9cc is no longer an active project, and the successor is 5 | [chibicc](https://github.com/rui314/chibicc). 6 | 7 | 9cc is a successor of my [8cc](https://github.com/rui314/8cc) C compiler. 8 | In this new project, I'm trying to write code that can be understood 9 | extremely easily while creating a compiler that generates reasonably 10 | efficient assembly. 11 | 12 | 9cc has more stages than 8cc. Here is an overview of the internals: 13 | 14 | 1. Compiles an input string to abstract syntax trees. 15 | 2. Runs a semantic analyzer on the trees to add a type to each tree node. 16 | 3. Converts the trees to intermediate code (IR), which in some degree 17 | resembles x86-64 instructions but has an infinite number of registers. 18 | 4. Maps an infinite number of registers to a finite number of registers. 19 | 5. Generates x86-64 instructions from the IR. 
20 | 21 | There are a few important design choices that I made to keep the code as 22 | simple as I can: 23 | 24 | - Like 8cc, no memory management is the memory management policy in 9cc. 25 | We allocate memory using malloc() but never call free(). 26 | I know that people find the policy odd, but this is actually a 27 | reasonable design choice for short-lived programs such as compilers. 28 | This policy greatly simplifies code and also eliminates use-after-free 29 | bugs entirely. 30 | 31 | - 9cc's parser is a hand-written recursive descent parser, so that the 32 | compiler doesn't have any black boxes such as lex/yacc. 33 | 34 | - I stick with plain old tools such as Make or shell scripts so that you 35 | don't need to learn about new stuff other than the compiler source code 36 | itself. 37 | 38 | - We use brute force if it makes code simpler. We don't try too hard to 39 | implement sophisticated data structures to make the compiler run faster. 40 | If the performance becomes a problem, we can fix it at that point. 41 | 42 | - Entire contents are loaded into memory at once if it makes code simpler. 43 | We don't use character IO to read from an input file; instead, we read 44 | an entire file to a char array in a batch. Likewise, we tokenize a 45 | whole file in a batch rather than doing it concurrently with the parser. 46 | 47 | Overall, 9cc is still in its very early stage. I hope to continue 48 | improving it to the point where 9cc can compile real-world C programs such 49 | as the Linux kernel. That is an ambitious goal, but I believe it's achievable, 50 | so stay tuned! 
51 | -------------------------------------------------------------------------------- /examples/nqueen.c: -------------------------------------------------------------------------------- 1 | // How to run: 2 | // 3 | // $ ./9cc examples/nqueen.c > tmp-nqueen.s 4 | // $ gcc -static -o tmp-nqueen tmp-nqueen.s 5 | // $ ./tmp-nqueen 6 | 7 | int print_board(int board[][10]) { 8 | for (int i = 0; i < 10; i++) { 9 | for (int j = 0; j < 10; j++) 10 | if (board[i][j]) 11 | printf("Q "); 12 | else 13 | printf(". "); 14 | printf("\n"); 15 | } 16 | printf("\n\n"); 17 | } 18 | 19 | int conflict(int board[][10], int row, int col) { 20 | for (int i = 0; i < row; i++) { 21 | if (board[i][col]) 22 | return 1; 23 | int j = row - i; 24 | if (0 < col - j + 1 && board[i][col - j]) 25 | return 1; 26 | if (col + j < 10 && board[i][col + j]) 27 | return 1; 28 | } 29 | return 0; 30 | } 31 | 32 | int solve(int board[][10], int row) { 33 | if (row > 9) { 34 | print_board(board); 35 | return 0; 36 | } 37 | for (int i = 0; i < 10; i++) { 38 | if (conflict(board, row, i)) { 39 | } else { 40 | board[row][i] = 1; 41 | solve(board, row + 1); 42 | board[row][i] = 0; 43 | } 44 | } 45 | } 46 | 47 | int main() { 48 | int board[100]; 49 | for (int i = 0; i < 100; i++) 50 | board[i] = 0; 51 | solve(board, 0); 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /gen_ir.c: -------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | // 9cc's code generation is two-pass. In the first pass, abstract 4 | // syntax trees are compiled to IR (intermediate representation). 5 | // 6 | // IR resembles the real x86-64 instruction set, but it has infinite 7 | // number of registers. We don't try too hard to reuse registers in 8 | // this pass. Instead, we "kill" registers to mark them as dead when 9 | // we are done with them and use new registers. 
10 | // 11 | // Such infinite number of registers are mapped to a finite registers 12 | // in a later pass. 13 | 14 | static Function *fn; 15 | static BB *out; 16 | static int nreg = 1; 17 | 18 | static BB *new_bb() { 19 | BB *bb = calloc(1, sizeof(BB)); 20 | bb->label = nlabel++; 21 | bb->ir = new_vec(); 22 | bb->succ = new_vec(); 23 | bb->pred = new_vec(); 24 | bb->def_regs = new_vec(); 25 | bb->in_regs = new_vec(); 26 | bb->out_regs = new_vec(); 27 | vec_push(fn->bbs, bb); 28 | return bb; 29 | } 30 | 31 | static IR *new_ir(int op) { 32 | IR *ir = calloc(1, sizeof(IR)); 33 | ir->op = op; 34 | vec_push(out->ir, ir); 35 | return ir; 36 | } 37 | 38 | Reg *new_reg() { 39 | Reg *r = calloc(1, sizeof(Reg)); 40 | r->vn = nreg++; 41 | r->rn = -1; 42 | return r; 43 | } 44 | 45 | static IR *emit(int op, Reg *r0, Reg *r1, Reg *r2) { 46 | IR *ir = new_ir(op); 47 | ir->r0 = r0; 48 | ir->r1 = r1; 49 | ir->r2 = r2; 50 | return ir; 51 | } 52 | 53 | static IR *br(Reg *r, BB *then, BB *els) { 54 | IR *ir = new_ir(IR_BR); 55 | ir->r2 = r; 56 | ir->bb1 = then; 57 | ir->bb2 = els; 58 | return ir; 59 | } 60 | 61 | static IR *jmp(BB *bb) { 62 | IR *ir = new_ir(IR_JMP); 63 | ir->bb1 = bb; 64 | return ir; 65 | } 66 | 67 | static IR *jmp_arg(BB *bb, Reg *r) { 68 | IR *ir = new_ir(IR_JMP); 69 | ir->bb1 = bb; 70 | ir->bbarg = r; 71 | return ir; 72 | } 73 | 74 | static Reg *imm(int imm) { 75 | Reg *r = new_reg(); 76 | IR *ir = new_ir(IR_IMM); 77 | ir->r0 = r; 78 | ir->imm = imm; 79 | return r; 80 | } 81 | 82 | static Reg *gen_expr(Node *node); 83 | 84 | static void load(Node *node, Reg *dst, Reg *src) { 85 | IR *ir = emit(IR_LOAD, dst, NULL, src); 86 | ir->size = node->ty->size; 87 | } 88 | 89 | // In C, all expressions that can be written on the left-hand side of 90 | // the '=' operator must have an address in memory. In other words, if 91 | // you can apply the '&' operator to take an address of some 92 | // expression E, you can assign E to a new value. 
93 | // 94 | // Other expressions, such as `1+2`, cannot be written on the lhs of 95 | // '=', since they are just temporary values that don't have an address. 96 | // 97 | // The stuff that can be written on the lhs of '=' is called lvalue. 98 | // Other values are called rvalue. An lvalue is essentially an address. 99 | // 100 | // When lvalues appear on the rvalue context, they are converted to 101 | // rvalues by loading their values from their addresses. You can think 102 | // '&' as an operator that suppresses such automatic lvalue-to-rvalue 103 | // conversion. 104 | // 105 | // This function evaluates a given node as an lvalue. 106 | static Reg *gen_lval(Node *node) { 107 | if (node->op == ND_DEREF) 108 | return gen_expr(node->expr); 109 | 110 | if (node->op == ND_DOT) { 111 | Reg *r1 = new_reg(); 112 | Reg *r2 = gen_lval(node->expr); 113 | Reg *r3 = imm(node->ty->offset); 114 | emit(IR_ADD, r1, r2, r3); 115 | return r1; 116 | } 117 | 118 | assert(node->op == ND_VARREF); 119 | Var *var = node->var; 120 | 121 | IR *ir; 122 | if (var->is_local) { 123 | ir = new_ir(IR_BPREL); 124 | ir->r0 = new_reg(); 125 | ir->var = var; 126 | } else { 127 | ir = new_ir(IR_LABEL_ADDR); 128 | ir->r0 = new_reg(); 129 | ir->name = var->name; 130 | } 131 | return ir->r0; 132 | } 133 | 134 | static Reg *gen_binop(int op, Node *node) { 135 | Reg *r1 = new_reg(); 136 | Reg *r2 = gen_expr(node->lhs); 137 | Reg *r3 = gen_expr(node->rhs); 138 | emit(op, r1, r2, r3); 139 | return r1; 140 | } 141 | 142 | static void gen_stmt(Node *node); 143 | 144 | static Reg *gen_expr(Node *node) { 145 | switch (node->op) { 146 | case ND_NUM: 147 | return imm(node->val); 148 | case ND_EQ: 149 | return gen_binop(IR_EQ, node); 150 | case ND_NE: 151 | return gen_binop(IR_NE, node); 152 | case ND_LOGAND: { 153 | BB *bb = new_bb(); 154 | BB *set0 = new_bb(); 155 | BB *set1 = new_bb(); 156 | BB *last = new_bb(); 157 | 158 | br(gen_expr(node->lhs), bb, set0); 159 | 160 | out = bb; 161 | 
br(gen_expr(node->rhs), set1, set0); 162 | 163 | out = set0; 164 | jmp_arg(last, imm(0)); 165 | 166 | out = set1; 167 | jmp_arg(last, imm(1)); 168 | 169 | out = last; 170 | out->param = new_reg(); 171 | return out->param; 172 | } 173 | case ND_LOGOR: { 174 | BB *bb = new_bb(); 175 | BB *set0 = new_bb(); 176 | BB *set1 = new_bb(); 177 | BB *last = new_bb(); 178 | 179 | Reg *r1 = gen_expr(node->lhs); 180 | br(r1, set1, bb); 181 | 182 | out = bb; 183 | Reg *r2 = gen_expr(node->rhs); 184 | br(r2, set1, set0); 185 | 186 | out = set0; 187 | jmp_arg(last, imm(0)); 188 | 189 | out = set1; 190 | jmp_arg(last, imm(1)); 191 | 192 | out = last; 193 | out->param = new_reg(); 194 | return out->param; 195 | } 196 | case ND_VARREF: 197 | case ND_DOT: { 198 | Reg *r = new_reg(); 199 | load(node, r, gen_lval(node)); 200 | return r; 201 | } 202 | case ND_CALL: { 203 | Reg *args[6]; 204 | for (int i = 0; i < node->args->len; i++) 205 | args[i] = gen_expr(node->args->data[i]); 206 | 207 | IR *ir = new_ir(IR_CALL); 208 | ir->r0 = new_reg(); 209 | ir->name = node->name; 210 | ir->nargs = node->args->len; 211 | memcpy(ir->args, args, sizeof(args)); 212 | return ir->r0; 213 | } 214 | case ND_ADDR: 215 | return gen_lval(node->expr); 216 | case ND_DEREF: { 217 | Reg *r = new_reg(); 218 | load(node, r, gen_expr(node->expr)); 219 | return r; 220 | } 221 | case ND_CAST: { 222 | Reg *r1 = gen_expr(node->expr); 223 | if (node->ty->ty != BOOL) 224 | return r1; 225 | Reg *r2 = new_reg(); 226 | emit(IR_NE, r2, r1, imm(0)); 227 | return r2; 228 | } 229 | case ND_STMT_EXPR: 230 | for (int i = 0; i < node->stmts->len; i++) 231 | gen_stmt(node->stmts->data[i]); 232 | return gen_expr(node->expr); 233 | case '=': { 234 | Reg *r1 = gen_expr(node->rhs); 235 | Reg *r2 = gen_lval(node->lhs); 236 | 237 | IR *ir = emit(IR_STORE, NULL, r2, r1); 238 | ir->size = node->ty->size; 239 | return r1; 240 | } 241 | case '+': 242 | return gen_binop(IR_ADD, node); 243 | case '-': 244 | return gen_binop(IR_SUB, node); 245 
| case '*': 246 | return gen_binop(IR_MUL, node); 247 | case '/': 248 | return gen_binop(IR_DIV, node); 249 | case '%': 250 | return gen_binop(IR_MOD, node); 251 | case '<': 252 | return gen_binop(IR_LT, node); 253 | case ND_LE: 254 | return gen_binop(IR_LE, node); 255 | case '&': 256 | return gen_binop(IR_AND, node); 257 | case '|': 258 | return gen_binop(IR_OR, node); 259 | case '^': 260 | return gen_binop(IR_XOR, node); 261 | case ND_SHL: 262 | return gen_binop(IR_SHL, node); 263 | case ND_SHR: 264 | return gen_binop(IR_SHR, node); 265 | case '~': { 266 | Reg *r1 = new_reg(); 267 | Reg *r2 = gen_expr(node->expr); 268 | emit(IR_XOR, r1, r2, imm(-1)); 269 | return r1; 270 | } 271 | case ',': 272 | gen_expr(node->lhs); 273 | return gen_expr(node->rhs); 274 | case '?': { 275 | BB *then = new_bb(); 276 | BB *els = new_bb(); 277 | BB *last = new_bb(); 278 | 279 | br(gen_expr(node->cond), then, els); 280 | 281 | out = then; 282 | jmp_arg(last, gen_expr(node->then)); 283 | 284 | out = els; 285 | jmp_arg(last, gen_expr(node->els)); 286 | 287 | out = last; 288 | out->param = new_reg(); 289 | return out->param; 290 | } 291 | case '!': { 292 | Reg *r1 = new_reg(); 293 | Reg *r2 = gen_expr(node->expr); 294 | emit(IR_EQ, r1, r2, imm(0)); 295 | return r1; 296 | } 297 | default: 298 | assert(0 && "unknown AST type"); 299 | } 300 | } 301 | 302 | static void gen_stmt(Node *node) { 303 | switch (node->op) { 304 | case ND_NULL: 305 | return; 306 | case ND_IF: { 307 | BB *then = new_bb(); 308 | BB *els = new_bb(); 309 | BB *last = new_bb(); 310 | 311 | br(gen_expr(node->cond), then, els); 312 | 313 | out = then; 314 | gen_stmt(node->then); 315 | jmp(last); 316 | 317 | out = els; 318 | if (node->els) 319 | gen_stmt(node->els); 320 | jmp(last); 321 | 322 | out = last; 323 | return; 324 | } 325 | case ND_FOR: { 326 | BB *cond = new_bb(); 327 | node->continue_ = new_bb(); 328 | BB *body = new_bb(); 329 | node->break_ = new_bb(); 330 | 331 | if (node->init) 332 | gen_stmt(node->init); 
333 | jmp(cond); 334 | 335 | out = cond; 336 | if (node->cond) { 337 | Reg *r = gen_expr(node->cond); 338 | br(r, body, node->break_); 339 | } else { 340 | jmp(body); 341 | } 342 | 343 | out = body; 344 | gen_stmt(node->body); 345 | jmp(node->continue_); 346 | 347 | out = node->continue_; 348 | if (node->inc) 349 | gen_expr(node->inc); 350 | jmp(cond); 351 | 352 | out = node->break_; 353 | return; 354 | } 355 | case ND_DO_WHILE: { 356 | node->continue_ = new_bb(); 357 | BB *body = new_bb(); 358 | node->break_ = new_bb(); 359 | 360 | jmp(body); 361 | 362 | out = body; 363 | gen_stmt(node->body); 364 | jmp(node->continue_); 365 | 366 | out = node->continue_; 367 | Reg *r = gen_expr(node->cond); 368 | br(r, body, node->break_); 369 | 370 | out = node->break_; 371 | return; 372 | } 373 | case ND_SWITCH: { 374 | node->break_ = new_bb(); 375 | node->continue_ = new_bb(); 376 | 377 | Reg *r = gen_expr(node->cond); 378 | for (int i = 0; i < node->cases->len; i++) { 379 | Node *case_ = node->cases->data[i]; 380 | case_->bb = new_bb(); 381 | 382 | BB *next = new_bb(); 383 | Reg *r2 = new_reg(); 384 | emit(IR_EQ, r2, r, imm(case_->val)); 385 | br(r2, case_->bb, next); 386 | out = next; 387 | } 388 | jmp(node->break_); 389 | 390 | gen_stmt(node->body); 391 | jmp(node->break_); 392 | 393 | out = node->break_; 394 | return; 395 | } 396 | case ND_CASE: 397 | jmp(node->bb); 398 | out = node->bb; 399 | gen_stmt(node->body); 400 | break; 401 | case ND_BREAK: 402 | jmp(node->target->break_); 403 | out = new_bb(); 404 | break; 405 | case ND_CONTINUE: 406 | jmp(node->target->continue_); 407 | out = new_bb(); 408 | break; 409 | case ND_RETURN: { 410 | Reg *r = gen_expr(node->expr); 411 | IR *ir = new_ir(IR_RETURN); 412 | ir->r2 = r; 413 | out = new_bb(); 414 | return; 415 | } 416 | case ND_EXPR_STMT: 417 | gen_expr(node->expr); 418 | return; 419 | case ND_COMP_STMT: 420 | for (int i = 0; i < node->stmts->len; i++) 421 | gen_stmt(node->stmts->data[i]); 422 | return; 423 | default: 424 | 
error("unknown node: %d", node->op); 425 | } 426 | } 427 | 428 | static void gen_param(Var *var, int i) { 429 | IR *ir = new_ir(IR_STORE_ARG); 430 | ir->var = var; 431 | ir->imm = i; 432 | ir->size = var->ty->size; 433 | var->address_taken = true; 434 | } 435 | 436 | void gen_ir(Program *prog) { 437 | for (int i = 0; i < prog->funcs->len; i++) { 438 | fn = prog->funcs->data[i]; 439 | 440 | assert(fn->node->op == ND_FUNC); 441 | 442 | // Add an empty entry BB to make later analysis easy. 443 | out = new_bb(); 444 | BB *bb = new_bb(); 445 | jmp(bb); 446 | out = bb; 447 | 448 | // Emit IR. 449 | Vector *params = fn->node->params; 450 | for (int i = 0; i < params->len; i++) 451 | gen_param(params->data[i], i); 452 | 453 | gen_stmt(fn->node->body); 454 | 455 | // Make it always ends with a return to make later analysis easy. 456 | new_ir(IR_RETURN)->r2 = imm(0); 457 | 458 | // Later passes shouldn't need the AST, so make it explicit. 459 | fn->node = NULL; 460 | } 461 | } 462 | -------------------------------------------------------------------------------- /gen_x86.c: -------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | // This pass generates x86-64 assembly from IR. 4 | 5 | char *regs[] = {"r10", "r11", "rbx", "r12", "r13", "r14", "r15"}; 6 | char *regs8[] = {"r10b", "r11b", "bl", "r12b", "r13b", "r14b", "r15b"}; 7 | char *regs32[] = {"r10d", "r11d", "ebx", "r12d", "r13d", "r14d", "r15d"}; 8 | 9 | int num_regs = sizeof(regs) / sizeof(*regs); 10 | 11 | static char *argregs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"}; 12 | static char *argregs8[] = {"dil", "sil", "dl", "cl", "r8b", "r9b"}; 13 | static char *argregs32[] = {"edi", "esi", "edx", "ecx", "r8d", "r9d"}; 14 | 15 | __attribute__((format(printf, 1, 2))) static void p(char *fmt, ...); 16 | __attribute__((format(printf, 1, 2))) static void emit(char *fmt, ...); 17 | 18 | static void p(char *fmt, ...) 
{ 19 | va_list ap; 20 | va_start(ap, fmt); 21 | vprintf(fmt, ap); 22 | printf("\n"); 23 | } 24 | 25 | static void emit(char *fmt, ...) { 26 | va_list ap; 27 | va_start(ap, fmt); 28 | printf("\t"); 29 | vprintf(fmt, ap); 30 | printf("\n"); 31 | } 32 | 33 | static void emit_cmp(char *insn, IR *ir) { 34 | int r0 = ir->r0->rn; 35 | int r1 = ir->r1->rn; 36 | int r2 = ir->r2->rn; 37 | 38 | emit("cmp %s, %s", regs[r1], regs[r2]); 39 | emit("%s %s", insn, regs8[r0]); 40 | emit("movzb %s, %s", regs[r0], regs8[r0]); 41 | } 42 | 43 | static char *reg(int r, int size) { 44 | if (size == 1) 45 | return regs8[r]; 46 | if (size == 4) 47 | return regs32[r]; 48 | assert(size == 8); 49 | return regs[r]; 50 | } 51 | 52 | static char *argreg(int r, int size) { 53 | if (size == 1) 54 | return argregs8[r]; 55 | if (size == 4) 56 | return argregs32[r]; 57 | assert(size == 8); 58 | return argregs[r]; 59 | } 60 | 61 | static void emit_ir(IR *ir, char *ret) { 62 | int r0 = ir->r0 ? ir->r0->rn : 0; 63 | int r1 = ir->r1 ? ir->r1->rn : 0; 64 | int r2 = ir->r2 ? 
ir->r2->rn : 0;

  // Emit the x86-64 (Intel syntax) instruction sequence for one IR node.
  switch (ir->op) {
  case IR_IMM:
    emit("mov %s, %d", regs[r0], ir->imm);
    break;
  case IR_BPREL:
    // Variable offsets are assigned as negative numbers in emit_code(),
    // so "%d" prints the sign itself (e.g. `[rbp-8]`).
    emit("lea %s, [rbp%d]", regs[r0], ir->var->offset);
    break;
  case IR_MOV:
    emit("mov %s, %s", regs[r0], regs[r2]);
    break;
  case IR_RETURN:
    // Put the return value in rax and jump to the shared epilogue label.
    emit("mov rax, %s", regs[r2]);
    emit("jmp %s", ret);
    break;
  case IR_CALL:
    // Copy each argument into its argument-passing register, in order.
    for (int i = 0; i < ir->nargs; i++)
      emit("mov %s, %s", argregs[i], regs[ir->args[i]->rn]);

    // r10 and r11 are saved around the call and restored afterwards.
    // NOTE(review): `mov rax, 0` presumably sets the vector-register
    // count for variadic callees -- confirm against the ABI.
    emit("push r10");
    emit("push r11");
    emit("mov rax, 0");
    emit("call %s", ir->name);
    emit("pop r11");
    emit("pop r10");
    emit("mov %s, rax", regs[r0]);
    break;
  case IR_LABEL_ADDR:
    emit("lea %s, %s", regs[r0], ir->name);
    break;
  case IR_EQ:
    emit_cmp("sete", ir);
    break;
  case IR_NE:
    emit_cmp("setne", ir);
    break;
  case IR_LT:
    emit_cmp("setl", ir);
    break;
  case IR_LE:
    emit_cmp("setle", ir);
    break;
  case IR_AND:
    emit("and %s, %s", regs[r0], regs[r2]);
    break;
  case IR_OR:
    emit("or %s, %s", regs[r0], regs[r2]);
    break;
  case IR_XOR:
    emit("xor %s, %s", regs[r0], regs[r2]);
    break;
  case IR_SHL:
    // Variable shift counts must live in cl.
    emit("mov cl, %s", regs8[r2]);
    emit("shl %s, cl", regs[r0]);
    break;
  case IR_SHR:
    emit("mov cl, %s", regs8[r2]);
    emit("shr %s, cl", regs[r0]);
    break;
  case IR_JMP:
    // A jump may carry one argument, which is copied into the target
    // block's parameter register before branching.
    if (ir->bbarg)
      emit("mov %s, %s", regs[ir->bb1->param->rn], regs[ir->bbarg->rn]);
    emit("jmp .L%d", ir->bb1->label);
    break;
  case IR_BR:
    // Non-zero condition goes to bb1, zero goes to bb2.
    emit("cmp %s, 0", regs[r2]);
    emit("jne .L%d", ir->bb1->label);
    emit("jmp .L%d", ir->bb2->label);
    break;
  case IR_LOAD:
    emit("mov %s, [%s]", reg(r0, ir->size), regs[r2]);
    // One-byte loads are zero-extended to the full register.
    if (ir->size == 1)
      emit("movzb %s, %s", regs[r0], regs8[r0]);
    break;
  case IR_LOAD_SPILL:
    emit("mov %s, [rbp%d]", regs[r0], ir->var->offset);
    break;
  case IR_STORE:
    emit("mov [%s], %s", regs[r1], reg(r2, ir->size));
    break;
  case IR_STORE_ARG:
    // ir->imm is the index of the incoming argument register.
    emit("mov [rbp%d], %s", ir->var->offset, argreg(ir->imm, ir->size));
    break;
  case IR_STORE_SPILL:
    emit("mov [rbp%d], %s", ir->var->offset, regs[r1]);
    break;
  case IR_ADD:
    emit("add %s, %s", regs[r0], regs[r2]);
    break;
  case IR_SUB:
    emit("sub %s, %s", regs[r0], regs[r2]);
    break;
  case IR_MUL:
    // One-operand imul multiplies rax by the operand; the low half of
    // the product is left in rax.
    emit("mov rax, %s", regs[r2]);
    emit("imul %s", regs[r0]);
    emit("mov %s, rax", regs[r0]);
    break;
  case IR_DIV:
    // cqo sign-extends rax into rdx:rax; idiv leaves the quotient in rax.
    emit("mov rax, %s", regs[r0]);
    emit("cqo");
    emit("idiv %s", regs[r2]);
    emit("mov %s, rax", regs[r0]);
    break;
  case IR_MOD:
    // Same as IR_DIV, but the remainder is taken from rdx.
    emit("mov rax, %s", regs[r0]);
    emit("cqo");
    emit("idiv %s", regs[r2]);
    emit("mov %s, rdx", regs[r0]);
    break;
  case IR_NOP:
    break;
  default:
    assert(0 && "unknown operator");
  }
}

// Emits the assembly for one function: prologue, every basic block in
// order, and a single shared epilogue that all IR_RETURNs jump to.
void emit_code(Function *fn) {
  // Assign an offset from RBP to each local variable.
  int off = 0;
  for (int i = 0; i < fn->lvars->len; i++) {
    Var *var = fn->lvars->data[i];
    off += var->ty->size;
    off = roundup(off, var->ty->align);
    var->offset = -off;
  }

  // Emit assembly
  char *ret = format(".Lend%d", nlabel++);

  p(".text");
  p(".global %s", fn->name);
  p("%s:", fn->name);
  emit("push rbp");
  emit("mov rbp, rsp");
  // The frame size is rounded up to a multiple of 16 bytes.
  emit("sub rsp, %d", roundup(off, 16));
  emit("push r12");
  emit("push r13");
  emit("push r14");
  emit("push r15");

  for (int i = 0; i < fn->bbs->len; i++) {
    BB *bb = fn->bbs->data[i];
    p(".L%d:", bb->label);
    for (int i = 0; i < bb->ir->len; i++) {
      IR *ir = bb->ir->data[i];
      emit_ir(ir, ret);
    }
  }

  // Epilogue: restore the registers pushed in the prologue, in reverse.
  p("%s:", ret);
  emit("pop r15");
  emit("pop r14");
  emit("pop r13");
  emit("pop r12");
  emit("mov rsp, rbp");
  emit("pop rbp");
  emit("ret");
}

// Returns a copy of the `len` bytes at `s` that is safe to place inside
// a double-quoted assembler string: known control characters and quotes
// become backslash escapes, other non-printable bytes become 3-digit
// octal escapes, and everything else is copied through.
static char *backslash_escape(char *s, int len) {
  static char escaped[256] = {
      ['\b'] = 'b', ['\f'] = 'f',  ['\n'] = 'n',  ['\r'] = 'r',
      ['\t'] = 't', ['\\'] = '\\', ['\''] = '\'', ['"'] = '"',
  };

  StringBuilder *sb = new_sb();
  for (int i = 0; i < len; i++) {
    // Go through uint8_t so the table index and isgraph() argument are
    // never negative.
    uint8_t c = s[i];
    char esc = escaped[c];
    if (esc) {
      sb_add(sb, '\\');
      sb_add(sb, esc);
    } else if (isgraph(c) || c == ' ') {
      sb_add(sb, c);
    } else {
      sb_append(sb, format("\\%03o", c));
    }
  }
  return sb_get(sb);
}

// Emits one global variable: initialized data goes to .data as an
// .ascii string, everything else is zero-filled in .bss.
static void emit_data(Var *var) {
  if (var->data) {
    p(".data");
    p("%s:", var->name);
    emit(".ascii \"%s\"", backslash_escape(var->data, var->ty->size));
    return;
  }

  p(".bss");
  p("%s:", var->name);
  emit(".zero %d", var->ty->size);
}

// Entry point of the backend: prints the whole program as assembly,
// global variables first and then every function.
void gen_x86(Program *prog) {
  p(".intel_syntax noprefix");

  for (int i = 0; i < prog->gvars->len; i++)
    emit_data(prog->gvars->data[i]);

  for (int i = 0; i < prog->funcs->len; i++)
    emit_code(prog->funcs->data[i]);
}

--------------------------------------------------------------------------------
/irdump.c:
--------------------------------------------------------------------------------
#include "9cc.h"

// Returns the number used to print a register: the allocated register
// number if one has been assigned (rn != -1), otherwise the virtual
// register number. A NULL register prints as 0.
static int regno(Reg *r) {
  if (!r)
    return 0;
  if (r->rn != -1)
    return r->rn;
  return r->vn;
}

// Formats an IR_CALL as `rN = name(rA, rB, ...)`.
static char *tostr_call(IR *ir) {
  StringBuilder *sb = new_sb();
  sb_append(sb, format("r%d = %s(", regno(ir->r0), ir->name));
  for (int i = 0; i < ir->nargs; i++) {
    if (i != 0)
      sb_append(sb, ", ");
    sb_append(sb, format("r%d", regno(ir->args[i])));
  }
  sb_append(sb, ")");
  return sb_get(sb);
}

// Returns a human-readable one-line rendering of an IR instruction.
static char *tostr(IR *ir) {
  int r0 = regno(ir->r0);
  int r1 = regno(ir->r1);
  int r2 = regno(ir->r2);

  switch (ir->op) {
  case IR_ADD:
    return format("r%d = r%d + r%d", r0, r1, r2);
  case IR_CALL:
    return tostr_call(ir);
  case IR_DIV:
    return format("r%d = r%d / r%d", r0, r1, r2);
  case IR_IMM:
    return format("r%d = %d", r0, ir->imm);
  case IR_JMP:
    if (ir->bbarg)
      return format("JMP .L%d (r%d)", ir->bb1->label, regno(ir->bbarg));
    return format("JMP .L%d", ir->bb1->label);
  case IR_LABEL_ADDR:
    // NOTE(review): the x86 backend prints ir->name for this op while
    // the dump prints ir->label -- confirm which field is authoritative.
    return format("r%d = .L%d", r0, ir->label);
  case IR_EQ:
    return format("r%d = r%d == r%d", r0, r1, r2);
  case IR_NE:
    return format("r%d = r%d != r%d", r0, r1, r2);
  case IR_LE:
    return format("r%d = r%d <= r%d", r0, r1, r2);
  case IR_LT:
    return format("r%d = r%d < r%d", r0, r1, r2);
  case IR_AND:
    return format("r%d = r%d & r%d", r0, r1, r2);
  case IR_OR:
    return format("r%d = r%d | r%d", r0, r1, r2);
  case IR_XOR:
    return format("r%d = r%d ^ r%d", r0, r1, r2);
  case IR_SHL:
    return format("r%d = r%d << r%d", r0, r1, r2);
  case IR_SHR:
    return format("r%d = r%d >> r%d", r0, r1, r2);
  case IR_LOAD:
    return format("LOAD%d r%d, r%d", ir->size, r0, r2);
  case IR_LOAD_SPILL:
    // NOTE(review): the x86 backend addresses spills via ir->var->offset;
    // the dump prints ir->imm -- confirm the two agree.
    return format("LOAD_SPILL r%d, %d", r0, ir->imm);
  case IR_MOD:
    return format("r%d = r%d %% r%d", r0, r1, r2);
  case IR_MOV:
    return format("r%d = r%d", r0, r2);
  case IR_MUL:
    return format("r%d = r%d * r%d", r0, r1, r2);
  case IR_NOP:
    return "NOP";
  case IR_RETURN:
    return format("RET r%d", r2);
  case IR_STORE:
    return format("STORE%d r%d, r%d", ir->size, r1, r2);
  case IR_STORE_ARG:
    return format("STORE_ARG%d %d %s (%d)", ir->size, ir->imm, ir->var->name,
                  ir->var->offset);
  case IR_STORE_SPILL:
    return format("STORE_SPILL r%d, %d", r1, ir->imm);
  case IR_SUB:
    return format("r%d = r%d - r%d", r0, r1, r2);
  case IR_BPREL:
    return format("BPREL r%d %s (%d)", r0, ir->var->name, ir->var->offset);
  case IR_BR:
    return format("BR r%d .L%d .L%d", r2, ir->bb1->label, ir->bb2->label);
  default:
    assert(0 && "unknown op");
  }
}

// Prints ` name=.L1,.L2,...` for a vector of basic blocks; prints
// nothing when the vector is empty.
static void print_rel(char *name, Vector *v) {
  if (v->len == 0)
    return;
  fprintf(stderr, " %s=", name);
  for (int i = 0; i < v->len; i++) {
    BB *bb = v->data[i];
    if (i > 0)
      fprintf(stderr, ",");
    fprintf(stderr, ".L%d", bb->label);
  }
}

// Prints ` name=r1,r2,...` for a vector of registers; prints nothing
// when the vector is empty.
static void print_regs(char *name, Vector *v) {
  if (v->len == 0)
    return;
  fprintf(stderr, " %s=", name);
  for (int i = 0; i < v->len; i++) {
    Reg *r = v->data[i];
    if (i > 0)
      fprintf(stderr, ",");
    fprintf(stderr, "r%d", regno(r));
  }
}

// Prints the header line of a basic block: its label (with its
// parameter register, if any) followed by its predecessor/successor
// blocks and its def/in/out register sets.
static void print_bb(BB *bb) {
  if (bb->param)
    fprintf(stderr, ".L%d(r%d)", bb->label, regno(bb->param));
  else
    fprintf(stderr, ".L%d", bb->label);

  print_rel("pred", bb->pred);
  print_rel("succ", bb->succ);
  print_regs("defs", bb->def_regs);
  print_regs("in", bb->in_regs);
  print_regs("out", bb->out_regs);
  fprintf(stderr, "\n");
}

// Dumps every function in `irv` to stderr, one basic block at a time.
void dump_ir(Vector *irv) {
  for (int i = 0; i < irv->len; i++) {
    Function *fn = irv->data[i];
    fprintf(stderr, "%s:\n", fn->name);

    for (int i = 0; i < fn->bbs->len; i++) {
      BB *bb = fn->bbs->data[i];
      print_bb(bb);

      for (int i = 0; i < bb->ir->len; i++) {
        IR *ir = bb->ir->data[i];
        fprintf(stderr, "\t%s\n", tostr(ir));
      }
    }
  }
}

--------------------------------------------------------------------------------
/liveness.c:
--------------------------------------------------------------------------------
// Liveness analysis.

#include "9cc.h"

// Fill bb->succ and bb->pred.
//
// Walks the graph recursively starting at `bb`. The targets are read
// from the last instruction of each block; a block whose successor list
// is already non-empty is treated as visited.
static void add_edges(BB *bb) {
  if (bb->succ->len > 0)
    return;
  assert(bb->ir->len);

  IR *ir = bb->ir->data[bb->ir->len - 1];

  if (ir->bb1) {
    vec_push(bb->succ, ir->bb1);
    vec_push(ir->bb1->pred, bb);
    add_edges(ir->bb1);
  }

  if (ir->bb2) {
    vec_push(bb->succ, ir->bb2);
    vec_push(ir->bb2->pred, bb);
    add_edges(ir->bb2);
  }
}

// Initializes bb->def_regs.
//
// A register counts as defined in a block if it is the block parameter
// or the destination (r0) of any instruction in the block.
static void set_def_regs(BB *bb) {
  if (bb->param)
    vec_union1(bb->def_regs, bb->param);

  for (int i = 0; i < bb->ir->len; i++) {
    IR *ir = bb->ir->data[i];
    if (ir->r0)
      vec_union1(bb->def_regs, ir->r0);
  }
}

// Back-propagate r in the control flow graph.
//
// Marks r as live-in for `bb` and live-out for each predecessor, then
// recurses into the predecessors. Propagation stops at a block that
// defines r or that already has r in its in_regs.
static void propagate(BB *bb, Reg *r) {
  if (!r || vec_contains(bb->def_regs, r))
    return;

  if (!vec_union1(bb->in_regs, r))
    return;

  for (int i = 0; i < bb->pred->len; i++) {
    BB *pred = bb->pred->data[i];
    if (vec_union1(pred->out_regs, r))
      propagate(pred, r);
  }
}

// Initializes bb->in_regs and bb->out_regs.
54 | static void visit(BB *bb, IR *ir) { 55 | propagate(bb, ir->r1); 56 | propagate(bb, ir->r2); 57 | propagate(bb, ir->bbarg); 58 | 59 | if (ir->op == IR_CALL) 60 | for (int i = 0; i < ir->nargs; i++) 61 | propagate(bb, ir->args[i]); 62 | } 63 | 64 | void liveness(Program *prog) { 65 | for (int i = 0; i < prog->funcs->len; i++) { 66 | Function *fn = prog->funcs->data[i]; 67 | add_edges(fn->bbs->data[0]); 68 | 69 | for (int i = 0; i < fn->bbs->len; i++) { 70 | BB *bb = fn->bbs->data[i]; 71 | set_def_regs(bb); 72 | 73 | for (int i = 0; i < bb->ir->len; i++) { 74 | IR *ir = bb->ir->data[i]; 75 | visit(bb, ir); 76 | } 77 | } 78 | 79 | // Incoming registers of the entry BB correspond to 80 | // uninitialized variables in a program. 81 | // Add dummy definitions to make later analysis easy. 82 | BB *ent = fn->bbs->data[0]; 83 | for (int i = 0; i < ent->in_regs->len; i++) { 84 | Reg *r = ent->in_regs->data[i]; 85 | IR *ir = calloc(1, sizeof(IR)); 86 | ir->op = IR_MOV; 87 | ir->r0 = r; 88 | ir->imm = 0; 89 | vec_push(ent->ir, ir); 90 | vec_push(ent->def_regs, r); 91 | } 92 | ent->in_regs = new_vec(); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | void usage() { 4 | error("Usage: 9cc [-test] [-dump-ir1] [-dump-ir2] "); 5 | } 6 | 7 | int main(int argc, char **argv) { 8 | if (argc == 1) 9 | usage(); 10 | 11 | if (argc == 2 && !strcmp(argv[1], "-test")) { 12 | util_test(); 13 | return 0; 14 | } 15 | 16 | char *path; 17 | bool dump_ir1 = false; 18 | bool dump_ir2 = false; 19 | 20 | if (argc == 3 && !strcmp(argv[1], "-dump-ir1")) { 21 | dump_ir1 = true; 22 | path = argv[2]; 23 | } else if (argc == 3 && !strcmp(argv[1], "-dump-ir2")) { 24 | dump_ir2 = true; 25 | path = argv[2]; 26 | } else { 27 | if (argc != 2) 28 | usage(); 29 | path = argv[1]; 30 | } 31 | 32 | // Tokenize and parse. 
33 | Vector *tokens = tokenize(path, true); 34 | Program *prog = parse(tokens); 35 | sema(prog); 36 | gen_ir(prog); 37 | 38 | if (dump_ir1) 39 | dump_ir(prog->funcs); 40 | 41 | optimize(prog); 42 | liveness(prog); 43 | alloc_regs(prog); 44 | 45 | if (dump_ir2) 46 | dump_ir(prog->funcs); 47 | 48 | gen_x86(prog); 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /opt.c: -------------------------------------------------------------------------------- 1 | // Optimization pass. In this pass, we promote all non-address-taken 2 | // integer variables to register values. As a result, we may have more 3 | // register values than the number of the physical registers, but 4 | // that's fine. Regalloc will spill them out to memory. 5 | 6 | #include "9cc.h" 7 | 8 | // Rewrite 9 | // 10 | // BPREL r1, 11 | // STORE r1, r2 12 | // LOAD r3, r1 13 | // 14 | // to 15 | // 16 | // NOP 17 | // r4 = r2 18 | // r3 = r4 19 | static void opt(IR *ir) { 20 | if (ir->op == IR_BPREL) { 21 | Var *var = ir->var; 22 | if (var->address_taken || var->ty->ty != INT) 23 | return; 24 | 25 | if (!var->promoted) 26 | var->promoted = new_reg(); 27 | 28 | ir->op = IR_NOP; 29 | ir->r0->promoted = var->promoted; 30 | return; 31 | } 32 | 33 | if (ir->op == IR_LOAD) { 34 | if (!ir->r2->promoted) 35 | return; 36 | ir->op = IR_MOV; 37 | ir->r2 = ir->r2->promoted; 38 | return; 39 | } 40 | 41 | if (ir->op == IR_STORE) { 42 | if (!ir->r1->promoted) 43 | return; 44 | ir->op = IR_MOV; 45 | ir->r0 = ir->r1->promoted; 46 | ir->r1 = NULL; 47 | return; 48 | } 49 | } 50 | 51 | void optimize(Program *prog) { 52 | for (int i = 0; i < prog->funcs->len; i++) { 53 | Function *fn = prog->funcs->data[i]; 54 | for (int i = 0; i < fn->bbs->len; i++) { 55 | BB *bb = fn->bbs->data[i]; 56 | for (int i = 0; i < bb->ir->len; i++) 57 | opt(bb->ir->data[i]); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /parse.c: 
-------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | // This is a recursive-descendent parser which constructs abstract 4 | // syntax tree from input tokens. 5 | // 6 | // Variable names are resolved at this stage. We create a Var object 7 | // when we see a variable definition and use it when we see a variable 8 | // reference. 9 | // 10 | // Types are added to variables and literals. For other nodes, Sema 11 | // will add type for them. 12 | // 13 | // Semantic checking is omitted from this parser to make the code in 14 | // this file closely resemble the C's BNF. Invalid expressions, such 15 | // as `1+2=3`, are accepted at this stage. Such errors are detected in 16 | // a later pass. 17 | 18 | int nlabel = 1; 19 | 20 | typedef struct Env { 21 | Map *vars; 22 | Map *typedefs; 23 | Map *tags; 24 | struct Env *prev; 25 | } Env; 26 | 27 | static Program *prog; 28 | static Vector *lvars; 29 | static Vector *breaks; 30 | static Vector *continues; 31 | static Vector *switches; 32 | 33 | static Vector *tokens; 34 | static int pos; 35 | struct Env *env; 36 | 37 | static Node null_stmt = {ND_NULL}; 38 | 39 | static Env *new_env(Env *prev) { 40 | Env *env = calloc(1, sizeof(Env)); 41 | env->vars = new_map(); 42 | env->typedefs = new_map(); 43 | env->tags = new_map(); 44 | env->prev = prev; 45 | return env; 46 | } 47 | 48 | static Var *find_var(char *name) { 49 | for (Env *e = env; e; e = e->prev) { 50 | Var *var = map_get(e->vars, name); 51 | if (var) 52 | return var; 53 | } 54 | return NULL; 55 | } 56 | 57 | static Type *find_typedef(char *name) { 58 | for (Env *e = env; e; e = e->prev) { 59 | Type *ty = map_get(e->typedefs, name); 60 | if (ty) 61 | return ty; 62 | } 63 | return NULL; 64 | } 65 | 66 | static Type *find_tag(char *name) { 67 | for (Env *e = env; e; e = e->prev) { 68 | Type *ty = map_get(e->tags, name); 69 | if (ty) 70 | return ty; 71 | } 72 | return NULL; 73 | } 74 | 75 | static Var *add_lvar(Type 
*ty, char *name) { 76 | Var *var = calloc(1, sizeof(Var)); 77 | var->ty = ty; 78 | var->is_local = true; 79 | var->name = name; 80 | map_put(env->vars, name, var); 81 | vec_push(lvars, var); 82 | return var; 83 | } 84 | 85 | static Var *add_gvar(Type *ty, char *name, char *data, bool is_extern) { 86 | Var *var = calloc(1, sizeof(Var)); 87 | var->ty = ty; 88 | var->is_local = false; 89 | var->name = name; 90 | var->data = data; 91 | map_put(env->vars, name, var); 92 | if (!is_extern) 93 | vec_push(prog->gvars, var); 94 | return var; 95 | } 96 | 97 | static Node *assign(); 98 | static Node *expr(); 99 | static Node *stmt(); 100 | 101 | static void expect(int ty) { 102 | Token *t = tokens->data[pos]; 103 | if (t->ty == ty) { 104 | pos++; 105 | return; 106 | } 107 | 108 | if (isprint(ty)) 109 | bad_token(t, format("%c expected", ty)); 110 | assert(ty == TK_WHILE); 111 | bad_token(t, "'while' expected"); 112 | } 113 | 114 | static bool consume(int ty) { 115 | Token *t = tokens->data[pos]; 116 | if (t->ty != ty) 117 | return false; 118 | pos++; 119 | return true; 120 | } 121 | 122 | static bool is_typename() { 123 | Token *t = tokens->data[pos]; 124 | if (t->ty == TK_IDENT) 125 | return find_typedef(t->name); 126 | return t->ty == TK_INT || t->ty == TK_CHAR || t->ty == TK_VOID || 127 | t->ty == TK_STRUCT || t->ty == TK_TYPEOF || t->ty == TK_BOOL; 128 | } 129 | 130 | static Node *declaration_type(); 131 | 132 | static void fix_struct_offsets(Type *ty) { 133 | Vector *types = ty->members->vals; 134 | 135 | int off = 0; 136 | for (int i = 0; i < types->len; i++) { 137 | Type *t2 = types->data[i]; 138 | off = roundup(off, t2->align); 139 | t2->offset = off; 140 | off += t2->size; 141 | 142 | if (ty->align < t2->align) 143 | ty->align = t2->align; 144 | } 145 | ty->size = roundup(off, ty->align); 146 | } 147 | 148 | static Type *decl_specifiers() { 149 | Token *t = tokens->data[pos++]; 150 | 151 | if (t->ty == TK_IDENT) { 152 | Type *ty = find_typedef(t->name); 153 | if (!ty) 
154 | pos--; 155 | return ty; 156 | } 157 | 158 | if (t->ty == TK_VOID) 159 | return void_ty(); 160 | if (t->ty == TK_BOOL) 161 | return bool_ty(); 162 | if (t->ty == TK_CHAR) 163 | return char_ty(); 164 | if (t->ty == TK_INT) 165 | return int_ty(); 166 | 167 | if (t->ty == TK_TYPEOF) { 168 | expect('('); 169 | Node *node = expr(); 170 | expect(')'); 171 | return get_type(node); 172 | } 173 | 174 | if (t->ty == TK_STRUCT) { 175 | Token *t = tokens->data[pos]; 176 | Type *ty = NULL; 177 | char *tag = NULL; 178 | 179 | if (t->ty == TK_IDENT) { 180 | pos++; 181 | tag = t->name; 182 | ty = find_tag(tag); 183 | } 184 | 185 | if (!ty) { 186 | ty = calloc(1, sizeof(Type)); 187 | ty->ty = STRUCT; 188 | } 189 | 190 | if (consume('{')) { 191 | ty->members = new_map(); 192 | while (!consume('}')) { 193 | Node *node = declaration_type(); 194 | map_put(ty->members, node->name, node->ty); 195 | } 196 | fix_struct_offsets(ty); 197 | } 198 | 199 | if (!tag && !ty->members) 200 | bad_token(t, "bad struct definition"); 201 | if (tag) 202 | map_put(env->tags, tag, ty); 203 | return ty; 204 | } 205 | 206 | bad_token(t, "typename expected"); 207 | } 208 | 209 | static Node *new_node(int op, Token *t) { 210 | Node *node = calloc(1, sizeof(Node)); 211 | node->op = op; 212 | node->token = t; 213 | return node; 214 | } 215 | 216 | static Node *new_binop(int op, Token *t, Node *lhs, Node *rhs) { 217 | Node *node = new_node(op, t); 218 | node->lhs = lhs; 219 | node->rhs = rhs; 220 | return node; 221 | } 222 | 223 | static Node *new_expr(int op, Token *t, Node *expr) { 224 | Node *node = new_node(op, t); 225 | node->expr = expr; 226 | return node; 227 | } 228 | 229 | static Node *new_varref(Token *t, Var *var) { 230 | Node *node = new_node(ND_VARREF, t); 231 | node->ty = var->ty; 232 | node->var = var; 233 | return node; 234 | } 235 | 236 | static Node *new_deref(Token *t, Var *var) { 237 | return new_expr(ND_DEREF, t, new_varref(t, var)); 238 | } 239 | 240 | Node *new_int_node(int val, Token 
*t) { 241 | Node *node = new_node(ND_NUM, t); 242 | node->ty = int_ty(); 243 | node->val = val; 244 | return node; 245 | } 246 | 247 | static Node *compound_stmt(); 248 | 249 | static char *ident() { 250 | Token *t = tokens->data[pos++]; 251 | if (t->ty != TK_IDENT) 252 | bad_token(t, "identifier expected"); 253 | return t->name; 254 | } 255 | 256 | static Node *string_literal(Token *t) { 257 | Type *ty = ary_of(char_ty(), t->len); 258 | char *name = format(".L.str%d", nlabel++); 259 | 260 | Node *node = new_node(ND_VARREF, t); 261 | node->ty = ty; 262 | node->var = add_gvar(ty, name, t->str, false); 263 | return node; 264 | } 265 | 266 | static Node *local_variable(Token *t) { 267 | Var *var = find_var(t->name); 268 | if (!var) 269 | bad_token(t, "undefined variable"); 270 | Node *node = new_node(ND_VARREF, t); 271 | node->ty = var->ty; 272 | node->name = t->name; 273 | node->var = var; 274 | return node; 275 | } 276 | 277 | static Node *function_call(Token *t) { 278 | Var *var = find_var(t->name); 279 | 280 | Node *node = new_node(ND_CALL, t); 281 | node->name = t->name; 282 | node->args = new_vec(); 283 | 284 | if (var && var->ty->ty == FUNC) { 285 | node->ty = var->ty; 286 | } else { 287 | warn_token(t, "undefined function"); 288 | node->ty = func_ty(int_ty()); 289 | } 290 | 291 | while (!consume(')')) { 292 | if (node->args->len > 0) 293 | expect(','); 294 | vec_push(node->args, assign()); 295 | } 296 | return node; 297 | } 298 | 299 | static Node *stmt_expr() { 300 | Token *t = tokens->data[pos]; 301 | Vector *v = new_vec(); 302 | 303 | env = new_env(env); 304 | do { 305 | vec_push(v, stmt()); 306 | } while (!consume('}')); 307 | expect(')'); 308 | env = env->prev; 309 | 310 | Node *last = vec_pop(v); 311 | if (last->op != ND_EXPR_STMT) 312 | bad_token(last->token, "statement expression returning void"); 313 | 314 | Node *node = new_node(ND_STMT_EXPR, t); 315 | node->stmts = v; 316 | node->expr = last->expr; 317 | return node; 318 | } 319 | 320 | static Node 
*primary() { 321 | Token *t = tokens->data[pos++]; 322 | 323 | if (t->ty == '(') { 324 | if (consume('{')) 325 | return stmt_expr(); 326 | Node *node = expr(); 327 | expect(')'); 328 | return node; 329 | } 330 | 331 | if (t->ty == TK_NUM) 332 | return new_int_node(t->val, t); 333 | 334 | if (t->ty == TK_STR) 335 | return string_literal(t); 336 | 337 | if (t->ty == TK_IDENT) { 338 | if (consume('(')) 339 | return function_call(t); 340 | return local_variable(t); 341 | } 342 | 343 | bad_token(t, "primary expression expected"); 344 | } 345 | 346 | static Node *mul(); 347 | 348 | static Node *new_stmt_expr(Token *t, Vector *exprs) { 349 | Node *last = vec_pop(exprs); 350 | 351 | Vector *v = new_vec(); 352 | for (int i = 0; i < exprs->len; i++) 353 | vec_push(v, new_expr(ND_EXPR_STMT, t, exprs->data[i])); 354 | 355 | Node *node = new_node(ND_STMT_EXPR, t); 356 | node->stmts = v; 357 | node->expr = last; 358 | return node; 359 | } 360 | 361 | // `x++` where x is of type T is compiled as 362 | // `({ T *y = &x; T z = *y; *y = *y + 1; *z; })`. 
363 | static Node *new_post_inc(Token *t, Node *e, int imm) { 364 | Vector *v = new_vec(); 365 | 366 | Var *var1 = add_lvar(ptr_to(e->ty), "tmp"); 367 | Var *var2 = add_lvar(e->ty, "tmp"); 368 | 369 | vec_push(v, new_binop('=', t, new_varref(t, var1), new_expr(ND_ADDR, t, e))); 370 | vec_push(v, new_binop('=', t, new_varref(t, var2), new_deref(t, var1))); 371 | vec_push(v, new_binop( 372 | '=', t, new_deref(t, var1), 373 | new_binop('+', t, new_deref(t, var1), new_int_node(imm, t)))); 374 | vec_push(v, new_varref(t, var2)); 375 | return new_stmt_expr(t, v); 376 | } 377 | 378 | static Node *postfix() { 379 | Node *lhs = primary(); 380 | 381 | for (;;) { 382 | Token *t = tokens->data[pos]; 383 | 384 | if (consume(TK_INC)) { 385 | lhs = new_post_inc(t, lhs, 1); 386 | continue; 387 | } 388 | 389 | if (consume(TK_DEC)) { 390 | lhs = new_post_inc(t, lhs, -1); 391 | continue; 392 | } 393 | 394 | if (consume('.')) { 395 | lhs = new_expr(ND_DOT, t, lhs); 396 | lhs->name = ident(); 397 | continue; 398 | } 399 | 400 | if (consume(TK_ARROW)) { 401 | lhs = new_expr(ND_DOT, t, new_expr(ND_DEREF, t, lhs)); 402 | lhs->name = ident(); 403 | continue; 404 | } 405 | 406 | if (consume('[')) { 407 | Node *node = new_binop('+', t, lhs, assign()); 408 | lhs = new_expr(ND_DEREF, t, node); 409 | expect(']'); 410 | continue; 411 | } 412 | return lhs; 413 | } 414 | } 415 | 416 | static Node *new_assign_eq(int op, Node *lhs, Node *rhs); 417 | 418 | static Node *unary() { 419 | Token *t = tokens->data[pos]; 420 | 421 | if (consume('-')) 422 | return new_binop('-', t, new_int_node(0, t), unary()); 423 | if (consume('*')) 424 | return new_expr(ND_DEREF, t, unary()); 425 | if (consume('&')) 426 | return new_expr(ND_ADDR, t, unary()); 427 | if (consume('!')) 428 | return new_expr('!', t, unary()); 429 | if (consume('~')) 430 | return new_expr('~', t, unary()); 431 | if (consume(TK_SIZEOF)) 432 | return new_int_node(get_type(unary())->size, t); 433 | if (consume(TK_ALIGNOF)) 434 | return 
new_int_node(get_type(unary())->align, t); 435 | if (consume(TK_INC)) 436 | return new_assign_eq('+', unary(), new_int_node(1, t)); 437 | if (consume(TK_DEC)) 438 | return new_assign_eq('-', unary(), new_int_node(1, t)); 439 | return postfix(); 440 | } 441 | 442 | static Node *mul() { 443 | Node *lhs = unary(); 444 | for (;;) { 445 | Token *t = tokens->data[pos]; 446 | if (consume('*')) 447 | lhs = new_binop('*', t, lhs, unary()); 448 | else if (consume('/')) 449 | lhs = new_binop('/', t, lhs, unary()); 450 | else if (consume('%')) 451 | lhs = new_binop('%', t, lhs, unary()); 452 | else 453 | return lhs; 454 | } 455 | } 456 | 457 | static Node *add() { 458 | Node *lhs = mul(); 459 | for (;;) { 460 | Token *t = tokens->data[pos]; 461 | if (consume('+')) 462 | lhs = new_binop('+', t, lhs, mul()); 463 | else if (consume('-')) 464 | lhs = new_binop('-', t, lhs, mul()); 465 | else 466 | return lhs; 467 | } 468 | } 469 | 470 | static Node *shift() { 471 | Node *lhs = add(); 472 | for (;;) { 473 | Token *t = tokens->data[pos]; 474 | if (consume(TK_SHL)) 475 | lhs = new_binop(ND_SHL, t, lhs, add()); 476 | else if (consume(TK_SHR)) 477 | lhs = new_binop(ND_SHR, t, lhs, add()); 478 | else 479 | return lhs; 480 | } 481 | } 482 | 483 | static Node *relational() { 484 | Node *lhs = shift(); 485 | for (;;) { 486 | Token *t = tokens->data[pos]; 487 | if (consume('<')) 488 | lhs = new_binop('<', t, lhs, shift()); 489 | else if (consume('>')) 490 | lhs = new_binop('<', t, shift(), lhs); 491 | else if (consume(TK_LE)) 492 | lhs = new_binop(ND_LE, t, lhs, shift()); 493 | else if (consume(TK_GE)) 494 | lhs = new_binop(ND_LE, t, shift(), lhs); 495 | else 496 | return lhs; 497 | } 498 | } 499 | 500 | static Node *equality() { 501 | Node *lhs = relational(); 502 | for (;;) { 503 | Token *t = tokens->data[pos]; 504 | if (consume(TK_EQ)) 505 | lhs = new_binop(ND_EQ, t, lhs, relational()); 506 | else if (consume(TK_NE)) 507 | lhs = new_binop(ND_NE, t, lhs, relational()); 508 | else 509 | 
return lhs; 510 | } 511 | } 512 | 513 | static Node *bit_and() { 514 | Node *lhs = equality(); 515 | while (consume('&')) { 516 | Token *t = tokens->data[pos]; 517 | lhs = new_binop('&', t, lhs, equality()); 518 | } 519 | return lhs; 520 | } 521 | 522 | static Node *bit_xor() { 523 | Node *lhs = bit_and(); 524 | while (consume('^')) { 525 | Token *t = tokens->data[pos]; 526 | lhs = new_binop('^', t, lhs, bit_and()); 527 | } 528 | return lhs; 529 | } 530 | 531 | static Node *bit_or() { 532 | Node *lhs = bit_xor(); 533 | while (consume('|')) { 534 | Token *t = tokens->data[pos]; 535 | lhs = new_binop('|', t, lhs, bit_xor()); 536 | } 537 | return lhs; 538 | } 539 | 540 | static Node *logand() { 541 | Node *lhs = bit_or(); 542 | while (consume(TK_LOGAND)) { 543 | Token *t = tokens->data[pos]; 544 | lhs = new_binop(ND_LOGAND, t, lhs, bit_or()); 545 | } 546 | return lhs; 547 | } 548 | 549 | static Node *logor() { 550 | Node *lhs = logand(); 551 | while (consume(TK_LOGOR)) { 552 | Token *t = tokens->data[pos]; 553 | lhs = new_binop(ND_LOGOR, t, lhs, logand()); 554 | } 555 | return lhs; 556 | } 557 | 558 | static Node *conditional() { 559 | Node *cond = logor(); 560 | Token *t = tokens->data[pos]; 561 | if (!consume('?')) 562 | return cond; 563 | 564 | Node *node = new_node('?', t); 565 | node->cond = cond; 566 | node->then = expr(); 567 | expect(':'); 568 | node->els = conditional(); 569 | return node; 570 | } 571 | 572 | // `x op= y` where x is of type T is compiled as 573 | // `({ T *z = &x; *z = *z op y; })`. 
574 | static Node *new_assign_eq(int op, Node *lhs, Node *rhs) { 575 | Vector *v = new_vec(); 576 | Token *t = lhs->token; 577 | 578 | // T *z = &x 579 | Var *var = add_lvar(ptr_to(lhs->ty), "tmp"); 580 | vec_push(v, new_binop('=', t, new_varref(t, var), new_expr(ND_ADDR, t, lhs))); 581 | 582 | // *z = *z op y 583 | vec_push(v, new_binop('=', t, new_deref(t, var), 584 | new_binop(op, t, new_deref(t, var), rhs))); 585 | return new_stmt_expr(t, v); 586 | } 587 | 588 | static Node *assign() { 589 | Node *lhs = conditional(); 590 | Token *t = tokens->data[pos]; 591 | 592 | if (consume('=')) 593 | return new_binop('=', t, lhs, assign()); 594 | if (consume(TK_MUL_EQ)) 595 | return new_assign_eq('*', lhs, assign()); 596 | if (consume(TK_DIV_EQ)) 597 | return new_assign_eq('/', lhs, assign()); 598 | if (consume(TK_MOD_EQ)) 599 | return new_assign_eq('%', lhs, assign()); 600 | if (consume(TK_ADD_EQ)) 601 | return new_assign_eq('+', lhs, assign()); 602 | if (consume(TK_SUB_EQ)) 603 | return new_assign_eq('-', lhs, assign()); 604 | if (consume(TK_SHL_EQ)) 605 | return new_assign_eq(ND_SHL, lhs, assign()); 606 | if (consume(TK_SHR_EQ)) 607 | return new_assign_eq(ND_SHR, lhs, assign()); 608 | if (consume(TK_AND_EQ)) 609 | return new_assign_eq(ND_LOGAND, lhs, assign()); 610 | if (consume(TK_XOR_EQ)) 611 | return new_assign_eq('^', lhs, assign()); 612 | if (consume(TK_OR_EQ)) 613 | return new_assign_eq('|', lhs, assign()); 614 | return lhs; 615 | } 616 | 617 | static Node *expr() { 618 | Node *lhs = assign(); 619 | Token *t = tokens->data[pos]; 620 | if (!consume(',')) 621 | return lhs; 622 | return new_binop(',', t, lhs, expr()); 623 | } 624 | 625 | static int const_expr() { 626 | Token *t = tokens->data[pos]; 627 | Node *node = expr(); 628 | if (node->op != ND_NUM) 629 | bad_token(t, "constant expression expected"); 630 | return node->val; 631 | } 632 | 633 | static Type *read_array(Type *ty) { 634 | Vector *v = new_vec(); 635 | 636 | while (consume('[')) { 637 | if 
(consume(']')) { 638 | vec_pushi(v, -1); 639 | continue; 640 | } 641 | vec_pushi(v, const_expr()); 642 | expect(']'); 643 | } 644 | 645 | for (int i = v->len - 1; i >= 0; i--) { 646 | int len = (intptr_t)v->data[i]; 647 | ty = ary_of(ty, len); 648 | } 649 | return ty; 650 | } 651 | 652 | static Node *declarator(Type *ty); 653 | 654 | static Node *direct_decl(Type *ty) { 655 | Token *t = tokens->data[pos]; 656 | Node *node; 657 | Type *placeholder = calloc(1, sizeof(Type)); 658 | 659 | if (t->ty == TK_IDENT) { 660 | node = new_node(ND_VARDEF, t); 661 | node->ty = placeholder; 662 | node->name = ident(); 663 | } else if (consume('(')) { 664 | node = declarator(placeholder); 665 | expect(')'); 666 | } else { 667 | bad_token(t, "bad direct-declarator"); 668 | } 669 | 670 | // Read the second half of type name (e.g. `[3][5]`). 671 | *placeholder = *read_array(ty); 672 | 673 | // Read an initializer. 674 | if (consume('=')) 675 | node->init = assign(); 676 | return node; 677 | } 678 | 679 | static Node *declarator(Type *ty) { 680 | while (consume('*')) 681 | ty = ptr_to(ty); 682 | return direct_decl(ty); 683 | } 684 | 685 | static Node *declaration_type() { 686 | Type *ty = decl_specifiers(); 687 | Node *node = declarator(ty); 688 | expect(';'); 689 | return node; 690 | } 691 | 692 | static Node *declaration() { 693 | Type *ty = decl_specifiers(); 694 | Node *node = declarator(ty); 695 | expect(';'); 696 | Var *var = add_lvar(node->ty, node->name); 697 | 698 | if (!node->init) 699 | return &null_stmt; 700 | 701 | // Convert `T var = init` to `T var; var = init`. 
702 | Token *t = node->token; 703 | Node *lhs = new_varref(t, var); 704 | Node *rhs = node->init; 705 | node->init = NULL; 706 | 707 | Node *expr = new_binop('=', t, lhs, rhs); 708 | return new_expr(ND_EXPR_STMT, t, expr); 709 | } 710 | 711 | static Var *param_declaration() { 712 | Type *ty = decl_specifiers(); 713 | Node *node = declarator(ty); 714 | ty = node->ty; 715 | if (ty->ty == ARY) 716 | ty = ptr_to(ty->ary_of); 717 | return add_lvar(ty, node->name); 718 | } 719 | 720 | static Node *expr_stmt() { 721 | Token *t = tokens->data[pos]; 722 | Node *node = new_expr(ND_EXPR_STMT, t, expr()); 723 | expect(';'); 724 | return node; 725 | } 726 | 727 | static Node *stmt() { 728 | Token *t = tokens->data[pos++]; 729 | 730 | switch (t->ty) { 731 | case TK_TYPEDEF: { 732 | Node *node = declaration_type(); 733 | assert(node->name); 734 | map_put(env->typedefs, node->name, node->ty); 735 | return &null_stmt; 736 | } 737 | case TK_IF: { 738 | Node *node = new_node(ND_IF, t); 739 | expect('('); 740 | node->cond = expr(); 741 | expect(')'); 742 | 743 | node->then = stmt(); 744 | 745 | if (consume(TK_ELSE)) 746 | node->els = stmt(); 747 | return node; 748 | } 749 | case TK_FOR: { 750 | Node *node = new_node(ND_FOR, t); 751 | expect('('); 752 | env = new_env(env); 753 | vec_push(breaks, node); 754 | vec_push(continues, node); 755 | 756 | if (is_typename()) 757 | node->init = declaration(); 758 | else if (!consume(';')) 759 | node->init = expr_stmt(); 760 | 761 | if (!consume(';')) { 762 | node->cond = expr(); 763 | expect(';'); 764 | } 765 | 766 | if (!consume(')')) { 767 | node->inc = expr(); 768 | expect(')'); 769 | } 770 | 771 | node->body = stmt(); 772 | 773 | vec_pop(breaks); 774 | vec_pop(continues); 775 | env = env->prev; 776 | return node; 777 | } 778 | case TK_WHILE: { 779 | Node *node = new_node(ND_FOR, t); 780 | vec_push(breaks, node); 781 | vec_push(continues, node); 782 | 783 | expect('('); 784 | node->cond = expr(); 785 | expect(')'); 786 | node->body = stmt(); 
787 | 788 | vec_pop(breaks); 789 | vec_pop(continues); 790 | return node; 791 | } 792 | case TK_DO: { 793 | Node *node = new_node(ND_DO_WHILE, t); 794 | vec_push(breaks, node); 795 | vec_push(continues, node); 796 | 797 | node->body = stmt(); 798 | expect(TK_WHILE); 799 | expect('('); 800 | node->cond = expr(); 801 | expect(')'); 802 | expect(';'); 803 | 804 | vec_pop(breaks); 805 | vec_pop(continues); 806 | return node; 807 | } 808 | case TK_SWITCH: { 809 | Node *node = new_node(ND_SWITCH, t); 810 | node->cases = new_vec(); 811 | 812 | expect('('); 813 | node->cond = expr(); 814 | expect(')'); 815 | 816 | vec_push(breaks, node); 817 | vec_push(switches, node); 818 | node->body = stmt(); 819 | vec_pop(breaks); 820 | vec_pop(switches); 821 | return node; 822 | } 823 | case TK_CASE: { 824 | if (switches->len == 0) 825 | bad_token(t, "stray case"); 826 | Node *node = new_node(ND_CASE, t); 827 | node->val = const_expr(); 828 | expect(':'); 829 | node->body = stmt(); 830 | 831 | Node *n = vec_last(switches); 832 | vec_push(n->cases, node); 833 | return node; 834 | } 835 | case TK_BREAK: { 836 | if (breaks->len == 0) 837 | bad_token(t, "stray break"); 838 | Node *node = new_node(ND_BREAK, t); 839 | node->target = vec_last(breaks); 840 | return node; 841 | } 842 | case TK_CONTINUE: { 843 | if (continues->len == 0) 844 | bad_token(t, "stray continue"); 845 | Node *node = new_node(ND_CONTINUE, t); 846 | node->target = vec_last(breaks); 847 | return node; 848 | } 849 | case TK_RETURN: { 850 | Node *node = new_node(ND_RETURN, t); 851 | node->expr = expr(); 852 | expect(';'); 853 | return node; 854 | } 855 | case '{': 856 | return compound_stmt(); 857 | case ';': 858 | return &null_stmt; 859 | default: 860 | pos--; 861 | if (is_typename()) 862 | return declaration(); 863 | return expr_stmt(); 864 | } 865 | } 866 | 867 | static Node *compound_stmt() { 868 | Token *t = tokens->data[pos]; 869 | Node *node = new_node(ND_COMP_STMT, t); 870 | node->stmts = new_vec(); 871 | 872 | env 
= new_env(env); 873 | while (!consume('}')) 874 | vec_push(node->stmts, stmt()); 875 | env = env->prev; 876 | return node; 877 | } 878 | 879 | static void toplevel() { 880 | bool is_typedef = consume(TK_TYPEDEF); 881 | bool is_extern = consume(TK_EXTERN); 882 | 883 | Type *ty = decl_specifiers(); 884 | while (consume('*')) 885 | ty = ptr_to(ty); 886 | 887 | char *name = ident(); 888 | 889 | // Function 890 | if (consume('(')) { 891 | Vector *params = new_vec(); 892 | while (!consume(')')) { 893 | if (params->len > 0) 894 | expect(','); 895 | vec_push(params, param_declaration()); 896 | } 897 | 898 | Token *t = tokens->data[pos]; 899 | Node *node = new_node(ND_DECL, t); 900 | 901 | lvars = new_vec(); 902 | breaks = new_vec(); 903 | continues = new_vec(); 904 | switches = new_vec(); 905 | 906 | node->name = name; 907 | node->params = params; 908 | 909 | node->ty = calloc(1, sizeof(Type)); 910 | node->ty->ty = FUNC; 911 | node->ty->returning = ty; 912 | 913 | add_lvar(node->ty, name); 914 | 915 | if (consume(';')) 916 | return; 917 | 918 | node->op = ND_FUNC; 919 | t = tokens->data[pos]; 920 | expect('{'); 921 | if (is_typedef) 922 | bad_token(t, "typedef has function definition"); 923 | node->body = compound_stmt(); 924 | 925 | Function *fn = calloc(1, sizeof(Function)); 926 | fn->name = name; 927 | fn->node = node; 928 | fn->lvars = lvars; 929 | fn->bbs = new_vec(); 930 | vec_push(prog->funcs, fn); 931 | return; 932 | } 933 | 934 | ty = read_array(ty); 935 | expect(';'); 936 | 937 | if (is_typedef) { 938 | map_put(env->typedefs, name, ty); 939 | return; 940 | } 941 | 942 | // Global variable 943 | add_gvar(ty, name, NULL, is_extern); 944 | } 945 | 946 | static bool is_eof() { 947 | Token *t = tokens->data[pos]; 948 | return t->ty == TK_EOF; 949 | } 950 | 951 | Program *parse(Vector *tokens_) { 952 | tokens = tokens_; 953 | pos = 0; 954 | env = new_env(NULL); 955 | 956 | prog = calloc(1, sizeof(Program)); 957 | prog->gvars = new_vec(); 958 | prog->funcs = new_vec(); 
959 | 960 | while (!is_eof()) 961 | toplevel(); 962 | return prog; 963 | } 964 | -------------------------------------------------------------------------------- /preprocess.c: -------------------------------------------------------------------------------- 1 | // C preprocessor 2 | 3 | #include "9cc.h" 4 | 5 | static Map *macros; 6 | 7 | typedef struct Env { 8 | Vector *input; 9 | Vector *output; 10 | int pos; 11 | struct Env *prev; 12 | } Env; 13 | 14 | static Env *env; 15 | 16 | static Env *new_env(Env *prev, Vector *input) { 17 | Env *env = calloc(1, sizeof(Env)); 18 | env->input = input; 19 | env->output = new_vec(); 20 | env->prev = prev; 21 | return env; 22 | } 23 | 24 | enum { 25 | OBJLIKE, 26 | FUNCLIKE, 27 | }; 28 | 29 | typedef struct Macro { 30 | int ty; 31 | Vector *tokens; 32 | Vector *params; 33 | } Macro; 34 | 35 | static Macro *new_macro(int ty, char *name) { 36 | Macro *m = calloc(1, sizeof(Macro)); 37 | m->ty = ty; 38 | m->tokens = new_vec(); 39 | m->params = new_vec(); 40 | map_put(macros, name, m); 41 | return m; 42 | } 43 | 44 | static void append(Vector *v) { 45 | for (int i = 0; i < v->len; i++) 46 | vec_push(env->output, v->data[i]); 47 | } 48 | 49 | static void emit(Token *t) { 50 | vec_push(env->output, t); 51 | } 52 | 53 | static Token *next() { 54 | assert(env->pos < env->input->len); 55 | return env->input->data[env->pos++]; 56 | } 57 | 58 | static bool is_eof() { 59 | return env->pos == env->input->len; 60 | } 61 | 62 | static Token *get(int ty, char *msg) { 63 | Token *t = next(); 64 | if (t->ty != ty) 65 | bad_token(t, msg); 66 | return t; 67 | } 68 | 69 | static char *ident(char *msg) { 70 | Token *t = get(TK_IDENT, "parameter name expected"); 71 | return t->name; 72 | } 73 | 74 | static Token *peek() { 75 | return env->input->data[env->pos]; 76 | } 77 | 78 | static bool consume(int ty) { 79 | if (peek()->ty != ty) 80 | return false; 81 | env->pos++; 82 | return true; 83 | } 84 | 85 | static Vector *read_until_eol() { 86 | Vector 
*v = new_vec(); 87 | while (!is_eof()) { 88 | Token *t = next(); 89 | if (t->ty == '\n') 90 | break; 91 | vec_push(v, t); 92 | } 93 | return v; 94 | } 95 | 96 | static Token *new_int(Token *tmpl, int val) { 97 | Token *t = calloc(1, sizeof(Token)); 98 | *t = *tmpl; 99 | t->ty = TK_NUM; 100 | t->val = val; 101 | return t; 102 | } 103 | 104 | static Token *new_string(Token *tmpl, char *str, int len) { 105 | Token *t = calloc(1, sizeof(Token)); 106 | *t = *tmpl; 107 | t->ty = TK_STR; 108 | t->str = str; 109 | t->len = len; 110 | return t; 111 | } 112 | 113 | static Token *new_param(Token *tmpl, int val) { 114 | Token *t = calloc(1, sizeof(Token)); 115 | *t = *tmpl; 116 | t->ty = TK_PARAM; 117 | t->val = val; 118 | return t; 119 | } 120 | 121 | static bool is_ident(Token *t, char *s) { 122 | return t->ty == TK_IDENT && !strcmp(t->name, s); 123 | } 124 | 125 | // Replaces macro parameter tokens with TK_PARAM tokens. 126 | static void replace_macro_params(Macro *m) { 127 | Vector *params = m->params; 128 | Vector *tokens = m->tokens; 129 | 130 | Map *map = new_map(); 131 | for (int i = 0; i < params->len; i++) { 132 | char *name = params->data[i]; 133 | map_puti(map, name, i); 134 | } 135 | 136 | for (int i = 0; i < tokens->len; i++) { 137 | Token *t = tokens->data[i]; 138 | if (t->ty != TK_IDENT) 139 | continue; 140 | int n = map_geti(map, t->name, -1); 141 | if (n == -1) 142 | continue; 143 | tokens->data[i] = new_param(t, n); 144 | } 145 | } 146 | 147 | // Replaces '#' followed by a macro parameter with one token. 
148 | static void replace_hash_ident(Macro *m) { 149 | Vector *tokens = m->tokens; 150 | Vector *v = new_vec(); 151 | 152 | int i = 0; 153 | for (; i < tokens->len - 1; i++) { 154 | Token *t1 = tokens->data[i]; 155 | Token *t2 = tokens->data[i + 1]; 156 | 157 | if (t1->ty == '#' && t2->ty == TK_PARAM) { 158 | t2->stringize = true; 159 | vec_push(v, t2); 160 | i++; 161 | } else { 162 | vec_push(v, t1); 163 | } 164 | } 165 | 166 | if (i == tokens->len - 1) 167 | vec_push(v, tokens->data[i]); 168 | m->tokens = v; 169 | } 170 | 171 | static Vector *read_one_arg() { 172 | Vector *v = new_vec(); 173 | Token *start = peek(); 174 | int level = 0; 175 | 176 | while (!is_eof()) { 177 | Token *t = peek(); 178 | if (level == 0) 179 | if (t->ty == ')' || t->ty == ',') 180 | return v; 181 | 182 | next(); 183 | if (t->ty == '(') 184 | level++; 185 | else if (t->ty == ')') 186 | level--; 187 | vec_push(v, t); 188 | } 189 | bad_token(start, "unclosed macro argument"); 190 | } 191 | 192 | static Vector *read_args() { 193 | Vector *v = new_vec(); 194 | if (consume(')')) 195 | return v; 196 | vec_push(v, read_one_arg()); 197 | while (!consume(')')) { 198 | get(',', "comma expected"); 199 | vec_push(v, read_one_arg()); 200 | } 201 | return v; 202 | } 203 | 204 | static bool emit_special_macro(Token *t) { 205 | if (is_ident(t, "__LINE__")) { 206 | emit(new_int(t, get_line_number(t))); 207 | return true; 208 | } 209 | return false; 210 | } 211 | 212 | static void apply_objlike(Macro *m, Token *start) { 213 | for (int i = 0; i < m->tokens->len; i++) { 214 | Token *t = m->tokens->data[i]; 215 | if (emit_special_macro(t)) 216 | continue; 217 | emit(t); 218 | } 219 | } 220 | 221 | static void apply_funclike(Macro *m, Token *start) { 222 | get('(', "comma expected"); 223 | 224 | Vector *args = read_args(); 225 | if (m->params->len != args->len) 226 | bad_token(start, "number of parameter does not match"); 227 | 228 | for (int i = 0; i < m->tokens->len; i++) { 229 | Token *t = 
m->tokens->data[i]; 230 | if (emit_special_macro(t)) 231 | continue; 232 | 233 | if (t->ty == TK_PARAM) { 234 | if (t->stringize) { 235 | char *s = stringize(args->data[t->val]); 236 | emit(new_string(t, s, strlen(s) + 1)); 237 | } else { 238 | append(args->data[t->val]); 239 | } 240 | continue; 241 | } 242 | emit(t); 243 | } 244 | } 245 | 246 | static void apply(Macro *m, Token *start) { 247 | if (m->ty == OBJLIKE) 248 | apply_objlike(m, start); 249 | else 250 | apply_funclike(m, start); 251 | } 252 | 253 | static void define_funclike(char *name) { 254 | Macro *m = new_macro(FUNCLIKE, name); 255 | while (!consume(')')) { 256 | if (m->params->len > 0) 257 | get(',', ", expected"); 258 | vec_push(m->params, ident("parameter name expected")); 259 | } 260 | 261 | m->tokens = read_until_eol(); 262 | replace_macro_params(m); 263 | replace_hash_ident(m); 264 | } 265 | 266 | static void define_objlike(char *name) { 267 | Macro *m = new_macro(OBJLIKE, name); 268 | m->tokens = read_until_eol(); 269 | } 270 | 271 | static void define() { 272 | char *name = ident("macro name expected"); 273 | if (consume('(')) 274 | return define_funclike(name); 275 | return define_objlike(name); 276 | } 277 | 278 | static void include() { 279 | Token *t = get(TK_STR, "string expected"); 280 | char *path = t->str; 281 | get('\n', "newline expected"); 282 | append(tokenize(path, false)); 283 | } 284 | 285 | Vector *preprocess(Vector *tokens) { 286 | if (!macros) 287 | macros = new_map(); 288 | env = new_env(env, tokens); 289 | 290 | while (!is_eof()) { 291 | Token *t = next(); 292 | 293 | if (t->ty == TK_IDENT) { 294 | Macro *m = map_get(macros, t->name); 295 | if (m) 296 | apply(m, t); 297 | else 298 | emit(t); 299 | continue; 300 | } 301 | 302 | if (t->ty != '#') { 303 | emit(t); 304 | continue; 305 | } 306 | 307 | t = get(TK_IDENT, "identifier expected"); 308 | 309 | if (!strcmp(t->name, "define")) 310 | define(); 311 | else if (!strcmp(t->name, "include")) 312 | include(); 313 | else 314 | 
bad_token(t, "unknown directive"); 315 | } 316 | 317 | Vector *v = env->output; 318 | env = env->prev; 319 | return v; 320 | } 321 | -------------------------------------------------------------------------------- /regalloc.c: -------------------------------------------------------------------------------- 1 | // Linear scan register allocator. 2 | // 3 | // Before this pass, it is assumed that we have an infinite number of 4 | // registers. This pass maps them to a finite number of registers. 5 | // Here is the algorithm: 6 | // 7 | // First, we find the definition and the last use for each register. 8 | // A register is considered "live" in the range. At the definition of 9 | // some register R, if all physical registers are already allocated, 10 | // one of them (including R itself) needs to be spilled to the stack. 11 | // As long as one register is spilled, the algorithm is logically 12 | // correct. As a heuristic, we spill a register whose last use is 13 | // furthest. 14 | // 15 | // We then insert load and store instructions for spilled registers. 16 | // The last register (num_regs-1'th register) is reserved for that 17 | // purpose. 18 | 19 | #include "9cc.h" 20 | 21 | // Rewrite `A = B op C` to `A = B; A = A op C`. 
22 | static void three_to_two(BB *bb) { 23 | Vector *v = new_vec(); 24 | 25 | for (int i = 0; i < bb->ir->len; i++) { 26 | IR *ir = bb->ir->data[i]; 27 | 28 | if (!ir->r0 || !ir->r1) { 29 | vec_push(v, ir); 30 | continue; 31 | } 32 | 33 | assert(ir->r0 != ir->r1); 34 | 35 | IR *ir2 = calloc(1, sizeof(IR)); 36 | ir2->op = IR_MOV; 37 | ir2->r0 = ir->r0; 38 | ir2->r2 = ir->r1; 39 | vec_push(v, ir2); 40 | 41 | ir->r1 = ir->r0; 42 | vec_push(v, ir); 43 | } 44 | bb->ir = v; 45 | } 46 | 47 | static void set_last_use(Reg *r, int ic) { 48 | if (r && r->last_use < ic) 49 | r->last_use = ic; 50 | } 51 | 52 | static Vector *collect_regs(Function *fn) { 53 | Vector *v = new_vec(); 54 | int ic = 1; // instruction counter 55 | 56 | for (int i = 0; i < fn->bbs->len; i++) { 57 | BB *bb = fn->bbs->data[i]; 58 | 59 | if (bb->param) { 60 | bb->param->def = ic; 61 | vec_push(v, bb->param); 62 | } 63 | 64 | for (int i = 0; i < bb->ir->len; i++, ic++) { 65 | IR *ir = bb->ir->data[i]; 66 | 67 | if (ir->r0 && !ir->r0->def) { 68 | ir->r0->def = ic; 69 | vec_push(v, ir->r0); 70 | } 71 | 72 | set_last_use(ir->r1, ic); 73 | set_last_use(ir->r2, ic); 74 | set_last_use(ir->bbarg, ic); 75 | 76 | if (ir->op == IR_CALL) 77 | for (int i = 0; i < ir->nargs; i++) 78 | set_last_use(ir->args[i], ic); 79 | } 80 | 81 | for (int i = 0; i < bb->out_regs->len; i++) { 82 | Reg *r = bb->out_regs->data[i]; 83 | set_last_use(r, ic); 84 | } 85 | } 86 | 87 | return v; 88 | } 89 | 90 | static int choose_to_spill(Reg **used) { 91 | int k = 0; 92 | for (int i = 1; i < num_regs; i++) 93 | if (used[k]->last_use < used[i]->last_use) 94 | k = i; 95 | return k; 96 | } 97 | 98 | // Allocate registers. 99 | static void scan(Vector *regs) { 100 | Reg **used = calloc(num_regs, sizeof(Reg *)); 101 | 102 | for (int i = 0; i < regs->len; i++) { 103 | Reg *r = regs->data[i]; 104 | 105 | // Find an unused slot. 
106 | bool found = false; 107 | for (int i = 0; i < num_regs - 1; i++) { 108 | if (used[i] && r->def < used[i]->last_use) 109 | continue; 110 | r->rn = i; 111 | used[i] = r; 112 | found = true; 113 | break; 114 | } 115 | 116 | if (found) 117 | continue; 118 | 119 | // Choose a register to spill and mark it as "spilled". 120 | used[num_regs - 1] = r; 121 | int k = choose_to_spill(used); 122 | 123 | r->rn = k; 124 | used[k]->rn = num_regs - 1; 125 | used[k]->spill = true; 126 | used[k] = r; 127 | } 128 | } 129 | 130 | static void spill_store(Vector *v, IR *ir) { 131 | Reg *r = ir->r0; 132 | if (!r || !r->spill) 133 | return; 134 | 135 | IR *ir2 = calloc(1, sizeof(IR)); 136 | ir2->op = IR_STORE_SPILL; 137 | ir2->r1 = r; 138 | ir2->var = r->var; 139 | vec_push(v, ir2); 140 | } 141 | 142 | static void spill_load(Vector *v, IR *ir, Reg *r) { 143 | if (!r || !r->spill) 144 | return; 145 | 146 | IR *ir2 = calloc(1, sizeof(IR)); 147 | ir2->op = IR_LOAD_SPILL; 148 | ir2->r0 = r; 149 | ir2->var = r->var; 150 | vec_push(v, ir2); 151 | } 152 | 153 | static void emit_spill_code(BB *bb) { 154 | Vector *v = new_vec(); 155 | 156 | for (int i = 0; i < bb->ir->len; i++) { 157 | IR *ir = bb->ir->data[i]; 158 | 159 | spill_load(v, ir, ir->r1); 160 | spill_load(v, ir, ir->r2); 161 | spill_load(v, ir, ir->bbarg); 162 | vec_push(v, ir); 163 | spill_store(v, ir); 164 | } 165 | bb->ir = v; 166 | } 167 | 168 | void alloc_regs(Program *prog) { 169 | for (int i = 0; i < prog->funcs->len; i++) { 170 | Function *fn = prog->funcs->data[i]; 171 | 172 | // Convert SSA to x86-ish two-address form. 173 | for (int i = 0; i < fn->bbs->len; i++) { 174 | BB *bb = fn->bbs->data[i]; 175 | three_to_two(bb); 176 | } 177 | 178 | // Allocate registers and decide which registers to spill. 179 | Vector *regs = collect_regs(fn); 180 | scan(regs); 181 | 182 | // Reserve a stack area for spilled registers. 
183 | for (int i = 0; i < regs->len; i++) { 184 | Reg *r = regs->data[i]; 185 | if (!r->spill) 186 | continue; 187 | 188 | Var *var = calloc(1, sizeof(Var)); 189 | var->ty = ptr_to(int_ty()); 190 | var->is_local = true; 191 | var->name = "spill"; 192 | 193 | r->var = var; 194 | vec_push(fn->lvars, var); 195 | } 196 | 197 | // Convert accesses to spilled registers to loads and stores. 198 | for (int i = 0; i < fn->bbs->len; i++) { 199 | BB *bb = fn->bbs->data[i]; 200 | emit_spill_code(bb); 201 | } 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /sema.c: -------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | // Semantics analyzer. This pass plays a few important roles as shown 4 | // below: 5 | // 6 | // - Add types to nodes. For example, a tree that represents "1+2" is 7 | // typed as INT because the result type of an addition of two 8 | // integers is integer. 9 | // 10 | // - Insert nodes to make array-to-pointer conversion explicit. 11 | // Recall that, in C, "array of T" is automatically converted to 12 | // "pointer to T" in most contexts. 13 | // 14 | // - Insert nodes for implicit cast so that they are explicitly 15 | // represented in AST. 16 | // 17 | // - Scales operands for pointer arithmetic. E.g. ptr+1 becomes ptr+4 18 | // for integer and becomes ptr+8 for pointer. 19 | // 20 | // - Reject bad assignments, such as `1=2+3`. 
21 | 22 | static Node *maybe_decay(Node *base, bool decay) { 23 | if (!decay || base->ty->ty != ARY) 24 | return base; 25 | 26 | Node *node = calloc(1, sizeof(Node)); 27 | node->op = ND_ADDR; 28 | node->ty = ptr_to(base->ty->ary_of); 29 | node->expr = base; 30 | node->token = base->token; 31 | return node; 32 | } 33 | 34 | noreturn static void bad_node(Node *node, char *msg) { 35 | bad_token(node->token, msg); 36 | } 37 | 38 | static void check_lval(Node *node) { 39 | int op = node->op; 40 | if (op != ND_VARREF && op != ND_DEREF && op != ND_DOT) 41 | bad_node(node, "not an lvalue"); 42 | } 43 | 44 | static Node *scale_ptr(int op, Node *base, Type *ty) { 45 | Node *node = calloc(1, sizeof(Node)); 46 | node->op = op; 47 | node->lhs = base; 48 | node->rhs = new_int_node(ty->ptr_to->size, base->token); 49 | node->token = base->token; 50 | return node; 51 | } 52 | 53 | static Node *cast(Node *base, Type *ty) { 54 | Node *node = calloc(1, sizeof(Node)); 55 | node->op = ND_CAST; 56 | node->ty = ty; 57 | node->expr = base; 58 | node->token = base->token; 59 | return node; 60 | } 61 | 62 | static void check_int(Node *node) { 63 | int t = node->ty->ty; 64 | if (t != INT && t != CHAR && t != BOOL) 65 | bad_node(node, "not an integer"); 66 | } 67 | 68 | static Node *do_walk(Node *node, bool decay); 69 | 70 | static Node *walk(Node *node) { 71 | return do_walk(node, true); 72 | } 73 | 74 | static Node *walk_nodecay(Node *node) { 75 | return do_walk(node, false); 76 | } 77 | 78 | static Node *do_walk(Node *node, bool decay) { 79 | switch (node->op) { 80 | case ND_NUM: 81 | case ND_NULL: 82 | case ND_BREAK: 83 | case ND_CONTINUE: 84 | return node; 85 | case ND_VARREF: 86 | return maybe_decay(node, decay); 87 | case ND_IF: 88 | node->cond = walk(node->cond); 89 | node->then = walk(node->then); 90 | if (node->els) 91 | node->els = walk(node->els); 92 | return node; 93 | case ND_FOR: 94 | if (node->init) 95 | node->init = walk(node->init); 96 | if (node->cond) 97 | node->cond = 
walk(node->cond); 98 | if (node->inc) 99 | node->inc = walk(node->inc); 100 | node->body = walk(node->body); 101 | return node; 102 | case ND_DO_WHILE: 103 | case ND_SWITCH: 104 | node->cond = walk(node->cond); 105 | node->body = walk(node->body); 106 | return node; 107 | case ND_CASE: 108 | node->body = walk(node->body); 109 | return node; 110 | case '+': 111 | node->lhs = walk(node->lhs); 112 | node->rhs = walk(node->rhs); 113 | 114 | if (node->rhs->ty->ty == PTR) { 115 | Node *n = node->lhs; 116 | node->lhs = node->rhs; 117 | node->rhs = n; 118 | } 119 | check_int(node->rhs); 120 | 121 | if (node->lhs->ty->ty == PTR) { 122 | node->rhs = scale_ptr('*', node->rhs, node->lhs->ty); 123 | node->ty = node->lhs->ty; 124 | } else { 125 | node->ty = int_ty(); 126 | } 127 | return node; 128 | case '-': { 129 | node->lhs = walk(node->lhs); 130 | node->rhs = walk(node->rhs); 131 | 132 | Type *lty = node->lhs->ty; 133 | Type *rty = node->rhs->ty; 134 | 135 | if (lty->ty == PTR && rty->ty == PTR) { 136 | if (!same_type(rty, lty)) 137 | bad_node(node, "incompatible pointer"); 138 | node = scale_ptr('/', node, lty); 139 | node->ty = lty; 140 | } else { 141 | node->ty = int_ty(); 142 | } 143 | return node; 144 | } 145 | case '=': 146 | node->lhs = walk_nodecay(node->lhs); 147 | check_lval(node->lhs); 148 | node->rhs = walk(node->rhs); 149 | if (node->lhs->ty->ty == BOOL) 150 | node->rhs = cast(node->rhs, bool_ty()); 151 | node->ty = node->lhs->ty; 152 | return node; 153 | case ND_DOT: { 154 | node->expr = walk(node->expr); 155 | if (node->expr->ty->ty != STRUCT) 156 | bad_node(node, "struct expected before '.'"); 157 | 158 | Type *ty = node->expr->ty; 159 | if (!ty->members) 160 | bad_node(node, "incomplete type"); 161 | 162 | node->ty = map_get(ty->members, node->name); 163 | if (!node->ty) 164 | bad_node(node, format("member missing: %s", node->name)); 165 | return maybe_decay(node, decay); 166 | } 167 | case '?': 168 | node->cond = walk(node->cond); 169 | node->then = 
walk(node->then); 170 | node->els = walk(node->els); 171 | node->ty = node->then->ty; 172 | return node; 173 | case '*': 174 | case '/': 175 | case '%': 176 | case '<': 177 | case '|': 178 | case '^': 179 | case '&': 180 | case ND_EQ: 181 | case ND_NE: 182 | case ND_LE: 183 | case ND_SHL: 184 | case ND_SHR: 185 | case ND_LOGAND: 186 | case ND_LOGOR: 187 | node->lhs = walk(node->lhs); 188 | node->rhs = walk(node->rhs); 189 | check_int(node->lhs); 190 | check_int(node->rhs); 191 | node->ty = int_ty(); 192 | return node; 193 | case ',': 194 | node->lhs = walk(node->lhs); 195 | node->rhs = walk(node->rhs); 196 | node->ty = node->rhs->ty; 197 | return node; 198 | case '!': 199 | case '~': 200 | node->expr = walk(node->expr); 201 | check_int(node->expr); 202 | node->ty = int_ty(); 203 | return node; 204 | case ND_ADDR: 205 | node->expr = walk(node->expr); 206 | check_lval(node->expr); 207 | node->ty = ptr_to(node->expr->ty); 208 | if (node->expr->op == ND_VARREF) 209 | node->expr->var->address_taken = true; 210 | return node; 211 | case ND_DEREF: 212 | node->expr = walk(node->expr); 213 | 214 | if (node->expr->ty->ty != PTR) 215 | bad_node(node, "operand must be a pointer"); 216 | 217 | if (node->expr->ty->ptr_to->ty == VOID) 218 | bad_node(node, "cannot dereference void pointer"); 219 | 220 | node->ty = node->expr->ty->ptr_to; 221 | return maybe_decay(node, decay); 222 | case ND_RETURN: 223 | case ND_EXPR_STMT: 224 | node->expr = walk(node->expr); 225 | return node; 226 | case ND_CALL: 227 | for (int i = 0; i < node->args->len; i++) 228 | node->args->data[i] = walk(node->args->data[i]); 229 | node->ty = node->ty->returning; 230 | return node; 231 | case ND_COMP_STMT: { 232 | for (int i = 0; i < node->stmts->len; i++) 233 | node->stmts->data[i] = walk(node->stmts->data[i]); 234 | return node; 235 | } 236 | case ND_STMT_EXPR: { 237 | for (int i = 0; i < node->stmts->len; i++) 238 | node->stmts->data[i] = walk(node->stmts->data[i]); 239 | node->expr = walk(node->expr); 240 
| node->ty = node->expr->ty; 241 | return node; 242 | } 243 | default: 244 | assert(0 && "unknown node type"); 245 | } 246 | } 247 | 248 | Type *get_type(Node *node) { 249 | return walk_nodecay(node)->ty; 250 | } 251 | 252 | void sema(Program *prog) { 253 | for (int i = 0; i < prog->funcs->len; i++) { 254 | Function *fn = prog->funcs->data[i]; 255 | Node *node = fn->node; 256 | assert(node->op == ND_FUNC); 257 | node->body = walk(node->body); 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /test/gcc.c: -------------------------------------------------------------------------------- 1 | // This file is compiled by gcc. 2 | 3 | int global_arr[1] = {5}; 4 | -------------------------------------------------------------------------------- /test/test.c: -------------------------------------------------------------------------------- 1 | extern void *stderr; 2 | 3 | int printf(); 4 | int fprintf(); 5 | int exit(); 6 | 7 | #define EXPECT(expected, expr) \ 8 | do { \ 9 | int e1 = (expected); \ 10 | int e2 = (expr); \ 11 | if (e1 == e2) { \ 12 | fprintf(stderr, "%s => %d\n", #expr, e2); \ 13 | } else { \ 14 | fprintf(stderr, "line %d: %s: %d expected, but got %d\n", \ 15 | __LINE__, #expr, e1, e2); \ 16 | exit(1); \ 17 | } \ 18 | } while (0) 19 | 20 | int one() { return 1; } 21 | int two() { return 2; } 22 | int plus(int x, int y) { return x + y; } 23 | int mul(int x, int y) { return x * y; } 24 | int add(int a, int b, int c, int d, int e, int f) { return a+b+c+d+e+f; } 25 | int add2(int (*a)[2]) { return a[0][0] + a[1][0]; } 26 | int add3(int a[][2]) { return a[0][0] + a[1][0]; } 27 | int add4(int a[2][2]) { return a[0][0] + a[1][0]; } 28 | void nop() {} 29 | 30 | int var1; 31 | int var2[5]; 32 | extern int global_arr[1]; 33 | typedef int myint; 34 | 35 | // Single-line comment test 36 | 37 | /*************************** 38 | * Multi-line comment test * 39 | ***************************/ 40 | 41 | int main() { 42 | 
EXPECT(0, 0); 43 | EXPECT(1, 1); 44 | EXPECT(493, 0755); 45 | EXPECT(48879, 0xBEEF); 46 | EXPECT(255, 0Xff); 47 | EXPECT(2, 1+1); 48 | EXPECT(10, 2*3+4); 49 | EXPECT(26, 2*3+4*5); 50 | EXPECT(5, 50/10); 51 | EXPECT(9, 6*3/2); 52 | EXPECT(45, (2+3)*(4+5)); 53 | EXPECT(153, 1+2+3+4+5+6+7+8+9+10+11+12+13+14+15+16+17); 54 | 55 | EXPECT(2, ({ int a=2; a; })); 56 | EXPECT(10, ({ int a=2; int b; b=3+2; a*b; })); 57 | EXPECT(2, ({ int i=3; if (1) i=2; i; })); 58 | EXPECT(3, ({ int i=3; if (0) i=2; i; })); 59 | EXPECT(2, ({ int i=0; if (1) i=2; else i=3; i; })); 60 | EXPECT(3, ({ int i=0; if (0) i=2; else i=3; i; })); 61 | 62 | EXPECT(5, plus(2, 3)); 63 | EXPECT(1, one()); 64 | EXPECT(3, one()+two()); 65 | EXPECT(6, mul(2, 3)); 66 | EXPECT(21, add(1,2,3,4,5,6)); 67 | 68 | EXPECT(0, 0 || 0); 69 | EXPECT(1, 1 || 0); 70 | EXPECT(1, 0 || 1); 71 | EXPECT(1, 1 || 1); 72 | 73 | EXPECT(0, 0 && 0); 74 | EXPECT(0, 1 && 0); 75 | EXPECT(0, 0 && 1); 76 | EXPECT(1, 1 && 1); 77 | 78 | EXPECT(0, 0 < 0); 79 | EXPECT(0, 1 < 0); 80 | EXPECT(1, 0 < 1); 81 | EXPECT(0, 0 > 0); 82 | EXPECT(0, 0 > 1); 83 | EXPECT(1, 1 > 0); 84 | 85 | EXPECT(0, 4 == 5); 86 | EXPECT(1, 5 == 5); 87 | EXPECT(1, 4 != 5); 88 | EXPECT(0, 5 != 5); 89 | 90 | EXPECT(1, 4 <= 5); 91 | EXPECT(1, 5 <= 5); 92 | EXPECT(0, 6 <= 5); 93 | 94 | EXPECT(0, 4 >= 5); 95 | EXPECT(1, 5 >= 5); 96 | EXPECT(1, 6 >= 5); 97 | 98 | EXPECT(8, 1 << 3); 99 | EXPECT(4, 16 >> 2); 100 | 101 | EXPECT(4, 19 % 5); 102 | EXPECT(0, 9 % 3); 103 | 104 | EXPECT(0-3, -3); 105 | 106 | EXPECT(0, !1); 107 | EXPECT(1, !0); 108 | 109 | EXPECT(-1, ~0); 110 | EXPECT(-4, ~3); 111 | 112 | EXPECT(3, ({ int i = 3; i++; })); 113 | EXPECT(4, ({ int i = 3; ++i; })); 114 | EXPECT(3, ({ int i = 3; i--; })); 115 | EXPECT(2, ({ int i = 3; --i; })); 116 | 117 | EXPECT(5, 0 ? 3 : 5); 118 | EXPECT(3, 1 ? 
3 : 5); 119 | 120 | EXPECT(3, (1, 2, 3)); 121 | 122 | EXPECT(11, 9 | 2); 123 | EXPECT(11, 9 | 3); 124 | EXPECT(5, 6 ^ 3); 125 | EXPECT(2, 6 & 3); 126 | EXPECT(0, 6 & 0); 127 | 128 | EXPECT(3, ({ int x; int y; x=y=3; x; })); 129 | EXPECT(3, ({ int x; int y; x=y=3; y; })); 130 | 131 | EXPECT(45, ({ int x=0; int y=0; do { y=y+x; x=x+1; } while (x < 10); y; })); 132 | EXPECT(1, ({ int x=0; do {x++; break;} while (1); x; })); 133 | EXPECT(1, ({ int x=0; do {x++; continue;} while (0); x; })); 134 | 135 | EXPECT(60, ({ int sum=0; int i; for (i=10; i<15; i=i+1) sum = sum + i; sum;})); 136 | EXPECT(89, ({ int i=1; int j=1; for (int k=0; k<10; k=k+1) { int m=i+j; i=j; j=m; } i;})); 137 | EXPECT(1, ({ int i=1; for (int i = 5; i < 10; i++); i; })); 138 | EXPECT(5, ({ int i=0; for (; i < 10; i++) if (i==5) break; i; })); 139 | EXPECT(10, ({ int i=0; for (;;) { i++; if (i==10) break; } i; })); 140 | 141 | EXPECT(7, ({ int i=0; for (int j=0; j < 10; j++) { if (j<3) continue; i++; } i; })); 142 | 143 | EXPECT(45, ({ int i=0; int j=0; while (i<10) { j=j+i; i=i+1; } j;})); 144 | 145 | EXPECT(6, ({ int x=0; switch(3) { case 2: x=5; break; case 3: x=6; break; case 4: x=7; break; } x; })); 146 | EXPECT(7, ({ int x=0; switch(3) { case 2: x=5; case 3: x=6; case 4: x=7; } x; })); 147 | EXPECT(0, ({ int x=0; switch(3) case 1: x=5; x; })); 148 | 149 | EXPECT(3, ({ int ary[2]; *ary=1; *(ary+1)=2; *ary + *(ary+1);})); 150 | EXPECT(5, ({ int x; int *p = &x; x = 5; *p;})); 151 | EXPECT(4, ({ int *p; (p+5)-(p+1); })); 152 | 153 | EXPECT(40, ({ int ary[2][5]; sizeof(ary);})); 154 | EXPECT(8, ({ int ary[2][2]; ary[0][0]=3; ary[1][0]=5; add2(ary);})); 155 | EXPECT(8, ({ int ary[2][2]; ary[0][0]=3; ary[1][0]=5; add3(ary);})); 156 | EXPECT(8, ({ int ary[2][2]; ary[0][0]=3; ary[1][0]=5; add4(ary);})); 157 | 158 | EXPECT(3, ({ int ary[2]; ary[0]=1; ary[1]=2; ary[0] + ary[0+1];})); 159 | EXPECT(5, ({ int x; int *p = &x; x = 5; p[0];})); 160 | EXPECT(1, ({ int ary[2]; ary[0]=1; ary[1]=2; int *p=ary; 
*p++;})); 161 | EXPECT(2, ({ int ary[2]; ary[0]=1; ary[1]=2; int *p=ary; *++p;})); 162 | 163 | EXPECT(1, ({ char x; sizeof x; })); 164 | EXPECT(4, ({ int x; sizeof(x); })); 165 | EXPECT(8, ({ int *x; sizeof x; })); 166 | EXPECT(16, ({ int x[4]; sizeof x; })); 167 | EXPECT(4, sizeof("abc")); 168 | EXPECT(7, sizeof("abc" "def")); 169 | EXPECT(9, sizeof("ab\0c" "\0def")); 170 | 171 | EXPECT(1, ({ char x; _Alignof x; })); 172 | EXPECT(4, ({ int x; _Alignof(x); })); 173 | EXPECT(8, ({ int *x; _Alignof x; })); 174 | EXPECT(4, ({ int x[4]; _Alignof x; })); 175 | EXPECT(8, ({ int *x[4]; _Alignof x; })); 176 | 177 | EXPECT(5, ({ char x = 5; x; })); 178 | EXPECT(42, ({ int x = 0; char *p = &x; p[0] = 42; x; })); 179 | 180 | EXPECT(0, '\0'); 181 | EXPECT(0, '\00'); 182 | EXPECT(0, '\000'); 183 | EXPECT(1, '\1'); 184 | EXPECT(7, '\7'); 185 | EXPECT(64, '\100'); 186 | 187 | EXPECT(64, "\10000"[0]); 188 | EXPECT('0', "\10000"[1]); 189 | EXPECT('0', "\10000"[2]); 190 | EXPECT(0, "\10000"[3]); 191 | EXPECT(255, "\xffxyz"[0]); 192 | EXPECT('x', "\xffxyz"[1]); 193 | 194 | EXPECT('a', ({ char *p = "abc"; p[0]; })); 195 | EXPECT('b', ({ char *p = "abc"; p[1]; })); 196 | EXPECT('c', ({ char *p = "abc"; p[2]; })); 197 | EXPECT(0, ({ char *p = "abc"; p[3]; })); 198 | 199 | EXPECT(1, ({ int x = 1; { int x = 2; } x; })); 200 | 201 | EXPECT(0, var1); 202 | EXPECT(5, ({ var1 = 5; var1; })); 203 | EXPECT(20, sizeof(var2)); 204 | EXPECT(15, ({ var2[0] = 5; var2[4] = 10; var2[0] + var2[4]; })); 205 | EXPECT(5, global_arr[0]); 206 | 207 | EXPECT(4, ({ struct { int a; } x; sizeof(x); })); 208 | EXPECT(8, ({ struct { char a; int b; } x; sizeof(x); })); 209 | EXPECT(12, ({ struct { char a; char b; int c; char d; } x; sizeof(x); })); 210 | EXPECT(3, ({ struct { int a; } x; x.a=3; x.a; })); 211 | EXPECT(8, ({ struct { char a; int b; } x; x.a=3; x.b=5; x.a+x.b; })); 212 | EXPECT(8, ({ struct { char a; int b; } x; struct { char a; int b; } *p = &x; x.a=3; x.b=5; p->a+p->b; })); 213 | EXPECT(8, ({ 
struct tag { char a; int b; } x; struct tag *p = &x; x.a=3; x.b=5; p->a+p->b; })); 214 | EXPECT(48, ({ struct { struct { int b; int c[5]; } a[2]; } x; sizeof(x); })); 215 | 216 | EXPECT(8, ({ 217 | struct { 218 | struct { 219 | int b; 220 | int c[5]; 221 | } a[2]; 222 | } x; 223 | x.a[0].b = 3; 224 | x.a[0].c[1] = 5; 225 | x.a[0].b + x.a[0].c[1]; 226 | })); 227 | 228 | EXPECT(3, ({ typedef int foo; foo x = 3; x; })); 229 | EXPECT(4, ({ myint foo = 3; sizeof(foo); })); 230 | 231 | EXPECT(1, ({ typedef struct foo_ foo; 1; })); 232 | 233 | EXPECT(15, ({ int i=5; i*=3; i; })); 234 | EXPECT(1, ({ int i=5; i/=3; i; })); 235 | EXPECT(2, ({ int i=5; i%=3; i; })); 236 | EXPECT(8, ({ int i=5; i+=3; i; })); 237 | EXPECT(2, ({ int i=5; i-=3; i; })); 238 | EXPECT(40, ({ int i=5; i<<=3; i; })); 239 | EXPECT(0, ({ int i=5; i>>=3; i; })); 240 | EXPECT(1, ({ int i=5; i&=3; i; })); 241 | EXPECT(6, ({ int i=5; i^=3; i; })); 242 | EXPECT(7, ({ int i=5; i|=3; i; })); 243 | 244 | EXPECT(5, ({ int x; typeof(x) y = 5; y; })); 245 | EXPECT(1, ({ char x; typeof(x) y = 257; y; })); 246 | EXPECT(2, ({ char x; typeof(x) y[2]; y[0]=257; y[1]=1; y[0]+y[1]; })); 247 | 248 | EXPECT(0, ({ _Bool x = 0; x; })); 249 | EXPECT(1, ({ _Bool x = 1; x; })); 250 | EXPECT(0, ({ _Bool x; x = 0; x; })); 251 | EXPECT(1, ({ _Bool x; x = 2; x; })); 252 | EXPECT(0, ({ _Bool x; int y = 0; x = y; x; })); 253 | EXPECT(1, ({ _Bool x; int y = -1; x = y; x; })); 254 | EXPECT(0, ({ _Bool x; _Bool y = 0; x = y; x; })); 255 | EXPECT(1, ({ _Bool x; _Bool y = 1; x = y; x; })); 256 | EXPECT(1, ({ _Bool x = 0; !x; })); 257 | EXPECT(0, ({ _Bool x = 1; !x; })); 258 | EXPECT(-1, ({ _Bool x = 0; ~x; })); 259 | EXPECT(-2, ({ _Bool x = 1; ~x; })); 260 | 261 | EXPECT(128, 
((((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1)))+((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1))))+(((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1)))+((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1)))))+((((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1)))+((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1))))+(((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1)))+((((1+1)+(1+1))+(1+1)+(1+1))+(((1+1)+(1+1))+(1+1)+(1+1)))))); 262 | 263 | printf("OK\n"); 264 | return 0; 265 | } 266 | -------------------------------------------------------------------------------- /test/test1.inc: -------------------------------------------------------------------------------- 1 | int printf(); 2 | 3 | int main() { 4 | #include "test/test2.inc" 5 | 1; 2; 6 | return 0; 7 | } 8 | 9 | -------------------------------------------------------------------------------- /test/test2.inc: -------------------------------------------------------------------------------- 1 | #define OK "O%c\n", 'K' 2 | 3 | printf(OK); 4 | -------------------------------------------------------------------------------- /test/token.c: -------------------------------------------------------------------------------- 1 | // This file contains tests for the tokenizer and the preprocessor. 
2 | 3 | // a line comment \ 4 | continues\ 5 | to this line 6 | 7 | /* 8 | /* block comment 9 | ** 10 | */ 11 | 12 | #include "test/test1.inc" 13 | -------------------------------------------------------------------------------- /token.c: -------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | typedef struct Env { 4 | char *path; 5 | char *buf; 6 | Vector *tokens; 7 | struct Env *prev; 8 | } Env; 9 | 10 | static Env *env; 11 | static Map *keywords; 12 | 13 | static FILE *open_file(char *path) { 14 | if (!strcmp(path, "-")) 15 | return stdin; 16 | 17 | FILE *fp = fopen(path, "r"); 18 | if (!fp) { 19 | perror(path); 20 | exit(1); 21 | } 22 | return fp; 23 | } 24 | 25 | static char *read_file(FILE *fp) { 26 | StringBuilder *sb = new_sb(); 27 | char buf[4096]; 28 | for (;;) { 29 | int nread = fread(buf, 1, sizeof(buf), fp); 30 | if (nread == 0) 31 | break; 32 | sb_append_n(sb, buf, nread); 33 | } 34 | 35 | // We want to make sure that a souce file ends with a newline. 36 | // Add not only one but two to protect against a backslash at EOF. 37 | sb_append(sb, "\n\n"); 38 | return sb_get(sb); 39 | } 40 | 41 | static Env *new_env(Env *prev, char *path, char *buf) { 42 | Env *env = calloc(1, sizeof(Env)); 43 | env->path = strcmp(path, "-") ? path : "(stdin)"; 44 | env->buf = buf; 45 | env->tokens = new_vec(); 46 | env->prev = prev; 47 | return env; 48 | } 49 | 50 | // Returns true if s1 starts with s2. 51 | static bool startswith(char *s1, char *s2) { 52 | return !strncmp(s1, s2, strlen(s2)); 53 | } 54 | 55 | // Error reporting 56 | 57 | // Finds a line pointed by a given pointer from the input file 58 | // to print it out. 
59 | static void print_line(char *buf, char *path, char *pos) { 60 | char *start = buf; 61 | int line = 0; 62 | int col = 0; 63 | 64 | for (char *p = buf; p; p++) { 65 | if (*p == '\n') { 66 | start = p + 1; 67 | line++; 68 | col = 0; 69 | continue; 70 | } 71 | 72 | if (p != pos) { 73 | col++; 74 | continue; 75 | } 76 | 77 | fprintf(stderr, "error at %s:%d:%d\n\n", path, line + 1, col + 1); 78 | 79 | // Print out the line containing the error location. 80 | int linelen = strchr(p, '\n') - start; 81 | fprintf(stderr, "%.*s\n", linelen, start); 82 | 83 | // Show tabs for tabs and spaces for other characters 84 | // so that the column matches. 85 | for (int i = 0; i < col; i++) 86 | fprintf(stderr, (start[i] == '\t') ? "\t" : " "); 87 | 88 | fprintf(stderr, "^\n\n"); 89 | return; 90 | } 91 | } 92 | 93 | void warn_token(Token *t, char *msg) { 94 | if (t->start) 95 | print_line(t->buf, t->path, t->start); 96 | fprintf(stderr, msg); 97 | fprintf(stderr, "\n"); 98 | } 99 | 100 | noreturn void bad_token(Token *t, char *msg) { 101 | warn_token(t, msg); 102 | exit(1); 103 | } 104 | 105 | noreturn static void bad_position(char *p, char *msg) { 106 | print_line(env->buf, env->path, p); 107 | error(msg); 108 | } 109 | 110 | int get_line_number(Token *t) { 111 | int n = 0; 112 | for (char *p = t->buf; p < t->end; p++) 113 | if (*p == '\n') 114 | n++; 115 | return n; 116 | } 117 | 118 | // Returns true if Token t followed a space or a comment 119 | // in an original source file. 120 | static bool need_space(Token *t) { 121 | char *s = t->start; 122 | if (t->buf <= s - 1 && isspace(s[-1])) 123 | return true; 124 | return t->buf <= s - 2 && startswith(s - 2, "*/"); 125 | } 126 | 127 | // For C preprocessor. 
128 | char *stringize(Vector *tokens) { 129 | StringBuilder *sb = new_sb(); 130 | 131 | for (int i = 0; i < tokens->len; i++) { 132 | Token *t = tokens->data[i]; 133 | if (t->ty == '\n') 134 | continue; 135 | if (i > 0 && need_space(t)) 136 | sb_add(sb, ' '); 137 | 138 | assert(t->start && t->end); 139 | sb_append_n(sb, t->start, t->end - t->start); 140 | } 141 | return sb_get(sb); 142 | } 143 | 144 | // Atomic unit in the grammar is called "token". 145 | // For example, `123`, `"abc"` and `while` are tokens. 146 | // The tokenizer splits an input string into tokens. 147 | // Spaces and comments are removed by the tokenizer. 148 | 149 | static Token *add(int ty, char *start) { 150 | Token *t = calloc(1, sizeof(Token)); 151 | t->ty = ty; 152 | t->start = start; 153 | t->path = env->path; 154 | t->buf = env->buf; 155 | vec_push(env->tokens, t); 156 | return t; 157 | } 158 | 159 | static struct { 160 | char *name; 161 | int ty; 162 | } symbols[] = { 163 | {"<<=", TK_SHL_EQ}, {">>=", TK_SHR_EQ}, {"!=", TK_NE}, 164 | {"&&", TK_LOGAND}, {"++", TK_INC}, {"--", TK_DEC}, 165 | {"->", TK_ARROW}, {"<<", TK_SHL}, {"<=", TK_LE}, 166 | {"==", TK_EQ}, {">=", TK_GE}, {">>", TK_SHR}, 167 | {"||", TK_LOGOR}, {"*=", TK_MUL_EQ}, {"/=", TK_DIV_EQ}, 168 | {"%=", TK_MOD_EQ}, {"+=", TK_ADD_EQ}, {"-=", TK_SUB_EQ}, 169 | {"&=", TK_AND_EQ}, {"^=", TK_XOR_EQ}, {"|=", TK_OR_EQ}, 170 | {NULL, 0}, 171 | }; 172 | 173 | static Map *keyword_map() { 174 | Map *map = new_map(); 175 | map_puti(map, "_Alignof", TK_ALIGNOF); 176 | map_puti(map, "_Bool", TK_BOOL); 177 | map_puti(map, "break", TK_BREAK); 178 | map_puti(map, "case", TK_CASE); 179 | map_puti(map, "char", TK_CHAR); 180 | map_puti(map, "continue", TK_CONTINUE); 181 | map_puti(map, "do", TK_DO); 182 | map_puti(map, "else", TK_ELSE); 183 | map_puti(map, "extern", TK_EXTERN); 184 | map_puti(map, "for", TK_FOR); 185 | map_puti(map, "if", TK_IF); 186 | map_puti(map, "int", TK_INT); 187 | map_puti(map, "return", TK_RETURN); 188 | map_puti(map, 
"sizeof", TK_SIZEOF); 189 | map_puti(map, "struct", TK_STRUCT); 190 | map_puti(map, "switch", TK_SWITCH); 191 | map_puti(map, "typedef", TK_TYPEDEF); 192 | map_puti(map, "typeof", TK_TYPEOF); 193 | map_puti(map, "void", TK_VOID); 194 | map_puti(map, "while", TK_WHILE); 195 | return map; 196 | } 197 | 198 | static char *block_comment(char *pos) { 199 | for (char *p = pos + 2; *p; p++) 200 | if (startswith(p, "*/")) 201 | return p + 2; 202 | bad_position(pos, "unclosed comment"); 203 | } 204 | 205 | static int isoctal(char c) { 206 | return '0' <= c && c <= '7'; 207 | } 208 | 209 | static int hex(char c) { 210 | if ('0' <= c && c <= '9') 211 | return c - '0'; 212 | if ('a' <= c && c <= 'f') 213 | return c - 'a' + 10; 214 | assert('A' <= c && c <= 'F'); 215 | return c - 'A' + 10; 216 | } 217 | 218 | // Read a single character in a char or string literal. 219 | static char *c_char(int *res, char *p) { 220 | // Nonescaped 221 | if (*p != '\\') { 222 | *res = *p; 223 | return p + 1; 224 | } 225 | p++; 226 | 227 | static char escaped[256] = { 228 | ['a'] = '\a', ['b'] = '\b', ['f'] = '\f', 229 | ['n'] = '\n', ['r'] = '\r', ['t'] = '\t', 230 | ['v'] = '\v', ['e'] = '\033', ['E'] = '\033', 231 | }; 232 | 233 | // Simple (e.g. 
`\n` or `\a`) 234 | int esc = escaped[(uint8_t)*p]; 235 | if (esc) { 236 | *res = esc; 237 | return p + 1; 238 | } 239 | 240 | // Hexadecimal 241 | if (*p == 'x') { 242 | *res = 0; 243 | p++; 244 | while (isxdigit(*p)) 245 | *res = *res * 16 + hex(*p++); 246 | return p; 247 | } 248 | 249 | // Octal 250 | if (isoctal(*p)) { 251 | int i = *p++ - '0'; 252 | if (isoctal(*p)) 253 | i = i * 8 + *p++ - '0'; 254 | if (isoctal(*p)) 255 | i = i * 8 + *p++ - '0'; 256 | *res = i; 257 | return p; 258 | } 259 | 260 | *res = *p; 261 | return p + 1; 262 | } 263 | 264 | static char *char_literal(char *p) { 265 | Token *t = add(TK_NUM, p++); 266 | p = c_char(&t->val, p); 267 | if (*p != '\'') 268 | bad_token(t, "unclosed character literal"); 269 | t->end = p + 1; 270 | return p + 1; 271 | } 272 | 273 | static char *string_literal(char *p) { 274 | Token *t = add(TK_STR, p++); 275 | StringBuilder *sb = new_sb(); 276 | 277 | while (*p != '"') { 278 | if (!*p) 279 | bad_token(t, "unclosed string literal"); 280 | int c; 281 | p = c_char(&c, p); 282 | sb_add(sb, c); 283 | } 284 | 285 | t->str = sb_get(sb); 286 | t->len = sb->len; 287 | t->end = p + 1; 288 | return p + 1; 289 | } 290 | 291 | static char *ident(char *p) { 292 | int len = 1; 293 | while (isalpha(p[len]) || isdigit(p[len]) || p[len] == '_') 294 | len++; 295 | 296 | char *name = strndup(p, len); 297 | int ty = map_geti(keywords, name, TK_IDENT); 298 | Token *t = add(ty, p); 299 | t->name = name; 300 | t->end = p + len; 301 | return p + len; 302 | } 303 | 304 | static char *hexadecimal(char *p) { 305 | Token *t = add(TK_NUM, p); 306 | p += 2; 307 | 308 | if (!isxdigit(*p)) 309 | bad_token(t, "bad hexadecimal number"); 310 | 311 | while (isxdigit(*p)) 312 | t->val = t->val * 16 + hex(*p++); 313 | t->end = p; 314 | return p; 315 | } 316 | 317 | static char *octal(char *p) { 318 | Token *t = add(TK_NUM, p++); 319 | while (isoctal(*p)) 320 | t->val = t->val * 8 + *p++ - '0'; 321 | t->end = p; 322 | return p; 323 | } 324 | 325 | 
static char *decimal(char *p) { 326 | Token *t = add(TK_NUM, p); 327 | while (isdigit(*p)) 328 | t->val = t->val * 10 + *p++ - '0'; 329 | t->end = p; 330 | return p; 331 | } 332 | 333 | static char *number(char *p) { 334 | if (startswith(p, "0x") || startswith(p, "0X")) 335 | return hexadecimal(p); 336 | if (*p == '0') 337 | return octal(p); 338 | return decimal(p); 339 | } 340 | 341 | static void scan() { 342 | char *p = env->buf; 343 | 344 | loop: 345 | while (*p) { 346 | // New line (preprocessor-only token) 347 | if (*p == '\n') { 348 | Token *t = add(*p, p); 349 | p++; 350 | t->end = p; 351 | continue; 352 | } 353 | 354 | // Whitespace 355 | if (isspace(*p)) { 356 | p++; 357 | continue; 358 | } 359 | 360 | // Line comment 361 | if (startswith(p, "//")) { 362 | while (*p && *p != '\n') 363 | p++; 364 | continue; 365 | } 366 | 367 | // Block comment 368 | if (startswith(p, "/*")) { 369 | p = block_comment(p); 370 | continue; 371 | } 372 | 373 | // Character literal 374 | if (*p == '\'') { 375 | p = char_literal(p); 376 | continue; 377 | } 378 | 379 | // String literal 380 | if (*p == '"') { 381 | p = string_literal(p); 382 | continue; 383 | } 384 | 385 | // Multi-letter symbol 386 | for (int i = 0; symbols[i].name; i++) { 387 | char *name = symbols[i].name; 388 | if (!startswith(p, name)) 389 | continue; 390 | 391 | Token *t = add(symbols[i].ty, p); 392 | p += strlen(name); 393 | t->end = p; 394 | goto loop; 395 | } 396 | 397 | // Single-letter symbol 398 | if (strchr("+-*/;=(),{}<>[]&.!?:|^%~#", *p)) { 399 | Token *t = add(*p, p); 400 | p++; 401 | t->end = p; 402 | continue; 403 | } 404 | 405 | // Keyword or identifier 406 | if (isalpha(*p) || *p == '_') { 407 | p = ident(p); 408 | continue; 409 | } 410 | 411 | // Number 412 | if (isdigit(*p)) { 413 | p = number(p); 414 | continue; 415 | } 416 | 417 | bad_position(p, "cannot tokenize"); 418 | } 419 | } 420 | 421 | static void replace_crlf(char *p) { 422 | for (char *q = p; *q;) { 423 | if (startswith(q, 
"\r\n")) 424 | q++; 425 | *p++ = *q++; 426 | } 427 | *p = '\0'; 428 | } 429 | 430 | // Concatenates continuation lines. We keep the total number of 431 | // newline characters the same to keep the line counter sane. 432 | static void remove_backslash_newline(char *p) { 433 | int cnt = 0; 434 | for (char *q = p; *q;) { 435 | if (startswith(q, "\\\n")) { 436 | cnt++; 437 | q += 2; 438 | continue; 439 | } 440 | if (*q == '\n') { 441 | for (int i = 0; i < cnt + 1; i++) 442 | *p++ = '\n'; 443 | q++; 444 | cnt = 0; 445 | continue; 446 | } 447 | *p++ = *q++; 448 | } 449 | *p = '\0'; 450 | } 451 | 452 | static Vector *strip_newline_tokens(Vector *tokens) { 453 | Vector *v = new_vec(); 454 | for (int i = 0; i < tokens->len; i++) { 455 | Token *t = tokens->data[i]; 456 | if (t->ty != '\n') 457 | vec_push(v, t); 458 | } 459 | return v; 460 | } 461 | 462 | static void append(Token *x, Token *y) { 463 | StringBuilder *sb = new_sb(); 464 | sb_append_n(sb, x->str, x->len - 1); 465 | sb_append_n(sb, y->str, y->len - 1); 466 | x->str = sb_get(sb); 467 | x->len = sb->len; 468 | } 469 | 470 | static Vector *join_string_literals(Vector *tokens) { 471 | Vector *v = new_vec(); 472 | Token *last = NULL; 473 | 474 | for (int i = 0; i < tokens->len; i++) { 475 | Token *t = tokens->data[i]; 476 | if (last && last->ty == TK_STR && t->ty == TK_STR) { 477 | append(last, t); 478 | continue; 479 | } 480 | 481 | last = t; 482 | vec_push(v, t); 483 | } 484 | return v; 485 | } 486 | 487 | Vector *tokenize(char *path, bool add_eof) { 488 | if (!keywords) 489 | keywords = keyword_map(); 490 | 491 | FILE *fp = open_file(path); 492 | char *buf = read_file(fp); 493 | replace_crlf(buf); 494 | remove_backslash_newline(buf); 495 | 496 | env = new_env(env, path, buf); 497 | scan(); 498 | if (add_eof) 499 | add(TK_EOF, NULL); 500 | Vector *v = env->tokens; 501 | env = env->prev; 502 | 503 | v = preprocess(v); 504 | v = strip_newline_tokens(v); 505 | return join_string_literals(v); 506 | } 507 | 
-------------------------------------------------------------------------------- /util.c: -------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | noreturn void error(char *fmt, ...) { 4 | va_list ap; 5 | va_start(ap, fmt); 6 | vfprintf(stderr, fmt, ap); 7 | fprintf(stderr, "\n"); 8 | exit(1); 9 | } 10 | 11 | char *format(char *fmt, ...) { 12 | char buf[2048]; 13 | va_list ap; 14 | va_start(ap, fmt); 15 | vsnprintf(buf, sizeof(buf), fmt, ap); 16 | va_end(ap); 17 | return strdup(buf); 18 | } 19 | 20 | Vector *new_vec() { 21 | Vector *v = malloc(sizeof(Vector)); 22 | v->data = malloc(sizeof(void *) * 16); 23 | v->capacity = 16; 24 | v->len = 0; 25 | return v; 26 | } 27 | 28 | void vec_push(Vector *v, void *elem) { 29 | if (v->len == v->capacity) { 30 | v->capacity *= 2; 31 | v->data = realloc(v->data, sizeof(void *) * v->capacity); 32 | } 33 | v->data[v->len++] = elem; 34 | } 35 | 36 | void vec_pushi(Vector *v, int val) { 37 | vec_push(v, (void *)(intptr_t)val); 38 | } 39 | 40 | void *vec_pop(Vector *v) { 41 | assert(v->len); 42 | return v->data[--v->len]; 43 | } 44 | 45 | void *vec_last(Vector *v) { 46 | assert(v->len); 47 | return v->data[v->len - 1]; 48 | } 49 | 50 | bool vec_contains(Vector *v, void *elem) { 51 | for (int i = 0; i < v->len; i++) 52 | if (v->data[i] == elem) 53 | return true; 54 | return false; 55 | } 56 | 57 | bool vec_union1(Vector *v, void *elem) { 58 | if (vec_contains(v, elem)) 59 | return false; 60 | vec_push(v, elem); 61 | return true; 62 | } 63 | 64 | Map *new_map(void) { 65 | Map *map = malloc(sizeof(Map)); 66 | map->keys = new_vec(); 67 | map->vals = new_vec(); 68 | return map; 69 | } 70 | 71 | void map_put(Map *map, char *key, void *val) { 72 | vec_push(map->keys, key); 73 | vec_push(map->vals, val); 74 | } 75 | 76 | void map_puti(Map *map, char *key, int val) { 77 | map_put(map, key, (void *)(intptr_t)val); 78 | } 79 | 80 | void *map_get(Map *map, char *key) { 81 | for (int i = 
map->keys->len - 1; i >= 0; i--) 82 | if (!strcmp(map->keys->data[i], key)) 83 | return map->vals->data[i]; 84 | return NULL; 85 | } 86 | 87 | int map_geti(Map *map, char *key, int default_) { 88 | for (int i = map->keys->len - 1; i >= 0; i--) 89 | if (!strcmp(map->keys->data[i], key)) 90 | return (intptr_t)map->vals->data[i]; 91 | return default_; 92 | } 93 | 94 | StringBuilder *new_sb(void) { 95 | StringBuilder *sb = malloc(sizeof(StringBuilder)); 96 | sb->data = malloc(8); 97 | sb->capacity = 8; 98 | sb->len = 0; 99 | return sb; 100 | } 101 | 102 | static void sb_grow(StringBuilder *sb, int len) { 103 | if (sb->len + len <= sb->capacity) 104 | return; 105 | 106 | while (sb->len + len > sb->capacity) 107 | sb->capacity *= 2; 108 | sb->data = realloc(sb->data, sb->capacity); 109 | } 110 | 111 | void sb_add(StringBuilder *sb, char c) { 112 | sb_grow(sb, 1); 113 | sb->data[sb->len++] = c; 114 | } 115 | 116 | void sb_append(StringBuilder *sb, char *s) { 117 | sb_append_n(sb, s, strlen(s)); 118 | } 119 | 120 | void sb_append_n(StringBuilder *sb, char *s, int len) { 121 | sb_grow(sb, len); 122 | memcpy(sb->data + sb->len, s, len); 123 | sb->len += len; 124 | } 125 | 126 | char *sb_get(StringBuilder *sb) { 127 | sb_add(sb, '\0'); 128 | return sb->data; 129 | } 130 | 131 | int roundup(int x, int align) { 132 | return (x + align - 1) & ~(align - 1); 133 | } 134 | 135 | Type *ptr_to(Type *base) { 136 | Type *ty = calloc(1, sizeof(Type)); 137 | ty->ty = PTR; 138 | ty->size = 8; 139 | ty->align = 8; 140 | ty->ptr_to = base; 141 | return ty; 142 | } 143 | 144 | Type *ary_of(Type *base, int len) { 145 | Type *ty = calloc(1, sizeof(Type)); 146 | ty->ty = ARY; 147 | ty->size = base->size * len; 148 | ty->align = base->align; 149 | ty->ary_of = base; 150 | ty->len = len; 151 | return ty; 152 | } 153 | 154 | static Type *new_ty(int ty, int size) { 155 | Type *ret = calloc(1, sizeof(Type)); 156 | ret->ty = ty; 157 | ret->size = size; 158 | ret->align = size; 159 | return ret; 160 | 
} 161 | 162 | Type *void_ty() { 163 | return new_ty(VOID, 0); 164 | } 165 | 166 | Type *bool_ty() { 167 | return new_ty(BOOL, 1); 168 | } 169 | 170 | Type *char_ty() { 171 | return new_ty(CHAR, 1); 172 | } 173 | 174 | Type *int_ty() { 175 | return new_ty(INT, 4); 176 | } 177 | 178 | Type *func_ty(Type *returning) { 179 | Type *ty = calloc(1, sizeof(Type)); 180 | ty->returning = returning; 181 | return ty; 182 | } 183 | 184 | bool same_type(Type *x, Type *y) { 185 | if (x->ty != y->ty) 186 | return false; 187 | 188 | switch (x->ty) { 189 | case PTR: 190 | return same_type(x->ptr_to, y->ptr_to); 191 | case ARY: 192 | return x->size == y->size && same_type(x->ary_of, y->ary_of); 193 | case STRUCT: 194 | case FUNC: 195 | return x == y; 196 | default: 197 | return true; 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /util_test.c: -------------------------------------------------------------------------------- 1 | #include "9cc.h" 2 | 3 | // Unit tests for our data structures. 4 | // 5 | // This kind of file is usually built as an independent executable in 6 | // a common build config, but in 9cc I took a different approach. 7 | // This file is just a part of the main executable. This scheme greatly 8 | // simplifies build config. 9 | // 10 | // In return for the simplicity, the main executable becomes slightly 11 | // larger, but that's not a problem for toy programs like 9cc. 12 | // What is most important is to write tests while keeping everything simple. 
13 | 14 | static void expect(int line, int expected, int actual) { 15 | if (expected == actual) 16 | return; 17 | fprintf(stderr, "%d: %d expected, but got %d\n", line, expected, actual); 18 | exit(1); 19 | } 20 | 21 | static void vec_test() { 22 | Vector *vec = new_vec(); 23 | expect(__LINE__, 0, vec->len); 24 | 25 | for (int i = 0; i < 100; i++) 26 | vec_push(vec, (void *)(intptr_t)i); 27 | 28 | expect(__LINE__, 100, vec->len); 29 | expect(__LINE__, 0, (intptr_t)vec->data[0]); 30 | expect(__LINE__, 50, (intptr_t)vec->data[50]); 31 | expect(__LINE__, 99, (intptr_t)vec->data[99]); 32 | } 33 | 34 | static void map_test() { 35 | Map *map = new_map(); 36 | expect(__LINE__, 0, (intptr_t)map_get(map, "foo")); 37 | 38 | map_put(map, "foo", (void *)2); 39 | expect(__LINE__, 2, (intptr_t)map_get(map, "foo")); 40 | 41 | map_put(map, "bar", (void *)4); 42 | expect(__LINE__, 4, (intptr_t)map_get(map, "bar")); 43 | 44 | map_put(map, "foo", (void *)6); 45 | expect(__LINE__, 6, (intptr_t)map_get(map, "foo")); 46 | } 47 | 48 | static void sb_test() { 49 | StringBuilder *sb1 = new_sb(); 50 | expect(__LINE__, 0, strlen(sb_get(sb1))); 51 | 52 | StringBuilder *sb2 = new_sb(); 53 | sb_append(sb2, "foo"); 54 | expect(__LINE__, 1, !strcmp(sb_get(sb2), "foo")); 55 | 56 | StringBuilder *sb3 = new_sb(); 57 | sb_append(sb3, "foo"); 58 | sb_append(sb3, "bar"); 59 | expect(__LINE__, 1, !strcmp(sb_get(sb3), "foobar")); 60 | 61 | StringBuilder *sb4 = new_sb(); 62 | sb_append(sb4, "foo"); 63 | sb_append(sb4, "bar"); 64 | sb_append(sb4, "foo"); 65 | sb_append(sb4, "bar"); 66 | expect(__LINE__, 1, !strcmp(sb_get(sb4), "foobarfoobar")); 67 | } 68 | 69 | void util_test() { 70 | vec_test(); 71 | map_test(); 72 | sb_test(); 73 | } 74 | --------------------------------------------------------------------------------