├── .gitignore ├── 8cc.h ├── LICENSE ├── Makefile ├── README.rdoc ├── cpp.c ├── decl.c ├── dict.c ├── elf.c ├── error.c ├── file.c ├── gen.c ├── init.c ├── inst.h ├── keyword.h ├── lex.c ├── list.c ├── main.c ├── parse.c ├── run.c ├── sample └── nqueen.c ├── string.c ├── test ├── Makefile ├── cpp.c ├── decl.c ├── dict.c ├── elf.c ├── error.c ├── file.c ├── gen.c ├── init.c ├── lex.c ├── list.c ├── main.c ├── parse.c ├── run.c ├── string.c └── unittest.h └── utils ├── dump ├── dump1 ├── dump2 ├── run └── run1 /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | 8cc 3 | tmp* 4 | .tmp* 5 | *# 6 | .#* 7 | nqueen 8 | *.orig 9 | *.rej 10 | test/alltests 11 | -------------------------------------------------------------------------------- /8cc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 8cc.h - 8cc C compiler header 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 
6 | */ 7 | 8 | #ifndef ECC_H 9 | #define ECC_H 10 | 11 | #define _POSIX_SOURCE 12 | #define _BSD_SOURCE 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | /* 28 | * Primitive data types 29 | */ 30 | 31 | typedef int8_t s8; 32 | typedef uint8_t u8; 33 | typedef int16_t s16; 34 | typedef uint16_t u16; 35 | typedef int32_t s32; 36 | typedef uint32_t u32; 37 | typedef int64_t s64; 38 | typedef uint64_t u64; 39 | typedef intptr_t intptr; 40 | 41 | /* 42 | * ELF file format constants 43 | */ 44 | 45 | #define SHT_NULL 0 46 | #define SHT_PROGBITS 1 47 | #define SHT_SYMTAB 2 48 | #define SHT_STRTAB 3 49 | #define SHT_RELA 4 50 | #define SHT_HASH 5 51 | #define SHT_DYNAMIC 6 52 | #define SHT_NOTE 7 53 | #define SHT_NOBITS 8 54 | #define SHT_REL 9 55 | #define SHT_SHLIB 10 56 | #define SHT_DYNSYM 11 57 | 58 | #define SHF_WRITE 1 59 | #define SHF_ALLOC 2 60 | #define SHF_EXECINSTR 4 61 | 62 | #define SHN_UNDEF 0 63 | #define SHN_LORESERVE 0xff00 64 | #define SHN_LOPROC 0xff00 65 | #define SHN_HIPROC 0xff1f 66 | #define SHN_ABS 0xfff1 67 | #define SHN_COMMON 0xfff2 68 | #define SHN_HIRESERVE 0xffff 69 | 70 | #define ELF64_ST_BIND(i) ((i)>>4) 71 | #define ELF64_ST_TYPE(i) ((i)&0xf) 72 | #define ELF64_ST_INFO(b, t) (((b)<<4)+((t)&0xf)) 73 | 74 | #define STB_LOCAL 0 75 | #define STB_GLOBAL 1 76 | #define STB_WEAK 2 77 | 78 | #define STT_NOTYPE 0 79 | #define STT_OBJECT 1 80 | #define STT_FUNC 2 81 | #define STT_SECTION 3 82 | #define STT_FILE 4 83 | 84 | #define ELF64_R_SYM(i) ((i) >> 32) 85 | #define ELF64_R_TYPE(i) ((i) & 0xffffffffL) 86 | #define ELF64_R_INFO(s, t) ((((int64_t) (s)) << 32) + (((int64_t) (t)) & 0xffffffffL)) 87 | 88 | #define R_X86_64_NONE 0 89 | #define R_X86_64_64 1 90 | #define R_X86_64_PC32 2 91 | #define R_X86_64_GOT32 3 92 | #define R_X86_64_PLT32 4 93 | #define R_X86_64_COPY 5 94 | #define R_X86_64_GLOB_DAT 6 95 | #define 
R_X86_64_JUMP_SLOT 7 96 | #define R_X86_64_RELATIVE 8 97 | #define R_X86_64_GOTPCREL 9 98 | #define R_X86_64_32 10 99 | #define R_X86_64_32S 11 100 | #define R_X86_64_16 12 101 | #define R_X86_64_PC16 13 102 | #define R_X86_64_8 14 103 | #define R_X86_64_PC8 15 104 | 105 | #ifdef __GNUC__ 106 | # define NORETURN __attribute__((noreturn)) 107 | #else 108 | # define NORETURN 109 | #endif 110 | 111 | /*============================================================ 112 | * Byte String 113 | */ 114 | 115 | #define STRING_INITIAL_SIZE 32 116 | 117 | typedef struct String { 118 | char *buf; 119 | int nalloc; 120 | int len; 121 | int pos; 122 | } String; 123 | 124 | #define STRING_LEN(b) ((b)->len) 125 | #define STRING_BODY(b) ((b)->buf) 126 | 127 | extern String *make_string(void); 128 | extern String *to_string(char *str); 129 | extern bool string_equal(String *a, String *b); 130 | extern void string_append(String *b, char *p); 131 | extern String *string_copy(String *b); 132 | extern String *string_prepend(String *b, char *p); 133 | extern void o1(String *b, int byte); 134 | extern void out(String *b, void *data, size_t size); 135 | extern void ostr(String *b, char *str); 136 | extern void o2(String *b, u16 data); 137 | extern void o3(String *b, u32 data); 138 | extern void o4(String *b, u32 data); 139 | extern void o8(String *b, u64 data); 140 | extern void align(String *b, int n); 141 | extern void string_seek(String *b, int pos); 142 | extern void string_vprintf(String *b, char *format, va_list ap); 143 | extern void string_printf(String *b, char *format, ...); 144 | extern String *make_string_printf(char *format, ...); 145 | 146 | /*============================================================ 147 | * Error Handlers 148 | */ 149 | 150 | typedef struct Exception { 151 | jmp_buf jmpbuf; 152 | String *msg; 153 | } Exception; 154 | 155 | extern Exception *current_handler; 156 | 157 | #define TRY(e_) (current_handler = (e_), !setjmp((e_)->jmpbuf)) 158 | 159 | extern 
Exception *make_exception(void); 160 | extern NORETURN void error(char *format, ...); 161 | extern void debug(char *format, ...); 162 | extern void warn(char *format, ...); 163 | extern NORETURN void print_parse_error(int line, int column, char *msg, va_list ap); 164 | extern void print_stack_trace(void); 165 | extern void print_stack_trace_safe(void); 166 | 167 | #define panic(fmt, ...) \ 168 | do { \ 169 | print_stack_trace(); \ 170 | error("[INTERNAL ERROR] %s:%d: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); \ 171 | } while (0) 172 | 173 | #define ASSERT(expr) \ 174 | do { \ 175 | if (!(expr)) { \ 176 | print_stack_trace(); \ 177 | panic("Assertion failed: %s", #expr); \ 178 | } \ 179 | } while (0) 180 | 181 | /*============================================================ 182 | * List 183 | */ 184 | 185 | #define LIST_INITIAL_SIZE 8 186 | 187 | typedef struct List { 188 | void **elems; 189 | int nalloc; 190 | int len; 191 | } List; 192 | 193 | #define LIST_REF(lis, i) (((lis)->elems)[i]) 194 | #define LIST_LEN(lis) ((lis)->len) 195 | #define LIST_TOP(lis) (LIST_REF((lis), 0)) 196 | #define LIST_BOTTOM(lis) (LIST_REF((lis), LIST_LEN(lis) - 1)) 197 | #define LIST_IS_EMPTY(lis) ((lis)->len == 0) 198 | 199 | extern List *make_list(void); 200 | extern List *make_list1(void *e0); 201 | extern List *make_list2(void *e0, void *e1); 202 | extern List *make_listn(void *e0, ...); 203 | extern void list_push(List *list, void *e); 204 | extern void list_push(List *list, void *e); 205 | extern void *list_pop(List *list); 206 | extern void *list_unshift(List *list); 207 | extern List *sublist(List *orig, int off); 208 | extern void list_append(List *a, List *b); 209 | extern List *list_reverse(List *list); 210 | extern List *list_copy(List *list); 211 | extern bool list_in(List *list, String *e); 212 | extern List *list_union(List *a, List *b); 213 | extern List *list_union1(List *list, String *b); 214 | extern List *list_intersect(List *a, List *b); 215 | 216 | 
/*============================================================ 217 | * Dictionary (Hash table) 218 | */ 219 | 220 | #define DICT_INITIAL_SIZE 16 221 | 222 | typedef struct Bucket { 223 | u32 hashval; 224 | void *key; 225 | void *elem; 226 | } Bucket; 227 | 228 | #define DICT_TYPE_STRING 0 229 | #define DICT_TYPE_ADDRESS 1 230 | 231 | typedef u32 dict_hash_fn(void *e); 232 | typedef bool dict_equal_fn(void *a, void *b); 233 | 234 | typedef struct Dict { 235 | dict_hash_fn *hashfn; 236 | dict_equal_fn *equalfn; 237 | Bucket *buckets; 238 | int nalloc; 239 | int nelem; 240 | } Dict; 241 | 242 | extern Dict *make_dict(dict_hash_fn hashfn, dict_equal_fn equalfn); 243 | extern Dict *make_string_dict(void); 244 | extern Dict *make_string_dict(void); 245 | extern Dict *make_address_dict(void); 246 | extern void dict_put(Dict *dict, void *key, void *obj); 247 | extern void *dict_get(Dict *dict, void *key); 248 | extern bool dict_delete(Dict *dict, void *key); 249 | extern bool dict_has(Dict *dict, void *key); 250 | extern int dict_size(Dict *dict); 251 | 252 | typedef struct DictIter { 253 | Dict *dict; 254 | int idx; 255 | } DictIter; 256 | 257 | extern DictIter *make_dict_iter(Dict* dict); 258 | extern void *dict_iter_next(DictIter* dict); 259 | 260 | /* 261 | * ELF headers (internal representation; not necessarily correspondent 262 | * to on-disk ELF format) 263 | */ 264 | 265 | typedef struct Section { 266 | String *body; 267 | char *name; 268 | int shstrtab_off; 269 | int type; 270 | int flags; 271 | int align; 272 | int link; 273 | List *rels; // list of Reloc 274 | int info; 275 | int entsize; 276 | int shndx; 277 | int symindex; 278 | // For executing binary in memory. 
279 | void *memory_pos; 280 | } Section; 281 | 282 | typedef struct Symbol { 283 | String *name; 284 | Section *section; 285 | long value; 286 | int bind; 287 | int type; 288 | int sectidx; 289 | int defined; 290 | int index; 291 | } Symbol; 292 | 293 | typedef struct Reloc { 294 | long off; 295 | char *sym; 296 | Section *section; 297 | int type; 298 | u64 addend; 299 | } Reloc; 300 | 301 | typedef struct Elf { 302 | List *sections; 303 | int shnum; 304 | int symtabnum; 305 | Dict *syms; 306 | } Elf; 307 | 308 | extern Elf *new_elf(void); 309 | extern void write_elf(FILE *outfile, Elf *elf); 310 | extern Section *find_section(Elf *elf, char *name); 311 | extern Symbol *find_symbol(Elf *elf, char *name); 312 | 313 | /*============================================================ 314 | * File 315 | */ 316 | 317 | #define FILE_STDIO 1 318 | #define FILE_STRING 2 319 | 320 | typedef struct File { 321 | int type; // FILE_STDIO or FILE_STRING 322 | // If FILE_STDIO 323 | FILE *stream; 324 | // If FILE_STRING 325 | char *buf; 326 | int pos; 327 | 328 | int line; 329 | int column; 330 | int last_column; 331 | String *filename; 332 | int ungotten[2]; 333 | bool eof_flag; 334 | } File; 335 | 336 | extern File *make_file(FILE *stream, char *filename); 337 | extern File *make_string_file(String *s); 338 | extern File *open_file(char *path); 339 | extern void close_file(File *file); 340 | extern void unreadc(int c, File *file); 341 | extern int peekc(File *file); 342 | extern int readc(File *file); 343 | extern bool next_char_is(File *file, int c); 344 | 345 | /*============================================================ 346 | * Lexer 347 | */ 348 | 349 | typedef enum KeywordType { 350 | KEYWORD_NON_ONE_CHAR_BEGIN = 255, 351 | #define KEYWORD(k, s) k, 352 | #define PUNCT(k, s) k, 353 | # include "keyword.h" 354 | #undef PUNCT 355 | #undef KEYWORD 356 | } KeywordType; 357 | 358 | typedef union TokenValue { 359 | int i; 360 | float f; 361 | String *str; 362 | } TokenValue; 363 | 
364 | typedef enum TokType { 365 | TOKTYPE_INVALID, 366 | TOKTYPE_KEYWORD, 367 | TOKTYPE_IDENT, 368 | TOKTYPE_PUNCT, 369 | TOKTYPE_CPPNUM, 370 | TOKTYPE_CHAR, 371 | TOKTYPE_STRING, 372 | TOKTYPE_INT, 373 | TOKTYPE_FLOAT, 374 | // The following two types are used only in CPP. 375 | TOKTYPE_NEWLINE, 376 | TOKTYPE_SPACE, 377 | TOKTYPE_MACRO_PARAM, 378 | } TokType; 379 | 380 | typedef struct Token { 381 | TokType toktype; 382 | TokenValue val; 383 | int line; 384 | int column; 385 | // Used in preprocessor macro expansion. 386 | List *hideset; 387 | // True if token follows space character. Used only when concatenating 388 | // tokens for # operator (the only case we need to preserve existence of 389 | // space.) 390 | bool space; 391 | } Token; 392 | 393 | typedef enum { 394 | COND_IF, COND_ELIF, COND_ELSE, COND_IFDEF, COND_IFNDEF, COND_ENDIF, 395 | } CondInclType; 396 | 397 | typedef struct CppContext { 398 | File *file; 399 | bool at_bol; 400 | // For #include. 401 | List *file_stack; 402 | // Macro definitions. 403 | Dict *defs; 404 | // Pushback buffer for preprocessing tokens. 405 | List *ungotten; 406 | // Never read file if true. 407 | bool in_macro; 408 | // Used by conditional inclusion, such as #if. 409 | List *incl; 410 | // System include paths. 411 | List *include_path; 412 | // For __DATE__ and __TIME__ macros. 
413 | struct tm *tm; 414 | } CppContext; 415 | 416 | extern Token *read_cpp_token(CppContext *ctx); 417 | extern void expect_newline(CppContext *ctx); 418 | extern Token *copy_token(Token *tok); 419 | extern Token *make_token(CppContext *ctx, TokType type, TokenValue val); 420 | extern Token *make_str_literal(CppContext *ctx, String *val); 421 | extern Token *make_cppnum(CppContext *ctx, String *val); 422 | extern bool is_next_space(CppContext *ctx); 423 | extern CondInclType skip_cond_incl(CppContext *ctx); 424 | extern String *read_header_name(CppContext *ctx, bool *std); 425 | 426 | extern CppContext *make_cpp_context(File *file); 427 | extern void do_include(CppContext *ctx, File *file); 428 | extern void unget_cpp_token(CppContext *ctx, Token *tok); 429 | extern Token *peek_cpp_token(CppContext *ctx); 430 | extern NORETURN void error_cpp_ctx(CppContext *ctx, char *msg, ...); 431 | 432 | /* 433 | * A special value indicating that the result of macro expansion 434 | * varies depending on context. Used for __DATE__ and __TIME__. 435 | */ 436 | #define VARIABLE_MACRO ((void *)1) 437 | 438 | /*============================================================ 439 | * C Preprocessor 440 | */ 441 | 442 | struct ReadContext; 443 | extern Token *read_token(struct ReadContext *ctx); 444 | extern void define_predefined_macros(CppContext *ctx); 445 | extern void cpp_write(CppContext *ctx, FILE *out); 446 | 447 | /*============================================================ 448 | * Parser 449 | */ 450 | 451 | struct Var; 452 | struct Ctype; 453 | 454 | /* 455 | * Represents basic block of program. Code must contain one of OP_RETURN OP_JMP 456 | * or OP_IF which is the end of the basic block. Other instructions following 457 | * the instruction are dead code and safely be ignored. 
458 | */ 459 | typedef struct Block { 460 | int pos; 461 | List *code; 462 | } Block; 463 | /* A function: its name, return type, parameter list and entry basic block. */ 464 | typedef struct Function { 465 | String *name; 466 | struct Ctype *rettype; 467 | List *params; 468 | Block *entry; 469 | } Function; 470 | 471 | /* 472 | * Read context for lexer and parser 473 | */ 474 | typedef struct ReadContext { 475 | File *file; 476 | Elf *elf; 477 | List *scope; 478 | // The entry basic block for the function being read. 479 | Block *entry; 480 | // The stack of basic blocks. Instructions for code being processed are 481 | // emitted to the top basic block of the stack. 482 | List *blockstack; 483 | // Pushback buffer for tokens. 484 | List *ungotten; 485 | // Function being read. 486 | Function *func; 487 | // "break" and "continue" targets. NULL means we're outside of loop or 488 | // switch. 489 | Block *onbreak; 490 | Block *oncontinue; 491 | // Labels and their jump destination basic blocks. Used by goto. 492 | Dict *label; 493 | // Labels and their jump origination basic blocks. When the parser visits 494 | // a forward-referencing goto (one whose label has not been seen yet), the 495 | // label and the block are stored in the dictionary. Such blocks are 496 | // processed when the labels are read. 497 | Dict *label_tbf; 498 | // For CPP. 499 | CppContext *cppctx; 500 | // True if constant expression is needed. 
501 | bool in_const_expr; 502 | } ReadContext; 503 | 504 | typedef union Cvalue { 505 | char c; 506 | int i; 507 | long l; 508 | float f; 509 | String *s; 510 | } Cvalue; 511 | 512 | typedef enum CtypeEnum { 513 | CTYPE_INVALID, 514 | CTYPE_PTR, 515 | CTYPE_ARRAY, 516 | CTYPE_LLONG, 517 | CTYPE_LONG, 518 | CTYPE_INT, 519 | CTYPE_SHORT, 520 | CTYPE_CHAR, 521 | CTYPE_FLOAT, 522 | CTYPE_DOUBLE, 523 | } CtypeEnum; 524 | 525 | typedef struct Ctype { 526 | CtypeEnum type; 527 | bool signedp; 528 | struct Ctype *ptr; 529 | int size; // valid iff type == CTYPE_ARRAY 530 | } Ctype; 531 | 532 | extern List *parse(File *file, Elf *elf); 533 | extern int ctype_sizeof(Ctype *ctype); 534 | extern bool ctype_equal(Ctype *ctype, int type); 535 | extern ReadContext *make_read_context(File *file, Elf *elf, CppContext *ctx); 536 | extern char *token_to_string(Token *tok); 537 | extern struct Var *read_comma_expr(ReadContext *ctx); 538 | extern NORETURN void error_token(Token *tok, char *msg, ...); 539 | 540 | /*============================================================ 541 | * Code generator 542 | */ 543 | 544 | extern bool flag_debug; 545 | 546 | // x86 general-purpose registers 547 | #define RAX 0 548 | #define RCX 1 549 | #define RDX 2 550 | #define RBX 3 551 | #define RSP 4 552 | #define RBP 5 553 | #define RSI 6 554 | #define RDI 7 555 | #define R8 8 556 | #define R9 9 557 | #define R10 10 558 | #define R11 11 559 | #define R12 12 560 | #define R13 13 561 | #define R14 14 562 | #define R15 15 563 | 564 | #define EXT_REG(x) ((x) >= R8) 565 | 566 | // XMM registers 567 | #define XMM0 0 568 | #define XMM1 1 569 | #define XMM2 2 570 | #define XMM3 3 571 | #define XMM4 4 572 | #define XMM5 5 573 | #define XMM6 6 574 | #define XMM7 7 575 | #define XMM8 8 576 | #define XMM9 9 577 | #define XMM10 10 578 | #define XMM11 11 579 | #define XMM12 12 580 | #define XMM13 13 581 | #define XMM14 14 582 | #define XMM15 15 583 | 584 | #define VAR_IMM 0 585 | #define VAR_EXTERN 1 586 | #define 
VAR_LOCAL 2 587 | 588 | typedef struct Var { 589 | int stype; 590 | Ctype *ctype; 591 | String *name; 592 | Cvalue val; // Immediate value. Only valid when stype == VAR_IMM 593 | struct Var *loc; // Non-NULL if lvalue 594 | // For register allocation 595 | bool need_save; 596 | } Var; 597 | 598 | enum { 599 | OP_DUMMY = 255, 600 | #define INST(x) x, 601 | #include "inst.h" 602 | #undef INST 603 | }; 604 | 605 | typedef struct Inst { 606 | int op; 607 | List *args; 608 | Cvalue val; 609 | } Inst; 610 | 611 | extern void assemble(Elf *elf, List *fns); 612 | extern Symbol *make_symbol(String *name, Section *sect, long value, int bind, int type, int defined); 613 | 614 | extern Inst *make_inst0(int op); 615 | extern Inst *make_inst1(int op, void *v0); 616 | extern Inst *make_inst2(int op, void *v0, void *v1); 617 | extern Inst *make_inst3(int op, void *v0, void *v1, void *v2); 618 | extern Inst *make_inst4(int op, void *v0, void *v1, void *v2, void *v4); 619 | extern Inst *make_instn(int op, List *args); 620 | extern bool is_flonum(Ctype *ctype); 621 | extern void print_function(Function *func); 622 | 623 | 624 | /*============================================================ 625 | * Initializer 626 | */ 627 | 628 | void eightcc_init(void); 629 | 630 | /*============================================================ 631 | * C script 632 | */ 633 | 634 | extern int run_main(Elf *elf, int argc, char **argv); 635 | extern int run_string(char *code); 636 | 637 | 638 | #endif /* ECC_H */ 639 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2010 Rui Ueyama . All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. 
Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR 14 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 15 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 16 | NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 23 | DAMAGE. 
24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash 2 | CC=gcc 3 | CFLAGS=-std=c99 -Wall -g 4 | LDFLAGS=-ldl -rdynamic 5 | OBJS=main.o string.o list.o gen.o lex.o cpp.o parse.o file.o dict.o error.o elf.o run.o init.o decl.o 6 | 7 | all: 8cc 8 | 9 | 8cc: $(OBJS) 10 | $(CC) -Wall $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) 11 | 12 | $(OBJS): 8cc.h 13 | 14 | nqueen: 8cc sample/nqueen.c 15 | ./8cc sample/nqueen.c nqueen.o 16 | gcc -o nqueen nqueen.o 17 | 18 | test: 19 | $(MAKE) -C test test 20 | 21 | clean: 22 | -rm -f 8cc $(OBJS) *.o nqueen .tmpTEST* 23 | $(MAKE) -C test clean 24 | 25 | .PHONY: test clean all 26 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = 8cc C compiler 2 | 3 | This is my hobby project to make a comprehensive, C99-compliant C compiler that 4 | supports precise garbage collection. My ultimate goal is to make it a drop-in 5 | replacement for GCC for projects using Boehm's conservative GC. 6 | 7 | I started developing the compiler in May 2010, and as of July 2010, it can compile 8 | non-trivial programs, such as an 8-queens problem solver. Still a lot of features 9 | are missing, like global variables, floating point numbers, declarations, 10 | miscellaneous operators, etc. 11 | 12 | = Architecture 13 | 14 | 8cc is currently a two pass compiler. In the first pass, C source code is 15 | parsed with a hand-written recursive-descent parser and an operator-precedence 16 | parser, and compiled into IL (intermediate language). IL is a list of objects 17 | representing an instruction code and zero-or-more operands. 
Here is a partial 18 | list of instructions: 19 | 20 | * + dst src0 src1 (addition) 21 | * = dst src (assignment) 22 | * ADDRESS dst src (pointer arithmetic "&") 23 | * IF var then-block else-block continue-block 24 | 25 | The operands of the IF instruction are basic blocks, which don't contain any branch 26 | instructions in the middle of them. All branch instructions jump to the beginning 27 | of a basic block. The IF instruction, for example, is compiled to machine code 28 | that jumps to either then-block or else-block, depending on the value of variable 29 | "var". "for" and "while" loops in C are implemented using the IF operator. The 30 | program in IL consists of the entry basic block and all basic blocks reachable 31 | from the entry. 32 | 33 | The point of the first pass is that it flattens nested C expressions. For example, 34 | the C expression "A + B + C" is compiled to something like "T0 = A + B" and "T1 = T0 35 | + C" (the result of the expression is T1.) No AST would be created. This 36 | should contribute to compilation speed and simplify the code by eliminating 37 | an extra pass to convert AST to IL. 38 | 39 | In the second pass, ILs are converted to x86-64 machine instructions. At the 40 | moment, all variables are assigned to the stack, and registers are used only as 41 | scratch memory. For instance, "A = B + C" is compiled to code that (1) 42 | loads the value of variable B in the stack to register RAX, (2) loads C to 43 | register R11, (3) adds them, and (4) stores the result in RAX to variable A. It 44 | always copies values between the registers and the stack even if the value will 45 | be used just after the current instruction. 46 | 47 | 8cc directly outputs an ELF object file, like TCC but unlike most other 48 | compilers. No assembler is needed. 49 | 50 | Finally, GCC would be invoked to link an object file to make an executable. 51 | 52 | = Future plans 53 | 54 | I'll implement all C99 features. After that, I'd go with copying GC. 
Because 55 | the compiler knows precise types, the runtime can do precise GC as long as the compiler 56 | retains type information in the binary in a form that is accessible at 57 | runtime. The types of heap-allocated objects would be detected by recognizing the 58 | special form in source code, malloc(sizeof(type) * size). Once that's done, I'd 59 | implement flow analysis and SSA conversion, and then implement an efficient 60 | register allocation algorithm, such as Linear Scan Register Allocation, to 61 | improve performance. 62 | -------------------------------------------------------------------------------- /cpp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cpp.c - C preprocessor 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | /* 11 | * References: 12 | * C99 spec: http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1256.pdf 13 | */ 14 | 15 | 16 | /*============================================================================== 17 | * Utility functions for token handling. 
18 | */ 19 | 20 | static Token *read_if(CppContext *ctx, char *str) { 21 | Token *tok = read_cpp_token(ctx); 22 | if (tok && tok->toktype == TOKTYPE_IDENT && !strcmp(STRING_BODY(tok->val.str), str)) 23 | return tok; 24 | unget_cpp_token(ctx, tok); 25 | return NULL; 26 | } 27 | 28 | static bool is_punct(Token *tok, int v) { 29 | return tok->toktype == TOKTYPE_PUNCT && tok->val.i == v; 30 | } 31 | 32 | void expect_newline(CppContext *ctx) { 33 | Token *tok = read_cpp_token(ctx); 34 | if (tok && tok->toktype != TOKTYPE_NEWLINE) 35 | error_token(tok, "newline expected, but got '%s'", token_to_string(tok)); 36 | 37 | } 38 | 39 | /*============================================================================== 40 | * Data structure representing a macro 41 | */ 42 | 43 | typedef void special_macro_handler(CppContext *ctx, Token *tok); 44 | 45 | // Object-like macro, function-like macro and special macro 46 | // (e.g. __FILE__ or __LINE__). 47 | #define MACRO_OBJ 1 48 | #define MACRO_FUNC 2 49 | #define MACRO_SPECIAL 3 50 | 51 | typedef struct Macro { 52 | int type; 53 | // For object-like and function-like 54 | int nargs; 55 | List *body; 56 | bool is_varg; 57 | // For special macros 58 | special_macro_handler *fn; 59 | } Macro; 60 | 61 | static Macro *make_obj_macro(List *body) { 62 | Macro *r = malloc(sizeof(Macro)); 63 | r->type = MACRO_OBJ; 64 | r->body = body; 65 | r->is_varg = false; 66 | return r; 67 | } 68 | 69 | static Macro *make_func_macro(List *body, int nargs, bool is_varg) { 70 | Macro *r = malloc(sizeof(Macro)); 71 | r->type = MACRO_FUNC; 72 | r->nargs = nargs; 73 | r->body = body; 74 | r->is_varg = is_varg; 75 | return r; 76 | } 77 | 78 | static Macro *make_special_macro(special_macro_handler *fn) { 79 | Macro *r = malloc(sizeof(Macro)); 80 | r->type = MACRO_SPECIAL; 81 | r->fn = fn; 82 | return r; 83 | } 84 | 85 | /*============================================================================== 86 | * Keyword recognizer and utility functions. 
87 | */ 88 | 89 | static Dict *keyword_dict(void) { 90 | /* 91 | * If "dict" is already initialized, returns it. Otherwise, we'll add all 92 | * keywords to a newly created dictionary. Temporary variable is needed as 93 | * we don't want to export the dictionary being built to the other threads. 94 | */ 95 | static Dict *dict; 96 | if (dict) return dict; 97 | Dict *tmp = make_string_dict(); 98 | #define KEYWORD(id_, str_) \ 99 | dict_put(tmp, to_string(str_), (void *)id_); 100 | #define PUNCT(id_, str_) 101 | # include "keyword.h" 102 | #undef PUNCT 103 | #undef KEYWORD 104 | dict = tmp; 105 | return dict; 106 | } 107 | 108 | static Dict *punct_dict(void) { 109 | static Dict *dict; 110 | if (dict) return dict; 111 | Dict *tmp = make_string_dict(); 112 | #define KEYWORD(k, s) 113 | #define PUNCT(k, s) \ 114 | dict_put(tmp, to_string(s), (void *)k); 115 | # include "keyword.h" 116 | #undef PUNCT 117 | #undef KEYWORD 118 | dict = tmp; 119 | return dict; 120 | } 121 | 122 | static Token *to_keyword_maybe(Token *tok) { 123 | ASSERT(tok->toktype == TOKTYPE_IDENT); 124 | int id = (intptr)dict_get(keyword_dict(), tok->val.str); 125 | if (id) { 126 | Token *r = copy_token(tok); 127 | r->toktype = TOKTYPE_KEYWORD; 128 | r->val.i = id; 129 | return r; 130 | } 131 | return tok; 132 | } 133 | 134 | static Token *cppnum_to_float(Token *tok) { 135 | Token *r = copy_token(tok); 136 | r->toktype = TOKTYPE_FLOAT; 137 | r->val.f = atof(STRING_BODY(tok->val.str)); 138 | return r; 139 | } 140 | 141 | static Token *cppnum_to_int(Token *tok) { 142 | char *p = STRING_BODY(tok->val.str); 143 | int base = 10; 144 | int val = 0; 145 | // Read prefix such as "0" or "0x". 146 | if (*p == '0') { 147 | p++; 148 | if (*p == 'x' || *p == 'X') { 149 | base = 16; 150 | p++; 151 | } else if (*p == 'b' || *p == 'B') { 152 | // Binary constant using '0b' prefix is GNU extension 153 | base = 2; 154 | p++; 155 | } else { 156 | base = 8; 157 | } 158 | } 159 | // Read numbers until non-number character. 
160 | for (; *p; p++) { 161 | int v; 162 | if ('0' <= *p && *p <= '9') 163 | v = *p - '0'; 164 | else if ('a' <= *p && *p <= 'f') 165 | v = *p - 'a' + 10; 166 | else if ('A' <= *p && *p <= 'F') 167 | v = *p - 'A' + 10; 168 | else 169 | break; 170 | if (v >= base) 171 | error_token(tok, "invalid digit '%c' in base %d number", *p, base); 172 | val *= base; 173 | val += v; 174 | } 175 | // Ignore all suffixes for now 176 | while (*p == 'U' || *p == 'u' || *p == 'L' || *p == 'l') 177 | p++; 178 | if (*p) 179 | error_token(tok, "invalid char '%c' in a number '%s'", *p, STRING_BODY(tok->val.str)); 180 | 181 | Token *r = copy_token(tok); 182 | r->toktype = TOKTYPE_INT; 183 | r->val.i = val; 184 | return r; 185 | } 186 | 187 | static Token *cppnum_to_num(Token *tok) { 188 | return strchr(STRING_BODY(tok->val.str), '.') 189 | ? cppnum_to_float(tok) 190 | : cppnum_to_int(tok); 191 | } 192 | 193 | static Token *cpp_token_to_token(Token *tok) { 194 | if (!tok) 195 | return NULL; 196 | tok->hideset = NULL; 197 | if (tok->toktype == TOKTYPE_IDENT) 198 | return to_keyword_maybe(tok); 199 | if (tok->toktype == TOKTYPE_CPPNUM) 200 | return cppnum_to_num(tok); 201 | if (tok->toktype == TOKTYPE_PUNCT) { 202 | Token *r = copy_token(tok); 203 | r->toktype = TOKTYPE_KEYWORD; 204 | return r; 205 | } 206 | ASSERT(tok->toktype == TOKTYPE_CHAR || tok->toktype == TOKTYPE_STRING); 207 | return tok; 208 | } 209 | 210 | /*============================================================================== 211 | * C99 6.10.8 Predefined macro names. 
212 | */ 213 | 214 | static void handle_pragma(CppContext *ctx); 215 | 216 | static void def_obj_macro(CppContext *ctx, char *name, Token *tok) { 217 | List *list = make_list1(tok); 218 | dict_put(ctx->defs, to_string(name), make_obj_macro(list)); 219 | } 220 | 221 | static void def_special_macro(CppContext *ctx, char *name, special_macro_handler *fn) { 222 | dict_put(ctx->defs, to_string(name), make_special_macro(fn)); 223 | } 224 | 225 | /* 226 | * Returns a struct tm representing now. The result is cached in the context. 227 | */ 228 | static struct tm *get_tm(CppContext *ctx) { 229 | if (ctx->tm) 230 | return ctx->tm; 231 | time_t timet = time(NULL); 232 | struct tm *now = malloc(sizeof(struct tm)); 233 | localtime_r(&timet, now); 234 | ctx->tm = now; 235 | return now; 236 | } 237 | 238 | static void handle_date_macro(CppContext *ctx, Token *tmpl) { 239 | Token *tok = copy_token(tmpl); 240 | tok->toktype = TOKTYPE_STRING; 241 | char *month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; 242 | struct tm *now = get_tm(ctx); 243 | tok->val.str = make_string_printf("%s %02d %04d", month[now->tm_mon], now->tm_mday, 1900 + now->tm_year); 244 | unget_cpp_token(ctx, tok); 245 | } 246 | 247 | static void handle_time_macro(CppContext *ctx, Token *tmpl) { 248 | Token *tok = copy_token(tmpl); 249 | tok->toktype = TOKTYPE_STRING; 250 | struct tm *now = get_tm(ctx); 251 | tok->val.str = make_string_printf("%02d:%02d:%02d", now->tm_hour, now->tm_min, now->tm_sec); 252 | unget_cpp_token(ctx, tok); 253 | } 254 | 255 | static void handle_file_macro(CppContext *ctx, Token *tmpl) { 256 | Token *tok = copy_token(tmpl); 257 | tok->toktype = TOKTYPE_STRING; 258 | tok->val.str = ctx->file->filename; 259 | unget_cpp_token(ctx, tok); 260 | } 261 | 262 | static void handle_line_macro(CppContext *ctx, Token *tmpl) { 263 | Token *tok = copy_token(tmpl); 264 | tok->toktype = TOKTYPE_CPPNUM; 265 | tok->val.str = make_string_printf("%d", 
ctx->file->line); 266 | unget_cpp_token(ctx, tok); 267 | } 268 | 269 | /* 270 | * C99 6.10.9 Pragma operator. 271 | * 272 | * _Pragma("tokens ...") is equivalent to #pragma tokens .... 273 | */ 274 | static void handle_pragma_macro(CppContext *ctx, Token *ignore) { 275 | Token *tok = read_cpp_token(ctx); 276 | if (!is_punct(tok, '(')) 277 | error_token(tok, "'(' expected, but got '%s'", token_to_string(tok)); 278 | Token *body = read_cpp_token(ctx); 279 | if (body->toktype != TOKTYPE_STRING) 280 | error_token(body, "string expected, but got '%s'", token_to_string(body)); 281 | tok = read_cpp_token(ctx); 282 | if (!is_punct(tok, ')')) 283 | error_token(tok, "')' expected, but got '%s'", token_to_string(tok)); 284 | 285 | File *file = make_string_file(body->val.str); 286 | do_include(ctx, file); 287 | handle_pragma(ctx); 288 | } 289 | 290 | void define_predefined_macros(CppContext *ctx) { 291 | def_obj_macro(ctx, "__8CC__", make_cppnum(ctx, to_string("1"))); 292 | def_obj_macro(ctx, "__STDC__", make_cppnum(ctx, to_string("1"))); 293 | def_obj_macro(ctx, "__STDC_HOSTED__", make_cppnum(ctx, to_string("1"))); 294 | def_obj_macro(ctx, "__STDC_VERSION__", make_cppnum(ctx, to_string("199901L"))); 295 | def_special_macro(ctx, "__DATE__", handle_date_macro); 296 | def_special_macro(ctx, "__TIME__", handle_time_macro); 297 | def_special_macro(ctx, "__FILE__", handle_file_macro); 298 | def_special_macro(ctx, "__LINE__", handle_line_macro); 299 | def_special_macro(ctx, "_Pragma", handle_pragma_macro); 300 | } 301 | 302 | /* 303 | * The following macros are not defined for now. 304 | * __STDC_MB_MIGHT_NEQ_WC__ 305 | * __STDC_IEC_559__ 306 | * __STDC_IEC_559_COMPLEX__ 307 | * __STDC_ISO_10646__ 308 | */ 309 | 310 | /*============================================================================== 311 | * Macro expansion. 312 | * 313 | * Preprocessor macro expansion is woefully underspecified in the C standard. 
* An insufficient implementation could fail to fully expand all recursive
 * macros, while too aggressive an expansion could go into an infinite
 * replacement loop.
 *
 * In order to prevent infinite expansion, the macro expander here maintains a
 * "hide set" for each token.  If a token is placed as a result of macro X's
 * expansion, the name "X" is added to the hide set of the token.  The token
 * will not trigger execution of macro X because of the hide set, which
 * prevents an infinite loop.
 *
 * There are many phases in macro expansion.  In the first phase, the macro's
 * arguments are parsed as a sequence of tokens separated by commas.  The
 * macro's replacement list is read next, to see if any macro argument is used
 * as the operand of the # or ## operator.  If so, the arguments are converted
 * to a string or concatenated, respectively.  All the other arguments are
 * fully macro-expanded, and then the macro parameters are replaced by the
 * expanded arguments.  The resulting sequence of tokens is read again by the
 * macro expander, until the first token is not a macro.
 *
 * An implementation that processes macro expansion in a different order could
 * produce fully-expanded but wrong results for a macro.
 *
 * The implementation here is based on Dave Prosser's expansion algorithm,
 * which is said to have been used by the ANSI C committee as a basis for the
 * standard's wording, and which is described in the following PDF.  I believe
 * the algorithm and my implementation are correct, as they correctly expand
 * all example macros in the C specification, though I'm not 100% sure.
340 | * 341 | * Blog dds: Dave Prosser's C Preprocessing Algorithm 342 | * http://www.spinellis.gr/blog/20060626/ 343 | */ 344 | 345 | static Token *expand_one(CppContext *ctx); 346 | 347 | static CppContext *make_virt_cpp_context(CppContext *ctx, List *tokens) { 348 | CppContext *r = make_cpp_context(NULL); 349 | r->at_bol = false; 350 | r->defs = ctx->defs; 351 | r->ungotten = list_reverse(tokens); 352 | r->in_macro = true; 353 | return r; 354 | } 355 | 356 | /* 357 | * Expands all macros in a given token list, as if they consisted the rest of 358 | * the source file. A new preprocessing contexts are created, and the tokens 359 | * are pushed to the context so that subsequent read_cpp_token() will get them, 360 | * as if these were the content of a file. 361 | * 362 | * expand_one() and expand_all() calls each other to get the fully expanded form 363 | * of the given tokens. 364 | */ 365 | static List *expand_all(CppContext *ctx, List *ts) { 366 | List *r = make_list(); 367 | CppContext *virt = make_virt_cpp_context(ctx, ts); 368 | Token *tok; 369 | while ((tok = expand_one(virt)) != NULL) 370 | list_push(r, tok); 371 | return r; 372 | } 373 | 374 | static void pushback(CppContext *ctx, List *ts) { 375 | for (int i = LIST_LEN(ts) - 1; i >= 0; i--) 376 | unget_cpp_token(ctx, (Token *)LIST_REF(ts, i)); 377 | } 378 | 379 | /* 380 | * Reads comma-separated arguments of function-like macro invocation. Comma 381 | * characters in matching parentheses are not considered as separator. 
382 | * 383 | * (C99 6.10.3 Macro replacement, sentence 10) 384 | */ 385 | static List *read_args_int(CppContext *ctx, Macro *macro) { 386 | List *r = make_list(); 387 | List *arg = make_list(); 388 | int depth = 0; 389 | 390 | Token *tok = peek_cpp_token(ctx); 391 | if (!tok || !is_punct(tok, '(')) 392 | return NULL; 393 | read_cpp_token(ctx); 394 | 395 | for (Token *tok1 = read_cpp_token(ctx); ; tok1 = read_cpp_token(ctx)) { 396 | if (!tok1) 397 | error_token(tok, "unterminated macro argument list"); 398 | if (tok1->toktype == TOKTYPE_NEWLINE) 399 | continue; 400 | if (depth) { 401 | if (is_punct(tok1, ')')) 402 | depth--; 403 | list_push(arg, tok1); 404 | continue; 405 | } 406 | if (is_punct(tok1, '(')) 407 | depth++; 408 | if (is_punct(tok1, ')')) { 409 | unget_cpp_token(ctx, tok1); 410 | list_push(r, arg); 411 | return r; 412 | } 413 | bool in_threedots = macro->is_varg && LIST_LEN(r) + 1 == macro->nargs; 414 | if (is_punct(tok1, ',') && !in_threedots) { 415 | list_push(r, arg); 416 | arg = make_list(); 417 | continue; 418 | } 419 | list_push(arg, tok1); 420 | } 421 | } 422 | 423 | /* 424 | * Reads macro arguments. If the number of macro arguments does not match with 425 | * the number of parameters, it will raise an error. 426 | */ 427 | static List *read_args(CppContext *ctx, Macro *macro) { 428 | List *args = read_args_int(ctx, macro); 429 | if (!args) return NULL; 430 | 431 | /* 432 | * In CPP syntax, we cannot distinguish zero-argument macro invocation from 433 | * one argument macro invocation, because macro argument can be blank. For 434 | * example, the following macro invocation 435 | * 436 | * FOO() 437 | * 438 | * is valid for both definitions shown below. 439 | * 440 | * #define FOO() 1 441 | * #define FOO(x) x 442 | * 443 | * In the latter case, macro parameter "x" become an empty sequence of 444 | * identifiers, thus FOO() will be replaced with the empty. 
445 | * 446 | * The argument list is set to empty here if macro takes no parameters and 447 | * the argument is empty. 448 | */ 449 | if (macro->nargs == 0 && LIST_LEN(args) == 1 && LIST_LEN((List *)LIST_REF(args, 0)) == 0) 450 | list_pop(args); 451 | 452 | if ((macro->is_varg && LIST_LEN(args) < macro->nargs) 453 | || (!macro->is_varg && LIST_LEN(args) != macro->nargs)) 454 | error_cpp_ctx(ctx, "Macro argument number does not match"); 455 | return args; 456 | } 457 | 458 | /* 459 | * Added a given hide set to tokens in a given list. 460 | */ 461 | static List *add_hide_set(List *tokens, List *hideset) { 462 | List *r = make_list(); 463 | for (int i = 0; i < LIST_LEN(tokens); i++) { 464 | Token *t = copy_token((Token *)LIST_REF(tokens, i)); 465 | t->hideset = list_union(t->hideset, hideset); 466 | list_push(r, t); 467 | } 468 | return r; 469 | } 470 | 471 | /* 472 | * Writes a string representation of a given character to a buffer. If the 473 | * character is quote, it'll be esacaped with backslash. 474 | */ 475 | static void stringize_char(String *b, char c, char quote) { 476 | if (!isascii(c)) 477 | string_printf(b, "\\x%02x", (u8)c); 478 | else if (c == '\\' || c == quote) 479 | string_printf(b, "\\%c", c); 480 | else 481 | string_printf(b, "%c", c); 482 | } 483 | 484 | static void paste(String *b, Token *tok) { 485 | switch (tok->toktype) { 486 | case TOKTYPE_IDENT: 487 | case TOKTYPE_CPPNUM: 488 | string_append(b, STRING_BODY(tok->val.str)); 489 | return; 490 | case TOKTYPE_PUNCT: 491 | string_append(b, token_to_string(tok)); 492 | return; 493 | default: 494 | error_token(tok, "pasting invalid token: '%s'", token_to_string(tok)); 495 | } 496 | } 497 | 498 | /* 499 | * Joins given two tokens and returns it. Used by "##" operator. 
500 | */ 501 | static Token *glue_tokens(Token *t0, Token *t1) { 502 | String *b = make_string(); 503 | paste(b, t0); 504 | paste(b, t1); 505 | Token *r = copy_token(t0); 506 | if (isdigit(STRING_BODY(b)[0])) { 507 | r->toktype = TOKTYPE_CPPNUM; 508 | r->val.str = b; 509 | return r; 510 | } 511 | int punct = (intptr)dict_get(punct_dict(), b); 512 | if (punct) { 513 | r->toktype = TOKTYPE_PUNCT; 514 | r->val.i = punct; 515 | return r; 516 | } 517 | r->toktype = TOKTYPE_IDENT; 518 | r->val.str = b; 519 | return r; 520 | } 521 | 522 | /* 523 | * Joins a given token with the last token of a list. 524 | */ 525 | static void glue_push(List *ls, Token *tok) { 526 | ASSERT(!LIST_IS_EMPTY(ls)); 527 | Token *last = list_pop(ls); 528 | list_push(ls, glue_tokens(last, tok)); 529 | } 530 | 531 | /* 532 | * Join tokens in a given list. If sep is true, a space is put 533 | * between tokens. If false, no separator. 534 | */ 535 | static String *join_tokens(List *arg, bool sep) { 536 | String *s = make_string(); 537 | for (int i = 0; i < LIST_LEN(arg); i++) { 538 | Token *tok = LIST_REF(arg, i); 539 | if (sep && STRING_LEN(s) && tok->space) 540 | o1(s, ' '); 541 | switch (tok->toktype) { 542 | case TOKTYPE_IDENT: 543 | case TOKTYPE_CPPNUM: 544 | string_append(s, STRING_BODY(tok->val.str)); 545 | break; 546 | case TOKTYPE_PUNCT: 547 | string_append(s, token_to_string(tok)); 548 | break; 549 | case TOKTYPE_CHAR: { 550 | // TODO: retain original spelling 551 | o1(s, '\''); 552 | stringize_char(s, tok->val.i, '\''); 553 | string_append(s, "\'"); 554 | break; 555 | } 556 | case TOKTYPE_STRING: { 557 | o1(s, '"'); 558 | for (char *p = STRING_BODY(tok->val.str); *p; p++) 559 | stringize_char(s, *p, '\"'); 560 | string_append(s, "\""); 561 | break; 562 | } 563 | default: 564 | panic("invalid token type: %d", tok->toktype); 565 | } 566 | } 567 | return s; 568 | } 569 | 570 | /* 571 | * Write a string representation of a given token sequence. Used by 572 | * # operator. 
573 | */ 574 | static Token *stringize(Token *tmpl, List *arg) { 575 | Token *r = copy_token(tmpl); 576 | r->toktype = TOKTYPE_STRING; 577 | r->val.str = join_tokens(arg, true); 578 | return r; 579 | } 580 | 581 | /* 582 | * Substitutes parameters in macro definition body with actual arguments. 583 | */ 584 | static List *subst(CppContext *ctx, Macro *macro, List *args, List *hideset) { 585 | List *r = make_list(); 586 | for (int i = 0; i < LIST_LEN(macro->body); i++) { 587 | bool islast = (i == LIST_LEN(macro->body) - 1); 588 | Token *t0 = LIST_REF(macro->body, i); 589 | Token *t1 = islast ? NULL : LIST_REF(macro->body, i + 1); 590 | bool t0_param = t0->toktype == TOKTYPE_MACRO_PARAM; 591 | bool t1_param = !islast && t1->toktype == TOKTYPE_MACRO_PARAM; 592 | 593 | if (is_punct(t0, '#') && t1_param) { 594 | list_push(r, stringize(t0, LIST_REF(args, t1->val.i))); 595 | i++; 596 | continue; 597 | } 598 | if (is_punct(t0, KEYWORD_TWOSHARPS) && t1_param) { 599 | List *arg = LIST_REF(args, t1->val.i); 600 | if (!LIST_IS_EMPTY(arg)) { 601 | glue_push(r, (Token *)LIST_REF(arg, 0)); 602 | List *tmp = make_list(); 603 | for (int i = 1; i < LIST_LEN(arg); i++) 604 | list_push(tmp, LIST_REF(arg, i)); 605 | list_append(r, expand_all(ctx, tmp)); 606 | } 607 | i++; 608 | continue; 609 | } 610 | if (is_punct(t0, KEYWORD_TWOSHARPS) && !islast) { 611 | hideset = t1->hideset; // wrong? 612 | glue_push(r, t1); 613 | i++; 614 | continue; 615 | } 616 | if (t0_param && !islast && is_punct(t1, KEYWORD_TWOSHARPS)) { 617 | hideset = t1->hideset; // wrong? 
618 | List *arg = LIST_REF(args, t0->val.i); 619 | if (LIST_IS_EMPTY(arg)) 620 | i++; 621 | else 622 | list_append(r, arg); 623 | continue; 624 | } 625 | if (t0_param) { 626 | List *arg = LIST_REF(args, t0->val.i); 627 | list_append(r, expand_all(ctx, arg)); 628 | continue; 629 | } 630 | list_push(r, t0); 631 | } 632 | return add_hide_set(r, hideset); 633 | } 634 | 635 | /* 636 | * Reads a token from a given preprocessing context, expands it if macro, and 637 | * returns it. 638 | */ 639 | static Token *expand_one(CppContext *ctx) { 640 | Token *tok = read_cpp_token(ctx); 641 | if (!tok) return NULL; 642 | if (tok->toktype != TOKTYPE_IDENT) 643 | return tok; 644 | String *name = tok->val.str; 645 | Macro *macro = dict_get(ctx->defs, name); 646 | if (!macro) 647 | return tok; 648 | if (list_in(tok->hideset, name)) 649 | return tok; 650 | 651 | switch (macro->type) { 652 | case MACRO_OBJ: { 653 | List *ts = subst(ctx, macro, make_list(), list_union1(tok->hideset, name)); 654 | pushback(ctx, ts); 655 | return expand_one(ctx); 656 | } 657 | case MACRO_FUNC: { 658 | List *args = read_args(ctx, macro); 659 | Token *rparen = read_cpp_token(ctx); 660 | List *hideset = list_union1(list_intersect(tok->hideset, rparen->hideset), name); 661 | List *ts = subst(ctx, macro, args, hideset); 662 | pushback(ctx, ts); 663 | return expand_one(ctx); 664 | } 665 | case MACRO_SPECIAL: 666 | macro->fn(ctx, tok); 667 | return expand_one(ctx); 668 | } 669 | panic("should not reach here"); 670 | } 671 | 672 | /*============================================================================== 673 | * Preprocessor directives. 674 | */ 675 | 676 | static bool is_defined(CppContext *ctx, Token *tok) { 677 | if (!tok || tok->toktype != TOKTYPE_IDENT) 678 | error_token(tok, "identifier expected, but got '%s'", token_to_string(tok)); 679 | return dict_has(ctx->defs, tok->val.str); 680 | } 681 | 682 | /* 683 | * Reads "defined" unary operator of the form "defined " or 684 | * "defined()". 
The token "defined" is already read when the 685 | * function is called. 686 | * 687 | * (C99 6.10.1 Conditional inclusion, paragraph 1) 688 | */ 689 | static Token *read_defined(CppContext *ctx) { 690 | Token *tok = read_cpp_token(ctx); 691 | if (is_punct(tok, '(')) { 692 | tok = read_cpp_token(ctx); 693 | Token *tok1 = read_cpp_token(ctx); 694 | if (!tok1 || !is_punct(tok1, ')')) 695 | error_token(tok1, "')' expected, but got '%s'", token_to_string(tok1)); 696 | } 697 | Token *r = copy_token(tok); 698 | r->toktype = TOKTYPE_CPPNUM; 699 | r->val.i = is_defined(ctx, tok); 700 | return r; 701 | } 702 | 703 | /* 704 | * Evaluate a given tokens as an integer constant expression and returns the 705 | * result. 706 | */ 707 | static int eval_const_expr(CppContext *cppctx, List *tokens) { 708 | if (LIST_LEN(tokens) == 1 && ((Token *)LIST_REF(tokens, 0))->toktype == TOKTYPE_CPPNUM) 709 | return ((Token *)LIST_REF(tokens, 0))->val.i; 710 | 711 | CppContext *virt = make_virt_cpp_context(cppctx, tokens); 712 | ReadContext *readctx = make_read_context(cppctx->file, NULL, virt); 713 | readctx->in_const_expr = true; 714 | Var *var = read_comma_expr(readctx); 715 | 716 | Token *tok = read_token(readctx); 717 | if (tok) 718 | error_token(tok, "newline expected, but got '%s'", token_to_string(tok)); 719 | 720 | ASSERT(var->stype == VAR_IMM); 721 | if (!ctype_equal(var->ctype, CTYPE_INT)) 722 | error_cpp_ctx(cppctx, "integer expected"); 723 | return var->val.i; 724 | } 725 | 726 | /* 727 | * Reads an constant expression for #if directive. In preprocessor constant 728 | * expression, all undefined identifiers are replaced with 0. 
729 | * 730 | * (C99 6.10.1 Conditional inclusion, paragraph 4) 731 | */ 732 | static int read_constant_expr(CppContext *ctx) { 733 | List *tokens = make_list(); 734 | for (;;) { 735 | Token *tok = expand_one(ctx); 736 | if (!tok || tok->toktype == TOKTYPE_NEWLINE) 737 | break; 738 | if (tok->toktype == TOKTYPE_IDENT && !strcmp("defined", STRING_BODY(tok->val.str))) 739 | tok = read_defined(ctx); 740 | list_push(tokens, tok); 741 | } 742 | return eval_const_expr(ctx, tokens); 743 | } 744 | 745 | /* 746 | * #ifdef 747 | * (C99 6.10.1 Conditional inclusion, paragraph 5) 748 | */ 749 | static int read_ifdef(CppContext *ctx) { 750 | int r = is_defined(ctx, read_cpp_token(ctx)); 751 | expect_newline(ctx); 752 | return r; 753 | } 754 | 755 | /* 756 | * Handles #if, #elif, #ifdef and #ifndef. If condition does not meet, the 757 | * function calls skip_cond_include(), defined in lex.c, to skip all tokens 758 | * until the next #elif, #else of #endif. 759 | * 760 | * (C99 6.10.1 Conditional inclusion) 761 | */ 762 | static void handle_cond_incl(CppContext *ctx, CondInclType type) { 763 | bool cond; 764 | switch (type) { 765 | case COND_IF: 766 | cond = read_constant_expr(ctx); 767 | break; 768 | case COND_IFDEF: 769 | cond = read_ifdef(ctx); 770 | break; 771 | case COND_IFNDEF: 772 | cond = !read_ifdef(ctx); 773 | break; 774 | case COND_ELIF: 775 | error_cpp_ctx(ctx, "stray #elif"); 776 | case COND_ELSE: 777 | expect_newline(ctx); 778 | if (LIST_IS_EMPTY(ctx->incl)) 779 | error_cpp_ctx(ctx, "stray #else"); 780 | bool in_else = (intptr)list_pop(ctx->incl); 781 | if (in_else) 782 | error_cpp_ctx(ctx, "#else appears twice"); 783 | CondInclType type1 = skip_cond_incl(ctx); 784 | if (type1 == COND_ELIF) 785 | error_cpp_ctx(ctx, "stray #elif"); 786 | if (type1 == COND_ELSE) 787 | error_cpp_ctx(ctx, "stray #else"); 788 | return; 789 | case COND_ENDIF: 790 | expect_newline(ctx); 791 | if (LIST_IS_EMPTY(ctx->incl)) 792 | error_cpp_ctx(ctx, "stray #endif"); 793 | list_pop(ctx->incl); 
794 | return; 795 | } 796 | if (cond) { 797 | list_push(ctx->incl, (void *)false); 798 | return; 799 | } 800 | // skip_cond_incl() returns one of COND_ELIF, COND_ELSE or COND_ENDIF. 801 | CondInclType type1 = skip_cond_incl(ctx); 802 | if (type1 == COND_ELIF) 803 | handle_cond_incl(ctx, COND_IF); 804 | else if (type1 == COND_ELSE) 805 | list_push(ctx->incl, (void *)true); 806 | } 807 | 808 | /* 809 | * Reads function-like macro arguments. Returns true if the argument list ends 810 | * with "...". Otherwise false. 811 | */ 812 | static bool read_funclike_define_args(CppContext *ctx, Dict *param) { 813 | for (;;) { 814 | Token *tok = read_cpp_token(ctx); 815 | if (is_punct(tok, ')')) 816 | return false; 817 | if (dict_size(param)) { 818 | if (!is_punct(tok, ',')) 819 | error_token(tok, "',' expected, but got '%s'", token_to_string(tok)); 820 | tok = read_cpp_token(ctx); 821 | } 822 | if (!tok || tok->toktype == TOKTYPE_NEWLINE) 823 | error_token(tok, "missing ')' in macro parameter list"); 824 | if (is_punct(tok, KEYWORD_THREEDOTS)) { 825 | Token *subst = make_token(ctx, TOKTYPE_MACRO_PARAM, (TokenValue)dict_size(param)); 826 | dict_put(param, to_string("__VA_ARGS__"), subst); 827 | Token *tok1 = read_cpp_token(ctx); 828 | if (!is_punct(tok1, ')')) 829 | error_token(tok1, "')' expected, but got '%s'", token_to_string(tok1)); 830 | return true; 831 | } 832 | if (tok->toktype != TOKTYPE_IDENT) 833 | error_token(tok, "identifier expected, but got '%s'", token_to_string(tok)); 834 | Token *subst = make_token(ctx, TOKTYPE_MACRO_PARAM, (TokenValue)dict_size(param)); 835 | dict_put(param, tok->val.str, subst); 836 | } 837 | } 838 | 839 | /* 840 | * Reads function-like macro body. Macro body is a sequence of tokens ends with 841 | * a newline. 842 | * 843 | * Macro parameters in the body will be replaced with a special token whose type 844 | * is TOKTYPE_MACRO_PARAM. When macro is executed, these tokens will then be 845 | * replaced with macro arguments. 
846 | */ 847 | static List *read_funclike_define_body(CppContext *ctx, Dict *param) { 848 | List *body = make_list(); 849 | // Read macro body list 850 | for (;;) { 851 | Token *tok = read_cpp_token(ctx); 852 | if (!tok || tok->toktype == TOKTYPE_NEWLINE) 853 | return body; 854 | if (tok->toktype == TOKTYPE_IDENT) { 855 | Token *subst = dict_get(param, tok->val.str); 856 | if (subst) { 857 | list_push(body, subst); 858 | continue; 859 | } 860 | } 861 | list_push(body, tok); 862 | } 863 | return body; 864 | } 865 | 866 | /* 867 | * Stores a given macro to a CppContex. 868 | * 869 | * TODO: Print warning message if a macro is redefined. Redefinition is valid 870 | * only when the a one is the same as the old one. (C99 6.10.3p2) 871 | */ 872 | 873 | static void store_macro(CppContext *ctx, String *name, Macro *macro) { 874 | dict_put(ctx->defs, name, macro); 875 | } 876 | 877 | /* 878 | * Reads function-like macro definition. 879 | */ 880 | static void read_funclike_define(CppContext *ctx, String *name) { 881 | Dict *param = make_string_dict(); 882 | bool is_varg = read_funclike_define_args(ctx, param); 883 | List *body = read_funclike_define_body(ctx, param); 884 | store_macro(ctx, name, make_func_macro(body, dict_size(param), is_varg)); 885 | } 886 | 887 | /* 888 | * #define 889 | * (C99 6.10.3 Macro replacement) 890 | */ 891 | static void read_define(CppContext *ctx) { 892 | Token *name = read_cpp_token(ctx); 893 | if (name->toktype != TOKTYPE_IDENT) 894 | error_cpp_ctx(ctx, "macro name must be an identifier, but got '%s'", token_to_string(name)); 895 | 896 | bool is_space = is_next_space(ctx); 897 | Token *tok = read_cpp_token(ctx); 898 | 899 | if (!is_space && tok && is_punct(tok, '(')) { 900 | read_funclike_define(ctx, name->val.str); 901 | return; 902 | } 903 | 904 | List *body = make_list(); 905 | while (tok && tok->toktype != TOKTYPE_NEWLINE) { 906 | list_push(body, tok); 907 | tok = read_cpp_token(ctx); 908 | } 909 | store_macro(ctx, name->val.str, 
make_obj_macro(body)); 910 | } 911 | 912 | /* 913 | * #undef 914 | * (C99 6.10.5 Scope of macro definisions, paragraph 2) 915 | */ 916 | static void read_undef(CppContext *ctx) { 917 | Token *name = read_cpp_token(ctx); 918 | if (!name || name->toktype != TOKTYPE_IDENT) 919 | error_token(name, "undef works only to an identifier, but got '%s'", token_to_string(name)); 920 | expect_newline(ctx); 921 | dict_delete(ctx->defs, name->val.str); 922 | } 923 | 924 | 925 | /* 926 | * Reads a file name of #include directive. If the file name is quoted with <>, 927 | * "std" will set to true. If quoted with doublequote, set to false. We use 928 | * expand_one() rather than read_cpp_token(), because macros are allowed to be 929 | * used in #include. 930 | * (C99 6.10.2 Source file inclusion) 931 | */ 932 | static void read_cpp_header_name(CppContext *ctx, String **name, bool *std) { 933 | if (LIST_IS_EMPTY(ctx->ungotten)) { 934 | *name = read_header_name(ctx, std); 935 | if (name) 936 | return; 937 | } 938 | 939 | Token *tok = expand_one(ctx); 940 | if (!tok || tok->toktype == TOKTYPE_NEWLINE) 941 | error_token(tok, "expected file name, but got '%s'", token_to_string(tok)); 942 | if (tok->toktype == TOKTYPE_STRING) { 943 | *name = tok->val.str; 944 | *std = false; 945 | return; 946 | } 947 | List *tokens = make_list(); 948 | if (is_punct(tok, '<')) { 949 | for (;;) { 950 | Token *tok = expand_one(ctx); 951 | if (!tok || tok->toktype == TOKTYPE_NEWLINE) 952 | error_token(tok, "premature end of header name"); 953 | if (is_punct(tok, '>')) 954 | break; 955 | list_push(tokens, tok); 956 | } 957 | *name = join_tokens(tokens, false); 958 | *std = true; 959 | return; 960 | } 961 | error_token(tok, "'<' expected, but got '%s'", token_to_string(tok)); 962 | } 963 | 964 | /* 965 | * Constructs a file path by joining path0 and path1. 
966 | */ 967 | static String *construct_path(String *path0, String *path1) { 968 | char *s0 = STRING_BODY(path0); 969 | char *s1 = STRING_BODY(path1); 970 | if (!*s0) 971 | return path1; 972 | return make_string_printf("%s/%s", s0, s1); 973 | } 974 | 975 | /* 976 | * Find a header file for a given header name. If header was quoted 977 | * with <>, list "path" would include "/usr/include" and 978 | * "/usr/local/include". Otherwise just "". 979 | */ 980 | 981 | static File *open_header(CppContext *ctx, String *name, List *paths) { 982 | for (int i = 0; i < LIST_LEN(paths); i++) { 983 | String *path = construct_path((String *)LIST_REF(paths, i), name); 984 | FILE *stream = fopen(STRING_BODY(path), "r"); 985 | if (!stream) 986 | continue; 987 | return make_file(stream, STRING_BODY(path)); 988 | } 989 | error_cpp_ctx(ctx, "Cannot find header: '%s'", STRING_BODY(name)); 990 | } 991 | 992 | /* 993 | * #include 994 | * (C99 6.10.2 Source file inclusion) 995 | */ 996 | static void handle_include(CppContext *ctx) { 997 | String *name; 998 | bool std; 999 | read_cpp_header_name(ctx, &name, &std); 1000 | expect_newline(ctx); 1001 | 1002 | List *include_path = std 1003 | ? 
ctx->include_path 1004 | : make_list1(to_string("")); 1005 | File *file = open_header(ctx, name, include_path); 1006 | do_include(ctx, file); 1007 | } 1008 | 1009 | /* 1010 | * #line 1011 | * (C99 6.10.4 Line control) 1012 | * 1013 | * Line directive must be one of the following form in macro-expanded form: 1014 | * 1015 | * #line digit-sequence 1016 | * #line digit-sequence "s-char-sequenceopt" 1017 | */ 1018 | static void handle_line_directive(CppContext *ctx) { 1019 | Token *tok = expand_one(ctx); 1020 | if (!tok || tok->toktype != TOKTYPE_CPPNUM) 1021 | error_token(tok, "number expected, but got '%s'", token_to_string(tok)); 1022 | int line = cppnum_to_num(tok)->val.i; 1023 | 1024 | tok = expand_one(ctx); 1025 | if (tok && tok->toktype == TOKTYPE_NEWLINE) { 1026 | ctx->file->line = line; 1027 | return; 1028 | } 1029 | if (tok && tok->toktype == TOKTYPE_STRING) { 1030 | expect_newline(ctx); 1031 | ctx->file->line = line; 1032 | ctx->file->filename = tok->val.str; 1033 | return; 1034 | } 1035 | error_token(tok, "filename expected, but got '%s'", token_to_string(tok)); 1036 | } 1037 | 1038 | /* 1039 | * #pragma 1040 | * (C99 6.10.5 6.10.6 Pragma directive) 1041 | * 1042 | * No pragmas including standard C's are not supported for now. 
1043 | */ 1044 | static void handle_pragma(CppContext *ctx) { 1045 | error_cpp_ctx(ctx, "No pragmas supported"); 1046 | } 1047 | 1048 | /* 1049 | * #error 1050 | * (C99 6.10.5 Error directive) 1051 | */ 1052 | static void read_error_directive(CppContext *ctx, Token *define) { 1053 | String *buf = make_string(); 1054 | Token *tok = read_cpp_token(ctx); 1055 | while(tok && tok->toktype != TOKTYPE_NEWLINE) { 1056 | o1(buf, ' '); 1057 | string_append(buf, token_to_string(tok)); 1058 | tok = read_cpp_token(ctx); 1059 | } 1060 | error_token(define, "error: #error:%s", STRING_BODY(buf)); 1061 | } 1062 | 1063 | static void read_directive(CppContext *ctx) { 1064 | Token *tok; 1065 | if (read_if(ctx, "define")) read_define(ctx); 1066 | else if (read_if(ctx, "undef")) read_undef(ctx); 1067 | else if (read_if(ctx, "if")) handle_cond_incl(ctx, COND_IF); 1068 | else if (read_if(ctx, "elif")) handle_cond_incl(ctx, COND_ELIF); 1069 | else if (read_if(ctx, "else")) handle_cond_incl(ctx, COND_ELSE); 1070 | else if (read_if(ctx, "ifdef")) handle_cond_incl(ctx, COND_IFDEF); 1071 | else if (read_if(ctx, "ifndef")) handle_cond_incl(ctx, COND_IFNDEF); 1072 | else if (read_if(ctx, "endif")) handle_cond_incl(ctx, COND_ENDIF); 1073 | else if (read_if(ctx, "include")) handle_include(ctx); 1074 | else if (read_if(ctx, "line")) handle_line_directive(ctx); 1075 | else if (read_if(ctx, "pragma")) handle_pragma(ctx); 1076 | else if ( (tok = read_if(ctx, "error")) ) { 1077 | read_error_directive(ctx, tok); 1078 | } else { 1079 | tok = read_cpp_token(ctx); 1080 | if (tok && tok->toktype == TOKTYPE_NEWLINE) 1081 | // 6.10.7 NULL directive. Do nothing. 1082 | return; 1083 | error_token(tok, "unsupported preprocessor directive: '%s'", token_to_string(tok)); 1084 | } 1085 | } 1086 | 1087 | /*============================================================================== 1088 | * -E option. 1089 | * 1090 | * write_cpp() writes preprocessed tokens to a given file. This is useful for 1091 | * debugging. 
1092 | */ 1093 | 1094 | static Token *read_token_int(CppContext *ctx) { 1095 | for (;;) { 1096 | Token *tok = read_cpp_token(ctx); 1097 | if (!tok) 1098 | return NULL; 1099 | if (tok->toktype == TOKTYPE_NEWLINE) { 1100 | ctx->at_bol = true; 1101 | return tok; 1102 | } 1103 | if (ctx->at_bol && is_punct(tok, '#')) { 1104 | read_directive(ctx); 1105 | ctx->at_bol = true; 1106 | continue; 1107 | } 1108 | ctx->at_bol = false; 1109 | unget_cpp_token(ctx, tok); 1110 | return expand_one(ctx); 1111 | } 1112 | } 1113 | 1114 | static void write_cpp_token(FILE *out, Token *tok, bool is_bol) { 1115 | if (!is_bol && tok->space) 1116 | fprintf(out, " "); 1117 | switch (tok->toktype) { 1118 | case TOKTYPE_IDENT: 1119 | case TOKTYPE_CPPNUM: 1120 | fprintf(out, "%s", STRING_BODY(tok->val.str)); 1121 | break; 1122 | case TOKTYPE_PUNCT: 1123 | fprintf(out, "%s", token_to_string(tok)); 1124 | break; 1125 | case TOKTYPE_CHAR: { 1126 | String *b = make_string(); 1127 | stringize_char(b, tok->val.i, '\''); 1128 | fprintf(out, "'%s'", STRING_BODY(b)); 1129 | break; 1130 | } 1131 | case TOKTYPE_STRING: { 1132 | fprintf(out, "\""); 1133 | for (char *p = STRING_BODY(tok->val.str); *p; p++) { 1134 | String *b = make_string(); 1135 | stringize_char(b, *p, '\"'); 1136 | fprintf(out, "'%s'", STRING_BODY(b)); 1137 | } 1138 | fprintf(out, "\""); 1139 | break; 1140 | } 1141 | default: 1142 | panic("invalid token type: %d", tok->toktype); 1143 | } 1144 | } 1145 | 1146 | void cpp_write(CppContext *ctx, FILE *out) { 1147 | bool is_bol = true; 1148 | for (;;) { 1149 | Token *tok = read_token_int(ctx); 1150 | if (!tok) return; 1151 | if (tok->toktype == TOKTYPE_NEWLINE) { 1152 | fprintf(out, "\n"); 1153 | is_bol = true; 1154 | continue; 1155 | } 1156 | write_cpp_token(out, tok, is_bol); 1157 | is_bol = false; 1158 | } 1159 | } 1160 | 1161 | /*============================================================================== 1162 | * Entry function of the preprocessor. 
1163 | * 1164 | * read_token() reads preprocessing tokens by calling read_cpp_token(), which is 1165 | * defined in lex.c. There are six types of tokens can be returned from 1166 | * read_cpp_token(). 1167 | * 1168 | * identifier 1169 | * pp-number 1170 | * character-constant 1171 | * string-literal 1172 | * punctuator 1173 | * newline 1174 | * 1175 | * read_token() evaluates preprocessing directives (such as "#define") appearing 1176 | * in the sequence of preprocessing tokens, as well as expanding macros. The 1177 | * resulting tokens are then converted to ordinary tokens before returning to 1178 | * the main compiler. 1179 | * 1180 | * Preprocessing numbers will be converted to integer or float numbers. 1181 | * Punctuators to keywords. Identifiers to keywords (if reserved words) or keep 1182 | * as is. Newline tokens removed. 1183 | */ 1184 | 1185 | Token *read_token(ReadContext *readctx) { 1186 | if (!LIST_IS_EMPTY(readctx->ungotten)) 1187 | return list_pop(readctx->ungotten); 1188 | 1189 | for (;;) { 1190 | Token *tok = read_token_int(readctx->cppctx); 1191 | if (!tok) return NULL; 1192 | if (tok->toktype == TOKTYPE_NEWLINE) 1193 | continue; 1194 | return cpp_token_to_token(tok); 1195 | } 1196 | } 1197 | -------------------------------------------------------------------------------- /decl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * decl.c - C declaration parser 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | -------------------------------------------------------------------------------- /dict.c: -------------------------------------------------------------------------------- 1 | /* 2 | * dict.c - dictionary (hash table) implementation 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. 
See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | #define DELETED ((void *)-1) 11 | #define BUCKET_EMPTY(ent) ((ent)->key == NULL || (ent)->key == DELETED) 12 | 13 | static bool store(Dict *dict, void *key, u32 hv, void *obj); 14 | 15 | /* 16 | * This is an implementation of open-addressing hash table. Buckets 17 | * having NULL in 'key' field are considered as vacant buckets. 18 | */ 19 | 20 | static Dict *make_dict_int(dict_hash_fn hashfn, dict_equal_fn equalfn, int size) { 21 | Dict *r = malloc(sizeof(Dict)); 22 | r->hashfn = hashfn; 23 | r->equalfn = equalfn; 24 | r->buckets = malloc(sizeof(Bucket) * size); 25 | r->nalloc = size; 26 | r->nelem = 0; 27 | for (int i = 0; i < size; i++) 28 | r->buckets[i].key = NULL; 29 | return r; 30 | } 31 | 32 | Dict *make_dict(dict_hash_fn *hashfn, dict_equal_fn *equalfn) { 33 | return make_dict_int(hashfn, equalfn, DICT_INITIAL_SIZE); 34 | } 35 | 36 | /*============================================================ 37 | * Hash functions 38 | */ 39 | 40 | static u32 string_hash(void *e) { 41 | u32 hv = 0; 42 | String *str = (String *)e; 43 | char *ptr = STRING_BODY(str); 44 | for (int i = 0; i < STRING_LEN(str) && ptr[i]; i++) { 45 | hv = (hv << 5) - hv + (unsigned char)ptr[i]; 46 | } 47 | return hv; 48 | } 49 | 50 | static u32 address_hash(void *ptr) { 51 | u32 hv = ((u32)(intptr_t)ptr) * 2654435761UL; 52 | return hv; 53 | } 54 | 55 | static bool dict_string_equal(void *a, void *b) { 56 | return string_equal(a, b); 57 | } 58 | 59 | static bool dict_address_equal(void *a, void *b) { 60 | return a == b; 61 | } 62 | 63 | Dict *make_string_dict(void) { 64 | return make_dict_int(string_hash, dict_string_equal, DICT_INITIAL_SIZE); 65 | } 66 | 67 | Dict *make_address_dict(void) { 68 | return make_dict_int(address_hash, dict_address_equal, DICT_INITIAL_SIZE); 69 | } 70 | 71 | /*============================================================ 72 | * Rehashing 73 | */ 74 | 75 | static void rehash(Dict *dict) { 76 | Dict 
*newdict = make_dict_int(dict->hashfn, dict->equalfn, dict->nalloc * 2); 77 | for (int i = 0; i < dict->nalloc; i++) { 78 | Bucket *ent = &dict->buckets[i]; 79 | if (BUCKET_EMPTY(ent)) 80 | continue; 81 | store(newdict, ent->key, ent->hashval, ent->elem); 82 | } 83 | dict->buckets = newdict->buckets; 84 | dict->nalloc = newdict->nalloc; 85 | } 86 | 87 | /*============================================================ 88 | * Accessors 89 | */ 90 | 91 | /* 92 | * Returns a pointer to a bucket to which a given key would be stored. 93 | */ 94 | static Bucket *find_bucket(Dict *dict, void *key, u32 hv, bool put) { 95 | int start = hv % dict->nalloc; 96 | Bucket *ent; 97 | for (int i = start; i < start + dict->nalloc; i++) { 98 | ent = &dict->buckets[i % dict->nalloc]; 99 | if (put && ent->key == DELETED) return ent; 100 | if (!ent->key) return ent; 101 | if (ent->hashval != hv) 102 | continue; 103 | if (dict->equalfn(ent->key, key)) 104 | return ent; 105 | } 106 | panic("no space found in dictionary"); 107 | } 108 | 109 | /* 110 | * Puts a given object to a dictionary. Returns true iff the given key was 111 | * already associated with a value. 112 | */ 113 | static bool store(Dict *dict, void *key, u32 hv, void *obj) { 114 | Bucket *ent = find_bucket(dict, key, hv, true); 115 | bool r = !BUCKET_EMPTY(ent); 116 | ent->hashval = hv; 117 | ent->key = key; 118 | ent->elem = obj; 119 | return r; 120 | } 121 | 122 | /* 123 | * Call rehash() if half buckets are already in use. Otherwise, do 124 | * nothing. 
125 | */ 126 | static void ensure_room(Dict *dict) { 127 | if (dict->nelem * 2 <= dict->nalloc) 128 | return; 129 | rehash(dict); 130 | } 131 | 132 | void dict_put(Dict *dict, void *key, void *obj) { 133 | ensure_room(dict); 134 | u32 hv = dict->hashfn(key); 135 | bool overwrite = store(dict, key, hv, obj); 136 | if (!overwrite) dict->nelem++; 137 | } 138 | 139 | bool dict_delete(Dict *dict, void *key) { 140 | Bucket *ent = find_bucket(dict, key, dict->hashfn(key), false); 141 | if (BUCKET_EMPTY(ent)) return false; 142 | ent->hashval = 0; 143 | ent->key = DELETED; 144 | ent->elem = NULL; 145 | dict->nelem--; 146 | return true; 147 | } 148 | 149 | void *dict_get(Dict *dict, void *key) { 150 | Bucket *ent = find_bucket(dict, key, dict->hashfn(key), false); 151 | if (BUCKET_EMPTY(ent)) return NULL; 152 | return ent->elem; 153 | } 154 | 155 | bool dict_has(Dict *dict, void *key) { 156 | Bucket *ent = find_bucket(dict, key, dict->hashfn(key), false); 157 | return !BUCKET_EMPTY(ent); 158 | } 159 | 160 | int dict_size(Dict *dict) { 161 | return dict->nelem; 162 | } 163 | 164 | /*============================================================ 165 | * Iterator 166 | */ 167 | 168 | DictIter *make_dict_iter(Dict* dict) { 169 | DictIter *r = malloc(sizeof(DictIter)); 170 | r->dict = dict; 171 | r->idx = 0; 172 | return r; 173 | } 174 | 175 | void *dict_iter_next(DictIter* iter) { 176 | while (iter->idx < iter->dict->nalloc) { 177 | Bucket *ent = &iter->dict->buckets[iter->idx]; 178 | iter->idx++; 179 | if (!BUCKET_EMPTY(ent)) { 180 | void **r = malloc(sizeof(void *) * 2); 181 | r[0] = ent->key; 182 | r[1] = ent->elem; 183 | return r; 184 | } 185 | } 186 | return NULL; 187 | } 188 | -------------------------------------------------------------------------------- /elf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * elf.c - ELF file format handlers 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 
5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | /* 11 | * Functions to create an ELF format object file that you can link to create an 12 | * executable. 13 | * 14 | * An ELF object file consists of the file header and one or more "sections". 15 | * Some sections contain data used by the executable itself, such as the data section 16 | * (contains initialized data), text section (code), or bss (uninitialized 17 | * data). Other sections are used by linkers and loaders. These include 18 | * relocation information and symbol tables. 19 | * 20 | * In order to understand what the functions in this file actually do, you may 21 | * want to read documents regarding the ELF format first. Here is a list of documents 22 | * I found useful for the purpose. 23 | * 24 | * Linkers and Loaders by John R. Levine, published by Morgan Kaufmann in 25 | * October 1999, ISBN 1-55860-496-0. 26 | * http://linker.iecc.com/ 27 | * 28 | * Tool Interface Standard (TIS) Executable and Linking Format (ELF) 29 | * Specification - Version 1.2 (May 1995) 30 | * http://refspecs.freestandards.org/elf/elf.pdf 31 | * 32 | * ELF-64 Object File Format - Version 1.5 Draft 2 (May 27, 1998) 33 | * http://downloads.openwatcom.org/ftp/devel/docs/elf-64-gen.pdf 34 | * 35 | * Ulrich Drepper (August 20, 2006). How To Write Shared Libraries. 4.0. 36 | * http://people.redhat.com/drepper/dsohowto.pdf 37 | */ 38 | 39 | /* First 16 bytes of ELF file on x86-64.
*/ 40 | static u8 elf_ident[] = {0x7f, 0x45, 0x4c, 0x46, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 41 | 42 | static Elf *make_elf(void) { 43 | Elf *elf = malloc(sizeof(Elf)); 44 | elf->sections = make_list(); 45 | elf->shnum = 0; 46 | elf->symtabnum = 0; 47 | elf->syms = make_string_dict(); 48 | return elf; 49 | } 50 | 51 | static Section *make_section(char *name, int type) { 52 | Section *sect = malloc(sizeof(Section)); 53 | sect->body = make_string(); 54 | sect->name = malloc(strlen(name) + 1); 55 | strcpy(sect->name, name); 56 | sect->shstrtab_off = 0; 57 | sect->type = type; 58 | sect->flags = 0; 59 | sect->align = 1; 60 | sect->rels = make_list(); 61 | sect->link = 0; 62 | sect->info = 0; 63 | sect->entsize = 0; 64 | sect->symindex = 0; 65 | sect->memory_pos = NULL; 66 | return sect; 67 | } 68 | 69 | static void add_section(Elf *elf, Section *sect) { 70 | list_push(elf->sections, sect); 71 | sect->shndx = LIST_LEN(elf->sections); 72 | } 73 | 74 | Elf *new_elf(void) { 75 | Elf *elf = make_elf(); 76 | 77 | Section *data = make_section(".data", SHT_PROGBITS); 78 | data->flags = SHF_ALLOC | SHF_WRITE; 79 | data->align = 4; 80 | add_section(elf, data); 81 | 82 | Section *text = make_section(".text", SHT_PROGBITS); 83 | text->flags = SHF_ALLOC | SHF_EXECINSTR; 84 | text->align = 16; 85 | add_section(elf, text); 86 | return elf; 87 | } 88 | 89 | /*============================================================ 90 | * Symbol table 91 | */ 92 | 93 | static void write_one_symbol(Symbol *sym, int *index, String *symtab, String *strtab) { 94 | if (sym->name) { 95 | o4(symtab, STRING_LEN(strtab)); // st_name 96 | ostr(strtab, STRING_BODY(sym->name)); 97 | } else { 98 | o4(symtab, 0); // st_name 99 | } 100 | o1(symtab, ELF64_ST_INFO(sym->bind, sym->type)); // st_info; 101 | o1(symtab, 0); // st_other; 102 | if (sym->defined) { 103 | o2(symtab, sym->section->shndx); // st_shndx 104 | } else { 105 | o2(symtab, 0); // st_shndx 106 | } 107 | o8(symtab, sym->value); // st_value 108 | 
o8(symtab, 0); // st_size 109 | sym->index = (*index)++; 110 | } 111 | 112 | /* 113 | * Symbols whose attribute is LOCAL must come before other symbols in symbol 114 | * table. This function is called twice. In the first pass, localonly is false 115 | * so this outputs local symbols. In the second pass, this outputs non-local 116 | * ones. 117 | */ 118 | static void write_sym_to_buf(Elf *elf, int *index, String *symtab, String *strtab, bool localonly) { 119 | DictIter *iter = make_dict_iter(elf->syms); 120 | void **p; 121 | for (p = dict_iter_next(iter); p; p = dict_iter_next(iter)) { 122 | Symbol *sym = p[1]; 123 | if (localonly && sym->bind != STB_LOCAL) 124 | continue; 125 | if (!localonly && sym->bind == STB_LOCAL) 126 | continue; 127 | write_one_symbol(sym, index, symtab, strtab); 128 | } 129 | } 130 | 131 | static void write_section_sym(Elf *elf, int *index, String *symtab, String *strtab) { 132 | for (int i = 0; i < LIST_LEN(elf->sections); i++) { 133 | Section *sect = LIST_REF(elf->sections, i); 134 | Symbol *sym = make_symbol(NULL, sect, 0, STB_LOCAL, STT_SECTION, 1); 135 | write_one_symbol(sym, index, symtab, strtab); 136 | sect->symindex = sym->index; 137 | } 138 | } 139 | 140 | static void add_symtab(Elf *elf) { 141 | String *symtabb = make_string(); 142 | String *strtabb = make_string(); 143 | o1(strtabb, 0); 144 | // Null symbol 145 | for (int i = 0; i < 24; i++) o1(symtabb, 0); 146 | // File symbol 147 | o4(symtabb, STRING_LEN(strtabb)); // st_name 148 | ostr(strtabb, "noname"); 149 | o1(symtabb, ELF64_ST_INFO(STB_LOCAL, STT_FILE)); // st_info 150 | o1(symtabb, 0); // other 151 | o2(symtabb, SHN_ABS); // st_shndx 152 | o8(symtabb, 0); // st_value 153 | o8(symtabb, 0); // st_size 154 | 155 | int index = 2; 156 | write_sym_to_buf(elf, &index, symtabb, strtabb, true); 157 | int localidx = index; 158 | write_section_sym(elf, &index, symtabb, strtabb); 159 | write_sym_to_buf(elf, &index, symtabb, strtabb, false); 160 | elf->symtabnum = 
LIST_LEN(elf->sections) + 1; 161 | 162 | Section *symtab = make_section(".symtab", SHT_SYMTAB); 163 | symtab->body = symtabb; 164 | symtab->link = LIST_LEN(elf->sections) + 2; 165 | symtab->info = localidx + 2; 166 | symtab->entsize = 24; 167 | symtab->align = 4; 168 | add_section(elf, symtab); 169 | 170 | Section *strtab = make_section(".strtab", SHT_STRTAB); 171 | strtab->body = strtabb; 172 | add_section(elf, strtab); 173 | } 174 | 175 | /*============================================================ 176 | * Relocations 177 | */ 178 | 179 | Symbol *find_symbol(Elf *elf, char *name) { 180 | Symbol *sym = dict_get(elf->syms, to_string(name)); 181 | if (!sym) 182 | error("cannot find symbol '%s'", name); 183 | return sym; 184 | } 185 | 186 | static void add_reloc(Elf *elf) { 187 | char name[100]; 188 | for (int i = 0; i < LIST_LEN(elf->sections); i++) { 189 | Section *sect = LIST_REF(elf->sections, i); 190 | if (LIST_LEN(sect->rels) == 0) 191 | continue; 192 | String *b = make_string(); 193 | for (int j = 0; j < LIST_LEN(sect->rels); j++) { 194 | Reloc *rel = LIST_REF(sect->rels, j); 195 | o8(b, rel->off); 196 | if (rel->sym) { 197 | o8(b, ELF64_R_INFO(find_symbol(elf, rel->sym)->index, rel->type)); 198 | } else { 199 | o8(b, ELF64_R_INFO(rel->section->symindex, rel->type)); 200 | } 201 | o8(b, rel->addend); 202 | } 203 | 204 | strcpy(name, ".rela"); 205 | strcpy(name + 5, sect->name); 206 | Section *relsec = make_section(name, SHT_RELA); 207 | relsec->link = elf->symtabnum; 208 | relsec->info = i + 1; 209 | relsec->body = b; 210 | relsec->entsize = 24; 211 | relsec->align = 4; 212 | add_section(elf, relsec); 213 | } 214 | } 215 | 216 | /*============================================================ 217 | * ".shstrtab" section 218 | */ 219 | 220 | static void add_shstrtab(Elf *elf) { 221 | Section *shstr = make_section(".shstrtab", SHT_STRTAB); 222 | elf->shnum = LIST_LEN(elf->sections) + 1; 223 | add_section(elf, shstr); 224 | String *b = make_string(); 225 | o1(b, 
0); 226 | for (int i = 0; i < LIST_LEN(elf->sections); i++) { 227 | Section *sect = LIST_REF(elf->sections, i); 228 | sect->shstrtab_off = STRING_LEN(b); 229 | ostr(b, sect->name); 230 | } 231 | shstr->body = b; 232 | } 233 | 234 | /*============================================================ 235 | * Outputs ELF file 236 | */ 237 | 238 | static void write_section(String *header, String *content, Section *sect, int offset) { 239 | o4(header, sect->shstrtab_off); // sh_name 240 | o4(header, sect->type); // sh_type 241 | o8(header, sect->flags); // sh_flags 242 | o8(header, 0); // sh_addr 243 | o8(header, STRING_LEN(content) + offset); // sh_offset 244 | o8(header, STRING_LEN(sect->body)); // sh_size 245 | o4(header, sect->link); // sh_link = SHN_UNDEF 246 | o4(header, sect->info); // sh_info 247 | o8(header, sect->align); // sh_addralign 248 | o8(header, sect->entsize); // sh_entsize 249 | out(content, STRING_BODY(sect->body), STRING_LEN(sect->body)); 250 | align(content, 16); 251 | } 252 | 253 | void write_elf(FILE *outfile, Elf *elf) { 254 | add_symtab(elf); 255 | add_reloc(elf); 256 | add_shstrtab(elf); 257 | 258 | // Section header 259 | String *sh = make_string(); 260 | for (int i = 0; i < 64; i++) 261 | o1(sh, 0); // NULL section header 262 | 263 | // Body 264 | String *content = make_string(); 265 | for (int i = 0; i < LIST_LEN(elf->sections); i++) { 266 | write_section(sh, content, LIST_REF(elf->sections, i), 64); 267 | } 268 | align(content, 16); 269 | 270 | // ELF header 271 | String *eh = make_string(); 272 | int numsect = LIST_LEN(elf->sections) + 1; 273 | out(eh, elf_ident, sizeof(elf_ident)); 274 | o2(eh, 1); // e_type = ET_REL 275 | o2(eh, 62); // e_machine = EM_X86_64 276 | o4(eh, 1); // e_version = EV_CURRENT 277 | o8(eh, 0); // e_entry 278 | o8(eh, 0); // e_phoff 279 | o8(eh, STRING_LEN(content) + 64); // e_shoff; 280 | o4(eh, 0); // e_flags 281 | o2(eh, 64); // e_ehsize 282 | o2(eh, 0); // e_phentsize 283 | o2(eh, 0); // e_phnum 284 | o2(eh, 64); 
// e_shentsize 285 | o2(eh, numsect); // e_shnum 286 | o2(eh, elf->shnum); // e_shstrndx 287 | 288 | fwrite(STRING_BODY(eh), STRING_LEN(eh), 1, outfile); 289 | fwrite(STRING_BODY(content), STRING_LEN(content), 1, outfile); 290 | fwrite(STRING_BODY(sh), STRING_LEN(sh), 1, outfile); 291 | fclose(outfile); 292 | } 293 | -------------------------------------------------------------------------------- /error.c: -------------------------------------------------------------------------------- 1 | /* 2 | * error.c - error handlers 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | #include 10 | 11 | Exception *current_handler; 12 | 13 | Exception *make_exception(void) { 14 | Exception *e = malloc(sizeof(Exception)); 15 | e->msg = NULL; 16 | return e; 17 | } 18 | 19 | static void print(char *pre, char *format, va_list ap) { 20 | fprintf(stderr, "%s", pre); 21 | vfprintf(stderr, format, ap); 22 | fprintf(stderr, "\n"); 23 | } 24 | 25 | void debug(char *format, ...) { 26 | va_list ap; 27 | va_start(ap, format); 28 | vfprintf(stderr, format, ap); 29 | va_end(ap); 30 | } 31 | 32 | static NORETURN void verror(char *format, va_list ap) { 33 | if (current_handler) { 34 | Exception *e = current_handler; 35 | current_handler = NULL; 36 | e->msg = to_string("ERROR: "); 37 | string_vprintf(e->msg, format, ap); 38 | longjmp(e->jmpbuf, 1); 39 | } 40 | print("ERROR: ", format, ap); 41 | exit(-1); 42 | } 43 | 44 | static void vwarn(char *format, va_list ap) { 45 | print("WARN: ", format, ap); 46 | } 47 | 48 | NORETURN void error(char *format, ...) { 49 | va_list ap; 50 | va_start(ap, format); 51 | verror(format, ap); 52 | va_end(ap); 53 | } 54 | 55 | void warn(char *format, ...) 
{ 56 | va_list ap; 57 | va_start(ap, format); 58 | vwarn(format, ap); 59 | va_end(ap); 60 | } 61 | 62 | NORETURN void print_parse_error(int line, int column, char *msg, va_list ap) { 63 | String *b = make_string_printf("Line %d:%d: ", line, column); 64 | string_append(b, msg); 65 | verror(STRING_BODY(b), ap); 66 | } 67 | 68 | static void print_stack_trace_int(bool safe) { 69 | void *buf[20]; 70 | int size = backtrace(buf, sizeof(buf)); 71 | fprintf(stderr, "Stack trace:\n"); 72 | fflush(stderr); 73 | 74 | if (safe) 75 | backtrace_symbols_fd(buf, size, STDERR_FILENO); 76 | else { 77 | char **strs = backtrace_symbols(buf, size); 78 | for (int i = 0; i < size; i++) 79 | fprintf(stderr, " %s\n", strs[i]); 80 | free(strs); 81 | } 82 | } 83 | 84 | void print_stack_trace(void) { 85 | print_stack_trace_int(false); 86 | } 87 | 88 | /* 89 | * print_stack_trace() that don't call malloc(). 90 | */ 91 | void print_stack_trace_safe(void) { 92 | print_stack_trace_int(true); 93 | } 94 | -------------------------------------------------------------------------------- /file.c: -------------------------------------------------------------------------------- 1 | /* 2 | * file.c - I/O implementation 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | /* 11 | * A wrapper object for stdio's FILE. You can pushback up to two characters to 12 | * a File. 13 | * 14 | * You can parse C source by peeking one character. The only case I know of 15 | * where you have to peek two characters is %:%: (bigraph token ##). 
16 | */ 17 | 18 | static void init_file(File *file, char *filename) { 19 | file->line = 1; 20 | file->column = 1; 21 | file->last_column = 0; 22 | file->filename = to_string(filename); 23 | file->ungotten[0] = EOF; 24 | file->ungotten[1] = EOF; 25 | file->eof_flag = false; 26 | } 27 | 28 | File *make_file(FILE *stream, char *filename) { 29 | File *r = malloc(sizeof(File)); 30 | r->type = FILE_STDIO; 31 | r->stream = stream; 32 | init_file(r, filename); 33 | return r; 34 | } 35 | 36 | File *make_string_file(String *s) { 37 | ASSERT(STRING_BODY(s)[STRING_LEN(s)-1] == '\0'); 38 | 39 | File *r = malloc(sizeof(File)); 40 | r->type = FILE_STRING; 41 | r->buf = STRING_BODY(s); 42 | r->pos = 0; 43 | init_file(r, "-"); 44 | return r; 45 | } 46 | 47 | File *open_file(char *path) { 48 | if (!strcmp(path, "-")) { 49 | return make_file(stdin, "-"); 50 | } 51 | FILE *stream = fopen(path, "r"); 52 | if (stream == NULL) { 53 | perror("fopen failed: "); 54 | exit(-1); 55 | } 56 | return make_file(stream, path); 57 | } 58 | 59 | void close_file(File *file) { 60 | if (file->type == FILE_STDIO) 61 | fclose(file->stream); 62 | } 63 | 64 | void unreadc(int c, File *file) { 65 | if (c == '\0' || c == EOF) 66 | return; 67 | if (c == '\n') { 68 | file->line--; 69 | file->column = file->last_column; 70 | } else { 71 | file->column--; 72 | } 73 | if (file->ungotten[0] == EOF) 74 | file->ungotten[0] = c; 75 | else if (file->ungotten[1] == EOF) 76 | file->ungotten[1] = c; 77 | else 78 | panic("pushback buffer is full: '%c'", c); 79 | file->eof_flag = false; 80 | } 81 | 82 | /* 83 | * Returns the next character without consuming it. 84 | */ 85 | int peekc(File *file) { 86 | int c = readc(file); 87 | unreadc(c, file); 88 | return c; 89 | } 90 | 91 | /* 92 | * Consume next character iff the same as a given charcter. 
93 | */ 94 | bool next_char_is(File *file, int c) { 95 | int c1 = readc(file); 96 | if (c == c1) 97 | return true; 98 | unreadc(c1, file); 99 | return false; 100 | } 101 | 102 | int readc_int(File *file) { 103 | if (file->eof_flag) 104 | return EOF; 105 | if (file->ungotten[1] != EOF) { 106 | int c = file->ungotten[1]; 107 | file->ungotten[1] = EOF; 108 | return c; 109 | } 110 | if (file->ungotten[0] != EOF) { 111 | int c = file->ungotten[0]; 112 | file->ungotten[0] = EOF; 113 | return c; 114 | } 115 | 116 | if (file->type == FILE_STDIO) 117 | return getc(file->stream); 118 | if (file->type == FILE_STRING) 119 | return file->buf[file->pos++]; 120 | panic("unknown file type: %c", file->type); 121 | } 122 | 123 | static void next_line(File *file, int c) { 124 | file->line++; 125 | file->last_column = file->column; 126 | file->column = 1; 127 | if (c == '\r') { 128 | int c1 = readc_int(file); 129 | if (c1 != '\n') 130 | unreadc(c1, file); 131 | } 132 | } 133 | 134 | /* 135 | * Abstracts C source file. This does two things: 136 | * 137 | * - Converts "\r\n" or "\r" to "\n". 138 | ** 139 | * - Removes backslash and following end-of-line marker. This needs 140 | * to happen before preprocessing and before the lexical analysis 141 | * of the C program. 
(C:ARM p.13 2.1.2 Whitespace and Line 142 | * Termination) 143 | */ 144 | int readc(File *file) { 145 | int c = readc_int(file); 146 | if (c == EOF || c == '\0') { 147 | file->eof_flag = true; 148 | return EOF; 149 | } 150 | if (c == '\\') { 151 | int c1 = readc_int(file); 152 | if (c1 == '\r' || c1 == '\n') { 153 | next_line(file, c1); 154 | return readc(file); 155 | } 156 | unreadc(c1, file); 157 | return c; 158 | } 159 | if (c == '\r' || c == '\n') { 160 | next_line(file, c); 161 | return '\n'; 162 | } 163 | file->column++; 164 | return c; 165 | } 166 | -------------------------------------------------------------------------------- /gen.c: -------------------------------------------------------------------------------- 1 | /* 2 | * gen.c - code generator 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | /* 11 | * Code generator for x86-64. 12 | * 13 | * Reference documents for x86-64 instruction sets: 14 | * 15 | * Intel 64 and IA-32 Architecture, Volume 1: Basic Architectures (March 2010) 16 | * http://www.intel.com/Assets/PDF/manual/253665.pdf 17 | * 18 | * Intel 64 and IA-32 Architecture, Volume 2A:Instruction Set Reference, A-M 19 | * http://www.intel.com/Assets/PDF/manual/253666.pdf 20 | * 21 | * Intel 64 and IA-32 Architecture, Volume 2B: Instruction Set Reference, N-Z 22 | * http://www.intel.com/Assets/PDF/manual/253667.pdf 23 | */ 24 | 25 | bool flag_debug = false; 26 | 27 | typedef struct CompiledVar { 28 | int off; 29 | } CompiledVar; 30 | 31 | static CompiledVar *make_compiled_var(int off) { 32 | CompiledVar *r = malloc(sizeof(CompiledVar)); 33 | r->off = off; 34 | return r; 35 | } 36 | 37 | #define RESERVED ((void *)-1) 38 | #define USABLE(ctx, reg) ((ctx)->var[reg] != RESERVED && !(ctx)->var[reg]) 39 | #define INUSE(ctx, reg) ((ctx)->var[reg] != RESERVED && (ctx)->var[reg]) 40 | 41 | typedef struct Context { 42 
| Elf *elf; 43 | String *text; 44 | Dict *stack; 45 | Dict *global; 46 | List *func_tbf; 47 | int sp; 48 | // For register allocation 49 | Var *var[16]; 50 | int lastuse[16]; 51 | int serial; 52 | } Context; 53 | 54 | static void reset_context(Context *ctx) { 55 | static void *init[] = { RESERVED, 0, 0, 0, RESERVED, RESERVED, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 56 | for (int i = 0; i < 16; i++) { 57 | ctx->var[i] = init[i]; 58 | ctx->lastuse[i] = 0; 59 | } 60 | ctx->serial = 1; 61 | } 62 | 63 | static Context *make_context(Elf *elf) { 64 | Context *r = malloc(sizeof(Context)); 65 | r->elf = elf; 66 | r->text = find_section(elf, ".text")->body; 67 | r->stack = make_address_dict(); 68 | r->global = make_address_dict(); 69 | r->func_tbf = NULL; 70 | r->sp = 0; 71 | reset_context(r); 72 | return r; 73 | } 74 | 75 | Section *find_section(Elf *elf, char *name) { 76 | for (int i = 0; i < LIST_LEN(elf->sections); i++) { 77 | Section *sect = LIST_REF(elf->sections, i); 78 | if (!strcmp(sect->name, name)) 79 | return sect; 80 | } 81 | panic("cannot find section '%s'", name); 82 | } 83 | 84 | Symbol *make_symbol(String *name, Section *sect, long value, int bind, int type, int defined) { 85 | Symbol *sym = malloc(sizeof(Symbol)); 86 | sym->name = name; 87 | sym->section = sect; 88 | sym->value = value; 89 | sym->bind = bind; 90 | sym->type = type; 91 | sym->defined = defined; 92 | sym->index = -1; 93 | return sym; 94 | } 95 | 96 | static Reloc *make_reloc(long off, char *sym, Section *section, int type, u64 addend) { 97 | Reloc *rel = malloc(sizeof(Reloc)); 98 | rel->off = off; 99 | rel->sym = sym; 100 | rel->section = section; 101 | rel->type = type; 102 | rel->addend = addend; 103 | return rel; 104 | } 105 | 106 | /*============================================================================== 107 | * X86 machine code generation. 
108 | */ 109 | 110 | static void emit1(Context *ctx, u8 b) { o1(ctx->text, b); } 111 | static void emit2(Context *ctx, u16 w) { o2(ctx->text, w); } 112 | static void emit3(Context *ctx, u32 d) { o3(ctx->text, d); } 113 | static void emit4(Context *ctx, u32 d) { o4(ctx->text, d); } 114 | static void emit8(Context *ctx, u64 q) { o8(ctx->text, q); } 115 | 116 | static void spill(Context *ctx, int reg); 117 | 118 | /* 119 | * Mod R/M and SIB 120 | * 121 | * Many x86 instructions takes Mod R/M and optional SIB and displacement bytes. 122 | * As described in Chapter 2 in the Intel 64 and IA-32 Architecture Software 123 | * Developer's Manual, Volume 2A, there are four different use cases of these 124 | * bytes. 125 | * 126 | * - Memory addressing without a SIB byte 127 | * - Register-register addressing 128 | * - Memory addressing with a SIB byte 129 | * - Register operand coded in opcode byte 130 | * 131 | * For the details of x86 machine code format, refer to the Intel Developer's 132 | * Manuals. 133 | */ 134 | 135 | static void emit_prefix(Context *ctx, int size, int reg0, int reg1) { 136 | if (size == 2) 137 | emit1(ctx, 0x66); 138 | if (size != 8 && !EXT_REG(reg0) && !EXT_REG(reg1)) 139 | return; 140 | int rex = 0x40; 141 | if (size == 8) rex |= 8; 142 | if (EXT_REG(reg0)) rex |= 4; 143 | if (EXT_REG(reg1)) rex |= 1; 144 | o1(ctx->text, rex); 145 | } 146 | 147 | static void emit_op(Context *ctx, int op) { 148 | if (op < 0x100) 149 | emit1(ctx, op); 150 | else if (op < 0x10000) 151 | emit2(ctx, op); 152 | else 153 | emit3(ctx, op); 154 | } 155 | 156 | static void emit_modrm(Context *ctx, int mod, int reg, int rm) { 157 | int byte = (mod << 6) | ((reg & 7) << 3) | (rm & 7); 158 | o1(ctx->text, byte); 159 | } 160 | 161 | static void emit_memop(Context *ctx, int size, int op, int reg0, int reg1, int off) { 162 | emit_prefix(ctx, size, reg0, reg1); 163 | emit_op(ctx, op); 164 | int oneword = -128 <= off && off <= 127; 165 | emit_modrm(ctx, oneword ? 
1 : 2, reg0, reg1); 166 | if (oneword) 167 | emit1(ctx, off); 168 | else 169 | emit4(ctx, off); 170 | } 171 | 172 | static void emit_regop(Context *ctx, int size, int op, int reg0, int reg1) { 173 | emit_prefix(ctx, size, reg0, reg1); 174 | emit_op(ctx, op); 175 | emit_modrm(ctx, 3, reg0, reg1); 176 | } 177 | 178 | /*====================================================================== 179 | * Instructions for intermediate code 180 | */ 181 | 182 | static void handle_block(Context *ctx, Block *block); 183 | 184 | static Inst *make_inst(int op) { 185 | Inst *r = malloc(sizeof(Inst)); 186 | r->op = op; 187 | r->args = make_list(); 188 | return r; 189 | } 190 | 191 | Inst *make_inst0(int op) { 192 | Inst *r = make_inst(op); 193 | return r; 194 | } 195 | 196 | Inst *make_inst1(int op, void *v0) { 197 | Inst *r = make_inst(op); 198 | list_push(r->args, v0); 199 | return r; 200 | } 201 | 202 | Inst *make_inst2(int op, void *v0, void *v1) { 203 | Inst *r = make_inst(op); 204 | list_push(r->args, v0); 205 | list_push(r->args, v1); 206 | return r; 207 | } 208 | 209 | Inst *make_inst3(int op, void *v0, void *v1, void *v2) { 210 | Inst *r = make_inst(op); 211 | list_push(r->args, v0); 212 | list_push(r->args, v1); 213 | list_push(r->args, v2); 214 | return r; 215 | } 216 | 217 | Inst *make_inst4(int op, void *v0, void *v1, void *v2, void *v3) { 218 | Inst *r = make_inst(op); 219 | list_push(r->args, v0); 220 | list_push(r->args, v1); 221 | list_push(r->args, v2); 222 | list_push(r->args, v3); 223 | return r; 224 | } 225 | 226 | Inst *make_instn(int op, List *args) { 227 | Inst *r = make_inst(op); 228 | r->args = args; 229 | return r; 230 | } 231 | 232 | bool is_flonum(Ctype *ctype) { 233 | return ctype->type == CTYPE_FLOAT || ctype->type == CTYPE_DOUBLE; 234 | } 235 | 236 | /* 237 | * Code generator 238 | */ 239 | 240 | static int var_abs_pos(Context *ctx, Var *var) { 241 | ASSERT(var->stype == VAR_IMM); 242 | ASSERT(var->ctype->type == CTYPE_ARRAY); 243 | CompiledVar *cvar 
= dict_get(ctx->global, var); 244 | if (!cvar) { 245 | Section *data = find_section(ctx->elf, ".data"); 246 | cvar = make_compiled_var(STRING_LEN(data->body)); 247 | out(data->body, STRING_BODY(var->val.s), STRING_LEN(var->val.s)); 248 | dict_put(ctx->global, var, cvar); 249 | } 250 | return cvar->off; 251 | } 252 | 253 | static int var_stack_pos(Context *ctx, Var *var) { 254 | CompiledVar *cvar = dict_get(ctx->stack, var); 255 | if (cvar == NULL) { 256 | ctx->sp += ctype_sizeof(var->ctype); 257 | cvar = make_compiled_var(ctx->sp * -8); 258 | dict_put(ctx->stack, var, cvar); 259 | } 260 | return cvar->off; 261 | } 262 | 263 | static void add_reloc(Section *text, long off, char *sym, Section *section, int type, u64 addend) { 264 | Reloc *rel = make_reloc(off, sym, section, type, addend); 265 | list_push(text->rels, rel); 266 | } 267 | 268 | 269 | // Registers for function argument passing. 270 | static const int grp_arg[] = { RDI, RSI, RDX, RCX, R8, R9 }; 271 | static const int xmm_arg[] = { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 }; 272 | 273 | static void load_imm(Context *ctx, int reg, Var *var) { 274 | if (var->ctype->type == CTYPE_INT) { 275 | // MOV reg, imm 276 | emit_prefix(ctx, 8, 0, reg); 277 | emit1(ctx, 0xb8 | (reg & 7)); 278 | emit8(ctx, var->val.i); 279 | return; 280 | } 281 | if (var->ctype->type == CTYPE_ARRAY) { 282 | Section *text = find_section(ctx->elf, ".text"); 283 | Section *data = find_section(ctx->elf, ".data"); 284 | // MOV rax/r11, imm 285 | emit_prefix(ctx, 8, 0, reg); 286 | emit1(ctx, 0xb8 | (reg & 7)); 287 | int off = var_abs_pos(ctx, var); 288 | add_reloc(text, STRING_LEN(ctx->text), NULL, data, R_X86_64_64, off); 289 | emit8(ctx, 0); 290 | return; 291 | } 292 | panic("unsupported IMM ctype: %d", var->ctype->type); 293 | } 294 | 295 | static void assign(Context *ctx, Var *var, int reg); 296 | 297 | static void load(Context *ctx, int reg, Var *var) { 298 | spill(ctx, reg); 299 | if (var->stype == VAR_IMM) { 300 | load_imm(ctx, 
reg, var); 301 | return; 302 | } 303 | for (int i = 0; i < 16; i++) 304 | if (INUSE(ctx, i) && reg != i && ctx->var[i] == var) { 305 | emit_regop(ctx, 8, 0x8b, reg, i); 306 | ctx->var[i] = NULL; 307 | assign(ctx, var, i); 308 | return; 309 | } 310 | 311 | int off = var_stack_pos(ctx, var); 312 | int size = ctype_sizeof(var->ctype); 313 | int signedp = var->ctype->signedp; 314 | // MOVSX/MOVZX reg, [rbp+off] 315 | if (size == 1) 316 | emit_memop(ctx, size, signedp ? 0xbe0f : 0xb60f, reg, RBP, off); 317 | else if (size == 2) 318 | emit_memop(ctx, size, signedp ? 0xbf0f : 0xb70f, reg, RBP, off); 319 | else if (size == 4) 320 | emit_memop(ctx, signedp ? 8 : 4, signedp ? 0x63 : 0x8b, reg, RBP, off); 321 | else 322 | emit_memop(ctx, size, 0x8b, reg, RBP, off); 323 | } 324 | 325 | static void save(Context *ctx, Var *dst, int reg) { 326 | int off = var_stack_pos(ctx, dst); 327 | int size = ctype_sizeof(dst->ctype); 328 | // MOV [rbp+off], reg 329 | if (size == 1) 330 | emit_memop(ctx, 1, 0x88, reg, RBP, off); 331 | else 332 | emit_memop(ctx, size, 0x89, reg, RBP, off); 333 | } 334 | 335 | static u64 flonum_to_u64(double d) { 336 | u64 *p = (u64 *)&d; 337 | return *p; 338 | } 339 | 340 | static void load_xmm_imm(Context *ctx, int xmmreg, Var *var) { 341 | ASSERT(is_flonum(var->ctype)); 342 | // SUB rsp, 8 343 | emit3(ctx, 0xec8148); 344 | emit4(ctx, 8); 345 | // MOV rax, imm 346 | emit2(ctx, 0xb848); 347 | emit8(ctx, flonum_to_u64(var->val.f)); 348 | // MOV [rsp], rax 349 | emit4(ctx, 0x24048948); 350 | // MOVSD reg, [rsp] 351 | emit_prefix(ctx, 4, 0, xmmreg); 352 | emit3(ctx, 0x100ff2); 353 | emit_modrm(ctx, 0, xmmreg, RSP); 354 | emit1(ctx, 0x24); 355 | // ADD rsp, 8 356 | emit3(ctx, 0xc48148); 357 | emit4(ctx, 8); 358 | } 359 | 360 | static void load_xmm(Context *ctx, int reg, Var *var) { 361 | spill(ctx, RAX); 362 | if (var->stype == VAR_IMM) { 363 | load_xmm_imm(ctx, reg, var); 364 | return; 365 | } 366 | int off = var_stack_pos(ctx, var); 367 | if (var->ctype->type == 
CTYPE_FLOAT) { 368 | // MOVSS reg, [rbp+off] 369 | emit_prefix(ctx, 4, reg, reg); 370 | emit_memop(ctx, 4, 0x100ff3, reg, RBP, off); 371 | // CVTPS2PD reg, reg 372 | emit_prefix(ctx, 4, reg, reg); 373 | emit2(ctx, 0x5a0f); 374 | emit_modrm(ctx, 3, reg, reg); 375 | } else { 376 | // MOVSD reg, [rbp+off] 377 | emit_prefix(ctx, 4, reg, reg); 378 | emit_memop(ctx, 4, 0x100ff2, reg, RBP, off); 379 | } 380 | } 381 | 382 | static void save_xmm(Context *ctx, Var *var, int reg) { 383 | ASSERT(is_flonum(var->ctype)); 384 | int off = var_stack_pos(ctx, var); 385 | if (var->ctype->type == CTYPE_FLOAT) { 386 | // CVTPD2PS reg, reg 387 | emit_prefix(ctx, 4, reg, reg); 388 | emit3(ctx, 0x5a0f66); 389 | emit_modrm(ctx, 3, reg, reg); 390 | // MOVSS off, reg 391 | emit_prefix(ctx, 4, reg, reg); 392 | emit_memop(ctx, 4, 0x110ff3, reg, RBP, off); 393 | } else { 394 | // MOVSD off, reg 395 | emit_prefix(ctx, 4, reg, reg); 396 | emit_memop(ctx, 4, 0x110ff2, reg, RBP, off); 397 | } 398 | } 399 | 400 | /*============================================================================== 401 | * Register allocation 402 | */ 403 | 404 | static void assign(Context *ctx, Var *var, int reg) { 405 | for (int i = 0; i < 16; i++) 406 | if (ctx->var[i] == var) 407 | ctx->var[i] = NULL; 408 | ctx->var[reg] = var; 409 | ctx->lastuse[reg] = ctx->serial++; 410 | ctx->var[reg]->need_save = true; 411 | } 412 | 413 | static void spill(Context *ctx, int reg) { 414 | if (!INUSE(ctx, reg)) 415 | return; 416 | if (!ctx->var[reg]->need_save) 417 | return; 418 | save(ctx, ctx->var[reg], reg); 419 | ctx->var[reg] = NULL; 420 | } 421 | 422 | static void save_all(Context *ctx) { 423 | for (int i = 0; i < 16; i++) 424 | spill(ctx, i); 425 | } 426 | 427 | static void save_var(Context *ctx, Var *var) { 428 | for (int i = 0; i < 16; i++) 429 | if (ctx->var[i] == var) { 430 | spill(ctx, i); 431 | return; 432 | } 433 | } 434 | 435 | static int load_reg_replace(Context *ctx, int reg, Var *var) { 436 | spill(ctx, reg); 437 | 
load(ctx, reg, var); 438 | ctx->var[reg] = var; 439 | ctx->lastuse[reg] = ctx->serial++; 440 | var->need_save = false; 441 | return reg; 442 | } 443 | 444 | static int load_reg(Context *ctx, Var *var) { 445 | int reg; 446 | for (reg = 1; reg < 16; reg++) 447 | if (ctx->var[reg] == var) 448 | return load_reg_replace(ctx, reg, var); 449 | for (reg = 1; reg < 16; reg++) 450 | if (USABLE(ctx, reg)) 451 | return load_reg_replace(ctx, reg, var); 452 | 453 | int serial = INT_MAX; 454 | int oldest = 0; 455 | for (reg = 1; reg < 16; reg++) 456 | if (INUSE(ctx, reg) && !ctx->var[reg]->need_save && ctx->lastuse[reg] < serial) { 457 | serial = ctx->lastuse[reg]; 458 | oldest = reg; 459 | } 460 | if (serial != INT_MAX) 461 | return load_reg_replace(ctx, oldest, var); 462 | 463 | serial = INT_MAX; 464 | for (reg = 1; reg < 16; reg++) 465 | if (INUSE(ctx, reg) && ctx->lastuse[reg] < serial) { 466 | serial = ctx->lastuse[reg]; 467 | oldest = reg; 468 | } 469 | return load_reg_replace(ctx, reg, var); 470 | } 471 | 472 | /*============================================================================== 473 | * IL handlers 474 | */ 475 | 476 | static void handle_int_to_float(Context *ctx, Inst *inst) { 477 | Var *dst = LIST_REF(inst->args, 0); 478 | Var *src = LIST_REF(inst->args, 1); 479 | ASSERT(is_flonum(dst->ctype)); 480 | ASSERT(src->ctype->type == CTYPE_INT); 481 | load(ctx, RAX, src); 482 | // cvtsi2sd xmm0, rax 483 | emit4(ctx, 0x2a0f48f2); 484 | emit1(ctx, 0xc0); 485 | save_xmm(ctx, dst, XMM0); 486 | } 487 | 488 | static void handle_float_to_int(Context *ctx, Inst *inst) { 489 | Var *dst = LIST_REF(inst->args, 0); 490 | Var *src = LIST_REF(inst->args, 1); 491 | ASSERT(dst->ctype->type == CTYPE_INT); 492 | ASSERT(is_flonum(src->ctype)); 493 | spill(ctx, RAX); 494 | load_xmm(ctx, XMM0, src); 495 | // CVTTSD2SI eax, xmm0 496 | emit4(ctx, 0xc02c0ff2); 497 | save(ctx, dst, RAX); 498 | } 499 | 500 | static void handle_func_call(Context *ctx, Inst *inst) { 501 | Var *fn = 
LIST_REF(inst->args, 0); 502 | Section *text = find_section(ctx->elf, ".text"); 503 | int gpr = 0; 504 | int xmm = 0; 505 | 506 | save_all(ctx); 507 | for (int i = 2; i < LIST_LEN(inst->args); i++) { 508 | Var *var = LIST_REF(inst->args, i); 509 | if (is_flonum(var->ctype)) 510 | load_xmm(ctx, xmm_arg[xmm++], var); 511 | else 512 | load(ctx, grp_arg[gpr++], var); 513 | } 514 | if (!dict_get(ctx->elf->syms, fn->name)) { 515 | Symbol *sym = make_symbol(fn->name, text, 0, STB_GLOBAL, STT_NOTYPE, 0); 516 | dict_put(ctx->elf->syms, fn->name, sym); 517 | } 518 | emit1(ctx, 0xb8); // MOV eax 519 | emit4(ctx, xmm); 520 | emit1(ctx, 0xe8); // CALL 521 | list_push(ctx->func_tbf, fn->name); 522 | list_push(ctx->func_tbf, (void *)(intptr)STRING_LEN(ctx->text)); 523 | emit4(ctx, 0); 524 | 525 | // Save function return value to the stack; 526 | Var *rval = LIST_REF(inst->args, 1); 527 | save(ctx, rval, RAX); 528 | } 529 | 530 | static void finish_func_call(Elf *elf, Dict *func, List *tbf) { 531 | Section *text = find_section(elf, ".text"); 532 | for (int i = 0; i < LIST_LEN(tbf); i = i + 2) { 533 | String *fname = LIST_REF(tbf, i); 534 | u32 pos = (intptr)LIST_REF(tbf, i + 1); 535 | if (dict_has(func, fname)) { 536 | string_seek(text->body, pos); 537 | o4(text->body, (u32)(intptr)dict_get(func, fname) - pos - 4); 538 | string_seek(text->body, STRING_LEN(text->body)); 539 | } else { 540 | add_reloc(text, pos, STRING_BODY(fname), NULL, R_X86_64_PC32, -4); 541 | } 542 | } 543 | } 544 | 545 | static void handle_add_or_sub(Context *ctx, Inst *inst, bool add) { 546 | Var *dst = LIST_REF(inst->args, 0); 547 | Var *src0 = LIST_REF(inst->args, 1); 548 | Var *src1 = LIST_REF(inst->args, 2); 549 | if (is_flonum(src0->ctype)) { 550 | load_xmm(ctx, XMM0, src0); 551 | load_xmm(ctx, XMM7, src1); 552 | // ADDSD/SUBSD xmm0, xmm7 553 | emit4(ctx, add ? 
0xc7580ff2 : 0xc75c0ff2); 554 | save_xmm(ctx, dst, XMM0); 555 | return; 556 | } 557 | int reg0 = load_reg(ctx, src0); 558 | int reg1 = load_reg(ctx, src1); 559 | spill(ctx, reg0); 560 | // ADD/SUB reg0, reg1 561 | emit_regop(ctx, 8, add ? 1 : 0x29, reg1, reg0); 562 | ctx->var[reg0] = dst; 563 | } 564 | 565 | static void handle_imul(Context *ctx, Inst *inst) { 566 | Var *dst = LIST_REF(inst->args, 0); 567 | Var *src0 = LIST_REF(inst->args, 1); 568 | Var *src1 = LIST_REF(inst->args, 2); 569 | if (is_flonum(src0->ctype)) { 570 | load_xmm(ctx, XMM0, src0); 571 | load_xmm(ctx, XMM7, src1); 572 | // MULSD xmm0 xmm7 573 | emit4(ctx, 0xc7590ff2); 574 | save_xmm(ctx, dst, XMM0); 575 | return; 576 | } 577 | int reg0 = load_reg(ctx, src0); 578 | int reg1 = load_reg(ctx, src1); 579 | // IMUL reg0, reg1 580 | emit_regop(ctx, 8, 0xaf0f, reg0, reg1); 581 | assign(ctx, dst, reg0); 582 | save_all(ctx); 583 | } 584 | 585 | static void handle_idiv(Context *ctx, Inst *inst) { 586 | Var *dst = LIST_REF(inst->args, 0); 587 | Var *src0 = LIST_REF(inst->args, 1); 588 | Var *src1 = LIST_REF(inst->args, 2); 589 | if (is_flonum(src0->ctype)) { 590 | load_xmm(ctx, XMM0, src0); 591 | load_xmm(ctx, XMM7, src1); 592 | // DIVSD xmm0, xmm7 593 | emit4(ctx, 0xc75e0ff2); 594 | save_xmm(ctx, dst, XMM0); 595 | return; 596 | } 597 | spill(ctx, RAX); 598 | spill(ctx, RDX); 599 | // XOR edx, edx 600 | emit_regop(ctx, 4, 0x31, RDX, RDX); 601 | 602 | ctx->var[RAX] = ctx->var[RDX] = RESERVED; 603 | load(ctx, RAX, src0); 604 | int reg = load_reg(ctx, src1); 605 | ctx->var[RAX] = ctx->var[RDX] = NULL; 606 | 607 | // IDIV reg 608 | emit_regop(ctx, 8, 0xf7, 7, reg); 609 | assign(ctx, dst, RAX); 610 | } 611 | 612 | static void handle_not(Context *ctx, Inst *inst) { 613 | Var *dst = LIST_REF(inst->args, 0); 614 | Var *src = LIST_REF(inst->args, 1); 615 | load(ctx, RAX, src); 616 | // TEST eax, eax 617 | emit2(ctx, 0xc085); 618 | // SETE al 619 | emit3(ctx, 0xc0940f); 620 | // MOVZBL eax, al 621 | emit3(ctx, 
0xc0b60f); 622 | assign(ctx, dst, RAX); 623 | } 624 | 625 | static void emit_cmp(Context *ctx, Inst *inst, int op) { 626 | Var *dst = LIST_REF(inst->args, 0); 627 | Var *src0 = LIST_REF(inst->args, 1); 628 | Var *src1 = LIST_REF(inst->args, 2); 629 | save_all(ctx); 630 | load(ctx, RAX, src0); 631 | int reg1 = load_reg(ctx, src1); 632 | // CMP rax, reg1 633 | emit_regop(ctx, 8, 0x39, reg1, RAX); 634 | emit_regop(ctx, 4, op, 0, RAX); 635 | // MOVZX eax, al 636 | emit_regop(ctx, 4, 0xb60f, 0, RAX); 637 | assign(ctx, dst, RAX); 638 | } 639 | 640 | static void emit_fcmp(Context *ctx, Inst *inst, u32 op) { 641 | Var *dst = LIST_REF(inst->args, 0); 642 | Var *src0 = LIST_REF(inst->args, 1); 643 | Var *src1 = LIST_REF(inst->args, 2); 644 | spill(ctx, RAX); 645 | load_xmm(ctx, XMM0, src1); 646 | load_xmm(ctx, XMM7, src0); 647 | // UCOMISD xmm0, xmm7 648 | emit4(ctx, 0xc72e0f66); 649 | emit3(ctx, op); 650 | // MOVZX eax, al 651 | emit3(ctx, 0xc0b60f); 652 | assign(ctx, dst, RAX); 653 | } 654 | 655 | static void handle_less(Context *ctx, Inst *inst) { 656 | if (is_flonum(((Var *)LIST_REF(inst->args, 1))->ctype)) { 657 | // SETL al 658 | // emit_fcmp(ctx, inst, 0xc09c0f); 659 | // SETA al 660 | emit_fcmp(ctx, inst, 0xc0970f); 661 | } else { 662 | // SETL al 663 | emit_cmp(ctx, inst, 0x9c0f); 664 | } 665 | save_all(ctx); 666 | } 667 | 668 | static void handle_less_equal(Context *ctx, Inst *inst) { 669 | if (is_flonum(((Var *)LIST_REF(inst->args, 1))->ctype)) { 670 | // SETAE al 671 | emit_fcmp(ctx, inst, 0xc0930f); 672 | } else { 673 | // SETLE al 674 | emit_cmp(ctx, inst, 0x9e0f); 675 | } 676 | save_all(ctx); 677 | } 678 | 679 | static void handle_neg(Context *ctx, Inst *inst) { 680 | Var *dst = LIST_REF(inst->args, 0); 681 | Var *src = LIST_REF(inst->args, 1); 682 | int reg = load_reg(ctx, src); 683 | // NOT reg 684 | emit_regop(ctx, 4, 0xf7, 2, reg); 685 | assign(ctx, dst, reg); 686 | } 687 | 688 | static void handle_inst3(Context *ctx, Inst *inst, int op) { 689 | Var *dst = 
LIST_REF(inst->args, 0); 690 | Var *src0 = LIST_REF(inst->args, 1); 691 | Var *src1 = LIST_REF(inst->args, 2); 692 | int reg0 = load_reg(ctx, src0); 693 | int reg1 = load_reg(ctx, src1); 694 | emit_regop(ctx, 4, op, reg1, reg0); 695 | assign(ctx, dst, reg0); 696 | } 697 | 698 | static void handle_and(Context *ctx, Inst *inst) { 699 | handle_inst3(ctx, inst, 0x21); 700 | } 701 | 702 | static void handle_or(Context *ctx, Inst *inst) { 703 | handle_inst3(ctx, inst, 0x09); 704 | } 705 | 706 | static void handle_xor(Context *ctx, Inst *inst) { 707 | handle_inst3(ctx, inst, 0x31); 708 | } 709 | 710 | static void handle_shift(Context *ctx, Inst *inst, int op1) { 711 | Var *dst = LIST_REF(inst->args, 0); 712 | Var *src0 = LIST_REF(inst->args, 1); 713 | Var *src1 = LIST_REF(inst->args, 2); 714 | load_reg_replace(ctx, RCX, src1); 715 | int reg = load_reg(ctx, src0); 716 | emit_regop(ctx, 8, 0xd3, op1, reg); 717 | assign(ctx, dst, reg); 718 | } 719 | 720 | static void handle_shl(Context *ctx, Inst *inst) { 721 | handle_shift(ctx, inst, 4); 722 | } 723 | 724 | static void handle_shr(Context *ctx, Inst *inst) { 725 | handle_shift(ctx, inst, 5); 726 | } 727 | 728 | static void handle_assign(Context *ctx, Inst *inst) { 729 | Var *var = LIST_REF(inst->args, 0); 730 | Var *val = LIST_REF(inst->args, 1); 731 | ASSERT(var->ctype->type != CTYPE_ARRAY); 732 | ASSERT(val); 733 | if (is_flonum(((Var *)LIST_REF(inst->args, 1))->ctype)) { 734 | load_xmm(ctx, XMM0, val); 735 | save_xmm(ctx, var, XMM0); 736 | } else { 737 | int reg = load_reg(ctx, val); 738 | assign(ctx, var, reg); 739 | } 740 | } 741 | 742 | static void handle_equal(Context *ctx, Inst *inst, bool eq) { 743 | // SETE al or SETNE al 744 | if (is_flonum(((Var *)LIST_REF(inst->args, 1))->ctype)) { 745 | emit_fcmp(ctx, inst, eq ? 0xc0940f : 0xc0950f); 746 | } else { 747 | emit_cmp(ctx, inst, eq ? 
0x940f : 0x950f); 748 | } 749 | save_all(ctx); 750 | } 751 | 752 | static void handle_address(Context *ctx, Inst *inst) { 753 | Var *p = LIST_REF(inst->args, 0); 754 | Var *v = LIST_REF(inst->args, 1); 755 | spill(ctx, RAX); 756 | if (v->stype == VAR_IMM && v->ctype->type == CTYPE_ARRAY) { 757 | load(ctx, RAX, v); 758 | } else { 759 | save_var(ctx, v); 760 | int off = var_stack_pos(ctx, v); 761 | // LEA rax, [ebp+off] 762 | emit_memop(ctx, 8, 0x8d, RAX, RBP, off); 763 | } 764 | assign(ctx, p, RAX); 765 | } 766 | 767 | static void handle_deref(Context *ctx, Inst *inst) { 768 | Var *v = LIST_REF(inst->args, 0); 769 | Var *p = LIST_REF(inst->args, 1); 770 | int reg = load_reg(ctx, p); 771 | // MOV rax, [rax] 772 | emit_memop(ctx, 8, 0x8b, reg, reg, 0); 773 | assign(ctx, v, reg); 774 | } 775 | 776 | static void handle_assign_deref(Context *ctx, Inst *inst) { 777 | Var *loc = LIST_REF(inst->args, 0); 778 | Var *v = LIST_REF(inst->args, 1); 779 | save_all(ctx); 780 | int reg = load_reg(ctx, v); 781 | load(ctx, R11, loc); 782 | // MOV [R11], reg 783 | emit_memop(ctx, 8, 0x89, reg, R11, 0); 784 | } 785 | 786 | static void handle_if(Context *ctx, Inst *inst) { 787 | Var *cond = LIST_REF(inst->args, 0); 788 | Block *then = LIST_REF(inst->args, 1); 789 | Block *els = LIST_REF(inst->args, 2); 790 | Block *cont = LIST_REF(inst->args, 3); 791 | 792 | // TEST reg, reg 793 | int reg = load_reg(ctx, cond); 794 | emit_regop(ctx, 4, 0x85, reg, reg); 795 | save_all(ctx); 796 | 797 | // JE offset 798 | emit2(ctx, 0x840f); 799 | emit4(ctx, 0); // filled later 800 | int pos0 = STRING_LEN(ctx->text); 801 | handle_block(ctx, then); 802 | save_all(ctx); 803 | 804 | int pos1; 805 | if (els) { 806 | // JMP offset 807 | emit1(ctx, 0xe9); 808 | emit4(ctx, 0); // filled later 809 | pos1 = STRING_LEN(ctx->text); 810 | handle_block(ctx, els); 811 | save_all(ctx); 812 | } 813 | 814 | // Backfill 815 | int save = STRING_LEN(ctx->text); 816 | string_seek(ctx->text, pos0 - 4); 817 | if (els) { 818 | 
emit4(ctx, pos1 - pos0); 819 | string_seek(ctx->text, pos1 - 4); 820 | emit4(ctx, save - pos1); 821 | } else { 822 | emit4(ctx, save - pos0); 823 | } 824 | string_seek(ctx->text, save); 825 | 826 | handle_block(ctx, cont); 827 | } 828 | 829 | static void handle_alloc(Context *ctx, Inst *inst) { 830 | Var *v = LIST_REF(inst->args, 0); 831 | var_stack_pos(ctx, v); 832 | } 833 | 834 | static void jump_to(Context *ctx, int pos) { 835 | // JMP offset 836 | emit1(ctx, 0xe9); 837 | emit4(ctx, pos - STRING_LEN(ctx->text) - 4); 838 | } 839 | 840 | static void handle_jmp(Context *ctx, Inst *inst) { 841 | Block *dst = LIST_REF(inst->args, 0); 842 | save_all(ctx); 843 | if (dst->pos < 0) { 844 | handle_block(ctx, dst); 845 | } else { 846 | jump_to(ctx, dst->pos); 847 | } 848 | } 849 | 850 | static void handle_return(Context *ctx, Inst *inst) { 851 | Var *retval = LIST_REF(inst->args, 0); 852 | load(ctx, RAX, retval); 853 | emit1(ctx, 0xc9); // LEAVE 854 | emit1(ctx, 0xc3); // RET 855 | } 856 | 857 | static void handle_block(Context *ctx, Block *block) { 858 | save_all(ctx); 859 | reset_context(ctx); 860 | if (block->pos >= 0) { 861 | jump_to(ctx, block->pos); 862 | return; 863 | } 864 | block->pos = STRING_LEN(ctx->text); 865 | for (int i = 0; i < LIST_LEN(block->code); i++) { 866 | Inst *inst = LIST_REF(block->code, i); 867 | switch (inst->op) { 868 | case '+': case '-': 869 | handle_add_or_sub(ctx, inst, inst->op == '+'); 870 | break; 871 | case '*': 872 | handle_imul(ctx, inst); 873 | break; 874 | case '/': 875 | handle_idiv(ctx, inst); 876 | break; 877 | case '!': 878 | handle_not(ctx, inst); 879 | break; 880 | case '|': 881 | handle_or(ctx, inst); 882 | break; 883 | case '^': 884 | handle_xor(ctx, inst); 885 | break; 886 | case '<': 887 | handle_less(ctx, inst); 888 | break; 889 | case '~': 890 | handle_neg(ctx, inst); 891 | break; 892 | case '&': 893 | handle_and(ctx, inst); 894 | break; 895 | case OP_SHL: 896 | handle_shl(ctx, inst); 897 | break; 898 | case OP_SHR: 899 
| handle_shr(ctx, inst); 900 | break; 901 | case OP_LE: 902 | handle_less_equal(ctx, inst); 903 | break; 904 | case OP_FUNC_CALL: 905 | handle_func_call(ctx, inst); 906 | break; 907 | case OP_EQ: 908 | handle_equal(ctx, inst, true); 909 | break; 910 | case OP_NE: 911 | handle_equal(ctx, inst, false); 912 | break; 913 | case OP_ADDRESS: 914 | handle_address(ctx, inst); 915 | break; 916 | case OP_DEREF: 917 | handle_deref(ctx, inst); 918 | break; 919 | case OP_ASSIGN: 920 | handle_assign(ctx, inst); 921 | break; 922 | case OP_ASSIGN_DEREF: 923 | handle_assign_deref(ctx, inst); 924 | break; 925 | case OP_ALLOC: 926 | handle_alloc(ctx, inst); 927 | break; 928 | case OP_I2F: 929 | handle_int_to_float(ctx, inst); 930 | break; 931 | case OP_F2I: 932 | handle_float_to_int(ctx, inst); 933 | break; 934 | case OP_IF: 935 | handle_if(ctx, inst); 936 | return; // return here 937 | case OP_JMP: 938 | handle_jmp(ctx, inst); 939 | return; // return here 940 | case OP_RETURN: 941 | handle_return(ctx, inst); 942 | return; // return here 943 | default: 944 | error("unknown op\n"); 945 | } 946 | // NOP 947 | emit1(ctx, 0x90); 948 | } 949 | } 950 | 951 | /*============================================================================== 952 | * Debug print 953 | */ 954 | 955 | 956 | typedef struct DebugPrintContext { 957 | Dict *visited_block; 958 | Dict *visited_var; 959 | int serial; 960 | } DebugPrintContext; 961 | 962 | static char *serial_to_str(int serial) { 963 | String *b = make_string(); 964 | o1(b, '$'); 965 | while (serial) { 966 | o1(b, 'A' + (serial % 26) - 1); 967 | serial /= 26; 968 | } 969 | o1(b, '\0'); 970 | return STRING_BODY(b); 971 | } 972 | 973 | String *ctype_to_string(Ctype *ctype) { 974 | String *b = make_string(); 975 | if (!ctype->signedp) 976 | o1(b, 'u'); 977 | switch (ctype->type) { 978 | case CTYPE_PTR: 979 | o1(b, '*'); 980 | string_append(b, STRING_BODY(ctype_to_string(ctype->ptr))); 981 | break; 982 | case CTYPE_ARRAY: 983 | string_append(b, 
STRING_BODY(ctype_to_string(ctype->ptr))); 984 | string_printf(b, "[%d]", ctype->size); 985 | break; 986 | case CTYPE_LLONG: string_append(b, "long long"); break; 987 | case CTYPE_LONG: string_append(b, "long"); break; 988 | case CTYPE_INT: string_append(b, "int"); break; 989 | case CTYPE_SHORT: string_append(b, "short"); break; 990 | case CTYPE_CHAR: string_append(b, "char"); break; 991 | case CTYPE_FLOAT: string_append(b, "float"); break; 992 | case CTYPE_DOUBLE: string_append(b, "double"); break; 993 | default: 994 | string_printf(b, "[[unknown type:%d]]", ctype->type); 995 | } 996 | return b; 997 | } 998 | 999 | static void print_var(Var *v, DebugPrintContext *ctx) { 1000 | debug("%s ", STRING_BODY(ctype_to_string(v->ctype))); 1001 | if (v->stype == VAR_IMM) { 1002 | switch (v->ctype->type) { 1003 | case CTYPE_CHAR: 1004 | debug("'%c'", v->val.i); 1005 | return; 1006 | case CTYPE_LLONG: 1007 | case CTYPE_LONG: 1008 | case CTYPE_SHORT: 1009 | case CTYPE_INT: 1010 | debug("%ld", v->val.i); 1011 | return; 1012 | case CTYPE_FLOAT: 1013 | case CTYPE_DOUBLE: 1014 | debug("%f", v->val.f); 1015 | return; 1016 | case CTYPE_PTR: 1017 | debug("(ptr)"); 1018 | return; 1019 | case CTYPE_ARRAY: 1020 | debug("(array) "); 1021 | return; 1022 | default: 1023 | panic("unknown type: %d", v->ctype->type); 1024 | } 1025 | } 1026 | if (v->name) 1027 | debug("%s", STRING_BODY(v->name)); 1028 | else if (dict_has(ctx->visited_var, v)) 1029 | debug("%s", (char *)dict_get(ctx->visited_var, v)); 1030 | else { 1031 | char *s = serial_to_str(ctx->serial++); 1032 | dict_put(ctx->visited_var, v, s); 1033 | debug("%s", s); 1034 | } 1035 | } 1036 | 1037 | static void print_var_list(List *vars, DebugPrintContext *ctx) { 1038 | for (int i = 0; i < LIST_LEN(vars); i++) { 1039 | Var *p = LIST_REF(vars, i); 1040 | if (i > 0) debug(", "); 1041 | print_var(p, ctx); 1042 | } 1043 | } 1044 | 1045 | static void print_block(Block *block, DebugPrintContext *ctx); 1046 | 1047 | static bool print_inst(Inst 
*inst, DebugPrintContext *ctx) { 1048 | if (inst->op < 255) 1049 | debug(" %c ", inst->op); 1050 | else { 1051 | switch (inst->op) { 1052 | #define INST(x) case x: debug(" %s ", #x); break; 1053 | #include "inst.h" 1054 | #undef INST 1055 | } 1056 | } 1057 | switch (inst->op) { 1058 | case OP_IF: 1059 | print_var((Var *)LIST_REF(inst->args, 0), ctx); 1060 | debug("\n\n"); 1061 | print_block((Block *)LIST_REF(inst->args, 1), ctx); 1062 | debug("\n"); 1063 | if (LIST_REF(inst->args, 2)) 1064 | print_block((Block *)LIST_REF(inst->args, 2), ctx); 1065 | else 1066 | debug(" (no else)"); 1067 | debug("\n"); 1068 | print_block((Block *)LIST_REF(inst->args, 3), ctx); 1069 | return true; 1070 | case OP_JMP: 1071 | debug("\n"); 1072 | print_block((Block *)LIST_REF(inst->args, 0), ctx); 1073 | return true; 1074 | default: 1075 | print_var_list(inst->args, ctx); 1076 | debug("\n"); 1077 | return false; 1078 | } 1079 | } 1080 | 1081 | static void print_block(Block *block, DebugPrintContext *ctx) { 1082 | if (dict_has(ctx->visited_block, block)) { 1083 | debug(" (block %p)\n", block); 1084 | return; 1085 | } 1086 | dict_put(ctx->visited_block, block, (void *)1); 1087 | for (int i = 0; i < LIST_LEN(block->code); i++) 1088 | if (print_inst((Inst *)LIST_REF(block->code, i), ctx)) 1089 | break; 1090 | } 1091 | 1092 | void print_function(Function *func) { 1093 | DebugPrintContext ctx; 1094 | ctx.visited_block = make_address_dict(); 1095 | ctx.visited_var = make_address_dict(); 1096 | ctx.serial = 1; 1097 | 1098 | debug("%s: ", STRING_BODY(func->name)); 1099 | print_var_list(func->params, &ctx); 1100 | debug("\n"); 1101 | print_block(func->entry, &ctx); 1102 | } 1103 | 1104 | /*============================================================================== 1105 | * Entry function 1106 | */ 1107 | 1108 | static void save_params(Context *ctx, Function *func) { 1109 | for (int i = 0; i < LIST_LEN(func->params); i++) { 1110 | Var *param = LIST_REF(func->params, i); 1111 | save(ctx, param, 
grp_arg[i]); 1112 | } 1113 | } 1114 | 1115 | static void assemble1(Context *ctx, Function *func) { 1116 | emit1(ctx, 0x55); // PUSH rbp 1117 | emit3(ctx, 0xe58948); // MOV rbp, rsp 1118 | 1119 | // SUB rsp, 0 1120 | emit3(ctx, 0xec8148); 1121 | int pos = STRING_LEN(ctx->text); 1122 | emit4(ctx, 0); // filled later 1123 | emit1(ctx, 0x90); // NOP 1124 | 1125 | save_params(ctx, func); 1126 | handle_block(ctx, func->entry); 1127 | 1128 | // Backfill SUB rsp, 0 1129 | int off = ((ctx->sp + 2) & ~1) * 8; // need to be 16-byte aligned 1130 | string_seek(ctx->text, pos); 1131 | emit4(ctx, off); 1132 | string_seek(ctx->text, STRING_LEN(ctx->text)); 1133 | } 1134 | 1135 | void assemble(Elf *elf, List *fns) { 1136 | Section *text = find_section(elf, ".text"); 1137 | Dict *dict = make_string_dict(); 1138 | List *tbf = make_list(); 1139 | for (int i = 0; i < LIST_LEN(fns); i++) { 1140 | Context *ctx = make_context(elf); 1141 | ctx->func_tbf = tbf; 1142 | Function *func = LIST_REF(fns, i); 1143 | 1144 | if (flag_debug) 1145 | print_function(func); 1146 | 1147 | Symbol *fsym = make_symbol(func->name, text, STRING_LEN(ctx->text), STB_GLOBAL, STT_NOTYPE, 1); 1148 | dict_put(ctx->elf->syms, func->name, fsym); 1149 | 1150 | dict_put(dict, func->name, (void *)(intptr)STRING_LEN(ctx->text)); 1151 | assemble1(ctx, func); 1152 | } 1153 | finish_func_call(elf, dict, tbf); 1154 | } 1155 | -------------------------------------------------------------------------------- /init.c: -------------------------------------------------------------------------------- 1 | /* 2 | * init.c - program initializer 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 
6 | */ 7 | 8 | #include "8cc.h" 9 | #include 10 | 11 | typedef void (*sighandler)(int); 12 | 13 | static void set_signal_handler(sighandler handler) { 14 | struct sigaction act; 15 | act.sa_handler = handler; 16 | sigemptyset(&act.sa_mask); 17 | act.sa_flags = 0; 18 | if (sigaction(SIGSEGV, &act, NULL)) { 19 | perror("sigaction failed: "); 20 | exit(-1); 21 | } 22 | } 23 | 24 | static void sigsegv_handler(int signo) { 25 | set_signal_handler(SIG_DFL); 26 | fprintf(stderr, "\n"); 27 | print_stack_trace_safe(); 28 | } 29 | 30 | void eightcc_init(void) { 31 | set_signal_handler(sigsegv_handler); 32 | } 33 | -------------------------------------------------------------------------------- /inst.h: -------------------------------------------------------------------------------- 1 | INST(OP_LE) 2 | INST(OP_ADDRESS) 3 | INST(OP_DEREF) 4 | INST(OP_ASSIGN) 5 | INST(OP_ASSIGN_DEREF) 6 | INST(OP_ALLOC) 7 | INST(OP_FUNC_CALL) 8 | INST(OP_IF) 9 | INST(OP_JMP) 10 | INST(OP_EQ) 11 | INST(OP_NE) 12 | INST(OP_RETURN) 13 | INST(OP_SHL) 14 | INST(OP_SHR) 15 | INST(OP_I2F) 16 | INST(OP_F2I) 17 | -------------------------------------------------------------------------------- /keyword.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 
4 | */ 5 | 6 | /* 7 | * List of keywords (C99 6.4.1) 8 | */ 9 | KEYWORD(KEYWORD_AUTO, "auto") 10 | KEYWORD(KEYWORD_BREAK, "break") 11 | KEYWORD(KEYWORD_CASE, "case") 12 | KEYWORD(KEYWORD_CHAR, "char") 13 | KEYWORD(KEYWORD_CONST, "const") 14 | KEYWORD(KEYWORD_CONTINUE, "continue") 15 | KEYWORD(KEYWORD_DEFAULT, "default") 16 | KEYWORD(KEYWORD_DO, "do") 17 | KEYWORD(KEYWORD_DOUBLE, "double") 18 | KEYWORD(KEYWORD_ELSE, "else") 19 | KEYWORD(KEYWORD_ENUM, "enum") 20 | KEYWORD(KEYWORD_EXTERN, "extern") 21 | KEYWORD(KEYWORD_FLOAT, "float") 22 | KEYWORD(KEYWORD_FOR, "for") 23 | KEYWORD(KEYWORD_GOTO, "goto") 24 | KEYWORD(KEYWORD_IF, "if") 25 | KEYWORD(KEYWORD_INLINE, "inline") 26 | KEYWORD(KEYWORD_INT, "int") 27 | KEYWORD(KEYWORD_LONG, "long") 28 | KEYWORD(KEYWORD_REGISTER, "register") 29 | KEYWORD(KEYWORD_RESTRICT, "restrict") 30 | KEYWORD(KEYWORD_RETURN, "return") 31 | KEYWORD(KEYWORD_SHORT, "short") 32 | KEYWORD(KEYWORD_SIGNED, "signed") 33 | KEYWORD(KEYWORD_SIZEOF, "sizeof") 34 | KEYWORD(KEYWORD_STATIC, "static") 35 | KEYWORD(KEYWORD_STRUCT, "struct") 36 | KEYWORD(KEYWORD_SWITCH, "switch") 37 | KEYWORD(KEYWORD_TYPEDEF, "typedef") 38 | KEYWORD(KEYWORD_UNION, "union") 39 | KEYWORD(KEYWORD_UNSIGNED, "unsigned") 40 | KEYWORD(KEYWORD_VOID, "void") 41 | KEYWORD(KEYWORD_VOLATILE, "volatile") 42 | KEYWORD(KEYWORD_WHILE, "while") 43 | KEYWORD(KEYWORD__BOOL, "_Bool") 44 | KEYWORD(KEYWORD__COMPLEX, "_Complex") 45 | KEYWORD(KEYWORD__IMAGINARY, "_Imaginary") 46 | 47 | PUNCT(KEYWORD_A_ADD, "+=") 48 | PUNCT(KEYWORD_A_SUB, "-=") 49 | PUNCT(KEYWORD_A_MUL, "*=") 50 | PUNCT(KEYWORD_A_DIV, "/=") 51 | PUNCT(KEYWORD_A_MOD, "%=") 52 | PUNCT(KEYWORD_A_AND, "&=") 53 | PUNCT(KEYWORD_A_OR, "|=") 54 | PUNCT(KEYWORD_A_XOR, "^=") 55 | PUNCT(KEYWORD_A_LSH, "<<=") 56 | PUNCT(KEYWORD_A_RSH, ">>=") 57 | PUNCT(KEYWORD_EQ, "==") 58 | PUNCT(KEYWORD_NE, "!=") 59 | PUNCT(KEYWORD_GE, ">=") 60 | PUNCT(KEYWORD_LE, "<=") 61 | PUNCT(KEYWORD_INC, "++") 62 | PUNCT(KEYWORD_DEC, "--") 63 | PUNCT(KEYWORD_LOG_AND, "&&") 
64 | PUNCT(KEYWORD_LOG_OR, "|") 65 | PUNCT(KEYWORD_LSH, "<<") 66 | PUNCT(KEYWORD_RSH, ">>") 67 | PUNCT(KEYWORD_TWOSHARPS, "##") 68 | PUNCT(KEYWORD_ARROW, "->") 69 | PUNCT(KEYWORD_THREEDOTS, "...") 70 | -------------------------------------------------------------------------------- /lex.c: -------------------------------------------------------------------------------- 1 | /* 2 | * lex.c - lexical analyzer 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | /* 9 | * There are two classes of tokens during translation: "preprocessing tokens" 10 | * and "tokens". C source files are parsed into preprocessing tokens first, C 11 | * preprocessor handles preprecessing directives and macro replacement, and then 12 | * the preprocessing tokens are converted to tokens. Main C compiler sees only 13 | * the sequence of tokens. 14 | * 15 | * The functions in this file does parses C source file into sequence of 16 | * preprocessor token. Preprocessor token is defined as the below (C99 17 | * 6.4): 18 | * 19 | * preprocessing-token: 20 | * header-name 21 | * identifier 22 | * pp-number 23 | * character-constant 24 | * string-literal 25 | * punctuator 26 | * each non-white-space character that cannot be one of the above 27 | */ 28 | 29 | #include "8cc.h" 30 | 31 | /*============================================================================== 32 | * Functions to make CPP processing context. 
33 | */ 34 | 35 | static List *make_include_path(void) { 36 | return make_list2(to_string("/usr/include"), to_string("/usr/local/include")); 37 | } 38 | 39 | CppContext *make_cpp_context(File *file) { 40 | CppContext *r = malloc(sizeof(CppContext)); 41 | r->file = file; 42 | r->at_bol = true; 43 | r->file_stack = make_list(); 44 | r->defs = make_string_dict(); 45 | r->ungotten = make_list(); 46 | r->in_macro = false; 47 | r->incl = make_list(); 48 | r->include_path = make_include_path(); 49 | r->tm = NULL; 50 | define_predefined_macros(r); 51 | return r; 52 | } 53 | 54 | void do_include(CppContext *ctx, File *file) { 55 | list_push(ctx->file_stack, ctx->file); 56 | ctx->file = file; 57 | } 58 | 59 | /*============================================================================== 60 | * Utility functions. 61 | */ 62 | 63 | static bool iswhitespace(int c) { 64 | return c == ' ' || c == '\t' || c == '\f' || c == '\v'; 65 | } 66 | 67 | NORETURN void error_cpp_ctx(CppContext *ctx, char *msg, ...) { 68 | va_list ap; 69 | va_start(ap, msg); 70 | print_parse_error(ctx->file->line, ctx->file->column, msg, ap); 71 | va_end(ap); 72 | } 73 | 74 | void unget_cpp_token(CppContext *ctx, Token *tok) { 75 | if (tok) 76 | list_push(ctx->ungotten, tok); 77 | } 78 | 79 | Token *peek_cpp_token(CppContext *ctx) { 80 | Token *tok = read_cpp_token(ctx); 81 | unget_cpp_token(ctx, tok); 82 | return tok; 83 | } 84 | 85 | static bool next_two_chars(CppContext *ctx, char c0, char c1) { 86 | int v0 = readc(ctx->file); 87 | if (c0 != v0) { 88 | unreadc(v0, ctx->file); 89 | return false; 90 | } 91 | int v1 = readc(ctx->file); 92 | if (c1 == v1) 93 | return true; 94 | unreadc(v1, ctx->file); 95 | unreadc(v0, ctx->file); 96 | return false; 97 | } 98 | 99 | /*============================================================================== 100 | * Functions to make tokens. 
101 | */ 102 | 103 | Token *copy_token(Token *tok) { 104 | Token *r = malloc(sizeof(Token)); 105 | *r = *tok; 106 | return r; 107 | } 108 | 109 | Token *make_token(CppContext *ctx, TokType type, TokenValue val) { 110 | Token *r = malloc(sizeof(Token)); 111 | r->toktype = type; 112 | r->val = val; 113 | if (ctx->file) { 114 | r->line = ctx->file->line; 115 | r->column = ctx->file->column; 116 | } else { 117 | r->line = r->column = 0; 118 | } 119 | r->hideset = make_list(); 120 | r->space = false; 121 | return r; 122 | } 123 | 124 | static Token *make_ident(CppContext *ctx, String *val) { 125 | return make_token(ctx, TOKTYPE_IDENT, (TokenValue)val); 126 | } 127 | 128 | Token *make_cppnum(CppContext *ctx, String *val) { 129 | return make_token(ctx, TOKTYPE_CPPNUM, (TokenValue)val); 130 | } 131 | 132 | static Token *make_punct(CppContext *ctx, int c) { 133 | return make_token(ctx, TOKTYPE_PUNCT, (TokenValue)c); 134 | } 135 | 136 | static Token *make_char_const(CppContext *ctx, char c) { 137 | return make_token(ctx, TOKTYPE_CHAR, (TokenValue)(int)c); 138 | } 139 | 140 | Token *make_str_literal(CppContext *ctx, String *val) { 141 | return make_token(ctx, TOKTYPE_STRING, (TokenValue)val); 142 | } 143 | 144 | static Token *make_cpp_token(CppContext *ctx, TokType type) { 145 | return make_token(ctx, type, (TokenValue)0); 146 | } 147 | 148 | /*============================================================================== 149 | * Functions to handle comments. Comment will be treated as if it were one 150 | * whitespace character. 
(See C99 5.1.1.2 Translation phases, phase 4) 151 | */ 152 | static void skip_comment(CppContext *ctx) { 153 | int prev = '\0'; 154 | for (;;) { 155 | int c = readc(ctx->file); 156 | if (c == EOF) 157 | error_cpp_ctx(ctx, "premature end of input file in comment"); 158 | if (c == '/' && prev == '*') 159 | return; 160 | prev = c; 161 | } 162 | } 163 | 164 | static void skip_line_comment(CppContext *ctx) { 165 | for (;;) { 166 | int c = readc(ctx->file); 167 | if (c == EOF) return; 168 | if (c == '\n') { 169 | unreadc(c, ctx->file); 170 | return; 171 | } 172 | } 173 | } 174 | 175 | /*============================================================================== 176 | * Parser. These methods reads source file and returns preprocessing tokens. 177 | */ 178 | 179 | /* 180 | * C99 6.4.8 Preprocessing numbers 181 | * 182 | * pp-number 183 | * digit 184 | * . digit 185 | * pp-number digit 186 | * pp-number identifier-nondigit 187 | * pp-number [eEpP] sign 188 | * pp-number . 189 | * 190 | * (C99 6.4.2 Identifiers) 191 | * identifier-nondigit: 192 | * nondigit 193 | * universal-character-name 194 | * other implementation-defined characters 195 | * nondigit: 196 | * [_a-zA-Z] 197 | */ 198 | static Token *read_cppnum(CppContext *ctx, int c) { 199 | // c must be [0-9] or '.' 
200 | String *buf = make_string(); 201 | o1(buf, c); 202 | if (c == '.') { 203 | int c1 = readc(ctx->file); 204 | if (!isdigit(c1)) 205 | error_cpp_ctx(ctx, "number expected, but got '%c'", c1); 206 | o1(buf, c1); 207 | } 208 | for (;;) { 209 | c = readc(ctx->file); 210 | if (isalnum(c) || c == '_' || c == '.') { 211 | o1(buf, c); 212 | } else if (c == 'e' || c == 'E' || c == 'p' || c == 'P') { 213 | o1(buf, c); 214 | int c1 = readc(ctx->file); 215 | if (c1 != '+' && c1 != '-') 216 | error_cpp_ctx(ctx, "'+' or '-' expected, but got '%c'", c1); 217 | o1(buf, c1); 218 | } else { 219 | unreadc(c, ctx->file); 220 | return make_cppnum(ctx, buf); 221 | } 222 | } 223 | } 224 | 225 | static int hextodec(char c) { 226 | c = tolower(c); 227 | if ('0' <= c && c <= '9') 228 | return c - '0'; 229 | return c - 'a' + 10; 230 | } 231 | 232 | /* 233 | * escape-character: 234 | * "\" escape-code 235 | * universal-character-name 236 | * 237 | * escape-code: 238 | * character-escape-code 239 | * octal-escape-code 240 | * hex-escape-code 241 | * 242 | * character-escape-code: 243 | * one of: n t b r f v \ ' " a ? 
e 244 | * ('\e' is GNU extension) 245 | * 246 | * octal-escape-code: 247 | * [0-7]{,3} 248 | * 249 | * hex-escape-code: 250 | * "x" [0-9a-fA-F]+ 251 | * 252 | * universal-character-name: 253 | * "\u" [0-9a-fA-F]{4} 254 | * "\U" [0-9a-fA-F]{8} 255 | */ 256 | static char read_escape_char(File *file) { 257 | int c = readc(file); 258 | int r; 259 | switch (c) { 260 | case EOF: 261 | error("line %d:%d: premature end of input file while reading a literal string or a character", file->line, file->column); 262 | case 'a': return '\a'; 263 | case 'b': return '\b'; 264 | case 'e': return '\033'; // GNU extension 265 | case 't': return '\t'; 266 | case 'n': return '\n'; 267 | case 'v': return '\v'; 268 | case 'f': return '\f'; 269 | case 'r': return '\r'; 270 | case '0': case '1': case '2': case '3': case '4': 271 | case '5': case '6': case '7': case '8': case '9': 272 | r = c - '0'; 273 | c = readc(file); 274 | if (isdigit(c)) { 275 | r = r * 8 + (c - '0'); 276 | c = readc(file); 277 | if (isdigit(c)) { 278 | r = r * 8 + (c - '0'); 279 | } else { 280 | unreadc(c, file); 281 | } 282 | } else { 283 | unreadc(c, file); 284 | } 285 | return r; 286 | case 'x': 287 | c = readc(file); 288 | if (!isxdigit(c)) 289 | error("line %d:%d: hexdigit expected, but got '%c'", file->line, file->column, c); 290 | r = hextodec(c); 291 | c = readc(file); 292 | if (isxdigit(c)) 293 | r = r * 16 + hextodec(c); 294 | else 295 | unreadc(c, file); 296 | return r; 297 | default: return (char)c; 298 | } 299 | } 300 | 301 | /* 302 | * C99 6.4.5 String literals 303 | * 304 | * string-constant: 305 | * '"' s-char* '"' 306 | * 'L"' s-char* '"' 307 | * 308 | * s-char: 309 | * any source character except the double quote, backslash or newline 310 | * escape-character 311 | */ 312 | static String *read_str(CppContext *ctx) { 313 | String *b = make_string(); 314 | for (;;) { 315 | int c = readc(ctx->file); 316 | switch (c) { 317 | case '"': 318 | o1(b, '\0'); 319 | return b; 320 | case '\\': 321 | o1(b, 
read_escape_char(ctx->file)); 322 | break; 323 | case EOF: 324 | error_cpp_ctx(ctx, "premature end of input file while reading a literal string"); 325 | case '\n': 326 | error_cpp_ctx(ctx, "newline is not allowed being in a string literal"); 327 | default: 328 | o1(b, c); 329 | } 330 | } 331 | } 332 | 333 | /* 334 | * character-constant: 335 | * "'" c-char* "'" 336 | * "L'" c-char* "'" 337 | * 338 | * c-char: 339 | * any source character except the single quote, backslash or newline 340 | * escape-character 341 | */ 342 | static char read_char(CppContext *ctx) { 343 | int c = readc(ctx->file); 344 | if (c == EOF) 345 | error_cpp_ctx(ctx, "premature end of input file while reading a literal character"); 346 | char r = (c != '\\') ? c : read_escape_char(ctx->file); 347 | c = readc(ctx->file); 348 | if (c != '\'') 349 | error_cpp_ctx(ctx, "'\'' (single quote) expected, but got '%c'", c); 350 | return r; 351 | } 352 | 353 | static String *read_ident(File *file, char c0) { 354 | String *b = make_string(); 355 | o1(b, c0); 356 | for (;;) { 357 | int c1 = readc(file); 358 | if (isalnum(c1) || c1 == '_') { 359 | o1(b, c1); 360 | } else { 361 | unreadc(c1, file); 362 | return b; 363 | } 364 | } 365 | } 366 | 367 | static void skip_whitespace(CppContext *ctx) { 368 | int c = readc(ctx->file); 369 | while (iswhitespace(c)) 370 | c = readc(ctx->file); 371 | unreadc(c, ctx->file); 372 | } 373 | 374 | /* 375 | * Reads operators such as += or *=. 376 | */ 377 | static Token *maybe_read_equal(CppContext *ctx, int t0, int t1) { 378 | if (next_char_is(ctx->file, '=')) 379 | return make_punct(ctx, t1); 380 | return make_punct(ctx, t0); 381 | } 382 | 383 | /* 384 | * Reads operators such as ++ or --. 385 | */ 386 | static Token *maybe_read_rep0(CppContext *ctx, int t0, int t1, int t2) { 387 | if (next_char_is(ctx->file, t0)) 388 | return make_punct(ctx, t2); 389 | return maybe_read_equal(ctx, t0, t1); 390 | } 391 | 392 | /* 393 | * Reads operators such as <<= or >>=. 
394 | */ 395 | static Token *maybe_read_rep1(CppContext *ctx, int t0, int t1, int t2, int t3) { 396 | if (next_char_is(ctx->file, t0)) 397 | return maybe_read_equal(ctx, t2, t3); 398 | return maybe_read_equal(ctx, t0, t1); 399 | } 400 | 401 | /* 402 | * Returns the next token. This considers a squence of whitespace characters a 403 | * token, unlike read_cpp_token(). 404 | */ 405 | static Token *read_cpp_token_int(CppContext *ctx) { 406 | if (!LIST_IS_EMPTY(ctx->ungotten)) 407 | return list_pop(ctx->ungotten); 408 | if (ctx->in_macro) 409 | return NULL; 410 | 411 | for (;;) { 412 | int c = readc(ctx->file); 413 | switch (c) { 414 | case ' ': case '\t': case '\f': case '\v': 415 | skip_whitespace(ctx); 416 | return make_cpp_token(ctx, TOKTYPE_SPACE); 417 | case '\n': 418 | return make_cpp_token(ctx, TOKTYPE_NEWLINE); 419 | case '/': 420 | if (next_char_is(ctx->file, '*')) { 421 | skip_comment(ctx); 422 | return make_cpp_token(ctx, TOKTYPE_SPACE); 423 | } 424 | if (next_char_is(ctx->file, '/')) { 425 | skip_line_comment(ctx); 426 | return make_cpp_token(ctx, TOKTYPE_SPACE); 427 | } 428 | return maybe_read_equal(ctx, '/', KEYWORD_A_DIV); 429 | case '#': 430 | if (next_char_is(ctx->file, '#')) 431 | return make_punct(ctx, KEYWORD_TWOSHARPS); 432 | return make_punct(ctx, '#'); 433 | case '.': 434 | if (next_two_chars(ctx, '.', '.')) 435 | return make_punct(ctx, KEYWORD_THREEDOTS); 436 | if (isdigit(peekc(ctx->file))) 437 | return read_cppnum(ctx, c); 438 | return make_punct(ctx, '.'); 439 | case '0': case '1': case '2': case '3': case '4': 440 | case '5': case '6': case '7': case '8': case '9': 441 | return read_cppnum(ctx, c); 442 | case '"': 443 | return make_str_literal(ctx, read_str(ctx)); 444 | case '\'': 445 | return make_char_const(ctx, read_char(ctx)); 446 | 447 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': 448 | case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 449 | case 'o': case 'p': case 'q': case 'r': case 's': 
case 't': case 'u': 450 | case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': 451 | case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': 452 | case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': 453 | case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': 454 | case 'X': case 'Y': case 'Z': case '_': 455 | return make_ident(ctx, read_ident(ctx->file, c)); 456 | 457 | case '=': return maybe_read_equal(ctx, '=', KEYWORD_EQ); 458 | case '*': return maybe_read_equal(ctx, '*', KEYWORD_A_MUL); 459 | case '^': return maybe_read_equal(ctx, '^', KEYWORD_A_XOR); 460 | case '!': return maybe_read_equal(ctx, '!', KEYWORD_NE); 461 | 462 | case '+': return maybe_read_rep0(ctx, '+', KEYWORD_A_ADD, KEYWORD_INC); 463 | case '&': return maybe_read_rep0(ctx, '&', KEYWORD_A_AND, KEYWORD_LOG_AND); 464 | case '|': return maybe_read_rep0(ctx, '|', KEYWORD_A_OR, KEYWORD_LOG_OR); 465 | 466 | case '-': 467 | if (next_char_is(ctx->file, '>')) 468 | return make_punct(ctx, KEYWORD_ARROW); 469 | return maybe_read_rep0(ctx, '-', KEYWORD_A_SUB, KEYWORD_DEC); 470 | 471 | /* 472 | * The following six tokens (so-called "digraphs") 473 | * <: :> <% %> %: %:%: 474 | * are equivalent to the following six tokens. 
475 | * [ ] { } # ## 476 | * (C99 6.4.6 Punctuators, paragraph 3) 477 | */ 478 | case '<': 479 | if (next_char_is(ctx->file, ':')) 480 | return make_punct(ctx, '['); 481 | if (next_char_is(ctx->file, '%')) 482 | return make_punct(ctx, '{'); 483 | return maybe_read_rep1(ctx, '<', KEYWORD_LE, KEYWORD_LSH, KEYWORD_A_LSH); 484 | case ':': 485 | if (next_char_is(ctx->file, '>')) 486 | return make_punct(ctx, ']'); 487 | return make_punct(ctx, ':'); 488 | case '%': 489 | if (next_char_is(ctx->file, '>')) 490 | return make_punct(ctx, '}'); 491 | if (next_char_is(ctx->file, ':')) { 492 | if (next_two_chars(ctx, '%', ':')) 493 | return make_punct(ctx, KEYWORD_TWOSHARPS); 494 | return make_punct(ctx, '#'); 495 | } 496 | return maybe_read_equal(ctx, '%', KEYWORD_A_MOD); 497 | 498 | case '>': 499 | return maybe_read_rep1(ctx, '>', KEYWORD_GE, KEYWORD_RSH, KEYWORD_A_RSH); 500 | 501 | case '(': case ')': case ',': case ';': case '[': case ']': 502 | case '{': case '}': case '?': case '~': 503 | return make_punct(ctx, c); 504 | case EOF: 505 | return NULL; 506 | default: 507 | error_cpp_ctx(ctx, "unimplemented '%c'", c); 508 | } 509 | } 510 | } 511 | 512 | /*============================================================================== 513 | * Preprocessor conditional inclusion. 514 | * 515 | * If a condition of #if, #ifdef, #ifndef or #elif is false, we don't actually 516 | * need to parse the subsequent tokens until #elif or #endif appears. 517 | * skip_cond_incl() skips parsing functions and directly reads characters from 518 | * file. As the function toches the file rather than parsed tokens, this is 519 | * defeined in this file rather than in cpp.c. 
520 | */ 521 | 522 | static void skip_char(CppContext *ctx) { read_char(ctx); } 523 | static void skip_string(CppContext *ctx) { read_str(ctx); } 524 | 525 | static void skip_line(CppContext *ctx) { 526 | for (;;) { 527 | int c = readc(ctx->file); 528 | if (c == '\n') 529 | return; 530 | if (c == EOF) 531 | error_cpp_ctx(ctx, "unterminated conditional inclusion"); 532 | if (c == '\'') 533 | skip_char(ctx); 534 | else if (c == '"') 535 | skip_string(ctx); 536 | } 537 | } 538 | 539 | CondInclType skip_cond_incl(CppContext *ctx) { 540 | ASSERT(LIST_IS_EMPTY(ctx->ungotten)); 541 | int nest = 0; 542 | for (;;) { 543 | skip_whitespace(ctx); 544 | if (!next_char_is(ctx->file, '#')) { 545 | skip_line(ctx); 546 | continue; 547 | } 548 | Token *tok = read_cpp_token(ctx); 549 | if (tok->toktype == TOKTYPE_NEWLINE) 550 | continue; 551 | if (tok->toktype != TOKTYPE_IDENT) { 552 | skip_line(ctx); 553 | continue; 554 | } 555 | if (!strcmp(STRING_BODY(tok->val.str), "if") 556 | || !strcmp(STRING_BODY(tok->val.str), "ifdef") 557 | || !strcmp(STRING_BODY(tok->val.str), "ifndef")) { 558 | nest++; 559 | } else if (!nest && !strcmp(STRING_BODY(tok->val.str), "else")) { 560 | expect_newline(ctx); 561 | return COND_ELSE; 562 | } else if (!nest && !strcmp(STRING_BODY(tok->val.str), "elif")) { 563 | return COND_ELIF; 564 | } else if (!strcmp(STRING_BODY(tok->val.str), "endif")) { 565 | if (nest) { 566 | nest--; 567 | } else { 568 | expect_newline(ctx); 569 | return COND_ENDIF; 570 | } 571 | } 572 | skip_line(ctx); 573 | } 574 | } 575 | 576 | /*============================================================================== 577 | * C99 6.4.7 Header names 578 | * 579 | * #include directive needs special tokenize to read a token in <> or "". 
580 | * 581 | * header-name: 582 | * < h-char-sequence > 583 | * " q-char-sequence " 584 | * h-char-sequence: 585 | * [^>\n]+ 586 | * q-char-sequence: 587 | * [^"\n]+ 588 | */ 589 | 590 | String *read_header_name(CppContext *ctx, bool *std) { 591 | skip_whitespace(ctx); 592 | char close; 593 | int c = readc(ctx->file); 594 | if (c == '"') { 595 | *std = false; 596 | close = '"'; 597 | } else if (c == '<') { 598 | *std = true; 599 | close = '>'; 600 | } else { 601 | unreadc(c, ctx->file); 602 | return NULL; 603 | } 604 | 605 | String *r = make_string(); 606 | for (;;) { 607 | c = readc(ctx->file); 608 | if (c == EOF || c == '\n') 609 | error_cpp_ctx(ctx, "premature end of header name"); 610 | if (c == close) 611 | break; 612 | o1(r, c); 613 | } 614 | if (STRING_LEN(r) == 0) 615 | error_cpp_ctx(ctx, "header name should not be empty"); 616 | o1(r, '\0'); 617 | return r; 618 | } 619 | 620 | /*============================================================================== 621 | * Public interfaces to be used by the preprocessor. 622 | */ 623 | 624 | /* 625 | * Returns iff the next token a next token is immediately preceded by 626 | * whitespace. Token is not consumed. 627 | * 628 | * This function will be used only when the preprocessor reads #define 629 | * directive, which is the only place where the parser needs to be aware whether 630 | * whitespace exists between tokens or not. For example, the following macro 631 | * FOO is function-like, that takes an argument named "x". 632 | * 633 | * #define FOO(x) ... 634 | * 635 | * On the other hand, macro BAR shown below is not function-like and will be 636 | * expanded to "(x) ...". 637 | * 638 | * #define BAR (x) ... 639 | */ 640 | bool is_next_space(CppContext *ctx) { 641 | Token *tok = read_cpp_token_int(ctx); 642 | unget_cpp_token(ctx, tok); 643 | return tok->toktype == TOKTYPE_SPACE; 644 | } 645 | 646 | /* 647 | * Returns the next token. 
648 | */ 649 | Token *read_cpp_token(CppContext *ctx) { 650 | Token *tok = read_cpp_token_int(ctx); 651 | while (tok && tok->toktype == TOKTYPE_SPACE) { 652 | tok = read_cpp_token_int(ctx); 653 | if (tok) tok->space = true; 654 | } 655 | if (!tok && !LIST_IS_EMPTY(ctx->file_stack)) { 656 | close_file(ctx->file); 657 | ctx->file = list_pop(ctx->file_stack); 658 | return make_cpp_token(ctx, TOKTYPE_NEWLINE); 659 | } 660 | return tok; 661 | } 662 | -------------------------------------------------------------------------------- /list.c: -------------------------------------------------------------------------------- 1 | /* 2 | * list.c - array list implementation 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | static List *make_list_int(int size) { 11 | List *obj = malloc(sizeof(List)); 12 | obj->elems = malloc(sizeof(void*) * size); 13 | for (int i = 0; i < size; i++) 14 | obj->elems[i] = NULL; 15 | obj->nalloc = size; 16 | obj->len = 0; 17 | return obj; 18 | } 19 | 20 | List *make_list(void) { 21 | return make_list_int(LIST_INITIAL_SIZE); 22 | } 23 | 24 | List *make_list1(void *e0) { 25 | List *r = make_list(); 26 | list_push(r, e0); 27 | return r; 28 | } 29 | 30 | List *make_list2(void *e0, void *e1) { 31 | List *r = make_list(); 32 | list_push(r, e0); 33 | list_push(r, e1); 34 | return r; 35 | } 36 | 37 | List *make_listn(void *e0, ...) 
{ 38 | List *r = make_list(); 39 | list_push(r, e0); 40 | 41 | va_list ap; 42 | va_start(ap, e0); 43 | for (;;) { 44 | void *e = va_arg(ap, void *); 45 | if (!e) break; 46 | list_push(r, e); 47 | } 48 | va_end(ap); 49 | 50 | return r; 51 | } 52 | 53 | static void ensure_room(List *list) { 54 | if (list->len < list->nalloc) return; 55 | int newsize = list->nalloc * 2; 56 | void **buf = malloc(sizeof(void*) * newsize); 57 | memcpy(buf, list->elems, sizeof(void*) * list->len); 58 | for (int i = list->len; i < newsize; i++) 59 | buf[i] = NULL; 60 | list->elems = buf; 61 | list->nalloc = newsize; 62 | } 63 | 64 | void list_push(List *list, void *e) { 65 | ensure_room(list); 66 | list->elems[list->len++] = e; 67 | } 68 | 69 | void *list_pop(List *list) { 70 | if (list->len == 0) 71 | error("list empty"); 72 | void *r = list->elems[--list->len]; 73 | list->elems[list->len] = NULL; 74 | return r; 75 | } 76 | 77 | void *list_unshift(List *list) { 78 | if (list->len == 0) 79 | panic("list empty"); 80 | void *r = list->elems[0]; 81 | for (int i = 1; i < list->len; i++) 82 | list->elems[i - 1] = list->elems[i]; 83 | list->len--; 84 | return r; 85 | } 86 | 87 | List *sublist(List *orig, int off) { 88 | List *r = malloc(sizeof(List)); 89 | r->elems = orig->elems + off; 90 | r->nalloc = orig->nalloc - off; 91 | r->len = orig->len - off; 92 | return r; 93 | } 94 | 95 | void list_append(List *a, List *b) { 96 | for (int i = 0; i < LIST_LEN(b); i++) 97 | list_push(a, LIST_REF(b, i)); 98 | } 99 | 100 | List *list_reverse(List *list) { 101 | List *r = make_list_int(list->nalloc); 102 | for (int i = LIST_LEN(list) - 1; i >= 0; i--) 103 | list_push(r, LIST_REF(list, i)); 104 | return r; 105 | } 106 | 107 | List *list_copy(List *list) { 108 | List *r = make_list_int(list->nalloc); 109 | for (int i = 0; i < LIST_LEN(list); i++) 110 | list_push(r, LIST_REF(list, i)); 111 | return r; 112 | } 113 | 114 | bool list_in(List *list, String *e) { 115 | for (int i = 0; i < LIST_LEN(list); i++) 116 
| if (string_equal(LIST_REF(list, i), e)) 117 | return true; 118 | return false; 119 | } 120 | 121 | List *list_union(List *a, List *b) { 122 | if (LIST_IS_EMPTY(a)) return b; 123 | if (LIST_IS_EMPTY(b)) return a; 124 | List *r = list_copy(a); 125 | for (int i = 0; i < LIST_LEN(b); i++) 126 | if (!list_in(r, LIST_REF(b, i))) 127 | list_push(r, LIST_REF(b, i)); 128 | return r; 129 | } 130 | 131 | List *list_union1(List *list, String *e) { 132 | if (list_in(list, e)) 133 | return list; 134 | List *r = list_copy(list); 135 | list_push(r, e); 136 | return r; 137 | } 138 | 139 | List *list_intersect(List *a, List *b) { 140 | if (LIST_IS_EMPTY(a)) return a; 141 | if (LIST_IS_EMPTY(b)) return b; 142 | List *r = make_list(); 143 | for (int i = 0; i < LIST_LEN(a); i++) 144 | if (list_in(b, LIST_REF(a, i))) 145 | list_push(r, LIST_REF(a, i)); 146 | return r; 147 | } 148 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * main.c - main program 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 
6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | static void usage(void) { 11 | fprintf(stderr, 12 | "Usage: 8cc [ -d ] [ -run ] \n" 13 | " 8cc [ -E ] \n" 14 | " 8cc [ -d ] \n"); 15 | exit(-1); 16 | } 17 | 18 | int main(int argc, char **argv) { 19 | eightcc_init(); 20 | 21 | bool flag_cscript = false; 22 | bool flag_cpp_only = false; 23 | char *infile = NULL; 24 | char *outfile = NULL; 25 | 26 | for (int i = 1; i < argc; i++) { 27 | if (!strcmp(argv[i], "-run")) 28 | flag_cscript = true; 29 | else if (!strcmp(argv[i], "-d")) 30 | flag_debug = true; 31 | else if (!strcmp(argv[i], "-E")) 32 | flag_cpp_only = true; 33 | else if (argv[i][0] == '-' && argv[i][1] != '\0') 34 | usage(); 35 | else if (!infile) { 36 | infile = argv[i]; 37 | } 38 | else if (!outfile) 39 | outfile = argv[i]; 40 | else 41 | usage(); 42 | } 43 | 44 | if (!infile) 45 | usage(); 46 | if ((flag_cscript || flag_cpp_only) && outfile) 47 | usage(); 48 | if (!(flag_cscript || flag_cpp_only) && !outfile) 49 | usage(); 50 | 51 | File *in = open_file(infile); 52 | Elf *elf = new_elf(); 53 | 54 | if (flag_cpp_only) { 55 | cpp_write(make_cpp_context(in), stdout); 56 | return 0; 57 | } 58 | 59 | List *fns = parse(in, elf); 60 | assemble(elf, fns); 61 | 62 | if (flag_cscript) { 63 | run_main(elf, argc - 1, argv + 1); 64 | } else { 65 | FILE *out = fopen(outfile, "w"); 66 | write_elf(out, elf); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /run.c: -------------------------------------------------------------------------------- 1 | /* 2 | * run.c - compile and execute directly 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 6 | */ 7 | 8 | /* 9 | * The functions in this file implements C script feature, that directly 10 | * executes compiled code without writing it out to a file. 
11 | * 12 | * In order to call the main function in the compiled code, we have to do the 13 | * following steps: 14 | * 15 | * - compiles a given string as C source, 16 | * - maps the resulting ELF sections to mmap()'ed memory regions, 17 | * - relocate if needed, 18 | * - and call the main function in the memory regions. 19 | * 20 | * A special care need to be taken for reloation. In x86-64, RIP-relative CALL 21 | * instruction supports only 2^32 offset. That means you cannot jump beyond 22 | * 2^31 bytes backward or forward from a CALL instruction. It is very common 23 | * that library functions used by a program are far beyond the limit. We need 24 | * to use a jump table as a workaround. 25 | * 26 | * We allocate a jump table close to the memory region for the binary. Offsets 27 | * written for relocation refer a jump table entry. Machine code to jump to the 28 | * library functions are written to the jump table entry, so that CALL 29 | * instructions will jump to desired functions by indirection. 30 | */ 31 | 32 | #define _GNU_SOURCE 1 // for MAP_ANONYMOUS 33 | #include "8cc.h" 34 | #include 35 | #include 36 | 37 | typedef struct JumpTable { 38 | void *ptr; 39 | int off; 40 | } JumpTable; 41 | 42 | static void *allocate_memory(void *ptr, int size, bool exec) { 43 | int prot = PROT_READ | PROT_WRITE; 44 | prot |= exec ? 
PROT_EXEC : 0; 45 | int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT; 46 | void *r = mmap(NULL, size, prot, flags, 0, 0); 47 | if (r == MAP_FAILED) { 48 | perror("mmap failed"); 49 | exit(-1); 50 | } 51 | if (ptr) 52 | memcpy(r, ptr, size); 53 | return r; 54 | } 55 | 56 | static void allocate(Section *sect) { 57 | if (!(sect->flags & SHF_ALLOC)) 58 | return; 59 | void *ptr = STRING_BODY(sect->body); 60 | int size = STRING_LEN(sect->body); 61 | if (!size) 62 | return; 63 | bool exec = sect->flags & SHF_EXECINSTR; 64 | sect->memory_pos = allocate_memory(ptr, size, exec); 65 | } 66 | 67 | static JumpTable *allocate_jump_table(void) { 68 | JumpTable *r = malloc(sizeof(JumpTable)); 69 | r->ptr = allocate_memory(NULL, 4096, true); 70 | r->off = 0; 71 | return r; 72 | } 73 | 74 | static intptr add_jump(JumpTable *tab, u64 off) { 75 | intptr r = (intptr)tab->ptr + tab->off; 76 | 77 | // mov r11, off 78 | *((u8 *)tab->ptr + tab->off++) = 0x49; 79 | *((u8 *)tab->ptr + tab->off++) = 0xbb; 80 | for (int i = 0; i < 8; i++) 81 | *((u8 *)tab->ptr + tab->off++) = (off >> (i * 8)) & 0xff; 82 | 83 | // jmp r11 84 | *((u8 *)tab->ptr + tab->off++) = 0x41; 85 | *((u8 *)tab->ptr + tab->off++) = 0xff; 86 | *((u8 *)tab->ptr + tab->off++) = 0xe3; 87 | return r; 88 | } 89 | 90 | static void relocate(Elf *elf, Section *sect, JumpTable *tab) { 91 | if (!(sect->flags & SHF_ALLOC)) 92 | return; 93 | for (int i = 0; i < LIST_LEN(sect->rels); i++) { 94 | Reloc *rel = LIST_REF(sect->rels, i); 95 | if (rel->sym) { 96 | ASSERT(rel->type == R_X86_64_PC32); 97 | intptr loc = (intptr)dlsym(RTLD_DEFAULT, rel->sym); 98 | if (!loc) 99 | error("cannot resolve symbol '%s'", rel->sym); 100 | intptr loc1 = add_jump(tab, loc); 101 | ASSERT(loc1 < 0xffffffffUL); 102 | Symbol *sym = find_symbol(elf, rel->sym); 103 | u32 *ptr = (void *)((intptr)(sym->section->memory_pos) + rel->off); 104 | *ptr = loc1 - (intptr)ptr - 4; 105 | continue; 106 | } 107 | ASSERT(rel->type == R_X86_64_64); 108 | intptr loc = 
(intptr)(rel->section->memory_pos) + rel->addend; 109 | u64 *ptr = (void *)((intptr)(sect->memory_pos) + rel->off); 110 | *ptr = loc; 111 | } 112 | } 113 | 114 | static void release(Section *sect) { 115 | if (sect->flags & SHF_ALLOC) 116 | munmap(sect->memory_pos, STRING_LEN(sect->body)); 117 | } 118 | 119 | typedef int (*main_fn_type)(int argc, char **argv); 120 | 121 | static int call_main(main_fn_type main_fn, int argc, char **argv) { 122 | return main_fn(argc, argv); 123 | } 124 | 125 | int run_main(Elf *elf, int argc, char **argv) { 126 | for (int i = 0; i < LIST_LEN(elf->sections); i++) 127 | allocate((Section *)LIST_REF(elf->sections, i)); 128 | JumpTable *tab = allocate_jump_table(); 129 | for (int i = 0; i < LIST_LEN(elf->sections); i++) 130 | relocate(elf, (Section *)LIST_REF(elf->sections, i), tab); 131 | 132 | Symbol *sym = find_symbol(elf, "main"); 133 | main_fn_type main_fn = (void *)(((intptr)sym->section->memory_pos) + sym->value); 134 | int r = call_main(main_fn, argc, argv); 135 | 136 | for (int i = 0; i < LIST_LEN(elf->sections); i++) 137 | release(LIST_REF(elf->sections, i)); 138 | return r; 139 | } 140 | 141 | int run_string(char *code) { 142 | File *file = make_string_file(to_string(code)); 143 | Elf *elf = new_elf(); 144 | List *fns = parse(file, elf); 145 | assemble(elf, fns); 146 | return run_main(elf, 1, (char *[]){ "-" }); 147 | } 148 | -------------------------------------------------------------------------------- /sample/nqueen.c: -------------------------------------------------------------------------------- 1 | #define N 8 2 | #define N2 64 3 | 4 | print_board(int *board[N]) { 5 | for (int i = 0; i < N; i++) { 6 | for (int j = 0; j < N; j++) 7 | printf(board[i][j] ? "Q " : ". 
"); 8 | printf("\n"); 9 | } 10 | printf("\n\n"); 11 | } 12 | 13 | conflict(int *board[N], int row, int col) { 14 | for (int i = 0; i < row; i++) { 15 | if (board[i][col]) 16 | return 1; 17 | int j = row - i; 18 | if (0 <= (col - j) && board[i][col - j]) 19 | return 1; 20 | if ((col + j) < N && board[i][col + j]) 21 | return 1; 22 | } 23 | return 0; 24 | } 25 | 26 | solve(int *board[N], int row) { 27 | if (row == N) { 28 | print_board(board); 29 | return; 30 | } 31 | for (int i = 0; i < N; i++) { 32 | if (!conflict(board, row, i)) { 33 | board[row][i] = 1; 34 | solve(board, row + 1); 35 | board[row][i] = 0; 36 | } 37 | } 38 | } 39 | 40 | main() { 41 | int board[N2]; 42 | for (int i = 0; i < N2; i++) 43 | board[i] = 0; 44 | solve(board, 0); 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /string.c: -------------------------------------------------------------------------------- 1 | /* 2 | * string.c - byte string implementation 3 | * 4 | * Copyright 2010 Rui Ueyama . All rights reserved. 5 | * This code is available under the simplified BSD license. See LICENSE for details. 
6 | */ 7 | 8 | #include "8cc.h" 9 | 10 | static String *make_string_int(int size) { 11 | String *obj = malloc(sizeof(String)); 12 | obj->buf = malloc(size); 13 | obj->nalloc = size; 14 | obj->len = 0; 15 | obj->pos = 0; 16 | return obj; 17 | } 18 | 19 | String *make_string(void) { 20 | return make_string_int(STRING_INITIAL_SIZE); 21 | } 22 | 23 | String *to_string(char *str) { 24 | int size = STRING_INITIAL_SIZE; 25 | int needed = strlen(str) + 1; 26 | if (size < needed) size <<= 1; 27 | String *r = make_string_int(size); 28 | string_append(r, str); 29 | return r; 30 | } 31 | 32 | String *string_copy(String *b) { 33 | String *r = malloc(sizeof(String)); 34 | *r = *b; 35 | r->buf = malloc(r->nalloc); 36 | memcpy(r->buf, b->buf, r->len); 37 | return r; 38 | } 39 | 40 | bool string_equal(String *a, String *b) { 41 | return !strcmp(STRING_BODY(a), STRING_BODY(b)); 42 | } 43 | 44 | void string_append(String *b, char *p) { 45 | out(b, p, strlen(p) + 1); 46 | string_seek(b, b->len - 1); 47 | } 48 | 49 | String *string_prepend(String *b, char *p) { 50 | String *r = make_string(); 51 | string_append(r, p); 52 | string_append(r, STRING_BODY(b)); 53 | return r; 54 | } 55 | 56 | static void ensure_room(String *b, long room) { 57 | if (b->nalloc >= (b->pos + room)) 58 | return; 59 | long newsize = b->nalloc * 2; 60 | char *buf = malloc(newsize); 61 | memcpy(buf, b->buf, b->len); 62 | b->buf = buf; 63 | b->nalloc = newsize; 64 | } 65 | 66 | void o1(String *b, int byte) { 67 | ensure_room(b, 1); 68 | b->buf[b->pos++] = byte; 69 | if (b->len < b->pos) 70 | b->len = b->pos; 71 | } 72 | 73 | void out(String *b, void *data, size_t size) { 74 | ensure_room(b, size); 75 | for (int i = 0; i < size; i++) 76 | o1(b, ((char*)data)[i]); 77 | } 78 | 79 | void ostr(String *b, char *str) { 80 | out(b, str, strlen(str) + 1); 81 | } 82 | 83 | void o2(String *b, u16 data) { 84 | out(b, &data, 2); 85 | } 86 | 87 | void o3(String *b, u32 data) { 88 | out(b, &data, 3); 89 | } 90 | 91 | void 
o4(String *b, u32 data) { 92 | out(b, &data, 4); 93 | } 94 | 95 | void o8(String *b, u64 data) { 96 | out(b, &data, 8); 97 | } 98 | 99 | void align(String *b, int n) { 100 | int pad = n - b->len % n; 101 | for (int i = 0; i < pad; i++) 102 | o1(b, 0); 103 | } 104 | 105 | void string_seek(String *b, int pos) { 106 | if (pos > b->len) 107 | error("can't seek beyond the string boundary"); 108 | b->pos = pos; 109 | } 110 | 111 | void string_vprintf(String *b, char *format, va_list ap) { 112 | char buf[256]; 113 | int required = vsnprintf(buf, sizeof(buf), format, ap); 114 | if (required < sizeof(buf)) { 115 | string_append(b, buf); 116 | return; 117 | } 118 | char *p = malloc(required + 1); 119 | vsnprintf(p, required + 1, format, ap); 120 | string_append(b, p); 121 | } 122 | 123 | void string_printf(String *b, char *format, ...) { 124 | va_list ap; 125 | va_start(ap, format); 126 | string_vprintf(b, format, ap); 127 | va_end(ap); 128 | } 129 | 130 | String *make_string_printf(char *format, ...) 
{ 131 | va_list ap; 132 | va_start(ap, format); 133 | String *b = make_string(); 134 | string_vprintf(b, format, ap); 135 | va_end(ap); 136 | return b; 137 | } 138 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash 2 | CC=gcc 3 | CFLAGS=-std=c99 -Wall -g 4 | LDFLAGS=-ldl -rdynamic 5 | OBJS=string.o list.o gen.o lex.o cpp.o parse.o file.o dict.o error.o elf.o run.o init.o decl.o 6 | HEADERS=../8cc.h unittest.h 7 | TESTS=string alltests 8 | 9 | test: alltests 10 | @./alltests 11 | 12 | $(OBJS): %.o: %.c ../%.c $(HEADERS) 13 | 14 | alltests: $(OBJS) main.o 15 | $(CC) -Wall $(CFLAGS) $(LDFLAGS) -o $@ $^ 16 | 17 | clean: 18 | -rm -f *.o alltests .tmpTEST* 19 | 20 | .PHONEY: clean test 21 | -------------------------------------------------------------------------------- /test/cpp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 
4 | */ 5 | 6 | #include "unittest.h" 7 | #include "../cpp.c" 8 | 9 | static void set_date_time(CppContext *ctx) { 10 | struct tm *tm = malloc(sizeof(struct tm)); 11 | tm->tm_sec = 2; // seconds 12 | tm->tm_min = 55; // minutes 13 | tm->tm_hour = 17; // hours 14 | tm->tm_mday = 5; // day of the month 15 | tm->tm_mon = 0; // month 16 | tm->tm_year = 80; // year 17 | tm->tm_wday = 0; // day of the week 18 | tm->tm_yday = 5; // day in the year 19 | tm->tm_isdst = 0; // daylight saving time 20 | ctx->tm = tm; 21 | } 22 | 23 | static ReadContext *make_test_read_context(char *str) { 24 | File *file = mkfile(str); 25 | Elf *elf = new_elf(); 26 | CppContext *cppctx = make_cpp_context(file); 27 | set_date_time(cppctx); 28 | return make_read_context(file, elf, cppctx); 29 | } 30 | 31 | static CppContext *make_test_cpp_context(char *str) { 32 | CppContext *ctx = make_cpp_context(mkfile(str)); 33 | set_date_time(ctx); 34 | return ctx; 35 | } 36 | 37 | static List *parse_string(char *str) { 38 | ReadContext *ctx = make_test_read_context(str); 39 | List *expanded = make_list(); 40 | for (Token *tok = read_token(ctx); tok; tok = read_token(ctx)) 41 | list_push(expanded, tok); 42 | return expanded; 43 | } 44 | 45 | static bool list_equal(List *list0, List *list1) { 46 | if (LIST_LEN(list0) != LIST_LEN(list1)) 47 | return false; 48 | for (int i = 0; i < LIST_LEN(list0); i++) { 49 | Token *t0 = LIST_REF(list0, i); 50 | Token *t1 = LIST_REF(list1, i); 51 | if (t0->toktype != t1->toktype) 52 | return false; 53 | switch (t0->toktype) { 54 | case TOKTYPE_IDENT: 55 | case TOKTYPE_CPPNUM: 56 | case TOKTYPE_STRING: 57 | if (!string_equal(t0->val.str, t1->val.str)) 58 | return false; 59 | break; 60 | case TOKTYPE_KEYWORD: 61 | case TOKTYPE_CHAR: 62 | case TOKTYPE_INT: 63 | case TOKTYPE_PUNCT: 64 | if (t0->val.i != t1->val.i) 65 | return false; 66 | break; 67 | case TOKTYPE_FLOAT: 68 | if (t0->val.f != t1->val.f) 69 | return false; 70 | break; 71 | default: 72 | panic("invalid token: '%d'", 
t0->toktype); 73 | } 74 | } 75 | return true; 76 | } 77 | 78 | static char *token_list_to_string(List *list) { 79 | String *b = make_string(); 80 | for (int i = 0; i < LIST_LEN(list); i++) { 81 | if (STRING_LEN(b)) 82 | o1(b, ' '); 83 | Token *t = LIST_REF(list, i); 84 | string_printf(b, "%s", token_to_string(t)); 85 | } 86 | return STRING_BODY(b); 87 | } 88 | 89 | static void test(char *expect, char *input) { 90 | List *list0 = parse_string(expect); 91 | List *list1 = parse_string(input); 92 | if (!list_equal(list0, list1)) 93 | error("'%s' expected, but got '%s'\n", expect, token_list_to_string(list1)); 94 | } 95 | 96 | /* 97 | * #define 98 | */ 99 | TEST(cpp_define) { 100 | test("\"%d\" 3", 101 | "#define MSG \"%d\"\n" 102 | " # define NUM 3 \n" 103 | " MSG NUM"); 104 | 105 | test("3", 106 | "#define FOO(x) x\n" 107 | "FOO(3)"); 108 | 109 | test("3", 110 | "#define FOO() 3\n" 111 | "FOO()"); 112 | 113 | test("3", 114 | "#define FOO(x) 3\n" 115 | "FOO()"); 116 | 117 | test("3", 118 | "#define FOO(x) 3\n" 119 | "FOO(bar)"); 120 | 121 | test("int x=5; (x);", 122 | "#define FOO (x)\n" 123 | "int x=5; FOO;"); 124 | 125 | test("3, 4", 126 | "#define FOO(x, y) x, y\n" 127 | "FOO(3, 4)"); 128 | } 129 | 130 | /* 131 | * Recursively expanded macros. 
132 | */ 133 | TEST(cpp_recursive_macro) { 134 | test("\"ok\"", 135 | "#define FOO() \"ok\"\n" 136 | "#define BAR FOO()\n" 137 | "BAR"); 138 | 139 | test("\"ok\"", 140 | "#define FOO() \"ok\"\n" 141 | "#define BAR() FOO()\n" 142 | "BAR()"); 143 | 144 | // Examples in 6.10.3.5 145 | test("\"vers2\"", 146 | "#define str(s) # s\n" 147 | "#define xstr(s) str(s)\n" 148 | "#define INCFILE(n) vers ## n\n" 149 | "xstr(INCFILE(2))"); 150 | 151 | test("int foo=1; foo, 3", 152 | "int foo = 1;\n" 153 | "#define foo foo, 3\n" 154 | "foo"); 155 | 156 | test("\"hello\", \"hello\" \", world\"", 157 | "#define glue(a, b) a ## b\n" 158 | "#define xglue(a, b) glue(a, b)\n" 159 | "#define HIGHLOW \"hello\"\n" 160 | "#define LOW LOW \", world\"\n" 161 | "glue(HIGH, LOW), xglue(HIGH, LOW)"); 162 | 163 | test("123 45 67 89 10 11 12", 164 | "#define t(x,y,z) x ## y ## z\n" 165 | "t(1,2,3) t(,4,5) t(6,,7) t(8,9,) t(10,,) t(,11,) t(,,12) t(,,)"); 166 | 167 | test("(1-1)", 168 | "#define OBJ_LIKE (1-1)\n" 169 | "OBJ_LIKE"); 170 | 171 | test("(1-1)", 172 | "#define OBJ_LIKE /* white space */ (1-1) /* other */\n" 173 | "OBJ_LIKE"); 174 | 175 | test("(7)", 176 | "#define FUNC_LIKE(a) ( a )\n" 177 | "FUNC_LIKE(7)"); 178 | 179 | test("(7)", 180 | "#define FUNC_LIKE( a )( /* note the white space */ \\\n" 181 | "a /* other stuff on this line\n" 182 | "*/ )\n" 183 | "FUNC_LIKE(7)"); 184 | 185 | // More tests 186 | test("int A=1; 1+A", 187 | "int A=1;\n" 188 | "#define A 1+A\n" 189 | "#define B(x) x\n" 190 | "B(A)"); 191 | } 192 | 193 | /* 194 | * Variable argument list. 195 | */ 196 | TEST(cpp_va_args) { 197 | test("foo, bar, baz", 198 | "#define p(...) 
foo, __VA_ARGS__\n" 199 | "p(bar, baz)"); 200 | } 201 | 202 | /* 203 | * # operator 204 | */ 205 | TEST(cpp_sharp) { 206 | test("\"abc\"", 207 | "#define STR(x) #x\n" 208 | "STR(abc)"); 209 | 210 | test("1, \"abc\"", 211 | "#define STR(x, y) x, #y\n" 212 | "STR(1, abc)"); 213 | 214 | test("\"123\"", 215 | "#define STR(x) #x\n" 216 | "STR(123)"); 217 | 218 | test("\"123 abc\"", 219 | "#define STR(x) #x\n" 220 | "STR(123 abc)"); 221 | 222 | test("\"'0' '\\\\xfe'\"", 223 | "#define STR(x) #x\n" 224 | "STR('0' '\\xfe')"); 225 | 226 | test("\"\\\"a\\\\\\\"b\\\"\"", 227 | "#define STR(x) #x\n" 228 | "STR(\"a\\\"b\")"); 229 | 230 | test("\"()\"", 231 | "#define C(x) #x\n" 232 | "C(())"); 233 | 234 | test("4.4", 235 | "#define CAT(x, y) x ## y\n" 236 | "CAT(4, .4)"); 237 | } 238 | 239 | /* 240 | * ## operator 241 | */ 242 | TEST(cpp_twosharps) { 243 | test("ab", 244 | "#define CONC(x, y) x ## y\n" 245 | "CONC(a, b)"); 246 | 247 | /* 248 | * A tricky function-like macro in C99 spec. 249 | */ 250 | test("\"x ## y\"", 251 | "#define hash_hash # ## #\n" 252 | "#define mkstr(a) # a\n" 253 | "#define in_between(a) mkstr(a)\n" 254 | "#define join(c, d) in_between(c hash_hash d)\n" 255 | "join(x, y)"); 256 | } 257 | 258 | /* 259 | * #undef 260 | */ 261 | TEST(cpp_undef) { 262 | test("X", 263 | "#define X 17\n" 264 | "#undef X\n" 265 | "X"); 266 | 267 | // It is not an error if undef's argument is not defined 268 | test("X", 269 | "#undef X\n" 270 | "X"); 271 | } 272 | 273 | /* 274 | * #if, #elif, #else and #endif 275 | */ 276 | TEST(cpp_conditional_include) { 277 | test("a", 278 | "#define X\n" 279 | "#if defined(X)\n" 280 | "a\n" 281 | "#endif"); 282 | 283 | test("\"a\"", 284 | "#define X\n" 285 | "#if defined(X)\n" 286 | "\"a\"\n" 287 | "#else\n" 288 | "\"b\"\n" 289 | "#endif"); 290 | 291 | test("b", 292 | "#if defined(X)\n" 293 | "a\n" 294 | "#endif\n" 295 | "b"); 296 | 297 | test("'b'", 298 | "#if defined(X)\n" 299 | "'a'\n" 300 | "#else\n" 301 | "'b'\n" 302 | "#endif"); 303 | 
304 | test("b", 305 | "#if defined(X)\n" 306 | "# if foo\n" 307 | "# else\n" 308 | "# endif\n" 309 | "# if bar\n" 310 | "# endif\n" 311 | "a\n" 312 | "#else\n" 313 | "b\n" 314 | "#endif"); 315 | 316 | test("b", 317 | "#if defined(X)\n" 318 | "# ifdef\n" 319 | "# endif\n" 320 | "# ifndef\n" 321 | "# endif\n" 322 | "a\n" 323 | "#else\n" 324 | "b\n" 325 | "#endif"); 326 | 327 | test("b", 328 | "#ifdef X\n" 329 | " # ifdef\n" 330 | "# endif\n" 331 | "# ifndef\n" 332 | "# endif\n" 333 | "a\n" 334 | "#else\n" 335 | "b\n" 336 | "#endif"); 337 | 338 | test("a", 339 | "#ifndef X\n" 340 | "a\n" 341 | "#else\n" 342 | "b\n" 343 | "#endif"); 344 | 345 | test("\"c\"", 346 | "#if defined(X)\n" 347 | "a\n" 348 | "#elif defined(Y)\n" 349 | "b\n" 350 | "#else\n" 351 | "\"c\"\n" 352 | "#endif"); 353 | } 354 | 355 | /* 356 | * Integer constant expression 357 | */ 358 | TEST(cpp_constant_expr) { 359 | test("a", 360 | "#if 1 + 2\n" 361 | "a\n" 362 | "#endif"); 363 | test("b", 364 | "#if 1 - 1\n" 365 | "a\n" 366 | "#else\n" 367 | "b\n" 368 | "#endif"); 369 | test("a", 370 | "#if 2 / 1\n" 371 | "a\n" 372 | "#else\n" 373 | "b\n" 374 | "#endif"); 375 | } 376 | 377 | /* 378 | * Null directive 379 | */ 380 | TEST(cpp_null_directive) { 381 | test("X", 382 | "#\n" 383 | "X"); 384 | } 385 | 386 | /* 387 | * #line 388 | */ 389 | TEST(cpp_line_directive) { 390 | test("50", 391 | "#line 50\n" 392 | "__LINE__"); 393 | 394 | test("50 \"foo\"", 395 | "#line 50 \"foo\"\n" 396 | "__LINE__ __FILE__"); 397 | } 398 | 399 | /* 400 | * Predefined macros 401 | */ 402 | TEST(cpp_predefined_macros) { 403 | test("1", "__8CC__"); 404 | test("\"Jan 05 1980\"", "__DATE__"); 405 | test("\"17:55:02\"", "__TIME__"); 406 | test("\"-\"", "__FILE__"); 407 | test("1", "__LINE__"); 408 | test("1", "__STDC__"); 409 | test("1", "__STDC_HOSTED__"); 410 | test("199901", "__STDC_VERSION__"); 411 | } 412 | 413 | /* 414 | * Bigraphs 415 | */ 416 | TEST(cpp_bigraph) { 417 | test("[ ] { } # ##;", 418 | "<: :> <% %> %: %:%:;"); 419 
| } 420 | 421 | /* 422 | * #include 423 | */ 424 | TEST(cpp_include) { 425 | String *name; 426 | bool std; 427 | 428 | CppContext *ctx = make_test_cpp_context(""); 429 | read_cpp_header_name(ctx, &name, &std); 430 | EQ_STR("foo", STRING_BODY(name)); 431 | EQ(std, true); 432 | 433 | ctx = make_test_cpp_context("\"bar\""); 434 | read_cpp_header_name(ctx, &name, &std); 435 | EQ_STR("bar", STRING_BODY(name)); 436 | EQ(std, false); 437 | } 438 | 439 | /* 440 | * #pragma and _Pragma() 441 | */ 442 | TEST(pragma) { 443 | Exception *e = make_exception(); 444 | if (TRY(e)) 445 | parse_string("#pragma foo"); 446 | CONTAINS("No pragmas supported", STRING_BODY(e->msg)); 447 | 448 | if (TRY(e)) 449 | parse_string("_Pragma(\"foo\")"); 450 | CONTAINS("No pragmas supported", STRING_BODY(e->msg)); 451 | } 452 | 453 | 454 | TEST(cpp_include_buffered) { 455 | String *name; 456 | bool std; 457 | 458 | CppContext *ctx = make_test_cpp_context(""); 459 | peek_cpp_token(ctx); 460 | read_cpp_header_name(ctx, &name, &std); 461 | EQ_STR("foo", STRING_BODY(name)); 462 | EQ(std, true); 463 | 464 | ctx = make_test_cpp_context("\"bar\""); 465 | peek_cpp_token(ctx); 466 | read_cpp_header_name(ctx, &name, &std); 467 | EQ_STR("bar", STRING_BODY(name)); 468 | EQ(std, false); 469 | } 470 | 471 | TEST(cpp_open_header) { 472 | CppContext *ctx = make_test_cpp_context(""); 473 | List *paths = make_list(); 474 | list_push(paths, to_string("/")); 475 | list_push(paths, to_string("/dev")); 476 | list_push(paths, to_string("")); 477 | 478 | File *file = open_header(ctx, to_string("null"), paths); 479 | EQ_STR("/dev/null", STRING_BODY(file->filename)); 480 | } 481 | -------------------------------------------------------------------------------- /test/decl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 
4 | */ 5 | 6 | #include "../decl.c" 7 | -------------------------------------------------------------------------------- /test/dict.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "unittest.h" 7 | #include "../dict.c" 8 | 9 | /* 10 | * Dictionary 11 | */ 12 | 13 | TEST(dict) { 14 | Dict *dict = make_string_dict(); 15 | EQ(0, dict->nelem); 16 | String *k = to_string("abc"); 17 | dict_put(dict, k, (void *)-1); 18 | EQ(1, dict->nelem); 19 | EQ(-1, (long) dict_get(dict, k)); 20 | EQ(NULL, dict_get(dict, to_string("nonexistent"))); 21 | 22 | // test rehashing 23 | for (int i = 0; i < DICT_INITIAL_SIZE * 2; i++) { 24 | String *b = make_string_printf("key%d", i); 25 | dict_put(dict, b, (void *)(long)i); 26 | } 27 | EQ(1 + DICT_INITIAL_SIZE * 2, dict->nelem); 28 | for (int i = 0; i < DICT_INITIAL_SIZE * 2; i++) { 29 | String *b = make_string_printf("key%d", i); 30 | EQ(i, (int)(long)dict_get(dict, b)); 31 | } 32 | 33 | // Store duplicate key 34 | dict_put(dict, k, (void *)-2); 35 | EQ(-2, (long) dict_get(dict, k)); 36 | EQ(1 + DICT_INITIAL_SIZE * 2, dict->nelem); 37 | 38 | // Removal 39 | bool existed = dict_delete(dict, k); 40 | EQ(DICT_INITIAL_SIZE * 2, dict->nelem); 41 | EQ(NULL, dict_get(dict, k)); 42 | EQ(true, existed); 43 | existed = dict_delete(dict, k); 44 | EQ(false, existed); 45 | 46 | // Address dictionary 47 | dict = make_address_dict(); 48 | k = to_string("abc"); 49 | dict_put(dict, k, (void *)-1); 50 | EQ(-1, (long) dict_get(dict, k)); 51 | EQ(NULL, dict_get(dict, to_string("abc"))); 52 | } 53 | 54 | TEST(test_dict_iter) { 55 | Dict *dict = make_string_dict(); 56 | DictIter *iter = make_dict_iter(dict); 57 | EQ(NULL, dict_iter_next(iter)); 58 | 59 | dict_put(dict, to_string("key"), (void *)-1); 60 | iter = make_dict_iter(dict); 61 | void **p = 
dict_iter_next(iter); 62 | EQ_STR(STRING_BODY((String *)to_string("key")), STRING_BODY((String *)p[0])); 63 | EQ(-1, (intptr)p[1]); 64 | EQ(NULL, dict_iter_next(iter)); 65 | } 66 | 67 | 68 | -------------------------------------------------------------------------------- /test/elf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "../elf.c" 7 | -------------------------------------------------------------------------------- /test/error.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "unittest.h" 7 | #include "../error.c" 8 | 9 | TEST(error) { 10 | Exception *e = make_exception(); 11 | if (TRY(e)) 12 | error("foo"); 13 | EQ_STR("ERROR: foo", STRING_BODY(e->msg)); 14 | } 15 | -------------------------------------------------------------------------------- /test/file.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 
4 | */ 5 | 6 | #include "unittest.h" 7 | #include "../file.c" 8 | 9 | static FILE *create_file(char *content) { 10 | char tmpl[] = "tmpXXXXXX"; 11 | int fd = mkstemp(tmpl); 12 | if (fd < 0) { 13 | perror("fd: "); 14 | exit(-1); 15 | } 16 | unlink(tmpl); 17 | write(fd, content, strlen(content)); 18 | lseek(fd, 0, SEEK_SET); 19 | return fdopen(fd, "r"); 20 | } 21 | 22 | /* 23 | * File IO 24 | */ 25 | 26 | TEST(stdio_file) { 27 | FILE *stream = create_file("a\n"); 28 | File *file = make_file(stream, "foo"); 29 | EQ_STR(STRING_BODY(file->filename), "foo"); 30 | EQ_CHAR('a', readc(file)); 31 | } 32 | 33 | TEST(string_file) { 34 | File *file = mkfile("a\n"); 35 | EQ_CHAR('a', readc(file)); 36 | } 37 | 38 | TEST(file_unreadc) { 39 | File *file = mkfile("a\n"); 40 | EQ(1, file->line); 41 | EQ(1, file->column); 42 | EQ_CHAR('a', readc(file)); 43 | EQ(2, file->column); 44 | EQ_CHAR('\n', readc(file)); 45 | EQ(2, file->line); 46 | EQ(1, file->column); 47 | unreadc('\n', file); 48 | EQ(1, file->line); 49 | EQ(2, file->column); 50 | EQ_CHAR('\n', readc(file)); 51 | EQ(2, file->line); 52 | EQ(1, file->column); 53 | } 54 | 55 | TEST(file_next_char_is) { 56 | File *file = mkfile("ab"); 57 | EQ(false, next_char_is(file, 'b')); 58 | EQ(false, next_char_is(file, 'b')); 59 | EQ(true, next_char_is(file, 'a')); 60 | EQ(false, next_char_is(file, 'a')); 61 | EQ(true, next_char_is(file, 'b')); 62 | } 63 | 64 | TEST(file_simple) { 65 | File *file = mkfile("ab\nc\r\r\nd\r"); 66 | 67 | EQ(1, file->line); 68 | EQ(1, file->column); 69 | EQ_CHAR('a', readc(file)); 70 | EQ_CHAR('b', readc(file)); 71 | EQ(3, file->column); 72 | EQ_CHAR('\n', readc(file)); 73 | EQ(2, file->line); 74 | EQ(1, file->column); 75 | EQ_CHAR('c', readc(file)); 76 | EQ_CHAR('\n', readc(file)); 77 | EQ(3, file->line); 78 | EQ_CHAR('\n', readc(file)); 79 | EQ(4, file->line); 80 | EQ_CHAR('d', readc(file)); 81 | EQ_CHAR('\n', readc(file)); 82 | EQ(5, file->line); 83 | EQ_CHAR(EOF, readc(file)); 84 | } 85 | 86 | 
TEST(file_backslash_at_eol) { 87 | File *file = mkfile("2\\\n0\\\r\n10"); 88 | 89 | EQ(1, file->line); 90 | EQ(1, file->column); 91 | EQ_CHAR('2', readc(file)); 92 | EQ_CHAR('0', readc(file)); 93 | EQ_CHAR('1', readc(file)); 94 | EQ_CHAR('0', readc(file)); 95 | EQ(3, file->line); 96 | EQ(3, file->column); 97 | } 98 | -------------------------------------------------------------------------------- /test/gen.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "../gen.c" 7 | -------------------------------------------------------------------------------- /test/init.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "../init.c" 7 | -------------------------------------------------------------------------------- /test/lex.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "../lex.c" 7 | -------------------------------------------------------------------------------- /test/list.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 
4 | */ 5 | 6 | #include "unittest.h" 7 | #include "../list.c" 8 | 9 | /* 10 | * List 11 | */ 12 | 13 | TEST(list) { 14 | List *list = make_list(); 15 | EQ(0, LIST_LEN(list)); 16 | 17 | list_push(list, (void *)17); 18 | list_push(list, (void *)42); 19 | EQ(2, (intptr)LIST_LEN(list)); 20 | EQ(17, (intptr)LIST_REF(list, 0)); 21 | EQ(42, (intptr)LIST_REF(list, 1)); 22 | EQ(42, (intptr)list_pop(list)); 23 | EQ(17, (intptr)list_pop(list)); 24 | EQ(0, LIST_LEN(list)); 25 | 26 | list_push(list, (void *)17); 27 | list_push(list, (void *)42); 28 | EQ(17, (intptr)list_unshift(list)); 29 | EQ(42, (intptr)list_unshift(list)); 30 | 31 | // list_reverse() 32 | List *list1 = make_list(); 33 | list_push(list1, (void *)17); 34 | list_push(list1, (void *)42); 35 | List *rev = list_reverse(list1); 36 | EQ(42, (intptr)LIST_REF(rev, 0)); 37 | EQ(17, (intptr)LIST_REF(rev, 1)); 38 | } 39 | 40 | TEST(list_as_set) { 41 | List *a = make_list(); 42 | List *b = make_list(); 43 | list_push(a, to_string("abc")); 44 | list_push(a, to_string("def")); 45 | list_push(b, to_string("abc")); 46 | list_push(b, to_string("XYZ")); 47 | 48 | List *uni = list_union(a, b); 49 | EQ(3, LIST_LEN(uni)); 50 | EQ(true, list_in(uni, to_string("abc"))); 51 | EQ(true, list_in(uni, to_string("def"))); 52 | EQ(true, list_in(uni, to_string("XYZ"))); 53 | 54 | List *uni1 = list_union(a, make_list()); 55 | EQ(2, LIST_LEN(uni1)); 56 | List *uni2 = list_union(make_list(), a); 57 | EQ(2, LIST_LEN(uni2)); 58 | 59 | List *uni3 = list_union1(a, to_string("123")); 60 | EQ(3, LIST_LEN(uni3)); 61 | EQ(true, list_in(uni3, to_string("123"))); 62 | 63 | List *uni4 = list_union1(a, to_string("abc")); 64 | EQ(2, LIST_LEN(uni4)); 65 | 66 | List *intersect = list_intersect(a, b); 67 | EQ(1, LIST_LEN(intersect)); 68 | EQ(true, list_in(uni, to_string("abc"))); 69 | 70 | List *intersect1 = list_intersect(a, make_list()); 71 | EQ(0, LIST_LEN(intersect1)); 72 | List *intersect2 = list_intersect(make_list(), a); 73 | EQ(0, 
LIST_LEN(intersect2)); 74 | } 75 | 76 | -------------------------------------------------------------------------------- /test/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "unittest.h" 7 | #include 8 | #include 9 | #include 10 | 11 | List* test_funcs; 12 | 13 | static bool run_in_memory; 14 | 15 | void eq_str(int line, char *expected, char *got) { 16 | if (strcmp(expected, got)) 17 | error("line %d: \"%s\" expected, but got \"%s\"", line, expected, got); 18 | } 19 | 20 | void eq_str1(int line, char *expected, char *got, char *msg) { 21 | if (strcmp(expected, got)) 22 | error("line %d: \"%s\" expected, but got \"%s\"\n %s", line, expected, got, msg); 23 | } 24 | 25 | void eq_string(int line, char *expected, String *got) { 26 | eq_str(line, expected, STRING_BODY(got)); 27 | } 28 | 29 | void eq_char(int line, int expected, int got) { 30 | if (expected != got) 31 | error("line %d: '%c' expected, but got '%c'", line, expected, got); 32 | } 33 | 34 | void contains(int line, char *expected, char *got) { 35 | if (!strstr(got, expected)) 36 | error("line %d: '%s' expected, but got '%s'", line, expected, got); 37 | } 38 | 39 | /* 40 | * Parser 41 | */ 42 | 43 | File *mkfile(char *str) { 44 | return make_string_file(to_string(str)); 45 | } 46 | 47 | ReadContext *mkctx(char *str) { 48 | File *file = mkfile(str); 49 | return make_read_context(file, NULL, make_cpp_context(file)); 50 | } 51 | 52 | /* 53 | * Compile and run 54 | */ 55 | 56 | static void run_command(char *command, ...) 
{ 57 | char *args[10]; 58 | int i = 0; 59 | va_list ap; 60 | va_start(ap, command); 61 | args[i++] = command; 62 | char *arg; 63 | do { 64 | arg = va_arg(ap, char *); 65 | args[i++] = arg; 66 | } while (arg); 67 | 68 | pid_t pid; 69 | int status; 70 | if ( (pid = fork()) ) 71 | do { 72 | if (waitpid(pid, &status, 0) < 0) { 73 | perror("waitpid failed:"); 74 | exit(-1); 75 | } 76 | } while (!WIFEXITED(status)); 77 | else 78 | execvp(command, args); 79 | if (WEXITSTATUS(status)) 80 | error("'%s' failed", command); 81 | } 82 | 83 | static String *read_fd(int fd) { 84 | char buf[512]; 85 | String *b = make_string(); 86 | for (;;) { 87 | int nread = read(fd, buf, sizeof(buf)); 88 | if (nread == 0) break; 89 | out(b, buf, nread); 90 | } 91 | o1(b, '\0'); 92 | close(fd); 93 | return b; 94 | } 95 | 96 | static int wait_child(pid_t pid) { 97 | int status; 98 | do { 99 | if (waitpid(pid, &status, 0) < 0) { 100 | perror("waitpid failed:"); 101 | exit(-1); 102 | } 103 | } while (!WIFEXITED(status)); 104 | return WEXITSTATUS(status); 105 | } 106 | 107 | static String *run_command_string(char *command) { 108 | int pipefd[2]; 109 | pid_t pid; 110 | pipe(pipefd); 111 | if ( (pid = fork()) ) { 112 | close(pipefd[1]); 113 | String *b = read_fd(pipefd[0]); 114 | if (wait_child(pid)) 115 | error("'%s' failed", command); 116 | return b; 117 | } else { 118 | close(pipefd[0]); 119 | dup2(pipefd[1], 1); 120 | close(pipefd[1]); 121 | execlp(command, command, (char *)NULL); 122 | return NULL; // dummy 123 | } 124 | } 125 | 126 | static void run_fast_test(char *expected, char *input) { 127 | pid_t pid; 128 | int pipefd[2]; 129 | pipe(pipefd); 130 | if ( (pid = fork()) ) { 131 | close(pipefd[1]); 132 | String *b = read_fd(pipefd[0]); 133 | if (wait_child(pid)) 134 | error("'%s' failed", input); 135 | EQ_STR1(expected, STRING_BODY(b), input); 136 | } else { 137 | close(pipefd[0]); 138 | dup2(pipefd[1], 1); 139 | close(pipefd[1]); 140 | exit(run_string(input)); 141 | } 142 | } 143 | 144 | 
static void test(char *expected, char *input) { 145 | if (run_in_memory) { 146 | run_fast_test(expected, input); 147 | return; 148 | } 149 | 150 | char source[] = ".tmpTEST-src-XXXXXX"; 151 | int fd = mkstemp(source); 152 | FILE *file = fdopen(fd, "w"); 153 | fwrite(input, 1, strlen(input), file); 154 | fclose(file); 155 | 156 | char object[] = ".tmpTEST-obj-XXXXXX"; 157 | fd = mkstemp(object); 158 | close(fd); 159 | 160 | char exec[] = "./.tmpTEST-exec-XXXXXX"; 161 | fd = mkstemp(exec); 162 | close(fd); 163 | 164 | struct stat statbuf; 165 | if (stat("../8cc", &statbuf) == 0) 166 | run_command("../8cc", source, object, (char *)NULL); 167 | else if (stat("./8cc", &statbuf) == 0) 168 | run_command("./8cc", source, object, (char *)NULL); 169 | else 170 | panic("8cc not found"); 171 | unlink(source); 172 | run_command("gcc", "-o", exec, object, (char *)NULL); 173 | unlink(object); 174 | String *s = run_command_string(exec); 175 | unlink(exec); 176 | EQ_STR1(expected, STRING_BODY(s), input); 177 | } 178 | 179 | /*============================================================================== 180 | * Various tests 181 | */ 182 | 183 | /* 184 | * Basic tests 185 | */ 186 | TEST(basic_types) { 187 | test("", "main(){-1;}"); 188 | test("Hello, world!", "main(){printf(\"Hello, world!\");}"); 189 | test("Hello, world!", "main(){printf(\"Hello, %s\", \"world!\");}"); 190 | test("3", "main(){int i=3; printf(\"%d\", i);}"); 191 | test("6", "main(){int i=3; int j=0; j=i+3; printf(\"%d\", j);}"); 192 | test("50", "main(){int i=atoi(\"50\"); int j = i; printf(\"%d\", j);}"); 193 | test("15", "main(){int i=3; int j=i+5+7; printf(\"%d\", j);}"); 194 | test("-5", "main(){int i=3; int j=5-i-7; printf(\"%d\", j);}"); 195 | test("0", "main(){int i=3; int j=5-i-7; printf(\"%d\", j+5);}"); 196 | test("0", "main(){int i=3; int j=5-i-7; printf(\"%d\", j+5);}"); 197 | test("3.5", "main(){printf(\"%.1f\", 3.0 + 0.5);}"); 198 | test("2.5", "main(){printf(\"%.1f\", 3.0 - 0.5);}"); 199 | 
test("9.9", "main(){printf(\"%.1f\", 1.1 * 9.0);}"); 200 | test("3.0", "main(){printf(\"%.1f\", 9.9 / 3.3);}"); 201 | } 202 | 203 | /* 204 | * Operator precedences 205 | */ 206 | TEST(op_precedence) { 207 | test("10", "main(){int i=3; int j=1+i*3; printf(\"%d\", j);}"); 208 | test("5", "main(){int i=9; int j=1+i/3+9/i; printf(\"%d\", j);}"); 209 | } 210 | 211 | /* 212 | * Assignment 213 | */ 214 | TEST(assignment) { 215 | test("3", "main(){int i=3; int j=i; printf(\"%d\", j);}"); 216 | test("5", "main(){int i=3; i+=2; printf(\"%d\", i);}"); 217 | test("1", "main(){int i=3; i-=2; printf(\"%d\", i);}"); 218 | test("6", "main(){int i=3; i*=2; printf(\"%d\", i);}"); 219 | test("2", "main(){int i=6; i/=3; printf(\"%d\", i);}"); 220 | } 221 | 222 | /* 223 | * Comma operator 224 | */ 225 | TEST(comma_op) { 226 | test("321", "main(){int i=3; while (i) printf(\"%d\", i), i=i-1;}"); 227 | } 228 | 229 | /* 230 | * Parenthesized operator 231 | */ 232 | TEST(parenthesized_op) { 233 | test("25", "main(){int i=2; int j=(i+3)*5; printf(\"%d\", j);}"); 234 | } 235 | 236 | /* 237 | * "if" statement 238 | */ 239 | TEST(if_op) { 240 | test("true", "main(){int i=1; if (i) { printf(\"true\"); } else { printf(\"false\"); }}"); 241 | test("false", "main(){int i=0; if (i) { printf(\"true\"); } else { printf(\"false\"); }}"); 242 | test("true", "main(){int i=1; if (i) printf(\"true\"); else printf(\"false\");}"); 243 | } 244 | 245 | /* 246 | * "while" statement 247 | */ 248 | TEST(while_stmt) { 249 | test("54321a", "main(){int i=5; while (i) { printf(\"%d\", i); i=i-1; } printf(\"a\");}"); 250 | test("54321a", "main(){int i=5; while (i) printf(\"%d\", i), i=i-1; printf(\"a\");}"); 251 | } 252 | 253 | /* 254 | * "for" statement 255 | */ 256 | TEST(for_stmt) { 257 | test("321", "main(){int i=0; for (i=3;i;i=i-1) { printf(\"%d\", i); }}"); 258 | test("321", "main(){int i=0; for (i=3;i;i=i-1) printf(\"%d\", i);}"); 259 | test("321", "main(){for (int i=3;i;i=i-1) printf(\"%d\", i);}"); 260 | } 
261 | 262 | /* 263 | * "do" statement 264 | */ 265 | TEST(do_stmt) { 266 | test("321", "main(){int i=3; do { printf(\"%d\", i); i=i-1;} while (i);}"); 267 | test("321", "main(){int i=3; do printf(\"%d\", i), i=i-1; while (i);}"); 268 | } 269 | 270 | /* 271 | * "==" and "!=" operators 272 | */ 273 | TEST(eq_and_ne) { 274 | test("1", "main(){int i=5; int j=5; int k=i==j; printf(\"%d\", k);}"); 275 | test("0", "main(){int i=3; int j=5; int k=i==j; printf(\"%d\", k);}"); 276 | test("true", "main(){int i=5; int j=5; if (i==j) { printf(\"true\"); } else { printf(\"false\"); }}"); 277 | test("false", "main(){int i=3; int j=5; if (i==j) { printf(\"true\"); } else { printf(\"false\"); }}"); 278 | // "!=" 279 | test("0", "main(){int i=5; int j=5; int k=i!=j; printf(\"%d\", k);}"); 280 | test("1", "main(){int i=3; int j=5; int k=i!=j; printf(\"%d\", k);}"); 281 | test("false", "main(){int i=5; int j=5; if (i!=j) { printf(\"true\"); } else { printf(\"false\"); }}"); 282 | test("true", "main(){int i=3; int j=5; if (i!=j) { printf(\"true\"); } else { printf(\"false\"); }}"); 283 | // Flonum 284 | test("1", "main(){printf(\"%d\", 1.2 == 1.2);}"); 285 | test("0", "main(){printf(\"%d\", 1.2 == 1.0);}"); 286 | test("0", "main(){printf(\"%d\", 1.2 != 1.2);}"); 287 | test("1", "main(){printf(\"%d\", 1.2 != 1.0);}"); 288 | } 289 | 290 | /* 291 | * "!" 
operator 292 | */ 293 | TEST(not_op) { 294 | test("1", "main(){int i=0; printf(\"%d\", !i);}"); 295 | test("0", "main(){int i=1; printf(\"%d\", !i);}"); 296 | test("0", "main(){int i=0; printf(\"%d\", !!i);}"); 297 | test("1", "main(){int i=9; printf(\"%d\", !!i);}"); 298 | } 299 | 300 | /* 301 | * "~" operator 302 | */ 303 | TEST(bit_not) { 304 | test("-1 -2", "main(){printf(\"%d %d\", ~0, ~1);}"); 305 | test("-1 -2", "main(){int i=0; int j=1; printf(\"%d %d\", ~i, ~j);}"); 306 | } 307 | 308 | /* 309 | * "&" operator 310 | */ 311 | TEST(bit_and) { 312 | test("20", "main(){printf(\"%d\", 30&21);}"); 313 | test("20", "main(){int i=30; i&=21; printf(\"%d\", i);}"); 314 | } 315 | 316 | /* 317 | * "|" operator 318 | */ 319 | TEST(bit_or) { 320 | test("30", "main(){printf(\"%d\", 20|26);}"); 321 | test("30", "main(){int i=20; i|=26; printf(\"%d\", i);}"); 322 | } 323 | 324 | /* 325 | * "^" operator 326 | */ 327 | TEST(xor) { 328 | test("4", "main(){printf(\"%d\", 7^3);}"); 329 | test("7", "main(){int i=4; i^=3; printf(\"%d\", i);}"); 330 | } 331 | 332 | /* 333 | * Shift operators 334 | */ 335 | TEST(shift) { 336 | test("1", "main(){printf(\"%d\", 1<<0);}"); 337 | test("4", "main(){printf(\"%d\", 1<<2);}"); 338 | test("1", "main(){printf(\"%d\", 1>>0);}"); 339 | test("1", "main(){printf(\"%d\", 4>>2);}"); 340 | test("16", "main(){int i=4; i<<=2; printf(\"%d\", i);}"); 341 | test("2", "main(){int i=8; i>>=2; printf(\"%d\", i);}"); 342 | } 343 | 344 | /* 345 | * comparison operators 346 | */ 347 | TEST(comparison) { 348 | test("1", "main(){int i=3; int j=5; printf(\"%d\", ij);}"); 350 | test("0", "main(){int i=3; int j=3; printf(\"%d\", ij);}"); 352 | test("1", "main(){int i=3; int j=5; printf(\"%d\", i<=j);}"); 353 | test("0", "main(){int i=3; int j=5; printf(\"%d\", i>=j);}"); 354 | test("1", "main(){int i=3; int j=3; printf(\"%d\", i<=j);}"); 355 | test("1", "main(){int i=3; int j=3; printf(\"%d\", i>=j);}"); 356 | test("1", "main(){printf(\"%d\", 0<=1);}"); 357 | 
test("0", "main(){printf(\"%d\", 1<=0);}"); 358 | test("1", "main(){printf(\"%d\", 1>=0);}"); 359 | test("0", "main(){printf(\"%d\", 0>=1);}"); 360 | test("0", "main(){printf(\"%d\", 0<=-1);}"); 361 | test("1", "main(){printf(\"%d\", -1<=0);}"); 362 | test("0", "main(){printf(\"%d\", -1>=0);}"); 363 | test("1", "main(){printf(\"%d\", 0>=-1);}"); 364 | // Floating point numbers 365 | test("1", "main(){float i=3.0; float j=5.0; printf(\"%d\", ij);}"); 367 | test("0", "main(){float i=3.0; float j=3.0; printf(\"%d\", ij);}"); 369 | test("1", "main(){float i=3.0; float j=5.0; printf(\"%d\", i<=j);}"); 370 | test("0", "main(){float i=3.0; float j=5.0; printf(\"%d\", i>=j);}"); 371 | test("1", "main(){float i=3.0; float j=3.0; printf(\"%d\", i<=j);}"); 372 | test("1", "main(){float i=3.0; float j=3.0; printf(\"%d\", i>=j);}"); 373 | test("1", "main(){printf(\"%d\", 0.0<=1.0);}"); 374 | test("0", "main(){printf(\"%d\", 1.0<=0.0);}"); 375 | test("1", "main(){printf(\"%d\", 1.0>=0.0);}"); 376 | test("0", "main(){printf(\"%d\", 0.0>=1.0);}"); 377 | test("0", "main(){printf(\"%d\", 0.0<=-1.0);}"); 378 | test("1", "main(){printf(\"%d\", -1.0<=0.0);}"); 379 | test("0", "main(){printf(\"%d\", -1.0>=0.0);}"); 380 | test("1", "main(){printf(\"%d\", 0.0>=-1.0);}"); 381 | } 382 | 383 | /* 384 | * "?:" operator 385 | */ 386 | TEST(ternary) { 387 | test("17", "main(){int i=1; printf(\"%d\", i?17:42);}"); 388 | test("42", "main(){int i=0; printf(\"%d\", i?17:42);}"); 389 | test("2", "main(){int i=1; int j=i?i+1:i-1; printf(\"%d\", j);}"); 390 | test("0", "main(){int i=1; int j=i-1?i+1:i-1; printf(\"%d\", j);}"); 391 | test("-1", "main(){int i=0; int j=i?i+1:i-1; printf(\"%d\", j);}"); 392 | } 393 | 394 | /* 395 | * && operator 396 | */ 397 | TEST(log_and) { 398 | test("ab", "main(){1 && printf(\"a\"); printf(\"b\");}"); 399 | test("b", "main(){0 && printf(\"a\"); printf(\"b\");}"); 400 | test("3", "main(){int i = 1 && 3; printf(\"%d\", i);}"); 401 | test("0", "main(){int i = 5 && 3 && 
0; printf(\"%d\", i);}"); 402 | test("0", "main(){int i = 0 && 0; printf(\"%d\", i);}"); 403 | } 404 | 405 | /* 406 | * || operator 407 | */ 408 | TEST(log_or) { 409 | test("b", "main(){1 || printf(\"a\"); printf(\"b\");}"); 410 | test("ab", "main(){0 || printf(\"a\"); printf(\"b\");}"); 411 | test("1", "main(){int i = 1 || 3; printf(\"%d\", i);}"); 412 | test("3", "main(){int i = 0 || 3 || 5; printf(\"%d\", i);}"); 413 | test("0", "main(){int i = 0 || 0; printf(\"%d\", i);}"); 414 | } 415 | 416 | /* 417 | * "++" and "--" operators 418 | */ 419 | TEST(inc_and_dec) { 420 | test("12", "main(){int i=1; printf(\"%d\", i++);printf(\"%d\", i);}"); 421 | test("22", "main(){int i=1; printf(\"%d\", ++i);printf(\"%d\", i);}"); 422 | test("54", "main(){int i=5; printf(\"%d\", i--);printf(\"%d\", i);}"); 423 | test("44", "main(){int i=5; printf(\"%d\", --i);printf(\"%d\", i);}"); 424 | test("3 5", "main(){int a[2]; a[0]=3; a[1]=5; int *p=a; *p++; printf(\"%d %d\", a[0], *p);}"); 425 | } 426 | 427 | /* 428 | * "break" and "continue" 429 | */ 430 | TEST(break_and_continue) { 431 | test("bar", "main(){int i=1; while (1) { if (i) { break; } printf(\"foo\"); } printf(\"bar\");}"); 432 | test("aac", "main(){int i=2; while (i) { if (i) { printf(\"a\"); i=i-1; continue; } printf(\"b\"); } printf(\"c\");}"); 433 | test("32a", "main(){for (int i=3;i;i=i-1) { printf(\"%d\", i); if (i==2) { break; } } printf(\"a\");}"); 434 | test("321a", "main(){for (int i=3;i;i) { if (i) { printf(\"%d\", i); i=i-1; continue; } } printf(\"a\");}"); 435 | } 436 | 437 | /* 438 | * "goto" statement 439 | */ 440 | TEST(goto_stmt) { 441 | test("acbd", "main(){A: printf(\"a\"); goto C; B: printf(\"b\"); goto D; C: printf(\"c\"); goto B; D: printf(\"d\");}"); 442 | } 443 | 444 | /* 445 | * "return" statement 446 | */ 447 | TEST(return_stmt) { 448 | test("a", "main(){printf(\"a\"); return 0;}"); 449 | } 450 | 451 | /* 452 | * Function call 453 | */ 454 | TEST(function_call) { 455 | test("foo", "main() { bar(); } 
bar() { printf(\"foo\"); }"); 456 | test("foo", "bar() { printf(\"foo\"); } main() { bar(); }"); 457 | test("17", "main() { printf(\"%d\", bar()); } bar() { return 17; }"); 458 | // functions taking parameters 459 | test("1 2", "main() { bar(1, 2); } bar(int i, int j) { printf(\"%d %d\", i, j); }"); 460 | test("17 42", "main() { int p[3]; p[0]=17; p[1]=42; bar(p); } bar(int *p) { printf(\"%d %d\", p[0], p[1]); }"); 461 | test("3", "int main() { printf(\"%d\", foo(3)); } int foo(int arg) { return arg; }"); 462 | } 463 | 464 | /* 465 | * Pointer operations 466 | */ 467 | TEST(pointer_op) { 468 | test("17", "main(){long i=17; long *j=&i; printf(\"%d\", *j);}"); 469 | test("17", "main(){long i=17; long *j=&i; long **k=&j; printf(\"%d\", **k);}"); 470 | test("42", "main(){long i=17; long *j=&i; *j=42; printf(\"%d\", *j);}"); 471 | test("42", "main(){long i=17; long *j=&i; long **k=&j; **k=42; printf(\"%d\", **k);}"); 472 | } 473 | 474 | /* 475 | * Array 476 | */ 477 | TEST(array) { 478 | test("17", "main(){int i[20]; printf(\"17\");}"); 479 | test("17 42", "main(){int i[20]; i[0]=17; i[19]=42; printf(\"%d %d\", i[0], i[19]);}"); 480 | test("17 42", "main(){int i[20]; int *p=i; p[0]=17; p[1]=42; printf(\"%d %d\", *p, p[1]);}"); 481 | test("5", "main(){int i[20]; int *p=i; int *q=p+5; printf(\"%d\", q-p);}"); 482 | test("123", "main(){ int a[3][3]; a[0][1]=1; a[2][0]=2; a[2][2]=3; printf(\"%d%d%d\", a[0][1], a[2][0], a[2][2]);}"); 483 | test("012345678", "main(){int a[3][3]; for (int i=0;i<3;i++) for (int j=0;j<3;j++) a[i][j]=i*3+j; for (int i=0;i<9;i++) printf(\"%d\",*(*a+i));}"); 484 | test("bx", "main(){printf(0 ? \"a\" : \"b\"); printf(1 ? 
\"x\" : \"y\");}"); 485 | } 486 | 487 | /* 488 | * Aray and pointer arithmetic 489 | */ 490 | TEST(array_and_pointer_arith) { 491 | test("17 42", "main(){int i[20]; i[0]=17; i[1]=42; int *j=i; printf(\"%d \", *j); j++; printf(\"%d\", *j);}"); 492 | } 493 | 494 | /* 495 | * Array and function parameter 496 | */ 497 | TEST(array_as_function_param) { 498 | test("012345678", "main() { int a[9]; for (int i=0;i<9;i++) a[i]=i; f(a); } f(int a[][3]) { for (int i=0;i<3;i++) for (int j=0;j<3;j++) printf(\"%d\", a[i][j]); }"); 499 | test("012345678", "main() { int a[9]; for (int i=0;i<9;i++) a[i]=i; f(a); } f(int *a[3]) { for (int i=0;i<3;i++) for (int j=0;j<3;j++) printf(\"%d\", a[i][j]); }"); 500 | } 501 | 502 | /* 503 | * Char type 504 | */ 505 | TEST(char_type) { 506 | test("3 257", "main(){char c=3; int i=c+254; printf(\"%d %d\", c, i);}"); 507 | test("2", "main(){char c=255+3; printf(\"%d\", c);}"); 508 | test("-1", "main(){char c=255; printf(\"%d\", c);}"); 509 | test("255", "main(){unsigned char c=255; printf(\"%d\", c);}"); 510 | } 511 | 512 | /* 513 | * Literal string 514 | */ 515 | TEST(literal_string) { 516 | test("Hello", "main(){char *p=\"Hello\"; printf(\"%s\", p);}"); 517 | test("Hello world", "main(){char *p=\"Hello \" \"world\"; printf(\"%s\", p);}"); 518 | } 519 | 520 | /* 521 | * Type conversion between floating point and integer 522 | */ 523 | TEST(float_to_int) { 524 | test("3.0", "main(){float f=3; printf(\"%.1f\", f);}"); 525 | test("3", "main(){int i=3.0; printf(\"%d\", i);}"); 526 | } 527 | 528 | /* 529 | * Binary, octal, and hexadecimal numbers 530 | */ 531 | TEST(numbers) { 532 | test("511", "main(){printf(\"%d\", 0777);}"); 533 | test("255 255", "main(){printf(\"%d %d\", 0xff, 0XFF);}"); 534 | test("7 7", "main(){printf(\"%d %d\", 0b111, 0B111);}"); 535 | } 536 | 537 | /*============================================================================== 538 | * Entry point 539 | */ 540 | 541 | static void *run_tests() { 542 | while 
(LIST_LEN(test_funcs) > 0) { 543 | char *fname = (char *)list_pop(test_funcs); 544 | printf(" %s\n", fname); 545 | void (*testfn)(void) = list_pop(test_funcs); 546 | testfn(); 547 | } 548 | } 549 | 550 | int main(int argc, char **argv) { 551 | printf("Running unit tests ...\n"); 552 | run_in_memory = !(argc == 2 && !strcmp(argv[1], "-n")); 553 | run_tests(); 554 | printf("ALL TESTS PASSED\n"); 555 | } 556 | -------------------------------------------------------------------------------- /test/parse.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Rui Ueyama . All rights reserved. 3 | * This code is available under the simplified BSD license. See LICENSE for details. 4 | */ 5 | 6 | #include "../parse.c" 7 | #include "unittest.h" 8 | 9 | /* 10 | * Parser 11 | */ 12 | 13 | TEST(read_comment) { 14 | ReadContext *ctx = mkctx("/* 1 * */ 2 // 3 \n 4"); 15 | EQ(2, read_token(ctx)->val.i); 16 | EQ(4, read_token(ctx)->val.i); 17 | EQ(NULL, read_token(ctx)); 18 | } 19 | 20 | TEST(read_float) { 21 | ReadContext *ctx = mkctx("1 2.0"); 22 | Token *tok = read_token(ctx); 23 | EQ(TOKTYPE_INT, tok->toktype); 24 | EQ(1, tok->val.i); 25 | 26 | tok = read_token(ctx); 27 | EQ(TOKTYPE_FLOAT, tok->toktype); 28 | EQ(2.0, tok->val.f); 29 | } 30 | 31 | TEST(read_char) { 32 | ReadContext *ctx = mkctx("'a' '\\n' '\\0' '\\23' '\\233' '\\x3' '\\x3f'"); 33 | Token *tok = read_token(ctx); 34 | EQ(TOKTYPE_CHAR, tok->toktype); 35 | EQ('a', tok->val.i); 36 | EQ('\n', read_token(ctx)->val.i); 37 | EQ('\0', read_token(ctx)->val.i); 38 | EQ('\23', read_token(ctx)->val.i); 39 | EQ('\233', read_token(ctx)->val.i); 40 | EQ('\x3', read_token(ctx)->val.i); 41 | EQ('\x3f', read_token(ctx)->val.i); 42 | } 43 | 44 | #define TEST_READ_KEYWORDS(ctx_, type_) \ 45 | do { \ 46 | Token *tok = read_token(ctx_); \ 47 | EQ(TOKTYPE_KEYWORD, tok->toktype); \ 48 | EQ(tok->val.i, type_); \ 49 | } while (0) 50 | 51 | TEST(read_keywords) { 52 | ReadContext *ctx = 
mkctx("int float ( ) { } ! = ^ == ++ -- ||"); 53 | TEST_READ_KEYWORDS(ctx, KEYWORD_INT); 54 | TEST_READ_KEYWORDS(ctx, KEYWORD_FLOAT); 55 | TEST_READ_KEYWORDS(ctx, '('); 56 | TEST_READ_KEYWORDS(ctx, ')'); 57 | TEST_READ_KEYWORDS(ctx, '{'); 58 | TEST_READ_KEYWORDS(ctx, '}'); 59 | TEST_READ_KEYWORDS(ctx, '!'); 60 | TEST_READ_KEYWORDS(ctx, '='); 61 | TEST_READ_KEYWORDS(ctx, '^'); 62 | TEST_READ_KEYWORDS(ctx, KEYWORD_EQ); 63 | TEST_READ_KEYWORDS(ctx, KEYWORD_INC); 64 | TEST_READ_KEYWORDS(ctx, KEYWORD_DEC); 65 | TEST_READ_KEYWORDS(ctx, KEYWORD_LOG_OR); 66 | } 67 | 68 | TEST(read_unget_token) { 69 | ReadContext *ctx = mkctx("int float ("); 70 | Token *t0 = read_token(ctx); 71 | Token *t1 = read_token(ctx); 72 | Token *t2 = read_token(ctx); 73 | unget_token(ctx, t2); 74 | unget_token(ctx, t1); 75 | unget_token(ctx, t0); 76 | 77 | TEST_READ_KEYWORDS(ctx, KEYWORD_INT); 78 | TEST_READ_KEYWORDS(ctx, KEYWORD_FLOAT); 79 | TEST_READ_KEYWORDS(ctx, '('); 80 | } 81 | 82 | /* 83 | * A function used by sizeof operator. 
84 |  */
85 | TEST(ctype_sizeof) {
86 |     EQ(1, ctype_sizeof(make_ctype(CTYPE_CHAR)));   /* scalar types: char, short, int, long */
87 |     EQ(2, ctype_sizeof(make_ctype(CTYPE_SHORT)));
88 |     EQ(4, ctype_sizeof(make_ctype(CTYPE_INT)));
89 |     EQ(8, ctype_sizeof(make_ctype(CTYPE_LONG)));
90 |     EQ(8, ctype_sizeof(make_ctype_ptr(make_ctype(CTYPE_CHAR))));   /* every pointer is 8 bytes, whatever it points to */
91 |     EQ(8, ctype_sizeof(make_ctype_ptr(make_ctype(CTYPE_INT))));
92 |     EQ(8, ctype_sizeof(make_ctype_ptr(make_ctype_ptr((make_ctype(CTYPE_INT))))));
93 |     EQ(20, ctype_sizeof(make_ctype_array(make_ctype(CTYPE_CHAR), 20)));   /* array: element size times length */
94 |     EQ(36, ctype_sizeof(make_ctype_array(make_ctype_array(make_ctype(CTYPE_INT), 3), 3)));   /* nested array: 3 * 3 * 4 */
95 | }
96 |
--------------------------------------------------------------------------------
/test/run.c:
--------------------------------------------------------------------------------
1 | #define _GNU_SOURCE 1
2 | #include "unittest.h"
3 | #include "../run.c"
4 | /* NOTE(review): presumably run_string() compiles and runs the given source and returns main's result - confirm in run.c. */
5 | TEST(run_string) {
6 |     EQ(1, run_string("main() { return 1; }"));
7 | }
8 |
--------------------------------------------------------------------------------
/test/string.c:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2010 Rui Ueyama . All rights reserved.
3 |  * This code is available under the simplified BSD license. See LICENSE for details.
4 |  */
5 |
6 | #include "unittest.h"
7 | #include "../string.c"
8 | /* Packs the bytes of str into a 64-bit word, str[0] in the lowest byte; bytes past the eighth are shifted out. Each byte is read as unsigned char so values >= 0x80 cannot sign-extend over the higher bytes. */
9 | static int64_t qword(char *str) {
10 |     uint64_t r = 0;
11 |     for (int i = (int)strlen(str) - 1; i >= 0; i--)
12 |         r = (r << 8) | (unsigned char)str[i];
13 |     return (int64_t)r;
14 | }
15 | /* Checks make_string(), o1() and o8(): o8() is expected to append the eight bytes of its argument lowest byte first. */
16 | TEST(string) {
17 |     String *b = make_string();
18 |     NOT_NULL(b);
19 |     EQ(STRING_LEN(b), 0);
20 |
21 |     o1(b, 'a');
22 |     EQ(STRING_LEN(b), 1);
23 |     EQ(STRING_BODY(b)[0], 'a');
24 |
25 |     o8(b, qword("bcdefghi"));
26 |     EQ(STRING_LEN(b), 9);
27 |     EQ(strcmp(STRING_BODY(b), "abcdefghi"), 0);
28 | }
29 |
--------------------------------------------------------------------------------
/test/unittest.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * unittest.h - unit test utilities
3 |  *
4 |  * Copyright 2010 Rui Ueyama . All rights reserved.
5 |  * This code is available under the simplified BSD license. See LICENSE for details.
6 |  */
7 |
8 | /*
9 |  * License of all the other files in this directory is the same as above.
10 | */ 11 | 12 | #include "../8cc.h" 13 | 14 | #define CONSTRUCTOR __attribute__((constructor)) 15 | 16 | #define IS_NULL(p) do { if (p) error("Line %d: must be null " #p, __LINE__); } while (0) 17 | #define NOT_NULL(p) do { if (!(p)) error("Line %d: must not be null " #p, __LINE__); } while (0) 18 | #define EQ(x, y) do { if ((x) != (y)) error("Line %d: must be the same: '%s' and '%s'", __LINE__, #x, #y); } while (0) 19 | #define EQ_CHAR(x, y) do { eq_char(__LINE__, (x), (y)); } while (0) 20 | #define EQ_STR(x, y) do { eq_str(__LINE__, (x), (y)); } while (0) 21 | #define EQ_STR1(x, y, msg) do { eq_str1(__LINE__, (x), (y), msg); } while (0) 22 | #define EQ_STRING(x, y) do { eq_string(__LINE__, (x), (y)); } while (0) 23 | #define CONTAINS(x, y) do { contains(__LINE__, (x), (y)); } while (0) 24 | 25 | extern List* test_funcs; 26 | 27 | #define TEST(name) \ 28 | static void TEST_##name(void); \ 29 | CONSTRUCTOR static void name##_TEST_INIT(void) { \ 30 | if (!test_funcs) \ 31 | test_funcs = make_list(); \ 32 | list_push(test_funcs, TEST_##name); \ 33 | list_push(test_funcs, #name); \ 34 | } \ 35 | static void TEST_##name(void) 36 | 37 | extern void eq_str(int line, char *expected, char *got); 38 | extern void eq_str1(int line, char *expected, char *got, char *msg); 39 | extern void eq_string(int line, char *expected, String *got); 40 | extern void eq_char(int line, int expected, int got); 41 | extern File *mkfile(char *str); 42 | extern ReadContext *mkctx(char *str); 43 | extern void contains(int line, char *expected, char *got); 44 | -------------------------------------------------------------------------------- /utils/dump: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "main() { $1 }" | ./8cc - run.o && objdump -d run.o 3 | -------------------------------------------------------------------------------- /utils/dump1: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | echo "$1" | ./8cc - run.o && objdump -d run.o 3 | -------------------------------------------------------------------------------- /utils/dump2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "$1" | ./8cc -d -run - 3 | echo 4 | -------------------------------------------------------------------------------- /utils/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "main() { 3 | $1 }" | ./8cc - run.o && gcc -o run run.o && ./run 4 | -------------------------------------------------------------------------------- /utils/run1: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "$1" | ./8cc - run.o && gcc -o run run.o && ./run 3 | --------------------------------------------------------------------------------